cerfaparse 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.fr.md +159 -0
- package/README.md +159 -0
- package/dist/cli.d.ts +27 -0
- package/dist/cli.js +620 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +134 -0
- package/dist/index.js +630 -0
- package/dist/index.js.map +1 -0
- package/package.json +59 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 calibrae
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.fr.md
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
[English](README.md) | [Français](README.fr.md)
|
|
2
|
+
|
|
3
|
+
# cerfaparse
|
|
4
|
+
|
|
5
|
+
Convertit les formulaires CERFA PDF non interactifs en PDF remplissables (AcroForm) avec des définitions de champs en JSON.
|
|
6
|
+
|
|
7
|
+
## Fonctionnalités
|
|
8
|
+
|
|
9
|
+
1. Extrait les cases caractères et les cases à cocher depuis la géométrie du PDF (via SVG)
|
|
10
|
+
2. Extrait les libellés depuis la couche texte du PDF
|
|
11
|
+
3. Associe les libellés aux groupes de champs pour générer des noms de champs pertinents
|
|
12
|
+
4. Injecte des champs AcroForm (champs texte à cases + cases à cocher) dans le PDF
|
|
13
|
+
5. Produit un fichier `.fields.json` avec toutes les définitions de champs (nom du champ dans `key`, type, libellé, page, position, maxLength)
|
|
14
|
+
|
|
15
|
+
## Prérequis
|
|
16
|
+
|
|
17
|
+
- Node.js >= 20
|
|
18
|
+
- Outils CLI [Poppler](https://poppler.freedesktop.org/) (`pdftocairo`, `pdftotext`, `pdfinfo`)
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# macOS
|
|
22
|
+
brew install poppler
|
|
23
|
+
|
|
24
|
+
# Debian/Ubuntu
|
|
25
|
+
sudo apt-get install poppler-utils
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Installation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
npm install cerfaparse
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## Utilisation en ligne de commande
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
npx cerfaparse convert <input.pdf> [-o <output.pdf>]
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Exemple :
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
npx cerfaparse convert docs/pdf-cerfa_cs8_bleu-recto-verso-140x202mm.pdf -o /tmp/cs8-fillable.pdf
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
Cela produit :
|
|
47
|
+
- `/tmp/cs8-fillable.pdf` — le PDF original avec les champs AcroForm superposés
|
|
48
|
+
- `/tmp/cs8-fillable.fields.json` — les définitions de champs en JSON
|
|
49
|
+
|
|
50
|
+
## Format de sortie JSON (compatible ngx-formly)
|
|
51
|
+
|
|
52
|
+
Le JSON de sortie utilise des définitions de champs compatibles [ngx-formly](https://formly.dev/) (`key`, `type`, `props`) avec les métadonnées spatiales intégrées dans `props` :
|
|
53
|
+
|
|
54
|
+
```json
|
|
55
|
+
{
|
|
56
|
+
"pages": [
|
|
57
|
+
{
|
|
58
|
+
"pageNumber": 1,
|
|
59
|
+
"fields": [
|
|
60
|
+
{
|
|
61
|
+
"key": "p1_nom",
|
|
62
|
+
"type": "input",
|
|
63
|
+
"props": {
|
|
64
|
+
"label": "Nom :",
|
|
65
|
+
"maxLength": 9,
|
|
66
|
+
"page": 1,
|
|
67
|
+
"pdfRect": { "x": 50.4, "y": 505.6, "width": 104.1, "height": 10.9 }
|
|
68
|
+
}
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"key": "p1_oui",
|
|
72
|
+
"type": "checkbox",
|
|
73
|
+
"props": {
|
|
74
|
+
"label": "Oui",
|
|
75
|
+
"page": 1,
|
|
76
|
+
"pdfRect": { "x": 288.1, "y": 472.3, "width": 8.0, "height": 8.0 }
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
]
|
|
80
|
+
}
|
|
81
|
+
]
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Types de champs
|
|
86
|
+
|
|
87
|
+
| Type | Description | Props |
|
|
88
|
+
|------|-------------|-------|
|
|
89
|
+
| `input` | Champ texte — rendu libre dans formly, une case par caractère (peigne) dans le PDF lorsque `maxLength` est défini | `maxLength`, `label`, `page`, `pdfRect` |
|
|
90
|
+
| `checkbox` | Case à cocher | `label`, `page`, `pdfRect` |
|
|
91
|
+
|
|
92
|
+
## Utilisation en tant que bibliothèque (Node.js)
|
|
93
|
+
|
|
94
|
+
```typescript
|
|
95
|
+
import { convert } from 'cerfaparse';
|
|
96
|
+
|
|
97
|
+
const { pdfOut, jsonOut, fields } = await convert('input.pdf', 'output.pdf');
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Ou utilisez les fonctions individuellement :
|
|
101
|
+
|
|
102
|
+
```typescript
|
|
103
|
+
import { extractBoxes } from 'cerfaparse';
|
|
104
|
+
import { extractSvg } from 'cerfaparse';
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Utilisation avec Angular / ngx-formly
|
|
108
|
+
|
|
109
|
+
Le JSON de sortie est directement compatible avec [ngx-formly](https://formly.dev/). Exécutez `convert` au moment du build, puis utilisez les champs JSON tels quels :
|
|
110
|
+
|
|
111
|
+
```typescript
|
|
112
|
+
// Charger le JSON généré
|
|
113
|
+
import fieldDefs from './assets/cerfa-cs8.fields.json';
|
|
114
|
+
|
|
115
|
+
// Les champs sont déjà compatibles formly — il suffit de les aplatir
|
|
116
|
+
const formlyFields = fieldDefs.pages.flatMap(page => page.fields);
|
|
117
|
+
// Chaque champ contient : { key, type, props: { label, maxLength?, page, pdfRect } }
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Les champs utilisent les types formly standard (`input`, `checkbox`). La prop `maxLength` limite la longueur de saisie dans le formulaire ; lors du remplissage du PDF, `maxLength` déclenche le rendu en peigne (un caractère par case).
|
|
121
|
+
|
|
122
|
+
Pour remplir le PDF côté client avec [pdf-lib](https://pdf-lib.js.org/) (fonctionne dans le navigateur) :
|
|
123
|
+
|
|
124
|
+
```typescript
|
|
125
|
+
import { PDFDocument } from 'pdf-lib';
|
|
126
|
+
|
|
127
|
+
const pdfBytes = await fetch('/assets/cerfa-cs8-fillable.pdf').then(r => r.arrayBuffer());
|
|
128
|
+
const pdfDoc = await PDFDocument.load(pdfBytes);
|
|
129
|
+
const form = pdfDoc.getForm();
|
|
130
|
+
|
|
131
|
+
for (const [key, value] of Object.entries(formValues)) {
|
|
132
|
+
const field = fieldDefs.pages.flatMap(p => p.fields).find(f => f.key === key);
|
|
133
|
+
if (!field) continue;
|
|
134
|
+
if (field.type === 'input') {
|
|
135
|
+
form.getTextField(key).setText(String(value));
|
|
136
|
+
} else {
|
|
137
|
+
const cb = form.getCheckBox(key);
|
|
138
|
+
value ? cb.check() : cb.uncheck();
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
const filledBytes = await pdfDoc.save();
|
|
143
|
+
// Déclencher le téléchargement ou l'affichage
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Tests
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
npx vitest run
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## Fonctionnement
|
|
153
|
+
|
|
154
|
+
1. **pdftocairo** convertit chaque page du PDF en SVG
|
|
155
|
+
2. Les éléments SVG `<path>` avec un remplissage blanc sont classés comme cases caractères (contour blanc, épaisseur ~1) ou cases à cocher (contour foncé, épaisseur ~0.5)
|
|
156
|
+
3. Les matrices de transformation affine SVG (y compris les transformations des ancêtres `<g>`/`<use>`) sont composées pour convertir les coordonnées SVG en coordonnées PDF (origine en bas à gauche, Y vers le haut)
|
|
157
|
+
4. Les cases sont regroupées en lignes par proximité Y, puis découpées en champs par écarts X
|
|
158
|
+
5. **pdftotext** fournit le texte des libellés et leurs positions via la sortie bbox, associés aux groupes de champs par proximité spatiale
|
|
159
|
+
6. **pdf-lib** injecte les champs AcroForm avec des fonds transparents aux coordonnées PDF calculées
|
package/README.md
ADDED
|
@@ -0,0 +1,159 @@
|
|
|
1
|
+
[English](README.md) | [Français](README.fr.md)
|
|
2
|
+
|
|
3
|
+
# cerfaparse
|
|
4
|
+
|
|
5
|
+
Convert flat (non-interactive) French CERFA PDF forms into fillable AcroForm PDFs with JSON field definitions.
|
|
6
|
+
|
|
7
|
+
## What it does
|
|
8
|
+
|
|
9
|
+
1. Extracts character cells and checkboxes from the PDF geometry (via SVG)
|
|
10
|
+
2. Extracts labels from the PDF text layer
|
|
11
|
+
3. Maps labels to field groups to generate meaningful field names
|
|
12
|
+
4. Injects AcroForm fields (combed text fields + checkboxes) into the PDF
|
|
13
|
+
5. Outputs a `.fields.json` file with all field definitions (field name as `key`, type, label, page, position, maxLength)
|
|
14
|
+
|
|
15
|
+
## Prerequisites
|
|
16
|
+
|
|
17
|
+
- Node.js >= 20
|
|
18
|
+
- [Poppler](https://poppler.freedesktop.org/) CLI tools (`pdftocairo`, `pdftotext`, `pdfinfo`)
|
|
19
|
+
|
|
20
|
+
```bash
|
|
21
|
+
# macOS
|
|
22
|
+
brew install poppler
|
|
23
|
+
|
|
24
|
+
# Debian/Ubuntu
|
|
25
|
+
sudo apt-get install poppler-utils
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Install
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
npm install cerfaparse
|
|
32
|
+
```
|
|
33
|
+
|
|
34
|
+
## CLI Usage
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
npx cerfaparse convert <input.pdf> [-o <output.pdf>]
|
|
38
|
+
```
|
|
39
|
+
|
|
40
|
+
Example:
|
|
41
|
+
|
|
42
|
+
```bash
|
|
43
|
+
npx cerfaparse convert docs/pdf-cerfa_cs8_bleu-recto-verso-140x202mm.pdf -o /tmp/cs8-fillable.pdf
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
This produces:
|
|
47
|
+
- `/tmp/cs8-fillable.pdf` — the original PDF with overlay AcroForm fields
|
|
48
|
+
- `/tmp/cs8-fillable.fields.json` — JSON field definitions
|
|
49
|
+
|
|
50
|
+
## JSON Output Format (ngx-formly compatible)
|
|
51
|
+
|
|
52
|
+
The output JSON uses [ngx-formly](https://formly.dev/)-compatible field definitions (`key`, `type`, `props`) with spatial metadata embedded in `props`:
|
|
53
|
+
|
|
54
|
+
```json
|
|
55
|
+
{
|
|
56
|
+
"pages": [
|
|
57
|
+
{
|
|
58
|
+
"pageNumber": 1,
|
|
59
|
+
"fields": [
|
|
60
|
+
{
|
|
61
|
+
"key": "p1_nom",
|
|
62
|
+
"type": "input",
|
|
63
|
+
"props": {
|
|
64
|
+
"label": "Nom :",
|
|
65
|
+
"maxLength": 9,
|
|
66
|
+
"page": 1,
|
|
67
|
+
"pdfRect": { "x": 50.4, "y": 505.6, "width": 104.1, "height": 10.9 }
|
|
68
|
+
}
|
|
69
|
+
},
|
|
70
|
+
{
|
|
71
|
+
"key": "p1_oui",
|
|
72
|
+
"type": "checkbox",
|
|
73
|
+
"props": {
|
|
74
|
+
"label": "Oui",
|
|
75
|
+
"page": 1,
|
|
76
|
+
"pdfRect": { "x": 288.1, "y": 472.3, "width": 8.0, "height": 8.0 }
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
]
|
|
80
|
+
}
|
|
81
|
+
]
|
|
82
|
+
}
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
### Field types
|
|
86
|
+
|
|
87
|
+
| Type | Description | Props |
|
|
88
|
+
|------|-------------|-------|
|
|
89
|
+
| `input` | Text input — rendered as free-form in formly, mapped to one-char-per-box (combed) in PDF when `maxLength` is set | `maxLength`, `label`, `page`, `pdfRect` |
|
|
90
|
+
| `checkbox` | Checkbox | `label`, `page`, `pdfRect` |
|
|
91
|
+
|
|
92
|
+
## Library Usage (Node.js)
|
|
93
|
+
|
|
94
|
+
```typescript
|
|
95
|
+
import { convert } from 'cerfaparse';
|
|
96
|
+
|
|
97
|
+
const { pdfOut, jsonOut, fields } = await convert('input.pdf', 'output.pdf');
|
|
98
|
+
```
|
|
99
|
+
|
|
100
|
+
Or use individual functions:
|
|
101
|
+
|
|
102
|
+
```typescript
|
|
103
|
+
import { extractBoxes } from 'cerfaparse';
|
|
104
|
+
import { extractSvg } from 'cerfaparse';
|
|
105
|
+
```
|
|
106
|
+
|
|
107
|
+
## Using with Angular / ngx-formly
|
|
108
|
+
|
|
109
|
+
The JSON output is directly compatible with [ngx-formly](https://formly.dev/). Run `convert` at build time, then use the JSON fields as-is:
|
|
110
|
+
|
|
111
|
+
```typescript
|
|
112
|
+
// Load the generated JSON
|
|
113
|
+
import fieldDefs from './assets/cerfa-cs8.fields.json';
|
|
114
|
+
|
|
115
|
+
// Fields are already formly-compatible — just flatten across pages
|
|
116
|
+
const formlyFields = fieldDefs.pages.flatMap(page => page.fields);
|
|
117
|
+
// Each field has: { key, type, props: { label, maxLength?, page, pdfRect } }
|
|
118
|
+
```
|
|
119
|
+
|
|
120
|
+
Fields use standard formly types (`input`, `checkbox`). The `maxLength` prop constrains input length in the form; when filling the PDF, `maxLength` triggers combed rendering (one character per cell).
|
|
121
|
+
|
|
122
|
+
To fill the PDF client-side with [pdf-lib](https://pdf-lib.js.org/) (works in the browser):
|
|
123
|
+
|
|
124
|
+
```typescript
|
|
125
|
+
import { PDFDocument } from 'pdf-lib';
|
|
126
|
+
|
|
127
|
+
const pdfBytes = await fetch('/assets/cerfa-cs8-fillable.pdf').then(r => r.arrayBuffer());
|
|
128
|
+
const pdfDoc = await PDFDocument.load(pdfBytes);
|
|
129
|
+
const form = pdfDoc.getForm();
|
|
130
|
+
|
|
131
|
+
for (const [key, value] of Object.entries(formValues)) {
|
|
132
|
+
const field = fieldDefs.pages.flatMap(p => p.fields).find(f => f.key === key);
|
|
133
|
+
if (!field) continue;
|
|
134
|
+
if (field.type === 'input') {
|
|
135
|
+
form.getTextField(key).setText(String(value));
|
|
136
|
+
} else {
|
|
137
|
+
const cb = form.getCheckBox(key);
|
|
138
|
+
value ? cb.check() : cb.uncheck();
|
|
139
|
+
}
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
const filledBytes = await pdfDoc.save();
|
|
143
|
+
// Trigger download or display
|
|
144
|
+
```
|
|
145
|
+
|
|
146
|
+
## Tests
|
|
147
|
+
|
|
148
|
+
```bash
|
|
149
|
+
npx vitest run
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
## How it works
|
|
153
|
+
|
|
154
|
+
1. **pdftocairo** converts each PDF page to SVG
|
|
155
|
+
2. SVG `<path>` elements with white fill are classified as character cells (white stroke, stroke-width ~1) or checkboxes (dark stroke, stroke-width ~0.5)
|
|
156
|
+
3. SVG affine transform matrices (including ancestor `<g>`/`<use>` transforms) are composed to convert from SVG content coordinates to PDF coordinates (bottom-left origin, Y-up)
|
|
157
|
+
4. Boxes are grouped into rows by Y-proximity, then split into fields by X-gaps
|
|
158
|
+
5. **pdftotext** bbox output provides label text and positions, matched to field groups by spatial proximity
|
|
159
|
+
6. **pdf-lib** injects AcroForm fields with transparent backgrounds at the computed PDF coordinates
|
package/dist/cli.d.ts
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
/** Formly-compatible field types: `input` is a text field, `checkbox` is a checkbox (see the README "Field types" table). */
|
|
2
|
+
type FieldType = 'input' | 'checkbox';
|
|
3
|
+
interface PdfRect { // Position and size of a field on its page. NOTE(review): README describes PDF coordinates as bottom-left origin, Y-up; units are presumably PDF points — confirm.
|
|
4
|
+
x: number; // horizontal position (presumably the rectangle's left edge — confirm)
|
|
5
|
+
y: number; // vertical position (presumably the rectangle's bottom edge in Y-up space — confirm)
|
|
6
|
+
width: number; // horizontal extent, same unit as x
|
|
7
|
+
height: number; // vertical extent, same unit as y
|
|
8
|
+
}
|
|
9
|
+
/** Formly-compatible field definition (`key`, `type`, `props`) with spatial metadata carried inside `props`. */
|
|
10
|
+
interface Field {
|
|
11
|
+
key: string; // field identifier, e.g. "p1_nom" in the README; the README fill example also uses it as the AcroForm field name
|
|
12
|
+
type: FieldType; // 'input' (text) or 'checkbox'
|
|
13
|
+
props: {
|
|
14
|
+
label: string; // label text associated with the field, e.g. "Nom :" in the README example
|
|
15
|
+
maxLength?: number; // optional; per the README, when set the text field is rendered combed (one character per cell) in the PDF
|
|
16
|
+
page: number; // page the field belongs to (README example uses 1 for the first page; presumably 1-based — confirm)
|
|
17
|
+
pdfRect: PdfRect; // location and size of the field on that page
|
|
18
|
+
};
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
declare function convert(inputPath: string, outputPath?: string): Promise<{ // Converts the PDF at inputPath; outputPath is optional (default not visible here — presumably derived from inputPath, confirm against the implementation). Resolves with the fields below.
|
|
22
|
+
pdfOut: string; // presumably the path of the generated fillable PDF — confirm
|
|
23
|
+
jsonOut: string; // presumably the path of the generated `.fields.json` file — confirm
|
|
24
|
+
fields: Field[]; // the extracted field definitions
|
|
25
|
+
}>;
|
|
26
|
+
|
|
27
|
+
export { convert };
|