n8n-nodes-docx-filler 1.1.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,7 +1,7 @@
1
1
  {
2
2
  "name": "n8n-nodes-docx-filler",
3
- "version": "1.1.0",
4
- "description": "n8n node to automatically fill DOCX documents (French DC1, DC2, AE forms) with company data. Works as AI Agent tool.",
3
+ "version": "2.0.0",
4
+ "description": "n8n node to automatically fill DOCX documents (French DC1, DC2, AE forms) using AI for semantic understanding and field mapping.",
5
5
  "keywords": [
6
6
  "n8n-community-node-package",
7
7
  "n8n",
@@ -15,7 +15,10 @@
15
15
  "AE",
16
16
  "french",
17
17
  "company",
18
- "ai-tool"
18
+ "ai-tool",
19
+ "langchain",
20
+ "llm",
21
+ "ai-agent"
19
22
  ],
20
23
  "license": "MIT",
21
24
  "homepage": "https://github.com/rokodo-io/n8n-nodes-docx-filler",
@@ -44,7 +47,8 @@
44
47
  "n8n": {
45
48
  "n8nNodesApiVersion": 1,
46
49
  "nodes": [
47
- "dist/DocxFiller/DocxFiller.node.js"
50
+ "dist/DocxFiller/DocxFiller.node.js",
51
+ "dist/DocxFillerAI/DocxFillerAI.node.js"
48
52
  ]
49
53
  },
50
54
  "devDependencies": {
@@ -1,515 +0,0 @@
1
- "use strict";
2
- var __createBinding = (this && this.__createBinding) || (Object.create ? (function(o, m, k, k2) {
3
- if (k2 === undefined) k2 = k;
4
- var desc = Object.getOwnPropertyDescriptor(m, k);
5
- if (!desc || ("get" in desc ? !m.__esModule : desc.writable || desc.configurable)) {
6
- desc = { enumerable: true, get: function() { return m[k]; } };
7
- }
8
- Object.defineProperty(o, k2, desc);
9
- }) : (function(o, m, k, k2) {
10
- if (k2 === undefined) k2 = k;
11
- o[k2] = m[k];
12
- }));
13
- var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create ? (function(o, v) {
14
- Object.defineProperty(o, "default", { enumerable: true, value: v });
15
- }) : function(o, v) {
16
- o["default"] = v;
17
- });
18
- var __importStar = (this && this.__importStar) || (function () {
19
- var ownKeys = function(o) {
20
- ownKeys = Object.getOwnPropertyNames || function (o) {
21
- var ar = [];
22
- for (var k in o) if (Object.prototype.hasOwnProperty.call(o, k)) ar[ar.length] = k;
23
- return ar;
24
- };
25
- return ownKeys(o);
26
- };
27
- return function (mod) {
28
- if (mod && mod.__esModule) return mod;
29
- var result = {};
30
- if (mod != null) for (var k = ownKeys(mod), i = 0; i < k.length; i++) if (k[i] !== "default") __createBinding(result, mod, k[i]);
31
- __setModuleDefault(result, mod);
32
- return result;
33
- };
34
- })();
35
- var __importDefault = (this && this.__importDefault) || function (mod) {
36
- return (mod && mod.__esModule) ? mod : { "default": mod };
37
- };
38
- Object.defineProperty(exports, "__esModule", { value: true });
39
- exports.DocxFiller = void 0;
40
- const n8n_workflow_1 = require("n8n-workflow");
41
- const pizzip_1 = __importDefault(require("pizzip"));
42
- const fs = __importStar(require("fs"));
43
// Label phrases to look for in DC/AE documents, grouped by the field they
// identify. Object.entries turns the table into ordered [fieldName, phrases]
// pairs, which is the shape the field scanners iterate over.
const LABEL_PATTERNS = Object.entries({
  siret: ['numéro siret', 'n° siret', 'siret'],
  tva_intra: ['tva intracommunautaire', 'identification européen'],
  email: ['adresse électronique', 'courriel', 'e-mail'],
  telephone: ['numéros de téléphone', 'téléphone', 'télécopie'],
  adresse: ['adresse postale', 'siège social'],
  nom_commercial: ['nom commercial', 'dénomination sociale', 'raison sociale'],
  forme_juridique: ['forme juridique', 'statut juridique'],
  capital: ['capital social'],
  code_naf: ['code naf', 'code ape'],
});

// Glyphs treated as an empty checkbox.
const CHECKBOX_UNCHECKED = [
  '☐',
  '□',
  '▢',
];

// Glyphs treated as a ticked checkbox.
const CHECKBOX_CHECKED = [
  '☒',
  '☑',
  '▣',
];
57
/**
 * Prepares paragraph text for label matching: removes any leading run of
 * bullet glyphs and whitespace, lower-cases what is left and trims it.
 *
 * @param {string} text - Raw paragraph text.
 * @returns {string} The lower-cased text without leading bullets or outer whitespace.
 */
function normalize(text) {
  const leadingBullets = /^[■▪●○•\s▪]+/;
  const withoutBullets = text.replace(leadingBullets, '');
  const lowered = withoutBullets.toLowerCase();
  return lowered.trim();
}
60
/**
 * Tells whether the text contains at least one checkbox glyph, ticked or not.
 *
 * @param {string} text - Paragraph text to inspect.
 * @returns {boolean} True when any glyph from CHECKBOX_UNCHECKED or CHECKBOX_CHECKED occurs in the text.
 */
function hasCheckbox(text) {
  const occursInText = (glyph) => text.includes(glyph);
  return CHECKBOX_UNCHECKED.some(occursInText) || CHECKBOX_CHECKED.some(occursInText);
}
63
/**
 * Tells whether the text contains a ticked checkbox glyph.
 *
 * @param {string} text - Paragraph text to inspect.
 * @returns {boolean} True when any glyph from CHECKBOX_CHECKED occurs in the text.
 */
function isChecked(text) {
  for (const glyph of CHECKBOX_CHECKED) {
    if (text.includes(glyph)) {
      return true;
    }
  }
  return false;
}
66
/**
 * Tells whether a paragraph looks like a form label, i.e. starts with one of
 * the markers '■', '[' or '▪'.
 *
 * @param {string} text - Paragraph text (callers pass it already trimmed).
 * @returns {boolean} True when the text starts with a label marker.
 */
function isLabel(text) {
  const labelMarkers = ['■', '[', '▪'];
  return labelMarkers.some((marker) => text.startsWith(marker));
}
69
/**
 * Tells whether a paragraph can be taken as a field value: it must be
 * non-empty, must not look like a label, and must not exceed 200 characters.
 *
 * @param {string} text - Trimmed paragraph text.
 * @returns {boolean} True when the text qualifies as a value.
 */
function isValue(text) {
  const maxValueLength = 200;
  return Boolean(text) && !isLabel(text) && !(text.length > maxValueLength);
}
78
/**
 * Rewrites the checkbox glyphs of a text to the requested state.
 * When checking, every unchecked glyph becomes '☒'; when unchecking, every
 * checked glyph becomes '☐'. Glyphs already in the requested state are kept.
 *
 * @param {string} text - Text containing checkbox glyphs.
 * @param {boolean} checked - Desired state.
 * @returns {string} The text with its glyphs switched to the desired state.
 */
function replaceCheckboxState(text, checked) {
  const glyphsToSwap = checked ? CHECKBOX_UNCHECKED : CHECKBOX_CHECKED;
  const targetGlyph = checked ? '☒' : '☐';
  return glyphsToSwap.reduce(
    (current, glyph) => current.split(glyph).join(targetGlyph),
    text
  );
}
92
/**
 * Splits WordprocessingML into paragraphs and returns the plain text of each.
 *
 * For every `<w:p>…</w:p>` span the contents of its `<w:t>` nodes are
 * concatenated. XML character escapes (`&amp;`, `&lt;`, `&gt;`, `&quot;`,
 * `&apos;`, `&#NNN;`, `&#xHHH;`) are decoded, so callers receive the text a
 * reader sees rather than its serialized form; without this, a value such as
 * `AT&amp;T` would be reported verbatim and escaped a second time when it is
 * written into another document.
 *
 * @param {string} xml - Content of a document part such as word/document.xml.
 * @returns {string[]} One entry per matched paragraph, in document order
 *   (an empty string for a paragraph without text nodes).
 */
function extractParagraphs(xml) {
  const namedEntities = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };

  // Decodes in a single pass so that "&amp;lt;" yields "&lt;" and not "<".
  const decodeXmlText = (raw) =>
    raw.replace(/&(amp|lt|gt|quot|apos|#\d+|#x[0-9a-fA-F]+);/g, (entity, body) => {
      if (body[0] !== '#') {
        return namedEntities[body];
      }
      const codePoint = body[1] === 'x'
        ? Number.parseInt(body.slice(2), 16)
        : Number.parseInt(body.slice(1), 10);
      // Leave references outside the Unicode range untouched instead of throwing.
      return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    });

  const paragraphs = [];
  // NOTE: the paragraph expression is intentionally kept as-is. The positions
  // in the returned array are used as paragraph indices by code that segments
  // the same XML with this exact expression, so both must keep agreeing.
  const paragraphPattern = /<w:p[^>]*>([\s\S]*?)<\/w:p>/g;
  let paragraphMatch;
  while ((paragraphMatch = paragraphPattern.exec(xml)) !== null) {
    const paragraphContent = paragraphMatch[1];
    const textParts = [];
    // A fresh expression per paragraph avoids carrying lastIndex over.
    const textPattern = /<w:t[^>]*>([^<]*)<\/w:t>/g;
    let textMatch;
    while ((textMatch = textPattern.exec(paragraphContent)) !== null) {
      textParts.push(decodeXmlText(textMatch[1]));
    }
    paragraphs.push(textParts.join(''));
  }
  return paragraphs;
}
110
/**
 * Collects field values from a filled document.
 *
 * For each LABEL_PATTERNS entry, the first not-yet-used paragraph whose
 * normalized text contains one of the phrases is taken as the label. The up to
 * four paragraphs after it are then scanned: the first one accepted by
 * isValue becomes the value, while meeting another label first ends the scan
 * without a value. The label paragraph and the chosen value paragraph are
 * marked as used so later fields cannot claim them.
 *
 * @param {string[]} paragraphs - Paragraph texts in document order.
 * @returns {Map<string, {value: string, labelIndex: number, valueIndex: number}>}
 *   Found fields keyed by field name.
 */
function extractAllFields(paragraphs) {
  const fields = new Map();
  const consumed = new Set();
  const mentionsAny = (phrases, haystack) =>
    phrases.some((phrase) => haystack.includes(phrase.toLowerCase()));

  LABEL_PATTERNS.forEach(([fieldName, patterns]) => {
    if (fields.has(fieldName)) {
      return;
    }
    const labelIndex = paragraphs.findIndex(
      (paragraph, index) => !consumed.has(index) && mentionsAny(patterns, normalize(paragraph))
    );
    if (labelIndex === -1) {
      return;
    }
    const windowEnd = Math.min(labelIndex + 5, paragraphs.length);
    for (let candidate = labelIndex + 1; candidate < windowEnd; candidate += 1) {
      if (consumed.has(candidate)) {
        continue;
      }
      const candidateText = paragraphs[candidate].trim();
      if (isValue(candidateText)) {
        fields.set(fieldName, {
          value: candidateText,
          labelIndex,
          valueIndex: candidate,
        });
        consumed.add(candidate);
        break;
      }
      if (isLabel(candidateText)) {
        // The next label starts here, so this field has no value.
        break;
      }
    }
    // Only the first matching label is ever considered for a field.
    consumed.add(labelIndex);
  });
  return fields;
}
146
/**
 * Lists the paragraphs that contain a checkbox glyph.
 *
 * @param {string[]} paragraphs - Paragraph texts in document order.
 * @returns {Array<{index: number, text: string, signature: string, checked: boolean}>}
 *   One entry per checkbox paragraph: its position, its trimmed text, a
 *   signature (text without checkbox glyphs, trimmed, lower-cased, cut to 60
 *   characters) and whether a ticked glyph is present.
 */
function extractCheckboxes(paragraphs) {
  const found = [];
  for (const [index, paragraph] of paragraphs.entries()) {
    const text = paragraph.trim();
    if (!hasCheckbox(text)) {
      continue;
    }
    const withoutGlyphs = text.replace(/[☐☒☑□▢▣]/g, '');
    found.push({
      index,
      text,
      signature: withoutGlyphs.trim().toLowerCase().slice(0, 60),
      checked: isChecked(text),
    });
  }
  return found;
}
162
/**
 * Locates, in a blank template, the empty paragraph to fill for each field.
 *
 * For each LABEL_PATTERNS entry, the first not-yet-used paragraph whose
 * normalized text contains one of the phrases is taken as the label. Among the
 * up to four paragraphs after it, the first empty one is the fill position;
 * meeting another label first ends the scan without a position. The label
 * paragraph and the chosen fill paragraph are marked as used.
 *
 * @param {string[]} paragraphs - Paragraph texts in document order.
 * @returns {Map<string, {labelIndex: number, fillIndex: number, labelText: string}>}
 *   Fill positions keyed by field name; labelText is the first 50 characters
 *   of the label paragraph.
 */
function findFillablePositions(paragraphs) {
  const slots = new Map();
  const taken = new Set();

  for (const [fieldName, patterns] of LABEL_PATTERNS) {
    if (slots.has(fieldName)) {
      continue;
    }
    const isFreeLabel = (paragraph, index) => {
      if (taken.has(index)) {
        return false;
      }
      const normalized = normalize(paragraph);
      return patterns.some((phrase) => normalized.includes(phrase.toLowerCase()));
    };
    const labelIndex = paragraphs.findIndex(isFreeLabel);
    if (labelIndex < 0) {
      continue;
    }
    const lastCandidate = Math.min(labelIndex + 5, paragraphs.length) - 1;
    let candidate = labelIndex + 1;
    while (candidate <= lastCandidate) {
      if (!taken.has(candidate)) {
        const candidateText = paragraphs[candidate].trim();
        if (!candidateText) {
          slots.set(fieldName, {
            labelIndex,
            fillIndex: candidate,
            labelText: paragraphs[labelIndex].slice(0, 50),
          });
          taken.add(candidate);
          break;
        }
        if (isLabel(candidateText)) {
          // Reached the next label without meeting an empty paragraph.
          break;
        }
      }
      candidate += 1;
    }
    // Only the first matching label is ever considered for a field.
    taken.add(labelIndex);
  }
  return slots;
}
198
/**
 * Writes extracted field values and checkbox states into a template's XML.
 *
 * Text fields: for every template position whose field exists in sourceData,
 * the paragraph at `fillIndex` is rebuilt as its opening tag, its paragraph
 * properties (`<w:pPr>`, when present) and a single run holding the escaped
 * value.
 *
 * Checkboxes: each template checkbox is paired with the first source checkbox
 * that has the same signature. When their states differ, the glyphs inside the
 * `<w:t>` nodes of the template paragraph at `index` are switched.
 *
 * Differences from the previous implementation:
 * - the value is concatenated instead of being passed as a replacement
 *   string, so sequences such as `$1` or `$&` in the data are written
 *   literally rather than expanded;
 * - `<w:pPr>` is kept, so the paragraph really retains its style;
 * - checkboxes are edited inside their own paragraph, text node by text node,
 *   which also works when the glyph and its caption sit in different runs or
 *   when the caption contains characters that are escaped in XML;
 * - `modifiedCheckboxes` only counts checkboxes whose XML actually changed.
 *
 * @param {string} templateXml - XML of the template's main document part.
 * @param {Map<string, {value: string}>} sourceData - Values keyed by field name.
 * @param {Map<string, {fillIndex: number}>} templatePositions - Paragraph to fill per field.
 * @param {Array<{signature: string, checked: boolean}>} sourceCheckboxes - Checkbox states to copy.
 * @param {Array<{index: number, signature: string, checked: boolean}>} templateCheckboxes -
 *   Template checkboxes; `index` must be the paragraph index obtained with the
 *   same `<w:p>` segmentation as used here.
 * @returns {{xml: string, filledFields: string[], modifiedCheckboxes: number}}
 *   The updated XML, one "field: value" entry per filled field, and the number
 *   of checkboxes changed.
 */
function fillDocumentXml(templateXml, sourceData, templatePositions, sourceCheckboxes, templateCheckboxes) {
  let xml = templateXml;
  const filledFields = [];
  let modifiedCheckboxes = 0;

  // Record where every paragraph sits so it can be spliced by index later.
  const paragraphPattern = /<w:p[^>]*>([\s\S]*?)<\/w:p>/g;
  const paragraphs = [];
  let found;
  while ((found = paragraphPattern.exec(templateXml)) !== null) {
    paragraphs.push({
      match: found[0],
      start: found.index,
      end: found.index + found[0].length,
    });
  }

  const isParagraphIndex = (index) =>
    Number.isInteger(index) && index >= 0 && index < paragraphs.length;

  // Swaps one paragraph in `xml` and shifts the offsets of those after it.
  const replaceParagraph = (index, replacement) => {
    const target = paragraphs[index];
    xml = xml.slice(0, target.start) + replacement + xml.slice(target.end);
    const delta = replacement.length - target.match.length;
    for (let later = index + 1; later < paragraphs.length; later++) {
      paragraphs[later].start += delta;
      paragraphs[later].end += delta;
    }
    target.match = replacement;
    target.end = target.start + replacement.length;
  };

  // Fill the text fields.
  for (const [fieldName, position] of templatePositions) {
    if (!sourceData.has(fieldName)) {
      continue;
    }
    const paragraphIndex = position.fillIndex;
    if (!isParagraphIndex(paragraphIndex)) {
      continue;
    }
    const value = sourceData.get(fieldName).value;
    const current = paragraphs[paragraphIndex].match;
    const closingTag = '</w:p>';
    const openingTag = current.slice(0, current.indexOf('>') + 1);
    const inner = current.slice(openingTag.length, current.length - closingTag.length);
    // Greedy up to the last </w:pPr>: a tracked property change nests a second
    // <w:pPr> inside the first one.
    const properties = /<w:pPr(?:\s[^>]*)?(?:\/>|>[\s\S]*<\/w:pPr>)/.exec(inner);
    const keptProperties = properties ? properties[0] : '';
    const filled = `${openingTag}${keptProperties}<w:r><w:t>${escapeXml(value)}</w:t></w:r>${closingTag}`;
    replaceParagraph(paragraphIndex, filled);
    filledFields.push(`${fieldName}: ${value}`);
  }

  // Copy the checkbox states.
  for (const templateCb of templateCheckboxes) {
    const sourceCb = sourceCheckboxes.find((candidate) => candidate.signature === templateCb.signature);
    if (!sourceCb || sourceCb.checked === templateCb.checked) {
      continue;
    }
    if (!isParagraphIndex(templateCb.index)) {
      continue;
    }
    const current = paragraphs[templateCb.index].match;
    // A replacer function is used so the text is never read as a pattern.
    const updated = current.replace(
      /(<w:t[^>]*>)([^<]*)(<\/w:t>)/g,
      (whole, open, content, close) => open + replaceCheckboxState(content, sourceCb.checked) + close
    );
    if (updated !== current) {
      replaceParagraph(templateCb.index, updated);
      modifiedCheckboxes++;
    }
  }

  return { xml, filledFields, modifiedCheckboxes };
}
253
/**
 * Escapes the five characters that have predefined entities in XML.
 *
 * @param {string} text - Raw text.
 * @returns {string} The text with & < > " ' replaced by their entities.
 */
function escapeXml(text) {
  const entityFor = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;',
  };
  // One pass over the input: ampersands introduced by the escaping itself are
  // never revisited, matching the "escape & first" order of a chained version.
  return text.replace(/[&<>"']/g, (character) => entityFor[character]);
}
261
/**
 * Resolves a document reference to the document's bytes.
 *
 * The reference is tried, in this order, as:
 *   1. a filesystem path, when it looks like one and the file exists;
 *   2. the name of a binary property on the current input item;
 *   3. base64 text (more than 100 characters) that decodes to data starting
 *      with the "PK" ZIP signature.
 *
 * @param {object} context - Object exposing helpers.getBinaryDataBuffer(itemIndex, propertyName).
 * @param {number} itemIndex - Index of the current item in `items`.
 * @param {string} input - Path, binary property name, or base64 content.
 * @param {Array<{binary?: object}>} items - Input items of the node.
 * @returns {Promise<Buffer>} The document content.
 * @throws {Error} When none of the three interpretations yields a document.
 */
async function getDocumentBuffer(context, itemIndex, input, items) {
  // 1. Filesystem path.
  const hasPathPrefix = ['/', './', '../'].some((prefix) => input.startsWith(prefix));
  const hasWindowsDrive = /^[a-zA-Z]:\\/.test(input);
  const mentionsDocx = input.includes('.docx') && !input.includes(' ') && input.length < 500;
  if (hasPathPrefix || hasWindowsDrive || mentionsDocx) {
    try {
      if (fs.existsSync(input)) {
        return fs.readFileSync(input);
      }
    }
    catch {
      // Best effort: an unreadable path falls through to the other strategies.
    }
  }

  // 2. Binary property of the current item.
  const currentItem = items[itemIndex];
  if (currentItem.binary && currentItem.binary[input]) {
    const attached = await context.helpers.getBinaryDataBuffer(itemIndex, input);
    return attached;
  }

  // 3. Base64 content; DOCX files are ZIP archives, hence the "PK" check.
  const withoutWhitespace = input.replace(/\s/g, '');
  if (input.length > 100 && /^[A-Za-z0-9+/]+=*$/.test(withoutWhitespace)) {
    const decoded = Buffer.from(input, 'base64');
    const startsWithZipSignature = decoded.length > 4 && decoded[0] === 0x50 && decoded[1] === 0x4B;
    if (startsWithZipSignature) {
      return decoded;
    }
  }

  // 4. Nothing worked: explain which formats are accepted.
  const preview = input.substring(0, 50);
  throw new Error(`Impossible de charger le document "${preview}..."\n` +
    `Formats acceptés:\n` +
    `- Chemin de fichier: /path/to/document.docx\n` +
    `- Propriété binaire: data (si le fichier est attaché en binaire)\n` +
    `- Base64: contenu encodé en base64`);
}
303
/**
 * n8n node that fills, extracts from, or analyzes DOCX documents.
 *
 * The `operation` parameter (read once, for item 0) selects what is done for
 * every input item:
 *   - fill:    copy field values and checkbox states from a source document
 *              into a template and attach the result as binary data;
 *   - extract: report the field values and ticked checkboxes of a document;
 *   - analyze: report the fillable fields and the first 50 paragraphs.
 */
class DocxFiller {
    constructor() {
        // Builds the displayOptions that show a property only for the given operations.
        const visibleFor = (...operations) => ({ show: { operation: operations } });

        const operationProperty = {
            displayName: 'Operation',
            name: 'operation',
            type: 'options',
            noDataExpression: true,
            options: [
                {
                    name: 'Fill Document',
                    value: 'fill',
                    description: 'Remplit un template avec les données d\'un document source',
                    action: 'Fill document',
                },
                {
                    name: 'Extract Data',
                    value: 'extract',
                    description: 'Extrait les données entreprise d\'un document rempli',
                    action: 'Extract data',
                },
                {
                    name: 'Analyze',
                    value: 'analyze',
                    description: 'Analyse la structure d\'un document',
                    action: 'Analyze document',
                },
            ],
            default: 'fill',
        };

        // Parameters of the fill operation.
        const fillProperties = [
            {
                displayName: 'Source Document',
                name: 'sourceDocument',
                type: 'string',
                default: '',
                required: true,
                displayOptions: visibleFor('fill'),
                description: 'Document source contenant les données entreprise (chemin fichier, binary property name, ou base64)',
            },
            {
                displayName: 'Template Document',
                name: 'templateDocument',
                type: 'string',
                default: '',
                required: true,
                displayOptions: visibleFor('fill'),
                description: 'Document template vide à remplir (chemin fichier, binary property name, ou base64)',
            },
            {
                displayName: 'Output Property',
                name: 'outputProperty',
                type: 'string',
                default: 'data',
                displayOptions: visibleFor('fill'),
                description: 'Nom de la propriété binary pour le document de sortie',
            },
        ];

        // Parameter shared by the extract and analyze operations.
        const documentProperty = {
            displayName: 'Document',
            name: 'document',
            type: 'string',
            default: '',
            required: true,
            displayOptions: visibleFor('extract', 'analyze'),
            description: 'Document à analyser (chemin fichier, binary property name, ou base64)',
        };

        this.description = {
            displayName: 'DOCX Filler',
            name: 'docxFiller',
            icon: 'file:docx.svg',
            group: ['transform'],
            version: 1,
            subtitle: '={{$parameter["operation"]}}',
            description: 'Remplit automatiquement des documents DOCX (DC1, DC2, AE) avec les données entreprise',
            defaults: {
                name: 'DOCX Filler',
            },
            // @ts-ignore - Required for AI agent tool usage
            usableAsTool: true,
            inputs: ['main'],
            outputs: ['main'],
            properties: [operationProperty, ...fillProperties, documentProperty],
        };
    }

    /**
     * Runs the selected operation on every input item.
     *
     * @returns {Promise<Array<Array<object>>>} A single output branch holding
     *   one result per processed item. A failing item yields
     *   `{ success: false, error }` when continue-on-fail is enabled;
     *   otherwise a NodeOperationError carrying the item index is thrown.
     */
    async execute() {
        const items = this.getInputData();
        const results = [];
        const operation = this.getNodeParameter('operation', 0);

        // Text of the main document part, or '' when the archive has none.
        const mainDocumentXml = (zip) => {
            const entry = zip.file('word/document.xml');
            return (entry == null ? undefined : entry.asText()) || '';
        };

        // Loads the document named by the `document` parameter as paragraph texts.
        const loadDocumentParagraphs = async (itemIndex) => {
            const reference = this.getNodeParameter('document', itemIndex);
            const buffer = await getDocumentBuffer(this, itemIndex, reference, items);
            return extractParagraphs(mainDocumentXml(new pizzip_1.default(buffer)));
        };

        const fillItem = async (itemIndex) => {
            const sourceReference = this.getNodeParameter('sourceDocument', itemIndex);
            const templateReference = this.getNodeParameter('templateDocument', itemIndex);
            const outputProperty = this.getNodeParameter('outputProperty', itemIndex);
            // Load both documents (from path, binary property or base64).
            const sourceBuffer = await getDocumentBuffer(this, itemIndex, sourceReference, items);
            const templateBuffer = await getDocumentBuffer(this, itemIndex, templateReference, items);
            // Open the DOCX archives.
            const sourceZip = new pizzip_1.default(sourceBuffer);
            const templateZip = new pizzip_1.default(templateBuffer);
            const sourceXml = mainDocumentXml(sourceZip);
            const templateXml = mainDocumentXml(templateZip);
            const sourceParagraphs = extractParagraphs(sourceXml);
            const templateParagraphs = extractParagraphs(templateXml);
            // Gather what to copy and where to put it.
            const sourceData = extractAllFields(sourceParagraphs);
            const sourceCheckboxes = extractCheckboxes(sourceParagraphs);
            const templatePositions = findFillablePositions(templateParagraphs);
            const templateCheckboxes = extractCheckboxes(templateParagraphs);
            const filled = fillDocumentXml(templateXml, sourceData, templatePositions, sourceCheckboxes, templateCheckboxes);
            const { filledFields, modifiedCheckboxes } = filled;
            // Put the new XML back into the template archive and serialize it.
            templateZip.file('word/document.xml', filled.xml);
            const outputBuffer = templateZip.generate({
                type: 'nodebuffer',
                compression: 'DEFLATE',
            });
            const binaryData = await this.helpers.prepareBinaryData(outputBuffer, 'document_rempli.docx', 'application/vnd.openxmlformats-officedocument.wordprocessingml.document');
            return {
                json: {
                    success: true,
                    filledFields,
                    modifiedCheckboxes,
                    message: `Document rempli avec ${filledFields.length} champs et ${modifiedCheckboxes} checkboxes`,
                },
                binary: {
                    [outputProperty]: binaryData,
                },
            };
        };

        const extractItem = async (itemIndex) => {
            const paragraphs = await loadDocumentParagraphs(itemIndex);
            const fields = extractAllFields(paragraphs);
            const checkboxes = extractCheckboxes(paragraphs);
            const extractedData = {};
            fields.forEach((field, key) => {
                extractedData[key] = field.value;
            });
            const ticked = checkboxes.filter((checkbox) => checkbox.checked);
            return {
                json: {
                    success: true,
                    data: extractedData,
                    checkboxes: {
                        total: checkboxes.length,
                        checked: ticked.length,
                        items: ticked.map((checkbox) => checkbox.signature),
                    },
                },
            };
        };

        const analyzeItem = async (itemIndex) => {
            const paragraphs = await loadDocumentParagraphs(itemIndex);
            const positions = findFillablePositions(paragraphs);
            const describeParagraph = (paragraph, index) => ({
                index,
                text: paragraph.slice(0, 100),
                hasCheckbox: hasCheckbox(paragraph),
            });
            return {
                json: {
                    success: true,
                    totalParagraphs: paragraphs.length,
                    fillableFields: Array.from(positions.keys()),
                    structure: paragraphs.slice(0, 50).map(describeParagraph),
                },
            };
        };

        // A Map keeps the lookup limited to the three known operation names.
        const handlers = new Map([
            ['fill', fillItem],
            ['extract', extractItem],
            ['analyze', analyzeItem],
        ]);

        for (let itemIndex = 0; itemIndex < items.length; itemIndex++) {
            try {
                const handler = handlers.get(operation);
                if (handler) {
                    results.push(await handler(itemIndex));
                }
            }
            catch (error) {
                if (!this.continueOnFail()) {
                    throw new n8n_workflow_1.NodeOperationError(this.getNode(), error, { itemIndex });
                }
                results.push({
                    json: {
                        success: false,
                        error: error.message,
                    },
                });
            }
        }
        return [results];
    }
}
515
- exports.DocxFiller = DocxFiller;
File without changes