@arela/uploader 1.0.19 → 1.0.21
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/commands/GDriveSyncCommand.js +475 -0
- package/src/commands/IdentifyCommand.js +41 -16
- package/src/commands/ScanCommand.js +6 -3
- package/src/config/config.js +88 -2
- package/src/document-type-shared.js +13 -3
- package/src/document-types/_pedimento-shared-extractors.js +226 -0
- package/src/document-types/pedimento-completo-xml.js +322 -0
- package/src/document-types/pedimento-completo.js +68 -0
- package/src/document-types/pedimento-simplificado.js +7 -286
- package/src/file-detection.js +43 -5
- package/src/index.js +27 -0
- package/src/services/DatabaseService.js +3 -1
- package/src/services/GoogleDriveService.js +217 -0
- package/src/services/LoggingService.js +1 -1
package/src/config/config.js
CHANGED
|
@@ -24,6 +24,7 @@ class Config {
|
|
|
24
24
|
this.watch = this.#loadWatchConfig();
|
|
25
25
|
this.redis = this.#loadRedisConfig();
|
|
26
26
|
this.worker = this.#loadWorkerConfig();
|
|
27
|
+
this.gdrive = this.#loadGDriveConfig();
|
|
27
28
|
}
|
|
28
29
|
|
|
29
30
|
/**
|
|
@@ -36,10 +37,10 @@ class Config {
|
|
|
36
37
|
const __dirname = path.dirname(__filename);
|
|
37
38
|
const packageJsonPath = path.resolve(__dirname, '../../package.json');
|
|
38
39
|
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
|
|
39
|
-
return packageJson.version || '1.0.
|
|
40
|
+
return packageJson.version || '1.0.21';
|
|
40
41
|
} catch (error) {
|
|
41
42
|
console.warn('⚠️ Could not read package.json version, using fallback');
|
|
42
|
-
return '1.0.
|
|
43
|
+
return '1.0.21';
|
|
43
44
|
}
|
|
44
45
|
}
|
|
45
46
|
|
|
@@ -579,6 +580,91 @@ class Config {
|
|
|
579
580
|
return process.env.ARELA_SERVER_ID || null;
|
|
580
581
|
}
|
|
581
582
|
|
|
583
|
+
/**
|
|
584
|
+
* Load Google Drive sync configuration
|
|
585
|
+
* @private
|
|
586
|
+
*/
|
|
587
|
+
#loadGDriveConfig() {
|
|
588
|
+
const rootFolderId = process.env.GDRIVE_ROOT_FOLDER_ID || null;
|
|
589
|
+
|
|
590
|
+
// Default mirror destination: <UPLOAD_BASE_PATH>/_gdrive_mirror
|
|
591
|
+
let localMirrorPath = process.env.GDRIVE_LOCAL_MIRROR_PATH || null;
|
|
592
|
+
if (!localMirrorPath && process.env.UPLOAD_BASE_PATH) {
|
|
593
|
+
const base = process.env.UPLOAD_BASE_PATH;
|
|
594
|
+
if (base !== '*') {
|
|
595
|
+
localMirrorPath = path.resolve(
|
|
596
|
+
PathNormalizer.toAbsolutePath(base),
|
|
597
|
+
'_gdrive_mirror',
|
|
598
|
+
);
|
|
599
|
+
}
|
|
600
|
+
} else if (localMirrorPath) {
|
|
601
|
+
localMirrorPath = PathNormalizer.toAbsolutePath(localMirrorPath);
|
|
602
|
+
}
|
|
603
|
+
|
|
604
|
+
return {
|
|
605
|
+
rootFolderId,
|
|
606
|
+
localMirrorPath,
|
|
607
|
+
serviceAccountFile: process.env.GDRIVE_SERVICE_ACCOUNT_FILE || null,
|
|
608
|
+
serviceAccountJson: process.env.GDRIVE_SERVICE_ACCOUNT_JSON || null,
|
|
609
|
+
skipNativeDocs: process.env.GDRIVE_SKIP_NATIVE_DOCS !== 'false',
|
|
610
|
+
followShortcuts: process.env.GDRIVE_FOLLOW_SHORTCUTS !== 'false',
|
|
611
|
+
concurrency: parseInt(process.env.GDRIVE_CONCURRENCY) || 5,
|
|
612
|
+
pageSize: parseInt(process.env.GDRIVE_PAGE_SIZE) || 1000,
|
|
613
|
+
maxFileSizeBytes:
|
|
614
|
+
parseInt(process.env.GDRIVE_MAX_FILE_SIZE_BYTES) ||
|
|
615
|
+
2 * 1024 * 1024 * 1024, // 2GB default
|
|
616
|
+
};
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
/**
|
|
620
|
+
* Get Google Drive configuration
|
|
621
|
+
* @returns {Object} GDrive sync settings
|
|
622
|
+
*/
|
|
623
|
+
getGDriveConfig() {
|
|
624
|
+
return this.gdrive;
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
/**
|
|
628
|
+
* Validate Google Drive configuration
|
|
629
|
+
* @throws {Error} If required gdrive configuration is missing
|
|
630
|
+
*/
|
|
631
|
+
validateGDriveConfig() {
|
|
632
|
+
const errors = [];
|
|
633
|
+
|
|
634
|
+
if (!this.gdrive.rootFolderId) {
|
|
635
|
+
errors.push(
|
|
636
|
+
'GDRIVE_ROOT_FOLDER_ID is required (Drive folder ID to sync)',
|
|
637
|
+
);
|
|
638
|
+
}
|
|
639
|
+
|
|
640
|
+
if (!this.gdrive.localMirrorPath) {
|
|
641
|
+
errors.push(
|
|
642
|
+
'Could not determine local mirror path. Set GDRIVE_LOCAL_MIRROR_PATH or UPLOAD_BASE_PATH',
|
|
643
|
+
);
|
|
644
|
+
}
|
|
645
|
+
|
|
646
|
+
if (!this.gdrive.serviceAccountFile && !this.gdrive.serviceAccountJson) {
|
|
647
|
+
errors.push(
|
|
648
|
+
'Either GDRIVE_SERVICE_ACCOUNT_FILE (path to JSON) or GDRIVE_SERVICE_ACCOUNT_JSON (inline JSON) is required',
|
|
649
|
+
);
|
|
650
|
+
}
|
|
651
|
+
|
|
652
|
+
if (this.gdrive.serviceAccountFile) {
|
|
653
|
+
const resolved = PathNormalizer.toAbsolutePath(
|
|
654
|
+
this.gdrive.serviceAccountFile,
|
|
655
|
+
);
|
|
656
|
+
if (!fs.existsSync(resolved)) {
|
|
657
|
+
errors.push(`GDRIVE_SERVICE_ACCOUNT_FILE not found: ${resolved}`);
|
|
658
|
+
}
|
|
659
|
+
}
|
|
660
|
+
|
|
661
|
+
if (errors.length > 0) {
|
|
662
|
+
throw new Error(
|
|
663
|
+
'⚠️ Google Drive configuration errors:\n - ' + errors.join('\n - '),
|
|
664
|
+
);
|
|
665
|
+
}
|
|
666
|
+
}
|
|
667
|
+
|
|
582
668
|
/**
|
|
583
669
|
* Check if worker mode is available (Redis configured)
|
|
584
670
|
* @returns {boolean}
|
|
@@ -2,6 +2,9 @@
|
|
|
2
2
|
import { dodaPdfDefinition } from './document-types/doda-pdf.js';
|
|
3
3
|
import { dodaXmlDefinition } from './document-types/doda-xml.js';
|
|
4
4
|
import { facturasComerciales } from './document-types/facturas-comerciales.js';
|
|
5
|
+
import { pedimentoCompletoDefinition } from './document-types/pedimento-completo.js';
|
|
6
|
+
// TODO: enable XML pedimento detection — implementation ready in pedimento-completo-xml.js
|
|
7
|
+
// import { pedimentoCompletoXmlDefinition } from './document-types/pedimento-completo-xml.js';
|
|
5
8
|
import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
|
|
6
9
|
import { proformaDefinition } from './document-types/proforma.js';
|
|
7
10
|
import { supportDocumentDefinition } from './document-types/support-document.js';
|
|
@@ -41,6 +44,12 @@ export class DocumentTypeDefinition {
|
|
|
41
44
|
// proformaDefinition is kept as reference but not used directly in the registry since resolution is handled post-extraction.
|
|
42
45
|
const documentTypes = [
|
|
43
46
|
pedimentoSimplificadoDefinition,
|
|
47
|
+
pedimentoCompletoDefinition,
|
|
48
|
+
// TODO: enable XML pedimento detection — uncomment the next line and the
|
|
49
|
+
// matching import at the top of this file. All downstream code
|
|
50
|
+
// (composeArelaPath, arela-api SQL filters, IdentifyCommand counters)
|
|
51
|
+
// already accepts `pedimento_completo_xml`.
|
|
52
|
+
// pedimentoCompletoXmlDefinition,
|
|
44
53
|
supportDocumentDefinition,
|
|
45
54
|
dodaPdfDefinition,
|
|
46
55
|
dodaXmlDefinition,
|
|
@@ -96,12 +105,13 @@ export function extractDocumentFields(source, fileExtension, filePath) {
|
|
|
96
105
|
|
|
97
106
|
console.log(` → Resolved type: ${resolvedType}`);
|
|
98
107
|
|
|
99
|
-
// Extract pedimento number and year
|
|
108
|
+
// Extract pedimento number and year. `filePath` is forwarded so XML
|
|
109
|
+
// matchers (which compose numPedimento from the filename) can use it.
|
|
100
110
|
const pedimento = docType.extractNumPedimento
|
|
101
|
-
? docType.extractNumPedimento(source, fields)
|
|
111
|
+
? docType.extractNumPedimento(source, fields, filePath)
|
|
102
112
|
: null;
|
|
103
113
|
const year = docType.extractPedimentoYear
|
|
104
|
-
? docType.extractPedimentoYear(source, fields)
|
|
114
|
+
? docType.extractPedimentoYear(source, fields, filePath)
|
|
105
115
|
: null;
|
|
106
116
|
|
|
107
117
|
return [resolvedType, fields, pedimento, year];
|
|
@@ -0,0 +1,226 @@
|
|
|
1
|
+
// Shared PDF extractors used by both `pedimento_simplificado` and
|
|
2
|
+
// `pedimento_completo` matchers. Keeping the regexes in a single module
|
|
3
|
+
// prevents drift between the two pedimento variants.
|
|
4
|
+
import { FieldResult } from '../document-type-shared.js';
|
|
5
|
+
|
|
6
|
+
// 1) Pedimento number: 15 digits grouped 2-2-4-7, where each group may be
//    followed by at most one whitespace character. Whitespace is stripped
//    from the reported value.
export const numPedimentoExtractor = {
  field: 'numPedimento',
  extract: (source) => {
    const hit = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
    if (hit === null) {
      return new FieldResult('numPedimento', false, null);
    }
    const digitsOnly = hit[0].replace(/\s/g, '');
    return new FieldResult('numPedimento', true, digitsOnly);
  },
};
|
|
18
|
+
|
|
19
|
+
// 2) Operation type: the three uppercase letters that follow the
//    whitespace-separated pedimento number (2-2-4-7 digit groups).
export const tipoOperacionExtractor = {
  field: 'tipoOperacion',
  extract: (source) => {
    const hit = source.match(/\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+([A-Z]{3})/);
    const tipo = hit ? hit[1] : null;
    return new FieldResult('tipoOperacion', hit !== null, tipo);
  },
};
|
|
27
|
+
|
|
28
|
+
// 3) Pedimento key: the two uppercase-alphanumeric characters that follow
//    the three-letter operation type on the pedimento-number row.
export const clavePedimentoExtractor = {
  field: 'clavePedimento',
  extract: (source) => {
    const rowPattern =
      /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})/;
    const hit = source.match(rowPattern);
    if (!hit) {
      return new FieldResult('clavePedimento', false, null);
    }
    return new FieldResult('clavePedimento', true, hit[1]);
  },
};
|
|
38
|
+
|
|
39
|
+
// 4) Entry/exit customs office: the trailing 3-digit code on a line made of
//    exactly three columns — an integer, a number (digits, commas, dots) and
//    the code itself. (Per the original note this is the gross-weight row.)
export const aduanaEntradaSalidaExtractor = {
  field: 'aduanaEntradaSalida',
  extract: (source) => {
    const hit = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m);
    const found = hit !== null;
    const code = found ? hit[1] : null;
    return new FieldResult('aduanaEntradaSalida', found, code);
  },
};
|
|
51
|
+
|
|
52
|
+
// 5) RFC: a run of 12–13 uppercase-alphanumeric characters that is the only
//    non-whitespace content between two newlines.
export const rfcExtractor = {
  field: 'rfc',
  extract: (source) => {
    const hit = source.match(/\n\s*([A-Z0-9]{12,13})\s*\n/);
    if (!hit) {
      return new FieldResult('rfc', false, null);
    }
    return new FieldResult('rfc', true, hit[1]);
  },
};
|
|
60
|
+
|
|
61
|
+
// 6) Acceptance code: 8 uppercase-alphanumeric characters on the first
//    non-blank line after the first RFC-shaped line (12–13 alphanumerics).
export const codigoAceptacionExtractor = {
  field: 'codigoAceptacion',
  extract: (source) => {
    // Trim every line and drop the blank ones so "next line" skips padding.
    const nonBlank = [];
    for (const raw of source.split(/\r?\n/)) {
      const trimmed = raw.trim();
      if (trimmed.length > 0) {
        nonBlank.push(trimmed);
      }
    }

    const rfcAt = nonBlank.findIndex((l) => /^[A-Z0-9]{12,13}$/.test(l));
    const candidate = rfcAt >= 0 ? nonBlank[rfcAt + 1] || '' : '';
    const code = /^[A-Z0-9]{8}$/.test(candidate) ? candidate : null;

    return new FieldResult('codigoAceptacion', code !== null, code);
  },
};
|
|
78
|
+
|
|
79
|
+
// 7) E-Document numbers: every 13-character uppercase-alphanumeric code that
//    appears after a `NUM. E-DOCUMENT` label on the same line. Duplicates are
//    dropped (first occurrence wins) and the result is reported as a
//    bracketed, comma-separated string such as `[CODE1,CODE2]`.
export const numEDocumentoExtractor = {
  field: 'numEDocumento',
  extract: (source) => {
    const codes = new Set();

    for (const line of source.split(/\r?\n/)) {
      if (!/NUM\.?\s*E-DOCUMENT/i.test(line)) {
        continue;
      }
      // Keep only what follows the (last) label on the line.
      const tail = line.replace(/.*NUM\.?\s*E-DOCUMENT\s*/i, '');
      for (const code of tail.match(/[A-Z0-9]{13}/g) || []) {
        codes.add(code);
      }
    }

    if (codes.size === 0) {
      return new FieldResult('numEDocumento', false, null);
    }
    return new FieldResult('numEDocumento', true, `[${[...codes].join(',')}]`);
  },
};
|
|
107
|
+
|
|
108
|
+
// 8) Payment date — the known label variants are tried in priority order and
//    the first hit wins. Note the second variant captures YYYY/MM/DD while
//    the other two capture DD/MM/YYYY; the value is returned as matched.
export const paymentDateExtractor = {
  field: 'paymentDate',
  extract: (source) => {
    const labelPatterns = [
      /2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/,
      /FECHA DE PAGO:\s*(\d{4}\/\d{2}\/\d{2})/,
      /PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/,
    ];

    for (const pattern of labelPatterns) {
      const hit = source.match(pattern);
      if (hit) {
        return new FieldResult('paymentDate', true, hit[1]);
      }
    }
    return new FieldResult('paymentDate', false, null);
  },
};
|
|
122
|
+
|
|
123
|
+
// 9) COVE — gathers every `COVE<alphanumerics>` token from lines that mention
//    `COVE` or `NUMERO DE ACUSE DE VALOR`. Tokens are upper-cased and
//    de-duplicated (first occurrence wins). In the CoveFact layout
//    (`COVE257W76NF2 / ID250230`) only the leading COVE token is kept,
//    because the token pattern stops at the space before `/`.
export const coveExtractor = {
  field: 'cove',
  extract: (source) => {
    const tokens = new Set();

    for (const line of source.split(/\r?\n/)) {
      const isCandidate =
        /COVE/i.test(line) || /NUMERO DE ACUSE DE VALOR/i.test(line);
      if (!isCandidate) {
        continue;
      }
      for (const token of line.match(/COVE[A-Z0-9]+/gi) || []) {
        tokens.add(token.toUpperCase());
      }
    }

    if (tokens.size === 0) {
      return new FieldResult('cove', false, null);
    }
    return new FieldResult('cove', true, `[${[...tokens].join(',')}]`);
  },
};
|
|
153
|
+
|
|
154
|
+
// 10) Patente: locate the header line containing `PATENTE:`, `PEDIMENTO:` and
//     `ADUANA:` (in that order), then take the first column of the first
//     later line that consists of exactly three whitespace-separated
//     digit groups.
export const patenteExtractor = {
  field: 'patente',
  extract: (source) => {
    const lines = source.split(/\r?\n/);
    const headerAt = lines.findIndex((line) =>
      /PATENTE:.*PEDIMENTO:.*ADUANA:/i.test(line),
    );
    if (headerAt < 0) {
      return new FieldResult('patente', false, null);
    }

    const valuesRow = lines
      .slice(headerAt + 1)
      .map((line) => line.trim())
      .find((line) => /^\d+\s+\d+\s+\d+$/.test(line));

    if (valuesRow === undefined) {
      return new FieldResult('patente', false, null);
    }
    const [patente] = valuesRow.split(/\s+/);
    return new FieldResult('patente', true, patente);
  },
};
|
|
175
|
+
|
|
176
|
+
// 11) Rectification payment date: the first DD/MM/YYYY date found within 500
//     characters after `RECTIFICACION`; if that fails, the same search is
//     repeated after `FECHA PAGO RECT`. Both searches are case-insensitive.
export const fechaPagoRectificacionExtractor = {
  field: 'fechaPagoRectificacion',
  extract: (source) => {
    const anchoredPatterns = [
      /RECTIFICACION[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
      /FECHA PAGO RECT[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
    ];

    for (const pattern of anchoredPatterns) {
      const hit = source.match(pattern);
      if (hit) {
        return new FieldResult('fechaPagoRectificacion', true, hit[1]);
      }
    }
    return new FieldResult('fechaPagoRectificacion', false, null);
  },
};
|
|
199
|
+
|
|
200
|
+
/**
 * The extractor set applied to pedimento_simplificado and pedimento_completo
 * PDFs alike. The sequence is kept stable because tooling that inspects the
 * resulting array may depend on element order.
 */
export const sharedPedimentoExtractors = [
  numPedimentoExtractor,
  tipoOperacionExtractor,
  clavePedimentoExtractor,
  aduanaEntradaSalidaExtractor,
  rfcExtractor,
  codigoAceptacionExtractor,
  numEDocumentoExtractor,
  paymentDateExtractor,
  coveExtractor,
  patenteExtractor,
  fechaPagoRectificacionExtractor,
];
|
|
217
|
+
|
|
218
|
+
/**
 * Shared helper used by both PDF matchers' `extractPedimentoYear`.
 *
 * Reads the `numPedimento` field and expands its leading two digits into a
 * four-digit year: values below 50 map to 20YY, all others to 19YY.
 *
 * @param {Array<{name: string, value: *}>|null|undefined} fields - Extracted
 *   field results; only the entry named `numPedimento` is consulted.
 * @returns {number|null} The four-digit year, or null when the field is
 *   missing, empty, or does not start with two digits.
 */
export function pedimentoYearFromFields(fields) {
  const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
  if (!numPedimento) return null;

  // Require two real leading digits: without this guard a malformed value
  // produced NaN (e.g. 'N/A') and a non-string value threw on `.substring`.
  const leading = /^\d{2}/.exec(String(numPedimento));
  if (leading === null) return null;

  const yy = Number.parseInt(leading[0], 10);
  return yy < 50 ? yy + 2000 : yy + 1900;
}
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
// VUCEM "consultarPedimentoCompleto" XML matcher.
|
|
2
|
+
//
|
|
3
|
+
// STATUS: implemented but NOT registered in `document-type-shared.js`. To
|
|
4
|
+
// activate, uncomment the import + registration in that file. All downstream
|
|
5
|
+
// code (composeArelaPath, arela-api propagation SQL, IdentifyCommand
|
|
6
|
+
// counters) already includes `pedimento_completo_xml`, so re-enabling is a
|
|
7
|
+
// single-line change.
|
|
8
|
+
//
|
|
9
|
+
// Filename patterns recognized (try in order — patente extraction):
|
|
10
|
+
// 1) VU_PATENTE_ADUANA_PEDIMENTO.xml → e.g. VU_3429_070_5016101.xml
|
|
11
|
+
// 2) ADUANA-PATENTE-PEDIMENTO.xml → e.g. 670-3806-2002487.xml
|
|
12
|
+
// 3) {15-digit}[_{15-digit}].xml → e.g. 260734296016642_260734296016642.xml
|
|
13
|
+
// If none match, arela_path is left null and propagation fills it from a
|
|
14
|
+
// sibling PDF in the same directory.
|
|
15
|
+
//
|
|
16
|
+
// numPedimento is composed from XML body + filename because the body never
|
|
17
|
+
// carries the 15-digit form: YY|AA|PPPP|NNNNNNN.
|
|
18
|
+
import { FieldResult } from '../document-type-shared.js';
|
|
19
|
+
|
|
20
|
+
// --------------------------- helpers ---------------------------------------
|
|
21
|
+
|
|
22
|
+
/**
 * Return the trimmed text of the first `<tag>` element whose content holds no
 * nested markup. An optional alphanumeric namespace prefix is accepted on
 * both the opening and the closing tag (`<ns2:tag>` or `<tag>`), and the
 * match is case-insensitive.
 *
 * @param {string} source - XML text to search.
 * @param {string} tag - Local element name, matched literally.
 * @returns {string|null} Trimmed element text, or null when nothing matches.
 */
function firstTag(source, tag) {
  // Escape regex metacharacters so a name such as `a.b` is matched literally
  // instead of being interpreted as a pattern.
  const literalTag = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const re = new RegExp(
    `<(?:[a-z0-9]+:)?${literalTag}>([^<]*)</(?:[a-z0-9]+:)?${literalTag}>`,
    'i',
  );
  const m = source.match(re);
  return m ? m[1].trim() : null;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Collect the raw inner content of every `<tag>…</tag>` element in document
 * order. An optional alphanumeric namespace prefix is accepted and the match
 * is case-insensitive. Matching is non-greedy, so each block ends at the
 * first closing tag of that name.
 *
 * @param {string} source - XML text to search.
 * @param {string} tag - Local element name, matched literally.
 * @returns {string[]} Inner contents (untrimmed); empty when none match.
 */
function allTagBlocks(source, tag) {
  // Escape regex metacharacters so the tag name is matched literally.
  const literalTag = tag.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const re = new RegExp(
    `<(?:[a-z0-9]+:)?${literalTag}>([\\s\\S]*?)</(?:[a-z0-9]+:)?${literalTag}>`,
    'gi',
  );
  return Array.from(source.matchAll(re), (m) => m[1]);
}
|
|
44
|
+
|
|
45
|
+
// Left-pad `value` (stringified) with zeros up to `length` characters.
// null/undefined pass through as null; longer values are never truncated.
function pad(value, length) {
  return value == null ? null : String(value).padStart(length, '0');
}
|
|
49
|
+
|
|
50
|
+
/**
 * Derive pedimento components from the file's base name by trying the three
 * known naming schemes in order. The result always has the keys `patente`,
 * `aduana`, `pedimento` and `year`; `year` is null for the first two schemes
 * because those names carry no year. For the 3-digit aduana of schemes 1 and
 * 2 only the first two digits are kept.
 *
 * @param {string} filePath - Path using `/` or `\` separators.
 * @returns {{patente: string, aduana: string, pedimento: string,
 *   year: (string|null)}|null} Parsed parts, or null when the path is empty,
 *   has no base name, or matches none of the schemes.
 */
function parseFilenameParts(filePath) {
  if (!filePath) return null;

  const segments = filePath.split(/[\\/]/);
  const baseName = segments[segments.length - 1];
  if (!baseName) return null;

  // 1) VU_PATENTE_ADUANA_PEDIMENTO.xml
  const vuScheme = baseName.match(/^VU_(\d{4})_(\d{3})_(\d{7})\.xml$/i);
  if (vuScheme) {
    const [, patente, aduana3, pedimento] = vuScheme;
    return {
      patente,
      aduana: aduana3.substring(0, 2),
      pedimento,
      year: null,
    };
  }

  // 2) ADUANA-PATENTE-PEDIMENTO.xml
  const dashScheme = baseName.match(/^(\d{3})-(\d{4})-(\d{7})\.xml$/i);
  if (dashScheme) {
    const [, aduana3, patente, pedimento] = dashScheme;
    return {
      patente,
      aduana: aduana3.substring(0, 2),
      pedimento,
      year: null,
    };
  }

  // 3) 15-digit form YY|AA|PPPP|NNNNNNN, optionally followed by `_` and a
  //    second 15-digit group that is ignored.
  const fullScheme = baseName.match(/^(\d{15})(?:_\d{15})?\.xml$/i);
  if (fullScheme) {
    const digits = fullScheme[1];
    return {
      year: digits.substring(0, 2),
      aduana: digits.substring(2, 4),
      patente: digits.substring(4, 8),
      pedimento: digits.substring(8, 15),
    };
  }

  return null;
}
|
|
97
|
+
|
|
98
|
+
// Return the two-digit year (YY) of a string that starts with `YYYY-`, e.g.
// "2026-05-08-06:00" → "26". Empty input or any other shape yields null.
function yyFromIsoDate(iso) {
  if (!iso) return null;
  const leadingYear = iso.match(/^(\d{4})-/);
  if (!leadingYear) return null;
  return leadingYear[1].substring(2, 4);
}
|
|
104
|
+
|
|
105
|
+
// Scan the <fechas> blocks in document order and return the <fecha> text of
// the first block whose first text-only <clave> equals '2' and whose <fecha>
// is non-empty. Returns null when no block qualifies.
function findPaymentDate(source) {
  const paymentFecha = allTagBlocks(source, 'fechas')
    .filter((block) => firstTag(block, 'clave') === '2')
    .map((block) => firstTag(block, 'fecha'))
    .find(Boolean);
  return paymentFecha ?? null;
}
|
|
117
|
+
|
|
118
|
+
// --------------------------- extractors ------------------------------------
|
|
119
|
+
|
|
120
|
+
// RFC: text of the first <rfc> element in the document.
const rfcExtractor = {
  field: 'rfc',
  extract: (source) => {
    const rfc = firstTag(source, 'rfc');
    return new FieldResult('rfc', Boolean(rfc), rfc);
  },
};
|
|
127
|
+
|
|
128
|
+
// Pedimento key: the <clave> nested in the first <claveDocumento> block,
// e.g. <ns2:claveDocumento><ns2:clave>R1</ns2:clave></ns2:claveDocumento>.
const clavePedimentoExtractor = {
  field: 'clavePedimento',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'claveDocumento');
    const clave =
      firstBlock === undefined ? null : firstTag(firstBlock, 'clave');
    return new FieldResult('clavePedimento', Boolean(clave), clave);
  },
};
|
|
137
|
+
|
|
138
|
+
// Operation type: the <descripcion> nested in the first <tipoOperacion> block.
const tipoOperacionExtractor = {
  field: 'tipoOperacion',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'tipoOperacion');
    const descripcion =
      firstBlock === undefined ? null : firstTag(firstBlock, 'descripcion');
    return new FieldResult('tipoOperacion', Boolean(descripcion), descripcion);
  },
};
|
|
146
|
+
|
|
147
|
+
// Entry/exit customs office: the <clave> nested in the first
// <aduanaEntradaSalida> block, left-padded with zeros to two characters.
const aduanaEntradaSalidaExtractor = {
  field: 'aduanaEntradaSalida',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'aduanaEntradaSalida');
    const clave =
      firstBlock === undefined ? null : firstTag(firstBlock, 'clave');
    if (!clave) {
      return new FieldResult('aduanaEntradaSalida', false, null);
    }
    return new FieldResult('aduanaEntradaSalida', true, pad(clave, 2));
  },
};
|
|
159
|
+
|
|
160
|
+
// Payment date: whatever `findPaymentDate` resolves for the document
// (null when it finds none).
const paymentDateExtractor = {
  field: 'paymentDate',
  extract: (source) => {
    const paidOn = findPaymentDate(source);
    return new FieldResult('paymentDate', Boolean(paidOn), paidOn);
  },
};
|
|
167
|
+
|
|
168
|
+
// Rectification payment date: the <fechaPago> nested in the first
// <rectificacion> block; not found when the document has no such block.
const fechaPagoRectificacionExtractor = {
  field: 'fechaPagoRectificacion',
  extract: (source) => {
    const [firstRect] = allTagBlocks(source, 'rectificacion');
    if (firstRect === undefined) {
      return new FieldResult('fechaPagoRectificacion', false, null);
    }
    const paidOn = firstTag(firstRect, 'fechaPago');
    return new FieldResult('fechaPagoRectificacion', Boolean(paidOn), paidOn);
  },
};
|
|
179
|
+
|
|
180
|
+
// COVE: the first <numero> of every <facturas> block, de-duplicated in
// document order and reported as a bracketed, comma-separated string.
const coveExtractor = {
  field: 'cove',
  extract: (source) => {
    const numeros = new Set();
    for (const factura of allTagBlocks(source, 'facturas')) {
      const numero = firstTag(factura, 'numero');
      if (numero) {
        numeros.add(numero);
      }
    }

    if (numeros.size === 0) {
      return new FieldResult('cove', false, null);
    }
    return new FieldResult('cove', true, `[${[...numeros].join(',')}]`);
  },
};
|
|
197
|
+
|
|
198
|
+
// E-Document numbers: for every <identificadores> block whose nested
// <claveIdentificador><clave> equals 'ED', take the block's <complemento1>.
// Values are de-duplicated in document order and reported as a bracketed,
// comma-separated string.
const numEDocumentoExtractor = {
  field: 'numEDocumento',
  extract: (source) => {
    const complementos = new Set();

    for (const block of allTagBlocks(source, 'identificadores')) {
      const [claveBlock] = allTagBlocks(block, 'claveIdentificador');
      const clave =
        claveBlock === undefined ? null : firstTag(claveBlock, 'clave');
      if (clave !== 'ED') {
        continue;
      }
      const complemento = firstTag(block, 'complemento1');
      if (complemento) {
        complementos.add(complemento);
      }
    }

    if (complementos.size === 0) {
      return new FieldResult('numEDocumento', false, null);
    }
    return new FieldResult(
      'numEDocumento',
      true,
      `[${[...complementos].join(',')}]`,
    );
  },
};
|
|
222
|
+
|
|
223
|
+
// Composed numPedimento — needs the filename plus already-extracted fields.
// It is intentionally NOT produced by an entry in `extractors`: extractors
// only receive `source`, while the 15-digit number also depends on the
// filename and on the fechaPagoRectificacion / paymentDate /
// aduanaEntradaSalida field results. It is therefore composed in
// `extractNumPedimento` below, which receives `source`, `fields` and
// `filePath` directly — that is the canonical place numPedimento is built
// for XML.
|
|
231
|
+
|
|
232
|
+
// --------------------------- definition ------------------------------------
|
|
233
|
+
|
|
234
|
+
export const pedimentoCompletoXmlDefinition = {
  type: 'pedimento_completo_xml',
  extensions: ['xml'],

  /**
   * A document matches when its text mentions
   * `consultarPedimentoCompletoRespuesta` (case-insensitive).
   */
  match: (source) => /consultarPedimentoCompletoRespuesta/i.test(source),

  /**
   * Resolve to `pedimento_completo_xml` only if payment evidence exists in
   * the extracted fields:
   *   - a found `fechaPagoRectificacion`, or
   *   - a found, non-empty `paymentDate`.
   * Otherwise the document resolves to `proforma_completo_xml`.
   *
   * Only `fields` is inspected here, so a rectification block that lacks a
   * payment date is indistinguishable from no rectification block at all;
   * in both cases the decision falls through to `paymentDate`.
   */
  resolveType: (fields) => {
    const hasRect = !!fields?.find(
      (f) => f.name === 'fechaPagoRectificacion' && f.found,
    );
    if (hasRect) return 'pedimento_completo_xml';

    const paymentDate =
      fields?.find((f) => f.name === 'paymentDate' && f.found)?.value ?? null;
    return paymentDate ? 'pedimento_completo_xml' : 'proforma_completo_xml';
  },

  /**
   * Compose the 15-digit pedimento number from XML body + filename.
   *   YY:      from rectification fechaPago if present, else from the
   *            payment-date fecha; falls back to filename pattern 3.
   *   AA:      from the `aduanaEntradaSalida` field, else from the filename.
   *   PPPP:    from the filename (any of the three patterns).
   *   NNNNNNN: from the first text-only <pedimento> element padded to 7,
   *            else from the filename.
   *
   * @param {string} source - XML text of the document.
   * @param {Array<{name: string, found: boolean, value: *}>} fields -
   *   Results of this definition's extractors.
   * @param {string} filePath - Path of the file being identified.
   * @returns {string|null} The 15-digit number, or null when any component
   *   cannot be resolved or the composed value is not exactly 15 digits.
   */
  extractNumPedimento: (source, fields, filePath) => {
    const parts = parseFilenameParts(filePath);
    const foundValue = (name) =>
      fields?.find((f) => f.name === name && f.found)?.value;

    const yy =
      yyFromIsoDate(foundValue('fechaPagoRectificacion')) ||
      yyFromIsoDate(foundValue('paymentDate')) ||
      parts?.year ||
      null;

    const aduana =
      fields?.find((f) => f.name === 'aduanaEntradaSalida')?.value ||
      parts?.aduana ||
      null;

    const patente = parts?.patente || null;

    const pedimentoBody = firstTag(source, 'pedimento');
    const pedimento = pedimentoBody
      ? pad(pedimentoBody, 7)
      : parts?.pedimento || null;

    if (!yy || !aduana || !patente || !pedimento) return null;

    // `pad` only lengthens, so an over-long or non-numeric component (for
    // example a 3-digit aduana clave) would otherwise leak out as a malformed
    // number. Reject anything that is not exactly 15 digits.
    const composed = `${pad(yy, 2)}${pad(aduana, 2)}${pad(patente, 4)}${pedimento}`;
    return /^\d{15}$/.test(composed) ? composed : null;
  },

  /**
   * Four-digit year taken from the two leading digits of the composed
   * pedimento number: values below 50 map to 20YY, all others to 19YY.
   *
   * @returns {number|null} The year, or null when no number can be composed.
   */
  extractPedimentoYear: (source, fields, filePath) => {
    const num = pedimentoCompletoXmlDefinition.extractNumPedimento(
      source,
      fields,
      filePath,
    );
    if (!num) return null;
    const year = parseInt(num.substring(0, 2), 10);
    return year < 50 ? year + 2000 : year + 1900;
  },

  extractors: [
    rfcExtractor,
    clavePedimentoExtractor,
    tipoOperacionExtractor,
    aduanaEntradaSalidaExtractor,
    paymentDateExtractor,
    fechaPagoRectificacionExtractor,
    coveExtractor,
    numEDocumentoExtractor,
  ],
};
|