@arela/uploader 1.0.20 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -1
- package/src/commands/DatastageCommand.js +164 -0
- package/src/commands/GDriveSyncCommand.js +475 -0
- package/src/commands/IdentifyCommand.js +179 -35
- package/src/commands/PollWorkerCommand.js +2 -0
- package/src/commands/ScanCommand.js +6 -3
- package/src/config/config.js +88 -2
- package/src/document-type-shared.js +13 -3
- package/src/document-types/_pedimento-shared-extractors.js +322 -0
- package/src/document-types/pedimento-completo-xml.js +322 -0
- package/src/document-types/pedimento-completo.js +99 -0
- package/src/document-types/pedimento-simplificado.js +37 -287
- package/src/file-detection.js +36 -2
- package/src/index.js +69 -0
- package/src/services/DatabaseService.js +3 -1
- package/src/services/DatastageApiService.js +240 -0
- package/src/services/GoogleDriveService.js +217 -0
- package/src/services/ScanApiService.js +30 -0
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
// Shared PDF extractors used by both `pedimento_simplificado` and
|
|
2
|
+
// `pedimento_completo` matchers. Keeping the regexes in a single module
|
|
3
|
+
// prevents drift between the two pedimento variants.
|
|
4
|
+
import { FieldResult } from '../document-type-shared.js';
|
|
5
|
+
|
|
6
|
+
// 1) Número de Pedimento: the first run of 15 digits grouped 2-2-4-7, where
//    each group may be separated from the next by a single whitespace
//    character. The value is reported with the separators stripped.
export const numPedimentoExtractor = {
  field: 'numPedimento',
  extract: (source) => {
    const hit = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
    if (!hit) {
      return new FieldResult('numPedimento', false, null);
    }
    // Collapse "22 07 3429 2002089" into "220734292002089".
    const digitsOnly = hit[0].replace(/\s/g, '');
    return new FieldResult('numPedimento', true, digitsOnly);
  },
};
|
|
18
|
+
|
|
19
|
+
// 2) Tipo de Operación: the 3 uppercase letters that follow the pedimento
//    number. Two layouts are recognized, in this order:
//      - spaced groups:        "22 07 3429 2002089 EXP"
//      - concatenated digits:  "260734296013645 EXP"
//    The concatenated form is the same layout clavePedimentoExtractor accepts;
//    without it this field was reported missing on documents where the clave
//    was still found.
export const tipoOperacionExtractor = {
  field: 'tipoOperacion',
  extract: (source) => {
    const layouts = [
      // Spaced layout is tried first so existing results are unchanged.
      /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+([A-Z]{3})/,
      /\d{15}\s+([A-Z]{3})/,
    ];
    for (const re of layouts) {
      const match = source.match(re);
      if (match) return new FieldResult('tipoOperacion', true, match[1]);
    }
    return new FieldResult('tipoOperacion', false, null);
  },
};
|
|
27
|
+
|
|
28
|
+
// 3) Clave de Pedimento: the 2-character key printed after tipoOperacion.
//    Several layouts are probed in priority order; the first one that matches
//    anywhere in the text supplies the value.
export const clavePedimentoExtractor = {
  field: 'clavePedimento',
  extract: (source) => {
    const layouts = [
      // Standard spaced layout: "22 07 3429 2002089 EXP RT"
      /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
      // Concatenated 15-digit layout: "260734296013645 EXP RT"
      /\d{15}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
      // Fallback: T.OPER keyword followed by 2-char clave
      /\b(?:EXP|IMP|TRA|TRN)\s+([A-Z][A-Z0-9])\b/,
      // Explicit label
      /CVE\.?\s*PED(?:IMENTO)?[^A-Z0-9]{0,60}?\b([A-Z][A-Z0-9])\b/i,
    ];

    // None of the patterns is global, so `test` carries no lastIndex state.
    const winner = layouts.find((layout) => layout.test(source));
    if (winner === undefined) {
      return new FieldResult('clavePedimento', false, null);
    }
    return new FieldResult('clavePedimento', true, source.match(winner)[1]);
  },
};
|
|
49
|
+
|
|
50
|
+
// 4) Aduana E/S. Sources are tried in this order:
//    - a line shaped "<digits> <number> <3 digits>" (the peso-bruto line);
//    - the same line shape ending in a 2-digit code (some SIMP layouts omit
//      the leading zero);
//    - characters 2-3 of the 15-digit pedimento number
//      (e.g. "260734296013645" → "07").
export const aduanaEntradaSalidaExtractor = {
  field: 'aduanaEntradaSalida',
  extract: (source) => {
    const hit = (value) => new FieldResult('aduanaEntradaSalida', true, value);

    // Peso-bruto line: 3-digit code first, then the 2-digit variant.
    const pesoBrutoShapes = [
      /^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m,
      /^\s*\d+\s+[\d,.]+\s+(\d{2})\s*$/m,
    ];
    for (const shape of pesoBrutoShapes) {
      const onLine = source.match(shape);
      if (onLine) return hit(onLine[1]);
    }

    // Last resort: derive the code from the pedimento number itself.
    const pedimento = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
    if (pedimento) {
      const digits = pedimento[0].replace(/\s/g, '');
      if (digits.length === 15) return hit(digits.substring(2, 4));
    }

    return new FieldResult('aduanaEntradaSalida', false, null);
  },
};
|
|
83
|
+
|
|
84
|
+
// 5) RFC: importer/exporter RFC.
// Strategy A: strict whole-line pattern (3-4 letters + 6 consecutive digits +
//   3 alphanum). COVE codes like COVE2681B1RX8 fail this because their digit
//   section is not 6 consecutive digits (2681B1 contains a letter).
// Strategy B: RFC as a word within a longer line (handles "RFC: IMS030409FZ0").
// Strategy C: loose 12-13 uppercase alphanum token alone on its line. Every
//   line is examined so that a COVE code is skipped and an RFC on a later
//   line — including the very next line — is still found.
export const rfcExtractor = {
  field: 'rfc',
  extract: (source) => {
    const RFC_STRICT = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
    const RFC_EMBEDDED = /\b([A-Z]{3,4}\d{6}[A-Z0-9]{3})\b/i;
    const RFC_LOOSE = /^[A-Z0-9]{12,13}$/;
    const lines = source
      .split(/\r?\n/)
      .map((l) => l.trim())
      .filter((l) => l);

    // Primary: RFC occupies an entire trimmed line.
    const strictLine = lines.find((line) => RFC_STRICT.test(line));
    if (strictLine) return new FieldResult('rfc', true, strictLine);

    // Fallback A: RFC embedded in a longer line (word-boundary search).
    for (const line of lines) {
      const m = line.match(RFC_EMBEDDED);
      if (m) return new FieldResult('rfc', true, m[1]);
    }

    // Fallback B: loose token alone on its line, ignoring COVE codes.
    // Scanning the trimmed lines (instead of a global /\n...\n/ regex over the
    // raw text) matters: that regex consumed the newline terminating a skipped
    // COVE line, so a candidate on the following line could never match, and
    // it also could not see the first or last line of the text.
    const looseLine = lines.find(
      (line) => RFC_LOOSE.test(line) && !/^COVE/i.test(line),
    );
    if (looseLine) return new FieldResult('rfc', true, looseLine);

    return new FieldResult('rfc', false, null);
  },
};
|
|
121
|
+
|
|
122
|
+
// 6) Código de Aceptación: exactly 8 uppercase alphanumeric characters on the
//    non-empty line that immediately follows the RFC line. The RFC line is
//    located with the strict whole-line pattern first; if no line matches,
//    the first loose 12-13 character line that does not start with COVE is
//    used instead.
export const codigoAceptacionExtractor = {
  field: 'codigoAceptacion',
  extract: (source) => {
    const strictRfc = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
    const looseRfc = /^[A-Z0-9]{12,13}$/;
    const acceptanceCode = /^[A-Z0-9]{8}$/;

    // Trimmed, non-empty lines only — blank lines never separate RFC and code.
    const rows = [];
    for (const raw of source.split(/\r?\n/)) {
      const row = raw.trim();
      if (row.length > 0) rows.push(row);
    }

    let anchor = rows.findIndex((row) => strictRfc.test(row));
    if (anchor === -1) {
      anchor = rows.findIndex(
        (row) => looseRfc.test(row) && !/^COVE/i.test(row),
      );
    }
    if (anchor === -1) {
      return new FieldResult('codigoAceptacion', false, null);
    }

    const candidate = rows[anchor + 1] || '';
    return acceptanceCode.test(candidate)
      ? new FieldResult('codigoAceptacion', true, candidate)
      : new FieldResult('codigoAceptacion', false, null);
  },
};
|
|
147
|
+
|
|
148
|
+
// 7) Num. E-Document: gathers every 13-character uppercase alphanumeric run
//    found on a `NUM. E-DOCUMENT` / `NUMERO DE E-DOCUMENT` title line and on
//    up to 10 lines after it, stopping early at a line containing NUMERO or
//    OBSERVACIONES. Distinct codes are reported as "[a,b,...]" in order of
//    first appearance.
export const numEDocumentoExtractor = {
  field: 'numEDocumento',
  extract: (source) => {
    const LOOKAHEAD = 10;
    const isTitle = (text) =>
      /NUMERO\s+DE\s+E-DOCUMENT/i.test(text) || /NUM\.?\s*E-DOCUMENT/i.test(text);

    const rows = source.split(/\r?\n/);
    const codes = new Set();

    rows.forEach((row, at) => {
      if (!isTitle(row)) return;

      // The title line itself, then the table rows that follow it.
      const scanned = [row];
      for (const follower of rows.slice(at + 1, at + 1 + LOOKAHEAD)) {
        if (/NUMERO|OBSERVACIONES/i.test(follower)) break;
        scanned.push(follower);
      }

      for (const text of scanned) {
        for (const code of text.match(/[A-Z0-9]{13}/g) || []) {
          codes.add(code);
        }
      }
    });

    if (codes.size === 0) {
      return new FieldResult('numEDocumento', false, null);
    }
    return new FieldResult('numEDocumento', true, `[${[...codes].join(',')}]`);
  },
};
|
|
183
|
+
|
|
184
|
+
// 8) Payment date: the date captured by the first label variant (in the
//    order listed below) that matches anywhere in the text.
export const paymentDateExtractor = {
  field: 'paymentDate',
  extract: (source) => {
    const labelled = [
      /FECHA\s+DE\s+PAGO:?\s*(\d{2}\/\d{2}\/\d{4})/i,
      /FECHA\s+DE\s+PAGO:?\s*(\d{4}\/\d{2}\/\d{2})/i,
      /2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/,
      /(?:^|\n)\s*PAGO\s+(\d{2}\/\d{2}\/\d{4})/i,
      /PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/i,
    ];

    let captured = null;
    for (let i = 0; captured === null && i < labelled.length; i += 1) {
      const hit = source.match(labelled[i]);
      if (hit) captured = hit[1];
    }
    return new FieldResult('paymentDate', captured !== null, captured);
  },
};
|
|
202
|
+
|
|
203
|
+
// 9) COVE: gathers every `COVE<alphanum>` token from lines that mention
//    `COVE` or `NUMERO DE ACUSE DE VALOR`. Tokens are upper-cased and
//    de-duplicated, then reported as "[a,b,...]". For a line such as
//    `COVE257W76NF2 / ID250230` only the COVE token is kept, because the
//    token pattern stops at the first non-alphanumeric character.
export const coveExtractor = {
  field: 'cove',
  extract: (source) => {
    const tokens = new Set();

    for (const row of source.split(/\r?\n/)) {
      const relevant =
        /COVE/i.test(row) || /NUMERO DE ACUSE DE VALOR/i.test(row);
      if (!relevant) continue;

      for (const token of row.match(/COVE[A-Z0-9]+/gi) || []) {
        tokens.add(token.toUpperCase());
      }
    }

    if (tokens.size === 0) {
      return new FieldResult('cove', false, null);
    }
    return new FieldResult('cove', true, `[${[...tokens].join(',')}]`);
  },
};
|
|
233
|
+
|
|
234
|
+
// 10) Patente. Sources are tried in this order:
//     - first column of the first "<digits> <digits> <digits>" line found
//       after a PATENTE:/PEDIMENTO:/ADUANA: header line;
//     - the 4-digit group of a "3429 4024126 07"-shaped sequence
//       (pago electrónico line, pedimento_completo);
//     - characters 4-7 of the 15-digit pedimento number.
export const patenteExtractor = {
  field: 'patente',
  extract: (source) => {
    const hit = (value) => new FieldResult('patente', true, value);

    // Header table.
    const rows = source.split(/\r?\n/);
    const headerAt = rows.findIndex((row) =>
      /PATENTE:.*PEDIMENTO:.*ADUANA:/i.test(row),
    );
    if (headerAt !== -1) {
      const dataRow = rows
        .slice(headerAt + 1)
        .map((row) => row.trim())
        .find((row) => /^\d+\s+\d+\s+\d+$/.test(row));
      if (dataRow !== undefined) return hit(dataRow.split(/\s+/)[0]);
    }

    // Pago electrónico line.
    const pago = source.match(/(\d{4})\s+\d{7}\s+\d{2}/);
    if (pago) return hit(pago[1]);

    // Pedimento number.
    const pedimento = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
    if (pedimento) {
      const digits = pedimento[0].replace(/\s/g, '');
      if (digits.length === 15) return hit(digits.substring(4, 8));
    }

    return new FieldResult('patente', false, null);
  },
};
|
|
271
|
+
|
|
272
|
+
// 11) Fecha de Pago Rectificación: the first dd/mm/yyyy date that appears
//     within 500 characters after "RECTIFICACION", or failing that, after
//     "FECHA PAGO RECT" (both case-insensitive).
export const fechaPagoRectificacionExtractor = {
  field: 'fechaPagoRectificacion',
  extract: (source) => {
    const anchors = [
      /RECTIFICACION[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
      /FECHA PAGO RECT[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
    ];
    for (const anchor of anchors) {
      const hit = source.match(anchor);
      if (hit) {
        return new FieldResult('fechaPagoRectificacion', true, hit[1]);
      }
    }
    return new FieldResult('fechaPagoRectificacion', false, null);
  },
};
|
|
295
|
+
|
|
296
|
+
/**
 * The extractor set shared by the pedimento_simplificado and
 * pedimento_completo PDF matchers, listed in field order 1-11.
 * The order is only significant to downstream tooling that inspects the
 * resulting array positionally.
 */
export const sharedPedimentoExtractors = [
  numPedimentoExtractor,
  tipoOperacionExtractor,
  clavePedimentoExtractor,
  aduanaEntradaSalidaExtractor,
  rfcExtractor,
  codigoAceptacionExtractor,
  numEDocumentoExtractor,
  paymentDateExtractor,
  coveExtractor,
  patenteExtractor,
  fechaPagoRectificacionExtractor,
];
|
|
313
|
+
|
|
314
|
+
/**
 * Shared helper used by both PDF matchers' `extractPedimentoYear`.
 *
 * Reads the `numPedimento` entry from the extracted fields and expands its
 * two leading digits into a four-digit year using a pivot of 50
 * (00-49 → 2000-2049, 50-99 → 1950-1999).
 *
 * @param {Array<{name: string, value: *}>|null|undefined} fields - Extracted
 *   field results; entries are matched by `name`.
 * @returns {number|null} The four-digit year, or null when there is no
 *   `numPedimento` value or it does not start with two digits.
 */
export function pedimentoYearFromFields(fields) {
  const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
  if (!numPedimento) return null;

  // String() keeps a numeric value from throwing on `.substring`.
  const prefix = String(numPedimento).substring(0, 2);
  // Without this guard a non-numeric prefix made parseInt return NaN, which
  // then leaked to callers as NaN instead of "unknown".
  if (!/^\d{2}$/.test(prefix)) return null;

  const year = Number.parseInt(prefix, 10);
  return year < 50 ? year + 2000 : year + 1900;
}
|
|
@@ -0,0 +1,322 @@
|
|
|
1
|
+
// VUCEM "consultarPedimentoCompleto" XML matcher.
|
|
2
|
+
//
|
|
3
|
+
// STATUS: implemented but NOT registered in `document-type-shared.js`. To
|
|
4
|
+
// activate, uncomment the import + registration in that file. All downstream
|
|
5
|
+
// code (composeArelaPath, arela-api propagation SQL, IdentifyCommand
|
|
6
|
+
// counters) already includes `pedimento_completo_xml`, so re-enabling is a
|
|
7
|
+
// single-line change.
|
|
8
|
+
//
|
|
9
|
+
// Filename patterns recognized (try in order — patente extraction):
|
|
10
|
+
// 1) VU_PATENTE_ADUANA_PEDIMENTO.xml → e.g. VU_3429_070_5016101.xml
|
|
11
|
+
// 2) ADUANA-PATENTE-PEDIMENTO.xml → e.g. 670-3806-2002487.xml
|
|
12
|
+
// 3) {15-digit}[_{15-digit}].xml → e.g. 260734296016642_260734296016642.xml
|
|
13
|
+
// If none match, arela_path is left null and propagation fills it from a
|
|
14
|
+
// sibling PDF in the same directory.
|
|
15
|
+
//
|
|
16
|
+
// numPedimento is composed from XML body + filename because the body never
|
|
17
|
+
// carries the 15-digit form: YY|AA|PPPP|NNNNNNN.
|
|
18
|
+
import { FieldResult } from '../document-type-shared.js';
|
|
19
|
+
|
|
20
|
+
// --------------------------- helpers ---------------------------------------
|
|
21
|
+
|
|
22
|
+
/**
 * Return the trimmed text of the first leaf element named `tag`.
 *
 * Matches `<ns2:tag>value</ns2:tag>` as well as the unprefixed
 * `<tag>value</tag>`, case-insensitively. Opening tags may carry attributes
 * (`<ns2:tag xsi:type="...">`); self-closing tags are not treated as
 * openers. Only elements whose content has no child markup match, because the
 * captured text may not contain `<`.
 *
 * @param {string} source - XML text to search.
 * @param {string} tag - Local element name (without namespace prefix).
 * @returns {string|null} Trimmed element text, or null when no such element.
 */
function firstTag(source, tag) {
  // Escape the name so it is matched literally rather than as a pattern.
  const name = String(tag).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const re = new RegExp(
    // `(?<!/)>` rejects `<tag/>` and `<tag attr="x"/>` as opening tags.
    `<(?:[a-z0-9]+:)?${name}(?:\\s[^<>]*)?(?<!/)>([^<]*)</(?:[a-z0-9]+:)?${name}\\s*>`,
    'i',
  );
  const m = source.match(re);
  return m ? m[1].trim() : null;
}
|
|
31
|
+
|
|
32
|
+
/**
 * Return the inner content of every element named `tag`, in document order.
 *
 * Matches prefixed (`<ns2:tag>`) and unprefixed (`<tag>`) forms,
 * case-insensitively. Opening tags may carry attributes; self-closing tags
 * are not treated as openers. Each block ends at the nearest closing tag, so
 * elements nested inside an element of the same name are not supported.
 *
 * @param {string} source - XML text to search.
 * @param {string} tag - Local element name (without namespace prefix).
 * @returns {string[]} Inner content of each matching element (possibly empty).
 */
function allTagBlocks(source, tag) {
  // Escape the name so it is matched literally rather than as a pattern.
  const name = String(tag).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const re = new RegExp(
    // `(?<!/)>` rejects `<tag/>` and `<tag attr="x"/>` as opening tags.
    `<(?:[a-z0-9]+:)?${name}(?:\\s[^<>]*)?(?<!/)>([\\s\\S]*?)</(?:[a-z0-9]+:)?${name}\\s*>`,
    'gi',
  );
  return Array.from(source.matchAll(re), (m) => m[1]);
}
|
|
44
|
+
|
|
45
|
+
/**
 * Left-pad `value` with zeros to at least `length` characters.
 * null/undefined pass through as null; longer values are returned unchanged.
 */
function pad(value, length) {
  const missing = value === null || value === undefined;
  return missing ? null : String(value).padStart(length, '0');
}
|
|
49
|
+
|
|
50
|
+
/**
 * Decode pedimento components from an XML file name.
 *
 * The base name (text after the last `/` or `\`) is tested against three
 * shapes, in order:
 *   1. `VU_PPPP_AAA_NNNNNNN.xml` — patente, 3-digit aduana, pedimento
 *   2. `AAA-PPPP-NNNNNNN.xml`    — 3-digit aduana, patente, pedimento
 *   3. `YYAAPPPPNNNNNNN[_<15 digits>].xml` — the full 15-digit number
 * For shapes 1 and 2 only the first two aduana digits are kept and `year` is
 * null, since the name carries no year.
 *
 * @returns {{patente: string, aduana: string, pedimento: string,
 *   year: string|null}|null} The decoded parts, or null for a falsy path, an
 *   empty base name, or a name matching none of the shapes.
 */
function parseFilenameParts(filePath) {
  if (!filePath) return null;

  const segments = filePath.split(/[\\/]/);
  const baseName = segments[segments.length - 1];
  if (!baseName) return null;

  // 1) VU_PATENTE_ADUANA_PEDIMENTO.xml
  const vu = baseName.match(/^VU_(\d{4})_(\d{3})_(\d{7})\.xml$/i);
  if (vu) {
    const [, patente, aduana3, pedimento] = vu;
    return { patente, aduana: aduana3.substring(0, 2), pedimento, year: null };
  }

  // 2) ADUANA-PATENTE-PEDIMENTO.xml
  const dashed = baseName.match(/^(\d{3})-(\d{4})-(\d{7})\.xml$/i);
  if (dashed) {
    const [, aduana3, patente, pedimento] = dashed;
    return { patente, aduana: aduana3.substring(0, 2), pedimento, year: null };
  }

  // 3) 15-digit form YY|AA|PPPP|NNNNNNN
  const full = baseName.match(/^(\d{15})(?:_\d{15})?\.xml$/i);
  if (full) {
    const digits = full[1];
    return {
      year: digits.slice(0, 2),
      aduana: digits.slice(2, 4),
      patente: digits.slice(4, 8),
      pedimento: digits.slice(8, 15),
    };
  }

  return null;
}
|
|
97
|
+
|
|
98
|
+
// Two-digit year from a string that starts with "YYYY-", e.g.
// "2026-05-08-06:00" → "26". Returns null for a falsy input or when the
// string does not begin with four digits followed by a dash.
function yyFromIsoDate(iso) {
  if (!iso) return null;
  const leadingYear = iso.match(/^(\d{4})-/);
  if (!leadingYear) return null;
  return leadingYear[1].slice(2);
}
|
|
104
|
+
|
|
105
|
+
// Payment date lookup: among the <fechas> blocks, take those whose first
// <clave> is "2" and return the first non-empty <fecha> found in one of
// them; null when there is none.
function findPaymentDate(source) {
  const paidOn = allTagBlocks(source, 'fechas')
    .filter((block) => firstTag(block, 'clave') === '2')
    .map((block) => firstTag(block, 'fecha'))
    .find((fecha) => Boolean(fecha));
  return paidOn ?? null;
}
|
|
117
|
+
|
|
118
|
+
// --------------------------- extractors ------------------------------------
|
|
119
|
+
|
|
120
|
+
// RFC: text of the first <rfc> leaf element in the document.
const rfcExtractor = {
  field: 'rfc',
  extract: (source) => {
    const rfc = firstTag(source, 'rfc');
    const found = Boolean(rfc);
    return new FieldResult('rfc', found, rfc);
  },
};
|
|
127
|
+
|
|
128
|
+
// Clave de pedimento: the <clave> inside the first <claveDocumento> block,
// e.g. <ns2:claveDocumento><ns2:clave>R1</ns2:clave></ns2:claveDocumento>.
const clavePedimentoExtractor = {
  field: 'clavePedimento',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'claveDocumento');
    const clave = firstBlock === undefined ? null : firstTag(firstBlock, 'clave');
    return new FieldResult('clavePedimento', Boolean(clave), clave);
  },
};
|
|
137
|
+
|
|
138
|
+
// Tipo de operación: the <descripcion> inside the first <tipoOperacion> block.
const tipoOperacionExtractor = {
  field: 'tipoOperacion',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'tipoOperacion');
    const descripcion =
      firstBlock === undefined ? null : firstTag(firstBlock, 'descripcion');
    return new FieldResult('tipoOperacion', Boolean(descripcion), descripcion);
  },
};
|
|
146
|
+
|
|
147
|
+
// Aduana E/S: the <clave> inside the first <aduanaEntradaSalida> block,
// left-padded with zeros to at least 2 characters.
const aduanaEntradaSalidaExtractor = {
  field: 'aduanaEntradaSalida',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'aduanaEntradaSalida');
    const clave = firstBlock === undefined ? null : firstTag(firstBlock, 'clave');
    if (!clave) {
      return new FieldResult('aduanaEntradaSalida', false, null);
    }
    return new FieldResult('aduanaEntradaSalida', true, pad(clave, 2));
  },
};
|
|
159
|
+
|
|
160
|
+
// Payment date: whatever findPaymentDate resolves for the document.
const paymentDateExtractor = {
  field: 'paymentDate',
  extract: (source) => {
    const paidOn = findPaymentDate(source);
    const found = Boolean(paidOn);
    return new FieldResult('paymentDate', found, paidOn);
  },
};
|
|
167
|
+
|
|
168
|
+
// Fecha de pago de rectificación: the <fechaPago> inside the first
// <rectificacion> block; not found when the document has no such block.
const fechaPagoRectificacionExtractor = {
  field: 'fechaPagoRectificacion',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'rectificacion');
    if (firstBlock === undefined) {
      return new FieldResult('fechaPagoRectificacion', false, null);
    }
    const paidOn = firstTag(firstBlock, 'fechaPago');
    return new FieldResult('fechaPagoRectificacion', Boolean(paidOn), paidOn);
  },
};
|
|
179
|
+
|
|
180
|
+
// COVE: the first <numero> of each <facturas> block, de-duplicated and
// reported as "[a,b,...]" in document order.
const coveExtractor = {
  field: 'cove',
  extract: (source) => {
    const numeros = allTagBlocks(source, 'facturas')
      .map((block) => firstTag(block, 'numero'))
      .filter((numero) => Boolean(numero));

    if (numeros.length === 0) {
      return new FieldResult('cove', false, null);
    }
    const distinct = Array.from(new Set(numeros));
    return new FieldResult('cove', true, `[${distinct.join(',')}]`);
  },
};
|
|
197
|
+
|
|
198
|
+
// Num. E-Document: for every <identificadores> block whose nested
// <claveIdentificador><clave> equals "ED", collect the block's first
// <complemento1>. Distinct values are reported as "[a,b,...]" in document
// order.
const numEDocumentoExtractor = {
  field: 'numEDocumento',
  extract: (source) => {
    const codes = new Set();

    for (const block of allTagBlocks(source, 'identificadores')) {
      const [claveBlock] = allTagBlocks(block, 'claveIdentificador');
      const clave = claveBlock === undefined ? null : firstTag(claveBlock, 'clave');
      if (clave !== 'ED') continue;

      const complemento = firstTag(block, 'complemento1');
      if (complemento) codes.add(complemento);
    }

    if (codes.size === 0) {
      return new FieldResult('numEDocumento', false, null);
    }
    return new FieldResult('numEDocumento', true, `[${[...codes].join(',')}]`);
  },
};
|
|
222
|
+
|
|
223
|
+
// Composed numPedimento — needs the filename plus already-extracted fields,
// so it is not expressed as a regular extractor (extractors only receive
// `source`). It is composed by `extractNumPedimento(source, fields, filePath)`
// on the definition below, which is the canonical place numPedimento is
// built for XML.
|
|
231
|
+
|
|
232
|
+
// --------------------------- definition ------------------------------------
|
|
233
|
+
|
|
234
|
+
export const pedimentoCompletoXmlDefinition = {
  type: 'pedimento_completo_xml',
  extensions: ['xml'],

  /** A document qualifies when it mentions the VUCEM response element name. */
  match: (source) => /consultarPedimentoCompletoRespuesta/i.test(source),

  /**
   * Decide the final type from the extracted fields.
   * Returns `pedimento_completo_xml` when a found `fechaPagoRectificacion`
   * field exists, or otherwise when a found `paymentDate` field has a truthy
   * value. Without either piece of payment evidence the document resolves to
   * `proforma_completo_xml`.
   */
  resolveType: (fields) => {
    const rectified = fields?.some(
      (f) => f.name === 'fechaPagoRectificacion' && f.found,
    );
    if (rectified) return 'pedimento_completo_xml';

    const paid = fields?.find((f) => f.name === 'paymentDate' && f.found);
    const paymentDate = paid?.value ?? null;
    return paymentDate ? 'pedimento_completo_xml' : 'proforma_completo_xml';
  },

  /**
   * Compose the pedimento number (YY AA PPPP NNNNNNN) from XML body + filename.
   *   YY:      year of the found fechaPagoRectificacion, else of the found
   *            paymentDate, else the year carried by a 15-digit filename.
   *   AA:      the aduanaEntradaSalida field value, else the filename aduana.
   *   PPPP:    patente from the filename (any of the three patterns).
   *   NNNNNNN: first <pedimento> leaf in the body padded to 7, else the
   *            filename pedimento.
   * Returns null if any component cannot be resolved.
   */
  extractNumPedimento: (source, fields, filePath) => {
    const parts = parseFilenameParts(filePath);
    const foundValue = (name) =>
      fields?.find((f) => f.name === name && f.found)?.value;

    const rect = foundValue('fechaPagoRectificacion');
    const pay = foundValue('paymentDate');
    const yy = yyFromIsoDate(rect) || yyFromIsoDate(pay) || parts?.year || null;

    // Unlike the date fields, aduana is read regardless of its `found` flag.
    const aduanaField = fields?.find(
      (f) => f.name === 'aduanaEntradaSalida',
    )?.value;
    const aduana = aduanaField || parts?.aduana || null;

    const patente = parts?.patente || null;

    const pedimentoBody = firstTag(source, 'pedimento');
    let pedimento = null;
    if (pedimentoBody) {
      pedimento = pad(pedimentoBody, 7);
    } else if (parts?.pedimento) {
      pedimento = parts.pedimento;
    }

    const incomplete = [yy, aduana, patente, pedimento].some((piece) => !piece);
    if (incomplete) return null;

    return `${pad(yy, 2)}${pad(aduana, 2)}${pad(patente, 4)}${pedimento}`;
  },

  /**
   * Four-digit year taken from the two leading digits of the composed
   * pedimento number (values below 50 map to 20xx, the rest to 19xx).
   * Returns null when the number cannot be composed.
   */
  extractPedimentoYear: (source, fields, filePath) => {
    const composed = pedimentoCompletoXmlDefinition.extractNumPedimento(
      source,
      fields,
      filePath,
    );
    if (!composed) return null;
    const yy = parseInt(composed.substring(0, 2), 10);
    return yy < 50 ? yy + 2000 : yy + 1900;
  },

  extractors: [
    rfcExtractor,
    clavePedimentoExtractor,
    tipoOperacionExtractor,
    aduanaEntradaSalidaExtractor,
    paymentDateExtractor,
    fechaPagoRectificacionExtractor,
    coveExtractor,
    numEDocumentoExtractor,
  ],
};
|