@arela/uploader 1.0.20 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,322 @@
1
+ // Shared PDF extractors used by both `pedimento_simplificado` and
2
+ // `pedimento_completo` matchers. Keeping the regexes in a single module
3
+ // prevents drift between the two pedimento variants.
4
+ import { FieldResult } from '../document-type-shared.js';
5
+
6
// 1) Pedimento number (numPedimento): 15 digits grouped 2-2-4-7, where each
//    group may be followed by at most one whitespace character. The reported
//    value has every whitespace character removed.
export const numPedimentoExtractor = {
  field: 'numPedimento',
  extract: (source) => {
    const hit = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
    if (!hit) {
      return new FieldResult('numPedimento', false, null);
    }
    // Drop the optional separators so callers always get the compact form.
    const digits = hit[0].split(/\s/).join('');
    return new FieldResult('numPedimento', true, digits);
  },
};
18
+
19
// 2) Tipo de Operación: the 3 uppercase letters that follow the pedimento
//    number. Two layouts are tried in order:
//      a) whitespace-separated groups: "22 07 3429 2002089 EXP"
//      b) concatenated 15-digit number: "260734296013645 EXP"
//    Layout (b) is only consulted when (a) finds nothing, so every document
//    that matched the spaced layout keeps yielding the same value.
export const tipoOperacionExtractor = {
  field: 'tipoOperacion',
  extract: (source) => {
    const layouts = [
      // Standard spaced layout
      /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+([A-Z]{3})/,
      // Concatenated 15-digit layout (same one clavePedimento accepts)
      /\d{15}\s+([A-Z]{3})/,
    ];
    for (const layout of layouts) {
      const match = source.match(layout);
      if (match) return new FieldResult('tipoOperacion', true, match[1]);
    }
    return new FieldResult('tipoOperacion', false, null);
  },
};
27
+
28
// 3) Clave de Pedimento: the 2-character code that follows tipoOperacion.
//    Several layouts are tried in order; the first one that matches wins.
export const clavePedimentoExtractor = {
  field: 'clavePedimento',
  extract: (source) => {
    const layouts = [
      // Spaced pedimento number, e.g. "22 07 3429 2002089 EXP RT"
      /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
      // Concatenated pedimento number, e.g. "260734296013645 EXP RT"
      /\d{15}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
      // Operation keyword directly followed by the clave
      /\b(?:EXP|IMP|TRA|TRN)\s+([A-Z][A-Z0-9])\b/,
      // Explicit "CVE. PED(IMENTO)" label
      /CVE\.?\s*PED(?:IMENTO)?[^A-Z0-9]{0,60}?\b([A-Z][A-Z0-9])\b/i,
    ];

    let clave = null;
    // `some` stops at the first layout that produces a match.
    layouts.some((layout) => {
      const hit = source.match(layout);
      if (hit) clave = hit[1];
      return hit !== null;
    });

    return new FieldResult('clavePedimento', clave !== null, clave);
  },
};
49
+
50
// 4) Aduana E/S. Resolution order:
//    1. a line of the form "<digits> <number> <3-digit code>"
//       (the peso-bruto line);
//    2. the same line shape ending in a 2-digit code (some SIMP layouts omit
//       the leading zero);
//    3. characters 2-3 of the pedimento number
//       (e.g. "260734296013645" → "07").
export const aduanaEntradaSalidaExtractor = {
  field: 'aduanaEntradaSalida',
  extract: (source) => {
    const hit = (value) => new FieldResult('aduanaEntradaSalida', true, value);

    // Steps 1 and 2: peso-bruto line, widest code first.
    const pesoBrutoLayouts = [
      /^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m,
      /^\s*\d+\s+[\d,.]+\s+(\d{2})\s*$/m,
    ];
    for (const layout of pesoBrutoLayouts) {
      const line = source.match(layout);
      if (line) return hit(line[1]);
    }

    // Step 3: derive from the pedimento number (AA BB CCCC DDDDDDD → BB).
    const pedimento = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
    const digits = pedimento ? pedimento[0].replace(/\s/g, '') : '';
    if (digits.length === 15) {
      return hit(digits.substring(2, 4));
    }

    return new FieldResult('aduanaEntradaSalida', false, null);
  },
};
83
+
84
// 5) RFC: importer/exporter RFC. All strategies work on the trimmed,
//    non-empty lines of the document and are tried in this order:
//    A. strict whole-line pattern (3-4 letters + 6 consecutive digits +
//       3 alphanumerics). COVE codes such as COVE2681B1RX8 fail this because
//       their digit section is not 6 consecutive digits.
//    B. the same pattern as a word inside a longer line
//       (handles "RFC: IMS030409FZ0").
//    C. loose: a line made only of 12-13 uppercase alphanumerics that does
//       not start with "COVE". Lines are checked one by one, so a candidate
//       placed directly after a skipped COVE line — or on the first or last
//       line of the document — is still found. (A newline-anchored global
//       regex cannot do this: each match consumes the newline the next
//       candidate would need.)
export const rfcExtractor = {
  field: 'rfc',
  extract: (source) => {
    const RFC_STRICT = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
    const RFC_EMBEDDED = /\b([A-Z]{3,4}\d{6}[A-Z0-9]{3})\b/i;
    const RFC_LOOSE = /^[A-Z0-9]{12,13}$/;
    const COVE_PREFIX = /^COVE/i;

    const lines = source
      .split(/\r?\n/)
      .map((l) => l.trim())
      .filter((l) => l.length > 0);

    // A: RFC occupies an entire trimmed line
    const strictLine = lines.find((line) => RFC_STRICT.test(line));
    if (strictLine) return new FieldResult('rfc', true, strictLine);

    // B: RFC embedded in a longer line (word-boundary search)
    for (const line of lines) {
      const m = line.match(RFC_EMBEDDED);
      if (m) return new FieldResult('rfc', true, m[1]);
    }

    // C: loose 12-13 alphanumerics alone on a line, skipping COVE codes
    const looseLine = lines.find(
      (line) => RFC_LOOSE.test(line) && !COVE_PREFIX.test(line),
    );
    if (looseLine) return new FieldResult('rfc', true, looseLine);

    return new FieldResult('rfc', false, null);
  },
};
121
+
122
// 6) Código de Aceptación: exactly 8 uppercase alphanumerics on the
//    non-empty line that immediately follows the RFC line. The RFC line is
//    the first line matching the strict RFC shape or, failing that, the first
//    12-13 character uppercase alphanumeric line that is not a COVE code.
export const codigoAceptacionExtractor = {
  field: 'codigoAceptacion',
  extract: (source) => {
    const strictRfc = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
    const looseRfc = /^[A-Z0-9]{12,13}$/;

    // Trimmed, non-empty lines only, so blank lines never sit between the
    // RFC and the code.
    const rows = [];
    for (const raw of source.split(/\r?\n/)) {
      const row = raw.trim();
      if (row.length > 0) rows.push(row);
    }

    let anchor = rows.findIndex((row) => strictRfc.test(row));
    if (anchor === -1) {
      anchor = rows.findIndex(
        (row) => !/^COVE/i.test(row) && looseRfc.test(row),
      );
    }

    const candidate = anchor >= 0 ? rows[anchor + 1] || '' : '';
    const accepted = anchor >= 0 && /^[A-Z0-9]{8}$/.test(candidate);
    return new FieldResult(
      'codigoAceptacion',
      accepted,
      accepted ? candidate : null,
    );
  },
};
147
+
148
// 7) Num. E-Document: every run of 13 uppercase alphanumerics found on a
//    `NUM. E-DOCUMENT` / `NUMERO DE E-DOCUMENT` label line or on the lines
//    after it (at most 10, stopping early at a line containing NUMERO or
//    OBSERVACIONES). Duplicates are dropped, first-seen order is kept, and
//    the value is rendered as "[code1,code2,...]".
export const numEDocumentoExtractor = {
  field: 'numEDocumento',
  extract: (source) => {
    const rows = source.split(/\r?\n/);
    const labelPatterns = [/NUMERO\s+DE\s+E-DOCUMENT/i, /NUM\.?\s*E-DOCUMENT/i];
    const codesIn = (text) => text.match(/[A-Z0-9]{13}/g) || [];

    // A Set keeps insertion order, which gives de-duplication for free.
    const collected = new Set();

    rows.forEach((row, at) => {
      if (!labelPatterns.some((label) => label.test(row))) return;

      // Codes printed on the label line itself
      codesIn(row).forEach((code) => collected.add(code));

      // Codes in the table rows that follow the label
      for (const follower of rows.slice(at + 1, at + 11)) {
        if (/NUMERO|OBSERVACIONES/i.test(follower)) break;
        codesIn(follower).forEach((code) => collected.add(code));
      }
    });

    if (collected.size === 0) {
      return new FieldResult('numEDocumento', false, null);
    }
    return new FieldResult(
      'numEDocumento',
      true,
      `[${[...collected].join(',')}]`,
    );
  },
};
183
+
184
// 8) Payment date: the date captured by the first label variant that
//    matches, tried in the order listed below.
export const paymentDateExtractor = {
  field: 'paymentDate',
  extract: (source) => {
    const labelVariants = [
      /FECHA\s+DE\s+PAGO:?\s*(\d{2}\/\d{2}\/\d{4})/i,
      /FECHA\s+DE\s+PAGO:?\s*(\d{4}\/\d{2}\/\d{2})/i,
      /2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/,
      /(?:^|\n)\s*PAGO\s+(\d{2}\/\d{2}\/\d{4})/i,
      /PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/i,
    ];

    let captured = null;
    // Stop as soon as one variant yields a date.
    for (let k = 0; k < labelVariants.length && captured === null; k += 1) {
      const hit = source.match(labelVariants[k]);
      captured = hit ? hit[1] : null;
    }

    return new FieldResult('paymentDate', captured !== null, captured);
  },
};
202
+
203
// 9) COVE: every `COVE<alphanumerics>` token found on lines that mention
//    `COVE` or `NUMERO DE ACUSE DE VALOR`. Tokens are upper-cased,
//    de-duplicated in first-seen order and rendered as "[a,b,...]".
//    For a CoveFact line such as `COVE257W76NF2 / ID250230` only the leading
//    COVE token is kept, because the token pattern stops at the space.
export const coveExtractor = {
  field: 'cove',
  extract: (source) => {
    const tokens = new Set();

    for (const row of source.split(/\r?\n/)) {
      const isCoveRow =
        /COVE/i.test(row) || /NUMERO DE ACUSE DE VALOR/i.test(row);
      if (!isCoveRow) continue;

      for (const token of row.match(/COVE[A-Z0-9]+/gi) || []) {
        tokens.add(token.toUpperCase());
      }
    }

    if (tokens.size === 0) {
      return new FieldResult('cove', false, null);
    }
    return new FieldResult('cove', true, `[${[...tokens].join(',')}]`);
  },
};
233
+
234
// 10) Patente. Resolution order:
//     1. first column of the first "<digits> <digits> <digits>" line found
//        after a PATENTE:/PEDIMENTO:/ADUANA: header line;
//     2. the 4-digit group of a "3429 4024126 07"-shaped sequence
//        (pago electrónico line in pedimento_completo);
//     3. characters 4-7 of the pedimento number.
export const patenteExtractor = {
  field: 'patente',
  extract: (source) => {
    const hit = (value) => new FieldResult('patente', true, value);

    // 1. Header table
    const rows = source.split(/\r?\n/);
    const headerAt = rows.findIndex((row) =>
      /PATENTE:.*PEDIMENTO:.*ADUANA:/i.test(row),
    );
    if (headerAt !== -1) {
      const dataRow = rows
        .slice(headerAt + 1)
        .map((row) => row.trim())
        .find((row) => /^\d+\s+\d+\s+\d+$/.test(row));
      if (dataRow !== undefined) {
        return hit(dataRow.split(/\s+/)[0]);
      }
    }

    // 2. Pago electrónico line
    const pago = source.match(/(\d{4})\s+\d{7}\s+\d{2}/);
    if (pago) return hit(pago[1]);

    // 3. Slice of the pedimento number
    const pedimento = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
    if (pedimento) {
      const digits = pedimento[0].replace(/\s/g, '');
      if (digits.length === 15) {
        return hit(digits.substring(4, 8));
      }
    }

    return new FieldResult('patente', false, null);
  },
};
271
+
272
// 11) Fecha de Pago Rectificación — used when clavePedimento is a
//     rectification. Returns the first dd/mm/yyyy date found within 500
//     characters after "RECTIFICACION" or, failing that, after
//     "FECHA PAGO RECT".
export const fechaPagoRectificacionExtractor = {
  field: 'fechaPagoRectificacion',
  extract: (source) => {
    const anchors = [
      /RECTIFICACION[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
      /FECHA PAGO RECT[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
    ];

    for (const anchor of anchors) {
      const hit = source.match(anchor);
      if (hit) {
        return new FieldResult('fechaPagoRectificacion', true, hit[1]);
      }
    }

    return new FieldResult('fechaPagoRectificacion', false, null);
  },
};
295
+
296
/**
 * Canonical extractor list for both pedimento_simplificado and
 * pedimento_completo PDFs. Every entry is one of the `{ field, extract }`
 * extractor objects defined earlier in this module.
 * Order matters only for downstream tooling that inspects the result array.
 */
export const sharedPedimentoExtractors = [
  numPedimentoExtractor,
  tipoOperacionExtractor,
  clavePedimentoExtractor,
  aduanaEntradaSalidaExtractor,
  rfcExtractor,
  codigoAceptacionExtractor,
  numEDocumentoExtractor,
  paymentDateExtractor,
  coveExtractor,
  patenteExtractor,
  fechaPagoRectificacionExtractor,
];
313
+
314
/**
 * Shared helper used by both PDF matchers' `extractPedimentoYear`.
 *
 * Reads the `numPedimento` entry from `fields` and expands its two leading
 * digits into a four-digit year: values below 50 map to 20YY, the rest to
 * 19YY.
 *
 * @param {Array<{name: string, value: *}>|null|undefined} fields
 *   Extracted field results; only `name` and `value` are read.
 * @returns {number|null} The four-digit year, or null when `fields` is
 *   missing, has no usable `numPedimento`, or the value does not start with
 *   two digits (previously such input produced NaN).
 */
export function pedimentoYearFromFields(fields) {
  const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
  if (!numPedimento) return null;

  // Coerce first so a numeric value does not throw on a missing string method.
  const prefix = String(numPedimento).slice(0, 2);
  if (!/^\d{2}$/.test(prefix)) return null;

  const year = Number.parseInt(prefix, 10);
  return year < 50 ? year + 2000 : year + 1900;
}
@@ -0,0 +1,322 @@
1
+ // VUCEM "consultarPedimentoCompleto" XML matcher.
2
+ //
3
+ // STATUS: implemented but NOT registered in `document-type-shared.js`. To
4
+ // activate, uncomment the import + registration in that file. All downstream
5
+ // code (composeArelaPath, arela-api propagation SQL, IdentifyCommand
6
+ // counters) already includes `pedimento_completo_xml`, so re-enabling is a
7
+ // single-line change.
8
+ //
9
+ // Filename patterns recognized (try in order — patente extraction):
10
+ // 1) VU_PATENTE_ADUANA_PEDIMENTO.xml → e.g. VU_3429_070_5016101.xml
11
+ // 2) ADUANA-PATENTE-PEDIMENTO.xml → e.g. 670-3806-2002487.xml
12
+ // 3) {15-digit}[_{15-digit}].xml → e.g. 260734296016642_260734296016642.xml
13
+ // If none match, arela_path is left null and propagation fills it from a
14
+ // sibling PDF in the same directory.
15
+ //
16
+ // numPedimento is composed from XML body + filename because the body never
17
+ // carries the 15-digit form: YY|AA|PPPP|NNNNNNN.
18
+ import { FieldResult } from '../document-type-shared.js';
19
+
20
+ // --------------------------- helpers ---------------------------------------
21
+
22
/**
 * Return the trimmed text of the first `<tag>` element in `source` whose
 * content holds no child elements (the content may not contain `<`).
 *
 * Matching is case-insensitive and accepts an optional namespace prefix
 * (`<ns2:tag>` or `<tag>`) and optional attributes on the opening tag.
 * Self-closing elements (`<tag/>`) are not matched. The tag name is escaped
 * before being placed in the pattern, so regex metacharacters in `tag` are
 * matched literally.
 *
 * @param {string} source XML text to search.
 * @param {string} tag Local element name, without prefix.
 * @returns {string|null} Trimmed element text, or null when no such element
 *   exists.
 */
function firstTag(source, tag) {
  // Escape so that e.g. "a.b" cannot match "<aXb>".
  const name = String(tag).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const prefix = '(?:[\\w.-]+:)?';
  const re = new RegExp(
    // `(?<!/)>` rejects a self-closing `/>` as the end of the opening tag.
    `<${prefix}${name}(?:\\s[^>]*?)?(?<!/)>([^<]*)</${prefix}${name}\\s*>`,
    'i',
  );
  const m = source.match(re);
  return m ? m[1].trim() : null;
}
31
+
32
/**
 * Return the raw inner content of every `<tag>...</tag>` element in
 * `source`, in document order.
 *
 * Matching is case-insensitive and accepts an optional namespace prefix and
 * optional attributes on the opening tag; self-closing elements are not
 * matched. Each element ends at the first closing tag with the same name, so
 * elements nested inside an element of the same name are not supported. The
 * tag name is escaped before being placed in the pattern.
 *
 * @param {string} source XML text to search.
 * @param {string} tag Local element name, without prefix.
 * @returns {string[]} Inner content of each match (untrimmed); empty when
 *   nothing matches.
 */
function allTagBlocks(source, tag) {
  // Escape so that e.g. "a.b" cannot match "<aXb>".
  const name = String(tag).replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
  const prefix = '(?:[\\w.-]+:)?';
  const re = new RegExp(
    // `(?<!/)>` rejects a self-closing `/>` as the end of the opening tag.
    `<${prefix}${name}(?:\\s[^>]*?)?(?<!/)>([\\s\\S]*?)</${prefix}${name}\\s*>`,
    'gi',
  );
  return Array.from(source.matchAll(re), (m) => m[1]);
}
44
+
45
/**
 * Left-pad `value` with zeros up to `length` characters.
 * `null` and `undefined` are passed through as `null`; values already at
 * least `length` characters long are returned unchanged (as strings).
 */
function pad(value, length) {
  return value == null ? null : String(value).padStart(length, '0');
}
49
+
50
/**
 * Split a pedimento XML file name into its components.
 *
 * Only the last path segment (after `/` or `\`) is examined, and three
 * layouts are tried in order:
 *   1) VU_PATENTE_ADUANA_PEDIMENTO.xml  (4, 3 and 7 digits)
 *   2) ADUANA-PATENTE-PEDIMENTO.xml     (3, 4 and 7 digits)
 *   3) 15 digits, optionally followed by `_` and 15 more digits,
 *      read as YY|AA|PPPP|NNNNNNN
 * For layouts 1 and 2 the aduana is cut to its first two digits and `year`
 * is null.
 *
 * @param {string} filePath Path or bare file name.
 * @returns {{patente: string, aduana: string, pedimento: string,
 *   year: (string|null)}|null} The parsed parts, or null when the path is
 *   empty or no layout matches.
 */
function parseFilenameParts(filePath) {
  if (!filePath) return null;

  const segments = filePath.split(/[\\/]/);
  const baseName = segments[segments.length - 1];
  if (!baseName) return null;

  // 1) VU_PATENTE_ADUANA_PEDIMENTO.xml
  const vu = baseName.match(/^VU_(\d{4})_(\d{3})_(\d{7})\.xml$/i);
  if (vu) {
    const [, patente, aduana3, pedimento] = vu;
    return { patente, aduana: aduana3.substring(0, 2), pedimento, year: null };
  }

  // 2) ADUANA-PATENTE-PEDIMENTO.xml
  const dashed = baseName.match(/^(\d{3})-(\d{4})-(\d{7})\.xml$/i);
  if (dashed) {
    const [, aduana3, patente, pedimento] = dashed;
    return { patente, aduana: aduana3.substring(0, 2), pedimento, year: null };
  }

  // 3) 15-digit form YY|AA|PPPP|NNNNNNN
  const packed = baseName.match(/^(\d{15})(?:_\d{15})?\.xml$/i);
  if (packed) {
    const digits = packed[1];
    return {
      year: digits.slice(0, 2),
      aduana: digits.slice(2, 4),
      patente: digits.slice(4, 8),
      pedimento: digits.slice(8, 15),
    };
  }

  return null;
}
97
+
98
// Two-digit year (YY) of a date string that starts with "YYYY-", such as
// "2026-05-08-06:00". Empty input or any other shape yields null.
function yyFromIsoDate(iso) {
  if (!iso) return null;
  const hit = iso.match(/^(\d{4})-/);
  if (!hit) return null;
  // The capture is always 4 digits, so the last two are the YY part.
  return hit[1].slice(-2);
}
104
+
105
// Payment date lookup: among the <fechas> blocks, keep those whose first
// <clave> is exactly "2" and return the first non-empty <fecha> they carry.
// Returns null when no such block or date exists.
function findPaymentDate(source) {
  const dates = allTagBlocks(source, 'fechas')
    .filter((block) => firstTag(block, 'clave') === '2')
    .map((block) => firstTag(block, 'fecha'))
    .filter(Boolean);
  return dates.length > 0 ? dates[0] : null;
}
117
+
118
+ // --------------------------- extractors ------------------------------------
119
+
120
// RFC: text of the first <rfc> element in the XML.
const rfcExtractor = {
  field: 'rfc',
  extract: (source) => {
    const rfc = firstTag(source, 'rfc');
    const found = Boolean(rfc);
    return new FieldResult('rfc', found, rfc);
  },
};
127
+
128
// Clave de pedimento: the <clave> inside the first <claveDocumento> block,
// e.g. <ns2:claveDocumento><ns2:clave>R1</ns2:clave></ns2:claveDocumento>.
const clavePedimentoExtractor = {
  field: 'clavePedimento',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'claveDocumento');
    const clave =
      firstBlock === undefined ? null : firstTag(firstBlock, 'clave');
    return new FieldResult('clavePedimento', Boolean(clave), clave);
  },
};
137
+
138
// Tipo de operación: the <descripcion> inside the first <tipoOperacion>
// block.
const tipoOperacionExtractor = {
  field: 'tipoOperacion',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'tipoOperacion');
    const descripcion =
      firstBlock === undefined ? null : firstTag(firstBlock, 'descripcion');
    return new FieldResult('tipoOperacion', Boolean(descripcion), descripcion);
  },
};
146
+
147
// Aduana E/S: the <clave> inside the first <aduanaEntradaSalida> block,
// left-padded with zeros to at least 2 characters.
const aduanaEntradaSalidaExtractor = {
  field: 'aduanaEntradaSalida',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'aduanaEntradaSalida');
    const clave =
      firstBlock === undefined ? null : firstTag(firstBlock, 'clave');
    if (!clave) {
      return new FieldResult('aduanaEntradaSalida', false, null);
    }
    return new FieldResult('aduanaEntradaSalida', true, pad(clave, 2));
  },
};
159
+
160
// Payment date: whatever findPaymentDate() resolves for this document.
const paymentDateExtractor = {
  field: 'paymentDate',
  extract: (source) => {
    const paidOn = findPaymentDate(source);
    const found = Boolean(paidOn);
    return new FieldResult('paymentDate', found, paidOn);
  },
};
167
+
168
// Rectification payment date: the <fechaPago> inside the first
// <rectificacion> block; not found when the document has no such block.
const fechaPagoRectificacionExtractor = {
  field: 'fechaPagoRectificacion',
  extract: (source) => {
    const [firstRect] = allTagBlocks(source, 'rectificacion');
    const fechaPago =
      firstRect === undefined ? null : firstTag(firstRect, 'fechaPago');
    return new FieldResult(
      'fechaPagoRectificacion',
      Boolean(fechaPago),
      fechaPago,
    );
  },
};
179
+
180
// COVE: the first <numero> of every <facturas> block, de-duplicated in
// document order and rendered as "[a,b,...]".
const coveExtractor = {
  field: 'cove',
  extract: (source) => {
    const numeros = new Set(
      allTagBlocks(source, 'facturas')
        .map((factura) => firstTag(factura, 'numero'))
        .filter((numero) => numero),
    );
    if (numeros.size === 0) {
      return new FieldResult('cove', false, null);
    }
    return new FieldResult('cove', true, `[${[...numeros].join(',')}]`);
  },
};
197
+
198
// Num. E-Document: for every <identificadores> block whose first
// <claveIdentificador> has <clave> equal to 'ED', collect that block's
// <complemento1>. Values are de-duplicated in document order and rendered
// as "[a,b,...]".
const numEDocumentoExtractor = {
  field: 'numEDocumento',
  extract: (source) => {
    const codes = new Set();

    for (const block of allTagBlocks(source, 'identificadores')) {
      const [claveBlock] = allTagBlocks(block, 'claveIdentificador');
      const isEDocument =
        claveBlock !== undefined && firstTag(claveBlock, 'clave') === 'ED';
      if (!isEDocument) continue;

      const complemento = firstTag(block, 'complemento1');
      if (complemento) codes.add(complemento);
    }

    if (codes.size === 0) {
      return new FieldResult('numEDocumento', false, null);
    }
    return new FieldResult('numEDocumento', true, `[${[...codes].join(',')}]`);
  },
};
222
+
223
+ // numPedimento is NOT produced by an entry in `extractors`: an extractor
224
+ // only receives `source`, while composing the 15-digit number also needs
225
+ // the file path and the already-extracted fields (aduanaEntradaSalida,
226
+ // paymentDate, fechaPagoRectificacion).
227
+ // The definition's `extractNumPedimento(source, fields, filePath)` below is
228
+ // therefore the canonical place where numPedimento is composed for XML.
229
+ // It receives the file path as a parameter, so no module-level closure or
230
+ // shared state is involved.
231
+
232
+ // --------------------------- definition ------------------------------------
233
+
234
/**
 * Document-type definition for VUCEM "consultarPedimentoCompleto" XML files.
 * Bundles the type id, the accepted extensions, the content matcher, the
 * type resolver, the numPedimento / year composers and the field extractors.
 */
export const pedimentoCompletoXmlDefinition = {
  type: 'pedimento_completo_xml',
  extensions: ['xml'],

  /**
   * A source is a candidate when it mentions the
   * `consultarPedimentoCompletoRespuesta` name (case-insensitive).
   */
  match: (source) => {
    return /consultarPedimentoCompletoRespuesta/i.test(source);
  },

  /**
   * Resolve to `pedimento_completo_xml` only if payment evidence exists in
   * the extracted fields:
   *   - `fechaPagoRectificacion` was found, or
   *   - `paymentDate` was found with a non-empty value.
   * Otherwise the document is a `proforma_completo_xml`.
   * Only `fields` is inspected; the XML is not re-scanned for a
   * <rectificacion> block here.
   */
  resolveType: (fields) => {
    const hasRect = !!fields?.find(
      (f) => f.name === 'fechaPagoRectificacion' && f.found,
    );
    if (hasRect) return 'pedimento_completo_xml';

    const paymentDate =
      fields?.find((f) => f.name === 'paymentDate' && f.found)?.value ?? null;
    return paymentDate ? 'pedimento_completo_xml' : 'proforma_completo_xml';
  },

  /**
   * Compose the 15-digit pedimento number from XML body + filename.
   *   YY:      from rectification fechaPago if found, else from the payment
   *            date; falls back to the year of filename pattern 3.
   *   AA:      from the extracted aduanaEntradaSalida field, else from the
   *            filename; padded to 2.
   *   PPPP:    from the filename (any of the three patterns); padded to 4.
   *   NNNNNNN: from the first <pedimento> text element padded to 7, else
   *            from the filename.
   * Returns null if any component cannot be resolved, or if the composed
   * value is not exactly 15 digits (for example a 3-digit aduana clave or a
   * non-numeric <pedimento>), so callers never receive a malformed number.
   */
  extractNumPedimento: (source, fields, filePath) => {
    const parts = parseFilenameParts(filePath);
    const foundValue = (name) =>
      fields?.find((f) => f.name === name && f.found)?.value;

    const yy =
      yyFromIsoDate(foundValue('fechaPagoRectificacion')) ||
      yyFromIsoDate(foundValue('paymentDate')) ||
      parts?.year ||
      null;

    const aduanaField = fields?.find(
      (f) => f.name === 'aduanaEntradaSalida',
    )?.value;
    const aduana = aduanaField || parts?.aduana || null;

    const patente = parts?.patente || null;

    const pedimentoBody = firstTag(source, 'pedimento');
    const pedimento = pedimentoBody
      ? pad(pedimentoBody, 7)
      : parts?.pedimento || null;

    if (!yy || !aduana || !patente || !pedimento) return null;

    const composed = `${pad(yy, 2)}${pad(aduana, 2)}${pad(patente, 4)}${pedimento}`;
    // pad() never truncates, so over-long or non-numeric parts must be
    // rejected here rather than passed on as a bogus pedimento number.
    return /^\d{15}$/.test(composed) ? composed : null;
  },

  /**
   * Four-digit pedimento year derived from `extractNumPedimento`: its two
   * leading digits map to 20YY when below 50 and to 19YY otherwise.
   * Returns null when the pedimento number cannot be composed.
   */
  extractPedimentoYear: (source, fields, filePath) => {
    // Reuse extractNumPedimento; the year is its leading 2 digits.
    const num = pedimentoCompletoXmlDefinition.extractNumPedimento(
      source,
      fields,
      filePath,
    );
    if (!num) return null;
    const year = Number.parseInt(num.substring(0, 2), 10);
    return year < 50 ? year + 2000 : year + 1900;
  },

  extractors: [
    rfcExtractor,
    clavePedimentoExtractor,
    tipoOperacionExtractor,
    aduanaEntradaSalidaExtractor,
    paymentDateExtractor,
    fechaPagoRectificacionExtractor,
    coveExtractor,
    numEDocumentoExtractor,
  ],
};