@arela/uploader 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -25,49 +25,118 @@ export const tipoOperacionExtractor = {
25
25
  },
26
26
  };
27
27
 
28
- // 3) Clave de Pedimento: 2 chars after tipoOperacion
28
+ // 3) Clave de Pedimento: 2 chars after tipoOperacion (multiple layout patterns)
29
29
  export const clavePedimentoExtractor = {
30
30
  field: 'clavePedimento',
31
31
  extract: (source) => {
32
- const match = source.match(
33
- /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})/,
34
- );
35
- return new FieldResult('clavePedimento', !!match, match ? match[1] : null);
32
+ const patterns = [
33
+ // Standard spaced layout: "22 07 3429 2002089 EXP RT"
34
+ /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
35
+ // Concatenated 15-digit layout: "260734296013645 EXP RT"
36
+ /\d{15}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
37
+ // Fallback: T.OPER keyword followed by 2-char clave
38
+ /\b(?:EXP|IMP|TRA|TRN)\s+([A-Z][A-Z0-9])\b/,
39
+ // Explicit label
40
+ /CVE\.?\s*PED(?:IMENTO)?[^A-Z0-9]{0,60}?\b([A-Z][A-Z0-9])\b/i,
41
+ ];
42
+ for (const re of patterns) {
43
+ const m = source.match(re);
44
+ if (m) return new FieldResult('clavePedimento', true, m[1]);
45
+ }
46
+ return new FieldResult('clavePedimento', false, null);
36
47
  },
37
48
  };
38
49
 
39
50
  // 4) Aduana E/S: 3-digit code on the peso-bruto line
51
+ // Fallback A: allow 2-digit code (some SIMP layouts omit the leading zero).
52
+ // Fallback B: derive the 2-digit customs-office code from positions 2-3 of
53
+ // numPedimento (e.g. "260734296013645" → "07"), which is what the
54
+ // arela_path formula uses after padStart(2,'0').
40
55
  export const aduanaEntradaSalidaExtractor = {
41
56
  field: 'aduanaEntradaSalida',
42
57
  extract: (source) => {
43
- const match = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m);
44
- return new FieldResult(
45
- 'aduanaEntradaSalida',
46
- !!match,
47
- match ? match[1] : null,
48
- );
58
+ // Primary: 3-digit aduana code at end of peso-bruto line
59
+ const m3 = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m);
60
+ if (m3) return new FieldResult('aduanaEntradaSalida', true, m3[1]);
61
+
62
+ // Fallback A: 2-digit aduana code at end of peso-bruto line
63
+ const m2 = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{2})\s*$/m);
64
+ if (m2) return new FieldResult('aduanaEntradaSalida', true, m2[1]);
65
+
66
+ // Fallback B: derive 2-digit customs-office code from numPedimento
67
+ // Format: AA BB CCCC DDDDDDD → BB (positions 2-3) = aduana
68
+ const pedMatch = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
69
+ if (pedMatch) {
70
+ const num = pedMatch[0].replace(/\s/g, '');
71
+ if (num.length === 15) {
72
+ return new FieldResult(
73
+ 'aduanaEntradaSalida',
74
+ true,
75
+ num.substring(2, 4),
76
+ );
77
+ }
78
+ }
79
+
80
+ return new FieldResult('aduanaEntradaSalida', false, null);
49
81
  },
50
82
  };
51
83
 
52
- // 5) RFC: 12–13 alphanumeric chars on its own line
84
+ // 5) RFC: importer/exporter RFC on its own line.
85
+ // Strategy A: strict whole-line pattern (3-4 letters + 6 consecutive digits +
86
+ // 3 alphanum). COVE codes like COVE2681B1RX8 naturally fail this because
87
+ // their digit section is non-consecutive (2681B1 has a letter at pos 5).
88
+ // Strategy B: RFC as a word within a longer line (handles "RFC: IMS030409FZ0").
89
+ // Strategy C: loose 12-13 alphanum isolated on its own line — iterate ALL
90
+ // matches via matchAll() so that a leading COVE code is skipped and the
91
+ // actual RFC (which appears later in the document) is still found.
53
92
  export const rfcExtractor = {
54
93
  field: 'rfc',
55
94
  extract: (source) => {
56
- const match = source.match(/\n\s*([A-Z0-9]{12,13})\s*\n/);
57
- return new FieldResult('rfc', !!match, match ? match[1] : null);
95
+ const RFC_STRICT = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
96
+ const lines = source
97
+ .split(/\r?\n/)
98
+ .map((l) => l.trim())
99
+ .filter((l) => l);
100
+
101
+ // Primary (Strategy A): RFC occupies an entire trimmed line
102
+ const strictLine = lines.find((line) => RFC_STRICT.test(line));
103
+ if (strictLine) return new FieldResult('rfc', true, strictLine);
104
+
105
+ // Fallback A (Strategy B): RFC embedded in a longer line (word-boundary search)
106
+ for (const line of lines) {
107
+ const m = line.match(/\b([A-Z]{3,4}\d{6}[A-Z0-9]{3})\b/i);
108
+ if (m) return new FieldResult('rfc', true, m[1]);
109
+ }
110
+
111
+ // Fallback B (Strategy C): loose 12-13 alphanum isolated on its own line.
112
+ // Use matchAll() to iterate ALL occurrences — a leading COVE code must not
113
+ // short-circuit the search; the RFC typically follows it in the document.
114
+ for (const m of source.matchAll(/\n\s*([A-Z0-9]{12,13})\s*\n/g)) {
115
+ if (!/^COVE/i.test(m[1])) return new FieldResult('rfc', true, m[1]);
116
+ }
117
+
118
+ return new FieldResult('rfc', false, null);
58
119
  },
59
120
  };
60
121
 
61
- // 6) Código de Aceptación: 8 alphanumeric chars on the line right after the RFC
122
+ // 6) Código de Aceptación: 8 alphanumeric chars on the line right after the RFC.
123
+ // Uses rfcExtractor's whole-line RFC detection (strict, then loose non-COVE); the embedded-in-line fallback is not applied here.
62
124
  export const codigoAceptacionExtractor = {
63
125
  field: 'codigoAceptacion',
64
126
  extract: (source) => {
127
+ const RFC_STRICT = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
128
+ const RFC_LOOSE = /^[A-Z0-9]{12,13}$/;
65
129
  const lines = source
66
130
  .split(/\r?\n/)
67
131
  .map((l) => l.trim())
68
132
  .filter((l) => l.length > 0);
69
133
 
70
- const rfcIndex = lines.findIndex((l) => /^[A-Z0-9]{12,13}$/.test(l));
134
+ // Find RFC line using strict pattern first, then loose (excluding COVE)
135
+ let rfcIndex = lines.findIndex((l) => RFC_STRICT.test(l));
136
+ if (rfcIndex < 0) {
137
+ rfcIndex = lines.findIndex((l) => RFC_LOOSE.test(l) && !/^COVE/i.test(l));
138
+ }
139
+
71
140
  let code = null;
72
141
  if (rfcIndex >= 0 && /^[A-Z0-9]{8}$/.test(lines[rfcIndex + 1] || '')) {
73
142
  code = lines[rfcIndex + 1];
@@ -77,24 +146,31 @@ export const codigoAceptacionExtractor = {
77
146
  };
78
147
 
79
148
  // 7) Num. E-Document: collects all 13-char alphanumeric codes following
80
- // `NUM. E-DOCUMENT` labels. CoveFact / Pedimento Completo emit one row
81
- // per ED clave inside the CLAVE/COMPL. IDENTIFICADOR table.
149
+ // `NUM. E-DOCUMENT` / `NUMERO DE E-DOCUMENT` labels.
82
150
  export const numEDocumentoExtractor = {
83
151
  field: 'numEDocumento',
84
152
  extract: (source) => {
85
153
  const lines = source.split(/\r?\n/);
86
- const edocLines = lines.filter((line) => /NUM\.?\s*E-DOCUMENT/i.test(line));
154
+ const extractedCodes = [];
155
+ const titlePatterns = [/NUMERO\s+DE\s+E-DOCUMENT/i, /NUM\.?\s*E-DOCUMENT/i];
87
156
 
88
- if (edocLines.length === 0) {
89
- return new FieldResult('numEDocumento', false, null);
90
- }
157
+ for (let i = 0; i < lines.length; i++) {
158
+ const line = lines[i];
159
+ const hasTitle = titlePatterns.some((p) => p.test(line));
160
+ if (!hasTitle) continue;
91
161
 
92
- const extractedCodes = [];
93
- edocLines.forEach((line) => {
94
- const afterEdoc = line.replace(/.*NUM\.?\s*E-DOCUMENT\s*/i, '');
95
- const codes = afterEdoc.match(/[A-Z0-9]{13}/g) || [];
96
- extractedCodes.push(...codes);
97
- });
162
+ // Codes on the title line itself
163
+ const codesInLine = line.match(/[A-Z0-9]{13}/g) || [];
164
+ extractedCodes.push(...codesInLine);
165
+
166
+ // Codes on the next few lines (e.g. CLAVE/COMPL. table rows)
167
+ for (let j = 1; j <= 10 && i + j < lines.length; j++) {
168
+ const nextLine = lines[i + j];
169
+ if (/NUMERO|OBSERVACIONES/i.test(nextLine)) break;
170
+ const codesInNextLine = nextLine.match(/[A-Z0-9]{13}/g) || [];
171
+ extractedCodes.push(...codesInNextLine);
172
+ }
173
+ }
98
174
 
99
175
  if (extractedCodes.length === 0) {
100
176
  return new FieldResult('numEDocumento', false, null);
@@ -109,14 +185,37 @@ export const numEDocumentoExtractor = {
109
185
  export const paymentDateExtractor = {
110
186
  field: 'paymentDate',
111
187
  extract: (source) => {
112
- let match = source.match(/2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/);
113
- if (!match) {
114
- match = source.match(/FECHA DE PAGO:\s*(\d{4}\/\d{2}\/\d{2})/);
188
+ const patterns = [
189
+ /FECHA\s+DE\s+PAGO:?\s*(\d{2}\/\d{2}\/\d{4})/i, // 0: explicit label DD/MM/YYYY
190
+ /FECHA\s+DE\s+PAGO:?\s*(\d{4}\/\d{2}\/\d{2})/i, // 1: explicit label YYYY/MM/DD
191
+ /2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/, // 2: forma simplificada scheduled date ⚠️
192
+ /(?:^|\n)\s*PAGO\s+(\d{2}\/\d{2}\/\d{4})/i, // 3: PAGO at line start (original)
193
+ /(?<=\d)PAGO\s+(\d{2}\/\d{2}\/\d{4})/i, // 4: PAGO after digit (pdf-parse artifact)
194
+ /(\d{2}\/\d{2}\/\d{4})[ \t]+PAGO[ \t]*$/im, // 5: reversed layout — date before PAGO (FECHAS column)
195
+ // 6: forma simplificada — pdf-parse extracts table cells out of order, so the
196
+ // label "FECHA DE PAGO:" can appear on its own line and the value (along with
197
+ // other cells like línea de captura, pedimento, importe) follows several lines
198
+ // later. Take the FIRST dd/mm/yyyy after the label within a 400-char window.
199
+ // Safe because `isNoPagado` short-circuits documents without a real payment,
200
+ // so we won't grab the unrelated ENTRADA date from the "FECHAS:" block above.
201
+ /FECHA\s+DE\s+PAGO:[\s\S]{1,400}?(\d{2}\/\d{2}\/\d{4})/i,
202
+ /PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/i, // 7: fallback
203
+ ];
204
+ // "*** NO PAGADO" is the explicit SAT marker that no payment has been
205
+ // certified. When present, the bank-certification block is physically
206
+ // absent, so any date matched by the fallback patterns (e.g.
207
+ // "2 PAGO:" with a scheduled date, or "PRESENTACION:") would be a false
208
+ // positive. Return a not-found result (null value) outright — the document is classified as proforma.
209
+ const isNoPagado = /\*{3}\s*NO\s+PAGADO/i.test(source);
210
+ if (isNoPagado) {
211
+ return new FieldResult('paymentDate', false, null);
115
212
  }
116
- if (!match) {
117
- match = source.match(/PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/);
213
+ for (const pattern of patterns) {
214
+ const m = source.match(pattern);
215
+ if (!m) continue;
216
+ return new FieldResult('paymentDate', true, m[1]);
118
217
  }
119
- return new FieldResult('paymentDate', !!match, match ? match[1] : null);
218
+ return new FieldResult('paymentDate', false, null);
120
219
  },
121
220
  };
122
221
 
@@ -152,14 +251,16 @@ export const coveExtractor = {
152
251
  };
153
252
 
154
253
  // 10) Patente: from the PATENTE/PEDIMENTO/ADUANA header table
254
+ // Fallback A: pago electrónico line "3429 4024126 07" (pedimento_completo).
255
+ // Fallback B: positions 4-7 of numPedimento (always available when found).
155
256
  export const patenteExtractor = {
156
257
  field: 'patente',
157
258
  extract: (source) => {
259
+ // Primary: PATENTE:/PEDIMENTO:/ADUANA: header followed by data line
158
260
  const lines = source.split(/\r?\n/);
159
261
  const patenteHeaderIndex = lines.findIndex((line) =>
160
262
  /PATENTE:.*PEDIMENTO:.*ADUANA:/i.test(line),
161
263
  );
162
-
163
264
  if (patenteHeaderIndex >= 0) {
164
265
  for (let i = patenteHeaderIndex + 1; i < lines.length; i++) {
165
266
  const line = lines[i].trim();
@@ -169,6 +270,20 @@ export const patenteExtractor = {
169
270
  }
170
271
  }
171
272
  }
273
+
274
+ // Fallback A: pago electrónico line "3429 4024126 07"
275
+ const pagoMatch = source.match(/(\d{4})\s+\d{7}\s+\d{2}/);
276
+ if (pagoMatch) return new FieldResult('patente', true, pagoMatch[1]);
277
+
278
+ // Fallback B: positions 4-7 of numPedimento
279
+ const pedMatch = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
280
+ if (pedMatch) {
281
+ const num = pedMatch[0].replace(/\s/g, '');
282
+ if (num.length === 15) {
283
+ return new FieldResult('patente', true, num.substring(4, 8));
284
+ }
285
+ }
286
+
172
287
  return new FieldResult('patente', false, null);
173
288
  },
174
289
  };
@@ -0,0 +1,186 @@
1
+ // NOTE: We intentionally do NOT import `FieldResult` from
2
+ // '../document-type-shared.js' to avoid a circular-import TDZ when this
3
+ // module is imported directly (e.g. from unit tests). `FieldResult` is a
4
+ // plain data-class with shape `{ name, found, value }`, so we construct
5
+ // equivalent plain objects locally.
6
+ const fieldResult = (name, found, value) => ({ name, found, value });
7
+
8
+ /**
9
+ * Factura Inter-Agencia Document Type Definition
10
+ *
11
+ * Detects CFDIs (XML or PDF) issued between customs broker agencies (e.g.,
12
+ * NORCOM ↔ PALCO). These files are dropped into a pedimento folder by the
13
+ * broker but they are NOT part of the customs electronic file (expediente
14
+ * aduanal) — they are inter-agency billing for broker services.
15
+ *
16
+ * Detection rules (ALL required):
17
+ * 1) CFDI markers present (either xml structure or PDF text representation)
18
+ * 2) Both emisor and receptor RFCs belong to the configured agency pair
19
+ * (NAA120215F20 = NORCOM, PCC1008161WA = PALCO) in any direction.
20
+ * 3) At least one concepto with ClaveProdServ 78141502 (Servicios de
21
+ * agentes aduaneros) — confirms the billing is for broker services.
22
+ *
23
+ * IMPORTANT: This matcher MUST be registered BEFORE `facturasComerciales`
24
+ * in document-type-shared.js — both would match a CFDI in a pedimento
25
+ * folder, but inter-agency invoices must take precedence so they are
26
+ * filtered out of the Arela push pipeline (see arela-api
27
+ * NON_PUSHABLE_TYPES_SQL).
28
+ *
29
+ * Currently scope-limited to NORCOM↔PALCO. To widen, move INTER_AGENCIA_RFCS
30
+ * to env config and require ≥2 distinct RFCs from the configured list.
31
+ */
32
+
33
+ /**
34
+ * RFCs of agencies whose mutual invoices should be excluded from the Arela
35
+ * push pipeline. Order is irrelevant — a match is any pair of distinct RFCs
36
+ * from this set appearing as emisor and receptor.
37
+ */
38
+ export const INTER_AGENCIA_RFCS = ['NAA120215F20', 'PCC1008161WA'];
39
+
40
+ const BROKER_SERVICE_CLAVE_PROD_SERV = '78141502';
41
+
42
+ const CFDI_XML_MARKERS = [
43
+ /cfdi:Comprobante/i,
44
+ /xmlns:cfdi/i,
45
+ /TipoDeComprobante/i,
46
+ ];
47
+
48
+ /**
49
+ * Detect that the source represents a CFDI — either as the original XML
50
+ * structure or as text extracted from a printed CFDI (PDF representation).
51
+ *
52
+ * PDF text loses XML tags, so we look for the human-readable equivalents
53
+ * commonly rendered by SAT-style invoice templates ("Folio Fiscal", "Sello
54
+ * Digital del CFDI", "Cadena Original ... Certificacion Digital del SAT").
55
+ */
56
+ function isCfdiContent(source) {
57
+ const xmlHits = CFDI_XML_MARKERS.filter((re) => re.test(source)).length;
58
+ if (xmlHits >= 2) return true;
59
+
60
+ const pdfMarkers = [
61
+ /folio\s*fiscal/i,
62
+ /sello\s*digital\s*del\s*cfdi/i,
63
+ /cadena\s*original.*certificaci[oó]n\s*digital\s*del\s*sat/i,
64
+ /representaci[oó]n\s*impresa\s*de\s*un\s*cfdi/i,
65
+ ];
66
+ return pdfMarkers.filter((re) => re.test(source)).length >= 2;
67
+ }
68
+
69
+ /**
70
+ * Return the subset of INTER_AGENCIA_RFCS that appear in `source`. Matching is
71
+ * case-insensitive and uses word boundaries so substrings inside larger tokens
72
+ * (cert/sello base64) don't produce false positives.
73
+ */
74
+ function findInterAgenciaRfcs(source) {
75
+ const found = new Set();
76
+ for (const rfc of INTER_AGENCIA_RFCS) {
77
+ const re = new RegExp(`\\b${rfc}\\b`, 'i');
78
+ if (re.test(source)) found.add(rfc.toUpperCase());
79
+ }
80
+ return [...found];
81
+ }
82
+
83
+ export const facturaInterAgenciaDefinition = {
84
+ type: 'factura_inter_agencia',
85
+ extensions: ['xml', 'pdf'],
86
+
87
+ match: (source) => {
88
+ if (!isCfdiContent(source)) return false;
89
+
90
+ // Need ≥2 distinct configured RFCs present anywhere in the text (emisor/receptor roles are not checked here)
91
+ const rfcsFound = findInterAgenciaRfcs(source);
92
+ if (rfcsFound.length < 2) return false;
93
+
94
+ // Confirm the invoice is for broker services (customs agent services)
95
+ if (!source.includes(BROKER_SERVICE_CLAVE_PROD_SERV)) return false;
96
+
97
+ return true;
98
+ },
99
+
100
+ // Pedimento extraction is optional / informational — these files are
101
+ // excluded from push, so arela_path is never composed. We still extract
102
+ // a pedimento number when present (from the "Referencias" / "Pedimento:"
103
+ // section of the printable CFDI) for auditability.
104
+ extractNumPedimento: (source, fields) => {
105
+ return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
106
+ },
107
+
108
+ extractPedimentoYear: (source, fields) => {
109
+ const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
110
+ if (numPedimento && numPedimento.length >= 2) {
111
+ const yy = parseInt(numPedimento.substring(0, 2), 10);
112
+ if (!isNaN(yy)) return yy < 50 ? yy + 2000 : yy + 1900;
113
+ }
114
+ return null;
115
+ },
116
+
117
+ extractors: [
118
+ {
119
+ field: 'rfcEmisor',
120
+ extract: (source) => {
121
+ // XML form: <cfdi:Emisor Rfc="..." />
122
+ const xmlMatch = source.match(
123
+ /<[^>]*Emisor[^>]*Rfc\s*=\s*["']([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})["']/i,
124
+ );
125
+ if (xmlMatch) return fieldResult('rfcEmisor', true, xmlMatch[1]);
126
+
127
+ // PDF form: "Emisor" section followed by RFC label/value on later lines.
128
+ // We pick the first RFC, in INTER_AGENCIA_RFCS order, that is present in the document.
129
+ const rfcs = findInterAgenciaRfcs(source);
130
+ if (rfcs.length > 0) return fieldResult('rfcEmisor', true, rfcs[0]);
131
+
132
+ return fieldResult('rfcEmisor', false, null);
133
+ },
134
+ },
135
+ {
136
+ field: 'rfcReceptor',
137
+ extract: (source) => {
138
+ const xmlMatch = source.match(
139
+ /<[^>]*Receptor[^>]*Rfc\s*=\s*["']([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})["']/i,
140
+ );
141
+ if (xmlMatch) return fieldResult('rfcReceptor', true, xmlMatch[1]);
142
+
143
+ const rfcs = findInterAgenciaRfcs(source);
144
+ if (rfcs.length >= 2) {
145
+ return fieldResult('rfcReceptor', true, rfcs[1]);
146
+ }
147
+ return fieldResult('rfcReceptor', false, null);
148
+ },
149
+ },
150
+ {
151
+ field: 'folio',
152
+ extract: (source) => {
153
+ // CFDI Folio attribute
154
+ const xmlMatch = source.match(/\bFolio\s*=\s*["']([A-Z0-9-]+)["']/i);
155
+ if (xmlMatch) return fieldResult('folio', true, xmlMatch[1]);
156
+
157
+ // PDF: "Numero Folio 012749"
158
+ const pdfMatch = source.match(/Numero\s+Folio\s+([A-Z0-9-]+)/i);
159
+ if (pdfMatch) return fieldResult('folio', true, pdfMatch[1]);
160
+
161
+ return fieldResult('folio', false, null);
162
+ },
163
+ },
164
+ {
165
+ field: 'uuid',
166
+ extract: (source) => {
167
+ const uuidRe =
168
+ /[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}/i;
169
+ const m = source.match(uuidRe);
170
+ return fieldResult('uuid', !!m, m ? m[0].toUpperCase() : null);
171
+ },
172
+ },
173
+ {
174
+ field: 'numPedimento',
175
+ extract: (source) => {
176
+ // Printable CFDI "Pedimento: 3458 6000046 Fecha: ..." — recovers an
177
+ // 11-digit pedimento (no YY prefix). Useful for auditability only.
178
+ const m = source.match(/Pedimento:?\s*(\d{4})\s*(\d{7})/i);
179
+ if (m) {
180
+ return fieldResult('numPedimento', true, `${m[1]}${m[2]}`);
181
+ }
182
+ return fieldResult('numPedimento', false, null);
183
+ },
184
+ },
185
+ ],
186
+ };
@@ -1,10 +1,8 @@
1
1
  // VUCEM "consultarPedimentoCompleto" XML matcher.
2
2
  //
3
- // STATUS: implemented but NOT registered in `document-type-shared.js`. To
4
- // activate, uncomment the import + registration in that file. All downstream
5
- // code (composeArelaPath, arela-api propagation SQL, IdentifyCommand
6
- // counters) already includes `pedimento_completo_xml`, so re-enabling is a
7
- // single-line change.
3
+ // Registered in `document-type-shared.js`. Downstream code
4
+ // (composeArelaPath, arela-api propagation SQL, IdentifyCommand counters)
5
+ // also includes `pedimento_completo_xml`.
8
6
  //
9
7
  // Filename patterns recognized (try in order — patente extraction):
10
8
  // 1) VU_PATENTE_ADUANA_PEDIMENTO.xml → e.g. VU_3429_070_5016101.xml
@@ -47,6 +45,22 @@ function pad(value, length) {
47
45
  return String(value).padStart(length, '0');
48
46
  }
49
47
 
48
+ /**
49
+ * Convert a VUCEM `aduanaEntradaSalida.clave` (e.g. "70", "750", "40") to the
50
+ * 2-digit "sección aduanera" prefix used inside the 15-digit pedimento number.
51
+ *
52
+ * VUCEM strips leading zeros from the canonical 3-digit SAT aduana code,
53
+ * so `070` (Ciudad Juárez) arrives as `70`. The pedimento prefix is the
54
+ * first 2 digits of the 3-digit code:
55
+ * `70` → `070` → `07` (Cd. Juárez)
56
+ * `750` → `750` → `75` (Puebla)
57
+ * `40` → `040` → `04` (Lázaro Cárdenas)
58
+ */
59
+ function aduanaToSeccion(claveValue) {
60
+ if (claveValue == null) return null;
61
+ return pad(claveValue, 3).substring(0, 2);
62
+ }
63
+
50
64
  /**
51
65
  * Try the three known filename patterns and return {patente, aduana, pedimento}
52
66
  * with any subset of the fields populated. Returns null if no pattern matches.
@@ -102,12 +116,17 @@ function yyFromIsoDate(iso) {
102
116
  return m ? m[1].substring(2, 4) : null;
103
117
  }
104
118
 
105
- // Find <ns2:fechas> block with nested clave==2 and return its <ns2:fecha>.
106
- function findPaymentDate(source) {
119
+ // Find <ns2:fechas> block whose nested <clave> matches `claveValue` and
120
+ // return its <ns2:fecha>. Works for both shapes:
121
+ // <fechas><clave>N</clave><fecha>...</fecha></fechas>
122
+ // <fechas><fecha>...</fecha><tipo><clave>N</clave></tipo></fechas>
123
+ // (firstTag finds the FIRST <clave> in the block — both layouts expose only
124
+ // one clave per fechas entry.)
125
+ function findFechaByClave(source, claveValue) {
107
126
  const fechasBlocks = allTagBlocks(source, 'fechas');
108
127
  for (const block of fechasBlocks) {
109
128
  const clave = firstTag(block, 'clave');
110
- if (clave === '2') {
129
+ if (clave === claveValue) {
111
130
  const fecha = firstTag(block, 'fecha');
112
131
  if (fecha) return fecha;
113
132
  }
@@ -115,6 +134,18 @@ function findPaymentDate(source) {
115
134
  return null;
116
135
  }
117
136
 
137
+ // Fecha de pago de las contribuciones (tipo.clave == 2).
138
+ function findPaymentDate(source) {
139
+ return findFechaByClave(source, '2');
140
+ }
141
+
142
+ // Fecha de presentacion (tipo.clave == 5). This is the authoritative source
143
+ // for the pedimento's YY prefix — a pedimento opened in Dec-2025 but paid in
144
+ // Jan-2026 keeps the `25` prefix, matching what VUCEM stamps in the filename.
145
+ function findPresentationDate(source) {
146
+ return findFechaByClave(source, '5');
147
+ }
148
+
118
149
  // --------------------------- extractors ------------------------------------
119
150
 
120
151
  const rfcExtractor = {
@@ -152,7 +183,7 @@ const aduanaEntradaSalidaExtractor = {
152
183
  return new FieldResult(
153
184
  'aduanaEntradaSalida',
154
185
  !!clave,
155
- clave ? pad(clave, 2) : null,
186
+ aduanaToSeccion(clave),
156
187
  );
157
188
  },
158
189
  };
@@ -165,6 +196,14 @@ const paymentDateExtractor = {
165
196
  },
166
197
  };
167
198
 
199
+ const presentationDateExtractor = {
200
+ field: 'presentationDate',
201
+ extract: (source) => {
202
+ const fecha = findPresentationDate(source);
203
+ return new FieldResult('presentationDate', !!fecha, fecha);
204
+ },
205
+ };
206
+
168
207
  const fechaPagoRectificacionExtractor = {
169
208
  field: 'fechaPagoRectificacion',
170
209
  extract: (source) => {
@@ -257,8 +296,14 @@ export const pedimentoCompletoXmlDefinition = {
257
296
 
258
297
  /**
259
298
  * Compose the 15-digit pedimento number from XML body + filename.
260
- * YY: from rectification fechaPago if present, else from the clave==2
261
- * payment-date fecha; falls back to filename pattern 3.
299
+ * YY: priority order (most authoritative first):
300
+ * 1) Filename pattern 3 (`{15-digit}.xml`) — VUCEM stamps the correct
301
+ * prefix at export time.
302
+ * 2) Fecha de presentacion (<fechas><clave>5) — the year the pedimento
303
+ * was opened. Authoritative for the YY prefix even when payment
304
+ * crosses calendar year (e.g. opened Dec-2025, paid Jan-2026 → YY=25).
305
+ * 3) Rectification fechaPago (only when no presentation date exists).
306
+ * 4) Payment date (last-resort fallback).
262
307
  * AA: from <aduanaEntradaSalida><clave> padded to 2.
263
308
  * PPPP: from the filename (any of the three patterns).
264
309
  * NNNNNNN: from <pedimento> padded to 7.
@@ -267,15 +312,19 @@ export const pedimentoCompletoXmlDefinition = {
267
312
  extractNumPedimento: (source, fields, filePath) => {
268
313
  const parts = parseFilenameParts(filePath);
269
314
 
315
+ const presentation = fields?.find(
316
+ (f) => f.name === 'presentationDate' && f.found,
317
+ )?.value;
270
318
  const rect = fields?.find(
271
319
  (f) => f.name === 'fechaPagoRectificacion' && f.found,
272
320
  )?.value;
273
321
  const pay = fields?.find((f) => f.name === 'paymentDate' && f.found)?.value;
274
322
 
275
323
  let yy =
324
+ (parts && parts.year) ||
325
+ yyFromIsoDate(presentation) ||
276
326
  yyFromIsoDate(rect) ||
277
327
  yyFromIsoDate(pay) ||
278
- (parts && parts.year) ||
279
328
  null;
280
329
 
281
330
  const aduanaField = fields?.find(
@@ -315,6 +364,7 @@ export const pedimentoCompletoXmlDefinition = {
315
364
  tipoOperacionExtractor,
316
365
  aduanaEntradaSalidaExtractor,
317
366
  paymentDateExtractor,
367
+ presentationDateExtractor,
318
368
  fechaPagoRectificacionExtractor,
319
369
  coveExtractor,
320
370
  numEDocumentoExtractor,
@@ -17,22 +17,55 @@ export const pedimentoCompletoDefinition = {
17
17
  type: 'pedimento_completo',
18
18
  extensions: ['pdf'],
19
19
  match: (source) => {
20
- if (/FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source)) return false;
20
+ // Hard exclude: "FORMA SIMPLIFICADA [DE|DEL] PEDIMENTO" is handled by
21
+ // pedimento_simplificado.
22
+ if (/FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(source)) return false;
21
23
 
24
+ // Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
25
+ // different document type handled by aviso_consolidado.
26
+ if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
27
+
28
+ // The colon after "T. OPER" is optional — see note in pedimento-simplificado.js.
22
29
  const hasHeaderFields =
23
30
  /NUM\.?\s*PEDIMENTO:/i.test(source) &&
24
31
  /CVE\.?\s*PEDIMENTO:/i.test(source) &&
25
- /T\.?\s*OPER:/i.test(source);
26
- if (!hasHeaderFields) return false;
32
+ /T\.?\s*OPER:?/i.test(source);
33
+ if (hasHeaderFields) {
34
+ const hasCopyMarker =
35
+ /ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
36
+ /SEGUNDA\s+COPIA/i.test(source) ||
37
+ /TERCERA\s+COPIA/i.test(source) ||
38
+ /COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
39
+ /DEFINITIVO/i.test(source) ||
40
+ /ANEXO\s+DEL\s+PEDIMENTO/i.test(source) ||
41
+ /\*+FIN\s+DE\s+PEDIMENTO\s*\*+/i.test(source);
42
+ if (hasCopyMarker) return true;
43
+ }
27
44
 
28
- const hasCopyMarker =
29
- /ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
30
- /SEGUNDA COPIA/i.test(source) ||
31
- /TERCERA COPIA/i.test(source) ||
32
- /COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
33
- /DEFINITIVO/i.test(source);
45
+ // Fallback clue-counting heuristic for exotic layouts.
46
+ const clues = [
47
+ /\bPEDIMENTO\s*\n.*NUM\.\s*PEDIMENTO:/i,
48
+ /NUM\.\s*PEDIMENTO:\s*T\.OPER:\s*CVE\.PEDIMENTO:\s*REGIMEN:/i,
49
+ /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+[A-Z]{3}/i,
50
+ /CERTIFICACIONES/i,
51
+ /DESTINO\/ORIGEN\s+TIPO\s+CAMBIO\s+PESO\s+BRUTO\s+ADUANA\s+E\/S/i,
52
+ /MEDIOS\s+DE\s+TRANSPORTE/i,
53
+ /DATOS\s+DEL\s+IMPORTADOR\/EXPORTADOR/i,
54
+ /RFC:\s+NOMBRE,\s+DENOMINACION\s+O\s+RAZON\s+SOCIAL:/i,
55
+ /CUADRO\s+DE\s+LIQUIDACION/i,
56
+ /\*\*\*\s+PAGO\s+ELECTRONICO\s+\*\*\*/i,
57
+ /PATENTE:\s+PEDIMENTO:\s+ADUANA:/i,
58
+ /LINEA\s+DE\s+CAPTURA:/i,
59
+ /DATOS\s+DEL\s+PROVEEDOR\s+O\s+COMPRADOR/i,
60
+ /CLAVE\/COMPL\.\s+IDENTIFICADOR/i,
61
+ /ANEXO\s+DEL\s+PEDIMENTO/i,
62
+ /\*+FIN\s+DE\s+PEDIMENTO\s+\*+NUM\.\s+TOTAL\s+DE\s+PARTIDAS:/i,
63
+ /DECLARO\s+BAJO\s+PROTESTA\s+DE\s+DECIR\s+VERDAD/i,
64
+ /PEDIMENTO\s+ELABORADO\s+DE\s+CONFORMIDAD/i,
65
+ ];
34
66
 
35
- return hasCopyMarker;
67
+ const found = clues.filter((clue) => clue.test(source));
68
+ return found.length > clues.length * 0.25;
36
69
  },
37
70
 
38
71
  /**