@arela/uploader 1.0.21 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/DatastageCommand.js +164 -0
- package/src/commands/IdentifyCommand.js +144 -25
- package/src/commands/PollWorkerCommand.js +2 -0
- package/src/commands/ScanCommand.js +15 -0
- package/src/config/config.js +28 -2
- package/src/document-type-shared.js +15 -7
- package/src/document-types/_pedimento-shared-extractors.js +150 -35
- package/src/document-types/factura-inter-agencia.js +186 -0
- package/src/document-types/pedimento-completo-xml.js +62 -12
- package/src/document-types/pedimento-completo.js +43 -10
- package/src/document-types/pedimento-simplificado.js +33 -1
- package/src/document-types/proforma.js +2 -2
- package/src/file-detection.js +1 -3
- package/src/index.js +42 -0
- package/src/services/DatastageApiService.js +240 -0
- package/src/services/ScanApiService.js +30 -0
- package/tests/unit/factura-inter-agencia.test.js +218 -0
- package/tests/unit/pedimento-completo-xml-matcher.test.js +271 -0
- package/tests/unit/pedimento-simplificado-matcher.test.js +185 -0
|
@@ -25,49 +25,118 @@ export const tipoOperacionExtractor = {
|
|
|
25
25
|
},
|
|
26
26
|
};
|
|
27
27
|
|
|
28
|
-
// 3) Clave de Pedimento: 2 chars after tipoOperacion
|
|
28
|
+
// 3) Clave de Pedimento: 2 chars after tipoOperacion (multiple layout patterns)
|
|
29
29
|
export const clavePedimentoExtractor = {
  field: 'clavePedimento',
  /**
   * Locate the 2-character "clave de pedimento" in the extracted text.
   *
   * Patterns are tried from most to least specific and the first one that
   * matches wins.
   *
   * @param {string} source - Text extracted from the document.
   * @returns {FieldResult} Result named `clavePedimento`; `value` is the
   *   upper-cased clave, or `null` when no pattern matches.
   */
  extract: (source) => {
    const patterns = [
      // Standard spaced layout: "22 07 3429 2002089 EXP RT"
      /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
      // Concatenated 15-digit layout: "260734296013645 EXP RT"
      /\d{15}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
      // Fallback: T.OPER keyword followed by 2-char clave
      /\b(?:EXP|IMP|TRA|TRN)\s+([A-Z][A-Z0-9])\b/,
      // Explicit label
      /CVE\.?\s*PED(?:IMENTO)?[^A-Z0-9]{0,60}?\b([A-Z][A-Z0-9])\b/i,
    ];
    for (const pattern of patterns) {
      const match = source.match(pattern);
      if (match) {
        // The explicit-label pattern is case-insensitive, so it can capture a
        // lower-case clave; upper-case it so every pattern yields the same
        // canonical form.
        return new FieldResult('clavePedimento', true, match[1].toUpperCase());
      }
    }
    return new FieldResult('clavePedimento', false, null);
  },
};
|
|
38
49
|
|
|
39
50
|
// 4) Aduana E/S: 3-digit code on the peso-bruto line.
// Fallback A: allow a 2-digit code (some SIMP layouts omit the leading zero).
// Fallback B: derive the 2-digit customs-office code from positions 2-3 of
//   numPedimento (e.g. "260734296013645" → "07"), which is what the
//   arela_path formula uses after padStart(2,'0').
export const aduanaEntradaSalidaExtractor = {
  field: 'aduanaEntradaSalida',
  /**
   * @param {string} source - Text extracted from the document.
   * @returns {FieldResult} Result named `aduanaEntradaSalida`; `value` is a
   *   3-digit or 2-digit code string, or `null` when nothing is found.
   */
  extract: (source) => {
    // Primary: 3-digit aduana code at end of peso-bruto line
    const m3 = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m);
    if (m3) return new FieldResult('aduanaEntradaSalida', true, m3[1]);

    // Fallback A: 2-digit aduana code at end of peso-bruto line
    const m2 = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{2})\s*$/m);
    if (m2) return new FieldResult('aduanaEntradaSalida', true, m2[1]);

    // Fallback B: derive 2-digit customs-office code from numPedimento
    // Format: AA BB CCCC DDDDDDD → BB (positions 2-3) = aduana.
    // Prefer a 15-digit token that is not embedded in a longer digit run, so
    // the first 15 digits of e.g. a 20-digit capture line are not mistaken
    // for the pedimento number. The unbounded pattern is kept as a last
    // resort so inputs that matched before still produce a result.
    const pedMatch =
      source.match(/(?<!\d)\d{2}\s?\d{2}\s?\d{4}\s?\d{7}(?!\d)/) ||
      source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
    if (pedMatch) {
      // Both patterns match exactly 15 digits, so no length check is needed.
      const num = pedMatch[0].replace(/\s/g, '');
      return new FieldResult('aduanaEntradaSalida', true, num.substring(2, 4));
    }

    return new FieldResult('aduanaEntradaSalida', false, null);
  },
};
|
|
51
83
|
|
|
52
|
-
// 5) RFC: importer/exporter RFC on its own line.
// Strategy A: strict whole-line pattern (3-4 letters + 6 consecutive digits +
//   3 alphanum). COVE codes like COVE2681B1RX8 naturally fail this because
//   their digit section is non-consecutive (2681B1 has a letter at pos 5).
// Strategy B: RFC as a word within a longer line (handles "RFC: IMS030409FZ0").
// Strategy C: loose 12-13 alphanum token that is the only content of a line.
//   Every line is inspected, so a leading COVE code is skipped and the actual
//   RFC (which appears later in the document) is still found.
export const rfcExtractor = {
  field: 'rfc',
  /**
   * @param {string} source - Text extracted from the document.
   * @returns {FieldResult} Result named `rfc`; `value` is the matched text, or
   *   `null` when no strategy matches.
   */
  extract: (source) => {
    const RFC_STRICT = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
    const RFC_EMBEDDED = /\b([A-Z]{3,4}\d{6}[A-Z0-9]{3})\b/i;
    const RFC_LOOSE = /^[A-Z0-9]{12,13}$/;
    const lines = source
      .split(/\r?\n/)
      .map((l) => l.trim())
      .filter((l) => l);

    // Primary: RFC occupies an entire trimmed line
    const strictLine = lines.find((line) => RFC_STRICT.test(line));
    if (strictLine) return new FieldResult('rfc', true, strictLine);

    // Fallback A: RFC embedded in a longer line (word-boundary search)
    for (const line of lines) {
      const m = line.match(RFC_EMBEDDED);
      if (m) return new FieldResult('rfc', true, m[1]);
    }

    // Fallback B: loose 12-13 alphanum isolated on its own line.
    // Work on the trimmed lines rather than a "\n…\n" regex over the raw text:
    // such a regex consumes the trailing newline, so a candidate on the line
    // right after a COVE code (and any candidate on the first or last line)
    // would never be examined.
    const looseLine = lines.find(
      (line) => RFC_LOOSE.test(line) && !/^COVE/i.test(line),
    );
    if (looseLine) return new FieldResult('rfc', true, looseLine);

    return new FieldResult('rfc', false, null);
  },
};
|
|
60
121
|
|
|
61
|
-
// 6) Código de Aceptación: 8 alphanumeric chars on the line right after the RFC
|
|
122
|
+
// 6) Código de Aceptación: 8 alphanumeric chars on the line right after the RFC.
|
|
123
|
+
// Uses the same RFC-line detection logic as rfcExtractor.
|
|
62
124
|
export const codigoAceptacionExtractor = {
|
|
63
125
|
field: 'codigoAceptacion',
|
|
64
126
|
extract: (source) => {
|
|
127
|
+
const RFC_STRICT = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
|
|
128
|
+
const RFC_LOOSE = /^[A-Z0-9]{12,13}$/;
|
|
65
129
|
const lines = source
|
|
66
130
|
.split(/\r?\n/)
|
|
67
131
|
.map((l) => l.trim())
|
|
68
132
|
.filter((l) => l.length > 0);
|
|
69
133
|
|
|
70
|
-
|
|
134
|
+
// Find RFC line using strict pattern first, then loose (excluding COVE)
|
|
135
|
+
let rfcIndex = lines.findIndex((l) => RFC_STRICT.test(l));
|
|
136
|
+
if (rfcIndex < 0) {
|
|
137
|
+
rfcIndex = lines.findIndex((l) => RFC_LOOSE.test(l) && !/^COVE/i.test(l));
|
|
138
|
+
}
|
|
139
|
+
|
|
71
140
|
let code = null;
|
|
72
141
|
if (rfcIndex >= 0 && /^[A-Z0-9]{8}$/.test(lines[rfcIndex + 1] || '')) {
|
|
73
142
|
code = lines[rfcIndex + 1];
|
|
@@ -77,24 +146,31 @@ export const codigoAceptacionExtractor = {
|
|
|
77
146
|
};
|
|
78
147
|
|
|
79
148
|
// 7) Num. E-Document: collects all 13-char alphanumeric codes following
|
|
80
|
-
// `NUM. E-DOCUMENT`
|
|
81
|
-
// per ED clave inside the CLAVE/COMPL. IDENTIFICADOR table.
|
|
149
|
+
// `NUM. E-DOCUMENT` / `NUMERO DE E-DOCUMENT` labels.
|
|
82
150
|
export const numEDocumentoExtractor = {
|
|
83
151
|
field: 'numEDocumento',
|
|
84
152
|
extract: (source) => {
|
|
85
153
|
const lines = source.split(/\r?\n/);
|
|
86
|
-
const
|
|
154
|
+
const extractedCodes = [];
|
|
155
|
+
const titlePatterns = [/NUMERO\s+DE\s+E-DOCUMENT/i, /NUM\.?\s*E-DOCUMENT/i];
|
|
87
156
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
157
|
+
for (let i = 0; i < lines.length; i++) {
|
|
158
|
+
const line = lines[i];
|
|
159
|
+
const hasTitle = titlePatterns.some((p) => p.test(line));
|
|
160
|
+
if (!hasTitle) continue;
|
|
91
161
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
162
|
+
// Codes on the title line itself
|
|
163
|
+
const codesInLine = line.match(/[A-Z0-9]{13}/g) || [];
|
|
164
|
+
extractedCodes.push(...codesInLine);
|
|
165
|
+
|
|
166
|
+
// Codes on the next few lines (e.g. CLAVE/COMPL. table rows)
|
|
167
|
+
for (let j = 1; j <= 10 && i + j < lines.length; j++) {
|
|
168
|
+
const nextLine = lines[i + j];
|
|
169
|
+
if (/NUMERO|OBSERVACIONES/i.test(nextLine)) break;
|
|
170
|
+
const codesInNextLine = nextLine.match(/[A-Z0-9]{13}/g) || [];
|
|
171
|
+
extractedCodes.push(...codesInNextLine);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
98
174
|
|
|
99
175
|
if (extractedCodes.length === 0) {
|
|
100
176
|
return new FieldResult('numEDocumento', false, null);
|
|
@@ -109,14 +185,37 @@ export const numEDocumentoExtractor = {
|
|
|
109
185
|
export const paymentDateExtractor = {
  field: 'paymentDate',
  /**
   * Find the payment date in the extracted text.
   *
   * @param {string} source - Text extracted from the document.
   * @returns {FieldResult} Result named `paymentDate`; `value` is the first
   *   capture of the first pattern that matches, or `null`.
   */
  extract: (source) => {
    // "*** NO PAGADO" is the explicit SAT marker that no payment has been
    // certified. When present, the bank-certification block is physically
    // absent, so any date matched by the fallback patterns (e.g. "2 PAGO:"
    // with a scheduled date, or "PRESENTACION:") would be a false positive.
    // Report "not found" outright — the document is classified as proforma.
    if (/\*{3}\s*NO\s+PAGADO/i.test(source)) {
      return new FieldResult('paymentDate', false, null);
    }

    // Tried in order; the first pattern that matches decides the result.
    const candidates = [
      // 0: explicit label DD/MM/YYYY
      /FECHA\s+DE\s+PAGO:?\s*(\d{2}\/\d{2}\/\d{4})/i,
      // 1: explicit label YYYY/MM/DD
      /FECHA\s+DE\s+PAGO:?\s*(\d{4}\/\d{2}\/\d{2})/i,
      // 2: forma simplificada scheduled date
      /2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/,
      // 3: PAGO at line start (original)
      /(?:^|\n)\s*PAGO\s+(\d{2}\/\d{2}\/\d{4})/i,
      // 4: PAGO after digit (pdf-parse artifact)
      /(?<=\d)PAGO\s+(\d{2}\/\d{2}\/\d{4})/i,
      // 5: reversed layout — date before PAGO (FECHAS column)
      /(\d{2}\/\d{2}\/\d{4})[ \t]+PAGO[ \t]*$/im,
      // 6: forma simplificada — pdf-parse extracts table cells out of order,
      // so the label "FECHA DE PAGO:" can appear on its own line and the value
      // (along with other cells) follows several lines later. Take the FIRST
      // dd/mm/yyyy after the label within a 400-char window. The NO PAGADO
      // guard above keeps this from picking up dates in unpaid documents.
      /FECHA\s+DE\s+PAGO:[\s\S]{1,400}?(\d{2}\/\d{2}\/\d{4})/i,
      // 7: fallback
      /PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/i,
    ];

    let hit = null;
    for (let idx = 0; idx < candidates.length && hit === null; idx += 1) {
      hit = source.match(candidates[idx]);
    }

    return hit === null
      ? new FieldResult('paymentDate', false, null)
      : new FieldResult('paymentDate', true, hit[1]);
  },
};
|
|
122
221
|
|
|
@@ -152,14 +251,16 @@ export const coveExtractor = {
|
|
|
152
251
|
};
|
|
153
252
|
|
|
154
253
|
// 10) Patente: from the PATENTE/PEDIMENTO/ADUANA header table
|
|
254
|
+
// Fallback A: pago electrónico line "3429 4024126 07" (pedimento_completo).
|
|
255
|
+
// Fallback B: positions 4-7 of numPedimento (always available when found).
|
|
155
256
|
export const patenteExtractor = {
|
|
156
257
|
field: 'patente',
|
|
157
258
|
extract: (source) => {
|
|
259
|
+
// Primary: PATENTE:/PEDIMENTO:/ADUANA: header followed by data line
|
|
158
260
|
const lines = source.split(/\r?\n/);
|
|
159
261
|
const patenteHeaderIndex = lines.findIndex((line) =>
|
|
160
262
|
/PATENTE:.*PEDIMENTO:.*ADUANA:/i.test(line),
|
|
161
263
|
);
|
|
162
|
-
|
|
163
264
|
if (patenteHeaderIndex >= 0) {
|
|
164
265
|
for (let i = patenteHeaderIndex + 1; i < lines.length; i++) {
|
|
165
266
|
const line = lines[i].trim();
|
|
@@ -169,6 +270,20 @@ export const patenteExtractor = {
|
|
|
169
270
|
}
|
|
170
271
|
}
|
|
171
272
|
}
|
|
273
|
+
|
|
274
|
+
// Fallback A: pago electrónico line "3429 4024126 07"
|
|
275
|
+
const pagoMatch = source.match(/(\d{4})\s+\d{7}\s+\d{2}/);
|
|
276
|
+
if (pagoMatch) return new FieldResult('patente', true, pagoMatch[1]);
|
|
277
|
+
|
|
278
|
+
// Fallback B: positions 4-7 of numPedimento
|
|
279
|
+
const pedMatch = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
|
|
280
|
+
if (pedMatch) {
|
|
281
|
+
const num = pedMatch[0].replace(/\s/g, '');
|
|
282
|
+
if (num.length === 15) {
|
|
283
|
+
return new FieldResult('patente', true, num.substring(4, 8));
|
|
284
|
+
}
|
|
285
|
+
}
|
|
286
|
+
|
|
172
287
|
return new FieldResult('patente', false, null);
|
|
173
288
|
},
|
|
174
289
|
};
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
// NOTE: We intentionally do NOT import `FieldResult` from
|
|
2
|
+
// '../document-type-shared.js' to avoid a circular-import TDZ when this
|
|
3
|
+
// module is imported directly (e.g. from unit tests). `FieldResult` is a
|
|
4
|
+
// plain data-class with shape `{ name, found, value }`, so we construct
|
|
5
|
+
// equivalent plain objects locally.
|
|
6
|
+
// Build a plain `{ name, found, value }` record, the same shape as FieldResult.
const fieldResult = (name, found, value) => {
  const record = { name, found, value };
  return record;
};
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Factura Inter-Agencia Document Type Definition
|
|
10
|
+
*
|
|
11
|
+
* Detects CFDIs (XML or PDF) issued between customs broker agencies (e.g.,
|
|
12
|
+
* NORCOM ↔ PALCO). These files are dropped into a pedimento folder by the
|
|
13
|
+
* broker but they are NOT part of the customs electronic file (expediente
|
|
14
|
+
* aduanal) — they are inter-agency billing for broker services.
|
|
15
|
+
*
|
|
16
|
+
* Detection rules (ALL required):
|
|
17
|
+
* 1) CFDI markers present (either xml structure or PDF text representation)
|
|
18
|
+
* 2) Both emisor and receptor RFCs belong to the configured agency pair
|
|
19
|
+
* (NAA120215F20 = NORCOM, PCC1008161WA = PALCO) in any direction.
|
|
20
|
+
* 3) At least one concepto with ClaveProdServ 78141502 (Servicios de
|
|
21
|
+
* agentes aduaneros) — confirms the billing is for broker services.
|
|
22
|
+
*
|
|
23
|
+
* IMPORTANT: This matcher MUST be registered BEFORE `facturasComerciales`
|
|
24
|
+
* in document-type-shared.js — both would match a CFDI in a pedimento
|
|
25
|
+
* folder, but inter-agency invoices must take precedence so they are
|
|
26
|
+
* filtered out of the Arela push pipeline (see arela-api
|
|
27
|
+
* NON_PUSHABLE_TYPES_SQL).
|
|
28
|
+
*
|
|
29
|
+
* Currently scope-limited to NORCOM↔PALCO. To widen, move INTER_AGENCIA_RFCS
|
|
30
|
+
* to env config and require ≥2 distinct RFCs from the configured list.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* RFCs of agencies whose mutual invoices should be excluded from the Arela
|
|
35
|
+
* push pipeline. Order is irrelevant — a match is any pair of distinct RFCs
|
|
36
|
+
* from this set appearing as emisor and receptor.
|
|
37
|
+
*/
|
|
38
|
+
export const INTER_AGENCIA_RFCS = [
  'NAA120215F20', // NORCOM
  'PCC1008161WA', // PALCO
];
|
|
39
|
+
|
|
40
|
+
// ClaveProdServ 78141502 ("Servicios de agentes aduaneros"): its presence
// confirms the invoice bills customs-broker services.
const BROKER_SERVICE_CLAVE_PROD_SERV = '78141502';
|
|
41
|
+
|
|
42
|
+
// Patterns that reveal the XML form of a CFDI.
const CFDI_XML_MARKERS = [
  /cfdi:Comprobante/i,
  /xmlns:cfdi/i,
  /TipoDeComprobante/i,
];

/**
 * Decide whether `source` is a CFDI, either the original XML or the text of a
 * printed CFDI (PDF representation).
 *
 * PDF text loses the XML tags, so the second check looks for the
 * human-readable phrases rendered by SAT-style invoice templates
 * ("Folio Fiscal", "Sello Digital del CFDI", ...).
 *
 * @param {string} source - Raw XML or extracted PDF text.
 * @returns {boolean} `true` when at least two XML markers, or at least two
 *   printed-form markers, are present.
 */
function isCfdiContent(source) {
  const countHits = (markers) =>
    markers.reduce((hits, marker) => (marker.test(source) ? hits + 1 : hits), 0);

  if (countHits(CFDI_XML_MARKERS) >= 2) {
    return true;
  }

  const printedMarkers = [
    /folio\s*fiscal/i,
    /sello\s*digital\s*del\s*cfdi/i,
    /cadena\s*original.*certificaci[oó]n\s*digital\s*del\s*sat/i,
    /representaci[oó]n\s*impresa\s*de\s*un\s*cfdi/i,
  ];
  return countHits(printedMarkers) >= 2;
}
|
|
68
|
+
|
|
69
|
+
/**
 * Return the subset of INTER_AGENCIA_RFCS that appear in `source`, ordered by
 * the position of each RFC's first occurrence in the text.
 *
 * Matching is case-insensitive and uses word boundaries so substrings inside
 * larger tokens (cert/sello base64) don't produce false positives. Callers
 * that index into the result (`rfcs[0]`, `rfcs[1]`) therefore get the RFCs in
 * the order the document presents them, not in configuration order.
 *
 * @param {string} source - Raw XML or extracted PDF text.
 * @returns {string[]} Distinct upper-cased RFCs, earliest occurrence first.
 */
function findInterAgenciaRfcs(source) {
  const hits = [];
  for (const rfc of INTER_AGENCIA_RFCS) {
    // Escape regex metacharacters so a configured value is matched literally.
    const literal = rfc.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const index = source.search(new RegExp(`\\b${literal}\\b`, 'i'));
    if (index >= 0) hits.push({ rfc: rfc.toUpperCase(), index });
  }
  hits.sort((a, b) => a.index - b.index);
  // The Set drops duplicates while keeping the sorted order.
  return [...new Set(hits.map((hit) => hit.rfc))];
}
|
|
82
|
+
|
|
83
|
+
export const facturaInterAgenciaDefinition = {
  type: 'factura_inter_agencia',
  extensions: ['xml', 'pdf'],

  /**
   * All three detection rules must hold: CFDI markers, both configured agency
   * RFCs present, and the broker-services ClaveProdServ present.
   *
   * @param {string} source - Raw XML or extracted PDF text.
   * @returns {boolean}
   */
  match: (source) => {
    if (!isCfdiContent(source)) return false;

    // Need ≥2 distinct configured RFCs present (one as emisor, one as receptor)
    const rfcsFound = findInterAgenciaRfcs(source);
    if (rfcsFound.length < 2) return false;

    // Confirm the invoice is for broker services (customs agent services)
    if (!source.includes(BROKER_SERVICE_CLAVE_PROD_SERV)) return false;

    return true;
  },

  // Pedimento extraction is optional / informational — these files are
  // excluded from push, so arela_path is never composed. We still extract
  // a pedimento number when present (from the "Referencias" / "Pedimento:"
  // section of the printable CFDI) for auditability.
  /**
   * @param {string} source - Unused; kept for the shared definition signature.
   * @param {Array<{name: string, value: *}>} [fields] - Extractor results.
   * @returns {string|null} Value of the `numPedimento` field, or `null`.
   */
  extractNumPedimento: (source, fields) => {
    return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
  },

  /**
   * Derive the 4-digit year from the YY prefix of a pedimento number.
   *
   * Only a full 15-digit number (YY + aduana + patente + consecutive) carries
   * that prefix. The `numPedimento` extractor of this definition yields the
   * 11-digit form (patente + consecutive, no YY), whose first two digits
   * belong to the patente, so no year can be derived from it and `null` is
   * returned.
   *
   * @param {string} source - Unused; kept for the shared definition signature.
   * @param {Array<{name: string, value: *}>} [fields] - Extractor results.
   * @returns {number|null} Year (YY < 50 → 20YY, otherwise 19YY) or `null`.
   */
  extractPedimentoYear: (source, fields) => {
    const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
    if (numPedimento == null) return null;

    // Tolerate a spaced rendering such as "26 07 3429 6013645".
    const digits = String(numPedimento).replace(/\s+/g, '');
    if (!/^\d{15}$/.test(digits)) return null;

    const yy = Number.parseInt(digits.substring(0, 2), 10);
    return yy < 50 ? yy + 2000 : yy + 1900;
  },

  extractors: [
    {
      field: 'rfcEmisor',
      extract: (source) => {
        // XML form: <cfdi:Emisor Rfc="..." />
        const xmlMatch = source.match(
          /<[^>]*Emisor[^>]*Rfc\s*=\s*["']([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})["']/i,
        );
        if (xmlMatch) return fieldResult('rfcEmisor', true, xmlMatch[1]);

        // PDF form: no tags to anchor on, so take the first RFC reported by
        // findInterAgenciaRfcs.
        // NOTE(review): this is only the emisor if that helper lists RFCs in
        // the order they appear in the document — confirm.
        const rfcs = findInterAgenciaRfcs(source);
        if (rfcs.length > 0) return fieldResult('rfcEmisor', true, rfcs[0]);

        return fieldResult('rfcEmisor', false, null);
      },
    },
    {
      field: 'rfcReceptor',
      extract: (source) => {
        // XML form: <cfdi:Receptor Rfc="..." />
        const xmlMatch = source.match(
          /<[^>]*Receptor[^>]*Rfc\s*=\s*["']([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})["']/i,
        );
        if (xmlMatch) return fieldResult('rfcReceptor', true, xmlMatch[1]);

        // PDF form: the second RFC reported by findInterAgenciaRfcs.
        const rfcs = findInterAgenciaRfcs(source);
        if (rfcs.length >= 2) {
          return fieldResult('rfcReceptor', true, rfcs[1]);
        }
        return fieldResult('rfcReceptor', false, null);
      },
    },
    {
      field: 'folio',
      extract: (source) => {
        // CFDI Folio attribute
        const xmlMatch = source.match(/\bFolio\s*=\s*["']([A-Z0-9-]+)["']/i);
        if (xmlMatch) return fieldResult('folio', true, xmlMatch[1]);

        // PDF: "Numero Folio 012749"
        const pdfMatch = source.match(/Numero\s+Folio\s+([A-Z0-9-]+)/i);
        if (pdfMatch) return fieldResult('folio', true, pdfMatch[1]);

        return fieldResult('folio', false, null);
      },
    },
    {
      field: 'uuid',
      extract: (source) => {
        // First UUID-shaped token in the text, reported upper-cased.
        const uuidRe =
          /[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}/i;
        const m = source.match(uuidRe);
        return fieldResult('uuid', !!m, m ? m[0].toUpperCase() : null);
      },
    },
    {
      field: 'numPedimento',
      extract: (source) => {
        // Printable CFDI "Pedimento: 3458 6000046 Fecha: ..." — recovers an
        // 11-digit pedimento (no YY prefix). Useful for auditability only.
        const m = source.match(/Pedimento:?\s*(\d{4})\s*(\d{7})/i);
        if (m) {
          return fieldResult('numPedimento', true, `${m[1]}${m[2]}`);
        }
        return fieldResult('numPedimento', false, null);
      },
    },
  ],
};
|
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
// VUCEM "consultarPedimentoCompleto" XML matcher.
|
|
2
2
|
//
|
|
3
|
-
//
|
|
4
|
-
//
|
|
5
|
-
//
|
|
6
|
-
// counters) already includes `pedimento_completo_xml`, so re-enabling is a
|
|
7
|
-
// single-line change.
|
|
3
|
+
// Registered in `document-type-shared.js`. Downstream code
|
|
4
|
+
// (composeArelaPath, arela-api propagation SQL, IdentifyCommand counters)
|
|
5
|
+
// also includes `pedimento_completo_xml`.
|
|
8
6
|
//
|
|
9
7
|
// Filename patterns recognized (try in order — patente extraction):
|
|
10
8
|
// 1) VU_PATENTE_ADUANA_PEDIMENTO.xml → e.g. VU_3429_070_5016101.xml
|
|
@@ -47,6 +45,22 @@ function pad(value, length) {
|
|
|
47
45
|
return String(value).padStart(length, '0');
|
|
48
46
|
}
|
|
49
47
|
|
|
48
|
+
/**
 * Convert a VUCEM `aduanaEntradaSalida.clave` (e.g. "70", "750", "40") to the
 * 2-digit "sección aduanera" prefix used inside the 15-digit pedimento number.
 *
 * VUCEM strips leading zeros from the canonical 3-digit SAT aduana code,
 * so `070` (Ciudad Juárez) arrives as `70`. The pedimento prefix is the
 * first 2 digits of the 3-digit code:
 *   `70`  → `070` → `07`  (Cd. Juárez)
 *   `750` → `750` → `75`  (Puebla)
 *   `40`  → `040` → `04`  (Lázaro Cárdenas)
 *
 * @param {string|number|null|undefined} claveValue - Clave as read from the XML.
 * @returns {string|null} Two-character prefix, or `null` when the clave is
 *   missing, empty or whitespace-only (instead of the meaningless `'00'`).
 */
function aduanaToSeccion(claveValue) {
  if (claveValue == null) return null;

  // Surrounding whitespace would otherwise count towards the padding width.
  const clave = String(claveValue).trim();
  if (clave === '') return null;

  return clave.padStart(3, '0').substring(0, 2);
}
|
|
63
|
+
|
|
50
64
|
/**
|
|
51
65
|
* Try the three known filename patterns and return {patente, aduana, pedimento}
|
|
52
66
|
* with any subset of the fields populated. Returns null if no pattern matches.
|
|
@@ -102,12 +116,17 @@ function yyFromIsoDate(iso) {
|
|
|
102
116
|
return m ? m[1].substring(2, 4) : null;
|
|
103
117
|
}
|
|
104
118
|
|
|
105
|
-
// Find <ns2:fechas> block
|
|
106
|
-
|
|
119
|
+
// Find <ns2:fechas> block whose nested <clave> matches `claveValue` and
|
|
120
|
+
// return its <ns2:fecha>. Works for both shapes:
|
|
121
|
+
// <fechas><clave>N</clave><fecha>...</fecha></fechas>
|
|
122
|
+
// <fechas><fecha>...</fecha><tipo><clave>N</clave></tipo></fechas>
|
|
123
|
+
// (firstTag finds the FIRST <clave> in the block — both layouts expose only
|
|
124
|
+
// one clave per fechas entry.)
|
|
125
|
+
function findFechaByClave(source, claveValue) {
|
|
107
126
|
const fechasBlocks = allTagBlocks(source, 'fechas');
|
|
108
127
|
for (const block of fechasBlocks) {
|
|
109
128
|
const clave = firstTag(block, 'clave');
|
|
110
|
-
if (clave ===
|
|
129
|
+
if (clave === claveValue) {
|
|
111
130
|
const fecha = firstTag(block, 'fecha');
|
|
112
131
|
if (fecha) return fecha;
|
|
113
132
|
}
|
|
@@ -115,6 +134,18 @@ function findPaymentDate(source) {
|
|
|
115
134
|
return null;
|
|
116
135
|
}
|
|
117
136
|
|
|
137
|
+
// Payment date of the contributions (tipo.clave == 2): delegates to
// findFechaByClave with clave '2' and returns whatever that lookup yields.
function findPaymentDate(source) {
  return findFechaByClave(source, '2');
}
|
|
141
|
+
|
|
142
|
+
// Presentation date (tipo.clave == 5): delegates to findFechaByClave with
// clave '5'. This is the authoritative source for the pedimento's YY prefix —
// a pedimento opened in Dec-2025 but paid in Jan-2026 keeps the `25` prefix,
// matching what VUCEM stamps in the filename.
function findPresentationDate(source) {
  return findFechaByClave(source, '5');
}
|
|
148
|
+
|
|
118
149
|
// --------------------------- extractors ------------------------------------
|
|
119
150
|
|
|
120
151
|
const rfcExtractor = {
|
|
@@ -152,7 +183,7 @@ const aduanaEntradaSalidaExtractor = {
|
|
|
152
183
|
return new FieldResult(
|
|
153
184
|
'aduanaEntradaSalida',
|
|
154
185
|
!!clave,
|
|
155
|
-
|
|
186
|
+
aduanaToSeccion(clave),
|
|
156
187
|
);
|
|
157
188
|
},
|
|
158
189
|
};
|
|
@@ -165,6 +196,14 @@ const paymentDateExtractor = {
|
|
|
165
196
|
},
|
|
166
197
|
};
|
|
167
198
|
|
|
199
|
+
// Exposes the presentation date as the `presentationDate` field; `found`
// reflects whether findPresentationDate returned a truthy value.
const presentationDateExtractor = {
  field: 'presentationDate',
  extract: (source) => {
    const presentedOn = findPresentationDate(source);
    const wasFound = Boolean(presentedOn);
    return new FieldResult('presentationDate', wasFound, presentedOn);
  },
};
|
|
206
|
+
|
|
168
207
|
const fechaPagoRectificacionExtractor = {
|
|
169
208
|
field: 'fechaPagoRectificacion',
|
|
170
209
|
extract: (source) => {
|
|
@@ -257,8 +296,14 @@ export const pedimentoCompletoXmlDefinition = {
|
|
|
257
296
|
|
|
258
297
|
/**
|
|
259
298
|
* Compose the 15-digit pedimento number from XML body + filename.
|
|
260
|
-
* YY:
|
|
261
|
-
*
|
|
299
|
+
* YY: priority order (most authoritative first):
|
|
300
|
+
* 1) Filename pattern 3 (`{15-digit}.xml`) — VUCEM stamps the correct
|
|
301
|
+
* prefix at export time.
|
|
302
|
+
* 2) Fecha de presentacion (<fechas><clave>5) — the year the pedimento
|
|
303
|
+
* was opened. Authoritative for the YY prefix even when payment
|
|
304
|
+
* crosses calendar year (e.g. opened Dec-2025, paid Jan-2026 → YY=25).
|
|
305
|
+
* 3) Rectification fechaPago (only when no presentation date exists).
|
|
306
|
+
* 4) Payment date (last-resort fallback).
|
|
262
307
|
* AA: from <aduanaEntradaSalida><clave> padded to 2.
|
|
263
308
|
* PPPP: from the filename (any of the three patterns).
|
|
264
309
|
* NNNNNNN: from <pedimento> padded to 7.
|
|
@@ -267,15 +312,19 @@ export const pedimentoCompletoXmlDefinition = {
|
|
|
267
312
|
extractNumPedimento: (source, fields, filePath) => {
|
|
268
313
|
const parts = parseFilenameParts(filePath);
|
|
269
314
|
|
|
315
|
+
const presentation = fields?.find(
|
|
316
|
+
(f) => f.name === 'presentationDate' && f.found,
|
|
317
|
+
)?.value;
|
|
270
318
|
const rect = fields?.find(
|
|
271
319
|
(f) => f.name === 'fechaPagoRectificacion' && f.found,
|
|
272
320
|
)?.value;
|
|
273
321
|
const pay = fields?.find((f) => f.name === 'paymentDate' && f.found)?.value;
|
|
274
322
|
|
|
275
323
|
let yy =
|
|
324
|
+
(parts && parts.year) ||
|
|
325
|
+
yyFromIsoDate(presentation) ||
|
|
276
326
|
yyFromIsoDate(rect) ||
|
|
277
327
|
yyFromIsoDate(pay) ||
|
|
278
|
-
(parts && parts.year) ||
|
|
279
328
|
null;
|
|
280
329
|
|
|
281
330
|
const aduanaField = fields?.find(
|
|
@@ -315,6 +364,7 @@ export const pedimentoCompletoXmlDefinition = {
|
|
|
315
364
|
tipoOperacionExtractor,
|
|
316
365
|
aduanaEntradaSalidaExtractor,
|
|
317
366
|
paymentDateExtractor,
|
|
367
|
+
presentationDateExtractor,
|
|
318
368
|
fechaPagoRectificacionExtractor,
|
|
319
369
|
coveExtractor,
|
|
320
370
|
numEDocumentoExtractor,
|
|
@@ -17,22 +17,55 @@ export const pedimentoCompletoDefinition = {
|
|
|
17
17
|
type: 'pedimento_completo',
|
|
18
18
|
extensions: ['pdf'],
|
|
19
19
|
match: (source) => {
|
|
20
|
-
|
|
20
|
+
// Hard exclude: "FORMA SIMPLIFICADA [DE|DEL] PEDIMENTO" is handled by
|
|
21
|
+
// pedimento_simplificado.
|
|
22
|
+
if (/FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(source)) return false;
|
|
21
23
|
|
|
24
|
+
// Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
|
|
25
|
+
// different document type handled by aviso_consolidado.
|
|
26
|
+
if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
|
|
27
|
+
|
|
28
|
+
// The colon after "T. OPER" is optional — see note in pedimento-simplificado.js.
|
|
22
29
|
const hasHeaderFields =
|
|
23
30
|
/NUM\.?\s*PEDIMENTO:/i.test(source) &&
|
|
24
31
|
/CVE\.?\s*PEDIMENTO:/i.test(source) &&
|
|
25
|
-
/T\.?\s*OPER
|
|
26
|
-
if (
|
|
32
|
+
/T\.?\s*OPER:?/i.test(source);
|
|
33
|
+
if (hasHeaderFields) {
|
|
34
|
+
const hasCopyMarker =
|
|
35
|
+
/ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
|
|
36
|
+
/SEGUNDA\s+COPIA/i.test(source) ||
|
|
37
|
+
/TERCERA\s+COPIA/i.test(source) ||
|
|
38
|
+
/COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
|
|
39
|
+
/DEFINITIVO/i.test(source) ||
|
|
40
|
+
/ANEXO\s+DEL\s+PEDIMENTO/i.test(source) ||
|
|
41
|
+
/\*+FIN\s+DE\s+PEDIMENTO\s*\*+/i.test(source);
|
|
42
|
+
if (hasCopyMarker) return true;
|
|
43
|
+
}
|
|
27
44
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
/
|
|
32
|
-
|
|
33
|
-
/
|
|
45
|
+
// Fallback clue-counting heuristic for exotic layouts.
|
|
46
|
+
const clues = [
|
|
47
|
+
/\bPEDIMENTO\s*\n.*NUM\.\s*PEDIMENTO:/i,
|
|
48
|
+
/NUM\.\s*PEDIMENTO:\s*T\.OPER:\s*CVE\.PEDIMENTO:\s*REGIMEN:/i,
|
|
49
|
+
/\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+[A-Z]{3}/i,
|
|
50
|
+
/CERTIFICACIONES/i,
|
|
51
|
+
/DESTINO\/ORIGEN\s+TIPO\s+CAMBIO\s+PESO\s+BRUTO\s+ADUANA\s+E\/S/i,
|
|
52
|
+
/MEDIOS\s+DE\s+TRANSPORTE/i,
|
|
53
|
+
/DATOS\s+DEL\s+IMPORTADOR\/EXPORTADOR/i,
|
|
54
|
+
/RFC:\s+NOMBRE,\s+DENOMINACION\s+O\s+RAZON\s+SOCIAL:/i,
|
|
55
|
+
/CUADRO\s+DE\s+LIQUIDACION/i,
|
|
56
|
+
/\*\*\*\s+PAGO\s+ELECTRONICO\s+\*\*\*/i,
|
|
57
|
+
/PATENTE:\s+PEDIMENTO:\s+ADUANA:/i,
|
|
58
|
+
/LINEA\s+DE\s+CAPTURA:/i,
|
|
59
|
+
/DATOS\s+DEL\s+PROVEEDOR\s+O\s+COMPRADOR/i,
|
|
60
|
+
/CLAVE\/COMPL\.\s+IDENTIFICADOR/i,
|
|
61
|
+
/ANEXO\s+DEL\s+PEDIMENTO/i,
|
|
62
|
+
/\*+FIN\s+DE\s+PEDIMENTO\s+\*+NUM\.\s+TOTAL\s+DE\s+PARTIDAS:/i,
|
|
63
|
+
/DECLARO\s+BAJO\s+PROTESTA\s+DE\s+DECIR\s+VERDAD/i,
|
|
64
|
+
/PEDIMENTO\s+ELABORADO\s+DE\s+CONFORMIDAD/i,
|
|
65
|
+
];
|
|
34
66
|
|
|
35
|
-
|
|
67
|
+
const found = clues.filter((clue) => clue.test(source));
|
|
68
|
+
return found.length > clues.length * 0.25;
|
|
36
69
|
},
|
|
37
70
|
|
|
38
71
|
/**
|