@arela/uploader 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,271 @@
1
+ /**
2
+ * Regression tests for the pedimento_completo_xml matcher.
3
+ *
4
+ * Covers:
5
+ * 1) Basic detection + arela_path composition from a VUCEM
6
+ * `consultarPedimentoCompletoRespuesta` XML.
7
+ * 2) **YY truth source** — when the pedimento is opened in one year and
8
+ * paid in the next (e.g. presentation 2025-12, payment 2026-01), the
9
+ * 15-digit pedimento MUST keep the presentation year (`25...`), not the
10
+ * payment year (`26...`). This matches what VUCEM stamps in the
11
+ * filename and what the PDF matchers produce.
12
+ * 3) **Aduana padding** — VUCEM returns the aduana code without leading
13
+ * zeros (e.g. `70` for Ciudad Juárez instead of the canonical `070`).
14
+ * The 2-digit "sección aduanera" prefix used inside the 15-digit
15
+ * pedimento is the first 2 digits of the 3-digit form (`70` → `07`).
16
+ * 4) **numPedimento backfill** — the XML matcher composes numPedimento
17
+ * externally via `extractNumPedimento` rather than as a field
18
+ * extractor. `extractDocumentFields` must backfill it so that
19
+ * `composeArelaPath` can find it.
20
+ * 5) Resolution to `proforma_completo_xml` when no payment evidence
21
+ * exists in the body.
22
+ */
23
+ import { describe, it, expect } from '@jest/globals';
24
+
25
+ import { extractDocumentFields } from '../../src/document-type-shared.js';
26
+ import { composeArelaPath } from '../../src/file-detection.js';
27
+
28
+ // ---------------------------------------------------------------------------
29
+ // Test fixtures
30
+ // ---------------------------------------------------------------------------
31
+
32
+ /**
33
+ * Build a minimal VUCEM consultarPedimentoCompletoRespuesta XML.
34
+ * Only the tags the matcher actually reads are included.
35
+ */
36
+ function buildXml({
37
+ rfc = 'CEM090106MU3',
38
+ pedimento = '5063036',
39
+ claveDocumento = 'V1',
40
+ tipoOperacionDesc = 'Exportacion',
41
+ aduanaClave = '70',
42
+ presentationDate = '2025-12-01-06:00',
43
+ paymentDate = '2026-01-07-06:00',
44
+ rectFechaPago = null,
45
+ facturas = ['V1-FUJIKURA MEX-202512'],
46
+ edDocs = [],
47
+ } = {}) {
48
+ const fechas = [];
49
+ if (presentationDate) {
50
+ fechas.push(
51
+ `<ns2:fechas><ns2:fecha>${presentationDate}</ns2:fecha><ns2:tipo><ns2:clave>5</ns2:clave><ns2:descripcion>FECHA DE PRESENTACION</ns2:descripcion></ns2:tipo></ns2:fechas>`,
52
+ );
53
+ }
54
+ if (paymentDate) {
55
+ fechas.push(
56
+ `<ns2:fechas><ns2:fecha>${paymentDate}</ns2:fecha><ns2:tipo><ns2:clave>2</ns2:clave><ns2:descripcion>FECHA DE PAGO</ns2:descripcion></ns2:tipo></ns2:fechas>`,
57
+ );
58
+ }
59
+
60
+ const rect = rectFechaPago
61
+ ? `<ns2:rectificacion><ns2:fechaPago>${rectFechaPago}</ns2:fechaPago></ns2:rectificacion>`
62
+ : '';
63
+
64
+ const facturasXml = facturas
65
+ .map(
66
+ (num) =>
67
+ `<ns2:facturas><ns2:numero>${num}</ns2:numero></ns2:facturas>`,
68
+ )
69
+ .join('');
70
+
71
+ const identificadoresXml =
72
+ edDocs.length === 0
73
+ ? ''
74
+ : `<ns2:identificadores>${edDocs
75
+ .map(
76
+ (code) =>
77
+ `<ns2:identificadores><claveIdentificador><clave>ED</clave></claveIdentificador><complemento1>${code}</complemento1></ns2:identificadores>`,
78
+ )
79
+ .join('')}</ns2:identificadores>`;
80
+
81
+ return `<?xml version="1.0" encoding="UTF-8"?>
82
+ <S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/">
83
+ <S:Body>
84
+ <ns2:consultarPedimentoCompletoRespuesta xmlns:ns2="http://x">
85
+ <ns2:pedimento>
86
+ <ns2:pedimento>${pedimento}</ns2:pedimento>
87
+ <ns2:encabezado>
88
+ <ns2:claveDocumento><ns2:clave>${claveDocumento}</ns2:clave></ns2:claveDocumento>
89
+ <ns2:tipoOperacion><ns2:clave>2</ns2:clave><ns2:descripcion>${tipoOperacionDesc}</ns2:descripcion></ns2:tipoOperacion>
90
+ <ns2:aduanaEntradaSalida><ns2:clave>${aduanaClave}</ns2:clave></ns2:aduanaEntradaSalida>
91
+ </ns2:encabezado>
92
+ <ns2:importadorExportador>
93
+ <ns2:rfc>${rfc}</ns2:rfc>
94
+ ${fechas.join('\n')}
95
+ </ns2:importadorExportador>
96
+ ${rect}
97
+ ${facturasXml}
98
+ ${identificadoresXml}
99
+ </ns2:pedimento>
100
+ </ns2:consultarPedimentoCompletoRespuesta>
101
+ </S:Body>
102
+ </S:Envelope>`;
103
+ }
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Tests
107
+ // ---------------------------------------------------------------------------
108
+
109
+ describe('pedimento_completo_xml matcher', () => {
110
+ it('detects, extracts, and composes arela_path for a basic export pedimento', () => {
111
+ const xml = buildXml({
112
+ rfc: 'CEM090106MU3',
113
+ pedimento: '5063036',
114
+ aduanaClave: '70',
115
+ presentationDate: '2025-06-15-06:00',
116
+ paymentDate: '2025-06-20-06:00',
117
+ });
118
+ // 15-digit filename pattern: YY=25 AA=07 PPPP=3429 NNNNNNN=5063036
119
+ const filePath = '/x/2025/250734295063036_250734295063036.xml';
120
+
121
+ const [type, fields, ped, year] = extractDocumentFields(
122
+ xml,
123
+ 'xml',
124
+ filePath,
125
+ );
126
+
127
+ expect(type).toBe('pedimento_completo_xml');
128
+ expect(ped).toBe('250734295063036');
129
+ expect(year).toBe(2025);
130
+ expect(fields.find((f) => f.name === 'rfc')?.value).toBe('CEM090106MU3');
131
+ expect(fields.find((f) => f.name === 'aduanaEntradaSalida')?.value).toBe(
132
+ '07',
133
+ );
134
+ // Backfill check: numPedimento must be exposed as a field so
135
+ // composeArelaPath can find it.
136
+ expect(fields.find((f) => f.name === 'numPedimento')?.value).toBe(
137
+ '250734295063036',
138
+ );
139
+
140
+ const arela = composeArelaPath(type, fields, year, filePath);
141
+ expect(arela).toBe('CEM090106MU3/2025/3429/07/250734295063036/');
142
+ });
143
+
144
+ it('uses presentation date (not payment date) for YY when payment crosses calendar year', () => {
145
+ // Pedimento opened Dec 2025, paid Jan 2026 — the YY must be 25.
146
+ const xml = buildXml({
147
+ pedimento: '5063036',
148
+ aduanaClave: '70',
149
+ presentationDate: '2025-12-01-06:00',
150
+ paymentDate: '2026-01-07-06:00',
151
+ });
152
+ // Use the 3-part filename pattern (no YY in filename) so YY comes from XML body.
153
+ const filePath = '/x/070-3429-5063036.xml';
154
+
155
+ const [type, , ped, year] = extractDocumentFields(xml, 'xml', filePath);
156
+
157
+ expect(type).toBe('pedimento_completo_xml');
158
+ expect(ped).toBe('250734295063036');
159
+ expect(year).toBe(2025);
160
+ });
161
+
162
+ it('falls back to payment date YY when presentation date is missing', () => {
163
+ const xml = buildXml({
164
+ pedimento: '5063036',
165
+ aduanaClave: '70',
166
+ presentationDate: null, // No clave=5 block
167
+ paymentDate: '2026-01-07-06:00',
168
+ });
169
+ const filePath = '/x/070-3429-5063036.xml';
170
+
171
+ const [, , ped, year] = extractDocumentFields(xml, 'xml', filePath);
172
+
173
+ expect(ped).toBe('260734295063036');
174
+ expect(year).toBe(2026);
175
+ });
176
+
177
+ it('prefers filename YY over body fechas (VUCEM-stamped truth)', () => {
178
+ // Filename says YY=24 but body has presentation=2025. Filename wins.
179
+ const xml = buildXml({
180
+ pedimento: '5063036',
181
+ aduanaClave: '70',
182
+ presentationDate: '2025-12-01-06:00',
183
+ paymentDate: '2026-01-07-06:00',
184
+ });
185
+ const filePath = '/x/240734295063036_240734295063036.xml';
186
+
187
+ const [, , ped, year] = extractDocumentFields(xml, 'xml', filePath);
188
+
189
+ expect(ped).toBe('240734295063036');
190
+ expect(year).toBe(2024);
191
+ });
192
+
193
+ it('pads VUCEM aduana correctly: 70 -> 07, 750 -> 75, 40 -> 04', () => {
194
+ const cases = [
195
+ { aduanaClave: '70', expected: '07', // Cd. Juárez (3-digit canonical: 070)
196
+ filename: '/x/070-3429-5000001.xml' },
197
+ { aduanaClave: '750', expected: '75', // Puebla
198
+ filename: '/x/750-3429-5000002.xml' },
199
+ { aduanaClave: '40', expected: '04', // Lázaro Cárdenas (canonical: 040)
200
+ filename: '/x/040-3429-5000003.xml' },
201
+ ];
202
+
203
+ for (const c of cases) {
204
+ const xml = buildXml({
205
+ pedimento: c.filename.match(/-(\d{7})\.xml$/)[1],
206
+ aduanaClave: c.aduanaClave,
207
+ presentationDate: '2025-06-15-06:00',
208
+ paymentDate: '2025-06-20-06:00',
209
+ });
210
+ const [, fields, ped] = extractDocumentFields(xml, 'xml', c.filename);
211
+ expect(fields.find((f) => f.name === 'aduanaEntradaSalida')?.value).toBe(
212
+ c.expected,
213
+ );
214
+ // Zero-based indices 2-3 (the 3rd and 4th characters) of the composed
215
+ // 15-digit pedimento must equal the 2-digit aduana prefix.
216
+ expect(ped.substring(2, 4)).toBe(c.expected);
217
+ }
218
+ });
219
+
220
+ it('resolves to proforma_completo_xml when no payment evidence exists', () => {
221
+ const xml = buildXml({
222
+ pedimento: '5063036',
223
+ aduanaClave: '70',
224
+ presentationDate: '2025-12-01-06:00',
225
+ paymentDate: null, // No payment, no rectificacion
226
+ });
227
+ const filePath = '/x/070-3429-5063036.xml';
228
+
229
+ const [type] = extractDocumentFields(xml, 'xml', filePath);
230
+
231
+ expect(type).toBe('proforma_completo_xml');
232
+ });
233
+
234
+ it('extracts cove and rfc correctly', () => {
235
+ const xml = buildXml({
236
+ rfc: 'CEM090106MU3',
237
+ facturas: ['V1-FUJIKURA MEX-202512', 'INV-2'],
238
+ });
239
+ const filePath = '/x/250734295063036_250734295063036.xml';
240
+
241
+ const [, fields] = extractDocumentFields(xml, 'xml', filePath);
242
+
243
+ expect(fields.find((f) => f.name === 'rfc')?.value).toBe('CEM090106MU3');
244
+ expect(fields.find((f) => f.name === 'cove')?.value).toBe(
245
+ '[V1-FUJIKURA MEX-202512,INV-2]',
246
+ );
247
+ });
248
+
249
+ it('returns null arela_path when filename is unrecognized (no patente)', () => {
250
+ const xml = buildXml({
251
+ pedimento: '5063036',
252
+ aduanaClave: '70',
253
+ presentationDate: '2025-06-15-06:00',
254
+ paymentDate: '2025-06-20-06:00',
255
+ });
256
+ // Unrecognized filename — no patente derivable.
257
+ const filePath = '/x/random_name.xml';
258
+
259
+ const [type, fields, ped, year] = extractDocumentFields(
260
+ xml,
261
+ 'xml',
262
+ filePath,
263
+ );
264
+
265
+ expect(type).toBe('pedimento_completo_xml');
266
+ expect(ped).toBeNull();
267
+ expect(year).toBeNull();
268
+ // composeArelaPath returns null because patente is missing.
269
+ expect(composeArelaPath(type, fields, year, filePath)).toBeNull();
270
+ });
271
+ });
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Regression tests for the pedimento_simplificado matcher.
3
+ *
4
+ * Covers PDF layouts where:
5
+ * - The header reads "FORMA SIMPLIFICADA DEL PEDIMENTO" (with DEL),
6
+ * not the canonical "DE PEDIMENTO".
7
+ * - The header trio prints "T. OPER" WITHOUT a trailing colon
8
+ * (the value sits in a separate table cell).
9
+ *
10
+ * Real-world example: REF NQR26-079, Aduana 640 (Querétaro), patente 3458.
11
+ * Before this regression test, the matcher fell through to
12
+ * `facturas_comerciales` because the word "FACTURA" appears in the
13
+ * "OBSERVACIONES" block.
14
+ */
15
+ import { describe, it, expect } from '@jest/globals';
16
+
17
+ // Importing only the dispatcher avoids circular-init issues caused by
18
+ // `_pedimento-shared-extractors.js` pulling FieldResult from document-type-shared.
19
+ import { extractDocumentFields } from '../../src/document-type-shared.js';
20
+
21
+ // REAL pdf-parse output from the NQR26-079 simplificado PDF
22
+ // (CSM9204097Q1, patente 3458, aduana 640).
23
+ // Captured verbatim with `PDFParse({data}).getText()` — pdf-parse extracts
24
+ // table cells out of visual order, so labels and values often live on
25
+ // different lines (see the FECHA DE PAGO block: label appears, then a few
26
+ // unrelated cells, then the date sits on its own line with the importe).
27
+ // This is exactly what the matchers and extractors see in production.
28
+ const SIMP_DEL_NQR26079_TEXT = `A1 CVE. PEDIMENTO: IMP T. OPER 26 64 3458 6000079 NUM. PEDIMENTO:
29
+ CERTIFICACIONES
30
+ ADUANA E/S:
31
+ DATOS DEL IMPORTADOR / EXPORTADOR
32
+ RFC: CURP:
33
+ CÓDIGO DE
34
+ ACEPTACIÓN
35
+ 640
36
+ CSM9204097Q1
37
+ FECHAS:
38
+ 17/03/2026
39
+ Ped. 6000079
40
+ CLAVE DE LA SECCION ADUANERA
41
+ DE DESPACHO:
42
+ QUERETARO, EL MARQUES Y
43
+ COLON, QUERETARO.
44
+ 640
45
+ DESTINO: 9 PESO BRUTO: 5.350
46
+ MARCAS,NUMEROS Y TOTAL DE BULTOS: 1
47
+ 04/03/2026
48
+ ENTRADA
49
+ PAGO
50
+ 3PW4CLHE
51
+ S/M S/N
52
+ CODIGO DE BARRAS
53
+ 0326 0132 XMP1 4914 6243 989
54
+ *** PAGO ELECTRONICO ***
55
+ DEPÓSITO REFERENCIADO - LÍNEA DE CAPTURA
56
+ PATENTE:
57
+ NOMBRE DE LA INSTITUCIÓN BANCARIA:
58
+ LÍNEA DE CAPTURA:
59
+ IMPORTE PAGADO:
60
+ NÚMERO DE OPERACIÓN BANCARIA:
61
+ NÚMERO DE TRANSACCIÓN SAT:
62
+ MEDIO DE PRESENTACIÓN:
63
+ MEDIO DE RECEPCIÓN/COBRO:
64
+ OTROS MEDIOS ELECTRÓNICOS (PAGO ELECTRÓNICO)
65
+ EFECTIVO (CARGO A CUENTA)
66
+ PEDIMENTO: ADUANA:
67
+ FECHA DE PAGO:
68
+ 0326 0132 XMP1 4914 6243
69
+ 6000079 640
70
+ 17/03/2026 $989
71
+ Banco Nacional de México, S.A.
72
+ 00000000703543
73
+ 3458
74
+ 40124170320261403012
75
+ NUMERO (GUIA/ORDEN EMBARQUE)/ID: 023-51315051 M 490453269837 H
76
+ NÚMERO DE ACUSE DE VALOR COVE268074HT1
77
+ NÚMERO DE E-DOCUMENT: 0438261DOG9W3 01702619TYEU7
78
+ OBSERVACIONES
79
+ FACTURA DE ACUERDO AL ARTÍCULO 36-A DE LA LEY ADUANERA VIGENTE Y A LA REGLA 3.1.
80
+ 8. DE LAS REGLAS
81
+ GENERALES DE COMERCIO EXTERIOR VIGENTES.
82
+ SE TRANSMITE PREVIAMENTE A VENTANILLA DIGITAL CONFORME A LA REGLA 1.9.18. DE LAS
83
+ REGLAS GENERALES DE
84
+ COMERCIO EXTERIOR VIGENTES.
85
+ SE EFECTÚA LA TRANSMISIÓN DIGITAL DE CONFORMIDAD A LA REGLA 3.1.17. Y 3.1.31. DE
86
+ LAS REGLAS GENERALES
87
+ DE COMERCIO EXTERIOR VIGENTES.
88
+ LA INFORMACIÓN CONTENIDA EN ESTE PEDIMENTO FUE SUMINISTRADA POR EL IMPORTADOR DE
89
+ CONFORMIDAD CON EL
90
+ ARTICULO 54 DE LA LEY ADUANERA EN VIGOR.
91
+ SE EXIME NOM-024-SCFI-2013 EN TERMINOS DEL NUMERAL 10, FRACC. X INCISO H, IMPORT
92
+ ACIÓN DEFINITIVA,
93
+ TRATÁNDOSE DE IMPORTADORES QUE CUENTEN CON UN PROSEC.
94
+ SE EXIME NOM-003-SCFI-2014 EN TERMINOS DEL NUMERAL 10, FRACC. X INCISO H, IMPORT
95
+ ACIÓN DEFINITIVA,
96
+ TRATÁNDOSE DE IMPORTADORES QUE CUENTEN CON UN PROSEC.
97
+ JOAQUIN GOMEZ ABAD
98
+ AGENTE ADUANAL, AGENCIA ADUANAL, APODERADO ADUANAL O DE ALMACEN
99
+ NOMBRE O RAZ. SOC.:
100
+ RFC: GAA1003111U6 GOAJ641219HDFMBQ09 CURP:
101
+ e.firma:
102
+ NUMERO DE SERIE DEL CERTIFICADO: 00001000000705949781
103
+ GOAJ641219QT5 RFC:
104
+ DECLARO BAJO PROTESTA DE DECIR VERDAD, EN LOS TERMINOS
105
+ DE LO DISPUESTO ARTICULO 81 DE LA LEY: PATENTE O
106
+ AUTORIZACIÓN: 3458 GOMEZ ABAD ASESORES EN COMERCIO EXTERIOR S.C.
107
+ FORMA SIMPLIFICADA DEL PEDIMENTO
108
+ SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
109
+ REF: NQR26-079 Página 1 de 2
110
+
111
+ -- 1 of 2 --
112
+
113
+ FORMA SIMPLIFICADA DEL PEDIMENTO
114
+ SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
115
+ REF: NQR26-079 Página 1 de 2
116
+ A1 CVE. PEDIMENTO: IMP T. OPER 26 64 3458 6000079 NUM. PEDIMENTO:
117
+ CURP:
118
+ RFC: CSM9204097Q1
119
+ ****** ****** ********** ********** FIN DE PEDIMENTO NUM. TOTAL DE PARTID
120
+ AS: CLAVE PREVALIDADOR: 010 1
121
+ ANEXO DEL PEDIMENTO
122
+ SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
123
+ REF: NQR26-079 Página 2 de 2`;
124
+
125
+ describe('pedimento_simplificado matcher — DEL PEDIMENTO variant', () => {
126
+ it('dispatcher resolves NQR26-079 (DEL PEDIMENTO) as pedimento_simplificado', () => {
127
+ const [detectedType, , pedimento] = extractDocumentFields(
128
+ SIMP_DEL_NQR26079_TEXT,
129
+ 'pdf',
130
+ '/scans/CSM9204097Q1/NQR26-079.pdf',
131
+ );
132
+
133
+ // Regression: previously this resolved to `facturas_comerciales` because
134
+ // (1) the title regex demanded "DE PEDIMENTO" (this PDF says "DEL") and
135
+ // (2) the header trio required a colon after "T. OPER" (this PDF omits it).
136
+ expect(detectedType).toBe('pedimento_simplificado');
137
+ expect(pedimento).toBe('266434586000079');
138
+ });
139
+ });
140
+
141
+ describe('pedimento_simplificado matcher — header trio without colon after T. OPER', () => {
142
+ // Minimal text: title is the canonical "DE PEDIMENTO" so the fast path
143
+ // does NOT apply; only the fallback that requires the header trio runs.
144
+ // The trio MUST tolerate "T. OPER" without a trailing colon, because
145
+ // many printable PDFs render OPER as a column header (value in next cell).
146
+ const FALLBACK_TEXT = `FORMA SIMPLIFICADA DE PEDIMENTO
147
+ NUM. PEDIMENTO: 22 07 3429 2002089 T. OPER IMP CVE. PEDIMENTO: A1
148
+ DATOS DEL IMPORTADOR
149
+ PATENTE: 3429 PEDIMENTO: 2002089 ADUANA: 070
150
+ FECHA DE PAGO: 01/02/2023`;
151
+
152
+ it('resolves to pedimento_simplificado when "T. OPER" has no trailing colon', () => {
153
+ const [detectedType] = extractDocumentFields(
154
+ FALLBACK_TEXT,
155
+ 'pdf',
156
+ '/scans/SAMPLE/pedimento.pdf',
157
+ );
158
+ expect(detectedType).toBe('pedimento_simplificado');
159
+ });
160
+ });
161
+
162
+ describe('pedimento_simplificado matcher — title accepts both DE and DEL', () => {
163
+ // Same minimal body, only the title differs. Both variants are produced
164
+ // by different prevalidators / agencias in the wild, and BOTH must
165
+ // resolve to pedimento_simplificado.
166
+ const body = `
167
+ NUM. PEDIMENTO: 22 07 3429 2002089 T. OPER IMP CVE. PEDIMENTO: A1
168
+ DATOS DEL IMPORTADOR
169
+ PATENTE: 3429 PEDIMENTO: 2002089 ADUANA: 070
170
+ FECHA DE PAGO: 01/02/2023`;
171
+
172
+ it.each([
173
+ ['FORMA SIMPLIFICADA DE PEDIMENTO', 'pedimento_simplificado'],
174
+ ['FORMA SIMPLIFICADA DEL PEDIMENTO', 'pedimento_simplificado'],
175
+ ['forma simplificada de pedimento', 'pedimento_simplificado'], // case-insensitive
176
+ ['FORMA  SIMPLIFICADA   DEL  PEDIMENTO', 'pedimento_simplificado'], // extra spaces
177
+ ])('title "%s" resolves to %s', (title, expected) => {
178
+ const [detectedType] = extractDocumentFields(
179
+ `${title}\n${body}`,
180
+ 'pdf',
181
+ '/scans/SAMPLE/pedimento.pdf',
182
+ );
183
+ expect(detectedType).toBe(expected);
184
+ });
185
+ });