@arela/uploader 1.0.22 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,218 @@
1
+ /**
2
+ * Unit tests for the factura_inter_agencia matcher.
3
+ *
4
+ * Verifies that NORCOM↔PALCO CFDIs (XML and printable PDF text) are
5
+ * detected as `factura_inter_agencia`, and that ordinary CFDIs are NOT
6
+ * mis-classified.
7
+ */
8
+ import { describe, it, expect } from '@jest/globals';
9
+
10
+ import {
11
+ facturaInterAgenciaDefinition,
12
+ INTER_AGENCIA_RFCS,
13
+ } from '../../src/document-types/factura-inter-agencia.js';
14
+ import { extractDocumentFields } from '../../src/document-type-shared.js';
15
+
16
+ const NORCOM_RFC = 'NAA120215F20';
17
+ const PALCO_RFC = 'PCC1008161WA';
18
+
19
+ // Realistic CFDI 4.0 XML between NORCOM (emisor) and PALCO (receptor).
20
+ // Conceptos use ClaveProdServ 78141502 (servicios de agentes aduaneros).
21
+ const CFDI_XML_INTER_AGENCIA = `<?xml version="1.0" encoding="utf-8"?>
22
+ <cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" Version="4.0" Folio="012749"
23
+ TipoDeComprobante="I" SubTotal="3000.00" Total="3480.00" Moneda="MXN">
24
+ <cfdi:Emisor Rfc="${NORCOM_RFC}" Nombre="NORCOM AGENTES ADUANALES" RegimenFiscal="601"/>
25
+ <cfdi:Receptor Rfc="${PALCO_RFC}" Nombre="PALCO, CONSORCIO DE COMERCIO INTERNACIONAL"
26
+ DomicilioFiscalReceptor="32380" RegimenFiscalReceptor="601" UsoCFDI="G03"/>
27
+ <cfdi:Conceptos>
28
+ <cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="HONO" Cantidad="1.00"
29
+ ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="HONORARIOS"
30
+ ValorUnitario="1300.00" Importe="1300.00" ObjetoImp="02"/>
31
+ <cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="VALID" Cantidad="1.00"
32
+ ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="VALIDACION"
33
+ ValorUnitario="200.00" Importe="200.00" ObjetoImp="02"/>
34
+ </cfdi:Conceptos>
35
+ </cfdi:Comprobante>`;
36
+
37
+ // Same agencies but conceptos do NOT use 78141502 — should NOT match.
38
+ const CFDI_XML_INTER_AGENCIA_WRONG_CONCEPT = CFDI_XML_INTER_AGENCIA.replace(
39
+ /78141502/g,
40
+ '90121502',
41
+ );
42
+
43
+ // CFDI between unrelated taxpayers — should NOT match.
44
+ const CFDI_XML_REGULAR = `<?xml version="1.0" encoding="utf-8"?>
45
+ <cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" Version="4.0" Folio="000123"
46
+ TipoDeComprobante="I" SubTotal="100.00" Total="116.00">
47
+ <cfdi:Emisor Rfc="ACME010101AB1" Nombre="ACME COMERCIAL" RegimenFiscal="601"/>
48
+ <cfdi:Receptor Rfc="XYZ020202CD2" Nombre="CLIENTE FINAL"
49
+ DomicilioFiscalReceptor="00000" RegimenFiscalReceptor="601" UsoCFDI="G03"/>
50
+ <cfdi:Conceptos>
51
+ <cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="ITEM" Cantidad="1.00"
52
+ ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="SERVICIO"
53
+ ValorUnitario="100.00" Importe="100.00" ObjetoImp="02"/>
54
+ </cfdi:Conceptos>
55
+ </cfdi:Comprobante>`;
56
+
57
+ // Text extracted from the printable PDF representation of a CFDI inter-agencia.
58
+ // Mirrors what pdf-parse returns for the sample SICINGR70-012749(...).pdf.
59
+ const CFDI_PDF_TEXT_INTER_AGENCIA = `NORCOM AGENTES ADUANALES S.C
60
+ Tipo de Comprobante: (I) Ingreso
61
+ Folio Fiscal 84FC9CE2-00D5-4843-B377-B463321F9FC6
62
+ Numero Folio 012749
63
+ Emisor
64
+ RFC ${NORCOM_RFC}
65
+ Razon Social NORCOM AGENTES ADUANALES
66
+ Receptor
67
+ RFC ${PALCO_RFC}
68
+ Razon Social PALCO, CONSORCIO DE COMERCIO INTERNACIONAL
69
+ Pedimento: 3458 6000046 Fecha: 17/02/2026 Tipo: EXP Clave: A1
70
+ Erogaciones
71
+ 78141502 HONO HONORARIOS 1,300.00
72
+ 78141502 SERCOM SERVICIOS COMPLEMENTARIOS 1,500.00
73
+ 78141502 VALID VALIDACION 200.00
74
+ Sello Digital del CFDI
75
+ c4oBJ8/zAol0zg1jVe4MK8...
76
+ Cadena Original del Complemento de Certificacion Digital del SAT
77
+ ||4.0|012749|...
78
+ Este documento es una representación impresa de un CFDI`;
79
+
80
+ describe('factura_inter_agencia matcher', () => {
81
+ describe('configured RFC set', () => {
82
+ it('includes NORCOM and PALCO RFCs', () => {
83
+ expect(INTER_AGENCIA_RFCS).toContain(NORCOM_RFC);
84
+ expect(INTER_AGENCIA_RFCS).toContain(PALCO_RFC);
85
+ });
86
+ });
87
+
88
+ describe('match()', () => {
89
+ it('matches a NORCOM→PALCO XML CFDI with broker-service conceptos', () => {
90
+ expect(facturaInterAgenciaDefinition.match(CFDI_XML_INTER_AGENCIA)).toBe(
91
+ true,
92
+ );
93
+ });
94
+
95
+ it('matches the PDF-text representation of the same CFDI', () => {
96
+ expect(
97
+ facturaInterAgenciaDefinition.match(CFDI_PDF_TEXT_INTER_AGENCIA),
98
+ ).toBe(true);
99
+ });
100
+
101
+ it('does NOT match when ClaveProdServ is not 78141502', () => {
102
+ expect(
103
+ facturaInterAgenciaDefinition.match(
104
+ CFDI_XML_INTER_AGENCIA_WRONG_CONCEPT,
105
+ ),
106
+ ).toBe(false);
107
+ });
108
+
109
+ it('does NOT match a CFDI between unrelated taxpayers', () => {
110
+ expect(facturaInterAgenciaDefinition.match(CFDI_XML_REGULAR)).toBe(false);
111
+ });
112
+
113
+ it('does NOT match arbitrary non-CFDI text containing the RFCs', () => {
114
+ const text = `Reporte interno
115
+ RFC emisor: ${NORCOM_RFC}
116
+ RFC cliente: ${PALCO_RFC}
117
+ Clave 78141502`;
118
+ // No CFDI markers → should not match.
119
+ expect(facturaInterAgenciaDefinition.match(text)).toBe(false);
120
+ });
121
+
122
+ it('does NOT match if only one of the configured RFCs is present', () => {
123
+ const text = CFDI_XML_INTER_AGENCIA.replace(PALCO_RFC, 'XYZ020202CD2');
124
+ expect(facturaInterAgenciaDefinition.match(text)).toBe(false);
125
+ });
126
+ });
127
+
128
+ describe('extractors', () => {
129
+ it('extracts emisor + receptor RFCs from XML', () => {
130
+ const rfcEmisor = facturaInterAgenciaDefinition.extractors
131
+ .find((e) => e.field === 'rfcEmisor')
132
+ .extract(CFDI_XML_INTER_AGENCIA);
133
+ const rfcReceptor = facturaInterAgenciaDefinition.extractors
134
+ .find((e) => e.field === 'rfcReceptor')
135
+ .extract(CFDI_XML_INTER_AGENCIA);
136
+
137
+ expect(rfcEmisor.found).toBe(true);
138
+ expect(rfcEmisor.value).toBe(NORCOM_RFC);
139
+ expect(rfcReceptor.found).toBe(true);
140
+ expect(rfcReceptor.value).toBe(PALCO_RFC);
141
+ });
142
+
143
+ it('extracts both RFCs from PDF text via fallback', () => {
144
+ const rfcEmisor = facturaInterAgenciaDefinition.extractors
145
+ .find((e) => e.field === 'rfcEmisor')
146
+ .extract(CFDI_PDF_TEXT_INTER_AGENCIA);
147
+ const rfcReceptor = facturaInterAgenciaDefinition.extractors
148
+ .find((e) => e.field === 'rfcReceptor')
149
+ .extract(CFDI_PDF_TEXT_INTER_AGENCIA);
150
+
151
+ expect(rfcEmisor.found).toBe(true);
152
+ expect(rfcReceptor.found).toBe(true);
153
+ // The fallback yields RFCs in order of first appearance, so compare order-insensitively.
154
+ const found = [rfcEmisor.value, rfcReceptor.value].sort();
155
+ expect(found).toEqual([NORCOM_RFC, PALCO_RFC].sort());
156
+ });
157
+
158
+ it('extracts the UUID (folio fiscal) from both formats', () => {
159
+ const uuidExtractor = facturaInterAgenciaDefinition.extractors.find(
160
+ (e) => e.field === 'uuid',
161
+ );
162
+
163
+ const fromXml = uuidExtractor.extract(CFDI_XML_INTER_AGENCIA);
164
+ // XML sample has no UUID inside the comprobante body — that's fine.
165
+ expect(fromXml.found).toBe(false);
166
+
167
+ const fromPdf = uuidExtractor.extract(CFDI_PDF_TEXT_INTER_AGENCIA);
168
+ expect(fromPdf.found).toBe(true);
169
+ expect(fromPdf.value).toBe('84FC9CE2-00D5-4843-B377-B463321F9FC6');
170
+ });
171
+
172
+ it('extracts numPedimento from the printable PDF "Pedimento:" line', () => {
173
+ const numExtractor = facturaInterAgenciaDefinition.extractors.find(
174
+ (e) => e.field === 'numPedimento',
175
+ );
176
+ const result = numExtractor.extract(CFDI_PDF_TEXT_INTER_AGENCIA);
177
+ expect(result.found).toBe(true);
178
+ expect(result.value).toBe('34586000046');
179
+ });
180
+
181
+ it('extracts the CFDI folio from XML attribute', () => {
182
+ const folio = facturaInterAgenciaDefinition.extractors
183
+ .find((e) => e.field === 'folio')
184
+ .extract(CFDI_XML_INTER_AGENCIA);
185
+ expect(folio.found).toBe(true);
186
+ expect(folio.value).toBe('012749');
187
+ });
188
+ });
189
+
190
+ describe('registry order (factura_inter_agencia precedes facturas_comerciales)', () => {
191
+ it('resolves the inter-agency CFDI XML to factura_inter_agencia, not factura_comercial', () => {
192
+ const [detectedType] = extractDocumentFields(
193
+ CFDI_XML_INTER_AGENCIA,
194
+ 'xml',
195
+ '/tmp/SICINGR70-012749(PALCO).XML',
196
+ );
197
+ expect(detectedType).toBe('factura_inter_agencia');
198
+ });
199
+
200
+ it('resolves the inter-agency CFDI PDF text to factura_inter_agencia', () => {
201
+ const [detectedType] = extractDocumentFields(
202
+ CFDI_PDF_TEXT_INTER_AGENCIA,
203
+ 'pdf',
204
+ '/tmp/SICINGR70-012749(PALCO).pdf',
205
+ );
206
+ expect(detectedType).toBe('factura_inter_agencia');
207
+ });
208
+
209
+ it('falls through to factura_comercial for a regular CFDI', () => {
210
+ const [detectedType] = extractDocumentFields(
211
+ CFDI_XML_REGULAR,
212
+ 'xml',
213
+ '/tmp/regular-invoice.xml',
214
+ );
215
+ expect(detectedType).toBe('factura_comercial');
216
+ });
217
+ });
218
+ });
@@ -0,0 +1,271 @@
1
+ /**
2
+ * Regression tests for the pedimento_completo_xml matcher.
3
+ *
4
+ * Covers:
5
+ * 1) Basic detection + arela_path composition from a VUCEM
6
+ * `consultarPedimentoCompletoRespuesta` XML.
7
+ * 2) **YY truth source** — when the pedimento is opened in one year and
8
+ * paid in the next (e.g. presentation 2025-12, payment 2026-01), the
9
+ * 15-digit pedimento MUST keep the presentation year (`25...`), not the
10
+ * payment year (`26...`). This matches what VUCEM stamps in the
11
+ * filename and what the PDF matchers produce.
12
+ * 3) **Aduana padding** — VUCEM returns the aduana code without leading
13
+ * zeros (e.g. `70` for Ciudad Juárez instead of the canonical `070`).
14
+ * The 2-digit "sección aduanera" prefix used inside the 15-digit
15
+ * pedimento is the first 2 digits of the 3-digit form (`70` → `070` → `07`).
16
+ * 4) **numPedimento backfill** — the XML matcher composes numPedimento
17
+ * externally via `extractNumPedimento` rather than as a field
18
+ * extractor. `extractDocumentFields` must backfill it so that
19
+ * `composeArelaPath` can find it.
20
+ * 5) Resolution to `proforma_completo_xml` when no payment evidence
21
+ * exists in the body.
22
+ */
23
+ import { describe, it, expect } from '@jest/globals';
24
+
25
+ import { extractDocumentFields } from '../../src/document-type-shared.js';
26
+ import { composeArelaPath } from '../../src/file-detection.js';
27
+
28
+ // ---------------------------------------------------------------------------
29
+ // Test fixtures
30
+ // ---------------------------------------------------------------------------
31
+
32
+ /**
33
+ * Build a minimal VUCEM consultarPedimentoCompletoRespuesta XML.
34
+ * Only the tags the matcher actually reads are included.
35
+ */
36
+ function buildXml({
37
+ rfc = 'CEM090106MU3',
38
+ pedimento = '5063036',
39
+ claveDocumento = 'V1',
40
+ tipoOperacionDesc = 'Exportacion',
41
+ aduanaClave = '70',
42
+ presentationDate = '2025-12-01-06:00',
43
+ paymentDate = '2026-01-07-06:00',
44
+ rectFechaPago = null,
45
+ facturas = ['V1-FUJIKURA MEX-202512'],
46
+ edDocs = [],
47
+ } = {}) {
48
+ const fechas = [];
49
+ if (presentationDate) {
50
+ fechas.push(
51
+ `<ns2:fechas><ns2:fecha>${presentationDate}</ns2:fecha><ns2:tipo><ns2:clave>5</ns2:clave><ns2:descripcion>FECHA DE PRESENTACION</ns2:descripcion></ns2:tipo></ns2:fechas>`,
52
+ );
53
+ }
54
+ if (paymentDate) {
55
+ fechas.push(
56
+ `<ns2:fechas><ns2:fecha>${paymentDate}</ns2:fecha><ns2:tipo><ns2:clave>2</ns2:clave><ns2:descripcion>FECHA DE PAGO</ns2:descripcion></ns2:tipo></ns2:fechas>`,
57
+ );
58
+ }
59
+
60
+ const rect = rectFechaPago
61
+ ? `<ns2:rectificacion><ns2:fechaPago>${rectFechaPago}</ns2:fechaPago></ns2:rectificacion>`
62
+ : '';
63
+
64
+ const facturasXml = facturas
65
+ .map(
66
+ (num) =>
67
+ `<ns2:facturas><ns2:numero>${num}</ns2:numero></ns2:facturas>`,
68
+ )
69
+ .join('');
70
+
71
+ const identificadoresXml =
72
+ edDocs.length === 0
73
+ ? ''
74
+ : `<ns2:identificadores>${edDocs
75
+ .map(
76
+ (code) =>
77
+ `<ns2:identificadores><claveIdentificador><clave>ED</clave></claveIdentificador><complemento1>${code}</complemento1></ns2:identificadores>`,
78
+ )
79
+ .join('')}</ns2:identificadores>`;
80
+
81
+ return `<?xml version="1.0" encoding="UTF-8"?>
82
+ <S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/">
83
+ <S:Body>
84
+ <ns2:consultarPedimentoCompletoRespuesta xmlns:ns2="http://x">
85
+ <ns2:pedimento>
86
+ <ns2:pedimento>${pedimento}</ns2:pedimento>
87
+ <ns2:encabezado>
88
+ <ns2:claveDocumento><ns2:clave>${claveDocumento}</ns2:clave></ns2:claveDocumento>
89
+ <ns2:tipoOperacion><ns2:clave>2</ns2:clave><ns2:descripcion>${tipoOperacionDesc}</ns2:descripcion></ns2:tipoOperacion>
90
+ <ns2:aduanaEntradaSalida><ns2:clave>${aduanaClave}</ns2:clave></ns2:aduanaEntradaSalida>
91
+ </ns2:encabezado>
92
+ <ns2:importadorExportador>
93
+ <ns2:rfc>${rfc}</ns2:rfc>
94
+ ${fechas.join('\n')}
95
+ </ns2:importadorExportador>
96
+ ${rect}
97
+ ${facturasXml}
98
+ ${identificadoresXml}
99
+ </ns2:pedimento>
100
+ </ns2:consultarPedimentoCompletoRespuesta>
101
+ </S:Body>
102
+ </S:Envelope>`;
103
+ }
104
+
105
+ // ---------------------------------------------------------------------------
106
+ // Tests
107
+ // ---------------------------------------------------------------------------
108
+
109
+ describe('pedimento_completo_xml matcher', () => {
110
+ it('detects, extracts, and composes arela_path for a basic export pedimento', () => {
111
+ const xml = buildXml({
112
+ rfc: 'CEM090106MU3',
113
+ pedimento: '5063036',
114
+ aduanaClave: '70',
115
+ presentationDate: '2025-06-15-06:00',
116
+ paymentDate: '2025-06-20-06:00',
117
+ });
118
+ // 15-digit filename pattern: YY=25 AA=07 PPPP=3429 NNNNNNN=5063036
119
+ const filePath = '/x/2025/250734295063036_250734295063036.xml';
120
+
121
+ const [type, fields, ped, year] = extractDocumentFields(
122
+ xml,
123
+ 'xml',
124
+ filePath,
125
+ );
126
+
127
+ expect(type).toBe('pedimento_completo_xml');
128
+ expect(ped).toBe('250734295063036');
129
+ expect(year).toBe(2025);
130
+ expect(fields.find((f) => f.name === 'rfc')?.value).toBe('CEM090106MU3');
131
+ expect(fields.find((f) => f.name === 'aduanaEntradaSalida')?.value).toBe(
132
+ '07',
133
+ );
134
+ // Backfill check: numPedimento must be exposed as a field so
135
+ // composeArelaPath can find it.
136
+ expect(fields.find((f) => f.name === 'numPedimento')?.value).toBe(
137
+ '250734295063036',
138
+ );
139
+
140
+ const arela = composeArelaPath(type, fields, year, filePath);
141
+ expect(arela).toBe('CEM090106MU3/2025/3429/07/250734295063036/');
142
+ });
143
+
144
+ it('uses presentation date (not payment date) for YY when payment crosses calendar year', () => {
145
+ // Pedimento opened Dec 2025, paid Jan 2026 — the YY must be 25.
146
+ const xml = buildXml({
147
+ pedimento: '5063036',
148
+ aduanaClave: '70',
149
+ presentationDate: '2025-12-01-06:00',
150
+ paymentDate: '2026-01-07-06:00',
151
+ });
152
+ // Use the 3-part filename pattern (no YY in filename) so YY comes from XML body.
153
+ const filePath = '/x/070-3429-5063036.xml';
154
+
155
+ const [type, , ped, year] = extractDocumentFields(xml, 'xml', filePath);
156
+
157
+ expect(type).toBe('pedimento_completo_xml');
158
+ expect(ped).toBe('250734295063036');
159
+ expect(year).toBe(2025);
160
+ });
161
+
162
+ it('falls back to payment date YY when presentation date is missing', () => {
163
+ const xml = buildXml({
164
+ pedimento: '5063036',
165
+ aduanaClave: '70',
166
+ presentationDate: null, // No clave=5 block
167
+ paymentDate: '2026-01-07-06:00',
168
+ });
169
+ const filePath = '/x/070-3429-5063036.xml';
170
+
171
+ const [, , ped, year] = extractDocumentFields(xml, 'xml', filePath);
172
+
173
+ expect(ped).toBe('260734295063036');
174
+ expect(year).toBe(2026);
175
+ });
176
+
177
+ it('prefers filename YY over body fechas (VUCEM-stamped truth)', () => {
178
+ // Filename says YY=24 but body has presentation=2025. Filename wins.
179
+ const xml = buildXml({
180
+ pedimento: '5063036',
181
+ aduanaClave: '70',
182
+ presentationDate: '2025-12-01-06:00',
183
+ paymentDate: '2026-01-07-06:00',
184
+ });
185
+ const filePath = '/x/240734295063036_240734295063036.xml';
186
+
187
+ const [, , ped, year] = extractDocumentFields(xml, 'xml', filePath);
188
+
189
+ expect(ped).toBe('240734295063036');
190
+ expect(year).toBe(2024);
191
+ });
192
+
193
+ it('pads VUCEM aduana correctly: 70 -> 07, 750 -> 75, 40 -> 04', () => {
194
+ const cases = [
195
+ { aduanaClave: '70', expected: '07', // Cd. Juárez (3-digit canonical: 070)
196
+ filename: '/x/070-3429-5000001.xml' },
197
+ { aduanaClave: '750', expected: '75', // Puebla
198
+ filename: '/x/750-3429-5000002.xml' },
199
+ { aduanaClave: '40', expected: '04', // Lázaro Cárdenas (canonical: 040)
200
+ filename: '/x/040-3429-5000003.xml' },
201
+ ];
202
+
203
+ for (const c of cases) {
204
+ const xml = buildXml({
205
+ pedimento: c.filename.match(/-(\d{7})\.xml$/)[1],
206
+ aduanaClave: c.aduanaClave,
207
+ presentationDate: '2025-06-15-06:00',
208
+ paymentDate: '2025-06-20-06:00',
209
+ });
210
+ const [, fields, ped] = extractDocumentFields(xml, 'xml', c.filename);
211
+ expect(fields.find((f) => f.name === 'aduanaEntradaSalida')?.value).toBe(
212
+ c.expected,
213
+ );
214
+ // Zero-based indices 2-3 (the AA digits after YY) of the composed 15-digit pedimento must equal the
215
+ // aduana prefix.
216
+ expect(ped.substring(2, 4)).toBe(c.expected);
217
+ }
218
+ });
219
+
220
+ it('resolves to proforma_completo_xml when no payment evidence exists', () => {
221
+ const xml = buildXml({
222
+ pedimento: '5063036',
223
+ aduanaClave: '70',
224
+ presentationDate: '2025-12-01-06:00',
225
+ paymentDate: null, // No payment, no rectificacion
226
+ });
227
+ const filePath = '/x/070-3429-5063036.xml';
228
+
229
+ const [type] = extractDocumentFields(xml, 'xml', filePath);
230
+
231
+ expect(type).toBe('proforma_completo_xml');
232
+ });
233
+
234
+ it('extracts cove and rfc correctly', () => {
235
+ const xml = buildXml({
236
+ rfc: 'CEM090106MU3',
237
+ facturas: ['V1-FUJIKURA MEX-202512', 'INV-2'],
238
+ });
239
+ const filePath = '/x/250734295063036_250734295063036.xml';
240
+
241
+ const [, fields] = extractDocumentFields(xml, 'xml', filePath);
242
+
243
+ expect(fields.find((f) => f.name === 'rfc')?.value).toBe('CEM090106MU3');
244
+ expect(fields.find((f) => f.name === 'cove')?.value).toBe(
245
+ '[V1-FUJIKURA MEX-202512,INV-2]',
246
+ );
247
+ });
248
+
249
+ it('returns null arela_path when filename is unrecognized (no patente)', () => {
250
+ const xml = buildXml({
251
+ pedimento: '5063036',
252
+ aduanaClave: '70',
253
+ presentationDate: '2025-06-15-06:00',
254
+ paymentDate: '2025-06-20-06:00',
255
+ });
256
+ // Unrecognized filename — no patente derivable.
257
+ const filePath = '/x/random_name.xml';
258
+
259
+ const [type, fields, ped, year] = extractDocumentFields(
260
+ xml,
261
+ 'xml',
262
+ filePath,
263
+ );
264
+
265
+ expect(type).toBe('pedimento_completo_xml');
266
+ expect(ped).toBeNull();
267
+ expect(year).toBeNull();
268
+ // composeArelaPath returns null because patente is missing.
269
+ expect(composeArelaPath(type, fields, year, filePath)).toBeNull();
270
+ });
271
+ });
@@ -0,0 +1,185 @@
1
+ /**
2
+ * Regression tests for the pedimento_simplificado matcher.
3
+ *
4
+ * Covers PDF layouts where:
5
+ * - The header reads "FORMA SIMPLIFICADA DEL PEDIMENTO" (with DEL),
6
+ * not the canonical "DE PEDIMENTO".
7
+ * - The header trio prints "T. OPER" WITHOUT a trailing colon
8
+ * (the value sits in a separate table cell).
9
+ *
10
+ * Real-world example: REF NQR26-079, Aduana 640 (Querétaro), patente 3458.
11
+ * Before the fix that this regression test guards, the matcher fell through to
12
+ * `facturas_comerciales` because the word "FACTURA" appears in the
13
+ * "OBSERVACIONES" block.
14
+ */
15
+ import { describe, it, expect } from '@jest/globals';
16
+
17
+ // Importing only the dispatcher avoids circular-init issues caused by
18
+ // `_pedimento-shared-extractors.js` pulling FieldResult from document-type-shared.
19
+ import { extractDocumentFields } from '../../src/document-type-shared.js';
20
+
21
+ // REAL pdf-parse output from the NQR26-079 simplificado PDF
22
+ // (CSM9204097Q1, patente 3458, aduana 640).
23
+ // Captured verbatim with `PDFParse({data}).getText()` — pdf-parse extracts
24
+ // table cells out of visual order, so labels and values often live on
25
+ // different lines (see the FECHA DE PAGO block: label appears, then a few
26
+ // unrelated cells, then the date sits on its own line with the importe).
27
+ // This is exactly what the matchers and extractors see in production.
28
+ const SIMP_DEL_NQR26079_TEXT = `A1 CVE. PEDIMENTO: IMP T. OPER 26 64 3458 6000079 NUM. PEDIMENTO:
29
+ CERTIFICACIONES
30
+ ADUANA E/S:
31
+ DATOS DEL IMPORTADOR / EXPORTADOR
32
+ RFC: CURP:
33
+ CÓDIGO DE
34
+ ACEPTACIÓN
35
+ 640
36
+ CSM9204097Q1
37
+ FECHAS:
38
+ 17/03/2026
39
+ Ped. 6000079
40
+ CLAVE DE LA SECCION ADUANERA
41
+ DE DESPACHO:
42
+ QUERETARO, EL MARQUES Y
43
+ COLON, QUERETARO.
44
+ 640
45
+ DESTINO: 9 PESO BRUTO: 5.350
46
+ MARCAS,NUMEROS Y TOTAL DE BULTOS: 1
47
+ 04/03/2026
48
+ ENTRADA
49
+ PAGO
50
+ 3PW4CLHE
51
+ S/M S/N
52
+ CODIGO DE BARRAS
53
+ 0326 0132 XMP1 4914 6243 989
54
+ *** PAGO ELECTRONICO ***
55
+ DEPÓSITO REFERENCIADO - LÍNEA DE CAPTURA
56
+ PATENTE:
57
+ NOMBRE DE LA INSTITUCIÓN BANCARIA:
58
+ LÍNEA DE CAPTURA:
59
+ IMPORTE PAGADO:
60
+ NÚMERO DE OPERACIÓN BANCARIA:
61
+ NÚMERO DE TRANSACCIÓN SAT:
62
+ MEDIO DE PRESENTACIÓN:
63
+ MEDIO DE RECEPCIÓN/COBRO:
64
+ OTROS MEDIOS ELECTRÓNICOS (PAGO ELECTRÓNICO)
65
+ EFECTIVO (CARGO A CUENTA)
66
+ PEDIMENTO: ADUANA:
67
+ FECHA DE PAGO:
68
+ 0326 0132 XMP1 4914 6243
69
+ 6000079 640
70
+ 17/03/2026 $989
71
+ Banco Nacional de México, S.A.
72
+ 00000000703543
73
+ 3458
74
+ 40124170320261403012
75
+ NUMERO (GUIA/ORDEN EMBARQUE)/ID: 023-51315051 M 490453269837 H
76
+ NÚMERO DE ACUSE DE VALOR COVE268074HT1
77
+ NÚMERO DE E-DOCUMENT: 0438261DOG9W3 01702619TYEU7
78
+ OBSERVACIONES
79
+ FACTURA DE ACUERDO AL ARTÍCULO 36-A DE LA LEY ADUANERA VIGENTE Y A LA REGLA 3.1.
80
+ 8. DE LAS REGLAS
81
+ GENERALES DE COMERCIO EXTERIOR VIGENTES.
82
+ SE TRANSMITE PREVIAMENTE A VENTANILLA DIGITAL CONFORME A LA REGLA 1.9.18. DE LAS
83
+ REGLAS GENERALES DE
84
+ COMERCIO EXTERIOR VIGENTES.
85
+ SE EFECTÚA LA TRANSMISIÓN DIGITAL DE CONFORMIDAD A LA REGLA 3.1.17. Y 3.1.31. DE
86
+ LAS REGLAS GENERALES
87
+ DE COMERCIO EXTERIOR VIGENTES.
88
+ LA INFORMACIÓN CONTENIDA EN ESTE PEDIMENTO FUE SUMINISTRADA POR EL IMPORTADOR DE
89
+ CONFORMIDAD CON EL
90
+ ARTICULO 54 DE LA LEY ADUANERA EN VIGOR.
91
+ SE EXIME NOM-024-SCFI-2013 EN TERMINOS DEL NUMERAL 10, FRACC. X INCISO H, IMPORT
92
+ ACIÓN DEFINITIVA,
93
+ TRATÁNDOSE DE IMPORTADORES QUE CUENTEN CON UN PROSEC.
94
+ SE EXIME NOM-003-SCFI-2014 EN TERMINOS DEL NUMERAL 10, FRACC. X INCISO H, IMPORT
95
+ ACIÓN DEFINITIVA,
96
+ TRATÁNDOSE DE IMPORTADORES QUE CUENTEN CON UN PROSEC.
97
+ JOAQUIN GOMEZ ABAD
98
+ AGENTE ADUANAL, AGENCIA ADUANAL, APODERADO ADUANAL O DE ALMACEN
99
+ NOMBRE O RAZ. SOC.:
100
+ RFC: GAA1003111U6 GOAJ641219HDFMBQ09 CURP:
101
+ e.firma:
102
+ NUMERO DE SERIE DEL CERTIFICADO: 00001000000705949781
103
+ GOAJ641219QT5 RFC:
104
+ DECLARO BAJO PROTESTA DE DECIR VERDAD, EN LOS TERMINOS
105
+ DE LO DISPUESTO ARTICULO 81 DE LA LEY: PATENTE O
106
+ AUTORIZACIÓN: 3458 GOMEZ ABAD ASESORES EN COMERCIO EXTERIOR S.C.
107
+ FORMA SIMPLIFICADA DEL PEDIMENTO
108
+ SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
109
+ REF: NQR26-079 Página 1 de 2
110
+
111
+ -- 1 of 2 --
112
+
113
+ FORMA SIMPLIFICADA DEL PEDIMENTO
114
+ SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
115
+ REF: NQR26-079 Página 1 de 2
116
+ A1 CVE. PEDIMENTO: IMP T. OPER 26 64 3458 6000079 NUM. PEDIMENTO:
117
+ CURP:
118
+ RFC: CSM9204097Q1
119
+ ****** ****** ********** ********** FIN DE PEDIMENTO NUM. TOTAL DE PARTID
120
+ AS: CLAVE PREVALIDADOR: 010 1
121
+ ANEXO DEL PEDIMENTO
122
+ SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
123
+ REF: NQR26-079 Página 2 de 2`;
124
+
125
+ describe('pedimento_simplificado matcher — DEL PEDIMENTO variant', () => {
126
+ it('dispatcher resolves NQR26-079 (DEL PEDIMENTO) as pedimento_simplificado', () => {
127
+ const [detectedType, , pedimento] = extractDocumentFields(
128
+ SIMP_DEL_NQR26079_TEXT,
129
+ 'pdf',
130
+ '/scans/CSM9204097Q1/NQR26-079.pdf',
131
+ );
132
+
133
+ // Regression: previously this resolved to `factura_comercial` because
134
+ // (1) the title regex demanded "DE PEDIMENTO" (this PDF says "DEL") and
135
+ // (2) the header trio required a colon after "T. OPER" (this PDF omits it).
136
+ expect(detectedType).toBe('pedimento_simplificado');
137
+ expect(pedimento).toBe('266434586000079');
138
+ });
139
+ });
140
+
141
+ describe('pedimento_simplificado matcher — header trio without colon after T. OPER', () => {
142
+ // Minimal text: title is the canonical "DE PEDIMENTO", so the fast path does
143
+ // NOT apply and only the fallback that requires the header trio runs.
144
+ // NOTE(review): the `it` title below says "fast-path" — confirm which path this fixture exercises.
145
+ // The trio MUST tolerate "T. OPER" without a trailing colon, because many printable PDFs render OPER as a column header (value in next cell).
146
+ const FALLBACK_TEXT = `FORMA SIMPLIFICADA DE PEDIMENTO
147
+ NUM. PEDIMENTO: 22 07 3429 2002089 T. OPER IMP CVE. PEDIMENTO: A1
148
+ DATOS DEL IMPORTADOR
149
+ PATENTE: 3429 PEDIMENTO: 2002089 ADUANA: 070
150
+ FECHA DE PAGO: 01/02/2023`;
151
+
152
+ it('resolves via fast-path "FORMA SIMPLIFICADA DE PEDIMENTO" header', () => {
153
+ const [detectedType] = extractDocumentFields(
154
+ FALLBACK_TEXT,
155
+ 'pdf',
156
+ '/scans/SAMPLE/pedimento.pdf',
157
+ );
158
+ expect(detectedType).toBe('pedimento_simplificado');
159
+ });
160
+ });
161
+
162
+ describe('pedimento_simplificado matcher — title accepts both DE and DEL', () => {
163
+ // Same minimal body, only the title differs. Both variants are produced
164
+ // by different prevalidators / agencias in the wild, and BOTH must
165
+ // resolve to pedimento_simplificado.
166
+ const body = `
167
+ NUM. PEDIMENTO: 22 07 3429 2002089 T. OPER IMP CVE. PEDIMENTO: A1
168
+ DATOS DEL IMPORTADOR
169
+ PATENTE: 3429 PEDIMENTO: 2002089 ADUANA: 070
170
+ FECHA DE PAGO: 01/02/2023`;
171
+
172
+ it.each([
173
+ ['FORMA SIMPLIFICADA DE PEDIMENTO', 'pedimento_simplificado'],
174
+ ['FORMA SIMPLIFICADA DEL PEDIMENTO', 'pedimento_simplificado'],
175
+ ['forma simplificada de pedimento', 'pedimento_simplificado'], // case-insensitive
176
+ ['FORMA  SIMPLIFICADA   DEL   PEDIMENTO', 'pedimento_simplificado'], // extra spaces
177
+ ])('title "%s" resolves to %s', (title, expected) => {
178
+ const [detectedType] = extractDocumentFields(
179
+ `${title}\n${body}`,
180
+ 'pdf',
181
+ '/scans/SAMPLE/pedimento.pdf',
182
+ );
183
+ expect(detectedType).toBe(expected);
184
+ });
185
+ });