@arela/uploader 1.0.22 → 1.0.24
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/scripts/scoring-compare.js +243 -0
- package/scripts/scoring-phase4-check.js +96 -0
- package/src/commands/IdentifyCommand.js +34 -6
- package/src/commands/ScanCommand.js +15 -0
- package/src/config/config.js +28 -2
- package/src/document-type-shared.js +15 -7
- package/src/document-types/_pedimento-shared-extractors.js +27 -8
- package/src/document-types/factura-inter-agencia.js +186 -0
- package/src/document-types/pedimento-completo-xml.js +62 -12
- package/src/document-types/pedimento-completo.js +5 -3
- package/src/document-types/pedimento-simplificado.js +5 -2
- package/src/document-types/proforma.js +2 -2
- package/src/file-detection.js +30 -6
- package/src/scoring/db-matcher-adapter.js +98 -0
- package/src/scoring/matchers-seed.js +386 -0
- package/src/scoring/scoring-engine.js +218 -0
- package/src/services/ScanApiService.js +14 -0
- package/tests/unit/factura-inter-agencia.test.js +218 -0
- package/tests/unit/pedimento-completo-xml-matcher.test.js +271 -0
- package/tests/unit/pedimento-simplificado-matcher.test.js +185 -0
- package/tests/unit/scoring-engine.test.js +221 -0
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the factura_inter_agencia matcher.
|
|
3
|
+
*
|
|
4
|
+
* Verifies that NORCOM↔PALCO CFDIs (XML and printable PDF text) are
|
|
5
|
+
* detected as `factura_inter_agencia`, and that ordinary CFDIs are NOT
|
|
6
|
+
* mis-classified.
|
|
7
|
+
*/
|
|
8
|
+
import { describe, it, expect } from '@jest/globals';
|
|
9
|
+
|
|
10
|
+
import {
|
|
11
|
+
facturaInterAgenciaDefinition,
|
|
12
|
+
INTER_AGENCIA_RFCS,
|
|
13
|
+
} from '../../src/document-types/factura-inter-agencia.js';
|
|
14
|
+
import { extractDocumentFields } from '../../src/document-type-shared.js';
|
|
15
|
+
|
|
16
|
+
const NORCOM_RFC = 'NAA120215F20';
|
|
17
|
+
const PALCO_RFC = 'PCC1008161WA';
|
|
18
|
+
|
|
19
|
+
// Realistic CFDI 4.0 XML between NORCOM (emisor) and PALCO (receptor).
|
|
20
|
+
// Conceptos use ClaveProdServ 78141502 (servicios de agentes aduaneros).
|
|
21
|
+
const CFDI_XML_INTER_AGENCIA = `<?xml version="1.0" encoding="utf-8"?>
|
|
22
|
+
<cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" Version="4.0" Folio="012749"
|
|
23
|
+
TipoDeComprobante="I" SubTotal="3000.00" Total="3480.00" Moneda="MXN">
|
|
24
|
+
<cfdi:Emisor Rfc="${NORCOM_RFC}" Nombre="NORCOM AGENTES ADUANALES" RegimenFiscal="601"/>
|
|
25
|
+
<cfdi:Receptor Rfc="${PALCO_RFC}" Nombre="PALCO, CONSORCIO DE COMERCIO INTERNACIONAL"
|
|
26
|
+
DomicilioFiscalReceptor="32380" RegimenFiscalReceptor="601" UsoCFDI="G03"/>
|
|
27
|
+
<cfdi:Conceptos>
|
|
28
|
+
<cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="HONO" Cantidad="1.00"
|
|
29
|
+
ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="HONORARIOS"
|
|
30
|
+
ValorUnitario="1300.00" Importe="1300.00" ObjetoImp="02"/>
|
|
31
|
+
<cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="VALID" Cantidad="1.00"
|
|
32
|
+
ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="VALIDACION"
|
|
33
|
+
ValorUnitario="200.00" Importe="200.00" ObjetoImp="02"/>
|
|
34
|
+
</cfdi:Conceptos>
|
|
35
|
+
</cfdi:Comprobante>`;
|
|
36
|
+
|
|
37
|
+
// Same agencies but conceptos do NOT use 78141502 — should NOT match.
|
|
38
|
+
const CFDI_XML_INTER_AGENCIA_WRONG_CONCEPT = CFDI_XML_INTER_AGENCIA.replace(
|
|
39
|
+
/78141502/g,
|
|
40
|
+
'90121502',
|
|
41
|
+
);
|
|
42
|
+
|
|
43
|
+
// CFDI between unrelated taxpayers — should NOT match.
|
|
44
|
+
const CFDI_XML_REGULAR = `<?xml version="1.0" encoding="utf-8"?>
|
|
45
|
+
<cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" Version="4.0" Folio="000123"
|
|
46
|
+
TipoDeComprobante="I" SubTotal="100.00" Total="116.00">
|
|
47
|
+
<cfdi:Emisor Rfc="ACME010101AB1" Nombre="ACME COMERCIAL" RegimenFiscal="601"/>
|
|
48
|
+
<cfdi:Receptor Rfc="XYZ020202CD2" Nombre="CLIENTE FINAL"
|
|
49
|
+
DomicilioFiscalReceptor="00000" RegimenFiscalReceptor="601" UsoCFDI="G03"/>
|
|
50
|
+
<cfdi:Conceptos>
|
|
51
|
+
<cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="ITEM" Cantidad="1.00"
|
|
52
|
+
ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="SERVICIO"
|
|
53
|
+
ValorUnitario="100.00" Importe="100.00" ObjetoImp="02"/>
|
|
54
|
+
</cfdi:Conceptos>
|
|
55
|
+
</cfdi:Comprobante>`;
|
|
56
|
+
|
|
57
|
+
// Text extracted from the printable PDF representation of a CFDI inter-agencia.
|
|
58
|
+
// Mirrors what pdf-parse returns for the sample SICINGR70-012749(...).pdf.
|
|
59
|
+
const CFDI_PDF_TEXT_INTER_AGENCIA = `NORCOM AGENTES ADUANALES S.C
|
|
60
|
+
Tipo de Comprobante: (I) Ingreso
|
|
61
|
+
Folio Fiscal 84FC9CE2-00D5-4843-B377-B463321F9FC6
|
|
62
|
+
Numero Folio 012749
|
|
63
|
+
Emisor
|
|
64
|
+
RFC ${NORCOM_RFC}
|
|
65
|
+
Razon Social NORCOM AGENTES ADUANALES
|
|
66
|
+
Receptor
|
|
67
|
+
RFC ${PALCO_RFC}
|
|
68
|
+
Razon Social PALCO, CONSORCIO DE COMERCIO INTERNACIONAL
|
|
69
|
+
Pedimento: 3458 6000046 Fecha: 17/02/2026 Tipo: EXP Clave: A1
|
|
70
|
+
Erogaciones
|
|
71
|
+
78141502 HONO HONORARIOS 1,300.00
|
|
72
|
+
78141502 SERCOM SERVICIOS COMPLEMENTARIOS 1,500.00
|
|
73
|
+
78141502 VALID VALIDACION 200.00
|
|
74
|
+
Sello Digital del CFDI
|
|
75
|
+
c4oBJ8/zAol0zg1jVe4MK8...
|
|
76
|
+
Cadena Original del Complemento de Certificacion Digital del SAT
|
|
77
|
+
||4.0|012749|...
|
|
78
|
+
Este documento es una representación impresa de un CFDI`;
|
|
79
|
+
|
|
80
|
+
describe('factura_inter_agencia matcher', () => {
  /**
   * Run the extractor registered for `field` against `text`.
   *
   * @param {string} field - Value of the extractor's `field` property.
   * @param {string} text - Document text (XML or PDF text) to extract from.
   * @returns {{found: boolean, value: *}} Whatever the extractor returns; the
   *   tests below only read `found` and `value`.
   * @throws {Error} When the definition has no extractor for `field`, so a
   *   renamed or removed extractor fails with a readable message instead of
   *   "Cannot read properties of undefined (reading 'extract')".
   */
  const runExtractor = (field, text) => {
    const extractor = facturaInterAgenciaDefinition.extractors.find(
      (candidate) => candidate.field === field,
    );
    if (extractor === undefined) {
      throw new Error(`no extractor registered for field "${field}"`);
    }
    return extractor.extract(text);
  };

  describe('configured RFC set', () => {
    it('includes NORCOM and PALCO RFCs', () => {
      expect(INTER_AGENCIA_RFCS).toContain(NORCOM_RFC);
      expect(INTER_AGENCIA_RFCS).toContain(PALCO_RFC);
    });
  });

  describe('match()', () => {
    it('matches a NORCOM→PALCO XML CFDI with broker-service conceptos', () => {
      const matched = facturaInterAgenciaDefinition.match(CFDI_XML_INTER_AGENCIA);
      expect(matched).toBe(true);
    });

    it('matches the PDF-text representation of the same CFDI', () => {
      const matched = facturaInterAgenciaDefinition.match(
        CFDI_PDF_TEXT_INTER_AGENCIA,
      );
      expect(matched).toBe(true);
    });

    it('does NOT match when ClaveProdServ is not 78141502', () => {
      const matched = facturaInterAgenciaDefinition.match(
        CFDI_XML_INTER_AGENCIA_WRONG_CONCEPT,
      );
      expect(matched).toBe(false);
    });

    it('does NOT match a CFDI between unrelated taxpayers', () => {
      expect(facturaInterAgenciaDefinition.match(CFDI_XML_REGULAR)).toBe(false);
    });

    it('does NOT match arbitrary non-CFDI text containing the RFCs', () => {
      // Both RFCs and the product key are present, but nothing marks the
      // text as a CFDI, so the matcher must reject it.
      const text = `Reporte interno
RFC emisor: ${NORCOM_RFC}
RFC cliente: ${PALCO_RFC}
Clave 78141502`;
      expect(facturaInterAgenciaDefinition.match(text)).toBe(false);
    });

    it('does NOT match if only one of the configured RFCs is present', () => {
      // Swap the receptor for an unrelated taxpayer; only NORCOM remains.
      const text = CFDI_XML_INTER_AGENCIA.replace(PALCO_RFC, 'XYZ020202CD2');
      expect(facturaInterAgenciaDefinition.match(text)).toBe(false);
    });
  });

  describe('extractors', () => {
    it('extracts emisor + receptor RFCs from XML', () => {
      const rfcEmisor = runExtractor('rfcEmisor', CFDI_XML_INTER_AGENCIA);
      const rfcReceptor = runExtractor('rfcReceptor', CFDI_XML_INTER_AGENCIA);

      expect(rfcEmisor.found).toBe(true);
      expect(rfcEmisor.value).toBe(NORCOM_RFC);
      expect(rfcReceptor.found).toBe(true);
      expect(rfcReceptor.value).toBe(PALCO_RFC);
    });

    it('extracts both RFCs from PDF text via fallback', () => {
      const rfcEmisor = runExtractor('rfcEmisor', CFDI_PDF_TEXT_INTER_AGENCIA);
      const rfcReceptor = runExtractor(
        'rfcReceptor',
        CFDI_PDF_TEXT_INTER_AGENCIA,
      );

      expect(rfcEmisor.found).toBe(true);
      expect(rfcReceptor.found).toBe(true);
      // Compared order-insensitively: this test only requires that both RFCs
      // are recovered from the PDF text, not which field each one lands in.
      const found = [rfcEmisor.value, rfcReceptor.value].sort();
      expect(found).toEqual([NORCOM_RFC, PALCO_RFC].sort());
    });

    it('extracts the UUID (folio fiscal) from both formats', () => {
      // The XML fixture contains no UUID anywhere, so for XML the expected
      // outcome is an explicit "not found" rather than a value.
      const fromXml = runExtractor('uuid', CFDI_XML_INTER_AGENCIA);
      expect(fromXml.found).toBe(false);

      const fromPdf = runExtractor('uuid', CFDI_PDF_TEXT_INTER_AGENCIA);
      expect(fromPdf.found).toBe(true);
      expect(fromPdf.value).toBe('84FC9CE2-00D5-4843-B377-B463321F9FC6');
    });

    it('extracts numPedimento from the printable PDF "Pedimento:" line', () => {
      // The fixture line reads "Pedimento: 3458 6000046"; the expected value
      // is those two groups joined without the space.
      const result = runExtractor('numPedimento', CFDI_PDF_TEXT_INTER_AGENCIA);
      expect(result.found).toBe(true);
      expect(result.value).toBe('34586000046');
    });

    it('extracts the CFDI folio from XML attribute', () => {
      const folio = runExtractor('folio', CFDI_XML_INTER_AGENCIA);
      expect(folio.found).toBe(true);
      expect(folio.value).toBe('012749');
    });
  });

  describe('registry order (factura_inter_agencia precedes facturas_comerciales)', () => {
    it('resolves the inter-agency CFDI XML to factura_inter_agencia, not factura_comercial', () => {
      const [detectedType] = extractDocumentFields(
        CFDI_XML_INTER_AGENCIA,
        'xml',
        '/tmp/SICINGR70-012749(PALCO).XML',
      );
      expect(detectedType).toBe('factura_inter_agencia');
    });

    it('resolves the inter-agency CFDI PDF text to factura_inter_agencia', () => {
      const [detectedType] = extractDocumentFields(
        CFDI_PDF_TEXT_INTER_AGENCIA,
        'pdf',
        '/tmp/SICINGR70-012749(PALCO).pdf',
      );
      expect(detectedType).toBe('factura_inter_agencia');
    });

    it('falls through to factura_comercial for a regular CFDI', () => {
      const [detectedType] = extractDocumentFields(
        CFDI_XML_REGULAR,
        'xml',
        '/tmp/regular-invoice.xml',
      );
      expect(detectedType).toBe('factura_comercial');
    });
  });
});
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regression tests for the pedimento_completo_xml matcher.
|
|
3
|
+
*
|
|
4
|
+
* Covers:
|
|
5
|
+
* 1) Basic detection + arela_path composition from a VUCEM
|
|
6
|
+
* `consultarPedimentoCompletoRespuesta` XML.
|
|
7
|
+
* 2) **YY truth source** — when the pedimento is opened in one year and
|
|
8
|
+
* paid in the next (e.g. presentation 2025-12, payment 2026-01), the
|
|
9
|
+
* 15-digit pedimento MUST keep the presentation year (`25...`), not the
|
|
10
|
+
* payment year (`26...`). This matches what VUCEM stamps in the
|
|
11
|
+
* filename and what the PDF matchers produce.
|
|
12
|
+
* 3) **Aduana padding** — VUCEM returns the aduana code without leading
|
|
13
|
+
* zeros (e.g. `70` for Ciudad Juárez instead of the canonical `070`).
|
|
14
|
+
* The 2-digit "sección aduanera" prefix used inside the 15-digit
|
|
15
|
+
* pedimento is the first 2 digits of the 3-digit form (`70` → `07`).
|
|
16
|
+
* 4) **numPedimento backfill** — the XML matcher composes numPedimento
|
|
17
|
+
* externally via `extractNumPedimento` rather than as a field
|
|
18
|
+
* extractor. `extractDocumentFields` must backfill it so that
|
|
19
|
+
* `composeArelaPath` can find it.
|
|
20
|
+
* 5) Resolution to `proforma_completo_xml` when no payment evidence
|
|
21
|
+
* exists in the body.
|
|
22
|
+
*/
|
|
23
|
+
import { describe, it, expect } from '@jest/globals';
|
|
24
|
+
|
|
25
|
+
import { extractDocumentFields } from '../../src/document-type-shared.js';
|
|
26
|
+
import { composeArelaPath } from '../../src/file-detection.js';
|
|
27
|
+
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Test fixtures
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
/**
 * Assemble a minimal VUCEM `consultarPedimentoCompletoRespuesta` SOAP
 * envelope as a string, for use as a test fixture.
 *
 * Every option is interpolated into the markup as-is (no XML escaping).
 *
 * @param {object} [options]
 * @param {string} [options.rfc] - Text of `<ns2:rfc>`.
 * @param {string} [options.pedimento] - Text of the inner `<ns2:pedimento>`.
 * @param {string} [options.claveDocumento] - Text of `claveDocumento/clave`.
 * @param {string} [options.tipoOperacionDesc] - Text of
 *   `tipoOperacion/descripcion` (the `clave` is fixed to 2).
 * @param {string} [options.aduanaClave] - Text of `aduanaEntradaSalida/clave`.
 * @param {?string} [options.presentationDate] - When truthy, emits a
 *   `<ns2:fechas>` block with tipo clave 5 ("FECHA DE PRESENTACION").
 * @param {?string} [options.paymentDate] - When truthy, emits a
 *   `<ns2:fechas>` block with tipo clave 2 ("FECHA DE PAGO").
 * @param {?string} [options.rectFechaPago] - When truthy, emits a
 *   `<ns2:rectificacion>` block carrying this `fechaPago`.
 * @param {string[]} [options.facturas] - One `<ns2:facturas>` element per entry.
 * @param {string[]} [options.edDocs] - One "ED" identificador per entry,
 *   wrapped in an outer `<ns2:identificadores>`; nothing is emitted when empty.
 * @returns {string} The XML document.
 */
function buildXml({
  rfc = 'CEM090106MU3',
  pedimento = '5063036',
  claveDocumento = 'V1',
  tipoOperacionDesc = 'Exportacion',
  aduanaClave = '70',
  presentationDate = '2025-12-01-06:00',
  paymentDate = '2026-01-07-06:00',
  rectFechaPago = null,
  facturas = ['V1-FUJIKURA MEX-202512'],
  edDocs = [],
} = {}) {
  // Presentation block first, then payment; each only when its date is set.
  const fechaBlocks = [];
  if (presentationDate) {
    fechaBlocks.push(
      `<ns2:fechas><ns2:fecha>${presentationDate}</ns2:fecha><ns2:tipo><ns2:clave>5</ns2:clave><ns2:descripcion>FECHA DE PRESENTACION</ns2:descripcion></ns2:tipo></ns2:fechas>`,
    );
  }
  if (paymentDate) {
    fechaBlocks.push(
      `<ns2:fechas><ns2:fecha>${paymentDate}</ns2:fecha><ns2:tipo><ns2:clave>2</ns2:clave><ns2:descripcion>FECHA DE PAGO</ns2:descripcion></ns2:tipo></ns2:fechas>`,
    );
  }
  const fechasXml = fechaBlocks.join('\n');

  let rectificacionXml = '';
  if (rectFechaPago) {
    rectificacionXml = `<ns2:rectificacion><ns2:fechaPago>${rectFechaPago}</ns2:fechaPago></ns2:rectificacion>`;
  }

  const facturaElements = facturas.map(
    (numero) => `<ns2:facturas><ns2:numero>${numero}</ns2:numero></ns2:facturas>`,
  );
  const facturasXml = facturaElements.join('');

  let identificadoresXml = '';
  if (edDocs.length !== 0) {
    const edElements = edDocs.map(
      (complemento) =>
        `<ns2:identificadores><claveIdentificador><clave>ED</clave></claveIdentificador><complemento1>${complemento}</complemento1></ns2:identificadores>`,
    );
    identificadoresXml = `<ns2:identificadores>${edElements.join('')}</ns2:identificadores>`;
  }

  return `<?xml version="1.0" encoding="UTF-8"?>
<S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/">
<S:Body>
<ns2:consultarPedimentoCompletoRespuesta xmlns:ns2="http://x">
<ns2:pedimento>
<ns2:pedimento>${pedimento}</ns2:pedimento>
<ns2:encabezado>
<ns2:claveDocumento><ns2:clave>${claveDocumento}</ns2:clave></ns2:claveDocumento>
<ns2:tipoOperacion><ns2:clave>2</ns2:clave><ns2:descripcion>${tipoOperacionDesc}</ns2:descripcion></ns2:tipoOperacion>
<ns2:aduanaEntradaSalida><ns2:clave>${aduanaClave}</ns2:clave></ns2:aduanaEntradaSalida>
</ns2:encabezado>
<ns2:importadorExportador>
<ns2:rfc>${rfc}</ns2:rfc>
${fechasXml}
</ns2:importadorExportador>
${rectificacionXml}
${facturasXml}
${identificadoresXml}
</ns2:pedimento>
</ns2:consultarPedimentoCompletoRespuesta>
</S:Body>
</S:Envelope>`;
}
|
|
104
|
+
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// Tests
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
describe('pedimento_completo_xml matcher', () => {
  /**
   * Read the value of the extracted field called `name`.
   * Returns undefined when `fields` has no entry with that name.
   */
  const fieldValue = (fields, name) =>
    fields.find((field) => field.name === name)?.value;

  it('detects, extracts, and composes arela_path for a basic export pedimento', () => {
    // 15-digit filename pattern: YY=25 AA=07 PPPP=3429 NNNNNNN=5063036
    const filePath = '/x/2025/250734295063036_250734295063036.xml';
    const xml = buildXml({
      rfc: 'CEM090106MU3',
      pedimento: '5063036',
      aduanaClave: '70',
      presentationDate: '2025-06-15-06:00',
      paymentDate: '2025-06-20-06:00',
    });

    const [type, fields, ped, year] = extractDocumentFields(
      xml,
      'xml',
      filePath,
    );

    expect(type).toBe('pedimento_completo_xml');
    expect(ped).toBe('250734295063036');
    expect(year).toBe(2025);
    expect(fieldValue(fields, 'rfc')).toBe('CEM090106MU3');
    expect(fieldValue(fields, 'aduanaEntradaSalida')).toBe('07');
    // Backfill check: numPedimento must be exposed as a field so
    // composeArelaPath can find it.
    expect(fieldValue(fields, 'numPedimento')).toBe('250734295063036');

    const arela = composeArelaPath(type, fields, year, filePath);
    expect(arela).toBe('CEM090106MU3/2025/3429/07/250734295063036/');
  });

  it('uses presentation date (not payment date) for YY when payment crosses calendar year', () => {
    // Pedimento opened Dec 2025, paid Jan 2026 — the YY must be 25.
    const xml = buildXml({
      pedimento: '5063036',
      aduanaClave: '70',
      presentationDate: '2025-12-01-06:00',
      paymentDate: '2026-01-07-06:00',
    });
    // 3-part filename pattern (no YY in filename), so YY comes from the XML body.
    const filePath = '/x/070-3429-5063036.xml';

    const [type, , ped, year] = extractDocumentFields(xml, 'xml', filePath);

    expect(type).toBe('pedimento_completo_xml');
    expect(ped).toBe('250734295063036');
    expect(year).toBe(2025);
  });

  it('falls back to payment date YY when presentation date is missing', () => {
    const xml = buildXml({
      pedimento: '5063036',
      aduanaClave: '70',
      presentationDate: null, // No clave=5 block
      paymentDate: '2026-01-07-06:00',
    });
    const filePath = '/x/070-3429-5063036.xml';

    const [, , ped, year] = extractDocumentFields(xml, 'xml', filePath);

    expect(ped).toBe('260734295063036');
    expect(year).toBe(2026);
  });

  it('prefers filename YY over body fechas (VUCEM-stamped truth)', () => {
    // Filename says YY=24 but body has presentation=2025. Filename wins.
    const xml = buildXml({
      pedimento: '5063036',
      aduanaClave: '70',
      presentationDate: '2025-12-01-06:00',
      paymentDate: '2026-01-07-06:00',
    });
    const filePath = '/x/240734295063036_240734295063036.xml';

    const [, , ped, year] = extractDocumentFields(xml, 'xml', filePath);

    expect(ped).toBe('240734295063036');
    expect(year).toBe(2024);
  });

  // One test per aduana so a failure in one case does not hide the others.
  // Columns: aduana as VUCEM returns it, expected 2-digit prefix, the
  // 7-digit pedimento, and a 3-part filename carrying that pedimento.
  it.each([
    ['70', '07', '5000001', '/x/070-3429-5000001.xml'], // Cd. Juárez (3-digit canonical: 070)
    ['750', '75', '5000002', '/x/750-3429-5000002.xml'], // Puebla
    ['40', '04', '5000003', '/x/040-3429-5000003.xml'], // Lázaro Cárdenas (canonical: 040)
  ])(
    'pads VUCEM aduana correctly: %s -> %s (pedimento %s, file %s)',
    (aduanaClave, expected, pedimento, filename) => {
      const xml = buildXml({
        pedimento,
        aduanaClave,
        presentationDate: '2025-06-15-06:00',
        paymentDate: '2025-06-20-06:00',
      });

      const [, fields, ped] = extractDocumentFields(xml, 'xml', filename);

      expect(fieldValue(fields, 'aduanaEntradaSalida')).toBe(expected);
      // Positions 2-3 of the composed 15-digit pedimento must equal the
      // aduana prefix.
      expect(ped.substring(2, 4)).toBe(expected);
    },
  );

  it('resolves to proforma_completo_xml when no payment evidence exists', () => {
    const xml = buildXml({
      pedimento: '5063036',
      aduanaClave: '70',
      presentationDate: '2025-12-01-06:00',
      paymentDate: null, // No payment, no rectificacion
    });
    const filePath = '/x/070-3429-5063036.xml';

    const [type] = extractDocumentFields(xml, 'xml', filePath);

    expect(type).toBe('proforma_completo_xml');
  });

  it('extracts cove and rfc correctly', () => {
    const xml = buildXml({
      rfc: 'CEM090106MU3',
      facturas: ['V1-FUJIKURA MEX-202512', 'INV-2'],
    });
    const filePath = '/x/250734295063036_250734295063036.xml';

    const [, fields] = extractDocumentFields(xml, 'xml', filePath);

    expect(fieldValue(fields, 'rfc')).toBe('CEM090106MU3');
    // All factura numbers are reported in one bracketed, comma-joined value.
    expect(fieldValue(fields, 'cove')).toBe('[V1-FUJIKURA MEX-202512,INV-2]');
  });

  it('returns null arela_path when filename is unrecognized (no patente)', () => {
    const xml = buildXml({
      pedimento: '5063036',
      aduanaClave: '70',
      presentationDate: '2025-06-15-06:00',
      paymentDate: '2025-06-20-06:00',
    });
    // Unrecognized filename — no patente derivable.
    const filePath = '/x/random_name.xml';

    const [type, fields, ped, year] = extractDocumentFields(
      xml,
      'xml',
      filePath,
    );

    expect(type).toBe('pedimento_completo_xml');
    expect(ped).toBeNull();
    expect(year).toBeNull();
    // composeArelaPath returns null because patente is missing.
    expect(composeArelaPath(type, fields, year, filePath)).toBeNull();
  });
});
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regression tests for the pedimento_simplificado matcher.
|
|
3
|
+
*
|
|
4
|
+
* Covers PDF layouts where:
|
|
5
|
+
* - The header reads "FORMA SIMPLIFICADA DEL PEDIMENTO" (with DEL),
|
|
6
|
+
* not the canonical "DE PEDIMENTO".
|
|
7
|
+
* - The header trio prints "T. OPER" WITHOUT a trailing colon
|
|
8
|
+
* (the value sits in a separate table cell).
|
|
9
|
+
*
|
|
10
|
+
* Real-world example: REF NQR26-079, Aduana 640 (Querétaro), patente 3458.
|
|
11
|
+
* Before this regression test, the matcher fell through to
|
|
12
|
+
* `facturas_comerciales` because the word "FACTURA" appears in the
|
|
13
|
+
* "OBSERVACIONES" block.
|
|
14
|
+
*/
|
|
15
|
+
import { describe, it, expect } from '@jest/globals';
|
|
16
|
+
|
|
17
|
+
// Importing only the dispatcher avoids circular-init issues caused by
|
|
18
|
+
// `_pedimento-shared-extractors.js` pulling FieldResult from document-type-shared.
|
|
19
|
+
import { extractDocumentFields } from '../../src/document-type-shared.js';
|
|
20
|
+
|
|
21
|
+
// REAL pdf-parse output from the NQR26-079 simplificado PDF
|
|
22
|
+
// (CSM9204097Q1, patente 3458, aduana 640).
|
|
23
|
+
// Captured verbatim with `PDFParse({data}).getText()` — pdf-parse extracts
|
|
24
|
+
// table cells out of visual order, so labels and values often live on
|
|
25
|
+
// different lines (see the FECHA DE PAGO block: label appears, then a few
|
|
26
|
+
// unrelated cells, then the date sits on its own line with the importe).
|
|
27
|
+
// This is exactly what the matchers and extractors see in production.
|
|
28
|
+
const SIMP_DEL_NQR26079_TEXT = `A1 CVE. PEDIMENTO: IMP T. OPER 26 64 3458 6000079 NUM. PEDIMENTO:
|
|
29
|
+
CERTIFICACIONES
|
|
30
|
+
ADUANA E/S:
|
|
31
|
+
DATOS DEL IMPORTADOR / EXPORTADOR
|
|
32
|
+
RFC: CURP:
|
|
33
|
+
CÓDIGO DE
|
|
34
|
+
ACEPTACIÓN
|
|
35
|
+
640
|
|
36
|
+
CSM9204097Q1
|
|
37
|
+
FECHAS:
|
|
38
|
+
17/03/2026
|
|
39
|
+
Ped. 6000079
|
|
40
|
+
CLAVE DE LA SECCION ADUANERA
|
|
41
|
+
DE DESPACHO:
|
|
42
|
+
QUERETARO, EL MARQUES Y
|
|
43
|
+
COLON, QUERETARO.
|
|
44
|
+
640
|
|
45
|
+
DESTINO: 9 PESO BRUTO: 5.350
|
|
46
|
+
MARCAS,NUMEROS Y TOTAL DE BULTOS: 1
|
|
47
|
+
04/03/2026
|
|
48
|
+
ENTRADA
|
|
49
|
+
PAGO
|
|
50
|
+
3PW4CLHE
|
|
51
|
+
S/M S/N
|
|
52
|
+
CODIGO DE BARRAS
|
|
53
|
+
0326 0132 XMP1 4914 6243 989
|
|
54
|
+
*** PAGO ELECTRONICO ***
|
|
55
|
+
DEPÓSITO REFERENCIADO - LÍNEA DE CAPTURA
|
|
56
|
+
PATENTE:
|
|
57
|
+
NOMBRE DE LA INSTITUCIÓN BANCARIA:
|
|
58
|
+
LÍNEA DE CAPTURA:
|
|
59
|
+
IMPORTE PAGADO:
|
|
60
|
+
NÚMERO DE OPERACIÓN BANCARIA:
|
|
61
|
+
NÚMERO DE TRANSACCIÓN SAT:
|
|
62
|
+
MEDIO DE PRESENTACIÓN:
|
|
63
|
+
MEDIO DE RECEPCIÓN/COBRO:
|
|
64
|
+
OTROS MEDIOS ELECTRÓNICOS (PAGO ELECTRÓNICO)
|
|
65
|
+
EFECTIVO (CARGO A CUENTA)
|
|
66
|
+
PEDIMENTO: ADUANA:
|
|
67
|
+
FECHA DE PAGO:
|
|
68
|
+
0326 0132 XMP1 4914 6243
|
|
69
|
+
6000079 640
|
|
70
|
+
17/03/2026 $989
|
|
71
|
+
Banco Nacional de México, S.A.
|
|
72
|
+
00000000703543
|
|
73
|
+
3458
|
|
74
|
+
40124170320261403012
|
|
75
|
+
NUMERO (GUIA/ORDEN EMBARQUE)/ID: 023-51315051 M 490453269837 H
|
|
76
|
+
NÚMERO DE ACUSE DE VALOR COVE268074HT1
|
|
77
|
+
NÚMERO DE E-DOCUMENT: 0438261DOG9W3 01702619TYEU7
|
|
78
|
+
OBSERVACIONES
|
|
79
|
+
FACTURA DE ACUERDO AL ARTÍCULO 36-A DE LA LEY ADUANERA VIGENTE Y A LA REGLA 3.1.
|
|
80
|
+
8. DE LAS REGLAS
|
|
81
|
+
GENERALES DE COMERCIO EXTERIOR VIGENTES.
|
|
82
|
+
SE TRANSMITE PREVIAMENTE A VENTANILLA DIGITAL CONFORME A LA REGLA 1.9.18. DE LAS
|
|
83
|
+
REGLAS GENERALES DE
|
|
84
|
+
COMERCIO EXTERIOR VIGENTES.
|
|
85
|
+
SE EFECTÚA LA TRANSMISIÓN DIGITAL DE CONFORMIDAD A LA REGLA 3.1.17. Y 3.1.31. DE
|
|
86
|
+
LAS REGLAS GENERALES
|
|
87
|
+
DE COMERCIO EXTERIOR VIGENTES.
|
|
88
|
+
LA INFORMACIÓN CONTENIDA EN ESTE PEDIMENTO FUE SUMINISTRADA POR EL IMPORTADOR DE
|
|
89
|
+
CONFORMIDAD CON EL
|
|
90
|
+
ARTICULO 54 DE LA LEY ADUANERA EN VIGOR.
|
|
91
|
+
SE EXIME NOM-024-SCFI-2013 EN TERMINOS DEL NUMERAL 10, FRACC. X INCISO H, IMPORT
|
|
92
|
+
ACIÓN DEFINITIVA,
|
|
93
|
+
TRATÁNDOSE DE IMPORTADORES QUE CUENTEN CON UN PROSEC.
|
|
94
|
+
SE EXIME NOM-003-SCFI-2014 EN TERMINOS DEL NUMERAL 10, FRACC. X INCISO H, IMPORT
|
|
95
|
+
ACIÓN DEFINITIVA,
|
|
96
|
+
TRATÁNDOSE DE IMPORTADORES QUE CUENTEN CON UN PROSEC.
|
|
97
|
+
JOAQUIN GOMEZ ABAD
|
|
98
|
+
AGENTE ADUANAL, AGENCIA ADUANAL, APODERADO ADUANAL O DE ALMACEN
|
|
99
|
+
NOMBRE O RAZ. SOC.:
|
|
100
|
+
RFC: GAA1003111U6 GOAJ641219HDFMBQ09 CURP:
|
|
101
|
+
e.firma:
|
|
102
|
+
NUMERO DE SERIE DEL CERTIFICADO: 00001000000705949781
|
|
103
|
+
GOAJ641219QT5 RFC:
|
|
104
|
+
DECLARO BAJO PROTESTA DE DECIR VERDAD, EN LOS TERMINOS
|
|
105
|
+
DE LO DISPUESTO ARTICULO 81 DE LA LEY: PATENTE O
|
|
106
|
+
AUTORIZACIÓN: 3458 GOMEZ ABAD ASESORES EN COMERCIO EXTERIOR S.C.
|
|
107
|
+
FORMA SIMPLIFICADA DEL PEDIMENTO
|
|
108
|
+
SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
|
|
109
|
+
REF: NQR26-079 Página 1 de 2
|
|
110
|
+
|
|
111
|
+
-- 1 of 2 --
|
|
112
|
+
|
|
113
|
+
FORMA SIMPLIFICADA DEL PEDIMENTO
|
|
114
|
+
SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
|
|
115
|
+
REF: NQR26-079 Página 1 de 2
|
|
116
|
+
A1 CVE. PEDIMENTO: IMP T. OPER 26 64 3458 6000079 NUM. PEDIMENTO:
|
|
117
|
+
CURP:
|
|
118
|
+
RFC: CSM9204097Q1
|
|
119
|
+
****** ****** ********** ********** FIN DE PEDIMENTO NUM. TOTAL DE PARTID
|
|
120
|
+
AS: CLAVE PREVALIDADOR: 010 1
|
|
121
|
+
ANEXO DEL PEDIMENTO
|
|
122
|
+
SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
|
|
123
|
+
REF: NQR26-079 Página 2 de 2`;
|
|
124
|
+
|
|
125
|
+
describe('pedimento_simplificado matcher — DEL PEDIMENTO variant', () => {
  it('dispatcher resolves NQR26-079 (DEL PEDIMENTO) as pedimento_simplificado', () => {
    const scanPath = '/scans/CSM9204097Q1/NQR26-079.pdf';
    const [resolvedType, , numPedimento] = extractDocumentFields(
      SIMP_DEL_NQR26079_TEXT,
      'pdf',
      scanPath,
    );

    // Regression guard. This document used to resolve to `factura_comercial`
    // for two reasons:
    //   (1) the title regex demanded "DE PEDIMENTO" (this PDF says "DEL");
    //   (2) the header trio required a colon after "T. OPER" (this PDF omits it).
    expect(resolvedType).toBe('pedimento_simplificado');
    // "26 64 3458 6000079" from the NUM. PEDIMENTO header, joined.
    expect(numPedimento).toBe('266434586000079');
  });
});
|
|
140
|
+
|
|
141
|
+
describe('pedimento_simplificado matcher — header trio without colon after T. OPER', () => {
  // Minimal fixture: canonical "DE PEDIMENTO" title plus a header trio whose
  // "T. OPER" label has NO trailing colon. Many printable PDFs render OPER as
  // a column header with the value in the next cell, so the trio must
  // tolerate the missing colon.
  //
  // NOTE(review): the fixture name and this describe point at the header-trio
  // fallback, while the test title below attributes the match to the title
  // fast path. Because the fixture carries both the canonical title and the
  // trio, this test cannot tell the two paths apart. To pin the no-colon trio
  // on its own, drop the title line from the fixture — confirm against the
  // matcher before changing it.
  const FALLBACK_TEXT = `FORMA SIMPLIFICADA DE PEDIMENTO
NUM. PEDIMENTO: 22 07 3429 2002089 T. OPER IMP CVE. PEDIMENTO: A1
DATOS DEL IMPORTADOR
PATENTE: 3429 PEDIMENTO: 2002089 ADUANA: 070
FECHA DE PAGO: 01/02/2023`;

  it('resolves via fast-path "FORMA SIMPLIFICADA DE PEDIMENTO" header', () => {
    const [detectedType] = extractDocumentFields(
      FALLBACK_TEXT,
      'pdf',
      '/scans/SAMPLE/pedimento.pdf',
    );
    expect(detectedType).toBe('pedimento_simplificado');
  });
});
|
|
161
|
+
|
|
162
|
+
describe('pedimento_simplificado matcher — title accepts both DE and DEL', () => {
  // Same minimal body for every case; only the title line differs. Both
  // "DE" and "DEL" variants are produced by different prevalidators /
  // agencias in the wild, and BOTH must resolve to pedimento_simplificado.
  const body = `
NUM. PEDIMENTO: 22 07 3429 2002089 T. OPER IMP CVE. PEDIMENTO: A1
DATOS DEL IMPORTADOR
PATENTE: 3429 PEDIMENTO: 2002089 ADUANA: 070
FECHA DE PAGO: 01/02/2023`;

  it.each([
    ['FORMA SIMPLIFICADA DE PEDIMENTO', 'pedimento_simplificado'],
    ['FORMA SIMPLIFICADA DEL PEDIMENTO', 'pedimento_simplificado'],
    ['forma simplificada de pedimento', 'pedimento_simplificado'], // case-insensitive
    // Runs of several spaces between the words. This row must differ from
    // the single-spaced DEL row above, otherwise the whitespace-tolerance
    // case is never exercised.
    ['FORMA  SIMPLIFICADA   DEL  PEDIMENTO', 'pedimento_simplificado'], // extra spaces
  ])('title "%s" resolves to %s', (title, expected) => {
    const [detectedType] = extractDocumentFields(
      `${title}\n${body}`,
      'pdf',
      '/scans/SAMPLE/pedimento.pdf',
    );
    expect(detectedType).toBe(expected);
  });
});
|