@arela/uploader 1.0.22 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/IdentifyCommand.js +6 -6
- package/src/commands/ScanCommand.js +15 -0
- package/src/config/config.js +28 -2
- package/src/document-type-shared.js +15 -7
- package/src/document-types/_pedimento-shared-extractors.js +27 -8
- package/src/document-types/factura-inter-agencia.js +186 -0
- package/src/document-types/pedimento-completo-xml.js +62 -12
- package/src/document-types/pedimento-completo.js +5 -3
- package/src/document-types/pedimento-simplificado.js +5 -2
- package/src/document-types/proforma.js +2 -2
- package/src/file-detection.js +1 -3
- package/tests/unit/factura-inter-agencia.test.js +218 -0
- package/tests/unit/pedimento-completo-xml-matcher.test.js +271 -0
- package/tests/unit/pedimento-simplificado-matcher.test.js +185 -0
package/package.json
CHANGED
|
@@ -10,9 +10,7 @@ import { ConfigurationError } from '../errors/ErrorTypes.js';
|
|
|
10
10
|
import FileDetectionService from '../file-detection.js';
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
|
-
* Paid pedimento detected_type values.
|
|
14
|
-
* even though the XML matcher is currently disabled in the registry so that
|
|
15
|
-
* re-enabling it requires no changes here.
|
|
13
|
+
* Paid pedimento detected_type values.
|
|
16
14
|
*/
|
|
17
15
|
const DETECTED_PEDIMENTO_TYPES = new Set([
|
|
18
16
|
'pedimento_simplificado',
|
|
@@ -543,13 +541,15 @@ export class IdentifyCommand {
|
|
|
543
541
|
|
|
544
542
|
// Check if the text contains any required pedimento marker. This must
|
|
545
543
|
// stay aligned with the `match()` predicates in pedimento-simplificado.js
|
|
546
|
-
// and pedimento-completo.js
|
|
544
|
+
// and pedimento-completo.js (which accept both "DE" and "DEL" in the
|
|
545
|
+
// title, and treat the colon after "T. OPER" as optional).
|
|
547
546
|
const text = result.text || '';
|
|
548
|
-
const hasSimplificadoMarker =
|
|
547
|
+
const hasSimplificadoMarker =
|
|
548
|
+
/FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(text);
|
|
549
549
|
const hasCompletoMarkers =
|
|
550
550
|
/NUM\.?\s*PEDIMENTO:/i.test(text) &&
|
|
551
551
|
/CVE\.?\s*PEDIMENTO:/i.test(text) &&
|
|
552
|
-
/T\.?\s*OPER
|
|
552
|
+
/T\.?\s*OPER:?/i.test(text);
|
|
553
553
|
|
|
554
554
|
return !hasSimplificadoMarker && !hasCompletoMarkers;
|
|
555
555
|
}
|
|
@@ -579,6 +579,9 @@ export class ScanCommand {
|
|
|
579
579
|
* Normalize file record for database insertion
|
|
580
580
|
* Stores paths with forward slashes for consistency but keeps them absolute
|
|
581
581
|
* Sets likelySimplificado to true if file is a PDF and filename contains 'simp'
|
|
582
|
+
* Sets likelyInterAgencia to true if filename matches an inter-agency CFDI
|
|
583
|
+
* pattern (e.g. SICINGR*), so the API forces these XML/PDF through detection
|
|
584
|
+
* even though they lack the 'simp/pedim/covefact' heuristic.
|
|
582
585
|
* @private
|
|
583
586
|
*/
|
|
584
587
|
#normalizeFileRecord(filePath, fileStats, basePath, scanTimestamp) {
|
|
@@ -600,6 +603,17 @@ export class ScanCommand {
|
|
|
600
603
|
const likelySimplificado =
|
|
601
604
|
fileExtension === 'pdf' && /(simp|pedim|covefact)/i.test(fileName);
|
|
602
605
|
|
|
606
|
+
// Flag inter-agency CFDIs by filename so detection picks them up.
|
|
607
|
+
// Patterns are configurable via SCAN_INTER_AGENCIA_PATTERNS env var
|
|
608
|
+
// (see config.js). Only meaningful for PDF and XML.
|
|
609
|
+
let likelyInterAgencia = false;
|
|
610
|
+
if (fileExtension === 'pdf' || fileExtension === 'xml') {
|
|
611
|
+
const patterns = appConfig.scan.interAgenciaPatterns;
|
|
612
|
+
if (patterns && patterns.length > 0) {
|
|
613
|
+
likelyInterAgencia = patterns.some((re) => re.test(fileName));
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
|
|
603
617
|
return {
|
|
604
618
|
fileName,
|
|
605
619
|
fileExtension,
|
|
@@ -610,6 +624,7 @@ export class ScanCommand {
|
|
|
610
624
|
modifiedAt: fileStats.mtime.toISOString(),
|
|
611
625
|
scanTimestamp,
|
|
612
626
|
likelySimplificado,
|
|
627
|
+
likelyInterAgencia,
|
|
613
628
|
};
|
|
614
629
|
}
|
|
615
630
|
|
package/src/config/config.js
CHANGED
|
@@ -37,10 +37,10 @@ class Config {
|
|
|
37
37
|
const __dirname = path.dirname(__filename);
|
|
38
38
|
const packageJsonPath = path.resolve(__dirname, '../../package.json');
|
|
39
39
|
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
|
|
40
|
-
return packageJson.version || '1.0.
|
|
40
|
+
return packageJson.version || '1.0.23';
|
|
41
41
|
} catch (error) {
|
|
42
42
|
console.warn('⚠️ Could not read package.json version, using fallback');
|
|
43
|
-
return '1.0.
|
|
43
|
+
return '1.0.23';
|
|
44
44
|
}
|
|
45
45
|
}
|
|
46
46
|
|
|
@@ -294,6 +294,31 @@ class Config {
|
|
|
294
294
|
.map((p) => p.trim())
|
|
295
295
|
.filter(Boolean);
|
|
296
296
|
|
|
297
|
+
// Parse inter-agency CFDI filename patterns. Files whose basename matches
|
|
298
|
+
// any of these regex patterns are flagged at scan time (likelyInterAgencia)
|
|
299
|
+
// so the API forces them through detection and the factura_inter_agencia
|
|
300
|
+
// matcher can classify them. The push pipeline then excludes them (see
|
|
301
|
+
// NON_PUSHABLE_TYPES_SQL in arela-api). Comma-separated regex source list.
|
|
302
|
+
// Default: ^SICINGR — covers NORCOM's SICINGR70-NNNNNN(...).pdf/.XML files.
|
|
303
|
+
const defaultInterAgenciaPatterns = '^SICINGR';
|
|
304
|
+
const interAgenciaPatterns = (
|
|
305
|
+
process.env.SCAN_INTER_AGENCIA_PATTERNS || defaultInterAgenciaPatterns
|
|
306
|
+
)
|
|
307
|
+
.split(',')
|
|
308
|
+
.map((p) => p.trim())
|
|
309
|
+
.filter(Boolean)
|
|
310
|
+
.map((p) => {
|
|
311
|
+
try {
|
|
312
|
+
return new RegExp(p, 'i');
|
|
313
|
+
} catch (err) {
|
|
314
|
+
console.warn(
|
|
315
|
+
`⚠️ Invalid SCAN_INTER_AGENCIA_PATTERNS regex "${p}": ${err.message}`,
|
|
316
|
+
);
|
|
317
|
+
return null;
|
|
318
|
+
}
|
|
319
|
+
})
|
|
320
|
+
.filter(Boolean);
|
|
321
|
+
|
|
297
322
|
// Generate table name if all components are available
|
|
298
323
|
// Note: This is just for reference; actual table names are generated dynamically
|
|
299
324
|
// in ScanCommand based on discovered directories and levels
|
|
@@ -312,6 +337,7 @@ class Config {
|
|
|
312
337
|
basePathFull: basePathLabel, // Renamed for consistency
|
|
313
338
|
tableName,
|
|
314
339
|
excludePatterns,
|
|
340
|
+
interAgenciaPatterns,
|
|
315
341
|
batchSize: parseInt(process.env.SCAN_BATCH_SIZE) || 2000,
|
|
316
342
|
directoryLevel: parseInt(process.env.SCAN_DIRECTORY_LEVEL) || 0,
|
|
317
343
|
};
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
// Import all document type definitions
|
|
2
2
|
import { dodaPdfDefinition } from './document-types/doda-pdf.js';
|
|
3
3
|
import { dodaXmlDefinition } from './document-types/doda-xml.js';
|
|
4
|
+
import { facturaInterAgenciaDefinition } from './document-types/factura-inter-agencia.js';
|
|
4
5
|
import { facturasComerciales } from './document-types/facturas-comerciales.js';
|
|
6
|
+
import { pedimentoCompletoXmlDefinition } from './document-types/pedimento-completo-xml.js';
|
|
5
7
|
import { pedimentoCompletoDefinition } from './document-types/pedimento-completo.js';
|
|
6
|
-
// TODO: enable XML pedimento detection — implementation ready in pedimento-completo-xml.js
|
|
7
|
-
// import { pedimentoCompletoXmlDefinition } from './document-types/pedimento-completo-xml.js';
|
|
8
8
|
import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
|
|
9
9
|
import { proformaDefinition } from './document-types/proforma.js';
|
|
10
10
|
import { supportDocumentDefinition } from './document-types/support-document.js';
|
|
@@ -45,14 +45,14 @@ export class DocumentTypeDefinition {
|
|
|
45
45
|
const documentTypes = [
|
|
46
46
|
pedimentoSimplificadoDefinition,
|
|
47
47
|
pedimentoCompletoDefinition,
|
|
48
|
-
|
|
49
|
-
// matching import at the top of this file. All downstream code
|
|
50
|
-
// (composeArelaPath, arela-api SQL filters, IdentifyCommand counters)
|
|
51
|
-
// already accepts `pedimento_completo_xml`.
|
|
52
|
-
// pedimentoCompletoXmlDefinition,
|
|
48
|
+
pedimentoCompletoXmlDefinition,
|
|
53
49
|
supportDocumentDefinition,
|
|
54
50
|
dodaPdfDefinition,
|
|
55
51
|
dodaXmlDefinition,
|
|
52
|
+
// factura_inter_agencia MUST be evaluated BEFORE facturasComerciales
|
|
53
|
+
// because a NORCOM↔PALCO CFDI would also match the generic commercial
|
|
54
|
+
// invoice matcher. First match wins (see extractDocumentFields).
|
|
55
|
+
facturaInterAgenciaDefinition,
|
|
56
56
|
facturasComerciales,
|
|
57
57
|
// Add more document types here as needed
|
|
58
58
|
];
|
|
@@ -114,6 +114,14 @@ export function extractDocumentFields(source, fileExtension, filePath) {
|
|
|
114
114
|
? docType.extractPedimentoYear(source, fields, filePath)
|
|
115
115
|
: null;
|
|
116
116
|
|
|
117
|
+
// Ensure downstream code (composeArelaPath) sees `numPedimento` as a
|
|
118
|
+
// field. PDF matchers add it via an explicit extractor; XML matchers
|
|
119
|
+
// compose it externally via extractNumPedimento. Backfill so both paths
|
|
120
|
+
// expose the same shape.
|
|
121
|
+
if (pedimento && !fields.some((f) => f.name === 'numPedimento')) {
|
|
122
|
+
fields.push(new FieldResult('numPedimento', true, pedimento));
|
|
123
|
+
}
|
|
124
|
+
|
|
117
125
|
return [resolvedType, fields, pedimento, year];
|
|
118
126
|
}
|
|
119
127
|
}
|
|
@@ -186,15 +186,34 @@ export const paymentDateExtractor = {
|
|
|
186
186
|
field: 'paymentDate',
|
|
187
187
|
extract: (source) => {
|
|
188
188
|
const patterns = [
|
|
189
|
-
/FECHA\s+DE\s+PAGO:?\s*(\d{2}\/\d{2}\/\d{4})/i,
|
|
190
|
-
/FECHA\s+DE\s+PAGO:?\s*(\d{4}\/\d{2}\/\d{2})/i,
|
|
191
|
-
/2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/,
|
|
192
|
-
/(?:^|\n)\s*PAGO\s+(\d{2}\/\d{2}\/\d{4})/i,
|
|
193
|
-
/
|
|
189
|
+
/FECHA\s+DE\s+PAGO:?\s*(\d{2}\/\d{2}\/\d{4})/i, // 0: explicit label DD/MM/YYYY
|
|
190
|
+
/FECHA\s+DE\s+PAGO:?\s*(\d{4}\/\d{2}\/\d{2})/i, // 1: explicit label YYYY/MM/DD
|
|
191
|
+
/2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/, // 2: forma simplificada scheduled date ⚠️
|
|
192
|
+
/(?:^|\n)\s*PAGO\s+(\d{2}\/\d{2}\/\d{4})/i, // 3: PAGO at line start (original)
|
|
193
|
+
/(?<=\d)PAGO\s+(\d{2}\/\d{2}\/\d{4})/i, // 4: PAGO after digit (pdf-parse artifact)
|
|
194
|
+
/(\d{2}\/\d{2}\/\d{4})[ \t]+PAGO[ \t]*$/im, // 5: reversed layout — date before PAGO (FECHAS column)
|
|
195
|
+
// 6: forma simplificada — pdf-parse extracts table cells out of order, so the
|
|
196
|
+
// label "FECHA DE PAGO:" can appear on its own line and the value (along with
|
|
197
|
+
// other cells like línea de captura, pedimento, importe) follows several lines
|
|
198
|
+
// later. Take the FIRST dd/mm/yyyy after the label within a 400-char window.
|
|
199
|
+
// Safe because `isNoPagado` short-circuits documents without a real payment,
|
|
200
|
+
// so we won't grab the unrelated ENTRADA date from the "FECHAS:" block above.
|
|
201
|
+
/FECHA\s+DE\s+PAGO:[\s\S]{1,400}?(\d{2}\/\d{2}\/\d{4})/i,
|
|
202
|
+
/PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/i, // 7: fallback
|
|
194
203
|
];
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
204
|
+
// "*** NO PAGADO" is the explicit SAT marker that no payment has been
|
|
205
|
+
// certified. When present, the bank-certification block is physically
|
|
206
|
+
// absent, so any date matched by the fallback patterns (e.g.
|
|
207
|
+
// "2 PAGO:" with a scheduled date, or "PRESENTACION:") would be a false
|
|
208
|
+
// positive. Return null outright — the document is classified as proforma.
|
|
209
|
+
const isNoPagado = /\*{3}\s*NO\s+PAGADO/i.test(source);
|
|
210
|
+
if (isNoPagado) {
|
|
211
|
+
return new FieldResult('paymentDate', false, null);
|
|
212
|
+
}
|
|
213
|
+
for (const pattern of patterns) {
|
|
214
|
+
const m = source.match(pattern);
|
|
215
|
+
if (!m) continue;
|
|
216
|
+
return new FieldResult('paymentDate', true, m[1]);
|
|
198
217
|
}
|
|
199
218
|
return new FieldResult('paymentDate', false, null);
|
|
200
219
|
},
|
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
// NOTE: We intentionally do NOT import `FieldResult` from
|
|
2
|
+
// '../document-type-shared.js' to avoid a circular-import TDZ when this
|
|
3
|
+
// module is imported directly (e.g. from unit tests). `FieldResult` is a
|
|
4
|
+
// plain data-class with shape `{ name, found, value }`, so we construct
|
|
5
|
+
// equivalent plain objects locally.
|
|
6
|
+
const fieldResult = (name, found, value) => ({ name, found, value });
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Factura Inter-Agencia Document Type Definition
|
|
10
|
+
*
|
|
11
|
+
* Detects CFDIs (XML or PDF) issued between customs broker agencies (e.g.,
|
|
12
|
+
* NORCOM ↔ PALCO). These files are dropped into a pedimento folder by the
|
|
13
|
+
* broker but they are NOT part of the customs electronic file (expediente
|
|
14
|
+
* aduanal) — they are inter-agency billing for broker services.
|
|
15
|
+
*
|
|
16
|
+
* Detection rules (ALL required):
|
|
17
|
+
* 1) CFDI markers present (either xml structure or PDF text representation)
|
|
18
|
+
* 2) Both emisor and receptor RFCs belong to the configured agency pair
|
|
19
|
+
* (NAA120215F20 = NORCOM, PCC1008161WA = PALCO) in any direction.
|
|
20
|
+
* 3) At least one concepto with ClaveProdServ 78141502 (Servicios de
|
|
21
|
+
* agentes aduaneros) — confirms the billing is for broker services.
|
|
22
|
+
*
|
|
23
|
+
* IMPORTANT: This matcher MUST be registered BEFORE `facturasComerciales`
|
|
24
|
+
* in document-type-shared.js — both would match a CFDI in a pedimento
|
|
25
|
+
* folder, but inter-agency invoices must take precedence so they are
|
|
26
|
+
* filtered out of the Arela push pipeline (see arela-api
|
|
27
|
+
* NON_PUSHABLE_TYPES_SQL).
|
|
28
|
+
*
|
|
29
|
+
* Currently scope-limited to NORCOM↔PALCO. To widen, move INTER_AGENCIA_RFCS
|
|
30
|
+
* to env config and require ≥2 distinct RFCs from the configured list.
|
|
31
|
+
*/
|
|
32
|
+
|
|
33
|
+
/**
|
|
34
|
+
* RFCs of agencies whose mutual invoices should be excluded from the Arela
|
|
35
|
+
* push pipeline. Order is irrelevant — a match is any pair of distinct RFCs
|
|
36
|
+
* from this set appearing as emisor and receptor.
|
|
37
|
+
*/
|
|
38
|
+
export const INTER_AGENCIA_RFCS = ['NAA120215F20', 'PCC1008161WA'];
|
|
39
|
+
|
|
40
|
+
const BROKER_SERVICE_CLAVE_PROD_SERV = '78141502';
|
|
41
|
+
|
|
42
|
+
const CFDI_XML_MARKERS = [
|
|
43
|
+
/cfdi:Comprobante/i,
|
|
44
|
+
/xmlns:cfdi/i,
|
|
45
|
+
/TipoDeComprobante/i,
|
|
46
|
+
];
|
|
47
|
+
|
|
48
|
+
/**
|
|
49
|
+
* Detect that the source represents a CFDI — either as the original XML
|
|
50
|
+
* structure or as text extracted from a printed CFDI (PDF representation).
|
|
51
|
+
*
|
|
52
|
+
* PDF text loses XML tags, so we look for the human-readable equivalents
|
|
53
|
+
* commonly rendered by SAT-style invoice templates ("Folio Fiscal", "Sello
|
|
54
|
+
* Digital del CFDI", "Cadena Original ... Certificacion Digital del SAT").
|
|
55
|
+
*/
|
|
56
|
+
function isCfdiContent(source) {
|
|
57
|
+
const xmlHits = CFDI_XML_MARKERS.filter((re) => re.test(source)).length;
|
|
58
|
+
if (xmlHits >= 2) return true;
|
|
59
|
+
|
|
60
|
+
const pdfMarkers = [
|
|
61
|
+
/folio\s*fiscal/i,
|
|
62
|
+
/sello\s*digital\s*del\s*cfdi/i,
|
|
63
|
+
/cadena\s*original.*certificaci[oó]n\s*digital\s*del\s*sat/i,
|
|
64
|
+
/representaci[oó]n\s*impresa\s*de\s*un\s*cfdi/i,
|
|
65
|
+
];
|
|
66
|
+
return pdfMarkers.filter((re) => re.test(source)).length >= 2;
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/**
|
|
70
|
+
* Return the subset of INTER_AGENCIA_RFCS that appear in `source`. Matching is
|
|
71
|
+
* case-insensitive and uses word boundaries so substrings inside larger tokens
|
|
72
|
+
* (cert/sello base64) don't produce false positives.
|
|
73
|
+
*/
|
|
74
|
+
function findInterAgenciaRfcs(source) {
|
|
75
|
+
const found = new Set();
|
|
76
|
+
for (const rfc of INTER_AGENCIA_RFCS) {
|
|
77
|
+
const re = new RegExp(`\\b${rfc}\\b`, 'i');
|
|
78
|
+
if (re.test(source)) found.add(rfc.toUpperCase());
|
|
79
|
+
}
|
|
80
|
+
return [...found];
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
export const facturaInterAgenciaDefinition = {
|
|
84
|
+
type: 'factura_inter_agencia',
|
|
85
|
+
extensions: ['xml', 'pdf'],
|
|
86
|
+
|
|
87
|
+
match: (source) => {
|
|
88
|
+
if (!isCfdiContent(source)) return false;
|
|
89
|
+
|
|
90
|
+
// Need ≥2 distinct configured RFCs present (one as emisor, one as receptor)
|
|
91
|
+
const rfcsFound = findInterAgenciaRfcs(source);
|
|
92
|
+
if (rfcsFound.length < 2) return false;
|
|
93
|
+
|
|
94
|
+
// Confirm the invoice is for broker services (customs agent services)
|
|
95
|
+
if (!source.includes(BROKER_SERVICE_CLAVE_PROD_SERV)) return false;
|
|
96
|
+
|
|
97
|
+
return true;
|
|
98
|
+
},
|
|
99
|
+
|
|
100
|
+
// Pedimento extraction is optional / informational — these files are
|
|
101
|
+
// excluded from push, so arela_path is never composed. We still extract
|
|
102
|
+
// a pedimento number when present (from the "Referencias" / "Pedimento:"
|
|
103
|
+
// section of the printable CFDI) for auditability.
|
|
104
|
+
extractNumPedimento: (source, fields) => {
|
|
105
|
+
return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
|
|
106
|
+
},
|
|
107
|
+
|
|
108
|
+
extractPedimentoYear: (source, fields) => {
|
|
109
|
+
const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
|
|
110
|
+
if (numPedimento && numPedimento.length >= 2) {
|
|
111
|
+
const yy = parseInt(numPedimento.substring(0, 2), 10);
|
|
112
|
+
if (!isNaN(yy)) return yy < 50 ? yy + 2000 : yy + 1900;
|
|
113
|
+
}
|
|
114
|
+
return null;
|
|
115
|
+
},
|
|
116
|
+
|
|
117
|
+
extractors: [
|
|
118
|
+
{
|
|
119
|
+
field: 'rfcEmisor',
|
|
120
|
+
extract: (source) => {
|
|
121
|
+
// XML form: <cfdi:Emisor Rfc="..." />
|
|
122
|
+
const xmlMatch = source.match(
|
|
123
|
+
/<[^>]*Emisor[^>]*Rfc\s*=\s*["']([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})["']/i,
|
|
124
|
+
);
|
|
125
|
+
if (xmlMatch) return fieldResult('rfcEmisor', true, xmlMatch[1]);
|
|
126
|
+
|
|
127
|
+
// PDF form: "Emisor" section followed by RFC label/value on later lines.
|
|
128
|
+
// We pick the first INTER_AGENCIA RFC that appears in the document.
|
|
129
|
+
const rfcs = findInterAgenciaRfcs(source);
|
|
130
|
+
if (rfcs.length > 0) return fieldResult('rfcEmisor', true, rfcs[0]);
|
|
131
|
+
|
|
132
|
+
return fieldResult('rfcEmisor', false, null);
|
|
133
|
+
},
|
|
134
|
+
},
|
|
135
|
+
{
|
|
136
|
+
field: 'rfcReceptor',
|
|
137
|
+
extract: (source) => {
|
|
138
|
+
const xmlMatch = source.match(
|
|
139
|
+
/<[^>]*Receptor[^>]*Rfc\s*=\s*["']([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})["']/i,
|
|
140
|
+
);
|
|
141
|
+
if (xmlMatch) return fieldResult('rfcReceptor', true, xmlMatch[1]);
|
|
142
|
+
|
|
143
|
+
const rfcs = findInterAgenciaRfcs(source);
|
|
144
|
+
if (rfcs.length >= 2) {
|
|
145
|
+
return fieldResult('rfcReceptor', true, rfcs[1]);
|
|
146
|
+
}
|
|
147
|
+
return fieldResult('rfcReceptor', false, null);
|
|
148
|
+
},
|
|
149
|
+
},
|
|
150
|
+
{
|
|
151
|
+
field: 'folio',
|
|
152
|
+
extract: (source) => {
|
|
153
|
+
// CFDI Folio attribute
|
|
154
|
+
const xmlMatch = source.match(/\bFolio\s*=\s*["']([A-Z0-9-]+)["']/i);
|
|
155
|
+
if (xmlMatch) return fieldResult('folio', true, xmlMatch[1]);
|
|
156
|
+
|
|
157
|
+
// PDF: "Numero Folio 012749"
|
|
158
|
+
const pdfMatch = source.match(/Numero\s+Folio\s+([A-Z0-9-]+)/i);
|
|
159
|
+
if (pdfMatch) return fieldResult('folio', true, pdfMatch[1]);
|
|
160
|
+
|
|
161
|
+
return fieldResult('folio', false, null);
|
|
162
|
+
},
|
|
163
|
+
},
|
|
164
|
+
{
|
|
165
|
+
field: 'uuid',
|
|
166
|
+
extract: (source) => {
|
|
167
|
+
const uuidRe =
|
|
168
|
+
/[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}/i;
|
|
169
|
+
const m = source.match(uuidRe);
|
|
170
|
+
return fieldResult('uuid', !!m, m ? m[0].toUpperCase() : null);
|
|
171
|
+
},
|
|
172
|
+
},
|
|
173
|
+
{
|
|
174
|
+
field: 'numPedimento',
|
|
175
|
+
extract: (source) => {
|
|
176
|
+
// Printable CFDI "Pedimento: 3458 6000046 Fecha: ..." — recovers an
|
|
177
|
+
// 11-digit pedimento (no YY prefix). Useful for auditability only.
|
|
178
|
+
const m = source.match(/Pedimento:?\s*(\d{4})\s*(\d{7})/i);
|
|
179
|
+
if (m) {
|
|
180
|
+
return fieldResult('numPedimento', true, `${m[1]}${m[2]}`);
|
|
181
|
+
}
|
|
182
|
+
return fieldResult('numPedimento', false, null);
|
|
183
|
+
},
|
|
184
|
+
},
|
|
185
|
+
],
|
|
186
|
+
};
|
|
@@ -1,10 +1,8 @@
|
|
|
1
1
|
// VUCEM "consultarPedimentoCompleto" XML matcher.
|
|
2
2
|
//
|
|
3
|
-
//
|
|
4
|
-
//
|
|
5
|
-
//
|
|
6
|
-
// counters) already includes `pedimento_completo_xml`, so re-enabling is a
|
|
7
|
-
// single-line change.
|
|
3
|
+
// Registered in `document-type-shared.js`. Downstream code
|
|
4
|
+
// (composeArelaPath, arela-api propagation SQL, IdentifyCommand counters)
|
|
5
|
+
// also includes `pedimento_completo_xml`.
|
|
8
6
|
//
|
|
9
7
|
// Filename patterns recognized (try in order — patente extraction):
|
|
10
8
|
// 1) VU_PATENTE_ADUANA_PEDIMENTO.xml → e.g. VU_3429_070_5016101.xml
|
|
@@ -47,6 +45,22 @@ function pad(value, length) {
|
|
|
47
45
|
return String(value).padStart(length, '0');
|
|
48
46
|
}
|
|
49
47
|
|
|
48
|
+
/**
|
|
49
|
+
* Convert a VUCEM `aduanaEntradaSalida.clave` (e.g. "70", "750", "40") to the
|
|
50
|
+
* 2-digit "sección aduanera" prefix used inside the 15-digit pedimento number.
|
|
51
|
+
*
|
|
52
|
+
* VUCEM strips leading zeros from the canonical 3-digit SAT aduana code,
|
|
53
|
+
* so `070` (Ciudad Juárez) arrives as `70`. The pedimento prefix is the
|
|
54
|
+
* first 2 digits of the 3-digit code:
|
|
55
|
+
* `70` → `070` → `07` (Cd. Juárez)
|
|
56
|
+
* `750` → `750` → `75` (Puebla)
|
|
57
|
+
* `40` → `040` → `04` (Lázaro Cárdenas)
|
|
58
|
+
*/
|
|
59
|
+
function aduanaToSeccion(claveValue) {
|
|
60
|
+
if (claveValue == null) return null;
|
|
61
|
+
return pad(claveValue, 3).substring(0, 2);
|
|
62
|
+
}
|
|
63
|
+
|
|
50
64
|
/**
|
|
51
65
|
* Try the three known filename patterns and return {patente, aduana, pedimento}
|
|
52
66
|
* with any subset of the fields populated. Returns null if no pattern matches.
|
|
@@ -102,12 +116,17 @@ function yyFromIsoDate(iso) {
|
|
|
102
116
|
return m ? m[1].substring(2, 4) : null;
|
|
103
117
|
}
|
|
104
118
|
|
|
105
|
-
// Find <ns2:fechas> block
|
|
106
|
-
|
|
119
|
+
// Find <ns2:fechas> block whose nested <clave> matches `claveValue` and
|
|
120
|
+
// return its <ns2:fecha>. Works for both shapes:
|
|
121
|
+
// <fechas><clave>N</clave><fecha>...</fecha></fechas>
|
|
122
|
+
// <fechas><fecha>...</fecha><tipo><clave>N</clave></tipo></fechas>
|
|
123
|
+
// (firstTag finds the FIRST <clave> in the block — both layouts expose only
|
|
124
|
+
// one clave per fechas entry.)
|
|
125
|
+
function findFechaByClave(source, claveValue) {
|
|
107
126
|
const fechasBlocks = allTagBlocks(source, 'fechas');
|
|
108
127
|
for (const block of fechasBlocks) {
|
|
109
128
|
const clave = firstTag(block, 'clave');
|
|
110
|
-
if (clave ===
|
|
129
|
+
if (clave === claveValue) {
|
|
111
130
|
const fecha = firstTag(block, 'fecha');
|
|
112
131
|
if (fecha) return fecha;
|
|
113
132
|
}
|
|
@@ -115,6 +134,18 @@ function findPaymentDate(source) {
|
|
|
115
134
|
return null;
|
|
116
135
|
}
|
|
117
136
|
|
|
137
|
+
// Fecha de pago de las contribuciones (tipo.clave == 2).
|
|
138
|
+
function findPaymentDate(source) {
|
|
139
|
+
return findFechaByClave(source, '2');
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Fecha de presentacion (tipo.clave == 5). This is the authoritative source
|
|
143
|
+
// for the pedimento's YY prefix — a pedimento opened in Dec-2025 but paid in
|
|
144
|
+
// Jan-2026 keeps the `25` prefix, matching what VUCEM stamps in the filename.
|
|
145
|
+
function findPresentationDate(source) {
|
|
146
|
+
return findFechaByClave(source, '5');
|
|
147
|
+
}
|
|
148
|
+
|
|
118
149
|
// --------------------------- extractors ------------------------------------
|
|
119
150
|
|
|
120
151
|
const rfcExtractor = {
|
|
@@ -152,7 +183,7 @@ const aduanaEntradaSalidaExtractor = {
|
|
|
152
183
|
return new FieldResult(
|
|
153
184
|
'aduanaEntradaSalida',
|
|
154
185
|
!!clave,
|
|
155
|
-
|
|
186
|
+
aduanaToSeccion(clave),
|
|
156
187
|
);
|
|
157
188
|
},
|
|
158
189
|
};
|
|
@@ -165,6 +196,14 @@ const paymentDateExtractor = {
|
|
|
165
196
|
},
|
|
166
197
|
};
|
|
167
198
|
|
|
199
|
+
const presentationDateExtractor = {
|
|
200
|
+
field: 'presentationDate',
|
|
201
|
+
extract: (source) => {
|
|
202
|
+
const fecha = findPresentationDate(source);
|
|
203
|
+
return new FieldResult('presentationDate', !!fecha, fecha);
|
|
204
|
+
},
|
|
205
|
+
};
|
|
206
|
+
|
|
168
207
|
const fechaPagoRectificacionExtractor = {
|
|
169
208
|
field: 'fechaPagoRectificacion',
|
|
170
209
|
extract: (source) => {
|
|
@@ -257,8 +296,14 @@ export const pedimentoCompletoXmlDefinition = {
|
|
|
257
296
|
|
|
258
297
|
/**
|
|
259
298
|
* Compose the 15-digit pedimento number from XML body + filename.
|
|
260
|
-
* YY:
|
|
261
|
-
*
|
|
299
|
+
* YY: priority order (most authoritative first):
|
|
300
|
+
* 1) Filename pattern 3 (`{15-digit}.xml`) — VUCEM stamps the correct
|
|
301
|
+
* prefix at export time.
|
|
302
|
+
* 2) Fecha de presentacion (<fechas><clave>5) — the year the pedimento
|
|
303
|
+
* was opened. Authoritative for the YY prefix even when payment
|
|
304
|
+
* crosses calendar year (e.g. opened Dec-2025, paid Jan-2026 → YY=25).
|
|
305
|
+
* 3) Rectification fechaPago (only when no presentation date exists).
|
|
306
|
+
* 4) Payment date (last-resort fallback).
|
|
262
307
|
* AA: from <aduanaEntradaSalida><clave> padded to 2.
|
|
263
308
|
* PPPP: from the filename (any of the three patterns).
|
|
264
309
|
* NNNNNNN: from <pedimento> padded to 7.
|
|
@@ -267,15 +312,19 @@ export const pedimentoCompletoXmlDefinition = {
|
|
|
267
312
|
extractNumPedimento: (source, fields, filePath) => {
|
|
268
313
|
const parts = parseFilenameParts(filePath);
|
|
269
314
|
|
|
315
|
+
const presentation = fields?.find(
|
|
316
|
+
(f) => f.name === 'presentationDate' && f.found,
|
|
317
|
+
)?.value;
|
|
270
318
|
const rect = fields?.find(
|
|
271
319
|
(f) => f.name === 'fechaPagoRectificacion' && f.found,
|
|
272
320
|
)?.value;
|
|
273
321
|
const pay = fields?.find((f) => f.name === 'paymentDate' && f.found)?.value;
|
|
274
322
|
|
|
275
323
|
let yy =
|
|
324
|
+
(parts && parts.year) ||
|
|
325
|
+
yyFromIsoDate(presentation) ||
|
|
276
326
|
yyFromIsoDate(rect) ||
|
|
277
327
|
yyFromIsoDate(pay) ||
|
|
278
|
-
(parts && parts.year) ||
|
|
279
328
|
null;
|
|
280
329
|
|
|
281
330
|
const aduanaField = fields?.find(
|
|
@@ -315,6 +364,7 @@ export const pedimentoCompletoXmlDefinition = {
|
|
|
315
364
|
tipoOperacionExtractor,
|
|
316
365
|
aduanaEntradaSalidaExtractor,
|
|
317
366
|
paymentDateExtractor,
|
|
367
|
+
presentationDateExtractor,
|
|
318
368
|
fechaPagoRectificacionExtractor,
|
|
319
369
|
coveExtractor,
|
|
320
370
|
numEDocumentoExtractor,
|
|
@@ -17,17 +17,19 @@ export const pedimentoCompletoDefinition = {
|
|
|
17
17
|
type: 'pedimento_completo',
|
|
18
18
|
extensions: ['pdf'],
|
|
19
19
|
match: (source) => {
|
|
20
|
-
// Hard exclude: "FORMA SIMPLIFICADA" is handled by
|
|
21
|
-
|
|
20
|
+
// Hard exclude: "FORMA SIMPLIFICADA [DE|DEL] PEDIMENTO" is handled by
|
|
21
|
+
// pedimento_simplificado.
|
|
22
|
+
if (/FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(source)) return false;
|
|
22
23
|
|
|
23
24
|
// Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
|
|
24
25
|
// different document type handled by aviso_consolidado.
|
|
25
26
|
if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
|
|
26
27
|
|
|
28
|
+
// The colon after "T. OPER" is optional — see note in pedimento-simplificado.js.
|
|
27
29
|
const hasHeaderFields =
|
|
28
30
|
/NUM\.?\s*PEDIMENTO:/i.test(source) &&
|
|
29
31
|
/CVE\.?\s*PEDIMENTO:/i.test(source) &&
|
|
30
|
-
/T\.?\s*OPER
|
|
32
|
+
/T\.?\s*OPER:?/i.test(source);
|
|
31
33
|
if (hasHeaderFields) {
|
|
32
34
|
const hasCopyMarker =
|
|
33
35
|
/ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
|
|
@@ -12,15 +12,18 @@ export const pedimentoSimplificadoDefinition = {
|
|
|
12
12
|
if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
|
|
13
13
|
|
|
14
14
|
// Fast path: the literal title appears on standard SIMP layouts.
|
|
15
|
-
|
|
15
|
+
// Some prevalidators print "FORMA SIMPLIFICADA DEL PEDIMENTO" (with DEL).
|
|
16
|
+
if (/FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(source)) return true;
|
|
16
17
|
|
|
17
18
|
// Some PDFs (single-page anchors) lack that title but still carry the
|
|
18
19
|
// three pedimento header fields. Treat them as simplificado UNLESS they
|
|
19
20
|
// have the multi-page copy markers that uniquely identify a completo.
|
|
21
|
+
// NOTE: the colon after "T. OPER" is optional — many printable layouts
|
|
22
|
+
// render OPER as a table-header label with the value in the next cell.
|
|
20
23
|
const hasHeaderFields =
|
|
21
24
|
/NUM\.?\s*PEDIMENTO:/i.test(source) &&
|
|
22
25
|
/CVE\.?\s*PEDIMENTO:/i.test(source) &&
|
|
23
|
-
/T\.?\s*OPER
|
|
26
|
+
/T\.?\s*OPER:?/i.test(source);
|
|
24
27
|
if (!hasHeaderFields) return false;
|
|
25
28
|
|
|
26
29
|
const hasCompletoCopyMarker =
|
|
@@ -16,9 +16,9 @@ export const proformaDefinition = {
|
|
|
16
16
|
type: 'proforma',
|
|
17
17
|
extensions: ['pdf'],
|
|
18
18
|
|
|
19
|
-
// Same content marker as pedimento simplificado
|
|
19
|
+
// Same content marker as pedimento simplificado (accepts "DE" or "DEL").
|
|
20
20
|
match: (source) => {
|
|
21
|
-
return /FORMA
|
|
21
|
+
return /FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(source);
|
|
22
22
|
},
|
|
23
23
|
|
|
24
24
|
extractNumPedimento: pedimentoSimplificadoDefinition.extractNumPedimento,
|
package/src/file-detection.js
CHANGED
|
@@ -4,9 +4,7 @@ import { PDFParse } from 'pdf-parse';
|
|
|
4
4
|
|
|
5
5
|
import { extractDocumentFields } from './document-type-shared.js';
|
|
6
6
|
|
|
7
|
-
// Document types that participate in arela_path composition.
|
|
8
|
-
// kept here even though its matcher is currently disabled — once re-enabled
|
|
9
|
-
// in document-type-shared.js no further changes are needed here.
|
|
7
|
+
// Document types that participate in arela_path composition.
|
|
10
8
|
const ARELA_PATH_TYPES = new Set([
|
|
11
9
|
'pedimento_simplificado',
|
|
12
10
|
'pedimento_completo',
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the factura_inter_agencia matcher.
|
|
3
|
+
*
|
|
4
|
+
* Verifies that NORCOM↔PALCO CFDIs (XML and printable PDF text) are
|
|
5
|
+
* detected as `factura_inter_agencia`, and that ordinary CFDIs are NOT
|
|
6
|
+
* mis-classified.
|
|
7
|
+
*/
|
|
8
|
+
import { describe, it, expect } from '@jest/globals';
|
|
9
|
+
|
|
10
|
+
import {
|
|
11
|
+
facturaInterAgenciaDefinition,
|
|
12
|
+
INTER_AGENCIA_RFCS,
|
|
13
|
+
} from '../../src/document-types/factura-inter-agencia.js';
|
|
14
|
+
import { extractDocumentFields } from '../../src/document-type-shared.js';
|
|
15
|
+
|
|
16
|
+
const NORCOM_RFC = 'NAA120215F20';
|
|
17
|
+
const PALCO_RFC = 'PCC1008161WA';
|
|
18
|
+
|
|
19
|
+
// Realistic CFDI 4.0 XML between NORCOM (emisor) and PALCO (receptor).
|
|
20
|
+
// Conceptos use ClaveProdServ 78141502 (servicios de agentes aduaneros).
|
|
21
|
+
const CFDI_XML_INTER_AGENCIA = `<?xml version="1.0" encoding="utf-8"?>
|
|
22
|
+
<cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" Version="4.0" Folio="012749"
|
|
23
|
+
TipoDeComprobante="I" SubTotal="3000.00" Total="3480.00" Moneda="MXN">
|
|
24
|
+
<cfdi:Emisor Rfc="${NORCOM_RFC}" Nombre="NORCOM AGENTES ADUANALES" RegimenFiscal="601"/>
|
|
25
|
+
<cfdi:Receptor Rfc="${PALCO_RFC}" Nombre="PALCO, CONSORCIO DE COMERCIO INTERNACIONAL"
|
|
26
|
+
DomicilioFiscalReceptor="32380" RegimenFiscalReceptor="601" UsoCFDI="G03"/>
|
|
27
|
+
<cfdi:Conceptos>
|
|
28
|
+
<cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="HONO" Cantidad="1.00"
|
|
29
|
+
ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="HONORARIOS"
|
|
30
|
+
ValorUnitario="1300.00" Importe="1300.00" ObjetoImp="02"/>
|
|
31
|
+
<cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="VALID" Cantidad="1.00"
|
|
32
|
+
ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="VALIDACION"
|
|
33
|
+
ValorUnitario="200.00" Importe="200.00" ObjetoImp="02"/>
|
|
34
|
+
</cfdi:Conceptos>
|
|
35
|
+
</cfdi:Comprobante>`;
|
|
36
|
+
|
|
37
|
+
// Same agencies but conceptos do NOT use 78141502 — should NOT match.
|
|
38
|
+
const CFDI_XML_INTER_AGENCIA_WRONG_CONCEPT = CFDI_XML_INTER_AGENCIA.replace(
|
|
39
|
+
/78141502/g,
|
|
40
|
+
'90121502',
|
|
41
|
+
);
|
|
42
|
+
|
|
43
|
+
// CFDI between unrelated taxpayers — should NOT match.
|
|
44
|
+
const CFDI_XML_REGULAR = `<?xml version="1.0" encoding="utf-8"?>
|
|
45
|
+
<cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" Version="4.0" Folio="000123"
|
|
46
|
+
TipoDeComprobante="I" SubTotal="100.00" Total="116.00">
|
|
47
|
+
<cfdi:Emisor Rfc="ACME010101AB1" Nombre="ACME COMERCIAL" RegimenFiscal="601"/>
|
|
48
|
+
<cfdi:Receptor Rfc="XYZ020202CD2" Nombre="CLIENTE FINAL"
|
|
49
|
+
DomicilioFiscalReceptor="00000" RegimenFiscalReceptor="601" UsoCFDI="G03"/>
|
|
50
|
+
<cfdi:Conceptos>
|
|
51
|
+
<cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="ITEM" Cantidad="1.00"
|
|
52
|
+
ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="SERVICIO"
|
|
53
|
+
ValorUnitario="100.00" Importe="100.00" ObjetoImp="02"/>
|
|
54
|
+
</cfdi:Conceptos>
|
|
55
|
+
</cfdi:Comprobante>`;
|
|
56
|
+
|
|
57
|
+
// Text extracted from the printable PDF representation of a CFDI inter-agencia.
|
|
58
|
+
// Mirrors what pdf-parse returns for the sample SICINGR70-012749(...).pdf.
|
|
59
|
+
const CFDI_PDF_TEXT_INTER_AGENCIA = `NORCOM AGENTES ADUANALES S.C
|
|
60
|
+
Tipo de Comprobante: (I) Ingreso
|
|
61
|
+
Folio Fiscal 84FC9CE2-00D5-4843-B377-B463321F9FC6
|
|
62
|
+
Numero Folio 012749
|
|
63
|
+
Emisor
|
|
64
|
+
RFC ${NORCOM_RFC}
|
|
65
|
+
Razon Social NORCOM AGENTES ADUANALES
|
|
66
|
+
Receptor
|
|
67
|
+
RFC ${PALCO_RFC}
|
|
68
|
+
Razon Social PALCO, CONSORCIO DE COMERCIO INTERNACIONAL
|
|
69
|
+
Pedimento: 3458 6000046 Fecha: 17/02/2026 Tipo: EXP Clave: A1
|
|
70
|
+
Erogaciones
|
|
71
|
+
78141502 HONO HONORARIOS 1,300.00
|
|
72
|
+
78141502 SERCOM SERVICIOS COMPLEMENTARIOS 1,500.00
|
|
73
|
+
78141502 VALID VALIDACION 200.00
|
|
74
|
+
Sello Digital del CFDI
|
|
75
|
+
c4oBJ8/zAol0zg1jVe4MK8...
|
|
76
|
+
Cadena Original del Complemento de Certificacion Digital del SAT
|
|
77
|
+
||4.0|012749|...
|
|
78
|
+
Este documento es una representación impresa de un CFDI`;
|
|
79
|
+
|
|
80
|
+
describe('factura_inter_agencia matcher', () => {
|
|
81
|
+
describe('configured RFC set', () => {
|
|
82
|
+
it('includes NORCOM and PALCO RFCs', () => {
|
|
83
|
+
expect(INTER_AGENCIA_RFCS).toContain(NORCOM_RFC);
|
|
84
|
+
expect(INTER_AGENCIA_RFCS).toContain(PALCO_RFC);
|
|
85
|
+
});
|
|
86
|
+
});
|
|
87
|
+
|
|
88
|
+
describe('match()', () => {
|
|
89
|
+
it('matches a NORCOM→PALCO XML CFDI with broker-service conceptos', () => {
|
|
90
|
+
expect(facturaInterAgenciaDefinition.match(CFDI_XML_INTER_AGENCIA)).toBe(
|
|
91
|
+
true,
|
|
92
|
+
);
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
it('matches the PDF-text representation of the same CFDI', () => {
|
|
96
|
+
expect(
|
|
97
|
+
facturaInterAgenciaDefinition.match(CFDI_PDF_TEXT_INTER_AGENCIA),
|
|
98
|
+
).toBe(true);
|
|
99
|
+
});
|
|
100
|
+
|
|
101
|
+
it('does NOT match when ClaveProdServ is not 78141502', () => {
|
|
102
|
+
expect(
|
|
103
|
+
facturaInterAgenciaDefinition.match(
|
|
104
|
+
CFDI_XML_INTER_AGENCIA_WRONG_CONCEPT,
|
|
105
|
+
),
|
|
106
|
+
).toBe(false);
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
it('does NOT match a CFDI between unrelated taxpayers', () => {
|
|
110
|
+
expect(facturaInterAgenciaDefinition.match(CFDI_XML_REGULAR)).toBe(false);
|
|
111
|
+
});
|
|
112
|
+
|
|
113
|
+
it('does NOT match arbitrary non-CFDI text containing the RFCs', () => {
|
|
114
|
+
const text = `Reporte interno
|
|
115
|
+
RFC emisor: ${NORCOM_RFC}
|
|
116
|
+
RFC cliente: ${PALCO_RFC}
|
|
117
|
+
Clave 78141502`;
|
|
118
|
+
// No CFDI markers → should not match.
|
|
119
|
+
expect(facturaInterAgenciaDefinition.match(text)).toBe(false);
|
|
120
|
+
});
|
|
121
|
+
|
|
122
|
+
it('does NOT match if only one of the configured RFCs is present', () => {
|
|
123
|
+
const text = CFDI_XML_INTER_AGENCIA.replace(PALCO_RFC, 'XYZ020202CD2');
|
|
124
|
+
expect(facturaInterAgenciaDefinition.match(text)).toBe(false);
|
|
125
|
+
});
|
|
126
|
+
});
|
|
127
|
+
|
|
128
|
+
describe('extractors', () => {
|
|
129
|
+
it('extracts emisor + receptor RFCs from XML', () => {
|
|
130
|
+
const rfcEmisor = facturaInterAgenciaDefinition.extractors
|
|
131
|
+
.find((e) => e.field === 'rfcEmisor')
|
|
132
|
+
.extract(CFDI_XML_INTER_AGENCIA);
|
|
133
|
+
const rfcReceptor = facturaInterAgenciaDefinition.extractors
|
|
134
|
+
.find((e) => e.field === 'rfcReceptor')
|
|
135
|
+
.extract(CFDI_XML_INTER_AGENCIA);
|
|
136
|
+
|
|
137
|
+
expect(rfcEmisor.found).toBe(true);
|
|
138
|
+
expect(rfcEmisor.value).toBe(NORCOM_RFC);
|
|
139
|
+
expect(rfcReceptor.found).toBe(true);
|
|
140
|
+
expect(rfcReceptor.value).toBe(PALCO_RFC);
|
|
141
|
+
});
|
|
142
|
+
|
|
143
|
+
it('extracts both RFCs from PDF text via fallback', () => {
|
|
144
|
+
const rfcEmisor = facturaInterAgenciaDefinition.extractors
|
|
145
|
+
.find((e) => e.field === 'rfcEmisor')
|
|
146
|
+
.extract(CFDI_PDF_TEXT_INTER_AGENCIA);
|
|
147
|
+
const rfcReceptor = facturaInterAgenciaDefinition.extractors
|
|
148
|
+
.find((e) => e.field === 'rfcReceptor')
|
|
149
|
+
.extract(CFDI_PDF_TEXT_INTER_AGENCIA);
|
|
150
|
+
|
|
151
|
+
expect(rfcEmisor.found).toBe(true);
|
|
152
|
+
expect(rfcReceptor.found).toBe(true);
|
|
153
|
+
// The fallback returns the RFCs in order of first appearance in the document, so compare them as an order-independent pair.
|
|
154
|
+
const found = [rfcEmisor.value, rfcReceptor.value].sort();
|
|
155
|
+
expect(found).toEqual([NORCOM_RFC, PALCO_RFC].sort());
|
|
156
|
+
});
|
|
157
|
+
|
|
158
|
+
it('extracts the UUID (folio fiscal) from both formats', () => {
|
|
159
|
+
const uuidExtractor = facturaInterAgenciaDefinition.extractors.find(
|
|
160
|
+
(e) => e.field === 'uuid',
|
|
161
|
+
);
|
|
162
|
+
|
|
163
|
+
const fromXml = uuidExtractor.extract(CFDI_XML_INTER_AGENCIA);
|
|
164
|
+
// XML sample has no UUID inside the comprobante body — that's fine.
|
|
165
|
+
expect(fromXml.found).toBe(false);
|
|
166
|
+
|
|
167
|
+
const fromPdf = uuidExtractor.extract(CFDI_PDF_TEXT_INTER_AGENCIA);
|
|
168
|
+
expect(fromPdf.found).toBe(true);
|
|
169
|
+
expect(fromPdf.value).toBe('84FC9CE2-00D5-4843-B377-B463321F9FC6');
|
|
170
|
+
});
|
|
171
|
+
|
|
172
|
+
it('extracts numPedimento from the printable PDF "Pedimento:" line', () => {
|
|
173
|
+
const numExtractor = facturaInterAgenciaDefinition.extractors.find(
|
|
174
|
+
(e) => e.field === 'numPedimento',
|
|
175
|
+
);
|
|
176
|
+
const result = numExtractor.extract(CFDI_PDF_TEXT_INTER_AGENCIA);
|
|
177
|
+
expect(result.found).toBe(true);
|
|
178
|
+
expect(result.value).toBe('34586000046');
|
|
179
|
+
});
|
|
180
|
+
|
|
181
|
+
it('extracts the CFDI folio from XML attribute', () => {
|
|
182
|
+
const folio = facturaInterAgenciaDefinition.extractors
|
|
183
|
+
.find((e) => e.field === 'folio')
|
|
184
|
+
.extract(CFDI_XML_INTER_AGENCIA);
|
|
185
|
+
expect(folio.found).toBe(true);
|
|
186
|
+
expect(folio.value).toBe('012749');
|
|
187
|
+
});
|
|
188
|
+
});
|
|
189
|
+
|
|
190
|
+
describe('registry order (factura_inter_agencia precedes facturas_comerciales)', () => {
|
|
191
|
+
it('resolves the inter-agency CFDI XML to factura_inter_agencia, not factura_comercial', () => {
|
|
192
|
+
const [detectedType] = extractDocumentFields(
|
|
193
|
+
CFDI_XML_INTER_AGENCIA,
|
|
194
|
+
'xml',
|
|
195
|
+
'/tmp/SICINGR70-012749(PALCO).XML',
|
|
196
|
+
);
|
|
197
|
+
expect(detectedType).toBe('factura_inter_agencia');
|
|
198
|
+
});
|
|
199
|
+
|
|
200
|
+
it('resolves the inter-agency CFDI PDF text to factura_inter_agencia', () => {
|
|
201
|
+
const [detectedType] = extractDocumentFields(
|
|
202
|
+
CFDI_PDF_TEXT_INTER_AGENCIA,
|
|
203
|
+
'pdf',
|
|
204
|
+
'/tmp/SICINGR70-012749(PALCO).pdf',
|
|
205
|
+
);
|
|
206
|
+
expect(detectedType).toBe('factura_inter_agencia');
|
|
207
|
+
});
|
|
208
|
+
|
|
209
|
+
it('falls through to factura_comercial for a regular CFDI', () => {
|
|
210
|
+
const [detectedType] = extractDocumentFields(
|
|
211
|
+
CFDI_XML_REGULAR,
|
|
212
|
+
'xml',
|
|
213
|
+
'/tmp/regular-invoice.xml',
|
|
214
|
+
);
|
|
215
|
+
expect(detectedType).toBe('factura_comercial');
|
|
216
|
+
});
|
|
217
|
+
});
|
|
218
|
+
});
|
|
@@ -0,0 +1,271 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regression tests for the pedimento_completo_xml matcher.
|
|
3
|
+
*
|
|
4
|
+
* Covers:
|
|
5
|
+
* 1) Basic detection + arela_path composition from a VUCEM
|
|
6
|
+
* `consultarPedimentoCompletoRespuesta` XML.
|
|
7
|
+
* 2) **YY truth source** — when the pedimento is opened in one year and
|
|
8
|
+
* paid in the next (e.g. presentation 2025-12, payment 2026-01), the
|
|
9
|
+
* 15-digit pedimento MUST keep the presentation year (`25...`), not the
|
|
10
|
+
* payment year (`26...`). This matches what VUCEM stamps in the
|
|
11
|
+
* filename and what the PDF matchers produce.
|
|
12
|
+
* 3) **Aduana padding** — VUCEM returns the aduana code without leading
|
|
13
|
+
* zeros (e.g. `70` for Ciudad Juárez instead of the canonical `070`).
|
|
14
|
+
* The 2-digit "sección aduanera" prefix used inside the 15-digit
|
|
15
|
+
* pedimento is the first 2 digits of the 3-digit form (`70` → `07`).
|
|
16
|
+
* 4) **numPedimento backfill** — the XML matcher composes numPedimento
|
|
17
|
+
* externally via `extractNumPedimento` rather than as a field
|
|
18
|
+
* extractor. `extractDocumentFields` must backfill it so that
|
|
19
|
+
* `composeArelaPath` can find it.
|
|
20
|
+
* 5) Resolution to `proforma_completo_xml` when no payment evidence
|
|
21
|
+
* exists in the body.
|
|
22
|
+
*/
|
|
23
|
+
import { describe, it, expect } from '@jest/globals';
|
|
24
|
+
|
|
25
|
+
import { extractDocumentFields } from '../../src/document-type-shared.js';
|
|
26
|
+
import { composeArelaPath } from '../../src/file-detection.js';
|
|
27
|
+
|
|
28
|
+
// ---------------------------------------------------------------------------
|
|
29
|
+
// Test fixtures
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
|
|
32
|
+
/**
|
|
33
|
+
* Build a minimal VUCEM consultarPedimentoCompletoRespuesta XML.
|
|
34
|
+
* Only the tags the matcher actually reads are included.
|
|
35
|
+
*/
|
|
36
|
+
function buildXml({
|
|
37
|
+
rfc = 'CEM090106MU3',
|
|
38
|
+
pedimento = '5063036',
|
|
39
|
+
claveDocumento = 'V1',
|
|
40
|
+
tipoOperacionDesc = 'Exportacion',
|
|
41
|
+
aduanaClave = '70',
|
|
42
|
+
presentationDate = '2025-12-01-06:00',
|
|
43
|
+
paymentDate = '2026-01-07-06:00',
|
|
44
|
+
rectFechaPago = null,
|
|
45
|
+
facturas = ['V1-FUJIKURA MEX-202512'],
|
|
46
|
+
edDocs = [],
|
|
47
|
+
} = {}) {
|
|
48
|
+
const fechas = [];
|
|
49
|
+
if (presentationDate) {
|
|
50
|
+
fechas.push(
|
|
51
|
+
`<ns2:fechas><ns2:fecha>${presentationDate}</ns2:fecha><ns2:tipo><ns2:clave>5</ns2:clave><ns2:descripcion>FECHA DE PRESENTACION</ns2:descripcion></ns2:tipo></ns2:fechas>`,
|
|
52
|
+
);
|
|
53
|
+
}
|
|
54
|
+
if (paymentDate) {
|
|
55
|
+
fechas.push(
|
|
56
|
+
`<ns2:fechas><ns2:fecha>${paymentDate}</ns2:fecha><ns2:tipo><ns2:clave>2</ns2:clave><ns2:descripcion>FECHA DE PAGO</ns2:descripcion></ns2:tipo></ns2:fechas>`,
|
|
57
|
+
);
|
|
58
|
+
}
|
|
59
|
+
|
|
60
|
+
const rect = rectFechaPago
|
|
61
|
+
? `<ns2:rectificacion><ns2:fechaPago>${rectFechaPago}</ns2:fechaPago></ns2:rectificacion>`
|
|
62
|
+
: '';
|
|
63
|
+
|
|
64
|
+
const facturasXml = facturas
|
|
65
|
+
.map(
|
|
66
|
+
(num) =>
|
|
67
|
+
`<ns2:facturas><ns2:numero>${num}</ns2:numero></ns2:facturas>`,
|
|
68
|
+
)
|
|
69
|
+
.join('');
|
|
70
|
+
|
|
71
|
+
const identificadoresXml =
|
|
72
|
+
edDocs.length === 0
|
|
73
|
+
? ''
|
|
74
|
+
: `<ns2:identificadores>${edDocs
|
|
75
|
+
.map(
|
|
76
|
+
(code) =>
|
|
77
|
+
`<ns2:identificadores><claveIdentificador><clave>ED</clave></claveIdentificador><complemento1>${code}</complemento1></ns2:identificadores>`,
|
|
78
|
+
)
|
|
79
|
+
.join('')}</ns2:identificadores>`;
|
|
80
|
+
|
|
81
|
+
return `<?xml version="1.0" encoding="UTF-8"?>
|
|
82
|
+
<S:Envelope xmlns:S="http://schemas.xmlsoap.org/soap/envelope/">
|
|
83
|
+
<S:Body>
|
|
84
|
+
<ns2:consultarPedimentoCompletoRespuesta xmlns:ns2="http://x">
|
|
85
|
+
<ns2:pedimento>
|
|
86
|
+
<ns2:pedimento>${pedimento}</ns2:pedimento>
|
|
87
|
+
<ns2:encabezado>
|
|
88
|
+
<ns2:claveDocumento><ns2:clave>${claveDocumento}</ns2:clave></ns2:claveDocumento>
|
|
89
|
+
<ns2:tipoOperacion><ns2:clave>2</ns2:clave><ns2:descripcion>${tipoOperacionDesc}</ns2:descripcion></ns2:tipoOperacion>
|
|
90
|
+
<ns2:aduanaEntradaSalida><ns2:clave>${aduanaClave}</ns2:clave></ns2:aduanaEntradaSalida>
|
|
91
|
+
</ns2:encabezado>
|
|
92
|
+
<ns2:importadorExportador>
|
|
93
|
+
<ns2:rfc>${rfc}</ns2:rfc>
|
|
94
|
+
${fechas.join('\n')}
|
|
95
|
+
</ns2:importadorExportador>
|
|
96
|
+
${rect}
|
|
97
|
+
${facturasXml}
|
|
98
|
+
${identificadoresXml}
|
|
99
|
+
</ns2:pedimento>
|
|
100
|
+
</ns2:consultarPedimentoCompletoRespuesta>
|
|
101
|
+
</S:Body>
|
|
102
|
+
</S:Envelope>`;
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
// ---------------------------------------------------------------------------
|
|
106
|
+
// Tests
|
|
107
|
+
// ---------------------------------------------------------------------------
|
|
108
|
+
|
|
109
|
+
describe('pedimento_completo_xml matcher', () => {
|
|
110
|
+
it('detects, extracts, and composes arela_path for a basic export pedimento', () => {
|
|
111
|
+
const xml = buildXml({
|
|
112
|
+
rfc: 'CEM090106MU3',
|
|
113
|
+
pedimento: '5063036',
|
|
114
|
+
aduanaClave: '70',
|
|
115
|
+
presentationDate: '2025-06-15-06:00',
|
|
116
|
+
paymentDate: '2025-06-20-06:00',
|
|
117
|
+
});
|
|
118
|
+
// 15-digit filename pattern: YY=25 AA=07 PPPP=3429 NNNNNNN=5063036
|
|
119
|
+
const filePath = '/x/2025/250734295063036_250734295063036.xml';
|
|
120
|
+
|
|
121
|
+
const [type, fields, ped, year] = extractDocumentFields(
|
|
122
|
+
xml,
|
|
123
|
+
'xml',
|
|
124
|
+
filePath,
|
|
125
|
+
);
|
|
126
|
+
|
|
127
|
+
expect(type).toBe('pedimento_completo_xml');
|
|
128
|
+
expect(ped).toBe('250734295063036');
|
|
129
|
+
expect(year).toBe(2025);
|
|
130
|
+
expect(fields.find((f) => f.name === 'rfc')?.value).toBe('CEM090106MU3');
|
|
131
|
+
expect(fields.find((f) => f.name === 'aduanaEntradaSalida')?.value).toBe(
|
|
132
|
+
'07',
|
|
133
|
+
);
|
|
134
|
+
// Backfill check: numPedimento must be exposed as a field so
|
|
135
|
+
// composeArelaPath can find it.
|
|
136
|
+
expect(fields.find((f) => f.name === 'numPedimento')?.value).toBe(
|
|
137
|
+
'250734295063036',
|
|
138
|
+
);
|
|
139
|
+
|
|
140
|
+
const arela = composeArelaPath(type, fields, year, filePath);
|
|
141
|
+
expect(arela).toBe('CEM090106MU3/2025/3429/07/250734295063036/');
|
|
142
|
+
});
|
|
143
|
+
|
|
144
|
+
it('uses presentation date (not payment date) for YY when payment crosses calendar year', () => {
|
|
145
|
+
// Pedimento opened Dec 2025, paid Jan 2026 — the YY must be 25.
|
|
146
|
+
const xml = buildXml({
|
|
147
|
+
pedimento: '5063036',
|
|
148
|
+
aduanaClave: '70',
|
|
149
|
+
presentationDate: '2025-12-01-06:00',
|
|
150
|
+
paymentDate: '2026-01-07-06:00',
|
|
151
|
+
});
|
|
152
|
+
// Use the 3-part filename pattern (no YY in filename) so YY comes from XML body.
|
|
153
|
+
const filePath = '/x/070-3429-5063036.xml';
|
|
154
|
+
|
|
155
|
+
const [type, , ped, year] = extractDocumentFields(xml, 'xml', filePath);
|
|
156
|
+
|
|
157
|
+
expect(type).toBe('pedimento_completo_xml');
|
|
158
|
+
expect(ped).toBe('250734295063036');
|
|
159
|
+
expect(year).toBe(2025);
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
it('falls back to payment date YY when presentation date is missing', () => {
|
|
163
|
+
const xml = buildXml({
|
|
164
|
+
pedimento: '5063036',
|
|
165
|
+
aduanaClave: '70',
|
|
166
|
+
presentationDate: null, // No clave=5 block
|
|
167
|
+
paymentDate: '2026-01-07-06:00',
|
|
168
|
+
});
|
|
169
|
+
const filePath = '/x/070-3429-5063036.xml';
|
|
170
|
+
|
|
171
|
+
const [, , ped, year] = extractDocumentFields(xml, 'xml', filePath);
|
|
172
|
+
|
|
173
|
+
expect(ped).toBe('260734295063036');
|
|
174
|
+
expect(year).toBe(2026);
|
|
175
|
+
});
|
|
176
|
+
|
|
177
|
+
it('prefers filename YY over body fechas (VUCEM-stamped truth)', () => {
|
|
178
|
+
// Filename says YY=24 but body has presentation=2025. Filename wins.
|
|
179
|
+
const xml = buildXml({
|
|
180
|
+
pedimento: '5063036',
|
|
181
|
+
aduanaClave: '70',
|
|
182
|
+
presentationDate: '2025-12-01-06:00',
|
|
183
|
+
paymentDate: '2026-01-07-06:00',
|
|
184
|
+
});
|
|
185
|
+
const filePath = '/x/240734295063036_240734295063036.xml';
|
|
186
|
+
|
|
187
|
+
const [, , ped, year] = extractDocumentFields(xml, 'xml', filePath);
|
|
188
|
+
|
|
189
|
+
expect(ped).toBe('240734295063036');
|
|
190
|
+
expect(year).toBe(2024);
|
|
191
|
+
});
|
|
192
|
+
|
|
193
|
+
it('pads VUCEM aduana correctly: 70 -> 07, 750 -> 75, 40 -> 04', () => {
|
|
194
|
+
const cases = [
|
|
195
|
+
{ aduanaClave: '70', expected: '07', // Cd. Juárez (3-digit canonical: 070)
|
|
196
|
+
filename: '/x/070-3429-5000001.xml' },
|
|
197
|
+
{ aduanaClave: '750', expected: '75', // Puebla
|
|
198
|
+
filename: '/x/750-3429-5000002.xml' },
|
|
199
|
+
{ aduanaClave: '40', expected: '04', // Lázaro Cárdenas (canonical: 040)
|
|
200
|
+
filename: '/x/040-3429-5000003.xml' },
|
|
201
|
+
];
|
|
202
|
+
|
|
203
|
+
for (const c of cases) {
|
|
204
|
+
const xml = buildXml({
|
|
205
|
+
pedimento: c.filename.match(/-(\d{7})\.xml$/)[1],
|
|
206
|
+
aduanaClave: c.aduanaClave,
|
|
207
|
+
presentationDate: '2025-06-15-06:00',
|
|
208
|
+
paymentDate: '2025-06-20-06:00',
|
|
209
|
+
});
|
|
210
|
+
const [, fields, ped] = extractDocumentFields(xml, 'xml', c.filename);
|
|
211
|
+
expect(fields.find((f) => f.name === 'aduanaEntradaSalida')?.value).toBe(
|
|
212
|
+
c.expected,
|
|
213
|
+
);
|
|
214
|
+
// Positions 2-3 of the composed 15-digit pedimento must equal the
|
|
215
|
+
// aduana prefix.
|
|
216
|
+
expect(ped.substring(2, 4)).toBe(c.expected);
|
|
217
|
+
}
|
|
218
|
+
});
|
|
219
|
+
|
|
220
|
+
it('resolves to proforma_completo_xml when no payment evidence exists', () => {
|
|
221
|
+
const xml = buildXml({
|
|
222
|
+
pedimento: '5063036',
|
|
223
|
+
aduanaClave: '70',
|
|
224
|
+
presentationDate: '2025-12-01-06:00',
|
|
225
|
+
paymentDate: null, // No payment, no rectificacion
|
|
226
|
+
});
|
|
227
|
+
const filePath = '/x/070-3429-5063036.xml';
|
|
228
|
+
|
|
229
|
+
const [type] = extractDocumentFields(xml, 'xml', filePath);
|
|
230
|
+
|
|
231
|
+
expect(type).toBe('proforma_completo_xml');
|
|
232
|
+
});
|
|
233
|
+
|
|
234
|
+
it('extracts cove and rfc correctly', () => {
|
|
235
|
+
const xml = buildXml({
|
|
236
|
+
rfc: 'CEM090106MU3',
|
|
237
|
+
facturas: ['V1-FUJIKURA MEX-202512', 'INV-2'],
|
|
238
|
+
});
|
|
239
|
+
const filePath = '/x/250734295063036_250734295063036.xml';
|
|
240
|
+
|
|
241
|
+
const [, fields] = extractDocumentFields(xml, 'xml', filePath);
|
|
242
|
+
|
|
243
|
+
expect(fields.find((f) => f.name === 'rfc')?.value).toBe('CEM090106MU3');
|
|
244
|
+
expect(fields.find((f) => f.name === 'cove')?.value).toBe(
|
|
245
|
+
'[V1-FUJIKURA MEX-202512,INV-2]',
|
|
246
|
+
);
|
|
247
|
+
});
|
|
248
|
+
|
|
249
|
+
it('returns null arela_path when filename is unrecognized (no patente)', () => {
|
|
250
|
+
const xml = buildXml({
|
|
251
|
+
pedimento: '5063036',
|
|
252
|
+
aduanaClave: '70',
|
|
253
|
+
presentationDate: '2025-06-15-06:00',
|
|
254
|
+
paymentDate: '2025-06-20-06:00',
|
|
255
|
+
});
|
|
256
|
+
// Unrecognized filename — no patente derivable.
|
|
257
|
+
const filePath = '/x/random_name.xml';
|
|
258
|
+
|
|
259
|
+
const [type, fields, ped, year] = extractDocumentFields(
|
|
260
|
+
xml,
|
|
261
|
+
'xml',
|
|
262
|
+
filePath,
|
|
263
|
+
);
|
|
264
|
+
|
|
265
|
+
expect(type).toBe('pedimento_completo_xml');
|
|
266
|
+
expect(ped).toBeNull();
|
|
267
|
+
expect(year).toBeNull();
|
|
268
|
+
// composeArelaPath returns null because patente is missing.
|
|
269
|
+
expect(composeArelaPath(type, fields, year, filePath)).toBeNull();
|
|
270
|
+
});
|
|
271
|
+
});
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Regression tests for the pedimento_simplificado matcher.
|
|
3
|
+
*
|
|
4
|
+
* Covers PDF layouts where:
|
|
5
|
+
* - The header reads "FORMA SIMPLIFICADA DEL PEDIMENTO" (with DEL),
|
|
6
|
+
* not the canonical "DE PEDIMENTO".
|
|
7
|
+
* - The header trio prints "T. OPER" WITHOUT a trailing colon
|
|
8
|
+
* (the value sits in a separate table cell).
|
|
9
|
+
*
|
|
10
|
+
* Real-world example: REF NQR26-079, Aduana 640 (Querétaro), patente 3458.
|
|
11
|
+
* Before this regression test, the matcher fell through to
|
|
12
|
+
* `facturas_comerciales` because the word "FACTURA" appears in the
|
|
13
|
+
* "OBSERVACIONES" block.
|
|
14
|
+
*/
|
|
15
|
+
import { describe, it, expect } from '@jest/globals';
|
|
16
|
+
|
|
17
|
+
// Importing only the dispatcher avoids circular-init issues caused by
|
|
18
|
+
// `_pedimento-shared-extractors.js` pulling FieldResult from document-type-shared.
|
|
19
|
+
import { extractDocumentFields } from '../../src/document-type-shared.js';
|
|
20
|
+
|
|
21
|
+
// REAL pdf-parse output from the NQR26-079 simplificado PDF
|
|
22
|
+
// (CSM9204097Q1, patente 3458, aduana 640).
|
|
23
|
+
// Captured verbatim with `PDFParse({data}).getText()` — pdf-parse extracts
|
|
24
|
+
// table cells out of visual order, so labels and values often live on
|
|
25
|
+
// different lines (see the FECHA DE PAGO block: label appears, then a few
|
|
26
|
+
// unrelated cells, then the date sits on its own line with the importe).
|
|
27
|
+
// This is exactly what the matchers and extractors see in production.
|
|
28
|
+
const SIMP_DEL_NQR26079_TEXT = `A1 CVE. PEDIMENTO: IMP T. OPER 26 64 3458 6000079 NUM. PEDIMENTO:
|
|
29
|
+
CERTIFICACIONES
|
|
30
|
+
ADUANA E/S:
|
|
31
|
+
DATOS DEL IMPORTADOR / EXPORTADOR
|
|
32
|
+
RFC: CURP:
|
|
33
|
+
CÓDIGO DE
|
|
34
|
+
ACEPTACIÓN
|
|
35
|
+
640
|
|
36
|
+
CSM9204097Q1
|
|
37
|
+
FECHAS:
|
|
38
|
+
17/03/2026
|
|
39
|
+
Ped. 6000079
|
|
40
|
+
CLAVE DE LA SECCION ADUANERA
|
|
41
|
+
DE DESPACHO:
|
|
42
|
+
QUERETARO, EL MARQUES Y
|
|
43
|
+
COLON, QUERETARO.
|
|
44
|
+
640
|
|
45
|
+
DESTINO: 9 PESO BRUTO: 5.350
|
|
46
|
+
MARCAS,NUMEROS Y TOTAL DE BULTOS: 1
|
|
47
|
+
04/03/2026
|
|
48
|
+
ENTRADA
|
|
49
|
+
PAGO
|
|
50
|
+
3PW4CLHE
|
|
51
|
+
S/M S/N
|
|
52
|
+
CODIGO DE BARRAS
|
|
53
|
+
0326 0132 XMP1 4914 6243 989
|
|
54
|
+
*** PAGO ELECTRONICO ***
|
|
55
|
+
DEPÓSITO REFERENCIADO - LÍNEA DE CAPTURA
|
|
56
|
+
PATENTE:
|
|
57
|
+
NOMBRE DE LA INSTITUCIÓN BANCARIA:
|
|
58
|
+
LÍNEA DE CAPTURA:
|
|
59
|
+
IMPORTE PAGADO:
|
|
60
|
+
NÚMERO DE OPERACIÓN BANCARIA:
|
|
61
|
+
NÚMERO DE TRANSACCIÓN SAT:
|
|
62
|
+
MEDIO DE PRESENTACIÓN:
|
|
63
|
+
MEDIO DE RECEPCIÓN/COBRO:
|
|
64
|
+
OTROS MEDIOS ELECTRÓNICOS (PAGO ELECTRÓNICO)
|
|
65
|
+
EFECTIVO (CARGO A CUENTA)
|
|
66
|
+
PEDIMENTO: ADUANA:
|
|
67
|
+
FECHA DE PAGO:
|
|
68
|
+
0326 0132 XMP1 4914 6243
|
|
69
|
+
6000079 640
|
|
70
|
+
17/03/2026 $989
|
|
71
|
+
Banco Nacional de México, S.A.
|
|
72
|
+
00000000703543
|
|
73
|
+
3458
|
|
74
|
+
40124170320261403012
|
|
75
|
+
NUMERO (GUIA/ORDEN EMBARQUE)/ID: 023-51315051 M 490453269837 H
|
|
76
|
+
NÚMERO DE ACUSE DE VALOR COVE268074HT1
|
|
77
|
+
NÚMERO DE E-DOCUMENT: 0438261DOG9W3 01702619TYEU7
|
|
78
|
+
OBSERVACIONES
|
|
79
|
+
FACTURA DE ACUERDO AL ARTÍCULO 36-A DE LA LEY ADUANERA VIGENTE Y A LA REGLA 3.1.
|
|
80
|
+
8. DE LAS REGLAS
|
|
81
|
+
GENERALES DE COMERCIO EXTERIOR VIGENTES.
|
|
82
|
+
SE TRANSMITE PREVIAMENTE A VENTANILLA DIGITAL CONFORME A LA REGLA 1.9.18. DE LAS
|
|
83
|
+
REGLAS GENERALES DE
|
|
84
|
+
COMERCIO EXTERIOR VIGENTES.
|
|
85
|
+
SE EFECTÚA LA TRANSMISIÓN DIGITAL DE CONFORMIDAD A LA REGLA 3.1.17. Y 3.1.31. DE
|
|
86
|
+
LAS REGLAS GENERALES
|
|
87
|
+
DE COMERCIO EXTERIOR VIGENTES.
|
|
88
|
+
LA INFORMACIÓN CONTENIDA EN ESTE PEDIMENTO FUE SUMINISTRADA POR EL IMPORTADOR DE
|
|
89
|
+
CONFORMIDAD CON EL
|
|
90
|
+
ARTICULO 54 DE LA LEY ADUANERA EN VIGOR.
|
|
91
|
+
SE EXIME NOM-024-SCFI-2013 EN TERMINOS DEL NUMERAL 10, FRACC. X INCISO H, IMPORT
|
|
92
|
+
ACIÓN DEFINITIVA,
|
|
93
|
+
TRATÁNDOSE DE IMPORTADORES QUE CUENTEN CON UN PROSEC.
|
|
94
|
+
SE EXIME NOM-003-SCFI-2014 EN TERMINOS DEL NUMERAL 10, FRACC. X INCISO H, IMPORT
|
|
95
|
+
ACIÓN DEFINITIVA,
|
|
96
|
+
TRATÁNDOSE DE IMPORTADORES QUE CUENTEN CON UN PROSEC.
|
|
97
|
+
JOAQUIN GOMEZ ABAD
|
|
98
|
+
AGENTE ADUANAL, AGENCIA ADUANAL, APODERADO ADUANAL O DE ALMACEN
|
|
99
|
+
NOMBRE O RAZ. SOC.:
|
|
100
|
+
RFC: GAA1003111U6 GOAJ641219HDFMBQ09 CURP:
|
|
101
|
+
e.firma:
|
|
102
|
+
NUMERO DE SERIE DEL CERTIFICADO: 00001000000705949781
|
|
103
|
+
GOAJ641219QT5 RFC:
|
|
104
|
+
DECLARO BAJO PROTESTA DE DECIR VERDAD, EN LOS TERMINOS
|
|
105
|
+
DE LO DISPUESTO ARTICULO 81 DE LA LEY: PATENTE O
|
|
106
|
+
AUTORIZACIÓN: 3458 GOMEZ ABAD ASESORES EN COMERCIO EXTERIOR S.C.
|
|
107
|
+
FORMA SIMPLIFICADA DEL PEDIMENTO
|
|
108
|
+
SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
|
|
109
|
+
REF: NQR26-079 Página 1 de 2
|
|
110
|
+
|
|
111
|
+
-- 1 of 2 --
|
|
112
|
+
|
|
113
|
+
FORMA SIMPLIFICADA DEL PEDIMENTO
|
|
114
|
+
SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
|
|
115
|
+
REF: NQR26-079 Página 1 de 2
|
|
116
|
+
A1 CVE. PEDIMENTO: IMP T. OPER 26 64 3458 6000079 NUM. PEDIMENTO:
|
|
117
|
+
CURP:
|
|
118
|
+
RFC: CSM9204097Q1
|
|
119
|
+
****** ****** ********** ********** FIN DE PEDIMENTO NUM. TOTAL DE PARTID
|
|
120
|
+
AS: CLAVE PREVALIDADOR: 010 1
|
|
121
|
+
ANEXO DEL PEDIMENTO
|
|
122
|
+
SEGUNDA COPIA: IMPORTADOR EXPORTADOR DESTINO/ORIGEN: INTERIOR DEL PAÍS
|
|
123
|
+
REF: NQR26-079 Página 2 de 2`;
|
|
124
|
+
|
|
125
|
+
describe('pedimento_simplificado matcher — DEL PEDIMENTO variant', () => {
|
|
126
|
+
it('dispatcher resolves NQR26-079 (DEL PEDIMENTO) as pedimento_simplificado', () => {
|
|
127
|
+
const [detectedType, , pedimento] = extractDocumentFields(
|
|
128
|
+
SIMP_DEL_NQR26079_TEXT,
|
|
129
|
+
'pdf',
|
|
130
|
+
'/scans/CSM9204097Q1/NQR26-079.pdf',
|
|
131
|
+
);
|
|
132
|
+
|
|
133
|
+
// Regression: previously this resolved to `factura_comercial` because
|
|
134
|
+
// (1) the title regex demanded "DE PEDIMENTO" (this PDF says "DEL") and
|
|
135
|
+
// (2) the header trio required a colon after "T. OPER" (this PDF omits it).
|
|
136
|
+
expect(detectedType).toBe('pedimento_simplificado');
|
|
137
|
+
expect(pedimento).toBe('266434586000079');
|
|
138
|
+
});
|
|
139
|
+
});
|
|
140
|
+
|
|
141
|
+
describe('pedimento_simplificado matcher — header trio without colon after T. OPER', () => {
|
|
142
|
+
// Minimal text: title is the canonical "DE PEDIMENTO" so the fast path
|
|
143
|
+
// does NOT apply; only the fallback that requires the header trio runs.
|
|
144
|
+
// The trio MUST tolerate "T. OPER" without a trailing colon, because
|
|
145
|
+
// many printable PDFs render OPER as a column header (value in next cell).
|
|
146
|
+
const FALLBACK_TEXT = `FORMA SIMPLIFICADA DE PEDIMENTO
|
|
147
|
+
NUM. PEDIMENTO: 22 07 3429 2002089 T. OPER IMP CVE. PEDIMENTO: A1
|
|
148
|
+
DATOS DEL IMPORTADOR
|
|
149
|
+
PATENTE: 3429 PEDIMENTO: 2002089 ADUANA: 070
|
|
150
|
+
FECHA DE PAGO: 01/02/2023`;
|
|
151
|
+
|
|
152
|
+
it('resolves via fast-path "FORMA SIMPLIFICADA DE PEDIMENTO" header', () => {
|
|
153
|
+
const [detectedType] = extractDocumentFields(
|
|
154
|
+
FALLBACK_TEXT,
|
|
155
|
+
'pdf',
|
|
156
|
+
'/scans/SAMPLE/pedimento.pdf',
|
|
157
|
+
);
|
|
158
|
+
expect(detectedType).toBe('pedimento_simplificado');
|
|
159
|
+
});
|
|
160
|
+
});
|
|
161
|
+
|
|
162
|
+
describe('pedimento_simplificado matcher — title accepts both DE and DEL', () => {
|
|
163
|
+
// Same minimal body, only the title differs. Both variants are produced
|
|
164
|
+
// by different prevalidators / agencias in the wild, and BOTH must
|
|
165
|
+
// resolve to pedimento_simplificado.
|
|
166
|
+
const body = `
|
|
167
|
+
NUM. PEDIMENTO: 22 07 3429 2002089 T. OPER IMP CVE. PEDIMENTO: A1
|
|
168
|
+
DATOS DEL IMPORTADOR
|
|
169
|
+
PATENTE: 3429 PEDIMENTO: 2002089 ADUANA: 070
|
|
170
|
+
FECHA DE PAGO: 01/02/2023`;
|
|
171
|
+
|
|
172
|
+
it.each([
|
|
173
|
+
['FORMA SIMPLIFICADA DE PEDIMENTO', 'pedimento_simplificado'],
|
|
174
|
+
['FORMA SIMPLIFICADA DEL PEDIMENTO', 'pedimento_simplificado'],
|
|
175
|
+
['forma simplificada de pedimento', 'pedimento_simplificado'], // case-insensitive
|
|
176
|
+
['FORMA  SIMPLIFICADA   DEL  PEDIMENTO', 'pedimento_simplificado'], // extra spaces
|
|
177
|
+
])('title "%s" resolves to %s', (title, expected) => {
|
|
178
|
+
const [detectedType] = extractDocumentFields(
|
|
179
|
+
`${title}\n${body}`,
|
|
180
|
+
'pdf',
|
|
181
|
+
'/scans/SAMPLE/pedimento.pdf',
|
|
182
|
+
);
|
|
183
|
+
expect(detectedType).toBe(expected);
|
|
184
|
+
});
|
|
185
|
+
});
|