@arela/uploader 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,39 @@ export const pedimentoSimplificadoDefinition = {
7
7
  type: 'pedimento_simplificado',
8
8
  extensions: ['pdf'],
9
9
  match: (source) => {
10
- return /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source);
10
+ // Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
11
+ // different document type handled by aviso_consolidado.
12
+ if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
13
+
14
+ // Fast path: the literal title appears on standard SIMP layouts.
15
+ // Some prevalidators print "FORMA SIMPLIFICADA DEL PEDIMENTO" (with DEL).
16
+ if (/FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(source)) return true;
17
+
18
+ // Some PDFs (single-page anchors) lack that title but still carry the
19
+ // three pedimento header fields. Treat them as simplificado UNLESS they
20
+ // have the multi-page copy markers that uniquely identify a completo.
21
+ // NOTE: the colon after "T. OPER" is optional — many printable layouts
22
+ // render OPER as a table-header label with the value in the next cell.
23
+ const hasHeaderFields =
24
+ /NUM\.?\s*PEDIMENTO:/i.test(source) &&
25
+ /CVE\.?\s*PEDIMENTO:/i.test(source) &&
26
+ /T\.?\s*OPER:?/i.test(source);
27
+ if (!hasHeaderFields) return false;
28
+
29
+ const hasCompletoCopyMarker =
30
+ /ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
31
+ /SEGUNDA\s+COPIA/i.test(source) ||
32
+ /TERCERA\s+COPIA/i.test(source) ||
33
+ /COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
34
+ /ANEXO\s+DEL\s+PEDIMENTO/i.test(source) ||
35
+ /\*+FIN\s+DE\s+PEDIMENTO\s*\*+/i.test(source);
36
+ if (hasCompletoCopyMarker) return false;
37
+
38
+ // Exclude COVE/eDocument forms that may reference a pedimento in their body.
39
+ if (/COMPROBANTE\s+DE\s+VALOR\s+ELECTR[ÓO]NICO/i.test(source)) return false;
40
+ if (/\bCOVE\b\s*:/i.test(source) && !/PAGO/i.test(source)) return false;
41
+
42
+ return true;
11
43
  },
12
44
 
13
45
  /**
@@ -16,9 +16,9 @@ export const proformaDefinition = {
16
16
  type: 'proforma',
17
17
  extensions: ['pdf'],
18
18
 
19
- // Same content marker as pedimento simplificado
19
+ // Same content marker as pedimento simplificado (accepts "DE" or "DEL").
20
20
  match: (source) => {
21
- return /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source);
21
+ return /FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(source);
22
22
  },
23
23
 
24
24
  extractNumPedimento: pedimentoSimplificadoDefinition.extractNumPedimento,
@@ -4,9 +4,7 @@ import { PDFParse } from 'pdf-parse';
4
4
 
5
5
  import { extractDocumentFields } from './document-type-shared.js';
6
6
 
7
- // Document types that participate in arela_path composition. The XML type is
8
- // kept here even though its matcher is currently disabled — once re-enabled
9
- // in document-type-shared.js no further changes are needed here.
7
+ // Document types that participate in arela_path composition.
10
8
  const ARELA_PATH_TYPES = new Set([
11
9
  'pedimento_simplificado',
12
10
  'pedimento_completo',
package/src/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import { Command } from 'commander';
3
3
 
4
+ import datastageCommand from './commands/DatastageCommand.js';
4
5
  import gdriveSyncCommand from './commands/GDriveSyncCommand.js';
5
6
  import identifyCommand from './commands/IdentifyCommand.js';
6
7
  import pollWorkerCommand from './commands/PollWorkerCommand.js';
@@ -26,6 +27,7 @@ class ArelaUploaderCLI {
26
27
  this.scanCommand = scanCommand;
27
28
  this.uploadCommand = new UploadCommand();
28
29
  this.watchCommand = watchCommand;
30
+ this.datastageCommand = datastageCommand;
29
31
 
30
32
  this.#setupProgram();
31
33
  this.#setupCommands();
@@ -204,6 +206,34 @@ class ArelaUploaderCLI {
204
206
  }
205
207
  });
206
208
 
209
+ // Datastage command — upload monthly datastage *.zip files from a directory
210
+ this.program
211
+ .command('datastage')
212
+ .description(
213
+ '📦 Upload monthly datastage *.zip files from a directory to Arela',
214
+ )
215
+ .requiredOption(
216
+ '--dir <path>',
217
+ 'Directory containing *.zip files (non-recursive)',
218
+ )
219
+ .option(
220
+ '--api <target>',
221
+ 'API target: default|agencia|cliente',
222
+ 'default',
223
+ )
224
+ .option('--retry-failed', 'Re-attempt files in failed status')
225
+ .option('--show-stats', 'Print final stats summary')
226
+ .action(async (options) => {
227
+ try {
228
+ if (options.api && options.api !== 'default') {
229
+ appConfig.setApiTarget(options.api);
230
+ }
231
+ await this.datastageCommand.execute(options);
232
+ } catch (error) {
233
+ this.errorHandler.handleFatalError(error, { command: 'datastage' });
234
+ }
235
+ });
236
+
207
237
  // Detection command
208
238
  this.program
209
239
  .command('detect')
@@ -335,6 +365,18 @@ class ArelaUploaderCLI {
335
365
  'Number of files to process in each batch',
336
366
  '100',
337
367
  )
368
+ .option(
369
+ '--table <tableName>',
370
+ 'Process only this scan table (instead of all instance tables)',
371
+ )
372
+ .option(
373
+ '--reset-attempts',
374
+ 'Reset detection_attempts to 0 before processing so previously-failed files are retried',
375
+ )
376
+ .option(
377
+ '--path-prefix <mapping>',
378
+ 'Remap file path prefix for cross-platform access. Format: FROM:TO e.g. "O:/=/Volumes/nas/"',
379
+ )
338
380
  .option('--show-stats', 'Show performance statistics')
339
381
  .action(async (options) => {
340
382
  try {
@@ -0,0 +1,240 @@
1
+ import FormData from 'form-data';
2
+ import fs from 'fs';
3
+ import { Agent } from 'http';
4
+ import { Agent as HttpsAgent } from 'https';
5
+ import fetch from 'node-fetch';
6
+ import path from 'path';
7
+
8
+ import appConfig from '../config/config.js';
9
+ import logger from './LoggingService.js';
10
+
11
+ /**
12
+ * Datastage API Service
13
+ * Handles API communication for the arela datastage command:
14
+ * - tracking endpoints under /api/uploader/datastage/*
15
+ * - zip upload endpoint POST /api/datastage (multipart, field: zipFile)
16
+ */
17
+ export class DatastageApiService {
18
+ /**
19
+ * @param {string|null} apiTarget - 'default'|'agencia'|'cliente'
20
+ */
21
+ constructor(apiTarget = null) {
22
+ this.apiTarget = apiTarget;
23
+ const apiConfig = appConfig.getApiConfig(apiTarget);
24
+ this.baseUrl = apiConfig.baseUrl;
25
+ this.token = apiConfig.token;
26
+
27
+ const maxApiConnections = parseInt(process.env.MAX_API_CONNECTIONS) || 10;
28
+ const connectionTimeout =
29
+ parseInt(process.env.API_CONNECTION_TIMEOUT) || 300000;
30
+
31
+ this.maxRetries = parseInt(process.env.API_MAX_RETRIES) || 3;
32
+ this.useExponentialBackoff =
33
+ process.env.API_RETRY_EXPONENTIAL_BACKOFF !== 'false';
34
+ this.fixedRetryDelay = parseInt(process.env.API_RETRY_DELAY) || 1000;
35
+
36
+ const agentOpts = {
37
+ keepAlive: true,
38
+ keepAliveMsecs: 30000,
39
+ maxSockets: maxApiConnections,
40
+ maxFreeSockets: Math.ceil(maxApiConnections / 2),
41
+ maxTotalSockets: maxApiConnections + 5,
42
+ timeout: connectionTimeout,
43
+ scheduling: 'fifo',
44
+ };
45
+ this.httpAgent = new Agent(agentOpts);
46
+ this.httpsAgent = new HttpsAgent(agentOpts);
47
+
48
+ logger.debug(
49
+ `🔗 Datastage API Service configured (target=${apiTarget || 'default'})`,
50
+ );
51
+ }
52
+
53
+ #getAgent(url) {
54
+ return url.startsWith('https://') ? this.httpsAgent : this.httpAgent;
55
+ }
56
+
57
+ #isRetryableError(error, response = null) {
58
+ if (
59
+ error?.code === 'ECONNRESET' ||
60
+ error?.code === 'ETIMEDOUT' ||
61
+ error?.code === 'ECONNREFUSED' ||
62
+ error?.code === 'ENOTFOUND' ||
63
+ error?.code === 'EAI_AGAIN'
64
+ ) {
65
+ return true;
66
+ }
67
+ if (response) {
68
+ const s = response.status;
69
+ if (s === 429 || (s >= 500 && s < 600)) return true;
70
+ }
71
+ if (error?.message && error.message.includes('timeout')) return true;
72
+ return false;
73
+ }
74
+
75
+ #calculateBackoff(attempt) {
76
+ if (!this.useExponentialBackoff) {
77
+ const jitter = this.fixedRetryDelay * 0.2 * (Math.random() * 2 - 1);
78
+ return Math.floor(this.fixedRetryDelay + jitter);
79
+ }
80
+ const baseDelay = 1000;
81
+ const maxDelay = 16000;
82
+ const delay = Math.min(baseDelay * Math.pow(2, attempt - 1), maxDelay);
83
+ const jitter = delay * 0.2 * (Math.random() * 2 - 1);
84
+ return Math.floor(delay + jitter);
85
+ }
86
+
87
+ #sleep(ms) {
88
+ return new Promise((r) => setTimeout(r, ms));
89
+ }
90
+
91
+ async #requestJson(endpoint, method = 'GET', body = null, headers = {}) {
92
+ const url = `${this.baseUrl}${endpoint}`;
93
+ const options = {
94
+ method,
95
+ headers: {
96
+ 'x-api-key': this.token,
97
+ 'Content-Type': 'application/json',
98
+ ...headers,
99
+ },
100
+ agent: this.#getAgent(url),
101
+ };
102
+ if (body) options.body = JSON.stringify(body);
103
+
104
+ let lastError;
105
+ let lastResponse = null;
106
+ const retries = this.maxRetries;
107
+
108
+ for (let attempt = 1; attempt <= retries + 1; attempt++) {
109
+ try {
110
+ const response = await fetch(url, options);
111
+ lastResponse = response;
112
+ if (!response.ok) {
113
+ const errorText = await response.text();
114
+ let errorMessage = `API ${method} ${endpoint} failed: ${response.status} ${response.statusText}`;
115
+ try {
116
+ const j = JSON.parse(errorText);
117
+ errorMessage = j.message || errorMessage;
118
+ } catch {
119
+ errorMessage = errorText || errorMessage;
120
+ }
121
+ const err = new Error(errorMessage);
122
+ err.status = response.status;
123
+ if (this.#isRetryableError(err, response) && attempt <= retries) {
124
+ const d = this.#calculateBackoff(attempt);
125
+ logger.warn(
126
+ `Retrying ${method} ${endpoint} (attempt ${attempt}/${retries + 1}) in ${d}ms: ${errorMessage}`,
127
+ );
128
+ await this.#sleep(d);
129
+ continue;
130
+ }
131
+ throw err;
132
+ }
133
+ return await response.json();
134
+ } catch (error) {
135
+ lastError = error;
136
+ if (this.#isRetryableError(error, lastResponse) && attempt <= retries) {
137
+ const d = this.#calculateBackoff(attempt);
138
+ logger.warn(
139
+ `Retrying ${method} ${endpoint} (attempt ${attempt}/${retries + 1}) in ${d}ms: ${error.message}`,
140
+ );
141
+ await this.#sleep(d);
142
+ continue;
143
+ }
144
+ throw error;
145
+ }
146
+ }
147
+ throw lastError;
148
+ }
149
+
150
+ // --- Tracking endpoints ---
151
+
152
+ async registerUpload({
153
+ absolutePath,
154
+ fileName,
155
+ sizeBytes,
156
+ fileModifiedAt,
157
+ sourceDirectory,
158
+ }) {
159
+ return this.#requestJson('/api/uploader/datastage/register', 'POST', {
160
+ absolutePath,
161
+ fileName,
162
+ sizeBytes,
163
+ fileModifiedAt,
164
+ sourceDirectory,
165
+ });
166
+ }
167
+
168
+ async getPending(sourceDirectory = null) {
169
+ const qs = sourceDirectory
170
+ ? `?sourceDirectory=${encodeURIComponent(sourceDirectory)}`
171
+ : '';
172
+ return this.#requestJson(`/api/uploader/datastage/pending${qs}`, 'GET');
173
+ }
174
+
175
+ async getStats(sourceDirectory = null) {
176
+ const qs = sourceDirectory
177
+ ? `?sourceDirectory=${encodeURIComponent(sourceDirectory)}`
178
+ : '';
179
+ return this.#requestJson(`/api/uploader/datastage/stats${qs}`, 'GET');
180
+ }
181
+
182
+ async markUploaded(id, { datastageId, folio }) {
183
+ return this.#requestJson(
184
+ `/api/uploader/datastage/${id}/mark-uploaded`,
185
+ 'PATCH',
186
+ { datastageId, folio },
187
+ );
188
+ }
189
+
190
+ async markFailed(id, error) {
191
+ return this.#requestJson(
192
+ `/api/uploader/datastage/${id}/mark-failed`,
193
+ 'PATCH',
194
+ { error: String(error || 'unknown') },
195
+ );
196
+ }
197
+
198
+ // --- Zip upload ---
199
+
200
+ /**
201
+ * Upload a single zip file to POST /api/datastage (multipart, field name 'zipFile').
202
+ * Returns the created Datastage row { id, folio, ... }.
203
+ */
204
+ async uploadZip(localPath) {
205
+ const url = `${this.baseUrl}/api/datastage`;
206
+ const form = new FormData();
207
+ const fileName = path.basename(localPath);
208
+ form.append('zipFile', fs.createReadStream(localPath), {
209
+ filename: fileName,
210
+ contentType: 'application/zip',
211
+ });
212
+
213
+ const response = await fetch(url, {
214
+ method: 'POST',
215
+ headers: {
216
+ 'x-api-key': this.token,
217
+ ...form.getHeaders(),
218
+ },
219
+ body: form,
220
+ agent: this.#getAgent(url),
221
+ });
222
+
223
+ if (!response.ok) {
224
+ const text = await response.text();
225
+ let msg = `Datastage upload failed: ${response.status} ${response.statusText}`;
226
+ try {
227
+ const j = JSON.parse(text);
228
+ msg = j.message || msg;
229
+ } catch {
230
+ msg = text || msg;
231
+ }
232
+ const err = new Error(msg);
233
+ err.status = response.status;
234
+ throw err;
235
+ }
236
+ return await response.json();
237
+ }
238
+ }
239
+
240
+ export default DatastageApiService;
@@ -375,6 +375,20 @@ export class ScanApiService {
375
375
  * @param {boolean} allTypes - When true, fetch all supported file types instead of just likely-simplificado PDFs
376
376
  * @returns {Promise<Object>} { data: Array, hasMore: boolean }
377
377
  */
378
+ /**
379
+ * Get a single file record by ID (for single-file identify mode).
380
+ * @param {string} tableName - Scan table name (with or without cli. prefix)
381
+ * @param {string} fileId - UUID of the file record
382
+ * @returns {Promise<{ id: string, file_name: string, file_extension: string, absolute_path: string }>}
383
+ */
384
+ async getFileRecord(tableName, fileId) {
385
+ const cleanTable = tableName.replace(/^cli\./, '');
386
+ const url = `/api/uploader/scan/file-record?tableName=${encodeURIComponent(cleanTable)}&fileId=${encodeURIComponent(fileId)}`;
387
+ const result = await this.#request(url, 'GET');
388
+ logger.debug(`Fetched file record ${fileId} from ${cleanTable}`);
389
+ return result;
390
+ }
391
+
378
392
  async fetchPdfsForDetection(
379
393
  tableName,
380
394
  offset = 0,
@@ -398,6 +412,22 @@ export class ScanApiService {
398
412
  return result;
399
413
  }
400
414
 
415
+ /**
416
+ * Reset detection_attempts to 0 for undetected files so they can be re-processed.
417
+ * @param {string} tableName - Target scan table name
418
+ * @param {string|null} absolutePath - If provided, reset only this specific file
419
+ * @returns {Promise<{ reset: number }>}
420
+ */
421
+ async resetDetectionAttempts(tableName, absolutePath = null) {
422
+ let url = `/api/uploader/scan/reset-detection-attempts?tableName=${encodeURIComponent(tableName)}`;
423
+ if (absolutePath) {
424
+ url += `&absolutePath=${encodeURIComponent(absolutePath)}`;
425
+ }
426
+ const result = await this.#request(url, 'PATCH');
427
+ logger.debug(`Reset ${result.reset} detection attempt(s) in ${tableName}`);
428
+ return result;
429
+ }
430
+
401
431
  /**
402
432
  * Batch update detection results
403
433
  * @param {string} tableName - Target table name
@@ -0,0 +1,218 @@
1
+ /**
2
+ * Unit tests for the factura_inter_agencia matcher.
3
+ *
4
+ * Verifies that NORCOM↔PALCO CFDIs (XML and printable PDF text) are
5
+ * detected as `factura_inter_agencia`, and that ordinary CFDIs are NOT
6
+ * mis-classified.
7
+ */
8
+ import { describe, it, expect } from '@jest/globals';
9
+
10
+ import {
11
+ facturaInterAgenciaDefinition,
12
+ INTER_AGENCIA_RFCS,
13
+ } from '../../src/document-types/factura-inter-agencia.js';
14
+ import { extractDocumentFields } from '../../src/document-type-shared.js';
15
+
16
+ const NORCOM_RFC = 'NAA120215F20';
17
+ const PALCO_RFC = 'PCC1008161WA';
18
+
19
+ // Realistic CFDI 4.0 XML between NORCOM (emisor) and PALCO (receptor).
20
+ // Conceptos use ClaveProdServ 78141502 (servicios de agentes aduaneros).
21
+ const CFDI_XML_INTER_AGENCIA = `<?xml version="1.0" encoding="utf-8"?>
22
+ <cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" Version="4.0" Folio="012749"
23
+ TipoDeComprobante="I" SubTotal="3000.00" Total="3480.00" Moneda="MXN">
24
+ <cfdi:Emisor Rfc="${NORCOM_RFC}" Nombre="NORCOM AGENTES ADUANALES" RegimenFiscal="601"/>
25
+ <cfdi:Receptor Rfc="${PALCO_RFC}" Nombre="PALCO, CONSORCIO DE COMERCIO INTERNACIONAL"
26
+ DomicilioFiscalReceptor="32380" RegimenFiscalReceptor="601" UsoCFDI="G03"/>
27
+ <cfdi:Conceptos>
28
+ <cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="HONO" Cantidad="1.00"
29
+ ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="HONORARIOS"
30
+ ValorUnitario="1300.00" Importe="1300.00" ObjetoImp="02"/>
31
+ <cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="VALID" Cantidad="1.00"
32
+ ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="VALIDACION"
33
+ ValorUnitario="200.00" Importe="200.00" ObjetoImp="02"/>
34
+ </cfdi:Conceptos>
35
+ </cfdi:Comprobante>`;
36
+
37
+ // Same agencies but conceptos do NOT use 78141502 — should NOT match.
38
+ const CFDI_XML_INTER_AGENCIA_WRONG_CONCEPT = CFDI_XML_INTER_AGENCIA.replace(
39
+ /78141502/g,
40
+ '90121502',
41
+ );
42
+
43
+ // CFDI between unrelated taxpayers — should NOT match.
44
+ const CFDI_XML_REGULAR = `<?xml version="1.0" encoding="utf-8"?>
45
+ <cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" Version="4.0" Folio="000123"
46
+ TipoDeComprobante="I" SubTotal="100.00" Total="116.00">
47
+ <cfdi:Emisor Rfc="ACME010101AB1" Nombre="ACME COMERCIAL" RegimenFiscal="601"/>
48
+ <cfdi:Receptor Rfc="XYZ020202CD2" Nombre="CLIENTE FINAL"
49
+ DomicilioFiscalReceptor="00000" RegimenFiscalReceptor="601" UsoCFDI="G03"/>
50
+ <cfdi:Conceptos>
51
+ <cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="ITEM" Cantidad="1.00"
52
+ ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="SERVICIO"
53
+ ValorUnitario="100.00" Importe="100.00" ObjetoImp="02"/>
54
+ </cfdi:Conceptos>
55
+ </cfdi:Comprobante>`;
56
+
57
+ // Text extracted from the printable PDF representation of a CFDI inter-agencia.
58
+ // Mirrors what pdf-parse returns for the sample SICINGR70-012749(...).pdf.
59
+ const CFDI_PDF_TEXT_INTER_AGENCIA = `NORCOM AGENTES ADUANALES S.C
60
+ Tipo de Comprobante: (I) Ingreso
61
+ Folio Fiscal 84FC9CE2-00D5-4843-B377-B463321F9FC6
62
+ Numero Folio 012749
63
+ Emisor
64
+ RFC ${NORCOM_RFC}
65
+ Razon Social NORCOM AGENTES ADUANALES
66
+ Receptor
67
+ RFC ${PALCO_RFC}
68
+ Razon Social PALCO, CONSORCIO DE COMERCIO INTERNACIONAL
69
+ Pedimento: 3458 6000046 Fecha: 17/02/2026 Tipo: EXP Clave: A1
70
+ Erogaciones
71
+ 78141502 HONO HONORARIOS 1,300.00
72
+ 78141502 SERCOM SERVICIOS COMPLEMENTARIOS 1,500.00
73
+ 78141502 VALID VALIDACION 200.00
74
+ Sello Digital del CFDI
75
+ c4oBJ8/zAol0zg1jVe4MK8...
76
+ Cadena Original del Complemento de Certificacion Digital del SAT
77
+ ||4.0|012749|...
78
+ Este documento es una representación impresa de un CFDI`;
79
+
80
+ describe('factura_inter_agencia matcher', () => {
81
+ describe('configured RFC set', () => {
82
+ it('includes NORCOM and PALCO RFCs', () => {
83
+ expect(INTER_AGENCIA_RFCS).toContain(NORCOM_RFC);
84
+ expect(INTER_AGENCIA_RFCS).toContain(PALCO_RFC);
85
+ });
86
+ });
87
+
88
+ describe('match()', () => {
89
+ it('matches a NORCOM→PALCO XML CFDI with broker-service conceptos', () => {
90
+ expect(facturaInterAgenciaDefinition.match(CFDI_XML_INTER_AGENCIA)).toBe(
91
+ true,
92
+ );
93
+ });
94
+
95
+ it('matches the PDF-text representation of the same CFDI', () => {
96
+ expect(
97
+ facturaInterAgenciaDefinition.match(CFDI_PDF_TEXT_INTER_AGENCIA),
98
+ ).toBe(true);
99
+ });
100
+
101
+ it('does NOT match when ClaveProdServ is not 78141502', () => {
102
+ expect(
103
+ facturaInterAgenciaDefinition.match(
104
+ CFDI_XML_INTER_AGENCIA_WRONG_CONCEPT,
105
+ ),
106
+ ).toBe(false);
107
+ });
108
+
109
+ it('does NOT match a CFDI between unrelated taxpayers', () => {
110
+ expect(facturaInterAgenciaDefinition.match(CFDI_XML_REGULAR)).toBe(false);
111
+ });
112
+
113
+ it('does NOT match arbitrary non-CFDI text containing the RFCs', () => {
114
+ const text = `Reporte interno
115
+ RFC emisor: ${NORCOM_RFC}
116
+ RFC cliente: ${PALCO_RFC}
117
+ Clave 78141502`;
118
+ // No CFDI markers → should not match.
119
+ expect(facturaInterAgenciaDefinition.match(text)).toBe(false);
120
+ });
121
+
122
+ it('does NOT match if only one of the configured RFCs is present', () => {
123
+ const text = CFDI_XML_INTER_AGENCIA.replace(PALCO_RFC, 'XYZ020202CD2');
124
+ expect(facturaInterAgenciaDefinition.match(text)).toBe(false);
125
+ });
126
+ });
127
+
128
+ describe('extractors', () => {
129
+ it('extracts emisor + receptor RFCs from XML', () => {
130
+ const rfcEmisor = facturaInterAgenciaDefinition.extractors
131
+ .find((e) => e.field === 'rfcEmisor')
132
+ .extract(CFDI_XML_INTER_AGENCIA);
133
+ const rfcReceptor = facturaInterAgenciaDefinition.extractors
134
+ .find((e) => e.field === 'rfcReceptor')
135
+ .extract(CFDI_XML_INTER_AGENCIA);
136
+
137
+ expect(rfcEmisor.found).toBe(true);
138
+ expect(rfcEmisor.value).toBe(NORCOM_RFC);
139
+ expect(rfcReceptor.found).toBe(true);
140
+ expect(rfcReceptor.value).toBe(PALCO_RFC);
141
+ });
142
+
143
+ it('extracts both RFCs from PDF text via fallback', () => {
144
+ const rfcEmisor = facturaInterAgenciaDefinition.extractors
145
+ .find((e) => e.field === 'rfcEmisor')
146
+ .extract(CFDI_PDF_TEXT_INTER_AGENCIA);
147
+ const rfcReceptor = facturaInterAgenciaDefinition.extractors
148
+ .find((e) => e.field === 'rfcReceptor')
149
+ .extract(CFDI_PDF_TEXT_INTER_AGENCIA);
150
+
151
+ expect(rfcEmisor.found).toBe(true);
152
+ expect(rfcReceptor.found).toBe(true);
153
+ // Order is the order of first appearance in the document.
154
+ const found = [rfcEmisor.value, rfcReceptor.value].sort();
155
+ expect(found).toEqual([NORCOM_RFC, PALCO_RFC].sort());
156
+ });
157
+
158
+ it('extracts the UUID (folio fiscal) from both formats', () => {
159
+ const uuidExtractor = facturaInterAgenciaDefinition.extractors.find(
160
+ (e) => e.field === 'uuid',
161
+ );
162
+
163
+ const fromXml = uuidExtractor.extract(CFDI_XML_INTER_AGENCIA);
164
+ // XML sample has no UUID inside the comprobante body — that's fine.
165
+ expect(fromXml.found).toBe(false);
166
+
167
+ const fromPdf = uuidExtractor.extract(CFDI_PDF_TEXT_INTER_AGENCIA);
168
+ expect(fromPdf.found).toBe(true);
169
+ expect(fromPdf.value).toBe('84FC9CE2-00D5-4843-B377-B463321F9FC6');
170
+ });
171
+
172
+ it('extracts numPedimento from the printable PDF "Pedimento:" line', () => {
173
+ const numExtractor = facturaInterAgenciaDefinition.extractors.find(
174
+ (e) => e.field === 'numPedimento',
175
+ );
176
+ const result = numExtractor.extract(CFDI_PDF_TEXT_INTER_AGENCIA);
177
+ expect(result.found).toBe(true);
178
+ expect(result.value).toBe('34586000046');
179
+ });
180
+
181
+ it('extracts the CFDI folio from XML attribute', () => {
182
+ const folio = facturaInterAgenciaDefinition.extractors
183
+ .find((e) => e.field === 'folio')
184
+ .extract(CFDI_XML_INTER_AGENCIA);
185
+ expect(folio.found).toBe(true);
186
+ expect(folio.value).toBe('012749');
187
+ });
188
+ });
189
+
190
+ describe('registry order (factura_inter_agencia precedes facturas_comerciales)', () => {
191
+ it('resolves the inter-agency CFDI XML to factura_inter_agencia, not factura_comercial', () => {
192
+ const [detectedType] = extractDocumentFields(
193
+ CFDI_XML_INTER_AGENCIA,
194
+ 'xml',
195
+ '/tmp/SICINGR70-012749(PALCO).XML',
196
+ );
197
+ expect(detectedType).toBe('factura_inter_agencia');
198
+ });
199
+
200
+ it('resolves the inter-agency CFDI PDF text to factura_inter_agencia', () => {
201
+ const [detectedType] = extractDocumentFields(
202
+ CFDI_PDF_TEXT_INTER_AGENCIA,
203
+ 'pdf',
204
+ '/tmp/SICINGR70-012749(PALCO).pdf',
205
+ );
206
+ expect(detectedType).toBe('factura_inter_agencia');
207
+ });
208
+
209
+ it('falls through to factura_comercial for a regular CFDI', () => {
210
+ const [detectedType] = extractDocumentFields(
211
+ CFDI_XML_REGULAR,
212
+ 'xml',
213
+ '/tmp/regular-invoice.xml',
214
+ );
215
+ expect(detectedType).toBe('factura_comercial');
216
+ });
217
+ });
218
+ });