@arela/uploader 1.0.21 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/DatastageCommand.js +164 -0
- package/src/commands/IdentifyCommand.js +144 -25
- package/src/commands/PollWorkerCommand.js +2 -0
- package/src/commands/ScanCommand.js +15 -0
- package/src/config/config.js +28 -2
- package/src/document-type-shared.js +15 -7
- package/src/document-types/_pedimento-shared-extractors.js +150 -35
- package/src/document-types/factura-inter-agencia.js +186 -0
- package/src/document-types/pedimento-completo-xml.js +62 -12
- package/src/document-types/pedimento-completo.js +43 -10
- package/src/document-types/pedimento-simplificado.js +33 -1
- package/src/document-types/proforma.js +2 -2
- package/src/file-detection.js +1 -3
- package/src/index.js +42 -0
- package/src/services/DatastageApiService.js +240 -0
- package/src/services/ScanApiService.js +30 -0
- package/tests/unit/factura-inter-agencia.test.js +218 -0
- package/tests/unit/pedimento-completo-xml-matcher.test.js +271 -0
- package/tests/unit/pedimento-simplificado-matcher.test.js +185 -0
|
@@ -7,7 +7,39 @@ export const pedimentoSimplificadoDefinition = {
|
|
|
7
7
|
type: 'pedimento_simplificado',
|
|
8
8
|
extensions: ['pdf'],
|
|
9
9
|
match: (source) => {
|
|
10
|
-
|
|
10
|
+
// Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
|
|
11
|
+
// different document type handled by aviso_consolidado.
|
|
12
|
+
if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
|
|
13
|
+
|
|
14
|
+
// Fast path: the literal title appears on standard SIMP layouts.
|
|
15
|
+
// Some prevalidators print "FORMA SIMPLIFICADA DEL PEDIMENTO" (with DEL).
|
|
16
|
+
if (/FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(source)) return true;
|
|
17
|
+
|
|
18
|
+
// Some PDFs (single-page anchors) lack that title but still carry the
|
|
19
|
+
// three pedimento header fields. Treat them as simplificado UNLESS they
|
|
20
|
+
// have the multi-page copy markers that uniquely identify a completo.
|
|
21
|
+
// NOTE: the colon after "T. OPER" is optional — many printable layouts
|
|
22
|
+
// render OPER as a table-header label with the value in the next cell.
|
|
23
|
+
const hasHeaderFields =
|
|
24
|
+
/NUM\.?\s*PEDIMENTO:/i.test(source) &&
|
|
25
|
+
/CVE\.?\s*PEDIMENTO:/i.test(source) &&
|
|
26
|
+
/T\.?\s*OPER:?/i.test(source);
|
|
27
|
+
if (!hasHeaderFields) return false;
|
|
28
|
+
|
|
29
|
+
const hasCompletoCopyMarker =
|
|
30
|
+
/ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
|
|
31
|
+
/SEGUNDA\s+COPIA/i.test(source) ||
|
|
32
|
+
/TERCERA\s+COPIA/i.test(source) ||
|
|
33
|
+
/COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
|
|
34
|
+
/ANEXO\s+DEL\s+PEDIMENTO/i.test(source) ||
|
|
35
|
+
/\*+FIN\s+DE\s+PEDIMENTO\s*\*+/i.test(source);
|
|
36
|
+
if (hasCompletoCopyMarker) return false;
|
|
37
|
+
|
|
38
|
+
// Exclude COVE/eDocument forms that may reference a pedimento in their body.
|
|
39
|
+
if (/COMPROBANTE\s+DE\s+VALOR\s+ELECTR[ÓO]NICO/i.test(source)) return false;
|
|
40
|
+
if (/\bCOVE\b\s*:/i.test(source) && !/PAGO/i.test(source)) return false;
|
|
41
|
+
|
|
42
|
+
return true;
|
|
11
43
|
},
|
|
12
44
|
|
|
13
45
|
/**
|
|
@@ -16,9 +16,9 @@ export const proformaDefinition = {
|
|
|
16
16
|
type: 'proforma',
|
|
17
17
|
extensions: ['pdf'],
|
|
18
18
|
|
|
19
|
-
// Same content marker as pedimento simplificado
|
|
19
|
+
// Same content marker as pedimento simplificado (accepts "DE" or "DEL").
|
|
20
20
|
match: (source) => {
|
|
21
|
-
return /FORMA
|
|
21
|
+
return /FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(source);
|
|
22
22
|
},
|
|
23
23
|
|
|
24
24
|
extractNumPedimento: pedimentoSimplificadoDefinition.extractNumPedimento,
|
package/src/file-detection.js
CHANGED
|
@@ -4,9 +4,7 @@ import { PDFParse } from 'pdf-parse';
|
|
|
4
4
|
|
|
5
5
|
import { extractDocumentFields } from './document-type-shared.js';
|
|
6
6
|
|
|
7
|
-
// Document types that participate in arela_path composition.
|
|
8
|
-
// kept here even though its matcher is currently disabled — once re-enabled
|
|
9
|
-
// in document-type-shared.js no further changes are needed here.
|
|
7
|
+
// Document types that participate in arela_path composition.
|
|
10
8
|
const ARELA_PATH_TYPES = new Set([
|
|
11
9
|
'pedimento_simplificado',
|
|
12
10
|
'pedimento_completo',
|
package/src/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { Command } from 'commander';
|
|
3
3
|
|
|
4
|
+
import datastageCommand from './commands/DatastageCommand.js';
|
|
4
5
|
import gdriveSyncCommand from './commands/GDriveSyncCommand.js';
|
|
5
6
|
import identifyCommand from './commands/IdentifyCommand.js';
|
|
6
7
|
import pollWorkerCommand from './commands/PollWorkerCommand.js';
|
|
@@ -26,6 +27,7 @@ class ArelaUploaderCLI {
|
|
|
26
27
|
this.scanCommand = scanCommand;
|
|
27
28
|
this.uploadCommand = new UploadCommand();
|
|
28
29
|
this.watchCommand = watchCommand;
|
|
30
|
+
this.datastageCommand = datastageCommand;
|
|
29
31
|
|
|
30
32
|
this.#setupProgram();
|
|
31
33
|
this.#setupCommands();
|
|
@@ -204,6 +206,34 @@ class ArelaUploaderCLI {
|
|
|
204
206
|
}
|
|
205
207
|
});
|
|
206
208
|
|
|
209
|
+
// Datastage command — upload monthly datastage *.zip files from a directory
|
|
210
|
+
this.program
|
|
211
|
+
.command('datastage')
|
|
212
|
+
.description(
|
|
213
|
+
'📦 Upload monthly datastage *.zip files from a directory to Arela',
|
|
214
|
+
)
|
|
215
|
+
.requiredOption(
|
|
216
|
+
'--dir <path>',
|
|
217
|
+
'Directory containing *.zip files (non-recursive)',
|
|
218
|
+
)
|
|
219
|
+
.option(
|
|
220
|
+
'--api <target>',
|
|
221
|
+
'API target: default|agencia|cliente',
|
|
222
|
+
'default',
|
|
223
|
+
)
|
|
224
|
+
.option('--retry-failed', 'Re-attempt files in failed status')
|
|
225
|
+
.option('--show-stats', 'Print final stats summary')
|
|
226
|
+
.action(async (options) => {
|
|
227
|
+
try {
|
|
228
|
+
if (options.api && options.api !== 'default') {
|
|
229
|
+
appConfig.setApiTarget(options.api);
|
|
230
|
+
}
|
|
231
|
+
await this.datastageCommand.execute(options);
|
|
232
|
+
} catch (error) {
|
|
233
|
+
this.errorHandler.handleFatalError(error, { command: 'datastage' });
|
|
234
|
+
}
|
|
235
|
+
});
|
|
236
|
+
|
|
207
237
|
// Detection command
|
|
208
238
|
this.program
|
|
209
239
|
.command('detect')
|
|
@@ -335,6 +365,18 @@ class ArelaUploaderCLI {
|
|
|
335
365
|
'Number of files to process in each batch',
|
|
336
366
|
'100',
|
|
337
367
|
)
|
|
368
|
+
.option(
|
|
369
|
+
'--table <tableName>',
|
|
370
|
+
'Process only this scan table (instead of all instance tables)',
|
|
371
|
+
)
|
|
372
|
+
.option(
|
|
373
|
+
'--reset-attempts',
|
|
374
|
+
'Reset detection_attempts to 0 before processing so previously-failed files are retried',
|
|
375
|
+
)
|
|
376
|
+
.option(
|
|
377
|
+
'--path-prefix <mapping>',
|
|
378
|
+
'Remap file path prefix for cross-platform access. Format: FROM:TO e.g. "O:/=/Volumes/nas/"',
|
|
379
|
+
)
|
|
338
380
|
.option('--show-stats', 'Show performance statistics')
|
|
339
381
|
.action(async (options) => {
|
|
340
382
|
try {
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
import FormData from 'form-data';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import { Agent } from 'http';
|
|
4
|
+
import { Agent as HttpsAgent } from 'https';
|
|
5
|
+
import fetch from 'node-fetch';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
|
|
8
|
+
import appConfig from '../config/config.js';
|
|
9
|
+
import logger from './LoggingService.js';
|
|
10
|
+
|
|
11
|
+
/**
|
|
12
|
+
* Datastage API Service
|
|
13
|
+
* Handles API communication for the arela datastage command:
|
|
14
|
+
* - tracking endpoints under /api/uploader/datastage/*
|
|
15
|
+
* - zip upload endpoint POST /api/datastage (multipart, field: zipFile)
|
|
16
|
+
*/
|
|
17
|
+
export class DatastageApiService {
  /**
   * Read a non-negative integer setting from the environment.
   *
   * Unlike `parseInt(value) || fallback`, an explicit `0` is honoured when
   * `min` allows it, so settings such as `API_MAX_RETRIES=0` take effect.
   *
   * @param {string} name - Environment variable name
   * @param {number} fallback - Value used when the variable is unset, not an
   *   integer, or below `min`
   * @param {number} [min=0] - Smallest accepted value
   * @returns {number}
   */
  static #envInt(name, fallback, min = 0) {
    const parsed = Number.parseInt(process.env[name] ?? '', 10);
    return Number.isInteger(parsed) && parsed >= min ? parsed : fallback;
  }

  /**
   * @param {string|null} apiTarget - 'default'|'agencia'|'cliente'
   */
  constructor(apiTarget = null) {
    this.apiTarget = apiTarget;
    const apiConfig = appConfig.getApiConfig(apiTarget);
    this.baseUrl = apiConfig.baseUrl;
    this.token = apiConfig.token;

    // Connection count and timeout must be positive; 0 falls back to default.
    const maxApiConnections = DatastageApiService.#envInt(
      'MAX_API_CONNECTIONS',
      10,
      1,
    );
    const connectionTimeout = DatastageApiService.#envInt(
      'API_CONNECTION_TIMEOUT',
      300000,
      1,
    );

    // 0 is a valid choice for both: "never retry" and "retry immediately".
    this.maxRetries = DatastageApiService.#envInt('API_MAX_RETRIES', 3);
    this.useExponentialBackoff =
      process.env.API_RETRY_EXPONENTIAL_BACKOFF !== 'false';
    this.fixedRetryDelay = DatastageApiService.#envInt('API_RETRY_DELAY', 1000);

    const agentOpts = {
      keepAlive: true,
      keepAliveMsecs: 30000,
      maxSockets: maxApiConnections,
      maxFreeSockets: Math.ceil(maxApiConnections / 2),
      maxTotalSockets: maxApiConnections + 5,
      timeout: connectionTimeout,
      scheduling: 'fifo',
    };
    this.httpAgent = new Agent(agentOpts);
    this.httpsAgent = new HttpsAgent(agentOpts);

    logger.debug(
      `🔗 Datastage API Service configured (target=${apiTarget || 'default'})`,
    );
  }

  /**
   * Pick the keep-alive agent that matches the URL scheme.
   * @param {string} url
   */
  #getAgent(url) {
    return url.startsWith('https://') ? this.httpsAgent : this.httpAgent;
  }

  /**
   * Decide whether a failed attempt is worth repeating.
   *
   * @param {Error|null} error - Error raised by the attempt
   * @param {{ status: number }|null} [response] - Response of the SAME
   *   attempt, if one was received
   * @returns {boolean} true for transient network codes, HTTP 429/5xx, or an
   *   error message mentioning "timeout"
   */
  #isRetryableError(error, response = null) {
    const transientCodes = [
      'ECONNRESET',
      'ETIMEDOUT',
      'ECONNREFUSED',
      'ENOTFOUND',
      'EAI_AGAIN',
    ];
    if (transientCodes.includes(error?.code)) return true;

    if (response) {
      const s = response.status;
      if (s === 429 || (s >= 500 && s < 600)) return true;
    }
    if (error?.message && error.message.includes('timeout')) return true;
    return false;
  }

  /**
   * Delay before the next attempt, with ±20% random jitter.
   *
   * @param {number} attempt - 1-based number of the attempt that just failed
   * @returns {number} milliseconds; exponential mode doubles from 1s and is
   *   capped at 16s, fixed mode uses `fixedRetryDelay`
   */
  #calculateBackoff(attempt) {
    const delay = this.useExponentialBackoff
      ? Math.min(1000 * Math.pow(2, attempt - 1), 16000)
      : this.fixedRetryDelay;
    const jitter = delay * 0.2 * (Math.random() * 2 - 1);
    return Math.floor(delay + jitter);
  }

  /** Resolve after `ms` milliseconds. */
  #sleep(ms) {
    return new Promise((r) => setTimeout(r, ms));
  }

  /**
   * Build an Error from a non-OK response.
   *
   * The message is the `message` field of a JSON body when present, otherwise
   * the raw body text, otherwise `fallbackMessage`. The HTTP status is
   * attached as `error.status`.
   *
   * @param {{ status: number, text: () => Promise<string> }} response
   * @param {string} fallbackMessage
   * @returns {Promise<Error>}
   */
  async #toHttpError(response, fallbackMessage) {
    const bodyText = await response.text();
    let message = fallbackMessage;
    try {
      message = JSON.parse(bodyText).message || fallbackMessage;
    } catch {
      message = bodyText || fallbackMessage;
    }
    const error = new Error(message);
    error.status = response.status;
    return error;
  }

  /**
   * Send a JSON request to `baseUrl + endpoint` and return the parsed body.
   *
   * Retryable failures are repeated up to `maxRetries` times with backoff.
   *
   * @param {string} endpoint - Path (and query string) appended to `baseUrl`
   * @param {string} [method='GET']
   * @param {Object|null} [body] - Serialized with JSON.stringify when truthy
   * @param {Object} [headers] - Extra headers; override the defaults
   * @returns {Promise<any>} Parsed JSON response
   * @throws {Error} The last failure; HTTP failures carry `status`
   */
  async #requestJson(endpoint, method = 'GET', body = null, headers = {}) {
    const url = `${this.baseUrl}${endpoint}`;
    const options = {
      method,
      headers: {
        'x-api-key': this.token,
        'Content-Type': 'application/json',
        ...headers,
      },
      agent: this.#getAgent(url),
    };
    if (body) options.body = JSON.stringify(body);

    let lastError;
    const retries = this.maxRetries;

    for (let attempt = 1; attempt <= retries + 1; attempt++) {
      // Scoped to this attempt so a status from an earlier attempt can never
      // make a later, unrelated failure look retryable.
      let response = null;
      try {
        response = await fetch(url, options);
        if (response.ok) return await response.json();
        throw await this.#toHttpError(
          response,
          `API ${method} ${endpoint} failed: ${response.status} ${response.statusText}`,
        );
      } catch (error) {
        lastError = error;
        if (attempt > retries || !this.#isRetryableError(error, response)) {
          throw error;
        }
        const d = this.#calculateBackoff(attempt);
        logger.warn(
          `Retrying ${method} ${endpoint} (attempt ${attempt}/${retries + 1}) in ${d}ms: ${error.message}`,
        );
        await this.#sleep(d);
      }
    }
    throw lastError;
  }

  // --- Tracking endpoints ---

  /**
   * Register a local zip file: POST /api/uploader/datastage/register.
   * The five fields are forwarded unchanged as the JSON body.
   */
  async registerUpload({
    absolutePath,
    fileName,
    sizeBytes,
    fileModifiedAt,
    sourceDirectory,
  }) {
    return this.#requestJson('/api/uploader/datastage/register', 'POST', {
      absolutePath,
      fileName,
      sizeBytes,
      fileModifiedAt,
      sourceDirectory,
    });
  }

  /**
   * GET /api/uploader/datastage/pending.
   * @param {string|null} [sourceDirectory] - Sent as a query filter when truthy
   */
  async getPending(sourceDirectory = null) {
    const qs = sourceDirectory
      ? `?sourceDirectory=${encodeURIComponent(sourceDirectory)}`
      : '';
    return this.#requestJson(`/api/uploader/datastage/pending${qs}`, 'GET');
  }

  /**
   * GET /api/uploader/datastage/stats.
   * @param {string|null} [sourceDirectory] - Sent as a query filter when truthy
   */
  async getStats(sourceDirectory = null) {
    const qs = sourceDirectory
      ? `?sourceDirectory=${encodeURIComponent(sourceDirectory)}`
      : '';
    return this.#requestJson(`/api/uploader/datastage/stats${qs}`, 'GET');
  }

  /**
   * PATCH /api/uploader/datastage/{id}/mark-uploaded.
   * @param {string|number} id - Tracking record id (URL-encoded into the path)
   * @param {{ datastageId: any, folio: any }} result - Sent as the JSON body
   */
  async markUploaded(id, { datastageId, folio }) {
    return this.#requestJson(
      `/api/uploader/datastage/${encodeURIComponent(id)}/mark-uploaded`,
      'PATCH',
      { datastageId, folio },
    );
  }

  /**
   * PATCH /api/uploader/datastage/{id}/mark-failed.
   * @param {string|number} id - Tracking record id (URL-encoded into the path)
   * @param {any} error - Stringified into the body; 'unknown' when falsy
   */
  async markFailed(id, error) {
    return this.#requestJson(
      `/api/uploader/datastage/${encodeURIComponent(id)}/mark-failed`,
      'PATCH',
      { error: String(error || 'unknown') },
    );
  }

  // --- Zip upload ---

  /**
   * Upload a single zip file to POST /api/datastage (multipart, field name 'zipFile').
   * Returns the created Datastage row { id, folio, ... }.
   *
   * The request is sent once, without retries: the file is streamed, and a
   * consumed stream cannot be replayed.
   *
   * @param {string} localPath - Path of the zip file on disk
   * @returns {Promise<any>} Parsed JSON response
   * @throws {Error} On a non-OK response (with `status`) or a transport error
   */
  async uploadZip(localPath) {
    const url = `${this.baseUrl}/api/datastage`;
    const zipStream = fs.createReadStream(localPath);
    const form = new FormData();
    form.append('zipFile', zipStream, {
      filename: path.basename(localPath),
      contentType: 'application/zip',
    });

    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'x-api-key': this.token,
          ...form.getHeaders(),
        },
        body: form,
        agent: this.#getAgent(url),
      });

      if (!response.ok) {
        throw await this.#toHttpError(
          response,
          `Datastage upload failed: ${response.status} ${response.statusText}`,
        );
      }
      return await response.json();
    } finally {
      // Release the file descriptor even when the request failed before the
      // stream was fully read; a no-op once the stream has already closed.
      zipStream.destroy();
    }
  }
}
|
|
239
|
+
|
|
240
|
+
export default DatastageApiService;
|
|
@@ -375,6 +375,20 @@ export class ScanApiService {
|
|
|
375
375
|
* @param {boolean} allTypes - When true, fetch all supported file types instead of just likely-simplificado PDFs
|
|
376
376
|
* @returns {Promise<Object>} { data: Array, hasMore: boolean }
|
|
377
377
|
*/
|
|
378
|
+
/**
|
|
379
|
+
* Get a single file record by ID (for single-file identify mode).
|
|
380
|
+
* @param {string} tableName - Scan table name (with or without cli. prefix)
|
|
381
|
+
* @param {string} fileId - UUID of the file record
|
|
382
|
+
* @returns {Promise<{ id: string, file_name: string, file_extension: string, absolute_path: string }>}
|
|
383
|
+
*/
|
|
384
|
+
async getFileRecord(tableName, fileId) {
|
|
385
|
+
const cleanTable = tableName.replace(/^cli\./, '');
|
|
386
|
+
const url = `/api/uploader/scan/file-record?tableName=${encodeURIComponent(cleanTable)}&fileId=${encodeURIComponent(fileId)}`;
|
|
387
|
+
const result = await this.#request(url, 'GET');
|
|
388
|
+
logger.debug(`Fetched file record ${fileId} from ${cleanTable}`);
|
|
389
|
+
return result;
|
|
390
|
+
}
|
|
391
|
+
|
|
378
392
|
async fetchPdfsForDetection(
|
|
379
393
|
tableName,
|
|
380
394
|
offset = 0,
|
|
@@ -398,6 +412,22 @@ export class ScanApiService {
|
|
|
398
412
|
return result;
|
|
399
413
|
}
|
|
400
414
|
|
|
415
|
+
/**
|
|
416
|
+
* Reset detection_attempts to 0 for undetected files so they can be re-processed.
|
|
417
|
+
* @param {string} tableName - Target scan table name
|
|
418
|
+
* @param {string|null} absolutePath - If provided, reset only this specific file
|
|
419
|
+
* @returns {Promise<{ reset: number }>}
|
|
420
|
+
*/
|
|
421
|
+
async resetDetectionAttempts(tableName, absolutePath = null) {
|
|
422
|
+
let url = `/api/uploader/scan/reset-detection-attempts?tableName=${encodeURIComponent(tableName)}`;
|
|
423
|
+
if (absolutePath) {
|
|
424
|
+
url += `&absolutePath=${encodeURIComponent(absolutePath)}`;
|
|
425
|
+
}
|
|
426
|
+
const result = await this.#request(url, 'PATCH');
|
|
427
|
+
logger.debug(`Reset ${result.reset} detection attempt(s) in ${tableName}`);
|
|
428
|
+
return result;
|
|
429
|
+
}
|
|
430
|
+
|
|
401
431
|
/**
|
|
402
432
|
* Batch update detection results
|
|
403
433
|
* @param {string} tableName - Target table name
|
|
@@ -0,0 +1,218 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unit tests for the factura_inter_agencia matcher.
|
|
3
|
+
*
|
|
4
|
+
* Verifies that NORCOM↔PALCO CFDIs (XML and printable PDF text) are
|
|
5
|
+
* detected as `factura_inter_agencia`, and that ordinary CFDIs are NOT
|
|
6
|
+
* mis-classified.
|
|
7
|
+
*/
|
|
8
|
+
import { describe, it, expect } from '@jest/globals';
|
|
9
|
+
|
|
10
|
+
import {
|
|
11
|
+
facturaInterAgenciaDefinition,
|
|
12
|
+
INTER_AGENCIA_RFCS,
|
|
13
|
+
} from '../../src/document-types/factura-inter-agencia.js';
|
|
14
|
+
import { extractDocumentFields } from '../../src/document-type-shared.js';
|
|
15
|
+
|
|
16
|
+
// RFCs of the two agencies used throughout the fixtures below.
const NORCOM_RFC = 'NAA120215F20';
const PALCO_RFC = 'PCC1008161WA';

// CFDI 4.0 XML fixture: NORCOM is the emisor, PALCO the receptor, and every
// concepto uses ClaveProdServ 78141502.
const CFDI_XML_INTER_AGENCIA = `<?xml version="1.0" encoding="utf-8"?>
<cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" Version="4.0" Folio="012749"
TipoDeComprobante="I" SubTotal="3000.00" Total="3480.00" Moneda="MXN">
<cfdi:Emisor Rfc="${NORCOM_RFC}" Nombre="NORCOM AGENTES ADUANALES" RegimenFiscal="601"/>
<cfdi:Receptor Rfc="${PALCO_RFC}" Nombre="PALCO, CONSORCIO DE COMERCIO INTERNACIONAL"
DomicilioFiscalReceptor="32380" RegimenFiscalReceptor="601" UsoCFDI="G03"/>
<cfdi:Conceptos>
<cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="HONO" Cantidad="1.00"
ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="HONORARIOS"
ValorUnitario="1300.00" Importe="1300.00" ObjetoImp="02"/>
<cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="VALID" Cantidad="1.00"
ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="VALIDACION"
ValorUnitario="200.00" Importe="200.00" ObjetoImp="02"/>
</cfdi:Conceptos>
</cfdi:Comprobante>`;

// Same two agencies, but every 78141502 is swapped for 90121502 — the matcher
// is expected to reject this one.
const CFDI_XML_INTER_AGENCIA_WRONG_CONCEPT = CFDI_XML_INTER_AGENCIA.replaceAll(
  '78141502',
  '90121502',
);

// CFDI whose emisor and receptor are neither of the two agencies — expected
// not to match.
const CFDI_XML_REGULAR = `<?xml version="1.0" encoding="utf-8"?>
<cfdi:Comprobante xmlns:cfdi="http://www.sat.gob.mx/cfd/4" Version="4.0" Folio="000123"
TipoDeComprobante="I" SubTotal="100.00" Total="116.00">
<cfdi:Emisor Rfc="ACME010101AB1" Nombre="ACME COMERCIAL" RegimenFiscal="601"/>
<cfdi:Receptor Rfc="XYZ020202CD2" Nombre="CLIENTE FINAL"
DomicilioFiscalReceptor="00000" RegimenFiscalReceptor="601" UsoCFDI="G03"/>
<cfdi:Conceptos>
<cfdi:Concepto ClaveProdServ="78141502" NoIdentificacion="ITEM" Cantidad="1.00"
ClaveUnidad="E48" Unidad="Unidad de servicio" Descripcion="SERVICIO"
ValorUnitario="100.00" Importe="100.00" ObjetoImp="02"/>
</cfdi:Conceptos>
</cfdi:Comprobante>`;

// Plain-text fixture standing in for the text layer of the printable PDF of
// an inter-agencia CFDI (modelled on the sample SICINGR70-012749(...).pdf).
const CFDI_PDF_TEXT_INTER_AGENCIA = `NORCOM AGENTES ADUANALES S.C
Tipo de Comprobante: (I) Ingreso
Folio Fiscal 84FC9CE2-00D5-4843-B377-B463321F9FC6
Numero Folio 012749
Emisor
RFC ${NORCOM_RFC}
Razon Social NORCOM AGENTES ADUANALES
Receptor
RFC ${PALCO_RFC}
Razon Social PALCO, CONSORCIO DE COMERCIO INTERNACIONAL
Pedimento: 3458 6000046 Fecha: 17/02/2026 Tipo: EXP Clave: A1
Erogaciones
78141502 HONO HONORARIOS 1,300.00
78141502 SERCOM SERVICIOS COMPLEMENTARIOS 1,500.00
78141502 VALID VALIDACION 200.00
Sello Digital del CFDI
c4oBJ8/zAol0zg1jVe4MK8...
Cadena Original del Complemento de Certificacion Digital del SAT
||4.0|012749|...
Este documento es una representación impresa de un CFDI`;
|
|
79
|
+
|
|
80
|
+
describe('factura_inter_agencia matcher', () => {
  const definition = facturaInterAgenciaDefinition;

  // Run the extractor registered for `field` against `source`.
  const extractField = (field, source) =>
    definition.extractors.find((e) => e.field === field).extract(source);

  // Detected type is the first element returned by extractDocumentFields.
  const detectType = (source, extension, filePath) => {
    const [detectedType] = extractDocumentFields(source, extension, filePath);
    return detectedType;
  };

  describe('configured RFC set', () => {
    it('includes NORCOM and PALCO RFCs', () => {
      for (const rfc of [NORCOM_RFC, PALCO_RFC]) {
        expect(INTER_AGENCIA_RFCS).toContain(rfc);
      }
    });
  });

  describe('match()', () => {
    it('matches a NORCOM→PALCO XML CFDI with broker-service conceptos', () => {
      const matched = definition.match(CFDI_XML_INTER_AGENCIA);
      expect(matched).toBe(true);
    });

    it('matches the PDF-text representation of the same CFDI', () => {
      const matched = definition.match(CFDI_PDF_TEXT_INTER_AGENCIA);
      expect(matched).toBe(true);
    });

    it('does NOT match when ClaveProdServ is not 78141502', () => {
      const matched = definition.match(CFDI_XML_INTER_AGENCIA_WRONG_CONCEPT);
      expect(matched).toBe(false);
    });

    it('does NOT match a CFDI between unrelated taxpayers', () => {
      const matched = definition.match(CFDI_XML_REGULAR);
      expect(matched).toBe(false);
    });

    it('does NOT match arbitrary non-CFDI text containing the RFCs', () => {
      // Both RFCs and the product key are present, but nothing marks the
      // text as a CFDI.
      const text = `Reporte interno
RFC emisor: ${NORCOM_RFC}
RFC cliente: ${PALCO_RFC}
Clave 78141502`;
      expect(definition.match(text)).toBe(false);
    });

    it('does NOT match if only one of the configured RFCs is present', () => {
      const text = CFDI_XML_INTER_AGENCIA.replace(PALCO_RFC, 'XYZ020202CD2');
      expect(definition.match(text)).toBe(false);
    });
  });

  describe('extractors', () => {
    it('extracts emisor + receptor RFCs from XML', () => {
      const rfcEmisor = extractField('rfcEmisor', CFDI_XML_INTER_AGENCIA);
      const rfcReceptor = extractField('rfcReceptor', CFDI_XML_INTER_AGENCIA);

      expect(rfcEmisor.found).toBe(true);
      expect(rfcEmisor.value).toBe(NORCOM_RFC);
      expect(rfcReceptor.found).toBe(true);
      expect(rfcReceptor.value).toBe(PALCO_RFC);
    });

    it('extracts both RFCs from PDF text via fallback', () => {
      const rfcEmisor = extractField('rfcEmisor', CFDI_PDF_TEXT_INTER_AGENCIA);
      const rfcReceptor = extractField(
        'rfcReceptor',
        CFDI_PDF_TEXT_INTER_AGENCIA,
      );

      expect(rfcEmisor.found).toBe(true);
      expect(rfcReceptor.found).toBe(true);
      // Compared as a sorted pair, so which RFC lands in which field is not
      // asserted here.
      const found = [rfcEmisor.value, rfcReceptor.value].sort();
      expect(found).toEqual([NORCOM_RFC, PALCO_RFC].sort());
    });

    it('extracts the UUID (folio fiscal) from both formats', () => {
      // The XML fixture contains no UUID, so nothing should be found there.
      const fromXml = extractField('uuid', CFDI_XML_INTER_AGENCIA);
      expect(fromXml.found).toBe(false);

      const fromPdf = extractField('uuid', CFDI_PDF_TEXT_INTER_AGENCIA);
      expect(fromPdf.found).toBe(true);
      expect(fromPdf.value).toBe('84FC9CE2-00D5-4843-B377-B463321F9FC6');
    });

    it('extracts numPedimento from the printable PDF "Pedimento:" line', () => {
      const result = extractField('numPedimento', CFDI_PDF_TEXT_INTER_AGENCIA);
      expect(result.found).toBe(true);
      expect(result.value).toBe('34586000046');
    });

    it('extracts the CFDI folio from XML attribute', () => {
      const folio = extractField('folio', CFDI_XML_INTER_AGENCIA);
      expect(folio.found).toBe(true);
      expect(folio.value).toBe('012749');
    });
  });

  describe('registry order (factura_inter_agencia precedes facturas_comerciales)', () => {
    it('resolves the inter-agency CFDI XML to factura_inter_agencia, not factura_comercial', () => {
      const detectedType = detectType(
        CFDI_XML_INTER_AGENCIA,
        'xml',
        '/tmp/SICINGR70-012749(PALCO).XML',
      );
      expect(detectedType).toBe('factura_inter_agencia');
    });

    it('resolves the inter-agency CFDI PDF text to factura_inter_agencia', () => {
      const detectedType = detectType(
        CFDI_PDF_TEXT_INTER_AGENCIA,
        'pdf',
        '/tmp/SICINGR70-012749(PALCO).pdf',
      );
      expect(detectedType).toBe('factura_inter_agencia');
    });

    it('falls through to factura_comercial for a regular CFDI', () => {
      const detectedType = detectType(
        CFDI_XML_REGULAR,
        'xml',
        '/tmp/regular-invoice.xml',
      );
      expect(detectedType).toBe('factura_comercial');
    });
  });
});
|