@arela/uploader 1.0.21 → 1.0.23
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/DatastageCommand.js +164 -0
- package/src/commands/IdentifyCommand.js +144 -25
- package/src/commands/PollWorkerCommand.js +2 -0
- package/src/commands/ScanCommand.js +15 -0
- package/src/config/config.js +28 -2
- package/src/document-type-shared.js +15 -7
- package/src/document-types/_pedimento-shared-extractors.js +150 -35
- package/src/document-types/factura-inter-agencia.js +186 -0
- package/src/document-types/pedimento-completo-xml.js +62 -12
- package/src/document-types/pedimento-completo.js +43 -10
- package/src/document-types/pedimento-simplificado.js +33 -1
- package/src/document-types/proforma.js +2 -2
- package/src/file-detection.js +1 -3
- package/src/index.js +42 -0
- package/src/services/DatastageApiService.js +240 -0
- package/src/services/ScanApiService.js +30 -0
- package/tests/unit/factura-inter-agencia.test.js +218 -0
- package/tests/unit/pedimento-completo-xml-matcher.test.js +271 -0
- package/tests/unit/pedimento-simplificado-matcher.test.js +185 -0
package/package.json
CHANGED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
|
|
4
|
+
import { DatastageApiService } from '../services/DatastageApiService.js';
|
|
5
|
+
import logger from '../services/LoggingService.js';
|
|
6
|
+
|
|
7
|
+
import appConfig from '../config/config.js';
|
|
8
|
+
import ErrorHandler from '../errors/ErrorHandler.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Datastage Command Handler
 * Uploads monthly Datastage *.zip files from a directory to the API.
 * Sequential, idempotent via the cli `datastage_uploads` tracking table.
 */
export class DatastageCommand {
  constructor() {
    this.errorHandler = new ErrorHandler(logger);
  }

  /**
   * Register every *.zip found directly inside `options.dir`, then upload the
   * files the API still reports as pending, one at a time.
   *
   * @param {Object} options
   * @param {string} options.dir - directory containing *.zip files (required)
   * @param {string} [options.api] - 'default'|'agencia'|'cliente'
   * @param {boolean} [options.retryFailed] - re-attempt files in 'failed' status
   * @param {boolean} [options.showStats] - print final stats from API
   * @returns {Promise<{uploaded: number, failed: number, skipped: number}>}
   *   Counters for this run. `skipped` is the number of discovered zip files
   *   that the API did not report as pending.
   * @throws {Error} When `dir` is missing, does not exist, is not a directory,
   *   or when registering a file with the API fails.
   */
  async execute(options = {}) {
    const startTime = Date.now();

    if (!options.dir) {
      throw new Error('--dir <path> is required');
    }
    const sourceDirectory = path.resolve(options.dir);
    if (!fs.existsSync(sourceDirectory)) {
      throw new Error(`Directory not found: ${sourceDirectory}`);
    }
    if (!fs.statSync(sourceDirectory).isDirectory()) {
      throw new Error(`Not a directory: ${sourceDirectory}`);
    }

    // NOTE(review): `options.retryFailed` is documented above but is not read
    // anywhere in this method — confirm whether the API should receive it.
    const apiTarget = options.api || 'default';
    const api = new DatastageApiService(apiTarget);

    logger.info('π¦ Starting arela datastage command');
    logger.info(`π― API Target: ${apiTarget}`);
    logger.info(`π Source: ${sourceDirectory}`);

    // 1. Enumerate *.zip in root directory (non-recursive)
    const zipFiles = fs
      .readdirSync(sourceDirectory, { withFileTypes: true })
      .filter((entry) => entry.isFile() && /\.zip$/i.test(entry.name))
      .map((entry) => path.join(sourceDirectory, entry.name));

    if (zipFiles.length === 0) {
      logger.warn('No *.zip files found in directory. Nothing to do.');
      return { uploaded: 0, failed: 0, skipped: 0 };
    }
    logger.info(`π Found ${zipFiles.length} zip file(s)`);

    // 2. Register each file (idempotent upsert)
    logger.info('π Registering files...');
    for (const zipPath of zipFiles) {
      const fileName = path.basename(zipPath);
      try {
        // stat inside the try so a file that vanished after readdir is
        // reported with its name before the error propagates.
        const stats = fs.statSync(zipPath);
        await api.registerUpload({
          absolutePath: zipPath,
          fileName,
          sizeBytes: stats.size,
          fileModifiedAt: stats.mtime.toISOString(),
          sourceDirectory,
        });
      } catch (err) {
        logger.error(` β register failed for ${fileName}: ${err.message}`);
        throw err;
      }
    }

    // 3. Fetch pending list scoped to this directory
    const pending = await api.getPending(sourceDirectory);
    const pendingPaths = new Set(
      pending.map((row) => path.resolve(row.absolutePath)),
    );

    // Count only the files discovered in this run that are NOT pending. The
    // pending list may contain rows for files that no longer exist in the
    // directory, so subtracting set sizes could yield a wrong or negative count.
    const alreadyUploaded = zipFiles.filter(
      (zipPath) => !pendingPaths.has(zipPath),
    ).length;
    if (alreadyUploaded > 0) {
      logger.info(`β Skipping ${alreadyUploaded} already uploaded file(s)`);
    }

    if (pending.length === 0) {
      logger.success('✅ All files already uploaded. Nothing to do.');
      if (options.showStats) {
        const s = await api.getStats(sourceDirectory);
        logger.info(`π Stats: ${JSON.stringify(s)}`);
      }
      return { uploaded: 0, failed: 0, skipped: alreadyUploaded };
    }

    // 4. Sequential upload loop
    logger.info(`π Uploading ${pending.length} file(s) sequentially...`);
    let uploaded = 0;
    let failed = 0;

    for (const [index, row] of pending.entries()) {
      const localPath = row.absolutePath;
      const label = `[${index + 1}/${pending.length}] ${row.fileName}`;

      if (!fs.existsSync(localPath)) {
        await this.#reportFailure(
          api,
          row.id,
          label,
          `File missing on disk: ${localPath}`,
        );
        failed++;
        continue;
      }

      try {
        logger.info(`β¬ ${label}: uploading...`);
        const result = await api.uploadZip(localPath);
        const datastageId = result?.id || result?.data?.id;
        const folio = result?.folio || result?.data?.folio;
        if (!datastageId) {
          // JSON.stringify(undefined) returns undefined, so normalise to null
          // first; otherwise `.slice` would throw and mask this message.
          const preview = JSON.stringify(result ?? null).slice(0, 300);
          throw new Error(
            `API returned no datastage id in response: ${preview}`,
          );
        }
        await api.markUploaded(row.id, { datastageId, folio });
        logger.success(
          `β ${label}: folio=${folio || 'n/a'} datastageId=${datastageId}`,
        );
        uploaded++;
      } catch (err) {
        await this.#reportFailure(api, row.id, label, err.message);
        failed++;
      }
    }

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    logger.info('β'.repeat(60));
    logger.info(
      `Done in ${elapsed}s β uploaded=${uploaded} failed=${failed} skipped=${alreadyUploaded}`,
    );

    if (options.showStats) {
      const s = await api.getStats(sourceDirectory);
      logger.info(`π Final stats: ${JSON.stringify(s)}`);
    }

    return { uploaded, failed, skipped: alreadyUploaded };
  }

  /**
   * Log an upload failure and record it through the API. Recording is
   * best-effort: an error from `markFailed` is logged and not rethrown so the
   * remaining files are still processed.
   *
   * @param {Object} api - service exposing `markFailed(id, reason)`
   * @param {*} rowId - id of the tracking row, as returned by the pending list
   * @param {string} label - progress label used in log lines
   * @param {string} reason - failure description sent to the API
   * @returns {Promise<void>}
   */
  async #reportFailure(api, rowId, label, reason) {
    logger.error(`β ${label}: ${reason}`);
    try {
      await api.markFailed(rowId, reason);
    } catch (e) {
      logger.error(` mark-failed error: ${e.message}`);
    }
  }
}
|
|
162
|
+
|
|
163
|
+
const datastageCommand = new DatastageCommand();
|
|
164
|
+
export default datastageCommand;
|
|
@@ -10,9 +10,7 @@ import { ConfigurationError } from '../errors/ErrorTypes.js';
|
|
|
10
10
|
import FileDetectionService from '../file-detection.js';
|
|
11
11
|
|
|
12
12
|
/**
|
|
13
|
-
* Paid pedimento detected_type values.
|
|
14
|
-
* even though the XML matcher is currently disabled in the registry so that
|
|
15
|
-
* re-enabling it requires no changes here.
|
|
13
|
+
* Paid pedimento detected_type values.
|
|
16
14
|
*/
|
|
17
15
|
const DETECTED_PEDIMENTO_TYPES = new Set([
|
|
18
16
|
'pedimento_simplificado',
|
|
@@ -74,29 +72,95 @@ export class IdentifyCommand {
|
|
|
74
72
|
const scanConfig = appConfig.getScanConfig();
|
|
75
73
|
const batchSize = parseInt(options.batchSize) || 100;
|
|
76
74
|
|
|
75
|
+
// Parse optional path prefix mapping: "O:/=/Volumes/nas/"
|
|
76
|
+
const pathPrefixMap = options.pathPrefix
|
|
77
|
+
? this.#parsePathPrefix(options.pathPrefix)
|
|
78
|
+
: null;
|
|
79
|
+
|
|
77
80
|
logger.info('π Starting arela identify command');
|
|
78
81
|
logger.info(`π― API Target: ${apiTarget}`);
|
|
79
82
|
logger.info(`π¦ Batch Size: ${batchSize}`);
|
|
83
|
+
if (options.table) logger.info(`π Target table: ${options.table}`);
|
|
84
|
+
if (options.resetAttempts)
|
|
85
|
+
logger.info('β»οΈ Reset detection attempts: ON');
|
|
86
|
+
if (pathPrefixMap)
|
|
87
|
+
logger.info(
|
|
88
|
+
`πΊ Path prefix map: ${pathPrefixMap.from} β ${pathPrefixMap.to}`,
|
|
89
|
+
);
|
|
80
90
|
|
|
81
|
-
//
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
91
|
+
// Resolve the list of tables to process
|
|
92
|
+
let tables;
|
|
93
|
+
if (options.fileId && options.table) {
|
|
94
|
+
// Single-file mode — identify exactly one file record
|
|
95
|
+
logger.info(
|
|
96
|
+
`\nπ― Single-file mode: ${options.table} / ${options.fileId}`,
|
|
97
|
+
);
|
|
98
|
+
this.#reportProgress(0, `Fetching file record ${options.fileId}...`);
|
|
99
|
+
const record = await this.scanApiService.getFileRecord(
|
|
100
|
+
options.table,
|
|
101
|
+
options.fileId,
|
|
102
|
+
);
|
|
103
|
+
const results = await this.#detectFilesLocally(
|
|
104
|
+
[record],
|
|
105
|
+
1,
|
|
106
|
+
pathPrefixMap,
|
|
107
|
+
);
|
|
108
|
+
const updates = results.filter((r) => r !== null);
|
|
109
|
+
if (updates.length > 0) {
|
|
110
|
+
await this.scanApiService.batchUpdateDetection(
|
|
111
|
+
options.table,
|
|
112
|
+
updates,
|
|
113
|
+
);
|
|
114
|
+
}
|
|
115
|
+
this.#reportProgress(100, `Single-file identification complete`);
|
|
116
|
+
logger.success(`\nβ
Single-file identification complete`);
|
|
117
|
+
const firstUpdate = updates[0];
|
|
118
|
+
return {
|
|
119
|
+
processed: 1,
|
|
120
|
+
detected: updates.length,
|
|
121
|
+
proformas: 0,
|
|
122
|
+
errors: 1 - updates.length,
|
|
123
|
+
detectedType: firstUpdate?.detectedType ?? null,
|
|
124
|
+
detectedPedimento: firstUpdate?.detectedPedimento ?? null,
|
|
125
|
+
rfc: firstUpdate?.rfc ?? null,
|
|
126
|
+
arelaPath: firstUpdate?.arelaPath ?? null,
|
|
127
|
+
};
|
|
128
|
+
} else if (options.table) {
|
|
129
|
+
// Single-table mode — no need to match instance tables
|
|
130
|
+
tables = [{ tableName: options.table }];
|
|
131
|
+
logger.info(`\nπ Single-table mode: ${options.table}`);
|
|
132
|
+
} else {
|
|
133
|
+
logger.info('\nπ Fetching instance tables...');
|
|
134
|
+
tables = await this.scanApiService.getInstanceTables(
|
|
135
|
+
scanConfig.companySlug,
|
|
136
|
+
scanConfig.serverId,
|
|
137
|
+
scanConfig.basePathFull,
|
|
138
|
+
);
|
|
139
|
+
|
|
140
|
+
if (tables.length === 0) {
|
|
141
|
+
throw new ConfigurationError(
|
|
142
|
+
'No tables found for this instance. Run "arela scan" first.',
|
|
143
|
+
);
|
|
144
|
+
}
|
|
88
145
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
'No tables found for this instance. Run "arela scan" first.',
|
|
146
|
+
logger.info(
|
|
147
|
+
`π Found ${tables.length} table${tables.length === 1 ? '' : 's'} to process`,
|
|
92
148
|
);
|
|
149
|
+
for (const table of tables) {
|
|
150
|
+
logger.info(` - ${table.tableName}`);
|
|
151
|
+
}
|
|
93
152
|
}
|
|
94
153
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
154
|
+
// Optionally reset detection attempts so previously-failed files are retried
|
|
155
|
+
if (options.resetAttempts) {
|
|
156
|
+
for (const table of tables) {
|
|
157
|
+
const { reset } = await this.scanApiService.resetDetectionAttempts(
|
|
158
|
+
table.tableName,
|
|
159
|
+
);
|
|
160
|
+
logger.info(
|
|
161
|
+
`β»οΈ Reset ${reset} detection attempt(s) in ${table.tableName}`,
|
|
162
|
+
);
|
|
163
|
+
}
|
|
100
164
|
}
|
|
101
165
|
|
|
102
166
|
// Process each table
|
|
@@ -127,6 +191,7 @@ export class IdentifyCommand {
|
|
|
127
191
|
table.tableName,
|
|
128
192
|
batchSize,
|
|
129
193
|
startTime,
|
|
194
|
+
pathPrefixMap,
|
|
130
195
|
);
|
|
131
196
|
|
|
132
197
|
totalStats.processed += stats.processed;
|
|
@@ -183,7 +248,7 @@ export class IdentifyCommand {
|
|
|
183
248
|
* @param {number} startTime - Start time for speed calculation
|
|
184
249
|
* @returns {Promise<Object>} Processing statistics
|
|
185
250
|
*/
|
|
186
|
-
async #processTable(tableName, batchSize, startTime) {
|
|
251
|
+
async #processTable(tableName, batchSize, startTime, pathPrefixMap = null) {
|
|
187
252
|
// Get detection statistics first (allTypes=true to count all supported file types)
|
|
188
253
|
const initialStats = await this.scanApiService.getDetectionStats(
|
|
189
254
|
tableName,
|
|
@@ -255,7 +320,11 @@ export class IdentifyCommand {
|
|
|
255
320
|
const files = response.data;
|
|
256
321
|
|
|
257
322
|
// Detect files locally with concurrent processing
|
|
258
|
-
const detectionResults = await this.#detectFilesLocally(
|
|
323
|
+
const detectionResults = await this.#detectFilesLocally(
|
|
324
|
+
files,
|
|
325
|
+
10,
|
|
326
|
+
pathPrefixMap,
|
|
327
|
+
);
|
|
259
328
|
|
|
260
329
|
// Batch update to API
|
|
261
330
|
const updateResult = await this.scanApiService.batchUpdateDetection(
|
|
@@ -301,7 +370,7 @@ export class IdentifyCommand {
|
|
|
301
370
|
* @param {number} concurrency - Maximum concurrent detections
|
|
302
371
|
* @returns {Promise<Array>} Detection results
|
|
303
372
|
*/
|
|
304
|
-
async #detectFilesLocally(files, concurrency = 10) {
|
|
373
|
+
async #detectFilesLocally(files, concurrency = 10, pathPrefixMap = null) {
|
|
305
374
|
const limit = pLimit(concurrency);
|
|
306
375
|
const basePath = appConfig.getBasePath();
|
|
307
376
|
|
|
@@ -309,7 +378,17 @@ export class IdentifyCommand {
|
|
|
309
378
|
limit(async () => {
|
|
310
379
|
try {
|
|
311
380
|
// Check if file exists on filesystem
|
|
312
|
-
|
|
381
|
+
let absolutePath = file.absolute_path;
|
|
382
|
+
|
|
383
|
+
// Apply cross-platform path prefix mapping if configured
|
|
384
|
+
if (
|
|
385
|
+
pathPrefixMap &&
|
|
386
|
+
absolutePath &&
|
|
387
|
+
absolutePath.startsWith(pathPrefixMap.from)
|
|
388
|
+
) {
|
|
389
|
+
absolutePath =
|
|
390
|
+
pathPrefixMap.to + absolutePath.slice(pathPrefixMap.from.length);
|
|
391
|
+
}
|
|
313
392
|
|
|
314
393
|
if (!fs.existsSync(absolutePath)) {
|
|
315
394
|
return {
|
|
@@ -462,13 +541,15 @@ export class IdentifyCommand {
|
|
|
462
541
|
|
|
463
542
|
// Check if the text contains any required pedimento marker. This must
|
|
464
543
|
// stay aligned with the `match()` predicates in pedimento-simplificado.js
|
|
465
|
-
// and pedimento-completo.js
|
|
544
|
+
// and pedimento-completo.js (which accept both "DE" and "DEL" in the
|
|
545
|
+
// title, and treat the colon after "T. OPER" as optional).
|
|
466
546
|
const text = result.text || '';
|
|
467
|
-
const hasSimplificadoMarker =
|
|
547
|
+
const hasSimplificadoMarker =
|
|
548
|
+
/FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(text);
|
|
468
549
|
const hasCompletoMarkers =
|
|
469
550
|
/NUM\.?\s*PEDIMENTO:/i.test(text) &&
|
|
470
551
|
/CVE\.?\s*PEDIMENTO:/i.test(text) &&
|
|
471
|
-
/T\.?\s*OPER
|
|
552
|
+
/T\.?\s*OPER:?/i.test(text);
|
|
472
553
|
|
|
473
554
|
return !hasSimplificadoMarker && !hasCompletoMarkers;
|
|
474
555
|
}
|
|
@@ -543,6 +624,44 @@ export class IdentifyCommand {
|
|
|
543
624
|
}
|
|
544
625
|
}
|
|
545
626
|
}
|
|
627
|
+
|
|
628
|
+
/**
|
|
629
|
+
* Parse a path prefix mapping string such as "O:/=/Volumes/nas/" into { from, to }.
|
|
630
|
+
* Supports both "FROM=TO" and "FROM:TO" separators.
|
|
631
|
+
* @private
|
|
632
|
+
* @param {string} mapping
|
|
633
|
+
* @returns {{ from: string, to: string }}
|
|
634
|
+
*/
|
|
635
|
+
#parsePathPrefix(mapping) {
|
|
636
|
+
// Support either "FROM=TO" or "FROM:TO" as separator
|
|
637
|
+
const eqIdx = mapping.indexOf('=');
|
|
638
|
+
const colonIdx = mapping.indexOf(':');
|
|
639
|
+
|
|
640
|
+
let sep = -1;
|
|
641
|
+
// "O:/=/Volumes" β the colon inside "O:/" is part of a Windows drive letter; prefer '=' separator
|
|
642
|
+
if (eqIdx !== -1) {
|
|
643
|
+
sep = eqIdx;
|
|
644
|
+
} else if (colonIdx !== -1) {
|
|
645
|
+
sep = colonIdx;
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
if (sep === -1) {
|
|
649
|
+
throw new Error(
|
|
650
|
+
`Invalid --path-prefix format: "${mapping}". Expected "FROM=TO" e.g. "O:/=/Volumes/nas/"`,
|
|
651
|
+
);
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
const from = mapping.slice(0, sep);
|
|
655
|
+
const to = mapping.slice(sep + 1);
|
|
656
|
+
|
|
657
|
+
if (!from || !to) {
|
|
658
|
+
throw new Error(
|
|
659
|
+
`Invalid --path-prefix format: "${mapping}". Both FROM and TO parts must be non-empty.`,
|
|
660
|
+
);
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
return { from, to };
|
|
664
|
+
}
|
|
546
665
|
}
|
|
547
666
|
|
|
548
667
|
// Export singleton instance
|
|
@@ -579,6 +579,9 @@ export class ScanCommand {
|
|
|
579
579
|
* Normalize file record for database insertion
|
|
580
580
|
* Stores paths with forward slashes for consistency but keeps them absolute
|
|
581
581
|
* Sets likelySimplificado to true if file is a PDF and filename contains 'simp'
|
|
582
|
+
* Sets likelyInterAgencia to true if filename matches an inter-agency CFDI
|
|
583
|
+
* pattern (e.g. SICINGR*), so the API forces these XML/PDF through detection
|
|
584
|
+
* even though they lack the 'simp/pedim/covefact' heuristic.
|
|
582
585
|
* @private
|
|
583
586
|
*/
|
|
584
587
|
#normalizeFileRecord(filePath, fileStats, basePath, scanTimestamp) {
|
|
@@ -600,6 +603,17 @@ export class ScanCommand {
|
|
|
600
603
|
const likelySimplificado =
|
|
601
604
|
fileExtension === 'pdf' && /(simp|pedim|covefact)/i.test(fileName);
|
|
602
605
|
|
|
606
|
+
// Flag inter-agency CFDIs by filename so detection picks them up.
|
|
607
|
+
// Patterns are configurable via SCAN_INTER_AGENCIA_PATTERNS env var
|
|
608
|
+
// (see config.js). Only meaningful for PDF and XML.
|
|
609
|
+
let likelyInterAgencia = false;
|
|
610
|
+
if (fileExtension === 'pdf' || fileExtension === 'xml') {
|
|
611
|
+
const patterns = appConfig.scan.interAgenciaPatterns;
|
|
612
|
+
if (patterns && patterns.length > 0) {
|
|
613
|
+
likelyInterAgencia = patterns.some((re) => re.test(fileName));
|
|
614
|
+
}
|
|
615
|
+
}
|
|
616
|
+
|
|
603
617
|
return {
|
|
604
618
|
fileName,
|
|
605
619
|
fileExtension,
|
|
@@ -610,6 +624,7 @@ export class ScanCommand {
|
|
|
610
624
|
modifiedAt: fileStats.mtime.toISOString(),
|
|
611
625
|
scanTimestamp,
|
|
612
626
|
likelySimplificado,
|
|
627
|
+
likelyInterAgencia,
|
|
613
628
|
};
|
|
614
629
|
}
|
|
615
630
|
|
package/src/config/config.js
CHANGED
|
@@ -37,10 +37,10 @@ class Config {
|
|
|
37
37
|
const __dirname = path.dirname(__filename);
|
|
38
38
|
const packageJsonPath = path.resolve(__dirname, '../../package.json');
|
|
39
39
|
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
|
|
40
|
-
return packageJson.version || '1.0.
|
|
40
|
+
return packageJson.version || '1.0.23';
|
|
41
41
|
} catch (error) {
|
|
42
42
|
console.warn('β οΈ Could not read package.json version, using fallback');
|
|
43
|
-
return '1.0.
|
|
43
|
+
return '1.0.23';
|
|
44
44
|
}
|
|
45
45
|
}
|
|
46
46
|
|
|
@@ -294,6 +294,31 @@ class Config {
|
|
|
294
294
|
.map((p) => p.trim())
|
|
295
295
|
.filter(Boolean);
|
|
296
296
|
|
|
297
|
+
// Parse inter-agency CFDI filename patterns. Files whose basename matches
|
|
298
|
+
// any of these regex patterns are flagged at scan time (likelyInterAgencia)
|
|
299
|
+
// so the API forces them through detection and the factura_inter_agencia
|
|
300
|
+
// matcher can classify them. The push pipeline then excludes them (see
|
|
301
|
+
// NON_PUSHABLE_TYPES_SQL in arela-api). Comma-separated regex source list.
|
|
302
|
+
// Default: ^SICINGR — covers NORCOM's SICINGR70-NNNNNN(...).pdf/.XML files.
|
|
303
|
+
const defaultInterAgenciaPatterns = '^SICINGR';
|
|
304
|
+
const interAgenciaPatterns = (
|
|
305
|
+
process.env.SCAN_INTER_AGENCIA_PATTERNS || defaultInterAgenciaPatterns
|
|
306
|
+
)
|
|
307
|
+
.split(',')
|
|
308
|
+
.map((p) => p.trim())
|
|
309
|
+
.filter(Boolean)
|
|
310
|
+
.map((p) => {
|
|
311
|
+
try {
|
|
312
|
+
return new RegExp(p, 'i');
|
|
313
|
+
} catch (err) {
|
|
314
|
+
console.warn(
|
|
315
|
+
`β οΈ Invalid SCAN_INTER_AGENCIA_PATTERNS regex "${p}": ${err.message}`,
|
|
316
|
+
);
|
|
317
|
+
return null;
|
|
318
|
+
}
|
|
319
|
+
})
|
|
320
|
+
.filter(Boolean);
|
|
321
|
+
|
|
297
322
|
// Generate table name if all components are available
|
|
298
323
|
// Note: This is just for reference; actual table names are generated dynamically
|
|
299
324
|
// in ScanCommand based on discovered directories and levels
|
|
@@ -312,6 +337,7 @@ class Config {
|
|
|
312
337
|
basePathFull: basePathLabel, // Renamed for consistency
|
|
313
338
|
tableName,
|
|
314
339
|
excludePatterns,
|
|
340
|
+
interAgenciaPatterns,
|
|
315
341
|
batchSize: parseInt(process.env.SCAN_BATCH_SIZE) || 2000,
|
|
316
342
|
directoryLevel: parseInt(process.env.SCAN_DIRECTORY_LEVEL) || 0,
|
|
317
343
|
};
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
// Import all document type definitions
|
|
2
2
|
import { dodaPdfDefinition } from './document-types/doda-pdf.js';
|
|
3
3
|
import { dodaXmlDefinition } from './document-types/doda-xml.js';
|
|
4
|
+
import { facturaInterAgenciaDefinition } from './document-types/factura-inter-agencia.js';
|
|
4
5
|
import { facturasComerciales } from './document-types/facturas-comerciales.js';
|
|
6
|
+
import { pedimentoCompletoXmlDefinition } from './document-types/pedimento-completo-xml.js';
|
|
5
7
|
import { pedimentoCompletoDefinition } from './document-types/pedimento-completo.js';
|
|
6
|
-
// TODO: enable XML pedimento detection β implementation ready in pedimento-completo-xml.js
|
|
7
|
-
// import { pedimentoCompletoXmlDefinition } from './document-types/pedimento-completo-xml.js';
|
|
8
8
|
import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
|
|
9
9
|
import { proformaDefinition } from './document-types/proforma.js';
|
|
10
10
|
import { supportDocumentDefinition } from './document-types/support-document.js';
|
|
@@ -45,14 +45,14 @@ export class DocumentTypeDefinition {
|
|
|
45
45
|
const documentTypes = [
|
|
46
46
|
pedimentoSimplificadoDefinition,
|
|
47
47
|
pedimentoCompletoDefinition,
|
|
48
|
-
|
|
49
|
-
// matching import at the top of this file. All downstream code
|
|
50
|
-
// (composeArelaPath, arela-api SQL filters, IdentifyCommand counters)
|
|
51
|
-
// already accepts `pedimento_completo_xml`.
|
|
52
|
-
// pedimentoCompletoXmlDefinition,
|
|
48
|
+
pedimentoCompletoXmlDefinition,
|
|
53
49
|
supportDocumentDefinition,
|
|
54
50
|
dodaPdfDefinition,
|
|
55
51
|
dodaXmlDefinition,
|
|
52
|
+
// factura_inter_agencia MUST be evaluated BEFORE facturasComerciales
|
|
53
|
+
// because a NORCOM→PALCO CFDI would also match the generic commercial
|
|
54
|
+
// invoice matcher. First match wins (see extractDocumentFields).
|
|
55
|
+
facturaInterAgenciaDefinition,
|
|
56
56
|
facturasComerciales,
|
|
57
57
|
// Add more document types here as needed
|
|
58
58
|
];
|
|
@@ -114,6 +114,14 @@ export function extractDocumentFields(source, fileExtension, filePath) {
|
|
|
114
114
|
? docType.extractPedimentoYear(source, fields, filePath)
|
|
115
115
|
: null;
|
|
116
116
|
|
|
117
|
+
// Ensure downstream code (composeArelaPath) sees `numPedimento` as a
|
|
118
|
+
// field. PDF matchers add it via an explicit extractor; XML matchers
|
|
119
|
+
// compose it externally via extractNumPedimento. Backfill so both paths
|
|
120
|
+
// expose the same shape.
|
|
121
|
+
if (pedimento && !fields.some((f) => f.name === 'numPedimento')) {
|
|
122
|
+
fields.push(new FieldResult('numPedimento', true, pedimento));
|
|
123
|
+
}
|
|
124
|
+
|
|
117
125
|
return [resolvedType, fields, pedimento, year];
|
|
118
126
|
}
|
|
119
127
|
}
|