@arela/uploader 1.0.21 → 1.0.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/DatastageCommand.js +164 -0
- package/src/commands/IdentifyCommand.js +138 -19
- package/src/commands/PollWorkerCommand.js +2 -0
- package/src/config/config.js +2 -2
- package/src/document-types/_pedimento-shared-extractors.js +132 -36
- package/src/document-types/pedimento-completo.js +39 -8
- package/src/document-types/pedimento-simplificado.js +30 -1
- package/src/index.js +42 -0
- package/src/services/DatastageApiService.js +240 -0
- package/src/services/ScanApiService.js +30 -0
package/package.json
CHANGED
|
@@ -0,0 +1,164 @@
|
|
|
1
|
+
import fs from 'fs';
|
|
2
|
+
import path from 'path';
|
|
3
|
+
|
|
4
|
+
import { DatastageApiService } from '../services/DatastageApiService.js';
|
|
5
|
+
import logger from '../services/LoggingService.js';
|
|
6
|
+
|
|
7
|
+
import appConfig from '../config/config.js';
|
|
8
|
+
import ErrorHandler from '../errors/ErrorHandler.js';
|
|
9
|
+
|
|
10
|
+
/**
 * Datastage Command Handler
 *
 * Uploads monthly Datastage *.zip files from a directory to the API.
 * Files are handled one at a time: every discovered zip is first registered
 * with the API, then the API's pending list for the directory drives the
 * upload loop (described upstream as idempotent via the cli
 * `datastage_uploads` tracking table).
 */
export class DatastageCommand {
  constructor() {
    this.errorHandler = new ErrorHandler(logger);
  }

  /**
   * Register and upload every *.zip located directly inside `options.dir`.
   *
   * @param {Object} options
   * @param {string} options.dir - directory containing *.zip files (required)
   * @param {string} [options.api] - 'default'|'agencia'|'cliente'
   * @param {boolean} [options.retryFailed] - re-attempt files in 'failed' status.
   *   NOTE(review): this flag is not read anywhere in this method — confirm
   *   whether the API's pending list is expected to honour it.
   * @param {boolean} [options.showStats] - print final stats from API
   * @returns {Promise<{uploaded: number, failed: number, skipped: number}>}
   *   `skipped` counts discovered zips that the API did not report as pending.
   * @throws {Error} if `dir` is missing or not a directory, or if registering
   *   any file fails (the run aborts before uploading anything).
   */
  async execute(options = {}) {
    const startTime = Date.now();

    if (!options.dir) {
      throw new Error('--dir <path> is required');
    }
    const sourceDirectory = path.resolve(options.dir);
    if (!fs.existsSync(sourceDirectory)) {
      throw new Error(`Directory not found: ${sourceDirectory}`);
    }
    const dirStat = fs.statSync(sourceDirectory);
    if (!dirStat.isDirectory()) {
      throw new Error(`Not a directory: ${sourceDirectory}`);
    }

    const apiTarget = options.api || 'default';
    const api = new DatastageApiService(apiTarget);

    logger.info('📦 Starting arela datastage command');
    logger.info(`🎯 API Target: ${apiTarget}`);
    logger.info(`📂 Source: ${sourceDirectory}`);

    // 1. Enumerate *.zip in root directory (non-recursive)
    const entries = fs.readdirSync(sourceDirectory, { withFileTypes: true });
    const zipFiles = entries
      .filter((e) => e.isFile() && /\.zip$/i.test(e.name))
      .map((e) => path.join(sourceDirectory, e.name));

    if (zipFiles.length === 0) {
      logger.warn('No *.zip files found in directory. Nothing to do.');
      return { uploaded: 0, failed: 0, skipped: 0 };
    }
    logger.info(`🗂 Found ${zipFiles.length} zip file(s)`);

    // 2. Register each file (idempotent upsert)
    logger.info('📝 Registering files...');
    for (const zipPath of zipFiles) {
      try {
        // stat inside the try: a file that disappeared after readdirSync is
        // reported with the same per-file message as an API failure.
        const stats = fs.statSync(zipPath);
        await api.registerUpload({
          absolutePath: zipPath,
          fileName: path.basename(zipPath),
          sizeBytes: stats.size,
          fileModifiedAt: stats.mtime.toISOString(),
          sourceDirectory,
        });
      } catch (err) {
        logger.error(
          ` ✗ register failed for ${path.basename(zipPath)}: ${err.message}`,
        );
        throw err;
      }
    }

    // 3. Fetch pending list scoped to this directory
    const pending = await api.getPending(sourceDirectory);
    const pendingPaths = new Set(pending.map((p) => p.absolutePath));

    // Count only discovered files that are NOT pending. Subtracting set sizes
    // would be wrong (even negative) when the pending list contains rows for
    // files that are no longer present in the directory.
    const alreadyUploaded = zipFiles.filter(
      (zipPath) => !pendingPaths.has(zipPath),
    ).length;
    if (alreadyUploaded > 0) {
      logger.info(`⏭ Skipping ${alreadyUploaded} already uploaded file(s)`);
    }

    if (pending.length === 0) {
      logger.success('✅ All files already uploaded. Nothing to do.');
      if (options.showStats) {
        const s = await api.getStats(sourceDirectory);
        logger.info(`📊 Stats: ${JSON.stringify(s)}`);
      }
      return { uploaded: 0, failed: 0, skipped: alreadyUploaded };
    }

    // 4. Sequential upload loop
    logger.info(`🚀 Uploading ${pending.length} file(s) sequentially...`);
    let uploaded = 0;
    let failed = 0;

    // Log the failure, best-effort flag the row as failed, and count it.
    // A mark-failed error is logged but never aborts the run.
    const recordFailure = async (row, label, message) => {
      logger.error(`✗ ${label}: ${message}`);
      try {
        await api.markFailed(row.id, message);
      } catch (e) {
        logger.error(` mark-failed error: ${e.message}`);
      }
      failed++;
    };

    for (let i = 0; i < pending.length; i++) {
      const row = pending[i];
      const localPath = row.absolutePath;
      const label = `[${i + 1}/${pending.length}] ${row.fileName}`;

      if (!fs.existsSync(localPath)) {
        await recordFailure(row, label, `File missing on disk: ${localPath}`);
        continue;
      }

      try {
        logger.info(`⬆ ${label}: uploading...`);
        const result = await api.uploadZip(localPath);
        const datastageId = result?.id || result?.data?.id;
        const folio = result?.folio || result?.data?.folio;
        if (!datastageId) {
          // JSON.stringify(undefined) yields undefined (not a string); fall
          // back to String() so building this message cannot itself throw.
          const snippet = (JSON.stringify(result) ?? String(result)).slice(
            0,
            300,
          );
          throw new Error(
            'API returned no datastage id in response: ' + snippet,
          );
        }
        await api.markUploaded(row.id, { datastageId, folio });
        logger.success(
          `✓ ${label}: folio=${folio || 'n/a'} datastageId=${datastageId}`,
        );
        uploaded++;
      } catch (err) {
        await recordFailure(row, label, err.message);
      }
    }

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    logger.info('—'.repeat(60));
    logger.info(
      `Done in ${elapsed}s — uploaded=${uploaded} failed=${failed} skipped=${alreadyUploaded}`,
    );

    if (options.showStats) {
      const s = await api.getStats(sourceDirectory);
      logger.info(`📊 Final stats: ${JSON.stringify(s)}`);
    }

    return { uploaded, failed, skipped: alreadyUploaded };
  }
}
|
|
162
|
+
|
|
163
|
+
// Module-level instance, exported as this module's default export.
const datastageCommand = new DatastageCommand();
export default datastageCommand;
|
|
@@ -74,29 +74,95 @@ export class IdentifyCommand {
|
|
|
74
74
|
const scanConfig = appConfig.getScanConfig();
|
|
75
75
|
const batchSize = parseInt(options.batchSize) || 100;
|
|
76
76
|
|
|
77
|
+
// Parse optional path prefix mapping: "O:/=/Volumes/nas/"
|
|
78
|
+
const pathPrefixMap = options.pathPrefix
|
|
79
|
+
? this.#parsePathPrefix(options.pathPrefix)
|
|
80
|
+
: null;
|
|
81
|
+
|
|
77
82
|
logger.info('🔍 Starting arela identify command');
|
|
78
83
|
logger.info(`🎯 API Target: ${apiTarget}`);
|
|
79
84
|
logger.info(`📦 Batch Size: ${batchSize}`);
|
|
85
|
+
if (options.table) logger.info(`📌 Target table: ${options.table}`);
|
|
86
|
+
if (options.resetAttempts)
|
|
87
|
+
logger.info('♻️ Reset detection attempts: ON');
|
|
88
|
+
if (pathPrefixMap)
|
|
89
|
+
logger.info(
|
|
90
|
+
`🗺 Path prefix map: ${pathPrefixMap.from} → ${pathPrefixMap.to}`,
|
|
91
|
+
);
|
|
80
92
|
|
|
81
|
-
//
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
93
|
+
// Resolve the list of tables to process
|
|
94
|
+
let tables;
|
|
95
|
+
if (options.fileId && options.table) {
|
|
96
|
+
// Single-file mode — identify exactly one file record
|
|
97
|
+
logger.info(
|
|
98
|
+
`\n🎯 Single-file mode: ${options.table} / ${options.fileId}`,
|
|
99
|
+
);
|
|
100
|
+
this.#reportProgress(0, `Fetching file record ${options.fileId}...`);
|
|
101
|
+
const record = await this.scanApiService.getFileRecord(
|
|
102
|
+
options.table,
|
|
103
|
+
options.fileId,
|
|
104
|
+
);
|
|
105
|
+
const results = await this.#detectFilesLocally(
|
|
106
|
+
[record],
|
|
107
|
+
1,
|
|
108
|
+
pathPrefixMap,
|
|
109
|
+
);
|
|
110
|
+
const updates = results.filter((r) => r !== null);
|
|
111
|
+
if (updates.length > 0) {
|
|
112
|
+
await this.scanApiService.batchUpdateDetection(
|
|
113
|
+
options.table,
|
|
114
|
+
updates,
|
|
115
|
+
);
|
|
116
|
+
}
|
|
117
|
+
this.#reportProgress(100, `Single-file identification complete`);
|
|
118
|
+
logger.success(`\n✅ Single-file identification complete`);
|
|
119
|
+
const firstUpdate = updates[0];
|
|
120
|
+
return {
|
|
121
|
+
processed: 1,
|
|
122
|
+
detected: updates.length,
|
|
123
|
+
proformas: 0,
|
|
124
|
+
errors: 1 - updates.length,
|
|
125
|
+
detectedType: firstUpdate?.detectedType ?? null,
|
|
126
|
+
detectedPedimento: firstUpdate?.detectedPedimento ?? null,
|
|
127
|
+
rfc: firstUpdate?.rfc ?? null,
|
|
128
|
+
arelaPath: firstUpdate?.arelaPath ?? null,
|
|
129
|
+
};
|
|
130
|
+
} else if (options.table) {
|
|
131
|
+
// Single-table mode — no need to match instance tables
|
|
132
|
+
tables = [{ tableName: options.table }];
|
|
133
|
+
logger.info(`\n📌 Single-table mode: ${options.table}`);
|
|
134
|
+
} else {
|
|
135
|
+
logger.info('\n📊 Fetching instance tables...');
|
|
136
|
+
tables = await this.scanApiService.getInstanceTables(
|
|
137
|
+
scanConfig.companySlug,
|
|
138
|
+
scanConfig.serverId,
|
|
139
|
+
scanConfig.basePathFull,
|
|
140
|
+
);
|
|
141
|
+
|
|
142
|
+
if (tables.length === 0) {
|
|
143
|
+
throw new ConfigurationError(
|
|
144
|
+
'No tables found for this instance. Run "arela scan" first.',
|
|
145
|
+
);
|
|
146
|
+
}
|
|
88
147
|
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
'No tables found for this instance. Run "arela scan" first.',
|
|
148
|
+
logger.info(
|
|
149
|
+
`📋 Found ${tables.length} table${tables.length === 1 ? '' : 's'} to process`,
|
|
92
150
|
);
|
|
151
|
+
for (const table of tables) {
|
|
152
|
+
logger.info(` - ${table.tableName}`);
|
|
153
|
+
}
|
|
93
154
|
}
|
|
94
155
|
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
156
|
+
// Optionally reset detection attempts so previously-failed files are retried
|
|
157
|
+
if (options.resetAttempts) {
|
|
158
|
+
for (const table of tables) {
|
|
159
|
+
const { reset } = await this.scanApiService.resetDetectionAttempts(
|
|
160
|
+
table.tableName,
|
|
161
|
+
);
|
|
162
|
+
logger.info(
|
|
163
|
+
`♻️ Reset ${reset} detection attempt(s) in ${table.tableName}`,
|
|
164
|
+
);
|
|
165
|
+
}
|
|
100
166
|
}
|
|
101
167
|
|
|
102
168
|
// Process each table
|
|
@@ -127,6 +193,7 @@ export class IdentifyCommand {
|
|
|
127
193
|
table.tableName,
|
|
128
194
|
batchSize,
|
|
129
195
|
startTime,
|
|
196
|
+
pathPrefixMap,
|
|
130
197
|
);
|
|
131
198
|
|
|
132
199
|
totalStats.processed += stats.processed;
|
|
@@ -183,7 +250,7 @@ export class IdentifyCommand {
|
|
|
183
250
|
* @param {number} startTime - Start time for speed calculation
|
|
184
251
|
* @returns {Promise<Object>} Processing statistics
|
|
185
252
|
*/
|
|
186
|
-
async #processTable(tableName, batchSize, startTime) {
|
|
253
|
+
async #processTable(tableName, batchSize, startTime, pathPrefixMap = null) {
|
|
187
254
|
// Get detection statistics first (allTypes=true to count all supported file types)
|
|
188
255
|
const initialStats = await this.scanApiService.getDetectionStats(
|
|
189
256
|
tableName,
|
|
@@ -255,7 +322,11 @@ export class IdentifyCommand {
|
|
|
255
322
|
const files = response.data;
|
|
256
323
|
|
|
257
324
|
// Detect files locally with concurrent processing
|
|
258
|
-
const detectionResults = await this.#detectFilesLocally(
|
|
325
|
+
const detectionResults = await this.#detectFilesLocally(
|
|
326
|
+
files,
|
|
327
|
+
10,
|
|
328
|
+
pathPrefixMap,
|
|
329
|
+
);
|
|
259
330
|
|
|
260
331
|
// Batch update to API
|
|
261
332
|
const updateResult = await this.scanApiService.batchUpdateDetection(
|
|
@@ -301,7 +372,7 @@ export class IdentifyCommand {
|
|
|
301
372
|
* @param {number} concurrency - Maximum concurrent detections
|
|
302
373
|
* @returns {Promise<Array>} Detection results
|
|
303
374
|
*/
|
|
304
|
-
async #detectFilesLocally(files, concurrency = 10) {
|
|
375
|
+
async #detectFilesLocally(files, concurrency = 10, pathPrefixMap = null) {
|
|
305
376
|
const limit = pLimit(concurrency);
|
|
306
377
|
const basePath = appConfig.getBasePath();
|
|
307
378
|
|
|
@@ -309,7 +380,17 @@ export class IdentifyCommand {
|
|
|
309
380
|
limit(async () => {
|
|
310
381
|
try {
|
|
311
382
|
// Check if file exists on filesystem
|
|
312
|
-
|
|
383
|
+
let absolutePath = file.absolute_path;
|
|
384
|
+
|
|
385
|
+
// Apply cross-platform path prefix mapping if configured
|
|
386
|
+
if (
|
|
387
|
+
pathPrefixMap &&
|
|
388
|
+
absolutePath &&
|
|
389
|
+
absolutePath.startsWith(pathPrefixMap.from)
|
|
390
|
+
) {
|
|
391
|
+
absolutePath =
|
|
392
|
+
pathPrefixMap.to + absolutePath.slice(pathPrefixMap.from.length);
|
|
393
|
+
}
|
|
313
394
|
|
|
314
395
|
if (!fs.existsSync(absolutePath)) {
|
|
315
396
|
return {
|
|
@@ -543,6 +624,44 @@ export class IdentifyCommand {
|
|
|
543
624
|
}
|
|
544
625
|
}
|
|
545
626
|
}
|
|
627
|
+
|
|
628
|
+
/**
|
|
629
|
+
* Parse a path prefix mapping string such as "O:/=/Volumes/nas/" into { from, to }.
|
|
630
|
+
* Supports both "FROM=TO" and "FROM:TO" separators.
|
|
631
|
+
* @private
|
|
632
|
+
* @param {string} mapping
|
|
633
|
+
* @returns {{ from: string, to: string }}
|
|
634
|
+
*/
|
|
635
|
+
#parsePathPrefix(mapping) {
|
|
636
|
+
// Support either "FROM=TO" or "FROM:TO" as separator
|
|
637
|
+
const eqIdx = mapping.indexOf('=');
|
|
638
|
+
const colonIdx = mapping.indexOf(':');
|
|
639
|
+
|
|
640
|
+
let sep = -1;
|
|
641
|
+
// "O:/=/Volumes" — the colon inside "O:/" is part of a Windows drive letter; prefer '=' separator
|
|
642
|
+
if (eqIdx !== -1) {
|
|
643
|
+
sep = eqIdx;
|
|
644
|
+
} else if (colonIdx !== -1) {
|
|
645
|
+
sep = colonIdx;
|
|
646
|
+
}
|
|
647
|
+
|
|
648
|
+
if (sep === -1) {
|
|
649
|
+
throw new Error(
|
|
650
|
+
`Invalid --path-prefix format: "${mapping}". Expected "FROM=TO" e.g. "O:/=/Volumes/nas/"`,
|
|
651
|
+
);
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
const from = mapping.slice(0, sep);
|
|
655
|
+
const to = mapping.slice(sep + 1);
|
|
656
|
+
|
|
657
|
+
if (!from || !to) {
|
|
658
|
+
throw new Error(
|
|
659
|
+
`Invalid --path-prefix format: "${mapping}". Both FROM and TO parts must be non-empty.`,
|
|
660
|
+
);
|
|
661
|
+
}
|
|
662
|
+
|
|
663
|
+
return { from, to };
|
|
664
|
+
}
|
|
546
665
|
}
|
|
547
666
|
|
|
548
667
|
// Export singleton instance
|
package/src/config/config.js
CHANGED
|
@@ -37,10 +37,10 @@ class Config {
|
|
|
37
37
|
const __dirname = path.dirname(__filename);
|
|
38
38
|
const packageJsonPath = path.resolve(__dirname, '../../package.json');
|
|
39
39
|
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
|
|
40
|
-
return packageJson.version || '1.0.
|
|
40
|
+
return packageJson.version || '1.0.22';
|
|
41
41
|
} catch (error) {
|
|
42
42
|
console.warn('⚠️ Could not read package.json version, using fallback');
|
|
43
|
-
return '1.0.
|
|
43
|
+
return '1.0.22';
|
|
44
44
|
}
|
|
45
45
|
}
|
|
46
46
|
|
|
@@ -25,49 +25,118 @@ export const tipoOperacionExtractor = {
|
|
|
25
25
|
},
|
|
26
26
|
};
|
|
27
27
|
|
|
28
|
-
// 3) Clave de Pedimento: 2 chars after tipoOperacion
|
|
28
|
+
// 3) Clave de Pedimento: 2 chars after tipoOperacion (multiple layout patterns)
|
|
29
29
|
export const clavePedimentoExtractor = {
  field: 'clavePedimento',
  /**
   * Try each known layout in order and return the 2-character clave captured
   * by the first one that matches.
   *
   * @param {string} source - extracted document text
   * @returns {FieldResult} found=true with the clave, or found=false with null
   */
  extract: (source) => {
    const layouts = [
      // Standard spaced layout: "22 07 3429 2002089 EXP RT"
      /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
      // Concatenated 15-digit layout: "260734296013645 EXP RT"
      /\d{15}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
      // Fallback: T.OPER keyword followed by 2-char clave
      /\b(?:EXP|IMP|TRA|TRN)\s+([A-Z][A-Z0-9])\b/,
      // Explicit label
      /CVE\.?\s*PED(?:IMENTO)?[^A-Z0-9]{0,60}?\b([A-Z][A-Z0-9])\b/i,
    ];

    let hit = null;
    for (let i = 0; i < layouts.length && hit === null; i += 1) {
      hit = source.match(layouts[i]);
    }

    return hit === null
      ? new FieldResult('clavePedimento', false, null)
      : new FieldResult('clavePedimento', true, hit[1]);
  },
};
|
|
38
49
|
|
|
39
50
|
// 4) Aduana E/S: 3-digit code on the peso-bruto line
|
|
51
|
+
// Fallback A: allow 2-digit code (some SIMP layouts omit the leading zero).
|
|
52
|
+
// Fallback B: derive the 2-digit customs-office code from positions 2-3 of
|
|
53
|
+
// numPedimento (e.g. "260734296013645" → "07"), which is what the
|
|
54
|
+
// arela_path formula uses after padStart(2,'0').
|
|
40
55
|
export const aduanaEntradaSalidaExtractor = {
  field: 'aduanaEntradaSalida',
  /**
   * Resolve the aduana code, preferring the trailing code on the peso-bruto
   * line (3 digits, then 2 digits) and finally digits 2-3 of a 15-digit
   * pedimento number found anywhere in the text.
   *
   * @param {string} source - extracted document text
   * @returns {FieldResult} found=true with the code, or found=false with null
   */
  extract: (source) => {
    const hit = (value) => new FieldResult('aduanaEntradaSalida', true, value);

    // Peso-bruto line ending in the aduana code: 3-digit form first, then the
    // 2-digit form used by layouts that omit the leading zero.
    const pesoBrutoLinePatterns = [
      /^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m,
      /^\s*\d+\s+[\d,.]+\s+(\d{2})\s*$/m,
    ];
    for (const pattern of pesoBrutoLinePatterns) {
      const lineMatch = source.match(pattern);
      if (lineMatch) return hit(lineMatch[1]);
    }

    // Last resort: AA BB CCCC DDDDDDD → BB is the customs-office code.
    const pedimento = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
    if (pedimento) {
      const digits = pedimento[0].replace(/\s/g, '');
      if (digits.length === 15) return hit(digits.slice(2, 4));
    }

    return new FieldResult('aduanaEntradaSalida', false, null);
  },
};
|
|
51
83
|
|
|
52
|
-
// 5) RFC:
|
|
84
|
+
// 5) RFC: importer/exporter RFC on its own line.
// Strategy A: strict whole-line pattern (3-4 letters + 6 consecutive digits +
// 3 alphanum). COVE codes like COVE2681B1RX8 naturally fail this because
// their digit section is non-consecutive (2681B1 has a letter at pos 5).
// Strategy B: RFC as a word within a longer line (handles "RFC: IMS030409FZ0").
// Strategy C: loose 12-13 alphanum occupying a whole trimmed line — every
// line is checked, so a leading COVE code is skipped and an RFC on any
// later line (including the line immediately after it) is still found.
export const rfcExtractor = {
  field: 'rfc',
  /**
   * @param {string} source - extracted document text
   * @returns {FieldResult} found=true with the RFC text, or found=false with null
   */
  extract: (source) => {
    const RFC_STRICT = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
    const RFC_EMBEDDED = /\b([A-Z]{3,4}\d{6}[A-Z0-9]{3})\b/i;
    const RFC_LOOSE = /^[A-Z0-9]{12,13}$/;

    const lines = source
      .split(/\r?\n/)
      .map((l) => l.trim())
      .filter((l) => l);

    // Primary: RFC occupies an entire trimmed line
    const strictLine = lines.find((line) => RFC_STRICT.test(line));
    if (strictLine) return new FieldResult('rfc', true, strictLine);

    // Fallback A: RFC embedded in a longer line (word-boundary search)
    for (const line of lines) {
      const m = line.match(RFC_EMBEDDED);
      if (m) return new FieldResult('rfc', true, m[1]);
    }

    // Fallback B: loose 12-13 alphanum isolated on its own line.
    // Scanning the split lines (rather than a global regex that consumes the
    // newline on both sides of each candidate) keeps adjacent candidates
    // visible: a regex match on a COVE line would swallow the newline that
    // the very next line needs in order to match.
    const looseLine = lines.find(
      (line) => RFC_LOOSE.test(line) && !/^COVE/i.test(line),
    );
    if (looseLine) return new FieldResult('rfc', true, looseLine);

    return new FieldResult('rfc', false, null);
  },
};
|
|
60
121
|
|
|
61
|
-
// 6) Código de Aceptación: 8 alphanumeric chars on the line right after the RFC
|
|
122
|
+
// 6) Código de Aceptación: 8 alphanumeric chars on the line right after the RFC.
|
|
123
|
+
// Uses the same RFC-line detection logic as rfcExtractor.
|
|
62
124
|
export const codigoAceptacionExtractor = {
|
|
63
125
|
field: 'codigoAceptacion',
|
|
64
126
|
extract: (source) => {
|
|
127
|
+
const RFC_STRICT = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
|
|
128
|
+
const RFC_LOOSE = /^[A-Z0-9]{12,13}$/;
|
|
65
129
|
const lines = source
|
|
66
130
|
.split(/\r?\n/)
|
|
67
131
|
.map((l) => l.trim())
|
|
68
132
|
.filter((l) => l.length > 0);
|
|
69
133
|
|
|
70
|
-
|
|
134
|
+
// Find RFC line using strict pattern first, then loose (excluding COVE)
|
|
135
|
+
let rfcIndex = lines.findIndex((l) => RFC_STRICT.test(l));
|
|
136
|
+
if (rfcIndex < 0) {
|
|
137
|
+
rfcIndex = lines.findIndex((l) => RFC_LOOSE.test(l) && !/^COVE/i.test(l));
|
|
138
|
+
}
|
|
139
|
+
|
|
71
140
|
let code = null;
|
|
72
141
|
if (rfcIndex >= 0 && /^[A-Z0-9]{8}$/.test(lines[rfcIndex + 1] || '')) {
|
|
73
142
|
code = lines[rfcIndex + 1];
|
|
@@ -77,24 +146,31 @@ export const codigoAceptacionExtractor = {
|
|
|
77
146
|
};
|
|
78
147
|
|
|
79
148
|
// 7) Num. E-Document: collects all 13-char alphanumeric codes following
|
|
80
|
-
// `NUM. E-DOCUMENT`
|
|
81
|
-
// per ED clave inside the CLAVE/COMPL. IDENTIFICADOR table.
|
|
149
|
+
// `NUM. E-DOCUMENT` / `NUMERO DE E-DOCUMENT` labels.
|
|
82
150
|
export const numEDocumentoExtractor = {
|
|
83
151
|
field: 'numEDocumento',
|
|
84
152
|
extract: (source) => {
|
|
85
153
|
const lines = source.split(/\r?\n/);
|
|
86
|
-
const
|
|
154
|
+
const extractedCodes = [];
|
|
155
|
+
const titlePatterns = [/NUMERO\s+DE\s+E-DOCUMENT/i, /NUM\.?\s*E-DOCUMENT/i];
|
|
87
156
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
157
|
+
for (let i = 0; i < lines.length; i++) {
|
|
158
|
+
const line = lines[i];
|
|
159
|
+
const hasTitle = titlePatterns.some((p) => p.test(line));
|
|
160
|
+
if (!hasTitle) continue;
|
|
91
161
|
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
162
|
+
// Codes on the title line itself
|
|
163
|
+
const codesInLine = line.match(/[A-Z0-9]{13}/g) || [];
|
|
164
|
+
extractedCodes.push(...codesInLine);
|
|
165
|
+
|
|
166
|
+
// Codes on the next few lines (e.g. CLAVE/COMPL. table rows)
|
|
167
|
+
for (let j = 1; j <= 10 && i + j < lines.length; j++) {
|
|
168
|
+
const nextLine = lines[i + j];
|
|
169
|
+
if (/NUMERO|OBSERVACIONES/i.test(nextLine)) break;
|
|
170
|
+
const codesInNextLine = nextLine.match(/[A-Z0-9]{13}/g) || [];
|
|
171
|
+
extractedCodes.push(...codesInNextLine);
|
|
172
|
+
}
|
|
173
|
+
}
|
|
98
174
|
|
|
99
175
|
if (extractedCodes.length === 0) {
|
|
100
176
|
return new FieldResult('numEDocumento', false, null);
|
|
@@ -109,14 +185,18 @@ export const numEDocumentoExtractor = {
|
|
|
109
185
|
export const paymentDateExtractor = {
  field: 'paymentDate',
  /**
   * Return the date captured by the first label pattern that matches, in the
   * textual form it has in the document (DD/MM/YYYY, or YYYY/MM/DD for the
   * second pattern).
   *
   * @param {string} source - extracted document text
   * @returns {FieldResult} found=true with the date text, or found=false with null
   */
  extract: (source) => {
    const labelledDates = [
      /FECHA\s+DE\s+PAGO:?\s*(\d{2}\/\d{2}\/\d{4})/i,
      /FECHA\s+DE\s+PAGO:?\s*(\d{4}\/\d{2}\/\d{2})/i,
      /2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/,
      /(?:^|\n)\s*PAGO\s+(\d{2}\/\d{2}\/\d{4})/i,
      /PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/i,
    ];

    let hit = null;
    for (let i = 0; i < labelledDates.length && hit === null; i += 1) {
      hit = source.match(labelledDates[i]);
    }

    if (hit === null) {
      return new FieldResult('paymentDate', false, null);
    }
    return new FieldResult('paymentDate', true, hit[1]);
  },
};
|
|
122
202
|
|
|
@@ -152,14 +232,16 @@ export const coveExtractor = {
|
|
|
152
232
|
};
|
|
153
233
|
|
|
154
234
|
// 10) Patente: from the PATENTE/PEDIMENTO/ADUANA header table
|
|
235
|
+
// Fallback A: pago electrónico line "3429 4024126 07" (pedimento_completo).
|
|
236
|
+
// Fallback B: positions 4-7 of numPedimento (always available when found).
|
|
155
237
|
export const patenteExtractor = {
|
|
156
238
|
field: 'patente',
|
|
157
239
|
extract: (source) => {
|
|
240
|
+
// Primary: PATENTE:/PEDIMENTO:/ADUANA: header followed by data line
|
|
158
241
|
const lines = source.split(/\r?\n/);
|
|
159
242
|
const patenteHeaderIndex = lines.findIndex((line) =>
|
|
160
243
|
/PATENTE:.*PEDIMENTO:.*ADUANA:/i.test(line),
|
|
161
244
|
);
|
|
162
|
-
|
|
163
245
|
if (patenteHeaderIndex >= 0) {
|
|
164
246
|
for (let i = patenteHeaderIndex + 1; i < lines.length; i++) {
|
|
165
247
|
const line = lines[i].trim();
|
|
@@ -169,6 +251,20 @@ export const patenteExtractor = {
|
|
|
169
251
|
}
|
|
170
252
|
}
|
|
171
253
|
}
|
|
254
|
+
|
|
255
|
+
// Fallback A: pago electrónico line "3429 4024126 07"
|
|
256
|
+
const pagoMatch = source.match(/(\d{4})\s+\d{7}\s+\d{2}/);
|
|
257
|
+
if (pagoMatch) return new FieldResult('patente', true, pagoMatch[1]);
|
|
258
|
+
|
|
259
|
+
// Fallback B: positions 4-7 of numPedimento
|
|
260
|
+
const pedMatch = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
|
|
261
|
+
if (pedMatch) {
|
|
262
|
+
const num = pedMatch[0].replace(/\s/g, '');
|
|
263
|
+
if (num.length === 15) {
|
|
264
|
+
return new FieldResult('patente', true, num.substring(4, 8));
|
|
265
|
+
}
|
|
266
|
+
}
|
|
267
|
+
|
|
172
268
|
return new FieldResult('patente', false, null);
|
|
173
269
|
},
|
|
174
270
|
};
|
|
@@ -17,22 +17,53 @@ export const pedimentoCompletoDefinition = {
|
|
|
17
17
|
type: 'pedimento_completo',
|
|
18
18
|
extensions: ['pdf'],
|
|
19
19
|
match: (source) => {
|
|
20
|
+
// Hard exclude: "FORMA SIMPLIFICADA" is handled by pedimento_simplificado.
|
|
20
21
|
if (/FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source)) return false;
|
|
21
22
|
|
|
23
|
+
// Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
|
|
24
|
+
// different document type handled by aviso_consolidado.
|
|
25
|
+
if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
|
|
26
|
+
|
|
22
27
|
const hasHeaderFields =
|
|
23
28
|
/NUM\.?\s*PEDIMENTO:/i.test(source) &&
|
|
24
29
|
/CVE\.?\s*PEDIMENTO:/i.test(source) &&
|
|
25
30
|
/T\.?\s*OPER:/i.test(source);
|
|
26
|
-
if (
|
|
31
|
+
if (hasHeaderFields) {
|
|
32
|
+
const hasCopyMarker =
|
|
33
|
+
/ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
|
|
34
|
+
/SEGUNDA\s+COPIA/i.test(source) ||
|
|
35
|
+
/TERCERA\s+COPIA/i.test(source) ||
|
|
36
|
+
/COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
|
|
37
|
+
/DEFINITIVO/i.test(source) ||
|
|
38
|
+
/ANEXO\s+DEL\s+PEDIMENTO/i.test(source) ||
|
|
39
|
+
/\*+FIN\s+DE\s+PEDIMENTO\s*\*+/i.test(source);
|
|
40
|
+
if (hasCopyMarker) return true;
|
|
41
|
+
}
|
|
27
42
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
/
|
|
32
|
-
|
|
33
|
-
/
|
|
43
|
+
// Fallback clue-counting heuristic for exotic layouts.
|
|
44
|
+
const clues = [
|
|
45
|
+
/\bPEDIMENTO\s*\n.*NUM\.\s*PEDIMENTO:/i,
|
|
46
|
+
/NUM\.\s*PEDIMENTO:\s*T\.OPER:\s*CVE\.PEDIMENTO:\s*REGIMEN:/i,
|
|
47
|
+
/\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+[A-Z]{3}/i,
|
|
48
|
+
/CERTIFICACIONES/i,
|
|
49
|
+
/DESTINO\/ORIGEN\s+TIPO\s+CAMBIO\s+PESO\s+BRUTO\s+ADUANA\s+E\/S/i,
|
|
50
|
+
/MEDIOS\s+DE\s+TRANSPORTE/i,
|
|
51
|
+
/DATOS\s+DEL\s+IMPORTADOR\/EXPORTADOR/i,
|
|
52
|
+
/RFC:\s+NOMBRE,\s+DENOMINACION\s+O\s+RAZON\s+SOCIAL:/i,
|
|
53
|
+
/CUADRO\s+DE\s+LIQUIDACION/i,
|
|
54
|
+
/\*\*\*\s+PAGO\s+ELECTRONICO\s+\*\*\*/i,
|
|
55
|
+
/PATENTE:\s+PEDIMENTO:\s+ADUANA:/i,
|
|
56
|
+
/LINEA\s+DE\s+CAPTURA:/i,
|
|
57
|
+
/DATOS\s+DEL\s+PROVEEDOR\s+O\s+COMPRADOR/i,
|
|
58
|
+
/CLAVE\/COMPL\.\s+IDENTIFICADOR/i,
|
|
59
|
+
/ANEXO\s+DEL\s+PEDIMENTO/i,
|
|
60
|
+
/\*+FIN\s+DE\s+PEDIMENTO\s+\*+NUM\.\s+TOTAL\s+DE\s+PARTIDAS:/i,
|
|
61
|
+
/DECLARO\s+BAJO\s+PROTESTA\s+DE\s+DECIR\s+VERDAD/i,
|
|
62
|
+
/PEDIMENTO\s+ELABORADO\s+DE\s+CONFORMIDAD/i,
|
|
63
|
+
];
|
|
34
64
|
|
|
35
|
-
|
|
65
|
+
const found = clues.filter((clue) => clue.test(source));
|
|
66
|
+
return found.length > clues.length * 0.25;
|
|
36
67
|
},
|
|
37
68
|
|
|
38
69
|
/**
|
|
@@ -7,7 +7,36 @@ export const pedimentoSimplificadoDefinition = {
|
|
|
7
7
|
type: 'pedimento_simplificado',
|
|
8
8
|
extensions: ['pdf'],
|
|
9
9
|
match: (source) => {
|
|
10
|
-
|
|
10
|
+
// Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
|
|
11
|
+
// different document type handled by aviso_consolidado.
|
|
12
|
+
if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
|
|
13
|
+
|
|
14
|
+
// Fast path: the literal title appears on standard SIMP layouts.
|
|
15
|
+
if (/FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source)) return true;
|
|
16
|
+
|
|
17
|
+
// Some PDFs (single-page anchors) lack that title but still carry the
|
|
18
|
+
// three pedimento header fields. Treat them as simplificado UNLESS they
|
|
19
|
+
// have the multi-page copy markers that uniquely identify a completo.
|
|
20
|
+
const hasHeaderFields =
|
|
21
|
+
/NUM\.?\s*PEDIMENTO:/i.test(source) &&
|
|
22
|
+
/CVE\.?\s*PEDIMENTO:/i.test(source) &&
|
|
23
|
+
/T\.?\s*OPER:/i.test(source);
|
|
24
|
+
if (!hasHeaderFields) return false;
|
|
25
|
+
|
|
26
|
+
const hasCompletoCopyMarker =
|
|
27
|
+
/ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
|
|
28
|
+
/SEGUNDA\s+COPIA/i.test(source) ||
|
|
29
|
+
/TERCERA\s+COPIA/i.test(source) ||
|
|
30
|
+
/COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
|
|
31
|
+
/ANEXO\s+DEL\s+PEDIMENTO/i.test(source) ||
|
|
32
|
+
/\*+FIN\s+DE\s+PEDIMENTO\s*\*+/i.test(source);
|
|
33
|
+
if (hasCompletoCopyMarker) return false;
|
|
34
|
+
|
|
35
|
+
// Exclude COVE/eDocument forms that may reference a pedimento in their body.
|
|
36
|
+
if (/COMPROBANTE\s+DE\s+VALOR\s+ELECTR[ÓO]NICO/i.test(source)) return false;
|
|
37
|
+
if (/\bCOVE\b\s*:/i.test(source) && !/PAGO/i.test(source)) return false;
|
|
38
|
+
|
|
39
|
+
return true;
|
|
11
40
|
},
|
|
12
41
|
|
|
13
42
|
/**
|
package/src/index.js
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { Command } from 'commander';
|
|
3
3
|
|
|
4
|
+
import datastageCommand from './commands/DatastageCommand.js';
|
|
4
5
|
import gdriveSyncCommand from './commands/GDriveSyncCommand.js';
|
|
5
6
|
import identifyCommand from './commands/IdentifyCommand.js';
|
|
6
7
|
import pollWorkerCommand from './commands/PollWorkerCommand.js';
|
|
@@ -26,6 +27,7 @@ class ArelaUploaderCLI {
|
|
|
26
27
|
this.scanCommand = scanCommand;
|
|
27
28
|
this.uploadCommand = new UploadCommand();
|
|
28
29
|
this.watchCommand = watchCommand;
|
|
30
|
+
this.datastageCommand = datastageCommand;
|
|
29
31
|
|
|
30
32
|
this.#setupProgram();
|
|
31
33
|
this.#setupCommands();
|
|
@@ -204,6 +206,34 @@ class ArelaUploaderCLI {
|
|
|
204
206
|
}
|
|
205
207
|
});
|
|
206
208
|
|
|
209
|
+
// Datastage command — upload monthly datastage *.zip files from a directory
|
|
210
|
+
this.program
|
|
211
|
+
.command('datastage')
|
|
212
|
+
.description(
|
|
213
|
+
'📦 Upload monthly datastage *.zip files from a directory to Arela',
|
|
214
|
+
)
|
|
215
|
+
.requiredOption(
|
|
216
|
+
'--dir <path>',
|
|
217
|
+
'Directory containing *.zip files (non-recursive)',
|
|
218
|
+
)
|
|
219
|
+
.option(
|
|
220
|
+
'--api <target>',
|
|
221
|
+
'API target: default|agencia|cliente',
|
|
222
|
+
'default',
|
|
223
|
+
)
|
|
224
|
+
.option('--retry-failed', 'Re-attempt files in failed status')
|
|
225
|
+
.option('--show-stats', 'Print final stats summary')
|
|
226
|
+
.action(async (options) => {
|
|
227
|
+
try {
|
|
228
|
+
if (options.api && options.api !== 'default') {
|
|
229
|
+
appConfig.setApiTarget(options.api);
|
|
230
|
+
}
|
|
231
|
+
await this.datastageCommand.execute(options);
|
|
232
|
+
} catch (error) {
|
|
233
|
+
this.errorHandler.handleFatalError(error, { command: 'datastage' });
|
|
234
|
+
}
|
|
235
|
+
});
|
|
236
|
+
|
|
207
237
|
// Detection command
|
|
208
238
|
this.program
|
|
209
239
|
.command('detect')
|
|
@@ -335,6 +365,18 @@ class ArelaUploaderCLI {
|
|
|
335
365
|
'Number of files to process in each batch',
|
|
336
366
|
'100',
|
|
337
367
|
)
|
|
368
|
+
.option(
|
|
369
|
+
'--table <tableName>',
|
|
370
|
+
'Process only this scan table (instead of all instance tables)',
|
|
371
|
+
)
|
|
372
|
+
.option(
|
|
373
|
+
'--reset-attempts',
|
|
374
|
+
'Reset detection_attempts to 0 before processing so previously-failed files are retried',
|
|
375
|
+
)
|
|
376
|
+
.option(
|
|
377
|
+
'--path-prefix <mapping>',
|
|
378
|
+
'Remap file path prefix for cross-platform access. Format: FROM:TO e.g. "O:/=/Volumes/nas/"',
|
|
379
|
+
)
|
|
338
380
|
.option('--show-stats', 'Show performance statistics')
|
|
339
381
|
.action(async (options) => {
|
|
340
382
|
try {
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
import FormData from 'form-data';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import { Agent } from 'http';
|
|
4
|
+
import { Agent as HttpsAgent } from 'https';
|
|
5
|
+
import fetch from 'node-fetch';
|
|
6
|
+
import path from 'path';
|
|
7
|
+
|
|
8
|
+
import appConfig from '../config/config.js';
|
|
9
|
+
import logger from './LoggingService.js';
|
|
10
|
+
|
|
11
|
+
/**
 * Datastage API Service
 * Handles API communication for the arela datastage command:
 *  - tracking endpoints under /api/uploader/datastage/*
 *  - zip upload endpoint POST /api/datastage (multipart, field: zipFile)
 *
 * Every request carries the configured token in the `x-api-key` header and
 * goes through a keep-alive HTTP/HTTPS agent owned by the instance.
 */
export class DatastageApiService {
  /**
   * Reads the base URL and token for the given target from the app config and
   * the retry/connection tuning from environment variables
   * (MAX_API_CONNECTIONS, API_CONNECTION_TIMEOUT, API_MAX_RETRIES,
   * API_RETRY_EXPONENTIAL_BACKOFF, API_RETRY_DELAY).
   *
   * @param {string|null} apiTarget - 'default'|'agencia'|'cliente'
   */
  constructor(apiTarget = null) {
    this.apiTarget = apiTarget;
    const apiConfig = appConfig.getApiConfig(apiTarget);
    this.baseUrl = apiConfig.baseUrl;
    this.token = apiConfig.token;

    // Explicit radix so values such as "0x10" are never read as hexadecimal.
    // `|| default` also replaces NaN (unset/garbage) and 0 with the default.
    const maxApiConnections =
      Number.parseInt(process.env.MAX_API_CONNECTIONS, 10) || 10;
    const connectionTimeout =
      Number.parseInt(process.env.API_CONNECTION_TIMEOUT, 10) || 300000;

    this.maxRetries = Number.parseInt(process.env.API_MAX_RETRIES, 10) || 3;
    // Exponential backoff is on unless explicitly disabled with 'false'.
    this.useExponentialBackoff =
      process.env.API_RETRY_EXPONENTIAL_BACKOFF !== 'false';
    this.fixedRetryDelay =
      Number.parseInt(process.env.API_RETRY_DELAY, 10) || 1000;

    const agentOpts = {
      keepAlive: true,
      keepAliveMsecs: 30000,
      maxSockets: maxApiConnections,
      maxFreeSockets: Math.ceil(maxApiConnections / 2),
      maxTotalSockets: maxApiConnections + 5,
      timeout: connectionTimeout,
      scheduling: 'fifo',
    };
    this.httpAgent = new Agent(agentOpts);
    this.httpsAgent = new HttpsAgent(agentOpts);

    logger.debug(
      `🔗 Datastage API Service configured (target=${apiTarget || 'default'})`,
    );
  }

  /**
   * Pick the agent matching the URL scheme.
   * @param {string} url - absolute request URL
   * @returns {Agent|HttpsAgent}
   */
  #getAgent(url) {
    return url.startsWith('https://') ? this.httpsAgent : this.httpAgent;
  }

  /**
   * Decide whether a failed attempt is worth retrying.
   * Retryable: selected network error codes, HTTP 429, any HTTP 5xx, or an
   * error whose message contains 'timeout'.
   *
   * @param {Error|null|undefined} error
   * @param {{status: number}|null} [response] - response of the SAME attempt, if any
   * @returns {boolean}
   */
  #isRetryableError(error, response = null) {
    const retryableCodes = [
      'ECONNRESET',
      'ETIMEDOUT',
      'ECONNREFUSED',
      'ENOTFOUND',
      'EAI_AGAIN',
    ];
    if (retryableCodes.includes(error?.code)) return true;

    if (response) {
      const status = response.status;
      if (status === 429 || (status >= 500 && status < 600)) return true;
    }
    if (error?.message && error.message.includes('timeout')) return true;
    return false;
  }

  /**
   * Delay before the next attempt, in milliseconds, with ±20% random jitter.
   * Fixed mode uses `fixedRetryDelay`; exponential mode uses
   * 1000ms * 2^(attempt - 1), capped at 16000ms.
   *
   * @param {number} attempt - 1-based number of the attempt that just failed
   * @returns {number}
   */
  #calculateBackoff(attempt) {
    const withJitter = (ms) =>
      Math.floor(ms + ms * 0.2 * (Math.random() * 2 - 1));

    if (!this.useExponentialBackoff) {
      return withJitter(this.fixedRetryDelay);
    }
    const baseDelay = 1000;
    const maxDelay = 16000;
    return withJitter(Math.min(baseDelay * Math.pow(2, attempt - 1), maxDelay));
  }

  /**
   * @param {number} ms
   * @returns {Promise<void>} resolves after `ms` milliseconds
   */
  #sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Turn a non-OK response into an Error carrying `status`.
   * The message is the JSON body's `message` field when present, otherwise the
   * raw body text, otherwise `fallbackMessage`.
   *
   * @param {Object} response - fetch response with `ok === false`
   * @param {string} fallbackMessage
   * @returns {Promise<Error>}
   */
  async #buildHttpError(response, fallbackMessage) {
    const text = await response.text();
    let message = fallbackMessage;
    try {
      message = JSON.parse(text).message || fallbackMessage;
    } catch {
      // Body is not JSON (or not an object): use it verbatim when non-empty.
      message = text || fallbackMessage;
    }
    const err = new Error(message);
    err.status = response.status;
    return err;
  }

  /**
   * Send a JSON request with retries and return the parsed JSON response.
   * Up to `maxRetries` retries are made (so at most `maxRetries + 1` attempts).
   *
   * @param {string} endpoint - path appended to the base URL
   * @param {string} [method='GET']
   * @param {Object|null} [body=null] - JSON-serialised when truthy
   * @param {Object} [headers={}] - extra headers, override the defaults
   * @returns {Promise<any>} parsed JSON body of the successful response
   * @throws {Error} the last error; HTTP failures carry a numeric `status`
   */
  async #requestJson(endpoint, method = 'GET', body = null, headers = {}) {
    const url = `${this.baseUrl}${endpoint}`;
    const options = {
      method,
      headers: {
        'x-api-key': this.token,
        'Content-Type': 'application/json',
        ...headers,
      },
      agent: this.#getAgent(url),
    };
    if (body) options.body = JSON.stringify(body);

    const retries = this.maxRetries;
    let lastError;

    for (let attempt = 1; attempt <= retries + 1; attempt++) {
      // Scoped per attempt on purpose: a 5xx response from an earlier attempt
      // must not make a later, unrelated exception look retryable.
      let response = null;
      try {
        response = await fetch(url, options);
        if (!response.ok) {
          throw await this.#buildHttpError(
            response,
            `API ${method} ${endpoint} failed: ${response.status} ${response.statusText}`,
          );
        }
        return await response.json();
      } catch (error) {
        lastError = error;
        if (!this.#isRetryableError(error, response) || attempt > retries) {
          throw error;
        }
        const delayMs = this.#calculateBackoff(attempt);
        logger.warn(
          `Retrying ${method} ${endpoint} (attempt ${attempt}/${retries + 1}) in ${delayMs}ms: ${error.message}`,
        );
        await this.#sleep(delayMs);
      }
    }
    throw lastError;
  }

  // --- Tracking endpoints ---

  /**
   * POST /api/uploader/datastage/register with the given file metadata.
   * @param {Object} file
   * @param {string} file.absolutePath
   * @param {string} file.fileName
   * @param {number} file.sizeBytes
   * @param {*} file.fileModifiedAt
   * @param {string} file.sourceDirectory
   * @returns {Promise<any>} parsed JSON response
   */
  async registerUpload({
    absolutePath,
    fileName,
    sizeBytes,
    fileModifiedAt,
    sourceDirectory,
  }) {
    return this.#requestJson('/api/uploader/datastage/register', 'POST', {
      absolutePath,
      fileName,
      sizeBytes,
      fileModifiedAt,
      sourceDirectory,
    });
  }

  /**
   * GET /api/uploader/datastage/pending, optionally filtered by directory.
   * @param {string|null} [sourceDirectory=null]
   * @returns {Promise<any>} parsed JSON response
   */
  async getPending(sourceDirectory = null) {
    const qs = sourceDirectory
      ? `?sourceDirectory=${encodeURIComponent(sourceDirectory)}`
      : '';
    return this.#requestJson(`/api/uploader/datastage/pending${qs}`, 'GET');
  }

  /**
   * GET /api/uploader/datastage/stats, optionally filtered by directory.
   * @param {string|null} [sourceDirectory=null]
   * @returns {Promise<any>} parsed JSON response
   */
  async getStats(sourceDirectory = null) {
    const qs = sourceDirectory
      ? `?sourceDirectory=${encodeURIComponent(sourceDirectory)}`
      : '';
    return this.#requestJson(`/api/uploader/datastage/stats${qs}`, 'GET');
  }

  /**
   * PATCH /api/uploader/datastage/:id/mark-uploaded.
   * @param {string|number} id - tracking row id
   * @param {Object} [result]
   * @param {*} [result.datastageId]
   * @param {*} [result.folio]
   * @returns {Promise<any>} parsed JSON response
   */
  async markUploaded(id, { datastageId, folio } = {}) {
    return this.#requestJson(
      `/api/uploader/datastage/${encodeURIComponent(id)}/mark-uploaded`,
      'PATCH',
      { datastageId, folio },
    );
  }

  /**
   * PATCH /api/uploader/datastage/:id/mark-failed.
   * @param {string|number} id - tracking row id
   * @param {*} error - stringified; falsy values are sent as 'unknown'
   * @returns {Promise<any>} parsed JSON response
   */
  async markFailed(id, error) {
    return this.#requestJson(
      `/api/uploader/datastage/${encodeURIComponent(id)}/mark-failed`,
      'PATCH',
      { error: String(error || 'unknown') },
    );
  }

  // --- Zip upload ---

  /**
   * Upload a single zip file to POST /api/datastage (multipart, field name 'zipFile').
   * Returns the created Datastage row { id, folio, ... }.
   * This call is not retried here.
   *
   * @param {string} localPath - path of the zip file to stream
   * @returns {Promise<Object>} parsed JSON response
   * @throws {Error} on a non-OK response; the error carries a numeric `status`
   */
  async uploadZip(localPath) {
    const url = `${this.baseUrl}/api/datastage`;
    const fileStream = fs.createReadStream(localPath);
    const form = new FormData();
    form.append('zipFile', fileStream, {
      filename: path.basename(localPath),
      contentType: 'application/zip',
    });

    try {
      const response = await fetch(url, {
        method: 'POST',
        headers: {
          'x-api-key': this.token,
          ...form.getHeaders(),
        },
        body: form,
        agent: this.#getAgent(url),
      });

      if (!response.ok) {
        throw await this.#buildHttpError(
          response,
          `Datastage upload failed: ${response.status} ${response.statusText}`,
        );
      }
      return await response.json();
    } finally {
      // Release the file descriptor when the request failed or the server
      // answered before the body was fully read; a no-op after a full read.
      fileStream.destroy();
    }
  }
}
|
|
239
|
+
|
|
240
|
+
export default DatastageApiService;
|
|
@@ -375,6 +375,20 @@ export class ScanApiService {
|
|
|
375
375
|
* @param {boolean} allTypes - When true, fetch all supported file types instead of just likely-simplificado PDFs
|
|
376
376
|
* @returns {Promise<Object>} { data: Array, hasMore: boolean }
|
|
377
377
|
*/
|
|
378
|
+
/**
|
|
379
|
+
* Get a single file record by ID (for single-file identify mode).
|
|
380
|
+
* @param {string} tableName - Scan table name (with or without cli. prefix)
|
|
381
|
+
* @param {string} fileId - UUID of the file record
|
|
382
|
+
* @returns {Promise<{ id: string, file_name: string, file_extension: string, absolute_path: string }>}
|
|
383
|
+
*/
|
|
384
|
+
async getFileRecord(tableName, fileId) {
|
|
385
|
+
const cleanTable = tableName.replace(/^cli\./, '');
|
|
386
|
+
const url = `/api/uploader/scan/file-record?tableName=${encodeURIComponent(cleanTable)}&fileId=${encodeURIComponent(fileId)}`;
|
|
387
|
+
const result = await this.#request(url, 'GET');
|
|
388
|
+
logger.debug(`Fetched file record ${fileId} from ${cleanTable}`);
|
|
389
|
+
return result;
|
|
390
|
+
}
|
|
391
|
+
|
|
378
392
|
async fetchPdfsForDetection(
|
|
379
393
|
tableName,
|
|
380
394
|
offset = 0,
|
|
@@ -398,6 +412,22 @@ export class ScanApiService {
|
|
|
398
412
|
return result;
|
|
399
413
|
}
|
|
400
414
|
|
|
415
|
+
/**
|
|
416
|
+
* Reset detection_attempts to 0 for undetected files so they can be re-processed.
|
|
417
|
+
* @param {string} tableName - Target scan table name
|
|
418
|
+
* @param {string|null} absolutePath - If provided, reset only this specific file
|
|
419
|
+
* @returns {Promise<{ reset: number }>}
|
|
420
|
+
*/
|
|
421
|
+
async resetDetectionAttempts(tableName, absolutePath = null) {
|
|
422
|
+
let url = `/api/uploader/scan/reset-detection-attempts?tableName=${encodeURIComponent(tableName)}`;
|
|
423
|
+
if (absolutePath) {
|
|
424
|
+
url += `&absolutePath=${encodeURIComponent(absolutePath)}`;
|
|
425
|
+
}
|
|
426
|
+
const result = await this.#request(url, 'PATCH');
|
|
427
|
+
logger.debug(`Reset ${result.reset} detection attempt(s) in ${tableName}`);
|
|
428
|
+
return result;
|
|
429
|
+
}
|
|
430
|
+
|
|
401
431
|
/**
|
|
402
432
|
* Batch update detection results
|
|
403
433
|
* @param {string} tableName - Target table name
|