@arela/uploader 1.0.21 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "1.0.21",
3
+ "version": "1.0.22",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -0,0 +1,164 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+
4
+ import { DatastageApiService } from '../services/DatastageApiService.js';
5
+ import logger from '../services/LoggingService.js';
6
+
7
+ import appConfig from '../config/config.js';
8
+ import ErrorHandler from '../errors/ErrorHandler.js';
9
+
10
/**
 * Datastage Command Handler
 * Uploads monthly Datastage *.zip files from a directory to the API.
 * Files are processed one at a time; progress is tracked through the
 * register / pending / mark-uploaded / mark-failed calls of DatastageApiService.
 */
export class DatastageCommand {
  constructor() {
    this.errorHandler = new ErrorHandler(logger);
  }

  /**
   * Register every *.zip in `options.dir`, then upload the ones the API
   * reports as pending, one after another.
   *
   * @param {Object} options
   * @param {string} options.dir - directory containing *.zip files (required)
   * @param {string} [options.api] - 'default'|'agencia'|'cliente'
   * @param {boolean} [options.retryFailed] - accepted from the CLI but not read
   *   by this method. NOTE(review): confirm whether failed rows are expected to
   *   come back through `getPending`.
   * @param {boolean} [options.showStats] - print final stats from API
   * @returns {Promise<{uploaded: number, failed: number, skipped: number}>}
   * @throws {Error} when `dir` is missing, does not exist, or is not a
   *   directory; registration errors are logged and rethrown.
   */
  async execute(options = {}) {
    const startTime = Date.now();
    const sourceDirectory = this.#resolveSourceDirectory(options.dir);

    const apiTarget = options.api || 'default';
    const api = new DatastageApiService(apiTarget);

    logger.info('📦 Starting arela datastage command');
    logger.info(`🎯 API Target: ${apiTarget}`);
    logger.info(`📂 Source: ${sourceDirectory}`);

    // 1. Enumerate *.zip in root directory (non-recursive)
    const zipFiles = fs
      .readdirSync(sourceDirectory, { withFileTypes: true })
      .filter((entry) => entry.isFile() && /\.zip$/i.test(entry.name))
      .map((entry) => path.join(sourceDirectory, entry.name));

    if (zipFiles.length === 0) {
      logger.warn('No *.zip files found in directory. Nothing to do.');
      return { uploaded: 0, failed: 0, skipped: 0 };
    }
    logger.info(`🗂 Found ${zipFiles.length} zip file(s)`);

    // 2. Register each file; the first failure aborts the run
    logger.info('📝 Registering files...');
    for (const zipPath of zipFiles) {
      await this.#registerFile(api, zipPath, sourceDirectory);
    }

    // 3. Fetch pending list scoped to this directory
    const pending = await api.getPending(sourceDirectory);
    const pendingPaths = new Set(pending.map((row) => row.absolutePath));

    // Count local files that are NOT pending. Subtracting set sizes would go
    // negative (or miscount) whenever the pending list also contains rows for
    // files that are no longer in the directory.
    const alreadyUploaded = zipFiles.filter(
      (zipPath) => !pendingPaths.has(zipPath),
    ).length;
    if (alreadyUploaded > 0) {
      logger.info(`⏭ Skipping ${alreadyUploaded} already uploaded file(s)`);
    }

    if (pending.length === 0) {
      logger.success('✅ All files already uploaded. Nothing to do.');
      if (options.showStats) {
        const s = await api.getStats(sourceDirectory);
        logger.info(`📊 Stats: ${JSON.stringify(s)}`);
      }
      return { uploaded: 0, failed: 0, skipped: alreadyUploaded };
    }

    // 4. Sequential upload loop
    logger.info(`🚀 Uploading ${pending.length} file(s) sequentially...`);
    let uploaded = 0;
    let failed = 0;

    for (let i = 0; i < pending.length; i++) {
      const row = pending[i];
      const label = `[${i + 1}/${pending.length}] ${row.fileName}`;
      if (await this.#uploadOne(api, row, label)) {
        uploaded++;
      } else {
        failed++;
      }
    }

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    logger.info('—'.repeat(60));
    logger.info(
      `Done in ${elapsed}s — uploaded=${uploaded} failed=${failed} skipped=${alreadyUploaded}`,
    );

    if (options.showStats) {
      const s = await api.getStats(sourceDirectory);
      logger.info(`📊 Final stats: ${JSON.stringify(s)}`);
    }

    return { uploaded, failed, skipped: alreadyUploaded };
  }

  /**
   * Validate the --dir option and return it as an absolute path.
   * @param {string} dir
   * @returns {string}
   * @throws {Error} when missing, not found, or not a directory
   */
  #resolveSourceDirectory(dir) {
    if (!dir) {
      throw new Error('--dir <path> is required');
    }
    const sourceDirectory = path.resolve(dir);
    if (!fs.existsSync(sourceDirectory)) {
      throw new Error(`Directory not found: ${sourceDirectory}`);
    }
    if (!fs.statSync(sourceDirectory).isDirectory()) {
      throw new Error(`Not a directory: ${sourceDirectory}`);
    }
    return sourceDirectory;
  }

  /**
   * Register one zip file with the tracking API; logs and rethrows on failure.
   * The stat call is inside the try so a file that disappears after the
   * directory listing is reported through the same logged path.
   */
  async #registerFile(api, zipPath, sourceDirectory) {
    const fileName = path.basename(zipPath);
    try {
      const stats = fs.statSync(zipPath);
      await api.registerUpload({
        absolutePath: zipPath,
        fileName,
        sizeBytes: stats.size,
        fileModifiedAt: stats.mtime.toISOString(),
        sourceDirectory,
      });
    } catch (err) {
      logger.error(` ✗ register failed for ${fileName}: ${err.message}`);
      throw err;
    }
  }

  /**
   * Upload a single pending row and record the outcome via the API.
   * @returns {Promise<boolean>} true when uploaded and marked, false otherwise
   */
  async #uploadOne(api, row, label) {
    const localPath = row.absolutePath;

    if (!fs.existsSync(localPath)) {
      const message = `File missing on disk: ${localPath}`;
      logger.error(`✗ ${label}: ${message}`);
      await this.#markFailedSafely(api, row.id, message);
      return false;
    }

    try {
      logger.info(`⬆ ${label}: uploading...`);
      const result = await api.uploadZip(localPath);
      const datastageId = result?.id || result?.data?.id;
      const folio = result?.folio || result?.data?.folio;
      if (!datastageId) {
        // JSON.stringify returns undefined for an undefined result; String()
        // keeps the intended message instead of throwing on `.slice`.
        const preview = String(JSON.stringify(result)).slice(0, 300);
        throw new Error(
          `API returned no datastage id in response: ${preview}`,
        );
      }
      await api.markUploaded(row.id, { datastageId, folio });
      logger.success(
        `✓ ${label}: folio=${folio || 'n/a'} datastageId=${datastageId}`,
      );
      return true;
    } catch (err) {
      logger.error(`✗ ${label}: ${err.message}`);
      await this.#markFailedSafely(api, row.id, err.message);
      return false;
    }
  }

  /**
   * Best-effort mark-failed: a tracking error is logged, never thrown, so one
   * bad row does not stop the remaining uploads.
   */
  async #markFailedSafely(api, rowId, message) {
    try {
      await api.markFailed(rowId, message);
    } catch (e) {
      logger.error(` mark-failed error: ${e.message}`);
    }
  }
}
162
+
163
+ const datastageCommand = new DatastageCommand();
164
+ export default datastageCommand;
@@ -74,29 +74,95 @@ export class IdentifyCommand {
74
74
  const scanConfig = appConfig.getScanConfig();
75
75
  const batchSize = parseInt(options.batchSize) || 100;
76
76
 
77
+ // Parse optional path prefix mapping: "O:/=/Volumes/nas/"
78
+ const pathPrefixMap = options.pathPrefix
79
+ ? this.#parsePathPrefix(options.pathPrefix)
80
+ : null;
81
+
77
82
  logger.info('🔍 Starting arela identify command');
78
83
  logger.info(`🎯 API Target: ${apiTarget}`);
79
84
  logger.info(`📦 Batch Size: ${batchSize}`);
85
+ if (options.table) logger.info(`📌 Target table: ${options.table}`);
86
+ if (options.resetAttempts)
87
+ logger.info('♻️ Reset detection attempts: ON');
88
+ if (pathPrefixMap)
89
+ logger.info(
90
+ `🗺 Path prefix map: ${pathPrefixMap.from} → ${pathPrefixMap.to}`,
91
+ );
80
92
 
81
- // Fetch all tables for this instance
82
- logger.info('\n📊 Fetching instance tables...');
83
- const tables = await this.scanApiService.getInstanceTables(
84
- scanConfig.companySlug,
85
- scanConfig.serverId,
86
- scanConfig.basePathFull,
87
- );
93
+ // Resolve the list of tables to process
94
+ let tables;
95
+ if (options.fileId && options.table) {
96
+ // Single-file mode — identify exactly one file record
97
+ logger.info(
98
+ `\n🎯 Single-file mode: ${options.table} / ${options.fileId}`,
99
+ );
100
+ this.#reportProgress(0, `Fetching file record ${options.fileId}...`);
101
+ const record = await this.scanApiService.getFileRecord(
102
+ options.table,
103
+ options.fileId,
104
+ );
105
+ const results = await this.#detectFilesLocally(
106
+ [record],
107
+ 1,
108
+ pathPrefixMap,
109
+ );
110
+ const updates = results.filter((r) => r !== null);
111
+ if (updates.length > 0) {
112
+ await this.scanApiService.batchUpdateDetection(
113
+ options.table,
114
+ updates,
115
+ );
116
+ }
117
+ this.#reportProgress(100, `Single-file identification complete`);
118
+ logger.success(`\n✅ Single-file identification complete`);
119
+ const firstUpdate = updates[0];
120
+ return {
121
+ processed: 1,
122
+ detected: updates.length,
123
+ proformas: 0,
124
+ errors: 1 - updates.length,
125
+ detectedType: firstUpdate?.detectedType ?? null,
126
+ detectedPedimento: firstUpdate?.detectedPedimento ?? null,
127
+ rfc: firstUpdate?.rfc ?? null,
128
+ arelaPath: firstUpdate?.arelaPath ?? null,
129
+ };
130
+ } else if (options.table) {
131
+ // Single-table mode — no need to match instance tables
132
+ tables = [{ tableName: options.table }];
133
+ logger.info(`\n📌 Single-table mode: ${options.table}`);
134
+ } else {
135
+ logger.info('\n📊 Fetching instance tables...');
136
+ tables = await this.scanApiService.getInstanceTables(
137
+ scanConfig.companySlug,
138
+ scanConfig.serverId,
139
+ scanConfig.basePathFull,
140
+ );
141
+
142
+ if (tables.length === 0) {
143
+ throw new ConfigurationError(
144
+ 'No tables found for this instance. Run "arela scan" first.',
145
+ );
146
+ }
88
147
 
89
- if (tables.length === 0) {
90
- throw new ConfigurationError(
91
- 'No tables found for this instance. Run "arela scan" first.',
148
+ logger.info(
149
+ `📋 Found ${tables.length} table${tables.length === 1 ? '' : 's'} to process`,
92
150
  );
151
+ for (const table of tables) {
152
+ logger.info(` - ${table.tableName}`);
153
+ }
93
154
  }
94
155
 
95
- logger.info(
96
- `📋 Found ${tables.length} table${tables.length === 1 ? '' : 's'} to process`,
97
- );
98
- for (const table of tables) {
99
- logger.info(` - ${table.tableName}`);
156
+ // Optionally reset detection attempts so previously-failed files are retried
157
+ if (options.resetAttempts) {
158
+ for (const table of tables) {
159
+ const { reset } = await this.scanApiService.resetDetectionAttempts(
160
+ table.tableName,
161
+ );
162
+ logger.info(
163
+ `♻️ Reset ${reset} detection attempt(s) in ${table.tableName}`,
164
+ );
165
+ }
100
166
  }
101
167
 
102
168
  // Process each table
@@ -127,6 +193,7 @@ export class IdentifyCommand {
127
193
  table.tableName,
128
194
  batchSize,
129
195
  startTime,
196
+ pathPrefixMap,
130
197
  );
131
198
 
132
199
  totalStats.processed += stats.processed;
@@ -183,7 +250,7 @@ export class IdentifyCommand {
183
250
  * @param {number} startTime - Start time for speed calculation
184
251
  * @returns {Promise<Object>} Processing statistics
185
252
  */
186
- async #processTable(tableName, batchSize, startTime) {
253
+ async #processTable(tableName, batchSize, startTime, pathPrefixMap = null) {
187
254
  // Get detection statistics first (allTypes=true to count all supported file types)
188
255
  const initialStats = await this.scanApiService.getDetectionStats(
189
256
  tableName,
@@ -255,7 +322,11 @@ export class IdentifyCommand {
255
322
  const files = response.data;
256
323
 
257
324
  // Detect files locally with concurrent processing
258
- const detectionResults = await this.#detectFilesLocally(files, 10);
325
+ const detectionResults = await this.#detectFilesLocally(
326
+ files,
327
+ 10,
328
+ pathPrefixMap,
329
+ );
259
330
 
260
331
  // Batch update to API
261
332
  const updateResult = await this.scanApiService.batchUpdateDetection(
@@ -301,7 +372,7 @@ export class IdentifyCommand {
301
372
  * @param {number} concurrency - Maximum concurrent detections
302
373
  * @returns {Promise<Array>} Detection results
303
374
  */
304
- async #detectFilesLocally(files, concurrency = 10) {
375
+ async #detectFilesLocally(files, concurrency = 10, pathPrefixMap = null) {
305
376
  const limit = pLimit(concurrency);
306
377
  const basePath = appConfig.getBasePath();
307
378
 
@@ -309,7 +380,17 @@ export class IdentifyCommand {
309
380
  limit(async () => {
310
381
  try {
311
382
  // Check if file exists on filesystem
312
- const absolutePath = file.absolute_path;
383
+ let absolutePath = file.absolute_path;
384
+
385
+ // Apply cross-platform path prefix mapping if configured
386
+ if (
387
+ pathPrefixMap &&
388
+ absolutePath &&
389
+ absolutePath.startsWith(pathPrefixMap.from)
390
+ ) {
391
+ absolutePath =
392
+ pathPrefixMap.to + absolutePath.slice(pathPrefixMap.from.length);
393
+ }
313
394
 
314
395
  if (!fs.existsSync(absolutePath)) {
315
396
  return {
@@ -543,6 +624,44 @@ export class IdentifyCommand {
543
624
  }
544
625
  }
545
626
  }
627
+
628
+ /**
629
+ * Parse a path prefix mapping string such as "O:/=/Volumes/nas/" into { from, to }.
630
+ * Supports both "FROM=TO" and "FROM:TO" separators.
631
+ * @private
632
+ * @param {string} mapping
633
+ * @returns {{ from: string, to: string }}
634
+ */
635
+ #parsePathPrefix(mapping) {
636
+ // Support either "FROM=TO" or "FROM:TO" as separator
637
+ const eqIdx = mapping.indexOf('=');
638
+ const colonIdx = mapping.indexOf(':');
639
+
640
+ let sep = -1;
641
+ // "O:/=/Volumes" — the colon inside "O:/" is part of a Windows drive letter; prefer '=' separator
642
+ if (eqIdx !== -1) {
643
+ sep = eqIdx;
644
+ } else if (colonIdx !== -1) {
645
+ sep = colonIdx;
646
+ }
647
+
648
+ if (sep === -1) {
649
+ throw new Error(
650
+ `Invalid --path-prefix format: "${mapping}". Expected "FROM=TO" e.g. "O:/=/Volumes/nas/"`,
651
+ );
652
+ }
653
+
654
+ const from = mapping.slice(0, sep);
655
+ const to = mapping.slice(sep + 1);
656
+
657
+ if (!from || !to) {
658
+ throw new Error(
659
+ `Invalid --path-prefix format: "${mapping}". Both FROM and TO parts must be non-empty.`,
660
+ );
661
+ }
662
+
663
+ return { from, to };
664
+ }
546
665
  }
547
666
 
548
667
  // Export singleton instance
@@ -342,6 +342,8 @@ export class PollWorkerCommand {
342
342
  batchSize: 100,
343
343
  showStats: false,
344
344
  onProgress,
345
+ ...(job.fileId && { fileId: job.fileId }),
346
+ ...(job.table && { table: job.table }),
345
347
  };
346
348
 
347
349
  return identifyCommand.execute(options);
@@ -37,10 +37,10 @@ class Config {
37
37
  const __dirname = path.dirname(__filename);
38
38
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
39
39
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
40
- return packageJson.version || '1.0.21';
40
+ return packageJson.version || '1.0.22';
41
41
  } catch (error) {
42
42
  console.warn('⚠️ Could not read package.json version, using fallback');
43
- return '1.0.21';
43
+ return '1.0.22';
44
44
  }
45
45
  }
46
46
 
@@ -25,49 +25,118 @@ export const tipoOperacionExtractor = {
25
25
  },
26
26
  };
27
27
 
28
- // 3) Clave de Pedimento: 2 chars after tipoOperacion
28
+ // 3) Clave de Pedimento: 2 chars after tipoOperacion (multiple layout patterns)
29
29
  export const clavePedimentoExtractor = {
30
30
  field: 'clavePedimento',
31
31
  extract: (source) => {
32
- const match = source.match(
33
- /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})/,
34
- );
35
- return new FieldResult('clavePedimento', !!match, match ? match[1] : null);
32
+ const patterns = [
33
+ // Standard spaced layout: "22 07 3429 2002089 EXP RT"
34
+ /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
35
+ // Concatenated 15-digit layout: "260734296013645 EXP RT"
36
+ /\d{15}\s+[A-Z]{3}\s+([A-Z0-9]{2})\b/,
37
+ // Fallback: T.OPER keyword followed by 2-char clave
38
+ /\b(?:EXP|IMP|TRA|TRN)\s+([A-Z][A-Z0-9])\b/,
39
+ // Explicit label
40
+ /CVE\.?\s*PED(?:IMENTO)?[^A-Z0-9]{0,60}?\b([A-Z][A-Z0-9])\b/i,
41
+ ];
42
+ for (const re of patterns) {
43
+ const m = source.match(re);
44
+ if (m) return new FieldResult('clavePedimento', true, m[1]);
45
+ }
46
+ return new FieldResult('clavePedimento', false, null);
36
47
  },
37
48
  };
38
49
 
39
50
  // 4) Aduana E/S: 3-digit code on the peso-bruto line
51
+ // Fallback A: allow 2-digit code (some SIMP layouts omit the leading zero).
52
+ // Fallback B: derive the 2-digit customs-office code from positions 2-3 of
53
+ // numPedimento (e.g. "260734296013645" → "07"), which is what the
54
+ // arela_path formula uses after padStart(2,'0').
40
55
  export const aduanaEntradaSalidaExtractor = {
41
56
  field: 'aduanaEntradaSalida',
42
57
  extract: (source) => {
43
- const match = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m);
44
- return new FieldResult(
45
- 'aduanaEntradaSalida',
46
- !!match,
47
- match ? match[1] : null,
48
- );
58
+ // Primary: 3-digit aduana code at end of peso-bruto line
59
+ const m3 = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m);
60
+ if (m3) return new FieldResult('aduanaEntradaSalida', true, m3[1]);
61
+
62
+ // Fallback A: 2-digit aduana code at end of peso-bruto line
63
+ const m2 = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{2})\s*$/m);
64
+ if (m2) return new FieldResult('aduanaEntradaSalida', true, m2[1]);
65
+
66
+ // Fallback B: derive 2-digit customs-office code from numPedimento
67
+ // Format: AA BB CCCC DDDDDDD → BB (positions 2-3) = aduana
68
+ const pedMatch = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
69
+ if (pedMatch) {
70
+ const num = pedMatch[0].replace(/\s/g, '');
71
+ if (num.length === 15) {
72
+ return new FieldResult(
73
+ 'aduanaEntradaSalida',
74
+ true,
75
+ num.substring(2, 4),
76
+ );
77
+ }
78
+ }
79
+
80
+ return new FieldResult('aduanaEntradaSalida', false, null);
49
81
  },
50
82
  };
51
83
 
52
- // 5) RFC: 12–13 alphanumeric chars on its own line
84
// 5) RFC: importer/exporter RFC.
// Primary: a whole trimmed line matching the strict shape (3-4 letters +
//   6 consecutive digits + 3 alphanum). COVE codes like COVE2681B1RX8 fail
//   this because their digit section is non-consecutive (2681B1).
// Fallback A: the strict shape as a word inside a longer line
//   (handles "RFC: IMS030409FZ0").
// Fallback B: the first trimmed line that is a loose 12-13 alphanumeric code
//   and does not start with COVE. Lines are scanned one by one, so a candidate
//   directly after a COVE line is still found, and the rule is the same
//   line-based one codigoAceptacionExtractor uses to locate the RFC line.
export const rfcExtractor = {
  field: 'rfc',
  /**
   * @param {string} source - extracted document text
   * @returns {FieldResult} found flag plus the RFC string, or null when absent
   */
  extract: (source) => {
    const RFC_STRICT = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
    const RFC_EMBEDDED = /\b([A-Z]{3,4}\d{6}[A-Z0-9]{3})\b/i;
    const RFC_LOOSE = /^[A-Z0-9]{12,13}$/;

    const lines = source
      .split(/\r?\n/)
      .map((l) => l.trim())
      .filter((l) => l.length > 0);

    // Primary: RFC occupies an entire trimmed line
    const strictLine = lines.find((line) => RFC_STRICT.test(line));
    if (strictLine) return new FieldResult('rfc', true, strictLine);

    // Fallback A: RFC embedded in a longer line (word-boundary search)
    for (const line of lines) {
      const m = line.match(RFC_EMBEDDED);
      if (m) return new FieldResult('rfc', true, m[1]);
    }

    // Fallback B: loose code alone on a line, skipping COVE codes. A global
    // regex anchored on "\n...\n" would consume the newline shared by two
    // adjacent lines and miss the second one; scanning lines avoids that.
    const looseLine = lines.find(
      (line) => RFC_LOOSE.test(line) && !/^COVE/i.test(line),
    );
    if (looseLine) return new FieldResult('rfc', true, looseLine);

    return new FieldResult('rfc', false, null);
  },
};
60
121
 
61
- // 6) Código de Aceptación: 8 alphanumeric chars on the line right after the RFC
122
+ // 6) Código de Aceptación: 8 alphanumeric chars on the line right after the RFC.
123
+ // Uses the same RFC-line detection logic as rfcExtractor.
62
124
  export const codigoAceptacionExtractor = {
63
125
  field: 'codigoAceptacion',
64
126
  extract: (source) => {
127
+ const RFC_STRICT = /^[A-Z]{3,4}\d{6}[A-Z0-9]{3}$/i;
128
+ const RFC_LOOSE = /^[A-Z0-9]{12,13}$/;
65
129
  const lines = source
66
130
  .split(/\r?\n/)
67
131
  .map((l) => l.trim())
68
132
  .filter((l) => l.length > 0);
69
133
 
70
- const rfcIndex = lines.findIndex((l) => /^[A-Z0-9]{12,13}$/.test(l));
134
+ // Find RFC line using strict pattern first, then loose (excluding COVE)
135
+ let rfcIndex = lines.findIndex((l) => RFC_STRICT.test(l));
136
+ if (rfcIndex < 0) {
137
+ rfcIndex = lines.findIndex((l) => RFC_LOOSE.test(l) && !/^COVE/i.test(l));
138
+ }
139
+
71
140
  let code = null;
72
141
  if (rfcIndex >= 0 && /^[A-Z0-9]{8}$/.test(lines[rfcIndex + 1] || '')) {
73
142
  code = lines[rfcIndex + 1];
@@ -77,24 +146,31 @@ export const codigoAceptacionExtractor = {
77
146
  };
78
147
 
79
148
  // 7) Num. E-Document: collects all 13-char alphanumeric codes following
80
- // `NUM. E-DOCUMENT` labels. CoveFact / Pedimento Completo emit one row
81
- // per ED clave inside the CLAVE/COMPL. IDENTIFICADOR table.
149
+ // `NUM. E-DOCUMENT` / `NUMERO DE E-DOCUMENT` labels.
82
150
  export const numEDocumentoExtractor = {
83
151
  field: 'numEDocumento',
84
152
  extract: (source) => {
85
153
  const lines = source.split(/\r?\n/);
86
- const edocLines = lines.filter((line) => /NUM\.?\s*E-DOCUMENT/i.test(line));
154
+ const extractedCodes = [];
155
+ const titlePatterns = [/NUMERO\s+DE\s+E-DOCUMENT/i, /NUM\.?\s*E-DOCUMENT/i];
87
156
 
88
- if (edocLines.length === 0) {
89
- return new FieldResult('numEDocumento', false, null);
90
- }
157
+ for (let i = 0; i < lines.length; i++) {
158
+ const line = lines[i];
159
+ const hasTitle = titlePatterns.some((p) => p.test(line));
160
+ if (!hasTitle) continue;
91
161
 
92
- const extractedCodes = [];
93
- edocLines.forEach((line) => {
94
- const afterEdoc = line.replace(/.*NUM\.?\s*E-DOCUMENT\s*/i, '');
95
- const codes = afterEdoc.match(/[A-Z0-9]{13}/g) || [];
96
- extractedCodes.push(...codes);
97
- });
162
+ // Codes on the title line itself
163
+ const codesInLine = line.match(/[A-Z0-9]{13}/g) || [];
164
+ extractedCodes.push(...codesInLine);
165
+
166
+ // Codes on the next few lines (e.g. CLAVE/COMPL. table rows)
167
+ for (let j = 1; j <= 10 && i + j < lines.length; j++) {
168
+ const nextLine = lines[i + j];
169
+ if (/NUMERO|OBSERVACIONES/i.test(nextLine)) break;
170
+ const codesInNextLine = nextLine.match(/[A-Z0-9]{13}/g) || [];
171
+ extractedCodes.push(...codesInNextLine);
172
+ }
173
+ }
98
174
 
99
175
  if (extractedCodes.length === 0) {
100
176
  return new FieldResult('numEDocumento', false, null);
@@ -109,14 +185,18 @@ export const numEDocumentoExtractor = {
109
185
  export const paymentDateExtractor = {
110
186
  field: 'paymentDate',
111
187
  extract: (source) => {
112
- let match = source.match(/2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/);
113
- if (!match) {
114
- match = source.match(/FECHA DE PAGO:\s*(\d{4}\/\d{2}\/\d{2})/);
115
- }
116
- if (!match) {
117
- match = source.match(/PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/);
188
+ const patterns = [
189
+ /FECHA\s+DE\s+PAGO:?\s*(\d{2}\/\d{2}\/\d{4})/i,
190
+ /FECHA\s+DE\s+PAGO:?\s*(\d{4}\/\d{2}\/\d{2})/i,
191
+ /2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/,
192
+ /(?:^|\n)\s*PAGO\s+(\d{2}\/\d{2}\/\d{4})/i,
193
+ /PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/i,
194
+ ];
195
+ for (const re of patterns) {
196
+ const m = source.match(re);
197
+ if (m) return new FieldResult('paymentDate', true, m[1]);
118
198
  }
119
- return new FieldResult('paymentDate', !!match, match ? match[1] : null);
199
+ return new FieldResult('paymentDate', false, null);
120
200
  },
121
201
  };
122
202
 
@@ -152,14 +232,16 @@ export const coveExtractor = {
152
232
  };
153
233
 
154
234
  // 10) Patente: from the PATENTE/PEDIMENTO/ADUANA header table
235
+ // Fallback A: pago electrónico line "3429 4024126 07" (pedimento_completo).
236
+ // Fallback B: positions 4-7 of numPedimento (always available when found).
155
237
  export const patenteExtractor = {
156
238
  field: 'patente',
157
239
  extract: (source) => {
240
+ // Primary: PATENTE:/PEDIMENTO:/ADUANA: header followed by data line
158
241
  const lines = source.split(/\r?\n/);
159
242
  const patenteHeaderIndex = lines.findIndex((line) =>
160
243
  /PATENTE:.*PEDIMENTO:.*ADUANA:/i.test(line),
161
244
  );
162
-
163
245
  if (patenteHeaderIndex >= 0) {
164
246
  for (let i = patenteHeaderIndex + 1; i < lines.length; i++) {
165
247
  const line = lines[i].trim();
@@ -169,6 +251,20 @@ export const patenteExtractor = {
169
251
  }
170
252
  }
171
253
  }
254
+
255
+ // Fallback A: pago electrónico line "3429 4024126 07"
256
+ const pagoMatch = source.match(/(\d{4})\s+\d{7}\s+\d{2}/);
257
+ if (pagoMatch) return new FieldResult('patente', true, pagoMatch[1]);
258
+
259
+ // Fallback B: positions 4-7 of numPedimento
260
+ const pedMatch = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
261
+ if (pedMatch) {
262
+ const num = pedMatch[0].replace(/\s/g, '');
263
+ if (num.length === 15) {
264
+ return new FieldResult('patente', true, num.substring(4, 8));
265
+ }
266
+ }
267
+
172
268
  return new FieldResult('patente', false, null);
173
269
  },
174
270
  };
@@ -17,22 +17,53 @@ export const pedimentoCompletoDefinition = {
17
17
  type: 'pedimento_completo',
18
18
  extensions: ['pdf'],
19
19
  match: (source) => {
20
+ // Hard exclude: "FORMA SIMPLIFICADA" is handled by pedimento_simplificado.
20
21
  if (/FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source)) return false;
21
22
 
23
+ // Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
24
+ // different document type handled by aviso_consolidado.
25
+ if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
26
+
22
27
  const hasHeaderFields =
23
28
  /NUM\.?\s*PEDIMENTO:/i.test(source) &&
24
29
  /CVE\.?\s*PEDIMENTO:/i.test(source) &&
25
30
  /T\.?\s*OPER:/i.test(source);
26
- if (!hasHeaderFields) return false;
31
+ if (hasHeaderFields) {
32
+ const hasCopyMarker =
33
+ /ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
34
+ /SEGUNDA\s+COPIA/i.test(source) ||
35
+ /TERCERA\s+COPIA/i.test(source) ||
36
+ /COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
37
+ /DEFINITIVO/i.test(source) ||
38
+ /ANEXO\s+DEL\s+PEDIMENTO/i.test(source) ||
39
+ /\*+FIN\s+DE\s+PEDIMENTO\s*\*+/i.test(source);
40
+ if (hasCopyMarker) return true;
41
+ }
27
42
 
28
- const hasCopyMarker =
29
- /ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
30
- /SEGUNDA COPIA/i.test(source) ||
31
- /TERCERA COPIA/i.test(source) ||
32
- /COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
33
- /DEFINITIVO/i.test(source);
43
+ // Fallback clue-counting heuristic for exotic layouts.
44
+ const clues = [
45
+ /\bPEDIMENTO\s*\n.*NUM\.\s*PEDIMENTO:/i,
46
+ /NUM\.\s*PEDIMENTO:\s*T\.OPER:\s*CVE\.PEDIMENTO:\s*REGIMEN:/i,
47
+ /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+[A-Z]{3}/i,
48
+ /CERTIFICACIONES/i,
49
+ /DESTINO\/ORIGEN\s+TIPO\s+CAMBIO\s+PESO\s+BRUTO\s+ADUANA\s+E\/S/i,
50
+ /MEDIOS\s+DE\s+TRANSPORTE/i,
51
+ /DATOS\s+DEL\s+IMPORTADOR\/EXPORTADOR/i,
52
+ /RFC:\s+NOMBRE,\s+DENOMINACION\s+O\s+RAZON\s+SOCIAL:/i,
53
+ /CUADRO\s+DE\s+LIQUIDACION/i,
54
+ /\*\*\*\s+PAGO\s+ELECTRONICO\s+\*\*\*/i,
55
+ /PATENTE:\s+PEDIMENTO:\s+ADUANA:/i,
56
+ /LINEA\s+DE\s+CAPTURA:/i,
57
+ /DATOS\s+DEL\s+PROVEEDOR\s+O\s+COMPRADOR/i,
58
+ /CLAVE\/COMPL\.\s+IDENTIFICADOR/i,
59
+ /ANEXO\s+DEL\s+PEDIMENTO/i,
60
+ /\*+FIN\s+DE\s+PEDIMENTO\s+\*+NUM\.\s+TOTAL\s+DE\s+PARTIDAS:/i,
61
+ /DECLARO\s+BAJO\s+PROTESTA\s+DE\s+DECIR\s+VERDAD/i,
62
+ /PEDIMENTO\s+ELABORADO\s+DE\s+CONFORMIDAD/i,
63
+ ];
34
64
 
35
- return hasCopyMarker;
65
+ const found = clues.filter((clue) => clue.test(source));
66
+ return found.length > clues.length * 0.25;
36
67
  },
37
68
 
38
69
  /**
@@ -7,7 +7,36 @@ export const pedimentoSimplificadoDefinition = {
7
7
  type: 'pedimento_simplificado',
8
8
  extensions: ['pdf'],
9
9
  match: (source) => {
10
- return /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source);
10
+ // Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
11
+ // different document type handled by aviso_consolidado.
12
+ if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
13
+
14
+ // Fast path: the literal title appears on standard SIMP layouts.
15
+ if (/FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source)) return true;
16
+
17
+ // Some PDFs (single-page anchors) lack that title but still carry the
18
+ // three pedimento header fields. Treat them as simplificado UNLESS they
19
+ // have the multi-page copy markers that uniquely identify a completo.
20
+ const hasHeaderFields =
21
+ /NUM\.?\s*PEDIMENTO:/i.test(source) &&
22
+ /CVE\.?\s*PEDIMENTO:/i.test(source) &&
23
+ /T\.?\s*OPER:/i.test(source);
24
+ if (!hasHeaderFields) return false;
25
+
26
+ const hasCompletoCopyMarker =
27
+ /ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
28
+ /SEGUNDA\s+COPIA/i.test(source) ||
29
+ /TERCERA\s+COPIA/i.test(source) ||
30
+ /COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
31
+ /ANEXO\s+DEL\s+PEDIMENTO/i.test(source) ||
32
+ /\*+FIN\s+DE\s+PEDIMENTO\s*\*+/i.test(source);
33
+ if (hasCompletoCopyMarker) return false;
34
+
35
+ // Exclude COVE/eDocument forms that may reference a pedimento in their body.
36
+ if (/COMPROBANTE\s+DE\s+VALOR\s+ELECTR[ÓO]NICO/i.test(source)) return false;
37
+ if (/\bCOVE\b\s*:/i.test(source) && !/PAGO/i.test(source)) return false;
38
+
39
+ return true;
11
40
  },
12
41
 
13
42
  /**
package/src/index.js CHANGED
@@ -1,6 +1,7 @@
1
1
  #!/usr/bin/env node
2
2
  import { Command } from 'commander';
3
3
 
4
+ import datastageCommand from './commands/DatastageCommand.js';
4
5
  import gdriveSyncCommand from './commands/GDriveSyncCommand.js';
5
6
  import identifyCommand from './commands/IdentifyCommand.js';
6
7
  import pollWorkerCommand from './commands/PollWorkerCommand.js';
@@ -26,6 +27,7 @@ class ArelaUploaderCLI {
26
27
  this.scanCommand = scanCommand;
27
28
  this.uploadCommand = new UploadCommand();
28
29
  this.watchCommand = watchCommand;
30
+ this.datastageCommand = datastageCommand;
29
31
 
30
32
  this.#setupProgram();
31
33
  this.#setupCommands();
@@ -204,6 +206,34 @@ class ArelaUploaderCLI {
204
206
  }
205
207
  });
206
208
 
209
+ // Datastage command — upload monthly datastage *.zip files from a directory
210
+ this.program
211
+ .command('datastage')
212
+ .description(
213
+ '📦 Upload monthly datastage *.zip files from a directory to Arela',
214
+ )
215
+ .requiredOption(
216
+ '--dir <path>',
217
+ 'Directory containing *.zip files (non-recursive)',
218
+ )
219
+ .option(
220
+ '--api <target>',
221
+ 'API target: default|agencia|cliente',
222
+ 'default',
223
+ )
224
+ .option('--retry-failed', 'Re-attempt files in failed status')
225
+ .option('--show-stats', 'Print final stats summary')
226
+ .action(async (options) => {
227
+ try {
228
+ if (options.api && options.api !== 'default') {
229
+ appConfig.setApiTarget(options.api);
230
+ }
231
+ await this.datastageCommand.execute(options);
232
+ } catch (error) {
233
+ this.errorHandler.handleFatalError(error, { command: 'datastage' });
234
+ }
235
+ });
236
+
207
237
  // Detection command
208
238
  this.program
209
239
  .command('detect')
@@ -335,6 +365,18 @@ class ArelaUploaderCLI {
335
365
  'Number of files to process in each batch',
336
366
  '100',
337
367
  )
368
+ .option(
369
+ '--table <tableName>',
370
+ 'Process only this scan table (instead of all instance tables)',
371
+ )
372
+ .option(
373
+ '--reset-attempts',
374
+ 'Reset detection_attempts to 0 before processing so previously-failed files are retried',
375
+ )
376
+ .option(
377
+ '--path-prefix <mapping>',
378
+ 'Remap file path prefix for cross-platform access. Format: FROM:TO e.g. "O:/=/Volumes/nas/"',
379
+ )
338
380
  .option('--show-stats', 'Show performance statistics')
339
381
  .action(async (options) => {
340
382
  try {
@@ -0,0 +1,240 @@
1
+ import FormData from 'form-data';
2
+ import fs from 'fs';
3
+ import { Agent } from 'http';
4
+ import { Agent as HttpsAgent } from 'https';
5
+ import fetch from 'node-fetch';
6
+ import path from 'path';
7
+
8
+ import appConfig from '../config/config.js';
9
+ import logger from './LoggingService.js';
10
+
11
/**
 * Datastage API Service
 * Handles API communication for the arela datastage command:
 *  - tracking endpoints under /api/uploader/datastage/*
 *  - zip upload endpoint POST /api/datastage (multipart, field: zipFile)
 */
export class DatastageApiService {
  /** Error codes that are treated as transient and therefore retried. */
  static #RETRYABLE_CODES = new Set([
    'ECONNRESET',
    'ETIMEDOUT',
    'ECONNREFUSED',
    'ENOTFOUND',
    'EAI_AGAIN',
  ]);

  /**
   * Reads the API base URL/token for the given target and builds keep-alive
   * HTTP/HTTPS agents. Retry behaviour is tuned through the environment
   * variables MAX_API_CONNECTIONS, API_CONNECTION_TIMEOUT, API_MAX_RETRIES,
   * API_RETRY_EXPONENTIAL_BACKOFF and API_RETRY_DELAY.
   *
   * @param {string|null} apiTarget - 'default'|'agencia'|'cliente'
   */
  constructor(apiTarget = null) {
    this.apiTarget = apiTarget;
    const apiConfig = appConfig.getApiConfig(apiTarget);
    this.baseUrl = apiConfig.baseUrl;
    this.token = apiConfig.token;

    // Missing, non-numeric or zero values fall back to the defaults below.
    const maxApiConnections =
      Number.parseInt(process.env.MAX_API_CONNECTIONS, 10) || 10;
    const connectionTimeout =
      Number.parseInt(process.env.API_CONNECTION_TIMEOUT, 10) || 300000;

    this.maxRetries = Number.parseInt(process.env.API_MAX_RETRIES, 10) || 3;
    // Exponential backoff is on unless explicitly disabled with 'false'.
    this.useExponentialBackoff =
      process.env.API_RETRY_EXPONENTIAL_BACKOFF !== 'false';
    this.fixedRetryDelay =
      Number.parseInt(process.env.API_RETRY_DELAY, 10) || 1000;

    const agentOpts = {
      keepAlive: true,
      keepAliveMsecs: 30000,
      maxSockets: maxApiConnections,
      maxFreeSockets: Math.ceil(maxApiConnections / 2),
      maxTotalSockets: maxApiConnections + 5,
      timeout: connectionTimeout,
      scheduling: 'fifo',
    };
    this.httpAgent = new Agent(agentOpts);
    this.httpsAgent = new HttpsAgent(agentOpts);

    logger.debug(
      `🔗 Datastage API Service configured (target=${apiTarget || 'default'})`,
    );
  }

  /**
   * Pick the agent that matches the URL scheme.
   * @param {string} url
   * @returns {Agent|HttpsAgent}
   */
  #getAgent(url) {
    return url.startsWith('https://') ? this.httpsAgent : this.httpAgent;
  }

  /**
   * Decide whether a failed attempt should be retried.
   * @param {Error|null|undefined} error - error raised by the attempt
   * @param {Object|null} response - response of the SAME attempt, if one was received
   * @returns {boolean} true for known network error codes, HTTP 429/5xx, or
   *   any error whose message mentions "timeout"
   */
  #isRetryableError(error, response = null) {
    if (DatastageApiService.#RETRYABLE_CODES.has(error?.code)) {
      return true;
    }
    if (response) {
      const status = response.status;
      if (status === 429 || (status >= 500 && status < 600)) return true;
    }
    return Boolean(error?.message && error.message.includes('timeout'));
  }

  /**
   * Compute the delay before the next retry, with ±20% random jitter.
   * Exponential mode doubles from 1s per attempt and is capped at 16s;
   * fixed mode uses `fixedRetryDelay`.
   * @param {number} attempt - 1-based number of the attempt that just failed
   * @returns {number} delay in milliseconds
   */
  #calculateBackoff(attempt) {
    const withJitter = (ms) =>
      Math.floor(ms + ms * 0.2 * (Math.random() * 2 - 1));

    if (!this.useExponentialBackoff) {
      return withJitter(this.fixedRetryDelay);
    }
    const baseDelay = 1000;
    const maxDelay = 16000;
    return withJitter(Math.min(baseDelay * Math.pow(2, attempt - 1), maxDelay));
  }

  /**
   * @param {number} ms
   * @returns {Promise<void>} resolves after `ms` milliseconds
   */
  #sleep(ms) {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }

  /**
   * Turn a non-OK response into an Error. The message is the `message`
   * field of a JSON body when present, otherwise the raw body text,
   * otherwise `fallbackMessage`. The HTTP status is attached as `err.status`.
   * @param {Object} response - non-OK fetch response
   * @param {string} fallbackMessage
   * @returns {Promise<Error>}
   */
  async #buildHttpError(response, fallbackMessage) {
    const bodyText = await response.text();
    let message = fallbackMessage;
    try {
      message = JSON.parse(bodyText).message || fallbackMessage;
    } catch {
      message = bodyText || fallbackMessage;
    }
    const err = new Error(message);
    err.status = response.status;
    return err;
  }

  /**
   * Send a JSON request and return the parsed JSON response, retrying
   * retryable failures up to `maxRetries` additional times.
   * @param {string} endpoint - path appended to `baseUrl`
   * @param {string} [method='GET']
   * @param {Object|null} [body=null] - serialized with JSON.stringify when truthy
   * @param {Object} [headers={}] - extra headers, override the defaults
   * @returns {Promise<any>} parsed response body
   * @throws {Error} the last failure; HTTP failures carry `status`
   */
  async #requestJson(endpoint, method = 'GET', body = null, headers = {}) {
    const url = `${this.baseUrl}${endpoint}`;
    const options = {
      method,
      headers: {
        'x-api-key': this.token,
        'Content-Type': 'application/json',
        ...headers,
      },
      agent: this.#getAgent(url),
    };
    if (body) options.body = JSON.stringify(body);

    const retries = this.maxRetries;
    let lastError;

    for (let attempt = 1; attempt <= retries + 1; attempt++) {
      // Scoped per attempt: a 5xx/429 from an earlier attempt must not make
      // a later, unrelated exception look retryable.
      let response = null;
      try {
        response = await fetch(url, options);
        if (!response.ok) {
          throw await this.#buildHttpError(
            response,
            `API ${method} ${endpoint} failed: ${response.status} ${response.statusText}`,
          );
        }
        return await response.json();
      } catch (error) {
        lastError = error;
        if (attempt > retries || !this.#isRetryableError(error, response)) {
          throw error;
        }
        const delay = this.#calculateBackoff(attempt);
        logger.warn(
          `Retrying ${method} ${endpoint} (attempt ${attempt}/${retries + 1}) in ${delay}ms: ${error.message}`,
        );
        await this.#sleep(delay);
      }
    }
    throw lastError;
  }

  // --- Tracking endpoints ---

  /**
   * POST /api/uploader/datastage/register with the file's metadata.
   * @param {Object} file
   * @param {string} file.absolutePath
   * @param {string} file.fileName
   * @param {number} file.sizeBytes
   * @param {*} file.fileModifiedAt
   * @param {string} file.sourceDirectory
   * @returns {Promise<any>} parsed API response
   */
  async registerUpload({
    absolutePath,
    fileName,
    sizeBytes,
    fileModifiedAt,
    sourceDirectory,
  }) {
    return this.#requestJson('/api/uploader/datastage/register', 'POST', {
      absolutePath,
      fileName,
      sizeBytes,
      fileModifiedAt,
      sourceDirectory,
    });
  }

  /**
   * GET /api/uploader/datastage/pending, optionally filtered by directory.
   * @param {string|null} [sourceDirectory=null]
   * @returns {Promise<any>} parsed API response
   */
  async getPending(sourceDirectory = null) {
    const qs = sourceDirectory
      ? `?sourceDirectory=${encodeURIComponent(sourceDirectory)}`
      : '';
    return this.#requestJson(`/api/uploader/datastage/pending${qs}`, 'GET');
  }

  /**
   * GET /api/uploader/datastage/stats, optionally filtered by directory.
   * @param {string|null} [sourceDirectory=null]
   * @returns {Promise<any>} parsed API response
   */
  async getStats(sourceDirectory = null) {
    const qs = sourceDirectory
      ? `?sourceDirectory=${encodeURIComponent(sourceDirectory)}`
      : '';
    return this.#requestJson(`/api/uploader/datastage/stats${qs}`, 'GET');
  }

  /**
   * PATCH /api/uploader/datastage/:id/mark-uploaded.
   * @param {string|number} id - tracking record id (URL-encoded into the path)
   * @param {Object} [result]
   * @param {*} [result.datastageId]
   * @param {*} [result.folio]
   * @returns {Promise<any>} parsed API response
   */
  async markUploaded(id, { datastageId, folio } = {}) {
    return this.#requestJson(
      `/api/uploader/datastage/${encodeURIComponent(id)}/mark-uploaded`,
      'PATCH',
      { datastageId, folio },
    );
  }

  /**
   * PATCH /api/uploader/datastage/:id/mark-failed.
   * @param {string|number} id - tracking record id (URL-encoded into the path)
   * @param {*} error - stringified before sending; falsy values become 'unknown'
   * @returns {Promise<any>} parsed API response
   */
  async markFailed(id, error) {
    return this.#requestJson(
      `/api/uploader/datastage/${encodeURIComponent(id)}/mark-failed`,
      'PATCH',
      { error: String(error || 'unknown') },
    );
  }

  // --- Zip upload ---

  /**
   * Upload a single zip file to POST /api/datastage (multipart, field name 'zipFile').
   * Returns the created Datastage row { id, folio, ... }.
   * Unlike the JSON helpers, this request is sent once (no retry loop).
   * @param {string} localPath - path of the zip file to stream
   * @returns {Promise<any>} parsed API response
   * @throws {Error} on network failure, or on a non-OK response (with `status`)
   */
  async uploadZip(localPath) {
    const url = `${this.baseUrl}/api/datastage`;
    const fileStream = fs.createReadStream(localPath);

    let response;
    try {
      const form = new FormData();
      form.append('zipFile', fileStream, {
        filename: path.basename(localPath),
        contentType: 'application/zip',
      });

      response = await fetch(url, {
        method: 'POST',
        headers: {
          'x-api-key': this.token,
          ...form.getHeaders(),
        },
        body: form,
        agent: this.#getAgent(url),
      });
    } catch (error) {
      // The request never completed, so nothing will drain the file stream;
      // release its file descriptor instead of leaking it.
      fileStream.destroy();
      throw error;
    }

    if (!response.ok) {
      throw await this.#buildHttpError(
        response,
        `Datastage upload failed: ${response.status} ${response.statusText}`,
      );
    }
    return await response.json();
  }
}
239
+
240
+ export default DatastageApiService;
@@ -375,6 +375,20 @@ export class ScanApiService {
375
375
  * @param {boolean} allTypes - When true, fetch all supported file types instead of just likely-simplificado PDFs
376
376
  * @returns {Promise<Object>} { data: Array, hasMore: boolean }
377
377
  */
378
+ /**
379
+ * Get a single file record by ID (for single-file identify mode).
380
+ * @param {string} tableName - Scan table name (with or without cli. prefix)
381
+ * @param {string} fileId - UUID of the file record
382
+ * @returns {Promise<{ id: string, file_name: string, file_extension: string, absolute_path: string }>}
383
+ */
384
+ async getFileRecord(tableName, fileId) {
385
+ const cleanTable = tableName.replace(/^cli\./, '');
386
+ const url = `/api/uploader/scan/file-record?tableName=${encodeURIComponent(cleanTable)}&fileId=${encodeURIComponent(fileId)}`;
387
+ const result = await this.#request(url, 'GET');
388
+ logger.debug(`Fetched file record ${fileId} from ${cleanTable}`);
389
+ return result;
390
+ }
391
+
378
392
  async fetchPdfsForDetection(
379
393
  tableName,
380
394
  offset = 0,
@@ -398,6 +412,22 @@ export class ScanApiService {
398
412
  return result;
399
413
  }
400
414
 
415
+ /**
416
+ * Reset detection_attempts to 0 for undetected files so they can be re-processed.
417
+ * @param {string} tableName - Target scan table name
418
+ * @param {string|null} absolutePath - If provided, reset only this specific file
419
+ * @returns {Promise<{ reset: number }>}
420
+ */
421
+ async resetDetectionAttempts(tableName, absolutePath = null) {
422
+ let url = `/api/uploader/scan/reset-detection-attempts?tableName=${encodeURIComponent(tableName)}`;
423
+ if (absolutePath) {
424
+ url += `&absolutePath=${encodeURIComponent(absolutePath)}`;
425
+ }
426
+ const result = await this.#request(url, 'PATCH');
427
+ logger.debug(`Reset ${result.reset} detection attempt(s) in ${tableName}`);
428
+ return result;
429
+ }
430
+
401
431
  /**
402
432
  * Batch update detection results
403
433
  * @param {string} tableName - Target table name