@arela/uploader 1.0.20 → 1.0.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -9,6 +9,24 @@ import ErrorHandler from '../errors/ErrorHandler.js';
9
9
  import { ConfigurationError } from '../errors/ErrorTypes.js';
10
10
  import FileDetectionService from '../file-detection.js';
11
11
 
12
+ /**
13
+ * Paid pedimento detected_type values. `pedimento_completo_xml` is included
14
+ * even though the XML matcher is currently disabled in the registry so that
15
+ * re-enabling it requires no changes here.
16
+ */
17
+ const DETECTED_PEDIMENTO_TYPES = new Set([
18
+ 'pedimento_simplificado',
19
+ 'pedimento_completo',
20
+ 'pedimento_completo_xml',
21
+ ]);
22
+
23
+ /** Unpaid pedimento detected_type values (proforma variants). */
24
+ const PROFORMA_TYPES = new Set([
25
+ 'proforma',
26
+ 'proforma_completo',
27
+ 'proforma_completo_xml',
28
+ ]);
29
+
12
30
  /**
13
31
  * Identify Command Handler
14
32
  * Optimized replacement for "detect --detect-pdfs"
@@ -56,29 +74,95 @@ export class IdentifyCommand {
56
74
  const scanConfig = appConfig.getScanConfig();
57
75
  const batchSize = parseInt(options.batchSize) || 100;
58
76
 
77
+ // Parse the optional --path-prefix mapping ("FROM=TO"), e.g. "O:/=/Volumes/nas/"
78
+ const pathPrefixMap = options.pathPrefix
79
+ ? this.#parsePathPrefix(options.pathPrefix)
80
+ : null;
81
+
59
82
  logger.info('šŸ” Starting arela identify command');
60
83
  logger.info(`šŸŽÆ API Target: ${apiTarget}`);
61
84
  logger.info(`šŸ“¦ Batch Size: ${batchSize}`);
85
+ if (options.table) logger.info(`šŸ“Œ Target table: ${options.table}`);
86
+ if (options.resetAttempts)
87
+ logger.info('ā™»ļø Reset detection attempts: ON');
88
+ if (pathPrefixMap)
89
+ logger.info(
90
+ `šŸ—ŗ Path prefix map: ${pathPrefixMap.from} → ${pathPrefixMap.to}`,
91
+ );
62
92
 
63
- // Fetch all tables for this instance
64
- logger.info('\nšŸ“Š Fetching instance tables...');
65
- const tables = await this.scanApiService.getInstanceTables(
66
- scanConfig.companySlug,
67
- scanConfig.serverId,
68
- scanConfig.basePathFull,
69
- );
93
+ // Resolve the list of tables to process
94
+ let tables;
95
+ if (options.fileId && options.table) {
96
+ // Single-file mode — identify exactly one file record
97
+ logger.info(
98
+ `\nšŸŽÆ Single-file mode: ${options.table} / ${options.fileId}`,
99
+ );
100
+ this.#reportProgress(0, `Fetching file record ${options.fileId}...`);
101
+ const record = await this.scanApiService.getFileRecord(
102
+ options.table,
103
+ options.fileId,
104
+ );
105
+ const results = await this.#detectFilesLocally(
106
+ [record],
107
+ 1,
108
+ pathPrefixMap,
109
+ );
110
+ const updates = results.filter((r) => r !== null);
111
+ if (updates.length > 0) {
112
+ await this.scanApiService.batchUpdateDetection(
113
+ options.table,
114
+ updates,
115
+ );
116
+ }
117
+ this.#reportProgress(100, `Single-file identification complete`);
118
+ logger.success(`\nāœ… Single-file identification complete`);
119
+ const firstUpdate = updates[0];
120
+ return {
121
+ processed: 1,
122
+ detected: updates.length,
123
+ proformas: 0,
124
+ errors: 1 - updates.length,
125
+ detectedType: firstUpdate?.detectedType ?? null,
126
+ detectedPedimento: firstUpdate?.detectedPedimento ?? null,
127
+ rfc: firstUpdate?.rfc ?? null,
128
+ arelaPath: firstUpdate?.arelaPath ?? null,
129
+ };
130
+ } else if (options.table) {
131
+ // Single-table mode — no need to match instance tables
132
+ tables = [{ tableName: options.table }];
133
+ logger.info(`\nšŸ“Œ Single-table mode: ${options.table}`);
134
+ } else {
135
+ logger.info('\nšŸ“Š Fetching instance tables...');
136
+ tables = await this.scanApiService.getInstanceTables(
137
+ scanConfig.companySlug,
138
+ scanConfig.serverId,
139
+ scanConfig.basePathFull,
140
+ );
141
+
142
+ if (tables.length === 0) {
143
+ throw new ConfigurationError(
144
+ 'No tables found for this instance. Run "arela scan" first.',
145
+ );
146
+ }
70
147
 
71
- if (tables.length === 0) {
72
- throw new ConfigurationError(
73
- 'No tables found for this instance. Run "arela scan" first.',
148
+ logger.info(
149
+ `šŸ“‹ Found ${tables.length} table${tables.length === 1 ? '' : 's'} to process`,
74
150
  );
151
+ for (const table of tables) {
152
+ logger.info(` - ${table.tableName}`);
153
+ }
75
154
  }
76
155
 
77
- logger.info(
78
- `šŸ“‹ Found ${tables.length} table${tables.length === 1 ? '' : 's'} to process`,
79
- );
80
- for (const table of tables) {
81
- logger.info(` - ${table.tableName}`);
156
+ // Optionally reset detection attempts so previously-failed files are retried
157
+ if (options.resetAttempts) {
158
+ for (const table of tables) {
159
+ const { reset } = await this.scanApiService.resetDetectionAttempts(
160
+ table.tableName,
161
+ );
162
+ logger.info(
163
+ `ā™»ļø Reset ${reset} detection attempt(s) in ${table.tableName}`,
164
+ );
165
+ }
82
166
  }
83
167
 
84
168
  // Process each table
@@ -109,6 +193,7 @@ export class IdentifyCommand {
109
193
  table.tableName,
110
194
  batchSize,
111
195
  startTime,
196
+ pathPrefixMap,
112
197
  );
113
198
 
114
199
  totalStats.processed += stats.processed;
@@ -165,7 +250,7 @@ export class IdentifyCommand {
165
250
  * @param {number} startTime - Start time for speed calculation
166
251
  * @returns {Promise<Object>} Processing statistics
167
252
  */
168
- async #processTable(tableName, batchSize, startTime) {
253
+ async #processTable(tableName, batchSize, startTime, pathPrefixMap = null) {
169
254
  // Get detection statistics first (allTypes=true to count all supported file types)
170
255
  const initialStats = await this.scanApiService.getDetectionStats(
171
256
  tableName,
@@ -237,7 +322,11 @@ export class IdentifyCommand {
237
322
  const files = response.data;
238
323
 
239
324
  // Detect files locally with concurrent processing
240
- const detectionResults = await this.#detectFilesLocally(files, 10);
325
+ const detectionResults = await this.#detectFilesLocally(
326
+ files,
327
+ 10,
328
+ pathPrefixMap,
329
+ );
241
330
 
242
331
  // Batch update to API
243
332
  const updateResult = await this.scanApiService.batchUpdateDetection(
@@ -247,11 +336,13 @@ export class IdentifyCommand {
247
336
 
248
337
  // Update statistics
249
338
  processedCount += files.length;
250
- detectedCount += detectionResults.filter(
251
- (r) => r.detectedType === 'pedimento_simplificado',
339
+ // "Detected" counts paid pedimentos of any flavour (simplificado,
340
+ // completo, completo_xml when enabled).
341
+ detectedCount += detectionResults.filter((r) =>
342
+ DETECTED_PEDIMENTO_TYPES.has(r.detectedType),
252
343
  ).length;
253
- proformaCount += detectionResults.filter(
254
- (r) => r.detectedType === 'proforma',
344
+ proformaCount += detectionResults.filter((r) =>
345
+ PROFORMA_TYPES.has(r.detectedType),
255
346
  ).length;
256
347
  errorCount += detectionResults.filter((r) => r.detectionError).length;
257
348
 
@@ -281,7 +372,7 @@ export class IdentifyCommand {
281
372
  * @param {number} concurrency - Maximum concurrent detections
282
373
  * @returns {Promise<Array>} Detection results
283
374
  */
284
- async #detectFilesLocally(files, concurrency = 10) {
375
+ async #detectFilesLocally(files, concurrency = 10, pathPrefixMap = null) {
285
376
  const limit = pLimit(concurrency);
286
377
  const basePath = appConfig.getBasePath();
287
378
 
@@ -289,7 +380,17 @@ export class IdentifyCommand {
289
380
  limit(async () => {
290
381
  try {
291
382
  // Check if file exists on filesystem
292
- const absolutePath = file.absolute_path;
383
+ let absolutePath = file.absolute_path;
384
+
385
+ // Apply cross-platform path prefix mapping if configured
386
+ if (
387
+ pathPrefixMap &&
388
+ absolutePath &&
389
+ absolutePath.startsWith(pathPrefixMap.from)
390
+ ) {
391
+ absolutePath =
392
+ pathPrefixMap.to + absolutePath.slice(pathPrefixMap.from.length);
393
+ }
293
394
 
294
395
  if (!fs.existsSync(absolutePath)) {
295
396
  return {
@@ -324,8 +425,8 @@ export class IdentifyCommand {
324
425
  // Detect using existing FileDetectionService
325
426
  const result = await this.detectionService.detectFile(absolutePath);
326
427
 
327
- // If detection succeeded and found a pedimento_simplificado (paid)
328
- if (result.detectedType === 'pedimento_simplificado') {
428
+ // If detection succeeded and found a paid pedimento (any variant)
429
+ if (DETECTED_PEDIMENTO_TYPES.has(result.detectedType)) {
329
430
  return {
330
431
  id: file.id,
331
432
  detectedType: result.detectedType,
@@ -338,8 +439,8 @@ export class IdentifyCommand {
338
439
  };
339
440
  }
340
441
 
341
- // If detection succeeded and found a proforma (unpaid pedimento)
342
- if (result.detectedType === 'proforma') {
442
+ // If detection succeeded and found a proforma (any variant)
443
+ if (PROFORMA_TYPES.has(result.detectedType)) {
343
444
  return {
344
445
  id: file.id,
345
446
  detectedType: result.detectedType,
@@ -365,7 +466,7 @@ export class IdentifyCommand {
365
466
  detectionError = `DETECTION_ERROR: ${result.error}`;
366
467
  } else if (isDefinitelyNotPedimento) {
367
468
  detectionError =
368
- 'NOT_PEDIMENTO: File does not match pedimento-simplificado pattern. Missing key markers: "FORMA SIMPLIFICADA DE PEDIMENTO".';
469
+ 'NOT_PEDIMENTO: File does not match any pedimento pattern. Missing key markers (e.g. "FORMA SIMPLIFICADA DE PEDIMENTO" or "NUM. PEDIMENTO:" + copy markers).';
369
470
  } else {
370
471
  // Partial match - might be a pedimento with missing fields
371
472
  const missingFields = this.#getMissingFields(result);
@@ -434,18 +535,23 @@ export class IdentifyCommand {
434
535
  return false;
435
536
  }
436
537
 
437
- // If it was detected as a proforma, it's related to a pedimento structure
438
- if (result.detectedType === 'proforma') {
538
+ // If it was detected as a proforma (any variant), it's related to a
539
+ // pedimento structure — not "definitely not".
540
+ if (PROFORMA_TYPES.has(result.detectedType)) {
439
541
  return false;
440
542
  }
441
543
 
442
- // Check if the text contains the required pedimento marker
443
- // This must match the criteria in pedimento-simplificado.js match function
544
+ // Check if the text contains any required pedimento marker. This must
545
+ // stay aligned with the `match()` predicates in pedimento-simplificado.js
546
+ // and pedimento-completo.js.
444
547
  const text = result.text || '';
445
- const hasRequiredMarker = /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(text);
548
+ const hasSimplificadoMarker = /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(text);
549
+ const hasCompletoMarkers =
550
+ /NUM\.?\s*PEDIMENTO:/i.test(text) &&
551
+ /CVE\.?\s*PEDIMENTO:/i.test(text) &&
552
+ /T\.?\s*OPER:/i.test(text);
446
553
 
447
- // If the required marker is not found, it's definitely not a pedimento
448
- return !hasRequiredMarker;
554
+ return !hasSimplificadoMarker && !hasCompletoMarkers;
449
555
  }
450
556
 
451
557
  /**
@@ -518,6 +624,44 @@ export class IdentifyCommand {
518
624
  }
519
625
  }
520
626
  }
627
+
628
+ /**
629
+ * Parse a path prefix mapping string such as "O:/=/Volumes/nas/" into { from, to }.
630
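+ * Supports both "FROM=TO" and "FROM:TO" separators. The first '=' is used when present; ':' is only a fallback when there is no '=', so a mapping that contains a Windows drive letter (e.g. "O:/") must use the '=' form.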
631
+ * @private
632
+ * @param {string} mapping
633
+ * @returns {{ from: string, to: string }}
634
+ */
635
+ #parsePathPrefix(mapping) {
636
+ // Support either "FROM=TO" or "FROM:TO" as separator
637
+ const eqIdx = mapping.indexOf('=');
638
+ const colonIdx = mapping.indexOf(':');
639
+
640
+ let sep = -1;
641
+ // "O:/=/Volumes" — the colon inside "O:/" is part of a Windows drive letter; prefer '=' separator
642
+ if (eqIdx !== -1) {
643
+ sep = eqIdx;
644
+ } else if (colonIdx !== -1) {
645
+ sep = colonIdx;
646
+ }
647
+
648
+ if (sep === -1) {
649
+ throw new Error(
650
+ `Invalid --path-prefix format: "${mapping}". Expected "FROM=TO" e.g. "O:/=/Volumes/nas/"`,
651
+ );
652
+ }
653
+
654
+ const from = mapping.slice(0, sep);
655
+ const to = mapping.slice(sep + 1);
656
+
657
+ if (!from || !to) {
658
+ throw new Error(
659
+ `Invalid --path-prefix format: "${mapping}". Both FROM and TO parts must be non-empty.`,
660
+ );
661
+ }
662
+
663
+ return { from, to };
664
+ }
521
665
  }
522
666
 
523
667
  // Export singleton instance
@@ -342,6 +342,8 @@ export class PollWorkerCommand {
342
342
  batchSize: 100,
343
343
  showStats: false,
344
344
  onProgress,
345
+ ...(job.fileId && { fileId: job.fileId }),
346
+ ...(job.table && { table: job.table }),
345
347
  };
346
348
 
347
349
  return identifyCommand.execute(options);
@@ -592,10 +592,13 @@ export class ScanCommand {
592
592
  const relativePath = PathNormalizer.getRelativePath(filePath, basePath);
593
593
  const absolutePath = PathNormalizer.normalizeSeparators(filePath);
594
594
 
595
- // Determine if this is potentially a simplificado document
596
- // Must be a PDF and filename must contain 'simp' (case-insensitive)
595
+ // Determine if this file is potentially a pedimento (simplificado, completo, or CoveFact).
596
+ // PDFs whose filename contains 'simp', 'pedim' or 'covefact' (case-insensitive)
597
+ // are flagged so the identify stage prioritizes them. The column name
598
+ // `likely_simplificado` is preserved for backwards compatibility; semantics
599
+ // are broader (any likely pedimento PDF).
597
600
  const likelySimplificado =
598
- fileExtension === 'pdf' && fileName.toLowerCase().includes('simp');
601
+ fileExtension === 'pdf' && /(simp|pedim|covefact)/i.test(fileName);
599
602
 
600
603
  return {
601
604
  fileName,
@@ -24,6 +24,7 @@ class Config {
24
24
  this.watch = this.#loadWatchConfig();
25
25
  this.redis = this.#loadRedisConfig();
26
26
  this.worker = this.#loadWorkerConfig();
27
+ this.gdrive = this.#loadGDriveConfig();
27
28
  }
28
29
 
29
30
  /**
@@ -36,10 +37,10 @@ class Config {
36
37
  const __dirname = path.dirname(__filename);
37
38
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
38
39
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
39
- return packageJson.version || '1.0.20';
40
+ return packageJson.version || '1.0.22';
40
41
  } catch (error) {
41
42
  console.warn('āš ļø Could not read package.json version, using fallback');
42
- return '1.0.20';
43
+ return '1.0.22';
43
44
  }
44
45
  }
45
46
 
@@ -579,6 +580,91 @@ class Config {
579
580
  return process.env.ARELA_SERVER_ID || null;
580
581
  }
581
582
 
583
+ /**
584
+ * Load Google Drive sync configuration
585
+ * @private
586
+ */
587
+ #loadGDriveConfig() {
588
+ const rootFolderId = process.env.GDRIVE_ROOT_FOLDER_ID || null;
589
+
590
+ // Default mirror destination: <UPLOAD_BASE_PATH>/_gdrive_mirror (no default is derived when UPLOAD_BASE_PATH is '*')
591
+ let localMirrorPath = process.env.GDRIVE_LOCAL_MIRROR_PATH || null;
592
+ if (!localMirrorPath && process.env.UPLOAD_BASE_PATH) {
593
+ const base = process.env.UPLOAD_BASE_PATH;
594
+ if (base !== '*') {
595
+ localMirrorPath = path.resolve(
596
+ PathNormalizer.toAbsolutePath(base),
597
+ '_gdrive_mirror',
598
+ );
599
+ }
600
+ } else if (localMirrorPath) {
601
+ localMirrorPath = PathNormalizer.toAbsolutePath(localMirrorPath);
602
+ }
603
+
604
+ return {
605
+ rootFolderId,
606
+ localMirrorPath,
607
+ serviceAccountFile: process.env.GDRIVE_SERVICE_ACCOUNT_FILE || null,
608
+ serviceAccountJson: process.env.GDRIVE_SERVICE_ACCOUNT_JSON || null,
609
+ skipNativeDocs: process.env.GDRIVE_SKIP_NATIVE_DOCS !== 'false',
610
+ followShortcuts: process.env.GDRIVE_FOLLOW_SHORTCUTS !== 'false',
611
+ concurrency: parseInt(process.env.GDRIVE_CONCURRENCY) || 5,
612
+ pageSize: parseInt(process.env.GDRIVE_PAGE_SIZE) || 1000,
613
+ maxFileSizeBytes:
614
+ parseInt(process.env.GDRIVE_MAX_FILE_SIZE_BYTES) ||
615
+ 2 * 1024 * 1024 * 1024, // 2GB default
616
+ };
617
+ }
618
+
619
+ /**
620
+ * Get Google Drive configuration
621
+ * @returns {Object} GDrive sync settings
622
+ */
623
+ getGDriveConfig() {
624
+ return this.gdrive;
625
+ }
626
+
627
+ /**
628
+ * Validate Google Drive configuration
629
+ * @throws {Error} If required gdrive configuration is missing or GDRIVE_SERVICE_ACCOUNT_FILE points to a file that does not exist
630
+ */
631
+ validateGDriveConfig() {
632
+ const errors = [];
633
+
634
+ if (!this.gdrive.rootFolderId) {
635
+ errors.push(
636
+ 'GDRIVE_ROOT_FOLDER_ID is required (Drive folder ID to sync)',
637
+ );
638
+ }
639
+
640
+ if (!this.gdrive.localMirrorPath) {
641
+ errors.push(
642
+ 'Could not determine local mirror path. Set GDRIVE_LOCAL_MIRROR_PATH or UPLOAD_BASE_PATH',
643
+ );
644
+ }
645
+
646
+ if (!this.gdrive.serviceAccountFile && !this.gdrive.serviceAccountJson) {
647
+ errors.push(
648
+ 'Either GDRIVE_SERVICE_ACCOUNT_FILE (path to JSON) or GDRIVE_SERVICE_ACCOUNT_JSON (inline JSON) is required',
649
+ );
650
+ }
651
+
652
+ if (this.gdrive.serviceAccountFile) {
653
+ const resolved = PathNormalizer.toAbsolutePath(
654
+ this.gdrive.serviceAccountFile,
655
+ );
656
+ if (!fs.existsSync(resolved)) {
657
+ errors.push(`GDRIVE_SERVICE_ACCOUNT_FILE not found: ${resolved}`);
658
+ }
659
+ }
660
+
661
+ if (errors.length > 0) {
662
+ throw new Error(
663
+ 'āš ļø Google Drive configuration errors:\n - ' + errors.join('\n - '),
664
+ );
665
+ }
666
+ }
667
+
582
668
  /**
583
669
  * Check if worker mode is available (Redis configured)
584
670
  * @returns {boolean}
@@ -2,6 +2,9 @@
2
2
  import { dodaPdfDefinition } from './document-types/doda-pdf.js';
3
3
  import { dodaXmlDefinition } from './document-types/doda-xml.js';
4
4
  import { facturasComerciales } from './document-types/facturas-comerciales.js';
5
+ import { pedimentoCompletoDefinition } from './document-types/pedimento-completo.js';
6
+ // TODO: enable XML pedimento detection — implementation ready in pedimento-completo-xml.js
7
+ // import { pedimentoCompletoXmlDefinition } from './document-types/pedimento-completo-xml.js';
5
8
  import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
6
9
  import { proformaDefinition } from './document-types/proforma.js';
7
10
  import { supportDocumentDefinition } from './document-types/support-document.js';
@@ -41,6 +44,12 @@ export class DocumentTypeDefinition {
41
44
  // proformaDefinition is kept as reference but not used directly in the registry since resolution is handled post-extraction.
42
45
  const documentTypes = [
43
46
  pedimentoSimplificadoDefinition,
47
+ pedimentoCompletoDefinition,
48
+ // TODO: enable XML pedimento detection — uncomment the next line and the
49
+ // matching import at the top of this file. All downstream code
50
+ // (composeArelaPath, arela-api SQL filters, IdentifyCommand counters)
51
+ // already accepts `pedimento_completo_xml`.
52
+ // pedimentoCompletoXmlDefinition,
44
53
  supportDocumentDefinition,
45
54
  dodaPdfDefinition,
46
55
  dodaXmlDefinition,
@@ -96,12 +105,13 @@ export function extractDocumentFields(source, fileExtension, filePath) {
96
105
 
97
106
  console.log(` → Resolved type: ${resolvedType}`);
98
107
 
99
- // Extract pedimento number and year
108
+ // Extract pedimento number and year. `filePath` is forwarded so XML
109
+ // matchers (which compose numPedimento from the filename) can use it.
100
110
  const pedimento = docType.extractNumPedimento
101
- ? docType.extractNumPedimento(source, fields)
111
+ ? docType.extractNumPedimento(source, fields, filePath)
102
112
  : null;
103
113
  const year = docType.extractPedimentoYear
104
- ? docType.extractPedimentoYear(source, fields)
114
+ ? docType.extractPedimentoYear(source, fields, filePath)
105
115
  : null;
106
116
 
107
117
  return [resolvedType, fields, pedimento, year];