@arela/uploader 1.0.17 → 1.0.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/commands/IdentifyCommand.js +6 -2
- package/src/commands/PollWorkerCommand.js +65 -1
- package/src/commands/PropagateCommand.js +121 -0
- package/src/commands/PushCommand.js +4 -1
- package/src/commands/ScanCommand.js +19 -5
- package/src/commands/WorkerCommand.js +3 -0
- package/src/config/config.js +26 -8
- package/src/document-type-shared.js +6 -0
- package/src/document-types/doda-pdf.js +121 -0
- package/src/document-types/doda-xml.js +118 -0
- package/src/document-types/facturas-comerciales.js +233 -0
- package/src/services/ScanApiService.js +85 -13
package/package.json
CHANGED
|
@@ -166,8 +166,11 @@ export class IdentifyCommand {
|
|
|
166
166
|
* @returns {Promise<Object>} Processing statistics
|
|
167
167
|
*/
|
|
168
168
|
async #processTable(tableName, batchSize, startTime) {
|
|
169
|
-
// Get detection statistics first
|
|
170
|
-
const initialStats = await this.scanApiService.getDetectionStats(
|
|
169
|
+
// Get detection statistics first (allTypes=true to count all supported file types)
|
|
170
|
+
const initialStats = await this.scanApiService.getDetectionStats(
|
|
171
|
+
tableName,
|
|
172
|
+
true,
|
|
173
|
+
);
|
|
171
174
|
logger.info(` Total PDFs: ${initialStats.totalPdfs}`);
|
|
172
175
|
logger.info(` Detected: ${initialStats.detected}`);
|
|
173
176
|
logger.info(` Pending: ${initialStats.pending}`);
|
|
@@ -224,6 +227,7 @@ export class IdentifyCommand {
|
|
|
224
227
|
tableName,
|
|
225
228
|
0,
|
|
226
229
|
batchSize,
|
|
230
|
+
true, // allTypes: fetch all supported file types, not just likely-simplificado PDFs
|
|
227
231
|
);
|
|
228
232
|
|
|
229
233
|
if (!response.data || response.data.length === 0) {
|
|
@@ -1,8 +1,11 @@
|
|
|
1
|
+
import path from 'path';
|
|
2
|
+
|
|
1
3
|
import logger from '../services/LoggingService.js';
|
|
2
4
|
import { PipelineApiService } from '../services/PipelineApiService.js';
|
|
3
5
|
|
|
4
6
|
import appConfig from '../config/config.js';
|
|
5
7
|
import ErrorHandler from '../errors/ErrorHandler.js';
|
|
8
|
+
import { PathNormalizer } from '../utils/PathNormalizer.js';
|
|
6
9
|
|
|
7
10
|
/**
|
|
8
11
|
* Poll Worker Command Handler
|
|
@@ -234,13 +237,74 @@ export class PollWorkerCommand {
|
|
|
234
237
|
|
|
235
238
|
// Override scan directories if provided
|
|
236
239
|
if (job.scanDirectories && job.scanDirectories.length > 0) {
|
|
237
|
-
|
|
240
|
+
const allAbsolute = job.scanDirectories.every((d) =>
|
|
241
|
+
PathNormalizer.isAbsolutePath(d),
|
|
242
|
+
);
|
|
243
|
+
|
|
244
|
+
if (allAbsolute) {
|
|
245
|
+
const ancestor = this.#commonAncestor(job.scanDirectories);
|
|
246
|
+
// Check if ancestor is meaningful (not just root or a drive letter)
|
|
247
|
+
const isUseful =
|
|
248
|
+
ancestor.length > 1 && !/^[a-zA-Z]:[/\\]?$/.test(ancestor);
|
|
249
|
+
|
|
250
|
+
if (isUseful) {
|
|
251
|
+
// Common ancestor found — set as base path, make sources relative
|
|
252
|
+
process.env.UPLOAD_BASE_PATH = ancestor;
|
|
253
|
+
process.env.ARELA_BASE_PATH_LABEL = ancestor;
|
|
254
|
+
const relativeSources = job.scanDirectories.map(
|
|
255
|
+
(d) => path.relative(ancestor, d) || '.',
|
|
256
|
+
);
|
|
257
|
+
process.env.UPLOAD_SOURCES = relativeSources.join('|');
|
|
258
|
+
} else {
|
|
259
|
+
// Cross-drive or no common ancestor — wildcard base, absolute sources
|
|
260
|
+
process.env.UPLOAD_BASE_PATH = '*';
|
|
261
|
+
process.env.ARELA_BASE_PATH_LABEL = '*';
|
|
262
|
+
process.env.UPLOAD_SOURCES = job.scanDirectories.join('|');
|
|
263
|
+
}
|
|
264
|
+
} else {
|
|
265
|
+
process.env.UPLOAD_SOURCES = job.scanDirectories.join('|');
|
|
266
|
+
}
|
|
238
267
|
}
|
|
239
268
|
|
|
240
269
|
// Override file extensions if provided
|
|
241
270
|
if (job.fileExtensions && job.fileExtensions.length > 0) {
|
|
242
271
|
process.env.UPLOAD_FILE_EXTENSIONS = job.fileExtensions.join(',');
|
|
243
272
|
}
|
|
273
|
+
|
|
274
|
+
// Reload cached config from the updated env vars
|
|
275
|
+
appConfig.reloadScanConfig();
|
|
276
|
+
}
|
|
277
|
+
|
|
278
|
+
/**
|
|
279
|
+
* Compute the longest common ancestor directory of a list of absolute paths.
|
|
280
|
+
* Uses '/' as separator (PathNormalizer normalizes Windows \\ to /).
|
|
281
|
+
* @param {string[]} paths
|
|
282
|
+
* @returns {string}
|
|
283
|
+
*/
|
|
284
|
+
#commonAncestor(paths) {
|
|
285
|
+
if (paths.length === 0) return '/';
|
|
286
|
+
if (paths.length === 1) return paths[0];
|
|
287
|
+
|
|
288
|
+
// Normalize separators so O:\exp\... becomes O:/exp/...
|
|
289
|
+
const normalized = paths.map((p) => PathNormalizer.normalizeSeparators(p));
|
|
290
|
+
const split = normalized.map((p) => p.split('/').filter(Boolean));
|
|
291
|
+
const minLen = Math.min(...split.map((s) => s.length));
|
|
292
|
+
const common = [];
|
|
293
|
+
|
|
294
|
+
for (let i = 0; i < minLen; i++) {
|
|
295
|
+
const seg = split[0][i];
|
|
296
|
+
if (split.every((s) => s[i] === seg)) {
|
|
297
|
+
common.push(seg);
|
|
298
|
+
} else {
|
|
299
|
+
break;
|
|
300
|
+
}
|
|
301
|
+
}
|
|
302
|
+
|
|
303
|
+
// Preserve drive letter format (e.g., 'O:' → 'O:/')
|
|
304
|
+
if (common.length > 0 && /^[a-zA-Z]:$/.test(common[0])) {
|
|
305
|
+
return common[0] + '/' + common.slice(1).join('/');
|
|
306
|
+
}
|
|
307
|
+
return '/' + common.join('/');
|
|
244
308
|
}
|
|
245
309
|
|
|
246
310
|
/**
|
|
@@ -100,6 +100,15 @@ export class PropagateCommand {
|
|
|
100
100
|
totalStats.directoriesProcessed += stats.directoriesProcessed;
|
|
101
101
|
}
|
|
102
102
|
|
|
103
|
+
// Step 5: Cross-table propagation
|
|
104
|
+
// Match files with detected_pedimento in one table to pedimento sources in other tables
|
|
105
|
+
const crossTableStats = await this.#processCrossTablePropagation(
|
|
106
|
+
scanConfig,
|
|
107
|
+
tables,
|
|
108
|
+
);
|
|
109
|
+
totalStats.filesUpdated += crossTableStats.filesUpdated;
|
|
110
|
+
totalStats.filesFailed += crossTableStats.filesFailed;
|
|
111
|
+
|
|
103
112
|
// Show combined results
|
|
104
113
|
const duration = ((Date.now() - this.stats.startTime) / 1000).toFixed(2);
|
|
105
114
|
const filesPerSec =
|
|
@@ -439,6 +448,118 @@ export class PropagateCommand {
|
|
|
439
448
|
};
|
|
440
449
|
}
|
|
441
450
|
|
|
451
|
+
/**
|
|
452
|
+
* Cross-table propagation phase
|
|
453
|
+
* Matches files with detected_pedimento in one table to pedimento sources in other tables.
|
|
454
|
+
* This enables facturas (in a different directory/table) to get arela_path from their pedimento.
|
|
455
|
+
* @private
|
|
456
|
+
* @param {Object} scanConfig - Scan configuration with companySlug, serverId, basePathFull
|
|
457
|
+
* @param {Array} tables - All tables for this instance
|
|
458
|
+
* @returns {Promise<Object>} { filesUpdated, filesFailed }
|
|
459
|
+
*/
|
|
460
|
+
async #processCrossTablePropagation(scanConfig, tables) {
|
|
461
|
+
console.log('\n🔗 Cross-table propagation phase...\n');
|
|
462
|
+
|
|
463
|
+
const stats = { filesUpdated: 0, filesFailed: 0 };
|
|
464
|
+
|
|
465
|
+
// Step 1: Fetch all pedimento sources across all tables
|
|
466
|
+
const pedimentoSources =
|
|
467
|
+
await this.scanApiService.fetchCrossTablePedimentoSources(
|
|
468
|
+
scanConfig.companySlug,
|
|
469
|
+
scanConfig.serverId,
|
|
470
|
+
scanConfig.basePathFull,
|
|
471
|
+
);
|
|
472
|
+
|
|
473
|
+
if (pedimentoSources.length === 0) {
|
|
474
|
+
console.log(
|
|
475
|
+
' ℹ️ No pedimento sources found across tables. Skipping cross-table phase.\n',
|
|
476
|
+
);
|
|
477
|
+
return stats;
|
|
478
|
+
}
|
|
479
|
+
|
|
480
|
+
// Build a map: detected_pedimento → source info
|
|
481
|
+
const sourceMap = new Map();
|
|
482
|
+
for (const source of pedimentoSources) {
|
|
483
|
+
sourceMap.set(source.detected_pedimento, source);
|
|
484
|
+
}
|
|
485
|
+
|
|
486
|
+
console.log(
|
|
487
|
+
` 📋 Found ${sourceMap.size} unique pedimento sources across ${tables.length} tables`,
|
|
488
|
+
);
|
|
489
|
+
|
|
490
|
+
// Step 2: For each table, find orphan files (have pedimento, no arela_path)
|
|
491
|
+
let totalOrphans = 0;
|
|
492
|
+
|
|
493
|
+
for (const table of tables) {
|
|
494
|
+
let offset = 0;
|
|
495
|
+
let hasMore = true;
|
|
496
|
+
|
|
497
|
+
while (hasMore) {
|
|
498
|
+
const orphanFiles =
|
|
499
|
+
await this.scanApiService.fetchFilesWithPedimentoNoArelaPath(
|
|
500
|
+
table.tableName,
|
|
501
|
+
offset,
|
|
502
|
+
this.options.batchSize,
|
|
503
|
+
);
|
|
504
|
+
|
|
505
|
+
if (orphanFiles.length === 0) {
|
|
506
|
+
hasMore = false;
|
|
507
|
+
break;
|
|
508
|
+
}
|
|
509
|
+
|
|
510
|
+
// Step 3: Match orphans against pedimento source map
|
|
511
|
+
const updates = [];
|
|
512
|
+
for (const file of orphanFiles) {
|
|
513
|
+
const source = sourceMap.get(file.detected_pedimento);
|
|
514
|
+
if (source) {
|
|
515
|
+
updates.push({
|
|
516
|
+
id: file.id,
|
|
517
|
+
arelaPath: source.arela_path,
|
|
518
|
+
rfc: source.rfc,
|
|
519
|
+
detectedPedimento: file.detected_pedimento,
|
|
520
|
+
detectedPedimentoYear: source.detected_pedimento_year,
|
|
521
|
+
propagatedFromId: source.source_id,
|
|
522
|
+
propagatedFromTable: source.source_table,
|
|
523
|
+
propagationError: null,
|
|
524
|
+
});
|
|
525
|
+
}
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
totalOrphans += orphanFiles.length;
|
|
529
|
+
|
|
530
|
+
// Step 4: Batch update matched files
|
|
531
|
+
if (updates.length > 0) {
|
|
532
|
+
try {
|
|
533
|
+
const result = await this.scanApiService.batchUpdatePropagation(
|
|
534
|
+
table.tableName,
|
|
535
|
+
updates,
|
|
536
|
+
);
|
|
537
|
+
stats.filesUpdated += result.updated;
|
|
538
|
+
stats.filesFailed += result.errors;
|
|
539
|
+
} catch (error) {
|
|
540
|
+
logger.error(
|
|
541
|
+
`Failed cross-table update on ${table.tableName}:`,
|
|
542
|
+
error,
|
|
543
|
+
);
|
|
544
|
+
stats.filesFailed += updates.length;
|
|
545
|
+
}
|
|
546
|
+
}
|
|
547
|
+
|
|
548
|
+
offset += orphanFiles.length;
|
|
549
|
+
if (orphanFiles.length < this.options.batchSize) {
|
|
550
|
+
hasMore = false;
|
|
551
|
+
}
|
|
552
|
+
}
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
console.log(` 📊 Cross-table results:`);
|
|
556
|
+
console.log(` Orphan files checked: ${totalOrphans}`);
|
|
557
|
+
console.log(` Files updated: ${stats.filesUpdated}`);
|
|
558
|
+
console.log(` Files failed: ${stats.filesFailed}\n`);
|
|
559
|
+
|
|
560
|
+
return stats;
|
|
561
|
+
}
|
|
562
|
+
|
|
442
563
|
/**
|
|
443
564
|
* Show final propagation statistics
|
|
444
565
|
* @private
|
|
@@ -222,7 +222,10 @@ export class PushCommand {
|
|
|
222
222
|
}
|
|
223
223
|
|
|
224
224
|
const scanConfig = appConfig.getScanConfig();
|
|
225
|
-
|
|
225
|
+
// When basePathFull is '*' (cross-directory wildcard), tableName is intentionally null.
|
|
226
|
+
// The push command fetches tables dynamically via getInstanceTables, so a static
|
|
227
|
+
// tableName is not required — only companySlug, serverId, and basePathFull matter.
|
|
228
|
+
if (!scanConfig.tableName && scanConfig.basePathFull !== '*') {
|
|
226
229
|
errors.push('Could not generate table name from configuration');
|
|
227
230
|
}
|
|
228
231
|
|
|
@@ -49,7 +49,10 @@ export class ScanCommand {
|
|
|
49
49
|
|
|
50
50
|
const scanConfig = appConfig.getScanConfig();
|
|
51
51
|
// Ensure basePath is absolute for scan operations
|
|
52
|
-
|
|
52
|
+
// '*' is a wildcard sentinel for cross-drive pipelines — no real basePath
|
|
53
|
+
const rawBasePath = appConfig.getBasePath();
|
|
54
|
+
const basePath =
|
|
55
|
+
rawBasePath === '*' ? '*' : PathNormalizer.toAbsolutePath(rawBasePath);
|
|
53
56
|
|
|
54
57
|
logger.info('🔍 Starting arela scan command');
|
|
55
58
|
logger.info(`🎯 API Target: ${apiTarget}`);
|
|
@@ -213,8 +216,16 @@ export class ScanCommand {
|
|
|
213
216
|
if (level === 0) {
|
|
214
217
|
// Level 0: Create one entry per source
|
|
215
218
|
return sources.map((source) => {
|
|
216
|
-
|
|
217
|
-
|
|
219
|
+
let sourcePath;
|
|
220
|
+
if (source === '.') {
|
|
221
|
+
sourcePath = basePath;
|
|
222
|
+
} else if (source.startsWith('..') || path.isAbsolute(source)) {
|
|
223
|
+
// Source is a relative-to-CWD path (e.g., from pipeline UI) or absolute
|
|
224
|
+
sourcePath = PathNormalizer.toAbsolutePath(source);
|
|
225
|
+
} else {
|
|
226
|
+
// Source is a subdirectory of basePath
|
|
227
|
+
sourcePath = path.resolve(basePath, source);
|
|
228
|
+
}
|
|
218
229
|
// Label is relative path for display purposes only
|
|
219
230
|
const label = source === '.' ? '' : source;
|
|
220
231
|
return { path: sourcePath, label };
|
|
@@ -238,8 +249,11 @@ export class ScanCommand {
|
|
|
238
249
|
// Source is current directory, use discovered path as-is
|
|
239
250
|
directories.push(levelDir);
|
|
240
251
|
} else {
|
|
241
|
-
//
|
|
242
|
-
const combinedPath =
|
|
252
|
+
// Resolve source: if it starts with ".." it's relative to CWD, not levelDir
|
|
253
|
+
const combinedPath =
|
|
254
|
+
source.startsWith('..') || path.isAbsolute(source)
|
|
255
|
+
? PathNormalizer.toAbsolutePath(source)
|
|
256
|
+
: path.resolve(levelDir.path, source);
|
|
243
257
|
|
|
244
258
|
// Only add if the combined path actually exists
|
|
245
259
|
try {
|
|
@@ -290,6 +290,9 @@ export class WorkerCommand {
|
|
|
290
290
|
if (scanConfig.directoryLevel !== undefined) {
|
|
291
291
|
process.env.SCAN_DIRECTORY_LEVEL = String(scanConfig.directoryLevel);
|
|
292
292
|
}
|
|
293
|
+
|
|
294
|
+
// Reload cached config from the updated env vars
|
|
295
|
+
appConfig.reloadScanConfig();
|
|
293
296
|
}
|
|
294
297
|
|
|
295
298
|
/**
|
package/src/config/config.js
CHANGED
|
@@ -36,10 +36,10 @@ class Config {
|
|
|
36
36
|
const __dirname = path.dirname(__filename);
|
|
37
37
|
const packageJsonPath = path.resolve(__dirname, '../../package.json');
|
|
38
38
|
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
|
|
39
|
-
return packageJson.version || '1.0.
|
|
39
|
+
return packageJson.version || '1.0.19';
|
|
40
40
|
} catch (error) {
|
|
41
41
|
console.warn('⚠️ Could not read package.json version, using fallback');
|
|
42
|
-
return '1.0.
|
|
42
|
+
return '1.0.19';
|
|
43
43
|
}
|
|
44
44
|
}
|
|
45
45
|
|
|
@@ -263,14 +263,23 @@ class Config {
|
|
|
263
263
|
// IMPORTANT: Always resolve to absolute path for uniqueness
|
|
264
264
|
if (!basePathLabel && process.env.UPLOAD_BASE_PATH) {
|
|
265
265
|
const basePath = process.env.UPLOAD_BASE_PATH;
|
|
266
|
-
//
|
|
267
|
-
|
|
268
|
-
|
|
266
|
+
// '*' is a wildcard sentinel for cross-drive scenarios — keep as-is
|
|
267
|
+
if (basePath === '*') {
|
|
268
|
+
basePathLabel = '*';
|
|
269
|
+
} else {
|
|
270
|
+
// Resolve to absolute path (handles ../sample vs ./sample correctly)
|
|
271
|
+
// Note: toAbsolutePath handles Windows paths (O:\...) even on macOS/Linux
|
|
272
|
+
basePathLabel = PathNormalizer.toAbsolutePath(basePath);
|
|
273
|
+
}
|
|
269
274
|
}
|
|
270
275
|
|
|
271
|
-
// If basePathLabel is provided, ensure it's absolute
|
|
276
|
+
// If basePathLabel is provided, ensure it's absolute (skip wildcard)
|
|
272
277
|
// Use PathNormalizer.isAbsolutePath for cross-platform Windows path detection
|
|
273
|
-
if (
|
|
278
|
+
if (
|
|
279
|
+
basePathLabel &&
|
|
280
|
+
basePathLabel !== '*' &&
|
|
281
|
+
!PathNormalizer.isAbsolutePath(basePathLabel)
|
|
282
|
+
) {
|
|
274
283
|
basePathLabel = PathNormalizer.toAbsolutePath(basePathLabel);
|
|
275
284
|
}
|
|
276
285
|
|
|
@@ -288,7 +297,7 @@ class Config {
|
|
|
288
297
|
// Note: This is just for reference; actual table names are generated dynamically
|
|
289
298
|
// in ScanCommand based on discovered directories and levels
|
|
290
299
|
let tableName = null;
|
|
291
|
-
if (companySlug && serverId && basePathLabel) {
|
|
300
|
+
if (companySlug && serverId && basePathLabel && basePathLabel !== '*') {
|
|
292
301
|
tableName = PathNormalizer.generateTableName({
|
|
293
302
|
companySlug,
|
|
294
303
|
serverId,
|
|
@@ -658,6 +667,15 @@ class Config {
|
|
|
658
667
|
};
|
|
659
668
|
}
|
|
660
669
|
|
|
670
|
+
/**
|
|
671
|
+
* Rebuild the cached upload and scan configuration (this.upload, this.scan) from the current process.env values.
|
|
672
|
+
* Must be called after modifying env vars at runtime (e.g., PollWorkerCommand); the upload config is reloaded before the scan config.
|
|
673
|
+
*/
|
|
674
|
+
reloadScanConfig() {
|
|
675
|
+
this.upload = this.#loadUploadConfig();
|
|
676
|
+
this.scan = this.#loadScanConfig();
|
|
677
|
+
}
|
|
678
|
+
|
|
661
679
|
/**
|
|
662
680
|
* Validate watch configuration
|
|
663
681
|
* @param {string[]} directories - Directories to validate
|
|
@@ -1,4 +1,7 @@
|
|
|
1
1
|
// Import all document type definitions
|
|
2
|
+
import { dodaPdfDefinition } from './document-types/doda-pdf.js';
|
|
3
|
+
import { dodaXmlDefinition } from './document-types/doda-xml.js';
|
|
4
|
+
import { facturasComerciales } from './document-types/facturas-comerciales.js';
|
|
2
5
|
import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
|
|
3
6
|
import { proformaDefinition } from './document-types/proforma.js';
|
|
4
7
|
import { supportDocumentDefinition } from './document-types/support-document.js';
|
|
@@ -39,6 +42,9 @@ export class DocumentTypeDefinition {
|
|
|
39
42
|
const documentTypes = [
|
|
40
43
|
pedimentoSimplificadoDefinition,
|
|
41
44
|
supportDocumentDefinition,
|
|
45
|
+
dodaPdfDefinition,
|
|
46
|
+
dodaXmlDefinition,
|
|
47
|
+
facturasComerciales,
|
|
42
48
|
// Add more document types here as needed
|
|
43
49
|
];
|
|
44
50
|
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
import { FieldResult } from '../document-type-shared.js';
|
|
2
|
+
|
|
3
|
+
/**
 * DODA PDF Document Type Definition
 * Detects DODA (Documento de Operación para Despacho Aduanero) in PDF format.
 * DODAs are validation documents generated by VUCEM for customs clearance.
 * They reside in the SAME directory as the pedimento_simplificado,
 * so within-table propagation handles arela_path assignment.
 */
export const dodaPdfDefinition = {
  type: 'doda_pdf',
  extensions: ['pdf'],

  /**
   * Decide whether extracted PDF text looks like a DODA.
   * @param {string} source - Text extracted from the PDF
   * @returns {boolean} true when the full DODA title is present, or when the
   *   secondary markers plus a pedimento-shaped number are present
   */
  match: (source) => {
    // DODA PDFs contain specific markers from VUCEM/customs systems
    const markers = [
      /DOCUMENTO DE OPERACI[OÓ]N PARA DESPACHO ADUANERO/i,
      /DODA/i,
      /VUCEM/i,
    ];

    // The full document title alone is conclusive
    if (markers[0].test(source)) return true;

    const secondaryMatches = markers
      .slice(1)
      .filter((m) => m.test(source)).length;

    // Weaker markers only count together with a pedimento-shaped number
    const hasPedimentoNumber = /\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/.test(source);
    const hasDodaContext =
      /despacho aduanero|operaci[oó]n aduanera|validaci[oó]n/i.test(source);

    return (
      (secondaryMatches >= 2 && hasPedimentoNumber) ||
      (hasDodaContext && hasPedimentoNumber && secondaryMatches >= 1)
    );
  },

  /**
   * Read the pedimento number from the already-extracted fields.
   * @param {string} source - Document text (unused)
   * @param {Array<{name: string, value: *}>} [fields] - Extracted fields
   * @returns {string|null} Value of the numPedimento field, or null
   */
  extractNumPedimento: (source, fields) => {
    return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
  },

  /**
   * Derive the four-digit year from the two-digit prefix of numPedimento.
   * @param {string} source - Document text (unused)
   * @param {Array<{name: string, value: *}>} [fields] - Extracted fields
   * @returns {number|null} 20YY for prefixes below 50, 19YY otherwise; null
   *   when the field is missing or its prefix is not numeric
   */
  extractPedimentoYear: (source, fields) => {
    const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
    if (!numPedimento) return null;
    const year = Number.parseInt(String(numPedimento).substring(0, 2), 10);
    // A non-numeric prefix must not leak NaN to callers
    if (Number.isNaN(year)) return null;
    return year < 50 ? year + 2000 : year + 1900;
  },

  extractors: [
    {
      field: 'numPedimento',
      extract: (source) => {
        // Try cadena original format: ||aduana|patente|...|pedimentos|integracion|...
        // e.g. ||070|3429|2|4009029,4008062|109335668|A231|
        const cadenaMatch = source.match(
          /\|\|(\d{2,3})\|(\d{4})\|\d\|([\d,]+)\|(\d+)\|/,
        );
        if (cadenaMatch) {
          const [, rawAduana, patente, pedimentoList] = cadenaMatch;
          // 3-digit code = aduana(2) + section(1), e.g. 070 → aduana 07
          const aduana =
            rawAduana.length === 3
              ? rawAduana.slice(0, 2)
              : rawAduana.padStart(2, '0');
          // Use the first non-empty pedimento number; a stray leading comma
          // would otherwise be padded into a fabricated '0000000'
          const firstPedimento = pedimentoList
            .split(',')
            .find((entry) => entry !== '');
          if (firstPedimento) {
            const pedNum = firstPedimento.padStart(7, '0');
            // Year comes from the first YYYY-MM-DD date found in the text.
            // NOTE(review): without any date this falls back to the current
            // year, so the result depends on when the scan runs — confirm.
            const yearMatch = source.match(/(\d{4})-\d{2}-\d{2}/);
            const year = yearMatch
              ? yearMatch[1].slice(-2)
              : new Date().getFullYear().toString().slice(-2);
            const full = `${year}${aduana}${patente}${pedNum}`;
            // Anything other than 15 digits is not a valid pedimento number
            if (full.length === 15) {
              return new FieldResult('numPedimento', true, full);
            }
          }
        }

        // Try dash-separated format: YY-AA-PPPP-NNNNNNN
        const dashMatch = source.match(/(\d{2})-(\d{2})-(\d{4})-(\d{7})/);
        if (dashMatch) {
          const full =
            dashMatch[1] + dashMatch[2] + dashMatch[3] + dashMatch[4];
          return new FieldResult('numPedimento', true, full);
        }

        // Try 15-digit near pedimento keyword (avoid matching sello digital)
        const contextMatch = source.match(
          /pedimento[^\d]{0,30}(\d{2}\s?\d{2}\s?\d{4}\s?\d{7})/i,
        );
        if (contextMatch) {
          return new FieldResult(
            'numPedimento',
            true,
            contextMatch[1].replace(/\s/g, ''),
          );
        }

        return new FieldResult('numPedimento', false, null);
      },
    },
    {
      field: 'rfc',
      extract: (source) => {
        // Mexican RFC: 3-4 letters + 6 digits + 3 alphanumeric.
        // Explicit lookarounds replace \b, which cannot anchor in front of
        // 'Ñ' or '&' (both are non-word characters) and therefore truncated
        // or missed RFCs starting with them.
        const match = source.match(
          /(?<![\wÑ&])([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})(?!\w)/,
        );
        return new FieldResult('rfc', !!match, match ? match[1] : null);
      },
    },
    {
      field: 'aduana',
      extract: (source) => {
        // Digits that follow the word "aduana", optionally after ':' or spaces
        const match = source.match(/aduana[:\s]*(\d{2,4})/i);
        return new FieldResult('aduana', !!match, match ? match[1] : null);
      },
    },
  ],
};
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
import { FieldResult } from '../document-type-shared.js';
|
|
2
|
+
|
|
3
|
+
/**
 * DODA XML Document Type Definition
 * Detects DODA (Documento de Operación para Despacho Aduanero) in XML format.
 * XML DODAs contain structured data from VUCEM/customs systems.
 * They reside in the SAME directory as the pedimento_simplificado,
 * so within-table propagation handles arela_path assignment.
 */
export const dodaXmlDefinition = {
  type: 'doda_xml',
  extensions: ['xml'],

  /**
   * Decide whether XML text looks like a DODA.
   * @param {string} source - Raw XML text
   * @returns {boolean} true when any DODA-specific marker is present, or when
   *   at least three pedimento element names appear in a document that has an
   *   XML declaration
   */
  match: (source) => {
    // DODA XML files contain specific XML tags/namespaces
    const xmlMarkers = [
      /documentoOperacion/i,
      /despachoAduanero/i,
      /<doda\b/i,
      /xmlns[^"]*doda/i,
      /VUCEM/i,
    ];

    // Also check for pedimento-related XML structure
    const pedimentoXmlMarkers = [
      /numPedimento/i,
      /patenteAduanal/i,
      /aduanaDespacho/i,
      /tipoOperacion/i,
    ];

    const dodaMatches = xmlMarkers.filter((m) => m.test(source)).length;
    const pedimentoMatches = pedimentoXmlMarkers.filter((m) =>
      m.test(source),
    ).length;

    // Match if: has DODA-specific markers, or combination of pedimento markers with XML structure
    return (
      dodaMatches >= 1 || (pedimentoMatches >= 3 && /<\?xml/i.test(source))
    );
  },

  /**
   * Read the pedimento number from the already-extracted fields.
   * @param {string} source - Raw XML text (unused)
   * @param {Array<{name: string, value: *}>} [fields] - Extracted fields
   * @returns {string|null} Value of the numPedimento field, or null
   */
  extractNumPedimento: (source, fields) => {
    return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
  },

  /**
   * Derive the four-digit year of the pedimento.
   * Uses the two-digit prefix of numPedimento when it is numeric; otherwise
   * falls back to the first YYYY-MM-DD date found in the XML.
   * @param {string} source - Raw XML text
   * @param {Array<{name: string, value: *}>} [fields] - Extracted fields
   * @returns {number|null} Year, or null when neither source yields one
   */
  extractPedimentoYear: (source, fields) => {
    const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
    if (numPedimento) {
      const year = Number.parseInt(String(numPedimento).substring(0, 2), 10);
      // A non-numeric prefix falls through to the date lookup instead of
      // returning NaN
      if (!Number.isNaN(year)) {
        return year < 50 ? year + 2000 : year + 1900;
      }
    }

    // Try to extract year from date in XML
    const dateMatch = source.match(/(\d{4})-\d{2}-\d{2}/);
    return dateMatch ? Number.parseInt(dateMatch[1], 10) : null;
  },

  extractors: [
    {
      field: 'numPedimento',
      extract: (source) => {
        // Try XML tag format first
        const xmlMatch = source.match(/numPedimento[^>]*>(\d{15})<\/[^>]+>/i);
        if (xmlMatch) {
          return new FieldResult('numPedimento', true, xmlMatch[1]);
        }

        // Try attribute format; whitespace around '=' is tolerated and the
        // value must be exactly 15 digits, not the head of a longer number
        const attrMatch = source.match(
          /numPedimento\s*[=:"]\s*["']?(\d{15})(?!\d)["']?/i,
        );
        if (attrMatch) {
          return new FieldResult('numPedimento', true, attrMatch[1]);
        }

        // Fallback: 15-digit pattern, bounded on both sides so that longer
        // digit runs (seals, timestamps) are not sliced into a fake pedimento
        const fallback = source.match(
          /(?<!\d)\d{2}\s?\d{2}\s?\d{4}\s?\d{7}(?!\d)/,
        );
        return new FieldResult(
          'numPedimento',
          !!fallback,
          fallback ? fallback[0].replace(/\s/g, '') : null,
        );
      },
    },
    {
      field: 'rfc',
      extract: (source) => {
        // Try XML tag format
        const xmlMatch = source.match(
          /rfc[^>]*>([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})<\/[^>]+>/i,
        );
        if (xmlMatch) {
          return new FieldResult('rfc', true, xmlMatch[1]);
        }

        // Fallback: generic RFC pattern. Explicit lookarounds replace \b,
        // which cannot anchor in front of 'Ñ' or '&' (non-word characters)
        // and therefore truncated or missed RFCs starting with them.
        const match = source.match(
          /(?<![\wÑ&])([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})(?!\w)/,
        );
        return new FieldResult('rfc', !!match, match ? match[1] : null);
      },
    },
    {
      field: 'patente',
      extract: (source) => {
        // Four-digit content of a patenteAduanal element
        const match = source.match(/patenteAduanal[^>]*>(\d{4})<\/[^>]+>/i);
        return new FieldResult('patente', !!match, match ? match[1] : null);
      },
    },
    {
      field: 'aduana',
      extract: (source) => {
        // Two- to four-digit content of an aduanaDespacho element
        const match = source.match(/aduanaDespacho[^>]*>(\d{2,4})<\/[^>]+>/i);
        return new FieldResult('aduana', !!match, match ? match[1] : null);
      },
    },
  ],
};
|
|
@@ -0,0 +1,233 @@
|
|
|
1
|
+
import { FieldResult } from '../document-type-shared.js';
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* Facturas Comerciales Document Type Definition
|
|
5
|
+
* Detects commercial invoices (facturas) related to customs operations.
|
|
6
|
+
*
|
|
7
|
+
* CRITICAL: Facturas reside in a DIFFERENT directory than the pedimento_simplificado,
|
|
8
|
+
* creating a different CLI scan table. Cross-table propagation uses `detected_pedimento`
|
|
9
|
+
* to link facturas to their corresponding pedimento and assign arela_path.
|
|
10
|
+
*
|
|
11
|
+
* Supported formats: PDF (scanned invoices), XML (CFDI/electronic invoices)
|
|
12
|
+
*/
|
|
13
|
+
export const facturasComerciales = {
|
|
14
|
+
type: 'factura_comercial',
|
|
15
|
+
extensions: ['pdf', 'xml'],
|
|
16
|
+
match: (source) => {
|
|
17
|
+
// CFDI / electronic invoice markers (XML)
|
|
18
|
+
const cfdiMarkers = [
|
|
19
|
+
/cfdi:Comprobante/i,
|
|
20
|
+
/xmlns:cfdi/i,
|
|
21
|
+
/TipoDeComprobante/i,
|
|
22
|
+
/timbreFiscalDigital/i,
|
|
23
|
+
/SelloSAT/i,
|
|
24
|
+
];
|
|
25
|
+
|
|
26
|
+
// PDF invoice markers
|
|
27
|
+
const invoiceMarkers = [
|
|
28
|
+
/factura\s*(comercial|de\s*venta|de\s*exportaci[oó]n)?/i,
|
|
29
|
+
/commercial\s*invoice/i,
|
|
30
|
+
/invoice\s*number/i,
|
|
31
|
+
/n[uú]mero\s*de\s*factura/i,
|
|
32
|
+
];
|
|
33
|
+
|
|
34
|
+
// Customs-related invoice context
|
|
35
|
+
const customsContext = [
|
|
36
|
+
/pedimento/i,
|
|
37
|
+
/aduana/i,
|
|
38
|
+
/importaci[oó]n|exportaci[oó]n/i,
|
|
39
|
+
/despacho\s*aduanero/i,
|
|
40
|
+
/fracci[oó]n\s*arancelaria/i,
|
|
41
|
+
];
|
|
42
|
+
|
|
43
|
+
const cfdiMatches = cfdiMarkers.filter((m) => m.test(source)).length;
|
|
44
|
+
const invoiceMatches = invoiceMarkers.filter((m) => m.test(source)).length;
|
|
45
|
+
const customsMatches = customsContext.filter((m) => m.test(source)).length;
|
|
46
|
+
|
|
47
|
+
// Match if: CFDI structure (>=2 markers), or invoice + customs context
|
|
48
|
+
return cfdiMatches >= 2 || (invoiceMatches >= 1 && customsMatches >= 1);
|
|
49
|
+
},
|
|
50
|
+
|
|
51
|
+
extractNumPedimento: (source, fields) => {
|
|
52
|
+
return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
|
|
53
|
+
},
|
|
54
|
+
|
|
55
|
+
extractPedimentoYear: (source, fields) => {
|
|
56
|
+
const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
|
|
57
|
+
if (numPedimento) {
|
|
58
|
+
const year = parseInt(numPedimento.substring(0, 2), 10);
|
|
59
|
+
return year < 50 ? year + 2000 : year + 1900;
|
|
60
|
+
}
|
|
61
|
+
// Try invoice date field
|
|
62
|
+
const invoiceDate = fields?.find((f) => f.name === 'invoiceDate')?.value;
|
|
63
|
+
if (invoiceDate) {
|
|
64
|
+
const yearMatch = invoiceDate.match(/^(\d{4})/);
|
|
65
|
+
if (yearMatch) return parseInt(yearMatch[1], 10);
|
|
66
|
+
}
|
|
67
|
+
// Try to extract year from any date in source
|
|
68
|
+
const dateMatch = source.match(/(\d{4})-\d{2}-\d{2}/);
|
|
69
|
+
if (dateMatch) return parseInt(dateMatch[1], 10);
|
|
70
|
+
const mmmMatch = source.match(
|
|
71
|
+
/\d{1,2}\/(?:ene|feb|mar|abr|may|jun|jul|ago|sep|oct|nov|dic)\/(\d{4})/i,
|
|
72
|
+
);
|
|
73
|
+
if (mmmMatch) return parseInt(mmmMatch[1], 10);
|
|
74
|
+
return null;
|
|
75
|
+
},
|
|
76
|
+
|
|
77
|
+
extractors: [
|
|
78
|
+
{
|
|
79
|
+
field: 'numPedimento',
|
|
80
|
+
extract: (source) => {
|
|
81
|
+
// Try dash-separated format: YY-AA-PPPP-NNNNNNN (most common in Mexican import invoices)
|
|
82
|
+
// e.g. "26-07-3429-6016477" — may be wrapped across lines in PDF text
|
|
83
|
+
const dashMatch = source.match(
|
|
84
|
+
/(\d{2})-(\d{2})-(\d{4})-(\d{1,7})\s*(\d*)/,
|
|
85
|
+
);
|
|
86
|
+
if (dashMatch) {
|
|
87
|
+
const lastPart = (dashMatch[4] + dashMatch[5]).substring(0, 7);
|
|
88
|
+
if (lastPart.length === 7) {
|
|
89
|
+
const full = dashMatch[1] + dashMatch[2] + dashMatch[3] + lastPart;
|
|
90
|
+
return new FieldResult('numPedimento', true, full);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
|
|
94
|
+
// Try CFDI XML: NumPedimento attribute or InformacionAduanera
|
|
95
|
+
const cfdiMatch = source.match(
|
|
96
|
+
/(?:NumPedimento|NumeroPedimento)[=:"]\s*["']?(\d{15})["']?/i,
|
|
97
|
+
);
|
|
98
|
+
if (cfdiMatch) {
|
|
99
|
+
return new FieldResult('numPedimento', true, cfdiMatch[1]);
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
// Try InformacionAduanera tag
|
|
103
|
+
const aduaneraMatch = source.match(
|
|
104
|
+
/InformacionAduanera[^>]*NumeroPedimento[=:"]\s*["']?(\d{15})["']?/i,
|
|
105
|
+
);
|
|
106
|
+
if (aduaneraMatch) {
|
|
107
|
+
return new FieldResult('numPedimento', true, aduaneraMatch[1]);
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
// Try generic XML tag
|
|
111
|
+
const xmlMatch = source.match(/pedimento[^>]*>(\d{15})<\/[^>]+>/i);
|
|
112
|
+
if (xmlMatch) {
|
|
113
|
+
return new FieldResult('numPedimento', true, xmlMatch[1]);
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
// Try space-separated near pedimento keyword
|
|
117
|
+
const textMatch = source.match(
|
|
118
|
+
/pedimento[^\d]{0,30}(\d{2}\s?\d{2}\s?\d{4}\s?\d{7})/i,
|
|
119
|
+
);
|
|
120
|
+
if (textMatch) {
|
|
121
|
+
return new FieldResult(
|
|
122
|
+
'numPedimento',
|
|
123
|
+
true,
|
|
124
|
+
textMatch[1].replace(/\s/g, ''),
|
|
125
|
+
);
|
|
126
|
+
}
|
|
127
|
+
|
|
128
|
+
return new FieldResult('numPedimento', false, null);
|
|
129
|
+
},
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
field: 'rfc',
|
|
133
|
+
extract: (source) => {
|
|
134
|
+
// Try CFDI Rfc attribute (emisor)
|
|
135
|
+
const cfdiMatch = source.match(
|
|
136
|
+
/Emisor[^>]*Rfc[=:"]\s*["']?([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})["']?/i,
|
|
137
|
+
);
|
|
138
|
+
if (cfdiMatch) {
|
|
139
|
+
return new FieldResult('rfc', true, cfdiMatch[1]);
|
|
140
|
+
}
|
|
141
|
+
|
|
142
|
+
// Try Receptor Rfc (for import invoices, receptor is the importer)
|
|
143
|
+
const receptorMatch = source.match(
|
|
144
|
+
/Receptor[^>]*Rfc[=:"]\s*["']?([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})["']?/i,
|
|
145
|
+
);
|
|
146
|
+
if (receptorMatch) {
|
|
147
|
+
return new FieldResult('rfc', true, receptorMatch[1]);
|
|
148
|
+
}
|
|
149
|
+
|
|
150
|
+
// Fallback: generic RFC pattern
|
|
151
|
+
const match = source.match(/\b([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})\b/);
|
|
152
|
+
return new FieldResult('rfc', !!match, match ? match[1] : null);
|
|
153
|
+
},
|
|
154
|
+
},
|
|
155
|
+
{
|
|
156
|
+
field: 'invoiceNumber',
|
|
157
|
+
extract: (source) => {
|
|
158
|
+
// CFDI Folio
|
|
159
|
+
const folioMatch = source.match(/Folio[=:"]\s*["']?([A-Z0-9-]+)["']?/i);
|
|
160
|
+
if (folioMatch) {
|
|
161
|
+
return new FieldResult('invoiceNumber', true, folioMatch[1]);
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
// Header line followed by invoice number on next line
|
|
165
|
+
// e.g. "FACTURA FECHA ADUANA...\nMIB260064 02/mar/2026..."
|
|
166
|
+
const headerMatch = source.match(
|
|
167
|
+
/FACTURA\s+FECHA[^\n]*\n([A-Z]{2,5}\d{4,10})/i,
|
|
168
|
+
);
|
|
169
|
+
if (headerMatch) {
|
|
170
|
+
return new FieldResult('invoiceNumber', true, headerMatch[1]);
|
|
171
|
+
}
|
|
172
|
+
|
|
173
|
+
// Text-based invoice number
|
|
174
|
+
const textMatch = source.match(
|
|
175
|
+
/(?:factura|invoice)\s*(?:no\.?|number|#|num\.?)?[:\s]*([A-Z]{2,5}\d{4,10})/i,
|
|
176
|
+
);
|
|
177
|
+
return new FieldResult(
|
|
178
|
+
'invoiceNumber',
|
|
179
|
+
!!textMatch,
|
|
180
|
+
textMatch ? textMatch[1] : null,
|
|
181
|
+
);
|
|
182
|
+
},
|
|
183
|
+
},
|
|
184
|
+
{
|
|
185
|
+
field: 'invoiceDate',
|
|
186
|
+
extract: (source) => {
|
|
187
|
+
// CFDI Fecha attribute
|
|
188
|
+
const cfdiMatch = source.match(
|
|
189
|
+
/Fecha[=:"]\s*["']?(\d{4}-\d{2}-\d{2})/i,
|
|
190
|
+
);
|
|
191
|
+
if (cfdiMatch) {
|
|
192
|
+
return new FieldResult('invoiceDate', true, cfdiMatch[1]);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// DD/MMM/YYYY format (e.g. "02/mar/2026")
|
|
196
|
+
const mmmMonths = {
|
|
197
|
+
ene: '01',
|
|
198
|
+
feb: '02',
|
|
199
|
+
mar: '03',
|
|
200
|
+
abr: '04',
|
|
201
|
+
may: '05',
|
|
202
|
+
jun: '06',
|
|
203
|
+
jul: '07',
|
|
204
|
+
ago: '08',
|
|
205
|
+
sep: '09',
|
|
206
|
+
oct: '10',
|
|
207
|
+
nov: '11',
|
|
208
|
+
dic: '12',
|
|
209
|
+
};
|
|
210
|
+
const mmmMatch = source.match(
|
|
211
|
+
/(\d{1,2})\/(ene|feb|mar|abr|may|jun|jul|ago|sep|oct|nov|dic)\/(\d{4})/i,
|
|
212
|
+
);
|
|
213
|
+
if (mmmMatch) {
|
|
214
|
+
const day = mmmMatch[1].padStart(2, '0');
|
|
215
|
+
const month = mmmMonths[mmmMatch[2].toLowerCase()];
|
|
216
|
+
return new FieldResult(
|
|
217
|
+
'invoiceDate',
|
|
218
|
+
true,
|
|
219
|
+
`${mmmMatch[3]}-${month}-${day}`,
|
|
220
|
+
);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
// ISO date
|
|
224
|
+
const isoMatch = source.match(/(\d{4}-\d{2}-\d{2})/);
|
|
225
|
+
return new FieldResult(
|
|
226
|
+
'invoiceDate',
|
|
227
|
+
!!isoMatch,
|
|
228
|
+
isoMatch ? isoMatch[1] : null,
|
|
229
|
+
);
|
|
230
|
+
},
|
|
231
|
+
},
|
|
232
|
+
],
|
|
233
|
+
};
|
|
@@ -368,24 +368,32 @@ export class ScanApiService {
|
|
|
368
368
|
// ============================================================================
|
|
369
369
|
|
|
370
370
|
/**
|
|
371
|
-
* Fetch
|
|
371
|
+
* Fetch files for detection
|
|
372
372
|
* @param {string} tableName - Target table name
|
|
373
373
|
* @param {number} offset - Pagination offset
|
|
374
374
|
* @param {number} limit - Number of records to fetch
|
|
375
|
+
* @param {boolean} allTypes - When true, fetch all supported file types instead of just likely-simplificado PDFs
|
|
375
376
|
* @returns {Promise<Object>} { data: Array, hasMore: boolean }
|
|
376
377
|
*/
|
|
377
|
-
async fetchPdfsForDetection(
|
|
378
|
+
async fetchPdfsForDetection(
|
|
379
|
+
tableName,
|
|
380
|
+
offset = 0,
|
|
381
|
+
limit = 100,
|
|
382
|
+
allTypes = false,
|
|
383
|
+
) {
|
|
378
384
|
logger.debug(
|
|
379
|
-
`Fetching
|
|
385
|
+
`Fetching files for detection (offset: ${offset}, limit: ${limit}, allTypes: ${allTypes})...`,
|
|
380
386
|
);
|
|
381
387
|
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
'
|
|
385
|
-
|
|
388
|
+
let url = `/api/uploader/scan/pdfs-for-detection?tableName=${encodeURIComponent(tableName)}&offset=${offset}&limit=${limit}`;
|
|
389
|
+
if (allTypes) {
|
|
390
|
+
url += '&allTypes=true';
|
|
391
|
+
}
|
|
392
|
+
|
|
393
|
+
const result = await this.#request(url, 'GET');
|
|
386
394
|
|
|
387
395
|
logger.debug(
|
|
388
|
-
`Fetched ${result.data.length}
|
|
396
|
+
`Fetched ${result.data.length} files, hasMore: ${result.hasMore}`,
|
|
389
397
|
);
|
|
390
398
|
return result;
|
|
391
399
|
}
|
|
@@ -420,13 +428,15 @@ export class ScanApiService {
|
|
|
420
428
|
* @param {string} tableName - Target table name
|
|
421
429
|
* @returns {Promise<Object>} { totalPdfs, detected, pending, errors }
|
|
422
430
|
*/
|
|
423
|
-
async getDetectionStats(tableName) {
|
|
431
|
+
async getDetectionStats(tableName, allTypes = false) {
|
|
424
432
|
logger.debug('Fetching detection statistics...');
|
|
425
433
|
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
'
|
|
429
|
-
|
|
434
|
+
let url = `/api/uploader/scan/detection-stats?tableName=${encodeURIComponent(tableName)}`;
|
|
435
|
+
if (allTypes) {
|
|
436
|
+
url += '&allTypes=true';
|
|
437
|
+
}
|
|
438
|
+
|
|
439
|
+
const result = await this.#request(url, 'GET');
|
|
430
440
|
|
|
431
441
|
logger.debug(
|
|
432
442
|
`Detection stats: ${result.detected}/${result.totalPdfs} detected, ${result.pending} pending`,
|
|
@@ -554,6 +564,68 @@ export class ScanApiService {
|
|
|
554
564
|
return result;
|
|
555
565
|
}
|
|
556
566
|
|
|
567
|
+
// ============================================================================
|
|
568
|
+
// CROSS-TABLE PROPAGATION
|
|
569
|
+
// ============================================================================
|
|
570
|
+
|
|
571
|
+
/**
|
|
572
|
+
* Fetch pedimento sources across all tables for cross-table propagation
|
|
573
|
+
* @param {string} companySlug - Company slug
|
|
574
|
+
* @param {string} serverId - Server ID
|
|
575
|
+
* @param {string} basePathFull - Base path
|
|
576
|
+
* @returns {Promise<Array>} Array of pedimento sources with source_table info
|
|
577
|
+
*/
|
|
578
|
+
async fetchCrossTablePedimentoSources(companySlug, serverId, basePathFull) {
|
|
579
|
+
logger.debug('Fetching cross-table pedimento sources...');
|
|
580
|
+
|
|
581
|
+
const result = await this.#request(
|
|
582
|
+
`/api/uploader/scan/cross-table-pedimento-sources?companySlug=${encodeURIComponent(companySlug)}&serverId=${encodeURIComponent(serverId)}&basePathFull=${encodeURIComponent(basePathFull)}`,
|
|
583
|
+
'GET',
|
|
584
|
+
);
|
|
585
|
+
|
|
586
|
+
if (!Array.isArray(result)) {
|
|
587
|
+
logger.error(
|
|
588
|
+
'fetchCrossTablePedimentoSources: Expected array, got:',
|
|
589
|
+
typeof result,
|
|
590
|
+
);
|
|
591
|
+
return [];
|
|
592
|
+
}
|
|
593
|
+
|
|
594
|
+
logger.debug(`Fetched ${result.length} cross-table pedimento sources`);
|
|
595
|
+
return result;
|
|
596
|
+
}
|
|
597
|
+
|
|
598
|
+
/**
|
|
599
|
+
* Fetch files with detected_pedimento but no arela_path (candidates for cross-table propagation)
|
|
600
|
+
* @param {string} tableName - Target table name
|
|
601
|
+
* @param {number} offset - Pagination offset
|
|
602
|
+
* @param {number} limit - Number of records to fetch
|
|
603
|
+
* @returns {Promise<Array>} Array of files needing cross-table propagation
|
|
604
|
+
*/
|
|
605
|
+
async fetchFilesWithPedimentoNoArelaPath(tableName, offset = 0, limit = 100) {
|
|
606
|
+
logger.debug(
|
|
607
|
+
`Fetching files with pedimento but no arela_path (offset: ${offset}, limit: ${limit})...`,
|
|
608
|
+
);
|
|
609
|
+
|
|
610
|
+
const result = await this.#request(
|
|
611
|
+
`/api/uploader/scan/files-with-pedimento-no-arela-path?tableName=${encodeURIComponent(tableName)}&offset=${offset}&limit=${limit}`,
|
|
612
|
+
'GET',
|
|
613
|
+
);
|
|
614
|
+
|
|
615
|
+
if (!Array.isArray(result)) {
|
|
616
|
+
logger.error(
|
|
617
|
+
'fetchFilesWithPedimentoNoArelaPath: Expected array, got:',
|
|
618
|
+
typeof result,
|
|
619
|
+
);
|
|
620
|
+
return [];
|
|
621
|
+
}
|
|
622
|
+
|
|
623
|
+
logger.debug(
|
|
624
|
+
`Fetched ${result.length} files needing cross-table propagation`,
|
|
625
|
+
);
|
|
626
|
+
return result;
|
|
627
|
+
}
|
|
628
|
+
|
|
557
629
|
// ============================================================================
|
|
558
630
|
// PUSH OPERATIONS
|
|
559
631
|
// ============================================================================
|