@arela/uploader 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/OPTIMIZATION_SUMMARY.md +154 -0
- package/PERFORMANCE_OPTIMIZATIONS.md +270 -0
- package/README.md +97 -7
- package/commands.md +6 -0
- package/package.json +1 -1
- package/src/file-detection.js +1 -1
- package/src/index.js +593 -173
package/src/index.js
CHANGED
|
@@ -43,6 +43,7 @@ const sources = process.env.UPLOAD_SOURCES?.split('|')
|
|
|
43
43
|
.filter(Boolean);
|
|
44
44
|
|
|
45
45
|
// Configuración de RFCs para upload
|
|
46
|
+
console.log('π§ Configured RFCs for upload:', process.env.UPLOAD_RFCS);
|
|
46
47
|
const uploadRfcs = process.env.UPLOAD_RFCS?.split('|')
|
|
47
48
|
.map((s) => s.trim())
|
|
48
49
|
.filter(Boolean);
|
|
@@ -156,8 +157,8 @@ const checkCredentials = async (forceSupabase = false) => {
|
|
|
156
157
|
if (!supabaseUrl || !supabaseKey || !bucket) {
|
|
157
158
|
console.error(
|
|
158
159
|
'β οΈ Missing credentials. Please set either:\n' +
|
|
159
|
-
|
|
160
|
-
|
|
160
|
+
' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
|
|
161
|
+
' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
|
|
161
162
|
);
|
|
162
163
|
process.exit(1);
|
|
163
164
|
}
|
|
@@ -179,13 +180,107 @@ const checkCredentials = async (forceSupabase = false) => {
|
|
|
179
180
|
};
|
|
180
181
|
|
|
181
182
|
const logFilePath = path.resolve(process.cwd(), 'arela-upload.log');
|
|
183
|
+
|
|
184
|
+
/**
 * OPTIMIZED: Log buffer to reduce I/O operations.
 *
 * Entries are collected in memory and appended to the log file with a single
 * synchronous write instead of one write per message.
 */
let logBuffer = [];
const LOG_BUFFER_SIZE = 100; // Flush every 100 log entries
let lastFlushTime = Date.now();
const LOG_FLUSH_INTERVAL = 5000; // Flush every 5 seconds
// Upper bound on entries kept in memory while the log file cannot be written.
const MAX_BUFFERED_LOG_ENTRIES = LOG_BUFFER_SIZE * 100;

/**
 * Append every buffered entry to the log file and empty the buffer.
 *
 * Does nothing when the buffer is empty. A failed write is reported on stderr
 * and the entries are kept for the next attempt, but only the newest
 * MAX_BUFFERED_LOG_ENTRIES are retained so an unwritable log file cannot grow
 * the buffer without bound. `lastFlushTime` is advanced after every attempt,
 * successful or not, so time-based retries stay one interval apart.
 */
const flushLogBuffer = () => {
  if (logBuffer.length === 0) return;

  try {
    const logContent = `${logBuffer.join('\n')}\n`;
    fs.appendFileSync(logFilePath, logContent);
    logBuffer = [];
  } catch (error) {
    console.error(`β Error writing to log file: ${error.code} | ${error.message} | path: ${logFilePath}`);
    if (logBuffer.length > MAX_BUFFERED_LOG_ENTRIES) {
      // Drop the oldest entries; the newest ones are the most useful to keep.
      logBuffer = logBuffer.slice(-MAX_BUFFERED_LOG_ENTRIES);
    }
  } finally {
    lastFlushTime = Date.now();
  }
};
|
|
204
|
+
|
|
182
205
|
/**
 * Queue a timestamped message in the in-memory log buffer.
 *
 * The buffer is flushed right away when it has reached LOG_BUFFER_SIZE
 * entries or when LOG_FLUSH_INTERVAL milliseconds have passed since the last
 * flush. Failures are reported on stderr and never propagate to the caller.
 *
 * @param {string} message - Text to record after the ISO timestamp prefix.
 */
const writeLog = (message) => {
  try {
    const entry = `[${new Date().toISOString()}] ${message}`;
    logBuffer.push(entry);

    const bufferIsFull = logBuffer.length >= LOG_BUFFER_SIZE;
    const flushIsDue = Date.now() - lastFlushTime >= LOG_FLUSH_INTERVAL;
    if (bufferIsFull || flushIsDue) {
      flushLogBuffer();
    }
  } catch (error) {
    console.error(`β Error buffering log message: ${error.message}`);
  }
};
|
|
219
|
+
|
|
220
|
+
// Ensure logs are flushed on process exit
process.on('exit', () => flushLogBuffer());

// On an interrupt or termination signal, flush what is buffered and exit with
// the conventional "128 + signal number" status (SIGINT = 2, SIGTERM = 15).
// Exiting with 0 here would report an interrupted run as a success.
process.on('SIGINT', () => {
  flushLogBuffer();
  process.exit(130);
});
process.on('SIGTERM', () => {
  flushLogBuffer();
  process.exit(143);
});
|
|
230
|
+
|
|
231
|
+
/**
 * OPTIMIZED: Conditional logging to reduce console overhead
 */

/**
 * Read an integer setting from an environment-variable string.
 *
 * @param {string|undefined} rawValue - Raw environment value.
 * @param {number} fallback - Value used when rawValue is missing, not a
 *   base-10 integer, or below `minimum`.
 * @param {number} [minimum=0] - Smallest accepted value.
 * @returns {number} The parsed integer or the fallback.
 */
const parseIntEnv = (rawValue, fallback, minimum = 0) => {
  const parsed = Number.parseInt(rawValue, 10);
  return Number.isNaN(parsed) || parsed < minimum ? fallback : parsed;
};

const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true';
// Configurable delay between batches in milliseconds; 0 disables the delay.
const BATCH_DELAY = parseIntEnv(process.env.BATCH_DELAY, 100);
// Update progress every N items; N must be at least 1.
const PROGRESS_UPDATE_INTERVAL = parseIntEnv(process.env.PROGRESS_UPDATE_INTERVAL, 10, 1);

/**
 * Print a message to stdout only when VERBOSE_LOGGING is enabled.
 *
 * @param {string} message - Text to print.
 */
const logVerbose = (message) => {
  if (VERBOSE_LOGGING) {
    console.log(message);
  }
};
|
|
243
|
+
/**
 * Read filesystem stats for a list of paths without ever throwing.
 *
 * @param {Iterable<string>|null|undefined} filePaths - Paths to stat; a
 *   missing list is treated as empty.
 * @returns {Array<{path: string, stats: object|null, error: string|null}>}
 *   One entry per input path, in input order. `stats` is the `fs.statSync`
 *   result, or null when the call failed, in which case `error` holds the
 *   failure message.
 */
const batchReadFileStats = (filePaths) =>
  Array.from(filePaths ?? [], (filePath) => {
    try {
      return { path: filePath, stats: fs.statSync(filePath), error: null };
    } catch (error) {
      return { path: filePath, stats: null, error: error.message };
    }
  });
|
|
257
|
+
|
|
258
|
+
/**
 * OPTIMIZED: Cache for year/pedimento detection results to avoid redundant parsing
 */
const pathDetectionCache = new Map();

/**
 * OPTIMIZED: Clear the path detection cache (useful for testing or long-running processes)
 */
const clearPathDetectionCache = () => {
  pathDetectionCache.clear();
};

/**
 * OPTIMIZED: Get detection results with caching
 *
 * Returns the result of `extractYearAndPedimentoFromPath(filePath, basePath)`,
 * computing it only the first time a given (filePath, basePath) pair is seen.
 *
 * @param {string} filePath - Path of the file to analyse.
 * @param {string} basePath - Base path passed through to the detector.
 * @returns {*} The cached or freshly computed detection result.
 */
const getCachedPathDetection = (filePath, basePath) => {
  // NUL separates the two parts: '|' may appear in a file name, which would
  // let two different pairs produce the same key.
  const cacheKey = `${filePath}\0${basePath}`;

  if (pathDetectionCache.has(cacheKey)) {
    return pathDetectionCache.get(cacheKey);
  }

  const detection = extractYearAndPedimentoFromPath(filePath, basePath);
  pathDetectionCache.set(cacheKey, detection);

  return detection;
};
|
|
190
285
|
|
|
191
286
|
/**
|
|
@@ -276,23 +371,49 @@ const extractYearAndPedimentoFromPath = (filePath, basePath) => {
|
|
|
276
371
|
}
|
|
277
372
|
};
|
|
278
373
|
|
|
374
|
+
/**
 * OPTIMIZED: Get processed paths with caching and buffered log reading
 */
let processedPathsCache = null;
let lastLogModTime = 0;
let lastLogSize = -1;

/**
 * Collect the destination paths recorded in the log file.
 *
 * Every log line containing `SUCCESS: <source> -> <destination>` or
 * `SKIPPED: <source> -> <destination>` contributes its trimmed destination.
 * The parsed set is cached and reused while both the modification time and
 * the size of the log file are unchanged; checking the size as well catches
 * appends that land within the same modification timestamp.
 *
 * @returns {Set<string>} Destinations found in the log. An empty set is
 *   returned when the log file does not exist or cannot be read (read
 *   failures are reported on stderr).
 */
const getProcessedPaths = () => {
  try {
    // Check if log file exists
    if (!fs.existsSync(logFilePath)) {
      return new Set();
    }

    // Check if cache is still valid
    const logStats = fs.statSync(logFilePath);
    const logModTime = logStats.mtime.getTime();
    if (
      processedPathsCache &&
      logModTime === lastLogModTime &&
      logStats.size === lastLogSize
    ) {
      return processedPathsCache;
    }

    // Read and parse log file
    const processed = new Set();
    const content = fs.readFileSync(logFilePath, 'utf-8');

    for (const match of content.matchAll(/(SUCCESS|SKIPPED): .*? -> (.+)/g)) {
      const processedPath = match[2].trim();
      if (processedPath) {
        processed.add(processedPath);
      }
    }

    // Update cache
    processedPathsCache = processed;
    lastLogModTime = logModTime;
    lastLogSize = logStats.size;

    return processed;
  } catch (error) {
    console.error(`β οΈ Error reading processed paths: ${error.message}`);
    return new Set();
  }
};
|
|
297
418
|
|
|
298
419
|
/**
|
|
@@ -314,6 +435,7 @@ const uploadToApi = async (files, options) => {
|
|
|
314
435
|
|
|
315
436
|
// Nueva funcionalidad: estructura de carpetas personalizada
|
|
316
437
|
let combinedStructure = null;
|
|
438
|
+
let cachedDetection = null; // Cache detection result to avoid redundant calls
|
|
317
439
|
|
|
318
440
|
if (
|
|
319
441
|
options.folderStructure &&
|
|
@@ -322,12 +444,10 @@ const uploadToApi = async (files, options) => {
|
|
|
322
444
|
) {
|
|
323
445
|
// Combine custom folder structure with auto-detection
|
|
324
446
|
const firstFile = files[0];
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
if (detection.detected) {
|
|
330
|
-
const autoStructure = `${detection.year}/${detection.pedimento}`;
|
|
447
|
+
cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
|
|
448
|
+
|
|
449
|
+
if (cachedDetection.detected) {
|
|
450
|
+
const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
|
|
331
451
|
combinedStructure = `${options.folderStructure}/${autoStructure}`;
|
|
332
452
|
formData.append('folderStructure', combinedStructure);
|
|
333
453
|
console.log(
|
|
@@ -346,12 +466,10 @@ const uploadToApi = async (files, options) => {
|
|
|
346
466
|
} else if (options.autoDetectStructure && files.length > 0) {
|
|
347
467
|
// Try to auto-detect from the first file if no explicit structure is provided
|
|
348
468
|
const firstFile = files[0];
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
if (detection.detected) {
|
|
354
|
-
const autoStructure = `${detection.year}/${detection.pedimento}`;
|
|
469
|
+
cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
|
|
470
|
+
|
|
471
|
+
if (cachedDetection.detected) {
|
|
472
|
+
const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
|
|
355
473
|
formData.append('folderStructure', autoStructure);
|
|
356
474
|
}
|
|
357
475
|
}
|
|
@@ -420,9 +538,10 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
420
538
|
const records = [];
|
|
421
539
|
|
|
422
540
|
for (const file of files) {
|
|
423
|
-
|
|
541
|
+
// OPTIMIZED: Use pre-computed stats if available, otherwise call fs.statSync
|
|
542
|
+
const stats = file.stats || fs.statSync(file.path);
|
|
424
543
|
const originalPath = options.clientPath || file.path;
|
|
425
|
-
|
|
544
|
+
|
|
426
545
|
// Check if record already exists
|
|
427
546
|
const { data: existingRecords, error: checkError } = await supabase
|
|
428
547
|
.from('uploader')
|
|
@@ -439,7 +558,7 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
439
558
|
console.log(`βοΈ Skipping duplicate: ${path.basename(file.path)}`);
|
|
440
559
|
continue;
|
|
441
560
|
}
|
|
442
|
-
|
|
561
|
+
|
|
443
562
|
// Initialize record with basic file stats
|
|
444
563
|
const record = {
|
|
445
564
|
document_type: null,
|
|
@@ -457,17 +576,17 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
457
576
|
if (detectionService.isSupportedFileType(file.path)) {
|
|
458
577
|
try {
|
|
459
578
|
const detection = await detectionService.detectFile(file.path);
|
|
460
|
-
|
|
579
|
+
|
|
461
580
|
if (detection.detectedType) {
|
|
462
581
|
record.document_type = detection.detectedType;
|
|
463
582
|
record.num_pedimento = detection.detectedPedimento;
|
|
464
583
|
record.status = 'detected';
|
|
465
|
-
|
|
584
|
+
|
|
466
585
|
// Set arela_path for pedimento_simplificado documents
|
|
467
586
|
if (detection.arelaPath) {
|
|
468
587
|
record.arela_path = detection.arelaPath;
|
|
469
588
|
}
|
|
470
|
-
|
|
589
|
+
|
|
471
590
|
// Extract RFC from fields if available
|
|
472
591
|
const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
|
|
473
592
|
if (rfcField) {
|
|
@@ -498,7 +617,7 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
498
617
|
}
|
|
499
618
|
|
|
500
619
|
console.log(`πΎ Inserting ${records.length} new records into uploader table...`);
|
|
501
|
-
|
|
620
|
+
|
|
502
621
|
const { data, error } = await supabase
|
|
503
622
|
.from('uploader')
|
|
504
623
|
.insert(records)
|
|
@@ -511,6 +630,266 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
511
630
|
return data;
|
|
512
631
|
};
|
|
513
632
|
|
|
633
|
+
/**
 * OPTIMIZED: Insert ONLY file stats into uploader table (Phase 1)
 * No file reading, no detection - just filesystem metadata
 * Returns summary statistics instead of full records for better performance
 *
 * Records are upserted into the `uploader` table in batches of 1000, keyed on
 * `original_path`. A batch that fails is reported on stderr and the remaining
 * batches are still attempted.
 *
 * @param {Iterable<{path: string, originalName?: string, stats?: object}>} files
 *   Files to record. `stats` is used when present; otherwise `fs.statSync` is
 *   called on `path`.
 * @param {object} [options]
 * @param {string} [options.clientPath] - Stored as `original_path` instead of
 *   the file's own path when set.
 * @returns {Promise<{totalInserted: number, totalSkipped: number, totalProcessed: number, totalFailed: number}>}
 *   `totalInserted` is the number of rows reported as written, `totalFailed`
 *   the number of records in batches that errored, `totalProcessed` the number
 *   of records prepared, and `totalSkipped` the prepared records not reported
 *   as written (this includes `totalFailed`).
 * @throws {Error} When the Supabase client is not initialized.
 */
const insertStatsOnlyToUploaderTable = async (files, options = {}) => {
  if (!supabase) {
    throw new Error('Supabase client not initialized. Stats mode requires Supabase connection.');
  }

  const batchSize = 1000; // Large batch size for performance
  const allRecords = [];

  // Prepare all file stats data first - OPTIMIZED to use pre-computed stats
  console.log('π Collecting filesystem stats...');
  for (const file of files) {
    try {
      // Use pre-computed stats if available, otherwise call fs.statSync
      const stats = file.stats || fs.statSync(file.path);

      allRecords.push({
        document_type: null,
        size: stats.size,
        num_pedimento: null,
        filename: file.originalName || path.basename(file.path),
        original_path: options.clientPath || file.path,
        arela_path: null,
        status: 'fs-stats',
        rfc: null,
        message: null,
        file_extension: path.extname(file.path).toLowerCase().replace('.', ''),
        created_at: new Date().toISOString(),
        modified_at: stats.mtime.toISOString(),
      });
    } catch (error) {
      console.error(`β Error reading stats for ${file.path}:`, error.message);
    }
  }

  if (allRecords.length === 0) {
    console.log('π No file stats to insert');
    return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0, totalFailed: 0 };
  }

  console.log(`πΎ Bulk inserting ${allRecords.length} file stats in batches of ${batchSize}...`);

  let totalInserted = 0;
  let totalFailed = 0;

  // Process in batches for optimal performance
  for (let i = 0; i < allRecords.length; i += batchSize) {
    const batch = allRecords.slice(i, i + batchSize);
    const batchNumber = Math.floor(i / batchSize) + 1;

    try {
      // OPTIMIZED: Use upsert without select to avoid unnecessary data transfer
      const { error, count } = await supabase
        .from('uploader')
        .upsert(batch, {
          onConflict: 'original_path',
          ignoreDuplicates: false,
          count: 'exact',
        });

      if (error) {
        console.error(`β Error inserting batch ${batchNumber}:`, error.message);
        totalFailed += batch.length;
        continue;
      }

      // Inserts and updates cannot be told apart from the count alone. Fall
      // back to the batch size only when no count was returned at all; a
      // reported count of 0 is kept as 0.
      const affected = count ?? batch.length;
      totalInserted += affected;

      console.log(`✅ Batch ${batchNumber}: ${affected} rows processed`);
    } catch (error) {
      console.error(`β Unexpected error in batch ${batchNumber}:`, error.message);
      totalFailed += batch.length;
    }
  }

  // Records not reported as written; includes the records of failed batches.
  const totalSkipped = allRecords.length - totalInserted;

  console.log(`π Phase 1 Summary: ${totalInserted} records processed, ${totalFailed} failed`);

  return {
    totalInserted,
    totalSkipped,
    totalProcessed: allRecords.length,
    totalFailed,
  };
};
|
|
735
|
+
|
|
736
|
+
/**
 * PHASE 2: Process PDF files for pedimento-simplificado detection
 * Only processes files with status 'fs-stats' and file_extension 'pdf'
 * whose filename contains "simp" (case-insensitive).
 *
 * All matching rows are fetched first, in pages ordered by `id` so that the
 * offset-based pages neither overlap nor skip rows. Each file is then run
 * through the detection service and its row is updated with the outcome:
 * 'detected', 'not-detected', 'file-not-found' or 'detection-error'.
 *
 * @param {object} [options]
 * @param {number|string} [options.batchSize] - Records handled per batch;
 *   defaults to 10 when missing, not a number, or not positive.
 * @returns {Promise<{detectedCount: number, processedCount: number, errorCount: number}>}
 *   `processedCount` counts files the detector ran on, `detectedCount` those
 *   with a detected type, and `errorCount` missing files, detection failures
 *   and failed row updates.
 * @throws {Error} When the Supabase client is not initialized or the initial
 *   fetch fails.
 */
const detectPedimentosInDatabase = async (options = {}) => {
  if (!supabase) {
    throw new Error('Supabase client not initialized.');
  }

  console.log('π Phase 2: Starting PDF detection for pedimento-simplificado documents...');

  // Get all PDF files that need detection (status = 'fs-stats' and extension = 'pdf')
  const allPdfRecords = [];
  const queryBatchSize = 1000;
  let hasMore = true;
  let offset = 0;

  console.log('π₯ Fetching PDF files from database...');

  while (hasMore) {
    const { data: batch, error: queryError } = await supabase
      .from('uploader')
      .select('id, original_path, filename, file_extension, status')
      .eq('status', 'fs-stats')
      .eq('file_extension', 'pdf')
      .ilike('filename', '%simp%')
      .order('id', { ascending: true })
      .range(offset, offset + queryBatchSize - 1);

    if (queryError) {
      throw new Error(`Failed to fetch PDF records: ${queryError.message}`);
    }

    if (!batch || batch.length === 0) {
      hasMore = false;
    } else {
      allPdfRecords.push(...batch);
      offset += queryBatchSize;
      console.log(`π Fetched ${batch.length} PDF records (total: ${allPdfRecords.length})`);

      // A short page means the end was reached; no extra query is needed.
      hasMore = batch.length === queryBatchSize;
    }
  }

  if (allPdfRecords.length === 0) {
    console.log('π No PDF files found for detection');
    return { detectedCount: 0, processedCount: 0, errorCount: 0 };
  }

  console.log(`π Processing ${allPdfRecords.length} PDF files for detection...`);

  const detectionService = new FileDetectionService();
  // Smaller batches for file I/O. A non-positive size would keep the batch
  // loop below from ever advancing, so it falls back to the default.
  const requestedBatchSize = Number.parseInt(options.batchSize, 10);
  const batchSize = requestedBatchSize > 0 ? requestedBatchSize : 10;
  let totalDetected = 0;
  let totalProcessed = 0;
  let totalErrors = 0;

  // Create progress bar
  const progressBar = new cliProgress.SingleBar({
    format: 'π PDF Detection |{bar}| {percentage}% | {value}/{total} | Detected: {detected} | Errors: {errors}',
    // Full block / light shade: the two glyphs must differ for the bar to be readable.
    barCompleteChar: '\u2588',
    barIncompleteChar: '\u2591',
    hideCursor: true,
  });

  progressBar.start(allPdfRecords.length, 0, { detected: 0, errors: 0 });

  try {
    // Process files in smaller batches to avoid overwhelming the system
    for (let i = 0; i < allPdfRecords.length; i += batchSize) {
      const batch = allPdfRecords.slice(i, i + batchSize);
      const updatePromises = [];

      for (const record of batch) {
        try {
          // Check if file still exists
          if (!fs.existsSync(record.original_path)) {
            updatePromises.push(
              supabase
                .from('uploader')
                .update({
                  status: 'file-not-found',
                  message: 'File no longer exists at original path',
                })
                .eq('id', record.id),
            );
            totalErrors++;
            continue;
          }

          // Perform detection
          const detection = await detectionService.detectFile(record.original_path);
          totalProcessed++;

          const updateData = {
            status: detection.detectedType ? 'detected' : 'not-detected',
            document_type: detection.detectedType,
            num_pedimento: detection.detectedPedimento,
            arela_path: detection.arelaPath,
            message: detection.error || null,
          };

          // Extract RFC from fields if available
          const rfcField = detection.fields?.find((f) => f.name === 'rfc' && f.found);
          if (rfcField) {
            updateData.rfc = rfcField.value;
          }

          if (detection.detectedType) {
            totalDetected++;
          }

          updatePromises.push(
            supabase
              .from('uploader')
              .update(updateData)
              .eq('id', record.id),
          );
        } catch (error) {
          console.error(`β Error detecting ${record.filename}:`, error.message);
          totalErrors++;

          updatePromises.push(
            supabase
              .from('uploader')
              .update({
                status: 'detection-error',
                message: error.message,
              })
              .eq('id', record.id),
          );
        }
      }

      // Execute all updates in parallel for this batch. A failed update can
      // show up either as a rejection or as an `error` on the resolved value,
      // so both are inspected.
      const outcomes = await Promise.allSettled(updatePromises);
      for (const outcome of outcomes) {
        const updateError =
          outcome.status === 'rejected' ? outcome.reason : outcome.value?.error;
        if (updateError) {
          console.error(`β Error updating batch:`, updateError.message);
          totalErrors++;
        }
      }

      // Update progress
      progressBar.update(Math.min(i + batchSize, allPdfRecords.length), {
        detected: totalDetected,
        errors: totalErrors,
      });
    }
  } finally {
    progressBar.stop();
  }

  console.log(`π Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`);
  return {
    detectedCount: totalDetected,
    processedCount: totalProcessed,
    errorCount: totalErrors,
  };
};
|
|
892
|
+
|
|
514
893
|
const processFilesInBatches = async (
|
|
515
894
|
files,
|
|
516
895
|
batchSize,
|
|
@@ -528,7 +907,7 @@ const processFilesInBatches = async (
|
|
|
528
907
|
|
|
529
908
|
const messageBuffer = [];
|
|
530
909
|
|
|
531
|
-
const progressBarFormat = options.statsOnly
|
|
910
|
+
const progressBarFormat = options.statsOnly
|
|
532
911
|
? 'π Processing [{bar}] {percentage}% | {value}/{total} files | Stats: {successCount} | Errors: {failureCount} | Duplicates: {skippedCount}'
|
|
533
912
|
: 'π Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}';
|
|
534
913
|
|
|
@@ -546,98 +925,59 @@ const processFilesInBatches = async (
|
|
|
546
925
|
});
|
|
547
926
|
|
|
548
927
|
if (options.statsOnly) {
|
|
549
|
-
// Stats-only mode -
|
|
550
|
-
console.log('π Processing files in stats-only mode...');
|
|
551
|
-
|
|
552
|
-
let totalDetected = 0;
|
|
553
|
-
let totalNotDetected = 0;
|
|
554
|
-
let totalUnsupported = 0;
|
|
555
|
-
let totalDetectionErrors = 0;
|
|
556
|
-
|
|
928
|
+
// OPTIMIZED Stats-only mode - Only read filesystem stats, no file detection
|
|
929
|
+
console.log('π Phase 1: Processing files in optimized stats-only mode (no detection)...');
|
|
930
|
+
|
|
557
931
|
for (let i = 0; i < files.length; i += batchSize) {
|
|
558
932
|
const batch = files.slice(i, i + batchSize);
|
|
559
|
-
|
|
560
|
-
const statsFiles = batch.map((file) => {
|
|
561
|
-
const originalFileName = path.basename(file);
|
|
562
|
-
|
|
563
|
-
return {
|
|
564
|
-
path: file,
|
|
565
|
-
originalName: originalFileName,
|
|
566
|
-
};
|
|
567
|
-
});
|
|
568
933
|
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
totalDetected++;
|
|
582
|
-
break;
|
|
583
|
-
case 'not-detected':
|
|
584
|
-
totalNotDetected++;
|
|
585
|
-
break;
|
|
586
|
-
case 'unsupported':
|
|
587
|
-
totalUnsupported++;
|
|
588
|
-
break;
|
|
589
|
-
case 'detection-error':
|
|
590
|
-
totalDetectionErrors++;
|
|
591
|
-
break;
|
|
592
|
-
}
|
|
593
|
-
});
|
|
594
|
-
|
|
595
|
-
statsFiles.forEach((file) => {
|
|
596
|
-
const wasInserted = insertedRecords.some(record =>
|
|
597
|
-
record.original_path === (options.clientPath || file.path)
|
|
598
|
-
);
|
|
599
|
-
if (wasInserted) {
|
|
600
|
-
writeLog(`STATS: ${file.path} -> uploader table`);
|
|
601
|
-
} else {
|
|
602
|
-
writeLog(`DUPLICATE: ${file.path} -> already exists in uploader table`);
|
|
603
|
-
}
|
|
934
|
+
// OPTIMIZED: Batch read file stats to reduce I/O overhead
|
|
935
|
+
const fileStatsResults = batchReadFileStats(batch);
|
|
936
|
+
const statsFiles = fileStatsResults
|
|
937
|
+
.filter(result => result.stats !== null) // Only include files with valid stats
|
|
938
|
+
.map((result) => {
|
|
939
|
+
const originalFileName = path.basename(result.path);
|
|
940
|
+
|
|
941
|
+
return {
|
|
942
|
+
path: result.path,
|
|
943
|
+
originalName: originalFileName,
|
|
944
|
+
stats: result.stats, // Pass pre-computed stats to avoid redundant calls
|
|
945
|
+
};
|
|
604
946
|
});
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
if (options.detect !== false) {
|
|
613
|
-
console.log(` π Detected: ${totalDetected}, Not detected: ${totalNotDetected}, Unsupported: ${totalUnsupported}, Errors: ${totalDetectionErrors}`);
|
|
614
|
-
}
|
|
615
|
-
|
|
616
|
-
} catch (error) {
|
|
617
|
-
totalErrors += statsFiles.length;
|
|
618
|
-
statsFiles.forEach((file) => {
|
|
619
|
-
writeLog(`ERROR: ${file.path}: ${error.message}`);
|
|
620
|
-
messageBuffer.push(`β ${file.originalName}: ${error.message}`);
|
|
947
|
+
|
|
948
|
+
// Log any files that couldn't be read
|
|
949
|
+
const failedFiles = fileStatsResults.filter(result => result.error !== null);
|
|
950
|
+
if (failedFiles.length > 0) {
|
|
951
|
+
console.log(`β οΈ Could not read stats for ${failedFiles.length} files in batch`);
|
|
952
|
+
failedFiles.forEach(failed => {
|
|
953
|
+
console.error(` β ${failed.path}: ${failed.error}`);
|
|
621
954
|
});
|
|
622
955
|
}
|
|
623
956
|
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
failureCount: totalErrors,
|
|
627
|
-
skippedCount: totalSkipped,
|
|
628
|
-
});
|
|
957
|
+
try {
|
|
958
|
+
const result = await insertStatsOnlyToUploaderTable(statsFiles, options);
|
|
629
959
|
|
|
630
|
-
|
|
631
|
-
|
|
960
|
+
totalUploaded += result.totalInserted;
|
|
961
|
+
totalSkipped += result.totalSkipped;
|
|
962
|
+
totalErrors += failedFiles.length; // Count failed file reads as errors
|
|
963
|
+
|
|
964
|
+
progressBar.update(Math.min(i + batch.length, files.length), {
|
|
965
|
+
successCount: totalUploaded,
|
|
966
|
+
failureCount: totalErrors,
|
|
967
|
+
skippedCount: totalSkipped,
|
|
968
|
+
});
|
|
969
|
+
|
|
970
|
+
} catch (error) {
|
|
971
|
+
console.error(`β Error processing stats batch:`, error.message);
|
|
972
|
+
totalErrors += batch.length;
|
|
973
|
+
|
|
974
|
+
progressBar.update(Math.min(i + batch.length, files.length), {
|
|
975
|
+
successCount: totalUploaded,
|
|
976
|
+
failureCount: totalErrors,
|
|
977
|
+
skippedCount: totalSkipped,
|
|
978
|
+
});
|
|
632
979
|
}
|
|
633
980
|
}
|
|
634
|
-
|
|
635
|
-
// Store detection stats for summary
|
|
636
|
-
totalDetected = totalDetected || 0;
|
|
637
|
-
totalNotDetected = totalNotDetected || 0;
|
|
638
|
-
totalUnsupported = totalUnsupported || 0;
|
|
639
|
-
totalDetectionErrors = totalDetectionErrors || 0;
|
|
640
|
-
|
|
641
981
|
} else if (apiMode && !options.forceSupabase) {
|
|
642
982
|
// API Mode - Process in batches
|
|
643
983
|
for (let i = 0; i < files.length; i += batchSize) {
|
|
@@ -661,7 +1001,8 @@ const processFilesInBatches = async (
|
|
|
661
1001
|
|
|
662
1002
|
// Handle combined folder structure + auto-detection
|
|
663
1003
|
if (options.folderStructure && options.autoDetectStructure) {
|
|
664
|
-
|
|
1004
|
+
// OPTIMIZED: Use cached detection to avoid redundant parsing
|
|
1005
|
+
const detection = getCachedPathDetection(file, basePath);
|
|
665
1006
|
if (detection.detected) {
|
|
666
1007
|
const autoStructure = `${detection.year}/${detection.pedimento}`;
|
|
667
1008
|
const combinedStructure = `${options.folderStructure}/${autoStructure}`;
|
|
@@ -669,7 +1010,7 @@ const processFilesInBatches = async (
|
|
|
669
1010
|
combinedStructure,
|
|
670
1011
|
sanitizedFileName,
|
|
671
1012
|
);
|
|
672
|
-
|
|
1013
|
+
logVerbose(
|
|
673
1014
|
`π Combined structure: ${options.folderStructure}/${autoStructure} for ${originalFileName} -> ${uploadPath}`,
|
|
674
1015
|
);
|
|
675
1016
|
} else {
|
|
@@ -678,7 +1019,7 @@ const processFilesInBatches = async (
|
|
|
678
1019
|
options.folderStructure,
|
|
679
1020
|
sanitizedFileName,
|
|
680
1021
|
);
|
|
681
|
-
|
|
1022
|
+
logVerbose(
|
|
682
1023
|
`π Custom structure (auto-detection failed): ${uploadPath}`,
|
|
683
1024
|
);
|
|
684
1025
|
}
|
|
@@ -688,10 +1029,10 @@ const processFilesInBatches = async (
|
|
|
688
1029
|
options.folderStructure,
|
|
689
1030
|
sanitizedFileName,
|
|
690
1031
|
);
|
|
691
|
-
|
|
1032
|
+
logVerbose(`π Custom structure: ${uploadPath}`);
|
|
692
1033
|
} else if (options.autoDetectStructure) {
|
|
693
|
-
// Auto-detect structure from path if enabled
|
|
694
|
-
const detection =
|
|
1034
|
+
// Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
|
|
1035
|
+
const detection = getCachedPathDetection(file, basePath);
|
|
695
1036
|
if (detection.detected) {
|
|
696
1037
|
const autoStructure = `${detection.year}/${detection.pedimento}`;
|
|
697
1038
|
uploadPath = path.posix.join(autoStructure, sanitizedFileName);
|
|
@@ -737,10 +1078,8 @@ const processFilesInBatches = async (
|
|
|
737
1078
|
|
|
738
1079
|
if (!clientPath && apiFiles.length > 0) {
|
|
739
1080
|
const firstFile = apiFiles[0];
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
basePath,
|
|
743
|
-
);
|
|
1081
|
+
// OPTIMIZED: Use cached detection to avoid redundant parsing
|
|
1082
|
+
const detection = getCachedPathDetection(firstFile.path, basePath);
|
|
744
1083
|
if (detection.detected) {
|
|
745
1084
|
// clientPath = `${detection.year}/${detection.pedimento}/`;
|
|
746
1085
|
clientPath = path
|
|
@@ -796,7 +1135,7 @@ const processFilesInBatches = async (
|
|
|
796
1135
|
});
|
|
797
1136
|
|
|
798
1137
|
if (i + batchSize < files.length) {
|
|
799
|
-
await new Promise((resolve) => setTimeout(resolve,
|
|
1138
|
+
await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
|
|
800
1139
|
}
|
|
801
1140
|
}
|
|
802
1141
|
} else {
|
|
@@ -809,7 +1148,7 @@ const processFilesInBatches = async (
|
|
|
809
1148
|
|
|
810
1149
|
// Handle combined folder structure + auto-detection
|
|
811
1150
|
if (options.folderStructure && options.autoDetectStructure) {
|
|
812
|
-
const detection =
|
|
1151
|
+
const detection = getCachedPathDetection(file, basePath);
|
|
813
1152
|
if (detection.detected) {
|
|
814
1153
|
const autoStructure = `${detection.year}/${detection.pedimento}`;
|
|
815
1154
|
const combinedStructure = `${options.folderStructure}/${autoStructure}`;
|
|
@@ -832,8 +1171,8 @@ const processFilesInBatches = async (
|
|
|
832
1171
|
uploadPath = path.join(options.folderStructure, fileName);
|
|
833
1172
|
console.log(`π Custom structure: ${uploadPath}`);
|
|
834
1173
|
} else if (options.autoDetectStructure) {
|
|
835
|
-
// Auto-detect structure from path if enabled
|
|
836
|
-
const detection =
|
|
1174
|
+
// Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
|
|
1175
|
+
const detection = getCachedPathDetection(file, basePath);
|
|
837
1176
|
if (detection.detected) {
|
|
838
1177
|
const autoStructure = `${detection.year}/${detection.pedimento}`;
|
|
839
1178
|
const fileName = path.basename(file);
|
|
@@ -943,7 +1282,7 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
943
1282
|
const queryBatchSize = 1000;
|
|
944
1283
|
|
|
945
1284
|
console.log('📥 Fetching all related files (with pagination)...');
|
|
946
|
-
|
|
1285
|
+
|
|
947
1286
|
while (hasMore) {
|
|
948
1287
|
const { data: batch, error: queryError } = await supabase
|
|
949
1288
|
.from('uploader')
|
|
@@ -962,7 +1301,7 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
962
1301
|
} else {
|
|
963
1302
|
allRelatedFiles = allRelatedFiles.concat(batch);
|
|
964
1303
|
offset += queryBatchSize;
|
|
965
|
-
|
|
1304
|
+
|
|
966
1305
|
// If we got less than queryBatchSize, we've reached the end
|
|
967
1306
|
if (batch.length < queryBatchSize) {
|
|
968
1307
|
hasMore = false;
|
|
@@ -976,7 +1315,7 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
976
1315
|
}
|
|
977
1316
|
|
|
978
1317
|
console.log(`π Found ${allRelatedFiles.length} total files to upload (including supporting documents)`);
|
|
979
|
-
|
|
1318
|
+
|
|
980
1319
|
// Group by RFC and arela_path for better organization
|
|
981
1320
|
const filesByRfc = allRelatedFiles.reduce((acc, record) => {
|
|
982
1321
|
const rfc = record.rfc || 'No RFC';
|
|
@@ -1037,18 +1376,18 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1037
1376
|
const batch = allRelatedFiles.slice(i, i + batchSize);
|
|
1038
1377
|
const batchNumber = Math.floor(i / batchSize) + 1;
|
|
1039
1378
|
const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
|
|
1040
|
-
|
|
1379
|
+
|
|
1041
1380
|
console.log(`\n📦 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`);
|
|
1042
1381
|
|
|
1043
1382
|
// Prepare files for upload
|
|
1044
1383
|
const filesToUpload = [];
|
|
1045
|
-
|
|
1384
|
+
|
|
1046
1385
|
for (const record of batch) {
|
|
1047
1386
|
totalProcessed++;
|
|
1048
|
-
|
|
1387
|
+
|
|
1049
1388
|
try {
|
|
1050
1389
|
const originalPath = record.original_path;
|
|
1051
|
-
|
|
1390
|
+
|
|
1052
1391
|
// Check if file exists
|
|
1053
1392
|
if (!fs.existsSync(originalPath)) {
|
|
1054
1393
|
console.log(` ⚠️ File not found: ${originalPath}`);
|
|
@@ -1056,24 +1395,24 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1056
1395
|
continue;
|
|
1057
1396
|
}
|
|
1058
1397
|
|
|
1059
|
-
|
|
1398
|
+
// OPTIMIZED: Read file and get size from buffer instead of separate fs.statSync call
|
|
1060
1399
|
const fileBuffer = fs.readFileSync(originalPath);
|
|
1061
|
-
|
|
1400
|
+
|
|
1062
1401
|
filesToUpload.push({
|
|
1063
1402
|
path: originalPath,
|
|
1064
1403
|
buffer: fileBuffer,
|
|
1065
|
-
size:
|
|
1404
|
+
size: fileBuffer.length, // Get size from buffer instead of fs.statSync
|
|
1066
1405
|
name: record.filename,
|
|
1067
1406
|
arelaPath: record.arela_path,
|
|
1068
1407
|
rfc: record.rfc,
|
|
1069
1408
|
documentType: record.document_type,
|
|
1070
1409
|
});
|
|
1071
|
-
|
|
1410
|
+
|
|
1072
1411
|
} catch (error) {
|
|
1073
1412
|
console.error(` ❌ Error reading file ${record.original_path}:`, error.message);
|
|
1074
1413
|
totalErrors++;
|
|
1075
1414
|
}
|
|
1076
|
-
|
|
1415
|
+
|
|
1077
1416
|
if (options.showProgress !== false) {
|
|
1078
1417
|
progressBar.update(totalProcessed, {
|
|
1079
1418
|
uploaded: totalUploaded,
|
|
@@ -1087,9 +1426,9 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1087
1426
|
if (filesToUpload.length > 0) {
|
|
1088
1427
|
try {
|
|
1089
1428
|
console.log(` π Uploading ${filesToUpload.length} files to Arela API...`);
|
|
1090
|
-
|
|
1429
|
+
|
|
1091
1430
|
const formData = new FormData();
|
|
1092
|
-
|
|
1431
|
+
|
|
1093
1432
|
// Add files to form data
|
|
1094
1433
|
filesToUpload.forEach((file, index) => {
|
|
1095
1434
|
formData.append(`files`, file.buffer, {
|
|
@@ -1112,7 +1451,7 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1112
1451
|
// Upload each group separately with its folder structure
|
|
1113
1452
|
for (const [arelaPath, pathFiles] of Object.entries(filesByPath)) {
|
|
1114
1453
|
const pathFormData = new FormData();
|
|
1115
|
-
|
|
1454
|
+
|
|
1116
1455
|
pathFiles.forEach((file) => {
|
|
1117
1456
|
pathFormData.append('files', file.buffer, {
|
|
1118
1457
|
filename: file.name,
|
|
@@ -1121,7 +1460,7 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1121
1460
|
});
|
|
1122
1461
|
|
|
1123
1462
|
// Set folder structure for this group - concatenate custom prefix with arela_path
|
|
1124
|
-
const folderStructure = options.folderStructure
|
|
1463
|
+
const folderStructure = options.folderStructure
|
|
1125
1464
|
? `${options.folderStructure}/${arelaPath}`.replace(/\/+/g, '/').replace(/\/$/, '')
|
|
1126
1465
|
: arelaPath;
|
|
1127
1466
|
pathFormData.append('folderStructure', folderStructure);
|
|
@@ -1149,14 +1488,14 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1149
1488
|
}
|
|
1150
1489
|
|
|
1151
1490
|
const result = await response.json();
|
|
1152
|
-
|
|
1491
|
+
|
|
1153
1492
|
// Check if upload was successful based on stats rather than success field
|
|
1154
1493
|
const isSuccessful = result.stats && result.stats.uploadedCount > 0 && result.stats.errorCount === 0;
|
|
1155
|
-
|
|
1494
|
+
|
|
1156
1495
|
if (isSuccessful) {
|
|
1157
1496
|
console.log(` ✅ Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`);
|
|
1158
1497
|
totalUploaded += result.stats.uploadedCount;
|
|
1159
|
-
|
|
1498
|
+
|
|
1160
1499
|
if (result.stats.detectedCount > 0) {
|
|
1161
1500
|
console.log(` π Files detected: ${result.stats.detectedCount}`);
|
|
1162
1501
|
}
|
|
@@ -1185,7 +1524,7 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1185
1524
|
|
|
1186
1525
|
// Small delay between batches
|
|
1187
1526
|
if (i + batchSize < allRelatedFiles.length) {
|
|
1188
|
-
await new Promise(resolve => setTimeout(resolve,
|
|
1527
|
+
await new Promise(resolve => setTimeout(resolve, BATCH_DELAY));
|
|
1189
1528
|
}
|
|
1190
1529
|
}
|
|
1191
1530
|
|
|
@@ -1263,19 +1602,19 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1263
1602
|
for (const pedimento of pedimentoRecords) {
|
|
1264
1603
|
try {
|
|
1265
1604
|
totalProcessed++;
|
|
1266
|
-
|
|
1605
|
+
|
|
1267
1606
|
// Extract base path from original_path (remove filename)
|
|
1268
1607
|
const basePath = path.dirname(pedimento.original_path);
|
|
1269
|
-
|
|
1608
|
+
|
|
1270
1609
|
console.log(`\nπ Processing: ${pedimento.filename}`);
|
|
1271
1610
|
console.log(` π Base path: ${basePath}`);
|
|
1272
|
-
|
|
1611
|
+
|
|
1273
1612
|
// Extract folder part from existing arela_path by removing the filename
|
|
1274
1613
|
const existingPath = pedimento.arela_path;
|
|
1275
|
-
const folderArelaPath = existingPath.includes('/') ?
|
|
1276
|
-
existingPath.substring(0, existingPath.lastIndexOf('/')) + '/' :
|
|
1614
|
+
const folderArelaPath = existingPath.includes('/') ?
|
|
1615
|
+
existingPath.substring(0, existingPath.lastIndexOf('/')) + '/' :
|
|
1277
1616
|
existingPath.endsWith('/') ? existingPath : existingPath + '/';
|
|
1278
|
-
|
|
1617
|
+
|
|
1279
1618
|
console.log(` 🎯 Original arela path: ${existingPath}`);
|
|
1280
1619
|
console.log(` π Folder arela path: ${folderArelaPath}`);
|
|
1281
1620
|
|
|
@@ -1299,13 +1638,13 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1299
1638
|
}
|
|
1300
1639
|
|
|
1301
1640
|
console.log(` π Found ${relatedFiles.length} related files to update:`);
|
|
1302
|
-
|
|
1641
|
+
|
|
1303
1642
|
// Show first 10 files, then indicate if there are more
|
|
1304
1643
|
const filesToShow = relatedFiles.slice(0, 10);
|
|
1305
1644
|
filesToShow.forEach(file => {
|
|
1306
1645
|
console.log(` - ${file.filename}`);
|
|
1307
1646
|
});
|
|
1308
|
-
|
|
1647
|
+
|
|
1309
1648
|
if (relatedFiles.length > 10) {
|
|
1310
1649
|
console.log(` ... and ${relatedFiles.length - 10} more files`);
|
|
1311
1650
|
}
|
|
@@ -1322,7 +1661,7 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1322
1661
|
const batchIds = fileIds.slice(i, i + BATCH_SIZE);
|
|
1323
1662
|
const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
|
|
1324
1663
|
const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
|
|
1325
|
-
|
|
1664
|
+
|
|
1326
1665
|
console.log(` 📦 Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`);
|
|
1327
1666
|
|
|
1328
1667
|
try {
|
|
@@ -1422,25 +1761,53 @@ program
|
|
|
1422
1761
|
'Automatically detect year/pedimento from file paths',
|
|
1423
1762
|
)
|
|
1424
1763
|
.option('--client-path <path>', 'Client path for metadata tracking')
|
|
1425
|
-
.option('--stats-only', 'Only read
|
|
1764
|
+
.option('--stats-only', 'Phase 1: Only read filesystem stats and insert to database (no file reading or detection)')
|
|
1426
1765
|
.option('--no-detect', 'Disable document type detection in stats-only mode')
|
|
1427
|
-
.option('--
|
|
1428
|
-
.option('--
|
|
1766
|
+
.option('--detect-pdfs', 'Phase 2: Process PDF files in database for pedimento-simplificado detection')
|
|
1767
|
+
.option('--propagate-arela-path', 'Phase 3: Propagate arela_path from pedimento_simplificado records to related files with same base path')
|
|
1768
|
+
.option('--upload-by-rfc', 'Phase 4: Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable')
|
|
1769
|
+
.option('--run-all-phases', 'Run all 4 phases in sequence: stats → detect → propagate → upload')
|
|
1429
1770
|
.action(async (options) => {
|
|
1430
1771
|
if (options.version) {
|
|
1431
1772
|
console.log(packageVersion);
|
|
1432
1773
|
process.exit(0);
|
|
1433
1774
|
}
|
|
1434
1775
|
|
|
1776
|
+
// Handle detect-pdfs option (Phase 2)
|
|
1777
|
+
if (options.detectPdfs) {
|
|
1778
|
+
console.log('π Starting Phase 2: PDF Detection');
|
|
1779
|
+
await checkCredentials(true); // Force Supabase mode
|
|
1780
|
+
|
|
1781
|
+
const result = await detectPedimentosInDatabase({
|
|
1782
|
+
batchSize: parseInt(options.batchSize) || 10,
|
|
1783
|
+
});
|
|
1784
|
+
|
|
1785
|
+
console.log(`✅ Phase 2 Complete: ${result.detectedCount} detected, ${result.errorCount} errors`);
|
|
1786
|
+
return;
|
|
1787
|
+
}
|
|
1788
|
+
|
|
1789
|
+
// Handle run-all-phases option
|
|
1790
|
+
if (options.runAllPhases) {
|
|
1791
|
+
console.log('π Starting all 4 phases in sequence...');
|
|
1792
|
+
await checkCredentials(true); // Force Supabase mode
|
|
1793
|
+
|
|
1794
|
+
// Phase 1: Stats collection
|
|
1795
|
+
console.log('\nπ === PHASE 1: Filesystem Stats ===');
|
|
1796
|
+
options.statsOnly = true;
|
|
1797
|
+
// Continue with normal processing to run Phase 1
|
|
1798
|
+
|
|
1799
|
+
// The rest will be handled after Phase 1 completes
|
|
1800
|
+
}
|
|
1801
|
+
|
|
1435
1802
|
// Handle propagate-arela-path option
|
|
1436
1803
|
if (options.propagateArelaPath) {
|
|
1437
1804
|
// Initialize Supabase credentials for propagation
|
|
1438
1805
|
await checkCredentials(true); // Force Supabase mode
|
|
1439
|
-
|
|
1806
|
+
|
|
1440
1807
|
const result = await propagateArelaPath({
|
|
1441
1808
|
showProgress: options.showStats || true,
|
|
1442
1809
|
});
|
|
1443
|
-
|
|
1810
|
+
|
|
1444
1811
|
if (result.errorCount > 0) {
|
|
1445
1812
|
process.exit(1);
|
|
1446
1813
|
}
|
|
@@ -1451,7 +1818,7 @@ program
|
|
|
1451
1818
|
if (options.uploadByRfc) {
|
|
1452
1819
|
// RFC upload needs both Supabase (for database queries) and API (for uploads)
|
|
1453
1820
|
await checkCredentials(false); // Initialize API mode
|
|
1454
|
-
|
|
1821
|
+
|
|
1455
1822
|
// Also initialize Supabase for database queries
|
|
1456
1823
|
if (!supabase) {
|
|
1457
1824
|
if (!supabaseUrl || !supabaseKey) {
|
|
@@ -1459,17 +1826,17 @@ program
|
|
|
1459
1826
|
console.error(' Please set SUPABASE_URL and SUPABASE_KEY environment variables.');
|
|
1460
1827
|
process.exit(1);
|
|
1461
1828
|
}
|
|
1462
|
-
|
|
1829
|
+
|
|
1463
1830
|
supabase = createClient(supabaseUrl, supabaseKey);
|
|
1464
1831
|
console.log('✅ Connected to Supabase for database queries');
|
|
1465
1832
|
}
|
|
1466
|
-
|
|
1833
|
+
|
|
1467
1834
|
const result = await uploadFilesByRfc({
|
|
1468
1835
|
showProgress: options.showStats || true,
|
|
1469
1836
|
batchSize: parseInt(options.batchSize) || 10,
|
|
1470
1837
|
folderStructure: options.folderStructure,
|
|
1471
1838
|
});
|
|
1472
|
-
|
|
1839
|
+
|
|
1473
1840
|
if (result.errorCount > 0) {
|
|
1474
1841
|
process.exit(1);
|
|
1475
1842
|
}
|
|
@@ -1590,12 +1957,65 @@ program
|
|
|
1590
1957
|
console.log(` π Log file: ${logFilePath}`);
|
|
1591
1958
|
console.log(`${'='.repeat(60)}\n`);
|
|
1592
1959
|
|
|
1593
|
-
|
|
1960
|
+
// Continue with remaining phases if running all phases
|
|
1961
|
+
if (options.runAllPhases && options.statsOnly) {
|
|
1962
|
+
try {
|
|
1963
|
+
// Phase 2: PDF Detection
|
|
1964
|
+
console.log('\nπ === PHASE 2: PDF Detection ===');
|
|
1965
|
+
const detectionResult = await detectPedimentosInDatabase({
|
|
1966
|
+
batchSize: parseInt(options.batchSize) || 10,
|
|
1967
|
+
});
|
|
1968
|
+
console.log(`✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`);
|
|
1969
|
+
|
|
1970
|
+
// Phase 3: Propagate arela_path
|
|
1971
|
+
console.log('\nπ === PHASE 3: Propagate Arela Paths ===');
|
|
1972
|
+
const propagateResult = await propagateArelaPath({
|
|
1973
|
+
showProgress: options.showStats || true,
|
|
1974
|
+
});
|
|
1975
|
+
console.log(`✅ Phase 3 Complete: ${propagateResult.updatedCount || 0} paths propagated`);
|
|
1976
|
+
|
|
1977
|
+
// Phase 4: Upload by RFC
|
|
1978
|
+
if (uploadRfcs && uploadRfcs.length > 0) {
|
|
1979
|
+
console.log('\nπ === PHASE 4: Upload by RFC ===');
|
|
1980
|
+
|
|
1981
|
+
// Initialize API mode for uploads
|
|
1982
|
+
await checkCredentials(false);
|
|
1983
|
+
|
|
1984
|
+
const uploadResult = await uploadFilesByRfc({
|
|
1985
|
+
showProgress: options.showStats || true,
|
|
1986
|
+
batchSize: parseInt(options.batchSize) || 10,
|
|
1987
|
+
folderStructure: options.folderStructure,
|
|
1988
|
+
});
|
|
1989
|
+
console.log(`✅ Phase 4 Complete: Upload finished`);
|
|
1990
|
+
} else {
|
|
1991
|
+
console.log('\n⚠️ === PHASE 4: Upload by RFC ===');
|
|
1992
|
+
console.log('⚠️ UPLOAD_RFCS environment variable not configured, skipping Phase 4');
|
|
1993
|
+
}
|
|
1994
|
+
|
|
1995
|
+
console.log('\nπ All 4 phases completed successfully!');
|
|
1996
|
+
|
|
1997
|
+
} catch (error) {
|
|
1998
|
+
console.error(`❌ Error in multi-phase execution:`, error.message);
|
|
1999
|
+
process.exit(1);
|
|
2000
|
+
}
|
|
2001
|
+
}
|
|
2002
|
+
|
|
2003
|
+
if (options.showStats && (sanitizationCache.size > 0 || pathDetectionCache.size > 0)) {
|
|
1594
2004
|
console.log(`π Performance Statistics:`);
|
|
1595
|
-
|
|
1596
|
-
|
|
1597
|
-
|
|
2005
|
+
if (sanitizationCache.size > 0) {
|
|
2006
|
+
console.log(
|
|
2007
|
+
` ποΈ Sanitization cache entries: ${sanitizationCache.size}`,
|
|
2008
|
+
);
|
|
2009
|
+
}
|
|
2010
|
+
if (pathDetectionCache.size > 0) {
|
|
2011
|
+
console.log(
|
|
2012
|
+
` π Path detection cache entries: ${pathDetectionCache.size}`,
|
|
2013
|
+
);
|
|
2014
|
+
}
|
|
1598
2015
|
}
|
|
2016
|
+
|
|
2017
|
+
// OPTIMIZED: Ensure log buffer is flushed before exit
|
|
2018
|
+
flushLogBuffer();
|
|
1599
2019
|
});
|
|
1600
2020
|
|
|
1601
2021
|
program.parse();
|