@arela/uploader 0.2.0 β†’ 0.2.2

This diff compares the contents of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
package/src/index.js CHANGED
@@ -9,6 +9,7 @@ import { globby } from 'globby';
  import mime from 'mime-types';
  import fetch from 'node-fetch';
  import path from 'path';
+
  import { FileDetectionService } from './file-detection.js';

  config();
@@ -43,6 +44,7 @@ const sources = process.env.UPLOAD_SOURCES?.split('|')
    .filter(Boolean);

  // RFC configuration for upload
+ console.log('πŸ”§ Configured RFCs for upload:', process.env.UPLOAD_RFCS);
  const uploadRfcs = process.env.UPLOAD_RFCS?.split('|')
    .map((s) => s.trim())
    .filter(Boolean);
@@ -179,13 +181,111 @@ const checkCredentials = async (forceSupabase = false) => {
  };

  const logFilePath = path.resolve(process.cwd(), 'arela-upload.log');
+
+ /**
+  * OPTIMIZED: Log buffer to reduce I/O operations
+  */
+ let logBuffer = [];
+ const LOG_BUFFER_SIZE = 100; // Flush every 100 log entries
+ let lastFlushTime = Date.now();
+ const LOG_FLUSH_INTERVAL = 5000; // Flush every 5 seconds
+
+ const flushLogBuffer = () => {
+   if (logBuffer.length === 0) return;
+
+   try {
+     const logContent = logBuffer.join('\n') + '\n';
+     fs.appendFileSync(logFilePath, logContent);
+     logBuffer = [];
+     lastFlushTime = Date.now();
+   } catch (error) {
+     console.error(`❌ Error writing to log file: ${error.code} | ${error.message} | path: ${logFilePath}`);
+   }
+ };
+
  const writeLog = (message) => {
    try {
      const timestamp = new Date().toISOString();
-     fs.appendFileSync(logFilePath, `[${timestamp}] ${message}\n`);
+     logBuffer.push(`[${timestamp}] ${message}`);
+
+     // Flush if buffer is full or enough time has passed
+     const now = Date.now();
+     if (
+       logBuffer.length >= LOG_BUFFER_SIZE ||
+       now - lastFlushTime >= LOG_FLUSH_INTERVAL
+     ) {
+       flushLogBuffer();
+     }
    } catch (error) {
-     console.error(`❌ Error writing to log file: ${error.code} | ${error.message} | path: ${logFilePath}`);
+     console.error(`❌ Error buffering log message: ${error.message}`);
+   }
+ };
+
+ // Ensure logs are flushed on process exit
+ process.on('exit', flushLogBuffer);
+ process.on('SIGINT', () => {
+   flushLogBuffer();
+   process.exit(0);
+ });
+ process.on('SIGTERM', () => {
+   flushLogBuffer();
+   process.exit(0);
+ });
+
+ /**
+  * OPTIMIZED: Conditional logging to reduce console overhead
+  */
+ const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true';
+ const BATCH_DELAY = parseInt(process.env.BATCH_DELAY) || 100; // Configurable delay between batches
+ const PROGRESS_UPDATE_INTERVAL =
+   parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10; // Update progress every N items
+
+ const logVerbose = (message) => {
+   if (VERBOSE_LOGGING) {
+     console.log(message);
+   }
+ };
+ const batchReadFileStats = (filePaths) => {
+   const results = [];
+
+   for (const filePath of filePaths) {
+     try {
+       const stats = fs.statSync(filePath);
+       results.push({ path: filePath, stats, error: null });
+     } catch (error) {
+       results.push({ path: filePath, stats: null, error: error.message });
+     }
    }
+
+   return results;
+ };
+
+ /**
+  * OPTIMIZED: Cache for year/pedimento detection results to avoid redundant parsing
+  */
+ const pathDetectionCache = new Map();
+
+ /**
+  * OPTIMIZED: Clear the path detection cache (useful for testing or long-running processes)
+  */
+ const clearPathDetectionCache = () => {
+   pathDetectionCache.clear();
+ };
+
+ /**
+  * OPTIMIZED: Get detection results with caching
+  */
+ const getCachedPathDetection = (filePath, basePath) => {
+   const cacheKey = `${filePath}|${basePath}`;
+
+   if (pathDetectionCache.has(cacheKey)) {
+     return pathDetectionCache.get(cacheKey);
+   }
+
+   const detection = extractYearAndPedimentoFromPath(filePath, basePath);
+   pathDetectionCache.set(cacheKey, detection);
+
+   return detection;
  };

  /**
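The buffered logger above replaces one appendFileSync call per message with one write per 100 entries or per 5 seconds, whichever comes first; the exit/SIGINT/SIGTERM handlers flush whatever is still buffered. A minimal self-contained sketch of the same size-or-age flush policy (the log path here is a stand-in, not the package's arela-upload.log):

import fs from 'fs';

const LOG_PATH = './example.log'; // hypothetical path for this sketch
const MAX_ENTRIES = 100; // same threshold as LOG_BUFFER_SIZE above
const MAX_AGE_MS = 5000; // same threshold as LOG_FLUSH_INTERVAL above

let buffer = [];
let lastFlush = Date.now();

const flush = () => {
  if (buffer.length === 0) return;
  fs.appendFileSync(LOG_PATH, buffer.join('\n') + '\n'); // one syscall per flush
  buffer = [];
  lastFlush = Date.now();
};

const log = (message) => {
  buffer.push(`[${new Date().toISOString()}] ${message}`);
  if (buffer.length >= MAX_ENTRIES || Date.now() - lastFlush >= MAX_AGE_MS) {
    flush();
  }
};

// Flush leftovers when the process ends, as the diff does for 'exit',
// SIGINT and SIGTERM. 'exit' handlers must be synchronous, which is why
// the synchronous appendFileSync is the right call there.
process.on('exit', flush);

log('one write per ~100 entries instead of one per entry');

The trade-off is durability: up to one buffer's worth of messages can be lost on a crash that bypasses the exit handlers (for example SIGKILL).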
@@ -276,23 +376,49 @@ const extractYearAndPedimentoFromPath = (filePath, basePath) => {
    }
  };

+ /**
+  * OPTIMIZED: Get processed paths with caching and buffered log reading
+  */
+ let processedPathsCache = null;
+ let lastLogModTime = 0;
+
  const getProcessedPaths = () => {
-   const processed = new Set();
-   const lines = fs.existsSync(logFilePath)
-     ? fs.readFileSync(logFilePath, 'utf-8').split('\n')
-     : [];
-
-   for (const line of lines) {
-     const match = line.match(/(SUCCESS|SKIPPED): .*? -> (.+)/);
-     if (match) {
-       const [, , path] = match;
+   try {
+     // Check if log file exists
+     if (!fs.existsSync(logFilePath)) {
+       return new Set();
+     }
+
+     // Check if cache is still valid
+     const logStats = fs.statSync(logFilePath);
+     if (processedPathsCache && logStats.mtime.getTime() === lastLogModTime) {
+       return processedPathsCache;
+     }
+
+     // Read and parse log file
+     const processed = new Set();
+     const content = fs.readFileSync(logFilePath, 'utf-8');
+
+     // Use more efficient regex with global flag
+     const regex = /(SUCCESS|SKIPPED): .*? -> (.+)/g;
+     let match;
+
+     while ((match = regex.exec(content)) !== null) {
+       const path = match[2];
        if (path) {
          processed.add(path.trim());
        }
      }
-   }

-   return processed;
+     // Update cache
+     processedPathsCache = processed;
+     lastLogModTime = logStats.mtime.getTime();
+
+     return processed;
+   } catch (error) {
+     console.error(`⚠️ Error reading processed paths: ${error.message}`);
+     return new Set();
+   }
  };

  /**
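getProcessedPaths now keeps the parsed Set in module state and revalidates it against the log file's mtime, so repeated calls between writes cost one statSync instead of a full read plus regex pass. The pattern in isolation (the file name and parse function are illustrative):

import fs from 'fs';

let cache = null;
let cachedMtimeMs = 0;

// Re-parse `file` only when its modification time changed since the last
// call; otherwise return the memoized result.
const readParsed = (file, parse) => {
  const { mtimeMs } = fs.statSync(file);
  if (cache && mtimeMs === cachedMtimeMs) return cache;
  cache = parse(fs.readFileSync(file, 'utf-8'));
  cachedMtimeMs = mtimeMs;
  return cache;
};

// Hypothetical use, matching the log format in this diff:
// const done = readParsed('arela-upload.log', (text) => {
//   const set = new Set();
//   for (const m of text.matchAll(/(?:SUCCESS|SKIPPED): .*? -> (.+)/g)) {
//     set.add(m[1].trim());
//   }
//   return set;
// });

Two caveats inherent to the approach: mtime granularity is filesystem-dependent, so two writes within the same timestamp tick can leave a stale cache, and entries still sitting in the write buffer above are not on disk yet, so a read can trail writeLog by up to one flush interval.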
@@ -314,6 +440,7 @@ const uploadToApi = async (files, options) => {

    // New functionality: custom folder structure
    let combinedStructure = null;
+   let cachedDetection = null; // Cache detection result to avoid redundant calls

    if (
      options.folderStructure &&
@@ -322,12 +449,10 @@ const uploadToApi = async (files, options) => {
    ) {
      // Combine custom folder structure with auto-detection
      const firstFile = files[0];
-     const detection = extractYearAndPedimentoFromPath(
-       firstFile.path,
-       process.cwd(),
-     );
-     if (detection.detected) {
-       const autoStructure = `${detection.year}/${detection.pedimento}`;
+     cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
+
+     if (cachedDetection.detected) {
+       const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
        combinedStructure = `${options.folderStructure}/${autoStructure}`;
        formData.append('folderStructure', combinedStructure);
        console.log(
@@ -346,12 +471,10 @@ const uploadToApi = async (files, options) => {
    } else if (options.autoDetectStructure && files.length > 0) {
      // Try to auto-detect from the first file if no explicit structure is provided
      const firstFile = files[0];
-     const detection = extractYearAndPedimentoFromPath(
-       firstFile.path,
-       process.cwd(),
-     );
-     if (detection.detected) {
-       const autoStructure = `${detection.year}/${detection.pedimento}`;
+     cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
+
+     if (cachedDetection.detected) {
+       const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
        formData.append('folderStructure', autoStructure);
      }
    }
@@ -413,16 +536,19 @@ const uploadToSupabase = async (file, uploadPath) => {
   */
  const insertStatsToUploaderTable = async (files, options) => {
    if (!supabase) {
-     throw new Error('Supabase client not initialized. Stats mode requires Supabase connection.');
+     throw new Error(
+       'Supabase client not initialized. Stats mode requires Supabase connection.',
+     );
    }

    const detectionService = new FileDetectionService();
    const records = [];

    for (const file of files) {
-     const stats = fs.statSync(file.path);
+     // OPTIMIZED: Use pre-computed stats if available, otherwise call fs.statSync
+     const stats = file.stats || fs.statSync(file.path);
      const originalPath = options.clientPath || file.path;
-
+
      // Check if record already exists
      const { data: existingRecords, error: checkError } = await supabase
        .from('uploader')
@@ -431,7 +557,9 @@ const insertStatsToUploaderTable = async (files, options) => {
        .limit(1);

      if (checkError) {
-       console.error(`❌ Error checking for existing record: ${checkError.message}`);
+       console.error(
+         `❌ Error checking for existing record: ${checkError.message}`,
+       );
        continue;
      }

@@ -439,7 +567,7 @@ const insertStatsToUploaderTable = async (files, options) => {
        console.log(`⏭️ Skipping duplicate: ${path.basename(file.path)}`);
        continue;
      }
-
+
      // Initialize record with basic file stats
      const record = {
        document_type: null,
@@ -450,26 +578,28 @@ const insertStatsToUploaderTable = async (files, options) => {
        arela_path: null,
        status: 'stats',
        rfc: null,
-       message: null
+       message: null,
      };

      // Try to detect document type for supported files
      if (detectionService.isSupportedFileType(file.path)) {
        try {
          const detection = await detectionService.detectFile(file.path);
-
+
          if (detection.detectedType) {
            record.document_type = detection.detectedType;
            record.num_pedimento = detection.detectedPedimento;
            record.status = 'detected';
-
+
            // Set arela_path for pedimento_simplificado documents
            if (detection.arelaPath) {
              record.arela_path = detection.arelaPath;
            }
-
+
            // Extract RFC from fields if available
-           const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
+           const rfcField = detection.fields.find(
+             (f) => f.name === 'rfc' && f.found,
+           );
            if (rfcField) {
              record.rfc = rfcField.value;
            }
@@ -497,8 +627,10 @@ const insertStatsToUploaderTable = async (files, options) => {
      return [];
    }

-   console.log(`πŸ’Ύ Inserting ${records.length} new records into uploader table...`);
-
+   console.log(
+     `πŸ’Ύ Inserting ${records.length} new records into uploader table...`,
+   );
+
    const { data, error } = await supabase
      .from('uploader')
      .insert(records)
@@ -511,6 +643,320 @@ const insertStatsToUploaderTable = async (files, options) => {
    return data;
  };

+ /**
+  * OPTIMIZED: Insert ONLY file stats into uploader table (Phase 1)
+  * No file reading, no detection - just filesystem metadata
+  * Returns summary statistics instead of full records for better performance
+  */
+ const insertStatsOnlyToUploaderTable = async (files, options) => {
+   if (!supabase) {
+     throw new Error(
+       'Supabase client not initialized. Stats mode requires Supabase connection.',
+     );
+   }
+
+   const batchSize = 1000; // Large batch size for performance
+   const allRecords = [];
+
+   // Prepare all file stats data first - OPTIMIZED to use pre-computed stats
+   console.log('πŸ“Š Collecting filesystem stats...');
+   for (const file of files) {
+     try {
+       // Use pre-computed stats if available, otherwise call fs.statSync
+       const stats = file.stats || fs.statSync(file.path);
+       const originalPath = options.clientPath || file.path;
+       const fileExtension = path
+         .extname(file.path)
+         .toLowerCase()
+         .replace('.', '');
+
+       const record = {
+         document_type: null,
+         size: stats.size,
+         num_pedimento: null,
+         filename: file.originalName || path.basename(file.path),
+         original_path: originalPath,
+         arela_path: null,
+         status: 'fs-stats',
+         rfc: null,
+         message: null,
+         file_extension: fileExtension,
+         created_at: new Date().toISOString(),
+         modified_at: stats.mtime.toISOString(),
+       };
+
+       allRecords.push(record);
+     } catch (error) {
+       console.error(`❌ Error reading stats for ${file.path}:`, error.message);
+     }
+   }
+
+   if (allRecords.length === 0) {
+     console.log('πŸ“ No file stats to insert');
+     return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
+   }
+
+   console.log(
+     `πŸ’Ύ Bulk inserting ${allRecords.length} file stats in batches of ${batchSize}...`,
+   );
+
+   let totalInserted = 0;
+   let totalSkipped = 0;
+
+   // Process in batches for optimal performance
+   for (let i = 0; i < allRecords.length; i += batchSize) {
+     const batch = allRecords.slice(i, i + batchSize);
+
+     try {
+       // OPTIMIZED: Use upsert without select to avoid unnecessary data transfer
+       const { error, count } = await supabase.from('uploader').upsert(batch, {
+         onConflict: 'original_path',
+         ignoreDuplicates: false,
+         count: 'exact',
+       });
+
+       if (error) {
+         console.error(
+           `❌ Error inserting batch ${Math.floor(i / batchSize) + 1}:`,
+           error.message,
+         );
+         continue;
+       }
+
+       // For upsert operations, we can't easily distinguish between inserts and updates
+       // from the count alone, but we can estimate based on the assumption that most
+       // operations in --stats-only mode are likely new inserts
+       const batchProcessed = batch.length;
+
+       // Since we're using upsert with ignoreDuplicates: false, the count represents
+       // the actual number of rows affected (both inserts and updates)
+       const affected = count || batchProcessed;
+
+       // For simplicity and performance, we'll assume most are new inserts in stats-only mode
+       // This is reasonable since stats-only is typically run on new file sets
+       totalInserted += affected;
+
+       console.log(
+         `βœ… Batch ${Math.floor(i / batchSize) + 1}: ${affected} rows processed`,
+       );
+     } catch (error) {
+       console.error(
+         `❌ Unexpected error in batch ${Math.floor(i / batchSize) + 1}:`,
+         error.message,
+       );
+     }
+   }
+
+   // Calculate skipped as difference between total records and inserted
+   totalSkipped = allRecords.length - totalInserted;
+
+   console.log(
+     `πŸ“Š Phase 1 Summary: ${totalInserted} records processed, estimated ${totalSkipped} were updates`,
+   );
+
+   return {
+     totalInserted,
+     totalSkipped,
+     totalProcessed: allRecords.length,
+   };
+ };
+
+ /**
+  * PHASE 2: Process PDF files for pedimento-simplificado detection
+  * Only processes files with status 'fs-stats' and file_extension 'pdf'
+  * Processes records in chunks of 1000 to avoid loading all records into memory
+  */
+ const detectPedimentosInDatabase = async (options = {}) => {
+   if (!supabase) {
+     throw new Error('Supabase client not initialized.');
+   }
+
+   console.log(
+     'πŸ” Phase 2: Starting PDF detection for pedimento-simplificado documents...',
+   );
+
+   const detectionService = new FileDetectionService();
+   const processingBatchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
+   const queryBatchSize = 1000; // Process 1000 records at a time
+
+   let totalDetected = 0;
+   let totalProcessed = 0;
+   let totalErrors = 0;
+   let offset = 0;
+   let chunkNumber = 1;
+
+   console.log('πŸ“„ Processing PDF files in chunks of 1000 records...');
+
+   // Process records in chunks of 1000
+   while (true) {
+     console.log(
+       `\nπŸ“₯ Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
+     );
+
+     // Fetch next chunk of PDF records
+     const { data: pdfRecords, error: queryError } = await supabase
+       .from('uploader')
+       .select('id, original_path, filename, file_extension, status')
+       .eq('status', 'fs-stats')
+       .eq('file_extension', 'pdf')
+       .ilike('filename', '%simp%')
+       .range(offset, offset + queryBatchSize - 1);
+
+     if (queryError) {
+       throw new Error(
+         `Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
+       );
+     }
+
+     // If no records found, we're done
+     if (!pdfRecords || pdfRecords.length === 0) {
+       console.log(`πŸ“ No more PDF files found. Processing completed.`);
+       break;
+     }
+
+     console.log(
+       `πŸ“¦ Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
+     );
+
+     // Create progress bar for this chunk
+     const progressBar = new cliProgress.SingleBar({
+       format: `πŸ” Chunk ${chunkNumber} |{bar}| {percentage}% | {value}/{total} | Detected: {detected} | Errors: {errors}`,
+       barCompleteChar: 'β–ˆ',
+       barIncompleteChar: 'β–‘',
+       hideCursor: true,
+     });
+
+     progressBar.start(pdfRecords.length, 0, { detected: 0, errors: 0 });
+
+     let chunkDetected = 0;
+     let chunkProcessed = 0;
+     let chunkErrors = 0;
+
+     // Process files in smaller batches within this chunk
+     for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
+       const batch = pdfRecords.slice(i, i + processingBatchSize);
+       const updatePromises = [];
+
+       for (const record of batch) {
+         try {
+           // Check if file still exists
+           if (!fs.existsSync(record.original_path)) {
+             updatePromises.push(
+               supabase
+                 .from('uploader')
+                 .update({
+                   status: 'file-not-found',
+                   message: 'File no longer exists at original path',
+                 })
+                 .eq('id', record.id),
+             );
+             chunkErrors++;
+             totalErrors++;
+             continue;
+           }
+
+           // Perform detection
+           const detection = await detectionService.detectFile(
+             record.original_path,
+           );
+           chunkProcessed++;
+           totalProcessed++;
+
+           const updateData = {
+             status: detection.detectedType ? 'detected' : 'not-detected',
+             document_type: detection.detectedType,
+             num_pedimento: detection.detectedPedimento,
+             arela_path: detection.arelaPath,
+             message: detection.error || null,
+           };
+
+           // Extract RFC from fields if available
+           if (detection.fields) {
+             const rfcField = detection.fields.find(
+               (f) => f.name === 'rfc' && f.found,
+             );
+             if (rfcField) {
+               updateData.rfc = rfcField.value;
+             }
+           }
+
+           if (detection.detectedType) {
+             chunkDetected++;
+             totalDetected++;
+           }
+
+           updatePromises.push(
+             supabase.from('uploader').update(updateData).eq('id', record.id),
+           );
+         } catch (error) {
+           console.error(
+             `❌ Error detecting ${record.filename}:`,
+             error.message,
+           );
+           chunkErrors++;
+           totalErrors++;
+
+           updatePromises.push(
+             supabase
+               .from('uploader')
+               .update({
+                 status: 'detection-error',
+                 message: error.message,
+               })
+               .eq('id', record.id),
+           );
+         }
+       }
+
+       // Execute all updates in parallel for this batch
+       try {
+         await Promise.all(updatePromises);
+       } catch (error) {
+         console.error(
+           `❌ Error updating batch in chunk ${chunkNumber}:`,
+           error.message,
+         );
+       }
+
+       // Update progress for this chunk
+       progressBar.update(Math.min(i + processingBatchSize, pdfRecords.length), {
+         detected: chunkDetected,
+         errors: chunkErrors,
+       });
+     }
+
+     progressBar.stop();
+
+     console.log(
+       `βœ… Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
+     );
+
+     // Move to next chunk
+     offset += queryBatchSize;
+     chunkNumber++;
+
+     // If we got fewer records than queryBatchSize, we've reached the end
+     if (pdfRecords.length < queryBatchSize) {
+       console.log(
+         `πŸ“ Reached end of records (chunk had ${pdfRecords.length} records).`,
+       );
+       break;
+     }
+
+     // Small delay between chunks to avoid overwhelming the database
+     await new Promise((resolve) => setTimeout(resolve, 500));
+   }
+
+   console.log(
+     `πŸ“Š Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
+   );
+   return {
+     detectedCount: totalDetected,
+     processedCount: totalProcessed,
+     errorCount: totalErrors,
+   };
+ };
+
  const processFilesInBatches = async (
    files,
    batchSize,
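Phase 1 above bulk-upserts filesystem metadata keyed on original_path (so re-runs update rather than duplicate), and Phase 2 walks the table in 1,000-row windows via .range() while rewriting each row's status. A reduced sketch of that read-modify loop, assuming a supabase-js client and the uploader table from this diff. One detail worth noting: because handled rows drop out of the status = 'fs-stats' filter, re-querying from offset 0 each iteration (as below) cannot skip rows, whereas advancing the offset across a shrinking result set can:

const CHUNK = 1000;

// `handleRecord` must move the row out of 'fs-stats', or this loops forever.
async function processAllPending(supabase, handleRecord) {
  for (;;) {
    const { data, error } = await supabase
      .from('uploader')
      .select('id, original_path, filename')
      .eq('status', 'fs-stats')
      .range(0, CHUNK - 1);
    if (error) throw new Error(error.message);
    if (!data || data.length === 0) break;

    await Promise.all(data.map(handleRecord));
    if (data.length < CHUNK) break; // last partial window
  }
}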
@@ -528,7 +974,7 @@ const processFilesInBatches = async (

    const messageBuffer = [];

-   const progressBarFormat = options.statsOnly 
+   const progressBarFormat = options.statsOnly
      ? 'πŸ“Š Processing [{bar}] {percentage}% | {value}/{total} files | Stats: {successCount} | Errors: {failureCount} | Duplicates: {skippedCount}'
      : 'πŸ“‚ Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}';

@@ -546,98 +992,67 @@ const processFilesInBatches = async (
    });

    if (options.statsOnly) {
-     // Stats-only mode - Read file stats and insert to uploader table
-     console.log('πŸ“Š Processing files in stats-only mode...');
-
-     let totalDetected = 0;
-     let totalNotDetected = 0;
-     let totalUnsupported = 0;
-     let totalDetectionErrors = 0;
-
+     // OPTIMIZED Stats-only mode - Only read filesystem stats, no file detection
+     console.log(
+       'πŸ“Š Phase 1: Processing files in optimized stats-only mode (no detection)...',
+     );
+
      for (let i = 0; i < files.length; i += batchSize) {
        const batch = files.slice(i, i + batchSize);
-
-       const statsFiles = batch.map((file) => {
-         const originalFileName = path.basename(file);
-
-         return {
-           path: file,
-           originalName: originalFileName,
-         };
-       });

-       try {
-         const insertedRecords = await insertStatsToUploaderTable(statsFiles, options);
-         const actualInserted = insertedRecords.length;
-         const skippedDuplicates = statsFiles.length - actualInserted;
-
-         totalUploaded += actualInserted;
-         totalSkipped += skippedDuplicates;
-
-         // Count detection results from inserted records
-         insertedRecords.forEach(record => {
-           switch (record.status) {
-             case 'detected':
-               totalDetected++;
-               break;
-             case 'not-detected':
-               totalNotDetected++;
-               break;
-             case 'unsupported':
-               totalUnsupported++;
-               break;
-             case 'detection-error':
-               totalDetectionErrors++;
-               break;
-           }
-         });
-
-         statsFiles.forEach((file) => {
-           const wasInserted = insertedRecords.some(record =>
-             record.original_path === (options.clientPath || file.path)
-           );
-           if (wasInserted) {
-             writeLog(`STATS: ${file.path} -> uploader table`);
-           } else {
-             writeLog(`DUPLICATE: ${file.path} -> already exists in uploader table`);
-           }
+       // OPTIMIZED: Batch read file stats to reduce I/O overhead
+       const fileStatsResults = batchReadFileStats(batch);
+       const statsFiles = fileStatsResults
+         .filter((result) => result.stats !== null) // Only include files with valid stats
+         .map((result) => {
+           const originalFileName = path.basename(result.path);
+
+           return {
+             path: result.path,
+             originalName: originalFileName,
+             stats: result.stats, // Pass pre-computed stats to avoid redundant calls
+           };
          });
-
-       if (actualInserted > 0) {
-         console.log(`πŸ“ˆ Inserted ${actualInserted} stats records`);
-       }
-       if (skippedDuplicates > 0) {
-         console.log(`⏭️ Skipped ${skippedDuplicates} duplicates`);
-       }
-       if (options.detect !== false) {
-         console.log(` πŸ” Detected: ${totalDetected}, Not detected: ${totalNotDetected}, Unsupported: ${totalUnsupported}, Errors: ${totalDetectionErrors}`);
-       }
-
-     } catch (error) {
-       totalErrors += statsFiles.length;
-       statsFiles.forEach((file) => {
-         writeLog(`ERROR: ${file.path}: ${error.message}`);
-         messageBuffer.push(`❌ ${file.originalName}: ${error.message}`);
+
+       // Log any files that couldn't be read
+       const failedFiles = fileStatsResults.filter(
+         (result) => result.error !== null,
+       );
+       if (failedFiles.length > 0) {
+         console.log(
+           `⚠️ Could not read stats for ${failedFiles.length} files in batch`,
+         );
+         failedFiles.forEach((failed) => {
+           console.error(` ❌ ${failed.path}: ${failed.error}`);
          });
        }

-       progressBar.update(i + batch.length, {
-         successCount: totalUploaded,
-         failureCount: totalErrors,
-         skippedCount: totalSkipped,
-       });
+       try {
+         const result = await insertStatsOnlyToUploaderTable(
+           statsFiles,
+           options,
+         );

-       if (i + batchSize < files.length) {
-         await new Promise((resolve) => setTimeout(resolve, 200));
+         totalUploaded += result.totalInserted;
+         totalSkipped += result.totalSkipped;
+         totalErrors += failedFiles.length; // Count failed file reads as errors
+
+         progressBar.update(Math.min(i + batch.length, files.length), {
+           successCount: totalUploaded,
+           failureCount: totalErrors,
+           skippedCount: totalSkipped,
+         });
+       } catch (error) {
+         console.error(`❌ Error processing stats batch:`, error.message);
+         totalErrors += batch.length;
+
+         progressBar.update(Math.min(i + batch.length, files.length), {
+           successCount: totalUploaded,
+           failureCount: totalErrors,
+           skippedCount: totalSkipped,
+         });
        }
      }
-
-     // Store detection stats for summary
-     totalDetected = totalDetected || 0;
-     totalNotDetected = totalNotDetected || 0;
-     totalUnsupported = totalUnsupported || 0;
-     totalDetectionErrors = totalDetectionErrors || 0;
-
    } else if (apiMode && !options.forceSupabase) {
      // API Mode - Process in batches
      for (let i = 0; i < files.length; i += batchSize) {
@@ -661,7 +1076,8 @@ const processFilesInBatches = async (

          // Handle combined folder structure + auto-detection
          if (options.folderStructure && options.autoDetectStructure) {
-           const detection = extractYearAndPedimentoFromPath(file, basePath);
+           // OPTIMIZED: Use cached detection to avoid redundant parsing
+           const detection = getCachedPathDetection(file, basePath);
            if (detection.detected) {
              const autoStructure = `${detection.year}/${detection.pedimento}`;
              const combinedStructure = `${options.folderStructure}/${autoStructure}`;
@@ -669,7 +1085,7 @@ const processFilesInBatches = async (
                combinedStructure,
                sanitizedFileName,
              );
-             console.log(
+             logVerbose(
                `πŸ“ Combined structure: ${options.folderStructure}/${autoStructure} for ${originalFileName} -> ${uploadPath}`,
              );
            } else {
@@ -678,7 +1094,7 @@ const processFilesInBatches = async (
                options.folderStructure,
                sanitizedFileName,
              );
-             console.log(
+             logVerbose(
                `πŸ“ Custom structure (auto-detection failed): ${uploadPath}`,
              );
            }
@@ -688,10 +1104,10 @@ const processFilesInBatches = async (
              options.folderStructure,
              sanitizedFileName,
            );
-           console.log(`πŸ“ Custom structure: ${uploadPath}`);
+           logVerbose(`πŸ“ Custom structure: ${uploadPath}`);
          } else if (options.autoDetectStructure) {
-           // Auto-detect structure from path if enabled
-           const detection = extractYearAndPedimentoFromPath(file, basePath);
+           // Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
+           const detection = getCachedPathDetection(file, basePath);
            if (detection.detected) {
              const autoStructure = `${detection.year}/${detection.pedimento}`;
              uploadPath = path.posix.join(autoStructure, sanitizedFileName);
@@ -737,10 +1153,8 @@ const processFilesInBatches = async (

        if (!clientPath && apiFiles.length > 0) {
          const firstFile = apiFiles[0];
-         const detection = extractYearAndPedimentoFromPath(
-           firstFile.path,
-           basePath,
-         );
+         // OPTIMIZED: Use cached detection to avoid redundant parsing
+         const detection = getCachedPathDetection(firstFile.path, basePath);
          if (detection.detected) {
            // clientPath = `${detection.year}/${detection.pedimento}/`;
            clientPath = path
@@ -796,7 +1210,7 @@ const processFilesInBatches = async (
        });

        if (i + batchSize < files.length) {
-         await new Promise((resolve) => setTimeout(resolve, 200));
+         await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
        }
      }
    } else {
@@ -809,7 +1223,7 @@ const processFilesInBatches = async (

        // Handle combined folder structure + auto-detection
        if (options.folderStructure && options.autoDetectStructure) {
-         const detection = extractYearAndPedimentoFromPath(file, basePath);
+         const detection = getCachedPathDetection(file, basePath);
          if (detection.detected) {
            const autoStructure = `${detection.year}/${detection.pedimento}`;
            const combinedStructure = `${options.folderStructure}/${autoStructure}`;
@@ -832,8 +1246,8 @@ const processFilesInBatches = async (
          uploadPath = path.join(options.folderStructure, fileName);
          console.log(`πŸ“ Custom structure: ${uploadPath}`);
        } else if (options.autoDetectStructure) {
-         // Auto-detect structure from path if enabled
-         const detection = extractYearAndPedimentoFromPath(file, basePath);
+         // Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
+         const detection = getCachedPathDetection(file, basePath);
          if (detection.detected) {
            const autoStructure = `${detection.year}/${detection.pedimento}`;
            const fileName = path.basename(file);
@@ -899,13 +1313,19 @@ const uploadFilesByRfc = async (options = {}) => {
    }

    if (!API_BASE_URL || !API_TOKEN) {
-     console.error('❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.');
+     console.error(
+       '❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.',
+     );
      process.exit(1);
    }

    if (!uploadRfcs || uploadRfcs.length === 0) {
-     console.error('❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.');
-     console.error(' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"');
+     console.error(
+       '❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.',
+     );
+     console.error(
+       ' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
+     );
      process.exit(1);
    }

@@ -927,13 +1347,17 @@ const uploadFilesByRfc = async (options = {}) => {

    if (!rfcRecords || rfcRecords.length === 0) {
      console.log('ℹ️ No files found for the specified RFCs with arela_path');
-     console.log(` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`);
+     console.log(
+       ` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
+     );
      return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
    }

    // Step 2: Get unique arela_paths from the RFC matches
-   const uniqueArelaPaths = [...new Set(rfcRecords.map(r => r.arela_path))];
-   console.log(`πŸ“ Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`);
+   const uniqueArelaPaths = [...new Set(rfcRecords.map((r) => r.arela_path))];
+   console.log(
+     `πŸ“ Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`,
+   );

    // Step 3: Get ALL files that have these arela_paths (including supporting documents)
    // Use pagination to ensure we get all files, regardless of count
@@ -943,7 +1367,7 @@ const uploadFilesByRfc = async (options = {}) => {
    const queryBatchSize = 1000;

    console.log('πŸ“₯ Fetching all related files (with pagination)...');
-
+
    while (hasMore) {
      const { data: batch, error: queryError } = await supabase
        .from('uploader')
@@ -962,7 +1386,7 @@ const uploadFilesByRfc = async (options = {}) => {
      } else {
        allRelatedFiles = allRelatedFiles.concat(batch);
        offset += queryBatchSize;
-
+
        // If we got less than queryBatchSize, we've reached the end
        if (batch.length < queryBatchSize) {
          hasMore = false;
@@ -975,8 +1399,10 @@ const uploadFilesByRfc = async (options = {}) => {
      return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
    }

-   console.log(`πŸ“ Found ${allRelatedFiles.length} total files to upload (including supporting documents)`);
-
+   console.log(
+     `πŸ“ Found ${allRelatedFiles.length} total files to upload (including supporting documents)`,
+   );
+
    // Group by RFC and arela_path for better organization
    const filesByRfc = allRelatedFiles.reduce((acc, record) => {
      const rfc = record.rfc || 'No RFC';
@@ -989,8 +1415,12 @@ const uploadFilesByRfc = async (options = {}) => {

    console.log('πŸ“Š Files by RFC (including supporting documents):');
    for (const [rfc, files] of Object.entries(filesByRfc)) {
-     const documentTypes = [...new Set(files.map(f => f.document_type || 'Unknown'))];
-     console.log(` ${rfc}: ${files.length} files (${documentTypes.join(', ')})`);
+     const documentTypes = [
+       ...new Set(files.map((f) => f.document_type || 'Unknown')),
+     ];
+     console.log(
+       ` ${rfc}: ${files.length} files (${documentTypes.join(', ')})`,
+     );
    }

    // Group by arela_path for upload organization
@@ -1015,7 +1445,8 @@ const uploadFilesByRfc = async (options = {}) => {

    // Create progress bar
    const progressBar = new cliProgress.SingleBar({
-     format: 'πŸš€ Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
+     format:
+       'πŸš€ Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
      barCompleteChar: 'β–ˆ',
      barIncompleteChar: 'β–‘',
      hideCursor: true,
@@ -1037,18 +1468,20 @@ const uploadFilesByRfc = async (options = {}) => {
      const batch = allRelatedFiles.slice(i, i + batchSize);
      const batchNumber = Math.floor(i / batchSize) + 1;
      const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
-
-     console.log(`\nπŸ“¦ Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`);
+
+     console.log(
+       `\nπŸ“¦ Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`,
+     );

      // Prepare files for upload
      const filesToUpload = [];
-
+
      for (const record of batch) {
        totalProcessed++;
-
+
        try {
          const originalPath = record.original_path;
-
+
          // Check if file exists
          if (!fs.existsSync(originalPath)) {
            console.log(` ⚠️ File not found: ${originalPath}`);
@@ -1056,24 +1489,26 @@ const uploadFilesByRfc = async (options = {}) => {
            continue;
          }

-         const fileStats = fs.statSync(originalPath);
+         // OPTIMIZED: Read file and get size from buffer instead of separate fs.statSync call
          const fileBuffer = fs.readFileSync(originalPath);
-
+
          filesToUpload.push({
            path: originalPath,
            buffer: fileBuffer,
-           size: fileStats.size,
+           size: fileBuffer.length, // Get size from buffer instead of fs.statSync
            name: record.filename,
            arelaPath: record.arela_path,
            rfc: record.rfc,
            documentType: record.document_type,
          });
-
        } catch (error) {
-         console.error(` ❌ Error reading file ${record.original_path}:`, error.message);
+         console.error(
+           ` ❌ Error reading file ${record.original_path}:`,
+           error.message,
+         );
          totalErrors++;
        }
-
+
        if (options.showProgress !== false) {
          progressBar.update(totalProcessed, {
            uploaded: totalUploaded,
@@ -1086,10 +1521,12 @@ const uploadFilesByRfc = async (options = {}) => {
      // Upload the batch if we have files
      if (filesToUpload.length > 0) {
        try {
-         console.log(` πŸš€ Uploading ${filesToUpload.length} files to Arela API...`);
-
+         console.log(
+           ` πŸš€ Uploading ${filesToUpload.length} files to Arela API...`,
+         );
+
          const formData = new FormData();
-
+
          // Add files to form data
          filesToUpload.forEach((file, index) => {
            formData.append(`files`, file.buffer, {
@@ -1112,7 +1549,7 @@ const uploadFilesByRfc = async (options = {}) => {
          // Upload each group separately with its folder structure
          for (const [arelaPath, pathFiles] of Object.entries(filesByPath)) {
            const pathFormData = new FormData();
-
+
            pathFiles.forEach((file) => {
              pathFormData.append('files', file.buffer, {
                filename: file.name,
@@ -1121,8 +1558,10 @@ const uploadFilesByRfc = async (options = {}) => {
            });

            // Set folder structure for this group - concatenate custom prefix with arela_path
-           const folderStructure = options.folderStructure 
-             ? `${options.folderStructure}/${arelaPath}`.replace(/\/+/g, '/').replace(/\/$/, '')
+           const folderStructure = options.folderStructure
+             ? `${options.folderStructure}/${arelaPath}`
+                 .replace(/\/+/g, '/')
+                 .replace(/\/$/, '')
              : arelaPath;
            pathFormData.append('folderStructure', folderStructure);
            pathFormData.append('autoDetect', 'true');
@@ -1133,15 +1572,20 @@ const uploadFilesByRfc = async (options = {}) => {
              pathFormData.append('bucket', bucket);
            }

-           console.log(` πŸ“ Uploading ${pathFiles.length} files to: ${folderStructure}`);
+           console.log(
+             ` πŸ“ Uploading ${pathFiles.length} files to: ${folderStructure}`,
+           );

-           const response = await fetch(`${API_BASE_URL}/api/storage/batch-upload-and-process`, {
-             method: 'POST',
-             headers: {
-               'x-api-key': API_TOKEN,
+           const response = await fetch(
+             `${API_BASE_URL}/api/storage/batch-upload-and-process`,
+             {
+               method: 'POST',
+               headers: {
+                 'x-api-key': API_TOKEN,
+               },
+               body: pathFormData,
              },
-             body: pathFormData,
-           });
+           );

            if (!response.ok) {
              const errorText = await response.text();
@@ -1149,24 +1593,33 @@ const uploadFilesByRfc = async (options = {}) => {
            }

            const result = await response.json();
-
+
            // Check if upload was successful based on stats rather than success field
-           const isSuccessful = result.stats && result.stats.uploadedCount > 0 && result.stats.errorCount === 0;
-
+           const isSuccessful =
+             result.stats &&
+             result.stats.uploadedCount > 0 &&
+             result.stats.errorCount === 0;
+
            if (isSuccessful) {
-             console.log(` βœ… Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`);
+             console.log(
+               ` βœ… Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`,
+             );
              totalUploaded += result.stats.uploadedCount;
-
+
              if (result.stats.detectedCount > 0) {
-               console.log(` πŸ” Files detected: ${result.stats.detectedCount}`);
+               console.log(
+                 ` πŸ” Files detected: ${result.stats.detectedCount}`,
+               );
              }
              if (result.stats.organizedCount > 0) {
-               console.log(` πŸ“ Files organized: ${result.stats.organizedCount}`);
+               console.log(
+                 ` πŸ“ Files organized: ${result.stats.organizedCount}`,
+               );
              }
            } else {
              console.error(` ❌ Upload failed for ${folderStructure}:`);
              if (result.errors && result.errors.length > 0) {
-               result.errors.forEach(error => {
+               result.errors.forEach((error) => {
                  console.error(`   - ${error.fileName}: ${error.error}`);
                });
              }
@@ -1174,18 +1627,20 @@ const uploadFilesByRfc = async (options = {}) => {
            }

            // Small delay between path groups
-           await new Promise(resolve => setTimeout(resolve, 100));
+           await new Promise((resolve) => setTimeout(resolve, 100));
          }
-
        } catch (error) {
-         console.error(` ❌ Error uploading batch ${batchNumber}:`, error.message);
+         console.error(
+           ` ❌ Error uploading batch ${batchNumber}:`,
+           error.message,
+         );
          totalErrors += filesToUpload.length;
        }
      }

      // Small delay between batches
      if (i + batchSize < allRelatedFiles.length) {
-       await new Promise(resolve => setTimeout(resolve, 200));
+       await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
      }
    }
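In the RFC flow above, files are grouped by their arela_path and each group is posted as one multipart request whose folderStructure is that path, optionally behind a custom prefix with duplicate and trailing slashes collapsed. A sketch of the grouping and the slash normalization, with illustrative inputs:

// Group records by arela_path, as the upload loop above does.
const groupByArelaPath = (files) =>
  files.reduce((acc, f) => {
    (acc[f.arelaPath] ??= []).push(f);
    return acc;
  }, {});

// Same normalization as the diff: join, collapse '//', drop trailing '/'.
const buildFolderStructure = (prefix, arelaPath) =>
  prefix
    ? `${prefix}/${arelaPath}`.replace(/\/+/g, '/').replace(/\/$/, '')
    : arelaPath;

console.log(buildFolderStructure('clients/acme', '2024/24012345/'));
// -> clients/acme/2024/24012345
console.log(
  Object.keys(
    groupByArelaPath([
      { arelaPath: '2024/24012345/', name: 'a.pdf' },
      { arelaPath: '2024/24012345/', name: 'b.xml' },
      { arelaPath: '2024/24099999/', name: 'c.pdf' },
    ]),
  ),
);
// -> [ '2024/24012345/', '2024/24099999/' ]

'clients/acme' and the pedimento-style paths are invented examples; only the normalization rules come from the diff.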

@@ -1229,7 +1684,10 @@ const propagateArelaPath = async (options = {}) => {
      .not('arela_path', 'is', null);

    if (pedimentoError) {
-     console.error('❌ Error fetching pedimento records:', pedimentoError.message);
+     console.error(
+       '❌ Error fetching pedimento records:',
+       pedimentoError.message,
+     );
      return { processedCount: 0, updatedCount: 0, errorCount: 1 };
    }

@@ -1238,7 +1696,9 @@ const propagateArelaPath = async (options = {}) => {
      return { processedCount: 0, updatedCount: 0, errorCount: 0 };
    }

-   console.log(`πŸ“‹ Found ${pedimentoRecords.length} pedimento records with arela_path`);
+   console.log(
+     `πŸ“‹ Found ${pedimentoRecords.length} pedimento records with arela_path`,
+   );

    let totalProcessed = 0;
    let totalUpdated = 0;
@@ -1246,7 +1706,8 @@ const propagateArelaPath = async (options = {}) => {

    // Create progress bar
    const progressBar = new cliProgress.SingleBar({
-     format: 'πŸ”„ Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
+     format:
+       'πŸ”„ Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
      barCompleteChar: 'β–ˆ',
      barIncompleteChar: 'β–‘',
      hideCursor: true,
@@ -1263,19 +1724,21 @@ const propagateArelaPath = async (options = {}) => {
    for (const pedimento of pedimentoRecords) {
      try {
        totalProcessed++;
-
+
        // Extract base path from original_path (remove filename)
        const basePath = path.dirname(pedimento.original_path);
-
+
        console.log(`\nπŸ” Processing: ${pedimento.filename}`);
        console.log(` πŸ“ Base path: ${basePath}`);
-
+
        // Extract folder part from existing arela_path by removing the filename
        const existingPath = pedimento.arela_path;
-       const folderArelaPath = existingPath.includes('/') ?
-         existingPath.substring(0, existingPath.lastIndexOf('/')) + '/' :
-         existingPath.endsWith('/') ? existingPath : existingPath + '/';
-
+       const folderArelaPath = existingPath.includes('/')
+         ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
+         : existingPath.endsWith('/')
+           ? existingPath
+           : existingPath + '/';
+
        console.log(` 🎯 Original arela path: ${existingPath}`);
        console.log(` πŸ“ Folder arela path: ${folderArelaPath}`);

@@ -1288,7 +1751,10 @@ const propagateArelaPath = async (options = {}) => {
          .neq('id', pedimento.id); // Exclude the pedimento itself

        if (relatedError) {
-         console.error(`❌ Error finding related files for ${pedimento.filename}:`, relatedError.message);
+         console.error(
+           `❌ Error finding related files for ${pedimento.filename}:`,
+           relatedError.message,
+         );
          totalErrors++;
          continue;
        }
@@ -1298,32 +1764,38 @@ const propagateArelaPath = async (options = {}) => {
          continue;
        }

-       console.log(` πŸ“„ Found ${relatedFiles.length} related files to update:`);
-
+       console.log(
+         ` πŸ“„ Found ${relatedFiles.length} related files to update:`,
+       );
+
        // Show first 10 files, then indicate if there are more
        const filesToShow = relatedFiles.slice(0, 10);
-       filesToShow.forEach(file => {
+       filesToShow.forEach((file) => {
          console.log(`   - ${file.filename}`);
        });
-
+
        if (relatedFiles.length > 10) {
          console.log(`   ... and ${relatedFiles.length - 10} more files`);
        }

        // Process files in batches to avoid URI length limitations
        const BATCH_SIZE = 50; // Process 50 files at a time
-       const fileIds = relatedFiles.map(f => f.id);
+       const fileIds = relatedFiles.map((f) => f.id);
        let batchErrors = 0;
        let batchUpdated = 0;

-       console.log(` πŸ”„ Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`);
+       console.log(
+         ` πŸ”„ Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`,
+       );

        for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
          const batchIds = fileIds.slice(i, i + BATCH_SIZE);
          const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
          const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
-
-         console.log(` πŸ“¦ Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`);
+
+         console.log(
+           ` πŸ“¦ Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
+         );

          try {
            const { error: updateError } = await supabase
@@ -1332,33 +1804,45 @@ const propagateArelaPath = async (options = {}) => {
              .in('id', batchIds);

            if (updateError) {
-             console.error(` ❌ Error in batch ${batchNumber}:`, updateError.message);
+             console.error(
+               ` ❌ Error in batch ${batchNumber}:`,
+               updateError.message,
+             );
              batchErrors++;
            } else {
-             console.log(` βœ… Batch ${batchNumber} completed: ${batchIds.length} files updated`);
+             console.log(
+               ` βœ… Batch ${batchNumber} completed: ${batchIds.length} files updated`,
+             );
              batchUpdated += batchIds.length;
            }
          } catch (error) {
-           console.error(` ❌ Exception in batch ${batchNumber}:`, error.message);
+           console.error(
+             ` ❌ Exception in batch ${batchNumber}:`,
+             error.message,
+           );
            batchErrors++;
          }

          // Small delay between batches to avoid overwhelming the database
          if (i + BATCH_SIZE < fileIds.length) {
-           await new Promise(resolve => setTimeout(resolve, 100));
+           await new Promise((resolve) => setTimeout(resolve, 100));
          }
        }

        if (batchErrors > 0) {
-         console.error(`❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`);
+         console.error(
+           `❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
+         );
          totalErrors++;
        } else {
          console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
          totalUpdated += batchUpdated;
        }
-
      } catch (error) {
-       console.error(`❌ Error processing ${pedimento.filename}:`, error.message);
+       console.error(
+         `❌ Error processing ${pedimento.filename}:`,
+         error.message,
+       );
        totalErrors++;
      }

@@ -1422,25 +1906,70 @@ program
      'Automatically detect year/pedimento from file paths',
    )
    .option('--client-path <path>', 'Client path for metadata tracking')
-   .option('--stats-only', 'Only read file stats and insert to uploader table, skip file upload')
+   .option(
+     '--stats-only',
+     'Phase 1: Only read filesystem stats and insert to database (no file reading or detection)',
+   )
    .option('--no-detect', 'Disable document type detection in stats-only mode')
-   .option('--propagate-arela-path', 'Propagate arela_path from pedimento_simplificado records to related files with same base path')
-   .option('--upload-by-rfc', 'Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable')
+   .option(
+     '--detect-pdfs',
+     'Phase 2: Process PDF files in database for pedimento-simplificado detection',
+   )
+   .option(
+     '--propagate-arela-path',
+     'Phase 3: Propagate arela_path from pedimento_simplificado records to related files with same base path',
+   )
+   .option(
+     '--upload-by-rfc',
+     'Phase 4: Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable',
+   )
+   .option(
+     '--run-all-phases',
+     'Run all 4 phases in sequence: stats β†’ detect β†’ propagate β†’ upload',
+   )
    .action(async (options) => {
      if (options.version) {
        console.log(packageVersion);
        process.exit(0);
      }

+     // Handle detect-pdfs option (Phase 2)
+     if (options.detectPdfs) {
+       console.log('πŸ” Starting Phase 2: PDF Detection');
+       await checkCredentials(true); // Force Supabase mode
+
+       const result = await detectPedimentosInDatabase({
+         batchSize: parseInt(options.batchSize) || 10,
+       });
+
+       console.log(
+         `βœ… Phase 2 Complete: ${result.detectedCount} detected, ${result.errorCount} errors`,
+       );
+       return;
+     }
+
+     // Handle run-all-phases option
+     if (options.runAllPhases) {
+       console.log('πŸš€ Starting all 4 phases in sequence...');
+       await checkCredentials(true); // Force Supabase mode
+
+       // Phase 1: Stats collection
+       console.log('\nπŸ“Š === PHASE 1: Filesystem Stats ===');
+       options.statsOnly = true;
+       // Continue with normal processing to run Phase 1
+
+       // The rest will be handled after Phase 1 completes
+     }
+
      // Handle propagate-arela-path option
      if (options.propagateArelaPath) {
        // Initialize Supabase credentials for propagation
        await checkCredentials(true); // Force Supabase mode
-
+
        const result = await propagateArelaPath({
          showProgress: options.showStats || true,
        });
-
+
        if (result.errorCount > 0) {
          process.exit(1);
        }
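The option list now describes an explicit four-phase pipeline, and --run-all-phases chains them: it forces statsOnly so the normal processing pass runs Phase 1, then (further down in this diff) runs phases 2 to 4 once that pass finishes. The intended order as a plain orchestration sketch; detectPedimentosInDatabase, propagateArelaPath and uploadFilesByRfc are the functions from this diff, while runStatsOnly is a hypothetical stand-in for the Phase 1 path through processFilesInBatches:

// Run named phases in sequence; each phase is an async (options) => result.
async function runAllPhases(phases, options) {
  for (const [name, run] of phases) {
    console.log(`=== ${name} ===`);
    await run(options);
  }
}

// Example wiring (runStatsOnly is hypothetical; the rest are from the diff):
// await runAllPhases(
//   [
//     ['Phase 1: Filesystem Stats', runStatsOnly],
//     ['Phase 2: PDF Detection', detectPedimentosInDatabase],
//     ['Phase 3: Propagate Arela Paths', propagateArelaPath],
//     ['Phase 4: Upload by RFC', uploadFilesByRfc],
//   ],
//   { batchSize: 10 },
// );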
@@ -1451,25 +1980,29 @@ program
      if (options.uploadByRfc) {
        // RFC upload needs both Supabase (for database queries) and API (for uploads)
        await checkCredentials(false); // Initialize API mode
-
+
        // Also initialize Supabase for database queries
        if (!supabase) {
          if (!supabaseUrl || !supabaseKey) {
-           console.error('❌ RFC upload requires Supabase credentials for database queries.');
-           console.error(' Please set SUPABASE_URL and SUPABASE_KEY environment variables.');
+           console.error(
+             '❌ RFC upload requires Supabase credentials for database queries.',
+           );
+           console.error(
+             ' Please set SUPABASE_URL and SUPABASE_KEY environment variables.',
+           );
            process.exit(1);
          }
-
+
          supabase = createClient(supabaseUrl, supabaseKey);
          console.log('βœ… Connected to Supabase for database queries');
        }
-
+
        const result = await uploadFilesByRfc({
          showProgress: options.showStats || true,
          batchSize: parseInt(options.batchSize) || 10,
          folderStructure: options.folderStructure,
        });
-
+
        if (result.errorCount > 0) {
          process.exit(1);
        }
@@ -1490,7 +2023,9 @@ program
      const concurrency = parseInt(options.concurrency) || 10;

      if (options.statsOnly) {
-       console.log('πŸ“Š Mode: Stats Only - Reading file stats and inserting to uploader table');
+       console.log(
+         'πŸ“Š Mode: Stats Only - Reading file stats and inserting to uploader table',
+       );
        console.log('🚫 Files will NOT be uploaded');
        if (options.detect !== false) {
          console.log('πŸ” Document type detection ENABLED for supported files');
@@ -1590,12 +2125,73 @@ program
      console.log(` πŸ“œ Log file: ${logFilePath}`);
      console.log(`${'='.repeat(60)}\n`);

-     if (options.showStats && sanitizationCache.size > 0) {
+     // Continue with remaining phases if running all phases
+     if (options.runAllPhases && options.statsOnly) {
+       try {
+         // Phase 2: PDF Detection
+         console.log('\nπŸ” === PHASE 2: PDF Detection ===');
+         const detectionResult = await detectPedimentosInDatabase({
+           batchSize: parseInt(options.batchSize) || 10,
+         });
+         console.log(
+           `βœ… Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`,
+         );
+
+         // Phase 3: Propagate arela_path
+         console.log('\nπŸ“ === PHASE 3: Propagate Arela Paths ===');
+         const propagateResult = await propagateArelaPath({
+           showProgress: options.showStats || true,
+         });
+         console.log(
+           `βœ… Phase 3 Complete: ${propagateResult.updatedCount || 0} paths propagated`,
+         );
+
+         // Phase 4: Upload by RFC
+         if (uploadRfcs && uploadRfcs.length > 0) {
+           console.log('\nπŸš€ === PHASE 4: Upload by RFC ===');
+
+           // Initialize API mode for uploads
+           await checkCredentials(false);
+
+           const uploadResult = await uploadFilesByRfc({
+             showProgress: options.showStats || true,
+             batchSize: parseInt(options.batchSize) || 10,
+             folderStructure: options.folderStructure,
+           });
+           console.log(`βœ… Phase 4 Complete: Upload finished`);
+         } else {
+           console.log('\n⚠️ === PHASE 4: Upload by RFC ===');
+           console.log(
+             '⚠️ UPLOAD_RFCS environment variable not configured, skipping Phase 4',
+           );
+         }
+
+         console.log('\nπŸŽ‰ All 4 phases completed successfully!');
+       } catch (error) {
+         console.error(`❌ Error in multi-phase execution:`, error.message);
+         process.exit(1);
+       }
+     }
+
+     if (
+       options.showStats &&
+       (sanitizationCache.size > 0 || pathDetectionCache.size > 0)
+     ) {
        console.log(`πŸ“Š Performance Statistics:`);
-       console.log(
-         ` πŸ—‚οΈ Sanitization cache entries: ${sanitizationCache.size}`,
-       );
+       if (sanitizationCache.size > 0) {
+         console.log(
+           ` πŸ—‚οΈ Sanitization cache entries: ${sanitizationCache.size}`,
+         );
+       }
+       if (pathDetectionCache.size > 0) {
+         console.log(
+           ` πŸ“ Path detection cache entries: ${pathDetectionCache.size}`,
+         );
+       }
      }
+
+     // OPTIMIZED: Ensure log buffer is flushed before exit
+     flushLogBuffer();
    });

  program.parse();