@arela/uploader 0.2.1 → 0.2.2

This diff shows the contents of the two publicly released package versions as published to their registry. It is provided for informational purposes only and reflects the changes between those versions exactly as they appear in the public registry.
package/src/index.js CHANGED
@@ -9,6 +9,7 @@ import { globby } from 'globby';
  import mime from 'mime-types';
  import fetch from 'node-fetch';
  import path from 'path';
+
  import { FileDetectionService } from './file-detection.js';
 
  config();
@@ -157,8 +158,8 @@ const checkCredentials = async (forceSupabase = false) => {
  if (!supabaseUrl || !supabaseKey || !bucket) {
  console.error(
  '⚠️ Missing credentials. Please set either:\n' +
- ' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
- ' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
+ ' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
+ ' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
  );
  process.exit(1);
  }
@@ -209,7 +210,10 @@ const writeLog = (message) => {
 
  // Flush if buffer is full or enough time has passed
  const now = Date.now();
- if (logBuffer.length >= LOG_BUFFER_SIZE || (now - lastFlushTime) >= LOG_FLUSH_INTERVAL) {
+ if (
+ logBuffer.length >= LOG_BUFFER_SIZE ||
+ now - lastFlushTime >= LOG_FLUSH_INTERVAL
+ ) {
  flushLogBuffer();
  }
  } catch (error) {
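
The reformatted condition above is a standard buffered-writer flush policy: flush when the buffer reaches a size threshold or when enough time has passed since the last flush. A minimal self-contained sketch of the pattern (only logBuffer, lastFlushTime, flushLogBuffer, LOG_BUFFER_SIZE, and LOG_FLUSH_INTERVAL are names from the diff; the file name and values are illustrative):

    import fs from 'fs';

    const LOG_BUFFER_SIZE = 100;     // flush after this many buffered lines
    const LOG_FLUSH_INTERVAL = 5000; // ...or after this many milliseconds

    let logBuffer = [];
    let lastFlushTime = Date.now();

    const flushLogBuffer = () => {
      if (logBuffer.length === 0) return;
      fs.appendFileSync('uploader.log', logBuffer.join('\n') + '\n');
      logBuffer = [];
      lastFlushTime = Date.now();
    };

    const writeLog = (message) => {
      logBuffer.push(`${new Date().toISOString()} ${message}`);
      const now = Date.now();
      if (
        logBuffer.length >= LOG_BUFFER_SIZE ||
        now - lastFlushTime >= LOG_FLUSH_INTERVAL
      ) {
        flushLogBuffer();
      }
    };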
@@ -233,7 +237,8 @@ process.on('SIGTERM', () => {
  */
  const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true';
  const BATCH_DELAY = parseInt(process.env.BATCH_DELAY) || 100; // Configurable delay between batches
- const PROGRESS_UPDATE_INTERVAL = parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10; // Update progress every N items
+ const PROGRESS_UPDATE_INTERVAL =
+ parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10; // Update progress every N items
 
  const logVerbose = (message) => {
  if (VERBOSE_LOGGING) {
@@ -531,7 +536,9 @@ const uploadToSupabase = async (file, uploadPath) => {
  */
  const insertStatsToUploaderTable = async (files, options) => {
  if (!supabase) {
- throw new Error('Supabase client not initialized. Stats mode requires Supabase connection.');
+ throw new Error(
+ 'Supabase client not initialized. Stats mode requires Supabase connection.',
+ );
  }
 
  const detectionService = new FileDetectionService();
@@ -550,7 +557,9 @@ const insertStatsToUploaderTable = async (files, options) => {
  .limit(1);
 
  if (checkError) {
- console.error(`❌ Error checking for existing record: ${checkError.message}`);
+ console.error(
+ `❌ Error checking for existing record: ${checkError.message}`,
+ );
  continue;
  }
 
@@ -569,7 +578,7 @@ const insertStatsToUploaderTable = async (files, options) => {
  arela_path: null,
  status: 'stats',
  rfc: null,
- message: null
+ message: null,
  };
 
  // Try to detect document type for supported files
@@ -588,7 +597,9 @@ const insertStatsToUploaderTable = async (files, options) => {
  }
 
  // Extract RFC from fields if available
- const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
+ const rfcField = detection.fields.find(
+ (f) => f.name === 'rfc' && f.found,
+ );
  if (rfcField) {
  record.rfc = rfcField.value;
  }
@@ -616,7 +627,9 @@ const insertStatsToUploaderTable = async (files, options) => {
  return [];
  }
 
- console.log(`💾 Inserting ${records.length} new records into uploader table...`);
+ console.log(
+ `💾 Inserting ${records.length} new records into uploader table...`,
+ );
 
  const { data, error } = await supabase
  .from('uploader')
@@ -637,7 +650,9 @@ const insertStatsToUploaderTable = async (files, options) => {
  */
  const insertStatsOnlyToUploaderTable = async (files, options) => {
  if (!supabase) {
- throw new Error('Supabase client not initialized. Stats mode requires Supabase connection.');
+ throw new Error(
+ 'Supabase client not initialized. Stats mode requires Supabase connection.',
+ );
  }
 
  const batchSize = 1000; // Large batch size for performance
@@ -650,7 +665,10 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
  // Use pre-computed stats if available, otherwise call fs.statSync
  const stats = file.stats || fs.statSync(file.path);
  const originalPath = options.clientPath || file.path;
- const fileExtension = path.extname(file.path).toLowerCase().replace('.', '');
+ const fileExtension = path
+ .extname(file.path)
+ .toLowerCase()
+ .replace('.', '');
 
  const record = {
  document_type: null,
@@ -664,7 +682,7 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
  message: null,
  file_extension: fileExtension,
  created_at: new Date().toISOString(),
- modified_at: stats.mtime.toISOString()
+ modified_at: stats.mtime.toISOString(),
  };
 
  allRecords.push(record);
@@ -678,7 +696,9 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
  return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
  }
 
- console.log(`💾 Bulk inserting ${allRecords.length} file stats in batches of ${batchSize}...`);
+ console.log(
+ `💾 Bulk inserting ${allRecords.length} file stats in batches of ${batchSize}...`,
+ );
 
  let totalInserted = 0;
  let totalSkipped = 0;
@@ -689,16 +709,17 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
 
  try {
  // OPTIMIZED: Use upsert without select to avoid unnecessary data transfer
- const { error, count } = await supabase
- .from('uploader')
- .upsert(batch, {
- onConflict: 'original_path',
- ignoreDuplicates: false,
- count: 'exact'
- });
+ const { error, count } = await supabase.from('uploader').upsert(batch, {
+ onConflict: 'original_path',
+ ignoreDuplicates: false,
+ count: 'exact',
+ });
 
  if (error) {
- console.error(`❌ Error inserting batch ${Math.floor(i / batchSize) + 1}:`, error.message);
+ console.error(
+ `❌ Error inserting batch ${Math.floor(i / batchSize) + 1}:`,
+ error.message,
+ );
  continue;
  }
 
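The upsert block above pairs onConflict: 'original_path' with count: 'exact' so the client learns how many rows were written without transferring the rows back (there is no .select() on the builder). A hedged sketch of the same pattern in isolation, assuming a supabase-js v2 client and a unique index on original_path:

    import { createClient } from '@supabase/supabase-js';

    const supabase = createClient(process.env.SUPABASE_URL, process.env.SUPABASE_KEY);

    const upsertInChunks = async (records, chunkSize = 1000) => {
      let affected = 0;
      for (let i = 0; i < records.length; i += chunkSize) {
        const chunk = records.slice(i, i + chunkSize);
        // No .select(): row data is not sent back, but count: 'exact'
        // still reports how many rows the upsert touched.
        const { error, count } = await supabase.from('uploader').upsert(chunk, {
          onConflict: 'original_path',
          ignoreDuplicates: false, // update existing rows instead of skipping them
          count: 'exact',
        });
        if (error) {
          console.error(`Chunk ${i / chunkSize + 1} failed: ${error.message}`);
          continue;
        }
        affected += count ?? chunk.length;
      }
      return affected;
    };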
@@ -715,45 +736,65 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
  // This is reasonable since stats-only is typically run on new file sets
  totalInserted += affected;
 
- console.log(`✅ Batch ${Math.floor(i / batchSize) + 1}: ${affected} rows processed`);
+ console.log(
+ `✅ Batch ${Math.floor(i / batchSize) + 1}: ${affected} rows processed`,
+ );
  } catch (error) {
- console.error(`❌ Unexpected error in batch ${Math.floor(i / batchSize) + 1}:`, error.message);
+ console.error(
+ `❌ Unexpected error in batch ${Math.floor(i / batchSize) + 1}:`,
+ error.message,
+ );
  }
  }
 
  // Calculate skipped as difference between total records and inserted
  totalSkipped = allRecords.length - totalInserted;
 
- console.log(`📊 Phase 1 Summary: ${totalInserted} records processed, estimated ${totalSkipped} were updates`);
+ console.log(
+ `📊 Phase 1 Summary: ${totalInserted} records processed, estimated ${totalSkipped} were updates`,
+ );
 
  return {
  totalInserted,
  totalSkipped,
- totalProcessed: allRecords.length
+ totalProcessed: allRecords.length,
  };
  };
 
  /**
  * PHASE 2: Process PDF files for pedimento-simplificado detection
  * Only processes files with status 'fs-stats' and file_extension 'pdf'
+ * Processes records in chunks of 1000 to avoid loading all records into memory
  */
  const detectPedimentosInDatabase = async (options = {}) => {
  if (!supabase) {
  throw new Error('Supabase client not initialized.');
  }
 
- console.log('🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...');
+ console.log(
+ '🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...',
+ );
 
- // Get all PDF files that need detection (status = 'fs-stats' and extension = 'pdf')
- let allPdfRecords = [];
- let hasMore = true;
+ const detectionService = new FileDetectionService();
+ const processingBatchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
+ const queryBatchSize = 1000; // Process 1000 records at a time
+
+ let totalDetected = 0;
+ let totalProcessed = 0;
+ let totalErrors = 0;
  let offset = 0;
- const queryBatchSize = 1000;
+ let chunkNumber = 1;
 
- console.log('📥 Fetching PDF files from database...');
+ console.log('🔄 Processing PDF files in chunks of 1000 records...');
 
- while (hasMore) {
- const { data: batch, error: queryError } = await supabase
+ // Process records in chunks of 1000
+ while (true) {
+ console.log(
+ `\n📥 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
+ );
+
+ // Fetch next chunk of PDF records
+ const { data: pdfRecords, error: queryError } = await supabase
  .from('uploader')
  .select('id, original_path, filename, file_extension, status')
  .eq('status', 'fs-stats')
@@ -762,131 +803,157 @@ const detectPedimentosInDatabase = async (options = {}) => {
  .range(offset, offset + queryBatchSize - 1);
 
  if (queryError) {
- throw new Error(`Failed to fetch PDF records: ${queryError.message}`);
+ throw new Error(
+ `Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
+ );
  }
 
- if (!batch || batch.length === 0) {
- hasMore = false;
- } else {
- allPdfRecords.push(...batch);
- offset += queryBatchSize;
- console.log(`📄 Fetched ${batch.length} PDF records (total: ${allPdfRecords.length})`);
+ // If no records found, we're done
+ if (!pdfRecords || pdfRecords.length === 0) {
+ console.log(`📝 No more PDF files found. Processing completed.`);
+ break;
  }
- }
 
- if (allPdfRecords.length === 0) {
- console.log('📝 No PDF files found for detection');
- return { detectedCount: 0, processedCount: 0, errorCount: 0 };
- }
+ console.log(
+ `🔄 Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
+ );
 
- console.log(`🔍 Processing ${allPdfRecords.length} PDF files for detection...`);
+ // Create progress bar for this chunk
+ const progressBar = new cliProgress.SingleBar({
+ format: `🔍 Chunk ${chunkNumber} |{bar}| {percentage}% | {value}/{total} | Detected: {detected} | Errors: {errors}`,
+ barCompleteChar: '█',
+ barIncompleteChar: '░',
+ hideCursor: true,
+ });
 
- const detectionService = new FileDetectionService();
- const batchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
- let totalDetected = 0;
- let totalProcessed = 0;
- let totalErrors = 0;
+ progressBar.start(pdfRecords.length, 0, { detected: 0, errors: 0 });
 
- // Create progress bar
- const progressBar = new cliProgress.SingleBar({
- format: '🔍 PDF Detection |{bar}| {percentage}% | {value}/{total} | Detected: {detected} | Errors: {errors}',
- barCompleteChar: '█',
- barIncompleteChar: '░',
- hideCursor: true,
- });
+ let chunkDetected = 0;
+ let chunkProcessed = 0;
+ let chunkErrors = 0;
 
- progressBar.start(allPdfRecords.length, 0, { detected: 0, errors: 0 });
+ // Process files in smaller batches within this chunk
+ for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
+ const batch = pdfRecords.slice(i, i + processingBatchSize);
+ const updatePromises = [];
 
- // Process files in smaller batches to avoid overwhelming the system
- for (let i = 0; i < allPdfRecords.length; i += batchSize) {
- const batch = allPdfRecords.slice(i, i + batchSize);
- const updatePromises = [];
+ for (const record of batch) {
+ try {
+ // Check if file still exists
+ if (!fs.existsSync(record.original_path)) {
+ updatePromises.push(
+ supabase
+ .from('uploader')
+ .update({
+ status: 'file-not-found',
+ message: 'File no longer exists at original path',
+ })
+ .eq('id', record.id),
+ );
+ chunkErrors++;
+ totalErrors++;
+ continue;
+ }
+
+ // Perform detection
+ const detection = await detectionService.detectFile(
+ record.original_path,
+ );
+ chunkProcessed++;
+ totalProcessed++;
+
+ const updateData = {
+ status: detection.detectedType ? 'detected' : 'not-detected',
+ document_type: detection.detectedType,
+ num_pedimento: detection.detectedPedimento,
+ arela_path: detection.arelaPath,
+ message: detection.error || null,
+ };
+
+ // Extract RFC from fields if available
+ if (detection.fields) {
+ const rfcField = detection.fields.find(
+ (f) => f.name === 'rfc' && f.found,
+ );
+ if (rfcField) {
+ updateData.rfc = rfcField.value;
+ }
+ }
+
+ if (detection.detectedType) {
+ chunkDetected++;
+ totalDetected++;
+ }
+
+ updatePromises.push(
+ supabase.from('uploader').update(updateData).eq('id', record.id),
+ );
+ } catch (error) {
+ console.error(
+ `❌ Error detecting ${record.filename}:`,
+ error.message,
+ );
+ chunkErrors++;
+ totalErrors++;
 
- for (const record of batch) {
- try {
- // Check if file still exists
- if (!fs.existsSync(record.original_path)) {
  updatePromises.push(
  supabase
  .from('uploader')
  .update({
- status: 'file-not-found',
- message: 'File no longer exists at original path'
+ status: 'detection-error',
+ message: error.message,
  })
- .eq('id', record.id)
+ .eq('id', record.id),
  );
- totalErrors++;
- continue;
- }
-
- // Perform detection
- const detection = await detectionService.detectFile(record.original_path);
- totalProcessed++;
-
- const updateData = {
- status: detection.detectedType ? 'detected' : 'not-detected',
- document_type: detection.detectedType,
- num_pedimento: detection.detectedPedimento,
- arela_path: detection.arelaPath,
- message: detection.error || null
- };
-
- // Extract RFC from fields if available
- if (detection.fields) {
- const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
- if (rfcField) {
- updateData.rfc = rfcField.value;
- }
- }
-
- if (detection.detectedType) {
- totalDetected++;
  }
+ }
 
- updatePromises.push(
- supabase
- .from('uploader')
- .update(updateData)
- .eq('id', record.id)
- );
-
+ // Execute all updates in parallel for this batch
+ try {
+ await Promise.all(updatePromises);
  } catch (error) {
- console.error(`❌ Error detecting ${record.filename}:`, error.message);
- totalErrors++;
-
- updatePromises.push(
- supabase
- .from('uploader')
- .update({
- status: 'detection-error',
- message: error.message
- })
- .eq('id', record.id)
+ console.error(
+ `❌ Error updating batch in chunk ${chunkNumber}:`,
+ error.message,
  );
  }
+
+ // Update progress for this chunk
+ progressBar.update(Math.min(i + processingBatchSize, pdfRecords.length), {
+ detected: chunkDetected,
+ errors: chunkErrors,
+ });
  }
 
- // Execute all updates in parallel for this batch
- try {
- await Promise.all(updatePromises);
- } catch (error) {
- console.error(`❌ Error updating batch:`, error.message);
+ progressBar.stop();
+
+ console.log(
+ `✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
+ );
+
+ // Move to next chunk
+ offset += queryBatchSize;
+ chunkNumber++;
+
+ // If we got fewer records than queryBatchSize, we've reached the end
+ if (pdfRecords.length < queryBatchSize) {
+ console.log(
+ `📝 Reached end of records (chunk had ${pdfRecords.length} records).`,
+ );
+ break;
  }
 
- // Update progress
- progressBar.update(Math.min(i + batchSize, allPdfRecords.length), {
- detected: totalDetected,
- errors: totalErrors
- });
+ // Small delay between chunks to avoid overwhelming the database
+ await new Promise((resolve) => setTimeout(resolve, 500));
  }
 
- progressBar.stop();
-
- console.log(`📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`);
+ console.log(
+ `📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
+ );
  return {
  detectedCount: totalDetected,
  processedCount: totalProcessed,
- errorCount: totalErrors
+ errorCount: totalErrors,
  };
  };
 
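The rewritten Phase 2 above replaces the fetch-everything-then-process flow with a streaming one: fetch up to 1000 matching rows with .range(), work through them in small file-I/O batches, queue the per-record updates, and await each batch's updates together. A condensed sketch of that control flow (supabase-js v2 assumed; table and column names are taken from the diff, everything else is illustrative):

    import { createClient } from '@supabase/supabase-js';

    const supabase = createClient(process.env.SUPABASE_URL, process.env.SUPABASE_KEY);

    const detectInChunks = async (detectRecord, chunkSize = 1000, batchSize = 10) => {
      let offset = 0;
      while (true) {
        // Fetch the next window of PDF rows still awaiting detection
        const { data: records, error } = await supabase
          .from('uploader')
          .select('id, original_path, filename, file_extension, status')
          .eq('status', 'fs-stats')
          .eq('file_extension', 'pdf')
          .range(offset, offset + chunkSize - 1); // inclusive bounds
        if (error) throw new Error(`Fetch failed at offset ${offset}: ${error.message}`);
        if (!records || records.length === 0) break;

        for (let i = 0; i < records.length; i += batchSize) {
          const batch = records.slice(i, i + batchSize);
          const updates = [];
          for (const record of batch) {
            const patch = await detectRecord(record); // file I/O stays sequential
            updates.push(supabase.from('uploader').update(patch).eq('id', record.id));
          }
          // Builders resolve to { data, error } instead of throwing, so this
          // Promise.all only rejects on transport-level failures.
          await Promise.all(updates);
        }

        if (records.length < chunkSize) break; // short chunk: no more rows
        offset += chunkSize;
      }
    };

One subtlety of this shape: the query filters on status = 'fs-stats' while the updates move rows out of that status, so each pass shrinks the matching set underneath the advancing offset; re-querying from offset 0 every iteration (or filtering on a column the updates never touch) sidesteps that.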
@@ -926,7 +993,9 @@ const processFilesInBatches = async (
 
  if (options.statsOnly) {
  // OPTIMIZED Stats-only mode - Only read filesystem stats, no file detection
- console.log('📊 Phase 1: Processing files in optimized stats-only mode (no detection)...');
+ console.log(
+ '📊 Phase 1: Processing files in optimized stats-only mode (no detection)...',
+ );
 
  for (let i = 0; i < files.length; i += batchSize) {
  const batch = files.slice(i, i + batchSize);
@@ -934,7 +1003,7 @@ const processFilesInBatches = async (
  // OPTIMIZED: Batch read file stats to reduce I/O overhead
  const fileStatsResults = batchReadFileStats(batch);
  const statsFiles = fileStatsResults
- .filter(result => result.stats !== null) // Only include files with valid stats
+ .filter((result) => result.stats !== null) // Only include files with valid stats
  .map((result) => {
  const originalFileName = path.basename(result.path);
 
@@ -946,16 +1015,23 @@ const processFilesInBatches = async (
  });
 
  // Log any files that couldn't be read
- const failedFiles = fileStatsResults.filter(result => result.error !== null);
+ const failedFiles = fileStatsResults.filter(
+ (result) => result.error !== null,
+ );
  if (failedFiles.length > 0) {
- console.log(`⚠️ Could not read stats for ${failedFiles.length} files in batch`);
- failedFiles.forEach(failed => {
+ console.log(
+ `⚠️ Could not read stats for ${failedFiles.length} files in batch`,
+ );
+ failedFiles.forEach((failed) => {
  console.error(` ❌ ${failed.path}: ${failed.error}`);
  });
  }
 
  try {
- const result = await insertStatsOnlyToUploaderTable(statsFiles, options);
+ const result = await insertStatsOnlyToUploaderTable(
+ statsFiles,
+ options,
+ );
 
  totalUploaded += result.totalInserted;
  totalSkipped += result.totalSkipped;
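
batchReadFileStats is referenced here but its implementation is not part of this diff. A plausible, explicitly hypothetical shape that fits how the call sites use it, with each result carrying { path, stats, error } so callers can split valid stats from failures:

    import fs from 'fs';

    // Hypothetical sketch of the batchReadFileStats helper (not shown in
    // this diff): stat each path without throwing, reporting stats or error.
    const batchReadFileStats = (paths) =>
      paths.map((p) => {
        try {
          return { path: p, stats: fs.statSync(p), error: null };
        } catch (error) {
          return { path: p, stats: null, error: error.message };
        }
      });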
@@ -966,7 +1042,6 @@ const processFilesInBatches = async (
  failureCount: totalErrors,
  skippedCount: totalSkipped,
  });
-
  } catch (error) {
  console.error(`❌ Error processing stats batch:`, error.message);
  totalErrors += batch.length;
@@ -1238,13 +1313,19 @@ const uploadFilesByRfc = async (options = {}) => {
  }
 
  if (!API_BASE_URL || !API_TOKEN) {
- console.error('❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.');
+ console.error(
+ '❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.',
+ );
  process.exit(1);
  }
 
  if (!uploadRfcs || uploadRfcs.length === 0) {
- console.error('❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.');
- console.error(' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"');
+ console.error(
+ '❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.',
+ );
+ console.error(
+ ' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
+ );
  process.exit(1);
  }
 
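The uploadRfcs list checked above comes from the pipe-separated UPLOAD_RFCS environment variable; the parsing itself happens outside this diff. A hedged sketch of what such parsing typically looks like:

    // Illustrative parsing of a pipe-separated UPLOAD_RFCS value; only the
    // variable names appear in the diff, the implementation is assumed.
    const uploadRfcs = (process.env.UPLOAD_RFCS || '')
      .split('|')
      .map((rfc) => rfc.trim())
      .filter(Boolean);

    // UPLOAD_RFCS="RFC123456789|RFC987654321" -> ['RFC123456789', 'RFC987654321']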
@@ -1266,13 +1347,17 @@ const uploadFilesByRfc = async (options = {}) => {
 
  if (!rfcRecords || rfcRecords.length === 0) {
  console.log('ℹ️ No files found for the specified RFCs with arela_path');
- console.log(` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`);
+ console.log(
+ ` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
+ );
  return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
  }
 
  // Step 2: Get unique arela_paths from the RFC matches
- const uniqueArelaPaths = [...new Set(rfcRecords.map(r => r.arela_path))];
- console.log(`📋 Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`);
+ const uniqueArelaPaths = [...new Set(rfcRecords.map((r) => r.arela_path))];
+ console.log(
+ `📋 Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`,
+ );
 
  // Step 3: Get ALL files that have these arela_paths (including supporting documents)
  // Use pagination to ensure we get all files, regardless of count
@@ -1314,7 +1399,9 @@ const uploadFilesByRfc = async (options = {}) => {
  return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
  }
 
- console.log(`📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`);
+ console.log(
+ `📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`,
+ );
 
  // Group by RFC and arela_path for better organization
  const filesByRfc = allRelatedFiles.reduce((acc, record) => {
@@ -1328,8 +1415,12 @@ const uploadFilesByRfc = async (options = {}) => {
 
  console.log('📊 Files by RFC (including supporting documents):');
  for (const [rfc, files] of Object.entries(filesByRfc)) {
- const documentTypes = [...new Set(files.map(f => f.document_type || 'Unknown'))];
- console.log(` ${rfc}: ${files.length} files (${documentTypes.join(', ')})`);
+ const documentTypes = [
+ ...new Set(files.map((f) => f.document_type || 'Unknown')),
+ ];
+ console.log(
+ ` ${rfc}: ${files.length} files (${documentTypes.join(', ')})`,
+ );
  }
 
  // Group by arela_path for upload organization
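
Both groupings here (filesByRfc above and the arela_path grouping that follows) are the same reduce-into-buckets pattern. A self-contained sketch with illustrative sample data (only the field names come from the diff):

    const groupBy = (records, keyOf) =>
      records.reduce((acc, record) => {
        const key = keyOf(record);
        (acc[key] ??= []).push(record); // create the bucket on first use
        return acc;
      }, {});

    const sample = [
      { rfc: 'RFC123456789', arela_path: '2024/123', filename: 'a.pdf' },
      { rfc: 'RFC123456789', arela_path: '2024/124', filename: 'b.pdf' },
    ];
    const filesByRfc = groupBy(sample, (r) => r.rfc || 'unknown');
    // -> { RFC123456789: [ ...both records ] }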
@@ -1354,7 +1445,8 @@ const uploadFilesByRfc = async (options = {}) => {
 
  // Create progress bar
  const progressBar = new cliProgress.SingleBar({
- format: '🚀 Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
+ format:
+ '🚀 Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
  barCompleteChar: '█',
  barIncompleteChar: '░',
  hideCursor: true,
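
The format string above mixes cli-progress's built-in tokens ({bar}, {percentage}, {value}, {total}) with custom payload tokens ({uploaded}, {errors}, {skipped}) that are supplied through start() and update(). A runnable miniature of the same usage:

    import cliProgress from 'cli-progress';

    const bar = new cliProgress.SingleBar({
      format:
        'Uploading |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors}',
      barCompleteChar: '█',
      barIncompleteChar: '░',
      hideCursor: true,
    });

    bar.start(100, 0, { uploaded: 0, errors: 0 }); // total, start value, initial payload
    bar.update(10, { uploaded: 9, errors: 1 });    // advance and refresh payload tokens
    bar.stop();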
@@ -1377,7 +1469,9 @@ const uploadFilesByRfc = async (options = {}) => {
  const batchNumber = Math.floor(i / batchSize) + 1;
  const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
 
- console.log(`\n📦 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`);
+ console.log(
+ `\n📦 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`,
+ );
 
  // Prepare files for upload
  const filesToUpload = [];
@@ -1407,9 +1501,11 @@ const uploadFilesByRfc = async (options = {}) => {
  rfc: record.rfc,
  documentType: record.document_type,
  });
-
  } catch (error) {
- console.error(` ❌ Error reading file ${record.original_path}:`, error.message);
+ console.error(
+ ` ❌ Error reading file ${record.original_path}:`,
+ error.message,
+ );
  totalErrors++;
  }
 
@@ -1425,7 +1521,9 @@ const uploadFilesByRfc = async (options = {}) => {
  // Upload the batch if we have files
  if (filesToUpload.length > 0) {
  try {
- console.log(` 🚀 Uploading ${filesToUpload.length} files to Arela API...`);
+ console.log(
+ ` 🚀 Uploading ${filesToUpload.length} files to Arela API...`,
+ );
 
  const formData = new FormData();
 
@@ -1461,7 +1559,9 @@ const uploadFilesByRfc = async (options = {}) => {
 
  // Set folder structure for this group - concatenate custom prefix with arela_path
  const folderStructure = options.folderStructure
- ? `${options.folderStructure}/${arelaPath}`.replace(/\/+/g, '/').replace(/\/$/, '')
+ ? `${options.folderStructure}/${arelaPath}`
+ .replace(/\/+/g, '/')
+ .replace(/\/$/, '')
  : arelaPath;
  pathFormData.append('folderStructure', folderStructure);
  pathFormData.append('autoDetect', 'true');
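
The folderStructure expression collapses duplicate slashes and strips a trailing one after joining the optional prefix with arela_path. The same normalization in isolation:

    const joinFolders = (prefix, rest) =>
      `${prefix}/${rest}`.replace(/\/+/g, '/').replace(/\/$/, '');

    console.log(joinFolders('clients//acme/', '2024/123/')); // 'clients/acme/2024/123'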
@@ -1472,15 +1572,20 @@ const uploadFilesByRfc = async (options = {}) => {
  pathFormData.append('bucket', bucket);
  }
 
- console.log(` 📁 Uploading ${pathFiles.length} files to: ${folderStructure}`);
+ console.log(
+ ` 📁 Uploading ${pathFiles.length} files to: ${folderStructure}`,
+ );
 
- const response = await fetch(`${API_BASE_URL}/api/storage/batch-upload-and-process`, {
- method: 'POST',
- headers: {
- 'x-api-key': API_TOKEN,
+ const response = await fetch(
+ `${API_BASE_URL}/api/storage/batch-upload-and-process`,
+ {
+ method: 'POST',
+ headers: {
+ 'x-api-key': API_TOKEN,
+ },
+ body: pathFormData,
  },
- body: pathFormData,
- });
+ );
 
  if (!response.ok) {
  const errorText = await response.text();
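
The upload itself is a multipart POST authenticated with an x-api-key header. A hedged, self-contained sketch of the same request; the diff does not show how FormData is imported, so the form-data package (the usual pairing with node-fetch) and the 'files' field name are assumptions:

    import fetch from 'node-fetch';
    import FormData from 'form-data'; // assumed import; not shown in the diff
    import fs from 'fs';

    const uploadBatch = async (apiBaseUrl, apiToken, paths, folderStructure) => {
      const form = new FormData();
      for (const p of paths) {
        form.append('files', fs.createReadStream(p)); // field name is an assumption
      }
      form.append('folderStructure', folderStructure);
      form.append('autoDetect', 'true');

      const response = await fetch(
        `${apiBaseUrl}/api/storage/batch-upload-and-process`,
        {
          method: 'POST',
          headers: { 'x-api-key': apiToken }, // multipart boundary is set automatically
          body: form,
        },
      );
      if (!response.ok) {
        throw new Error(`Upload failed (${response.status}): ${await response.text()}`);
      }
      return response.json();
    };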
@@ -1490,22 +1595,31 @@ const uploadFilesByRfc = async (options = {}) => {
 
  const result = await response.json();
 
  // Check if upload was successful based on stats rather than success field
- const isSuccessful = result.stats && result.stats.uploadedCount > 0 && result.stats.errorCount === 0;
+ const isSuccessful =
+ result.stats &&
+ result.stats.uploadedCount > 0 &&
+ result.stats.errorCount === 0;
 
  if (isSuccessful) {
- console.log(` ✅ Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`);
+ console.log(
+ ` ✅ Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`,
+ );
  totalUploaded += result.stats.uploadedCount;
 
  if (result.stats.detectedCount > 0) {
- console.log(` 🔍 Files detected: ${result.stats.detectedCount}`);
+ console.log(
+ ` 🔍 Files detected: ${result.stats.detectedCount}`,
+ );
  }
  if (result.stats.organizedCount > 0) {
- console.log(` 📁 Files organized: ${result.stats.organizedCount}`);
+ console.log(
+ ` 📁 Files organized: ${result.stats.organizedCount}`,
+ );
  }
  } else {
  console.error(` ❌ Upload failed for ${folderStructure}:`);
  if (result.errors && result.errors.length > 0) {
- result.errors.forEach(error => {
+ result.errors.forEach((error) => {
  console.error(` - ${error.fileName}: ${error.error}`);
  });
  }
@@ -1513,18 +1627,20 @@ const uploadFilesByRfc = async (options = {}) => {
  }
 
  // Small delay between path groups
- await new Promise(resolve => setTimeout(resolve, 100));
+ await new Promise((resolve) => setTimeout(resolve, 100));
  }
-
  } catch (error) {
- console.error(` ❌ Error uploading batch ${batchNumber}:`, error.message);
+ console.error(
+ ` ❌ Error uploading batch ${batchNumber}:`,
+ error.message,
+ );
  totalErrors += filesToUpload.length;
  }
  }
 
  // Small delay between batches
  if (i + batchSize < allRelatedFiles.length) {
- await new Promise(resolve => setTimeout(resolve, BATCH_DELAY));
+ await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
  }
  }
 
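The inline delays above are the standard promise-wrapped setTimeout; hoisting the pattern into a helper keeps the batch loops readable (sleep is an illustrative name, not one from the diff):

    const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

    await sleep(100); // pause between path groups or batches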
@@ -1568,7 +1684,10 @@ const propagateArelaPath = async (options = {}) => {
  .not('arela_path', 'is', null);
 
  if (pedimentoError) {
- console.error('❌ Error fetching pedimento records:', pedimentoError.message);
+ console.error(
+ '❌ Error fetching pedimento records:',
+ pedimentoError.message,
+ );
  return { processedCount: 0, updatedCount: 0, errorCount: 1 };
  }
 
@@ -1577,7 +1696,9 @@ const propagateArelaPath = async (options = {}) => {
  return { processedCount: 0, updatedCount: 0, errorCount: 0 };
  }
 
- console.log(`📋 Found ${pedimentoRecords.length} pedimento records with arela_path`);
+ console.log(
+ `📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
+ );
 
  let totalProcessed = 0;
  let totalUpdated = 0;
@@ -1585,7 +1706,8 @@ const propagateArelaPath = async (options = {}) => {
 
  // Create progress bar
  const progressBar = new cliProgress.SingleBar({
- format: '🔄 Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
+ format:
+ '🔄 Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
  barCompleteChar: '█',
  barIncompleteChar: '░',
  hideCursor: true,
@@ -1611,9 +1733,11 @@ const propagateArelaPath = async (options = {}) => {
 
  // Extract folder part from existing arela_path by removing the filename
  const existingPath = pedimento.arela_path;
- const folderArelaPath = existingPath.includes('/') ?
- existingPath.substring(0, existingPath.lastIndexOf('/')) + '/' :
- existingPath.endsWith('/') ? existingPath : existingPath + '/';
+ const folderArelaPath = existingPath.includes('/')
+ ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
+ : existingPath.endsWith('/')
+ ? existingPath
+ : existingPath + '/';
 
  console.log(` 🎯 Original arela path: ${existingPath}`);
  console.log(` 📁 Folder arela path: ${folderArelaPath}`);
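
Restated as a standalone helper, the reformatted ternary strips the filename from arela_path and guarantees a trailing slash. Note the endsWith('/') branch can never be reached: any string ending in '/' also contains '/', so the first condition already captures it.

    const folderOf = (p) =>
      p.includes('/')
        ? p.substring(0, p.lastIndexOf('/')) + '/'
        : p.endsWith('/')
          ? p
          : p + '/';

    console.log(folderOf('2024/123/pedimento.pdf')); // '2024/123/'
    console.log(folderOf('pedimento.pdf'));          // 'pedimento.pdf/'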
@@ -1627,7 +1751,10 @@ const propagateArelaPath = async (options = {}) => {
  .neq('id', pedimento.id); // Exclude the pedimento itself
 
  if (relatedError) {
- console.error(`❌ Error finding related files for ${pedimento.filename}:`, relatedError.message);
+ console.error(
+ `❌ Error finding related files for ${pedimento.filename}:`,
+ relatedError.message,
+ );
  totalErrors++;
  continue;
  }
@@ -1637,11 +1764,13 @@ const propagateArelaPath = async (options = {}) => {
  continue;
  }
 
- console.log(` 📄 Found ${relatedFiles.length} related files to update:`);
+ console.log(
+ ` 📄 Found ${relatedFiles.length} related files to update:`,
+ );
 
  // Show first 10 files, then indicate if there are more
  const filesToShow = relatedFiles.slice(0, 10);
- filesToShow.forEach(file => {
+ filesToShow.forEach((file) => {
  console.log(` - ${file.filename}`);
  });
 
@@ -1651,18 +1780,22 @@ const propagateArelaPath = async (options = {}) => {
 
  // Process files in batches to avoid URI length limitations
  const BATCH_SIZE = 50; // Process 50 files at a time
- const fileIds = relatedFiles.map(f => f.id);
+ const fileIds = relatedFiles.map((f) => f.id);
  let batchErrors = 0;
  let batchUpdated = 0;
 
- console.log(` 🔄 Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`);
+ console.log(
+ ` 🔄 Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`,
+ );
 
  for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
  const batchIds = fileIds.slice(i, i + BATCH_SIZE);
  const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
  const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
 
- console.log(` 📦 Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`);
+ console.log(
+ ` 📦 Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
+ );
 
  try {
  const { error: updateError } = await supabase
@@ -1671,33 +1804,45 @@ const propagateArelaPath = async (options = {}) => {
  .in('id', batchIds);
 
  if (updateError) {
- console.error(` ❌ Error in batch ${batchNumber}:`, updateError.message);
+ console.error(
+ ` ❌ Error in batch ${batchNumber}:`,
+ updateError.message,
+ );
  batchErrors++;
  } else {
- console.log(` ✅ Batch ${batchNumber} completed: ${batchIds.length} files updated`);
+ console.log(
+ ` ✅ Batch ${batchNumber} completed: ${batchIds.length} files updated`,
+ );
  batchUpdated += batchIds.length;
  }
  } catch (error) {
- console.error(` ❌ Exception in batch ${batchNumber}:`, error.message);
+ console.error(
+ ` ❌ Exception in batch ${batchNumber}:`,
+ error.message,
+ );
  batchErrors++;
  }
 
  // Small delay between batches to avoid overwhelming the database
  if (i + BATCH_SIZE < fileIds.length) {
- await new Promise(resolve => setTimeout(resolve, 100));
+ await new Promise((resolve) => setTimeout(resolve, 100));
  }
  }
 
  if (batchErrors > 0) {
- console.error(`❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`);
+ console.error(
+ `❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
+ );
  totalErrors++;
  } else {
  console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
  totalUpdated += batchUpdated;
  }
-
  } catch (error) {
- console.error(`❌ Error processing ${pedimento.filename}:`, error.message);
+ console.error(
+ `❌ Error processing ${pedimento.filename}:`,
+ error.message,
+ );
  totalErrors++;
  }
 
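The BATCH_SIZE of 50 above exists because PostgREST encodes .in() filters into the request URI, so updating thousands of ids in one call can exceed URI length limits. The pattern in isolation (supabase-js v2 assumed; table and column names from the diff):

    const updateInChunks = async (supabase, ids, patch, chunkSize = 50) => {
      let failedChunks = 0;
      for (let i = 0; i < ids.length; i += chunkSize) {
        const chunk = ids.slice(i, i + chunkSize);
        const { error } = await supabase
          .from('uploader')
          .update(patch)
          .in('id', chunk); // keeps the id list, and the URI, short
        if (error) failedChunks++;
      }
      return failedChunks;
    };

    // e.g. await updateInChunks(supabase, fileIds, { arela_path: folderArelaPath });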
@@ -1761,12 +1906,27 @@ program
  'Automatically detect year/pedimento from file paths',
  )
  .option('--client-path <path>', 'Client path for metadata tracking')
- .option('--stats-only', 'Phase 1: Only read filesystem stats and insert to database (no file reading or detection)')
+ .option(
+ '--stats-only',
+ 'Phase 1: Only read filesystem stats and insert to database (no file reading or detection)',
+ )
  .option('--no-detect', 'Disable document type detection in stats-only mode')
- .option('--detect-pdfs', 'Phase 2: Process PDF files in database for pedimento-simplificado detection')
- .option('--propagate-arela-path', 'Phase 3: Propagate arela_path from pedimento_simplificado records to related files with same base path')
- .option('--upload-by-rfc', 'Phase 4: Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable')
- .option('--run-all-phases', 'Run all 4 phases in sequence: stats → detect → propagate → upload')
+ .option(
+ '--detect-pdfs',
+ 'Phase 2: Process PDF files in database for pedimento-simplificado detection',
+ )
+ .option(
+ '--propagate-arela-path',
+ 'Phase 3: Propagate arela_path from pedimento_simplificado records to related files with same base path',
+ )
+ .option(
+ '--upload-by-rfc',
+ 'Phase 4: Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable',
+ )
+ .option(
+ '--run-all-phases',
+ 'Run all 4 phases in sequence: stats → detect → propagate → upload',
+ )
  .action(async (options) => {
  if (options.version) {
  console.log(packageVersion);
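
The new multi-line .option() calls are plain commander usage; each boolean flag becomes a camel-cased property on the options object passed to .action(). A minimal sketch (flag names from the diff; the program name is illustrative):

    import { Command } from 'commander';

    const program = new Command('arela-uploader');

    program
      .option('--client-path <path>', 'Client path for metadata tracking')
      .option('--stats-only', 'Phase 1: Only read filesystem stats and insert to database')
      .option('--detect-pdfs', 'Phase 2: Process PDF files for pedimento-simplificado detection')
      .option('--run-all-phases', 'Run all 4 phases in sequence')
      .action(async (options) => {
        // --stats-only -> options.statsOnly, --run-all-phases -> options.runAllPhases
        if (options.runAllPhases) console.log('running every phase');
      });

    await program.parseAsync(process.argv);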
@@ -1782,7 +1942,9 @@ program
  batchSize: parseInt(options.batchSize) || 10,
  });
 
- console.log(`✅ Phase 2 Complete: ${result.detectedCount} detected, ${result.errorCount} errors`);
+ console.log(
+ `✅ Phase 2 Complete: ${result.detectedCount} detected, ${result.errorCount} errors`,
+ );
  return;
  }
 
@@ -1822,8 +1984,12 @@ program
  // Also initialize Supabase for database queries
  if (!supabase) {
  if (!supabaseUrl || !supabaseKey) {
- console.error('❌ RFC upload requires Supabase credentials for database queries.');
- console.error(' Please set SUPABASE_URL and SUPABASE_KEY environment variables.');
+ console.error(
+ '❌ RFC upload requires Supabase credentials for database queries.',
+ );
+ console.error(
+ ' Please set SUPABASE_URL and SUPABASE_KEY environment variables.',
+ );
  process.exit(1);
  }
 
@@ -1857,7 +2023,9 @@ program
  const concurrency = parseInt(options.concurrency) || 10;
 
  if (options.statsOnly) {
- console.log('📊 Mode: Stats Only - Reading file stats and inserting to uploader table');
+ console.log(
+ '📊 Mode: Stats Only - Reading file stats and inserting to uploader table',
+ );
  console.log('🚫 Files will NOT be uploaded');
  if (options.detect !== false) {
  console.log('🔍 Document type detection ENABLED for supported files');
@@ -1965,14 +2133,18 @@ program
  const detectionResult = await detectPedimentosInDatabase({
  batchSize: parseInt(options.batchSize) || 10,
  });
- console.log(`✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`);
+ console.log(
+ `✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`,
+ );
 
  // Phase 3: Propagate arela_path
  console.log('\n📁 === PHASE 3: Propagate Arela Paths ===');
  const propagateResult = await propagateArelaPath({
  showProgress: options.showStats || true,
  });
- console.log(`✅ Phase 3 Complete: ${propagateResult.updatedCount || 0} paths propagated`);
+ console.log(
+ `✅ Phase 3 Complete: ${propagateResult.updatedCount || 0} paths propagated`,
+ );
 
  // Phase 4: Upload by RFC
  if (uploadRfcs && uploadRfcs.length > 0) {
@@ -1989,18 +2161,22 @@ program
  console.log(`✅ Phase 4 Complete: Upload finished`);
  } else {
  console.log('\n⚠️ === PHASE 4: Upload by RFC ===');
- console.log('⚠️ UPLOAD_RFCS environment variable not configured, skipping Phase 4');
+ console.log(
+ '⚠️ UPLOAD_RFCS environment variable not configured, skipping Phase 4',
+ );
  }
 
  console.log('\n🎉 All 4 phases completed successfully!');
-
  } catch (error) {
  console.error(`❌ Error in multi-phase execution:`, error.message);
  process.exit(1);
  }
  }
 
- if (options.showStats && (sanitizationCache.size > 0 || pathDetectionCache.size > 0)) {
+ if (
+ options.showStats &&
+ (sanitizationCache.size > 0 || pathDetectionCache.size > 0)
+ ) {
  console.log(`📊 Performance Statistics:`);
  if (sanitizationCache.size > 0) {
  console.log(