@arela/uploader 0.2.1 → 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/document-type-shared.js +22 -8
- package/src/document-types/pedimento-simplificado.js +11 -29
- package/src/file-detection.js +43 -28
- package/src/index.js +409 -219
- package/OPTIMIZATION_SUMMARY.md +0 -154
- package/PERFORMANCE_OPTIMIZATIONS.md +0 -270
package/src/index.js
CHANGED
|
@@ -9,6 +9,7 @@ import { globby } from 'globby';
|
|
|
9
9
|
import mime from 'mime-types';
|
|
10
10
|
import fetch from 'node-fetch';
|
|
11
11
|
import path from 'path';
|
|
12
|
+
|
|
12
13
|
import { FileDetectionService } from './file-detection.js';
|
|
13
14
|
|
|
14
15
|
config();
|
|
@@ -209,7 +210,10 @@ const writeLog = (message) => {
|
|
|
209
210
|
|
|
210
211
|
// Flush if buffer is full or enough time has passed
|
|
211
212
|
const now = Date.now();
|
|
212
|
-
if (
|
|
213
|
+
if (
|
|
214
|
+
logBuffer.length >= LOG_BUFFER_SIZE ||
|
|
215
|
+
now - lastFlushTime >= LOG_FLUSH_INTERVAL
|
|
216
|
+
) {
|
|
213
217
|
flushLogBuffer();
|
|
214
218
|
}
|
|
215
219
|
} catch (error) {
|
|
@@ -233,7 +237,8 @@ process.on('SIGTERM', () => {
|
|
|
233
237
|
*/
|
|
234
238
|
const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true';
|
|
235
239
|
const BATCH_DELAY = parseInt(process.env.BATCH_DELAY) || 100; // Configurable delay between batches
|
|
236
|
-
const PROGRESS_UPDATE_INTERVAL =
|
|
240
|
+
const PROGRESS_UPDATE_INTERVAL =
|
|
241
|
+
parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10; // Update progress every N items
|
|
237
242
|
|
|
238
243
|
const logVerbose = (message) => {
|
|
239
244
|
if (VERBOSE_LOGGING) {
|
|
@@ -531,7 +536,9 @@ const uploadToSupabase = async (file, uploadPath) => {
|
|
|
531
536
|
*/
|
|
532
537
|
const insertStatsToUploaderTable = async (files, options) => {
|
|
533
538
|
if (!supabase) {
|
|
534
|
-
throw new Error(
|
|
539
|
+
throw new Error(
|
|
540
|
+
'Supabase client not initialized. Stats mode requires Supabase connection.',
|
|
541
|
+
);
|
|
535
542
|
}
|
|
536
543
|
|
|
537
544
|
const detectionService = new FileDetectionService();
|
|
@@ -550,7 +557,9 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
550
557
|
.limit(1);
|
|
551
558
|
|
|
552
559
|
if (checkError) {
|
|
553
|
-
console.error(
|
|
560
|
+
console.error(
|
|
561
|
+
`❌ Error checking for existing record: ${checkError.message}`,
|
|
562
|
+
);
|
|
554
563
|
continue;
|
|
555
564
|
}
|
|
556
565
|
|
|
@@ -569,7 +578,7 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
569
578
|
arela_path: null,
|
|
570
579
|
status: 'stats',
|
|
571
580
|
rfc: null,
|
|
572
|
-
message: null
|
|
581
|
+
message: null,
|
|
573
582
|
};
|
|
574
583
|
|
|
575
584
|
// Try to detect document type for supported files
|
|
@@ -588,7 +597,9 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
588
597
|
}
|
|
589
598
|
|
|
590
599
|
// Extract RFC from fields if available
|
|
591
|
-
const rfcField = detection.fields.find(
|
|
600
|
+
const rfcField = detection.fields.find(
|
|
601
|
+
(f) => f.name === 'rfc' && f.found,
|
|
602
|
+
);
|
|
592
603
|
if (rfcField) {
|
|
593
604
|
record.rfc = rfcField.value;
|
|
594
605
|
}
|
|
@@ -616,7 +627,9 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
616
627
|
return [];
|
|
617
628
|
}
|
|
618
629
|
|
|
619
|
-
console.log(
|
|
630
|
+
console.log(
|
|
631
|
+
`💾 Inserting ${records.length} new records into uploader table...`,
|
|
632
|
+
);
|
|
620
633
|
|
|
621
634
|
const { data, error } = await supabase
|
|
622
635
|
.from('uploader')
|
|
@@ -637,7 +650,9 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
637
650
|
*/
|
|
638
651
|
const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
639
652
|
if (!supabase) {
|
|
640
|
-
throw new Error(
|
|
653
|
+
throw new Error(
|
|
654
|
+
'Supabase client not initialized. Stats mode requires Supabase connection.',
|
|
655
|
+
);
|
|
641
656
|
}
|
|
642
657
|
|
|
643
658
|
const batchSize = 1000; // Large batch size for performance
|
|
@@ -650,7 +665,10 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
|
650
665
|
// Use pre-computed stats if available, otherwise call fs.statSync
|
|
651
666
|
const stats = file.stats || fs.statSync(file.path);
|
|
652
667
|
const originalPath = options.clientPath || file.path;
|
|
653
|
-
const fileExtension = path
|
|
668
|
+
const fileExtension = path
|
|
669
|
+
.extname(file.path)
|
|
670
|
+
.toLowerCase()
|
|
671
|
+
.replace('.', '');
|
|
654
672
|
|
|
655
673
|
const record = {
|
|
656
674
|
document_type: null,
|
|
@@ -664,7 +682,7 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
|
664
682
|
message: null,
|
|
665
683
|
file_extension: fileExtension,
|
|
666
684
|
created_at: new Date().toISOString(),
|
|
667
|
-
modified_at: stats.mtime.toISOString()
|
|
685
|
+
modified_at: stats.mtime.toISOString(),
|
|
668
686
|
};
|
|
669
687
|
|
|
670
688
|
allRecords.push(record);
|
|
@@ -678,7 +696,9 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
|
678
696
|
return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
|
|
679
697
|
}
|
|
680
698
|
|
|
681
|
-
console.log(
|
|
699
|
+
console.log(
|
|
700
|
+
`💾 Bulk inserting ${allRecords.length} file stats in batches of ${batchSize}...`,
|
|
701
|
+
);
|
|
682
702
|
|
|
683
703
|
let totalInserted = 0;
|
|
684
704
|
let totalSkipped = 0;
|
|
@@ -689,16 +709,17 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
|
689
709
|
|
|
690
710
|
try {
|
|
691
711
|
// OPTIMIZED: Use upsert without select to avoid unnecessary data transfer
|
|
692
|
-
const { error, count } = await supabase
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
count: 'exact'
|
|
698
|
-
});
|
|
712
|
+
const { error, count } = await supabase.from('uploader').upsert(batch, {
|
|
713
|
+
onConflict: 'original_path',
|
|
714
|
+
ignoreDuplicates: false,
|
|
715
|
+
count: 'exact',
|
|
716
|
+
});
|
|
699
717
|
|
|
700
718
|
if (error) {
|
|
701
|
-
console.error(
|
|
719
|
+
console.error(
|
|
720
|
+
`❌ Error inserting batch ${Math.floor(i / batchSize) + 1}:`,
|
|
721
|
+
error.message,
|
|
722
|
+
);
|
|
702
723
|
continue;
|
|
703
724
|
}
|
|
704
725
|
|
|
@@ -715,45 +736,65 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
|
715
736
|
// This is reasonable since stats-only is typically run on new file sets
|
|
716
737
|
totalInserted += affected;
|
|
717
738
|
|
|
718
|
-
console.log(
|
|
739
|
+
console.log(
|
|
740
|
+
`✅ Batch ${Math.floor(i / batchSize) + 1}: ${affected} rows processed`,
|
|
741
|
+
);
|
|
719
742
|
} catch (error) {
|
|
720
|
-
console.error(
|
|
743
|
+
console.error(
|
|
744
|
+
`❌ Unexpected error in batch ${Math.floor(i / batchSize) + 1}:`,
|
|
745
|
+
error.message,
|
|
746
|
+
);
|
|
721
747
|
}
|
|
722
748
|
}
|
|
723
749
|
|
|
724
750
|
// Calculate skipped as difference between total records and inserted
|
|
725
751
|
totalSkipped = allRecords.length - totalInserted;
|
|
726
752
|
|
|
727
|
-
console.log(
|
|
753
|
+
console.log(
|
|
754
|
+
`📊 Phase 1 Summary: ${totalInserted} records processed, estimated ${totalSkipped} were updates`,
|
|
755
|
+
);
|
|
728
756
|
|
|
729
757
|
return {
|
|
730
758
|
totalInserted,
|
|
731
759
|
totalSkipped,
|
|
732
|
-
totalProcessed: allRecords.length
|
|
760
|
+
totalProcessed: allRecords.length,
|
|
733
761
|
};
|
|
734
762
|
};
|
|
735
763
|
|
|
736
764
|
/**
|
|
737
765
|
* PHASE 2: Process PDF files for pedimento-simplificado detection
|
|
738
766
|
* Only processes files with status 'fs-stats' and file_extension 'pdf'
|
|
767
|
+
* Processes records in chunks of 1000 to avoid loading all records into memory
|
|
739
768
|
*/
|
|
740
769
|
const detectPedimentosInDatabase = async (options = {}) => {
|
|
741
770
|
if (!supabase) {
|
|
742
771
|
throw new Error('Supabase client not initialized.');
|
|
743
772
|
}
|
|
744
773
|
|
|
745
|
-
console.log(
|
|
774
|
+
console.log(
|
|
775
|
+
'🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...',
|
|
776
|
+
);
|
|
777
|
+
|
|
778
|
+
const detectionService = new FileDetectionService();
|
|
779
|
+
const processingBatchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
|
|
780
|
+
const queryBatchSize = 1000; // Process 1000 records at a time
|
|
746
781
|
|
|
747
|
-
|
|
748
|
-
let
|
|
749
|
-
let
|
|
782
|
+
let totalDetected = 0;
|
|
783
|
+
let totalProcessed = 0;
|
|
784
|
+
let totalErrors = 0;
|
|
750
785
|
let offset = 0;
|
|
751
|
-
|
|
786
|
+
let chunkNumber = 1;
|
|
787
|
+
|
|
788
|
+
console.log('� Processing PDF files in chunks of 1000 records...');
|
|
752
789
|
|
|
753
|
-
|
|
790
|
+
// Process records in chunks of 1000
|
|
791
|
+
while (true) {
|
|
792
|
+
console.log(
|
|
793
|
+
`\n📥 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
|
|
794
|
+
);
|
|
754
795
|
|
|
755
|
-
|
|
756
|
-
const { data:
|
|
796
|
+
// Fetch next chunk of PDF records
|
|
797
|
+
const { data: pdfRecords, error: queryError } = await supabase
|
|
757
798
|
.from('uploader')
|
|
758
799
|
.select('id, original_path, filename, file_extension, status')
|
|
759
800
|
.eq('status', 'fs-stats')
|
|
@@ -762,131 +803,157 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
762
803
|
.range(offset, offset + queryBatchSize - 1);
|
|
763
804
|
|
|
764
805
|
if (queryError) {
|
|
765
|
-
throw new Error(
|
|
806
|
+
throw new Error(
|
|
807
|
+
`Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
|
|
808
|
+
);
|
|
766
809
|
}
|
|
767
810
|
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
offset += queryBatchSize;
|
|
773
|
-
console.log(`📄 Fetched ${batch.length} PDF records (total: ${allPdfRecords.length})`);
|
|
811
|
+
// If no records found, we're done
|
|
812
|
+
if (!pdfRecords || pdfRecords.length === 0) {
|
|
813
|
+
console.log(`📝 No more PDF files found. Processing completed.`);
|
|
814
|
+
break;
|
|
774
815
|
}
|
|
775
|
-
}
|
|
776
816
|
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
}
|
|
817
|
+
console.log(
|
|
818
|
+
`� Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
|
|
819
|
+
);
|
|
781
820
|
|
|
782
|
-
|
|
821
|
+
// Create progress bar for this chunk
|
|
822
|
+
const progressBar = new cliProgress.SingleBar({
|
|
823
|
+
format: `🔍 Chunk ${chunkNumber} |{bar}| {percentage}% | {value}/{total} | Detected: {detected} | Errors: {errors}`,
|
|
824
|
+
barCompleteChar: '█',
|
|
825
|
+
barIncompleteChar: '░',
|
|
826
|
+
hideCursor: true,
|
|
827
|
+
});
|
|
783
828
|
|
|
784
|
-
|
|
785
|
-
const batchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
|
|
786
|
-
let totalDetected = 0;
|
|
787
|
-
let totalProcessed = 0;
|
|
788
|
-
let totalErrors = 0;
|
|
829
|
+
progressBar.start(pdfRecords.length, 0, { detected: 0, errors: 0 });
|
|
789
830
|
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
|
|
794
|
-
|
|
795
|
-
|
|
796
|
-
|
|
831
|
+
let chunkDetected = 0;
|
|
832
|
+
let chunkProcessed = 0;
|
|
833
|
+
let chunkErrors = 0;
|
|
834
|
+
|
|
835
|
+
// Process files in smaller batches within this chunk
|
|
836
|
+
for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
|
|
837
|
+
const batch = pdfRecords.slice(i, i + processingBatchSize);
|
|
838
|
+
const updatePromises = [];
|
|
797
839
|
|
|
798
|
-
|
|
840
|
+
for (const record of batch) {
|
|
841
|
+
try {
|
|
842
|
+
// Check if file still exists
|
|
843
|
+
if (!fs.existsSync(record.original_path)) {
|
|
844
|
+
updatePromises.push(
|
|
845
|
+
supabase
|
|
846
|
+
.from('uploader')
|
|
847
|
+
.update({
|
|
848
|
+
status: 'file-not-found',
|
|
849
|
+
message: 'File no longer exists at original path',
|
|
850
|
+
})
|
|
851
|
+
.eq('id', record.id),
|
|
852
|
+
);
|
|
853
|
+
chunkErrors++;
|
|
854
|
+
totalErrors++;
|
|
855
|
+
continue;
|
|
856
|
+
}
|
|
799
857
|
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
858
|
+
// Perform detection
|
|
859
|
+
const detection = await detectionService.detectFile(
|
|
860
|
+
record.original_path,
|
|
861
|
+
);
|
|
862
|
+
chunkProcessed++;
|
|
863
|
+
totalProcessed++;
|
|
864
|
+
|
|
865
|
+
const updateData = {
|
|
866
|
+
status: detection.detectedType ? 'detected' : 'not-detected',
|
|
867
|
+
document_type: detection.detectedType,
|
|
868
|
+
num_pedimento: detection.detectedPedimento,
|
|
869
|
+
arela_path: detection.arelaPath,
|
|
870
|
+
message: detection.error || null,
|
|
871
|
+
};
|
|
872
|
+
|
|
873
|
+
// Extract RFC from fields if available
|
|
874
|
+
if (detection.fields) {
|
|
875
|
+
const rfcField = detection.fields.find(
|
|
876
|
+
(f) => f.name === 'rfc' && f.found,
|
|
877
|
+
);
|
|
878
|
+
if (rfcField) {
|
|
879
|
+
updateData.rfc = rfcField.value;
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
|
|
883
|
+
if (detection.detectedType) {
|
|
884
|
+
chunkDetected++;
|
|
885
|
+
totalDetected++;
|
|
886
|
+
}
|
|
887
|
+
|
|
888
|
+
updatePromises.push(
|
|
889
|
+
supabase.from('uploader').update(updateData).eq('id', record.id),
|
|
890
|
+
);
|
|
891
|
+
} catch (error) {
|
|
892
|
+
console.error(
|
|
893
|
+
`❌ Error detecting ${record.filename}:`,
|
|
894
|
+
error.message,
|
|
895
|
+
);
|
|
896
|
+
chunkErrors++;
|
|
897
|
+
totalErrors++;
|
|
804
898
|
|
|
805
|
-
for (const record of batch) {
|
|
806
|
-
try {
|
|
807
|
-
// Check if file still exists
|
|
808
|
-
if (!fs.existsSync(record.original_path)) {
|
|
809
899
|
updatePromises.push(
|
|
810
900
|
supabase
|
|
811
901
|
.from('uploader')
|
|
812
902
|
.update({
|
|
813
|
-
status: '
|
|
814
|
-
message:
|
|
903
|
+
status: 'detection-error',
|
|
904
|
+
message: error.message,
|
|
815
905
|
})
|
|
816
|
-
.eq('id', record.id)
|
|
906
|
+
.eq('id', record.id),
|
|
817
907
|
);
|
|
818
|
-
totalErrors++;
|
|
819
|
-
continue;
|
|
820
908
|
}
|
|
909
|
+
}
|
|
821
910
|
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
const updateData = {
|
|
827
|
-
status: detection.detectedType ? 'detected' : 'not-detected',
|
|
828
|
-
document_type: detection.detectedType,
|
|
829
|
-
num_pedimento: detection.detectedPedimento,
|
|
830
|
-
arela_path: detection.arelaPath,
|
|
831
|
-
message: detection.error || null
|
|
832
|
-
};
|
|
833
|
-
|
|
834
|
-
// Extract RFC from fields if available
|
|
835
|
-
if (detection.fields) {
|
|
836
|
-
const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
|
|
837
|
-
if (rfcField) {
|
|
838
|
-
updateData.rfc = rfcField.value;
|
|
839
|
-
}
|
|
840
|
-
}
|
|
841
|
-
|
|
842
|
-
if (detection.detectedType) {
|
|
843
|
-
totalDetected++;
|
|
844
|
-
}
|
|
845
|
-
|
|
846
|
-
updatePromises.push(
|
|
847
|
-
supabase
|
|
848
|
-
.from('uploader')
|
|
849
|
-
.update(updateData)
|
|
850
|
-
.eq('id', record.id)
|
|
851
|
-
);
|
|
852
|
-
|
|
911
|
+
// Execute all updates in parallel for this batch
|
|
912
|
+
try {
|
|
913
|
+
await Promise.all(updatePromises);
|
|
853
914
|
} catch (error) {
|
|
854
|
-
console.error(
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
updatePromises.push(
|
|
858
|
-
supabase
|
|
859
|
-
.from('uploader')
|
|
860
|
-
.update({
|
|
861
|
-
status: 'detection-error',
|
|
862
|
-
message: error.message
|
|
863
|
-
})
|
|
864
|
-
.eq('id', record.id)
|
|
915
|
+
console.error(
|
|
916
|
+
`❌ Error updating batch in chunk ${chunkNumber}:`,
|
|
917
|
+
error.message,
|
|
865
918
|
);
|
|
866
919
|
}
|
|
920
|
+
|
|
921
|
+
// Update progress for this chunk
|
|
922
|
+
progressBar.update(Math.min(i + processingBatchSize, pdfRecords.length), {
|
|
923
|
+
detected: chunkDetected,
|
|
924
|
+
errors: chunkErrors,
|
|
925
|
+
});
|
|
867
926
|
}
|
|
868
927
|
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
928
|
+
progressBar.stop();
|
|
929
|
+
|
|
930
|
+
console.log(
|
|
931
|
+
`✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
|
|
932
|
+
);
|
|
933
|
+
|
|
934
|
+
// Move to next chunk
|
|
935
|
+
offset += queryBatchSize;
|
|
936
|
+
chunkNumber++;
|
|
937
|
+
|
|
938
|
+
// If we got fewer records than queryBatchSize, we've reached the end
|
|
939
|
+
if (pdfRecords.length < queryBatchSize) {
|
|
940
|
+
console.log(
|
|
941
|
+
`📝 Reached end of records (chunk had ${pdfRecords.length} records).`,
|
|
942
|
+
);
|
|
943
|
+
break;
|
|
874
944
|
}
|
|
875
945
|
|
|
876
|
-
//
|
|
877
|
-
|
|
878
|
-
detected: totalDetected,
|
|
879
|
-
errors: totalErrors
|
|
880
|
-
});
|
|
946
|
+
// Small delay between chunks to avoid overwhelming the database
|
|
947
|
+
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
881
948
|
}
|
|
882
949
|
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
950
|
+
console.log(
|
|
951
|
+
`📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
|
|
952
|
+
);
|
|
886
953
|
return {
|
|
887
954
|
detectedCount: totalDetected,
|
|
888
955
|
processedCount: totalProcessed,
|
|
889
|
-
errorCount: totalErrors
|
|
956
|
+
errorCount: totalErrors,
|
|
890
957
|
};
|
|
891
958
|
};
|
|
892
959
|
|
|
@@ -926,7 +993,9 @@ const processFilesInBatches = async (
|
|
|
926
993
|
|
|
927
994
|
if (options.statsOnly) {
|
|
928
995
|
// OPTIMIZED Stats-only mode - Only read filesystem stats, no file detection
|
|
929
|
-
console.log(
|
|
996
|
+
console.log(
|
|
997
|
+
'📊 Phase 1: Processing files in optimized stats-only mode (no detection)...',
|
|
998
|
+
);
|
|
930
999
|
|
|
931
1000
|
for (let i = 0; i < files.length; i += batchSize) {
|
|
932
1001
|
const batch = files.slice(i, i + batchSize);
|
|
@@ -934,7 +1003,7 @@ const processFilesInBatches = async (
|
|
|
934
1003
|
// OPTIMIZED: Batch read file stats to reduce I/O overhead
|
|
935
1004
|
const fileStatsResults = batchReadFileStats(batch);
|
|
936
1005
|
const statsFiles = fileStatsResults
|
|
937
|
-
.filter(result => result.stats !== null) // Only include files with valid stats
|
|
1006
|
+
.filter((result) => result.stats !== null) // Only include files with valid stats
|
|
938
1007
|
.map((result) => {
|
|
939
1008
|
const originalFileName = path.basename(result.path);
|
|
940
1009
|
|
|
@@ -946,16 +1015,23 @@ const processFilesInBatches = async (
|
|
|
946
1015
|
});
|
|
947
1016
|
|
|
948
1017
|
// Log any files that couldn't be read
|
|
949
|
-
const failedFiles = fileStatsResults.filter(
|
|
1018
|
+
const failedFiles = fileStatsResults.filter(
|
|
1019
|
+
(result) => result.error !== null,
|
|
1020
|
+
);
|
|
950
1021
|
if (failedFiles.length > 0) {
|
|
951
|
-
console.log(
|
|
952
|
-
|
|
1022
|
+
console.log(
|
|
1023
|
+
`⚠️ Could not read stats for ${failedFiles.length} files in batch`,
|
|
1024
|
+
);
|
|
1025
|
+
failedFiles.forEach((failed) => {
|
|
953
1026
|
console.error(` ❌ ${failed.path}: ${failed.error}`);
|
|
954
1027
|
});
|
|
955
1028
|
}
|
|
956
1029
|
|
|
957
1030
|
try {
|
|
958
|
-
const result = await insertStatsOnlyToUploaderTable(
|
|
1031
|
+
const result = await insertStatsOnlyToUploaderTable(
|
|
1032
|
+
statsFiles,
|
|
1033
|
+
options,
|
|
1034
|
+
);
|
|
959
1035
|
|
|
960
1036
|
totalUploaded += result.totalInserted;
|
|
961
1037
|
totalSkipped += result.totalSkipped;
|
|
@@ -966,7 +1042,6 @@ const processFilesInBatches = async (
|
|
|
966
1042
|
failureCount: totalErrors,
|
|
967
1043
|
skippedCount: totalSkipped,
|
|
968
1044
|
});
|
|
969
|
-
|
|
970
1045
|
} catch (error) {
|
|
971
1046
|
console.error(`❌ Error processing stats batch:`, error.message);
|
|
972
1047
|
totalErrors += batch.length;
|
|
@@ -1238,13 +1313,19 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1238
1313
|
}
|
|
1239
1314
|
|
|
1240
1315
|
if (!API_BASE_URL || !API_TOKEN) {
|
|
1241
|
-
console.error(
|
|
1316
|
+
console.error(
|
|
1317
|
+
'❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.',
|
|
1318
|
+
);
|
|
1242
1319
|
process.exit(1);
|
|
1243
1320
|
}
|
|
1244
1321
|
|
|
1245
1322
|
if (!uploadRfcs || uploadRfcs.length === 0) {
|
|
1246
|
-
console.error(
|
|
1247
|
-
|
|
1323
|
+
console.error(
|
|
1324
|
+
'❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.',
|
|
1325
|
+
);
|
|
1326
|
+
console.error(
|
|
1327
|
+
' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
|
|
1328
|
+
);
|
|
1248
1329
|
process.exit(1);
|
|
1249
1330
|
}
|
|
1250
1331
|
|
|
@@ -1266,47 +1347,65 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1266
1347
|
|
|
1267
1348
|
if (!rfcRecords || rfcRecords.length === 0) {
|
|
1268
1349
|
console.log('ℹ️ No files found for the specified RFCs with arela_path');
|
|
1269
|
-
console.log(
|
|
1350
|
+
console.log(
|
|
1351
|
+
` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
|
|
1352
|
+
);
|
|
1270
1353
|
return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
|
|
1271
1354
|
}
|
|
1272
1355
|
|
|
1273
1356
|
// Step 2: Get unique arela_paths from the RFC matches
|
|
1274
|
-
const uniqueArelaPaths = [...new Set(rfcRecords.map(r => r.arela_path))];
|
|
1275
|
-
console.log(
|
|
1357
|
+
const uniqueArelaPaths = [...new Set(rfcRecords.map((r) => r.arela_path))];
|
|
1358
|
+
console.log(
|
|
1359
|
+
`� Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`,
|
|
1360
|
+
);
|
|
1276
1361
|
|
|
1277
1362
|
// Step 3: Get ALL files that have these arela_paths (including supporting documents)
|
|
1278
|
-
//
|
|
1363
|
+
// Process arela_paths in smaller chunks to avoid URI length limits
|
|
1279
1364
|
let allRelatedFiles = [];
|
|
1280
|
-
|
|
1281
|
-
let offset = 0;
|
|
1365
|
+
const arelaPathChunkSize = 50; // Process 50 arela_paths at a time to avoid URI limits
|
|
1282
1366
|
const queryBatchSize = 1000;
|
|
1283
1367
|
|
|
1284
|
-
console.log('📥 Fetching all related files (
|
|
1368
|
+
console.log('📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...');
|
|
1285
1369
|
|
|
1286
|
-
|
|
1287
|
-
|
|
1288
|
-
|
|
1289
|
-
|
|
1290
|
-
.in('arela_path', uniqueArelaPaths)
|
|
1291
|
-
.not('original_path', 'is', null)
|
|
1292
|
-
.range(offset, offset + queryBatchSize - 1);
|
|
1370
|
+
// Process arela_paths in chunks
|
|
1371
|
+
for (let i = 0; i < uniqueArelaPaths.length; i += arelaPathChunkSize) {
|
|
1372
|
+
const arelaPathChunk = uniqueArelaPaths.slice(i, i + arelaPathChunkSize);
|
|
1373
|
+
console.log(` Processing arela_path chunk ${Math.floor(i / arelaPathChunkSize) + 1}/${Math.ceil(uniqueArelaPaths.length / arelaPathChunkSize)} (${arelaPathChunk.length} paths)`);
|
|
1293
1374
|
|
|
1294
|
-
|
|
1295
|
-
|
|
1296
|
-
|
|
1297
|
-
}
|
|
1375
|
+
// For each chunk of arela_paths, use pagination to get all related files
|
|
1376
|
+
let hasMore = true;
|
|
1377
|
+
let offset = 0;
|
|
1298
1378
|
|
|
1299
|
-
|
|
1300
|
-
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1379
|
+
while (hasMore) {
|
|
1380
|
+
const { data: batch, error: queryError } = await supabase
|
|
1381
|
+
.from('uploader')
|
|
1382
|
+
.select('id, original_path, arela_path, filename, rfc, document_type')
|
|
1383
|
+
.in('arela_path', arelaPathChunk)
|
|
1384
|
+
.not('original_path', 'is', null)
|
|
1385
|
+
.range(offset, offset + queryBatchSize - 1);
|
|
1386
|
+
|
|
1387
|
+
if (queryError) {
|
|
1388
|
+
console.error(`❌ Error fetching related files for chunk ${Math.floor(i / arelaPathChunkSize) + 1}:`, queryError.message);
|
|
1389
|
+
return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
|
|
1390
|
+
}
|
|
1304
1391
|
|
|
1305
|
-
|
|
1306
|
-
if (batch.length < queryBatchSize) {
|
|
1392
|
+
if (!batch || batch.length === 0) {
|
|
1307
1393
|
hasMore = false;
|
|
1394
|
+
} else {
|
|
1395
|
+
allRelatedFiles = allRelatedFiles.concat(batch);
|
|
1396
|
+
offset += queryBatchSize;
|
|
1397
|
+
|
|
1398
|
+
// If we got less than queryBatchSize, we've reached the end for this chunk
|
|
1399
|
+
if (batch.length < queryBatchSize) {
|
|
1400
|
+
hasMore = false;
|
|
1401
|
+
}
|
|
1308
1402
|
}
|
|
1309
1403
|
}
|
|
1404
|
+
|
|
1405
|
+
// Small delay between chunks to avoid overwhelming the database
|
|
1406
|
+
if (i + arelaPathChunkSize < uniqueArelaPaths.length) {
|
|
1407
|
+
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
1408
|
+
}
|
|
1310
1409
|
}
|
|
1311
1410
|
|
|
1312
1411
|
if (!allRelatedFiles || allRelatedFiles.length === 0) {
|
|
@@ -1314,7 +1413,9 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1314
1413
|
return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
|
|
1315
1414
|
}
|
|
1316
1415
|
|
|
1317
|
-
console.log(
|
|
1416
|
+
console.log(
|
|
1417
|
+
`📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`,
|
|
1418
|
+
);
|
|
1318
1419
|
|
|
1319
1420
|
// Group by RFC and arela_path for better organization
|
|
1320
1421
|
const filesByRfc = allRelatedFiles.reduce((acc, record) => {
|
|
@@ -1328,8 +1429,12 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1328
1429
|
|
|
1329
1430
|
console.log('📊 Files by RFC (including supporting documents):');
|
|
1330
1431
|
for (const [rfc, files] of Object.entries(filesByRfc)) {
|
|
1331
|
-
const documentTypes = [
|
|
1332
|
-
|
|
1432
|
+
const documentTypes = [
|
|
1433
|
+
...new Set(files.map((f) => f.document_type || 'Unknown')),
|
|
1434
|
+
];
|
|
1435
|
+
console.log(
|
|
1436
|
+
` ${rfc}: ${files.length} files (${documentTypes.join(', ')})`,
|
|
1437
|
+
);
|
|
1333
1438
|
}
|
|
1334
1439
|
|
|
1335
1440
|
// Group by arela_path for upload organization
|
|
@@ -1354,7 +1459,8 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1354
1459
|
|
|
1355
1460
|
// Create progress bar
|
|
1356
1461
|
const progressBar = new cliProgress.SingleBar({
|
|
1357
|
-
format:
|
|
1462
|
+
format:
|
|
1463
|
+
'🚀 Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
|
|
1358
1464
|
barCompleteChar: '█',
|
|
1359
1465
|
barIncompleteChar: '░',
|
|
1360
1466
|
hideCursor: true,
|
|
@@ -1377,7 +1483,9 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1377
1483
|
const batchNumber = Math.floor(i / batchSize) + 1;
|
|
1378
1484
|
const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
|
|
1379
1485
|
|
|
1380
|
-
console.log(
|
|
1486
|
+
console.log(
|
|
1487
|
+
`\n📦 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`,
|
|
1488
|
+
);
|
|
1381
1489
|
|
|
1382
1490
|
// Prepare files for upload
|
|
1383
1491
|
const filesToUpload = [];
|
|
@@ -1407,9 +1515,11 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1407
1515
|
rfc: record.rfc,
|
|
1408
1516
|
documentType: record.document_type,
|
|
1409
1517
|
});
|
|
1410
|
-
|
|
1411
1518
|
} catch (error) {
|
|
1412
|
-
console.error(
|
|
1519
|
+
console.error(
|
|
1520
|
+
` ❌ Error reading file ${record.original_path}:`,
|
|
1521
|
+
error.message,
|
|
1522
|
+
);
|
|
1413
1523
|
totalErrors++;
|
|
1414
1524
|
}
|
|
1415
1525
|
|
|
@@ -1425,7 +1535,9 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1425
1535
|
// Upload the batch if we have files
|
|
1426
1536
|
if (filesToUpload.length > 0) {
|
|
1427
1537
|
try {
|
|
1428
|
-
console.log(
|
|
1538
|
+
console.log(
|
|
1539
|
+
` 🚀 Uploading ${filesToUpload.length} files to Arela API...`,
|
|
1540
|
+
);
|
|
1429
1541
|
|
|
1430
1542
|
const formData = new FormData();
|
|
1431
1543
|
|
|
@@ -1461,7 +1573,9 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1461
1573
|
|
|
1462
1574
|
// Set folder structure for this group - concatenate custom prefix with arela_path
|
|
1463
1575
|
const folderStructure = options.folderStructure
|
|
1464
|
-
? `${options.folderStructure}/${arelaPath}
|
|
1576
|
+
? `${options.folderStructure}/${arelaPath}`
|
|
1577
|
+
.replace(/\/+/g, '/')
|
|
1578
|
+
.replace(/\/$/, '')
|
|
1465
1579
|
: arelaPath;
|
|
1466
1580
|
pathFormData.append('folderStructure', folderStructure);
|
|
1467
1581
|
pathFormData.append('autoDetect', 'true');
|
|
@@ -1472,15 +1586,20 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1472
1586
|
pathFormData.append('bucket', bucket);
|
|
1473
1587
|
}
|
|
1474
1588
|
|
|
1475
|
-
console.log(
|
|
1589
|
+
console.log(
|
|
1590
|
+
` 📁 Uploading ${pathFiles.length} files to: ${folderStructure}`,
|
|
1591
|
+
);
|
|
1476
1592
|
|
|
1477
|
-
const response = await fetch(
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
'
|
|
1593
|
+
const response = await fetch(
|
|
1594
|
+
`${API_BASE_URL}/api/storage/batch-upload-and-process`,
|
|
1595
|
+
{
|
|
1596
|
+
method: 'POST',
|
|
1597
|
+
headers: {
|
|
1598
|
+
'x-api-key': API_TOKEN,
|
|
1599
|
+
},
|
|
1600
|
+
body: pathFormData,
|
|
1481
1601
|
},
|
|
1482
|
-
|
|
1483
|
-
});
|
|
1602
|
+
);
|
|
1484
1603
|
|
|
1485
1604
|
if (!response.ok) {
|
|
1486
1605
|
const errorText = await response.text();
|
|
@@ -1490,22 +1609,31 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1490
1609
|
const result = await response.json();
|
|
1491
1610
|
|
|
1492
1611
|
// Check if upload was successful based on stats rather than success field
|
|
1493
|
-
const isSuccessful =
|
|
1612
|
+
const isSuccessful =
|
|
1613
|
+
result.stats &&
|
|
1614
|
+
result.stats.uploadedCount > 0 &&
|
|
1615
|
+
result.stats.errorCount === 0;
|
|
1494
1616
|
|
|
1495
1617
|
if (isSuccessful) {
|
|
1496
|
-
console.log(
|
|
1618
|
+
console.log(
|
|
1619
|
+
` ✅ Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`,
|
|
1620
|
+
);
|
|
1497
1621
|
totalUploaded += result.stats.uploadedCount;
|
|
1498
1622
|
|
|
1499
1623
|
if (result.stats.detectedCount > 0) {
|
|
1500
|
-
console.log(
|
|
1624
|
+
console.log(
|
|
1625
|
+
` 🔍 Files detected: ${result.stats.detectedCount}`,
|
|
1626
|
+
);
|
|
1501
1627
|
}
|
|
1502
1628
|
if (result.stats.organizedCount > 0) {
|
|
1503
|
-
console.log(
|
|
1629
|
+
console.log(
|
|
1630
|
+
` 📁 Files organized: ${result.stats.organizedCount}`,
|
|
1631
|
+
);
|
|
1504
1632
|
}
|
|
1505
1633
|
} else {
|
|
1506
1634
|
console.error(` ❌ Upload failed for ${folderStructure}:`);
|
|
1507
1635
|
if (result.errors && result.errors.length > 0) {
|
|
1508
|
-
result.errors.forEach(error => {
|
|
1636
|
+
result.errors.forEach((error) => {
|
|
1509
1637
|
console.error(` - ${error.fileName}: ${error.error}`);
|
|
1510
1638
|
});
|
|
1511
1639
|
}
|
|
@@ -1513,18 +1641,20 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1513
1641
|
}
|
|
1514
1642
|
|
|
1515
1643
|
// Small delay between path groups
|
|
1516
|
-
await new Promise(resolve => setTimeout(resolve, 100));
|
|
1644
|
+
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
1517
1645
|
}
|
|
1518
|
-
|
|
1519
1646
|
} catch (error) {
|
|
1520
|
-
console.error(
|
|
1647
|
+
console.error(
|
|
1648
|
+
` ❌ Error uploading batch ${batchNumber}:`,
|
|
1649
|
+
error.message,
|
|
1650
|
+
);
|
|
1521
1651
|
totalErrors += filesToUpload.length;
|
|
1522
1652
|
}
|
|
1523
1653
|
}
|
|
1524
1654
|
|
|
1525
1655
|
// Small delay between batches
|
|
1526
1656
|
if (i + batchSize < allRelatedFiles.length) {
|
|
1527
|
-
await new Promise(resolve => setTimeout(resolve, BATCH_DELAY));
|
|
1657
|
+
await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
|
|
1528
1658
|
}
|
|
1529
1659
|
}
|
|
1530
1660
|
|
|
@@ -1568,7 +1698,10 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1568
1698
|
.not('arela_path', 'is', null);
|
|
1569
1699
|
|
|
1570
1700
|
if (pedimentoError) {
|
|
1571
|
-
console.error(
|
|
1701
|
+
console.error(
|
|
1702
|
+
'❌ Error fetching pedimento records:',
|
|
1703
|
+
pedimentoError.message,
|
|
1704
|
+
);
|
|
1572
1705
|
return { processedCount: 0, updatedCount: 0, errorCount: 1 };
|
|
1573
1706
|
}
|
|
1574
1707
|
|
|
@@ -1577,7 +1710,9 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1577
1710
|
return { processedCount: 0, updatedCount: 0, errorCount: 0 };
|
|
1578
1711
|
}
|
|
1579
1712
|
|
|
1580
|
-
console.log(
|
|
1713
|
+
console.log(
|
|
1714
|
+
`📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
|
|
1715
|
+
);
|
|
1581
1716
|
|
|
1582
1717
|
let totalProcessed = 0;
|
|
1583
1718
|
let totalUpdated = 0;
|
|
@@ -1585,7 +1720,8 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1585
1720
|
|
|
1586
1721
|
// Create progress bar
|
|
1587
1722
|
const progressBar = new cliProgress.SingleBar({
|
|
1588
|
-
format:
|
|
1723
|
+
format:
|
|
1724
|
+
'🔄 Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
|
|
1589
1725
|
barCompleteChar: '█',
|
|
1590
1726
|
barIncompleteChar: '░',
|
|
1591
1727
|
hideCursor: true,
|
|
@@ -1611,9 +1747,11 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1611
1747
|
|
|
1612
1748
|
// Extract folder part from existing arela_path by removing the filename
|
|
1613
1749
|
const existingPath = pedimento.arela_path;
|
|
1614
|
-
const folderArelaPath = existingPath.includes('/')
|
|
1615
|
-
existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
|
|
1616
|
-
existingPath.endsWith('/')
|
|
1750
|
+
const folderArelaPath = existingPath.includes('/')
|
|
1751
|
+
? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
|
|
1752
|
+
: existingPath.endsWith('/')
|
|
1753
|
+
? existingPath
|
|
1754
|
+
: existingPath + '/';
|
|
1617
1755
|
|
|
1618
1756
|
console.log(` 🎯 Original arela path: ${existingPath}`);
|
|
1619
1757
|
console.log(` 📁 Folder arela path: ${folderArelaPath}`);
|
|
@@ -1627,7 +1765,10 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1627
1765
|
.neq('id', pedimento.id); // Exclude the pedimento itself
|
|
1628
1766
|
|
|
1629
1767
|
if (relatedError) {
|
|
1630
|
-
console.error(
|
|
1768
|
+
console.error(
|
|
1769
|
+
`❌ Error finding related files for ${pedimento.filename}:`,
|
|
1770
|
+
relatedError.message,
|
|
1771
|
+
);
|
|
1631
1772
|
totalErrors++;
|
|
1632
1773
|
continue;
|
|
1633
1774
|
}
|
|
@@ -1637,11 +1778,13 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1637
1778
|
continue;
|
|
1638
1779
|
}
|
|
1639
1780
|
|
|
1640
|
-
console.log(
|
|
1781
|
+
console.log(
|
|
1782
|
+
` 📄 Found ${relatedFiles.length} related files to update:`,
|
|
1783
|
+
);
|
|
1641
1784
|
|
|
1642
1785
|
// Show first 10 files, then indicate if there are more
|
|
1643
1786
|
const filesToShow = relatedFiles.slice(0, 10);
|
|
1644
|
-
filesToShow.forEach(file => {
|
|
1787
|
+
filesToShow.forEach((file) => {
|
|
1645
1788
|
console.log(` - ${file.filename}`);
|
|
1646
1789
|
});
|
|
1647
1790
|
|
|
@@ -1651,18 +1794,22 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1651
1794
|
|
|
1652
1795
|
// Process files in batches to avoid URI length limitations
|
|
1653
1796
|
const BATCH_SIZE = 50; // Process 50 files at a time
|
|
1654
|
-
const fileIds = relatedFiles.map(f => f.id);
|
|
1797
|
+
const fileIds = relatedFiles.map((f) => f.id);
|
|
1655
1798
|
let batchErrors = 0;
|
|
1656
1799
|
let batchUpdated = 0;
|
|
1657
1800
|
|
|
1658
|
-
console.log(
|
|
1801
|
+
console.log(
|
|
1802
|
+
` 🔄 Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`,
|
|
1803
|
+
);
|
|
1659
1804
|
|
|
1660
1805
|
for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
|
|
1661
1806
|
const batchIds = fileIds.slice(i, i + BATCH_SIZE);
|
|
1662
1807
|
const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
|
|
1663
1808
|
const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
|
|
1664
1809
|
|
|
1665
|
-
console.log(
|
|
1810
|
+
console.log(
|
|
1811
|
+
` 📦 Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
|
|
1812
|
+
);
|
|
1666
1813
|
|
|
1667
1814
|
try {
|
|
1668
1815
|
const { error: updateError } = await supabase
|
|
@@ -1671,33 +1818,45 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1671
1818
|
.in('id', batchIds);
|
|
1672
1819
|
|
|
1673
1820
|
if (updateError) {
|
|
1674
|
-
console.error(
|
|
1821
|
+
console.error(
|
|
1822
|
+
` ❌ Error in batch ${batchNumber}:`,
|
|
1823
|
+
updateError.message,
|
|
1824
|
+
);
|
|
1675
1825
|
batchErrors++;
|
|
1676
1826
|
} else {
|
|
1677
|
-
console.log(
|
|
1827
|
+
console.log(
|
|
1828
|
+
` ✅ Batch ${batchNumber} completed: ${batchIds.length} files updated`,
|
|
1829
|
+
);
|
|
1678
1830
|
batchUpdated += batchIds.length;
|
|
1679
1831
|
}
|
|
1680
1832
|
} catch (error) {
|
|
1681
|
-
console.error(
|
|
1833
|
+
console.error(
|
|
1834
|
+
` ❌ Exception in batch ${batchNumber}:`,
|
|
1835
|
+
error.message,
|
|
1836
|
+
);
|
|
1682
1837
|
batchErrors++;
|
|
1683
1838
|
}
|
|
1684
1839
|
|
|
1685
1840
|
// Small delay between batches to avoid overwhelming the database
|
|
1686
1841
|
if (i + BATCH_SIZE < fileIds.length) {
|
|
1687
|
-
await new Promise(resolve => setTimeout(resolve, 100));
|
|
1842
|
+
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
1688
1843
|
}
|
|
1689
1844
|
}
|
|
1690
1845
|
|
|
1691
1846
|
if (batchErrors > 0) {
|
|
1692
|
-
console.error(
|
|
1847
|
+
console.error(
|
|
1848
|
+
`❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
|
|
1849
|
+
);
|
|
1693
1850
|
totalErrors++;
|
|
1694
1851
|
} else {
|
|
1695
1852
|
console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
|
|
1696
1853
|
totalUpdated += batchUpdated;
|
|
1697
1854
|
}
|
|
1698
|
-
|
|
1699
1855
|
} catch (error) {
|
|
1700
|
-
console.error(
|
|
1856
|
+
console.error(
|
|
1857
|
+
`❌ Error processing ${pedimento.filename}:`,
|
|
1858
|
+
error.message,
|
|
1859
|
+
);
|
|
1701
1860
|
totalErrors++;
|
|
1702
1861
|
}
|
|
1703
1862
|
|
|
@@ -1761,12 +1920,27 @@ program
|
|
|
1761
1920
|
'Automatically detect year/pedimento from file paths',
|
|
1762
1921
|
)
|
|
1763
1922
|
.option('--client-path <path>', 'Client path for metadata tracking')
|
|
1764
|
-
.option(
|
|
1923
|
+
.option(
|
|
1924
|
+
'--stats-only',
|
|
1925
|
+
'Phase 1: Only read filesystem stats and insert to database (no file reading or detection)',
|
|
1926
|
+
)
|
|
1765
1927
|
.option('--no-detect', 'Disable document type detection in stats-only mode')
|
|
1766
|
-
.option(
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1928
|
+
.option(
|
|
1929
|
+
'--detect-pdfs',
|
|
1930
|
+
'Phase 2: Process PDF files in database for pedimento-simplificado detection',
|
|
1931
|
+
)
|
|
1932
|
+
.option(
|
|
1933
|
+
'--propagate-arela-path',
|
|
1934
|
+
'Phase 3: Propagate arela_path from pedimento_simplificado records to related files with same base path',
|
|
1935
|
+
)
|
|
1936
|
+
.option(
|
|
1937
|
+
'--upload-by-rfc',
|
|
1938
|
+
'Phase 4: Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable',
|
|
1939
|
+
)
|
|
1940
|
+
.option(
|
|
1941
|
+
'--run-all-phases',
|
|
1942
|
+
'Run all 4 phases in sequence: stats → detect → propagate → upload',
|
|
1943
|
+
)
|
|
1770
1944
|
.action(async (options) => {
|
|
1771
1945
|
if (options.version) {
|
|
1772
1946
|
console.log(packageVersion);
|
|
@@ -1782,7 +1956,9 @@ program
|
|
|
1782
1956
|
batchSize: parseInt(options.batchSize) || 10,
|
|
1783
1957
|
});
|
|
1784
1958
|
|
|
1785
|
-
console.log(
|
|
1959
|
+
console.log(
|
|
1960
|
+
`✅ Phase 2 Complete: ${result.detectedCount} detected, ${result.errorCount} errors`,
|
|
1961
|
+
);
|
|
1786
1962
|
return;
|
|
1787
1963
|
}
|
|
1788
1964
|
|
|
@@ -1822,8 +1998,12 @@ program
|
|
|
1822
1998
|
// Also initialize Supabase for database queries
|
|
1823
1999
|
if (!supabase) {
|
|
1824
2000
|
if (!supabaseUrl || !supabaseKey) {
|
|
1825
|
-
console.error(
|
|
1826
|
-
|
|
2001
|
+
console.error(
|
|
2002
|
+
'❌ RFC upload requires Supabase credentials for database queries.',
|
|
2003
|
+
);
|
|
2004
|
+
console.error(
|
|
2005
|
+
' Please set SUPABASE_URL and SUPABASE_KEY environment variables.',
|
|
2006
|
+
);
|
|
1827
2007
|
process.exit(1);
|
|
1828
2008
|
}
|
|
1829
2009
|
|
|
@@ -1857,7 +2037,9 @@ program
|
|
|
1857
2037
|
const concurrency = parseInt(options.concurrency) || 10;
|
|
1858
2038
|
|
|
1859
2039
|
if (options.statsOnly) {
|
|
1860
|
-
console.log(
|
|
2040
|
+
console.log(
|
|
2041
|
+
'📊 Mode: Stats Only - Reading file stats and inserting to uploader table',
|
|
2042
|
+
);
|
|
1861
2043
|
console.log('🚫 Files will NOT be uploaded');
|
|
1862
2044
|
if (options.detect !== false) {
|
|
1863
2045
|
console.log('🔍 Document type detection ENABLED for supported files');
|
|
@@ -1965,14 +2147,18 @@ program
|
|
|
1965
2147
|
const detectionResult = await detectPedimentosInDatabase({
|
|
1966
2148
|
batchSize: parseInt(options.batchSize) || 10,
|
|
1967
2149
|
});
|
|
1968
|
-
console.log(
|
|
2150
|
+
console.log(
|
|
2151
|
+
`✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`,
|
|
2152
|
+
);
|
|
1969
2153
|
|
|
1970
2154
|
// Phase 3: Propagate arela_path
|
|
1971
2155
|
console.log('\n📁 === PHASE 3: Propagate Arela Paths ===');
|
|
1972
2156
|
const propagateResult = await propagateArelaPath({
|
|
1973
2157
|
showProgress: options.showStats || true,
|
|
1974
2158
|
});
|
|
1975
|
-
console.log(
|
|
2159
|
+
console.log(
|
|
2160
|
+
`✅ Phase 3 Complete: ${propagateResult.updatedCount || 0} paths propagated`,
|
|
2161
|
+
);
|
|
1976
2162
|
|
|
1977
2163
|
// Phase 4: Upload by RFC
|
|
1978
2164
|
if (uploadRfcs && uploadRfcs.length > 0) {
|
|
@@ -1989,18 +2175,22 @@ program
|
|
|
1989
2175
|
console.log(`✅ Phase 4 Complete: Upload finished`);
|
|
1990
2176
|
} else {
|
|
1991
2177
|
console.log('\n⚠️ === PHASE 4: Upload by RFC ===');
|
|
1992
|
-
console.log(
|
|
2178
|
+
console.log(
|
|
2179
|
+
'⚠️ UPLOAD_RFCS environment variable not configured, skipping Phase 4',
|
|
2180
|
+
);
|
|
1993
2181
|
}
|
|
1994
2182
|
|
|
1995
2183
|
console.log('\n🎉 All 4 phases completed successfully!');
|
|
1996
|
-
|
|
1997
2184
|
} catch (error) {
|
|
1998
2185
|
console.error(`❌ Error in multi-phase execution:`, error.message);
|
|
1999
2186
|
process.exit(1);
|
|
2000
2187
|
}
|
|
2001
2188
|
}
|
|
2002
2189
|
|
|
2003
|
-
if (
|
|
2190
|
+
if (
|
|
2191
|
+
options.showStats &&
|
|
2192
|
+
(sanitizationCache.size > 0 || pathDetectionCache.size > 0)
|
|
2193
|
+
) {
|
|
2004
2194
|
console.log(`📊 Performance Statistics:`);
|
|
2005
2195
|
if (sanitizationCache.size > 0) {
|
|
2006
2196
|
console.log(
|