@arela/uploader 0.2.1 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/document-type-shared.js +22 -8
- package/src/document-types/pedimento-simplificado.js +11 -29
- package/src/file-detection.js +43 -28
- package/src/index.js +375 -199
- package/OPTIMIZATION_SUMMARY.md +0 -154
- package/PERFORMANCE_OPTIMIZATIONS.md +0 -270
package/src/index.js
CHANGED
|
@@ -9,6 +9,7 @@ import { globby } from 'globby';
|
|
|
9
9
|
import mime from 'mime-types';
|
|
10
10
|
import fetch from 'node-fetch';
|
|
11
11
|
import path from 'path';
|
|
12
|
+
|
|
12
13
|
import { FileDetectionService } from './file-detection.js';
|
|
13
14
|
|
|
14
15
|
config();
|
|
@@ -157,8 +158,8 @@ const checkCredentials = async (forceSupabase = false) => {
|
|
|
157
158
|
if (!supabaseUrl || !supabaseKey || !bucket) {
|
|
158
159
|
console.error(
|
|
159
160
|
'⚠️ Missing credentials. Please set either:\n' +
|
|
160
|
-
|
|
161
|
-
|
|
161
|
+
' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
|
|
162
|
+
' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
|
|
162
163
|
);
|
|
163
164
|
process.exit(1);
|
|
164
165
|
}
|
|
@@ -209,7 +210,10 @@ const writeLog = (message) => {
|
|
|
209
210
|
|
|
210
211
|
// Flush if buffer is full or enough time has passed
|
|
211
212
|
const now = Date.now();
|
|
212
|
-
if (
|
|
213
|
+
if (
|
|
214
|
+
logBuffer.length >= LOG_BUFFER_SIZE ||
|
|
215
|
+
now - lastFlushTime >= LOG_FLUSH_INTERVAL
|
|
216
|
+
) {
|
|
213
217
|
flushLogBuffer();
|
|
214
218
|
}
|
|
215
219
|
} catch (error) {
|
|
@@ -233,7 +237,8 @@ process.on('SIGTERM', () => {
|
|
|
233
237
|
*/
|
|
234
238
|
const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true';
|
|
235
239
|
const BATCH_DELAY = parseInt(process.env.BATCH_DELAY) || 100; // Configurable delay between batches
|
|
236
|
-
const PROGRESS_UPDATE_INTERVAL =
|
|
240
|
+
const PROGRESS_UPDATE_INTERVAL =
|
|
241
|
+
parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10; // Update progress every N items
|
|
237
242
|
|
|
238
243
|
const logVerbose = (message) => {
|
|
239
244
|
if (VERBOSE_LOGGING) {
|
|
@@ -531,7 +536,9 @@ const uploadToSupabase = async (file, uploadPath) => {
|
|
|
531
536
|
*/
|
|
532
537
|
const insertStatsToUploaderTable = async (files, options) => {
|
|
533
538
|
if (!supabase) {
|
|
534
|
-
throw new Error(
|
|
539
|
+
throw new Error(
|
|
540
|
+
'Supabase client not initialized. Stats mode requires Supabase connection.',
|
|
541
|
+
);
|
|
535
542
|
}
|
|
536
543
|
|
|
537
544
|
const detectionService = new FileDetectionService();
|
|
@@ -550,7 +557,9 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
550
557
|
.limit(1);
|
|
551
558
|
|
|
552
559
|
if (checkError) {
|
|
553
|
-
console.error(
|
|
560
|
+
console.error(
|
|
561
|
+
`❌ Error checking for existing record: ${checkError.message}`,
|
|
562
|
+
);
|
|
554
563
|
continue;
|
|
555
564
|
}
|
|
556
565
|
|
|
@@ -569,7 +578,7 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
569
578
|
arela_path: null,
|
|
570
579
|
status: 'stats',
|
|
571
580
|
rfc: null,
|
|
572
|
-
message: null
|
|
581
|
+
message: null,
|
|
573
582
|
};
|
|
574
583
|
|
|
575
584
|
// Try to detect document type for supported files
|
|
@@ -588,7 +597,9 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
588
597
|
}
|
|
589
598
|
|
|
590
599
|
// Extract RFC from fields if available
|
|
591
|
-
const rfcField = detection.fields.find(
|
|
600
|
+
const rfcField = detection.fields.find(
|
|
601
|
+
(f) => f.name === 'rfc' && f.found,
|
|
602
|
+
);
|
|
592
603
|
if (rfcField) {
|
|
593
604
|
record.rfc = rfcField.value;
|
|
594
605
|
}
|
|
@@ -616,7 +627,9 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
616
627
|
return [];
|
|
617
628
|
}
|
|
618
629
|
|
|
619
|
-
console.log(
|
|
630
|
+
console.log(
|
|
631
|
+
`💾 Inserting ${records.length} new records into uploader table...`,
|
|
632
|
+
);
|
|
620
633
|
|
|
621
634
|
const { data, error } = await supabase
|
|
622
635
|
.from('uploader')
|
|
@@ -637,7 +650,9 @@ const insertStatsToUploaderTable = async (files, options) => {
|
|
|
637
650
|
*/
|
|
638
651
|
const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
639
652
|
if (!supabase) {
|
|
640
|
-
throw new Error(
|
|
653
|
+
throw new Error(
|
|
654
|
+
'Supabase client not initialized. Stats mode requires Supabase connection.',
|
|
655
|
+
);
|
|
641
656
|
}
|
|
642
657
|
|
|
643
658
|
const batchSize = 1000; // Large batch size for performance
|
|
@@ -650,7 +665,10 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
|
650
665
|
// Use pre-computed stats if available, otherwise call fs.statSync
|
|
651
666
|
const stats = file.stats || fs.statSync(file.path);
|
|
652
667
|
const originalPath = options.clientPath || file.path;
|
|
653
|
-
const fileExtension = path
|
|
668
|
+
const fileExtension = path
|
|
669
|
+
.extname(file.path)
|
|
670
|
+
.toLowerCase()
|
|
671
|
+
.replace('.', '');
|
|
654
672
|
|
|
655
673
|
const record = {
|
|
656
674
|
document_type: null,
|
|
@@ -664,7 +682,7 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
|
664
682
|
message: null,
|
|
665
683
|
file_extension: fileExtension,
|
|
666
684
|
created_at: new Date().toISOString(),
|
|
667
|
-
modified_at: stats.mtime.toISOString()
|
|
685
|
+
modified_at: stats.mtime.toISOString(),
|
|
668
686
|
};
|
|
669
687
|
|
|
670
688
|
allRecords.push(record);
|
|
@@ -678,7 +696,9 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
|
678
696
|
return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
|
|
679
697
|
}
|
|
680
698
|
|
|
681
|
-
console.log(
|
|
699
|
+
console.log(
|
|
700
|
+
`💾 Bulk inserting ${allRecords.length} file stats in batches of ${batchSize}...`,
|
|
701
|
+
);
|
|
682
702
|
|
|
683
703
|
let totalInserted = 0;
|
|
684
704
|
let totalSkipped = 0;
|
|
@@ -689,16 +709,17 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
|
689
709
|
|
|
690
710
|
try {
|
|
691
711
|
// OPTIMIZED: Use upsert without select to avoid unnecessary data transfer
|
|
692
|
-
const { error, count } = await supabase
|
|
693
|
-
|
|
694
|
-
|
|
695
|
-
|
|
696
|
-
|
|
697
|
-
count: 'exact'
|
|
698
|
-
});
|
|
712
|
+
const { error, count } = await supabase.from('uploader').upsert(batch, {
|
|
713
|
+
onConflict: 'original_path',
|
|
714
|
+
ignoreDuplicates: false,
|
|
715
|
+
count: 'exact',
|
|
716
|
+
});
|
|
699
717
|
|
|
700
718
|
if (error) {
|
|
701
|
-
console.error(
|
|
719
|
+
console.error(
|
|
720
|
+
`❌ Error inserting batch ${Math.floor(i / batchSize) + 1}:`,
|
|
721
|
+
error.message,
|
|
722
|
+
);
|
|
702
723
|
continue;
|
|
703
724
|
}
|
|
704
725
|
|
|
@@ -715,45 +736,65 @@ const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
|
715
736
|
// This is reasonable since stats-only is typically run on new file sets
|
|
716
737
|
totalInserted += affected;
|
|
717
738
|
|
|
718
|
-
console.log(
|
|
739
|
+
console.log(
|
|
740
|
+
`✅ Batch ${Math.floor(i / batchSize) + 1}: ${affected} rows processed`,
|
|
741
|
+
);
|
|
719
742
|
} catch (error) {
|
|
720
|
-
console.error(
|
|
743
|
+
console.error(
|
|
744
|
+
`❌ Unexpected error in batch ${Math.floor(i / batchSize) + 1}:`,
|
|
745
|
+
error.message,
|
|
746
|
+
);
|
|
721
747
|
}
|
|
722
748
|
}
|
|
723
749
|
|
|
724
750
|
// Calculate skipped as difference between total records and inserted
|
|
725
751
|
totalSkipped = allRecords.length - totalInserted;
|
|
726
752
|
|
|
727
|
-
console.log(
|
|
753
|
+
console.log(
|
|
754
|
+
`📊 Phase 1 Summary: ${totalInserted} records processed, estimated ${totalSkipped} were updates`,
|
|
755
|
+
);
|
|
728
756
|
|
|
729
757
|
return {
|
|
730
758
|
totalInserted,
|
|
731
759
|
totalSkipped,
|
|
732
|
-
totalProcessed: allRecords.length
|
|
760
|
+
totalProcessed: allRecords.length,
|
|
733
761
|
};
|
|
734
762
|
};
|
|
735
763
|
|
|
736
764
|
/**
|
|
737
765
|
* PHASE 2: Process PDF files for pedimento-simplificado detection
|
|
738
766
|
* Only processes files with status 'fs-stats' and file_extension 'pdf'
|
|
767
|
+
* Processes records in chunks of 1000 to avoid loading all records into memory
|
|
739
768
|
*/
|
|
740
769
|
const detectPedimentosInDatabase = async (options = {}) => {
|
|
741
770
|
if (!supabase) {
|
|
742
771
|
throw new Error('Supabase client not initialized.');
|
|
743
772
|
}
|
|
744
773
|
|
|
745
|
-
console.log(
|
|
774
|
+
console.log(
|
|
775
|
+
'🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...',
|
|
776
|
+
);
|
|
746
777
|
|
|
747
|
-
|
|
748
|
-
|
|
749
|
-
|
|
778
|
+
const detectionService = new FileDetectionService();
|
|
779
|
+
const processingBatchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
|
|
780
|
+
const queryBatchSize = 1000; // Process 1000 records at a time
|
|
781
|
+
|
|
782
|
+
let totalDetected = 0;
|
|
783
|
+
let totalProcessed = 0;
|
|
784
|
+
let totalErrors = 0;
|
|
750
785
|
let offset = 0;
|
|
751
|
-
|
|
786
|
+
let chunkNumber = 1;
|
|
752
787
|
|
|
753
|
-
console.log('
|
|
788
|
+
console.log('� Processing PDF files in chunks of 1000 records...');
|
|
754
789
|
|
|
755
|
-
|
|
756
|
-
|
|
790
|
+
// Process records in chunks of 1000
|
|
791
|
+
while (true) {
|
|
792
|
+
console.log(
|
|
793
|
+
`\n📥 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
|
|
794
|
+
);
|
|
795
|
+
|
|
796
|
+
// Fetch next chunk of PDF records
|
|
797
|
+
const { data: pdfRecords, error: queryError } = await supabase
|
|
757
798
|
.from('uploader')
|
|
758
799
|
.select('id, original_path, filename, file_extension, status')
|
|
759
800
|
.eq('status', 'fs-stats')
|
|
@@ -762,131 +803,157 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
762
803
|
.range(offset, offset + queryBatchSize - 1);
|
|
763
804
|
|
|
764
805
|
if (queryError) {
|
|
765
|
-
throw new Error(
|
|
806
|
+
throw new Error(
|
|
807
|
+
`Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
|
|
808
|
+
);
|
|
766
809
|
}
|
|
767
810
|
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
offset += queryBatchSize;
|
|
773
|
-
console.log(`📄 Fetched ${batch.length} PDF records (total: ${allPdfRecords.length})`);
|
|
811
|
+
// If no records found, we're done
|
|
812
|
+
if (!pdfRecords || pdfRecords.length === 0) {
|
|
813
|
+
console.log(`📝 No more PDF files found. Processing completed.`);
|
|
814
|
+
break;
|
|
774
815
|
}
|
|
775
|
-
}
|
|
776
816
|
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
}
|
|
817
|
+
console.log(
|
|
818
|
+
`� Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
|
|
819
|
+
);
|
|
781
820
|
|
|
782
|
-
|
|
821
|
+
// Create progress bar for this chunk
|
|
822
|
+
const progressBar = new cliProgress.SingleBar({
|
|
823
|
+
format: `🔍 Chunk ${chunkNumber} |{bar}| {percentage}% | {value}/{total} | Detected: {detected} | Errors: {errors}`,
|
|
824
|
+
barCompleteChar: '█',
|
|
825
|
+
barIncompleteChar: '░',
|
|
826
|
+
hideCursor: true,
|
|
827
|
+
});
|
|
783
828
|
|
|
784
|
-
|
|
785
|
-
const batchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
|
|
786
|
-
let totalDetected = 0;
|
|
787
|
-
let totalProcessed = 0;
|
|
788
|
-
let totalErrors = 0;
|
|
829
|
+
progressBar.start(pdfRecords.length, 0, { detected: 0, errors: 0 });
|
|
789
830
|
|
|
790
|
-
|
|
791
|
-
|
|
792
|
-
|
|
793
|
-
barCompleteChar: '█',
|
|
794
|
-
barIncompleteChar: '░',
|
|
795
|
-
hideCursor: true,
|
|
796
|
-
});
|
|
831
|
+
let chunkDetected = 0;
|
|
832
|
+
let chunkProcessed = 0;
|
|
833
|
+
let chunkErrors = 0;
|
|
797
834
|
|
|
798
|
-
|
|
835
|
+
// Process files in smaller batches within this chunk
|
|
836
|
+
for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
|
|
837
|
+
const batch = pdfRecords.slice(i, i + processingBatchSize);
|
|
838
|
+
const updatePromises = [];
|
|
799
839
|
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
840
|
+
for (const record of batch) {
|
|
841
|
+
try {
|
|
842
|
+
// Check if file still exists
|
|
843
|
+
if (!fs.existsSync(record.original_path)) {
|
|
844
|
+
updatePromises.push(
|
|
845
|
+
supabase
|
|
846
|
+
.from('uploader')
|
|
847
|
+
.update({
|
|
848
|
+
status: 'file-not-found',
|
|
849
|
+
message: 'File no longer exists at original path',
|
|
850
|
+
})
|
|
851
|
+
.eq('id', record.id),
|
|
852
|
+
);
|
|
853
|
+
chunkErrors++;
|
|
854
|
+
totalErrors++;
|
|
855
|
+
continue;
|
|
856
|
+
}
|
|
857
|
+
|
|
858
|
+
// Perform detection
|
|
859
|
+
const detection = await detectionService.detectFile(
|
|
860
|
+
record.original_path,
|
|
861
|
+
);
|
|
862
|
+
chunkProcessed++;
|
|
863
|
+
totalProcessed++;
|
|
864
|
+
|
|
865
|
+
const updateData = {
|
|
866
|
+
status: detection.detectedType ? 'detected' : 'not-detected',
|
|
867
|
+
document_type: detection.detectedType,
|
|
868
|
+
num_pedimento: detection.detectedPedimento,
|
|
869
|
+
arela_path: detection.arelaPath,
|
|
870
|
+
message: detection.error || null,
|
|
871
|
+
};
|
|
872
|
+
|
|
873
|
+
// Extract RFC from fields if available
|
|
874
|
+
if (detection.fields) {
|
|
875
|
+
const rfcField = detection.fields.find(
|
|
876
|
+
(f) => f.name === 'rfc' && f.found,
|
|
877
|
+
);
|
|
878
|
+
if (rfcField) {
|
|
879
|
+
updateData.rfc = rfcField.value;
|
|
880
|
+
}
|
|
881
|
+
}
|
|
882
|
+
|
|
883
|
+
if (detection.detectedType) {
|
|
884
|
+
chunkDetected++;
|
|
885
|
+
totalDetected++;
|
|
886
|
+
}
|
|
887
|
+
|
|
888
|
+
updatePromises.push(
|
|
889
|
+
supabase.from('uploader').update(updateData).eq('id', record.id),
|
|
890
|
+
);
|
|
891
|
+
} catch (error) {
|
|
892
|
+
console.error(
|
|
893
|
+
`❌ Error detecting ${record.filename}:`,
|
|
894
|
+
error.message,
|
|
895
|
+
);
|
|
896
|
+
chunkErrors++;
|
|
897
|
+
totalErrors++;
|
|
804
898
|
|
|
805
|
-
for (const record of batch) {
|
|
806
|
-
try {
|
|
807
|
-
// Check if file still exists
|
|
808
|
-
if (!fs.existsSync(record.original_path)) {
|
|
809
899
|
updatePromises.push(
|
|
810
900
|
supabase
|
|
811
901
|
.from('uploader')
|
|
812
902
|
.update({
|
|
813
|
-
status: '
|
|
814
|
-
message:
|
|
903
|
+
status: 'detection-error',
|
|
904
|
+
message: error.message,
|
|
815
905
|
})
|
|
816
|
-
.eq('id', record.id)
|
|
906
|
+
.eq('id', record.id),
|
|
817
907
|
);
|
|
818
|
-
totalErrors++;
|
|
819
|
-
continue;
|
|
820
|
-
}
|
|
821
|
-
|
|
822
|
-
// Perform detection
|
|
823
|
-
const detection = await detectionService.detectFile(record.original_path);
|
|
824
|
-
totalProcessed++;
|
|
825
|
-
|
|
826
|
-
const updateData = {
|
|
827
|
-
status: detection.detectedType ? 'detected' : 'not-detected',
|
|
828
|
-
document_type: detection.detectedType,
|
|
829
|
-
num_pedimento: detection.detectedPedimento,
|
|
830
|
-
arela_path: detection.arelaPath,
|
|
831
|
-
message: detection.error || null
|
|
832
|
-
};
|
|
833
|
-
|
|
834
|
-
// Extract RFC from fields if available
|
|
835
|
-
if (detection.fields) {
|
|
836
|
-
const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
|
|
837
|
-
if (rfcField) {
|
|
838
|
-
updateData.rfc = rfcField.value;
|
|
839
|
-
}
|
|
840
|
-
}
|
|
841
|
-
|
|
842
|
-
if (detection.detectedType) {
|
|
843
|
-
totalDetected++;
|
|
844
908
|
}
|
|
909
|
+
}
|
|
845
910
|
|
|
846
|
-
|
|
847
|
-
|
|
848
|
-
|
|
849
|
-
.update(updateData)
|
|
850
|
-
.eq('id', record.id)
|
|
851
|
-
);
|
|
852
|
-
|
|
911
|
+
// Execute all updates in parallel for this batch
|
|
912
|
+
try {
|
|
913
|
+
await Promise.all(updatePromises);
|
|
853
914
|
} catch (error) {
|
|
854
|
-
console.error(
|
|
855
|
-
|
|
856
|
-
|
|
857
|
-
updatePromises.push(
|
|
858
|
-
supabase
|
|
859
|
-
.from('uploader')
|
|
860
|
-
.update({
|
|
861
|
-
status: 'detection-error',
|
|
862
|
-
message: error.message
|
|
863
|
-
})
|
|
864
|
-
.eq('id', record.id)
|
|
915
|
+
console.error(
|
|
916
|
+
`❌ Error updating batch in chunk ${chunkNumber}:`,
|
|
917
|
+
error.message,
|
|
865
918
|
);
|
|
866
919
|
}
|
|
920
|
+
|
|
921
|
+
// Update progress for this chunk
|
|
922
|
+
progressBar.update(Math.min(i + processingBatchSize, pdfRecords.length), {
|
|
923
|
+
detected: chunkDetected,
|
|
924
|
+
errors: chunkErrors,
|
|
925
|
+
});
|
|
867
926
|
}
|
|
868
927
|
|
|
869
|
-
|
|
870
|
-
|
|
871
|
-
|
|
872
|
-
|
|
873
|
-
|
|
928
|
+
progressBar.stop();
|
|
929
|
+
|
|
930
|
+
console.log(
|
|
931
|
+
`✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
|
|
932
|
+
);
|
|
933
|
+
|
|
934
|
+
// Move to next chunk
|
|
935
|
+
offset += queryBatchSize;
|
|
936
|
+
chunkNumber++;
|
|
937
|
+
|
|
938
|
+
// If we got fewer records than queryBatchSize, we've reached the end
|
|
939
|
+
if (pdfRecords.length < queryBatchSize) {
|
|
940
|
+
console.log(
|
|
941
|
+
`📝 Reached end of records (chunk had ${pdfRecords.length} records).`,
|
|
942
|
+
);
|
|
943
|
+
break;
|
|
874
944
|
}
|
|
875
945
|
|
|
876
|
-
//
|
|
877
|
-
|
|
878
|
-
detected: totalDetected,
|
|
879
|
-
errors: totalErrors
|
|
880
|
-
});
|
|
946
|
+
// Small delay between chunks to avoid overwhelming the database
|
|
947
|
+
await new Promise((resolve) => setTimeout(resolve, 500));
|
|
881
948
|
}
|
|
882
949
|
|
|
883
|
-
|
|
884
|
-
|
|
885
|
-
|
|
950
|
+
console.log(
|
|
951
|
+
`📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
|
|
952
|
+
);
|
|
886
953
|
return {
|
|
887
954
|
detectedCount: totalDetected,
|
|
888
955
|
processedCount: totalProcessed,
|
|
889
|
-
errorCount: totalErrors
|
|
956
|
+
errorCount: totalErrors,
|
|
890
957
|
};
|
|
891
958
|
};
|
|
892
959
|
|
|
@@ -926,7 +993,9 @@ const processFilesInBatches = async (
|
|
|
926
993
|
|
|
927
994
|
if (options.statsOnly) {
|
|
928
995
|
// OPTIMIZED Stats-only mode - Only read filesystem stats, no file detection
|
|
929
|
-
console.log(
|
|
996
|
+
console.log(
|
|
997
|
+
'📊 Phase 1: Processing files in optimized stats-only mode (no detection)...',
|
|
998
|
+
);
|
|
930
999
|
|
|
931
1000
|
for (let i = 0; i < files.length; i += batchSize) {
|
|
932
1001
|
const batch = files.slice(i, i + batchSize);
|
|
@@ -934,7 +1003,7 @@ const processFilesInBatches = async (
|
|
|
934
1003
|
// OPTIMIZED: Batch read file stats to reduce I/O overhead
|
|
935
1004
|
const fileStatsResults = batchReadFileStats(batch);
|
|
936
1005
|
const statsFiles = fileStatsResults
|
|
937
|
-
.filter(result => result.stats !== null) // Only include files with valid stats
|
|
1006
|
+
.filter((result) => result.stats !== null) // Only include files with valid stats
|
|
938
1007
|
.map((result) => {
|
|
939
1008
|
const originalFileName = path.basename(result.path);
|
|
940
1009
|
|
|
@@ -946,16 +1015,23 @@ const processFilesInBatches = async (
|
|
|
946
1015
|
});
|
|
947
1016
|
|
|
948
1017
|
// Log any files that couldn't be read
|
|
949
|
-
const failedFiles = fileStatsResults.filter(
|
|
1018
|
+
const failedFiles = fileStatsResults.filter(
|
|
1019
|
+
(result) => result.error !== null,
|
|
1020
|
+
);
|
|
950
1021
|
if (failedFiles.length > 0) {
|
|
951
|
-
console.log(
|
|
952
|
-
|
|
1022
|
+
console.log(
|
|
1023
|
+
`⚠️ Could not read stats for ${failedFiles.length} files in batch`,
|
|
1024
|
+
);
|
|
1025
|
+
failedFiles.forEach((failed) => {
|
|
953
1026
|
console.error(` ❌ ${failed.path}: ${failed.error}`);
|
|
954
1027
|
});
|
|
955
1028
|
}
|
|
956
1029
|
|
|
957
1030
|
try {
|
|
958
|
-
const result = await insertStatsOnlyToUploaderTable(
|
|
1031
|
+
const result = await insertStatsOnlyToUploaderTable(
|
|
1032
|
+
statsFiles,
|
|
1033
|
+
options,
|
|
1034
|
+
);
|
|
959
1035
|
|
|
960
1036
|
totalUploaded += result.totalInserted;
|
|
961
1037
|
totalSkipped += result.totalSkipped;
|
|
@@ -966,7 +1042,6 @@ const processFilesInBatches = async (
|
|
|
966
1042
|
failureCount: totalErrors,
|
|
967
1043
|
skippedCount: totalSkipped,
|
|
968
1044
|
});
|
|
969
|
-
|
|
970
1045
|
} catch (error) {
|
|
971
1046
|
console.error(`❌ Error processing stats batch:`, error.message);
|
|
972
1047
|
totalErrors += batch.length;
|
|
@@ -1238,13 +1313,19 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1238
1313
|
}
|
|
1239
1314
|
|
|
1240
1315
|
if (!API_BASE_URL || !API_TOKEN) {
|
|
1241
|
-
console.error(
|
|
1316
|
+
console.error(
|
|
1317
|
+
'❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.',
|
|
1318
|
+
);
|
|
1242
1319
|
process.exit(1);
|
|
1243
1320
|
}
|
|
1244
1321
|
|
|
1245
1322
|
if (!uploadRfcs || uploadRfcs.length === 0) {
|
|
1246
|
-
console.error(
|
|
1247
|
-
|
|
1323
|
+
console.error(
|
|
1324
|
+
'❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.',
|
|
1325
|
+
);
|
|
1326
|
+
console.error(
|
|
1327
|
+
' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
|
|
1328
|
+
);
|
|
1248
1329
|
process.exit(1);
|
|
1249
1330
|
}
|
|
1250
1331
|
|
|
@@ -1266,13 +1347,17 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1266
1347
|
|
|
1267
1348
|
if (!rfcRecords || rfcRecords.length === 0) {
|
|
1268
1349
|
console.log('ℹ️ No files found for the specified RFCs with arela_path');
|
|
1269
|
-
console.log(
|
|
1350
|
+
console.log(
|
|
1351
|
+
` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
|
|
1352
|
+
);
|
|
1270
1353
|
return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
|
|
1271
1354
|
}
|
|
1272
1355
|
|
|
1273
1356
|
// Step 2: Get unique arela_paths from the RFC matches
|
|
1274
|
-
const uniqueArelaPaths = [...new Set(rfcRecords.map(r => r.arela_path))];
|
|
1275
|
-
console.log(
|
|
1357
|
+
const uniqueArelaPaths = [...new Set(rfcRecords.map((r) => r.arela_path))];
|
|
1358
|
+
console.log(
|
|
1359
|
+
`� Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`,
|
|
1360
|
+
);
|
|
1276
1361
|
|
|
1277
1362
|
// Step 3: Get ALL files that have these arela_paths (including supporting documents)
|
|
1278
1363
|
// Use pagination to ensure we get all files, regardless of count
|
|
@@ -1314,7 +1399,9 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1314
1399
|
return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
|
|
1315
1400
|
}
|
|
1316
1401
|
|
|
1317
|
-
console.log(
|
|
1402
|
+
console.log(
|
|
1403
|
+
`📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`,
|
|
1404
|
+
);
|
|
1318
1405
|
|
|
1319
1406
|
// Group by RFC and arela_path for better organization
|
|
1320
1407
|
const filesByRfc = allRelatedFiles.reduce((acc, record) => {
|
|
@@ -1328,8 +1415,12 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1328
1415
|
|
|
1329
1416
|
console.log('📊 Files by RFC (including supporting documents):');
|
|
1330
1417
|
for (const [rfc, files] of Object.entries(filesByRfc)) {
|
|
1331
|
-
const documentTypes = [
|
|
1332
|
-
|
|
1418
|
+
const documentTypes = [
|
|
1419
|
+
...new Set(files.map((f) => f.document_type || 'Unknown')),
|
|
1420
|
+
];
|
|
1421
|
+
console.log(
|
|
1422
|
+
` ${rfc}: ${files.length} files (${documentTypes.join(', ')})`,
|
|
1423
|
+
);
|
|
1333
1424
|
}
|
|
1334
1425
|
|
|
1335
1426
|
// Group by arela_path for upload organization
|
|
@@ -1354,7 +1445,8 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1354
1445
|
|
|
1355
1446
|
// Create progress bar
|
|
1356
1447
|
const progressBar = new cliProgress.SingleBar({
|
|
1357
|
-
format:
|
|
1448
|
+
format:
|
|
1449
|
+
'🚀 Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
|
|
1358
1450
|
barCompleteChar: '█',
|
|
1359
1451
|
barIncompleteChar: '░',
|
|
1360
1452
|
hideCursor: true,
|
|
@@ -1377,7 +1469,9 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1377
1469
|
const batchNumber = Math.floor(i / batchSize) + 1;
|
|
1378
1470
|
const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
|
|
1379
1471
|
|
|
1380
|
-
console.log(
|
|
1472
|
+
console.log(
|
|
1473
|
+
`\n📦 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`,
|
|
1474
|
+
);
|
|
1381
1475
|
|
|
1382
1476
|
// Prepare files for upload
|
|
1383
1477
|
const filesToUpload = [];
|
|
@@ -1407,9 +1501,11 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1407
1501
|
rfc: record.rfc,
|
|
1408
1502
|
documentType: record.document_type,
|
|
1409
1503
|
});
|
|
1410
|
-
|
|
1411
1504
|
} catch (error) {
|
|
1412
|
-
console.error(
|
|
1505
|
+
console.error(
|
|
1506
|
+
` ❌ Error reading file ${record.original_path}:`,
|
|
1507
|
+
error.message,
|
|
1508
|
+
);
|
|
1413
1509
|
totalErrors++;
|
|
1414
1510
|
}
|
|
1415
1511
|
|
|
@@ -1425,7 +1521,9 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1425
1521
|
// Upload the batch if we have files
|
|
1426
1522
|
if (filesToUpload.length > 0) {
|
|
1427
1523
|
try {
|
|
1428
|
-
console.log(
|
|
1524
|
+
console.log(
|
|
1525
|
+
` 🚀 Uploading ${filesToUpload.length} files to Arela API...`,
|
|
1526
|
+
);
|
|
1429
1527
|
|
|
1430
1528
|
const formData = new FormData();
|
|
1431
1529
|
|
|
@@ -1461,7 +1559,9 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1461
1559
|
|
|
1462
1560
|
// Set folder structure for this group - concatenate custom prefix with arela_path
|
|
1463
1561
|
const folderStructure = options.folderStructure
|
|
1464
|
-
? `${options.folderStructure}/${arelaPath}
|
|
1562
|
+
? `${options.folderStructure}/${arelaPath}`
|
|
1563
|
+
.replace(/\/+/g, '/')
|
|
1564
|
+
.replace(/\/$/, '')
|
|
1465
1565
|
: arelaPath;
|
|
1466
1566
|
pathFormData.append('folderStructure', folderStructure);
|
|
1467
1567
|
pathFormData.append('autoDetect', 'true');
|
|
@@ -1472,15 +1572,20 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1472
1572
|
pathFormData.append('bucket', bucket);
|
|
1473
1573
|
}
|
|
1474
1574
|
|
|
1475
|
-
console.log(
|
|
1575
|
+
console.log(
|
|
1576
|
+
` 📁 Uploading ${pathFiles.length} files to: ${folderStructure}`,
|
|
1577
|
+
);
|
|
1476
1578
|
|
|
1477
|
-
const response = await fetch(
|
|
1478
|
-
|
|
1479
|
-
|
|
1480
|
-
'
|
|
1579
|
+
const response = await fetch(
|
|
1580
|
+
`${API_BASE_URL}/api/storage/batch-upload-and-process`,
|
|
1581
|
+
{
|
|
1582
|
+
method: 'POST',
|
|
1583
|
+
headers: {
|
|
1584
|
+
'x-api-key': API_TOKEN,
|
|
1585
|
+
},
|
|
1586
|
+
body: pathFormData,
|
|
1481
1587
|
},
|
|
1482
|
-
|
|
1483
|
-
});
|
|
1588
|
+
);
|
|
1484
1589
|
|
|
1485
1590
|
if (!response.ok) {
|
|
1486
1591
|
const errorText = await response.text();
|
|
@@ -1490,22 +1595,31 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1490
1595
|
const result = await response.json();
|
|
1491
1596
|
|
|
1492
1597
|
// Check if upload was successful based on stats rather than success field
|
|
1493
|
-
const isSuccessful =
|
|
1598
|
+
const isSuccessful =
|
|
1599
|
+
result.stats &&
|
|
1600
|
+
result.stats.uploadedCount > 0 &&
|
|
1601
|
+
result.stats.errorCount === 0;
|
|
1494
1602
|
|
|
1495
1603
|
if (isSuccessful) {
|
|
1496
|
-
console.log(
|
|
1604
|
+
console.log(
|
|
1605
|
+
` ✅ Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`,
|
|
1606
|
+
);
|
|
1497
1607
|
totalUploaded += result.stats.uploadedCount;
|
|
1498
1608
|
|
|
1499
1609
|
if (result.stats.detectedCount > 0) {
|
|
1500
|
-
console.log(
|
|
1610
|
+
console.log(
|
|
1611
|
+
` 🔍 Files detected: ${result.stats.detectedCount}`,
|
|
1612
|
+
);
|
|
1501
1613
|
}
|
|
1502
1614
|
if (result.stats.organizedCount > 0) {
|
|
1503
|
-
console.log(
|
|
1615
|
+
console.log(
|
|
1616
|
+
` 📁 Files organized: ${result.stats.organizedCount}`,
|
|
1617
|
+
);
|
|
1504
1618
|
}
|
|
1505
1619
|
} else {
|
|
1506
1620
|
console.error(` ❌ Upload failed for ${folderStructure}:`);
|
|
1507
1621
|
if (result.errors && result.errors.length > 0) {
|
|
1508
|
-
result.errors.forEach(error => {
|
|
1622
|
+
result.errors.forEach((error) => {
|
|
1509
1623
|
console.error(` - ${error.fileName}: ${error.error}`);
|
|
1510
1624
|
});
|
|
1511
1625
|
}
|
|
@@ -1513,18 +1627,20 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1513
1627
|
}
|
|
1514
1628
|
|
|
1515
1629
|
// Small delay between path groups
|
|
1516
|
-
await new Promise(resolve => setTimeout(resolve, 100));
|
|
1630
|
+
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
1517
1631
|
}
|
|
1518
|
-
|
|
1519
1632
|
} catch (error) {
|
|
1520
|
-
console.error(
|
|
1633
|
+
console.error(
|
|
1634
|
+
` ❌ Error uploading batch ${batchNumber}:`,
|
|
1635
|
+
error.message,
|
|
1636
|
+
);
|
|
1521
1637
|
totalErrors += filesToUpload.length;
|
|
1522
1638
|
}
|
|
1523
1639
|
}
|
|
1524
1640
|
|
|
1525
1641
|
// Small delay between batches
|
|
1526
1642
|
if (i + batchSize < allRelatedFiles.length) {
|
|
1527
|
-
await new Promise(resolve => setTimeout(resolve, BATCH_DELAY));
|
|
1643
|
+
await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
|
|
1528
1644
|
}
|
|
1529
1645
|
}
|
|
1530
1646
|
|
|
@@ -1568,7 +1684,10 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1568
1684
|
.not('arela_path', 'is', null);
|
|
1569
1685
|
|
|
1570
1686
|
if (pedimentoError) {
|
|
1571
|
-
console.error(
|
|
1687
|
+
console.error(
|
|
1688
|
+
'❌ Error fetching pedimento records:',
|
|
1689
|
+
pedimentoError.message,
|
|
1690
|
+
);
|
|
1572
1691
|
return { processedCount: 0, updatedCount: 0, errorCount: 1 };
|
|
1573
1692
|
}
|
|
1574
1693
|
|
|
@@ -1577,7 +1696,9 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1577
1696
|
return { processedCount: 0, updatedCount: 0, errorCount: 0 };
|
|
1578
1697
|
}
|
|
1579
1698
|
|
|
1580
|
-
console.log(
|
|
1699
|
+
console.log(
|
|
1700
|
+
`📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
|
|
1701
|
+
);
|
|
1581
1702
|
|
|
1582
1703
|
let totalProcessed = 0;
|
|
1583
1704
|
let totalUpdated = 0;
|
|
@@ -1585,7 +1706,8 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1585
1706
|
|
|
1586
1707
|
// Create progress bar
|
|
1587
1708
|
const progressBar = new cliProgress.SingleBar({
|
|
1588
|
-
format:
|
|
1709
|
+
format:
|
|
1710
|
+
'🔄 Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
|
|
1589
1711
|
barCompleteChar: '█',
|
|
1590
1712
|
barIncompleteChar: '░',
|
|
1591
1713
|
hideCursor: true,
|
|
@@ -1611,9 +1733,11 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1611
1733
|
|
|
1612
1734
|
// Extract folder part from existing arela_path by removing the filename
|
|
1613
1735
|
const existingPath = pedimento.arela_path;
|
|
1614
|
-
const folderArelaPath = existingPath.includes('/')
|
|
1615
|
-
existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
|
|
1616
|
-
existingPath.endsWith('/')
|
|
1736
|
+
const folderArelaPath = existingPath.includes('/')
|
|
1737
|
+
? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
|
|
1738
|
+
: existingPath.endsWith('/')
|
|
1739
|
+
? existingPath
|
|
1740
|
+
: existingPath + '/';
|
|
1617
1741
|
|
|
1618
1742
|
console.log(` 🎯 Original arela path: ${existingPath}`);
|
|
1619
1743
|
console.log(` 📁 Folder arela path: ${folderArelaPath}`);
|
|
@@ -1627,7 +1751,10 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1627
1751
|
.neq('id', pedimento.id); // Exclude the pedimento itself
|
|
1628
1752
|
|
|
1629
1753
|
if (relatedError) {
|
|
1630
|
-
console.error(
|
|
1754
|
+
console.error(
|
|
1755
|
+
`❌ Error finding related files for ${pedimento.filename}:`,
|
|
1756
|
+
relatedError.message,
|
|
1757
|
+
);
|
|
1631
1758
|
totalErrors++;
|
|
1632
1759
|
continue;
|
|
1633
1760
|
}
|
|
@@ -1637,11 +1764,13 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1637
1764
|
continue;
|
|
1638
1765
|
}
|
|
1639
1766
|
|
|
1640
|
-
console.log(
|
|
1767
|
+
console.log(
|
|
1768
|
+
` 📄 Found ${relatedFiles.length} related files to update:`,
|
|
1769
|
+
);
|
|
1641
1770
|
|
|
1642
1771
|
// Show first 10 files, then indicate if there are more
|
|
1643
1772
|
const filesToShow = relatedFiles.slice(0, 10);
|
|
1644
|
-
filesToShow.forEach(file => {
|
|
1773
|
+
filesToShow.forEach((file) => {
|
|
1645
1774
|
console.log(` - ${file.filename}`);
|
|
1646
1775
|
});
|
|
1647
1776
|
|
|
@@ -1651,18 +1780,22 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1651
1780
|
|
|
1652
1781
|
// Process files in batches to avoid URI length limitations
|
|
1653
1782
|
const BATCH_SIZE = 50; // Process 50 files at a time
|
|
1654
|
-
const fileIds = relatedFiles.map(f => f.id);
|
|
1783
|
+
const fileIds = relatedFiles.map((f) => f.id);
|
|
1655
1784
|
let batchErrors = 0;
|
|
1656
1785
|
let batchUpdated = 0;
|
|
1657
1786
|
|
|
1658
|
-
console.log(
|
|
1787
|
+
console.log(
|
|
1788
|
+
` 🔄 Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`,
|
|
1789
|
+
);
|
|
1659
1790
|
|
|
1660
1791
|
for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
|
|
1661
1792
|
const batchIds = fileIds.slice(i, i + BATCH_SIZE);
|
|
1662
1793
|
const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
|
|
1663
1794
|
const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
|
|
1664
1795
|
|
|
1665
|
-
console.log(
|
|
1796
|
+
console.log(
|
|
1797
|
+
` 📦 Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
|
|
1798
|
+
);
|
|
1666
1799
|
|
|
1667
1800
|
try {
|
|
1668
1801
|
const { error: updateError } = await supabase
|
|
@@ -1671,33 +1804,45 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1671
1804
|
.in('id', batchIds);
|
|
1672
1805
|
|
|
1673
1806
|
if (updateError) {
|
|
1674
|
-
console.error(
|
|
1807
|
+
console.error(
|
|
1808
|
+
` ❌ Error in batch ${batchNumber}:`,
|
|
1809
|
+
updateError.message,
|
|
1810
|
+
);
|
|
1675
1811
|
batchErrors++;
|
|
1676
1812
|
} else {
|
|
1677
|
-
console.log(
|
|
1813
|
+
console.log(
|
|
1814
|
+
` ✅ Batch ${batchNumber} completed: ${batchIds.length} files updated`,
|
|
1815
|
+
);
|
|
1678
1816
|
batchUpdated += batchIds.length;
|
|
1679
1817
|
}
|
|
1680
1818
|
} catch (error) {
|
|
1681
|
-
console.error(
|
|
1819
|
+
console.error(
|
|
1820
|
+
` ❌ Exception in batch ${batchNumber}:`,
|
|
1821
|
+
error.message,
|
|
1822
|
+
);
|
|
1682
1823
|
batchErrors++;
|
|
1683
1824
|
}
|
|
1684
1825
|
|
|
1685
1826
|
// Small delay between batches to avoid overwhelming the database
|
|
1686
1827
|
if (i + BATCH_SIZE < fileIds.length) {
|
|
1687
|
-
await new Promise(resolve => setTimeout(resolve, 100));
|
|
1828
|
+
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
1688
1829
|
}
|
|
1689
1830
|
}
|
|
1690
1831
|
|
|
1691
1832
|
if (batchErrors > 0) {
|
|
1692
|
-
console.error(
|
|
1833
|
+
console.error(
|
|
1834
|
+
`❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
|
|
1835
|
+
);
|
|
1693
1836
|
totalErrors++;
|
|
1694
1837
|
} else {
|
|
1695
1838
|
console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
|
|
1696
1839
|
totalUpdated += batchUpdated;
|
|
1697
1840
|
}
|
|
1698
|
-
|
|
1699
1841
|
} catch (error) {
|
|
1700
|
-
console.error(
|
|
1842
|
+
console.error(
|
|
1843
|
+
`❌ Error processing ${pedimento.filename}:`,
|
|
1844
|
+
error.message,
|
|
1845
|
+
);
|
|
1701
1846
|
totalErrors++;
|
|
1702
1847
|
}
|
|
1703
1848
|
|
|
@@ -1761,12 +1906,27 @@ program
|
|
|
1761
1906
|
'Automatically detect year/pedimento from file paths',
|
|
1762
1907
|
)
|
|
1763
1908
|
.option('--client-path <path>', 'Client path for metadata tracking')
|
|
1764
|
-
.option(
|
|
1909
|
+
.option(
|
|
1910
|
+
'--stats-only',
|
|
1911
|
+
'Phase 1: Only read filesystem stats and insert to database (no file reading or detection)',
|
|
1912
|
+
)
|
|
1765
1913
|
.option('--no-detect', 'Disable document type detection in stats-only mode')
|
|
1766
|
-
.option(
|
|
1767
|
-
|
|
1768
|
-
|
|
1769
|
-
|
|
1914
|
+
.option(
|
|
1915
|
+
'--detect-pdfs',
|
|
1916
|
+
'Phase 2: Process PDF files in database for pedimento-simplificado detection',
|
|
1917
|
+
)
|
|
1918
|
+
.option(
|
|
1919
|
+
'--propagate-arela-path',
|
|
1920
|
+
'Phase 3: Propagate arela_path from pedimento_simplificado records to related files with same base path',
|
|
1921
|
+
)
|
|
1922
|
+
.option(
|
|
1923
|
+
'--upload-by-rfc',
|
|
1924
|
+
'Phase 4: Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable',
|
|
1925
|
+
)
|
|
1926
|
+
.option(
|
|
1927
|
+
'--run-all-phases',
|
|
1928
|
+
'Run all 4 phases in sequence: stats → detect → propagate → upload',
|
|
1929
|
+
)
|
|
1770
1930
|
.action(async (options) => {
|
|
1771
1931
|
if (options.version) {
|
|
1772
1932
|
console.log(packageVersion);
|
|
@@ -1782,7 +1942,9 @@ program
|
|
|
1782
1942
|
batchSize: parseInt(options.batchSize) || 10,
|
|
1783
1943
|
});
|
|
1784
1944
|
|
|
1785
|
-
console.log(
|
|
1945
|
+
console.log(
|
|
1946
|
+
`✅ Phase 2 Complete: ${result.detectedCount} detected, ${result.errorCount} errors`,
|
|
1947
|
+
);
|
|
1786
1948
|
return;
|
|
1787
1949
|
}
|
|
1788
1950
|
|
|
@@ -1822,8 +1984,12 @@ program
|
|
|
1822
1984
|
// Also initialize Supabase for database queries
|
|
1823
1985
|
if (!supabase) {
|
|
1824
1986
|
if (!supabaseUrl || !supabaseKey) {
|
|
1825
|
-
console.error(
|
|
1826
|
-
|
|
1987
|
+
console.error(
|
|
1988
|
+
'❌ RFC upload requires Supabase credentials for database queries.',
|
|
1989
|
+
);
|
|
1990
|
+
console.error(
|
|
1991
|
+
' Please set SUPABASE_URL and SUPABASE_KEY environment variables.',
|
|
1992
|
+
);
|
|
1827
1993
|
process.exit(1);
|
|
1828
1994
|
}
|
|
1829
1995
|
|
|
@@ -1857,7 +2023,9 @@ program
|
|
|
1857
2023
|
const concurrency = parseInt(options.concurrency) || 10;
|
|
1858
2024
|
|
|
1859
2025
|
if (options.statsOnly) {
|
|
1860
|
-
console.log(
|
|
2026
|
+
console.log(
|
|
2027
|
+
'📊 Mode: Stats Only - Reading file stats and inserting to uploader table',
|
|
2028
|
+
);
|
|
1861
2029
|
console.log('🚫 Files will NOT be uploaded');
|
|
1862
2030
|
if (options.detect !== false) {
|
|
1863
2031
|
console.log('🔍 Document type detection ENABLED for supported files');
|
|
@@ -1965,14 +2133,18 @@ program
|
|
|
1965
2133
|
const detectionResult = await detectPedimentosInDatabase({
|
|
1966
2134
|
batchSize: parseInt(options.batchSize) || 10,
|
|
1967
2135
|
});
|
|
1968
|
-
console.log(
|
|
2136
|
+
console.log(
|
|
2137
|
+
`✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`,
|
|
2138
|
+
);
|
|
1969
2139
|
|
|
1970
2140
|
// Phase 3: Propagate arela_path
|
|
1971
2141
|
console.log('\n📁 === PHASE 3: Propagate Arela Paths ===');
|
|
1972
2142
|
const propagateResult = await propagateArelaPath({
|
|
1973
2143
|
showProgress: options.showStats || true,
|
|
1974
2144
|
});
|
|
1975
|
-
console.log(
|
|
2145
|
+
console.log(
|
|
2146
|
+
`✅ Phase 3 Complete: ${propagateResult.updatedCount || 0} paths propagated`,
|
|
2147
|
+
);
|
|
1976
2148
|
|
|
1977
2149
|
// Phase 4: Upload by RFC
|
|
1978
2150
|
if (uploadRfcs && uploadRfcs.length > 0) {
|
|
@@ -1989,18 +2161,22 @@ program
|
|
|
1989
2161
|
console.log(`✅ Phase 4 Complete: Upload finished`);
|
|
1990
2162
|
} else {
|
|
1991
2163
|
console.log('\n⚠️ === PHASE 4: Upload by RFC ===');
|
|
1992
|
-
console.log(
|
|
2164
|
+
console.log(
|
|
2165
|
+
'⚠️ UPLOAD_RFCS environment variable not configured, skipping Phase 4',
|
|
2166
|
+
);
|
|
1993
2167
|
}
|
|
1994
2168
|
|
|
1995
2169
|
console.log('\n🎉 All 4 phases completed successfully!');
|
|
1996
|
-
|
|
1997
2170
|
} catch (error) {
|
|
1998
2171
|
console.error(`❌ Error in multi-phase execution:`, error.message);
|
|
1999
2172
|
process.exit(1);
|
|
2000
2173
|
}
|
|
2001
2174
|
}
|
|
2002
2175
|
|
|
2003
|
-
if (
|
|
2176
|
+
if (
|
|
2177
|
+
options.showStats &&
|
|
2178
|
+
(sanitizationCache.size > 0 || pathDetectionCache.size > 0)
|
|
2179
|
+
) {
|
|
2004
2180
|
console.log(`📊 Performance Statistics:`);
|
|
2005
2181
|
if (sanitizationCache.size > 0) {
|
|
2006
2182
|
console.log(
|