@arela/uploader 0.2.9 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.envbackup ADDED
@@ -0,0 +1,37 @@
1
+ # SUPABASE_URL=https://supabase.prod.app.trader-docs.com
2
+ # SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoic2VydmljZV9yb2xlIiwiaXNzIjoic3VwYWJhc2UiLCJpYXQiOjE3NDU3MzM2MDAsImV4cCI6MTkwMzUwMDAwMH0.eAoXvaBZzZa31txItwEkoLnplWG10ee-U5GMc265xwk
3
+ # SUPABASE_BUCKET=sample
4
+
5
+ # PATH_VARS=o:/ExpedienteElectronicko/expediente/archivos/:rfcPatente/:patente-:aduana/:year/:pedimento/ALSJDKHF ASD,KFHJA SDFK,ASLDKJFH
6
+
7
+ SUPABASE_URL=http://127.0.0.1:54321
8
+ SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU
9
+ SUPABASE_BUCKET=arela
10
+
11
+ # Arela API Configuration
12
+ ARELA_API_URL=http://localhost:3010
13
+ ARELA_API_TOKEN=f0608f83e5faa4c6be32c19975635c4e9012b31f7249a9f0a9270f54d74ec599
14
+
15
+ # Upload Configuration
16
+ UPLOAD_BASE_PATH=./sample
17
+ UPLOAD_SOURCES=2023|2024
18
+
19
+ # RFC Upload Configuration
20
+ # Pipe-separated list of RFCs to upload files for
21
+ # Example: MMJ0810145N1|ABC1234567XY|DEF9876543ZZ
22
+ UPLOAD_RFCS=AKS151005E46|IMS030409FZ0|RDG1107154L7|SHP031226BV2|CSM9301219B4|LIN960124HT8|LME971009SW4|AKM9707151B6|FEL000822AG2|FDM060802J54|MTM9807279B4|AUM9207011CA|MMJ0810145N1|ACC010328EQ6|PED781129JT6|CAD890407NK7|SME140411IK7|JME1903121C2|EIJ110429NF9|PTJ080414TM6|TME050503BM4
23
+
24
+ # AKS151005E46|IMS030409FZ0|RDG1107154L7|SHP031226BV2|CSM9301219B4|LIN960124HT8|LME971009SW4|AKM9707151B6|FEL000822AG2|PTJ080414TM6|TME050503BM4
25
+ # FDM060802J54|MTM9807279B4|AUM9207011CA|MMJ0810145N1|ACC010328EQ6|PED781129JT6|CAD890407NK7|SME140411IK7|JME1903121C2|EIJ110429NF9
26
+
27
+ # Logging and Verbosity
28
+ VERBOSE_LOGGING=true # Verbose path logging is enabled; set to false to disable it for better performance
29
+ BATCH_DELAY=50 # Reduce delay between batches (default: 100ms)
30
+ PROGRESS_UPDATE_INTERVAL=20 # Update progress every 20 items (default: 10)
31
+
32
+ # Log Buffering
33
+ LOG_BUFFER_SIZE=200 # Increase buffer size for fewer I/O ops (default: 100)
34
+ LOG_FLUSH_INTERVAL=3000 # Flush logs every 3 seconds (default: 5000ms)
35
+
36
+ # Example for maximum performance
37
+ # VERBOSE_LOGGING=false BATCH_DELAY=25 LOG_BUFFER_SIZE=500 arela --stats-only /path/to/files
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "0.2.9",
3
+ "version": "0.2.11",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -28,10 +28,10 @@ class Config {
28
28
  const __dirname = path.dirname(__filename);
29
29
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
30
30
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
31
- return packageJson.version || '0.2.9';
31
+ return packageJson.version || '0.2.11';
32
32
  } catch (error) {
33
33
  console.warn('⚠️ Could not read package.json version, using fallback');
34
- return '0.2.9';
34
+ return '0.2.11';
35
35
  }
36
36
  }
37
37
 
@@ -72,10 +72,15 @@ class Config {
72
72
  .map((s) => s.trim())
73
73
  .filter(Boolean);
74
74
 
75
+ const uploadYears = process.env.UPLOAD_YEARS?.split('|')
76
+ .map((s) => s.trim())
77
+ .filter(Boolean);
78
+
75
79
  return {
76
80
  basePath,
77
81
  sources,
78
82
  rfcs: uploadRfcs,
83
+ years: uploadYears,
79
84
  };
80
85
  }
81
86
 
@@ -316,7 +316,6 @@ export class DatabaseService {
316
316
  filename: record.filename,
317
317
  file_extension: record.file_extension,
318
318
  is_like_simplificado: record.is_like_simplificado,
319
- year: record.year,
320
319
  })
321
320
  .eq('original_path', record.original_path);
322
321
 
@@ -557,169 +556,240 @@ export class DatabaseService {
557
556
  const supabase = await this.#getSupabaseClient();
558
557
 
559
558
  logger.info('Phase 3: Starting arela_path and year propagation process...');
560
- console.log(
561
- '🔍 Finding pedimento_simplificado records with arela_path and year...',
562
- );
563
-
564
- // Get all pedimento_simplificado records that have arela_path
565
- const { data: pedimentoRecords, error: pedimentoError } = await supabase
566
- .from('uploader')
567
- .select('id, original_path, arela_path, filename, year')
568
- .eq('document_type', 'pedimento_simplificado')
569
- .not('arela_path', 'is', null);
570
-
571
- if (pedimentoError) {
572
- const errorMsg = `Error fetching pedimento records: ${pedimentoError.message}`;
573
- logger.error(errorMsg);
574
- throw new Error(errorMsg);
575
- }
559
+ console.log('🔍 Processing pedimento_simplificado records page by page...');
576
560
 
577
- if (!pedimentoRecords || pedimentoRecords.length === 0) {
578
- logger.info('No pedimento_simplificado records with arela_path found');
561
+ // Log year filtering configuration
562
+ if (appConfig.upload.years && appConfig.upload.years.length > 0) {
563
+ logger.info(
564
+ `🗓️ Year filter enabled: ${appConfig.upload.years.join(', ')}`,
565
+ );
579
566
  console.log(
580
- 'ℹ️ No pedimento_simplificado records with arela_path found',
567
+ `🗓️ Year filter enabled: ${appConfig.upload.years.join(', ')}`,
581
568
  );
582
- return { processedCount: 0, updatedCount: 0, errorCount: 0 };
569
+ } else {
570
+ logger.info('🗓️ No year filter configured - processing all years');
571
+ console.log('🗓️ No year filter configured - processing all years');
583
572
  }
584
573
 
585
- console.log(
586
- `📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
587
- );
588
- logger.info(
589
- `Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
590
- );
591
-
592
574
  let totalProcessed = 0;
593
575
  let totalUpdated = 0;
594
576
  let totalErrors = 0;
577
+ let offset = 0;
578
+ const pageSize = 50;
579
+ let hasMoreData = true;
580
+ let pageNumber = 1;
595
581
  const BATCH_SIZE = 50; // Process files in batches
596
582
 
597
- // Process each pedimento record
598
- for (const pedimento of pedimentoRecords) {
599
- try {
600
- totalProcessed++;
601
-
602
- // Extract base path from original_path (remove filename)
603
- const basePath = path.dirname(pedimento.original_path);
583
+ // Process pedimento records page by page for memory efficiency
584
+ while (hasMoreData) {
585
+ logger.info(
586
+ `Fetching and processing pedimento records page ${pageNumber} (records ${offset + 1} to ${offset + pageSize})...`,
587
+ );
604
588
 
605
- logger.info(
606
- `Processing pedimento: ${pedimento.filename} | Base path: ${basePath} | Year: ${pedimento.year || 'N/A'}`,
607
- );
589
+ let query = supabase
590
+ .from('uploader')
591
+ .select('id, original_path, arela_path, filename, year')
592
+ .eq('document_type', 'pedimento_simplificado')
593
+ .not('arela_path', 'is', null);
608
594
 
609
- // Extract folder part from existing arela_path
610
- const existingPath = pedimento.arela_path;
611
- const folderArelaPath = existingPath.includes('/')
612
- ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
613
- : existingPath.endsWith('/')
614
- ? existingPath
615
- : existingPath + '/';
616
-
617
- // Find all files with the same base path that don't have arela_path yet
618
- // Use pagination to handle cases with more than 1000 related files
619
- let allRelatedFiles = [];
620
- let from = 0;
621
- const pageSize = 1000;
622
- let hasMoreData = true;
623
- let paginationError = null;
624
-
625
- logger.info(`Searching for related files in base path: ${basePath}`);
626
-
627
- while (hasMoreData) {
628
- const { data: relatedFilesPage, error: relatedError } = await supabase
629
- .from('uploader')
630
- .select('id, filename, original_path')
631
- .like('original_path', `${basePath}%`)
632
- .is('arela_path', null)
633
- .neq('id', pedimento.id) // Exclude the pedimento itself
634
- .range(from, from + pageSize - 1);
595
+ // Add year filter if UPLOAD_YEARS is configured
596
+ if (appConfig.upload.years && appConfig.upload.years.length > 0) {
597
+ query = query.in('year', appConfig.upload.years);
598
+ }
635
599
 
636
- if (relatedError) {
637
- logger.error(
638
- `Error finding related files for ${pedimento.filename}: ${relatedError.message}`,
639
- );
640
- paginationError = relatedError;
641
- totalErrors++;
642
- break;
643
- }
600
+ const { data: pedimentoPage, error: pedimentoError } = await query
601
+ .range(offset, offset + pageSize - 1)
602
+ .order('created_at');
644
603
 
645
- if (relatedFilesPage && relatedFilesPage.length > 0) {
646
- allRelatedFiles = allRelatedFiles.concat(relatedFilesPage);
604
+ if (pedimentoError) {
605
+ const errorMsg = `Error fetching pedimento records page ${pageNumber}: ${pedimentoError.message}`;
606
+ logger.error(errorMsg);
607
+ throw new Error(errorMsg);
608
+ }
647
609
 
648
- // Check if we got a full page, indicating there might be more data
649
- if (relatedFilesPage.length < pageSize) {
650
- hasMoreData = false;
651
- } else {
652
- from += pageSize;
653
- logger.info(
654
- `Fetched ${relatedFilesPage.length} files, continuing pagination (total so far: ${allRelatedFiles.length})`,
655
- );
656
- }
657
- } else {
658
- hasMoreData = false;
659
- }
660
- }
610
+ if (!pedimentoPage || pedimentoPage.length === 0) {
611
+ hasMoreData = false;
612
+ logger.info('No more pedimento records found');
613
+ break;
614
+ }
661
615
 
662
- // Continue with error handling if pagination failed
663
- if (paginationError) {
664
- continue;
665
- }
616
+ logger.info(
617
+ `Processing page ${pageNumber}: ${pedimentoPage.length} pedimento records`,
618
+ );
666
619
 
667
- if (!allRelatedFiles || allRelatedFiles.length === 0) {
668
- logger.info(`No related files found for ${pedimento.filename}`);
669
- continue;
670
- }
620
+ // Process each pedimento record in the current page
621
+ for (const pedimento of pedimentoPage) {
622
+ try {
623
+ totalProcessed++;
671
624
 
672
- logger.info(
673
- `Found ${allRelatedFiles.length} related files to update for ${pedimento.filename}`,
674
- );
625
+ // Extract base path from original_path (remove filename)
626
+ const basePath = path.dirname(pedimento.original_path);
675
627
 
676
- // Process files in batches
677
- const fileIds = allRelatedFiles.map((f) => f.id);
628
+ logger.info(
629
+ `Processing pedimento: ${pedimento.filename} | Base path: ${basePath} | Year: ${pedimento.year || 'N/A'}`,
630
+ );
678
631
 
679
- for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
680
- const batchIds = fileIds.slice(i, i + BATCH_SIZE);
681
- const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
682
- const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
632
+ // Extract folder part from existing arela_path
633
+ const existingPath = pedimento.arela_path;
634
+ const folderArelaPath = existingPath.includes('/')
635
+ ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
636
+ : existingPath.endsWith('/')
637
+ ? existingPath
638
+ : existingPath + '/';
639
+
640
+ // Process related files page by page for memory efficiency
641
+ let relatedFilesFrom = 0;
642
+ const relatedFilesPageSize = 50;
643
+ let hasMoreRelatedFiles = true;
644
+ let relatedFilesPageNumber = 1;
645
+ let totalRelatedFilesProcessed = 0;
683
646
 
684
647
  logger.info(
685
- `Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
648
+ `Searching and processing related files in base path: ${basePath}`,
686
649
  );
687
650
 
688
- try {
689
- const { error: updateError } = await supabase
690
- .from('uploader')
691
- .update({
692
- arela_path: folderArelaPath,
693
- year: pedimento.year,
694
- })
695
- .in('id', batchIds);
651
+ while (hasMoreRelatedFiles) {
652
+ const { data: relatedFilesPage, error: relatedError } =
653
+ await supabase
654
+ .from('uploader')
655
+ .select('id, filename, original_path')
656
+ .like('original_path', `${basePath}%`)
657
+ .is('arela_path', null)
658
+ .neq('id', pedimento.id) // Exclude the pedimento itself
659
+ .range(
660
+ relatedFilesFrom,
661
+ relatedFilesFrom + relatedFilesPageSize - 1,
662
+ );
696
663
 
697
- if (updateError) {
664
+ if (relatedError) {
698
665
  logger.error(
699
- `Error in batch ${batchNumber}: ${updateError.message}`,
666
+ `Error finding related files for ${pedimento.filename}: ${relatedError.message}`,
700
667
  );
701
668
  totalErrors++;
702
- } else {
703
- totalUpdated += batchIds.length;
669
+ break;
670
+ }
671
+ // console.log(`query by basePath: ${basePath} count: [${relatedFilesPage.length}]`);
672
+
673
+ if (!relatedFilesPage || relatedFilesPage.length === 0) {
674
+ hasMoreRelatedFiles = false;
675
+ if (totalRelatedFilesProcessed === 0) {
676
+ logger.info(`No related files found for ${pedimento.filename}`);
677
+ }
678
+ break;
679
+ }
680
+
681
+ logger.info(
682
+ `Processing related files page ${relatedFilesPageNumber}: ${relatedFilesPage.length} files for ${pedimento.filename}`,
683
+ );
684
+
685
+ // Track if any updates occurred in this page
686
+ let updatesOccurred = false;
687
+ // Process this page of related files in batches
688
+ const pageFileIds = relatedFilesPage.map((f) => f.id);
689
+
690
+ for (let i = 0; i < pageFileIds.length; i += BATCH_SIZE) {
691
+ const batchIds = pageFileIds.slice(i, i + BATCH_SIZE);
692
+ const batchNumber =
693
+ Math.floor(relatedFilesFrom / BATCH_SIZE) +
694
+ Math.floor(i / BATCH_SIZE) +
695
+ 1;
696
+
704
697
  logger.info(
705
- `Successfully updated batch ${batchNumber}: ${batchIds.length} files with arela_path and year`,
698
+ `Updating batch ${batchNumber}: ${batchIds.length} files with arela_path and year...`,
706
699
  );
700
+
701
+ try {
702
+ const { error: updateError } = await supabase
703
+ .from('uploader')
704
+ .update({
705
+ arela_path: folderArelaPath,
706
+ year: pedimento.year,
707
+ })
708
+ .in('id', batchIds);
709
+
710
+ if (updateError) {
711
+ logger.error(
712
+ `Error in batch ${batchNumber}: ${updateError.message}`,
713
+ );
714
+ totalErrors++;
715
+ } else {
716
+ totalUpdated += batchIds.length;
717
+ totalRelatedFilesProcessed += batchIds.length;
718
+ updatesOccurred = true; // Mark that updates occurred
719
+ logger.info(
720
+ `Successfully updated batch ${batchNumber}: ${batchIds.length} files with arela_path and year`,
721
+ );
722
+ }
723
+ } catch (batchError) {
724
+ logger.error(
725
+ `Exception in batch ${batchNumber}: ${batchError.message}`,
726
+ );
727
+ totalErrors++;
728
+ }
729
+ }
730
+
731
+ // Check if we need to fetch the next page of related files
732
+ if (relatedFilesPage.length < relatedFilesPageSize) {
733
+ console.log('no more related files for ', basePath);
734
+ hasMoreRelatedFiles = false;
735
+ logger.info(
736
+ `Completed processing related files for ${pedimento.filename}. Total processed: ${totalRelatedFilesProcessed}`,
737
+ );
738
+ } else {
739
+ // If updates occurred, reset pagination to start from beginning
740
+ // since records that matched the query may no longer match after update
741
+ if (updatesOccurred) {
742
+ relatedFilesFrom = 0;
743
+ logger.info(
744
+ `Page ${relatedFilesPageNumber} complete with updates: ${relatedFilesPage.length} files processed, restarting pagination from beginning due to query condition changes...`,
745
+ );
746
+ } else {
747
+ relatedFilesFrom += relatedFilesPageSize - 1;
748
+ logger.info(
749
+ `Page ${relatedFilesPageNumber} complete: ${relatedFilesPage.length} files processed, continuing to next page...`,
750
+ );
751
+ }
752
+ relatedFilesPageNumber++;
707
753
  }
708
- } catch (batchError) {
709
- logger.error(
710
- `Exception in batch ${batchNumber}: ${batchError.message}`,
711
- );
712
- totalErrors++;
713
754
  }
755
+ } catch (error) {
756
+ logger.error(
757
+ `Error processing pedimento ${pedimento.filename}: ${error.message}`,
758
+ );
759
+ totalErrors++;
714
760
  }
715
- } catch (error) {
716
- logger.error(
717
- `Error processing pedimento ${pedimento.filename}: ${error.message}`,
761
+ }
762
+
763
+ // Check if we need to fetch the next page
764
+ if (pedimentoPage.length < pageSize) {
765
+ hasMoreData = false;
766
+ logger.info(
767
+ `Completed processing. Last page ${pageNumber} had ${pedimentoPage.length} records`,
768
+ );
769
+ } else {
770
+ offset += pageSize;
771
+ pageNumber++;
772
+ logger.info(
773
+ `Page ${pageNumber - 1} complete: ${pedimentoPage.length} records processed, moving to next page...`,
718
774
  );
719
- totalErrors++;
720
775
  }
721
776
  }
722
777
 
778
+ // Final summary
779
+ if (totalProcessed === 0) {
780
+ logger.info('No pedimento_simplificado records with arela_path found');
781
+ console.log(
782
+ 'ℹ️ No pedimento_simplificado records with arela_path found',
783
+ );
784
+ } else {
785
+ console.log(
786
+ `📋 Processed ${totalProcessed} pedimento records across ${pageNumber - 1} pages`,
787
+ );
788
+ logger.info(
789
+ `Processed ${totalProcessed} pedimento records across ${pageNumber - 1} pages`,
790
+ );
791
+ }
792
+
723
793
  const result = {
724
794
  processedCount: totalProcessed,
725
795
  updatedCount: totalUpdated,
@@ -773,25 +843,75 @@ export class DatabaseService {
773
843
  logger.info(`Total files for specified RFCs: ${totalRfcFiles || 0}`);
774
844
  }
775
845
 
776
- // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
846
+ // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path using pagination
777
847
  console.log(
778
848
  '🎯 Finding pedimento_simplificado records for specified RFCs...',
779
849
  );
780
- const { data: pedimentoRfcRecords, error: pedimentoRfcError } =
781
- await supabase
850
+
851
+ let allPedimentoRecords = [];
852
+ let offset = 0;
853
+ const pageSize = 500; // Process pedimento records in pages
854
+ let hasMorePedimentoData = true;
855
+ let pageNumber = 1;
856
+
857
+ // Fetch pedimento records page by page; each page is appended to allPedimentoRecords below
858
+ while (hasMorePedimentoData) {
859
+ logger.info(
860
+ `Fetching pedimento records page ${pageNumber} (records ${offset + 1} to ${offset + pageSize})...`,
861
+ );
862
+
863
+ let pedimentoQuery = supabase
782
864
  .from('uploader')
783
865
  .select('arela_path')
784
866
  .eq('document_type', 'pedimento_simplificado')
785
867
  .in('rfc', appConfig.upload.rfcs)
786
868
  .not('arela_path', 'is', null);
787
869
 
788
- if (pedimentoRfcError) {
789
- const errorMsg = `Error fetching pedimento RFC records: ${pedimentoRfcError.message}`;
790
- logger.error(errorMsg);
791
- throw new Error(errorMsg);
870
+ // Add year filter if UPLOAD_YEARS is configured
871
+ if (appConfig.upload.years && appConfig.upload.years.length > 0) {
872
+ pedimentoQuery = pedimentoQuery.in('year', appConfig.upload.years);
873
+ }
874
+
875
+ const { data: pedimentoPage, error: pedimentoError } =
876
+ await pedimentoQuery
877
+ .range(offset, offset + pageSize - 1)
878
+ .order('created_at');
879
+
880
+ if (pedimentoError) {
881
+ const errorMsg = `Error fetching pedimento RFC records page ${pageNumber}: ${pedimentoError.message}`;
882
+ logger.error(errorMsg);
883
+ throw new Error(errorMsg);
884
+ }
885
+
886
+ if (!pedimentoPage || pedimentoPage.length === 0) {
887
+ hasMorePedimentoData = false;
888
+ logger.info('No more pedimento records found');
889
+ break;
890
+ }
891
+
892
+ logger.info(
893
+ `Processing pedimento page ${pageNumber}: ${pedimentoPage.length} pedimento records`,
894
+ );
895
+
896
+ // Add this page to our collection
897
+ allPedimentoRecords = allPedimentoRecords.concat(pedimentoPage);
898
+
899
+ // Check if we need to fetch the next page
900
+ if (pedimentoPage.length < pageSize) {
901
+ hasMorePedimentoData = false;
902
+ logger.info(
903
+ `Completed fetching pedimento records. Last page ${pageNumber} had ${pedimentoPage.length} records`,
904
+ );
905
+ } else {
906
+ offset += pageSize;
907
+ pageNumber++;
908
+ logger.info(
909
+ `Page ${pageNumber - 1} complete: ${pedimentoPage.length} records fetched, moving to next page...`,
910
+ );
911
+ }
792
912
  }
793
913
 
794
- if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
914
+ if (!allPedimentoRecords || allPedimentoRecords.length === 0) {
795
915
  console.log(
796
916
  'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
797
917
  );
@@ -799,15 +919,15 @@ export class DatabaseService {
799
919
  return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
800
920
  }
801
921
 
802
- // Get unique arela_paths from pedimento records
922
+ // Get unique arela_paths from all pedimento records
803
923
  const uniqueArelaPaths = [
804
- ...new Set(pedimentoRfcRecords.map((r) => r.arela_path)),
924
+ ...new Set(allPedimentoRecords.map((r) => r.arela_path)),
805
925
  ];
806
926
  console.log(
807
- `📋 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`,
927
+ `📋 Found ${allPedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs across ${pageNumber - 1} pages`,
808
928
  );
809
929
  logger.info(
810
- `Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
930
+ `Found ${allPedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths across ${pageNumber - 1} pages`,
811
931
  );
812
932
 
813
933
  // Step 2: Get all files with these arela_paths that haven't been uploaded yet
@@ -872,16 +992,24 @@ export class DatabaseService {
872
992
  `Found ${rfcRecords.length} files ready for upload, ${skipped} skipped`,
873
993
  );
874
994
 
875
- // Step 3: Get ALL files that have these arela_paths (including supporting documents)
876
- let allRelatedFiles = [];
995
+ // Step 3: Process files with streaming pagination to avoid memory overload
996
+ let totalProcessed = 0;
997
+ let totalUploaded = 0;
998
+ let totalErrors = 0;
999
+ let globalFileCount = 0;
877
1000
  const arelaPathChunkSize = 50;
878
- const queryBatchSize = 1000;
1001
+ const queryBatchSize = 500; // Reduced batch size for better memory management
1002
+ const batchSize = parseInt(options.batchSize) || 10;
1003
+
1004
+ // Read performance configuration from appConfig
1005
+ const { performance: perfConfig } = appConfig;
1006
+ const maxConcurrency = perfConfig?.maxApiConnections || 3;
879
1007
 
880
1008
  console.log(
881
- '📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...',
1009
+ '📥 Processing files with streaming pagination (processing arela_paths in chunks to avoid URI limits and memory overload)...',
882
1010
  );
883
1011
 
884
- // Process arela_paths in chunks
1012
+ // Process arela_paths in chunks and upload files as we fetch them
885
1013
  for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
886
1014
  const arelaPathChunk = uploadableArelaPaths.slice(
887
1015
  i,
@@ -896,9 +1024,10 @@ export class DatabaseService {
896
1024
  ` Processing arela_path chunk ${chunkNumber}/${totalChunks} (${arelaPathChunk.length} paths)`,
897
1025
  );
898
1026
 
899
- // For each chunk of arela_paths, use pagination to get all related files
1027
+ // For each chunk of arela_paths, use pagination to get related files and process them immediately
900
1028
  let hasMore = true;
901
1029
  let offset = 0;
1030
+ let chunkFileCount = 0;
902
1031
 
903
1032
  while (hasMore) {
904
1033
  const { data: batch, error: queryError } = await supabase
@@ -907,7 +1036,8 @@ export class DatabaseService {
907
1036
  .in('arela_path', arelaPathChunk)
908
1037
  .not('original_path', 'is', null)
909
1038
  .neq('status', 'file-uploaded')
910
- .range(offset, offset + queryBatchSize - 1);
1039
+ .range(offset, offset + queryBatchSize - 1)
1040
+ .order('created_at');
911
1041
 
912
1042
  if (queryError) {
913
1043
  const errorMsg = `Error fetching related files for chunk ${chunkNumber}: ${queryError.message}`;
@@ -915,74 +1045,94 @@ export class DatabaseService {
915
1045
  throw new Error(errorMsg);
916
1046
  }
917
1047
 
918
- if (batch && batch.length > 0) {
919
- allRelatedFiles = allRelatedFiles.concat(batch);
1048
+ if (!batch || batch.length === 0) {
1049
+ hasMore = false;
1050
+ break;
920
1051
  }
921
1052
 
922
- hasMore = batch && batch.length === queryBatchSize;
923
- offset += queryBatchSize;
924
- }
925
- }
1053
+ chunkFileCount += batch.length;
1054
+ globalFileCount += batch.length;
926
1055
 
927
- if (!allRelatedFiles || allRelatedFiles.length === 0) {
928
- console.log('ℹ️ No related files found to upload');
929
- logger.info('No related files found to upload');
930
- return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
931
- }
1056
+ console.log(
1057
+ ` 📦 Processing batch within chunk ${chunkNumber}: ${batch.length} files (total processed so far: ${globalFileCount})`,
1058
+ );
932
1059
 
933
- console.log(`📋 Total files to upload: ${allRelatedFiles.length}`);
934
- logger.info(`Total files to upload: ${allRelatedFiles.length}`);
1060
+ // Track if any uploads occurred in this batch
1061
+ let uploadsOccurred = false;
935
1062
 
936
- // Step 4: Upload all related files using concurrent batch processing
937
- let totalProcessed = 0;
938
- let totalUploaded = 0;
939
- let totalErrors = 0;
940
- const batchSize = parseInt(options.batchSize) || 10;
1063
+ // Process this batch of files immediately using concurrent processing
1064
+ // Split batch into upload batches
1065
+ for (let j = 0; j < batch.length; j += batchSize) {
1066
+ const uploadBatch = batch.slice(j, j + batchSize);
1067
+ const batchNum = Math.floor(globalFileCount / batchSize) + 1;
941
1068
 
942
- // Import performance configuration
943
- const { performance: perfConfig } = appConfig;
944
- const maxConcurrency = perfConfig?.maxApiConnections || 3;
1069
+ console.log(
1070
+ `📦 Processing upload batch ${batchNum} (${uploadBatch.length} files)`,
1071
+ );
945
1072
 
946
- console.log(
947
- `🚀 Starting batch upload: ${allRelatedFiles.length} files in batches of ${batchSize}`,
948
- );
949
- console.log(
950
- `⚡ Concurrent processing: up to ${maxConcurrency} parallel operations`,
951
- );
1073
+ // Process batch using concurrent processing similar to UploadCommand
1074
+ const batchResults = await this.#processRfcBatch(
1075
+ uploadBatch,
1076
+ uploadService,
1077
+ supabase,
1078
+ options,
1079
+ maxConcurrency,
1080
+ );
952
1081
 
953
- // Process files in batches with concurrent processing
954
- for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
955
- const batch = allRelatedFiles.slice(i, i + batchSize);
956
- const batchNum = Math.floor(i / batchSize) + 1;
957
- const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
1082
+ totalProcessed += batchResults.processed;
1083
+ totalUploaded += batchResults.uploaded;
1084
+ totalErrors += batchResults.errors;
958
1085
 
959
- console.log(
960
- `📦 Processing batch ${batchNum}/${totalBatches} (${batch.length} files)`,
961
- );
1086
+ // Track if uploads occurred (status changes from non-uploaded to uploaded)
1087
+ if (batchResults.uploaded > 0) {
1088
+ uploadsOccurred = true;
1089
+ }
962
1090
 
963
- // Process batch using concurrent processing similar to UploadCommand
964
- const batchResults = await this.#processRfcBatch(
965
- batch,
966
- uploadService,
967
- supabase,
968
- options,
969
- maxConcurrency,
970
- );
1091
+ console.log(
1092
+ `📊 Upload batch complete - Total progress: ${totalUploaded} uploaded, ${totalErrors} errors`,
1093
+ );
1094
+ }
971
1095
 
972
- totalProcessed += batchResults.processed;
973
- totalUploaded += batchResults.uploaded;
974
- totalErrors += batchResults.errors;
1096
+ // Check if we need more data from this chunk
1097
+ if (batch.length < queryBatchSize) {
1098
+ hasMore = false;
1099
+ } else {
1100
+ // If uploads occurred, reset pagination to start from beginning
1101
+ // since records that matched the query may no longer match after upload
1102
+ if (uploadsOccurred) {
1103
+ offset = 0;
1104
+ console.log(
1105
+ ` 📄 Batch complete with uploads: ${batch.length} files processed, restarting pagination from beginning due to query condition changes...`,
1106
+ );
1107
+ } else {
1108
+ offset += queryBatchSize;
1109
+ console.log(
1110
+ ` 📄 Batch complete: ${batch.length} files processed, continuing to next page (offset: ${offset})...`,
1111
+ );
1112
+ }
1113
+ }
1114
+
1115
+ if (hasMore) {
1116
+ console.log(
1117
+ ` 📄 Fetching more files for chunk ${chunkNumber}... (offset: ${offset})`,
1118
+ );
1119
+ }
1120
+ }
975
1121
 
976
- // Progress update
977
- const progress = (
978
- ((i + batch.length) / allRelatedFiles.length) *
979
- 100
980
- ).toFixed(1);
981
1122
  console.log(
982
- `📊 Batch ${batchNum} complete - Progress: ${progress}% (${totalUploaded}/${allRelatedFiles.length} uploaded)`,
1123
+ ` ✅ Chunk ${chunkNumber}/${totalChunks} complete: ${chunkFileCount} files processed`,
983
1124
  );
984
1125
  }
985
1126
 
1127
+ if (globalFileCount === 0) {
1128
+ console.log('ℹ️ No related files found to upload');
1129
+ logger.info('No related files found to upload');
1130
+ return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
1131
+ }
1132
+
1133
+ console.log(`📋 Total files processed: ${globalFileCount}`);
1134
+ logger.info(`Total files processed: ${globalFileCount}`);
1135
+
986
1136
  const result = {
987
1137
  processedCount: totalProcessed,
988
1138
  uploadedCount: totalUploaded,
@@ -1037,13 +1187,25 @@ export class DatabaseService {
1037
1187
  console.log(
1038
1188
  '🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
1039
1189
  );
1040
- const { data: pedimentoRecords, error: pedimentoError } = await supabase
1190
+
1191
+ let pedimentoReadyQuery = supabase
1041
1192
  .from('uploader')
1042
1193
  .select('arela_path')
1043
1194
  .eq('document_type', 'pedimento_simplificado')
1044
1195
  .in('rfc', uploadRfcs)
1045
1196
  .not('arela_path', 'is', null);
1046
1197
 
1198
+ // Add year filter if UPLOAD_YEARS is configured
1199
+ if (appConfig.upload.years && appConfig.upload.years.length > 0) {
1200
+ pedimentoReadyQuery = pedimentoReadyQuery.in(
1201
+ 'year',
1202
+ appConfig.upload.years,
1203
+ );
1204
+ }
1205
+
1206
+ const { data: pedimentoRecords, error: pedimentoError } =
1207
+ await pedimentoReadyQuery;
1208
+
1047
1209
  if (pedimentoError) {
1048
1210
  throw new Error(
1049
1211
  `Error querying pedimento_simplificado records: ${pedimentoError.message}`,
package/arela-upload.log DELETED
File without changes