@arela/uploader 0.2.9 → 0.2.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "0.2.9",
3
+ "version": "0.2.10",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -28,10 +28,10 @@ class Config {
28
28
  const __dirname = path.dirname(__filename);
29
29
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
30
30
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
31
- return packageJson.version || '0.2.9';
31
+ return packageJson.version || '0.2.10';
32
32
  } catch (error) {
33
33
  console.warn('⚠️ Could not read package.json version, using fallback');
34
- return '0.2.9';
34
+ return '0.2.10';
35
35
  }
36
36
  }
37
37
 
@@ -72,10 +72,15 @@ class Config {
72
72
  .map((s) => s.trim())
73
73
  .filter(Boolean);
74
74
 
75
+ const uploadYears = process.env.UPLOAD_YEARS?.split('|')
76
+ .map((s) => s.trim())
77
+ .filter(Boolean);
78
+
75
79
  return {
76
80
  basePath,
77
81
  sources,
78
82
  rfcs: uploadRfcs,
83
+ years: uploadYears,
79
84
  };
80
85
  }
81
86
 
@@ -557,169 +557,226 @@ export class DatabaseService {
557
557
  const supabase = await this.#getSupabaseClient();
558
558
 
559
559
  logger.info('Phase 3: Starting arela_path and year propagation process...');
560
- console.log(
561
- '🔍 Finding pedimento_simplificado records with arela_path and year...',
562
- );
560
+ console.log('🔍 Processing pedimento_simplificado records page by page...');
563
561
 
564
- // Get all pedimento_simplificado records that have arela_path
565
- const { data: pedimentoRecords, error: pedimentoError } = await supabase
566
- .from('uploader')
567
- .select('id, original_path, arela_path, filename, year')
568
- .eq('document_type', 'pedimento_simplificado')
569
- .not('arela_path', 'is', null);
570
-
571
- if (pedimentoError) {
572
- const errorMsg = `Error fetching pedimento records: ${pedimentoError.message}`;
573
- logger.error(errorMsg);
574
- throw new Error(errorMsg);
575
- }
576
-
577
- if (!pedimentoRecords || pedimentoRecords.length === 0) {
578
- logger.info('No pedimento_simplificado records with arela_path found');
562
+ // Log year filtering configuration
563
+ if (appConfig.upload.years && appConfig.upload.years.length > 0) {
564
+ logger.info(
565
+ `🗓️ Year filter enabled: ${appConfig.upload.years.join(', ')}`,
566
+ );
579
567
  console.log(
580
- 'ℹ️ No pedimento_simplificado records with arela_path found',
568
+ `🗓️ Year filter enabled: ${appConfig.upload.years.join(', ')}`,
581
569
  );
582
- return { processedCount: 0, updatedCount: 0, errorCount: 0 };
570
+ } else {
571
+ logger.info('🗓️ No year filter configured - processing all years');
572
+ console.log('🗓️ No year filter configured - processing all years');
583
573
  }
584
574
 
585
- console.log(
586
- `📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
587
- );
588
- logger.info(
589
- `Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
590
- );
591
-
592
575
  let totalProcessed = 0;
593
576
  let totalUpdated = 0;
594
577
  let totalErrors = 0;
578
+ let offset = 0;
579
+ const pageSize = 50;
580
+ let hasMoreData = true;
581
+ let pageNumber = 1;
595
582
  const BATCH_SIZE = 50; // Process files in batches
596
583
 
597
- // Process each pedimento record
598
- for (const pedimento of pedimentoRecords) {
599
- try {
600
- totalProcessed++;
601
-
602
- // Extract base path from original_path (remove filename)
603
- const basePath = path.dirname(pedimento.original_path);
584
+ // Process pedimento records page by page for memory efficiency
585
+ while (hasMoreData) {
586
+ logger.info(
587
+ `Fetching and processing pedimento records page ${pageNumber} (records ${offset + 1} to ${offset + pageSize})...`,
588
+ );
604
589
 
605
- logger.info(
606
- `Processing pedimento: ${pedimento.filename} | Base path: ${basePath} | Year: ${pedimento.year || 'N/A'}`,
607
- );
590
+ let query = supabase
591
+ .from('uploader')
592
+ .select('id, original_path, arela_path, filename, year')
593
+ .eq('document_type', 'pedimento_simplificado')
594
+ .not('arela_path', 'is', null);
608
595
 
609
- // Extract folder part from existing arela_path
610
- const existingPath = pedimento.arela_path;
611
- const folderArelaPath = existingPath.includes('/')
612
- ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
613
- : existingPath.endsWith('/')
614
- ? existingPath
615
- : existingPath + '/';
616
-
617
- // Find all files with the same base path that don't have arela_path yet
618
- // Use pagination to handle cases with more than 1000 related files
619
- let allRelatedFiles = [];
620
- let from = 0;
621
- const pageSize = 1000;
622
- let hasMoreData = true;
623
- let paginationError = null;
624
-
625
- logger.info(`Searching for related files in base path: ${basePath}`);
626
-
627
- while (hasMoreData) {
628
- const { data: relatedFilesPage, error: relatedError } = await supabase
629
- .from('uploader')
630
- .select('id, filename, original_path')
631
- .like('original_path', `${basePath}%`)
632
- .is('arela_path', null)
633
- .neq('id', pedimento.id) // Exclude the pedimento itself
634
- .range(from, from + pageSize - 1);
596
+ // Add year filter if UPLOAD_YEARS is configured
597
+ if (appConfig.upload.years && appConfig.upload.years.length > 0) {
598
+ query = query.in('year', appConfig.upload.years);
599
+ }
635
600
 
636
- if (relatedError) {
637
- logger.error(
638
- `Error finding related files for ${pedimento.filename}: ${relatedError.message}`,
639
- );
640
- paginationError = relatedError;
641
- totalErrors++;
642
- break;
643
- }
601
+ const { data: pedimentoPage, error: pedimentoError } = await query
602
+ .range(offset, offset + pageSize - 1)
603
+ .order('created_at');
644
604
 
645
- if (relatedFilesPage && relatedFilesPage.length > 0) {
646
- allRelatedFiles = allRelatedFiles.concat(relatedFilesPage);
605
+ if (pedimentoError) {
606
+ const errorMsg = `Error fetching pedimento records page ${pageNumber}: ${pedimentoError.message}`;
607
+ logger.error(errorMsg);
608
+ throw new Error(errorMsg);
609
+ }
647
610
 
648
- // Check if we got a full page, indicating there might be more data
649
- if (relatedFilesPage.length < pageSize) {
650
- hasMoreData = false;
651
- } else {
652
- from += pageSize;
653
- logger.info(
654
- `Fetched ${relatedFilesPage.length} files, continuing pagination (total so far: ${allRelatedFiles.length})`,
655
- );
656
- }
657
- } else {
658
- hasMoreData = false;
659
- }
660
- }
611
+ if (!pedimentoPage || pedimentoPage.length === 0) {
612
+ hasMoreData = false;
613
+ logger.info('No more pedimento records found');
614
+ break;
615
+ }
661
616
 
662
- // Continue with error handling if pagination failed
663
- if (paginationError) {
664
- continue;
665
- }
617
+ logger.info(
618
+ `Processing page ${pageNumber}: ${pedimentoPage.length} pedimento records`,
619
+ );
666
620
 
667
- if (!allRelatedFiles || allRelatedFiles.length === 0) {
668
- logger.info(`No related files found for ${pedimento.filename}`);
669
- continue;
670
- }
621
+ // Process each pedimento record in the current page
622
+ for (const pedimento of pedimentoPage) {
623
+ try {
624
+ totalProcessed++;
671
625
 
672
- logger.info(
673
- `Found ${allRelatedFiles.length} related files to update for ${pedimento.filename}`,
674
- );
626
+ // Extract base path from original_path (remove filename)
627
+ const basePath = path.dirname(pedimento.original_path);
675
628
 
676
- // Process files in batches
677
- const fileIds = allRelatedFiles.map((f) => f.id);
629
+ logger.info(
630
+ `Processing pedimento: ${pedimento.filename} | Base path: ${basePath} | Year: ${pedimento.year || 'N/A'}`,
631
+ );
678
632
 
679
- for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
680
- const batchIds = fileIds.slice(i, i + BATCH_SIZE);
681
- const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
682
- const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
633
+ // Extract folder part from existing arela_path
634
+ const existingPath = pedimento.arela_path;
635
+ const folderArelaPath = existingPath.includes('/')
636
+ ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
637
+ : existingPath.endsWith('/')
638
+ ? existingPath
639
+ : existingPath + '/';
640
+
641
+ // Process related files page by page for memory efficiency
642
+ let relatedFilesFrom = 0;
643
+ const relatedFilesPageSize = 50;
644
+ let hasMoreRelatedFiles = true;
645
+ let relatedFilesPageNumber = 1;
646
+ let totalRelatedFilesProcessed = 0;
683
647
 
684
648
  logger.info(
685
- `Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
649
+ `Searching and processing related files in base path: ${basePath}`,
686
650
  );
687
651
 
688
- try {
689
- const { error: updateError } = await supabase
690
- .from('uploader')
691
- .update({
692
- arela_path: folderArelaPath,
693
- year: pedimento.year,
694
- })
695
- .in('id', batchIds);
652
+ while (hasMoreRelatedFiles) {
653
+ const { data: relatedFilesPage, error: relatedError } =
654
+ await supabase
655
+ .from('uploader')
656
+ .select('id, filename, original_path')
657
+ .like('original_path', `${basePath}%`)
658
+ .is('arela_path', null)
659
+ .neq('id', pedimento.id) // Exclude the pedimento itself
660
+ .range(
661
+ relatedFilesFrom,
662
+ relatedFilesFrom + relatedFilesPageSize - 1,
663
+ );
696
664
 
697
- if (updateError) {
665
+ if (relatedError) {
698
666
  logger.error(
699
- `Error in batch ${batchNumber}: ${updateError.message}`,
667
+ `Error finding related files for ${pedimento.filename}: ${relatedError.message}`,
700
668
  );
701
669
  totalErrors++;
670
+ break;
671
+ }
672
+
673
+ if (!relatedFilesPage || relatedFilesPage.length === 0) {
674
+ hasMoreRelatedFiles = false;
675
+ if (totalRelatedFilesProcessed === 0) {
676
+ logger.info(`No related files found for ${pedimento.filename}`);
677
+ }
678
+ break;
679
+ }
680
+
681
+ logger.info(
682
+ `Processing related files page ${relatedFilesPageNumber}: ${relatedFilesPage.length} files for ${pedimento.filename}`,
683
+ );
684
+
685
+ // Process this page of related files in batches
686
+ const pageFileIds = relatedFilesPage.map((f) => f.id);
687
+
688
+ for (let i = 0; i < pageFileIds.length; i += BATCH_SIZE) {
689
+ const batchIds = pageFileIds.slice(i, i + BATCH_SIZE);
690
+ const batchNumber =
691
+ Math.floor(relatedFilesFrom / BATCH_SIZE) +
692
+ Math.floor(i / BATCH_SIZE) +
693
+ 1;
694
+
695
+ logger.info(
696
+ `Updating batch ${batchNumber}: ${batchIds.length} files with arela_path and year...`,
697
+ );
698
+
699
+ try {
700
+ const { error: updateError } = await supabase
701
+ .from('uploader')
702
+ .update({
703
+ arela_path: folderArelaPath,
704
+ year: pedimento.year,
705
+ })
706
+ .in('id', batchIds);
707
+
708
+ if (updateError) {
709
+ logger.error(
710
+ `Error in batch ${batchNumber}: ${updateError.message}`,
711
+ );
712
+ totalErrors++;
713
+ } else {
714
+ totalUpdated += batchIds.length;
715
+ totalRelatedFilesProcessed += batchIds.length;
716
+ logger.info(
717
+ `Successfully updated batch ${batchNumber}: ${batchIds.length} files with arela_path and year`,
718
+ );
719
+ }
720
+ } catch (batchError) {
721
+ logger.error(
722
+ `Exception in batch ${batchNumber}: ${batchError.message}`,
723
+ );
724
+ totalErrors++;
725
+ }
726
+ }
727
+
728
+ // Check if we need to fetch the next page of related files
729
+ if (relatedFilesPage.length < relatedFilesPageSize) {
730
+ hasMoreRelatedFiles = false;
731
+ logger.info(
732
+ `Completed processing related files for ${pedimento.filename}. Total processed: ${totalRelatedFilesProcessed}`,
733
+ );
702
734
  } else {
703
- totalUpdated += batchIds.length;
735
+ relatedFilesFrom += relatedFilesPageSize;
736
+ relatedFilesPageNumber++;
704
737
  logger.info(
705
- `Successfully updated batch ${batchNumber}: ${batchIds.length} files with arela_path and year`,
738
+ `Page ${relatedFilesPageNumber - 1} complete: ${relatedFilesPage.length} files processed, continuing to next page...`,
706
739
  );
707
740
  }
708
- } catch (batchError) {
709
- logger.error(
710
- `Exception in batch ${batchNumber}: ${batchError.message}`,
711
- );
712
- totalErrors++;
713
741
  }
742
+ } catch (error) {
743
+ logger.error(
744
+ `Error processing pedimento ${pedimento.filename}: ${error.message}`,
745
+ );
746
+ totalErrors++;
714
747
  }
715
- } catch (error) {
716
- logger.error(
717
- `Error processing pedimento ${pedimento.filename}: ${error.message}`,
748
+ }
749
+
750
+ // Check if we need to fetch the next page
751
+ if (pedimentoPage.length < pageSize) {
752
+ hasMoreData = false;
753
+ logger.info(
754
+ `Completed processing. Last page ${pageNumber} had ${pedimentoPage.length} records`,
755
+ );
756
+ } else {
757
+ offset += pageSize;
758
+ pageNumber++;
759
+ logger.info(
760
+ `Page ${pageNumber - 1} complete: ${pedimentoPage.length} records processed, moving to next page...`,
718
761
  );
719
- totalErrors++;
720
762
  }
721
763
  }
722
764
 
765
+ // Final summary
766
+ if (totalProcessed === 0) {
767
+ logger.info('No pedimento_simplificado records with arela_path found');
768
+ console.log(
769
+ 'ℹ️ No pedimento_simplificado records with arela_path found',
770
+ );
771
+ } else {
772
+ console.log(
773
+ `📋 Processed ${totalProcessed} pedimento records across ${pageNumber - 1} pages`,
774
+ );
775
+ logger.info(
776
+ `Processed ${totalProcessed} pedimento records across ${pageNumber - 1} pages`,
777
+ );
778
+ }
779
+
723
780
  const result = {
724
781
  processedCount: totalProcessed,
725
782
  updatedCount: totalUpdated,
@@ -777,13 +834,21 @@ export class DatabaseService {
777
834
  console.log(
778
835
  '🎯 Finding pedimento_simplificado records for specified RFCs...',
779
836
  );
837
+
838
+ let pedimentoQuery = supabase
839
+ .from('uploader')
840
+ .select('arela_path')
841
+ .eq('document_type', 'pedimento_simplificado')
842
+ .in('rfc', appConfig.upload.rfcs)
843
+ .not('arela_path', 'is', null);
844
+
845
+ // Add year filter if UPLOAD_YEARS is configured
846
+ if (appConfig.upload.years && appConfig.upload.years.length > 0) {
847
+ pedimentoQuery = pedimentoQuery.in('year', appConfig.upload.years);
848
+ }
849
+
780
850
  const { data: pedimentoRfcRecords, error: pedimentoRfcError } =
781
- await supabase
782
- .from('uploader')
783
- .select('arela_path')
784
- .eq('document_type', 'pedimento_simplificado')
785
- .in('rfc', appConfig.upload.rfcs)
786
- .not('arela_path', 'is', null);
851
+ await pedimentoQuery;
787
852
 
788
853
  if (pedimentoRfcError) {
789
854
  const errorMsg = `Error fetching pedimento RFC records: ${pedimentoRfcError.message}`;
@@ -1037,13 +1102,25 @@ export class DatabaseService {
1037
1102
  console.log(
1038
1103
  '🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
1039
1104
  );
1040
- const { data: pedimentoRecords, error: pedimentoError } = await supabase
1105
+
1106
+ let pedimentoReadyQuery = supabase
1041
1107
  .from('uploader')
1042
1108
  .select('arela_path')
1043
1109
  .eq('document_type', 'pedimento_simplificado')
1044
1110
  .in('rfc', uploadRfcs)
1045
1111
  .not('arela_path', 'is', null);
1046
1112
 
1113
+ // Add year filter if UPLOAD_YEARS is configured
1114
+ if (appConfig.upload.years && appConfig.upload.years.length > 0) {
1115
+ pedimentoReadyQuery = pedimentoReadyQuery.in(
1116
+ 'year',
1117
+ appConfig.upload.years,
1118
+ );
1119
+ }
1120
+
1121
+ const { data: pedimentoRecords, error: pedimentoError } =
1122
+ await pedimentoReadyQuery;
1123
+
1047
1124
  if (pedimentoError) {
1048
1125
  throw new Error(
1049
1126
  `Error querying pedimento_simplificado records: ${pedimentoError.message}`,