@arela/uploader 0.2.10 → 0.2.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.envbackup ADDED
@@ -0,0 +1,37 @@
1
+ # SUPABASE_URL=https://supabase.prod.app.trader-docs.com
2
+ # SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJyb2xlIjoic2VydmljZV9yb2xlIiwiaXNzIjoic3VwYWJhc2UiLCJpYXQiOjE3NDU3MzM2MDAsImV4cCI6MTkwMzUwMDAwMH0.eAoXvaBZzZa31txItwEkoLnplWG10ee-U5GMc265xwk
3
+ # SUPABASE_BUCKET=sample
4
+
5
+ # PATH_VARS=o:/ExpedienteElectronicko/expediente/archivos/:rfcPatente/:patente-:aduana/:year/:pedimento/ALSJDKHF ASD,KFHJA SDFK,ASLDKJFH
6
+
7
+ SUPABASE_URL=http://127.0.0.1:54321
8
+ SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU
9
+ SUPABASE_BUCKET=arela
10
+
11
+ # Arela API Configuration
12
+ ARELA_API_URL=http://localhost:3010
13
+ ARELA_API_TOKEN=f0608f83e5faa4c6be32c19975635c4e9012b31f7249a9f0a9270f54d74ec599
14
+
15
+ # Upload Configuration
16
+ UPLOAD_BASE_PATH=./sample
17
+ UPLOAD_SOURCES=2023|2024
18
+
19
+ # RFC Upload Configuration
20
+ # Pipe-separated list of RFCs to upload files for
21
+ # Example: MMJ0810145N1|ABC1234567XY|DEF9876543ZZ
22
+ UPLOAD_RFCS=AKS151005E46|IMS030409FZ0|RDG1107154L7|SHP031226BV2|CSM9301219B4|LIN960124HT8|LME971009SW4|AKM9707151B6|FEL000822AG2|FDM060802J54|MTM9807279B4|AUM9207011CA|MMJ0810145N1|ACC010328EQ6|PED781129JT6|CAD890407NK7|SME140411IK7|JME1903121C2|EIJ110429NF9|PTJ080414TM6|TME050503BM4
23
+
24
+ # AKS151005E46|IMS030409FZ0|RDG1107154L7|SHP031226BV2|CSM9301219B4|LIN960124HT8|LME971009SW4|AKM9707151B6|FEL000822AG2|PTJ080414TM6|TME050503BM4
25
+ # FDM060802J54|MTM9807279B4|AUM9207011CA|MMJ0810145N1|ACC010328EQ6|PED781129JT6|CAD890407NK7|SME140411IK7|JME1903121C2|EIJ110429NF9
26
+
27
+ # Logging and Verbosity
28
+ VERBOSE_LOGGING=true # Enable verbose path logging (set to false for better performance)
29
+ BATCH_DELAY=50 # Reduce delay between batches (default: 100ms)
30
+ PROGRESS_UPDATE_INTERVAL=20 # Update progress every 20 items (default: 10)
31
+
32
+ # Log Buffering
33
+ LOG_BUFFER_SIZE=200 # Increase buffer size for fewer I/O ops (default: 100)
34
+ LOG_FLUSH_INTERVAL=3000 # Flush logs every 3 seconds (default: 5000ms)
35
+
36
+ # Example for maximum performance
37
+ # VERBOSE_LOGGING=false BATCH_DELAY=25 LOG_BUFFER_SIZE=500 arela --stats-only /path/to/files
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "0.2.10",
3
+ "version": "0.2.11",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -28,10 +28,10 @@ class Config {
28
28
  const __dirname = path.dirname(__filename);
29
29
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
30
30
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
31
- return packageJson.version || '0.2.10';
31
+ return packageJson.version || '0.2.11';
32
32
  } catch (error) {
33
33
  console.warn('⚠️ Could not read package.json version, using fallback');
34
- return '0.2.10';
34
+ return '0.2.11';
35
35
  }
36
36
  }
37
37
 
@@ -316,7 +316,6 @@ export class DatabaseService {
316
316
  filename: record.filename,
317
317
  file_extension: record.file_extension,
318
318
  is_like_simplificado: record.is_like_simplificado,
319
- year: record.year,
320
319
  })
321
320
  .eq('original_path', record.original_path);
322
321
 
@@ -669,6 +668,7 @@ export class DatabaseService {
669
668
  totalErrors++;
670
669
  break;
671
670
  }
671
+ // console.log(`query by basePath: ${basePath} count: [${relatedFilesPage.length}]`);
672
672
 
673
673
  if (!relatedFilesPage || relatedFilesPage.length === 0) {
674
674
  hasMoreRelatedFiles = false;
@@ -682,6 +682,8 @@ export class DatabaseService {
682
682
  `Processing related files page ${relatedFilesPageNumber}: ${relatedFilesPage.length} files for ${pedimento.filename}`,
683
683
  );
684
684
 
685
+ // Track if any updates occurred in this page
686
+ let updatesOccurred = false;
685
687
  // Process this page of related files in batches
686
688
  const pageFileIds = relatedFilesPage.map((f) => f.id);
687
689
 
@@ -713,6 +715,7 @@ export class DatabaseService {
713
715
  } else {
714
716
  totalUpdated += batchIds.length;
715
717
  totalRelatedFilesProcessed += batchIds.length;
718
+ updatesOccurred = true; // Mark that updates occurred
716
719
  logger.info(
717
720
  `Successfully updated batch ${batchNumber}: ${batchIds.length} files with arela_path and year`,
718
721
  );
@@ -727,16 +730,26 @@ export class DatabaseService {
727
730
 
728
731
  // Check if we need to fetch the next page of related files
729
732
  if (relatedFilesPage.length < relatedFilesPageSize) {
733
+ console.log('no more related files for ', basePath);
730
734
  hasMoreRelatedFiles = false;
731
735
  logger.info(
732
736
  `Completed processing related files for ${pedimento.filename}. Total processed: ${totalRelatedFilesProcessed}`,
733
737
  );
734
738
  } else {
735
- relatedFilesFrom += relatedFilesPageSize;
739
+ // If updates occurred, reset pagination to start from beginning
740
+ // since records that matched the query may no longer match after update
741
+ if (updatesOccurred) {
742
+ relatedFilesFrom = 0;
743
+ logger.info(
744
+ `Page ${relatedFilesPageNumber} complete with updates: ${relatedFilesPage.length} files processed, restarting pagination from beginning due to query condition changes...`,
745
+ );
746
+ } else {
747
+ relatedFilesFrom += relatedFilesPageSize - 1;
748
+ logger.info(
749
+ `Page ${relatedFilesPageNumber} complete: ${relatedFilesPage.length} files processed, continuing to next page...`,
750
+ );
751
+ }
736
752
  relatedFilesPageNumber++;
737
- logger.info(
738
- `Page ${relatedFilesPageNumber - 1} complete: ${relatedFilesPage.length} files processed, continuing to next page...`,
739
- );
740
753
  }
741
754
  }
742
755
  } catch (error) {
@@ -830,33 +843,75 @@ export class DatabaseService {
830
843
  logger.info(`Total files for specified RFCs: ${totalRfcFiles || 0}`);
831
844
  }
832
845
 
833
- // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
846
+ // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path using pagination
834
847
  console.log(
835
848
  '🎯 Finding pedimento_simplificado records for specified RFCs...',
836
849
  );
837
850
 
838
- let pedimentoQuery = supabase
839
- .from('uploader')
840
- .select('arela_path')
841
- .eq('document_type', 'pedimento_simplificado')
842
- .in('rfc', appConfig.upload.rfcs)
843
- .not('arela_path', 'is', null);
851
+ let allPedimentoRecords = [];
852
+ let offset = 0;
853
+ const pageSize = 500; // Process pedimento records in pages
854
+ let hasMorePedimentoData = true;
855
+ let pageNumber = 1;
844
856
 
845
- // Add year filter if UPLOAD_YEARS is configured
846
- if (appConfig.upload.years && appConfig.upload.years.length > 0) {
847
- pedimentoQuery = pedimentoQuery.in('year', appConfig.upload.years);
848
- }
857
+ // Process pedimento records page by page for memory efficiency
858
+ while (hasMorePedimentoData) {
859
+ logger.info(
860
+ `Fetching pedimento records page ${pageNumber} (records ${offset + 1} to ${offset + pageSize})...`,
861
+ );
849
862
 
850
- const { data: pedimentoRfcRecords, error: pedimentoRfcError } =
851
- await pedimentoQuery;
863
+ let pedimentoQuery = supabase
864
+ .from('uploader')
865
+ .select('arela_path')
866
+ .eq('document_type', 'pedimento_simplificado')
867
+ .in('rfc', appConfig.upload.rfcs)
868
+ .not('arela_path', 'is', null);
852
869
 
853
- if (pedimentoRfcError) {
854
- const errorMsg = `Error fetching pedimento RFC records: ${pedimentoRfcError.message}`;
855
- logger.error(errorMsg);
856
- throw new Error(errorMsg);
870
+ // Add year filter if UPLOAD_YEARS is configured
871
+ if (appConfig.upload.years && appConfig.upload.years.length > 0) {
872
+ pedimentoQuery = pedimentoQuery.in('year', appConfig.upload.years);
873
+ }
874
+
875
+ const { data: pedimentoPage, error: pedimentoError } =
876
+ await pedimentoQuery
877
+ .range(offset, offset + pageSize - 1)
878
+ .order('created_at');
879
+
880
+ if (pedimentoError) {
881
+ const errorMsg = `Error fetching pedimento RFC records page ${pageNumber}: ${pedimentoError.message}`;
882
+ logger.error(errorMsg);
883
+ throw new Error(errorMsg);
884
+ }
885
+
886
+ if (!pedimentoPage || pedimentoPage.length === 0) {
887
+ hasMorePedimentoData = false;
888
+ logger.info('No more pedimento records found');
889
+ break;
890
+ }
891
+
892
+ logger.info(
893
+ `Processing pedimento page ${pageNumber}: ${pedimentoPage.length} pedimento records`,
894
+ );
895
+
896
+ // Add this page to our collection
897
+ allPedimentoRecords = allPedimentoRecords.concat(pedimentoPage);
898
+
899
+ // Check if we need to fetch the next page
900
+ if (pedimentoPage.length < pageSize) {
901
+ hasMorePedimentoData = false;
902
+ logger.info(
903
+ `Completed fetching pedimento records. Last page ${pageNumber} had ${pedimentoPage.length} records`,
904
+ );
905
+ } else {
906
+ offset += pageSize;
907
+ pageNumber++;
908
+ logger.info(
909
+ `Page ${pageNumber - 1} complete: ${pedimentoPage.length} records fetched, moving to next page...`,
910
+ );
911
+ }
857
912
  }
858
913
 
859
- if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
914
+ if (!allPedimentoRecords || allPedimentoRecords.length === 0) {
860
915
  console.log(
861
916
  'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
862
917
  );
@@ -864,15 +919,15 @@ export class DatabaseService {
864
919
  return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
865
920
  }
866
921
 
867
- // Get unique arela_paths from pedimento records
922
+ // Get unique arela_paths from all pedimento records
868
923
  const uniqueArelaPaths = [
869
- ...new Set(pedimentoRfcRecords.map((r) => r.arela_path)),
924
+ ...new Set(allPedimentoRecords.map((r) => r.arela_path)),
870
925
  ];
871
926
  console.log(
872
- `📋 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`,
927
+ `📋 Found ${allPedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs across ${pageNumber - 1} pages`,
873
928
  );
874
929
  logger.info(
875
- `Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
930
+ `Found ${allPedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths across ${pageNumber - 1} pages`,
876
931
  );
877
932
 
878
933
  // Step 2: Get all files with these arela_paths that haven't been uploaded yet
@@ -937,16 +992,24 @@ export class DatabaseService {
937
992
  `Found ${rfcRecords.length} files ready for upload, ${skipped} skipped`,
938
993
  );
939
994
 
940
- // Step 3: Get ALL files that have these arela_paths (including supporting documents)
941
- let allRelatedFiles = [];
995
+ // Step 3: Process files with streaming pagination to avoid memory overload
996
+ let totalProcessed = 0;
997
+ let totalUploaded = 0;
998
+ let totalErrors = 0;
999
+ let globalFileCount = 0;
942
1000
  const arelaPathChunkSize = 50;
943
- const queryBatchSize = 1000;
1001
+ const queryBatchSize = 500; // Reduced batch size for better memory management
1002
+ const batchSize = parseInt(options.batchSize) || 10;
1003
+
1004
+ // Import performance configuration
1005
+ const { performance: perfConfig } = appConfig;
1006
+ const maxConcurrency = perfConfig?.maxApiConnections || 3;
944
1007
 
945
1008
  console.log(
946
- '📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...',
1009
+ '📥 Processing files with streaming pagination (processing arela_paths in chunks to avoid URI limits and memory overload)...',
947
1010
  );
948
1011
 
949
- // Process arela_paths in chunks
1012
+ // Process arela_paths in chunks and upload files as we fetch them
950
1013
  for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
951
1014
  const arelaPathChunk = uploadableArelaPaths.slice(
952
1015
  i,
@@ -961,9 +1024,10 @@ export class DatabaseService {
961
1024
  ` Processing arela_path chunk ${chunkNumber}/${totalChunks} (${arelaPathChunk.length} paths)`,
962
1025
  );
963
1026
 
964
- // For each chunk of arela_paths, use pagination to get all related files
1027
+ // For each chunk of arela_paths, use pagination to get related files and process them immediately
965
1028
  let hasMore = true;
966
1029
  let offset = 0;
1030
+ let chunkFileCount = 0;
967
1031
 
968
1032
  while (hasMore) {
969
1033
  const { data: batch, error: queryError } = await supabase
@@ -972,7 +1036,8 @@ export class DatabaseService {
972
1036
  .in('arela_path', arelaPathChunk)
973
1037
  .not('original_path', 'is', null)
974
1038
  .neq('status', 'file-uploaded')
975
- .range(offset, offset + queryBatchSize - 1);
1039
+ .range(offset, offset + queryBatchSize - 1)
1040
+ .order('created_at');
976
1041
 
977
1042
  if (queryError) {
978
1043
  const errorMsg = `Error fetching related files for chunk ${chunkNumber}: ${queryError.message}`;
@@ -980,74 +1045,94 @@ export class DatabaseService {
980
1045
  throw new Error(errorMsg);
981
1046
  }
982
1047
 
983
- if (batch && batch.length > 0) {
984
- allRelatedFiles = allRelatedFiles.concat(batch);
1048
+ if (!batch || batch.length === 0) {
1049
+ hasMore = false;
1050
+ break;
985
1051
  }
986
1052
 
987
- hasMore = batch && batch.length === queryBatchSize;
988
- offset += queryBatchSize;
989
- }
990
- }
1053
+ chunkFileCount += batch.length;
1054
+ globalFileCount += batch.length;
991
1055
 
992
- if (!allRelatedFiles || allRelatedFiles.length === 0) {
993
- console.log('ℹ️ No related files found to upload');
994
- logger.info('No related files found to upload');
995
- return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
996
- }
1056
+ console.log(
1057
+ ` 📦 Processing batch within chunk ${chunkNumber}: ${batch.length} files (total processed so far: ${globalFileCount})`,
1058
+ );
997
1059
 
998
- console.log(`📋 Total files to upload: ${allRelatedFiles.length}`);
999
- logger.info(`Total files to upload: ${allRelatedFiles.length}`);
1060
+ // Track if any uploads occurred in this batch
1061
+ let uploadsOccurred = false;
1000
1062
 
1001
- // Step 4: Upload all related files using concurrent batch processing
1002
- let totalProcessed = 0;
1003
- let totalUploaded = 0;
1004
- let totalErrors = 0;
1005
- const batchSize = parseInt(options.batchSize) || 10;
1063
+ // Process this batch of files immediately using concurrent processing
1064
+ // Split batch into upload batches
1065
+ for (let j = 0; j < batch.length; j += batchSize) {
1066
+ const uploadBatch = batch.slice(j, j + batchSize);
1067
+ const batchNum = Math.floor(globalFileCount / batchSize) + 1;
1006
1068
 
1007
- // Import performance configuration
1008
- const { performance: perfConfig } = appConfig;
1009
- const maxConcurrency = perfConfig?.maxApiConnections || 3;
1069
+ console.log(
1070
+ `📦 Processing upload batch ${batchNum} (${uploadBatch.length} files)`,
1071
+ );
1010
1072
 
1011
- console.log(
1012
- `🚀 Starting batch upload: ${allRelatedFiles.length} files in batches of ${batchSize}`,
1013
- );
1014
- console.log(
1015
- `⚡ Concurrent processing: up to ${maxConcurrency} parallel operations`,
1016
- );
1073
+ // Process batch using concurrent processing similar to UploadCommand
1074
+ const batchResults = await this.#processRfcBatch(
1075
+ uploadBatch,
1076
+ uploadService,
1077
+ supabase,
1078
+ options,
1079
+ maxConcurrency,
1080
+ );
1017
1081
 
1018
- // Process files in batches with concurrent processing
1019
- for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
1020
- const batch = allRelatedFiles.slice(i, i + batchSize);
1021
- const batchNum = Math.floor(i / batchSize) + 1;
1022
- const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
1082
+ totalProcessed += batchResults.processed;
1083
+ totalUploaded += batchResults.uploaded;
1084
+ totalErrors += batchResults.errors;
1023
1085
 
1024
- console.log(
1025
- `📦 Processing batch ${batchNum}/${totalBatches} (${batch.length} files)`,
1026
- );
1086
+ // Track if uploads occurred (status changes from non-uploaded to uploaded)
1087
+ if (batchResults.uploaded > 0) {
1088
+ uploadsOccurred = true;
1089
+ }
1027
1090
 
1028
- // Process batch using concurrent processing similar to UploadCommand
1029
- const batchResults = await this.#processRfcBatch(
1030
- batch,
1031
- uploadService,
1032
- supabase,
1033
- options,
1034
- maxConcurrency,
1035
- );
1091
+ console.log(
1092
+ `📊 Upload batch complete - Total progress: ${totalUploaded} uploaded, ${totalErrors} errors`,
1093
+ );
1094
+ }
1095
+
1096
+ // Check if we need more data from this chunk
1097
+ if (batch.length < queryBatchSize) {
1098
+ hasMore = false;
1099
+ } else {
1100
+ // If uploads occurred, reset pagination to start from beginning
1101
+ // since records that matched the query may no longer match after upload
1102
+ if (uploadsOccurred) {
1103
+ offset = 0;
1104
+ console.log(
1105
+ ` 📄 Batch complete with uploads: ${batch.length} files processed, restarting pagination from beginning due to query condition changes...`,
1106
+ );
1107
+ } else {
1108
+ offset += queryBatchSize;
1109
+ console.log(
1110
+ ` 📄 Batch complete: ${batch.length} files processed, continuing to next page (offset: ${offset})...`,
1111
+ );
1112
+ }
1113
+ }
1036
1114
 
1037
- totalProcessed += batchResults.processed;
1038
- totalUploaded += batchResults.uploaded;
1039
- totalErrors += batchResults.errors;
1115
+ if (hasMore) {
1116
+ console.log(
1117
+ ` 📄 Fetching more files for chunk ${chunkNumber}... (offset: ${offset})`,
1118
+ );
1119
+ }
1120
+ }
1040
1121
 
1041
- // Progress update
1042
- const progress = (
1043
- ((i + batch.length) / allRelatedFiles.length) *
1044
- 100
1045
- ).toFixed(1);
1046
1122
  console.log(
1047
- `📊 Batch ${batchNum} complete - Progress: ${progress}% (${totalUploaded}/${allRelatedFiles.length} uploaded)`,
1123
+ ` ✅ Chunk ${chunkNumber}/${totalChunks} complete: ${chunkFileCount} files processed`,
1048
1124
  );
1049
1125
  }
1050
1126
 
1127
+ if (globalFileCount === 0) {
1128
+ console.log('ℹ️ No related files found to upload');
1129
+ logger.info('No related files found to upload');
1130
+ return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
1131
+ }
1132
+
1133
+ console.log(`📋 Total files processed: ${globalFileCount}`);
1134
+ logger.info(`Total files processed: ${globalFileCount}`);
1135
+
1051
1136
  const result = {
1052
1137
  processedCount: totalProcessed,
1053
1138
  uploadedCount: totalUploaded,
package/arela-upload.log DELETED
File without changes