@arela/uploader 0.2.6 → 0.2.8

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry. It is provided for informational purposes only.
package/.env.template CHANGED
@@ -1,20 +1,74 @@
-# Test environment configuration for arela-uploader
-# Copy this to .env and update with your actual values
+# Arela Uploader Environment Configuration
+# Copy this to your .env file and adjust values for your setup
 
-# Supabase Configuration
-SUPABASE_URL=https://your-project.supabase.co
-SUPABASE_KEY=your-supabase-anon-key
-SUPABASE_BUCKET=your-bucket-name
+# =============================================================================
+# BASIC CONFIGURATION
+# =============================================================================
 
 # Arela API Configuration
 ARELA_API_URL=https://your-arela-api-url.com
-ARELA_API_TOKEN=your-api-token
+ARELA_API_TOKEN=your-api-token-here
+
+# Supabase Configuration (fallback)
+SUPABASE_URL=https://your-supabase-url.supabase.co
+SUPABASE_KEY=your-supabase-key-here
+SUPABASE_BUCKET=your-bucket-name
 
-# Upload Configuration
-UPLOAD_BASE_PATH=/Users/your-username/documents
+# Upload Sources (separate with |)
+UPLOAD_BASE_PATH=/path/to/your/upload/base
 UPLOAD_SOURCES=folder1|folder2|folder3
+UPLOAD_RFCS=rfc1|rfc2|rfc3
+
+# =============================================================================
+# PERFORMANCE OPTIMIZATION FOR MULTIPLE API REPLICAS
+# =============================================================================
+
+# API Connection Configuration
+# Set this to match your number of API replicas (e.g., if you have 10 API instances, set to 10)
+MAX_API_CONNECTIONS=10
+
+# API Connection Timeout (milliseconds)
+API_CONNECTION_TIMEOUT=60000
+
+# Batch Processing Configuration
+# Files processed concurrently per batch (should be >= MAX_API_CONNECTIONS for best performance)
+BATCH_SIZE=100
+
+# Delay between batches (0 for maximum speed)
+BATCH_DELAY=0
+
+# Source Processing Concurrency
+# Number of upload sources/folders to process simultaneously
+MAX_CONCURRENT_SOURCES=2
+
+# =============================================================================
+# EXAMPLE CONFIGURATIONS FOR DIFFERENT SCENARIOS
+# =============================================================================
+
+# For 10 API Replicas (High Performance Setup):
+# MAX_API_CONNECTIONS=10
+# BATCH_SIZE=100
+# MAX_CONCURRENT_SOURCES=3
+# BATCH_DELAY=0
+
+# For 5 API Replicas (Medium Performance Setup):
+# MAX_API_CONNECTIONS=5
+# BATCH_SIZE=50
+# MAX_CONCURRENT_SOURCES=2
+# BATCH_DELAY=0
+
+# For 1 API Instance (Single Instance Setup):
+# MAX_API_CONNECTIONS=5
+# BATCH_SIZE=20
+# MAX_CONCURRENT_SOURCES=1
+# BATCH_DELAY=100
+
+# =============================================================================
+# LOGGING AND MONITORING
+# =============================================================================
+
+# Progress bar update frequency
+PROGRESS_UPDATE_INTERVAL=10
 
-# RFC Upload Configuration
-# Pipe-separated list of RFCs to upload files for
-# Example: MMJ0810145N1|ABC1234567XY|DEF9876543ZZ
-UPLOAD_RFCS=RFC1|RFC2|RFC3
+# Enable verbose logging (true/false)
+VERBOSE_LOGGING=false
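
The performance variables above are consumed by the CLI's config loader (see the Config changes further down). A minimal sketch of that mapping, using the same `parseInt(...) || default` pattern the package itself uses:

    // Sketch: how the new env vars map to config keys (names taken from the diff below)
    const performance = {
      maxApiConnections: parseInt(process.env.MAX_API_CONNECTIONS) || 10,
      apiConnectionTimeout: parseInt(process.env.API_CONNECTION_TIMEOUT) || 60000,
      batchSize: parseInt(process.env.BATCH_SIZE) || 50,
      batchDelay: parseInt(process.env.BATCH_DELAY) || 0,
      maxConcurrentSources: parseInt(process.env.MAX_CONCURRENT_SOURCES) || 2,
      progressUpdateInterval: parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10,
    };
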
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@arela/uploader",
-  "version": "0.2.6",
+  "version": "0.2.8",
   "description": "CLI to upload files/directories to Arela",
   "bin": {
     "arela": "./src/index.js"
@@ -50,7 +50,7 @@ export class UploadCommand {
       logger.info('Log file cleared');
     }
 
-    // Process each source
+    // Process each source with configurable concurrency
     let globalResults = {
       successCount: 0,
       detectedCount: 0,
@@ -59,28 +59,88 @@ export class UploadCommand {
       skippedCount: 0,
     };
 
-    for (const source of sources) {
-      const sourcePath = path.resolve(basePath, source).replace(/\\/g, '/');
-      logger.info(`Processing folder: ${sourcePath}`);
-
-      try {
-        const files = await this.#discoverFiles(sourcePath);
-        logger.info(`Found ${files.length} files to process`);
-
-        const result = await this.#processFilesInBatches(
-          files,
-          options,
-          uploadService,
-          basePath,
-          source,
-          sourcePath,
-        );
+    // Determine processing strategy based on configuration
+    const maxConcurrentSources =
+      appConfig.performance?.maxConcurrentSources || 1;
+
+    if (maxConcurrentSources > 1 && sources.length > 1) {
+      // Parallel source processing
+      logger.info(
+        `Processing ${sources.length} sources with concurrency: ${maxConcurrentSources}`,
+      );
 
-        this.#updateGlobalResults(globalResults, result);
-        this.#logSourceSummary(source, result, options);
-      } catch (error) {
-        this.errorHandler.handleError(error, { source, sourcePath });
-        globalResults.failureCount++;
+      // Process sources in batches to control concurrency
+      for (let i = 0; i < sources.length; i += maxConcurrentSources) {
+        const sourceBatch = sources.slice(i, i + maxConcurrentSources);
+
+        const sourcePromises = sourceBatch.map(async (source) => {
+          const sourcePath = path
+            .resolve(basePath, source)
+            .replace(/\\/g, '/');
+          logger.info(`Processing folder: ${sourcePath}`);
+
+          try {
+            const files = await this.#discoverFiles(sourcePath);
+            logger.info(`Found ${files.length} files in ${source}`);
+
+            const result = await this.#processFilesInBatches(
+              files,
+              options,
+              uploadService,
+              basePath,
+              source,
+              sourcePath,
+            );
+
+            this.#logSourceSummary(source, result, options);
+            return { success: true, source, result };
+          } catch (error) {
+            this.errorHandler.handleError(error, { source, sourcePath });
+            return { success: false, source, error: error.message };
+          }
+        });
+
+        // Wait for this batch of sources to complete
+        const results = await Promise.allSettled(sourcePromises);
+
+        results.forEach((result) => {
+          if (result.status === 'fulfilled') {
+            const sourceResult = result.value;
+            if (sourceResult.success) {
+              this.#updateGlobalResults(globalResults, sourceResult.result);
+            } else {
+              globalResults.failureCount++;
+            }
+          } else {
+            globalResults.failureCount++;
+          }
+        });
+      }
+    } else {
+      // Sequential source processing (original behavior)
+      for (const source of sources) {
+        const sourcePath = path.resolve(basePath, source).replace(/\\/g, '/');
+        logger.info(`Processing folder: ${sourcePath}`);
+
+        try {
+          const files = await this.#discoverFiles(sourcePath);
+          logger.info(`Found ${files.length} files to process`);
+
+          const result = await this.#processFilesInBatches(
+            files,
+            options,
+            uploadService,
+            basePath,
+            source,
+            sourcePath,
+          );
+
+          this.#updateGlobalResults(globalResults, result);
+          this.#logSourceSummary(source, result, options);
+        } catch (error) {
+          this.errorHandler.handleError(error, { source, sourcePath });
+          globalResults.failureCount++;
+        }
       }
     }
 
@@ -164,7 +224,8 @@ export class UploadCommand {
     source,
     sourcePath,
   ) {
-    const batchSize = parseInt(options.batchSize) || 10;
+    const batchSize =
+      parseInt(options.batchSize) || appConfig.performance.batchSize || 50;
     const results = {
       successCount: 0,
       detectedCount: 0,
@@ -184,6 +245,9 @@ export class UploadCommand {
       barCompleteChar: '█',
       barIncompleteChar: '░',
       hideCursor: true,
+      clearOnComplete: false,
+      stopOnComplete: true,
+      stream: process.stderr, // Use stderr to separate from stdout logging
     });
 
     progressBar.start(files.length, 0, { success: 0, errors: 0 });
@@ -265,22 +329,65 @@ export class UploadCommand {
        throw new Error(`Failed to insert stats: ${error.message}`);
      }
    } else {
-      // Upload mode: process files for upload
-      for (const filePath of batch) {
-        try {
-          await this.#processFile(
-            filePath,
-            options,
-            uploadService,
-            basePath,
-            processedPaths,
-            batchResults,
-          );
-        } catch (error) {
-          this.errorHandler.handleError(error, { filePath });
-          batchResults.failureCount++;
+      // Upload mode: process files with controlled concurrency to match API replicas
+      const maxConcurrentApiCalls =
+        appConfig.performance?.maxApiConnections || 10;
+
+      // Process batch in chunks to respect API replica limits
+      const allResults = [];
+      for (let i = 0; i < batch.length; i += maxConcurrentApiCalls) {
+        const chunk = batch.slice(i, i + maxConcurrentApiCalls);
+
+        // Process this chunk concurrently (up to API replica count)
+        const chunkPromises = chunk.map(async (filePath) => {
+          try {
+            const result = await this.#processFile(
+              filePath,
+              options,
+              uploadService,
+              basePath,
+              processedPaths,
+            );
+            return { success: true, filePath, result };
+          } catch (error) {
+            this.errorHandler.handleError(error, { filePath });
+            return { success: false, filePath, error: error.message };
+          }
+        });
+
+        // Wait for this chunk to complete before starting the next
+        const chunkResults = await Promise.allSettled(chunkPromises);
+        allResults.push(...chunkResults);
+
+        // Small delay between chunks to prevent overwhelming API
+        if (i + maxConcurrentApiCalls < batch.length) {
+          await new Promise((resolve) => setTimeout(resolve, 50));
        }
      }
+
+      // Process all results and update batch results
+      allResults.forEach((result) => {
+        if (result.status === 'fulfilled') {
+          const fileResult = result.value;
+          if (fileResult.success) {
+            if (fileResult.result && fileResult.result.skipped) {
+              batchResults.skippedCount++;
+            } else {
+              batchResults.successCount++;
+              if (fileResult.result && fileResult.result.detectedCount) {
+                batchResults.detectedCount += fileResult.result.detectedCount;
+              }
+              if (fileResult.result && fileResult.result.organizedCount) {
+                batchResults.organizedCount += fileResult.result.organizedCount;
+              }
+            }
+          } else {
+            batchResults.failureCount++;
+          }
+        } else {
+          batchResults.failureCount++;
+        }
+      });
    }
 
    return batchResults;
@@ -296,12 +403,10 @@ export class UploadCommand {
     uploadService,
     basePath,
     processedPaths,
-    batchResults,
   ) {
     // Skip if already processed
     if (processedPaths.has(filePath)) {
-      batchResults.skippedCount++;
-      return;
+      return { skipped: true };
     }
 
     // Prepare file for upload
@@ -325,24 +430,25 @@ export class UploadCommand {
     };
 
     // Upload based on service type
+    let result = { successCount: 1 };
+
     if (uploadService.getServiceName() === 'Arela API') {
-      const result = await uploadService.upload([fileObject], {
+      result = await uploadService.upload([fileObject], {
         ...options,
         uploadPath,
       });
-
-      batchResults.successCount++;
-      if (result.detectedCount)
-        batchResults.detectedCount += result.detectedCount;
-      if (result.organizedCount)
-        batchResults.organizedCount += result.organizedCount;
     } else {
       // Supabase direct upload
       await uploadService.upload([fileObject], { uploadPath });
-      batchResults.successCount++;
     }
 
     logger.info(`SUCCESS: ${path.basename(filePath)} -> ${uploadPath}`);
+
+    return {
+      skipped: false,
+      detectedCount: result.detectedCount || 0,
+      organizedCount: result.organizedCount || 0,
+    };
   }
 
   /**
@@ -428,7 +534,8 @@ export class UploadCommand {
     // Phase 2: PDF Detection
     console.log('\n🔍 === PHASE 2: PDF Detection ===');
     const detectionResult = await databaseService.detectPedimentosInDatabase({
-      batchSize: parseInt(options.batchSize) || 10,
+      batchSize:
+        parseInt(options.batchSize) || appConfig.performance.batchSize || 50,
     });
     console.log(
       `✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`,
@@ -28,10 +28,10 @@ class Config {
     const __dirname = path.dirname(__filename);
     const packageJsonPath = path.resolve(__dirname, '../../package.json');
     const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
-    return packageJson.version || '0.2.6';
+    return packageJson.version || '0.2.8';
   } catch (error) {
     console.warn('⚠️ Could not read package.json version, using fallback');
-    return '0.2.6';
+    return '0.2.8';
   }
 }
 
@@ -85,7 +85,12 @@ class Config {
    */
   #loadPerformanceConfig() {
     return {
-      batchDelay: parseInt(process.env.BATCH_DELAY) || 100,
+      batchDelay: parseInt(process.env.BATCH_DELAY) || 0, // Removed default delay
+      batchSize: parseInt(process.env.BATCH_SIZE) || 50, // Increased from 10 to 50
+      maxConcurrentSources: parseInt(process.env.MAX_CONCURRENT_SOURCES) || 2,
+      maxApiConnections: parseInt(process.env.MAX_API_CONNECTIONS) || 10, // New: API replica support
+      apiConnectionTimeout:
+        parseInt(process.env.API_CONNECTION_TIMEOUT) || 60000, // New: API timeout
       progressUpdateInterval:
         parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10,
       logBufferSize: 100,
@@ -104,16 +104,28 @@ export class DatabaseService {
     }
 
     // Initialize record with basic file stats
+    const fileExtension = path
+      .extname(file.path)
+      .toLowerCase()
+      .replace('.', '');
+    const filename = file.originalName || path.basename(file.path);
+
     const record = {
       document_type: null,
       size: stats.size,
       num_pedimento: null,
-      filename: file.originalName || path.basename(file.path),
+      filename: filename,
       original_path: originalPath,
       arela_path: null,
       status: 'stats',
       rfc: null,
       message: null,
+      file_extension: fileExtension,
+      is_like_simplificado:
+        fileExtension === 'pdf' && filename.toLowerCase().includes('simp'),
+      year: null,
+      created_at: new Date().toISOString(),
+      modified_at: stats.mtime.toISOString(),
     };
 
     // Try to detect document type for supported files
@@ -130,6 +142,10 @@ export class DatabaseService {
       record.arela_path = detection.arelaPath;
     }
 
+    if (detection.detectedPedimentoYear) {
+      record.year = detection.detectedPedimentoYear;
+    }
+
     const rfcField = detection.fields.find(
       (f) => f.name === 'rfc' && f.found,
     );
@@ -210,6 +226,12 @@ export class DatabaseService {
       file_extension: fileExtension,
       created_at: new Date().toISOString(),
       modified_at: stats.mtime.toISOString(),
+      is_like_simplificado:
+        fileExtension === 'pdf' &&
+        (file.originalName || path.basename(file.path))
+          .toLowerCase()
+          .includes('simp'),
+      year: null,
     };
 
     allRecords.push(record);
@@ -258,9 +280,15 @@ export class DatabaseService {
       existingPaths.has(r.original_path),
     );
 
-    logger.info(
-      `Batch ${Math.floor(i / batchSize) + 1}: ${newRecords.length} new, ${updateRecords.length} updates`,
-    );
+    // Only log every 10th batch to reduce noise
+    if (
+      (Math.floor(i / batchSize) + 1) % 10 === 0 ||
+      Math.floor(i / batchSize) + 1 === 1
+    ) {
+      logger.info(
+        `Batch ${Math.floor(i / batchSize) + 1}: ${newRecords.length} new, ${updateRecords.length} updates`,
+      );
+    }
 
     // Insert new records
     if (newRecords.length > 0) {
@@ -272,7 +300,7 @@ export class DatabaseService {
         logger.error(`Error inserting new records: ${insertError.message}`);
       } else {
         totalInserted += newRecords.length;
-        logger.success(`Inserted ${newRecords.length} new records`);
+        // Only log the batch insertion, not the summary (which comes at the end)
       }
     }
 
@@ -287,6 +315,8 @@ export class DatabaseService {
           modified_at: record.modified_at,
           filename: record.filename,
           file_extension: record.file_extension,
+          is_like_simplificado: record.is_like_simplificado,
+          year: record.year,
         })
         .eq('original_path', record.original_path);
 
@@ -295,7 +325,10 @@ export class DatabaseService {
         }
       }
       totalUpdated += batchUpdated;
-      logger.info(`Updated ${batchUpdated} existing records`);
+      // Reduce logging noise - only log when there are updates
+      if (batchUpdated > 0) {
+        logger.info(`Updated ${batchUpdated} existing records`);
+      }
     }
   } catch (error) {
     logger.error(
@@ -329,7 +362,7 @@ export class DatabaseService {
 
     const processingBatchSize = parseInt(options.batchSize) || 10;
     // Reduced query batch size to avoid timeouts
-    const queryBatchSize = 500; // Reduced from 1000 to 500
+    const queryBatchSize = 100; // Reduced from 500 to 100
 
     let totalDetected = 0;
     let totalProcessed = 0;
@@ -355,9 +388,9 @@ export class DatabaseService {
         .select('id, original_path, filename, file_extension, status')
         .eq('status', 'fs-stats')
         .eq('file_extension', 'pdf')
-        .ilike('filename', '%simp%')
+        .eq('is_like_simplificado', true)
         .range(offset, offset + queryBatchSize - 1)
-        .order('id'); // Add explicit ordering for consistent pagination
+        .order('created_at');
     }, `fetch PDF records chunk ${chunkNumber}`);
 
     if (queryError) {
@@ -416,6 +449,7 @@ export class DatabaseService {
         num_pedimento: detection.detectedPedimento,
         arela_path: detection.arelaPath,
         message: detection.error || null,
+        year: detection.detectedPedimentoYear || null,
       };
 
       if (detection.fields) {
@@ -522,13 +556,15 @@ export class DatabaseService {
   async propagateArelaPath(options = {}) {
     const supabase = await this.#getSupabaseClient();
 
-    logger.info('Phase 3: Starting arela_path propagation process...');
-    console.log('🔍 Finding pedimento_simplificado records with arela_path...');
+    logger.info('Phase 3: Starting arela_path and year propagation process...');
+    console.log(
+      '🔍 Finding pedimento_simplificado records with arela_path and year...',
+    );
 
     // Get all pedimento_simplificado records that have arela_path
     const { data: pedimentoRecords, error: pedimentoError } = await supabase
       .from('uploader')
-      .select('id, original_path, arela_path, filename')
+      .select('id, original_path, arela_path, filename, year')
       .eq('document_type', 'pedimento_simplificado')
       .not('arela_path', 'is', null);
 
@@ -567,7 +603,7 @@ export class DatabaseService {
       const basePath = path.dirname(pedimento.original_path);
 
       logger.info(
-        `Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`,
+        `Processing pedimento: ${pedimento.filename} | Base path: ${basePath} | Year: ${pedimento.year || 'N/A'}`,
       );
 
       // Extract folder part from existing arela_path
@@ -618,7 +654,10 @@ export class DatabaseService {
       try {
         const { error: updateError } = await supabase
           .from('uploader')
-          .update({ arela_path: folderArelaPath })
+          .update({
+            arela_path: folderArelaPath,
+            year: pedimento.year,
+          })
           .in('id', batchIds);
 
         if (updateError) {
@@ -629,7 +668,7 @@ export class DatabaseService {
         } else {
           totalUpdated += batchIds.length;
           logger.info(
-            `Successfully updated batch ${batchNumber}: ${batchIds.length} files`,
+            `Successfully updated batch ${batchNumber}: ${batchIds.length} files with arela_path and year`,
           );
         }
       } catch (batchError) {
@@ -654,7 +693,7 @@ export class DatabaseService {
     };
 
     logger.success(
-      `Phase 3 Summary: ${totalProcessed} pedimentos processed, ${totalUpdated} files updated, ${totalErrors} errors`,
+      `Phase 3 Summary: ${totalProcessed} pedimentos processed, ${totalUpdated} files updated with arela_path and year, ${totalErrors} errors`,
     );
 
     return result;
@@ -860,133 +899,54 @@ export class DatabaseService {
     console.log(`📋 Total files to upload: ${allRelatedFiles.length}`);
     logger.info(`Total files to upload: ${allRelatedFiles.length}`);
 
-    // Step 4: Upload all related files
+    // Step 4: Upload all related files using concurrent batch processing
     let totalProcessed = 0;
     let totalUploaded = 0;
     let totalErrors = 0;
     const batchSize = parseInt(options.batchSize) || 10;
 
-    for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
-      const batch = allRelatedFiles.slice(i, i + batchSize);
-
-      for (const file of batch) {
-        try {
-          totalProcessed++;
-
-          // Check if file exists
-          if (!fs.existsSync(file.original_path)) {
-            logger.warn(
-              `File not found: ${file.filename} at ${file.original_path}`,
-            );
-            await supabase
-              .from('uploader')
-              .update({
-                status: 'file-not-found',
-                message: 'File no longer exists at original path',
-              })
-              .eq('id', file.id);
-            totalErrors++;
-            continue;
-          }
-
-          // Upload the file (handle both API and Supabase services)
-          let uploadResult;
-          if (uploadService.getServiceName() === 'Supabase') {
-            // Supabase requires single file upload with uploadPath
-            let uploadPath;
-            if (options.folderStructure && file.arela_path) {
-              // Combine folder structure with arela_path: palco/RFC/Year/Patente/Aduana/Pedimento/filename
-              uploadPath = `uploads/${options.folderStructure}/${file.arela_path}${file.filename}`;
-            } else if (file.arela_path) {
-              // Use existing arela_path: RFC/Year/Patente/Aduana/Pedimento/filename
-              uploadPath = `uploads/${file.arela_path}${file.filename}`;
-            } else {
-              // Fallback to RFC folder
-              uploadPath = `uploads/${file.rfc}/${file.filename}`;
-            }
+    // Import performance configuration
+    const { performance: perfConfig } = appConfig;
+    const maxConcurrency = perfConfig?.maxApiConnections || 3;
 
-            uploadResult = await uploadService.upload(
-              [
-                {
-                  path: file.original_path,
-                  name: file.filename,
-                  contentType: 'application/octet-stream',
-                },
-              ],
-              {
-                uploadPath: uploadPath,
-              },
-            );
-            uploadResult = { success: true, data: uploadResult };
-          } else {
-            // API service supports batch uploads and returns normalized response
-            let fullFolderStructure;
-            if (options.folderStructure && file.arela_path) {
-              // Combine folder structure with arela_path: palco/RFC/Year/Patente/Aduana/Pedimento/
-              fullFolderStructure = `${options.folderStructure}/${file.arela_path}`;
-            } else if (file.arela_path) {
-              // Use existing arela_path: RFC/Year/Patente/Aduana/Pedimento/
-              fullFolderStructure = file.arela_path;
-            } else {
-              // Fallback to RFC folder
-              fullFolderStructure = `${file.rfc}/`;
-            }
+    console.log(
+      `🚀 Starting batch upload: ${allRelatedFiles.length} files in batches of ${batchSize}`,
+    );
+    console.log(
+      `⚡ Concurrent processing: up to ${maxConcurrency} parallel operations`,
+    );
 
-            uploadResult = await uploadService.upload(
-              [
-                {
-                  path: file.original_path,
-                  name: file.filename,
-                  contentType: 'application/octet-stream',
-                },
-              ],
-              {
-                folderStructure: fullFolderStructure,
-              },
-            );
-          }
+    // Process files in batches with concurrent processing
+    for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
+      const batch = allRelatedFiles.slice(i, i + batchSize);
+      const batchNum = Math.floor(i / batchSize) + 1;
+      const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
 
-          if (uploadResult.success) {
-            // Update database status
-            await supabase
-              .from('uploader')
-              .update({
-                status: 'file-uploaded',
-                message: 'Successfully uploaded to Arela API',
-              })
-              .eq('id', file.id);
+      console.log(
+        `📦 Processing batch ${batchNum}/${totalBatches} (${batch.length} files)`,
+      );
 
-            totalUploaded++;
-            logger.info(`Uploaded: ${file.filename}`);
-          } else {
-            await supabase
-              .from('uploader')
-              .update({
-                status: 'upload-error',
-                message: uploadResult.error || 'Upload failed',
-              })
-              .eq('id', file.id);
+      // Process batch using concurrent processing similar to UploadCommand
+      const batchResults = await this.#processRfcBatch(
+        batch,
+        uploadService,
+        supabase,
+        options,
+        maxConcurrency,
+      );
 
-            totalErrors++;
-            logger.error(
-              `Upload failed: ${file.filename} - ${uploadResult.error}`,
-            );
-          }
-        } catch (error) {
-          totalErrors++;
-          logger.error(
-            `Error processing file ${file.filename}: ${error.message}`,
-          );
+      totalProcessed += batchResults.processed;
+      totalUploaded += batchResults.uploaded;
+      totalErrors += batchResults.errors;
 
-          await supabase
-            .from('uploader')
-            .update({
-              status: 'upload-error',
-              message: `Processing error: ${error.message}`,
-            })
-            .eq('id', file.id);
-        }
-      }
+      // Progress update
+      const progress = (
+        ((i + batch.length) / allRelatedFiles.length) *
+        100
+      ).toFixed(1);
+      console.log(
+        `📊 Batch ${batchNum} complete - Progress: ${progress}% (${totalUploaded}/${allRelatedFiles.length} uploaded)`,
+      );
     }
 
     const result = {
@@ -1133,6 +1093,298 @@ export class DatabaseService {
 
     return readyFiles || [];
   }
+
+  /**
+   * Process a batch of files using concurrent processing for RFC uploads
+   * @param {Array} files - Files to process in this batch
+   * @param {Object} uploadService - Upload service instance
+   * @param {Object} supabase - Supabase client
+   * @param {Object} options - Upload options
+   * @param {number} maxConcurrency - Maximum concurrent operations
+   * @returns {Promise<Object>} Batch processing results
+   */
+  async #processRfcBatch(
+    files,
+    uploadService,
+    supabase,
+    options,
+    maxConcurrency,
+  ) {
+    const fs = (await import('fs')).default;
+
+    let processed = 0;
+    let uploaded = 0;
+    let errors = 0;
+
+    // For Supabase, process files individually (required by service)
+    if (uploadService.getServiceName() === 'Supabase') {
+      // Process files in concurrent chunks within the batch
+      const chunks = [];
+      for (let i = 0; i < files.length; i += maxConcurrency) {
+        chunks.push(files.slice(i, i + maxConcurrency));
+      }
+
+      // Process each chunk concurrently
+      for (const chunk of chunks) {
+        const chunkPromises = chunk.map(async (file) => {
+          return await this.#processRfcSingleFile(
+            file,
+            uploadService,
+            supabase,
+            options,
+            fs,
+          );
+        });
+
+        // Wait for all files in this chunk to complete
+        const chunkResults = await Promise.allSettled(chunkPromises);
+
+        // Count results
+        for (const result of chunkResults) {
+          processed++;
+          if (result.status === 'fulfilled' && result.value.success) {
+            uploaded++;
+          } else {
+            errors++;
+          }
+        }
+      }
+    } else {
+      // For API service, use true batch processing (multiple files per API call)
+      const apiChunks = [];
+      const apiChunkSize = Math.min(
+        5,
+        Math.ceil(files.length / maxConcurrency),
+      ); // 5 files per API call, or distribute evenly
+
+      for (let i = 0; i < files.length; i += apiChunkSize) {
+        apiChunks.push(files.slice(i, i + apiChunkSize));
+      }
+
+      console.log(
+        `  🚀 Processing ${apiChunks.length} API calls with ${apiChunkSize} files each (max ${maxConcurrency} concurrent)`,
+      );
+
+      // Process API chunks with controlled concurrency
+      const concurrentChunks = [];
+      for (let i = 0; i < apiChunks.length; i += maxConcurrency) {
+        concurrentChunks.push(apiChunks.slice(i, i + maxConcurrency));
+      }
+
+      for (const concurrentSet of concurrentChunks) {
+        const batchPromises = concurrentSet.map(async (chunk) => {
+          return await this.#processRfcApiBatch(
+            chunk,
+            uploadService,
+            supabase,
+            options,
+            fs,
+          );
+        });
+
+        // Wait for all concurrent batches to complete
+        const batchResults = await Promise.allSettled(batchPromises);
+
+        // Count results
+        for (const result of batchResults) {
+          if (result.status === 'fulfilled') {
+            processed += result.value.processed;
+            uploaded += result.value.uploaded;
+            errors += result.value.errors;
+          } else {
+            errors += result.value?.processed || 0;
+          }
+        }
+      }
+    }
+
+    return { processed, uploaded, errors };
+  }
+
+  /**
+   * Process a single file for RFC upload (Supabase mode)
+   */
+  async #processRfcSingleFile(file, uploadService, supabase, options, fs) {
+    try {
+      // Check if file exists
+      if (!fs.existsSync(file.original_path)) {
+        logger.warn(
+          `File not found: ${file.filename} at ${file.original_path}`,
+        );
+        await supabase
+          .from('uploader')
+          .update({
+            status: 'file-not-found',
+            message: 'File no longer exists at original path',
+          })
+          .eq('id', file.id);
+        return { success: false, error: 'File not found' };
+      }
+
+      // Supabase requires single file upload with uploadPath
+      let uploadPath;
+      if (options.folderStructure && file.arela_path) {
+        uploadPath = `uploads/${options.folderStructure}/${file.arela_path}${file.filename}`;
+      } else if (file.arela_path) {
+        uploadPath = `uploads/${file.arela_path}${file.filename}`;
+      } else {
+        uploadPath = `uploads/${file.rfc}/${file.filename}`;
+      }
+
+      const uploadResult = await uploadService.upload(
+        [
+          {
+            path: file.original_path,
+            name: file.filename,
+            contentType: 'application/octet-stream',
+          },
+        ],
+        { uploadPath: uploadPath },
+      );
+
+      // Update database status
+      await supabase
+        .from('uploader')
+        .update({
+          status: 'file-uploaded',
+          message: 'Successfully uploaded to Supabase',
+        })
+        .eq('id', file.id);
+
+      logger.info(`✅ Uploaded: ${file.filename}`);
+      return { success: true, filename: file.filename };
+    } catch (error) {
+      logger.error(
+        `❌ Error processing file ${file.filename}: ${error.message}`,
+      );
+
+      await supabase
+        .from('uploader')
+        .update({
+          status: 'upload-error',
+          message: `Processing error: ${error.message}`,
+        })
+        .eq('id', file.id);
+
+      return { success: false, error: error.message, filename: file.filename };
+    }
+  }
+
+  /**
+   * Process multiple files in a single API batch call (API service mode)
+   */
+  async #processRfcApiBatch(files, uploadService, supabase, options, fs) {
+    let processed = 0;
+    let uploaded = 0;
+    let errors = 0;
+
+    try {
+      // Prepare files for batch upload
+      const validFiles = [];
+      const invalidFiles = [];
+
+      for (const file of files) {
+        processed++;
+
+        if (!fs.existsSync(file.original_path)) {
+          logger.warn(
+            `File not found: ${file.filename} at ${file.original_path}`,
+          );
+          invalidFiles.push(file);
+          continue;
+        }
+
+        validFiles.push({
+          fileData: {
+            path: file.original_path,
+            name: file.filename,
+            contentType: 'application/octet-stream',
+          },
+          dbRecord: file,
+        });
+      }
+
+      // Update invalid files in database
+      for (const file of invalidFiles) {
+        await supabase
+          .from('uploader')
+          .update({
+            status: 'file-not-found',
+            message: 'File no longer exists at original path',
+          })
+          .eq('id', file.id);
+        errors++;
+      }
+
+      // Process valid files in batch if any exist
+      if (validFiles.length > 0) {
+        // Determine folder structure (all files in this batch should have same arela_path)
+        const sampleFile = validFiles[0].dbRecord;
+        let fullFolderStructure;
+        if (options.folderStructure && sampleFile.arela_path) {
+          fullFolderStructure = `${options.folderStructure}/${sampleFile.arela_path}`;
+        } else if (sampleFile.arela_path) {
+          fullFolderStructure = sampleFile.arela_path;
+        } else {
+          fullFolderStructure = `${sampleFile.rfc}/`;
+        }
+
+        // Make single API call with multiple files
+        const uploadResult = await uploadService.upload(
+          validFiles.map((f) => f.fileData),
+          { folderStructure: fullFolderStructure },
+        );
+
+        if (uploadResult.success) {
+          // Update all files as uploaded
+          const fileIds = validFiles.map((f) => f.dbRecord.id);
+          await supabase
+            .from('uploader')
+            .update({
+              status: 'file-uploaded',
+              message: 'Successfully uploaded to Arela API (batch)',
+            })
+            .in('id', fileIds);
+
+          uploaded += validFiles.length;
+          logger.info(
+            `✅ Batch uploaded: ${validFiles.length} files to ${fullFolderStructure}`,
+          );
+        } else {
+          // Update all files as failed
+          const fileIds = validFiles.map((f) => f.dbRecord.id);
+          await supabase
+            .from('uploader')
+            .update({
+              status: 'upload-error',
+              message: uploadResult.error || 'Batch upload failed',
+            })
+            .in('id', fileIds);
+
+          errors += validFiles.length;
+          logger.error(
+            `❌ Batch upload failed: ${validFiles.length} files - ${uploadResult.error}`,
+          );
+        }
+      }
+    } catch (error) {
+      logger.error(`❌ Error processing batch: ${error.message}`);
+
+      // Mark all files as failed
+      const fileIds = files.map((f) => f.id);
+      await supabase
+        .from('uploader')
+        .update({
+          status: 'upload-error',
+          message: `Batch processing error: ${error.message}`,
+        })
+        .in('id', fileIds);
+
+      errors += files.length;
+    }
+
+    return { processed, uploaded, errors };
+  }
 }
 
 // Export singleton instance
@@ -1,6 +1,8 @@
 import { Blob } from 'buffer';
 import { FormData } from 'formdata-node';
 import fs from 'fs';
+import { Agent } from 'http';
+import { Agent as HttpsAgent } from 'https';
 import fetch from 'node-fetch';
 import path from 'path';
 
@@ -16,6 +18,36 @@ export class ApiUploadService extends BaseUploadService {
     super();
     this.baseUrl = appConfig.api.baseUrl;
     this.token = appConfig.api.token;
+
+    // Get API connection settings from config/environment
+    const maxApiConnections = parseInt(process.env.MAX_API_CONNECTIONS) || 10;
+    const connectionTimeout =
+      parseInt(process.env.API_CONNECTION_TIMEOUT) || 60000;
+
+    // Initialize HTTP agents optimized for multiple API replicas
+    this.httpAgent = new Agent({
+      keepAlive: true,
+      keepAliveMsecs: 30000,
+      maxSockets: maxApiConnections, // Match your API replica count
+      maxFreeSockets: Math.ceil(maxApiConnections / 2),
+      maxTotalSockets: maxApiConnections + 5, // Buffer for peak usage
+      timeout: connectionTimeout,
+      scheduling: 'fifo', // First-in-first-out scheduling
+    });
+
+    this.httpsAgent = new HttpsAgent({
+      keepAlive: true,
+      keepAliveMsecs: 30000,
+      maxSockets: maxApiConnections, // Match your API replica count
+      maxFreeSockets: Math.ceil(maxApiConnections / 2),
+      maxTotalSockets: maxApiConnections + 5, // Buffer for peak usage
+      timeout: connectionTimeout,
+      scheduling: 'fifo', // First-in-first-out scheduling
+    });
+
+    console.log(
+      `🔗 HTTP Agent configured for ${maxApiConnections} concurrent API connections`,
+    );
   }
 
   /**
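
If the project is on node-fetch v2 (which the `agent` request option used below suggests), protocol-based agent selection can also be delegated to node-fetch itself: the `agent` option accepts a function that receives the parsed request URL. A sketch of that variant, assuming node-fetch v2:

    // Constructor sketch: one selector instead of an isHttps check per call site
    this.agentSelector = (parsedUrl) =>
      parsedUrl.protocol === 'https:' ? this.httpsAgent : this.httpAgent;

    // Any request can then pass the same option:
    // const response = await fetch(url, { agent: this.agentSelector });
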
@@ -27,12 +59,31 @@ export class ApiUploadService extends BaseUploadService {
   async upload(files, options) {
     const formData = new FormData();
 
-    // Add files to form data
-    files.forEach((file) => {
-      const fileBuffer = fs.readFileSync(file.path);
-      const blob = new Blob([fileBuffer], { type: file.contentType });
-      formData.append('files', blob, file.name);
-    });
+    // Add files to form data asynchronously
+    for (const file of files) {
+      try {
+        // Check file size for streaming vs buffer approach
+        const stats = await fs.promises.stat(file.path);
+        const fileSizeThreshold = 10 * 1024 * 1024; // 10MB threshold
+
+        if (stats.size > fileSizeThreshold) {
+          // Use streaming for large files
+          const fileStream = fs.createReadStream(file.path);
+          formData.append('files', fileStream, {
+            filename: file.name,
+            contentType: file.contentType,
+            knownLength: stats.size,
+          });
+        } else {
+          // Use buffer for smaller files
+          const fileBuffer = await fs.promises.readFile(file.path);
+          const blob = new Blob([fileBuffer], { type: file.contentType });
+          formData.append('files', blob, file.name);
+        }
+      } catch (error) {
+        throw new Error(`Failed to read file ${file.path}: ${error.message}`);
+      }
+    }
 
     // Add configuration parameters
     if (appConfig.supabase.bucket) {
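
Note that the `(stream, { filename, contentType, knownLength })` append signature used for large files follows the form-data package's API; the spec-compliant FormData imported here from formdata-node expects Blob/File-like values instead. If the formdata-node route is intended, the library ships a streaming file helper; a sketch, assuming formdata-node 4.x:

    import { fileFromPath } from 'formdata-node/file-from-path';

    // Streams the file from disk instead of buffering it in memory
    const file = await fileFromPath('/path/to/large.pdf', 'large.pdf', {
      type: 'application/pdf',
    });
    formData.append('files', file);
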
@@ -61,6 +112,7 @@ export class ApiUploadService extends BaseUploadService {
     formData.append('clientVersion', appConfig.packageVersion);
 
     try {
+      const isHttps = this.baseUrl.startsWith('https');
       const response = await fetch(
         `${this.baseUrl}/api/storage/batch-upload-and-process`,
         {
@@ -69,6 +121,7 @@ export class ApiUploadService extends BaseUploadService {
             'x-api-key': this.token,
           },
           body: formData,
+          agent: isHttps ? this.httpsAgent : this.httpAgent,
         },
       );
 
@@ -99,10 +152,12 @@ export class ApiUploadService extends BaseUploadService {
     }
 
     try {
+      const isHttps = this.baseUrl.startsWith('https');
       const response = await fetch(`${this.baseUrl}/api/health`, {
         headers: {
           'x-api-key': this.token,
         },
+        agent: isHttps ? this.httpsAgent : this.httpAgent,
       });
 
       return response.ok;