@arela/uploader 0.2.7 → 0.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.template CHANGED
@@ -1,20 +1,74 @@
1
- # Test environment configuration for arela-uploader
2
- # Copy this to .env and update with your actual values
1
+ # Arela Uploader Environment Configuration
2
+ # Copy this to your .env file and adjust values for your setup
3
3
 
4
- # Supabase Configuration
5
- SUPABASE_URL=https://your-project.supabase.co
6
- SUPABASE_KEY=your-supabase-anon-key
7
- SUPABASE_BUCKET=your-bucket-name
4
+ # =============================================================================
5
+ # BASIC CONFIGURATION
6
+ # =============================================================================
8
7
 
9
8
  # Arela API Configuration
10
9
  ARELA_API_URL=https://your-arela-api-url.com
11
- ARELA_API_TOKEN=your-api-token
10
+ ARELA_API_TOKEN=your-api-token-here
11
+
12
+ # Supabase Configuration (fallback)
13
+ SUPABASE_URL=https://your-supabase-url.supabase.co
14
+ SUPABASE_KEY=your-supabase-key-here
15
+ SUPABASE_BUCKET=your-bucket-name
12
16
 
13
- # Upload Configuration
14
- UPLOAD_BASE_PATH=/Users/your-username/documents
17
+ # Upload Sources (separate with |)
18
+ UPLOAD_BASE_PATH=/path/to/your/upload/base
15
19
  UPLOAD_SOURCES=folder1|folder2|folder3
20
+ UPLOAD_RFCS=rfc1|rfc2|rfc3
21
+
22
+ # =============================================================================
23
+ # PERFORMANCE OPTIMIZATION FOR MULTIPLE API REPLICAS
24
+ # =============================================================================
25
+
26
+ # API Connection Configuration
27
+ # Set this to match your number of API replicas (e.g., if you have 10 API instances, set to 10)
28
+ MAX_API_CONNECTIONS=10
29
+
30
+ # API Connection Timeout (milliseconds)
31
+ API_CONNECTION_TIMEOUT=60000
32
+
33
+ # Batch Processing Configuration
34
+ # Files processed concurrently per batch (should be >= MAX_API_CONNECTIONS for best performance)
35
+ BATCH_SIZE=100
36
+
37
+ # Delay between batches (0 for maximum speed)
38
+ BATCH_DELAY=0
39
+
40
+ # Source Processing Concurrency
41
+ # Number of upload sources/folders to process simultaneously
42
+ MAX_CONCURRENT_SOURCES=2
43
+
44
+ # =============================================================================
45
+ # EXAMPLE CONFIGURATIONS FOR DIFFERENT SCENARIOS
46
+ # =============================================================================
47
+
48
+ # For 10 API Replicas (High Performance Setup):
49
+ # MAX_API_CONNECTIONS=10
50
+ # BATCH_SIZE=100
51
+ # MAX_CONCURRENT_SOURCES=3
52
+ # BATCH_DELAY=0
53
+
54
+ # For 5 API Replicas (Medium Performance Setup):
55
+ # MAX_API_CONNECTIONS=5
56
+ # BATCH_SIZE=50
57
+ # MAX_CONCURRENT_SOURCES=2
58
+ # BATCH_DELAY=0
59
+
60
+ # For 1 API Instance (Single Instance Setup):
61
+ # MAX_API_CONNECTIONS=5
62
+ # BATCH_SIZE=20
63
+ # MAX_CONCURRENT_SOURCES=1
64
+ # BATCH_DELAY=100
65
+
66
+ # =============================================================================
67
+ # LOGGING AND MONITORING
68
+ # =============================================================================
69
+
70
+ # Progress bar update frequency
71
+ PROGRESS_UPDATE_INTERVAL=10
16
72
 
17
- # RFC Upload Configuration
18
- # Pipe-separated list of RFCs to upload files for
19
- # Example: MMJ0810145N1|ABC1234567XY|DEF9876543ZZ
20
- UPLOAD_RFCS=RFC1|RFC2|RFC3
73
+ # Enable verbose logging (true/false)
74
+ VERBOSE_LOGGING=false
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "0.2.7",
3
+ "version": "0.2.9",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -50,7 +50,7 @@ export class UploadCommand {
50
50
  logger.info('Log file cleared');
51
51
  }
52
52
 
53
- // Process each source
53
+ // Process each source with configurable concurrency
54
54
  let globalResults = {
55
55
  successCount: 0,
56
56
  detectedCount: 0,
@@ -59,28 +59,88 @@ export class UploadCommand {
59
59
  skippedCount: 0,
60
60
  };
61
61
 
62
- for (const source of sources) {
63
- const sourcePath = path.resolve(basePath, source).replace(/\\/g, '/');
64
- logger.info(`Processing folder: ${sourcePath}`);
65
-
66
- try {
67
- const files = await this.#discoverFiles(sourcePath);
68
- logger.info(`Found ${files.length} files to process`);
69
-
70
- const result = await this.#processFilesInBatches(
71
- files,
72
- options,
73
- uploadService,
74
- basePath,
75
- source,
76
- sourcePath,
77
- );
62
+ // Determine processing strategy based on configuration
63
+ const maxConcurrentSources =
64
+ appConfig.performance?.maxConcurrentSources || 1;
65
+
66
+ if (maxConcurrentSources > 1 && sources.length > 1) {
67
+ // Parallel source processing
68
+ logger.info(
69
+ `Processing ${sources.length} sources with concurrency: ${maxConcurrentSources}`,
70
+ );
78
71
 
79
- this.#updateGlobalResults(globalResults, result);
80
- this.#logSourceSummary(source, result, options);
81
- } catch (error) {
82
- this.errorHandler.handleError(error, { source, sourcePath });
83
- globalResults.failureCount++;
72
+ // Process sources in batches to control concurrency
73
+ for (let i = 0; i < sources.length; i += maxConcurrentSources) {
74
+ const sourceBatch = sources.slice(i, i + maxConcurrentSources);
75
+
76
+ const sourcePromises = sourceBatch.map(async (source) => {
77
+ const sourcePath = path
78
+ .resolve(basePath, source)
79
+ .replace(/\\/g, '/');
80
+ logger.info(`Processing folder: ${sourcePath}`);
81
+
82
+ try {
83
+ const files = await this.#discoverFiles(sourcePath);
84
+ logger.info(`Found ${files.length} files in ${source}`);
85
+
86
+ const result = await this.#processFilesInBatches(
87
+ files,
88
+ options,
89
+ uploadService,
90
+ basePath,
91
+ source,
92
+ sourcePath,
93
+ );
94
+
95
+ this.#logSourceSummary(source, result, options);
96
+ return { success: true, source, result };
97
+ } catch (error) {
98
+ this.errorHandler.handleError(error, { source, sourcePath });
99
+ return { success: false, source, error: error.message };
100
+ }
101
+ });
102
+
103
+ // Wait for this batch of sources to complete
104
+ const results = await Promise.allSettled(sourcePromises);
105
+
106
+ results.forEach((result) => {
107
+ if (result.status === 'fulfilled') {
108
+ const sourceResult = result.value;
109
+ if (sourceResult.success) {
110
+ this.#updateGlobalResults(globalResults, sourceResult.result);
111
+ } else {
112
+ globalResults.failureCount++;
113
+ }
114
+ } else {
115
+ globalResults.failureCount++;
116
+ }
117
+ });
118
+ }
119
+ } else {
120
+ // Sequential source processing (original behavior)
121
+ for (const source of sources) {
122
+ const sourcePath = path.resolve(basePath, source).replace(/\\/g, '/');
123
+ logger.info(`Processing folder: ${sourcePath}`);
124
+
125
+ try {
126
+ const files = await this.#discoverFiles(sourcePath);
127
+ logger.info(`Found ${files.length} files to process`);
128
+
129
+ const result = await this.#processFilesInBatches(
130
+ files,
131
+ options,
132
+ uploadService,
133
+ basePath,
134
+ source,
135
+ sourcePath,
136
+ );
137
+
138
+ this.#updateGlobalResults(globalResults, result);
139
+ this.#logSourceSummary(source, result, options);
140
+ } catch (error) {
141
+ this.errorHandler.handleError(error, { source, sourcePath });
142
+ globalResults.failureCount++;
143
+ }
84
144
  }
85
145
  }
86
146
 
@@ -164,7 +224,8 @@ export class UploadCommand {
164
224
  source,
165
225
  sourcePath,
166
226
  ) {
167
- const batchSize = parseInt(options.batchSize) || 10;
227
+ const batchSize =
228
+ parseInt(options.batchSize) || appConfig.performance.batchSize || 50;
168
229
  const results = {
169
230
  successCount: 0,
170
231
  detectedCount: 0,
@@ -184,6 +245,9 @@ export class UploadCommand {
184
245
  barCompleteChar: '█',
185
246
  barIncompleteChar: '░',
186
247
  hideCursor: true,
248
+ clearOnComplete: false,
249
+ stopOnComplete: true,
250
+ stream: process.stderr, // Use stderr to separate from stdout logging
187
251
  });
188
252
 
189
253
  progressBar.start(files.length, 0, { success: 0, errors: 0 });
@@ -265,22 +329,65 @@ export class UploadCommand {
265
329
  throw new Error(`Failed to insert stats: ${error.message}`);
266
330
  }
267
331
  } else {
268
- // Upload mode: process files for upload
269
- for (const filePath of batch) {
270
- try {
271
- await this.#processFile(
272
- filePath,
273
- options,
274
- uploadService,
275
- basePath,
276
- processedPaths,
277
- batchResults,
278
- );
279
- } catch (error) {
280
- this.errorHandler.handleError(error, { filePath });
281
- batchResults.failureCount++;
332
+ // Upload mode: process files with controlled concurrency to match API replicas
333
+ const maxConcurrentApiCalls =
334
+ appConfig.performance?.maxApiConnections || 10;
335
+
336
+ // Process batch in chunks to respect API replica limits
337
+ const allResults = [];
338
+ for (let i = 0; i < batch.length; i += maxConcurrentApiCalls) {
339
+ const chunk = batch.slice(i, i + maxConcurrentApiCalls);
340
+
341
+ // Process this chunk concurrently (up to API replica count)
342
+ const chunkPromises = chunk.map(async (filePath) => {
343
+ try {
344
+ const result = await this.#processFile(
345
+ filePath,
346
+ options,
347
+ uploadService,
348
+ basePath,
349
+ processedPaths,
350
+ );
351
+ return { success: true, filePath, result };
352
+ } catch (error) {
353
+ this.errorHandler.handleError(error, { filePath });
354
+ return { success: false, filePath, error: error.message };
355
+ }
356
+ });
357
+
358
+ // Wait for this chunk to complete before starting the next
359
+ const chunkResults = await Promise.allSettled(chunkPromises);
360
+ allResults.push(...chunkResults);
361
+
362
+ // Small delay between chunks to prevent overwhelming API
363
+ if (i + maxConcurrentApiCalls < batch.length) {
364
+ await new Promise((resolve) => setTimeout(resolve, 50));
282
365
  }
283
366
  }
367
+
368
+ // Process all results and update batch results
369
+ allResults.forEach((result) => {
370
+ if (result.status === 'fulfilled') {
371
+ const fileResult = result.value;
372
+ if (fileResult.success) {
373
+ if (fileResult.result && fileResult.result.skipped) {
374
+ batchResults.skippedCount++;
375
+ } else {
376
+ batchResults.successCount++;
377
+ if (fileResult.result && fileResult.result.detectedCount) {
378
+ batchResults.detectedCount += fileResult.result.detectedCount;
379
+ }
380
+ if (fileResult.result && fileResult.result.organizedCount) {
381
+ batchResults.organizedCount += fileResult.result.organizedCount;
382
+ }
383
+ }
384
+ } else {
385
+ batchResults.failureCount++;
386
+ }
387
+ } else {
388
+ batchResults.failureCount++;
389
+ }
390
+ });
284
391
  }
285
392
 
286
393
  return batchResults;
@@ -296,12 +403,10 @@ export class UploadCommand {
296
403
  uploadService,
297
404
  basePath,
298
405
  processedPaths,
299
- batchResults,
300
406
  ) {
301
407
  // Skip if already processed
302
408
  if (processedPaths.has(filePath)) {
303
- batchResults.skippedCount++;
304
- return;
409
+ return { skipped: true };
305
410
  }
306
411
 
307
412
  // Prepare file for upload
@@ -325,24 +430,25 @@ export class UploadCommand {
325
430
  };
326
431
 
327
432
  // Upload based on service type
433
+ let result = { successCount: 1 };
434
+
328
435
  if (uploadService.getServiceName() === 'Arela API') {
329
- const result = await uploadService.upload([fileObject], {
436
+ result = await uploadService.upload([fileObject], {
330
437
  ...options,
331
438
  uploadPath,
332
439
  });
333
-
334
- batchResults.successCount++;
335
- if (result.detectedCount)
336
- batchResults.detectedCount += result.detectedCount;
337
- if (result.organizedCount)
338
- batchResults.organizedCount += result.organizedCount;
339
440
  } else {
340
441
  // Supabase direct upload
341
442
  await uploadService.upload([fileObject], { uploadPath });
342
- batchResults.successCount++;
343
443
  }
344
444
 
345
445
  logger.info(`SUCCESS: ${path.basename(filePath)} -> ${uploadPath}`);
446
+
447
+ return {
448
+ skipped: false,
449
+ detectedCount: result.detectedCount || 0,
450
+ organizedCount: result.organizedCount || 0,
451
+ };
346
452
  }
347
453
 
348
454
  /**
@@ -428,7 +534,8 @@ export class UploadCommand {
428
534
  // Phase 2: PDF Detection
429
535
  console.log('\n🔍 === PHASE 2: PDF Detection ===');
430
536
  const detectionResult = await databaseService.detectPedimentosInDatabase({
431
- batchSize: parseInt(options.batchSize) || 10,
537
+ batchSize:
538
+ parseInt(options.batchSize) || appConfig.performance.batchSize || 50,
432
539
  });
433
540
  console.log(
434
541
  `✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`,
@@ -28,10 +28,10 @@ class Config {
28
28
  const __dirname = path.dirname(__filename);
29
29
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
30
30
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
31
- return packageJson.version || '0.2.7';
31
+ return packageJson.version || '0.2.9';
32
32
  } catch (error) {
33
33
  console.warn('⚠️ Could not read package.json version, using fallback');
34
- return '0.2.7';
34
+ return '0.2.9';
35
35
  }
36
36
  }
37
37
 
@@ -85,7 +85,12 @@ class Config {
85
85
  */
86
86
  #loadPerformanceConfig() {
87
87
  return {
88
- batchDelay: parseInt(process.env.BATCH_DELAY) || 100,
88
+ batchDelay: parseInt(process.env.BATCH_DELAY) || 0, // Removed default delay
89
+ batchSize: parseInt(process.env.BATCH_SIZE) || 50, // Increased from 10 to 50
90
+ maxConcurrentSources: parseInt(process.env.MAX_CONCURRENT_SOURCES) || 2,
91
+ maxApiConnections: parseInt(process.env.MAX_API_CONNECTIONS) || 10, // New: API replica support
92
+ apiConnectionTimeout:
93
+ parseInt(process.env.API_CONNECTION_TIMEOUT) || 60000, // New: API timeout
89
94
  progressUpdateInterval:
90
95
  parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10,
91
96
  logBufferSize: 100,