@arela/uploader 0.2.7 → 0.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.template +67 -13
- package/package.json +1 -1
- package/src/commands/UploadCommand.js +156 -49
- package/src/config/config.js +8 -3
- package/src/services/DatabaseService.js +623 -156
- package/src/services/upload/ApiUploadService.js +70 -7
- package/src/services/upload/SupabaseUploadService.js +2 -2
package/.env.template
CHANGED
|
@@ -1,20 +1,74 @@
|
|
|
1
|
-
#
|
|
2
|
-
# Copy this to .env and
|
|
1
|
+
# Arela Uploader Environment Configuration
|
|
2
|
+
# Copy this to your .env file and adjust values for your setup
|
|
3
3
|
|
|
4
|
-
#
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
SUPABASE_BUCKET=your-bucket-name
|
|
4
|
+
# =============================================================================
|
|
5
|
+
# BASIC CONFIGURATION
|
|
6
|
+
# =============================================================================
|
|
8
7
|
|
|
9
8
|
# Arela API Configuration
|
|
10
9
|
ARELA_API_URL=https://your-arela-api-url.com
|
|
11
|
-
ARELA_API_TOKEN=your-api-token
|
|
10
|
+
ARELA_API_TOKEN=your-api-token-here
|
|
11
|
+
|
|
12
|
+
# Supabase Configuration (fallback)
|
|
13
|
+
SUPABASE_URL=https://your-supabase-url.supabase.co
|
|
14
|
+
SUPABASE_KEY=your-supabase-key-here
|
|
15
|
+
SUPABASE_BUCKET=your-bucket-name
|
|
12
16
|
|
|
13
|
-
# Upload
|
|
14
|
-
UPLOAD_BASE_PATH=/
|
|
17
|
+
# Upload Sources (separate with |)
|
|
18
|
+
UPLOAD_BASE_PATH=/path/to/your/upload/base
|
|
15
19
|
UPLOAD_SOURCES=folder1|folder2|folder3
|
|
20
|
+
UPLOAD_RFCS=rfc1|rfc2|rfc3
|
|
21
|
+
|
|
22
|
+
# =============================================================================
|
|
23
|
+
# PERFORMANCE OPTIMIZATION FOR MULTIPLE API REPLICAS
|
|
24
|
+
# =============================================================================
|
|
25
|
+
|
|
26
|
+
# API Connection Configuration
|
|
27
|
+
# Set this to match your number of API replicas (e.g., if you have 10 API instances, set to 10)
|
|
28
|
+
MAX_API_CONNECTIONS=10
|
|
29
|
+
|
|
30
|
+
# API Connection Timeout (milliseconds)
|
|
31
|
+
API_CONNECTION_TIMEOUT=60000
|
|
32
|
+
|
|
33
|
+
# Batch Processing Configuration
|
|
34
|
+
# Files processed concurrently per batch (should be >= MAX_API_CONNECTIONS for best performance)
|
|
35
|
+
BATCH_SIZE=100
|
|
36
|
+
|
|
37
|
+
# Delay between batches (0 for maximum speed)
|
|
38
|
+
BATCH_DELAY=0
|
|
39
|
+
|
|
40
|
+
# Source Processing Concurrency
|
|
41
|
+
# Number of upload sources/folders to process simultaneously
|
|
42
|
+
MAX_CONCURRENT_SOURCES=2
|
|
43
|
+
|
|
44
|
+
# =============================================================================
|
|
45
|
+
# EXAMPLE CONFIGURATIONS FOR DIFFERENT SCENARIOS
|
|
46
|
+
# =============================================================================
|
|
47
|
+
|
|
48
|
+
# For 10 API Replicas (High Performance Setup):
|
|
49
|
+
# MAX_API_CONNECTIONS=10
|
|
50
|
+
# BATCH_SIZE=100
|
|
51
|
+
# MAX_CONCURRENT_SOURCES=3
|
|
52
|
+
# BATCH_DELAY=0
|
|
53
|
+
|
|
54
|
+
# For 5 API Replicas (Medium Performance Setup):
|
|
55
|
+
# MAX_API_CONNECTIONS=5
|
|
56
|
+
# BATCH_SIZE=50
|
|
57
|
+
# MAX_CONCURRENT_SOURCES=2
|
|
58
|
+
# BATCH_DELAY=0
|
|
59
|
+
|
|
60
|
+
# For 1 API Instance (Single Instance Setup):
|
|
61
|
+
# MAX_API_CONNECTIONS=5
|
|
62
|
+
# BATCH_SIZE=20
|
|
63
|
+
# MAX_CONCURRENT_SOURCES=1
|
|
64
|
+
# BATCH_DELAY=100
|
|
65
|
+
|
|
66
|
+
# =============================================================================
|
|
67
|
+
# LOGGING AND MONITORING
|
|
68
|
+
# =============================================================================
|
|
69
|
+
|
|
70
|
+
# Progress bar update frequency
|
|
71
|
+
PROGRESS_UPDATE_INTERVAL=10
|
|
16
72
|
|
|
17
|
-
#
|
|
18
|
-
|
|
19
|
-
# Example: MMJ0810145N1|ABC1234567XY|DEF9876543ZZ
|
|
20
|
-
UPLOAD_RFCS=RFC1|RFC2|RFC3
|
|
73
|
+
# Enable verbose logging (true/false)
|
|
74
|
+
VERBOSE_LOGGING=false
|
package/package.json
CHANGED
|
package/src/commands/UploadCommand.js
CHANGED
|
@@ -50,7 +50,7 @@ export class UploadCommand {
|
|
|
50
50
|
logger.info('Log file cleared');
|
|
51
51
|
}
|
|
52
52
|
|
|
53
|
-
// Process each source
|
|
53
|
+
// Process each source with configurable concurrency
|
|
54
54
|
let globalResults = {
|
|
55
55
|
successCount: 0,
|
|
56
56
|
detectedCount: 0,
|
|
@@ -59,28 +59,88 @@ export class UploadCommand {
|
|
|
59
59
|
skippedCount: 0,
|
|
60
60
|
};
|
|
61
61
|
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
files,
|
|
72
|
-
options,
|
|
73
|
-
uploadService,
|
|
74
|
-
basePath,
|
|
75
|
-
source,
|
|
76
|
-
sourcePath,
|
|
77
|
-
);
|
|
62
|
+
// Determine processing strategy based on configuration
|
|
63
|
+
const maxConcurrentSources =
|
|
64
|
+
appConfig.performance?.maxConcurrentSources || 1;
|
|
65
|
+
|
|
66
|
+
if (maxConcurrentSources > 1 && sources.length > 1) {
|
|
67
|
+
// Parallel source processing
|
|
68
|
+
logger.info(
|
|
69
|
+
`Processing ${sources.length} sources with concurrency: ${maxConcurrentSources}`,
|
|
70
|
+
);
|
|
78
71
|
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
72
|
+
// Process sources in batches to control concurrency
|
|
73
|
+
for (let i = 0; i < sources.length; i += maxConcurrentSources) {
|
|
74
|
+
const sourceBatch = sources.slice(i, i + maxConcurrentSources);
|
|
75
|
+
|
|
76
|
+
const sourcePromises = sourceBatch.map(async (source) => {
|
|
77
|
+
const sourcePath = path
|
|
78
|
+
.resolve(basePath, source)
|
|
79
|
+
.replace(/\\/g, '/');
|
|
80
|
+
logger.info(`Processing folder: ${sourcePath}`);
|
|
81
|
+
|
|
82
|
+
try {
|
|
83
|
+
const files = await this.#discoverFiles(sourcePath);
|
|
84
|
+
logger.info(`Found ${files.length} files in ${source}`);
|
|
85
|
+
|
|
86
|
+
const result = await this.#processFilesInBatches(
|
|
87
|
+
files,
|
|
88
|
+
options,
|
|
89
|
+
uploadService,
|
|
90
|
+
basePath,
|
|
91
|
+
source,
|
|
92
|
+
sourcePath,
|
|
93
|
+
);
|
|
94
|
+
|
|
95
|
+
this.#logSourceSummary(source, result, options);
|
|
96
|
+
return { success: true, source, result };
|
|
97
|
+
} catch (error) {
|
|
98
|
+
this.errorHandler.handleError(error, { source, sourcePath });
|
|
99
|
+
return { success: false, source, error: error.message };
|
|
100
|
+
}
|
|
101
|
+
});
|
|
102
|
+
|
|
103
|
+
// Wait for this batch of sources to complete
|
|
104
|
+
const results = await Promise.allSettled(sourcePromises);
|
|
105
|
+
|
|
106
|
+
results.forEach((result) => {
|
|
107
|
+
if (result.status === 'fulfilled') {
|
|
108
|
+
const sourceResult = result.value;
|
|
109
|
+
if (sourceResult.success) {
|
|
110
|
+
this.#updateGlobalResults(globalResults, sourceResult.result);
|
|
111
|
+
} else {
|
|
112
|
+
globalResults.failureCount++;
|
|
113
|
+
}
|
|
114
|
+
} else {
|
|
115
|
+
globalResults.failureCount++;
|
|
116
|
+
}
|
|
117
|
+
});
|
|
118
|
+
}
|
|
119
|
+
} else {
|
|
120
|
+
// Sequential source processing (original behavior)
|
|
121
|
+
for (const source of sources) {
|
|
122
|
+
const sourcePath = path.resolve(basePath, source).replace(/\\/g, '/');
|
|
123
|
+
logger.info(`Processing folder: ${sourcePath}`);
|
|
124
|
+
|
|
125
|
+
try {
|
|
126
|
+
const files = await this.#discoverFiles(sourcePath);
|
|
127
|
+
logger.info(`Found ${files.length} files to process`);
|
|
128
|
+
|
|
129
|
+
const result = await this.#processFilesInBatches(
|
|
130
|
+
files,
|
|
131
|
+
options,
|
|
132
|
+
uploadService,
|
|
133
|
+
basePath,
|
|
134
|
+
source,
|
|
135
|
+
sourcePath,
|
|
136
|
+
);
|
|
137
|
+
|
|
138
|
+
this.#updateGlobalResults(globalResults, result);
|
|
139
|
+
this.#logSourceSummary(source, result, options);
|
|
140
|
+
} catch (error) {
|
|
141
|
+
this.errorHandler.handleError(error, { source, sourcePath });
|
|
142
|
+
globalResults.failureCount++;
|
|
143
|
+
}
|
|
84
144
|
}
|
|
85
145
|
}
|
|
86
146
|
|
|
@@ -164,7 +224,8 @@ export class UploadCommand {
|
|
|
164
224
|
source,
|
|
165
225
|
sourcePath,
|
|
166
226
|
) {
|
|
167
|
-
const batchSize =
|
|
227
|
+
const batchSize =
|
|
228
|
+
parseInt(options.batchSize) || appConfig.performance.batchSize || 50;
|
|
168
229
|
const results = {
|
|
169
230
|
successCount: 0,
|
|
170
231
|
detectedCount: 0,
|
|
@@ -184,6 +245,9 @@ export class UploadCommand {
|
|
|
184
245
|
barCompleteChar: '█',
|
|
185
246
|
barIncompleteChar: '░',
|
|
186
247
|
hideCursor: true,
|
|
248
|
+
clearOnComplete: false,
|
|
249
|
+
stopOnComplete: true,
|
|
250
|
+
stream: process.stderr, // Use stderr to separate from stdout logging
|
|
187
251
|
});
|
|
188
252
|
|
|
189
253
|
progressBar.start(files.length, 0, { success: 0, errors: 0 });
|
|
@@ -265,22 +329,65 @@ export class UploadCommand {
|
|
|
265
329
|
throw new Error(`Failed to insert stats: ${error.message}`);
|
|
266
330
|
}
|
|
267
331
|
} else {
|
|
268
|
-
// Upload mode: process files
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
|
|
281
|
-
|
|
332
|
+
// Upload mode: process files with controlled concurrency to match API replicas
|
|
333
|
+
const maxConcurrentApiCalls =
|
|
334
|
+
appConfig.performance?.maxApiConnections || 10;
|
|
335
|
+
|
|
336
|
+
// Process batch in chunks to respect API replica limits
|
|
337
|
+
const allResults = [];
|
|
338
|
+
for (let i = 0; i < batch.length; i += maxConcurrentApiCalls) {
|
|
339
|
+
const chunk = batch.slice(i, i + maxConcurrentApiCalls);
|
|
340
|
+
|
|
341
|
+
// Process this chunk concurrently (up to API replica count)
|
|
342
|
+
const chunkPromises = chunk.map(async (filePath) => {
|
|
343
|
+
try {
|
|
344
|
+
const result = await this.#processFile(
|
|
345
|
+
filePath,
|
|
346
|
+
options,
|
|
347
|
+
uploadService,
|
|
348
|
+
basePath,
|
|
349
|
+
processedPaths,
|
|
350
|
+
);
|
|
351
|
+
return { success: true, filePath, result };
|
|
352
|
+
} catch (error) {
|
|
353
|
+
this.errorHandler.handleError(error, { filePath });
|
|
354
|
+
return { success: false, filePath, error: error.message };
|
|
355
|
+
}
|
|
356
|
+
});
|
|
357
|
+
|
|
358
|
+
// Wait for this chunk to complete before starting the next
|
|
359
|
+
const chunkResults = await Promise.allSettled(chunkPromises);
|
|
360
|
+
allResults.push(...chunkResults);
|
|
361
|
+
|
|
362
|
+
// Small delay between chunks to prevent overwhelming API
|
|
363
|
+
if (i + maxConcurrentApiCalls < batch.length) {
|
|
364
|
+
await new Promise((resolve) => setTimeout(resolve, 50));
|
|
282
365
|
}
|
|
283
366
|
}
|
|
367
|
+
|
|
368
|
+
// Process all results and update batch results
|
|
369
|
+
allResults.forEach((result) => {
|
|
370
|
+
if (result.status === 'fulfilled') {
|
|
371
|
+
const fileResult = result.value;
|
|
372
|
+
if (fileResult.success) {
|
|
373
|
+
if (fileResult.result && fileResult.result.skipped) {
|
|
374
|
+
batchResults.skippedCount++;
|
|
375
|
+
} else {
|
|
376
|
+
batchResults.successCount++;
|
|
377
|
+
if (fileResult.result && fileResult.result.detectedCount) {
|
|
378
|
+
batchResults.detectedCount += fileResult.result.detectedCount;
|
|
379
|
+
}
|
|
380
|
+
if (fileResult.result && fileResult.result.organizedCount) {
|
|
381
|
+
batchResults.organizedCount += fileResult.result.organizedCount;
|
|
382
|
+
}
|
|
383
|
+
}
|
|
384
|
+
} else {
|
|
385
|
+
batchResults.failureCount++;
|
|
386
|
+
}
|
|
387
|
+
} else {
|
|
388
|
+
batchResults.failureCount++;
|
|
389
|
+
}
|
|
390
|
+
});
|
|
284
391
|
}
|
|
285
392
|
|
|
286
393
|
return batchResults;
|
|
@@ -296,12 +403,10 @@ export class UploadCommand {
|
|
|
296
403
|
uploadService,
|
|
297
404
|
basePath,
|
|
298
405
|
processedPaths,
|
|
299
|
-
batchResults,
|
|
300
406
|
) {
|
|
301
407
|
// Skip if already processed
|
|
302
408
|
if (processedPaths.has(filePath)) {
|
|
303
|
-
|
|
304
|
-
return;
|
|
409
|
+
return { skipped: true };
|
|
305
410
|
}
|
|
306
411
|
|
|
307
412
|
// Prepare file for upload
|
|
@@ -325,24 +430,25 @@ export class UploadCommand {
|
|
|
325
430
|
};
|
|
326
431
|
|
|
327
432
|
// Upload based on service type
|
|
433
|
+
let result = { successCount: 1 };
|
|
434
|
+
|
|
328
435
|
if (uploadService.getServiceName() === 'Arela API') {
|
|
329
|
-
|
|
436
|
+
result = await uploadService.upload([fileObject], {
|
|
330
437
|
...options,
|
|
331
438
|
uploadPath,
|
|
332
439
|
});
|
|
333
|
-
|
|
334
|
-
batchResults.successCount++;
|
|
335
|
-
if (result.detectedCount)
|
|
336
|
-
batchResults.detectedCount += result.detectedCount;
|
|
337
|
-
if (result.organizedCount)
|
|
338
|
-
batchResults.organizedCount += result.organizedCount;
|
|
339
440
|
} else {
|
|
340
441
|
// Supabase direct upload
|
|
341
442
|
await uploadService.upload([fileObject], { uploadPath });
|
|
342
|
-
batchResults.successCount++;
|
|
343
443
|
}
|
|
344
444
|
|
|
345
445
|
logger.info(`SUCCESS: ${path.basename(filePath)} -> ${uploadPath}`);
|
|
446
|
+
|
|
447
|
+
return {
|
|
448
|
+
skipped: false,
|
|
449
|
+
detectedCount: result.detectedCount || 0,
|
|
450
|
+
organizedCount: result.organizedCount || 0,
|
|
451
|
+
};
|
|
346
452
|
}
|
|
347
453
|
|
|
348
454
|
/**
|
|
@@ -428,7 +534,8 @@ export class UploadCommand {
|
|
|
428
534
|
// Phase 2: PDF Detection
|
|
429
535
|
console.log('\nš === PHASE 2: PDF Detection ===');
|
|
430
536
|
const detectionResult = await databaseService.detectPedimentosInDatabase({
|
|
431
|
-
batchSize:
|
|
537
|
+
batchSize:
|
|
538
|
+
parseInt(options.batchSize) || appConfig.performance.batchSize || 50,
|
|
432
539
|
});
|
|
433
540
|
console.log(
|
|
434
541
|
`✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`,
|
package/src/config/config.js
CHANGED
|
@@ -28,10 +28,10 @@ class Config {
|
|
|
28
28
|
const __dirname = path.dirname(__filename);
|
|
29
29
|
const packageJsonPath = path.resolve(__dirname, '../../package.json');
|
|
30
30
|
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
|
|
31
|
-
return packageJson.version || '0.2.
|
|
31
|
+
return packageJson.version || '0.2.9';
|
|
32
32
|
} catch (error) {
|
|
33
33
|
console.warn('⚠️ Could not read package.json version, using fallback');
|
|
34
|
-
return '0.2.
|
|
34
|
+
return '0.2.9';
|
|
35
35
|
}
|
|
36
36
|
}
|
|
37
37
|
|
|
@@ -85,7 +85,12 @@ class Config {
|
|
|
85
85
|
*/
|
|
86
86
|
#loadPerformanceConfig() {
|
|
87
87
|
return {
|
|
88
|
-
batchDelay: parseInt(process.env.BATCH_DELAY) ||
|
|
88
|
+
batchDelay: parseInt(process.env.BATCH_DELAY) || 0, // Removed default delay
|
|
89
|
+
batchSize: parseInt(process.env.BATCH_SIZE) || 50, // Increased from 10 to 50
|
|
90
|
+
maxConcurrentSources: parseInt(process.env.MAX_CONCURRENT_SOURCES) || 2,
|
|
91
|
+
maxApiConnections: parseInt(process.env.MAX_API_CONNECTIONS) || 10, // New: API replica support
|
|
92
|
+
apiConnectionTimeout:
|
|
93
|
+
parseInt(process.env.API_CONNECTION_TIMEOUT) || 60000, // New: API timeout
|
|
89
94
|
progressUpdateInterval:
|
|
90
95
|
parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10,
|
|
91
96
|
logBufferSize: 100,
|