@arela/uploader 0.2.6 → 0.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.template +67 -13
- package/package.json +1 -1
- package/src/commands/UploadCommand.js +156 -49
- package/src/config/config.js +8 -3
- package/src/services/DatabaseService.js +384 -132
- package/src/services/upload/ApiUploadService.js +61 -6
package/.env.template
CHANGED

@@ -1,20 +1,74 @@
-#
-# Copy this to .env and
+# Arela Uploader Environment Configuration
+# Copy this to your .env file and adjust values for your setup
 
-#
-
-
-SUPABASE_BUCKET=your-bucket-name
+# =============================================================================
+# BASIC CONFIGURATION
+# =============================================================================
 
 # Arela API Configuration
 ARELA_API_URL=https://your-arela-api-url.com
-ARELA_API_TOKEN=your-api-token
+ARELA_API_TOKEN=your-api-token-here
+
+# Supabase Configuration (fallback)
+SUPABASE_URL=https://your-supabase-url.supabase.co
+SUPABASE_KEY=your-supabase-key-here
+SUPABASE_BUCKET=your-bucket-name
 
-# Upload
-UPLOAD_BASE_PATH=/
+# Upload Sources (separate with |)
+UPLOAD_BASE_PATH=/path/to/your/upload/base
 UPLOAD_SOURCES=folder1|folder2|folder3
+UPLOAD_RFCS=rfc1|rfc2|rfc3
+
+# =============================================================================
+# PERFORMANCE OPTIMIZATION FOR MULTIPLE API REPLICAS
+# =============================================================================
+
+# API Connection Configuration
+# Set this to match your number of API replicas (e.g., if you have 10 API instances, set to 10)
+MAX_API_CONNECTIONS=10
+
+# API Connection Timeout (milliseconds)
+API_CONNECTION_TIMEOUT=60000
+
+# Batch Processing Configuration
+# Files processed concurrently per batch (should be >= MAX_API_CONNECTIONS for best performance)
+BATCH_SIZE=100
+
+# Delay between batches (0 for maximum speed)
+BATCH_DELAY=0
+
+# Source Processing Concurrency
+# Number of upload sources/folders to process simultaneously
+MAX_CONCURRENT_SOURCES=2
+
+# =============================================================================
+# EXAMPLE CONFIGURATIONS FOR DIFFERENT SCENARIOS
+# =============================================================================
+
+# For 10 API Replicas (High Performance Setup):
+# MAX_API_CONNECTIONS=10
+# BATCH_SIZE=100
+# MAX_CONCURRENT_SOURCES=3
+# BATCH_DELAY=0
+
+# For 5 API Replicas (Medium Performance Setup):
+# MAX_API_CONNECTIONS=5
+# BATCH_SIZE=50
+# MAX_CONCURRENT_SOURCES=2
+# BATCH_DELAY=0
+
+# For 1 API Instance (Single Instance Setup):
+# MAX_API_CONNECTIONS=5
+# BATCH_SIZE=20
+# MAX_CONCURRENT_SOURCES=1
+# BATCH_DELAY=100
+
+# =============================================================================
+# LOGGING AND MONITORING
+# =============================================================================
+
+# Progress bar update frequency
+PROGRESS_UPDATE_INTERVAL=10
 
-#
-
-# Example: MMJ0810145N1|ABC1234567XY|DEF9876543ZZ
-UPLOAD_RFCS=RFC1|RFC2|RFC3
+# Enable verbose logging (true/false)
+VERBOSE_LOGGING=false
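
The three concurrency knobs above interact: each concurrently processed source can drive up to MAX_API_CONNECTIONS parallel uploads. A minimal standalone sketch of that interaction (not part of the package; the warning rule is inferred from the BATCH_SIZE comment in the template):

```js
// Sketch: how the template's tuning variables relate to each other.
const maxApiConnections = parseInt(process.env.MAX_API_CONNECTIONS) || 10;
const batchSize = parseInt(process.env.BATCH_SIZE) || 100;
const maxConcurrentSources = parseInt(process.env.MAX_CONCURRENT_SOURCES) || 2;

// Per the template comment, BATCH_SIZE should be >= MAX_API_CONNECTIONS,
// otherwise some connections sit idle within every batch.
if (batchSize < maxApiConnections) {
  console.warn(
    `BATCH_SIZE (${batchSize}) < MAX_API_CONNECTIONS (${maxApiConnections}): ` +
      'some API connections will idle within each batch',
  );
}

// Worst case, each concurrently processed source drives its own full set
// of API connections, so peak concurrency is roughly the product.
console.log(
  `Peak concurrent uploads ≈ ${maxConcurrentSources * maxApiConnections}`,
);
```
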
package/package.json
CHANGED

-  "version": "0.2.6",
+  "version": "0.2.8",

package/src/commands/UploadCommand.js
CHANGED

@@ -50,7 +50,7 @@ export class UploadCommand {
     logger.info('Log file cleared');
   }
 
-    // Process each source
+    // Process each source with configurable concurrency
     let globalResults = {
       successCount: 0,
       detectedCount: 0,
@@ -59,28 +59,88 @@ export class UploadCommand {
       skippedCount: 0,
     };
 
-
-
-
-
-
-
-
-
-        files,
-        options,
-        uploadService,
-        basePath,
-        source,
-        sourcePath,
-      );
+    // Determine processing strategy based on configuration
+    const maxConcurrentSources =
+      appConfig.performance?.maxConcurrentSources || 1;
+
+    if (maxConcurrentSources > 1 && sources.length > 1) {
+      // Parallel source processing
+      logger.info(
+        `Processing ${sources.length} sources with concurrency: ${maxConcurrentSources}`,
+      );
 
-
-
-
-
-
+      // Process sources in batches to control concurrency
+      for (let i = 0; i < sources.length; i += maxConcurrentSources) {
+        const sourceBatch = sources.slice(i, i + maxConcurrentSources);
+
+        const sourcePromises = sourceBatch.map(async (source) => {
+          const sourcePath = path
+            .resolve(basePath, source)
+            .replace(/\\/g, '/');
+          logger.info(`Processing folder: ${sourcePath}`);
+
+          try {
+            const files = await this.#discoverFiles(sourcePath);
+            logger.info(`Found ${files.length} files in ${source}`);
+
+            const result = await this.#processFilesInBatches(
+              files,
+              options,
+              uploadService,
+              basePath,
+              source,
+              sourcePath,
+            );
+
+            this.#logSourceSummary(source, result, options);
+            return { success: true, source, result };
+          } catch (error) {
+            this.errorHandler.handleError(error, { source, sourcePath });
+            return { success: false, source, error: error.message };
+          }
+        });
+
+        // Wait for this batch of sources to complete
+        const results = await Promise.allSettled(sourcePromises);
+
+        results.forEach((result) => {
+          if (result.status === 'fulfilled') {
+            const sourceResult = result.value;
+            if (sourceResult.success) {
+              this.#updateGlobalResults(globalResults, sourceResult.result);
+            } else {
+              globalResults.failureCount++;
+            }
+          } else {
+            globalResults.failureCount++;
+          }
+        });
+      }
+    } else {
+      // Sequential source processing (original behavior)
+      for (const source of sources) {
+        const sourcePath = path.resolve(basePath, source).replace(/\\/g, '/');
+        logger.info(`Processing folder: ${sourcePath}`);
+
+        try {
+          const files = await this.#discoverFiles(sourcePath);
+          logger.info(`Found ${files.length} files to process`);
+
+          const result = await this.#processFilesInBatches(
+            files,
+            options,
+            uploadService,
+            basePath,
+            source,
+            sourcePath,
+          );
+
+          this.#updateGlobalResults(globalResults, result);
+          this.#logSourceSummary(source, result, options);
+        } catch (error) {
+          this.errorHandler.handleError(error, { source, sourcePath });
+          globalResults.failureCount++;
+        }
       }
     }
 
@@ -164,7 +224,8 @@ export class UploadCommand {
     source,
     sourcePath,
   ) {
-    const batchSize =
+    const batchSize =
+      parseInt(options.batchSize) || appConfig.performance.batchSize || 50;
     const results = {
       successCount: 0,
       detectedCount: 0,
@@ -184,6 +245,9 @@ export class UploadCommand {
       barCompleteChar: '█',
       barIncompleteChar: '░',
       hideCursor: true,
+      clearOnComplete: false,
+      stopOnComplete: true,
+      stream: process.stderr, // Use stderr to separate from stdout logging
     });
 
     progressBar.start(files.length, 0, { success: 0, errors: 0 });
@@ -265,22 +329,65 @@ export class UploadCommand {
        throw new Error(`Failed to insert stats: ${error.message}`);
      }
    } else {
-      // Upload mode: process files
-
-
-
-
-
-
-
-
-
-
-
-
+      // Upload mode: process files with controlled concurrency to match API replicas
+      const maxConcurrentApiCalls =
+        appConfig.performance?.maxApiConnections || 10;
+
+      // Process batch in chunks to respect API replica limits
+      const allResults = [];
+      for (let i = 0; i < batch.length; i += maxConcurrentApiCalls) {
+        const chunk = batch.slice(i, i + maxConcurrentApiCalls);
+
+        // Process this chunk concurrently (up to API replica count)
+        const chunkPromises = chunk.map(async (filePath) => {
+          try {
+            const result = await this.#processFile(
+              filePath,
+              options,
+              uploadService,
+              basePath,
+              processedPaths,
+            );
+            return { success: true, filePath, result };
+          } catch (error) {
+            this.errorHandler.handleError(error, { filePath });
+            return { success: false, filePath, error: error.message };
+          }
+        });
+
+        // Wait for this chunk to complete before starting the next
+        const chunkResults = await Promise.allSettled(chunkPromises);
+        allResults.push(...chunkResults);
+
+        // Small delay between chunks to prevent overwhelming API
+        if (i + maxConcurrentApiCalls < batch.length) {
+          await new Promise((resolve) => setTimeout(resolve, 50));
        }
      }
+
+      // Process all results and update batch results
+      allResults.forEach((result) => {
+        if (result.status === 'fulfilled') {
+          const fileResult = result.value;
+          if (fileResult.success) {
+            if (fileResult.result && fileResult.result.skipped) {
+              batchResults.skippedCount++;
+            } else {
+              batchResults.successCount++;
+              if (fileResult.result && fileResult.result.detectedCount) {
+                batchResults.detectedCount += fileResult.result.detectedCount;
+              }
+              if (fileResult.result && fileResult.result.organizedCount) {
+                batchResults.organizedCount += fileResult.result.organizedCount;
+              }
+            }
+          } else {
+            batchResults.failureCount++;
+          }
+        } else {
+          batchResults.failureCount++;
+        }
+      });
    }
 
    return batchResults;
@@ -296,12 +403,10 @@ export class UploadCommand {
    uploadService,
    basePath,
    processedPaths,
-    batchResults,
  ) {
    // Skip if already processed
    if (processedPaths.has(filePath)) {
-
-      return;
+      return { skipped: true };
    }
 
    // Prepare file for upload
@@ -325,24 +430,25 @@ export class UploadCommand {
    };
 
    // Upload based on service type
+    let result = { successCount: 1 };
+
    if (uploadService.getServiceName() === 'Arela API') {
-
+      result = await uploadService.upload([fileObject], {
        ...options,
        uploadPath,
      });
-
-      batchResults.successCount++;
-      if (result.detectedCount)
-        batchResults.detectedCount += result.detectedCount;
-      if (result.organizedCount)
-        batchResults.organizedCount += result.organizedCount;
    } else {
      // Supabase direct upload
      await uploadService.upload([fileObject], { uploadPath });
-      batchResults.successCount++;
    }
 
    logger.info(`SUCCESS: ${path.basename(filePath)} -> ${uploadPath}`);
+
+    return {
+      skipped: false,
+      detectedCount: result.detectedCount || 0,
+      organizedCount: result.organizedCount || 0,
+    };
  }
 
  /**
@@ -428,7 +534,8 @@ export class UploadCommand {
    // Phase 2: PDF Detection
    console.log('\n🔍 === PHASE 2: PDF Detection ===');
    const detectionResult = await databaseService.detectPedimentosInDatabase({
-      batchSize:
+      batchSize:
+        parseInt(options.batchSize) || appConfig.performance.batchSize || 50,
    });
    console.log(
      `✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`,
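
The pattern this diff applies in several places — slice work into chunks no larger than the concurrency limit, then `Promise.allSettled` each chunk before starting the next — can be isolated as a small standalone sketch (`processInChunks` and `fakeUpload` are illustrative names, not package APIs):

```js
// Run at most `limit` async tasks at a time; wait for each chunk to settle
// before starting the next, so failures never abort the whole batch.
async function processInChunks(items, limit, processOne) {
  const outcomes = [];
  for (let i = 0; i < items.length; i += limit) {
    const chunk = items.slice(i, i + limit);
    const settled = await Promise.allSettled(chunk.map(processOne));
    outcomes.push(...settled);
  }
  return outcomes;
}

// Example: up to 10 concurrent "uploads", mirroring MAX_API_CONNECTIONS=10.
const fakeUpload = (f) =>
  new Promise((resolve) => setTimeout(() => resolve(f), 10));

processInChunks(['a.pdf', 'b.pdf', 'c.pdf'], 10, fakeUpload).then((r) =>
  console.log(`${r.filter((x) => x.status === 'fulfilled').length} ok`),
);
```
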
package/src/config/config.js
CHANGED

@@ -28,10 +28,10 @@ class Config {
      const __dirname = path.dirname(__filename);
      const packageJsonPath = path.resolve(__dirname, '../../package.json');
      const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
-      return packageJson.version || '0.2.
+      return packageJson.version || '0.2.8';
    } catch (error) {
      console.warn('⚠️ Could not read package.json version, using fallback');
-      return '0.2.
+      return '0.2.8';
    }
  }
 
@@ -85,7 +85,12 @@ class Config {
   */
  #loadPerformanceConfig() {
    return {
-      batchDelay: parseInt(process.env.BATCH_DELAY) ||
+      batchDelay: parseInt(process.env.BATCH_DELAY) || 0, // Removed default delay
+      batchSize: parseInt(process.env.BATCH_SIZE) || 50, // Increased from 10 to 50
+      maxConcurrentSources: parseInt(process.env.MAX_CONCURRENT_SOURCES) || 2,
+      maxApiConnections: parseInt(process.env.MAX_API_CONNECTIONS) || 10, // New: API replica support
+      apiConnectionTimeout:
+        parseInt(process.env.API_CONNECTION_TIMEOUT) || 60000, // New: API timeout
      progressUpdateInterval:
        parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10,
      logBufferSize: 100,
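
One caveat with the `parseInt(...) || fallback` idiom used throughout this config, shown as a standalone illustration (not package code): an explicit `0` and an unparsable value both fall through to the default.

```js
// `parseInt(x) || fallback` cannot distinguish NaN from 0, so setting
// MAX_CONCURRENT_SOURCES=0 silently becomes the default of 2.
process.env.MAX_CONCURRENT_SOURCES = '0';
console.log(parseInt(process.env.MAX_CONCURRENT_SOURCES) || 2); // 2, not 0

// BATCH_DELAY is unaffected in practice because its fallback is also 0.
process.env.BATCH_DELAY = 'fast';
console.log(parseInt(process.env.BATCH_DELAY) || 0); // 0 (NaN falls through)
```
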
package/src/services/DatabaseService.js
CHANGED

@@ -104,16 +104,28 @@ export class DatabaseService {
    }
 
    // Initialize record with basic file stats
+    const fileExtension = path
+      .extname(file.path)
+      .toLowerCase()
+      .replace('.', '');
+    const filename = file.originalName || path.basename(file.path);
+
    const record = {
      document_type: null,
      size: stats.size,
      num_pedimento: null,
-      filename:
+      filename: filename,
      original_path: originalPath,
      arela_path: null,
      status: 'stats',
      rfc: null,
      message: null,
+      file_extension: fileExtension,
+      is_like_simplificado:
+        fileExtension === 'pdf' && filename.toLowerCase().includes('simp'),
+      year: null,
+      created_at: new Date().toISOString(),
+      modified_at: stats.mtime.toISOString(),
    };
 
    // Try to detect document type for supported files
@@ -130,6 +142,10 @@ export class DatabaseService {
        record.arela_path = detection.arelaPath;
      }
 
+      if (detection.detectedPedimentoYear) {
+        record.year = detection.detectedPedimentoYear;
+      }
+
      const rfcField = detection.fields.find(
        (f) => f.name === 'rfc' && f.found,
      );
@@ -210,6 +226,12 @@ export class DatabaseService {
        file_extension: fileExtension,
        created_at: new Date().toISOString(),
        modified_at: stats.mtime.toISOString(),
+        is_like_simplificado:
+          fileExtension === 'pdf' &&
+          (file.originalName || path.basename(file.path))
+            .toLowerCase()
+            .includes('simp'),
+        year: null,
      };
 
      allRecords.push(record);
@@ -258,9 +280,15 @@ export class DatabaseService {
        existingPaths.has(r.original_path),
      );
 
-
-
+      // Only log every 10th batch to reduce noise
+      if (
+        (Math.floor(i / batchSize) + 1) % 10 === 0 ||
+        Math.floor(i / batchSize) + 1 === 1
+      ) {
+        logger.info(
+          `Batch ${Math.floor(i / batchSize) + 1}: ${newRecords.length} new, ${updateRecords.length} updates`,
+        );
+      }
 
      // Insert new records
      if (newRecords.length > 0) {
@@ -272,7 +300,7 @@ export class DatabaseService {
          logger.error(`Error inserting new records: ${insertError.message}`);
        } else {
          totalInserted += newRecords.length;
-
+          // Only log the batch insertion, not the summary (which comes at the end)
        }
      }
 
@@ -287,6 +315,8 @@ export class DatabaseService {
              modified_at: record.modified_at,
              filename: record.filename,
              file_extension: record.file_extension,
+              is_like_simplificado: record.is_like_simplificado,
+              year: record.year,
            })
            .eq('original_path', record.original_path);
 
@@ -295,7 +325,10 @@ export class DatabaseService {
          }
        }
        totalUpdated += batchUpdated;
-
+        // Reduce logging noise - only log when there are updates
+        if (batchUpdated > 0) {
+          logger.info(`Updated ${batchUpdated} existing records`);
+        }
      }
    } catch (error) {
      logger.error(
@@ -329,7 +362,7 @@ export class DatabaseService {
 
    const processingBatchSize = parseInt(options.batchSize) || 10;
    // Reduced query batch size to avoid timeouts
-    const queryBatchSize = 500;
+    const queryBatchSize = 100; // Reduced from 500 to 100
 
    let totalDetected = 0;
    let totalProcessed = 0;
@@ -355,9 +388,9 @@ export class DatabaseService {
          .select('id, original_path, filename, file_extension, status')
          .eq('status', 'fs-stats')
          .eq('file_extension', 'pdf')
-          .
+          .eq('is_like_simplificado', true)
          .range(offset, offset + queryBatchSize - 1)
-          .order('
+          .order('created_at');
      }, `fetch PDF records chunk ${chunkNumber}`);
 
      if (queryError) {
@@ -416,6 +449,7 @@ export class DatabaseService {
            num_pedimento: detection.detectedPedimento,
            arela_path: detection.arelaPath,
            message: detection.error || null,
+            year: detection.detectedPedimentoYear || null,
          };
 
          if (detection.fields) {
@@ -522,13 +556,15 @@ export class DatabaseService {
  async propagateArelaPath(options = {}) {
    const supabase = await this.#getSupabaseClient();
 
-    logger.info('Phase 3: Starting arela_path propagation process...');
-    console.log(
+    logger.info('Phase 3: Starting arela_path and year propagation process...');
+    console.log(
+      '🔍 Finding pedimento_simplificado records with arela_path and year...',
+    );
 
    // Get all pedimento_simplificado records that have arela_path
    const { data: pedimentoRecords, error: pedimentoError } = await supabase
      .from('uploader')
-      .select('id, original_path, arela_path, filename')
+      .select('id, original_path, arela_path, filename, year')
      .eq('document_type', 'pedimento_simplificado')
      .not('arela_path', 'is', null);
 
@@ -567,7 +603,7 @@ export class DatabaseService {
    const basePath = path.dirname(pedimento.original_path);
 
    logger.info(
-      `Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`,
+      `Processing pedimento: ${pedimento.filename} | Base path: ${basePath} | Year: ${pedimento.year || 'N/A'}`,
    );
 
    // Extract folder part from existing arela_path
@@ -618,7 +654,10 @@ export class DatabaseService {
        try {
          const { error: updateError } = await supabase
            .from('uploader')
-          .update({
+            .update({
+              arela_path: folderArelaPath,
+              year: pedimento.year,
+            })
            .in('id', batchIds);
 
          if (updateError) {
@@ -629,7 +668,7 @@ export class DatabaseService {
        } else {
          totalUpdated += batchIds.length;
          logger.info(
-            `Successfully updated batch ${batchNumber}: ${batchIds.length} files`,
+            `Successfully updated batch ${batchNumber}: ${batchIds.length} files with arela_path and year`,
          );
        }
      } catch (batchError) {
@@ -654,7 +693,7 @@ export class DatabaseService {
    };
 
    logger.success(
-    `Phase 3 Summary: ${totalProcessed} pedimentos processed, ${totalUpdated} files updated, ${totalErrors} errors`,
+    `Phase 3 Summary: ${totalProcessed} pedimentos processed, ${totalUpdated} files updated with arela_path and year, ${totalErrors} errors`,
    );
 
    return result;
@@ -860,133 +899,54 @@ export class DatabaseService {
    console.log(`📋 Total files to upload: ${allRelatedFiles.length}`);
    logger.info(`Total files to upload: ${allRelatedFiles.length}`);
 
-    // Step 4: Upload all related files
+    // Step 4: Upload all related files using concurrent batch processing
    let totalProcessed = 0;
    let totalUploaded = 0;
    let totalErrors = 0;
    const batchSize = parseInt(options.batchSize) || 10;
 
-
-
-
-      for (const file of batch) {
-        try {
-          totalProcessed++;
-
-          // Check if file exists
-          if (!fs.existsSync(file.original_path)) {
-            logger.warn(
-              `File not found: ${file.filename} at ${file.original_path}`,
-            );
-            await supabase
-              .from('uploader')
-              .update({
-                status: 'file-not-found',
-                message: 'File no longer exists at original path',
-              })
-              .eq('id', file.id);
-            totalErrors++;
-            continue;
-          }
-
-          // Upload the file (handle both API and Supabase services)
-          let uploadResult;
-          if (uploadService.getServiceName() === 'Supabase') {
-            // Supabase requires single file upload with uploadPath
-            let uploadPath;
-            if (options.folderStructure && file.arela_path) {
-              // Combine folder structure with arela_path: palco/RFC/Year/Patente/Aduana/Pedimento/filename
-              uploadPath = `uploads/${options.folderStructure}/${file.arela_path}${file.filename}`;
-            } else if (file.arela_path) {
-              // Use existing arela_path: RFC/Year/Patente/Aduana/Pedimento/filename
-              uploadPath = `uploads/${file.arela_path}${file.filename}`;
-            } else {
-              // Fallback to RFC folder
-              uploadPath = `uploads/${file.rfc}/${file.filename}`;
-            }
+    // Import performance configuration
+    const { performance: perfConfig } = appConfig;
+    const maxConcurrency = perfConfig?.maxApiConnections || 3;
 
-
-
-
-
-
-              },
-            ],
-            {
-              uploadPath: uploadPath,
-            },
-          );
-          uploadResult = { success: true, data: uploadResult };
-          } else {
-            // API service supports batch uploads and returns normalized response
-            let fullFolderStructure;
-            if (options.folderStructure && file.arela_path) {
-              // Combine folder structure with arela_path: palco/RFC/Year/Patente/Aduana/Pedimento/
-              fullFolderStructure = `${options.folderStructure}/${file.arela_path}`;
-            } else if (file.arela_path) {
-              // Use existing arela_path: RFC/Year/Patente/Aduana/Pedimento/
-              fullFolderStructure = file.arela_path;
-            } else {
-              // Fallback to RFC folder
-              fullFolderStructure = `${file.rfc}/`;
-            }
+    console.log(
+      `🚀 Starting batch upload: ${allRelatedFiles.length} files in batches of ${batchSize}`,
+    );
+    console.log(
+      `⚡ Concurrent processing: up to ${maxConcurrency} parallel operations`,
+    );
 
-
-
-
-
-            contentType: 'application/octet-stream',
-              },
-            ],
-            {
-              folderStructure: fullFolderStructure,
-            },
-          );
-          }
+    // Process files in batches with concurrent processing
+    for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
+      const batch = allRelatedFiles.slice(i, i + batchSize);
+      const batchNum = Math.floor(i / batchSize) + 1;
+      const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
 
-
-
-            .from('uploader')
-            .update({
-              status: 'file-uploaded',
-              message: 'Successfully uploaded to Arela API',
-            })
-            .eq('id', file.id);
+      console.log(
+        `📦 Processing batch ${batchNum}/${totalBatches} (${batch.length} files)`,
      );
 
-
-
-
-
-
-
-
-            })
-            .eq('id', file.id);
+      // Process batch using concurrent processing similar to UploadCommand
+      const batchResults = await this.#processRfcBatch(
+        batch,
+        uploadService,
+        supabase,
+        options,
+        maxConcurrency,
+      );
 
-
-
-
-          );
-        }
-      } catch (error) {
-        totalErrors++;
-        logger.error(
-          `Error processing file ${file.filename}: ${error.message}`,
-        );
+      totalProcessed += batchResults.processed;
+      totalUploaded += batchResults.uploaded;
+      totalErrors += batchResults.errors;
 
-
-
-
-
-
-
-
-      }
+      // Progress update
+      const progress = (
+        ((i + batch.length) / allRelatedFiles.length) *
+        100
+      ).toFixed(1);
+      console.log(
+        `📊 Batch ${batchNum} complete - Progress: ${progress}% (${totalUploaded}/${allRelatedFiles.length} uploaded)`,
+      );
    }
 
    const result = {
@@ -1133,6 +1093,298 @@ export class DatabaseService {
 
    return readyFiles || [];
  }
+
+  /**
+   * Process a batch of files using concurrent processing for RFC uploads
+   * @param {Array} files - Files to process in this batch
+   * @param {Object} uploadService - Upload service instance
+   * @param {Object} supabase - Supabase client
+   * @param {Object} options - Upload options
+   * @param {number} maxConcurrency - Maximum concurrent operations
+   * @returns {Promise<Object>} Batch processing results
+   */
+  async #processRfcBatch(
+    files,
+    uploadService,
+    supabase,
+    options,
+    maxConcurrency,
+  ) {
+    const fs = (await import('fs')).default;
+
+    let processed = 0;
+    let uploaded = 0;
+    let errors = 0;
+
+    // For Supabase, process files individually (required by service)
+    if (uploadService.getServiceName() === 'Supabase') {
+      // Process files in concurrent chunks within the batch
+      const chunks = [];
+      for (let i = 0; i < files.length; i += maxConcurrency) {
+        chunks.push(files.slice(i, i + maxConcurrency));
+      }
+
+      // Process each chunk concurrently
+      for (const chunk of chunks) {
+        const chunkPromises = chunk.map(async (file) => {
+          return await this.#processRfcSingleFile(
+            file,
+            uploadService,
+            supabase,
+            options,
+            fs,
+          );
+        });
+
+        // Wait for all files in this chunk to complete
+        const chunkResults = await Promise.allSettled(chunkPromises);
+
+        // Count results
+        for (const result of chunkResults) {
+          processed++;
+          if (result.status === 'fulfilled' && result.value.success) {
+            uploaded++;
+          } else {
+            errors++;
+          }
+        }
+      }
+    } else {
+      // For API service, use true batch processing (multiple files per API call)
+      const apiChunks = [];
+      const apiChunkSize = Math.min(
+        5,
+        Math.ceil(files.length / maxConcurrency),
+      ); // 5 files per API call, or distribute evenly
+
+      for (let i = 0; i < files.length; i += apiChunkSize) {
+        apiChunks.push(files.slice(i, i + apiChunkSize));
+      }
+
+      console.log(
+        `  🚀 Processing ${apiChunks.length} API calls with ${apiChunkSize} files each (max ${maxConcurrency} concurrent)`,
+      );
+
+      // Process API chunks with controlled concurrency
+      const concurrentChunks = [];
+      for (let i = 0; i < apiChunks.length; i += maxConcurrency) {
+        concurrentChunks.push(apiChunks.slice(i, i + maxConcurrency));
+      }
+
+      for (const concurrentSet of concurrentChunks) {
+        const batchPromises = concurrentSet.map(async (chunk) => {
+          return await this.#processRfcApiBatch(
+            chunk,
+            uploadService,
+            supabase,
+            options,
+            fs,
+          );
+        });
+
+        // Wait for all concurrent batches to complete
+        const batchResults = await Promise.allSettled(batchPromises);
+
+        // Count results
+        for (const result of batchResults) {
+          if (result.status === 'fulfilled') {
+            processed += result.value.processed;
+            uploaded += result.value.uploaded;
+            errors += result.value.errors;
+          } else {
+            errors += result.value?.processed || 0;
+          }
+        }
+      }
+    }
+
+    return { processed, uploaded, errors };
+  }
+
+  /**
+   * Process a single file for RFC upload (Supabase mode)
+   */
+  async #processRfcSingleFile(file, uploadService, supabase, options, fs) {
+    try {
+      // Check if file exists
+      if (!fs.existsSync(file.original_path)) {
+        logger.warn(
+          `File not found: ${file.filename} at ${file.original_path}`,
+        );
+        await supabase
+          .from('uploader')
+          .update({
+            status: 'file-not-found',
+            message: 'File no longer exists at original path',
+          })
+          .eq('id', file.id);
+        return { success: false, error: 'File not found' };
+      }
+
+      // Supabase requires single file upload with uploadPath
+      let uploadPath;
+      if (options.folderStructure && file.arela_path) {
+        uploadPath = `uploads/${options.folderStructure}/${file.arela_path}${file.filename}`;
+      } else if (file.arela_path) {
+        uploadPath = `uploads/${file.arela_path}${file.filename}`;
+      } else {
+        uploadPath = `uploads/${file.rfc}/${file.filename}`;
+      }
+
+      const uploadResult = await uploadService.upload(
+        [
+          {
+            path: file.original_path,
+            name: file.filename,
+            contentType: 'application/octet-stream',
+          },
+        ],
+        { uploadPath: uploadPath },
+      );
+
+      // Update database status
+      await supabase
+        .from('uploader')
+        .update({
+          status: 'file-uploaded',
+          message: 'Successfully uploaded to Supabase',
+        })
+        .eq('id', file.id);
+
+      logger.info(`✅ Uploaded: ${file.filename}`);
+      return { success: true, filename: file.filename };
+    } catch (error) {
+      logger.error(
+        `❌ Error processing file ${file.filename}: ${error.message}`,
+      );
+
+      await supabase
+        .from('uploader')
+        .update({
+          status: 'upload-error',
+          message: `Processing error: ${error.message}`,
+        })
+        .eq('id', file.id);
+
+      return { success: false, error: error.message, filename: file.filename };
+    }
+  }
+
+  /**
+   * Process multiple files in a single API batch call (API service mode)
+   */
+  async #processRfcApiBatch(files, uploadService, supabase, options, fs) {
+    let processed = 0;
+    let uploaded = 0;
+    let errors = 0;
+
+    try {
+      // Prepare files for batch upload
+      const validFiles = [];
+      const invalidFiles = [];
+
+      for (const file of files) {
+        processed++;
+
+        if (!fs.existsSync(file.original_path)) {
+          logger.warn(
+            `File not found: ${file.filename} at ${file.original_path}`,
+          );
+          invalidFiles.push(file);
+          continue;
+        }
+
+        validFiles.push({
+          fileData: {
+            path: file.original_path,
+            name: file.filename,
+            contentType: 'application/octet-stream',
+          },
+          dbRecord: file,
+        });
+      }
+
+      // Update invalid files in database
+      for (const file of invalidFiles) {
+        await supabase
+          .from('uploader')
+          .update({
+            status: 'file-not-found',
+            message: 'File no longer exists at original path',
+          })
+          .eq('id', file.id);
+        errors++;
+      }
+
+      // Process valid files in batch if any exist
+      if (validFiles.length > 0) {
+        // Determine folder structure (all files in this batch should have same arela_path)
+        const sampleFile = validFiles[0].dbRecord;
+        let fullFolderStructure;
+        if (options.folderStructure && sampleFile.arela_path) {
+          fullFolderStructure = `${options.folderStructure}/${sampleFile.arela_path}`;
+        } else if (sampleFile.arela_path) {
+          fullFolderStructure = sampleFile.arela_path;
+        } else {
+          fullFolderStructure = `${sampleFile.rfc}/`;
+        }
+
+        // Make single API call with multiple files
+        const uploadResult = await uploadService.upload(
+          validFiles.map((f) => f.fileData),
+          { folderStructure: fullFolderStructure },
+        );
+
+        if (uploadResult.success) {
+          // Update all files as uploaded
+          const fileIds = validFiles.map((f) => f.dbRecord.id);
+          await supabase
+            .from('uploader')
+            .update({
+              status: 'file-uploaded',
+              message: 'Successfully uploaded to Arela API (batch)',
+            })
+            .in('id', fileIds);
+
+          uploaded += validFiles.length;
+          logger.info(
+            `✅ Batch uploaded: ${validFiles.length} files to ${fullFolderStructure}`,
+          );
+        } else {
+          // Update all files as failed
+          const fileIds = validFiles.map((f) => f.dbRecord.id);
+          await supabase
+            .from('uploader')
+            .update({
+              status: 'upload-error',
+              message: uploadResult.error || 'Batch upload failed',
+            })
+            .in('id', fileIds);
+
+          errors += validFiles.length;
+          logger.error(
+            `❌ Batch upload failed: ${validFiles.length} files - ${uploadResult.error}`,
+          );
+        }
+      }
+    } catch (error) {
+      logger.error(`❌ Error processing batch: ${error.message}`);
+
+      // Mark all files as failed
+      const fileIds = files.map((f) => f.id);
+      await supabase
+        .from('uploader')
+        .update({
+          status: 'upload-error',
+          message: `Batch processing error: ${error.message}`,
+        })
+        .in('id', fileIds);
+
+      errors += files.length;
+    }
+
+    return { processed, uploaded, errors };
+  }
 }
 
 // Export singleton instance
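
The detection-phase change above narrows the chunked scan to PDFs pre-flagged as `is_like_simplificado`, so unrelated PDFs never leave the database. A sketch of that pagination pattern (assuming a supabase-js client and the `uploader` table from the diff; `scanSimplificadoPdfs` and `onChunk` are illustrative names, not package APIs):

```js
// Page through candidate PDFs in fixed-size chunks using .range(),
// mirroring the query detectPedimentosInDatabase builds in the diff.
async function scanSimplificadoPdfs(supabase, onChunk) {
  const queryBatchSize = 100; // matches the reduced chunk size in the diff
  let offset = 0;
  for (;;) {
    const { data, error } = await supabase
      .from('uploader')
      .select('id, original_path, filename, file_extension, status')
      .eq('status', 'fs-stats')
      .eq('file_extension', 'pdf')
      .eq('is_like_simplificado', true)
      .range(offset, offset + queryBatchSize - 1)
      .order('created_at');
    if (error) throw error;
    if (!data || data.length === 0) break; // past the last chunk
    await onChunk(data); // e.g. run pedimento detection on this chunk
    offset += queryBatchSize;
  }
}
```
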
package/src/services/upload/ApiUploadService.js
CHANGED

@@ -1,6 +1,8 @@
 import { Blob } from 'buffer';
 import { FormData } from 'formdata-node';
 import fs from 'fs';
+import { Agent } from 'http';
+import { Agent as HttpsAgent } from 'https';
 import fetch from 'node-fetch';
 import path from 'path';
 
@@ -16,6 +18,36 @@ export class ApiUploadService extends BaseUploadService {
    super();
    this.baseUrl = appConfig.api.baseUrl;
    this.token = appConfig.api.token;
+
+    // Get API connection settings from config/environment
+    const maxApiConnections = parseInt(process.env.MAX_API_CONNECTIONS) || 10;
+    const connectionTimeout =
+      parseInt(process.env.API_CONNECTION_TIMEOUT) || 60000;
+
+    // Initialize HTTP agents optimized for multiple API replicas
+    this.httpAgent = new Agent({
+      keepAlive: true,
+      keepAliveMsecs: 30000,
+      maxSockets: maxApiConnections, // Match your API replica count
+      maxFreeSockets: Math.ceil(maxApiConnections / 2),
+      maxTotalSockets: maxApiConnections + 5, // Buffer for peak usage
+      timeout: connectionTimeout,
+      scheduling: 'fifo', // First-in-first-out scheduling
+    });
+
+    this.httpsAgent = new HttpsAgent({
+      keepAlive: true,
+      keepAliveMsecs: 30000,
+      maxSockets: maxApiConnections, // Match your API replica count
+      maxFreeSockets: Math.ceil(maxApiConnections / 2),
+      maxTotalSockets: maxApiConnections + 5, // Buffer for peak usage
+      timeout: connectionTimeout,
+      scheduling: 'fifo', // First-in-first-out scheduling
+    });
+
+    console.log(
+      `🔗 HTTP Agent configured for ${maxApiConnections} concurrent API connections`,
+    );
  }
 
  /**
@@ -27,12 +59,31 @@ export class ApiUploadService extends BaseUploadService {
  async upload(files, options) {
    const formData = new FormData();
 
-    // Add files to form data
-
-
-
-
+    // Add files to form data asynchronously
+    for (const file of files) {
+      try {
+        // Check file size for streaming vs buffer approach
+        const stats = await fs.promises.stat(file.path);
+        const fileSizeThreshold = 10 * 1024 * 1024; // 10MB threshold
+
+        if (stats.size > fileSizeThreshold) {
+          // Use streaming for large files
+          const fileStream = fs.createReadStream(file.path);
+          formData.append('files', fileStream, {
+            filename: file.name,
+            contentType: file.contentType,
+            knownLength: stats.size,
+          });
+        } else {
+          // Use buffer for smaller files
+          const fileBuffer = await fs.promises.readFile(file.path);
+          const blob = new Blob([fileBuffer], { type: file.contentType });
+          formData.append('files', blob, file.name);
+        }
+      } catch (error) {
+        throw new Error(`Failed to read file ${file.path}: ${error.message}`);
+      }
+    }
 
    // Add configuration parameters
    if (appConfig.supabase.bucket) {
@@ -61,6 +112,7 @@ export class ApiUploadService extends BaseUploadService {
    formData.append('clientVersion', appConfig.packageVersion);
 
    try {
+      const isHttps = this.baseUrl.startsWith('https');
      const response = await fetch(
        `${this.baseUrl}/api/storage/batch-upload-and-process`,
        {
@@ -69,6 +121,7 @@ export class ApiUploadService extends BaseUploadService {
          'x-api-key': this.token,
          },
          body: formData,
+          agent: isHttps ? this.httpsAgent : this.httpAgent,
        },
      );
 
@@ -99,10 +152,12 @@ export class ApiUploadService extends BaseUploadService {
    }
 
    try {
+      const isHttps = this.baseUrl.startsWith('https');
      const response = await fetch(`${this.baseUrl}/api/health`, {
        headers: {
          'x-api-key': this.token,
        },
+        agent: isHttps ? this.httpsAgent : this.httpAgent,
      });
 
      return response.ok;