@arela/uploader 0.2.5 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/commands/UploadCommand.js +102 -44
- package/src/config/config.js +15 -10
- package/src/errors/ErrorHandler.js +38 -31
- package/src/errors/ErrorTypes.js +2 -2
- package/src/index-old.js +25 -17
- package/src/index.js +124 -49
- package/src/services/DatabaseService.js +372 -185
- package/src/services/LoggingService.js +3 -3
- package/src/services/upload/ApiUploadService.js +16 -10
- package/src/services/upload/BaseUploadService.js +1 -1
- package/src/services/upload/SupabaseUploadService.js +22 -3
- package/src/services/upload/UploadServiceFactory.js +14 -6
- package/src/utils/FileOperations.js +1 -1
- package/src/utils/FileSanitizer.js +1 -1
- package/src/utils/PathDetector.js +9 -7
@@ -25,6 +25,51 @@ export class DatabaseService {
     return await supabaseService.getClient();
   }
 
+  /**
+   * Execute database query with retry logic and exponential backoff
+   * @private
+   * @param {Function} queryFn - Query function to execute
+   * @param {string} operation - Description of the operation for logging
+   * @param {number} maxRetries - Maximum number of retry attempts (default: 3)
+   * @returns {Promise<Object>} Query result
+   */
+  async #queryWithRetry(queryFn, operation, maxRetries = 3) {
+    let lastError;
+
+    for (let attempt = 1; attempt <= maxRetries; attempt++) {
+      try {
+        const result = await queryFn();
+        if (attempt > 1) {
+          logger.info(`${operation} succeeded on attempt ${attempt}`);
+        }
+        return result;
+      } catch (error) {
+        lastError = error;
+
+        // Check if it's a timeout or connection error
+        const isRetriableError =
+          error.message?.includes('timeout') ||
+          error.message?.includes('canceling statement') ||
+          error.message?.includes('connection') ||
+          error.code === 'PGRST301'; // PostgREST timeout
+
+        if (!isRetriableError || attempt === maxRetries) {
+          throw error;
+        }
+
+        const backoffDelay = Math.min(1000 * Math.pow(2, attempt - 1), 30000); // Cap at 30 seconds
+        logger.warn(
+          `${operation} failed on attempt ${attempt}/${maxRetries}: ${error.message}`,
+        );
+        logger.info(`Retrying in ${backoffDelay}ms...`);
+
+        await new Promise((resolve) => setTimeout(resolve, backoffDelay));
+      }
+    }
+
+    throw lastError;
+  }
+
   /**
    * Insert file stats with document detection into uploader table
    * @param {Array} files - Array of file objects
@@ -47,7 +92,9 @@ export class DatabaseService {
         .limit(1);
 
       if (checkError) {
-        logger.error(
+        logger.error(
+          `Error checking for existing record: ${checkError.message}`,
+        );
         continue;
       }
 
@@ -113,7 +160,9 @@ export class DatabaseService {
       return [];
     }
 
-    logger.info(
+    logger.info(
+      `Inserting ${records.length} new records into uploader table...`,
+    );
 
     const { data, error } = await supabase
       .from('uploader')
@@ -174,7 +223,9 @@ export class DatabaseService {
       return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
     }
 
-    logger.info(
+    logger.info(
+      `Processing ${allRecords.length} file stats in batches of ${batchSize}...`,
+    );
 
     let totalInserted = 0;
     let totalUpdated = 0;
@@ -184,20 +235,28 @@ export class DatabaseService {
 
       try {
        // Check which records already exist
-        const originalPaths = batch.map(r => r.original_path);
+        const originalPaths = batch.map((r) => r.original_path);
        const { data: existingRecords, error: checkError } = await supabase
          .from('uploader')
          .select('original_path')
          .in('original_path', originalPaths);
 
        if (checkError) {
-          logger.error(
+          logger.error(
+            `Error checking existing records: ${checkError.message}`,
+          );
          continue;
        }
 
-        const existingPaths = new Set(
-
-
+        const existingPaths = new Set(
+          existingRecords?.map((r) => r.original_path) || [],
+        );
+        const newRecords = batch.filter(
+          (r) => !existingPaths.has(r.original_path),
+        );
+        const updateRecords = batch.filter((r) =>
+          existingPaths.has(r.original_path),
+        );
 
        logger.info(
          `Batch ${Math.floor(i / batchSize) + 1}: ${newRecords.length} new, ${updateRecords.length} updates`,
@@ -238,7 +297,6 @@ export class DatabaseService {
          totalUpdated += batchUpdated;
          logger.info(`Updated ${batchUpdated} existing records`);
        }
-
      } catch (error) {
        logger.error(
          `Unexpected error in batch ${Math.floor(i / batchSize) + 1}: ${error.message}`,
@@ -264,11 +322,14 @@ export class DatabaseService {
   */
  async detectPedimentosInDatabase(options = {}) {
    const supabase = await this.#getSupabaseClient();
-
-    logger.info(
-
+
+    logger.info(
+      'Phase 2: Starting PDF detection for pedimento-simplificado documents...',
+    );
+
    const processingBatchSize = parseInt(options.batchSize) || 10;
-
+    // Reduced query batch size to avoid timeouts
+    const queryBatchSize = 500; // Reduced from 1000 to 500
 
    let totalDetected = 0;
    let totalProcessed = 0;
@@ -276,135 +337,168 @@ export class DatabaseService {
    let offset = 0;
    let chunkNumber = 1;
 
-    logger.info(
+    logger.info(
+      `Processing PDF files in chunks of ${queryBatchSize} records...`,
+    );
 
    while (true) {
      logger.info(
        `Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
      );
 
-
-
-
-
-
-
-
+      try {
+        // Split the query to make it more efficient with retry logic
+        const { data: pdfRecords, error: queryError } =
+          await this.#queryWithRetry(async () => {
+            return await supabase
+              .from('uploader')
+              .select('id, original_path, filename, file_extension, status')
+              .eq('status', 'fs-stats')
+              .eq('file_extension', 'pdf')
+              .ilike('filename', '%simp%')
+              .range(offset, offset + queryBatchSize - 1)
+              .order('id'); // Add explicit ordering for consistent pagination
+          }, `fetch PDF records chunk ${chunkNumber}`);
 
-
-
-
-
-
+        if (queryError) {
+          throw new Error(
+            `Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
+          );
+        }
 
-
-
-
-
+        if (!pdfRecords || pdfRecords.length === 0) {
+          logger.info('No more PDF files found. Processing completed.');
+          break;
+        }
 
-
+        logger.info(
+          `Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
+        );
 
-
-
-
+        let chunkDetected = 0;
+        let chunkProcessed = 0;
+        let chunkErrors = 0;
+
+        // Process files in smaller batches
+        for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
+          const batch = pdfRecords.slice(i, i + processingBatchSize);
+          const updatePromises = [];
+
+          for (const record of batch) {
+            try {
+              if (!fs.existsSync(record.original_path)) {
+                logger.warn(
+                  `File not found: ${record.filename} at ${record.original_path}`,
+                );
+                updatePromises.push(
+                  supabase
+                    .from('uploader')
+                    .update({
+                      status: 'file-not-found',
+                      message: 'File no longer exists at original path',
+                    })
+                    .eq('id', record.id),
+                );
+                chunkErrors++;
+                totalErrors++;
+                continue;
+              }
 
-
-
-
-
+              const detection = await this.detectionService.detectFile(
+                record.original_path,
+              );
+              chunkProcessed++;
+              totalProcessed++;
+
+              const updateData = {
+                status: detection.detectedType ? 'detected' : 'not-detected',
+                document_type: detection.detectedType,
+                num_pedimento: detection.detectedPedimento,
+                arela_path: detection.arelaPath,
+                message: detection.error || null,
+              };
+
+              if (detection.fields) {
+                const rfcField = detection.fields.find(
+                  (f) => f.name === 'rfc' && f.found,
+                );
+                if (rfcField) {
+                  updateData.rfc = rfcField.value;
+                }
+              }
+
+              if (detection.detectedType) {
+                chunkDetected++;
+                totalDetected++;
+                logger.success(
+                  `Detected: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${detection.fields?.rfc || 'N/A'}`,
+                );
+              } else {
+                logger.info(
+                  `Not detected: ${record.filename} - No pedimento-simplificado pattern found`,
+                );
+              }
 
-        for (const record of batch) {
-          try {
-            if (!fs.existsSync(record.original_path)) {
-              logger.warn(`File not found: ${record.filename} at ${record.original_path}`);
              updatePromises.push(
                supabase
                  .from('uploader')
-                  .update(
-                    status: 'file-not-found',
-                    message: 'File no longer exists at original path',
-                  })
+                  .update(updateData)
                  .eq('id', record.id),
              );
+            } catch (error) {
+              logger.error(
+                `Error detecting ${record.filename}: ${error.message}`,
+              );
              chunkErrors++;
              totalErrors++;
-              continue;
-            }
 
-
-
-
-
-
-
-
-
-                arela_path: detection.arelaPath,
-                message: detection.error || null,
-              };
-
-              if (detection.fields) {
-                const rfcField = detection.fields.find(
-                  (f) => f.name === 'rfc' && f.found,
-                );
-                if (rfcField) {
-                  updateData.rfc = rfcField.value;
-                }
-              }
-
-              if (detection.detectedType) {
-                chunkDetected++;
-                totalDetected++;
-                logger.success(
-                  `Detected: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${detection.fields?.rfc || 'N/A'}`,
-                );
-              } else {
-                logger.info(
-                  `Not detected: ${record.filename} - No pedimento-simplificado pattern found`,
+              updatePromises.push(
+                supabase
+                  .from('uploader')
+                  .update({
+                    status: 'detection-error',
+                    message: error.message,
+                  })
+                  .eq('id', record.id),
              );
            }
+          }
 
-
-
-            );
+          try {
+            await Promise.all(updatePromises);
          } catch (error) {
-            logger.error(
-
-            totalErrors++;
-
-            updatePromises.push(
-              supabase
-                .from('uploader')
-                .update({
-                  status: 'detection-error',
-                  message: error.message,
-                })
-                .eq('id', record.id),
+            logger.error(
+              `Error updating batch in chunk ${chunkNumber}: ${error.message}`,
            );
          }
        }
 
-
-
-
-            logger.error(`Error updating batch in chunk ${chunkNumber}: ${error.message}`);
-          }
-        }
+        logger.success(
+          `Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
+        );
 
-
-
-        );
+        offset += queryBatchSize;
+        chunkNumber++;
 
-
-
+        if (pdfRecords.length < queryBatchSize) {
+          logger.info(
+            `Reached end of records (chunk had ${pdfRecords.length} records).`,
+          );
+          break;
+        }
 
-
-
-
+        // Small delay between chunks
+        await new Promise((resolve) => setTimeout(resolve, 500));
+      } catch (chunkError) {
+        logger.error(
+          `Error processing chunk ${chunkNumber}: ${chunkError.message}`,
+        );
+        // Continue to next chunk after error
+        offset += queryBatchSize;
+        chunkNumber++;
+        totalErrors++;
+        continue;
      }
-
-      // Small delay between chunks
-      await new Promise((resolve) => setTimeout(resolve, 500));
    }
 
    const result = {
@@ -427,7 +521,7 @@ export class DatabaseService {
   */
  async propagateArelaPath(options = {}) {
    const supabase = await this.#getSupabaseClient();
-
+
    logger.info('Phase 3: Starting arela_path propagation process...');
    console.log('🔍 Finding pedimento_simplificado records with arela_path...');
 
@@ -446,12 +540,18 @@ export class DatabaseService {
 
    if (!pedimentoRecords || pedimentoRecords.length === 0) {
      logger.info('No pedimento_simplificado records with arela_path found');
-      console.log(
+      console.log(
+        'ℹ️ No pedimento_simplificado records with arela_path found',
+      );
      return { processedCount: 0, updatedCount: 0, errorCount: 0 };
    }
 
-    console.log(
-
+    console.log(
+      `📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
+    );
+    logger.info(
+      `Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
+    );
 
    let totalProcessed = 0;
    let totalUpdated = 0;
@@ -465,8 +565,10 @@ export class DatabaseService {
 
        // Extract base path from original_path (remove filename)
        const basePath = path.dirname(pedimento.original_path);
-
-        logger.info(
+
+        logger.info(
+          `Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`,
+        );
 
        // Extract folder part from existing arela_path
        const existingPath = pedimento.arela_path;
@@ -485,7 +587,9 @@ export class DatabaseService {
          .neq('id', pedimento.id); // Exclude the pedimento itself
 
        if (relatedError) {
-          logger.error(
+          logger.error(
+            `Error finding related files for ${pedimento.filename}: ${relatedError.message}`,
+          );
          totalErrors++;
          continue;
        }
@@ -495,17 +599,21 @@ export class DatabaseService {
          continue;
        }
 
-        logger.info(
+        logger.info(
+          `Found ${relatedFiles.length} related files to update for ${pedimento.filename}`,
+        );
 
        // Process files in batches
        const fileIds = relatedFiles.map((f) => f.id);
-
+
        for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
          const batchIds = fileIds.slice(i, i + BATCH_SIZE);
          const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
          const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
 
-          logger.info(
+          logger.info(
+            `Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
+          );
 
          try {
            const { error: updateError } = await supabase
@@ -514,19 +622,27 @@ export class DatabaseService {
              .in('id', batchIds);
 
            if (updateError) {
-              logger.error(
+              logger.error(
+                `Error in batch ${batchNumber}: ${updateError.message}`,
+              );
              totalErrors++;
            } else {
              totalUpdated += batchIds.length;
-              logger.info(
+              logger.info(
+                `Successfully updated batch ${batchNumber}: ${batchIds.length} files`,
+              );
            }
          } catch (batchError) {
-            logger.error(
+            logger.error(
+              `Exception in batch ${batchNumber}: ${batchError.message}`,
+            );
            totalErrors++;
          }
        }
      } catch (error) {
-        logger.error(
+        logger.error(
+          `Error processing pedimento ${pedimento.filename}: ${error.message}`,
+        );
        totalErrors++;
      }
    }
@@ -552,12 +668,15 @@ export class DatabaseService {
  async uploadFilesByRfc(options = {}) {
    const supabase = await this.#getSupabaseClient();
    const uploadService = await uploadServiceFactory.getUploadService();
-
+
    // Get configuration
-    const appConfig = await import('../config/config.js').then(
-
+    const appConfig = await import('../config/config.js').then(
+      (m) => m.appConfig,
+    );
+
    if (!appConfig.upload.rfcs || appConfig.upload.rfcs.length === 0) {
-      const errorMsg =
+      const errorMsg =
+        'No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.';
      logger.error(errorMsg);
      throw new Error(errorMsg);
    }
@@ -582,13 +701,16 @@ export class DatabaseService {
    }
 
    // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
-    console.log(
-
-
-
-
-
-
+    console.log(
+      '🎯 Finding pedimento_simplificado records for specified RFCs...',
+    );
+    const { data: pedimentoRfcRecords, error: pedimentoRfcError } =
+      await supabase
+        .from('uploader')
+        .select('arela_path')
+        .eq('document_type', 'pedimento_simplificado')
+        .in('rfc', appConfig.upload.rfcs)
+        .not('arela_path', 'is', null);
 
    if (pedimentoRfcError) {
      const errorMsg = `Error fetching pedimento RFC records: ${pedimentoRfcError.message}`;
@@ -597,15 +719,23 @@ export class DatabaseService {
    }
 
    if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
-      console.log(
+      console.log(
+        'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
+      );
      logger.info('No pedimento_simplificado records found for specified RFCs');
      return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
    }
 
    // Get unique arela_paths from pedimento records
-    const uniqueArelaPaths = [
-
-
+    const uniqueArelaPaths = [
+      ...new Set(pedimentoRfcRecords.map((r) => r.arela_path)),
+    ];
+    console.log(
+      `📋 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`,
+    );
+    logger.info(
+      `Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
+    );
 
    // Step 2: Get all files with these arela_paths that haven't been uploaded yet
    let rfcRecords = [];
@@ -634,41 +764,64 @@ export class DatabaseService {
 
    if (!rfcRecords || rfcRecords.length === 0) {
      if (totalRfcFiles && totalRfcFiles > 0) {
-        console.log(
+        console.log(
+          `ℹ️ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`,
+        );
        console.log(' No new files to upload.');
-        logger.info(
+        logger.info(
+          `All ${totalRfcFiles} files for specified RFCs already uploaded`,
+        );
      } else {
-        console.log(
-
+        console.log(
+          'ℹ️ No files found for the specified RFCs with arela_path',
+        );
+        console.log(
+          ` Make sure files for RFCs [${appConfig.upload.rfcs.join(', ')}] have been processed and have arela_path values`,
+        );
        logger.info('No files found for specified RFCs with arela_path');
      }
      return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
    }
 
    // Show filtering effect
-    const uploadableArelaPaths = [
+    const uploadableArelaPaths = [
+      ...new Set(rfcRecords.map((r) => r.arela_path)),
+    ];
    const skipped = (totalRfcFiles || 0) - rfcRecords.length;
    if (skipped > 0) {
-      console.log(
+      console.log(
+        `📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`,
+      );
    } else {
      console.log(`📊 Found ${rfcRecords.length} files ready for upload`);
    }
-    logger.info(
+    logger.info(
+      `Found ${rfcRecords.length} files ready for upload, ${skipped} skipped`,
+    );
 
    // Step 3: Get ALL files that have these arela_paths (including supporting documents)
    let allRelatedFiles = [];
    const arelaPathChunkSize = 50;
    const queryBatchSize = 1000;
 
-    console.log(
+    console.log(
+      '📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...',
+    );
 
    // Process arela_paths in chunks
    for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
-      const arelaPathChunk = uploadableArelaPaths.slice(
+      const arelaPathChunk = uploadableArelaPaths.slice(
+        i,
+        i + arelaPathChunkSize,
+      );
      const chunkNumber = Math.floor(i / arelaPathChunkSize) + 1;
-      const totalChunks = Math.ceil(
-
-
+      const totalChunks = Math.ceil(
+        uploadableArelaPaths.length / arelaPathChunkSize,
+      );
+
+      console.log(
+        ` Processing arela_path chunk ${chunkNumber}/${totalChunks} (${arelaPathChunk.length} paths)`,
+      );
 
      // For each chunk of arela_paths, use pagination to get all related files
      let hasMore = true;
@@ -715,14 +868,16 @@ export class DatabaseService {
 
    for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
      const batch = allRelatedFiles.slice(i, i + batchSize);
-
+
      for (const file of batch) {
        try {
          totalProcessed++;
-
+
          // Check if file exists
          if (!fs.existsSync(file.original_path)) {
-            logger.warn(
+            logger.warn(
+              `File not found: ${file.filename} at ${file.original_path}`,
+            );
            await supabase
              .from('uploader')
              .update({
@@ -749,14 +904,19 @@ export class DatabaseService {
              // Fallback to RFC folder
              uploadPath = `uploads/${file.rfc}/${file.filename}`;
            }
-
-            uploadResult = await uploadService.upload(
-
-
-
-
-
-
+
+            uploadResult = await uploadService.upload(
+              [
+                {
+                  path: file.original_path,
+                  name: file.filename,
+                  contentType: 'application/octet-stream',
+                },
+              ],
+              {
+                uploadPath: uploadPath,
+              },
+            );
            uploadResult = { success: true, data: uploadResult };
          } else {
            // API service supports batch uploads and returns normalized response
@@ -771,14 +931,19 @@ export class DatabaseService {
              // Fallback to RFC folder
              fullFolderStructure = `${file.rfc}/`;
            }
-
-            uploadResult = await uploadService.upload(
-
-
-
-
-
-
+
+            uploadResult = await uploadService.upload(
+              [
+                {
+                  path: file.original_path,
+                  name: file.filename,
+                  contentType: 'application/octet-stream',
+                },
+              ],
+              {
+                folderStructure: fullFolderStructure,
+              },
+            );
          }
 
          if (uploadResult.success) {
@@ -790,7 +955,7 @@ export class DatabaseService {
                message: 'Successfully uploaded to Arela API',
              })
              .eq('id', file.id);
-
+
            totalUploaded++;
            logger.info(`Uploaded: ${file.filename}`);
          } else {
@@ -801,14 +966,18 @@ export class DatabaseService {
                message: uploadResult.error || 'Upload failed',
              })
              .eq('id', file.id);
-
+
            totalErrors++;
-            logger.error(
+            logger.error(
+              `Upload failed: ${file.filename} - ${uploadResult.error}`,
+            );
          }
        } catch (error) {
          totalErrors++;
-          logger.error(
-
+          logger.error(
+            `Error processing file ${file.filename}: ${error.message}`,
+          );
+
          await supabase
            .from('uploader')
            .update({
@@ -859,15 +1028,21 @@ export class DatabaseService {
    // Check if UPLOAD_RFCS is configured
    const uploadRfcs = appConfig.upload.rfcs;
    if (!uploadRfcs || uploadRfcs.length === 0) {
-      console.log(
-
+      console.log(
+        'ℹ️ No UPLOAD_RFCS configured. Please set UPLOAD_RFCS environment variable to see files ready for upload.',
+      );
+      console.log(
+        ' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
+      );
      return [];
    }
 
    console.log(`🎯 Using RFC filter: ${uploadRfcs.join(', ')}`);
 
    // Step 1: Find pedimento_simplificado documents for the specified RFCs that have arela_path
-    console.log(
+    console.log(
+      '🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
+    );
    const { data: pedimentoRecords, error: pedimentoError } = await supabase
      .from('uploader')
      .select('arela_path')
@@ -876,17 +1051,25 @@ export class DatabaseService {
      .not('arela_path', 'is', null);
 
    if (pedimentoError) {
-      throw new Error(
+      throw new Error(
+        `Error querying pedimento_simplificado records: ${pedimentoError.message}`,
+      );
    }
 
    if (!pedimentoRecords || pedimentoRecords.length === 0) {
-      console.log(
+      console.log(
+        'ℹ️ No pedimento_simplificado records with arela_path found',
+      );
      return [];
    }
 
    // Get unique arela_paths
-    const uniqueArelaPaths = [
-
+    const uniqueArelaPaths = [
+      ...new Set(pedimentoRecords.map((r) => r.arela_path)),
+    ];
+    console.log(
+      `📋 Found ${pedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
+    );
 
    // Step 2: Find all related files with these arela_paths that haven't been uploaded yet
    console.log('🔍 Finding all related files that need to be uploaded...');
@@ -900,13 +1083,17 @@ export class DatabaseService {
 
      const { data: chunkFiles, error: chunkError } = await supabase
        .from('uploader')
-        .select(
+        .select(
+          'id, original_path, arela_path, filename, rfc, document_type, status',
+        )
        .in('arela_path', pathChunk)
        .neq('status', 'file-uploaded')
        .not('original_path', 'is', null);
 
      if (chunkError) {
-        throw new Error(
+        throw new Error(
+          `Error querying files for arela_paths chunk: ${chunkError.message}`,
+        );
      }
 
      if (chunkFiles && chunkFiles.length > 0) {
@@ -950,4 +1137,4 @@ export class DatabaseService {
 
 // Export singleton instance
 export const databaseService = new DatabaseService();
-export default databaseService;
+export default databaseService;