@arela/uploader 0.2.5 → 0.2.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/commands/UploadCommand.js +102 -44
- package/src/config/config.js +15 -10
- package/src/errors/ErrorHandler.js +38 -31
- package/src/errors/ErrorTypes.js +2 -2
- package/src/index-old.js +25 -17
- package/src/index.js +124 -49
- package/src/services/DatabaseService.js +403 -191
- package/src/services/LoggingService.js +3 -3
- package/src/services/upload/ApiUploadService.js +16 -10
- package/src/services/upload/BaseUploadService.js +1 -1
- package/src/services/upload/SupabaseUploadService.js +22 -3
- package/src/services/upload/UploadServiceFactory.js +14 -6
- package/src/utils/FileOperations.js +1 -1
- package/src/utils/FileSanitizer.js +1 -1
- package/src/utils/PathDetector.js +9 -7
--- package/src/services/DatabaseService.js (0.2.5)
+++ package/src/services/DatabaseService.js (0.2.7)
@@ -25,6 +25,51 @@ export class DatabaseService {
     return await supabaseService.getClient();
   }
 
+  /**
+   * Execute database query with retry logic and exponential backoff
+   * @private
+   * @param {Function} queryFn - Query function to execute
+   * @param {string} operation - Description of the operation for logging
+   * @param {number} maxRetries - Maximum number of retry attempts (default: 3)
+   * @returns {Promise<Object>} Query result
+   */
+  async #queryWithRetry(queryFn, operation, maxRetries = 3) {
+    let lastError;
+
+    for (let attempt = 1; attempt <= maxRetries; attempt++) {
+      try {
+        const result = await queryFn();
+        if (attempt > 1) {
+          logger.info(`${operation} succeeded on attempt ${attempt}`);
+        }
+        return result;
+      } catch (error) {
+        lastError = error;
+
+        // Check if it's a timeout or connection error
+        const isRetriableError =
+          error.message?.includes('timeout') ||
+          error.message?.includes('canceling statement') ||
+          error.message?.includes('connection') ||
+          error.code === 'PGRST301'; // PostgREST timeout
+
+        if (!isRetriableError || attempt === maxRetries) {
+          throw error;
+        }
+
+        const backoffDelay = Math.min(1000 * Math.pow(2, attempt - 1), 30000); // Cap at 30 seconds
+        logger.warn(
+          `${operation} failed on attempt ${attempt}/${maxRetries}: ${error.message}`,
+        );
+        logger.info(`Retrying in ${backoffDelay}ms...`);
+
+        await new Promise((resolve) => setTimeout(resolve, backoffDelay));
+      }
+    }
+
+    throw lastError;
+  }
+
   /**
    * Insert file stats with document detection into uploader table
    * @param {Array} files - Array of file objects
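Note on the new helper: `#queryWithRetry` retries only failures that look transient — statement timeouts, dropped connections, and PostgREST's `PGRST301` timeout code — and backs off exponentially (1 s, 2 s, 4 s, capped at 30 s). A minimal standalone sketch of the same pattern (not the package's code; the error classifier here is a simplified stand-in):

```js
// Sketch: retry an async operation with exponential backoff, assuming any
// error whose message mentions "timeout" or "connection" is transient.
async function withRetry(queryFn, operation, maxRetries = 3) {
  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    try {
      return await queryFn();
    } catch (error) {
      const retriable = /timeout|connection/i.test(error.message ?? '');
      if (!retriable || attempt === maxRetries) throw error;
      const backoffDelay = Math.min(1000 * 2 ** (attempt - 1), 30000);
      console.warn(`${operation} failed (attempt ${attempt}), retrying in ${backoffDelay}ms`);
      await new Promise((resolve) => setTimeout(resolve, backoffDelay));
    }
  }
}
```

The trailing `throw lastError;` in the diff's version is defensive only: every iteration either returns or rethrows, so control never falls out of the loop.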
@@ -47,7 +92,9 @@ export class DatabaseService {
         .limit(1);
 
       if (checkError) {
-        logger.error(`Error checking for existing record: ${checkError.message}`);
+        logger.error(
+          `Error checking for existing record: ${checkError.message}`,
+        );
         continue;
       }
 
@@ -57,16 +104,28 @@ export class DatabaseService {
       }
 
       // Initialize record with basic file stats
+      const fileExtension = path
+        .extname(file.path)
+        .toLowerCase()
+        .replace('.', '');
+      const filename = file.originalName || path.basename(file.path);
+
       const record = {
         document_type: null,
         size: stats.size,
         num_pedimento: null,
-        filename: file.originalName || path.basename(file.path),
+        filename: filename,
         original_path: originalPath,
         arela_path: null,
         status: 'stats',
         rfc: null,
         message: null,
+        file_extension: fileExtension,
+        is_like_simplificado: fileExtension === 'pdf' &&
+          filename.toLowerCase().includes('simp'),
+        year: null,
+        created_at: new Date().toISOString(),
+        modified_at: stats.mtime.toISOString(),
       };
 
       // Try to detect document type for supported files
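Each record now carries an `is_like_simplificado` flag — a cheap filename heuristic (a PDF whose name contains "simp") that Phase 2 later uses to narrow which rows get content-level detection. A standalone sketch of the same check, using only Node's `path` module:

```js
import path from 'node:path';

// A PDF whose filename contains "simp" is flagged as a likely
// pedimento simplificado before any content parsing happens.
function isLikeSimplificado(filePath) {
  const fileExtension = path.extname(filePath).toLowerCase().replace('.', '');
  const filename = path.basename(filePath).toLowerCase();
  return fileExtension === 'pdf' && filename.includes('simp');
}

console.log(isLikeSimplificado('/docs/PEDIMENTO_SIMP_2024.pdf')); // true
console.log(isLikeSimplificado('/docs/factura_2024.pdf')); // false
```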
@@ -83,6 +142,10 @@ export class DatabaseService {
           record.arela_path = detection.arelaPath;
         }
 
+        if (detection.detectedPedimentoYear) {
+          record.year = detection.detectedPedimentoYear;
+        }
+
         const rfcField = detection.fields.find(
           (f) => f.name === 'rfc' && f.found,
         );
@@ -113,7 +176,9 @@ export class DatabaseService {
       return [];
     }
 
-    logger.info(`Inserting ${records.length} new records into uploader table...`);
+    logger.info(
+      `Inserting ${records.length} new records into uploader table...`,
+    );
 
     const { data, error } = await supabase
       .from('uploader')
@@ -161,6 +226,9 @@ export class DatabaseService {
         file_extension: fileExtension,
         created_at: new Date().toISOString(),
         modified_at: stats.mtime.toISOString(),
+        is_like_simplificado: fileExtension === 'pdf' &&
+          (file.originalName || path.basename(file.path)).toLowerCase().includes('simp'),
+        year: null,
       };
 
       allRecords.push(record);
@@ -174,7 +242,9 @@ export class DatabaseService {
       return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
     }
 
-    logger.info(`Processing ${allRecords.length} file stats in batches of ${batchSize}...`);
+    logger.info(
+      `Processing ${allRecords.length} file stats in batches of ${batchSize}...`,
+    );
 
     let totalInserted = 0;
     let totalUpdated = 0;
@@ -184,20 +254,28 @@ export class DatabaseService {
 
       try {
         // Check which records already exist
-        const originalPaths = batch.map(r => r.original_path);
+        const originalPaths = batch.map((r) => r.original_path);
         const { data: existingRecords, error: checkError } = await supabase
           .from('uploader')
           .select('original_path')
           .in('original_path', originalPaths);
 
         if (checkError) {
-          logger.error(`Error checking existing records: ${checkError.message}`);
+          logger.error(
+            `Error checking existing records: ${checkError.message}`,
+          );
           continue;
         }
 
-        const existingPaths = new Set(existingRecords?.map(r => r.original_path) || []);
-        const newRecords = batch.filter(r => !existingPaths.has(r.original_path));
-        const updateRecords = batch.filter(r => existingPaths.has(r.original_path));
+        const existingPaths = new Set(
+          existingRecords?.map((r) => r.original_path) || [],
+        );
+        const newRecords = batch.filter(
+          (r) => !existingPaths.has(r.original_path),
+        );
+        const updateRecords = batch.filter((r) =>
+          existingPaths.has(r.original_path),
+        );
 
         logger.info(
           `Batch ${Math.floor(i / batchSize) + 1}: ${newRecords.length} new, ${updateRecords.length} updates`,
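The batch step now asks the database which `original_path` values already exist, then splits each batch into inserts and updates. A sketch of that partition step, assuming an initialized supabase-js client and the table/column names shown in the diff:

```js
// Returns { newRecords, updateRecords } for one batch of candidate rows.
async function partitionBatch(supabase, batch) {
  const originalPaths = batch.map((r) => r.original_path);

  // One .in() lookup flags which paths are already in the table.
  const { data: existing, error } = await supabase
    .from('uploader')
    .select('original_path')
    .in('original_path', originalPaths);
  if (error) throw error;

  const existingPaths = new Set((existing ?? []).map((r) => r.original_path));
  return {
    newRecords: batch.filter((r) => !existingPaths.has(r.original_path)),
    updateRecords: batch.filter((r) => existingPaths.has(r.original_path)),
  };
}
```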
@@ -228,6 +306,8 @@ export class DatabaseService {
             modified_at: record.modified_at,
             filename: record.filename,
             file_extension: record.file_extension,
+            is_like_simplificado: record.is_like_simplificado,
+            year: record.year,
           })
           .eq('original_path', record.original_path);
 
@@ -238,7 +318,6 @@ export class DatabaseService {
           totalUpdated += batchUpdated;
           logger.info(`Updated ${batchUpdated} existing records`);
         }
-
       } catch (error) {
         logger.error(
           `Unexpected error in batch ${Math.floor(i / batchSize) + 1}: ${error.message}`,
@@ -264,11 +343,14 @@ export class DatabaseService {
   */
  async detectPedimentosInDatabase(options = {}) {
    const supabase = await this.#getSupabaseClient();
-
-    logger.info('Phase 2: Starting PDF detection for pedimento-simplificado documents...');
-
+
+    logger.info(
+      'Phase 2: Starting PDF detection for pedimento-simplificado documents...',
+    );
+
    const processingBatchSize = parseInt(options.batchSize) || 10;
-    const queryBatchSize = 500;
+    // Reduced query batch size to avoid timeouts
+    const queryBatchSize = 100; // Reduced from 500 to 100
 
    let totalDetected = 0;
    let totalProcessed = 0;
@@ -276,135 +358,169 @@ export class DatabaseService {
    let offset = 0;
    let chunkNumber = 1;
 
-    logger.info(`Processing PDF files in chunks of ${queryBatchSize} records...`);
+    logger.info(
+      `Processing PDF files in chunks of ${queryBatchSize} records...`,
+    );
 
    while (true) {
      logger.info(
        `Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
      );
 
-      const { data: pdfRecords, error: queryError } = await supabase
-        .from('uploader')
-        .select('id, original_path, filename, file_extension, status')
-        .eq('status', 'fs-stats')
-        .eq('file_extension', 'pdf')
-        .range(offset, offset + queryBatchSize - 1)
-        .order('created_at');
+      try {
+        // Split the query to make it more efficient with retry logic
+        const { data: pdfRecords, error: queryError } =
+          await this.#queryWithRetry(async () => {
+            return await supabase
+              .from('uploader')
+              .select('id, original_path, filename, file_extension, status')
+              .eq('status', 'fs-stats')
+              .eq('file_extension', 'pdf')
+              .eq('is_like_simplificado', true)
+              .range(offset, offset + queryBatchSize - 1)
+              .order('created_at');
+          }, `fetch PDF records chunk ${chunkNumber}`);
 
-      if (queryError) {
-        throw new Error(
-          `Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
-        );
-      }
+        if (queryError) {
+          throw new Error(
+            `Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
+          );
+        }
 
-      if (!pdfRecords || pdfRecords.length === 0) {
-        logger.info('No more PDF files found. Processing completed.');
-        break;
-      }
+        if (!pdfRecords || pdfRecords.length === 0) {
+          logger.info('No more PDF files found. Processing completed.');
+          break;
+        }
 
-      logger.info(`Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`);
+        logger.info(
+          `Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
+        );
 
-      let chunkDetected = 0;
-      let chunkProcessed = 0;
-      let chunkErrors = 0;
+        let chunkDetected = 0;
+        let chunkProcessed = 0;
+        let chunkErrors = 0;
 
-      // Process files in smaller batches
-      for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
-        const batch = pdfRecords.slice(i, i + processingBatchSize);
-        const updatePromises = [];
+        // Process files in smaller batches
+        for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
+          const batch = pdfRecords.slice(i, i + processingBatchSize);
+          const updatePromises = [];
 
-        for (const record of batch) {
-          try {
-            if (!fs.existsSync(record.original_path)) {
-              logger.warn(`File not found: ${record.filename} at ${record.original_path}`);
-              updatePromises.push(
-                supabase
-                  .from('uploader')
-                  .update({
-                    status: 'file-not-found',
-                    message: 'File no longer exists at original path',
-                  })
-                  .eq('id', record.id),
-              );
-              chunkErrors++;
-              totalErrors++;
-              continue;
-            }
+          for (const record of batch) {
+            try {
+              if (!fs.existsSync(record.original_path)) {
+                logger.warn(
+                  `File not found: ${record.filename} at ${record.original_path}`,
+                );
+                updatePromises.push(
+                  supabase
+                    .from('uploader')
+                    .update({
+                      status: 'file-not-found',
+                      message: 'File no longer exists at original path',
+                    })
+                    .eq('id', record.id),
+                );
+                chunkErrors++;
+                totalErrors++;
+                continue;
+              }
 
-            const detection = await this.detectionService.detectFile(record.original_path);
-            chunkProcessed++;
-            totalProcessed++;
+              const detection = await this.detectionService.detectFile(
+                record.original_path,
+              );
+              chunkProcessed++;
+              totalProcessed++;
 
-            const updateData = {
-              status: detection.detectedType ? 'detected' : 'not-detected',
-              document_type: detection.detectedType,
-              num_pedimento: detection.detectedPedimento,
-              arela_path: detection.arelaPath,
-              message: detection.error || null,
-            };
+              const updateData = {
+                status: detection.detectedType ? 'detected' : 'not-detected',
+                document_type: detection.detectedType,
+                num_pedimento: detection.detectedPedimento,
+                arela_path: detection.arelaPath,
+                message: detection.error || null,
+                year: detection.detectedPedimentoYear || null,
+              };
 
-            if (detection.fields) {
-              const rfcField = detection.fields.find(
-                (f) => f.name === 'rfc' && f.found,
-              );
-              if (rfcField) {
-                updateData.rfc = rfcField.value;
-              }
-            }
+              if (detection.fields) {
+                const rfcField = detection.fields.find(
+                  (f) => f.name === 'rfc' && f.found,
+                );
+                if (rfcField) {
+                  updateData.rfc = rfcField.value;
+                }
+              }
 
-            if (detection.detectedType) {
-              chunkDetected++;
-              totalDetected++;
-              logger.success(
-                `Detected: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${detection.fields?.rfc || 'N/A'}`,
-              );
-            } else {
-              logger.info(
-                `Not detected: ${record.filename} - No pedimento-simplificado pattern found`,
-              );
-            }
+              if (detection.detectedType) {
+                chunkDetected++;
+                totalDetected++;
+                logger.success(
+                  `Detected: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${detection.fields?.rfc || 'N/A'}`,
+                );
+              } else {
+                logger.info(
+                  `Not detected: ${record.filename} - No pedimento-simplificado pattern found`,
+                );
+              }
 
-            updatePromises.push(
-              supabase.from('uploader').update(updateData).eq('id', record.id),
-            );
-          } catch (error) {
-            logger.error(`Error detecting ${record.filename}: ${error.message}`);
-            chunkErrors++;
-            totalErrors++;
+              updatePromises.push(
+                supabase
+                  .from('uploader')
+                  .update(updateData)
+                  .eq('id', record.id),
+              );
+            } catch (error) {
+              logger.error(
+                `Error detecting ${record.filename}: ${error.message}`,
+              );
+              chunkErrors++;
+              totalErrors++;
 
-            updatePromises.push(
-              supabase
-                .from('uploader')
-                .update({
-                  status: 'detection-error',
-                  message: error.message,
-                })
-                .eq('id', record.id),
-            );
+              updatePromises.push(
+                supabase
+                  .from('uploader')
+                  .update({
+                    status: 'detection-error',
+                    message: error.message,
+                  })
+                  .eq('id', record.id),
+              );
+            }
          }
        }
 
-        try {
-          await Promise.all(updatePromises);
-        } catch (error) {
-          logger.error(`Error updating batch in chunk ${chunkNumber}: ${error.message}`);
-        }
-      }
+          try {
+            await Promise.all(updatePromises);
+          } catch (error) {
+            logger.error(
+              `Error updating batch in chunk ${chunkNumber}: ${error.message}`,
+            );
+          }
+        }
 
-      logger.success(
-        `Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
-      );
+        logger.success(
+          `Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
+        );
 
-      offset += queryBatchSize;
-      chunkNumber++;
+        offset += queryBatchSize;
+        chunkNumber++;
 
-      if (pdfRecords.length < queryBatchSize) {
-        break;
-      }
+        if (pdfRecords.length < queryBatchSize) {
+          logger.info(
+            `Reached end of records (chunk had ${pdfRecords.length} records).`,
+          );
+          break;
+        }
 
-      // Small delay between chunks
-      await new Promise((resolve) => setTimeout(resolve, 500));
+        // Small delay between chunks
+        await new Promise((resolve) => setTimeout(resolve, 500));
+      } catch (chunkError) {
+        logger.error(
+          `Error processing chunk ${chunkNumber}: ${chunkError.message}`,
+        );
+        // Continue to next chunk after error
+        offset += queryBatchSize;
+        chunkNumber++;
+        totalErrors++;
+        continue;
+      }
    }
 
    const result = {
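Phase 2 now pages through candidate PDFs with `.range()` in chunks of 100 (down from 500) and wraps each chunk in its own try/catch so one bad chunk doesn't abort the run. A sketch of the pagination skeleton, assuming an initialized supabase-js client (the per-record detection work is elided):

```js
const queryBatchSize = 100;
let offset = 0;

while (true) {
  // .range() is inclusive: rows offset .. offset + queryBatchSize - 1.
  const { data: rows, error } = await supabase
    .from('uploader')
    .select('id, original_path, filename')
    .eq('status', 'fs-stats')
    .eq('is_like_simplificado', true)
    .range(offset, offset + queryBatchSize - 1)
    .order('created_at');
  if (error) throw error;
  if (!rows || rows.length === 0) break;

  // ...detect and update each row here...

  offset += queryBatchSize;
  if (rows.length < queryBatchSize) break; // last page was partial
}
```

One caveat worth noting: because processing flips each row's `status` away from `'fs-stats'`, rows matching the filter shift toward lower offsets between queries, so advancing `offset` can skip records; re-running the scan until no rows match (or paging on a stable cursor such as `created_at`) avoids that.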
@@ -427,14 +543,14 @@ export class DatabaseService {
   */
  async propagateArelaPath(options = {}) {
    const supabase = await this.#getSupabaseClient();
-
-    logger.info('Phase 3: Starting arela_path propagation process...');
-    console.log('🔍 Finding pedimento_simplificado records with arela_path...');
+
+    logger.info('Phase 3: Starting arela_path and year propagation process...');
+    console.log('🔍 Finding pedimento_simplificado records with arela_path and year...');
 
    // Get all pedimento_simplificado records that have arela_path
    const { data: pedimentoRecords, error: pedimentoError } = await supabase
      .from('uploader')
-      .select('id, original_path, arela_path, filename')
+      .select('id, original_path, arela_path, filename, year')
      .eq('document_type', 'pedimento_simplificado')
      .not('arela_path', 'is', null);
 
@@ -446,12 +562,18 @@ export class DatabaseService {
 
    if (!pedimentoRecords || pedimentoRecords.length === 0) {
      logger.info('No pedimento_simplificado records with arela_path found');
-      console.log('ℹ️ No pedimento_simplificado records with arela_path found');
+      console.log(
+        'ℹ️ No pedimento_simplificado records with arela_path found',
+      );
      return { processedCount: 0, updatedCount: 0, errorCount: 0 };
    }
 
-    console.log(`📋 Found ${pedimentoRecords.length} pedimento records with arela_path`);
-    logger.info(`Found ${pedimentoRecords.length} pedimento records with arela_path to process`);
+    console.log(
+      `📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
+    );
+    logger.info(
+      `Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
+    );
 
    let totalProcessed = 0;
    let totalUpdated = 0;
@@ -465,8 +587,10 @@ export class DatabaseService {
 
      // Extract base path from original_path (remove filename)
      const basePath = path.dirname(pedimento.original_path);
-
-      logger.info(`Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`);
+
+      logger.info(
+        `Processing pedimento: ${pedimento.filename} | Base path: ${basePath} | Year: ${pedimento.year || 'N/A'}`,
+      );
 
      // Extract folder part from existing arela_path
      const existingPath = pedimento.arela_path;
@@ -485,7 +609,9 @@ export class DatabaseService {
        .neq('id', pedimento.id); // Exclude the pedimento itself
 
      if (relatedError) {
-        logger.error(`Error finding related files for ${pedimento.filename}: ${relatedError.message}`);
+        logger.error(
+          `Error finding related files for ${pedimento.filename}: ${relatedError.message}`,
+        );
        totalErrors++;
        continue;
      }
@@ -495,38 +621,53 @@ export class DatabaseService {
        continue;
      }
 
-      logger.info(`Found ${relatedFiles.length} related files to update for ${pedimento.filename}`);
+      logger.info(
+        `Found ${relatedFiles.length} related files to update for ${pedimento.filename}`,
+      );
 
      // Process files in batches
      const fileIds = relatedFiles.map((f) => f.id);
-
+
      for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
        const batchIds = fileIds.slice(i, i + BATCH_SIZE);
        const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
        const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
 
-        logger.info(`Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`);
+        logger.info(
+          `Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
+        );
 
        try {
          const { error: updateError } = await supabase
            .from('uploader')
-            .update({ arela_path: folderArelaPath })
+            .update({
+              arela_path: folderArelaPath,
+              year: pedimento.year
+            })
            .in('id', batchIds);
 
          if (updateError) {
-            logger.error(`Error in batch ${batchNumber}: ${updateError.message}`);
+            logger.error(
+              `Error in batch ${batchNumber}: ${updateError.message}`,
+            );
            totalErrors++;
          } else {
            totalUpdated += batchIds.length;
-            logger.info(`Successfully updated batch ${batchNumber}: ${batchIds.length} files`);
+            logger.info(
+              `Successfully updated batch ${batchNumber}: ${batchIds.length} files with arela_path and year`,
+            );
          }
        } catch (batchError) {
-          logger.error(`Exception in batch ${batchNumber}: ${batchError.message}`);
+          logger.error(
+            `Exception in batch ${batchNumber}: ${batchError.message}`,
+          );
          totalErrors++;
        }
      }
    } catch (error) {
-      logger.error(`Error processing pedimento ${pedimento.filename}: ${error.message}`);
+      logger.error(
+        `Error processing pedimento ${pedimento.filename}: ${error.message}`,
+      );
      totalErrors++;
    }
  }
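Phase 3 now copies both `arela_path` and the new `year` column from each pedimento to its related files, updating ids in fixed-size slices. A sketch of the batched update, assuming supabase-js and a `BATCH_SIZE` constant defined elsewhere in the method (its value is not shown in this diff):

```js
const fileIds = relatedFiles.map((f) => f.id);

for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
  const batchIds = fileIds.slice(i, i + BATCH_SIZE);
  const { error } = await supabase
    .from('uploader')
    .update({ arela_path: folderArelaPath, year: pedimento.year })
    .in('id', batchIds);
  if (error) {
    // Count the failure and keep going; one bad batch should not
    // stop propagation for the remaining files.
  }
}
```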
@@ -538,7 +679,7 @@ export class DatabaseService {
    };
 
    logger.success(
-      `Phase 3 Summary: ${totalProcessed} pedimentos processed, ${totalUpdated} files updated, ${totalErrors} errors`,
+      `Phase 3 Summary: ${totalProcessed} pedimentos processed, ${totalUpdated} files updated with arela_path and year, ${totalErrors} errors`,
    );
 
    return result;
@@ -552,12 +693,15 @@ export class DatabaseService {
  async uploadFilesByRfc(options = {}) {
    const supabase = await this.#getSupabaseClient();
    const uploadService = await uploadServiceFactory.getUploadService();
-
+
    // Get configuration
-    const appConfig = await import('../config/config.js').then(
-      (m) => m.appConfig);
+    const appConfig = await import('../config/config.js').then(
+      (m) => m.appConfig,
+    );
+
    if (!appConfig.upload.rfcs || appConfig.upload.rfcs.length === 0) {
-      const errorMsg = 'No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.';
+      const errorMsg =
+        'No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.';
      logger.error(errorMsg);
      throw new Error(errorMsg);
    }
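`uploadFilesByRfc` requires `UPLOAD_RFCS`, a pipe-separated list (the example format appears later in this diff: `UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"`). A sketch of how such a value could be parsed — the actual parsing lives in src/config/config.js, which this section of the diff does not show:

```js
const raw = process.env.UPLOAD_RFCS ?? ''; // e.g. "RFC123456789|RFC987654321"
const rfcs = raw
  .split('|')
  .map((s) => s.trim())
  .filter(Boolean);

if (rfcs.length === 0) {
  throw new Error(
    'No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.',
  );
}
```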
@@ -582,13 +726,16 @@ export class DatabaseService {
    }
 
    // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
-    console.log('🎯 Finding pedimento_simplificado records for specified RFCs...');
-    const { data: pedimentoRfcRecords, error: pedimentoRfcError } = await supabase
-      .from('uploader')
-      .select('arela_path')
-      .eq('document_type', 'pedimento_simplificado')
-      .in('rfc', appConfig.upload.rfcs)
-      .not('arela_path', 'is', null);
+    console.log(
+      '🎯 Finding pedimento_simplificado records for specified RFCs...',
+    );
+    const { data: pedimentoRfcRecords, error: pedimentoRfcError } =
+      await supabase
+        .from('uploader')
+        .select('arela_path')
+        .eq('document_type', 'pedimento_simplificado')
+        .in('rfc', appConfig.upload.rfcs)
+        .not('arela_path', 'is', null);
 
    if (pedimentoRfcError) {
      const errorMsg = `Error fetching pedimento RFC records: ${pedimentoRfcError.message}`;
@@ -597,15 +744,23 @@ export class DatabaseService {
    }
 
    if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
-      console.log('ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path');
+      console.log(
+        'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
+      );
      logger.info('No pedimento_simplificado records found for specified RFCs');
      return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
    }
 
    // Get unique arela_paths from pedimento records
-    const uniqueArelaPaths = [
-      ...new Set(pedimentoRfcRecords.map(r => r.arela_path)),
-    ];
+    const uniqueArelaPaths = [
+      ...new Set(pedimentoRfcRecords.map((r) => r.arela_path)),
+    ];
+    console.log(
+      `📋 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`,
+    );
+    logger.info(
+      `Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
+    );
 
    // Step 2: Get all files with these arela_paths that haven't been uploaded yet
    let rfcRecords = [];
@@ -634,41 +789,64 @@ export class DatabaseService {
 
    if (!rfcRecords || rfcRecords.length === 0) {
      if (totalRfcFiles && totalRfcFiles > 0) {
-        console.log(`ℹ️ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`);
+        console.log(
+          `ℹ️ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`,
+        );
        console.log(' No new files to upload.');
-        logger.info(`All ${totalRfcFiles} files for specified RFCs already uploaded`);
+        logger.info(
+          `All ${totalRfcFiles} files for specified RFCs already uploaded`,
+        );
      } else {
-        console.log('ℹ️ No files found for the specified RFCs with arela_path');
-        console.log(` Make sure files for RFCs [${appConfig.upload.rfcs.join(', ')}] have been processed and have arela_path values`);
+        console.log(
+          'ℹ️ No files found for the specified RFCs with arela_path',
+        );
+        console.log(
+          ` Make sure files for RFCs [${appConfig.upload.rfcs.join(', ')}] have been processed and have arela_path values`,
+        );
        logger.info('No files found for specified RFCs with arela_path');
      }
      return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
    }
 
    // Show filtering effect
-    const uploadableArelaPaths = [...new Set(rfcRecords.map(r => r.arela_path))];
+    const uploadableArelaPaths = [
+      ...new Set(rfcRecords.map((r) => r.arela_path)),
+    ];
    const skipped = (totalRfcFiles || 0) - rfcRecords.length;
    if (skipped > 0) {
-      console.log(`📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`);
+      console.log(
+        `📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`,
+      );
    } else {
      console.log(`📊 Found ${rfcRecords.length} files ready for upload`);
    }
-    logger.info(`Found ${rfcRecords.length} files ready for upload, ${skipped} skipped`);
+    logger.info(
+      `Found ${rfcRecords.length} files ready for upload, ${skipped} skipped`,
+    );
 
    // Step 3: Get ALL files that have these arela_paths (including supporting documents)
    let allRelatedFiles = [];
    const arelaPathChunkSize = 50;
    const queryBatchSize = 1000;
 
-    console.log('📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...');
+    console.log(
+      '📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...',
+    );
 
    // Process arela_paths in chunks
    for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
-      const arelaPathChunk = uploadableArelaPaths.slice(i, i + arelaPathChunkSize);
+      const arelaPathChunk = uploadableArelaPaths.slice(
+        i,
+        i + arelaPathChunkSize,
+      );
      const chunkNumber = Math.floor(i / arelaPathChunkSize) + 1;
-      const totalChunks = Math.ceil(uploadableArelaPaths.length / arelaPathChunkSize);
-
-      console.log(` Processing arela_path chunk ${chunkNumber}/${totalChunks} (${arelaPathChunk.length} paths)`);
+      const totalChunks = Math.ceil(
+        uploadableArelaPaths.length / arelaPathChunkSize,
+      );
+
+      console.log(
+        ` Processing arela_path chunk ${chunkNumber}/${totalChunks} (${arelaPathChunk.length} paths)`,
+      );
 
      // For each chunk of arela_paths, use pagination to get all related files
      let hasMore = true;
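Step 3 fetches related files by filtering on at most 50 `arela_path` values per query; since PostgREST encodes `.in()` filters in the request URI, one filter over thousands of paths would overflow the URI length limit. A sketch of the chunked fetch, assuming the variables from the surrounding method (the inner `hasMore` pagination used in the diff is elided):

```js
const arelaPathChunkSize = 50;
const allRelatedFiles = [];

for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
  const pathChunk = uploadableArelaPaths.slice(i, i + arelaPathChunkSize);
  const { data, error } = await supabase
    .from('uploader')
    .select('id, original_path, arela_path, filename, rfc')
    .in('arela_path', pathChunk) // small chunk keeps the request URI short
    .neq('status', 'file-uploaded')
    .not('original_path', 'is', null);
  if (error) throw error;
  if (data) allRelatedFiles.push(...data);
}
```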
@@ -715,14 +893,16 @@ export class DatabaseService {
 
    for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
      const batch = allRelatedFiles.slice(i, i + batchSize);
-
+
      for (const file of batch) {
        try {
          totalProcessed++;
-
+
          // Check if file exists
          if (!fs.existsSync(file.original_path)) {
-            logger.warn(`File not found: ${file.filename} at ${file.original_path}`);
+            logger.warn(
+              `File not found: ${file.filename} at ${file.original_path}`,
+            );
            await supabase
              .from('uploader')
              .update({
@@ -749,14 +929,19 @@ export class DatabaseService {
              // Fallback to RFC folder
              uploadPath = `uploads/${file.rfc}/${file.filename}`;
            }
-
-            uploadResult = await uploadService.upload(
-              [{
-                path: file.original_path,
-                name: file.filename,
-                contentType: 'application/octet-stream',
-              }],
-              { uploadPath: uploadPath });
+
+            uploadResult = await uploadService.upload(
+              [
+                {
+                  path: file.original_path,
+                  name: file.filename,
+                  contentType: 'application/octet-stream',
+                },
+              ],
+              {
+                uploadPath: uploadPath,
+              },
+            );
            uploadResult = { success: true, data: uploadResult };
          } else {
            // API service supports batch uploads and returns normalized response
@@ -771,14 +956,19 @@ export class DatabaseService {
              // Fallback to RFC folder
              fullFolderStructure = `${file.rfc}/`;
            }
-
-            uploadResult = await uploadService.upload(
-              [{
-                path: file.original_path,
-                name: file.filename,
-                contentType: 'application/octet-stream',
-              }],
-              { folderStructure: fullFolderStructure });
+
+            uploadResult = await uploadService.upload(
+              [
+                {
+                  path: file.original_path,
+                  name: file.filename,
+                  contentType: 'application/octet-stream',
+                },
+              ],
+              {
+                folderStructure: fullFolderStructure,
+              },
+            );
          }
 
          if (uploadResult.success) {
@@ -790,7 +980,7 @@ export class DatabaseService {
                message: 'Successfully uploaded to Arela API',
              })
              .eq('id', file.id);
-
+
            totalUploaded++;
            logger.info(`Uploaded: ${file.filename}`);
          } else {
@@ -801,14 +991,18 @@ export class DatabaseService {
                message: uploadResult.error || 'Upload failed',
              })
              .eq('id', file.id);
-
+
            totalErrors++;
-            logger.error(`Upload failed: ${file.filename} - ${uploadResult.error}`);
+            logger.error(
+              `Upload failed: ${file.filename} - ${uploadResult.error}`,
+            );
          }
        } catch (error) {
          totalErrors++;
-          logger.error(
-            `Error processing file ${file.filename}: ${error.message}`);
+          logger.error(
+            `Error processing file ${file.filename}: ${error.message}`,
+          );
+
          await supabase
            .from('uploader')
            .update({
@@ -859,15 +1053,21 @@ export class DatabaseService {
    // Check if UPLOAD_RFCS is configured
    const uploadRfcs = appConfig.upload.rfcs;
    if (!uploadRfcs || uploadRfcs.length === 0) {
-      console.log('ℹ️ No UPLOAD_RFCS configured. Please set UPLOAD_RFCS environment variable to see files ready for upload.');
-      console.log(' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"');
+      console.log(
+        'ℹ️ No UPLOAD_RFCS configured. Please set UPLOAD_RFCS environment variable to see files ready for upload.',
+      );
+      console.log(
+        ' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
+      );
      return [];
    }
 
    console.log(`🎯 Using RFC filter: ${uploadRfcs.join(', ')}`);
 
    // Step 1: Find pedimento_simplificado documents for the specified RFCs that have arela_path
-    console.log('🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...');
+    console.log(
+      '🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
+    );
    const { data: pedimentoRecords, error: pedimentoError } = await supabase
      .from('uploader')
      .select('arela_path')
@@ -876,17 +1076,25 @@ export class DatabaseService {
      .not('arela_path', 'is', null);
 
    if (pedimentoError) {
-      throw new Error(`Error querying pedimento_simplificado records: ${pedimentoError.message}`);
+      throw new Error(
+        `Error querying pedimento_simplificado records: ${pedimentoError.message}`,
+      );
    }
 
    if (!pedimentoRecords || pedimentoRecords.length === 0) {
-      console.log('ℹ️ No pedimento_simplificado records with arela_path found');
+      console.log(
+        'ℹ️ No pedimento_simplificado records with arela_path found',
+      );
      return [];
    }
 
    // Get unique arela_paths
-    const uniqueArelaPaths = [...new Set(pedimentoRecords.map(r => r.arela_path))];
-    console.log(`📋 Found ${pedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`);
+    const uniqueArelaPaths = [
+      ...new Set(pedimentoRecords.map((r) => r.arela_path)),
+    ];
+    console.log(
+      `📋 Found ${pedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
+    );
 
    // Step 2: Find all related files with these arela_paths that haven't been uploaded yet
    console.log('🔍 Finding all related files that need to be uploaded...');
@@ -900,13 +1108,17 @@ export class DatabaseService {
 
      const { data: chunkFiles, error: chunkError } = await supabase
        .from('uploader')
-        .select('id, original_path, arela_path, filename, rfc, document_type, status')
+        .select(
+          'id, original_path, arela_path, filename, rfc, document_type, status',
+        )
        .in('arela_path', pathChunk)
        .neq('status', 'file-uploaded')
        .not('original_path', 'is', null);
 
      if (chunkError) {
-        throw new Error(`Error querying files for arela_paths chunk: ${chunkError.message}`);
+        throw new Error(
+          `Error querying files for arela_paths chunk: ${chunkError.message}`,
+        );
      }
 
      if (chunkFiles && chunkFiles.length > 0) {
@@ -950,4 +1162,4 @@ export class DatabaseService {
 
 // Export singleton instance
 export const databaseService = new DatabaseService();
-export default databaseService;
\ No newline at end of file
+export default databaseService;