@arela/uploader 0.2.4 → 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1140 @@
+ import fs from 'fs';
+ import path from 'path';
+
+ import appConfig from '../config/config.js';
+ import { FileDetectionService } from '../file-detection.js';
+ import logger from './LoggingService.js';
+ import uploadServiceFactory from './upload/UploadServiceFactory.js';
+
+ /**
+  * Database Service
+  * Handles all Supabase database operations for the uploader table
+  */
+ export class DatabaseService {
+   constructor() {
+     this.detectionService = new FileDetectionService();
+   }
+
+   /**
+    * Get Supabase client
+    * @private
+    * @returns {Promise<Object>} Supabase client
+    */
+   async #getSupabaseClient() {
+     const supabaseService = uploadServiceFactory.getSupabaseService();
+     return await supabaseService.getClient();
+   }
+
+   /**
+    * Execute database query with retry logic and exponential backoff
+    * @private
+    * @param {Function} queryFn - Query function to execute
+    * @param {string} operation - Description of the operation for logging
+    * @param {number} maxRetries - Maximum number of retry attempts (default: 3)
+    * @returns {Promise<Object>} Query result
+    */
+   async #queryWithRetry(queryFn, operation, maxRetries = 3) {
+     let lastError;
+
+     for (let attempt = 1; attempt <= maxRetries; attempt++) {
+       try {
+         const result = await queryFn();
+         if (attempt > 1) {
+           logger.info(`${operation} succeeded on attempt ${attempt}`);
+         }
+         return result;
+       } catch (error) {
+         lastError = error;
+
+         // Check if it's a timeout or connection error
+         const isRetriableError =
+           error.message?.includes('timeout') ||
+           error.message?.includes('canceling statement') ||
+           error.message?.includes('connection') ||
+           error.code === 'PGRST301'; // PostgREST timeout
+
+         if (!isRetriableError || attempt === maxRetries) {
+           throw error;
+         }
+
+         const backoffDelay = Math.min(1000 * Math.pow(2, attempt - 1), 30000); // Cap at 30 seconds
+         logger.warn(
+           `${operation} failed on attempt ${attempt}/${maxRetries}: ${error.message}`,
+         );
+         logger.info(`Retrying in ${backoffDelay}ms...`);
+
+         await new Promise((resolve) => setTimeout(resolve, backoffDelay));
+       }
+     }
+
+     throw lastError;
+   }
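+
+   // A worked trace of the backoff above (hypothetical failure sequence, derived
+   // from the code, not from package output): with the default maxRetries = 3, a
+   // retriable failure on attempt 1 waits 1000 ms (1000 * 2^0), a failure on
+   // attempt 2 waits 2000 ms, and a failure on attempt 3 is rethrown. The
+   // Math.min cap of 30000 ms only engages from attempt 6 onward, i.e. when a
+   // caller raises maxRetries.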
+
+   /**
+    * Insert file stats with document detection into uploader table
+    * @param {Array} files - Array of file objects
+    * @param {Object} options - Options including clientPath
+    * @returns {Promise<Array>} Inserted records
+    */
+   async insertStatsToUploaderTable(files, options) {
+     const supabase = await this.#getSupabaseClient();
+     const records = [];
+
+     for (const file of files) {
+       const stats = file.stats || fs.statSync(file.path);
+       const originalPath = options.clientPath || file.path;
+
+       // Check if record already exists
+       const { data: existingRecords, error: checkError } = await supabase
+         .from('uploader')
+         .select('id, original_path')
+         .eq('original_path', originalPath)
+         .limit(1);
+
+       if (checkError) {
+         logger.error(
+           `Error checking for existing record: ${checkError.message}`,
+         );
+         continue;
+       }
+
+       if (existingRecords && existingRecords.length > 0) {
+         logger.info(`Skipping duplicate: ${path.basename(file.path)}`);
+         continue;
+       }
+
+       // Initialize record with basic file stats
+       const record = {
+         document_type: null,
+         size: stats.size,
+         num_pedimento: null,
+         filename: file.originalName || path.basename(file.path),
+         original_path: originalPath,
+         arela_path: null,
+         status: 'stats',
+         rfc: null,
+         message: null,
+       };
+
+       // Try to detect document type for supported files
+       if (this.detectionService.isSupportedFileType(file.path)) {
+         try {
+           const detection = await this.detectionService.detectFile(file.path);
+
+           if (detection.detectedType) {
+             record.document_type = detection.detectedType;
+             record.num_pedimento = detection.detectedPedimento;
+             record.status = 'detected';
+
+             if (detection.arelaPath) {
+               record.arela_path = detection.arelaPath;
+             }
+
+             const rfcField = detection.fields.find(
+               (f) => f.name === 'rfc' && f.found,
+             );
+             if (rfcField) {
+               record.rfc = rfcField.value;
+             }
+           } else {
+             record.status = 'not-detected';
+             if (detection.error) {
+               record.message = detection.error;
+             }
+           }
+         } catch (error) {
+           logger.error(`Error detecting ${record.filename}: ${error.message}`);
+           record.status = 'detection-error';
+           record.message = error.message;
+         }
+       } else {
+         record.status = 'unsupported';
+         record.message = 'File type not supported for detection';
+       }
+
+       records.push(record);
+     }
+
+     if (records.length === 0) {
+       logger.info('No new records to insert (all were duplicates or errors)');
+       return [];
+     }
+
+     logger.info(
+       `Inserting ${records.length} new records into uploader table...`,
+     );
+
+     const { data, error } = await supabase
+       .from('uploader')
+       .insert(records)
+       .select();
+
+     if (error) {
+       throw new Error(`Failed to insert stats records: ${error.message}`);
+     }
+
+     return data;
+   }
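+
+   // Shape of each entry in `files`, inferred from the usage above (not a
+   // documented API): { path: string, stats?: fs.Stats, originalName?: string }.
+   // Every inserted record leaves this method with status 'detected',
+   // 'not-detected', 'detection-error', or 'unsupported'.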
+
+   /**
+    * Insert file stats only (no detection) into uploader table
+    * @param {Array} files - Array of file objects
+    * @param {Object} options - Options including clientPath
+    * @returns {Promise<Object>} Statistics about the operation
+    */
+   async insertStatsOnlyToUploaderTable(files, options) {
+     const supabase = await this.#getSupabaseClient();
+     const batchSize = 1000;
+     const allRecords = [];
+
+     logger.info('Collecting filesystem stats...');
+     for (const file of files) {
+       try {
+         const stats = file.stats || fs.statSync(file.path);
+         const originalPath = options.clientPath || file.path;
+         const fileExtension = path
+           .extname(file.path)
+           .toLowerCase()
+           .replace('.', '');
+
+         const record = {
+           document_type: null,
+           size: stats.size,
+           num_pedimento: null,
+           filename: file.originalName || path.basename(file.path),
+           original_path: originalPath,
+           arela_path: null,
+           status: 'fs-stats',
+           rfc: null,
+           message: null,
+           file_extension: fileExtension,
+           created_at: new Date().toISOString(),
+           modified_at: stats.mtime.toISOString(),
+         };
+
+         allRecords.push(record);
+       } catch (error) {
+         logger.error(`Error reading stats for ${file.path}: ${error.message}`);
+       }
+     }
+
+     if (allRecords.length === 0) {
+       logger.info('No file stats to insert');
+       return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
+     }
+
+     logger.info(
+       `Processing ${allRecords.length} file stats in batches of ${batchSize}...`,
+     );
+
+     let totalInserted = 0;
+     let totalUpdated = 0;
+
+     for (let i = 0; i < allRecords.length; i += batchSize) {
+       const batch = allRecords.slice(i, i + batchSize);
+
+       try {
+         // Check which records already exist
+         const originalPaths = batch.map((r) => r.original_path);
+         const { data: existingRecords, error: checkError } = await supabase
+           .from('uploader')
+           .select('original_path')
+           .in('original_path', originalPaths);
+
+         if (checkError) {
+           logger.error(
+             `Error checking existing records: ${checkError.message}`,
+           );
+           continue;
+         }
+
+         const existingPaths = new Set(
+           existingRecords?.map((r) => r.original_path) || [],
+         );
+         const newRecords = batch.filter(
+           (r) => !existingPaths.has(r.original_path),
+         );
+         const updateRecords = batch.filter((r) =>
+           existingPaths.has(r.original_path),
+         );
+
+         logger.info(
+           `Batch ${Math.floor(i / batchSize) + 1}: ${newRecords.length} new, ${updateRecords.length} updates`,
+         );
+
+         // Insert new records
+         if (newRecords.length > 0) {
+           const { error: insertError } = await supabase
+             .from('uploader')
+             .insert(newRecords);
+
+           if (insertError) {
+             logger.error(`Error inserting new records: ${insertError.message}`);
+           } else {
+             totalInserted += newRecords.length;
+             logger.success(`Inserted ${newRecords.length} new records`);
+           }
+         }
+
+         // Update existing records
+         if (updateRecords.length > 0) {
+           let batchUpdated = 0;
+           for (const record of updateRecords) {
+             const { error: updateError } = await supabase
+               .from('uploader')
+               .update({
+                 size: record.size,
+                 modified_at: record.modified_at,
+                 filename: record.filename,
+                 file_extension: record.file_extension,
+               })
+               .eq('original_path', record.original_path);
+
+             if (!updateError) {
+               batchUpdated++;
+             }
+           }
+           totalUpdated += batchUpdated;
+           logger.info(`Updated ${batchUpdated} existing records`);
+         }
+       } catch (error) {
+         logger.error(
+           `Unexpected error in batch ${Math.floor(i / batchSize) + 1}: ${error.message}`,
+         );
+       }
+     }
+
+     logger.success(
+       `Phase 1 Summary: ${totalInserted} new records inserted, ${totalUpdated} existing records updated`,
+     );
+
+     return {
+       totalInserted,
+       totalSkipped: totalUpdated,
+       totalProcessed: allRecords.length,
+     };
+   }
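+
+   // Note, derived from the code above: the totalSkipped field of the return
+   // value carries the number of existing rows that were updated (totalUpdated).
+   // A hypothetical result for a 1000-file run with 50 pre-existing rows:
+   // { totalInserted: 950, totalSkipped: 50, totalProcessed: 1000 }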
+
+   /**
+    * Process PDF files for pedimento-simplificado detection
+    * @param {Object} options - Processing options
+    * @returns {Promise<Object>} Processing result
+    */
+   async detectPedimentosInDatabase(options = {}) {
+     const supabase = await this.#getSupabaseClient();
+
+     logger.info(
+       'Phase 2: Starting PDF detection for pedimento-simplificado documents...',
+     );
+
+     const processingBatchSize = parseInt(options.batchSize) || 10;
+     // Reduced query batch size to avoid timeouts
+     const queryBatchSize = 500; // Reduced from 1000 to 500
+
+     let totalDetected = 0;
+     let totalProcessed = 0;
+     let totalErrors = 0;
+     let offset = 0;
+     let chunkNumber = 1;
+
+     logger.info(
+       `Processing PDF files in chunks of ${queryBatchSize} records...`,
+     );
+
+     while (true) {
+       logger.info(
+         `Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
+       );
+
+       try {
+         // Split the query to make it more efficient with retry logic
+         const { data: pdfRecords, error: queryError } =
+           await this.#queryWithRetry(async () => {
+             return await supabase
+               .from('uploader')
+               .select('id, original_path, filename, file_extension, status')
+               .eq('status', 'fs-stats')
+               .eq('file_extension', 'pdf')
+               .ilike('filename', '%simp%')
+               .range(offset, offset + queryBatchSize - 1)
+               .order('id'); // Add explicit ordering for consistent pagination
+           }, `fetch PDF records chunk ${chunkNumber}`);
+
+         if (queryError) {
+           throw new Error(
+             `Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
+           );
+         }
+
+         if (!pdfRecords || pdfRecords.length === 0) {
+           logger.info('No more PDF files found. Processing completed.');
+           break;
+         }
+
+         logger.info(
+           `Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
+         );
+
+         let chunkDetected = 0;
+         let chunkProcessed = 0;
+         let chunkErrors = 0;
+
+         // Process files in smaller batches
+         for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
+           const batch = pdfRecords.slice(i, i + processingBatchSize);
+           const updatePromises = [];
+
+           for (const record of batch) {
+             try {
+               if (!fs.existsSync(record.original_path)) {
+                 logger.warn(
+                   `File not found: ${record.filename} at ${record.original_path}`,
+                 );
+                 updatePromises.push(
+                   supabase
+                     .from('uploader')
+                     .update({
+                       status: 'file-not-found',
+                       message: 'File no longer exists at original path',
+                     })
+                     .eq('id', record.id),
+                 );
+                 chunkErrors++;
+                 totalErrors++;
+                 continue;
+               }
+
+               const detection = await this.detectionService.detectFile(
+                 record.original_path,
+               );
+               chunkProcessed++;
+               totalProcessed++;
+
+               const updateData = {
+                 status: detection.detectedType ? 'detected' : 'not-detected',
+                 document_type: detection.detectedType,
+                 num_pedimento: detection.detectedPedimento,
+                 arela_path: detection.arelaPath,
+                 message: detection.error || null,
+               };
+
+               if (detection.fields) {
+                 const rfcField = detection.fields.find(
+                   (f) => f.name === 'rfc' && f.found,
+                 );
+                 if (rfcField) {
+                   updateData.rfc = rfcField.value;
+                 }
+               }
+
+               if (detection.detectedType) {
+                 chunkDetected++;
+                 totalDetected++;
+                 // detection.fields is an array of field objects, so the RFC
+                 // for this log line is read from updateData (populated above).
+                 logger.success(
+                   `Detected: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${updateData.rfc || 'N/A'}`,
+                 );
+               } else {
+                 logger.info(
+                   `Not detected: ${record.filename} - No pedimento-simplificado pattern found`,
+                 );
+               }
+
+               updatePromises.push(
+                 supabase
+                   .from('uploader')
+                   .update(updateData)
+                   .eq('id', record.id),
+               );
+             } catch (error) {
+               logger.error(
+                 `Error detecting ${record.filename}: ${error.message}`,
+               );
+               chunkErrors++;
+               totalErrors++;
+
+               updatePromises.push(
+                 supabase
+                   .from('uploader')
+                   .update({
+                     status: 'detection-error',
+                     message: error.message,
+                   })
+                   .eq('id', record.id),
+               );
+             }
+           }
+
+           try {
+             await Promise.all(updatePromises);
+           } catch (error) {
+             logger.error(
+               `Error updating batch in chunk ${chunkNumber}: ${error.message}`,
+             );
+           }
+         }
+
+         logger.success(
+           `Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
+         );
+
+         offset += queryBatchSize;
+         chunkNumber++;
+
+         if (pdfRecords.length < queryBatchSize) {
+           logger.info(
+             `Reached end of records (chunk had ${pdfRecords.length} records).`,
+           );
+           break;
+         }
+
+         // Small delay between chunks
+         await new Promise((resolve) => setTimeout(resolve, 500));
+       } catch (chunkError) {
+         logger.error(
+           `Error processing chunk ${chunkNumber}: ${chunkError.message}`,
+         );
+         // Continue to next chunk after error
+         offset += queryBatchSize;
+         chunkNumber++;
+         totalErrors++;
+         continue;
+       }
+     }
+
+     const result = {
+       detectedCount: totalDetected,
+       processedCount: totalProcessed,
+       errorCount: totalErrors,
+     };
+
+     logger.success(
+       `Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
+     );
+
+     return result;
+   }
+
+   /**
+    * Propagate arela_path from pedimento_simplificado records to related files
+    * @param {Object} options - Options for propagation
+    * @returns {Promise<Object>} Processing result
+    */
+   async propagateArelaPath(options = {}) {
+     const supabase = await this.#getSupabaseClient();
+
+     logger.info('Phase 3: Starting arela_path propagation process...');
+     console.log('🔍 Finding pedimento_simplificado records with arela_path...');
+
+     // Get all pedimento_simplificado records that have arela_path
+     const { data: pedimentoRecords, error: pedimentoError } = await supabase
+       .from('uploader')
+       .select('id, original_path, arela_path, filename')
+       .eq('document_type', 'pedimento_simplificado')
+       .not('arela_path', 'is', null);
+
+     if (pedimentoError) {
+       const errorMsg = `Error fetching pedimento records: ${pedimentoError.message}`;
+       logger.error(errorMsg);
+       throw new Error(errorMsg);
+     }
+
+     if (!pedimentoRecords || pedimentoRecords.length === 0) {
+       logger.info('No pedimento_simplificado records with arela_path found');
+       console.log(
+         'ℹ️ No pedimento_simplificado records with arela_path found',
+       );
+       return { processedCount: 0, updatedCount: 0, errorCount: 0 };
+     }
+
+     console.log(
+       `📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
+     );
+     logger.info(
+       `Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
+     );
+
+     let totalProcessed = 0;
+     let totalUpdated = 0;
+     let totalErrors = 0;
+     const BATCH_SIZE = 50; // Process files in batches
+
+     // Process each pedimento record
+     for (const pedimento of pedimentoRecords) {
+       try {
+         totalProcessed++;
+
+         // Extract base path from original_path (remove filename)
+         const basePath = path.dirname(pedimento.original_path);
+
+         logger.info(
+           `Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`,
+         );
+
+         // Extract folder part from existing arela_path
+         const existingPath = pedimento.arela_path;
+         const folderArelaPath = existingPath.includes('/')
+           ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
+           : existingPath.endsWith('/')
+             ? existingPath
+             : existingPath + '/';
+
+         // Find all files with the same base path that don't have arela_path yet
+         const { data: relatedFiles, error: relatedError } = await supabase
+           .from('uploader')
+           .select('id, filename, original_path')
+           .like('original_path', `${basePath}%`)
+           .is('arela_path', null)
+           .neq('id', pedimento.id); // Exclude the pedimento itself
+
+         if (relatedError) {
+           logger.error(
+             `Error finding related files for ${pedimento.filename}: ${relatedError.message}`,
+           );
+           totalErrors++;
+           continue;
+         }
+
+         if (!relatedFiles || relatedFiles.length === 0) {
+           logger.info(`No related files found for ${pedimento.filename}`);
+           continue;
+         }
+
+         logger.info(
+           `Found ${relatedFiles.length} related files to update for ${pedimento.filename}`,
+         );
+
+         // Process files in batches
+         const fileIds = relatedFiles.map((f) => f.id);
+
+         for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
+           const batchIds = fileIds.slice(i, i + BATCH_SIZE);
+           const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
+           const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
+
+           logger.info(
+             `Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
+           );
+
+           try {
+             const { error: updateError } = await supabase
+               .from('uploader')
+               .update({ arela_path: folderArelaPath })
+               .in('id', batchIds);
+
+             if (updateError) {
+               logger.error(
+                 `Error in batch ${batchNumber}: ${updateError.message}`,
+               );
+               totalErrors++;
+             } else {
+               totalUpdated += batchIds.length;
+               logger.info(
+                 `Successfully updated batch ${batchNumber}: ${batchIds.length} files`,
+               );
+             }
+           } catch (batchError) {
+             logger.error(
+               `Exception in batch ${batchNumber}: ${batchError.message}`,
+             );
+             totalErrors++;
+           }
+         }
+       } catch (error) {
+         logger.error(
+           `Error processing pedimento ${pedimento.filename}: ${error.message}`,
+         );
+         totalErrors++;
+       }
+     }
+
+     const result = {
+       processedCount: totalProcessed,
+       updatedCount: totalUpdated,
+       errorCount: totalErrors,
+     };
+
+     logger.success(
+       `Phase 3 Summary: ${totalProcessed} pedimentos processed, ${totalUpdated} files updated, ${totalErrors} errors`,
+     );
+
+     return result;
+   }
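+
+   // Worked example of the folder extraction above (hypothetical values, not
+   // from the package): an arela_path of
+   // 'AAA010101AAA/2024/3421/240/4012345/doc.pdf' becomes
+   // 'AAA010101AAA/2024/3421/240/4012345/', while a value with no slash, e.g.
+   // 'AAA010101AAA', becomes 'AAA010101AAA/'.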
+
+   /**
+    * Upload files to Arela API based on specific RFC values
+    * @param {Object} options - Upload options
+    * @returns {Promise<Object>} Processing result
+    */
+   async uploadFilesByRfc(options = {}) {
+     const supabase = await this.#getSupabaseClient();
+     const uploadService = await uploadServiceFactory.getUploadService();
+
+     // Get configuration
+     const appConfig = await import('../config/config.js').then(
+       (m) => m.appConfig,
+     );
+
+     if (!appConfig.upload.rfcs || appConfig.upload.rfcs.length === 0) {
+       const errorMsg =
+         'No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.';
+       logger.error(errorMsg);
+       throw new Error(errorMsg);
+     }
+
+     logger.info('Phase 4: Starting RFC-based upload process...');
+     console.log('🎯 RFC-based Upload Mode');
+     console.log(`📋 Target RFCs: ${appConfig.upload.rfcs.join(', ')}`);
+     console.log('🔍 Searching for files to upload...');
+
+     // First, count total files for the RFCs to show filtering effect
+     const { count: totalRfcFiles, error: countError } = await supabase
+       .from('uploader')
+       .select('*', { count: 'exact', head: true })
+       .in('rfc', appConfig.upload.rfcs)
+       .not('arela_path', 'is', null);
+
+     if (countError) {
+       logger.warn(`Could not count total RFC files: ${countError.message}`);
+     } else {
+       console.log(`📊 Total files for specified RFCs: ${totalRfcFiles || 0}`);
+       logger.info(`Total files for specified RFCs: ${totalRfcFiles || 0}`);
+     }
+
+     // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
+     console.log(
+       '🎯 Finding pedimento_simplificado records for specified RFCs...',
+     );
+     const { data: pedimentoRfcRecords, error: pedimentoRfcError } =
+       await supabase
+         .from('uploader')
+         .select('arela_path')
+         .eq('document_type', 'pedimento_simplificado')
+         .in('rfc', appConfig.upload.rfcs)
+         .not('arela_path', 'is', null);
+
+     if (pedimentoRfcError) {
+       const errorMsg = `Error fetching pedimento RFC records: ${pedimentoRfcError.message}`;
+       logger.error(errorMsg);
+       throw new Error(errorMsg);
+     }
+
+     if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
+       console.log(
+         'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
+       );
+       logger.info('No pedimento_simplificado records found for specified RFCs');
+       return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+     }
+
+     // Get unique arela_paths from pedimento records
+     const uniqueArelaPaths = [
+       ...new Set(pedimentoRfcRecords.map((r) => r.arela_path)),
+     ];
+     console.log(
+       `📋 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`,
+     );
+     logger.info(
+       `Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
+     );
+
+     // Step 2: Get all files with these arela_paths that haven't been uploaded yet
+     let rfcRecords = [];
+     const chunkSize = 50;
+
+     for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
+       const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
+
+       const { data: chunkFiles, error: chunkError } = await supabase
+         .from('uploader')
+         .select('arela_path')
+         .in('arela_path', pathChunk)
+         .neq('status', 'file-uploaded')
+         .not('arela_path', 'is', null);
+
+       if (chunkError) {
+         const errorMsg = `Error fetching files for arela_paths chunk: ${chunkError.message}`;
+         logger.error(errorMsg);
+         throw new Error(errorMsg);
+       }
+
+       if (chunkFiles && chunkFiles.length > 0) {
+         rfcRecords = rfcRecords.concat(chunkFiles);
+       }
+     }
+
+     if (!rfcRecords || rfcRecords.length === 0) {
+       if (totalRfcFiles && totalRfcFiles > 0) {
+         console.log(
+           `ℹ️ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`,
+         );
+         console.log(' No new files to upload.');
+         logger.info(
+           `All ${totalRfcFiles} files for specified RFCs already uploaded`,
+         );
+       } else {
+         console.log(
+           'ℹ️ No files found for the specified RFCs with arela_path',
+         );
+         console.log(
+           ` Make sure files for RFCs [${appConfig.upload.rfcs.join(', ')}] have been processed and have arela_path values`,
+         );
+         logger.info('No files found for specified RFCs with arela_path');
+       }
+       return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+     }
+
+     // Show filtering effect
+     const uploadableArelaPaths = [
+       ...new Set(rfcRecords.map((r) => r.arela_path)),
+     ];
+     const skipped = (totalRfcFiles || 0) - rfcRecords.length;
+     if (skipped > 0) {
+       console.log(
+         `📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`,
+       );
+     } else {
+       console.log(`📊 Found ${rfcRecords.length} files ready for upload`);
+     }
+     logger.info(
+       `Found ${rfcRecords.length} files ready for upload, ${skipped} skipped`,
+     );
+
+     // Step 3: Get ALL files that have these arela_paths (including supporting documents)
+     let allRelatedFiles = [];
+     const arelaPathChunkSize = 50;
+     const queryBatchSize = 1000;
+
+     console.log(
+       '📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...',
+     );
+
+     // Process arela_paths in chunks
+     for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
+       const arelaPathChunk = uploadableArelaPaths.slice(
+         i,
+         i + arelaPathChunkSize,
+       );
+       const chunkNumber = Math.floor(i / arelaPathChunkSize) + 1;
+       const totalChunks = Math.ceil(
+         uploadableArelaPaths.length / arelaPathChunkSize,
+       );
+
+       console.log(
+         ` Processing arela_path chunk ${chunkNumber}/${totalChunks} (${arelaPathChunk.length} paths)`,
+       );
+
+       // For each chunk of arela_paths, use pagination to get all related files
+       let hasMore = true;
+       let offset = 0;
+
+       while (hasMore) {
+         const { data: batch, error: queryError } = await supabase
+           .from('uploader')
+           .select('id, original_path, arela_path, filename, rfc, document_type')
+           .in('arela_path', arelaPathChunk)
+           .not('original_path', 'is', null)
+           .neq('status', 'file-uploaded')
+           .range(offset, offset + queryBatchSize - 1);
+
+         if (queryError) {
+           const errorMsg = `Error fetching related files for chunk ${chunkNumber}: ${queryError.message}`;
+           logger.error(errorMsg);
+           throw new Error(errorMsg);
+         }
+
+         if (batch && batch.length > 0) {
+           allRelatedFiles = allRelatedFiles.concat(batch);
+         }
+
+         hasMore = batch && batch.length === queryBatchSize;
+         offset += queryBatchSize;
+       }
+     }
+
+     if (!allRelatedFiles || allRelatedFiles.length === 0) {
+       console.log('ℹ️ No related files found to upload');
+       logger.info('No related files found to upload');
+       return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+     }
+
+     console.log(`📋 Total files to upload: ${allRelatedFiles.length}`);
+     logger.info(`Total files to upload: ${allRelatedFiles.length}`);
+
+     // Step 4: Upload all related files
+     let totalProcessed = 0;
+     let totalUploaded = 0;
+     let totalErrors = 0;
+     const batchSize = parseInt(options.batchSize) || 10;
+
+     for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
+       const batch = allRelatedFiles.slice(i, i + batchSize);
+
+       for (const file of batch) {
+         try {
+           totalProcessed++;
+
+           // Check if file exists
+           if (!fs.existsSync(file.original_path)) {
+             logger.warn(
+               `File not found: ${file.filename} at ${file.original_path}`,
+             );
+             await supabase
+               .from('uploader')
+               .update({
+                 status: 'file-not-found',
+                 message: 'File no longer exists at original path',
+               })
+               .eq('id', file.id);
+             totalErrors++;
+             continue;
+           }
+
+           // Upload the file (handle both API and Supabase services)
+           let uploadResult;
+           if (uploadService.getServiceName() === 'Supabase') {
+             // Supabase requires single file upload with uploadPath
+             let uploadPath;
+             if (options.folderStructure && file.arela_path) {
+               // Combine folder structure with arela_path: palco/RFC/Year/Patente/Aduana/Pedimento/filename
+               uploadPath = `uploads/${options.folderStructure}/${file.arela_path}${file.filename}`;
+             } else if (file.arela_path) {
+               // Use existing arela_path: RFC/Year/Patente/Aduana/Pedimento/filename
+               uploadPath = `uploads/${file.arela_path}${file.filename}`;
+             } else {
+               // Fallback to RFC folder
+               uploadPath = `uploads/${file.rfc}/${file.filename}`;
+             }
+
+             uploadResult = await uploadService.upload(
+               [
+                 {
+                   path: file.original_path,
+                   name: file.filename,
+                   contentType: 'application/octet-stream',
+                 },
+               ],
+               {
+                 uploadPath: uploadPath,
+               },
+             );
+             uploadResult = { success: true, data: uploadResult };
+           } else {
+             // API service supports batch uploads and returns normalized response
+             let fullFolderStructure;
+             if (options.folderStructure && file.arela_path) {
+               // Combine folder structure with arela_path: palco/RFC/Year/Patente/Aduana/Pedimento/
+               fullFolderStructure = `${options.folderStructure}/${file.arela_path}`;
+             } else if (file.arela_path) {
+               // Use existing arela_path: RFC/Year/Patente/Aduana/Pedimento/
+               fullFolderStructure = file.arela_path;
+             } else {
+               // Fallback to RFC folder
+               fullFolderStructure = `${file.rfc}/`;
+             }
+
+             uploadResult = await uploadService.upload(
+               [
+                 {
+                   path: file.original_path,
+                   name: file.filename,
+                   contentType: 'application/octet-stream',
+                 },
+               ],
+               {
+                 folderStructure: fullFolderStructure,
+               },
+             );
+           }
+
+           if (uploadResult.success) {
+             // Update database status
+             await supabase
+               .from('uploader')
+               .update({
+                 status: 'file-uploaded',
+                 message: 'Successfully uploaded to Arela API',
+               })
+               .eq('id', file.id);
+
+             totalUploaded++;
+             logger.info(`Uploaded: ${file.filename}`);
+           } else {
+             await supabase
+               .from('uploader')
+               .update({
+                 status: 'upload-error',
+                 message: uploadResult.error || 'Upload failed',
+               })
+               .eq('id', file.id);
+
+             totalErrors++;
+             logger.error(
+               `Upload failed: ${file.filename} - ${uploadResult.error}`,
+             );
+           }
+         } catch (error) {
+           totalErrors++;
+           logger.error(
+             `Error processing file ${file.filename}: ${error.message}`,
+           );
+
+           await supabase
+             .from('uploader')
+             .update({
+               status: 'upload-error',
+               message: `Processing error: ${error.message}`,
+             })
+             .eq('id', file.id);
+         }
+       }
+     }
+
+     const result = {
+       processedCount: totalProcessed,
+       uploadedCount: totalUploaded,
+       errorCount: totalErrors,
+     };
+
+     logger.success(
+       `Phase 4 Summary: ${totalProcessed} files processed, ${totalUploaded} uploaded, ${totalErrors} errors`,
+     );
+
+     return result;
+   }
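+
+   // Worked example of the path construction above (hypothetical values): with
+   // options.folderStructure = 'palco', file.arela_path =
+   // 'AAA010101AAA/2024/3421/240/4012345/' and file.filename = 'factura.pdf',
+   // the Supabase branch uploads to
+   // 'uploads/palco/AAA010101AAA/2024/3421/240/4012345/factura.pdf', while the
+   // API branch passes 'palco/AAA010101AAA/2024/3421/240/4012345/' as the
+   // folder structure.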
+
+   /**
+    * Get processed file paths from log
+    * @returns {Set<string>} Set of processed file paths
+    */
+   getProcessedPaths() {
+     // This would need to be adapted to work with the LoggingService
+     // For now, return empty set
+     return new Set();
+   }
+
+   /**
+    * Query files that are ready for upload
+    * These are files that have been detected but not yet uploaded
+    * Uses the same RFC filtering logic as uploadFilesByRfc for consistency
+    * @param {Object} options - Query options
+    * @returns {Promise<Array>} Array of files ready for upload
+    */
+   async getFilesReadyForUpload(options = {}) {
+     const supabase = await this.#getSupabaseClient();
+
+     logger.info('Querying files ready for upload...');
+     console.log('🔍 Querying files ready for upload...');
+
+     // Check if UPLOAD_RFCS is configured
+     const uploadRfcs = appConfig.upload.rfcs;
+     if (!uploadRfcs || uploadRfcs.length === 0) {
+       console.log(
+         'ℹ️ No UPLOAD_RFCS configured. Please set UPLOAD_RFCS environment variable to see files ready for upload.',
+       );
+       console.log(
+         ' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
+       );
+       return [];
+     }
+
+     console.log(`🎯 Using RFC filter: ${uploadRfcs.join(', ')}`);
+
+     // Step 1: Find pedimento_simplificado documents for the specified RFCs that have arela_path
+     console.log(
+       '🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
+     );
+     const { data: pedimentoRecords, error: pedimentoError } = await supabase
+       .from('uploader')
+       .select('arela_path')
+       .eq('document_type', 'pedimento_simplificado')
+       .in('rfc', uploadRfcs)
+       .not('arela_path', 'is', null);
+
+     if (pedimentoError) {
+       throw new Error(
+         `Error querying pedimento_simplificado records: ${pedimentoError.message}`,
+       );
+     }
+
+     if (!pedimentoRecords || pedimentoRecords.length === 0) {
+       console.log(
+         'ℹ️ No pedimento_simplificado records with arela_path found',
+       );
+       return [];
+     }
+
+     // Get unique arela_paths
+     const uniqueArelaPaths = [
+       ...new Set(pedimentoRecords.map((r) => r.arela_path)),
+     ];
+     console.log(
+       `📋 Found ${pedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
+     );
+
+     // Step 2: Find all related files with these arela_paths that haven't been uploaded yet
+     console.log('🔍 Finding all related files that need to be uploaded...');
+
+     // Process arela_paths in chunks to avoid URI length limits
+     let allReadyFiles = [];
+     const chunkSize = 50;
+
+     for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
+       const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
+
+       const { data: chunkFiles, error: chunkError } = await supabase
+         .from('uploader')
+         .select(
+           'id, original_path, arela_path, filename, rfc, document_type, status',
+         )
+         .in('arela_path', pathChunk)
+         .neq('status', 'file-uploaded')
+         .not('original_path', 'is', null);
+
+       if (chunkError) {
+         throw new Error(
+           `Error querying files for arela_paths chunk: ${chunkError.message}`,
+         );
+       }
+
+       if (chunkFiles && chunkFiles.length > 0) {
+         allReadyFiles = allReadyFiles.concat(chunkFiles);
+       }
+     }
+
+     const readyFiles = allReadyFiles;
+
+     console.log(`📋 Found ${readyFiles?.length || 0} files ready for upload`);
+
+     if (readyFiles && readyFiles.length > 0) {
+       // Group by document type for summary
+       const byDocType = readyFiles.reduce((acc, file) => {
+         const docType = file.document_type || 'Unknown';
+         acc[docType] = (acc[docType] || 0) + 1;
+         return acc;
+       }, {});
+
+       console.log('📊 Files by document type:');
+       for (const [docType, count] of Object.entries(byDocType)) {
+         console.log(` ${docType}: ${count} files`);
+       }
+
+       // Group by RFC
+       const byRfc = readyFiles.reduce((acc, file) => {
+         const rfc = file.rfc || 'No RFC';
+         acc[rfc] = (acc[rfc] || 0) + 1;
+         return acc;
+       }, {});
+
+       console.log('📊 Files by RFC:');
+       for (const [rfc, count] of Object.entries(byRfc)) {
+         console.log(` ${rfc}: ${count} files`);
+       }
+     }
+
+     return readyFiles || [];
+   }
+ }
+
+ // Export singleton instance
+ export const databaseService = new DatabaseService();
+ export default databaseService;
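
For orientation, a minimal sketch of how the four phases exported here chain together — assuming an ESM caller, a `files` array shaped as noted above, and this module living at `services/DatabaseService.js` (the diff does not show the file's path):

    import databaseService from './services/DatabaseService.js';

    // Phase 1: record filesystem stats for scanned files
    await databaseService.insertStatsOnlyToUploaderTable(files, {});
    // Phase 2: detect pedimento-simplificado PDFs among 'fs-stats' rows
    await databaseService.detectPedimentosInDatabase({ batchSize: 10 });
    // Phase 3: copy each pedimento's arela_path folder to sibling files
    await databaseService.propagateArelaPath();
    // Phase 4: upload everything for the RFCs in UPLOAD_RFCS
    await databaseService.uploadFilesByRfc({ batchSize: 10 });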