@arela/uploader 0.2.4 → 0.2.5

@@ -0,0 +1,953 @@
+ import fs from 'fs';
+ import path from 'path';
+
+ import appConfig from '../config/config.js';
+ import { FileDetectionService } from '../file-detection.js';
+ import logger from './LoggingService.js';
+ import uploadServiceFactory from './upload/UploadServiceFactory.js';
+
+ /**
+  * Database Service
+  * Handles all Supabase database operations for the uploader table
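+  *
+  * The methods form a four-phase pipeline:
+  *   Phase 1: insertStatsOnlyToUploaderTable (collect filesystem stats)
+  *   Phase 2: detectPedimentosInDatabase (detect pedimento-simplificado PDFs)
+  *   Phase 3: propagateArelaPath (propagate arela_path to related files)
+  *   Phase 4: uploadFilesByRfc (upload files for the configured RFCs)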
+  */
+ export class DatabaseService {
+   constructor() {
+     this.detectionService = new FileDetectionService();
+   }
+
+   /**
+    * Get Supabase client
+    * @private
+    * @returns {Promise<Object>} Supabase client
+    */
+   async #getSupabaseClient() {
+     const supabaseService = uploadServiceFactory.getSupabaseService();
+     return await supabaseService.getClient();
+   }
+
+   /**
+    * Insert file stats with document detection into uploader table
+    * @param {Array} files - Array of file objects
+    * @param {Object} options - Options including clientPath
+    * @returns {Promise<Array>} Inserted records
+    */
+   async insertStatsToUploaderTable(files, options) {
+     const supabase = await this.#getSupabaseClient();
+     const records = [];
+
+     for (const file of files) {
+       const stats = file.stats || fs.statSync(file.path);
+       const originalPath = options.clientPath || file.path;
+
+       // Check if record already exists
+       const { data: existingRecords, error: checkError } = await supabase
+         .from('uploader')
+         .select('id, original_path')
+         .eq('original_path', originalPath)
+         .limit(1);
+
+       if (checkError) {
+         logger.error(`Error checking for existing record: ${checkError.message}`);
+         continue;
+       }
+
+       if (existingRecords && existingRecords.length > 0) {
+         logger.info(`Skipping duplicate: ${path.basename(file.path)}`);
+         continue;
+       }
+
+       // Initialize record with basic file stats
+       const record = {
+         document_type: null,
+         size: stats.size,
+         num_pedimento: null,
+         filename: file.originalName || path.basename(file.path),
+         original_path: originalPath,
+         arela_path: null,
+         status: 'stats',
+         rfc: null,
+         message: null,
+       };
+
+       // Try to detect document type for supported files
+       if (this.detectionService.isSupportedFileType(file.path)) {
+         try {
+           const detection = await this.detectionService.detectFile(file.path);
+
+           if (detection.detectedType) {
+             record.document_type = detection.detectedType;
+             record.num_pedimento = detection.detectedPedimento;
+             record.status = 'detected';
+
+             if (detection.arelaPath) {
+               record.arela_path = detection.arelaPath;
+             }
+
+             const rfcField = detection.fields.find(
+               (f) => f.name === 'rfc' && f.found,
+             );
+             if (rfcField) {
+               record.rfc = rfcField.value;
+             }
+           } else {
+             record.status = 'not-detected';
+             if (detection.error) {
+               record.message = detection.error;
+             }
+           }
+         } catch (error) {
+           logger.error(`Error detecting ${record.filename}: ${error.message}`);
+           record.status = 'detection-error';
+           record.message = error.message;
+         }
+       } else {
+         record.status = 'unsupported';
+         record.message = 'File type not supported for detection';
+       }
+
+       records.push(record);
+     }
+
+     if (records.length === 0) {
+       logger.info('No new records to insert (all were duplicates or errors)');
+       return [];
+     }
+
+     logger.info(`Inserting ${records.length} new records into uploader table...`);
+
+     const { data, error } = await supabase
+       .from('uploader')
+       .insert(records)
+       .select();
+
+     if (error) {
+       throw new Error(`Failed to insert stats records: ${error.message}`);
+     }
+
+     return data;
+   }
+
+   /**
+    * Insert file stats only (no detection) into uploader table
+    * @param {Array} files - Array of file objects
+    * @param {Object} options - Options including clientPath
+    * @returns {Promise<Object>} Statistics about the operation
+    */
+   async insertStatsOnlyToUploaderTable(files, options) {
+     const supabase = await this.#getSupabaseClient();
+     const batchSize = 1000;
+     const allRecords = [];
+
+     logger.info('Collecting filesystem stats...');
+     for (const file of files) {
+       try {
+         const stats = file.stats || fs.statSync(file.path);
+         const originalPath = options.clientPath || file.path;
+         const fileExtension = path
+           .extname(file.path)
+           .toLowerCase()
+           .replace('.', '');
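+         // e.g. '/data/clients/FACTURA_01.PDF' -> 'pdf' (illustrative path)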
+
+         const record = {
+           document_type: null,
+           size: stats.size,
+           num_pedimento: null,
+           filename: file.originalName || path.basename(file.path),
+           original_path: originalPath,
+           arela_path: null,
+           status: 'fs-stats',
+           rfc: null,
+           message: null,
+           file_extension: fileExtension,
+           created_at: new Date().toISOString(),
+           modified_at: stats.mtime.toISOString(),
+         };
+
+         allRecords.push(record);
+       } catch (error) {
+         logger.error(`Error reading stats for ${file.path}: ${error.message}`);
+       }
+     }
+
+     if (allRecords.length === 0) {
+       logger.info('No file stats to insert');
+       return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
+     }
+
+     logger.info(`Processing ${allRecords.length} file stats in batches of ${batchSize}...`);
+
+     let totalInserted = 0;
+     let totalUpdated = 0;
+
+     for (let i = 0; i < allRecords.length; i += batchSize) {
+       const batch = allRecords.slice(i, i + batchSize);
+
+       try {
+         // Check which records already exist
+         const originalPaths = batch.map(r => r.original_path);
+         const { data: existingRecords, error: checkError } = await supabase
+           .from('uploader')
+           .select('original_path')
+           .in('original_path', originalPaths);
+
+         if (checkError) {
+           logger.error(`Error checking existing records: ${checkError.message}`);
+           continue;
+         }
+
+         const existingPaths = new Set(existingRecords?.map(r => r.original_path) || []);
+         const newRecords = batch.filter(r => !existingPaths.has(r.original_path));
+         const updateRecords = batch.filter(r => existingPaths.has(r.original_path));
+
+         logger.info(
+           `Batch ${Math.floor(i / batchSize) + 1}: ${newRecords.length} new, ${updateRecords.length} updates`,
+         );
+
+         // Insert new records
+         if (newRecords.length > 0) {
+           const { error: insertError } = await supabase
+             .from('uploader')
+             .insert(newRecords);
+
+           if (insertError) {
+             logger.error(`Error inserting new records: ${insertError.message}`);
+           } else {
+             totalInserted += newRecords.length;
+             logger.success(`Inserted ${newRecords.length} new records`);
+           }
+         }
+
+         // Update existing records
+         if (updateRecords.length > 0) {
+           let batchUpdated = 0;
+           for (const record of updateRecords) {
+             const { error: updateError } = await supabase
+               .from('uploader')
+               .update({
+                 size: record.size,
+                 modified_at: record.modified_at,
+                 filename: record.filename,
+                 file_extension: record.file_extension,
+               })
+               .eq('original_path', record.original_path);
+
+             if (!updateError) {
+               batchUpdated++;
+             }
+           }
+           totalUpdated += batchUpdated;
+           logger.info(`Updated ${batchUpdated} existing records`);
+         }
+
+       } catch (error) {
+         logger.error(
+           `Unexpected error in batch ${Math.floor(i / batchSize) + 1}: ${error.message}`,
+         );
+       }
+     }
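+
+     // Note: if original_path carries a unique constraint (assumption, not
+     // verified here), the select/insert/update sequence above could likely
+     // be collapsed into a single upsert per batch, e.g.:
+     //   await supabase.from('uploader').upsert(batch, { onConflict: 'original_path' });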
+
+     logger.success(
+       `Phase 1 Summary: ${totalInserted} new records inserted, ${totalUpdated} existing records updated`,
+     );
+
+     return {
+       totalInserted,
+       totalSkipped: totalUpdated, // existing records are updated in place and reported as skipped
+       totalProcessed: allRecords.length,
+     };
+   }
+
+   /**
+    * Process PDF files for pedimento-simplificado detection
+    * @param {Object} options - Processing options
+    * @returns {Promise<Object>} Processing result
+    */
+   async detectPedimentosInDatabase(options = {}) {
+     const supabase = await this.#getSupabaseClient();
+
+     logger.info('Phase 2: Starting PDF detection for pedimento-simplificado documents...');
+
+     const processingBatchSize = parseInt(options.batchSize) || 10;
+     const queryBatchSize = 1000;
+
+     let totalDetected = 0;
+     let totalProcessed = 0;
+     let totalErrors = 0;
+     let chunkNumber = 1;
+
+     logger.info(`Processing PDF files in chunks of ${queryBatchSize} records...`);
+
+     while (true) {
+       logger.info(`Fetching chunk ${chunkNumber} (up to ${queryBatchSize} records)...`);
+
+       // Every processed row has its status moved off 'fs-stats', so each
+       // iteration re-queries the first page of still-unprocessed rows;
+       // advancing an offset here would skip records as the filtered set
+       // shrinks between chunks.
+       const { data: pdfRecords, error: queryError } = await supabase
+         .from('uploader')
+         .select('id, original_path, filename, file_extension, status')
+         .eq('status', 'fs-stats')
+         .eq('file_extension', 'pdf')
+         .ilike('filename', '%simp%')
+         .range(0, queryBatchSize - 1);
+
+       if (queryError) {
+         throw new Error(
+           `Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
+         );
+       }
+
+       if (!pdfRecords || pdfRecords.length === 0) {
+         logger.info('No more PDF files found. Processing completed.');
+         break;
+       }
+
+       logger.info(`Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`);
+
+       let chunkDetected = 0;
+       let chunkProcessed = 0;
+       let chunkErrors = 0;
+
+       // Process files in smaller batches
+       for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
+         const batch = pdfRecords.slice(i, i + processingBatchSize);
+         const updatePromises = [];
+
+         for (const record of batch) {
+           try {
+             if (!fs.existsSync(record.original_path)) {
+               logger.warn(`File not found: ${record.filename} at ${record.original_path}`);
+               updatePromises.push(
+                 supabase
+                   .from('uploader')
+                   .update({
+                     status: 'file-not-found',
+                     message: 'File no longer exists at original path',
+                   })
+                   .eq('id', record.id),
+               );
+               chunkErrors++;
+               totalErrors++;
+               continue;
+             }
+
+             const detection = await this.detectionService.detectFile(record.original_path);
+             chunkProcessed++;
+             totalProcessed++;
+
+             const updateData = {
+               status: detection.detectedType ? 'detected' : 'not-detected',
+               document_type: detection.detectedType,
+               num_pedimento: detection.detectedPedimento,
+               arela_path: detection.arelaPath,
+               message: detection.error || null,
+             };
+
+             if (detection.fields) {
+               const rfcField = detection.fields.find(
+                 (f) => f.name === 'rfc' && f.found,
+               );
+               if (rfcField) {
+                 updateData.rfc = rfcField.value;
+               }
+             }
+
+             if (detection.detectedType) {
+               chunkDetected++;
+               totalDetected++;
+               logger.success(
+                 `Detected: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${updateData.rfc || 'N/A'}`,
+               );
+             } else {
+               logger.info(
+                 `Not detected: ${record.filename} - No pedimento-simplificado pattern found`,
+               );
+             }
+
+             updatePromises.push(
+               supabase.from('uploader').update(updateData).eq('id', record.id),
+             );
+           } catch (error) {
+             logger.error(`Error detecting ${record.filename}: ${error.message}`);
+             chunkErrors++;
+             totalErrors++;
+
+             updatePromises.push(
+               supabase
+                 .from('uploader')
+                 .update({
+                   status: 'detection-error',
+                   message: error.message,
+                 })
+                 .eq('id', record.id),
+             );
+           }
+         }
+
+         try {
+           await Promise.all(updatePromises);
+         } catch (error) {
+           logger.error(`Error updating batch in chunk ${chunkNumber}: ${error.message}`);
+         }
+       }
+
+       logger.success(
+         `Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
+       );
+
+       chunkNumber++;
+
+       if (pdfRecords.length < queryBatchSize) {
+         logger.info(`Reached end of records (chunk had ${pdfRecords.length} records).`);
+         break;
+       }
+
+       // Small delay between chunks
+       await new Promise((resolve) => setTimeout(resolve, 500));
+     }
+
+     const result = {
+       detectedCount: totalDetected,
+       processedCount: totalProcessed,
+       errorCount: totalErrors,
+     };
+
+     logger.success(
+       `Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
+     );
+
+     return result;
+   }
+
+   /**
+    * Propagate arela_path from pedimento_simplificado records to related files
+    * @param {Object} options - Options for propagation
+    * @returns {Promise<Object>} Processing result
+    */
+   async propagateArelaPath(options = {}) {
+     const supabase = await this.#getSupabaseClient();
+
+     logger.info('Phase 3: Starting arela_path propagation process...');
+     console.log('🔍 Finding pedimento_simplificado records with arela_path...');
+
+     // Get all pedimento_simplificado records that have arela_path
+     const { data: pedimentoRecords, error: pedimentoError } = await supabase
+       .from('uploader')
+       .select('id, original_path, arela_path, filename')
+       .eq('document_type', 'pedimento_simplificado')
+       .not('arela_path', 'is', null);
+
+     if (pedimentoError) {
+       const errorMsg = `Error fetching pedimento records: ${pedimentoError.message}`;
+       logger.error(errorMsg);
+       throw new Error(errorMsg);
+     }
+
+     if (!pedimentoRecords || pedimentoRecords.length === 0) {
+       logger.info('No pedimento_simplificado records with arela_path found');
+       console.log('ℹī¸ No pedimento_simplificado records with arela_path found');
+       return { processedCount: 0, updatedCount: 0, errorCount: 0 };
+     }
+
+     console.log(`📋 Found ${pedimentoRecords.length} pedimento records with arela_path`);
+     logger.info(`Found ${pedimentoRecords.length} pedimento records with arela_path to process`);
+
+     let totalProcessed = 0;
+     let totalUpdated = 0;
+     let totalErrors = 0;
+     const BATCH_SIZE = 50; // Process files in batches
+
+     // Process each pedimento record
+     for (const pedimento of pedimentoRecords) {
+       try {
+         totalProcessed++;
+
+         // Extract base path from original_path (remove filename)
+         const basePath = path.dirname(pedimento.original_path);
+
+         logger.info(`Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`);
+
+         // Extract folder part from existing arela_path
+         const existingPath = pedimento.arela_path;
+         const folderArelaPath = existingPath.includes('/')
+           ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
+           : existingPath.endsWith('/')
+             ? existingPath
+             : existingPath + '/';
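+         // e.g. 'ABC010101AAA/2024/3842/240/24038420012345/pedimento.pdf'
+         //   -> 'ABC010101AAA/2024/3842/240/24038420012345/' (illustrative
+         //      values; segments follow RFC/Year/Patente/Aduana/Pedimento)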
+
+         // Find all files with the same base path that don't have arela_path yet
+         const { data: relatedFiles, error: relatedError } = await supabase
+           .from('uploader')
+           .select('id, filename, original_path')
+           .like('original_path', `${basePath}%`)
+           .is('arela_path', null)
+           .neq('id', pedimento.id); // Exclude the pedimento itself
+
+         if (relatedError) {
+           logger.error(`Error finding related files for ${pedimento.filename}: ${relatedError.message}`);
+           totalErrors++;
+           continue;
+         }
+
+         if (!relatedFiles || relatedFiles.length === 0) {
+           logger.info(`No related files found for ${pedimento.filename}`);
+           continue;
+         }
+
+         logger.info(`Found ${relatedFiles.length} related files to update for ${pedimento.filename}`);
+
+         // Process files in batches
+         const fileIds = relatedFiles.map((f) => f.id);
+
+         for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
+           const batchIds = fileIds.slice(i, i + BATCH_SIZE);
+           const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
+           const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
+
+           logger.info(`Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`);
+
+           try {
+             const { error: updateError } = await supabase
+               .from('uploader')
+               .update({ arela_path: folderArelaPath })
+               .in('id', batchIds);
+
+             if (updateError) {
+               logger.error(`Error in batch ${batchNumber}: ${updateError.message}`);
+               totalErrors++;
+             } else {
+               totalUpdated += batchIds.length;
+               logger.info(`Successfully updated batch ${batchNumber}: ${batchIds.length} files`);
+             }
+           } catch (batchError) {
+             logger.error(`Exception in batch ${batchNumber}: ${batchError.message}`);
+             totalErrors++;
+           }
+         }
+       } catch (error) {
+         logger.error(`Error processing pedimento ${pedimento.filename}: ${error.message}`);
+         totalErrors++;
+       }
+     }
+
+     const result = {
+       processedCount: totalProcessed,
+       updatedCount: totalUpdated,
+       errorCount: totalErrors,
+     };
+
+     logger.success(
+       `Phase 3 Summary: ${totalProcessed} pedimentos processed, ${totalUpdated} files updated, ${totalErrors} errors`,
+     );
+
+     return result;
+   }
+
+   /**
+    * Upload files to Arela API based on specific RFC values
+    * @param {Object} options - Upload options
+    * @returns {Promise<Object>} Processing result
+    */
+   async uploadFilesByRfc(options = {}) {
+     const supabase = await this.#getSupabaseClient();
+     const uploadService = await uploadServiceFactory.getUploadService();
+
+     if (!appConfig.upload.rfcs || appConfig.upload.rfcs.length === 0) {
+       const errorMsg = 'No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.';
+       logger.error(errorMsg);
+       throw new Error(errorMsg);
+     }
+
+     logger.info('Phase 4: Starting RFC-based upload process...');
+     console.log('đŸŽ¯ RFC-based Upload Mode');
+     console.log(`📋 Target RFCs: ${appConfig.upload.rfcs.join(', ')}`);
+     console.log('🔍 Searching for files to upload...');
+
+     // First, count total files for the RFCs to show filtering effect
+     const { count: totalRfcFiles, error: countError } = await supabase
+       .from('uploader')
+       .select('*', { count: 'exact', head: true })
+       .in('rfc', appConfig.upload.rfcs)
+       .not('arela_path', 'is', null);
+
+     if (countError) {
+       logger.warn(`Could not count total RFC files: ${countError.message}`);
+     } else {
+       console.log(`📊 Total files for specified RFCs: ${totalRfcFiles || 0}`);
+       logger.info(`Total files for specified RFCs: ${totalRfcFiles || 0}`);
+     }
+
+     // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
+     console.log('đŸŽ¯ Finding pedimento_simplificado records for specified RFCs...');
+     const { data: pedimentoRfcRecords, error: pedimentoRfcError } = await supabase
+       .from('uploader')
+       .select('arela_path')
+       .eq('document_type', 'pedimento_simplificado')
+       .in('rfc', appConfig.upload.rfcs)
+       .not('arela_path', 'is', null);
+
+     if (pedimentoRfcError) {
+       const errorMsg = `Error fetching pedimento RFC records: ${pedimentoRfcError.message}`;
+       logger.error(errorMsg);
+       throw new Error(errorMsg);
+     }
+
+     if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
+       console.log('ℹī¸ No pedimento_simplificado records found for the specified RFCs with arela_path');
+       logger.info('No pedimento_simplificado records found for specified RFCs');
+       return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+     }
+
+     // Get unique arela_paths from pedimento records
+     const uniqueArelaPaths = [...new Set(pedimentoRfcRecords.map((r) => r.arela_path))];
+     console.log(`📋 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`);
+     logger.info(`Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`);
+
+     // Step 2: Get all files with these arela_paths that haven't been uploaded yet
+     let rfcRecords = [];
+     const chunkSize = 50;
+
+     for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
+       const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
+
+       const { data: chunkFiles, error: chunkError } = await supabase
+         .from('uploader')
+         .select('arela_path')
+         .in('arela_path', pathChunk)
+         .neq('status', 'file-uploaded')
+         .not('arela_path', 'is', null);
+
+       if (chunkError) {
+         const errorMsg = `Error fetching files for arela_paths chunk: ${chunkError.message}`;
+         logger.error(errorMsg);
+         throw new Error(errorMsg);
+       }
+
+       if (chunkFiles && chunkFiles.length > 0) {
+         rfcRecords = rfcRecords.concat(chunkFiles);
+       }
+     }
+
+     if (!rfcRecords || rfcRecords.length === 0) {
+       if (totalRfcFiles && totalRfcFiles > 0) {
+         console.log(`ℹī¸ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`);
+         console.log('   No new files to upload.');
+         logger.info(`All ${totalRfcFiles} files for specified RFCs already uploaded`);
+       } else {
+         console.log('ℹī¸ No files found for the specified RFCs with arela_path');
+         console.log(`   Make sure files for RFCs [${appConfig.upload.rfcs.join(', ')}] have been processed and have arela_path values`);
+         logger.info('No files found for specified RFCs with arela_path');
+       }
+       return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+     }
+
+     // Show filtering effect
+     const uploadableArelaPaths = [...new Set(rfcRecords.map((r) => r.arela_path))];
+     const skipped = (totalRfcFiles || 0) - rfcRecords.length;
+     if (skipped > 0) {
+       console.log(`📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`);
+     } else {
+       console.log(`📊 Found ${rfcRecords.length} files ready for upload`);
+     }
+     logger.info(`Found ${rfcRecords.length} files ready for upload, ${skipped} skipped`);
+
+     // Step 3: Get ALL files that have these arela_paths (including supporting documents)
+     let allRelatedFiles = [];
+     const arelaPathChunkSize = 50;
+     const queryBatchSize = 1000;
+
+     console.log('đŸ“Ĩ Fetching all related files (processing arela_paths in chunks to avoid URI limits)...');
+
+     // Process arela_paths in chunks
+     for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
+       const arelaPathChunk = uploadableArelaPaths.slice(i, i + arelaPathChunkSize);
+       const chunkNumber = Math.floor(i / arelaPathChunkSize) + 1;
+       const totalChunks = Math.ceil(uploadableArelaPaths.length / arelaPathChunkSize);
+
+       console.log(`   Processing arela_path chunk ${chunkNumber}/${totalChunks} (${arelaPathChunk.length} paths)`);
+
+       // For each chunk of arela_paths, use pagination to get all related files
+       let hasMore = true;
+       let offset = 0;
+
+       while (hasMore) {
+         const { data: batch, error: queryError } = await supabase
+           .from('uploader')
+           .select('id, original_path, arela_path, filename, rfc, document_type')
+           .in('arela_path', arelaPathChunk)
+           .not('original_path', 'is', null)
+           .neq('status', 'file-uploaded')
+           .range(offset, offset + queryBatchSize - 1);
+
+         if (queryError) {
+           const errorMsg = `Error fetching related files for chunk ${chunkNumber}: ${queryError.message}`;
+           logger.error(errorMsg);
+           throw new Error(errorMsg);
+         }
+
+         if (batch && batch.length > 0) {
+           allRelatedFiles = allRelatedFiles.concat(batch);
+         }
+
+         hasMore = batch && batch.length === queryBatchSize;
+         offset += queryBatchSize;
+       }
+     }
+
+     if (!allRelatedFiles || allRelatedFiles.length === 0) {
+       console.log('ℹī¸ No related files found to upload');
+       logger.info('No related files found to upload');
+       return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+     }
+
+     console.log(`📋 Total files to upload: ${allRelatedFiles.length}`);
+     logger.info(`Total files to upload: ${allRelatedFiles.length}`);
+
+     // Step 4: Upload all related files
+     let totalProcessed = 0;
+     let totalUploaded = 0;
+     let totalErrors = 0;
+     const batchSize = parseInt(options.batchSize) || 10;
+
+     for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
+       const batch = allRelatedFiles.slice(i, i + batchSize);
+
+       for (const file of batch) {
+         try {
+           totalProcessed++;
+
+           // Check if file exists
+           if (!fs.existsSync(file.original_path)) {
+             logger.warn(`File not found: ${file.filename} at ${file.original_path}`);
+             await supabase
+               .from('uploader')
+               .update({
+                 status: 'file-not-found',
+                 message: 'File no longer exists at original path',
+               })
+               .eq('id', file.id);
+             totalErrors++;
+             continue;
+           }
+
+           // Upload the file (handle both API and Supabase services)
+           let uploadResult;
+           if (uploadService.getServiceName() === 'Supabase') {
+             // Supabase requires single file upload with uploadPath
+             let uploadPath;
+             if (options.folderStructure && file.arela_path) {
+               // Combine folder structure with arela_path: palco/RFC/Year/Patente/Aduana/Pedimento/filename
+               uploadPath = `uploads/${options.folderStructure}/${file.arela_path}${file.filename}`;
+             } else if (file.arela_path) {
+               // Use existing arela_path: RFC/Year/Patente/Aduana/Pedimento/filename
+               uploadPath = `uploads/${file.arela_path}${file.filename}`;
+             } else {
+               // Fallback to RFC folder
+               uploadPath = `uploads/${file.rfc}/${file.filename}`;
+             }
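+             // e.g. uploads/palco/ABC010101AAA/2024/3842/240/24038420012345/factura.xml
+             // (illustrative values)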
+
+             uploadResult = await uploadService.upload([{
+               path: file.original_path,
+               name: file.filename,
+               contentType: 'application/octet-stream',
+             }], {
+               uploadPath: uploadPath,
+             });
+             uploadResult = { success: true, data: uploadResult };
+           } else {
+             // API service supports batch uploads and returns normalized response
+             let fullFolderStructure;
+             if (options.folderStructure && file.arela_path) {
+               // Combine folder structure with arela_path: palco/RFC/Year/Patente/Aduana/Pedimento/
+               fullFolderStructure = `${options.folderStructure}/${file.arela_path}`;
+             } else if (file.arela_path) {
+               // Use existing arela_path: RFC/Year/Patente/Aduana/Pedimento/
+               fullFolderStructure = file.arela_path;
+             } else {
+               // Fallback to RFC folder
+               fullFolderStructure = `${file.rfc}/`;
+             }
+
+             uploadResult = await uploadService.upload([{
+               path: file.original_path,
+               name: file.filename,
+               contentType: 'application/octet-stream',
+             }], {
+               folderStructure: fullFolderStructure,
+             });
+           }
+
+           if (uploadResult.success) {
+             // Update database status
+             await supabase
+               .from('uploader')
+               .update({
+                 status: 'file-uploaded',
+                 message: 'Successfully uploaded to Arela API',
+               })
+               .eq('id', file.id);
+
+             totalUploaded++;
+             logger.info(`Uploaded: ${file.filename}`);
+           } else {
+             await supabase
+               .from('uploader')
+               .update({
+                 status: 'upload-error',
+                 message: uploadResult.error || 'Upload failed',
+               })
+               .eq('id', file.id);
+
+             totalErrors++;
+             logger.error(`Upload failed: ${file.filename} - ${uploadResult.error}`);
+           }
+         } catch (error) {
+           totalErrors++;
+           logger.error(`Error processing file ${file.filename}: ${error.message}`);
+
+           await supabase
+             .from('uploader')
+             .update({
+               status: 'upload-error',
+               message: `Processing error: ${error.message}`,
+             })
+             .eq('id', file.id);
+         }
+       }
+     }
+
+     const result = {
+       processedCount: totalProcessed,
+       uploadedCount: totalUploaded,
+       errorCount: totalErrors,
+     };
+
+     logger.success(
+       `Phase 4 Summary: ${totalProcessed} files processed, ${totalUploaded} uploaded, ${totalErrors} errors`,
+     );
+
+     return result;
+   }
+
+   /**
+    * Get processed file paths from log
+    * @returns {Set<string>} Set of processed file paths
+    */
+   getProcessedPaths() {
+     // This would need to be adapted to work with the LoggingService
+     // For now, return empty set
+     return new Set();
+   }
+
+   /**
+    * Query files that are ready for upload
+    * These are files that have been detected but not yet uploaded
+    * Uses the same RFC filtering logic as uploadFilesByRfc for consistency
+    * @param {Object} options - Query options
+    * @returns {Promise<Array>} Array of files ready for upload
+    */
+   async getFilesReadyForUpload(options = {}) {
+     const supabase = await this.#getSupabaseClient();
+
+     logger.info('Querying files ready for upload...');
+     console.log('🔍 Querying files ready for upload...');
+
+     // Check if UPLOAD_RFCS is configured
+     const uploadRfcs = appConfig.upload.rfcs;
+     if (!uploadRfcs || uploadRfcs.length === 0) {
+       console.log('ℹī¸ No UPLOAD_RFCS configured. Please set UPLOAD_RFCS environment variable to see files ready for upload.');
+       console.log('   Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"');
+       return [];
+     }
+
+     console.log(`đŸŽ¯ Using RFC filter: ${uploadRfcs.join(', ')}`);
+
+     // Step 1: Find pedimento_simplificado documents for the specified RFCs that have arela_path
+     console.log('đŸŽ¯ Finding pedimento_simplificado documents for specified RFCs with arela_path...');
+     const { data: pedimentoRecords, error: pedimentoError } = await supabase
+       .from('uploader')
+       .select('arela_path')
+       .eq('document_type', 'pedimento_simplificado')
+       .in('rfc', uploadRfcs)
+       .not('arela_path', 'is', null);
+
+     if (pedimentoError) {
+       throw new Error(`Error querying pedimento_simplificado records: ${pedimentoError.message}`);
+     }
+
+     if (!pedimentoRecords || pedimentoRecords.length === 0) {
+       console.log('ℹī¸ No pedimento_simplificado records with arela_path found');
+       return [];
+     }
+
+     // Get unique arela_paths
+     const uniqueArelaPaths = [...new Set(pedimentoRecords.map(r => r.arela_path))];
+     console.log(`📋 Found ${pedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`);
+
+     // Step 2: Find all related files with these arela_paths that haven't been uploaded yet
+     console.log('🔍 Finding all related files that need to be uploaded...');
+
+     // Process arela_paths in chunks to avoid URI length limits
+     let allReadyFiles = [];
+     const chunkSize = 50;
+
+     for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
+       const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
+
+       const { data: chunkFiles, error: chunkError } = await supabase
+         .from('uploader')
+         .select('id, original_path, arela_path, filename, rfc, document_type, status')
+         .in('arela_path', pathChunk)
+         .neq('status', 'file-uploaded')
+         .not('original_path', 'is', null);
+
+       if (chunkError) {
+         throw new Error(`Error querying files for arela_paths chunk: ${chunkError.message}`);
+       }
+
+       if (chunkFiles && chunkFiles.length > 0) {
+         allReadyFiles = allReadyFiles.concat(chunkFiles);
+       }
+     }
+
+     const readyFiles = allReadyFiles;
+
+     console.log(`📋 Found ${readyFiles?.length || 0} files ready for upload`);
+
+     if (readyFiles && readyFiles.length > 0) {
+       // Group by document type for summary
+       const byDocType = readyFiles.reduce((acc, file) => {
+         const docType = file.document_type || 'Unknown';
+         acc[docType] = (acc[docType] || 0) + 1;
+         return acc;
+       }, {});
+
+       console.log('📊 Files by document type:');
+       for (const [docType, count] of Object.entries(byDocType)) {
+         console.log(`   ${docType}: ${count} files`);
+       }
+
+       // Group by RFC
+       const byRfc = readyFiles.reduce((acc, file) => {
+         const rfc = file.rfc || 'No RFC';
+         acc[rfc] = (acc[rfc] || 0) + 1;
+         return acc;
+       }, {});
+
+       console.log('📊 Files by RFC:');
+       for (const [rfc, count] of Object.entries(byRfc)) {
+         console.log(`   ${rfc}: ${count} files`);
+       }
+     }
+
+     return readyFiles || [];
+   }
+ }
+
+ // Export singleton instance
+ export const databaseService = new DatabaseService();
+ export default databaseService;
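+
+ // Minimal usage sketch (illustrative only; assumes this module lives in the
+ // services directory and that the four phases run in order):
+ //
+ //   import databaseService from './services/DatabaseService.js';
+ //
+ //   await databaseService.insertStatsOnlyToUploaderTable(files, { clientPath }); // Phase 1: filesystem stats
+ //   await databaseService.detectPedimentosInDatabase({ batchSize: 10 });         // Phase 2: PDF detection
+ //   await databaseService.propagateArelaPath();                                  // Phase 3: arela_path propagation
+ //   await databaseService.uploadFilesByRfc({ batchSize: 10 });                   // Phase 4: RFC-filtered upload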