@arela/uploader 0.2.3 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/index.js +436 -19
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@arela/uploader",
- "version": "0.2.3",
+ "version": "0.2.4",
  "description": "CLI to upload files/directories to Arela",
  "bin": {
  "arela": "./src/index.js"
package/src/index.js CHANGED
@@ -17,7 +17,7 @@ config();
  const program = new Command();

  // Read package.json version at startup
- let packageVersion = '1.0.0'; // fallback
+ let packageVersion = '0.2.4'; // fallback
  try {
  const __filename = new URL(import.meta.url).pathname;
  const __dirname = path.dirname(__filename);
@@ -774,6 +774,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
  console.log(
  '🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...',
  );
+ writeLog(
+ '🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents',
+ );

  const detectionService = new FileDetectionService();
  const processingBatchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
@@ -786,12 +789,16 @@ const detectPedimentosInDatabase = async (options = {}) => {
  let chunkNumber = 1;

  console.log('📁 Processing PDF files in chunks of 1000 records...');
+ writeLog('📁 Starting PDF detection processing in chunks of 1000 records');

  // Process records in chunks of 1000
  while (true) {
  console.log(
  `\n📄 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
  );
+ writeLog(
+ `📄 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})`,
+ );

  // Fetch next chunk of PDF records
  const { data: pdfRecords, error: queryError } = await supabase
@@ -811,12 +818,18 @@ const detectPedimentosInDatabase = async (options = {}) => {
  // If no records found, we're done
  if (!pdfRecords || pdfRecords.length === 0) {
  console.log(`📁 No more PDF files found. Processing completed.`);
+ writeLog(
+ `📁 No more PDF files found. Processing completed at chunk ${chunkNumber}`,
+ );
  break;
  }

  console.log(
  `📊 Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
  );
+ writeLog(
+ `📊 Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
+ );

  // Create progress bar for this chunk
  const progressBar = new cliProgress.SingleBar({
@@ -841,6 +854,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
  try {
  // Check if file still exists
  if (!fs.existsSync(record.original_path)) {
+ writeLog(
+ `⚠️ FILE NOT FOUND: ${record.filename} at ${record.original_path}`,
+ );
  updatePromises.push(
  supabase
  .from('uploader')
@@ -883,6 +899,13 @@ const detectPedimentosInDatabase = async (options = {}) => {
  if (detection.detectedType) {
  chunkDetected++;
  totalDetected++;
+ writeLog(
+ `✅ DETECTED: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${detection.fields?.rfc || 'N/A'}`,
+ );
+ } else {
+ writeLog(
+ `⏭️ NOT DETECTED: ${record.filename} - No pedimento-simplificado pattern found`,
+ );
  }

  updatePromises.push(
@@ -893,6 +916,7 @@ const detectPedimentosInDatabase = async (options = {}) => {
  `❌ Error detecting ${record.filename}:`,
  error.message,
  );
+ writeLog(`❌ ERROR detecting ${record.filename}: ${error.message}`);
  chunkErrors++;
  totalErrors++;

@@ -930,6 +954,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
  console.log(
  `✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
  );
+ writeLog(
+ `✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
+ );

  // Move to next chunk
  offset += queryBatchSize;
@@ -940,6 +967,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
  console.log(
  `📁 Reached end of records (chunk had ${pdfRecords.length} records).`,
  );
+ writeLog(
+ `📁 Reached end of records (chunk had ${pdfRecords.length} records)`,
+ );
  break;
  }

@@ -950,6 +980,15 @@ const detectPedimentosInDatabase = async (options = {}) => {
  console.log(
  `📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
  );
+
+ // Write comprehensive log summary
+ writeLog(
+ `📊 PHASE 2 PDF DETECTION COMPLETED - Summary: Detected: ${totalDetected} pedimento-simplificado documents, Processed: ${totalProcessed} PDF files, Errors: ${totalErrors}`,
+ );
+
+ // Ensure logs are flushed
+ flushLogBuffer();
+
  return {
  detectedCount: totalDetected,
  processedCount: totalProcessed,
@@ -1186,6 +1225,37 @@ const processFilesInBatches = async (
  }
  });

+ // Update status to "file-uploaded" for successfully uploaded files
+ if (result.uploaded && result.uploaded.length > 0 && supabase) {
+ try {
+ const uploadedFilePaths = result.uploaded
+ .map((upload) => {
+ const apiFile = apiFiles.find(
+ (f) =>
+ f.name === upload.originalName ||
+ f.originalName === upload.originalName,
+ );
+ return apiFile ? apiFile.path : null;
+ })
+ .filter(Boolean);
+
+ if (uploadedFilePaths.length > 0) {
+ await supabase
+ .from('uploader')
+ .update({ status: 'file-uploaded' })
+ .in('original_path', uploadedFilePaths);
+
+ console.log(
+ ` 📝 Updated status to "file-uploaded" for ${uploadedFilePaths.length} files`,
+ );
+ }
+ } catch (error) {
+ console.error(
+ ` ⚠️ Error updating status for uploaded files: ${error.message}`,
+ );
+ }
+ }
+
  result.errors.forEach((error) => {
  writeLog(
  `ERROR: ${error.fileName}: ${error.error} (${error.step})`,
@@ -1266,11 +1336,39 @@ const processFilesInBatches = async (
  if (processedPaths.has(uploadPath)) {
  totalSkipped++;
  writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
+
+ // Update status to "file-uploaded" for skipped files (they already exist)
+ if (supabase) {
+ try {
+ await supabase
+ .from('uploader')
+ .update({ status: 'file-uploaded' })
+ .eq('original_path', file);
+ } catch (error) {
+ console.error(
+ ` ⚠️ Error updating status for skipped file: ${error.message}`,
+ );
+ }
+ }
  } else {
  await uploadToSupabase(file, uploadPath);
  totalUploaded++;
  writeLog(`SUCCESS: ${file} -> ${uploadPath}`);
  processedPaths.add(uploadPath);
+
+ // Update status to "file-uploaded" for successfully uploaded files
+ if (supabase) {
+ try {
+ await supabase
+ .from('uploader')
+ .update({ status: 'file-uploaded' })
+ .eq('original_path', file);
+ } catch (error) {
+ console.error(
+ ` ⚠️ Error updating status for uploaded file: ${error.message}`,
+ );
+ }
+ }
  }
  } catch (error) {
  totalErrors++;
@@ -1333,30 +1431,110 @@ const uploadFilesByRfc = async (options = {}) => {
  console.log(`📋 Target RFCs: ${uploadRfcs.join(', ')}`);
  console.log('🔍 Searching for files to upload...');

- // Step 1: Get all records that match the specified RFCs and have arela_path
- const { data: rfcRecords, error: rfcError } = await supabase
+ // First, count total files for the RFCs to show filtering effect
+ const { count: totalRfcFiles, error: countError } = await supabase
+ .from('uploader')
+ .select('*', { count: 'exact', head: true })
+ .in('rfc', uploadRfcs)
+ .not('arela_path', 'is', null);
+
+ if (countError) {
+ console.warn('⚠️ Could not count total RFC files:', countError.message);
+ } else {
+ console.log(`📊 Total files for specified RFCs: ${totalRfcFiles || 0}`);
+ }
+
+ // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
+ console.log(
+ '🎯 Finding pedimento_simplificado records for specified RFCs...',
+ );
+ const { data: pedimentoRfcRecords, error: pedimentoRfcError } = await supabase
  .from('uploader')
  .select('arela_path')
+ .eq('document_type', 'pedimento_simplificado')
  .in('rfc', uploadRfcs)
  .not('arela_path', 'is', null);

- if (rfcError) {
- console.error('❌ Error fetching RFC records:', rfcError.message);
+ if (pedimentoRfcError) {
+ console.error(
+ '❌ Error fetching pedimento RFC records:',
+ pedimentoRfcError.message,
+ );
  return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
  }

- if (!rfcRecords || rfcRecords.length === 0) {
- console.log('ℹ️ No files found for the specified RFCs with arela_path');
+ if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
  console.log(
- ` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
+ 'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
  );
  return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
  }

- // Step 2: Get unique arela_paths from the RFC matches
- const uniqueArelaPaths = [...new Set(rfcRecords.map((r) => r.arela_path))];
+ // Get unique arela_paths from pedimento records
+ const uniqueArelaPaths = [
+ ...new Set(pedimentoRfcRecords.map((r) => r.arela_path)),
+ ];
  console.log(
- `📋 Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`,
+ `📋 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`,
+ );
+
+ // Step 2: Get all files with these arela_paths that haven't been uploaded yet
+ let rfcRecords = [];
+ const chunkSize = 50;
+
+ for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
+ const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
+
+ const { data: chunkFiles, error: chunkError } = await supabase
+ .from('uploader')
+ .select('arela_path')
+ .in('arela_path', pathChunk)
+ .neq('status', 'file-uploaded')
+ .not('arela_path', 'is', null);
+
+ if (chunkError) {
+ console.error(
+ '❌ Error fetching files for arela_paths chunk:',
+ chunkError.message,
+ );
+ return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
+ }
+
+ if (chunkFiles && chunkFiles.length > 0) {
+ rfcRecords = rfcRecords.concat(chunkFiles);
+ }
+ }
+
+ if (!rfcRecords || rfcRecords.length === 0) {
+ if (totalRfcFiles && totalRfcFiles > 0) {
+ console.log(
+ `ℹ️ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`,
+ );
+ console.log(' No new files to upload.');
+ } else {
+ console.log('ℹ️ No files found for the specified RFCs with arela_path');
+ console.log(
+ ` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
+ );
+ }
+ return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+ }
+
+ // Show filtering effect
+ const uploadableArelaPaths = [
+ ...new Set(rfcRecords.map((r) => r.arela_path)),
+ ];
+ const skipped = (totalRfcFiles || 0) - rfcRecords.length;
+ if (skipped > 0) {
+ console.log(
+ `📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`,
+ );
+ } else {
+ console.log(`📊 Found ${rfcRecords.length} files ready for upload`);
+ }
+
+ console.log(
+ `🎯 Found ${uploadableArelaPaths.length} unique arela_path(s) with files ready for upload`,
  );

  // Step 3: Get ALL files that have these arela_paths (including supporting documents)
@@ -1365,12 +1543,19 @@ const uploadFilesByRfc = async (options = {}) => {
  const arelaPathChunkSize = 50; // Process 50 arela_paths at a time to avoid URI limits
  const queryBatchSize = 1000;

- console.log('📄 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...');
+ console.log(
+ '📄 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...',
+ );

  // Process arela_paths in chunks
- for (let i = 0; i < uniqueArelaPaths.length; i += arelaPathChunkSize) {
- const arelaPathChunk = uniqueArelaPaths.slice(i, i + arelaPathChunkSize);
- console.log(` Processing arela_path chunk ${Math.floor(i / arelaPathChunkSize) + 1}/${Math.ceil(uniqueArelaPaths.length / arelaPathChunkSize)} (${arelaPathChunk.length} paths)`);
+ for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
+ const arelaPathChunk = uploadableArelaPaths.slice(
+ i,
+ i + arelaPathChunkSize,
+ );
+ console.log(
+ ` Processing arela_path chunk ${Math.floor(i / arelaPathChunkSize) + 1}/${Math.ceil(uploadableArelaPaths.length / arelaPathChunkSize)} (${arelaPathChunk.length} paths)`,
+ );

  // For each chunk of arela_paths, use pagination to get all related files
  let hasMore = true;
@@ -1382,10 +1567,14 @@ const uploadFilesByRfc = async (options = {}) => {
  .select('id, original_path, arela_path, filename, rfc, document_type')
  .in('arela_path', arelaPathChunk)
  .not('original_path', 'is', null)
+ .neq('status', 'file-uploaded')
  .range(offset, offset + queryBatchSize - 1);

  if (queryError) {
- console.error(`❌ Error fetching related files for chunk ${Math.floor(i / arelaPathChunkSize) + 1}:`, queryError.message);
+ console.error(
+ `❌ Error fetching related files for chunk ${Math.floor(i / arelaPathChunkSize) + 1}:`,
+ queryError.message,
+ );
  return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
  }

@@ -1403,7 +1592,7 @@ const uploadFilesByRfc = async (options = {}) => {
  }

  // Small delay between chunks to avoid overwhelming the database
- if (i + arelaPathChunkSize < uniqueArelaPaths.length) {
+ if (i + arelaPathChunkSize < uploadableArelaPaths.length) {
  await new Promise((resolve) => setTimeout(resolve, 100));
  }
  }
@@ -1414,7 +1603,7 @@ const uploadFilesByRfc = async (options = {}) => {
  }

  console.log(
- `📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`,
+ `📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents, excluding already uploaded)`,
  );

  // Group by RFC and arela_path for better organization
@@ -1630,6 +1819,23 @@ const uploadFilesByRfc = async (options = {}) => {
  ` 📁 Files organized: ${result.stats.organizedCount}`,
  );
  }
+
+ // Update status to "file-uploaded" for successfully uploaded files
+ try {
+ const uploadedFilePaths = pathFiles.map((file) => file.path);
+ await supabase
+ .from('uploader')
+ .update({ status: 'file-uploaded' })
+ .in('original_path', uploadedFilePaths);
+
+ console.log(
+ ` 📝 Updated status to "file-uploaded" for ${uploadedFilePaths.length} files`,
+ );
+ } catch (error) {
+ console.error(
+ ` ⚠️ Error updating status for uploaded files: ${error.message}`,
+ );
+ }
  } else {
  console.error(` ❌ Upload failed for ${folderStructure}:`);
  if (result.errors && result.errors.length > 0) {
@@ -1640,6 +1846,42 @@ const uploadFilesByRfc = async (options = {}) => {
  totalErrors += pathFiles.length;
  }

+ // Handle files that already exist (usually indicated in result.uploaded or result.skipped)
+ if (result.uploaded && result.uploaded.length > 0) {
+ try {
+ const alreadyUploadedPaths = result.uploaded
+ .filter(
+ (upload) =>
+ upload.status === 'already_exists' || upload.alreadyExists,
+ )
+ .map((upload) => {
+ // Find the corresponding file path from pathFiles
+ const matchingFile = pathFiles.find(
+ (f) =>
+ f.name === upload.fileName ||
+ f.name === upload.originalName,
+ );
+ return matchingFile ? matchingFile.path : null;
+ })
+ .filter(Boolean);
+
+ if (alreadyUploadedPaths.length > 0) {
+ await supabase
+ .from('uploader')
+ .update({ status: 'file-uploaded' })
+ .in('original_path', alreadyUploadedPaths);
+
+ console.log(
+ ` 📝 Updated status to "file-uploaded" for ${alreadyUploadedPaths.length} already existing files`,
+ );
+ }
+ } catch (error) {
+ console.error(
+ ` ⚠️ Error updating status for already existing files: ${error.message}`,
+ );
+ }
+ }
+
  // Small delay between path groups
  await new Promise((resolve) => setTimeout(resolve, 100));
  }
@@ -1689,6 +1931,7 @@ const propagateArelaPath = async (options = {}) => {
  }

  console.log('🔍 Finding pedimento_simplificado records with arela_path...');
+ writeLog('🔍 Starting arela_path propagation process');

  // Get all pedimento_simplificado records that have arela_path
  const { data: pedimentoRecords, error: pedimentoError } = await supabase
@@ -1707,12 +1950,16 @@ const propagateArelaPath = async (options = {}) => {

  if (!pedimentoRecords || pedimentoRecords.length === 0) {
  console.log('ℹ️ No pedimento_simplificado records with arela_path found');
+ writeLog('ℹ️ No pedimento_simplificado records with arela_path found');
  return { processedCount: 0, updatedCount: 0, errorCount: 0 };
  }

  console.log(
  `📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
  );
+ writeLog(
+ `📋 Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
+ );

  let totalProcessed = 0;
  let totalUpdated = 0;
@@ -1744,6 +1991,9 @@ const propagateArelaPath = async (options = {}) => {

  console.log(`\n🔍 Processing: ${pedimento.filename}`);
  console.log(` 📁 Base path: ${basePath}`);
+ writeLog(
+ `🔍 Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`,
+ );

  // Extract folder part from existing arela_path by removing the filename
  const existingPath = pedimento.arela_path;
@@ -1775,12 +2025,16 @@ const propagateArelaPath = async (options = {}) => {

  if (!relatedFiles || relatedFiles.length === 0) {
  console.log(` ℹ️ No related files found needing arela_path update`);
+ writeLog(`ℹ️ No related files found for ${pedimento.filename}`);
  continue;
  }

  console.log(
  ` 📄 Found ${relatedFiles.length} related files to update:`,
  );
+ writeLog(
+ `📄 Found ${relatedFiles.length} related files to update for ${pedimento.filename}`,
+ );

  // Show first 10 files, then indicate if there are more
  const filesToShow = relatedFiles.slice(0, 10);
@@ -1847,9 +2101,15 @@ const propagateArelaPath = async (options = {}) => {
  console.error(
  `❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
  );
+ writeLog(
+ `❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
+ );
  totalErrors++;
  } else {
  console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
+ writeLog(
+ `✅ Successfully updated ${batchUpdated} related files for ${pedimento.filename} -> ${folderArelaPath}`,
+ );
  totalUpdated += batchUpdated;
  }
  } catch (error) {
@@ -1857,6 +2117,7 @@ const propagateArelaPath = async (options = {}) => {
  `❌ Error processing ${pedimento.filename}:`,
  error.message,
  );
+ writeLog(`❌ Error processing ${pedimento.filename}: ${error.message}`);
  totalErrors++;
  }

@@ -1880,6 +2141,14 @@ const propagateArelaPath = async (options = {}) => {
  console.log(` ❌ Errors: ${totalErrors}`);
  console.log(`${'='.repeat(60)}\n`);

+ // Write comprehensive log summary
+ writeLog(
+ `🎯 ARELA PATH PROPAGATION COMPLETED - Summary: Processed: ${totalProcessed} pedimento records, Updated: ${totalUpdated} related files, Errors: ${totalErrors}`,
+ );
+
+ // Ensure logs are flushed
+ flushLogBuffer();
+
  return {
  processedCount: totalProcessed,
  updatedCount: totalUpdated,
@@ -1887,10 +2156,135 @@ const propagateArelaPath = async (options = {}) => {
  };
  };

+ /**
+ * Helper function to query files that need to be uploaded
+ * These are files that have been detected but not yet uploaded
+ * Uses the same RFC filtering logic as uploadFilesByRfc for consistency
+ */
+ const getFilesReadyForUpload = async (options = {}) => {
+ if (!supabase) {
+ throw new Error('Supabase client not initialized');
+ }
+
+ console.log('🔍 Querying files ready for upload...');
+
+ // Check if UPLOAD_RFCS is configured
+ if (!uploadRfcs || uploadRfcs.length === 0) {
+ console.log(
+ 'ℹ️ No UPLOAD_RFCS configured. Please set UPLOAD_RFCS environment variable to see files ready for upload.',
+ );
+ console.log(
+ ' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
+ );
+ return [];
+ }
+
+ console.log(`🎯 Using RFC filter: ${uploadRfcs.join(', ')}`);
+
+ // Step 1: Find pedimento_simplificado documents for the specified RFCs that have arela_path
+ console.log(
+ '🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
+ );
+ const { data: pedimentoRecords, error: pedimentoError } = await supabase
+ .from('uploader')
+ .select('arela_path')
+ .eq('document_type', 'pedimento_simplificado')
+ .in('rfc', uploadRfcs)
+ .not('arela_path', 'is', null);
+
+ if (pedimentoError) {
+ throw new Error(
+ `Error querying pedimento_simplificado records: ${pedimentoError.message}`,
+ );
+ }
+
+ if (!pedimentoRecords || pedimentoRecords.length === 0) {
+ console.log('ℹ️ No pedimento_simplificado records with arela_path found');
+ return [];
+ }
+
+ // Get unique arela_paths
+ const uniqueArelaPaths = [
+ ...new Set(pedimentoRecords.map((r) => r.arela_path)),
+ ];
+ console.log(
+ `📋 Found ${pedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
+ );
+
+ // Step 2: Find all related files with these arela_paths that haven't been uploaded yet
+ console.log('🔍 Finding all related files that need to be uploaded...');
+
+ // Process arela_paths in chunks to avoid URI length limits
+ let allReadyFiles = [];
+ const chunkSize = 50;
+
+ for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
+ const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
+
+ const { data: chunkFiles, error: chunkError } = await supabase
+ .from('uploader')
+ .select(
+ 'id, original_path, arela_path, filename, rfc, document_type, status',
+ )
+ .in('arela_path', pathChunk)
+ .neq('status', 'file-uploaded')
+ .not('original_path', 'is', null);
+
+ if (chunkError) {
+ throw new Error(
+ `Error querying files for arela_paths chunk: ${chunkError.message}`,
+ );
+ }
+
+ if (chunkFiles && chunkFiles.length > 0) {
+ allReadyFiles = allReadyFiles.concat(chunkFiles);
+ }
+ }
+
+ const readyFiles = allReadyFiles;
+
+ console.log(`📋 Found ${readyFiles?.length || 0} files ready for upload`);
+
+ if (readyFiles && readyFiles.length > 0) {
+ // Group by document type for summary
+ const byDocType = readyFiles.reduce((acc, file) => {
+ const docType = file.document_type || 'Unknown';
+ acc[docType] = (acc[docType] || 0) + 1;
+ return acc;
+ }, {});
+
+ console.log('📊 Files by document type:');
+ for (const [docType, count] of Object.entries(byDocType)) {
+ console.log(` ${docType}: ${count} files`);
+ }
+
+ // Group by RFC
+ const byRfc = readyFiles.reduce((acc, file) => {
+ const rfc = file.rfc || 'No RFC';
+ acc[rfc] = (acc[rfc] || 0) + 1;
+ return acc;
+ }, {});
+
+ console.log('📊 Files by RFC:');
+ for (const [rfc, count] of Object.entries(byRfc)) {
+ console.log(` ${rfc}: ${count} files`);
+ }
+ }
+
+ return readyFiles || [];
+ };
+
  program
  .name('arela-uploader')
  .description(
- 'CLI to upload folders to Arela API or Supabase Storage with automatic processing',
+ 'CLI to upload folders to Arela API or Supabase Storage with automatic processing\n\n' +
+ 'Status workflow:\n' +
+ ' fs-stats → detected → file-uploaded\n' +
+ ' ├─ Phase 1: --stats-only (collects filesystem stats, status: fs-stats)\n' +
+ ' ├─ Phase 2: --detect-pdfs (detects document types, status: detected)\n' +
+ ' ├─ Phase 3: --propagate-arela-path (organizes files by pedimento)\n' +
+ ' └─ Phase 4: --upload-by-rfc (uploads files, status: file-uploaded)\n\n' +
+ 'Use --query-ready-files to see files ready for upload (status: detected with arela_path)',
  )
  .option('-v, --version', 'output the version number')
  .option('-p, --prefix <prefix>', 'Prefix path in bucket', '')
@@ -1941,6 +2335,10 @@ program
  '--run-all-phases',
  'Run all 4 phases in sequence: stats → detect → propagate → upload',
  )
+ .option(
+ '--query-ready-files',
+ 'Query and display files that are ready for upload (have been detected but not uploaded)',
+ )
  .action(async (options) => {
  if (options.version) {
  console.log(packageVersion);
@@ -1962,6 +2360,25 @@ program
  return;
  }

+ // Handle query-ready-files option
+ if (options.queryReadyFiles) {
+ await checkCredentials(true); // Force Supabase mode
+
+ const readyFiles = await getFilesReadyForUpload();
+
+ if (readyFiles.length === 0) {
+ console.log('ℹ️ No files are currently ready for upload');
+ console.log(
+ ' Tip: Run --detect-pdfs and --propagate-arela-path first to prepare files for upload',
+ );
+ } else {
+ console.log(`\n📋 ${readyFiles.length} files are ready for upload!`);
+ console.log(' Use --upload-by-rfc to upload them to Arela API');
+ }
+
+ return;
+ }
+
  // Handle run-all-phases option
  if (options.runAllPhases) {
  console.log('🚀 Starting all 4 phases in sequence...');
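
A note on the query pattern that recurs throughout 0.2.4: every new database read filters out rows already marked file-uploaded via .neq('status', 'file-uploaded'), and batches arela_path values through .in() in chunks of 50, since PostgREST encodes .in() filters in the request URI and large lists would exceed URI length limits. A minimal standalone sketch of that pattern, assuming a configured supabase-js client; the SUPABASE_URL/SUPABASE_KEY variables and the function name are placeholders, not part of the package:

import { createClient } from '@supabase/supabase-js';

const supabase = createClient(
  process.env.SUPABASE_URL,
  process.env.SUPABASE_KEY,
);

// Fetch rows still pending upload for a list of arela_paths, chunking the
// .in() filter so the generated request URI stays under length limits.
const fetchPendingByArelaPaths = async (arelaPaths, chunkSize = 50) => {
  let results = [];
  for (let i = 0; i < arelaPaths.length; i += chunkSize) {
    const chunk = arelaPaths.slice(i, i + chunkSize);
    const { data, error } = await supabase
      .from('uploader')
      .select('id, original_path, arela_path, status')
      .in('arela_path', chunk)
      .neq('status', 'file-uploaded');
    if (error) throw new Error(error.message);
    if (data) results = results.concat(data);
  }
  return results;
};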