@arela/uploader 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/index.js +469 -38
package/package.json CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@arela/uploader",
-  "version": "0.2.2",
+  "version": "0.2.4",
   "description": "CLI to upload files/directories to Arela",
   "bin": {
     "arela": "./src/index.js"
package/src/index.js CHANGED
@@ -17,7 +17,7 @@ config();
 const program = new Command();
 
 // Read package.json version at startup
-let packageVersion = '1.0.0'; // fallback
+let packageVersion = '0.2.4'; // fallback
 try {
   const __filename = new URL(import.meta.url).pathname;
   const __dirname = path.dirname(__filename);
@@ -158,8 +158,8 @@ const checkCredentials = async (forceSupabase = false) => {
   if (!supabaseUrl || !supabaseKey || !bucket) {
     console.error(
       '⚠️ Missing credentials. Please set either:\n' +
-      ' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
-      ' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
+        ' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
+        ' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
     );
     process.exit(1);
   }
@@ -774,6 +774,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
   console.log(
     '🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...',
   );
+  writeLog(
+    '🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents',
+  );
 
   const detectionService = new FileDetectionService();
   const processingBatchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
@@ -786,12 +789,16 @@ const detectPedimentosInDatabase = async (options = {}) => {
   let chunkNumber = 1;
 
   console.log('📝 Processing PDF files in chunks of 1000 records...');
+  writeLog('📝 Starting PDF detection processing in chunks of 1000 records');
 
   // Process records in chunks of 1000
   while (true) {
     console.log(
       `\n📥 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
     );
+    writeLog(
+      `📥 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})`,
+    );
 
     // Fetch next chunk of PDF records
     const { data: pdfRecords, error: queryError } = await supabase
@@ -811,12 +818,18 @@ const detectPedimentosInDatabase = async (options = {}) => {
     // If no records found, we're done
     if (!pdfRecords || pdfRecords.length === 0) {
       console.log(`📝 No more PDF files found. Processing completed.`);
+      writeLog(
+        `📝 No more PDF files found. Processing completed at chunk ${chunkNumber}`,
+      );
       break;
     }
 
     console.log(
       `📊 Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
     );
+    writeLog(
+      `📊 Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
+    );
 
     // Create progress bar for this chunk
     const progressBar = new cliProgress.SingleBar({
@@ -841,6 +854,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
       try {
         // Check if file still exists
        if (!fs.existsSync(record.original_path)) {
+          writeLog(
+            `⚠️ FILE NOT FOUND: ${record.filename} at ${record.original_path}`,
+          );
           updatePromises.push(
             supabase
               .from('uploader')
@@ -883,6 +899,13 @@ const detectPedimentosInDatabase = async (options = {}) => {
         if (detection.detectedType) {
           chunkDetected++;
           totalDetected++;
+          writeLog(
+            `✅ DETECTED: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${detection.fields?.rfc || 'N/A'}`,
+          );
+        } else {
+          writeLog(
+            `⏭️ NOT DETECTED: ${record.filename} - No pedimento-simplificado pattern found`,
+          );
         }
 
         updatePromises.push(
@@ -893,6 +916,7 @@ const detectPedimentosInDatabase = async (options = {}) => {
           `❌ Error detecting ${record.filename}:`,
           error.message,
         );
+        writeLog(`❌ ERROR detecting ${record.filename}: ${error.message}`);
         chunkErrors++;
         totalErrors++;
 
@@ -930,6 +954,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
     console.log(
       `✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
     );
+    writeLog(
+      `✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
+    );
 
     // Move to next chunk
     offset += queryBatchSize;
@@ -940,6 +967,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
       console.log(
         `📝 Reached end of records (chunk had ${pdfRecords.length} records).`,
       );
+      writeLog(
+        `📝 Reached end of records (chunk had ${pdfRecords.length} records)`,
+      );
       break;
     }
 
@@ -950,6 +980,15 @@ const detectPedimentosInDatabase = async (options = {}) => {
   console.log(
     `📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
   );
+
+  // Write comprehensive log summary
+  writeLog(
+    `📊 PHASE 2 PDF DETECTION COMPLETED - Summary: Detected: ${totalDetected} pedimento-simplificado documents, Processed: ${totalProcessed} PDF files, Errors: ${totalErrors}`,
+  );
+
+  // Ensure logs are flushed
+  flushLogBuffer();
+
   return {
     detectedCount: totalDetected,
     processedCount: totalProcessed,
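Note: writeLog() (used throughout this phase) and flushLogBuffer() (called before the return) are defined elsewhere in src/index.js and are not part of this diff. A minimal sketch of what such a buffered logger pair might look like, assuming a plain append-to-file log; the log path and flush threshold below are illustrative, not the package's actual values:

    import fs from 'fs';

    // Hypothetical buffered logger: writeLog() queues lines in memory,
    // flushLogBuffer() appends them to disk in a single write.
    const logBuffer = [];
    const LOG_PATH = './arela-uploader.log'; // illustrative path

    const writeLog = (message) => {
      logBuffer.push(`${new Date().toISOString()} ${message}`);
      if (logBuffer.length >= 100) flushLogBuffer(); // illustrative threshold
    };

    const flushLogBuffer = () => {
      if (logBuffer.length === 0) return;
      fs.appendFileSync(LOG_PATH, logBuffer.join('\n') + '\n');
      logBuffer.length = 0; // clear the buffer in place
    };

Flushing explicitly at the end of each phase, as the diff adds, ensures buffered lines reach disk even when the process exits right afterwards.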
@@ -1186,6 +1225,37 @@ const processFilesInBatches = async (
     }
   });
 
+  // Update status to "file-uploaded" for successfully uploaded files
+  if (result.uploaded && result.uploaded.length > 0 && supabase) {
+    try {
+      const uploadedFilePaths = result.uploaded
+        .map((upload) => {
+          const apiFile = apiFiles.find(
+            (f) =>
+              f.name === upload.originalName ||
+              f.originalName === upload.originalName,
+          );
+          return apiFile ? apiFile.path : null;
+        })
+        .filter(Boolean);
+
+      if (uploadedFilePaths.length > 0) {
+        await supabase
+          .from('uploader')
+          .update({ status: 'file-uploaded' })
+          .in('original_path', uploadedFilePaths);
+
+        console.log(
+          ` 📝 Updated status to "file-uploaded" for ${uploadedFilePaths.length} files`,
+        );
+      }
+    } catch (error) {
+      console.error(
+        ` ⚠️ Error updating status for uploaded files: ${error.message}`,
+      );
+    }
+  }
+
   result.errors.forEach((error) => {
     writeLog(
       `ERROR: ${error.fileName}: ${error.error} (${error.step})`,
@@ -1266,11 +1336,39 @@ const processFilesInBatches = async (
       if (processedPaths.has(uploadPath)) {
         totalSkipped++;
         writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
+
+        // Update status to "file-uploaded" for skipped files (they already exist)
+        if (supabase) {
+          try {
+            await supabase
+              .from('uploader')
+              .update({ status: 'file-uploaded' })
+              .eq('original_path', file);
+          } catch (error) {
+            console.error(
+              ` ⚠️ Error updating status for skipped file: ${error.message}`,
+            );
+          }
+        }
       } else {
         await uploadToSupabase(file, uploadPath);
         totalUploaded++;
         writeLog(`SUCCESS: ${file} -> ${uploadPath}`);
         processedPaths.add(uploadPath);
+
+        // Update status to "file-uploaded" for successfully uploaded files
+        if (supabase) {
+          try {
+            await supabase
+              .from('uploader')
+              .update({ status: 'file-uploaded' })
+              .eq('original_path', file);
+          } catch (error) {
+            console.error(
+              ` ⚠️ Error updating status for uploaded file: ${error.message}`,
+            );
+          }
+        }
       }
     } catch (error) {
       totalErrors++;
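Note: the two hunks above update the same uploader.status column with different shapes: the API-upload path collects paths and issues one bulk update with .in(), while the Supabase-direct loop updates one row per file with .eq(). A condensed sketch of the bulk form, assuming the table and column names used throughout this diff (the helper name is illustrative):

    // Mark a set of local paths as uploaded in a single round trip,
    // instead of issuing one query per file as the direct-upload loop does.
    const markUploaded = (supabase, paths) =>
      supabase
        .from('uploader')
        .update({ status: 'file-uploaded' })
        .in('original_path', paths);

The bulk form trades per-file error reporting for far fewer round trips, which matters when a batch contains hundreds of files.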
@@ -1333,65 +1431,170 @@ const uploadFilesByRfc = async (options = {}) => {
   console.log(`📋 Target RFCs: ${uploadRfcs.join(', ')}`);
   console.log('🔍 Searching for files to upload...');
 
-  // Step 1: Get all records that match the specified RFCs and have arela_path
-  const { data: rfcRecords, error: rfcError } = await supabase
+  // First, count total files for the RFCs to show filtering effect
+  const { count: totalRfcFiles, error: countError } = await supabase
+    .from('uploader')
+    .select('*', { count: 'exact', head: true })
+    .in('rfc', uploadRfcs)
+    .not('arela_path', 'is', null);
+
+  if (countError) {
+    console.warn('⚠️ Could not count total RFC files:', countError.message);
+  } else {
+    console.log(`📊 Total files for specified RFCs: ${totalRfcFiles || 0}`);
+  }
+
+  // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
+  console.log(
+    '🎯 Finding pedimento_simplificado records for specified RFCs...',
+  );
+  const { data: pedimentoRfcRecords, error: pedimentoRfcError } = await supabase
     .from('uploader')
     .select('arela_path')
+    .eq('document_type', 'pedimento_simplificado')
     .in('rfc', uploadRfcs)
     .not('arela_path', 'is', null);
 
-  if (rfcError) {
-    console.error('❌ Error fetching RFC records:', rfcError.message);
+  if (pedimentoRfcError) {
+    console.error(
+      '❌ Error fetching pedimento RFC records:',
+      pedimentoRfcError.message,
+    );
     return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
   }
 
-  if (!rfcRecords || rfcRecords.length === 0) {
-    console.log('ℹ️ No files found for the specified RFCs with arela_path');
+  if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
     console.log(
-      ` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
+      'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
     );
     return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
   }
 
-  // Step 2: Get unique arela_paths from the RFC matches
-  const uniqueArelaPaths = [...new Set(rfcRecords.map((r) => r.arela_path))];
+  // Get unique arela_paths from pedimento records
+  const uniqueArelaPaths = [
+    ...new Set(pedimentoRfcRecords.map((r) => r.arela_path)),
+  ];
   console.log(
-    `🎯 Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`,
+    `📋 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`,
   );
 
-  // Step 3: Get ALL files that have these arela_paths (including supporting documents)
-  // Use pagination to ensure we get all files, regardless of count
-  let allRelatedFiles = [];
-  let hasMore = true;
-  let offset = 0;
-  const queryBatchSize = 1000;
+  // Step 2: Get all files with these arela_paths that haven't been uploaded yet
+  let rfcRecords = [];
+  const chunkSize = 50;
 
-  console.log('📥 Fetching all related files (with pagination)...');
+  for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
+    const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
 
-  while (hasMore) {
-    const { data: batch, error: queryError } = await supabase
+    const { data: chunkFiles, error: chunkError } = await supabase
       .from('uploader')
-      .select('id, original_path, arela_path, filename, rfc, document_type')
-      .in('arela_path', uniqueArelaPaths)
-      .not('original_path', 'is', null)
-      .range(offset, offset + queryBatchSize - 1);
+      .select('arela_path')
+      .in('arela_path', pathChunk)
+      .neq('status', 'file-uploaded')
+      .not('arela_path', 'is', null);
 
-    if (queryError) {
-      console.error('❌ Error fetching related files:', queryError.message);
+    if (chunkError) {
+      console.error(
+        '❌ Error fetching files for arela_paths chunk:',
+        chunkError.message,
+      );
       return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
    }
 
-    if (!batch || batch.length === 0) {
-      hasMore = false;
+    if (chunkFiles && chunkFiles.length > 0) {
+      rfcRecords = rfcRecords.concat(chunkFiles);
+    }
+  }
+
+  if (!rfcRecords || rfcRecords.length === 0) {
+    if (totalRfcFiles && totalRfcFiles > 0) {
+      console.log(
+        `ℹ️ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`,
+      );
+      console.log(' No new files to upload.');
     } else {
-      allRelatedFiles = allRelatedFiles.concat(batch);
-      offset += queryBatchSize;
+      console.log('ℹ️ No files found for the specified RFCs with arela_path');
+      console.log(
+        ` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
+      );
+    }
+    return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+  }
+
+  // Show filtering effect
+  const uploadableArelaPaths = [
+    ...new Set(rfcRecords.map((r) => r.arela_path)),
+  ];
+  const skipped = (totalRfcFiles || 0) - rfcRecords.length;
+  if (skipped > 0) {
+    console.log(
+      `📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`,
+    );
+  } else {
+    console.log(`📊 Found ${rfcRecords.length} files ready for upload`);
+  }
+
+  console.log(
+    `🎯 Found ${uploadableArelaPaths.length} unique arela_path(s) with files ready for upload`,
+  );
 
-    // If we got less than queryBatchSize, we've reached the end
-    if (batch.length < queryBatchSize) {
+  // Step 3: Get ALL files that have these arela_paths (including supporting documents)
+  // Process arela_paths in smaller chunks to avoid URI length limits
+  let allRelatedFiles = [];
+  const arelaPathChunkSize = 50; // Process 50 arela_paths at a time to avoid URI limits
+  const queryBatchSize = 1000;
+
+  console.log(
+    '📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...',
+  );
+
+  // Process arela_paths in chunks
+  for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
+    const arelaPathChunk = uploadableArelaPaths.slice(
+      i,
+      i + arelaPathChunkSize,
+    );
+    console.log(
+      ` Processing arela_path chunk ${Math.floor(i / arelaPathChunkSize) + 1}/${Math.ceil(uploadableArelaPaths.length / arelaPathChunkSize)} (${arelaPathChunk.length} paths)`,
+    );
+
+    // For each chunk of arela_paths, use pagination to get all related files
+    let hasMore = true;
+    let offset = 0;
+
+    while (hasMore) {
+      const { data: batch, error: queryError } = await supabase
+        .from('uploader')
+        .select('id, original_path, arela_path, filename, rfc, document_type')
+        .in('arela_path', arelaPathChunk)
+        .not('original_path', 'is', null)
+        .neq('status', 'file-uploaded')
+        .range(offset, offset + queryBatchSize - 1);
+
+      if (queryError) {
+        console.error(
+          `❌ Error fetching related files for chunk ${Math.floor(i / arelaPathChunkSize) + 1}:`,
+          queryError.message,
+        );
+        return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
+      }
+
+      if (!batch || batch.length === 0) {
         hasMore = false;
+      } else {
+        allRelatedFiles = allRelatedFiles.concat(batch);
+        offset += queryBatchSize;
+
+        // If we got less than queryBatchSize, we've reached the end for this chunk
+        if (batch.length < queryBatchSize) {
+          hasMore = false;
+        }
       }
     }
+
+    // Small delay between chunks to avoid overwhelming the database
+    if (i + arelaPathChunkSize < uploadableArelaPaths.length) {
+      await new Promise((resolve) => setTimeout(resolve, 100));
+    }
   }
 
   if (!allRelatedFiles || allRelatedFiles.length === 0) {
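Note: the rewritten Step 2/Step 3 above replaces one paginated query over the full arela_path list with chunked .in() queries. supabase-js encodes PostgREST filters in the request URL, so a large .in() list can exceed URI length limits; batching 50 values at a time keeps the URL short. The reusable shape of that pattern, as a sketch (the function name, selected columns, and chunk size here are illustrative):

    // Fetch rows matching a large list of values by splitting the .in()
    // filter into chunks small enough to keep the request URL short.
    const fetchInChunks = async (supabase, values, chunkSize = 50) => {
      let results = [];
      for (let i = 0; i < values.length; i += chunkSize) {
        const chunk = values.slice(i, i + chunkSize);
        const { data, error } = await supabase
          .from('uploader')
          .select('id, original_path, arela_path')
          .in('arela_path', chunk);
        if (error) throw new Error(error.message);
        if (data) results = results.concat(data);
      }
      return results;
    };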
@@ -1400,7 +1603,7 @@ const uploadFilesByRfc = async (options = {}) => {
   }
 
   console.log(
-    `📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`,
+    `📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents, excluding already uploaded)`,
   );
 
   // Group by RFC and arela_path for better organization
@@ -1560,8 +1763,8 @@ const uploadFilesByRfc = async (options = {}) => {
       // Set folder structure for this group - concatenate custom prefix with arela_path
       const folderStructure = options.folderStructure
         ? `${options.folderStructure}/${arelaPath}`
-          .replace(/\/+/g, '/')
-          .replace(/\/$/, '')
+            .replace(/\/+/g, '/')
+            .replace(/\/$/, '')
         : arelaPath;
       pathFormData.append('folderStructure', folderStructure);
       pathFormData.append('autoDetect', 'true');
@@ -1616,6 +1819,23 @@ const uploadFilesByRfc = async (options = {}) => {
           ` 📁 Files organized: ${result.stats.organizedCount}`,
         );
       }
+
+      // Update status to "file-uploaded" for successfully uploaded files
+      try {
+        const uploadedFilePaths = pathFiles.map((file) => file.path);
+        await supabase
+          .from('uploader')
+          .update({ status: 'file-uploaded' })
+          .in('original_path', uploadedFilePaths);
+
+        console.log(
+          ` 📝 Updated status to "file-uploaded" for ${uploadedFilePaths.length} files`,
+        );
+      } catch (error) {
+        console.error(
+          ` ⚠️ Error updating status for uploaded files: ${error.message}`,
+        );
+      }
     } else {
       console.error(` ❌ Upload failed for ${folderStructure}:`);
       if (result.errors && result.errors.length > 0) {
@@ -1626,6 +1846,42 @@ const uploadFilesByRfc = async (options = {}) => {
       totalErrors += pathFiles.length;
     }
 
+    // Handle files that already exist (usually indicated in result.uploaded or result.skipped)
+    if (result.uploaded && result.uploaded.length > 0) {
+      try {
+        const alreadyUploadedPaths = result.uploaded
+          .filter(
+            (upload) =>
+              upload.status === 'already_exists' || upload.alreadyExists,
+          )
+          .map((upload) => {
+            // Find the corresponding file path from pathFiles
+            const matchingFile = pathFiles.find(
+              (f) =>
+                f.name === upload.fileName ||
+                f.name === upload.originalName,
+            );
+            return matchingFile ? matchingFile.path : null;
+          })
+          .filter(Boolean);
+
+        if (alreadyUploadedPaths.length > 0) {
+          await supabase
+            .from('uploader')
+            .update({ status: 'file-uploaded' })
+            .in('original_path', alreadyUploadedPaths);
+
+          console.log(
+            ` 📝 Updated status to "file-uploaded" for ${alreadyUploadedPaths.length} already existing files`,
+          );
+        }
+      } catch (error) {
+        console.error(
+          ` ⚠️ Error updating status for already existing files: ${error.message}`,
+        );
+      }
+    }
+
     // Small delay between path groups
     await new Promise((resolve) => setTimeout(resolve, 100));
   }
@@ -1675,6 +1931,7 @@ const propagateArelaPath = async (options = {}) => {
   }
 
   console.log('🔍 Finding pedimento_simplificado records with arela_path...');
+  writeLog('🔍 Starting arela_path propagation process');
 
   // Get all pedimento_simplificado records that have arela_path
   const { data: pedimentoRecords, error: pedimentoError } = await supabase
@@ -1693,12 +1950,16 @@ const propagateArelaPath = async (options = {}) => {
 
   if (!pedimentoRecords || pedimentoRecords.length === 0) {
     console.log('ℹ️ No pedimento_simplificado records with arela_path found');
+    writeLog('ℹ️ No pedimento_simplificado records with arela_path found');
     return { processedCount: 0, updatedCount: 0, errorCount: 0 };
   }
 
   console.log(
     `📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
   );
+  writeLog(
+    `📋 Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
+  );
 
   let totalProcessed = 0;
   let totalUpdated = 0;
@@ -1730,6 +1991,9 @@ const propagateArelaPath = async (options = {}) => {
 
     console.log(`\n🔍 Processing: ${pedimento.filename}`);
     console.log(` 📁 Base path: ${basePath}`);
+    writeLog(
+      `🔍 Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`,
+    );
 
     // Extract folder part from existing arela_path by removing the filename
     const existingPath = pedimento.arela_path;
@@ -1761,12 +2025,16 @@ const propagateArelaPath = async (options = {}) => {
 
     if (!relatedFiles || relatedFiles.length === 0) {
       console.log(` ℹ️ No related files found needing arela_path update`);
+      writeLog(`ℹ️ No related files found for ${pedimento.filename}`);
       continue;
     }
 
     console.log(
       ` 📄 Found ${relatedFiles.length} related files to update:`,
     );
+    writeLog(
+      `📄 Found ${relatedFiles.length} related files to update for ${pedimento.filename}`,
+    );
 
     // Show first 10 files, then indicate if there are more
     const filesToShow = relatedFiles.slice(0, 10);
@@ -1833,9 +2101,15 @@ const propagateArelaPath = async (options = {}) => {
        console.error(
          `❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
        );
+        writeLog(
+          `❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
+        );
        totalErrors++;
      } else {
        console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
+        writeLog(
+          `✅ Successfully updated ${batchUpdated} related files for ${pedimento.filename} -> ${folderArelaPath}`,
+        );
        totalUpdated += batchUpdated;
      }
    } catch (error) {
@@ -1843,6 +2117,7 @@ const propagateArelaPath = async (options = {}) => {
        `❌ Error processing ${pedimento.filename}:`,
        error.message,
      );
+      writeLog(`❌ Error processing ${pedimento.filename}: ${error.message}`);
      totalErrors++;
    }
 
@@ -1866,6 +2141,14 @@ const propagateArelaPath = async (options = {}) => {
   console.log(` ❌ Errors: ${totalErrors}`);
   console.log(`${'='.repeat(60)}\n`);
 
+  // Write comprehensive log summary
+  writeLog(
+    `🎯 ARELA PATH PROPAGATION COMPLETED - Summary: Processed: ${totalProcessed} pedimento records, Updated: ${totalUpdated} related files, Errors: ${totalErrors}`,
+  );
+
+  // Ensure logs are flushed
+  flushLogBuffer();
+
   return {
     processedCount: totalProcessed,
     updatedCount: totalUpdated,
@@ -1873,10 +2156,135 @@ const propagateArelaPath = async (options = {}) => {
   };
 };
 
+/**
+ * Helper function to query files that need to be uploaded
+ * These are files that have been detected but not yet uploaded
+ * Uses the same RFC filtering logic as uploadFilesByRfc for consistency
+ */
+const getFilesReadyForUpload = async (options = {}) => {
+  if (!supabase) {
+    throw new Error('Supabase client not initialized');
+  }
+
+  console.log('🔍 Querying files ready for upload...');
+
+  // Check if UPLOAD_RFCS is configured
+  if (!uploadRfcs || uploadRfcs.length === 0) {
+    console.log(
+      'ℹ️ No UPLOAD_RFCS configured. Please set UPLOAD_RFCS environment variable to see files ready for upload.',
+    );
+    console.log(
+      ' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
+    );
+    return [];
+  }
+
+  console.log(`🎯 Using RFC filter: ${uploadRfcs.join(', ')}`);
+
+  // Step 1: Find pedimento_simplificado documents for the specified RFCs that have arela_path
+  console.log(
+    '🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
+  );
+  const { data: pedimentoRecords, error: pedimentoError } = await supabase
+    .from('uploader')
+    .select('arela_path')
+    .eq('document_type', 'pedimento_simplificado')
+    .in('rfc', uploadRfcs)
+    .not('arela_path', 'is', null);
+
+  if (pedimentoError) {
+    throw new Error(
+      `Error querying pedimento_simplificado records: ${pedimentoError.message}`,
+    );
+  }
+
+  if (!pedimentoRecords || pedimentoRecords.length === 0) {
+    console.log('ℹ️ No pedimento_simplificado records with arela_path found');
+    return [];
+  }
+
+  // Get unique arela_paths
+  const uniqueArelaPaths = [
+    ...new Set(pedimentoRecords.map((r) => r.arela_path)),
+  ];
+  console.log(
+    `📋 Found ${pedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
+  );
+
+  // Step 2: Find all related files with these arela_paths that haven't been uploaded yet
+  console.log('🔍 Finding all related files that need to be uploaded...');
+
+  // Process arela_paths in chunks to avoid URI length limits
+  let allReadyFiles = [];
+  const chunkSize = 50;
+
+  for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
+    const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
+
+    const { data: chunkFiles, error: chunkError } = await supabase
+      .from('uploader')
+      .select(
+        'id, original_path, arela_path, filename, rfc, document_type, status',
+      )
+      .in('arela_path', pathChunk)
+      .neq('status', 'file-uploaded')
+      .not('original_path', 'is', null);
+
+    if (chunkError) {
+      throw new Error(
+        `Error querying files for arela_paths chunk: ${chunkError.message}`,
+      );
+    }
+
+    if (chunkFiles && chunkFiles.length > 0) {
+      allReadyFiles = allReadyFiles.concat(chunkFiles);
+    }
+  }
+
+  const readyFiles = allReadyFiles;
+
+  console.log(`📋 Found ${readyFiles?.length || 0} files ready for upload`);
+
+  if (readyFiles && readyFiles.length > 0) {
+    // Group by document type for summary
+    const byDocType = readyFiles.reduce((acc, file) => {
+      const docType = file.document_type || 'Unknown';
+      acc[docType] = (acc[docType] || 0) + 1;
+      return acc;
+    }, {});
+
+    console.log('📊 Files by document type:');
+    for (const [docType, count] of Object.entries(byDocType)) {
+      console.log(` ${docType}: ${count} files`);
+    }
+
+    // Group by RFC
+    const byRfc = readyFiles.reduce((acc, file) => {
+      const rfc = file.rfc || 'No RFC';
+      acc[rfc] = (acc[rfc] || 0) + 1;
+      return acc;
+    }, {});
+
+    console.log('📊 Files by RFC:');
+    for (const [rfc, count] of Object.entries(byRfc)) {
+      console.log(` ${rfc}: ${count} files`);
+    }
+  }
+
+  return readyFiles || [];
+};
+
 program
   .name('arela-uploader')
   .description(
-    'CLI to upload folders to Arela API or Supabase Storage with automatic processing',
+    'CLI to upload folders to Arela API or Supabase Storage with automatic processing\n\n' +
+      'Status workflow:\n' +
+      ' fs-stats → detected → file-uploaded\n' +
+      ' ├─ Phase 1: --stats-only (collects filesystem stats, status: fs-stats)\n' +
+      ' ├─ Phase 2: --detect-pdfs (detects document types, status: detected)\n' +
+      ' ├─ Phase 3: --propagate-arela-path (organizes files by pedimento)\n' +
+      ' └─ Phase 4: --upload-by-rfc (uploads files, status: file-uploaded)\n\n' +
+      'Use --query-ready-files to see files ready for upload (status: detected with arela_path)',
   )
   .option('-v, --version', 'output the version number')
   .option('-p, --prefix <prefix>', 'Prefix path in bucket', '')
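Note: combining the flags named in the new description, a staged run might look like this (positional arguments and required environment variables are omitted; exact invocation shapes depend on the full option list):

    arela --stats-only             # Phase 1: collect filesystem stats (status: fs-stats)
    arela --detect-pdfs            # Phase 2: detect document types (status: detected)
    arela --propagate-arela-path   # Phase 3: organize files by pedimento
    arela --upload-by-rfc          # Phase 4: upload files (status: file-uploaded)

    arela --run-all-phases         # or run all four phases in sequence
    arela --query-ready-files      # list files detected but not yet uploaded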
@@ -1927,6 +2335,10 @@ program
     '--run-all-phases',
     'Run all 4 phases in sequence: stats → detect → propagate → upload',
   )
+  .option(
+    '--query-ready-files',
+    'Query and display files that are ready for upload (have been detected but not uploaded)',
+  )
   .action(async (options) => {
     if (options.version) {
       console.log(packageVersion);
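Note: Commander maps kebab-case flags to camelCase properties, so the `--query-ready-files` option registered above is read as `options.queryReadyFiles` in the action handler in the next hunk.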
@@ -1948,6 +2360,25 @@ program
       return;
     }
 
+    // Handle query-ready-files option
+    if (options.queryReadyFiles) {
+      await checkCredentials(true); // Force Supabase mode
+
+      const readyFiles = await getFilesReadyForUpload();
+
+      if (readyFiles.length === 0) {
+        console.log('ℹ️ No files are currently ready for upload');
+        console.log(
+          ' Tip: Run --detect-pdfs and --propagate-arela-path first to prepare files for upload',
+        );
+      } else {
+        console.log(`\n📋 ${readyFiles.length} files are ready for upload!`);
+        console.log(' Use --upload-by-rfc to upload them to Arela API');
+      }
+
+      return;
+    }
+
     // Handle run-all-phases option
     if (options.runAllPhases) {
       console.log('🚀 Starting all 4 phases in sequence...');