@arela/uploader 0.2.2 → 0.2.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/index.js +469 -38
package/package.json
CHANGED
package/src/index.js
CHANGED
|
@@ -17,7 +17,7 @@ config();
|
|
|
17
17
|
const program = new Command();
|
|
18
18
|
|
|
19
19
|
// Read package.json version at startup
|
|
20
|
-
let packageVersion = '
|
|
20
|
+
let packageVersion = '0.2.4'; // fallback
|
|
21
21
|
try {
|
|
22
22
|
const __filename = new URL(import.meta.url).pathname;
|
|
23
23
|
const __dirname = path.dirname(__filename);
|
|
@@ -158,8 +158,8 @@ const checkCredentials = async (forceSupabase = false) => {
|
|
|
158
158
|
if (!supabaseUrl || !supabaseKey || !bucket) {
|
|
159
159
|
console.error(
|
|
160
160
|
'⚠️ Missing credentials. Please set either:\n' +
|
|
161
|
-
|
|
162
|
-
|
|
161
|
+
' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
|
|
162
|
+
' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
|
|
163
163
|
);
|
|
164
164
|
process.exit(1);
|
|
165
165
|
}
|
|
@@ -774,6 +774,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
774
774
|
console.log(
|
|
775
775
|
'🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...',
|
|
776
776
|
);
|
|
777
|
+
writeLog(
|
|
778
|
+
'🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents',
|
|
779
|
+
);
|
|
777
780
|
|
|
778
781
|
const detectionService = new FileDetectionService();
|
|
779
782
|
const processingBatchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
|
|
@@ -786,12 +789,16 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
786
789
|
let chunkNumber = 1;
|
|
787
790
|
|
|
788
791
|
console.log('📝 Processing PDF files in chunks of 1000 records...');
|
|
792
|
+
writeLog('📝 Starting PDF detection processing in chunks of 1000 records');
|
|
789
793
|
|
|
790
794
|
// Process records in chunks of 1000
|
|
791
795
|
while (true) {
|
|
792
796
|
console.log(
|
|
793
797
|
`\n📥 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
|
|
794
798
|
);
|
|
799
|
+
writeLog(
|
|
800
|
+
`📥 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})`,
|
|
801
|
+
);
|
|
795
802
|
|
|
796
803
|
// Fetch next chunk of PDF records
|
|
797
804
|
const { data: pdfRecords, error: queryError } = await supabase
|
|
@@ -811,12 +818,18 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
811
818
|
// If no records found, we're done
|
|
812
819
|
if (!pdfRecords || pdfRecords.length === 0) {
|
|
813
820
|
console.log(`📝 No more PDF files found. Processing completed.`);
|
|
821
|
+
writeLog(
|
|
822
|
+
`📝 No more PDF files found. Processing completed at chunk ${chunkNumber}`,
|
|
823
|
+
);
|
|
814
824
|
break;
|
|
815
825
|
}
|
|
816
826
|
|
|
817
827
|
console.log(
|
|
818
828
|
`📊 Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
|
|
819
829
|
);
|
|
830
|
+
writeLog(
|
|
831
|
+
`📊 Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
|
|
832
|
+
);
|
|
820
833
|
|
|
821
834
|
// Create progress bar for this chunk
|
|
822
835
|
const progressBar = new cliProgress.SingleBar({
|
|
@@ -841,6 +854,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
841
854
|
try {
|
|
842
855
|
// Check if file still exists
|
|
843
856
|
if (!fs.existsSync(record.original_path)) {
|
|
857
|
+
writeLog(
|
|
858
|
+
`⚠️ FILE NOT FOUND: ${record.filename} at ${record.original_path}`,
|
|
859
|
+
);
|
|
844
860
|
updatePromises.push(
|
|
845
861
|
supabase
|
|
846
862
|
.from('uploader')
|
|
@@ -883,6 +899,13 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
883
899
|
if (detection.detectedType) {
|
|
884
900
|
chunkDetected++;
|
|
885
901
|
totalDetected++;
|
|
902
|
+
writeLog(
|
|
903
|
+
`✅ DETECTED: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${detection.fields?.rfc || 'N/A'}`,
|
|
904
|
+
);
|
|
905
|
+
} else {
|
|
906
|
+
writeLog(
|
|
907
|
+
`⏭️ NOT DETECTED: ${record.filename} - No pedimento-simplificado pattern found`,
|
|
908
|
+
);
|
|
886
909
|
}
|
|
887
910
|
|
|
888
911
|
updatePromises.push(
|
|
@@ -893,6 +916,7 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
893
916
|
`❌ Error detecting ${record.filename}:`,
|
|
894
917
|
error.message,
|
|
895
918
|
);
|
|
919
|
+
writeLog(`❌ ERROR detecting ${record.filename}: ${error.message}`);
|
|
896
920
|
chunkErrors++;
|
|
897
921
|
totalErrors++;
|
|
898
922
|
|
|
@@ -930,6 +954,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
930
954
|
console.log(
|
|
931
955
|
`✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
|
|
932
956
|
);
|
|
957
|
+
writeLog(
|
|
958
|
+
`✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
|
|
959
|
+
);
|
|
933
960
|
|
|
934
961
|
// Move to next chunk
|
|
935
962
|
offset += queryBatchSize;
|
|
@@ -940,6 +967,9 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
940
967
|
console.log(
|
|
941
968
|
`📝 Reached end of records (chunk had ${pdfRecords.length} records).`,
|
|
942
969
|
);
|
|
970
|
+
writeLog(
|
|
971
|
+
`📝 Reached end of records (chunk had ${pdfRecords.length} records)`,
|
|
972
|
+
);
|
|
943
973
|
break;
|
|
944
974
|
}
|
|
945
975
|
|
|
@@ -950,6 +980,15 @@ const detectPedimentosInDatabase = async (options = {}) => {
|
|
|
950
980
|
console.log(
|
|
951
981
|
`📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
|
|
952
982
|
);
|
|
983
|
+
|
|
984
|
+
// Write comprehensive log summary
|
|
985
|
+
writeLog(
|
|
986
|
+
`📊 PHASE 2 PDF DETECTION COMPLETED - Summary: Detected: ${totalDetected} pedimento-simplificado documents, Processed: ${totalProcessed} PDF files, Errors: ${totalErrors}`,
|
|
987
|
+
);
|
|
988
|
+
|
|
989
|
+
// Ensure logs are flushed
|
|
990
|
+
flushLogBuffer();
|
|
991
|
+
|
|
953
992
|
return {
|
|
954
993
|
detectedCount: totalDetected,
|
|
955
994
|
processedCount: totalProcessed,
|
|
@@ -1186,6 +1225,37 @@ const processFilesInBatches = async (
|
|
|
1186
1225
|
}
|
|
1187
1226
|
});
|
|
1188
1227
|
|
|
1228
|
+
// Update status to "file-uploaded" for successfully uploaded files
|
|
1229
|
+
if (result.uploaded && result.uploaded.length > 0 && supabase) {
|
|
1230
|
+
try {
|
|
1231
|
+
const uploadedFilePaths = result.uploaded
|
|
1232
|
+
.map((upload) => {
|
|
1233
|
+
const apiFile = apiFiles.find(
|
|
1234
|
+
(f) =>
|
|
1235
|
+
f.name === upload.originalName ||
|
|
1236
|
+
f.originalName === upload.originalName,
|
|
1237
|
+
);
|
|
1238
|
+
return apiFile ? apiFile.path : null;
|
|
1239
|
+
})
|
|
1240
|
+
.filter(Boolean);
|
|
1241
|
+
|
|
1242
|
+
if (uploadedFilePaths.length > 0) {
|
|
1243
|
+
await supabase
|
|
1244
|
+
.from('uploader')
|
|
1245
|
+
.update({ status: 'file-uploaded' })
|
|
1246
|
+
.in('original_path', uploadedFilePaths);
|
|
1247
|
+
|
|
1248
|
+
console.log(
|
|
1249
|
+
` 📝 Updated status to "file-uploaded" for ${uploadedFilePaths.length} files`,
|
|
1250
|
+
);
|
|
1251
|
+
}
|
|
1252
|
+
} catch (error) {
|
|
1253
|
+
console.error(
|
|
1254
|
+
` ⚠️ Error updating status for uploaded files: ${error.message}`,
|
|
1255
|
+
);
|
|
1256
|
+
}
|
|
1257
|
+
}
|
|
1258
|
+
|
|
1189
1259
|
result.errors.forEach((error) => {
|
|
1190
1260
|
writeLog(
|
|
1191
1261
|
`ERROR: ${error.fileName}: ${error.error} (${error.step})`,
|
|
@@ -1266,11 +1336,39 @@ const processFilesInBatches = async (
|
|
|
1266
1336
|
if (processedPaths.has(uploadPath)) {
|
|
1267
1337
|
totalSkipped++;
|
|
1268
1338
|
writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
|
|
1339
|
+
|
|
1340
|
+
// Update status to "file-uploaded" for skipped files (they already exist)
|
|
1341
|
+
if (supabase) {
|
|
1342
|
+
try {
|
|
1343
|
+
await supabase
|
|
1344
|
+
.from('uploader')
|
|
1345
|
+
.update({ status: 'file-uploaded' })
|
|
1346
|
+
.eq('original_path', file);
|
|
1347
|
+
} catch (error) {
|
|
1348
|
+
console.error(
|
|
1349
|
+
` ⚠️ Error updating status for skipped file: ${error.message}`,
|
|
1350
|
+
);
|
|
1351
|
+
}
|
|
1352
|
+
}
|
|
1269
1353
|
} else {
|
|
1270
1354
|
await uploadToSupabase(file, uploadPath);
|
|
1271
1355
|
totalUploaded++;
|
|
1272
1356
|
writeLog(`SUCCESS: ${file} -> ${uploadPath}`);
|
|
1273
1357
|
processedPaths.add(uploadPath);
|
|
1358
|
+
|
|
1359
|
+
// Update status to "file-uploaded" for successfully uploaded files
|
|
1360
|
+
if (supabase) {
|
|
1361
|
+
try {
|
|
1362
|
+
await supabase
|
|
1363
|
+
.from('uploader')
|
|
1364
|
+
.update({ status: 'file-uploaded' })
|
|
1365
|
+
.eq('original_path', file);
|
|
1366
|
+
} catch (error) {
|
|
1367
|
+
console.error(
|
|
1368
|
+
` ⚠️ Error updating status for uploaded file: ${error.message}`,
|
|
1369
|
+
);
|
|
1370
|
+
}
|
|
1371
|
+
}
|
|
1274
1372
|
}
|
|
1275
1373
|
} catch (error) {
|
|
1276
1374
|
totalErrors++;
|
|
@@ -1333,65 +1431,170 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1333
1431
|
console.log(`📋 Target RFCs: ${uploadRfcs.join(', ')}`);
|
|
1334
1432
|
console.log('🔍 Searching for files to upload...');
|
|
1335
1433
|
|
|
1336
|
-
//
|
|
1337
|
-
const {
|
|
1434
|
+
// First, count total files for the RFCs to show filtering effect
|
|
1435
|
+
const { count: totalRfcFiles, error: countError } = await supabase
|
|
1436
|
+
.from('uploader')
|
|
1437
|
+
.select('*', { count: 'exact', head: true })
|
|
1438
|
+
.in('rfc', uploadRfcs)
|
|
1439
|
+
.not('arela_path', 'is', null);
|
|
1440
|
+
|
|
1441
|
+
if (countError) {
|
|
1442
|
+
console.warn('⚠️ Could not count total RFC files:', countError.message);
|
|
1443
|
+
} else {
|
|
1444
|
+
console.log(`📊 Total files for specified RFCs: ${totalRfcFiles || 0}`);
|
|
1445
|
+
}
|
|
1446
|
+
|
|
1447
|
+
// Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
|
|
1448
|
+
console.log(
|
|
1449
|
+
'🎯 Finding pedimento_simplificado records for specified RFCs...',
|
|
1450
|
+
);
|
|
1451
|
+
const { data: pedimentoRfcRecords, error: pedimentoRfcError } = await supabase
|
|
1338
1452
|
.from('uploader')
|
|
1339
1453
|
.select('arela_path')
|
|
1454
|
+
.eq('document_type', 'pedimento_simplificado')
|
|
1340
1455
|
.in('rfc', uploadRfcs)
|
|
1341
1456
|
.not('arela_path', 'is', null);
|
|
1342
1457
|
|
|
1343
|
-
if (
|
|
1344
|
-
console.error(
|
|
1458
|
+
if (pedimentoRfcError) {
|
|
1459
|
+
console.error(
|
|
1460
|
+
'❌ Error fetching pedimento RFC records:',
|
|
1461
|
+
pedimentoRfcError.message,
|
|
1462
|
+
);
|
|
1345
1463
|
return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
|
|
1346
1464
|
}
|
|
1347
1465
|
|
|
1348
|
-
if (!
|
|
1349
|
-
console.log('ℹ️ No files found for the specified RFCs with arela_path');
|
|
1466
|
+
if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
|
|
1350
1467
|
console.log(
|
|
1351
|
-
|
|
1468
|
+
'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
|
|
1352
1469
|
);
|
|
1353
1470
|
return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
|
|
1354
1471
|
}
|
|
1355
1472
|
|
|
1356
|
-
//
|
|
1357
|
-
const uniqueArelaPaths = [
|
|
1473
|
+
// Get unique arela_paths from pedimento records
|
|
1474
|
+
const uniqueArelaPaths = [
|
|
1475
|
+
...new Set(pedimentoRfcRecords.map((r) => r.arela_path)),
|
|
1476
|
+
];
|
|
1358
1477
|
console.log(
|
|
1359
|
-
|
|
1478
|
+
`📋 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`,
|
|
1360
1479
|
);
|
|
1361
1480
|
|
|
1362
|
-
// Step
|
|
1363
|
-
|
|
1364
|
-
|
|
1365
|
-
let hasMore = true;
|
|
1366
|
-
let offset = 0;
|
|
1367
|
-
const queryBatchSize = 1000;
|
|
1481
|
+
// Step 2: Get all files with these arela_paths that haven't been uploaded yet
|
|
1482
|
+
let rfcRecords = [];
|
|
1483
|
+
const chunkSize = 50;
|
|
1368
1484
|
|
|
1369
|
-
|
|
1485
|
+
for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
|
|
1486
|
+
const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
|
|
1370
1487
|
|
|
1371
|
-
|
|
1372
|
-
const { data: batch, error: queryError } = await supabase
|
|
1488
|
+
const { data: chunkFiles, error: chunkError } = await supabase
|
|
1373
1489
|
.from('uploader')
|
|
1374
|
-
.select('
|
|
1375
|
-
.in('arela_path',
|
|
1376
|
-
.
|
|
1377
|
-
.
|
|
1490
|
+
.select('arela_path')
|
|
1491
|
+
.in('arela_path', pathChunk)
|
|
1492
|
+
.neq('status', 'file-uploaded')
|
|
1493
|
+
.not('arela_path', 'is', null);
|
|
1378
1494
|
|
|
1379
|
-
if (
|
|
1380
|
-
console.error(
|
|
1495
|
+
if (chunkError) {
|
|
1496
|
+
console.error(
|
|
1497
|
+
'❌ Error fetching files for arela_paths chunk:',
|
|
1498
|
+
chunkError.message,
|
|
1499
|
+
);
|
|
1381
1500
|
return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
|
|
1382
1501
|
}
|
|
1383
1502
|
|
|
1384
|
-
if (
|
|
1385
|
-
|
|
1503
|
+
if (chunkFiles && chunkFiles.length > 0) {
|
|
1504
|
+
rfcRecords = rfcRecords.concat(chunkFiles);
|
|
1505
|
+
}
|
|
1506
|
+
}
|
|
1507
|
+
|
|
1508
|
+
if (!rfcRecords || rfcRecords.length === 0) {
|
|
1509
|
+
if (totalRfcFiles && totalRfcFiles > 0) {
|
|
1510
|
+
console.log(
|
|
1511
|
+
`ℹ️ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`,
|
|
1512
|
+
);
|
|
1513
|
+
console.log(' No new files to upload.');
|
|
1386
1514
|
} else {
|
|
1387
|
-
|
|
1388
|
-
|
|
1515
|
+
console.log('ℹ️ No files found for the specified RFCs with arela_path');
|
|
1516
|
+
console.log(
|
|
1517
|
+
` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
|
|
1518
|
+
);
|
|
1519
|
+
}
|
|
1520
|
+
return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
|
|
1521
|
+
}
|
|
1522
|
+
|
|
1523
|
+
// Show filtering effect
|
|
1524
|
+
const uploadableArelaPaths = [
|
|
1525
|
+
...new Set(rfcRecords.map((r) => r.arela_path)),
|
|
1526
|
+
];
|
|
1527
|
+
const skipped = (totalRfcFiles || 0) - rfcRecords.length;
|
|
1528
|
+
if (skipped > 0) {
|
|
1529
|
+
console.log(
|
|
1530
|
+
`📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`,
|
|
1531
|
+
);
|
|
1532
|
+
} else {
|
|
1533
|
+
console.log(`📊 Found ${rfcRecords.length} files ready for upload`);
|
|
1534
|
+
}
|
|
1535
|
+
|
|
1536
|
+
console.log(
|
|
1537
|
+
`🎯 Found ${uploadableArelaPaths.length} unique arela_path(s) with files ready for upload`,
|
|
1538
|
+
);
|
|
1389
1539
|
|
|
1390
|
-
|
|
1391
|
-
|
|
1540
|
+
// Step 3: Get ALL files that have these arela_paths (including supporting documents)
|
|
1541
|
+
// Process arela_paths in smaller chunks to avoid URI length limits
|
|
1542
|
+
let allRelatedFiles = [];
|
|
1543
|
+
const arelaPathChunkSize = 50; // Process 50 arela_paths at a time to avoid URI limits
|
|
1544
|
+
const queryBatchSize = 1000;
|
|
1545
|
+
|
|
1546
|
+
console.log(
|
|
1547
|
+
'📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...',
|
|
1548
|
+
);
|
|
1549
|
+
|
|
1550
|
+
// Process arela_paths in chunks
|
|
1551
|
+
for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
|
|
1552
|
+
const arelaPathChunk = uploadableArelaPaths.slice(
|
|
1553
|
+
i,
|
|
1554
|
+
i + arelaPathChunkSize,
|
|
1555
|
+
);
|
|
1556
|
+
console.log(
|
|
1557
|
+
` Processing arela_path chunk ${Math.floor(i / arelaPathChunkSize) + 1}/${Math.ceil(uploadableArelaPaths.length / arelaPathChunkSize)} (${arelaPathChunk.length} paths)`,
|
|
1558
|
+
);
|
|
1559
|
+
|
|
1560
|
+
// For each chunk of arela_paths, use pagination to get all related files
|
|
1561
|
+
let hasMore = true;
|
|
1562
|
+
let offset = 0;
|
|
1563
|
+
|
|
1564
|
+
while (hasMore) {
|
|
1565
|
+
const { data: batch, error: queryError } = await supabase
|
|
1566
|
+
.from('uploader')
|
|
1567
|
+
.select('id, original_path, arela_path, filename, rfc, document_type')
|
|
1568
|
+
.in('arela_path', arelaPathChunk)
|
|
1569
|
+
.not('original_path', 'is', null)
|
|
1570
|
+
.neq('status', 'file-uploaded')
|
|
1571
|
+
.range(offset, offset + queryBatchSize - 1);
|
|
1572
|
+
|
|
1573
|
+
if (queryError) {
|
|
1574
|
+
console.error(
|
|
1575
|
+
`❌ Error fetching related files for chunk ${Math.floor(i / arelaPathChunkSize) + 1}:`,
|
|
1576
|
+
queryError.message,
|
|
1577
|
+
);
|
|
1578
|
+
return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
|
|
1579
|
+
}
|
|
1580
|
+
|
|
1581
|
+
if (!batch || batch.length === 0) {
|
|
1392
1582
|
hasMore = false;
|
|
1583
|
+
} else {
|
|
1584
|
+
allRelatedFiles = allRelatedFiles.concat(batch);
|
|
1585
|
+
offset += queryBatchSize;
|
|
1586
|
+
|
|
1587
|
+
// If we got less than queryBatchSize, we've reached the end for this chunk
|
|
1588
|
+
if (batch.length < queryBatchSize) {
|
|
1589
|
+
hasMore = false;
|
|
1590
|
+
}
|
|
1393
1591
|
}
|
|
1394
1592
|
}
|
|
1593
|
+
|
|
1594
|
+
// Small delay between chunks to avoid overwhelming the database
|
|
1595
|
+
if (i + arelaPathChunkSize < uploadableArelaPaths.length) {
|
|
1596
|
+
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
1597
|
+
}
|
|
1395
1598
|
}
|
|
1396
1599
|
|
|
1397
1600
|
if (!allRelatedFiles || allRelatedFiles.length === 0) {
|
|
@@ -1400,7 +1603,7 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1400
1603
|
}
|
|
1401
1604
|
|
|
1402
1605
|
console.log(
|
|
1403
|
-
`📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`,
|
|
1606
|
+
`📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents, excluding already uploaded)`,
|
|
1404
1607
|
);
|
|
1405
1608
|
|
|
1406
1609
|
// Group by RFC and arela_path for better organization
|
|
@@ -1560,8 +1763,8 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1560
1763
|
// Set folder structure for this group - concatenate custom prefix with arela_path
|
|
1561
1764
|
const folderStructure = options.folderStructure
|
|
1562
1765
|
? `${options.folderStructure}/${arelaPath}`
|
|
1563
|
-
|
|
1564
|
-
|
|
1766
|
+
.replace(/\/+/g, '/')
|
|
1767
|
+
.replace(/\/$/, '')
|
|
1565
1768
|
: arelaPath;
|
|
1566
1769
|
pathFormData.append('folderStructure', folderStructure);
|
|
1567
1770
|
pathFormData.append('autoDetect', 'true');
|
|
@@ -1616,6 +1819,23 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1616
1819
|
` 📁 Files organized: ${result.stats.organizedCount}`,
|
|
1617
1820
|
);
|
|
1618
1821
|
}
|
|
1822
|
+
|
|
1823
|
+
// Update status to "file-uploaded" for successfully uploaded files
|
|
1824
|
+
try {
|
|
1825
|
+
const uploadedFilePaths = pathFiles.map((file) => file.path);
|
|
1826
|
+
await supabase
|
|
1827
|
+
.from('uploader')
|
|
1828
|
+
.update({ status: 'file-uploaded' })
|
|
1829
|
+
.in('original_path', uploadedFilePaths);
|
|
1830
|
+
|
|
1831
|
+
console.log(
|
|
1832
|
+
` 📝 Updated status to "file-uploaded" for ${uploadedFilePaths.length} files`,
|
|
1833
|
+
);
|
|
1834
|
+
} catch (error) {
|
|
1835
|
+
console.error(
|
|
1836
|
+
` ⚠️ Error updating status for uploaded files: ${error.message}`,
|
|
1837
|
+
);
|
|
1838
|
+
}
|
|
1619
1839
|
} else {
|
|
1620
1840
|
console.error(` ❌ Upload failed for ${folderStructure}:`);
|
|
1621
1841
|
if (result.errors && result.errors.length > 0) {
|
|
@@ -1626,6 +1846,42 @@ const uploadFilesByRfc = async (options = {}) => {
|
|
|
1626
1846
|
totalErrors += pathFiles.length;
|
|
1627
1847
|
}
|
|
1628
1848
|
|
|
1849
|
+
// Handle files that already exist (usually indicated in result.uploaded or result.skipped)
|
|
1850
|
+
if (result.uploaded && result.uploaded.length > 0) {
|
|
1851
|
+
try {
|
|
1852
|
+
const alreadyUploadedPaths = result.uploaded
|
|
1853
|
+
.filter(
|
|
1854
|
+
(upload) =>
|
|
1855
|
+
upload.status === 'already_exists' || upload.alreadyExists,
|
|
1856
|
+
)
|
|
1857
|
+
.map((upload) => {
|
|
1858
|
+
// Find the corresponding file path from pathFiles
|
|
1859
|
+
const matchingFile = pathFiles.find(
|
|
1860
|
+
(f) =>
|
|
1861
|
+
f.name === upload.fileName ||
|
|
1862
|
+
f.name === upload.originalName,
|
|
1863
|
+
);
|
|
1864
|
+
return matchingFile ? matchingFile.path : null;
|
|
1865
|
+
})
|
|
1866
|
+
.filter(Boolean);
|
|
1867
|
+
|
|
1868
|
+
if (alreadyUploadedPaths.length > 0) {
|
|
1869
|
+
await supabase
|
|
1870
|
+
.from('uploader')
|
|
1871
|
+
.update({ status: 'file-uploaded' })
|
|
1872
|
+
.in('original_path', alreadyUploadedPaths);
|
|
1873
|
+
|
|
1874
|
+
console.log(
|
|
1875
|
+
` 📝 Updated status to "file-uploaded" for ${alreadyUploadedPaths.length} already existing files`,
|
|
1876
|
+
);
|
|
1877
|
+
}
|
|
1878
|
+
} catch (error) {
|
|
1879
|
+
console.error(
|
|
1880
|
+
` ⚠️ Error updating status for already existing files: ${error.message}`,
|
|
1881
|
+
);
|
|
1882
|
+
}
|
|
1883
|
+
}
|
|
1884
|
+
|
|
1629
1885
|
// Small delay between path groups
|
|
1630
1886
|
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
1631
1887
|
}
|
|
@@ -1675,6 +1931,7 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1675
1931
|
}
|
|
1676
1932
|
|
|
1677
1933
|
console.log('🔍 Finding pedimento_simplificado records with arela_path...');
|
|
1934
|
+
writeLog('🔍 Starting arela_path propagation process');
|
|
1678
1935
|
|
|
1679
1936
|
// Get all pedimento_simplificado records that have arela_path
|
|
1680
1937
|
const { data: pedimentoRecords, error: pedimentoError } = await supabase
|
|
@@ -1693,12 +1950,16 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1693
1950
|
|
|
1694
1951
|
if (!pedimentoRecords || pedimentoRecords.length === 0) {
|
|
1695
1952
|
console.log('ℹ️ No pedimento_simplificado records with arela_path found');
|
|
1953
|
+
writeLog('ℹ️ No pedimento_simplificado records with arela_path found');
|
|
1696
1954
|
return { processedCount: 0, updatedCount: 0, errorCount: 0 };
|
|
1697
1955
|
}
|
|
1698
1956
|
|
|
1699
1957
|
console.log(
|
|
1700
1958
|
`📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
|
|
1701
1959
|
);
|
|
1960
|
+
writeLog(
|
|
1961
|
+
`📋 Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
|
|
1962
|
+
);
|
|
1702
1963
|
|
|
1703
1964
|
let totalProcessed = 0;
|
|
1704
1965
|
let totalUpdated = 0;
|
|
@@ -1730,6 +1991,9 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1730
1991
|
|
|
1731
1992
|
console.log(`\n🔍 Processing: ${pedimento.filename}`);
|
|
1732
1993
|
console.log(` 📁 Base path: ${basePath}`);
|
|
1994
|
+
writeLog(
|
|
1995
|
+
`🔍 Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`,
|
|
1996
|
+
);
|
|
1733
1997
|
|
|
1734
1998
|
// Extract folder part from existing arela_path by removing the filename
|
|
1735
1999
|
const existingPath = pedimento.arela_path;
|
|
@@ -1761,12 +2025,16 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1761
2025
|
|
|
1762
2026
|
if (!relatedFiles || relatedFiles.length === 0) {
|
|
1763
2027
|
console.log(` ℹ️ No related files found needing arela_path update`);
|
|
2028
|
+
writeLog(`ℹ️ No related files found for ${pedimento.filename}`);
|
|
1764
2029
|
continue;
|
|
1765
2030
|
}
|
|
1766
2031
|
|
|
1767
2032
|
console.log(
|
|
1768
2033
|
` 📄 Found ${relatedFiles.length} related files to update:`,
|
|
1769
2034
|
);
|
|
2035
|
+
writeLog(
|
|
2036
|
+
`📄 Found ${relatedFiles.length} related files to update for ${pedimento.filename}`,
|
|
2037
|
+
);
|
|
1770
2038
|
|
|
1771
2039
|
// Show first 10 files, then indicate if there are more
|
|
1772
2040
|
const filesToShow = relatedFiles.slice(0, 10);
|
|
@@ -1833,9 +2101,15 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1833
2101
|
console.error(
|
|
1834
2102
|
`❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
|
|
1835
2103
|
);
|
|
2104
|
+
writeLog(
|
|
2105
|
+
`❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
|
|
2106
|
+
);
|
|
1836
2107
|
totalErrors++;
|
|
1837
2108
|
} else {
|
|
1838
2109
|
console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
|
|
2110
|
+
writeLog(
|
|
2111
|
+
`✅ Successfully updated ${batchUpdated} related files for ${pedimento.filename} -> ${folderArelaPath}`,
|
|
2112
|
+
);
|
|
1839
2113
|
totalUpdated += batchUpdated;
|
|
1840
2114
|
}
|
|
1841
2115
|
} catch (error) {
|
|
@@ -1843,6 +2117,7 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1843
2117
|
`❌ Error processing ${pedimento.filename}:`,
|
|
1844
2118
|
error.message,
|
|
1845
2119
|
);
|
|
2120
|
+
writeLog(`❌ Error processing ${pedimento.filename}: ${error.message}`);
|
|
1846
2121
|
totalErrors++;
|
|
1847
2122
|
}
|
|
1848
2123
|
|
|
@@ -1866,6 +2141,14 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1866
2141
|
console.log(` ❌ Errors: ${totalErrors}`);
|
|
1867
2142
|
console.log(`${'='.repeat(60)}\n`);
|
|
1868
2143
|
|
|
2144
|
+
// Write comprehensive log summary
|
|
2145
|
+
writeLog(
|
|
2146
|
+
`🎯 ARELA PATH PROPAGATION COMPLETED - Summary: Processed: ${totalProcessed} pedimento records, Updated: ${totalUpdated} related files, Errors: ${totalErrors}`,
|
|
2147
|
+
);
|
|
2148
|
+
|
|
2149
|
+
// Ensure logs are flushed
|
|
2150
|
+
flushLogBuffer();
|
|
2151
|
+
|
|
1869
2152
|
return {
|
|
1870
2153
|
processedCount: totalProcessed,
|
|
1871
2154
|
updatedCount: totalUpdated,
|
|
@@ -1873,10 +2156,135 @@ const propagateArelaPath = async (options = {}) => {
|
|
|
1873
2156
|
};
|
|
1874
2157
|
};
|
|
1875
2158
|
|
|
2159
|
+
/**
|
|
2160
|
+
* Helper function to query files that need to be uploaded
|
|
2161
|
+
* These are files that have been detected but not yet uploaded
|
|
2162
|
+
* Uses the same RFC filtering logic as uploadFilesByRfc for consistency
|
|
2163
|
+
*/
|
|
2164
|
+
const getFilesReadyForUpload = async (options = {}) => {
|
|
2165
|
+
if (!supabase) {
|
|
2166
|
+
throw new Error('Supabase client not initialized');
|
|
2167
|
+
}
|
|
2168
|
+
|
|
2169
|
+
console.log('🔍 Querying files ready for upload...');
|
|
2170
|
+
|
|
2171
|
+
// Check if UPLOAD_RFCS is configured
|
|
2172
|
+
if (!uploadRfcs || uploadRfcs.length === 0) {
|
|
2173
|
+
console.log(
|
|
2174
|
+
'ℹ️ No UPLOAD_RFCS configured. Please set UPLOAD_RFCS environment variable to see files ready for upload.',
|
|
2175
|
+
);
|
|
2176
|
+
console.log(
|
|
2177
|
+
' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
|
|
2178
|
+
);
|
|
2179
|
+
return [];
|
|
2180
|
+
}
|
|
2181
|
+
|
|
2182
|
+
console.log(`🎯 Using RFC filter: ${uploadRfcs.join(', ')}`);
|
|
2183
|
+
|
|
2184
|
+
// Step 1: Find pedimento_simplificado documents for the specified RFCs that have arela_path
|
|
2185
|
+
console.log(
|
|
2186
|
+
'🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
|
|
2187
|
+
);
|
|
2188
|
+
const { data: pedimentoRecords, error: pedimentoError } = await supabase
|
|
2189
|
+
.from('uploader')
|
|
2190
|
+
.select('arela_path')
|
|
2191
|
+
.eq('document_type', 'pedimento_simplificado')
|
|
2192
|
+
.in('rfc', uploadRfcs)
|
|
2193
|
+
.not('arela_path', 'is', null);
|
|
2194
|
+
|
|
2195
|
+
if (pedimentoError) {
|
|
2196
|
+
throw new Error(
|
|
2197
|
+
`Error querying pedimento_simplificado records: ${pedimentoError.message}`,
|
|
2198
|
+
);
|
|
2199
|
+
}
|
|
2200
|
+
|
|
2201
|
+
if (!pedimentoRecords || pedimentoRecords.length === 0) {
|
|
2202
|
+
console.log('ℹ️ No pedimento_simplificado records with arela_path found');
|
|
2203
|
+
return [];
|
|
2204
|
+
}
|
|
2205
|
+
|
|
2206
|
+
// Get unique arela_paths
|
|
2207
|
+
const uniqueArelaPaths = [
|
|
2208
|
+
...new Set(pedimentoRecords.map((r) => r.arela_path)),
|
|
2209
|
+
];
|
|
2210
|
+
console.log(
|
|
2211
|
+
`📋 Found ${pedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
|
|
2212
|
+
);
|
|
2213
|
+
|
|
2214
|
+
// Step 2: Find all related files with these arela_paths that haven't been uploaded yet
|
|
2215
|
+
console.log('🔍 Finding all related files that need to be uploaded...');
|
|
2216
|
+
|
|
2217
|
+
// Process arela_paths in chunks to avoid URI length limits
|
|
2218
|
+
let allReadyFiles = [];
|
|
2219
|
+
const chunkSize = 50;
|
|
2220
|
+
|
|
2221
|
+
for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
|
|
2222
|
+
const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
|
|
2223
|
+
|
|
2224
|
+
const { data: chunkFiles, error: chunkError } = await supabase
|
|
2225
|
+
.from('uploader')
|
|
2226
|
+
.select(
|
|
2227
|
+
'id, original_path, arela_path, filename, rfc, document_type, status',
|
|
2228
|
+
)
|
|
2229
|
+
.in('arela_path', pathChunk)
|
|
2230
|
+
.neq('status', 'file-uploaded')
|
|
2231
|
+
.not('original_path', 'is', null);
|
|
2232
|
+
|
|
2233
|
+
if (chunkError) {
|
|
2234
|
+
throw new Error(
|
|
2235
|
+
`Error querying files for arela_paths chunk: ${chunkError.message}`,
|
|
2236
|
+
);
|
|
2237
|
+
}
|
|
2238
|
+
|
|
2239
|
+
if (chunkFiles && chunkFiles.length > 0) {
|
|
2240
|
+
allReadyFiles = allReadyFiles.concat(chunkFiles);
|
|
2241
|
+
}
|
|
2242
|
+
}
|
|
2243
|
+
|
|
2244
|
+
const readyFiles = allReadyFiles;
|
|
2245
|
+
|
|
2246
|
+
console.log(`📋 Found ${readyFiles?.length || 0} files ready for upload`);
|
|
2247
|
+
|
|
2248
|
+
if (readyFiles && readyFiles.length > 0) {
|
|
2249
|
+
// Group by document type for summary
|
|
2250
|
+
const byDocType = readyFiles.reduce((acc, file) => {
|
|
2251
|
+
const docType = file.document_type || 'Unknown';
|
|
2252
|
+
acc[docType] = (acc[docType] || 0) + 1;
|
|
2253
|
+
return acc;
|
|
2254
|
+
}, {});
|
|
2255
|
+
|
|
2256
|
+
console.log('📊 Files by document type:');
|
|
2257
|
+
for (const [docType, count] of Object.entries(byDocType)) {
|
|
2258
|
+
console.log(` ${docType}: ${count} files`);
|
|
2259
|
+
}
|
|
2260
|
+
|
|
2261
|
+
// Group by RFC
|
|
2262
|
+
const byRfc = readyFiles.reduce((acc, file) => {
|
|
2263
|
+
const rfc = file.rfc || 'No RFC';
|
|
2264
|
+
acc[rfc] = (acc[rfc] || 0) + 1;
|
|
2265
|
+
return acc;
|
|
2266
|
+
}, {});
|
|
2267
|
+
|
|
2268
|
+
console.log('📊 Files by RFC:');
|
|
2269
|
+
for (const [rfc, count] of Object.entries(byRfc)) {
|
|
2270
|
+
console.log(` ${rfc}: ${count} files`);
|
|
2271
|
+
}
|
|
2272
|
+
}
|
|
2273
|
+
|
|
2274
|
+
return readyFiles || [];
|
|
2275
|
+
};
|
|
2276
|
+
|
|
1876
2277
|
program
|
|
1877
2278
|
.name('arela-uploader')
|
|
1878
2279
|
.description(
|
|
1879
|
-
'CLI to upload folders to Arela API or Supabase Storage with automatic processing'
|
|
2280
|
+
'CLI to upload folders to Arela API or Supabase Storage with automatic processing\n\n' +
|
|
2281
|
+
'Status workflow:\n' +
|
|
2282
|
+
' fs-stats → detected → file-uploaded\n' +
|
|
2283
|
+
' ├─ Phase 1: --stats-only (collects filesystem stats, status: fs-stats)\n' +
|
|
2284
|
+
' ├─ Phase 2: --detect-pdfs (detects document types, status: detected)\n' +
|
|
2285
|
+
' ├─ Phase 3: --propagate-arela-path (organizes files by pedimento)\n' +
|
|
2286
|
+
' └─ Phase 4: --upload-by-rfc (uploads files, status: file-uploaded)\n\n' +
|
|
2287
|
+
'Use --query-ready-files to see files ready for upload (status: detected with arela_path)',
|
|
1880
2288
|
)
|
|
1881
2289
|
.option('-v, --version', 'output the version number')
|
|
1882
2290
|
.option('-p, --prefix <prefix>', 'Prefix path in bucket', '')
|
|
@@ -1927,6 +2335,10 @@ program
|
|
|
1927
2335
|
'--run-all-phases',
|
|
1928
2336
|
'Run all 4 phases in sequence: stats → detect → propagate → upload',
|
|
1929
2337
|
)
|
|
2338
|
+
.option(
|
|
2339
|
+
'--query-ready-files',
|
|
2340
|
+
'Query and display files that are ready for upload (have been detected but not uploaded)',
|
|
2341
|
+
)
|
|
1930
2342
|
.action(async (options) => {
|
|
1931
2343
|
if (options.version) {
|
|
1932
2344
|
console.log(packageVersion);
|
|
@@ -1948,6 +2360,25 @@ program
|
|
|
1948
2360
|
return;
|
|
1949
2361
|
}
|
|
1950
2362
|
|
|
2363
|
+
// Handle query-ready-files option
|
|
2364
|
+
if (options.queryReadyFiles) {
|
|
2365
|
+
await checkCredentials(true); // Force Supabase mode
|
|
2366
|
+
|
|
2367
|
+
const readyFiles = await getFilesReadyForUpload();
|
|
2368
|
+
|
|
2369
|
+
if (readyFiles.length === 0) {
|
|
2370
|
+
console.log('ℹ️ No files are currently ready for upload');
|
|
2371
|
+
console.log(
|
|
2372
|
+
' Tip: Run --detect-pdfs and --propagate-arela-path first to prepare files for upload',
|
|
2373
|
+
);
|
|
2374
|
+
} else {
|
|
2375
|
+
console.log(`\n📋 ${readyFiles.length} files are ready for upload!`);
|
|
2376
|
+
console.log(' Use --upload-by-rfc to upload them to Arela API');
|
|
2377
|
+
}
|
|
2378
|
+
|
|
2379
|
+
return;
|
|
2380
|
+
}
|
|
2381
|
+
|
|
1951
2382
|
// Handle run-all-phases option
|
|
1952
2383
|
if (options.runAllPhases) {
|
|
1953
2384
|
console.log('🚀 Starting all 4 phases in sequence...');
|