@arela/uploader 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +97 -7
- package/commands.md +6 -0
- package/package.json +1 -1
- package/src/document-type-shared.js +22 -8
- package/src/document-types/pedimento-simplificado.js +11 -29
- package/src/file-detection.js +44 -29
- package/src/index.js +821 -225
package/src/index.js
CHANGED
@@ -9,6 +9,7 @@ import { globby } from 'globby';
 import mime from 'mime-types';
 import fetch from 'node-fetch';
 import path from 'path';
+
 import { FileDetectionService } from './file-detection.js';
 
 config();
@@ -43,6 +44,7 @@ const sources = process.env.UPLOAD_SOURCES?.split('|')
   .filter(Boolean);
 
 // RFC configuration for upload
+console.log('🔧 Configured RFCs for upload:', process.env.UPLOAD_RFCS);
 const uploadRfcs = process.env.UPLOAD_RFCS?.split('|')
   .map((s) => s.trim())
   .filter(Boolean);
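Note: UPLOAD_RFCS is parsed with the same split/trim/filter chain the package already uses for UPLOAD_SOURCES, so stray spaces and empty segments are dropped. A minimal sketch with an illustrative value (the RFCs below are made up):

process.env.UPLOAD_RFCS = ' RFC123456789 |RFC987654321| ';
const rfcs = process.env.UPLOAD_RFCS?.split('|')
  .map((s) => s.trim())
  .filter(Boolean);
console.log(rfcs); // ['RFC123456789', 'RFC987654321']; empty segments are filtered out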
@@ -179,13 +181,111 @@ const checkCredentials = async (forceSupabase = false) => {
 };
 
 const logFilePath = path.resolve(process.cwd(), 'arela-upload.log');
+
+/**
+ * OPTIMIZED: Log buffer to reduce I/O operations
+ */
+let logBuffer = [];
+const LOG_BUFFER_SIZE = 100; // Flush every 100 log entries
+let lastFlushTime = Date.now();
+const LOG_FLUSH_INTERVAL = 5000; // Flush every 5 seconds
+
+const flushLogBuffer = () => {
+  if (logBuffer.length === 0) return;
+
+  try {
+    const logContent = logBuffer.join('\n') + '\n';
+    fs.appendFileSync(logFilePath, logContent);
+    logBuffer = [];
+    lastFlushTime = Date.now();
+  } catch (error) {
+    console.error(`❌ Error writing to log file: ${error.code} | ${error.message} | path: ${logFilePath}`);
+  }
+};
+
 const writeLog = (message) => {
   try {
     const timestamp = new Date().toISOString();
-
+    logBuffer.push(`[${timestamp}] ${message}`);
+
+    // Flush if buffer is full or enough time has passed
+    const now = Date.now();
+    if (
+      logBuffer.length >= LOG_BUFFER_SIZE ||
+      now - lastFlushTime >= LOG_FLUSH_INTERVAL
+    ) {
+      flushLogBuffer();
+    }
   } catch (error) {
-    console.error(`❌ Error
+    console.error(`❌ Error buffering log message: ${error.message}`);
+  }
+};
+
+// Ensure logs are flushed on process exit
+process.on('exit', flushLogBuffer);
+process.on('SIGINT', () => {
+  flushLogBuffer();
+  process.exit(0);
+});
+process.on('SIGTERM', () => {
+  flushLogBuffer();
+  process.exit(0);
+});
+
+/**
+ * OPTIMIZED: Conditional logging to reduce console overhead
+ */
+const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true';
+const BATCH_DELAY = parseInt(process.env.BATCH_DELAY) || 100; // Configurable delay between batches
+const PROGRESS_UPDATE_INTERVAL =
+  parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10; // Update progress every N items
+
+const logVerbose = (message) => {
+  if (VERBOSE_LOGGING) {
+    console.log(message);
+  }
+};
+const batchReadFileStats = (filePaths) => {
+  const results = [];
+
+  for (const filePath of filePaths) {
+    try {
+      const stats = fs.statSync(filePath);
+      results.push({ path: filePath, stats, error: null });
+    } catch (error) {
+      results.push({ path: filePath, stats: null, error: error.message });
+    }
  }
+
+  return results;
+};
+
+/**
+ * OPTIMIZED: Cache for year/pedimento detection results to avoid redundant parsing
+ */
+const pathDetectionCache = new Map();
+
+/**
+ * OPTIMIZED: Clear the path detection cache (useful for testing or long-running processes)
+ */
+const clearPathDetectionCache = () => {
+  pathDetectionCache.clear();
+};
+
+/**
+ * OPTIMIZED: Get detection results with caching
+ */
+const getCachedPathDetection = (filePath, basePath) => {
+  const cacheKey = `${filePath}|${basePath}`;
+
+  if (pathDetectionCache.has(cacheKey)) {
+    return pathDetectionCache.get(cacheKey);
+  }
+
+  const detection = extractYearAndPedimentoFromPath(filePath, basePath);
+  pathDetectionCache.set(cacheKey, detection);
+
+  return detection;
 };
 
 /**
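Note: the new logging layer introduces three environment knobs that are read once at startup. A sketch of setting them from a wrapper script before the CLI loads (the values are illustrative, not the defaults):

process.env.VERBOSE_LOGGING = 'true'; // route logVerbose() output to the console
process.env.BATCH_DELAY = '250'; // milliseconds to sleep between upload batches
process.env.PROGRESS_UPDATE_INTERVAL = '25'; // refresh progress every N items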
@@ -276,23 +376,49 @@ const extractYearAndPedimentoFromPath = (filePath, basePath) => {
   }
 };
 
+/**
+ * OPTIMIZED: Get processed paths with caching and buffered log reading
+ */
+let processedPathsCache = null;
+let lastLogModTime = 0;
+
 const getProcessedPaths = () => {
-
-
-
-
-
-
-
-
-
+  try {
+    // Check if log file exists
+    if (!fs.existsSync(logFilePath)) {
+      return new Set();
+    }
+
+    // Check if cache is still valid
+    const logStats = fs.statSync(logFilePath);
+    if (processedPathsCache && logStats.mtime.getTime() === lastLogModTime) {
+      return processedPathsCache;
+    }
+
+    // Read and parse log file
+    const processed = new Set();
+    const content = fs.readFileSync(logFilePath, 'utf-8');
+
+    // Use more efficient regex with global flag
+    const regex = /(SUCCESS|SKIPPED): .*? -> (.+)/g;
+    let match;
+
+    while ((match = regex.exec(content)) !== null) {
+      const path = match[2];
       if (path) {
         processed.add(path.trim());
       }
     }
-  }
 
-
+    // Update cache
+    processedPathsCache = processed;
+    lastLogModTime = logStats.mtime.getTime();
+
+    return processed;
+  } catch (error) {
+    console.error(`⚠️ Error reading processed paths: ${error.message}`);
+    return new Set();
+  }
 };
 
 /**
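Note: getProcessedPaths now re-parses the log only when the file's mtime changes. A minimal standalone sketch of that invalidation pattern, assuming the file exists and `parse` is a hypothetical parser:

import fs from 'fs';

let cache = null;
let cachedMtime = 0;
const readCached = (file, parse) => {
  const mtime = fs.statSync(file).mtime.getTime();
  if (cache && mtime === cachedMtime) return cache; // file unchanged since last parse
  cache = parse(fs.readFileSync(file, 'utf-8'));
  cachedMtime = mtime;
  return cache;
};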
@@ -314,6 +440,7 @@ const uploadToApi = async (files, options) => {
 
   // New feature: custom folder structure
   let combinedStructure = null;
+  let cachedDetection = null; // Cache detection result to avoid redundant calls
 
   if (
     options.folderStructure &&
@@ -322,12 +449,10 @@ const uploadToApi = async (files, options) => {
   ) {
     // Combine custom folder structure with auto-detection
     const firstFile = files[0];
-
-
-
-
-    if (detection.detected) {
-      const autoStructure = `${detection.year}/${detection.pedimento}`;
+    cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
+
+    if (cachedDetection.detected) {
+      const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
       combinedStructure = `${options.folderStructure}/${autoStructure}`;
       formData.append('folderStructure', combinedStructure);
       console.log(
@@ -346,12 +471,10 @@ const uploadToApi = async (files, options) => {
   } else if (options.autoDetectStructure && files.length > 0) {
     // Try to auto-detect from the first file if no explicit structure is provided
     const firstFile = files[0];
-
-
-
-
-    if (detection.detected) {
-      const autoStructure = `${detection.year}/${detection.pedimento}`;
+    cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
+
+    if (cachedDetection.detected) {
+      const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
       formData.append('folderStructure', autoStructure);
     }
   }
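Note: getCachedPathDetection keys its Map on `${filePath}|${basePath}`, so the repeated per-file and per-batch detections above collapse into a single parse. A small usage sketch (the paths are hypothetical):

const a = getCachedPathDetection('/data/2024/PED-123/simp.pdf', process.cwd());
const b = getCachedPathDetection('/data/2024/PED-123/simp.pdf', process.cwd());
console.log(a === b); // true: the second call returns the memoized result object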
@@ -413,16 +536,19 @@ const uploadToSupabase = async (file, uploadPath) => {
  */
 const insertStatsToUploaderTable = async (files, options) => {
   if (!supabase) {
-    throw new Error(
+    throw new Error(
+      'Supabase client not initialized. Stats mode requires Supabase connection.',
+    );
   }
 
   const detectionService = new FileDetectionService();
   const records = [];
 
   for (const file of files) {
-
+    // OPTIMIZED: Use pre-computed stats if available, otherwise call fs.statSync
+    const stats = file.stats || fs.statSync(file.path);
     const originalPath = options.clientPath || file.path;
-
+
     // Check if record already exists
     const { data: existingRecords, error: checkError } = await supabase
       .from('uploader')
@@ -431,7 +557,9 @@ const insertStatsToUploaderTable = async (files, options) => {
       .limit(1);
 
     if (checkError) {
-      console.error(
+      console.error(
+        `❌ Error checking for existing record: ${checkError.message}`,
+      );
       continue;
     }
 
@@ -439,7 +567,7 @@ const insertStatsToUploaderTable = async (files, options) => {
       console.log(`⏭️ Skipping duplicate: ${path.basename(file.path)}`);
       continue;
     }
-
+
     // Initialize record with basic file stats
     const record = {
       document_type: null,
@@ -450,26 +578,28 @@ const insertStatsToUploaderTable = async (files, options) => {
       arela_path: null,
       status: 'stats',
       rfc: null,
-      message: null
+      message: null,
     };
 
     // Try to detect document type for supported files
     if (detectionService.isSupportedFileType(file.path)) {
       try {
         const detection = await detectionService.detectFile(file.path);
-
+
         if (detection.detectedType) {
           record.document_type = detection.detectedType;
           record.num_pedimento = detection.detectedPedimento;
           record.status = 'detected';
-
+
           // Set arela_path for pedimento_simplificado documents
           if (detection.arelaPath) {
             record.arela_path = detection.arelaPath;
           }
-
+
           // Extract RFC from fields if available
-          const rfcField = detection.fields.find(
+          const rfcField = detection.fields.find(
+            (f) => f.name === 'rfc' && f.found,
+          );
           if (rfcField) {
             record.rfc = rfcField.value;
           }
@@ -497,8 +627,10 @@ const insertStatsToUploaderTable = async (files, options) => {
     return [];
   }
 
-  console.log(
-
+  console.log(
+    `💾 Inserting ${records.length} new records into uploader table...`,
+  );
+
   const { data, error } = await supabase
     .from('uploader')
     .insert(records)
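Note: this detection path keeps the plain `.insert(records)`, while the new Phase 1 function added in the next hunk switches to an upsert keyed on original_path. A minimal sketch of that call shape, assuming an initialized supabase-js client and a unique constraint on the original_path column:

// Illustrative record; upsert makes re-runs idempotent under the assumed
// unique constraint on `original_path`.
const { error, count } = await supabase.from('uploader').upsert(
  [{ original_path: '/data/a.pdf', status: 'fs-stats' }],
  { onConflict: 'original_path', ignoreDuplicates: false, count: 'exact' },
);
if (error) console.error(error.message);
else console.log(`${count} row(s) affected`); // a re-run updates instead of duplicating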
@@ -511,6 +643,320 @@ const insertStatsToUploaderTable = async (files, options) => {
   return data;
 };
 
+/**
+ * OPTIMIZED: Insert ONLY file stats into uploader table (Phase 1)
+ * No file reading, no detection - just filesystem metadata
+ * Returns summary statistics instead of full records for better performance
+ */
+const insertStatsOnlyToUploaderTable = async (files, options) => {
+  if (!supabase) {
+    throw new Error(
+      'Supabase client not initialized. Stats mode requires Supabase connection.',
+    );
+  }
+
+  const batchSize = 1000; // Large batch size for performance
+  const allRecords = [];
+
+  // Prepare all file stats data first - OPTIMIZED to use pre-computed stats
+  console.log('📊 Collecting filesystem stats...');
+  for (const file of files) {
+    try {
+      // Use pre-computed stats if available, otherwise call fs.statSync
+      const stats = file.stats || fs.statSync(file.path);
+      const originalPath = options.clientPath || file.path;
+      const fileExtension = path
+        .extname(file.path)
+        .toLowerCase()
+        .replace('.', '');
+
+      const record = {
+        document_type: null,
+        size: stats.size,
+        num_pedimento: null,
+        filename: file.originalName || path.basename(file.path),
+        original_path: originalPath,
+        arela_path: null,
+        status: 'fs-stats',
+        rfc: null,
+        message: null,
+        file_extension: fileExtension,
+        created_at: new Date().toISOString(),
+        modified_at: stats.mtime.toISOString(),
+      };
+
+      allRecords.push(record);
+    } catch (error) {
+      console.error(`❌ Error reading stats for ${file.path}:`, error.message);
+    }
+  }
+
+  if (allRecords.length === 0) {
+    console.log('📊 No file stats to insert');
+    return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
+  }
+
+  console.log(
+    `💾 Bulk inserting ${allRecords.length} file stats in batches of ${batchSize}...`,
+  );
+
+  let totalInserted = 0;
+  let totalSkipped = 0;
+
+  // Process in batches for optimal performance
+  for (let i = 0; i < allRecords.length; i += batchSize) {
+    const batch = allRecords.slice(i, i + batchSize);
+
+    try {
+      // OPTIMIZED: Use upsert without select to avoid unnecessary data transfer
+      const { error, count } = await supabase.from('uploader').upsert(batch, {
+        onConflict: 'original_path',
+        ignoreDuplicates: false,
+        count: 'exact',
+      });
+
+      if (error) {
+        console.error(
+          `❌ Error inserting batch ${Math.floor(i / batchSize) + 1}:`,
+          error.message,
+        );
+        continue;
+      }
+
+      // For upsert operations, we can't easily distinguish between inserts and updates
+      // from the count alone, but we can estimate based on the assumption that most
+      // operations in --stats-only mode are likely new inserts
+      const batchProcessed = batch.length;
+
+      // Since we're using upsert with ignoreDuplicates: false, the count represents
+      // the actual number of rows affected (both inserts and updates)
+      const affected = count || batchProcessed;
+
+      // For simplicity and performance, we'll assume most are new inserts in stats-only mode
+      // This is reasonable since stats-only is typically run on new file sets
+      totalInserted += affected;
+
+      console.log(
+        `✅ Batch ${Math.floor(i / batchSize) + 1}: ${affected} rows processed`,
+      );
+    } catch (error) {
+      console.error(
+        `❌ Unexpected error in batch ${Math.floor(i / batchSize) + 1}:`,
+        error.message,
+      );
+    }
+  }
+
+  // Calculate skipped as difference between total records and inserted
+  totalSkipped = allRecords.length - totalInserted;
+
+  console.log(
+    `📊 Phase 1 Summary: ${totalInserted} records processed, estimated ${totalSkipped} were updates`,
+  );
+
+  return {
+    totalInserted,
+    totalSkipped,
+    totalProcessed: allRecords.length,
+  };
+};
+
+/**
+ * PHASE 2: Process PDF files for pedimento-simplificado detection
+ * Only processes files with status 'fs-stats' and file_extension 'pdf'
+ * Processes records in chunks of 1000 to avoid loading all records into memory
+ */
+const detectPedimentosInDatabase = async (options = {}) => {
+  if (!supabase) {
+    throw new Error('Supabase client not initialized.');
+  }
+
+  console.log(
+    '🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...',
+  );
+
+  const detectionService = new FileDetectionService();
+  const processingBatchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
+  const queryBatchSize = 1000; // Process 1000 records at a time
+
+  let totalDetected = 0;
+  let totalProcessed = 0;
+  let totalErrors = 0;
+  let offset = 0;
+  let chunkNumber = 1;
+
+  console.log('� Processing PDF files in chunks of 1000 records...');
+
+  // Process records in chunks of 1000
+  while (true) {
+    console.log(
+      `\n📥 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
+    );
+
+    // Fetch next chunk of PDF records
+    const { data: pdfRecords, error: queryError } = await supabase
+      .from('uploader')
+      .select('id, original_path, filename, file_extension, status')
+      .eq('status', 'fs-stats')
+      .eq('file_extension', 'pdf')
+      .ilike('filename', '%simp%')
+      .range(offset, offset + queryBatchSize - 1);
+
+    if (queryError) {
+      throw new Error(
+        `Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
+      );
+    }
+
+    // If no records found, we're done
+    if (!pdfRecords || pdfRecords.length === 0) {
+      console.log(`📄 No more PDF files found. Processing completed.`);
+      break;
+    }
+
+    console.log(
+      `� Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
+    );
+
+    // Create progress bar for this chunk
+    const progressBar = new cliProgress.SingleBar({
+      format: `🔍 Chunk ${chunkNumber} |{bar}| {percentage}% | {value}/{total} | Detected: {detected} | Errors: {errors}`,
+      barCompleteChar: '█',
+      barIncompleteChar: '░',
+      hideCursor: true,
+    });
+
+    progressBar.start(pdfRecords.length, 0, { detected: 0, errors: 0 });
+
+    let chunkDetected = 0;
+    let chunkProcessed = 0;
+    let chunkErrors = 0;
+
+    // Process files in smaller batches within this chunk
+    for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
+      const batch = pdfRecords.slice(i, i + processingBatchSize);
+      const updatePromises = [];
+
+      for (const record of batch) {
+        try {
+          // Check if file still exists
+          if (!fs.existsSync(record.original_path)) {
+            updatePromises.push(
+              supabase
+                .from('uploader')
+                .update({
+                  status: 'file-not-found',
+                  message: 'File no longer exists at original path',
+                })
+                .eq('id', record.id),
+            );
+            chunkErrors++;
+            totalErrors++;
+            continue;
+          }
+
+          // Perform detection
+          const detection = await detectionService.detectFile(
+            record.original_path,
+          );
+          chunkProcessed++;
+          totalProcessed++;
+
+          const updateData = {
+            status: detection.detectedType ? 'detected' : 'not-detected',
+            document_type: detection.detectedType,
+            num_pedimento: detection.detectedPedimento,
+            arela_path: detection.arelaPath,
+            message: detection.error || null,
+          };
+
+          // Extract RFC from fields if available
+          if (detection.fields) {
+            const rfcField = detection.fields.find(
+              (f) => f.name === 'rfc' && f.found,
+            );
+            if (rfcField) {
+              updateData.rfc = rfcField.value;
+            }
+          }
+
+          if (detection.detectedType) {
+            chunkDetected++;
+            totalDetected++;
+          }
+
+          updatePromises.push(
+            supabase.from('uploader').update(updateData).eq('id', record.id),
+          );
+        } catch (error) {
+          console.error(
+            `❌ Error detecting ${record.filename}:`,
+            error.message,
+          );
+          chunkErrors++;
+          totalErrors++;
+
+          updatePromises.push(
+            supabase
+              .from('uploader')
+              .update({
+                status: 'detection-error',
+                message: error.message,
+              })
+              .eq('id', record.id),
+          );
+        }
+      }
+
+      // Execute all updates in parallel for this batch
+      try {
+        await Promise.all(updatePromises);
+      } catch (error) {
+        console.error(
+          `❌ Error updating batch in chunk ${chunkNumber}:`,
+          error.message,
+        );
+      }
+
+      // Update progress for this chunk
+      progressBar.update(Math.min(i + processingBatchSize, pdfRecords.length), {
+        detected: chunkDetected,
+        errors: chunkErrors,
+      });
+    }
+
+    progressBar.stop();
+
+    console.log(
+      `✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
+    );
+
+    // Move to next chunk
+    offset += queryBatchSize;
+    chunkNumber++;
+
+    // If we got fewer records than queryBatchSize, we've reached the end
+    if (pdfRecords.length < queryBatchSize) {
+      console.log(
+        `📄 Reached end of records (chunk had ${pdfRecords.length} records).`,
+      );
+      break;
+    }
+
+    // Small delay between chunks to avoid overwhelming the database
+    await new Promise((resolve) => setTimeout(resolve, 500));
+  }
+
+  console.log(
+    `📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
+  );
+  return {
+    detectedCount: totalDetected,
+    processedCount: totalProcessed,
+    errorCount: totalErrors,
+  };
+};
+
 const processFilesInBatches = async (
   files,
   batchSize,
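Note: Phase 2 pages through the table with `.range()` instead of loading every row. A distilled version of that loop, assuming `supabase` is an initialized @supabase/supabase-js client:

const pageSize = 1000;
for (let offset = 0; ; offset += pageSize) {
  const { data, error } = await supabase
    .from('uploader')
    .select('id, original_path')
    .eq('status', 'fs-stats')
    .range(offset, offset + pageSize - 1); // range bounds are inclusive
  if (error) throw error;
  if (!data || data.length === 0) break; // nothing left to fetch
  // ...process this chunk of rows...
  if (data.length < pageSize) break; // a short page means this was the last one
}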
@@ -528,7 +974,7 @@ const processFilesInBatches = async (
 
   const messageBuffer = [];
 
-  const progressBarFormat = options.statsOnly
+  const progressBarFormat = options.statsOnly
     ? '📊 Processing [{bar}] {percentage}% | {value}/{total} files | Stats: {successCount} | Errors: {failureCount} | Duplicates: {skippedCount}'
     : '📊 Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}';
 
@@ -546,98 +992,67 @@ const processFilesInBatches = async (
   });
 
   if (options.statsOnly) {
-    // Stats-only mode -
-    console.log(
-
-
-
-    let totalUnsupported = 0;
-    let totalDetectionErrors = 0;
-
+    // OPTIMIZED Stats-only mode - Only read filesystem stats, no file detection
+    console.log(
+      '📊 Phase 1: Processing files in optimized stats-only mode (no detection)...',
+    );
+
     for (let i = 0; i < files.length; i += batchSize) {
       const batch = files.slice(i, i + batchSize);
-
-      const statsFiles = batch.map((file) => {
-        const originalFileName = path.basename(file);
-
-        return {
-          path: file,
-          originalName: originalFileName,
-        };
-      });
 
-
-
-
-
-
-
-
-
-
-
-
-          totalDetected++;
-          break;
-        case 'not-detected':
-          totalNotDetected++;
-          break;
-        case 'unsupported':
-          totalUnsupported++;
-          break;
-        case 'detection-error':
-          totalDetectionErrors++;
-          break;
-      }
-      });
-
-      statsFiles.forEach((file) => {
-        const wasInserted = insertedRecords.some(record =>
-          record.original_path === (options.clientPath || file.path)
-        );
-        if (wasInserted) {
-          writeLog(`STATS: ${file.path} -> uploader table`);
-        } else {
-          writeLog(`DUPLICATE: ${file.path} -> already exists in uploader table`);
-        }
+      // OPTIMIZED: Batch read file stats to reduce I/O overhead
+      const fileStatsResults = batchReadFileStats(batch);
+      const statsFiles = fileStatsResults
+        .filter((result) => result.stats !== null) // Only include files with valid stats
+        .map((result) => {
+          const originalFileName = path.basename(result.path);
+
+          return {
+            path: result.path,
+            originalName: originalFileName,
+            stats: result.stats, // Pass pre-computed stats to avoid redundant calls
+          };
       });
-
-
-
-
-
-
-
-
-
-
-
-      } catch (error) {
-        totalErrors += statsFiles.length;
-        statsFiles.forEach((file) => {
-          writeLog(`ERROR: ${file.path}: ${error.message}`);
-          messageBuffer.push(`❌ ${file.originalName}: ${error.message}`);
+
+      // Log any files that couldn't be read
+      const failedFiles = fileStatsResults.filter(
+        (result) => result.error !== null,
+      );
+      if (failedFiles.length > 0) {
+        console.log(
+          `⚠️ Could not read stats for ${failedFiles.length} files in batch`,
+        );
+        failedFiles.forEach((failed) => {
+          console.error(`  ❌ ${failed.path}: ${failed.error}`);
        });
      }
 
-
-
-
-
+      try {
+        const result = await insertStatsOnlyToUploaderTable(
+          statsFiles,
+          options,
+        );
 
-
-
+        totalUploaded += result.totalInserted;
+        totalSkipped += result.totalSkipped;
+        totalErrors += failedFiles.length; // Count failed file reads as errors
+
+        progressBar.update(Math.min(i + batch.length, files.length), {
+          successCount: totalUploaded,
+          failureCount: totalErrors,
+          skippedCount: totalSkipped,
+        });
+      } catch (error) {
+        console.error(`❌ Error processing stats batch:`, error.message);
+        totalErrors += batch.length;
+
+        progressBar.update(Math.min(i + batch.length, files.length), {
+          successCount: totalUploaded,
+          failureCount: totalErrors,
+          skippedCount: totalSkipped,
+        });
      }
    }
-
-    // Store detection stats for summary
-    totalDetected = totalDetected || 0;
-    totalNotDetected = totalNotDetected || 0;
-    totalUnsupported = totalUnsupported || 0;
-    totalDetectionErrors = totalDetectionErrors || 0;
-
  } else if (apiMode && !options.forceSupabase) {
    // API Mode - Process in batches
    for (let i = 0; i < files.length; i += batchSize) {
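Note: batchReadFileStats never throws; it returns one { path, stats, error } record per input path, which is why the stats-only branch above can split successes from failures without a per-file try/catch. A compact sketch (the paths are hypothetical):

const results = batchReadFileStats(['./a.pdf', './missing.pdf']);
const readable = results.filter((r) => r.stats !== null);
const unreadable = results.filter((r) => r.error !== null);
console.log(`${readable.length} readable, ${unreadable.length} unreadable`);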
@@ -661,7 +1076,8 @@ const processFilesInBatches = async (
 
         // Handle combined folder structure + auto-detection
         if (options.folderStructure && options.autoDetectStructure) {
-
+          // OPTIMIZED: Use cached detection to avoid redundant parsing
+          const detection = getCachedPathDetection(file, basePath);
           if (detection.detected) {
             const autoStructure = `${detection.year}/${detection.pedimento}`;
             const combinedStructure = `${options.folderStructure}/${autoStructure}`;
@@ -669,7 +1085,7 @@ const processFilesInBatches = async (
               combinedStructure,
               sanitizedFileName,
             );
-
+            logVerbose(
               `📁 Combined structure: ${options.folderStructure}/${autoStructure} for ${originalFileName} -> ${uploadPath}`,
             );
           } else {
@@ -678,7 +1094,7 @@ const processFilesInBatches = async (
               options.folderStructure,
               sanitizedFileName,
             );
-
+            logVerbose(
               `📁 Custom structure (auto-detection failed): ${uploadPath}`,
             );
           }
@@ -688,10 +1104,10 @@ const processFilesInBatches = async (
             options.folderStructure,
             sanitizedFileName,
           );
-
+          logVerbose(`📁 Custom structure: ${uploadPath}`);
         } else if (options.autoDetectStructure) {
-          // Auto-detect structure from path if enabled
-          const detection =
+          // Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
+          const detection = getCachedPathDetection(file, basePath);
           if (detection.detected) {
             const autoStructure = `${detection.year}/${detection.pedimento}`;
             uploadPath = path.posix.join(autoStructure, sanitizedFileName);
@@ -737,10 +1153,8 @@ const processFilesInBatches = async (
 
       if (!clientPath && apiFiles.length > 0) {
         const firstFile = apiFiles[0];
-
-
-          basePath,
-        );
+        // OPTIMIZED: Use cached detection to avoid redundant parsing
+        const detection = getCachedPathDetection(firstFile.path, basePath);
         if (detection.detected) {
           // clientPath = `${detection.year}/${detection.pedimento}/`;
           clientPath = path
@@ -796,7 +1210,7 @@ const processFilesInBatches = async (
       });
 
       if (i + batchSize < files.length) {
-        await new Promise((resolve) => setTimeout(resolve,
+        await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
       }
     }
   } else {
@@ -809,7 +1223,7 @@ const processFilesInBatches = async (
 
       // Handle combined folder structure + auto-detection
       if (options.folderStructure && options.autoDetectStructure) {
-        const detection =
+        const detection = getCachedPathDetection(file, basePath);
         if (detection.detected) {
           const autoStructure = `${detection.year}/${detection.pedimento}`;
           const combinedStructure = `${options.folderStructure}/${autoStructure}`;
@@ -832,8 +1246,8 @@ const processFilesInBatches = async (
         uploadPath = path.join(options.folderStructure, fileName);
         console.log(`📁 Custom structure: ${uploadPath}`);
       } else if (options.autoDetectStructure) {
-        // Auto-detect structure from path if enabled
-        const detection =
+        // Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
+        const detection = getCachedPathDetection(file, basePath);
         if (detection.detected) {
           const autoStructure = `${detection.year}/${detection.pedimento}`;
           const fileName = path.basename(file);
@@ -899,13 +1313,19 @@ const uploadFilesByRfc = async (options = {}) => {
   }
 
   if (!API_BASE_URL || !API_TOKEN) {
-    console.error(
+    console.error(
+      '❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.',
+    );
     process.exit(1);
   }
 
   if (!uploadRfcs || uploadRfcs.length === 0) {
-    console.error(
-
+    console.error(
+      '❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.',
+    );
+    console.error(
+      '   Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
+    );
     process.exit(1);
   }
 
@@ -927,13 +1347,17 @@ const uploadFilesByRfc = async (options = {}) => {
 
   if (!rfcRecords || rfcRecords.length === 0) {
     console.log('ℹ️ No files found for the specified RFCs with arela_path');
-    console.log(
+    console.log(
+      `   Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
+    );
     return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
   }
 
   // Step 2: Get unique arela_paths from the RFC matches
-  const uniqueArelaPaths = [...new Set(rfcRecords.map(r => r.arela_path))];
-  console.log(
+  const uniqueArelaPaths = [...new Set(rfcRecords.map((r) => r.arela_path))];
+  console.log(
+    `� Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`,
+  );
 
   // Step 3: Get ALL files that have these arela_paths (including supporting documents)
   // Use pagination to ensure we get all files, regardless of count
@@ -943,7 +1367,7 @@ const uploadFilesByRfc = async (options = {}) => {
   const queryBatchSize = 1000;
 
   console.log('📥 Fetching all related files (with pagination)...');
-
+
   while (hasMore) {
     const { data: batch, error: queryError } = await supabase
       .from('uploader')
@@ -962,7 +1386,7 @@ const uploadFilesByRfc = async (options = {}) => {
     } else {
       allRelatedFiles = allRelatedFiles.concat(batch);
       offset += queryBatchSize;
-
+
       // If we got less than queryBatchSize, we've reached the end
       if (batch.length < queryBatchSize) {
         hasMore = false;
@@ -975,8 +1399,10 @@ const uploadFilesByRfc = async (options = {}) => {
     return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
   }
 
-  console.log(
-
+  console.log(
+    `📋 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`,
+  );
+
   // Group by RFC and arela_path for better organization
   const filesByRfc = allRelatedFiles.reduce((acc, record) => {
     const rfc = record.rfc || 'No RFC';
@@ -989,8 +1415,12 @@ const uploadFilesByRfc = async (options = {}) => {
 
   console.log('📊 Files by RFC (including supporting documents):');
   for (const [rfc, files] of Object.entries(filesByRfc)) {
-    const documentTypes = [
-
+    const documentTypes = [
+      ...new Set(files.map((f) => f.document_type || 'Unknown')),
+    ];
+    console.log(
+      `   ${rfc}: ${files.length} files (${documentTypes.join(', ')})`,
+    );
   }
 
   // Group by arela_path for upload organization
@@ -1015,7 +1445,8 @@ const uploadFilesByRfc = async (options = {}) => {
 
   // Create progress bar
   const progressBar = new cliProgress.SingleBar({
-    format:
+    format:
+      '🚀 Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
     barCompleteChar: '█',
     barIncompleteChar: '░',
     hideCursor: true,
@@ -1037,18 +1468,20 @@ const uploadFilesByRfc = async (options = {}) => {
     const batch = allRelatedFiles.slice(i, i + batchSize);
     const batchNumber = Math.floor(i / batchSize) + 1;
     const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
-
-    console.log(
+
+    console.log(
+      `\n📦 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`,
+    );
 
     // Prepare files for upload
     const filesToUpload = [];
-
+
     for (const record of batch) {
       totalProcessed++;
-
+
       try {
         const originalPath = record.original_path;
-
+
         // Check if file exists
         if (!fs.existsSync(originalPath)) {
           console.log(`  ⚠️ File not found: ${originalPath}`);
@@ -1056,24 +1489,26 @@ const uploadFilesByRfc = async (options = {}) => {
           continue;
         }
 
-
+        // OPTIMIZED: Read file and get size from buffer instead of separate fs.statSync call
         const fileBuffer = fs.readFileSync(originalPath);
-
+
         filesToUpload.push({
           path: originalPath,
           buffer: fileBuffer,
-          size:
+          size: fileBuffer.length, // Get size from buffer instead of fs.statSync
           name: record.filename,
           arelaPath: record.arela_path,
          rfc: record.rfc,
          documentType: record.document_type,
        });
-
      } catch (error) {
-        console.error(
+        console.error(
+          `  ❌ Error reading file ${record.original_path}:`,
+          error.message,
+        );
        totalErrors++;
      }
-
+
      if (options.showProgress !== false) {
        progressBar.update(totalProcessed, {
          uploaded: totalUploaded,
@@ -1086,10 +1521,12 @@ const uploadFilesByRfc = async (options = {}) => {
    // Upload the batch if we have files
    if (filesToUpload.length > 0) {
      try {
-        console.log(
-
+        console.log(
+          `  🚀 Uploading ${filesToUpload.length} files to Arela API...`,
+        );
+
        const formData = new FormData();
-
+
        // Add files to form data
        filesToUpload.forEach((file, index) => {
          formData.append(`files`, file.buffer, {
@@ -1112,7 +1549,7 @@ const uploadFilesByRfc = async (options = {}) => {
        // Upload each group separately with its folder structure
        for (const [arelaPath, pathFiles] of Object.entries(filesByPath)) {
          const pathFormData = new FormData();
-
+
          pathFiles.forEach((file) => {
            pathFormData.append('files', file.buffer, {
              filename: file.name,
@@ -1121,8 +1558,10 @@ const uploadFilesByRfc = async (options = {}) => {
           });
 
           // Set folder structure for this group - concatenate custom prefix with arela_path
-          const folderStructure = options.folderStructure
-            ? `${options.folderStructure}/${arelaPath}
+          const folderStructure = options.folderStructure
+            ? `${options.folderStructure}/${arelaPath}`
+                .replace(/\/+/g, '/')
+                .replace(/\/$/, '')
             : arelaPath;
           pathFormData.append('folderStructure', folderStructure);
           pathFormData.append('autoDetect', 'true');
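Note: the new code normalizes the concatenated folder path before appending it to the form data. A worked example of the two replaces with illustrative inputs:

const prefix = 'clients/acme/';
const arelaPath = '2024/24-47-3801-1234567/';
const folder = `${prefix}/${arelaPath}`
  .replace(/\/+/g, '/') // collapse runs of slashes produced by the join
  .replace(/\/$/, ''); // drop a single trailing slash
console.log(folder); // 'clients/acme/2024/24-47-3801-1234567'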
@@ -1133,15 +1572,20 @@
           pathFormData.append('bucket', bucket);
          }
 
-          console.log(
+          console.log(
+            `  📁 Uploading ${pathFiles.length} files to: ${folderStructure}`,
+          );
 
-          const response = await fetch(
-
-
-            '
+          const response = await fetch(
+            `${API_BASE_URL}/api/storage/batch-upload-and-process`,
+            {
+              method: 'POST',
+              headers: {
+                'x-api-key': API_TOKEN,
+              },
+              body: pathFormData,
           },
-
-          });
+          );
 
           if (!response.ok) {
             const errorText = await response.text();
@@ -1149,24 +1593,33 @@
           }
 
           const result = await response.json();
-
+
           // Check if upload was successful based on stats rather than success field
-          const isSuccessful =
-
+          const isSuccessful =
+            result.stats &&
+            result.stats.uploadedCount > 0 &&
+            result.stats.errorCount === 0;
+
           if (isSuccessful) {
-            console.log(
+            console.log(
+              `  ✅ Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`,
+            );
             totalUploaded += result.stats.uploadedCount;
-
+
             if (result.stats.detectedCount > 0) {
-              console.log(
+              console.log(
+                `    🔍 Files detected: ${result.stats.detectedCount}`,
+              );
             }
             if (result.stats.organizedCount > 0) {
-              console.log(
+              console.log(
+                `    📁 Files organized: ${result.stats.organizedCount}`,
+              );
             }
           } else {
             console.error(`  ❌ Upload failed for ${folderStructure}:`);
             if (result.errors && result.errors.length > 0) {
-              result.errors.forEach(error => {
+              result.errors.forEach((error) => {
                 console.error(`    - ${error.fileName}: ${error.error}`);
               });
             }
@@ -1174,18 +1627,20 @@
           }
 
           // Small delay between path groups
-          await new Promise(resolve => setTimeout(resolve, 100));
+          await new Promise((resolve) => setTimeout(resolve, 100));
         }
-
       } catch (error) {
-        console.error(
+        console.error(
+          `  ❌ Error uploading batch ${batchNumber}:`,
+          error.message,
+        );
         totalErrors += filesToUpload.length;
       }
     }
 
     // Small delay between batches
     if (i + batchSize < allRelatedFiles.length) {
-      await new Promise(resolve => setTimeout(resolve,
+      await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
     }
   }
 
@@ -1229,7 +1684,10 @@ const propagateArelaPath = async (options = {}) => {
     .not('arela_path', 'is', null);
 
   if (pedimentoError) {
-    console.error(
+    console.error(
+      '❌ Error fetching pedimento records:',
+      pedimentoError.message,
+    );
     return { processedCount: 0, updatedCount: 0, errorCount: 1 };
   }
 
@@ -1238,7 +1696,9 @@ const propagateArelaPath = async (options = {}) => {
     return { processedCount: 0, updatedCount: 0, errorCount: 0 };
   }
 
-  console.log(
+  console.log(
+    `📊 Found ${pedimentoRecords.length} pedimento records with arela_path`,
+  );
 
   let totalProcessed = 0;
   let totalUpdated = 0;
@@ -1246,7 +1706,8 @@ const propagateArelaPath = async (options = {}) => {
 
   // Create progress bar
   const progressBar = new cliProgress.SingleBar({
-    format:
+    format:
+      '🔄 Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
     barCompleteChar: '█',
     barIncompleteChar: '░',
     hideCursor: true,
@@ -1263,19 +1724,21 @@ const propagateArelaPath = async (options = {}) => {
   for (const pedimento of pedimentoRecords) {
     try {
       totalProcessed++;
-
+
       // Extract base path from original_path (remove filename)
       const basePath = path.dirname(pedimento.original_path);
-
+
       console.log(`\n🔍 Processing: ${pedimento.filename}`);
       console.log(`  📁 Base path: ${basePath}`);
-
+
       // Extract folder part from existing arela_path by removing the filename
       const existingPath = pedimento.arela_path;
-      const folderArelaPath = existingPath.includes('/')
-        existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
-        existingPath.endsWith('/')
-
+      const folderArelaPath = existingPath.includes('/')
+        ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
+        : existingPath.endsWith('/')
+          ? existingPath
+          : existingPath + '/';
+
       console.log(`  🎯 Original arela path: ${existingPath}`);
       console.log(`  📁 Folder arela path: ${folderArelaPath}`);
 
@@ -1288,7 +1751,10 @@ const propagateArelaPath = async (options = {}) => {
         .neq('id', pedimento.id); // Exclude the pedimento itself
 
       if (relatedError) {
-        console.error(
+        console.error(
+          `❌ Error finding related files for ${pedimento.filename}:`,
+          relatedError.message,
+        );
         totalErrors++;
         continue;
       }
@@ -1298,32 +1764,38 @@ const propagateArelaPath = async (options = {}) => {
         continue;
       }
 
-      console.log(
-
+      console.log(
+        `  📊 Found ${relatedFiles.length} related files to update:`,
+      );
+
       // Show first 10 files, then indicate if there are more
       const filesToShow = relatedFiles.slice(0, 10);
-      filesToShow.forEach(file => {
+      filesToShow.forEach((file) => {
         console.log(`    - ${file.filename}`);
       });
-
+
       if (relatedFiles.length > 10) {
         console.log(`    ... and ${relatedFiles.length - 10} more files`);
       }
 
       // Process files in batches to avoid URI length limitations
       const BATCH_SIZE = 50; // Process 50 files at a time
-      const fileIds = relatedFiles.map(f => f.id);
+      const fileIds = relatedFiles.map((f) => f.id);
       let batchErrors = 0;
       let batchUpdated = 0;
 
-      console.log(
+      console.log(
+        `  🔄 Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`,
+      );
 
       for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
         const batchIds = fileIds.slice(i, i + BATCH_SIZE);
         const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
         const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
-
-        console.log(
+
+        console.log(
+          `    📦 Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
+        );
 
         try {
           const { error: updateError } = await supabase
@@ -1332,33 +1804,45 @@ const propagateArelaPath = async (options = {}) => {
           .in('id', batchIds);
 
         if (updateError) {
-          console.error(
+          console.error(
+            `    ❌ Error in batch ${batchNumber}:`,
+            updateError.message,
+          );
           batchErrors++;
         } else {
-          console.log(
+          console.log(
+            `    ✅ Batch ${batchNumber} completed: ${batchIds.length} files updated`,
+          );
           batchUpdated += batchIds.length;
         }
       } catch (error) {
-        console.error(
+        console.error(
+          `    ❌ Exception in batch ${batchNumber}:`,
+          error.message,
+        );
         batchErrors++;
       }
 
       // Small delay between batches to avoid overwhelming the database
       if (i + BATCH_SIZE < fileIds.length) {
-        await new Promise(resolve => setTimeout(resolve, 100));
+        await new Promise((resolve) => setTimeout(resolve, 100));
       }
     }
 
     if (batchErrors > 0) {
-      console.error(
+      console.error(
+        `❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
+      );
       totalErrors++;
     } else {
       console.log(`  🎯 Successfully updated ${batchUpdated} related files`);
       totalUpdated += batchUpdated;
     }
-
     } catch (error) {
-      console.error(
+      console.error(
+        `❌ Error processing ${pedimento.filename}:`,
+        error.message,
+      );
       totalErrors++;
     }
 
@@ -1422,25 +1906,70 @@ program
     'Automatically detect year/pedimento from file paths',
   )
   .option('--client-path <path>', 'Client path for metadata tracking')
-  .option(
+  .option(
+    '--stats-only',
+    'Phase 1: Only read filesystem stats and insert to database (no file reading or detection)',
+  )
   .option('--no-detect', 'Disable document type detection in stats-only mode')
-  .option(
-
+  .option(
+    '--detect-pdfs',
+    'Phase 2: Process PDF files in database for pedimento-simplificado detection',
+  )
+  .option(
+    '--propagate-arela-path',
+    'Phase 3: Propagate arela_path from pedimento_simplificado records to related files with same base path',
+  )
+  .option(
+    '--upload-by-rfc',
+    'Phase 4: Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable',
+  )
+  .option(
+    '--run-all-phases',
+    'Run all 4 phases in sequence: stats → detect → propagate → upload',
+  )
   .action(async (options) => {
     if (options.version) {
       console.log(packageVersion);
       process.exit(0);
     }
 
+    // Handle detect-pdfs option (Phase 2)
+    if (options.detectPdfs) {
+      console.log('🚀 Starting Phase 2: PDF Detection');
+      await checkCredentials(true); // Force Supabase mode
+
+      const result = await detectPedimentosInDatabase({
+        batchSize: parseInt(options.batchSize) || 10,
+      });
+
+      console.log(
+        `✅ Phase 2 Complete: ${result.detectedCount} detected, ${result.errorCount} errors`,
+      );
+      return;
+    }
+
+    // Handle run-all-phases option
+    if (options.runAllPhases) {
+      console.log('🚀 Starting all 4 phases in sequence...');
+      await checkCredentials(true); // Force Supabase mode
+
+      // Phase 1: Stats collection
+      console.log('\n📊 === PHASE 1: Filesystem Stats ===');
+      options.statsOnly = true;
+      // Continue with normal processing to run Phase 1
+
+      // The rest will be handled after Phase 1 completes
+    }
+
     // Handle propagate-arela-path option
     if (options.propagateArelaPath) {
       // Initialize Supabase credentials for propagation
       await checkCredentials(true); // Force Supabase mode
-
+
       const result = await propagateArelaPath({
         showProgress: options.showStats || true,
       });
-
+
       if (result.errorCount > 0) {
         process.exit(1);
       }
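Note: commander maps the new kebab-case flags to camelCase option keys, which is what the action handler checks (options.detectPdfs, options.runAllPhases, and so on). A minimal demonstration of that mapping, assuming the commander package:

import { program } from 'commander';

program
  .option('--detect-pdfs')
  .option('--run-all-phases')
  .parse(['node', 'demo', '--detect-pdfs']); // simulated argv
console.log(program.opts().detectPdfs); // true
console.log(program.opts().runAllPhases); // undefined: flag not passed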
@@ -1451,25 +1980,29 @@
     if (options.uploadByRfc) {
       // RFC upload needs both Supabase (for database queries) and API (for uploads)
       await checkCredentials(false); // Initialize API mode
-
+
       // Also initialize Supabase for database queries
       if (!supabase) {
         if (!supabaseUrl || !supabaseKey) {
-          console.error(
-
+          console.error(
+            '❌ RFC upload requires Supabase credentials for database queries.',
+          );
+          console.error(
+            '   Please set SUPABASE_URL and SUPABASE_KEY environment variables.',
+          );
           process.exit(1);
         }
-
+
         supabase = createClient(supabaseUrl, supabaseKey);
         console.log('✅ Connected to Supabase for database queries');
       }
-
+
       const result = await uploadFilesByRfc({
         showProgress: options.showStats || true,
         batchSize: parseInt(options.batchSize) || 10,
         folderStructure: options.folderStructure,
       });
-
+
       if (result.errorCount > 0) {
         process.exit(1);
       }
@@ -1490,7 +2023,9 @@
     const concurrency = parseInt(options.concurrency) || 10;
 
     if (options.statsOnly) {
-      console.log(
+      console.log(
+        '📊 Mode: Stats Only - Reading file stats and inserting to uploader table',
+      );
       console.log('🚫 Files will NOT be uploaded');
       if (options.detect !== false) {
         console.log('🔍 Document type detection ENABLED for supported files');
@@ -1590,12 +2125,73 @@
     console.log(`  📝 Log file: ${logFilePath}`);
     console.log(`${'='.repeat(60)}\n`);
 
-
+    // Continue with remaining phases if running all phases
+    if (options.runAllPhases && options.statsOnly) {
+      try {
+        // Phase 2: PDF Detection
+        console.log('\n🔍 === PHASE 2: PDF Detection ===');
+        const detectionResult = await detectPedimentosInDatabase({
+          batchSize: parseInt(options.batchSize) || 10,
+        });
+        console.log(
+          `✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`,
+        );
+
+        // Phase 3: Propagate arela_path
+        console.log('\n🔄 === PHASE 3: Propagate Arela Paths ===');
+        const propagateResult = await propagateArelaPath({
+          showProgress: options.showStats || true,
+        });
+        console.log(
+          `✅ Phase 3 Complete: ${propagateResult.updatedCount || 0} paths propagated`,
+        );
+
+        // Phase 4: Upload by RFC
+        if (uploadRfcs && uploadRfcs.length > 0) {
+          console.log('\n🚀 === PHASE 4: Upload by RFC ===');
+
+          // Initialize API mode for uploads
+          await checkCredentials(false);
+
+          const uploadResult = await uploadFilesByRfc({
+            showProgress: options.showStats || true,
+            batchSize: parseInt(options.batchSize) || 10,
+            folderStructure: options.folderStructure,
+          });
+          console.log(`✅ Phase 4 Complete: Upload finished`);
+        } else {
+          console.log('\n⚠️ === PHASE 4: Upload by RFC ===');
+          console.log(
+            '⚠️ UPLOAD_RFCS environment variable not configured, skipping Phase 4',
+          );
+        }
+
+        console.log('\n🎉 All 4 phases completed successfully!');
+      } catch (error) {
+        console.error(`❌ Error in multi-phase execution:`, error.message);
+        process.exit(1);
+      }
+    }
+
+    if (
+      options.showStats &&
+      (sanitizationCache.size > 0 || pathDetectionCache.size > 0)
+    ) {
       console.log(`📊 Performance Statistics:`);
-
-
-
+      if (sanitizationCache.size > 0) {
+        console.log(
+          `  🗂️ Sanitization cache entries: ${sanitizationCache.size}`,
+        );
+      }
+      if (pathDetectionCache.size > 0) {
+        console.log(
+          `  🔍 Path detection cache entries: ${pathDetectionCache.size}`,
+        );
+      }
     }
+
+    // OPTIMIZED: Ensure log buffer is flushed before exit
+    flushLogBuffer();
   });
 
 program.parse();