@arela/uploader 0.2.11 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index-old.js DELETED
@@ -1,2658 +0,0 @@
- #!/usr/bin/env node
- import { createClient } from '@supabase/supabase-js';
- import cliProgress from 'cli-progress';
- import { Command } from 'commander';
- import { config } from 'dotenv';
- import FormData from 'form-data';
- import fs from 'fs';
- import { globby } from 'globby';
- import mime from 'mime-types';
- import fetch from 'node-fetch';
- import path from 'path';
-
- import { FileDetectionService } from './file-detection.js';
-
- config();
-
- const program = new Command();
-
- // Read package.json version at startup
- let packageVersion = '0.2.4'; // fallback
- try {
-   const __filename = new URL(import.meta.url).pathname;
-   const __dirname = path.dirname(__filename);
-   const packageJsonPath = path.resolve(__dirname, '../package.json');
-   const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
-   packageVersion = packageJson.version || '1.0.0';
- } catch (error) {
-   console.warn('⚠️ Could not read package.json version, using fallback');
- }
-
- // Supabase configuration (original)
- const supabaseUrl = process.env.SUPABASE_URL;
- const supabaseKey = process.env.SUPABASE_KEY;
- const bucket = process.env.SUPABASE_BUCKET;
-
- // API configuration (new)
- const API_BASE_URL = process.env.ARELA_API_URL;
- const API_TOKEN = process.env.ARELA_API_TOKEN;
-
- // Improved uploader configuration
- const basePath = process.env.UPLOAD_BASE_PATH;
- const sources = process.env.UPLOAD_SOURCES?.split('|')
-   .map((s) => s.trim())
-   .filter(Boolean);
-
- // RFC configuration for upload
- console.log('🔧 Configured RFCs for upload:', process.env.UPLOAD_RFCS);
- const uploadRfcs = process.env.UPLOAD_RFCS?.split('|')
-   .map((s) => s.trim())
-   .filter(Boolean);
-
- let supabase;
- let apiMode = false;
-
- // Pre-compiled regex patterns for better performance (from original complex uploader)
- const SANITIZATION_PATTERNS = [
-   [/[áàâäãåāăą]/gi, 'a'],
-   [/[éèêëēĕėę]/gi, 'e'],
-   [/[íìîïīĭį]/gi, 'i'],
-   [/[óòôöõōŏő]/gi, 'o'],
-   [/[úùûüūŭů]/gi, 'u'],
-   [/[ñň]/gi, 'n'],
-   [/[ç]/gi, 'c'],
-   [/[ý]/gi, 'y'],
-   [/[멕]/g, 'meok'],
-   [/[시]/g, 'si'],
-   [/[코]/g, 'ko'],
-   [/[용]/g, 'yong'],
-   [/[가-힣]/g, 'kr'],
-   [/[\u0300-\u036f]/g, ''],
-   [/[\\?%*:|"<>[\]~`^]/g, '-'],
-   [/[{}]/g, '-'],
-   [/[&]/g, 'and'],
-   [/[()]/g, ''],
-   [/\s+/g, '-'],
-   [/-+/g, '-'],
-   [/^-+|-+$/g, ''],
-   [/^\.+/, ''],
-   [/[^\w.-]/g, ''],
- ];
- const sanitizationCache = new Map();
-
- const sanitizeFileName = (fileName) => {
-   if (sanitizationCache.has(fileName)) {
-     return sanitizationCache.get(fileName);
-   }
-
-   const ext = path.extname(fileName);
-   const nameWithoutExt = path.basename(fileName, ext);
-
-   if (/^[a-zA-Z0-9._-]+$/.test(nameWithoutExt)) {
-     const result = fileName;
-     sanitizationCache.set(fileName, result);
-     return result;
-   }
-
-   let sanitized = nameWithoutExt.normalize('NFD');
-
-   for (const [pattern, replacement] of SANITIZATION_PATTERNS) {
-     sanitized = sanitized.replace(pattern, replacement);
-   }
-
-   // Additional sanitization for problematic characters
-   sanitized = sanitized
-     .replace(/~/g, '-') // Replace tildes
-     .replace(/\s+/g, '-') // Replace spaces with dashes
-     .replace(/\.+/g, '-') // Replace multiple dots with dashes
-     .replace(/-+/g, '-') // Collapse multiple dashes
-     .replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes
-
-   if (!sanitized) {
-     sanitized = 'unnamed_file';
-   }
-
-   const result = sanitized + ext;
-   sanitizationCache.set(fileName, result);
-   return result;
- };
-
- const checkCredentials = async (forceSupabase = false) => {
-   // Force Supabase mode if explicitly requested
-   if (forceSupabase) {
-     console.log('🔧 Force Supabase mode enabled - skipping API');
-     apiMode = false;
-   } else if (API_BASE_URL && API_TOKEN) {
-     console.log(
-       '🌐 API mode enabled - files will be uploaded to Arela API with automatic processing',
-     );
-     apiMode = true;
-
-     try {
-       const response = await fetch(`${API_BASE_URL}/api/health`, {
-         headers: {
-           'x-api-key': API_TOKEN,
-         },
-       });
-
-       if (!response.ok) {
-         console.warn(
-           '⚠️ API connection failed, falling back to direct Supabase upload',
-         );
-         apiMode = false;
-       } else {
-         console.log('✅ Connected to Arela API');
-         return;
-       }
-     } catch (err) {
-       console.warn(
-         '⚠️ API connection failed, falling back to direct Supabase upload',
-       );
-       apiMode = false;
-     }
-   }
-
-   // Initialize Supabase client if not in API mode or if forced
-   if (!apiMode || forceSupabase) {
-     if (!supabaseUrl || !supabaseKey || !bucket) {
-       console.error(
-         '⚠️ Missing credentials. Please set either:\n' +
-           ' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
-           ' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
-       );
-       process.exit(1);
-     }
-
-     supabase = createClient(supabaseUrl, supabaseKey);
-
-     try {
-       const { error } = await supabase.storage.from(bucket).list('');
-       if (error) {
-         console.error('⚠️ Error connecting to Supabase:', error.message);
-         process.exit(1);
-       }
-       console.log('✅ Connected to Supabase (direct mode)');
-     } catch (err) {
-       console.error('⚠️ Error:', err.message);
-       process.exit(1);
-     }
-   }
- };
-
- const logFilePath = path.resolve(process.cwd(), 'arela-upload.log');
-
- /**
-  * OPTIMIZED: Log buffer to reduce I/O operations
-  */
- let logBuffer = [];
- const LOG_BUFFER_SIZE = 100; // Flush every 100 log entries
- let lastFlushTime = Date.now();
- const LOG_FLUSH_INTERVAL = 5000; // Flush every 5 seconds
-
- const flushLogBuffer = () => {
-   if (logBuffer.length === 0) return;
-
-   try {
-     const logContent = logBuffer.join('\n') + '\n';
-     fs.appendFileSync(logFilePath, logContent);
-     logBuffer = [];
-     lastFlushTime = Date.now();
-   } catch (error) {
-     console.error(`❌ Error writing to log file: ${error.message}`);
-   }
- };
-
- const writeLog = (message) => {
-   try {
-     const timestamp = new Date().toISOString();
-     logBuffer.push(`[${timestamp}] ${message}`);
-
-     // Flush if buffer is full or enough time has passed
-     const now = Date.now();
-     if (
-       logBuffer.length >= LOG_BUFFER_SIZE ||
-       now - lastFlushTime >= LOG_FLUSH_INTERVAL
-     ) {
-       flushLogBuffer();
-     }
-   } catch (error) {
-     console.error(`❌ Error buffering log message: ${error.message}`);
-   }
- };
-
- // Ensure logs are flushed on process exit
- process.on('exit', flushLogBuffer);
- process.on('SIGINT', () => {
-   flushLogBuffer();
-   process.exit(0);
- });
- process.on('SIGTERM', () => {
-   flushLogBuffer();
-   process.exit(0);
- });
-
- /**
-  * OPTIMIZED: Conditional logging to reduce console overhead
-  */
- const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true';
- const BATCH_DELAY = parseInt(process.env.BATCH_DELAY) || 100; // Configurable delay between batches
- const PROGRESS_UPDATE_INTERVAL =
-   parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10; // Update progress every N items
-
- const logVerbose = (message) => {
-   if (VERBOSE_LOGGING) {
-     console.log(message);
-   }
- };
247
- };
248
- const batchReadFileStats = (filePaths) => {
249
- const results = [];
250
-
251
- for (const filePath of filePaths) {
252
- try {
253
- const stats = fs.statSync(filePath);
254
- results.push({ path: filePath, stats, error: null });
255
- } catch (error) {
256
- results.push({ path: filePath, stats: null, error: error.message });
257
- }
258
- }
259
-
260
- return results;
261
- };
262
-
263
- /**
264
- * OPTIMIZED: Cache for year/pedimento detection results to avoid redundant parsing
265
- */
266
- const pathDetectionCache = new Map();
267
-
268
- /**
269
- * OPTIMIZED: Clear the path detection cache (useful for testing or long-running processes)
270
- */
271
- const clearPathDetectionCache = () => {
272
- pathDetectionCache.clear();
273
- };
274
-
275
- /**
276
- * OPTIMIZED: Get detection results with caching
277
- */
278
- const getCachedPathDetection = (filePath, basePath) => {
279
- const cacheKey = `${filePath}|${basePath}`;
280
-
281
- if (pathDetectionCache.has(cacheKey)) {
282
- return pathDetectionCache.get(cacheKey);
283
- }
284
-
285
- const detection = extractYearAndPedimentoFromPath(filePath, basePath);
286
- pathDetectionCache.set(cacheKey, detection);
287
-
288
- return detection;
289
- };
290
-
291
- /**
292
- * Extracts year and pedimento number from file path
293
- * Supports patterns like:
294
- * - /path/to/2024/4023260/file.pdf
295
- * - /path/to/pedimentos/2024/4023260/file.pdf
296
- * - /path/to/docs/año2024/ped4023260/file.pdf
297
- */
298
- const extractYearAndPedimentoFromPath = (filePath, basePath) => {
299
- try {
300
- const relativePath = path.relative(basePath, filePath);
301
- const pathParts = relativePath.split(path.sep);
302
-
303
- let year = null;
304
- let pedimento = null;
305
-
306
- // Pattern 1: Direct year/pedimento structure (2024/4023260)
307
- for (let i = 0; i < pathParts.length - 1; i++) {
308
- const part = pathParts[i];
309
- const nextPart = pathParts[i + 1];
310
-
311
- // Check if current part looks like a year (2020-2030)
312
- const yearMatch = part.match(/^(202[0-9])$/);
313
- if (yearMatch && nextPart) {
314
- year = yearMatch[1];
315
-
316
- // Check if next part looks like a pedimento (4-8 digits)
317
- const pedimentoMatch = nextPart.match(/^(\d{4,8})$/);
318
- if (pedimentoMatch) {
319
- pedimento = pedimentoMatch[1];
320
- break;
321
- }
322
- }
323
- }
324
-
325
- // Pattern 2: Named patterns (año2024, ped4023260)
326
- if (!year || !pedimento) {
327
- for (const part of pathParts) {
328
- if (!year) {
329
- const namedYearMatch = part.match(/(?:año|year|anio)(\d{4})/i);
330
- if (namedYearMatch) {
331
- year = namedYearMatch[1];
332
- }
333
- }
334
-
335
- if (!pedimento) {
336
- const namedPedimentoMatch = part.match(
337
- /(?:ped|pedimento|pedi)(\d{4,8})/i,
338
- );
339
- if (namedPedimentoMatch) {
340
- pedimento = namedPedimentoMatch[1];
341
- }
342
- }
343
- }
344
- }
345
-
346
- // Pattern 3: Loose year detection in any part
347
- if (!year) {
348
- for (const part of pathParts) {
349
- const yearMatch = part.match(/(202[0-9])/);
350
- if (yearMatch) {
351
- year = yearMatch[1];
352
- break;
353
- }
354
- }
355
- }
356
-
357
- // Pattern 4: Loose pedimento detection (4-8 consecutive digits)
358
- if (!pedimento) {
359
- for (const part of pathParts) {
360
- const pedimentoMatch = part.match(/(\d{4,8})/);
361
- if (pedimentoMatch && pedimentoMatch[1].length >= 4) {
362
- pedimento = pedimentoMatch[1];
363
- break;
364
- }
365
- }
366
- }
367
-
368
- return { year, pedimento, detected: !!(year && pedimento) };
369
- } catch (error) {
370
- return {
371
- year: null,
372
- pedimento: null,
373
- detected: false,
374
- error: error.message,
375
- };
376
- }
377
- };
378
-
379
- /**
380
- * OPTIMIZED: Get processed paths with caching and buffered log reading
381
- */
382
- let processedPathsCache = null;
383
- let lastLogModTime = 0;
384
-
385
- const getProcessedPaths = () => {
386
- try {
387
- // Check if log file exists
388
- if (!fs.existsSync(logFilePath)) {
389
- return new Set();
390
- }
391
-
392
- // Check if cache is still valid
393
- const logStats = fs.statSync(logFilePath);
394
- if (processedPathsCache && logStats.mtime.getTime() === lastLogModTime) {
395
- return processedPathsCache;
396
- }
397
-
398
- // Read and parse log file
399
- const processed = new Set();
400
- const content = fs.readFileSync(logFilePath, 'utf-8');
401
-
402
- // Use more efficient regex with global flag
403
- const regex = /(SUCCESS|SKIPPED): .*? -> (.+)/g;
404
- let match;
405
-
406
- while ((match = regex.exec(content)) !== null) {
407
- const path = match[2];
408
- if (path) {
409
- processed.add(path.trim());
410
- }
411
- }
412
-
413
- // Update cache
414
- processedPathsCache = processed;
415
- lastLogModTime = logStats.mtime.getTime();
416
-
417
- return processed;
418
- } catch (error) {
419
- console.error(`⚠️ Error reading processed paths: ${error.message}`);
420
- return new Set();
421
- }
422
- };
423
-
- /**
-  * Upload files to Arela API with automatic detection and organization
-  */
- const uploadToApi = async (files, options) => {
-   const formData = new FormData();
-
-   files.forEach((file) => {
-     const fileBuffer = fs.readFileSync(file.path);
-     formData.append('files', fileBuffer, {
-       filename: file.name,
-       contentType: file.contentType,
-     });
-   });
-
-   if (bucket) formData.append('bucket', bucket);
-   if (options.prefix) formData.append('prefix', options.prefix);
-
-   // New feature: custom folder structure
-   let combinedStructure = null;
-   let cachedDetection = null; // Cache detection result to avoid redundant calls
-
-   if (
-     options.folderStructure &&
-     options.autoDetectStructure &&
-     files.length > 0
-   ) {
-     // Combine custom folder structure with auto-detection
-     const firstFile = files[0];
-     cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
-
-     if (cachedDetection.detected) {
-       const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
-       combinedStructure = `${options.folderStructure}/${autoStructure}`;
-       formData.append('folderStructure', combinedStructure);
-       console.log(
-         `📁 Combined folder structure: ${options.folderStructure} + ${autoStructure} = ${combinedStructure}`,
-       );
-     } else {
-       // Fallback to just custom structure if auto-detection fails
-       formData.append('folderStructure', options.folderStructure);
-       console.log(
-         `📁 Using custom folder structure (auto-detection failed): ${options.folderStructure}`,
-       );
-     }
-   } else if (options.folderStructure) {
-     formData.append('folderStructure', options.folderStructure);
-     console.log(`📁 Using custom folder structure: ${options.folderStructure}`);
-   } else if (options.autoDetectStructure && files.length > 0) {
-     // Try to auto-detect from the first file if no explicit structure is provided
-     const firstFile = files[0];
-     cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
-
-     if (cachedDetection.detected) {
-       const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
-       formData.append('folderStructure', autoStructure);
-     }
-   }
-
-   // If clientPath is specified, pass it for user_metadata
-   if (options.clientPath) {
-     formData.append('clientPath', options.clientPath);
-   }
-
-   formData.append('autoDetect', String(options.autoDetect ?? true));
-   formData.append('autoOrganize', String(options.autoOrganize ?? true));
-   formData.append('batchSize', String(options.batchSize || 10));
-   formData.append('clientVersion', packageVersion);
-
-   const response = await fetch(
-     `${API_BASE_URL}/api/storage/batch-upload-and-process`,
-     {
-       method: 'POST',
-       headers: {
-         'x-api-key': API_TOKEN,
-       },
-       body: formData,
-     },
-   );
-
-   if (!response.ok) {
-     const errorText = await response.text();
-     throw new Error(
-       `API request failed: ${response.status} ${response.statusText} - ${errorText}`,
-     );
-   }
-
-   return response.json();
- };
-
- /**
-  * Upload file directly to Supabase (fallback method)
-  */
- const uploadToSupabase = async (file, uploadPath) => {
-   const content = fs.readFileSync(file);
-   const contentType = mime.lookup(file) || 'application/octet-stream';
-
-   const { data, error } = await supabase.storage
-     .from(bucket)
-     .upload(uploadPath.replace(/\\/g, '/'), content, {
-       upsert: true,
-       contentType,
-     });
-
-   if (error) {
-     throw new Error(error.message);
-   }
-
-   return data;
- };
-
- /**
-  * Insert file stats into uploader table with document detection
-  */
- const insertStatsToUploaderTable = async (files, options) => {
-   if (!supabase) {
-     throw new Error(
-       'Supabase client not initialized. Stats mode requires Supabase connection.',
-     );
-   }
-
-   const detectionService = new FileDetectionService();
-   const records = [];
-
-   for (const file of files) {
-     // OPTIMIZED: Use pre-computed stats if available, otherwise call fs.statSync
-     const stats = file.stats || fs.statSync(file.path);
-     const originalPath = options.clientPath || file.path;
-
-     // Check if record already exists
-     const { data: existingRecords, error: checkError } = await supabase
-       .from('uploader')
-       .select('id, original_path')
-       .eq('original_path', originalPath)
-       .limit(1);
-
-     if (checkError) {
-       console.error(
-         `❌ Error checking for existing record: ${checkError.message}`,
-       );
-       continue;
-     }
-
-     if (existingRecords && existingRecords.length > 0) {
-       console.log(`⏭️ Skipping duplicate: ${path.basename(file.path)}`);
-       continue;
-     }
-
-     // Initialize record with basic file stats
-     const record = {
-       document_type: null,
-       size: stats.size,
-       num_pedimento: null,
-       filename: file.originalName || path.basename(file.path),
-       original_path: originalPath,
-       arela_path: null,
-       status: 'stats',
-       rfc: null,
-       message: null,
-     };
-
-     // Try to detect document type for supported files
-     if (detectionService.isSupportedFileType(file.path)) {
-       try {
-         const detection = await detectionService.detectFile(file.path);
-
-         if (detection.detectedType) {
-           record.document_type = detection.detectedType;
-           record.num_pedimento = detection.detectedPedimento;
-           record.status = 'detected';
-
-           // Set arela_path for pedimento_simplificado documents
-           if (detection.arelaPath) {
-             record.arela_path = detection.arelaPath;
-           }
-
-           // Extract RFC from fields if available
-           const rfcField = detection.fields.find(
-             (f) => f.name === 'rfc' && f.found,
-           );
-           if (rfcField) {
-             record.rfc = rfcField.value;
-           }
-         } else {
-           record.status = 'not-detected';
-           if (detection.error) {
-             record.message = detection.error;
-           }
-         }
-       } catch (error) {
-         console.error(`❌ Error detecting ${record.filename}:`, error.message);
-         record.status = 'detection-error';
-         record.message = error.message;
-       }
-     } else {
-       record.status = 'unsupported';
-       record.message = 'File type not supported for detection';
-     }
-
-     records.push(record);
-   }
-
-   if (records.length === 0) {
-     console.log('📝 No new records to insert (all were duplicates or errors)');
-     return [];
-   }
-
-   console.log(
-     `💾 Inserting ${records.length} new records into uploader table...`,
-   );
-
-   const { data, error } = await supabase
-     .from('uploader')
-     .insert(records)
-     .select();
-
-   if (error) {
-     throw new Error(`Failed to insert stats records: ${error.message}`);
-   }
-
-   return data;
- };
-
- /**
-  * OPTIMIZED: Insert ONLY file stats into uploader table (Phase 1)
-  * No file reading, no detection - just filesystem metadata
-  * Returns summary statistics with accurate counts for new vs updated records
-  */
- const insertStatsOnlyToUploaderTable = async (files, options) => {
-   if (!supabase) {
-     throw new Error(
-       'Supabase client not initialized. Stats mode requires Supabase connection.',
-     );
-   }
-
-   const batchSize = 1000; // Large batch size for performance
-   const allRecords = [];
-
-   // Prepare all file stats data first - OPTIMIZED to use pre-computed stats
-   console.log('📊 Collecting filesystem stats...');
-   for (const file of files) {
-     try {
-       // Use pre-computed stats if available, otherwise call fs.statSync
-       const stats = file.stats || fs.statSync(file.path);
-       const originalPath = options.clientPath || file.path;
-       const fileExtension = path
-         .extname(file.path)
-         .toLowerCase()
-         .replace('.', '');
-
-       const record = {
-         document_type: null,
-         size: stats.size,
-         num_pedimento: null,
-         filename: file.originalName || path.basename(file.path),
-         original_path: originalPath,
-         arela_path: null,
-         status: 'fs-stats',
-         rfc: null,
-         message: null,
-         file_extension: fileExtension,
-         created_at: new Date().toISOString(),
-         modified_at: stats.mtime.toISOString(),
-       };
-
-       allRecords.push(record);
-     } catch (error) {
-       console.error(`❌ Error reading stats for ${file.path}:`, error.message);
-     }
-   }
-
-   if (allRecords.length === 0) {
-     console.log('📝 No file stats to insert');
-     return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
-   }
-
-   console.log(
-     `💾 Processing ${allRecords.length} file stats in batches of ${batchSize}...`,
-   );
-
-   let totalInserted = 0;
-   let totalUpdated = 0;
-
-   // Process in batches with accurate insert/update tracking
-   for (let i = 0; i < allRecords.length; i += batchSize) {
-     const batch = allRecords.slice(i, i + batchSize);
-
-     try {
-       // First, check which records already exist
-       const originalPaths = batch.map((r) => r.original_path);
-       const { data: existingRecords, error: checkError } = await supabase
-         .from('uploader')
-         .select('original_path')
-         .in('original_path', originalPaths);
-
-       if (checkError) {
-         console.error(
-           `❌ Error checking existing records:`,
-           checkError.message,
-         );
-         continue;
-       }
-
-       const existingPaths = new Set(
-         existingRecords?.map((r) => r.original_path) || [],
-       );
-       const newRecords = batch.filter(
-         (r) => !existingPaths.has(r.original_path),
-       );
-       const updateRecords = batch.filter((r) =>
-         existingPaths.has(r.original_path),
-       );
-
-       console.log(
-         `📦 Batch ${Math.floor(i / batchSize) + 1}: ${newRecords.length} new, ${updateRecords.length} updates`,
-       );
-
-       // Insert new records
-       if (newRecords.length > 0) {
-         const { error: insertError } = await supabase
-           .from('uploader')
-           .insert(newRecords);
-
-         if (insertError) {
-           console.error(`❌ Error inserting new records:`, insertError.message);
-         } else {
-           totalInserted += newRecords.length;
-           console.log(`✅ Inserted ${newRecords.length} new records`);
-         }
-       }
-
-       // Update existing records with current filesystem stats
-       if (updateRecords.length > 0) {
-         let batchUpdated = 0;
-         for (const record of updateRecords) {
-           const { error: updateError } = await supabase
-             .from('uploader')
-             .update({
-               size: record.size,
-               modified_at: record.modified_at,
-               filename: record.filename,
-               file_extension: record.file_extension,
-             })
-             .eq('original_path', record.original_path);
-
-           if (!updateError) {
-             batchUpdated++;
-           }
-         }
-         totalUpdated += batchUpdated;
-         console.log(`🔄 Updated ${batchUpdated} existing records`);
-       }
-     } catch (error) {
-       console.error(
-         `❌ Unexpected error in batch ${Math.floor(i / batchSize) + 1}:`,
-         error.message,
-       );
-     }
-   }
-
-   console.log(
-     `📊 Phase 1 Summary: ${totalInserted} new records inserted, ${totalUpdated} existing records updated`,
-   );
-
-   return {
-     totalInserted,
-     totalSkipped: totalUpdated, // Updated records can be considered "processed but not new"
-     totalProcessed: allRecords.length,
-   };
- };
-
- /**
-  * PHASE 2: Process PDF files for pedimento-simplificado detection
-  * Only processes files with status 'fs-stats' and file_extension 'pdf'
-  * Processes records in chunks of 1000 to avoid loading all records into memory
-  */
- const detectPedimentosInDatabase = async (options = {}) => {
-   if (!supabase) {
-     throw new Error('Supabase client not initialized.');
-   }
-
-   console.log(
-     '🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...',
-   );
-   writeLog(
-     '🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents',
-   );
-
-   const detectionService = new FileDetectionService();
-   const processingBatchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
-   const queryBatchSize = 1000; // Process 1000 records at a time
-
-   let totalDetected = 0;
-   let totalProcessed = 0;
-   let totalErrors = 0;
-   let offset = 0;
-   let chunkNumber = 1;
-
-   console.log('📝 Processing PDF files in chunks of 1000 records...');
-   writeLog('📝 Starting PDF detection processing in chunks of 1000 records');
-
-   // Process records in chunks of 1000
-   while (true) {
-     console.log(
-       `\n📥 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
-     );
-     writeLog(
-       `📥 Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})`,
-     );
-
-     // Fetch next chunk of PDF records
-     const { data: pdfRecords, error: queryError } = await supabase
-       .from('uploader')
-       .select('id, original_path, filename, file_extension, status')
-       .eq('status', 'fs-stats')
-       .eq('file_extension', 'pdf')
-       .ilike('filename', '%simp%')
-       .range(offset, offset + queryBatchSize - 1);
-
-     if (queryError) {
-       throw new Error(
-         `Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
-       );
-     }
-
-     // If no records found, we're done
-     if (!pdfRecords || pdfRecords.length === 0) {
-       console.log(`📝 No more PDF files found. Processing completed.`);
-       writeLog(
-         `📝 No more PDF files found. Processing completed at chunk ${chunkNumber}`,
-       );
-       break;
-     }
-
-     console.log(
-       `📊 Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
-     );
-     writeLog(
-       `📊 Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
-     );
-
-     // Create progress bar for this chunk
-     const progressBar = new cliProgress.SingleBar({
-       format: `🔍 Chunk ${chunkNumber} |{bar}| {percentage}% | {value}/{total} | Detected: {detected} | Errors: {errors}`,
-       barCompleteChar: '█',
-       barIncompleteChar: '░',
-       hideCursor: true,
-     });
-
-     progressBar.start(pdfRecords.length, 0, { detected: 0, errors: 0 });
-
-     let chunkDetected = 0;
-     let chunkProcessed = 0;
-     let chunkErrors = 0;
-
-     // Process files in smaller batches within this chunk
-     for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
-       const batch = pdfRecords.slice(i, i + processingBatchSize);
-       const updatePromises = [];
-
-       for (const record of batch) {
-         try {
-           // Check if file still exists
-           if (!fs.existsSync(record.original_path)) {
-             writeLog(
-               `⚠️ FILE NOT FOUND: ${record.filename} at ${record.original_path}`,
-             );
-             updatePromises.push(
-               supabase
-                 .from('uploader')
-                 .update({
-                   status: 'file-not-found',
-                   message: 'File no longer exists at original path',
-                 })
-                 .eq('id', record.id),
-             );
-             chunkErrors++;
-             totalErrors++;
-             continue;
-           }
-
-           // Perform detection
-           const detection = await detectionService.detectFile(
-             record.original_path,
-           );
-           chunkProcessed++;
-           totalProcessed++;
-
-           const updateData = {
-             status: detection.detectedType ? 'detected' : 'not-detected',
-             document_type: detection.detectedType,
-             num_pedimento: detection.detectedPedimento,
-             arela_path: detection.arelaPath,
-             message: detection.error || null,
-           };
-
-           // Extract RFC from fields if available
-           if (detection.fields) {
-             const rfcField = detection.fields.find(
-               (f) => f.name === 'rfc' && f.found,
-             );
-             if (rfcField) {
-               updateData.rfc = rfcField.value;
-             }
-           }
-
-           if (detection.detectedType) {
-             chunkDetected++;
-             totalDetected++;
-             writeLog(
-               `✅ DETECTED: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${detection.fields?.rfc || 'N/A'}`,
-             );
-           } else {
-             writeLog(
-               `⏭️ NOT DETECTED: ${record.filename} - No pedimento-simplificado pattern found`,
-             );
-           }
-
-           updatePromises.push(
-             supabase.from('uploader').update(updateData).eq('id', record.id),
-           );
-         } catch (error) {
-           console.error(
-             `❌ Error detecting ${record.filename}:`,
-             error.message,
-           );
-           writeLog(`❌ ERROR detecting ${record.filename}: ${error.message}`);
-           chunkErrors++;
-           totalErrors++;
-
-           updatePromises.push(
-             supabase
-               .from('uploader')
-               .update({
-                 status: 'detection-error',
-                 message: error.message,
-               })
-               .eq('id', record.id),
-           );
-         }
-       }
-
-       // Execute all updates in parallel for this batch
-       try {
-         await Promise.all(updatePromises);
-       } catch (error) {
-         console.error(
-           `❌ Error updating batch in chunk ${chunkNumber}:`,
-           error.message,
-         );
-       }
-
-       // Update progress for this chunk
-       progressBar.update(Math.min(i + processingBatchSize, pdfRecords.length), {
-         detected: chunkDetected,
-         errors: chunkErrors,
-       });
-     }
-
-     progressBar.stop();
-
-     console.log(
-       `✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
-     );
-     writeLog(
-       `✅ Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
-     );
-
-     // Move to next chunk
-     offset += queryBatchSize;
-     chunkNumber++;
-
-     // If we got fewer records than queryBatchSize, we've reached the end
-     if (pdfRecords.length < queryBatchSize) {
-       console.log(
-         `📝 Reached end of records (chunk had ${pdfRecords.length} records).`,
-       );
-       writeLog(
-         `📝 Reached end of records (chunk had ${pdfRecords.length} records)`,
-       );
-       break;
-     }
-
-     // Small delay between chunks to avoid overwhelming the database
-     await new Promise((resolve) => setTimeout(resolve, 500));
-   }
-
-   console.log(
-     `📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
-   );
-
-   // Write comprehensive log summary
-   writeLog(
-     `📊 PHASE 2 PDF DETECTION COMPLETED - Summary: Detected: ${totalDetected} pedimento-simplificado documents, Processed: ${totalProcessed} PDF files, Errors: ${totalErrors}`,
-   );
-
-   // Ensure logs are flushed
-   flushLogBuffer();
-
-   return {
-     detectedCount: totalDetected,
-     processedCount: totalProcessed,
-     errorCount: totalErrors,
-   };
- };
-
- const processFilesInBatches = async (
-   files,
-   batchSize,
-   options,
-   basePath,
-   folder,
-   sourcePath,
-   processedPaths,
- ) => {
-   let totalUploaded = 0;
-   let totalDetected = 0;
-   let totalOrganized = 0;
-   let totalErrors = 0;
-   let totalSkipped = 0;
-
-   const messageBuffer = [];
-
-   const progressBarFormat = options.statsOnly
-     ? '📊 Processing [{bar}] {percentage}% | {value}/{total} files | Stats: {successCount} | Errors: {failureCount} | Duplicates: {skippedCount}'
-     : '📂 Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}';
-
-   const progressBar = new cliProgress.SingleBar({
-     format: progressBarFormat,
-     barCompleteChar: '█',
-     barIncompleteChar: '░',
-     hideCursor: true,
-   });
-
-   progressBar.start(files.length, 0, {
-     successCount: 0,
-     failureCount: 0,
-     skippedCount: 0,
-   });
-
-   if (options.statsOnly) {
-     // OPTIMIZED Stats-only mode - Only read filesystem stats, no file detection
-     console.log(
-       '📊 Phase 1: Processing files in optimized stats-only mode (no detection)...',
-     );
-
-     for (let i = 0; i < files.length; i += batchSize) {
-       const batch = files.slice(i, i + batchSize);
-
-       // OPTIMIZED: Batch read file stats to reduce I/O overhead
-       const fileStatsResults = batchReadFileStats(batch);
-       const statsFiles = fileStatsResults
-         .filter((result) => result.stats !== null) // Only include files with valid stats
-         .map((result) => {
-           const originalFileName = path.basename(result.path);
-
-           return {
-             path: result.path,
-             originalName: originalFileName,
-             stats: result.stats, // Pass pre-computed stats to avoid redundant calls
-           };
-         });
-
-       // Log any files that couldn't be read
-       const failedFiles = fileStatsResults.filter(
-         (result) => result.error !== null,
-       );
-       if (failedFiles.length > 0) {
-         console.log(
-           `⚠️ Could not read stats for ${failedFiles.length} files in batch`,
-         );
-         failedFiles.forEach((failed) => {
-           console.error(` ❌ ${failed.path}: ${failed.error}`);
-         });
-       }
-
-       try {
-         const result = await insertStatsOnlyToUploaderTable(
-           statsFiles,
-           options,
-         );
-
-         totalUploaded += result.totalInserted;
-         totalSkipped += result.totalSkipped;
-         totalErrors += failedFiles.length; // Count failed file reads as errors
-
-         progressBar.update(Math.min(i + batch.length, files.length), {
-           successCount: totalUploaded,
-           failureCount: totalErrors,
-           skippedCount: totalSkipped,
-         });
-       } catch (error) {
-         console.error(`❌ Error processing stats batch:`, error.message);
-         totalErrors += batch.length;
-
-         progressBar.update(Math.min(i + batch.length, files.length), {
-           successCount: totalUploaded,
-           failureCount: totalErrors,
-           skippedCount: totalSkipped,
-         });
-       }
-     }
-   } else if (apiMode && !options.forceSupabase) {
-     // API Mode - Process in batches
-     for (let i = 0; i < files.length; i += batchSize) {
-       const batch = files.slice(i, i + batchSize);
-       let sanitizedRelativePath;
-
-       const apiFiles = batch
-         .map((file) => {
-           const relativePathRaw = path
-             .relative(basePath, file)
-             .replace(/^[\\/]+/, '')
-             .replace(/\\/g, '/');
-
-           const pathParts = relativePathRaw.split('/');
-           const originalFileName = pathParts[pathParts.length - 1];
-           const sanitizedFileName = sanitizeFileName(originalFileName);
-           pathParts[pathParts.length - 1] = sanitizedFileName;
-           sanitizedRelativePath = pathParts.join('/');
-
-           let uploadPath;
-
-           // Handle combined folder structure + auto-detection
-           if (options.folderStructure && options.autoDetectStructure) {
-             // OPTIMIZED: Use cached detection to avoid redundant parsing
-             const detection = getCachedPathDetection(file, basePath);
-             if (detection.detected) {
-               const autoStructure = `${detection.year}/${detection.pedimento}`;
-               const combinedStructure = `${options.folderStructure}/${autoStructure}`;
-               uploadPath = path.posix.join(
-                 combinedStructure,
-                 sanitizedFileName,
-               );
-               logVerbose(
-                 `📁 Combined structure: ${options.folderStructure}/${autoStructure} for ${originalFileName} -> ${uploadPath}`,
-               );
-             } else {
-               // Fallback to just custom structure if auto-detection fails
-               uploadPath = path.posix.join(
-                 options.folderStructure,
-                 sanitizedFileName,
-               );
-               logVerbose(
-                 `📁 Custom structure (auto-detection failed): ${uploadPath}`,
-               );
-             }
-           } else if (options.folderStructure) {
-             // Use custom folder structure only
-             uploadPath = path.posix.join(
-               options.folderStructure,
-               sanitizedFileName,
-             );
-             logVerbose(`📁 Custom structure: ${uploadPath}`);
-           } else if (options.autoDetectStructure) {
-             // Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
-             const detection = getCachedPathDetection(file, basePath);
-             if (detection.detected) {
-               const autoStructure = `${detection.year}/${detection.pedimento}`;
-               uploadPath = path.posix.join(autoStructure, sanitizedFileName);
-               console.log(
-                 `🔍 Auto-detected: ${autoStructure} for ${originalFileName} -> ${uploadPath}`,
-               );
-             } else {
-               uploadPath = options.prefix
-                 ? path.posix.join(options.prefix, sanitizedRelativePath)
-                 : sanitizedRelativePath;
-               console.log(`📁 Using relative path: ${uploadPath}`);
-             }
-           } else {
-             uploadPath = options.prefix
-               ? path.posix.join(options.prefix, sanitizedRelativePath)
-               : sanitizedRelativePath;
-             console.log(`📁 Using standard path: ${uploadPath}`);
-           }
-
-           if (processedPaths.has(uploadPath)) {
-             totalSkipped++;
-             writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
-             return null;
-           }
-
-           return {
-             path: file,
-             name: sanitizedFileName,
-             originalName: originalFileName,
-             uploadPath: uploadPath.replace(/\\/g, '/'), // Ensure forward slashes
-             contentType: mime.lookup(file) || 'application/octet-stream',
-           };
-         })
-         .filter(Boolean);
-
-       if (apiFiles.length > 0) {
-         // console.log(`🔄 Processing batch of ${apiFiles.length} files`);
-         // apiFiles.forEach(f => console.log(` 📄 ${f.name} -> ${f.uploadPath}`));
-
-         try {
-           // Use clientPath from options if specified, otherwise construct from detection or folder
-           let clientPath = options.clientPath;
-
-           if (!clientPath && apiFiles.length > 0) {
-             const firstFile = apiFiles[0];
-             // OPTIMIZED: Use cached detection to avoid redundant parsing
-             const detection = getCachedPathDetection(firstFile.path, basePath);
-             if (detection.detected) {
-               // clientPath = `${detection.year}/${detection.pedimento}/`;
-               clientPath = path
-                 .resolve(basePath, sanitizedRelativePath)
-                 .replace(/\\/g, '/');
-             } else {
-               // Fallback to folder structure if no year/pedimento detected
-               clientPath = path.resolve(basePath, folder).replace(/\\/g, '/');
-             }
-           }
-
-           const result = await uploadToApi(apiFiles, {
-             ...options,
-             clientPath: clientPath,
-           });
-
-           totalUploaded += result.stats.uploadedCount;
-           totalDetected += result.stats.detectedCount;
-           totalOrganized += result.stats.organizedCount;
-           totalErrors += result.stats.errorCount;
-
-           result.uploaded.forEach((upload) => {
-             const apiFile = apiFiles.find(
-               (f) => f.name === upload.originalName,
-             );
-             if (apiFile) {
-               writeLog(`SUCCESS: ${apiFile.path} -> ${apiFile.uploadPath}`);
-               processedPaths.add(apiFile.uploadPath);
-             }
-           });
-
-           // Update status to "file-uploaded" for successfully uploaded files
-           if (result.uploaded && result.uploaded.length > 0 && supabase) {
-             try {
-               const uploadedFilePaths = result.uploaded
-                 .map((upload) => {
-                   const apiFile = apiFiles.find(
-                     (f) =>
-                       f.name === upload.originalName ||
-                       f.originalName === upload.originalName,
-                   );
-                   return apiFile ? apiFile.path : null;
-                 })
-                 .filter(Boolean);
-
-               if (uploadedFilePaths.length > 0) {
-                 await supabase
-                   .from('uploader')
-                   .update({ status: 'file-uploaded' })
-                   .in('original_path', uploadedFilePaths);
-
-                 console.log(
-                   ` 📝 Updated status to "file-uploaded" for ${uploadedFilePaths.length} files`,
-                 );
-               }
-             } catch (error) {
-               console.error(
-                 ` ⚠️ Error updating status for uploaded files: ${error.message}`,
-               );
-             }
-           }
-
-           result.errors.forEach((error) => {
-             writeLog(
-               `ERROR: ${error.fileName}: ${error.error} (${error.step})`,
-             );
-             messageBuffer.push(
-               `❌ ${error.fileName}: ${error.error} (${error.step})`,
-             );
-           });
-         } catch (error) {
-           totalErrors += apiFiles.length;
-           apiFiles.forEach((file) => {
-             writeLog(`ERROR: ${file.path}: ${error.message}`);
-             messageBuffer.push(`❌ ${file.name}: ${error.message}`);
-           });
-         }
-       }
-
-       progressBar.update(i + batch.length, {
-         successCount: totalUploaded,
-         failureCount: totalErrors,
-         skippedCount: totalSkipped,
-       });
-
-       if (i + batchSize < files.length) {
-         await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
-       }
-     }
-   } else {
-     // Direct Supabase mode
-     for (let i = 0; i < files.length; i++) {
-       const file = files[i];
-       try {
-         const relativePath = path.relative(basePath, file);
-         let uploadPath;
-
-         // Handle combined folder structure + auto-detection
-         if (options.folderStructure && options.autoDetectStructure) {
-           const detection = getCachedPathDetection(file, basePath);
-           if (detection.detected) {
-             const autoStructure = `${detection.year}/${detection.pedimento}`;
-             const combinedStructure = `${options.folderStructure}/${autoStructure}`;
-             const fileName = path.basename(file);
-             uploadPath = path.join(combinedStructure, fileName);
-             console.log(
-               `📁 Combined structure: ${options.folderStructure}/${autoStructure} for ${fileName}`,
-             );
-           } else {
-             // Fallback to just custom structure if auto-detection fails
-             const fileName = path.basename(file);
-             uploadPath = path.join(options.folderStructure, fileName);
-             console.log(
-               `📁 Custom structure (auto-detection failed): ${uploadPath}`,
-             );
-           }
-         } else if (options.folderStructure) {
-           // Use custom folder structure only
-           const fileName = path.basename(file);
-           uploadPath = path.join(options.folderStructure, fileName);
-           console.log(`📁 Custom structure: ${uploadPath}`);
-         } else if (options.autoDetectStructure) {
-           // Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
-           const detection = getCachedPathDetection(file, basePath);
-           if (detection.detected) {
-             const autoStructure = `${detection.year}/${detection.pedimento}`;
-             const fileName = path.basename(file);
-             uploadPath = path.join(autoStructure, fileName);
-           } else {
-             uploadPath = options.prefix
-               ? path.join(options.prefix, relativePath)
-               : relativePath;
-           }
-         } else {
-           uploadPath = options.prefix
-             ? path.join(options.prefix, relativePath)
-             : relativePath;
-         }
-
-         if (processedPaths.has(uploadPath)) {
-           totalSkipped++;
-           writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
-
-           // Update status to "file-uploaded" for skipped files (they already exist)
-           if (supabase) {
-             try {
-               await supabase
-                 .from('uploader')
-                 .update({ status: 'file-uploaded' })
-                 .eq('original_path', file);
-             } catch (error) {
-               console.error(
-                 ` ⚠️ Error updating status for skipped file: ${error.message}`,
-               );
-             }
-           }
-         } else {
-           await uploadToSupabase(file, uploadPath);
-           totalUploaded++;
-           writeLog(`SUCCESS: ${file} -> ${uploadPath}`);
-           processedPaths.add(uploadPath);
-
-           // Update status to "file-uploaded" for successfully uploaded files
-           if (supabase) {
-             try {
-               await supabase
-                 .from('uploader')
-                 .update({ status: 'file-uploaded' })
-                 .eq('original_path', file);
-             } catch (error) {
-               console.error(
-                 ` ⚠️ Error updating status for uploaded file: ${error.message}`,
-               );
-             }
-           }
-         }
-       } catch (error) {
-         totalErrors++;
-         writeLog(`ERROR: ${file}: ${error.message}`);
-         messageBuffer.push(`❌ ${path.basename(file)}: ${error.message}`);
-       }
-
-       progressBar.update(i + 1, {
-         successCount: totalUploaded,
-         failureCount: totalErrors,
-         skippedCount: totalSkipped,
-       });
-     }
-   }
-
-   progressBar.stop();
-
-   const errorMessages = messageBuffer.filter((msg) => msg.startsWith('❌'));
-   if (errorMessages.length > 0) {
-     console.log('\n🚨 Errors encountered during processing:');
-     errorMessages.forEach((msg) => console.error(msg));
-   }
-
-   return {
-     successCount: totalUploaded,
-     detectedCount: totalDetected,
-     organizedCount: totalOrganized,
-     failureCount: totalErrors,
-     skippedCount: totalSkipped,
-   };
- };
-
- /**
-  * Upload files to Arela API based on specific RFC values
-  */
- const uploadFilesByRfc = async (options = {}) => {
-   if (!supabase) {
-     console.error('❌ Supabase client not initialized');
-     process.exit(1);
-   }
-
-   if (!API_BASE_URL || !API_TOKEN) {
-     console.error(
-       '❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.',
-     );
-     process.exit(1);
-   }
-
-   if (!uploadRfcs || uploadRfcs.length === 0) {
-     console.error(
-       '❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.',
-     );
-     console.error(
-       ' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
-     );
-     process.exit(1);
-   }
-
-   console.log('🎯 RFC-based Upload Mode');
-   console.log(`📋 Target RFCs: ${uploadRfcs.join(', ')}`);
-   console.log('🔍 Searching for files to upload...');
-
-   // First, count total files for the RFCs to show filtering effect
-   const { count: totalRfcFiles, error: countError } = await supabase
-     .from('uploader')
-     .select('*', { count: 'exact', head: true })
-     .in('rfc', uploadRfcs)
-     .not('arela_path', 'is', null);
-
-   if (countError) {
-     console.warn('⚠️ Could not count total RFC files:', countError.message);
-   } else {
-     console.log(`📊 Total files for specified RFCs: ${totalRfcFiles || 0}`);
-   }
-
-   // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
-   console.log(
-     '🎯 Finding pedimento_simplificado records for specified RFCs...',
-   );
-   const { data: pedimentoRfcRecords, error: pedimentoRfcError } = await supabase
-     .from('uploader')
-     .select('arela_path')
-     .eq('document_type', 'pedimento_simplificado')
-     .in('rfc', uploadRfcs)
-     .not('arela_path', 'is', null);
-
-   if (pedimentoRfcError) {
-     console.error(
-       '❌ Error fetching pedimento RFC records:',
-       pedimentoRfcError.message,
-     );
-     return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
-   }
-
-   if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
-     console.log(
-       'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
-     );
-     return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
-   }
-
-   // Get unique arela_paths from pedimento records
-   const uniqueArelaPaths = [
-     ...new Set(pedimentoRfcRecords.map((r) => r.arela_path)),
-   ];
-   console.log(
-     `📋 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`,
-   );
-
-   // Step 2: Get all files with these arela_paths that haven't been uploaded yet
-   let rfcRecords = [];
-   const chunkSize = 50;
-
-   for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
-     const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
-
-     const { data: chunkFiles, error: chunkError } = await supabase
-       .from('uploader')
-       .select('arela_path')
-       .in('arela_path', pathChunk)
-       .neq('status', 'file-uploaded')
-       .not('arela_path', 'is', null);
-
-     if (chunkError) {
-       console.error(
-         '❌ Error fetching files for arela_paths chunk:',
-         chunkError.message,
-       );
-       return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
-     }
-
-     if (chunkFiles && chunkFiles.length > 0) {
-       rfcRecords = rfcRecords.concat(chunkFiles);
-     }
-   }
-
-   if (!rfcRecords || rfcRecords.length === 0) {
-     if (totalRfcFiles && totalRfcFiles > 0) {
-       console.log(
-         `ℹ️ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`,
-       );
-       console.log(' No new files to upload.');
-     } else {
-       console.log('ℹ️ No files found for the specified RFCs with arela_path');
-       console.log(
-         ` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`,
-       );
-     }
-     return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
-   }
-
-   // Show filtering effect
-   const uploadableArelaPaths = [
-     ...new Set(rfcRecords.map((r) => r.arela_path)),
-   ];
-   const skipped = (totalRfcFiles || 0) - rfcRecords.length;
-   if (skipped > 0) {
-     console.log(
-       `📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`,
-     );
-   } else {
-     console.log(`📊 Found ${rfcRecords.length} files ready for upload`);
-   }
-
-   console.log(
-     `🎯 Found ${uploadableArelaPaths.length} unique arela_path(s) with files ready for upload`,
-   );
-
-   // Step 3: Get ALL files that have these arela_paths (including supporting documents)
-   // Process arela_paths in smaller chunks to avoid URI length limits
-   let allRelatedFiles = [];
-   const arelaPathChunkSize = 50; // Process 50 arela_paths at a time to avoid URI limits
-   const queryBatchSize = 1000;
-
-   console.log(
-     '📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...',
-   );
-
-   // Process arela_paths in chunks
-   for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
-     const arelaPathChunk = uploadableArelaPaths.slice(
-       i,
-       i + arelaPathChunkSize,
-     );
-     console.log(
-       ` Processing arela_path chunk ${Math.floor(i / arelaPathChunkSize) + 1}/${Math.ceil(uploadableArelaPaths.length / arelaPathChunkSize)} (${arelaPathChunk.length} paths)`,
-     );
-
-     // For each chunk of arela_paths, use pagination to get all related files
-     let hasMore = true;
-     let offset = 0;
-
-     while (hasMore) {
-       const { data: batch, error: queryError } = await supabase
-         .from('uploader')
-         .select('id, original_path, arela_path, filename, rfc, document_type')
-         .in('arela_path', arelaPathChunk)
-         .not('original_path', 'is', null)
-         .neq('status', 'file-uploaded')
-         .range(offset, offset + queryBatchSize - 1);
-
-       if (queryError) {
-         console.error(
-           `❌ Error fetching related files for chunk ${Math.floor(i / arelaPathChunkSize) + 1}:`,
-           queryError.message,
-         );
-         return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
-       }
-
-       if (!batch || batch.length === 0) {
-         hasMore = false;
-       } else {
-         allRelatedFiles = allRelatedFiles.concat(batch);
-         offset += queryBatchSize;
-
-         // If we got less than queryBatchSize, we've reached the end for this chunk
-         if (batch.length < queryBatchSize) {
-           hasMore = false;
-         }
-       }
-     }
-
-     // Small delay between chunks to avoid overwhelming the database
-     if (i + arelaPathChunkSize < uploadableArelaPaths.length) {
-       await new Promise((resolve) => setTimeout(resolve, 100));
-     }
-   }
-
-   if (!allRelatedFiles || allRelatedFiles.length === 0) {
-     console.log('ℹ️ No related files found for the arela_paths');
-     return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
-   }
-
-   console.log(
-     `📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents, excluding already uploaded)`,
-   );
-
-   // Group by RFC and arela_path for better organization
-   const filesByRfc = allRelatedFiles.reduce((acc, record) => {
-     const rfc = record.rfc || 'No RFC';
-     if (!acc[rfc]) {
-       acc[rfc] = [];
-     }
-     acc[rfc].push(record);
-     return acc;
-   }, {});
-
-   console.log('📊 Files by RFC (including supporting documents):');
-   for (const [rfc, files] of Object.entries(filesByRfc)) {
-     const documentTypes = [
-       ...new Set(files.map((f) => f.document_type || 'Unknown')),
-     ];
-     console.log(
-       ` ${rfc}: ${files.length} files (${documentTypes.join(', ')})`,
-     );
-   }
-
-   // Group by arela_path for upload organization
-   const filesByPath = allRelatedFiles.reduce((acc, record) => {
-     const path = record.arela_path;
-     if (!acc[path]) {
-       acc[path] = [];
-     }
-     acc[path].push(record);
-     return acc;
-   }, {});
-
-   console.log('📁 Files grouped by arela_path:');
-   for (const [path, files] of Object.entries(filesByPath)) {
-     console.log(` ${path}: ${files.length} files`);
-   }
-
1674
- let totalProcessed = 0;
1675
- let totalUploaded = 0;
1676
- let totalErrors = 0;
1677
- let totalSkipped = 0;
1678
-
1679
- // Create progress bar
1680
- const progressBar = new cliProgress.SingleBar({
1681
- format:
1682
- '🚀 Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
1683
- barCompleteChar: '█',
1684
- barIncompleteChar: '░',
1685
- hideCursor: true,
1686
- });
1687
-
1688
- if (options.showProgress !== false) {
1689
- progressBar.start(allRelatedFiles.length, 0, {
1690
- uploaded: 0,
1691
- errors: 0,
1692
- skipped: 0,
1693
- });
1694
- }
1695
-
1696
- const batchSize = parseInt(options.batchSize) || 10;
1697
- console.log(`📦 Processing in batches of ${batchSize} files`);
1698
-
1699
- // Process files in batches
1700
- for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
1701
- const batch = allRelatedFiles.slice(i, i + batchSize);
1702
- const batchNumber = Math.floor(i / batchSize) + 1;
1703
- const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
1704
-
1705
- console.log(
1706
- `\n📦 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`,
1707
- );
1708
-
1709
- // Prepare files for upload
1710
- const filesToUpload = [];
1711
-
1712
- for (const record of batch) {
1713
- totalProcessed++;
1714
-
1715
- try {
1716
- const originalPath = record.original_path;
1717
-
1718
- // Check if file exists
1719
- if (!fs.existsSync(originalPath)) {
1720
- console.log(` ⚠️ File not found: ${originalPath}`);
1721
- totalSkipped++;
1722
- continue;
1723
- }
1724
-
1725
- // OPTIMIZED: Read file and get size from buffer instead of separate fs.statSync call
1726
- const fileBuffer = fs.readFileSync(originalPath);
1727
-
1728
- filesToUpload.push({
1729
- path: originalPath,
1730
- buffer: fileBuffer,
1731
- size: fileBuffer.length, // Get size from buffer instead of fs.statSync
1732
- name: record.filename,
1733
- arelaPath: record.arela_path,
1734
- rfc: record.rfc,
1735
- documentType: record.document_type,
1736
- });
1737
- } catch (error) {
1738
- console.error(
1739
- ` ❌ Error reading file ${record.original_path}:`,
1740
- error.message,
1741
- );
1742
- totalErrors++;
1743
- }
1744
-
1745
- if (options.showProgress !== false) {
1746
- progressBar.update(totalProcessed, {
1747
- uploaded: totalUploaded,
1748
- errors: totalErrors,
1749
- skipped: totalSkipped,
1750
- });
1751
- }
1752
- }
1753
-
1754
- // Upload the batch if we have files
1755
- if (filesToUpload.length > 0) {
1756
- try {
1757
- console.log(
1758
- ` 🚀 Uploading ${filesToUpload.length} files to Arela API...`,
1759
- );
1760
-
1761
- const formData = new FormData();
1762
-
1763
- // Add files to form data
1764
- filesToUpload.forEach((file, index) => {
1765
- formData.append(`files`, file.buffer, {
1766
- filename: file.name,
1767
- contentType: mime.lookup(file.name) || 'application/octet-stream',
1768
- });
1769
- });
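// NOTE: the `formData` built above is never sent; the code below regroups
// `filesToUpload` by arela_path and builds a fresh `pathFormData` per group,
// so this first FormData (and its unused `index` parameter) is dead code.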
1770
-
1771
- // Instead of using per-file folder structures, we'll group by arela_path and upload separately
1772
- // Group files by their arela_path to upload them in correct structure
1773
- const filesByPath = filesToUpload.reduce((acc, file) => {
1774
- const path = file.arelaPath.replace(/\/$/, '');
1775
- if (!acc[path]) {
1776
- acc[path] = [];
1777
- }
1778
- acc[path].push(file);
1779
- return acc;
1780
- }, {});
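// NOTE: `.replace(/\/$/, '')` normalizes trailing slashes, so '2024/4023260/'
// and '2024/4023260' fall into the same group. Here, as in the earlier
// `filesByPath` reducer, the local `const path` shadows the imported `path` module.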
1781
-
1782
- // Upload each group separately with its folder structure
1783
- for (const [arelaPath, pathFiles] of Object.entries(filesByPath)) {
1784
- const pathFormData = new FormData();
1785
-
1786
- pathFiles.forEach((file) => {
1787
- pathFormData.append('files', file.buffer, {
1788
- filename: file.name,
1789
- contentType: mime.lookup(file.name) || 'application/octet-stream',
1790
- });
1791
- });
1792
-
1793
- // Set folder structure for this group - concatenate custom prefix with arela_path
1794
- const folderStructure = options.folderStructure
1795
- ? `${options.folderStructure}/${arelaPath}`
1796
- .replace(/\/+/g, '/')
1797
- .replace(/\/$/, '')
1798
- : arelaPath;
1799
- pathFormData.append('folderStructure', folderStructure);
1800
- pathFormData.append('autoDetect', 'true');
1801
- pathFormData.append('autoOrganize', 'false');
1802
- pathFormData.append('batchSize', String(pathFiles.length));
1803
- pathFormData.append('clientVersion', packageVersion);
1804
- if (bucket) {
1805
- pathFormData.append('bucket', bucket);
1806
- }
1807
-
1808
- console.log(
1809
- ` 📁 Uploading ${pathFiles.length} files to: ${folderStructure}`,
1810
- );
1811
-
1812
- const response = await fetch(
1813
- `${API_BASE_URL}/api/storage/batch-upload-and-process`,
1814
- {
1815
- method: 'POST',
1816
- headers: {
1817
- 'x-api-key': API_TOKEN,
1818
- },
1819
- body: pathFormData,
1820
- },
1821
- );
1822
-
1823
- if (!response.ok) {
1824
- const errorText = await response.text();
1825
- throw new Error(`HTTP ${response.status}: ${errorText}`);
1826
- }
1827
-
1828
- const result = await response.json();
1829
-
1830
- // Check if upload was successful based on stats rather than success field
1831
- const isSuccessful =
1832
- result.stats &&
1833
- result.stats.uploadedCount > 0 &&
1834
- result.stats.errorCount === 0;
1835
-
1836
- if (isSuccessful) {
1837
- console.log(
1838
- ` ✅ Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`,
1839
- );
1840
- totalUploaded += result.stats.uploadedCount;
1841
-
1842
- if (result.stats.detectedCount > 0) {
1843
- console.log(
1844
- ` 🔍 Files detected: ${result.stats.detectedCount}`,
1845
- );
1846
- }
1847
- if (result.stats.organizedCount > 0) {
1848
- console.log(
1849
- ` 📁 Files organized: ${result.stats.organizedCount}`,
1850
- );
1851
- }
1852
-
1853
- // Update status to "file-uploaded" for successfully uploaded files
1854
- try {
1855
- const uploadedFilePaths = pathFiles.map((file) => file.path);
1856
- await supabase
1857
- .from('uploader')
1858
- .update({ status: 'file-uploaded' })
1859
- .in('original_path', uploadedFilePaths);
1860
-
1861
- console.log(
1862
- ` 📝 Updated status to "file-uploaded" for ${uploadedFilePaths.length} files`,
1863
- );
1864
- } catch (error) {
1865
- console.error(
1866
- ` ⚠️ Error updating status for uploaded files: ${error.message}`,
1867
- );
1868
- }
1869
- } else {
1870
- console.error(` ❌ Upload failed for ${folderStructure}:`);
1871
- if (result.errors && result.errors.length > 0) {
1872
- result.errors.forEach((error) => {
1873
- console.error(` - ${error.fileName}: ${error.error}`);
1874
- });
1875
- }
1876
- totalErrors += pathFiles.length;
1877
- }
1878
-
1879
- // Handle files that already exist (usually indicated in result.uploaded or result.skipped)
1880
- if (result.uploaded && result.uploaded.length > 0) {
1881
- try {
1882
- const alreadyUploadedPaths = result.uploaded
1883
- .filter(
1884
- (upload) =>
1885
- upload.status === 'already_exists' || upload.alreadyExists,
1886
- )
1887
- .map((upload) => {
1888
- // Find the corresponding file path from pathFiles
1889
- const matchingFile = pathFiles.find(
1890
- (f) =>
1891
- f.name === upload.fileName ||
1892
- f.name === upload.originalName,
1893
- );
1894
- return matchingFile ? matchingFile.path : null;
1895
- })
1896
- .filter(Boolean);
1897
-
1898
- if (alreadyUploadedPaths.length > 0) {
1899
- await supabase
1900
- .from('uploader')
1901
- .update({ status: 'file-uploaded' })
1902
- .in('original_path', alreadyUploadedPaths);
1903
-
1904
- console.log(
1905
- ` 📝 Updated status to "file-uploaded" for ${alreadyUploadedPaths.length} already existing files`,
1906
- );
1907
- }
1908
- } catch (error) {
1909
- console.error(
1910
- ` ⚠️ Error updating status for already existing files: ${error.message}`,
1911
- );
1912
- }
1913
- }
1914
-
1915
- // Small delay between path groups
1916
- await new Promise((resolve) => setTimeout(resolve, 100));
1917
- }
1918
- } catch (error) {
1919
- console.error(
1920
- ` ❌ Error uploading batch ${batchNumber}:`,
1921
- error.message,
1922
- );
1923
- totalErrors += filesToUpload.length;
1924
- }
1925
- }
1926
-
1927
- // Small delay between batches
1928
- if (i + batchSize < allRelatedFiles.length) {
1929
- await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
1930
- }
1931
- }
1932
-
1933
- if (options.showProgress !== false) {
1934
- progressBar.stop();
1935
- }
1936
-
1937
- console.log(`\n${'='.repeat(60)}`);
1938
- console.log(`🎯 RFC-BASED UPLOAD COMPLETED`);
1939
- console.log(`${'='.repeat(60)}`);
1940
- console.log(` 📋 Files processed: ${totalProcessed}`);
1941
- console.log(` ✅ Files uploaded: ${totalUploaded}`);
1942
- console.log(` ⏭️ Files skipped: ${totalSkipped}`);
1943
- console.log(` ❌ Errors: ${totalErrors}`);
1944
- console.log(`${'='.repeat(60)}\n`);
1945
-
1946
- return {
1947
- processedCount: totalProcessed,
1948
- uploadedCount: totalUploaded,
1949
- skippedCount: totalSkipped,
1950
- errorCount: totalErrors,
1951
- };
1952
- };
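// Each batch POST above is sent exactly once, and any HTTP failure is counted
// as a hard error. A minimal retry wrapper — a sketch only, not part of this
// package — could harden those calls. It assumes the node-fetch and form-data
// imports already at the top of this file; the caller passes a factory because
// a form-data body is a stream and cannot be re-sent after a failed attempt:
const postWithRetry = async (url, makeFormData, headers, retries = 3) => {
  for (let attempt = 1; attempt <= retries; attempt++) {
    try {
      const response = await fetch(url, {
        method: 'POST',
        headers,
        body: makeFormData(), // rebuild the multipart body for every attempt
      });
      if (response.ok) return response;
      if (attempt === retries) {
        throw new Error(`HTTP ${response.status}: ${await response.text()}`);
      }
    } catch (error) {
      if (attempt === retries) throw error;
    }
    // Exponential backoff between attempts: 200ms, 400ms, 800ms, ...
    await new Promise((resolve) => setTimeout(resolve, 100 * 2 ** attempt));
  }
};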
1953
-
1954
- /**
1955
- * Propagate arela_path from pedimento_simplificado records to related files with same base path
1956
- */
1957
- const propagateArelaPath = async (options = {}) => {
1958
- if (!supabase) {
1959
- console.error('❌ Supabase client not initialized');
1960
- process.exit(1);
1961
- }
1962
-
1963
- console.log('🔍 Finding pedimento_simplificado records with arela_path...');
1964
- writeLog('🔍 Starting arela_path propagation process');
1965
-
1966
- // Get all pedimento_simplificado records that have arela_path
1967
- const { data: pedimentoRecords, error: pedimentoError } = await supabase
1968
- .from('uploader')
1969
- .select('id, original_path, arela_path, filename')
1970
- .eq('document_type', 'pedimento_simplificado')
1971
- .not('arela_path', 'is', null);
1972
-
1973
- if (pedimentoError) {
1974
- console.error(
1975
- '❌ Error fetching pedimento records:',
1976
- pedimentoError.message,
1977
- );
1978
- return { processedCount: 0, updatedCount: 0, errorCount: 1 };
1979
- }
1980
-
1981
- if (!pedimentoRecords || pedimentoRecords.length === 0) {
1982
- console.log('ℹ️ No pedimento_simplificado records with arela_path found');
1983
- writeLog('ℹ️ No pedimento_simplificado records with arela_path found');
1984
- return { processedCount: 0, updatedCount: 0, errorCount: 0 };
1985
- }
1986
-
1987
- console.log(
1988
- `📋 Found ${pedimentoRecords.length} pedimento records with arela_path`,
1989
- );
1990
- writeLog(
1991
- `📋 Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
1992
- );
1993
-
1994
- let totalProcessed = 0;
1995
- let totalUpdated = 0;
1996
- let totalErrors = 0;
1997
-
1998
- // Create progress bar
1999
- const progressBar = new cliProgress.SingleBar({
2000
- format:
2001
- '🔄 Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
2002
- barCompleteChar: '█',
2003
- barIncompleteChar: '░',
2004
- hideCursor: true,
2005
- });
2006
-
2007
- if (options.showProgress !== false) {
2008
- progressBar.start(pedimentoRecords.length, 0, {
2009
- updated: 0,
2010
- errors: 0,
2011
- });
2012
- }
2013
-
2014
- // Process each pedimento record
2015
- for (const pedimento of pedimentoRecords) {
2016
- try {
2017
- totalProcessed++;
2018
-
2019
- // Extract base path from original_path (remove filename)
2020
- const basePath = path.dirname(pedimento.original_path);
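// NOTE: this local `basePath` shadows the module-level `basePath` read from
// UPLOAD_BASE_PATH at startup; renaming one of the two would avoid confusion.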
2021
-
2022
- console.log(`\n🔍 Processing: ${pedimento.filename}`);
2023
- console.log(` 📁 Base path: ${basePath}`);
2024
- writeLog(
2025
- `🔍 Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`,
2026
- );
2027
-
2028
- // Extract folder part from existing arela_path by removing the filename
2029
- const existingPath = pedimento.arela_path;
2030
- const folderArelaPath = existingPath.includes('/')
2031
- ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
2032
- : existingPath.endsWith('/')
2033
- ? existingPath
2034
- : existingPath + '/';
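// e.g. '2024/4023260/pedimento.pdf' -> '2024/4023260/', while a bare folder
// path such as '2024/4023260/' is kept as-is, so related files inherit the
// pedimento's folder rather than its filename.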
2035
-
2036
- console.log(` 🎯 Original arela path: ${existingPath}`);
2037
- console.log(` 📁 Folder arela path: ${folderArelaPath}`);
2038
-
2039
- // Find all files with the same base path that don't have arela_path yet
2040
- const { data: relatedFiles, error: relatedError } = await supabase
2041
- .from('uploader')
2042
- .select('id, filename, original_path')
2043
- .like('original_path', `${basePath}%`)
2044
- .is('arela_path', null)
2045
- .neq('id', pedimento.id); // Exclude the pedimento itself
2046
-
2047
- if (relatedError) {
2048
- console.error(
2049
- `❌ Error finding related files for ${pedimento.filename}:`,
2050
- relatedError.message,
2051
- );
2052
- totalErrors++;
2053
- continue;
2054
- }
2055
-
2056
- if (!relatedFiles || relatedFiles.length === 0) {
2057
- console.log(` ℹ️ No related files found needing arela_path update`);
2058
- writeLog(`ℹ️ No related files found for ${pedimento.filename}`);
2059
- continue;
2060
- }
2061
-
2062
- console.log(
2063
- ` 📄 Found ${relatedFiles.length} related files to update:`,
2064
- );
2065
- writeLog(
2066
- `📄 Found ${relatedFiles.length} related files to update for ${pedimento.filename}`,
2067
- );
2068
-
2069
- // Show first 10 files, then indicate if there are more
2070
- const filesToShow = relatedFiles.slice(0, 10);
2071
- filesToShow.forEach((file) => {
2072
- console.log(` - ${file.filename}`);
2073
- });
2074
-
2075
- if (relatedFiles.length > 10) {
2076
- console.log(` ... and ${relatedFiles.length - 10} more files`);
2077
- }
2078
-
2079
- // Process files in batches to avoid URI length limitations
2080
- const BATCH_SIZE = 50; // Process 50 files at a time
2081
- const fileIds = relatedFiles.map((f) => f.id);
2082
- let batchErrors = 0;
2083
- let batchUpdated = 0;
2084
-
2085
- console.log(
2086
- ` 🔄 Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`,
2087
- );
2088
-
2089
- for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
2090
- const batchIds = fileIds.slice(i, i + BATCH_SIZE);
2091
- const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
2092
- const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
2093
-
2094
- console.log(
2095
- ` 📦 Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
2096
- );
2097
-
2098
- try {
2099
- const { error: updateError } = await supabase
2100
- .from('uploader')
2101
- .update({ arela_path: folderArelaPath })
2102
- .in('id', batchIds);
2103
-
2104
- if (updateError) {
2105
- console.error(
2106
- ` ❌ Error in batch ${batchNumber}:`,
2107
- updateError.message,
2108
- );
2109
- batchErrors++;
2110
- } else {
2111
- console.log(
2112
- ` ✅ Batch ${batchNumber} completed: ${batchIds.length} files updated`,
2113
- );
2114
- batchUpdated += batchIds.length;
2115
- }
2116
- } catch (error) {
2117
- console.error(
2118
- ` ❌ Exception in batch ${batchNumber}:`,
2119
- error.message,
2120
- );
2121
- batchErrors++;
2122
- }
2123
-
2124
- // Small delay between batches to avoid overwhelming the database
2125
- if (i + BATCH_SIZE < fileIds.length) {
2126
- await new Promise((resolve) => setTimeout(resolve, 100));
2127
- }
2128
- }
2129
-
2130
- if (batchErrors > 0) {
2131
- console.error(
2132
- `❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
2133
- );
2134
- writeLog(
2135
- `❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`,
2136
- );
2137
- totalErrors++;
2138
- } else {
2139
- console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
2140
- writeLog(
2141
- `✅ Successfully updated ${batchUpdated} related files for ${pedimento.filename} -> ${folderArelaPath}`,
2142
- );
2143
- totalUpdated += batchUpdated;
2144
- }
2145
- } catch (error) {
2146
- console.error(
2147
- `❌ Error processing ${pedimento.filename}:`,
2148
- error.message,
2149
- );
2150
- writeLog(`❌ Error processing ${pedimento.filename}: ${error.message}`);
2151
- totalErrors++;
2152
- }
2153
-
2154
- if (options.showProgress !== false) {
2155
- progressBar.update(totalProcessed, {
2156
- updated: totalUpdated,
2157
- errors: totalErrors,
2158
- });
2159
- }
2160
- }
2161
-
2162
- if (options.showProgress !== false) {
2163
- progressBar.stop();
2164
- }
2165
-
2166
- console.log(`\n${'='.repeat(60)}`);
2167
- console.log(`🎯 ARELA PATH PROPAGATION COMPLETED`);
2168
- console.log(`${'='.repeat(60)}`);
2169
- console.log(` 📋 Pedimento records processed: ${totalProcessed}`);
2170
- console.log(` ✅ Related files updated: ${totalUpdated}`);
2171
- console.log(` ❌ Errors: ${totalErrors}`);
2172
- console.log(`${'='.repeat(60)}\n`);
2173
-
2174
- // Write comprehensive log summary
2175
- writeLog(
2176
- `🎯 ARELA PATH PROPAGATION COMPLETED - Summary: Processed: ${totalProcessed} pedimento records, Updated: ${totalUpdated} related files, Errors: ${totalErrors}`,
2177
- );
2178
-
2179
- // Ensure logs are flushed
2180
- flushLogBuffer();
2181
-
2182
- return {
2183
- processedCount: totalProcessed,
2184
- updatedCount: totalUpdated,
2185
- errorCount: totalErrors,
2186
- };
2187
- };
2188
-
2189
- /**
2190
- * Helper function to query files that need to be uploaded
2191
- * These are files that have been detected but not yet uploaded
2192
- * Uses the same RFC filtering logic as uploadFilesByRfc for consistency
2193
- */
2194
- const getFilesReadyForUpload = async (options = {}) => {
2195
- if (!supabase) {
2196
- throw new Error('Supabase client not initialized');
2197
- }
2198
-
2199
- console.log('🔍 Querying files ready for upload...');
2200
-
2201
- // Check if UPLOAD_RFCS is configured
2202
- if (!uploadRfcs || uploadRfcs.length === 0) {
2203
- console.log(
2204
- 'ℹ️ No UPLOAD_RFCS configured. Please set UPLOAD_RFCS environment variable to see files ready for upload.',
2205
- );
2206
- console.log(
2207
- ' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
2208
- );
2209
- return [];
2210
- }
2211
-
2212
- console.log(`🎯 Using RFC filter: ${uploadRfcs.join(', ')}`);
2213
-
2214
- // Step 1: Find pedimento_simplificado documents for the specified RFCs that have arela_path
2215
- console.log(
2216
- '🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
2217
- );
2218
- const { data: pedimentoRecords, error: pedimentoError } = await supabase
2219
- .from('uploader')
2220
- .select('arela_path')
2221
- .eq('document_type', 'pedimento_simplificado')
2222
- .in('rfc', uploadRfcs)
2223
- .not('arela_path', 'is', null);
2224
-
2225
- if (pedimentoError) {
2226
- throw new Error(
2227
- `Error querying pedimento_simplificado records: ${pedimentoError.message}`,
2228
- );
2229
- }
2230
-
2231
- if (!pedimentoRecords || pedimentoRecords.length === 0) {
2232
- console.log('ℹ️ No pedimento_simplificado records with arela_path found');
2233
- return [];
2234
- }
2235
-
2236
- // Get unique arela_paths
2237
- const uniqueArelaPaths = [
2238
- ...new Set(pedimentoRecords.map((r) => r.arela_path)),
2239
- ];
2240
- console.log(
2241
- `📋 Found ${pedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
2242
- );
2243
-
2244
- // Step 2: Find all related files with these arela_paths that haven't been uploaded yet
2245
- console.log('🔍 Finding all related files that need to be uploaded...');
2246
-
2247
- // Process arela_paths in chunks to avoid URI length limits
2248
- let allReadyFiles = [];
2249
- const chunkSize = 50;
2250
-
2251
- for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
2252
- const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
2253
-
2254
- const { data: chunkFiles, error: chunkError } = await supabase
2255
- .from('uploader')
2256
- .select(
2257
- 'id, original_path, arela_path, filename, rfc, document_type, status',
2258
- )
2259
- .in('arela_path', pathChunk)
2260
- .neq('status', 'file-uploaded')
2261
- .not('original_path', 'is', null);
2262
-
2263
- if (chunkError) {
2264
- throw new Error(
2265
- `Error querying files for arela_paths chunk: ${chunkError.message}`,
2266
- );
2267
- }
2268
-
2269
- if (chunkFiles && chunkFiles.length > 0) {
2270
- allReadyFiles = allReadyFiles.concat(chunkFiles);
2271
- }
2272
- }
2273
-
2274
- const readyFiles = allReadyFiles;
2275
-
2276
- console.log(`📋 Found ${readyFiles?.length || 0} files ready for upload`);
2277
-
2278
- if (readyFiles && readyFiles.length > 0) {
2279
- // Group by document type for summary
2280
- const byDocType = readyFiles.reduce((acc, file) => {
2281
- const docType = file.document_type || 'Unknown';
2282
- acc[docType] = (acc[docType] || 0) + 1;
2283
- return acc;
2284
- }, {});
2285
-
2286
- console.log('📊 Files by document type:');
2287
- for (const [docType, count] of Object.entries(byDocType)) {
2288
- console.log(` ${docType}: ${count} files`);
2289
- }
2290
-
2291
- // Group by RFC
2292
- const byRfc = readyFiles.reduce((acc, file) => {
2293
- const rfc = file.rfc || 'No RFC';
2294
- acc[rfc] = (acc[rfc] || 0) + 1;
2295
- return acc;
2296
- }, {});
2297
-
2298
- console.log('📊 Files by RFC:');
2299
- for (const [rfc, count] of Object.entries(byRfc)) {
2300
- console.log(` ${rfc}: ${count} files`);
2301
- }
2302
- }
2303
-
2304
- return readyFiles || [];
2305
- };
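// This query slices its `.in()` filter into chunks of 50 paths to stay under
// URI length limits, and propagateArelaPath and the RFC uploader do the same
// with their own chunk sizes. A shared helper — a sketch only, assuming
// nothing beyond plain arrays — would let the call sites reuse the pattern:
const chunk = (items, size) =>
  Array.from({ length: Math.ceil(items.length / size) }, (_, i) =>
    items.slice(i * size, (i + 1) * size),
  );
// e.g. for (const pathChunk of chunk(uniqueArelaPaths, 50)) { ... }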
2306
-
2307
- program
2308
- .name('arela-uploader')
2309
- .description(
2310
- 'CLI to upload folders to Arela API or Supabase Storage with automatic processing\n\n' +
2311
- 'Status workflow:\n' +
2312
- ' fs-stats → detected → file-uploaded\n' +
2313
- ' ├─ Phase 1: --stats-only (collects filesystem stats, status: fs-stats)\n' +
2314
- ' ├─ Phase 2: --detect-pdfs (detects document types, status: detected)\n' +
2315
- ' ├─ Phase 3: --propagate-arela-path (organizes files by pedimento)\n' +
2316
- ' └─ Phase 4: --upload-by-rfc (uploads files, status: file-uploaded)\n\n' +
2317
- 'Use --query-ready-files to see files ready for upload (status: detected with arela_path)',
2318
- )
2319
- .option('-v, --version', 'output the version number')
2320
- .option('-p, --prefix <prefix>', 'Prefix path in bucket', '')
2321
- .option('-b, --bucket <bucket>', 'Bucket name override')
2322
- .option('--force-supabase', 'Force direct Supabase upload (skip API)')
2323
- .option(
2324
- '--no-auto-detect',
2325
- 'Disable automatic file detection (API mode only)',
2326
- )
2327
- .option(
2328
- '--no-auto-organize',
2329
- 'Disable automatic file organization (API mode only)',
2330
- )
2331
- .option(
2332
- '-c, --concurrency <number>',
2333
- 'Files per batch for processing (default: 10)',
2334
- '10',
2335
- )
2336
- .option('--batch-size <number>', 'API batch size (default: 10)', '10')
2337
- .option('--show-stats', 'Show detailed processing statistics')
2338
- .option(
2339
- '--folder-structure <structure>',
2340
- 'Custom folder structure (e.g., "2024/4023260" or "cliente1/pedimentos")',
2341
- )
2342
- .option(
2343
- '--auto-detect-structure',
2344
- 'Automatically detect year/pedimento from file paths',
2345
- )
2346
- .option('--client-path <path>', 'Client path for metadata tracking')
2347
- .option(
2348
- '--stats-only',
2349
- 'Phase 1: Only read filesystem stats and insert to database (no file reading or detection)',
2350
- )
2351
- .option('--no-detect', 'Disable document type detection in stats-only mode')
2352
- .option(
2353
- '--detect-pdfs',
2354
- 'Phase 2: Process PDF files in database for pedimento-simplificado detection',
2355
- )
2356
- .option(
2357
- '--propagate-arela-path',
2358
- 'Phase 3: Propagate arela_path from pedimento_simplificado records to related files with same base path',
2359
- )
2360
- .option(
2361
- '--upload-by-rfc',
2362
- 'Phase 4: Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable',
2363
- )
2364
- .option(
2365
- '--run-all-phases',
2366
- 'Run all 4 phases in sequence: stats → detect → propagate → upload',
2367
- )
2368
- .option(
2369
- '--query-ready-files',
2370
- 'Query and display files that are ready for upload (have been detected but not uploaded)',
2371
- )
2372
- .action(async (options) => {
2373
- if (options.version) {
2374
- console.log(packageVersion);
2375
- process.exit(0);
2376
- }
2377
-
2378
- // Handle detect-pdfs option (Phase 2)
2379
- if (options.detectPdfs) {
2380
- console.log('🔍 Starting Phase 2: PDF Detection');
2381
- await checkCredentials(true); // Force Supabase mode
2382
-
2383
- const result = await detectPedimentosInDatabase({
2384
- batchSize: parseInt(options.batchSize) || 10,
2385
- });
2386
-
2387
- console.log(
2388
- `✅ Phase 2 Complete: ${result.detectedCount} detected, ${result.errorCount} errors`,
2389
- );
2390
- return;
2391
- }
2392
-
2393
- // Handle query-ready-files option
2394
- if (options.queryReadyFiles) {
2395
- await checkCredentials(true); // Force Supabase mode
2396
-
2397
- const readyFiles = await getFilesReadyForUpload();
2398
-
2399
- if (readyFiles.length === 0) {
2400
- console.log('ℹ️ No files are currently ready for upload');
2401
- console.log(
2402
- ' Tip: Run --detect-pdfs and --propagate-arela-path first to prepare files for upload',
2403
- );
2404
- } else {
2405
- console.log(`\n📋 ${readyFiles.length} files are ready for upload!`);
2406
- console.log(' Use --upload-by-rfc to upload them to Arela API');
2407
- }
2408
-
2409
- return;
2410
- }
2411
-
2412
- // Handle run-all-phases option
2413
- if (options.runAllPhases) {
2414
- console.log('🚀 Starting all 4 phases in sequence...');
2415
- await checkCredentials(true); // Force Supabase mode
2416
-
2417
- // Phase 1: Stats collection
2418
- console.log('\n📊 === PHASE 1: Filesystem Stats ===');
2419
- options.statsOnly = true;
2420
- // Continue with normal processing to run Phase 1
2421
-
2422
- // The rest will be handled after Phase 1 completes
2423
- }
2424
-
2425
- // Handle propagate-arela-path option
2426
- if (options.propagateArelaPath) {
2427
- // Initialize Supabase credentials for propagation
2428
- await checkCredentials(true); // Force Supabase mode
2429
-
2430
- const result = await propagateArelaPath({
2431
- showProgress: options.showStats ?? true, // '|| true' made this always true; '??' keeps true only as the default
2432
- });
2433
-
2434
- if (result.errorCount > 0) {
2435
- process.exit(1);
2436
- }
2437
- return;
2438
- }
2439
-
2440
- // Handle upload-by-rfc option
2441
- if (options.uploadByRfc) {
2442
- // RFC upload needs both Supabase (for database queries) and API (for uploads)
2443
- await checkCredentials(false); // Initialize API mode
2444
-
2445
- // Also initialize Supabase for database queries
2446
- if (!supabase) {
2447
- if (!supabaseUrl || !supabaseKey) {
2448
- console.error(
2449
- '❌ RFC upload requires Supabase credentials for database queries.',
2450
- );
2451
- console.error(
2452
- ' Please set SUPABASE_URL and SUPABASE_KEY environment variables.',
2453
- );
2454
- process.exit(1);
2455
- }
2456
-
2457
- supabase = createClient(supabaseUrl, supabaseKey);
2458
- console.log('✅ Connected to Supabase for database queries');
2459
- }
2460
-
2461
- const result = await uploadFilesByRfc({
2462
- showProgress: options.showStats ?? true,
2463
- batchSize: parseInt(options.batchSize) || 10,
2464
- folderStructure: options.folderStructure,
2465
- });
2466
-
2467
- if (result.errorCount > 0) {
2468
- process.exit(1);
2469
- }
2470
- return;
2471
- }
2472
-
2473
- // Initialize credentials with force supabase flag (for stats mode, always need Supabase)
2474
- await checkCredentials(options.forceSupabase || options.statsOnly);
2475
-
2476
- if (!basePath || !sources || sources.length === 0) {
2477
- console.error(
2478
- '⚠️ UPLOAD_BASE_PATH or UPLOAD_SOURCES not defined in environment variables.',
2479
- );
2480
- process.exit(1);
2481
- }
2482
-
2483
- const batchSize = parseInt(options.batchSize) || 10;
2484
- const concurrency = parseInt(options.concurrency) || 10;
2485
-
2486
- if (options.statsOnly) {
2487
- console.log(
2488
- '📊 Mode: Stats Only - Reading file stats and inserting to uploader table',
2489
- );
2490
- console.log('🚫 Files will NOT be uploaded');
2491
- if (options.detect !== false) {
2492
- console.log('🔍 Document type detection ENABLED for supported files');
2493
- } else {
2494
- console.log('🔍 Document type detection DISABLED');
2495
- }
2496
- } else {
2497
- console.log(
2498
- `🚀 Mode: ${apiMode ? 'Arela API with auto-processing' : 'Direct Supabase'}`,
2499
- );
2500
- }
2501
- console.log(`📦 Batch size: ${batchSize}`);
2502
- console.log(`⚡ Concurrency: ${concurrency}`);
2503
-
2504
- const processedPaths = getProcessedPaths();
2505
- let globalSuccess = 0;
2506
- let globalDetected = 0;
2507
- let globalOrganized = 0;
2508
- let globalFailure = 0;
2509
- let globalSkipped = 0;
2510
-
2511
- for (const folder of sources) {
2512
- const sourcePath = path.resolve(basePath, folder).replace(/\\/g, '/');
2513
- console.log(`📂 Processing folder: ${sourcePath}`);
2514
-
2515
- try {
2516
- const stats = fs.statSync(sourcePath);
2517
- const files = stats.isDirectory()
2518
- ? await globby([`${sourcePath}/**/*`], { onlyFiles: true })
2519
- : [sourcePath];
2520
-
2521
- console.log(`📊 Found ${files.length} files to process`);
2522
-
2523
- const result = await processFilesInBatches(
2524
- files,
2525
- batchSize,
2526
- options,
2527
- basePath,
2528
- folder,
2529
- sourcePath,
2530
- processedPaths,
2531
- );
2532
-
2533
- globalSuccess += result.successCount;
2534
- globalDetected += result.detectedCount || 0;
2535
- globalOrganized += result.organizedCount || 0;
2536
- globalFailure += result.failureCount;
2537
- globalSkipped += result.skippedCount;
2538
-
2539
- console.log(`\n📦 Summary for ${folder}:`);
2540
- if (options.statsOnly) {
2541
- console.log(` 📊 Stats recorded: ${result.successCount}`);
2542
- } else {
2543
- console.log(` ✅ Uploaded: ${result.successCount}`);
2544
- if (apiMode) {
2545
- console.log(` 🔍 Detected: ${result.detectedCount || 0}`);
2546
- console.log(` 📁 Organized: ${result.organizedCount || 0}`);
2547
- }
2548
- }
2549
- console.log(` ❌ Errors: ${result.failureCount}`);
2550
- if (options.statsOnly) {
2551
- console.log(` ⏭️ Duplicates: ${result.skippedCount}`);
2552
- } else {
2553
- console.log(` ⏭️ Skipped: ${result.skippedCount}`);
2554
- }
2555
-
2556
- writeLog(
2557
- `📦 Summary for ${folder}: Success: ${result.successCount}, Detected: ${result.detectedCount || 0}, Organized: ${result.organizedCount || 0}, Errors: ${result.failureCount}, ${options.statsOnly ? 'Duplicates' : 'Skipped'}: ${result.skippedCount}`,
2558
- );
2559
- } catch (err) {
2560
- console.error(`⚠️ Error processing folder ${folder}:`, err.message);
2561
- writeLog(`⚠️ Error processing folder ${folder}: ${err.message}`);
2562
- globalFailure++;
2563
- }
2564
- }
2565
-
2566
- console.log(`\n${'='.repeat(60)}`);
2567
- if (options.statsOnly) {
2568
- console.log(`📊 STATS COLLECTION COMPLETED`);
2569
- console.log(`${'='.repeat(60)}`);
2570
- console.log(` 📊 Total stats recorded: ${globalSuccess}`);
2571
- } else {
2572
- console.log(`🎯 ${apiMode ? 'ARELA API' : 'SUPABASE'} UPLOAD COMPLETED`);
2573
- console.log(`${'='.repeat(60)}`);
2574
- console.log(` ✅ Total uploaded: ${globalSuccess}`);
2575
- if (apiMode) {
2576
- console.log(` 🔍 Total detected: ${globalDetected}`);
2577
- console.log(` 📁 Total organized: ${globalOrganized}`);
2578
- }
2579
- }
2580
- if (options.statsOnly) {
2581
- console.log(` ⏭️ Total duplicates: ${globalSkipped}`);
2582
- } else {
2583
- console.log(` ⏭️ Total skipped: ${globalSkipped}`);
2584
- }
2585
- console.log(` ❌ Total errors: ${globalFailure}`);
2586
- console.log(` 📜 Log file: ${logFilePath}`);
2587
- console.log(`${'='.repeat(60)}\n`);
2588
-
2589
- // Continue with remaining phases if running all phases
2590
- if (options.runAllPhases && options.statsOnly) {
2591
- try {
2592
- // Phase 2: PDF Detection
2593
- console.log('\n🔍 === PHASE 2: PDF Detection ===');
2594
- const detectionResult = await detectPedimentosInDatabase({
2595
- batchSize: parseInt(options.batchSize) || 10,
2596
- });
2597
- console.log(
2598
- `✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`,
2599
- );
2600
-
2601
- // Phase 3: Propagate arela_path
2602
- console.log('\n📁 === PHASE 3: Propagate Arela Paths ===');
2603
- const propagateResult = await propagateArelaPath({
2604
- showProgress: options.showStats ?? true,
2605
- });
2606
- console.log(
2607
- `✅ Phase 3 Complete: ${propagateResult.updatedCount || 0} paths propagated`,
2608
- );
2609
-
2610
- // Phase 4: Upload by RFC
2611
- if (uploadRfcs && uploadRfcs.length > 0) {
2612
- console.log('\n🚀 === PHASE 4: Upload by RFC ===');
2613
-
2614
- // Initialize API mode for uploads
2615
- await checkCredentials(false);
2616
-
2617
- const uploadResult = await uploadFilesByRfc({
2618
- showProgress: options.showStats ?? true,
2619
- batchSize: parseInt(options.batchSize) || 10,
2620
- folderStructure: options.folderStructure,
2621
- });
2622
- console.log(`✅ Phase 4 Complete: ${uploadResult.uploadedCount} uploaded, ${uploadResult.errorCount} errors`);
2623
- } else {
2624
- console.log('\n⚠️ === PHASE 4: Upload by RFC ===');
2625
- console.log(
2626
- '⚠️ UPLOAD_RFCS environment variable not configured, skipping Phase 4',
2627
- );
2628
- }
2629
-
2630
- console.log('\n🎉 All 4 phases completed successfully!');
2631
- } catch (error) {
2632
- console.error(`❌ Error in multi-phase execution:`, error.message);
2633
- process.exit(1);
2634
- }
2635
- }
2636
-
2637
- if (
2638
- options.showStats &&
2639
- (sanitizationCache.size > 0 || pathDetectionCache.size > 0)
2640
- ) {
2641
- console.log(`📊 Performance Statistics:`);
2642
- if (sanitizationCache.size > 0) {
2643
- console.log(
2644
- ` 🗂️ Sanitization cache entries: ${sanitizationCache.size}`,
2645
- );
2646
- }
2647
- if (pathDetectionCache.size > 0) {
2648
- console.log(
2649
- ` 📁 Path detection cache entries: ${pathDetectionCache.size}`,
2650
- );
2651
- }
2652
- }
2653
-
2654
- // OPTIMIZED: Ensure log buffer is flushed before exit
2655
- flushLogBuffer();
2656
- });
2657
-
2658
- program.parse();
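// Putting the four phases together, a typical sequence — command name per
// .name() above; invocations assume the package's published bin exposes it —
// looks like:
//
//   arela-uploader --stats-only            # Phase 1: status 'fs-stats'
//   arela-uploader --detect-pdfs           # Phase 2: status 'detected'
//   arela-uploader --propagate-arela-path  # Phase 3: organize by pedimento
//   arela-uploader --upload-by-rfc         # Phase 4: status 'file-uploaded'
//
// or, in one shot:
//
//   arela-uploader --run-all-phases
//   arela-uploader --query-ready-files     # inspect what is pending upload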