@arela/uploader 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -3,32 +3,56 @@ import { createClient } from '@supabase/supabase-js';
  import cliProgress from 'cli-progress';
  import { Command } from 'commander';
  import { config } from 'dotenv';
+ import FormData from 'form-data';
  import fs from 'fs';
  import { globby } from 'globby';
  import mime from 'mime-types';
- import { createRequire } from 'module';
+ import fetch from 'node-fetch';
  import path from 'path';
-
- const require = createRequire(import.meta.url);
- const { version } = require('../package.json');
+ import { FileDetectionService } from './file-detection.js';

  config();

  const program = new Command();

+ // Read package.json version at startup
+ let packageVersion = '1.0.0'; // fallback
+ try {
+ const __filename = new URL(import.meta.url).pathname;
+ const __dirname = path.dirname(__filename);
+ const packageJsonPath = path.resolve(__dirname, '../package.json');
+ const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
+ packageVersion = packageJson.version || '1.0.0';
+ } catch (error) {
+ console.warn('⚠️ Could not read package.json version, using fallback');
+ }
+
+ // Supabase configuration (original)
  const supabaseUrl = process.env.SUPABASE_URL;
  const supabaseKey = process.env.SUPABASE_KEY;
  const bucket = process.env.SUPABASE_BUCKET;
+
+ // API configuration (new)
+ const API_BASE_URL = process.env.ARELA_API_URL;
+ const API_TOKEN = process.env.ARELA_API_TOKEN;
+
+ // Enhanced uploader configuration
  const basePath = process.env.UPLOAD_BASE_PATH;
  const sources = process.env.UPLOAD_SOURCES?.split('|')
  .map((s) => s.trim())
  .filter(Boolean);

- const supabase = createClient(supabaseUrl, supabaseKey);
+ // RFC configuration for upload
+ console.log('🔧 Configured RFCs for upload:', process.env.UPLOAD_RFCS);
+ const uploadRfcs = process.env.UPLOAD_RFCS?.split('|')
+ .map((s) => s.trim())
+ .filter(Boolean);
+
+ let supabase;
+ let apiMode = false;

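// For orientation, the environment read above can be sketched as a .env
// file; every value below is a placeholder, not something shipped with
// the package:
//
//   # API mode (used when both are set and /api/health responds)
//   ARELA_API_URL=https://api.example.com
//   ARELA_API_TOKEN=your-api-key
//
//   # Direct Supabase mode (fallback)
//   SUPABASE_URL=https://xyz.supabase.co
//   SUPABASE_KEY=service-role-key
//   SUPABASE_BUCKET=documents
//
//   # Shared uploader settings (pipe-separated lists)
//   UPLOAD_BASE_PATH=/data/docs
//   UPLOAD_SOURCES=source-a|source-b
//   UPLOAD_RFCS=RFC123456789|RFC987654321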
- // Pre-compiled regex patterns for better performance
+ // Pre-compiled regex patterns for better performance (from original complex uploader)
  const SANITIZATION_PATTERNS = [
- // Character replacements (grouped for efficiency)
  [/[áàâäãåāăą]/gi, 'a'],
  [/[éèêëēĕėę]/gi, 'e'],
  [/[íìîïīĭį]/gi, 'i'],
@@ -37,579 +61,835 @@ const SANITIZATION_PATTERNS = [
  [/[ñň]/gi, 'n'],
  [/[ç]/gi, 'c'],
  [/[ý]/gi, 'y'],
- // Korean characters (compiled once)
  [/[멕]/g, 'meok'],
  [/[시]/g, 'si'],
  [/[코]/g, 'ko'],
  [/[용]/g, 'yong'],
  [/[가-힣]/g, 'kr'],
- // Unicode diacritics (after normalize)
  [/[\u0300-\u036f]/g, ''],
- // Problematic symbols
  [/[\\?%*:|"<>[\]~`^]/g, '-'],
  [/[{}]/g, '-'],
  [/[&]/g, 'and'],
- [/[()]/g, ''], // Remove parentheses
- // Cleanup patterns
- [/\s+/g, '-'], // Replace spaces with dashes
- [/-+/g, '-'], // Replace multiple dashes with single dash
- [/^-+|-+$/g, ''], // Remove leading/trailing dashes
- [/^\.+/, ''], // Remove leading dots
- [/[^\w.-]/g, ''], // Remove any remaining non-alphanumeric chars
+ [/[()]/g, ''],
+ [/\s+/g, '-'],
+ [/-+/g, '-'],
+ [/^-+|-+$/g, ''],
+ [/^\.+/, ''],
+ [/[^\w.-]/g, ''],
  ];

- // Cache for sanitized filenames to avoid repeated processing
  const sanitizationCache = new Map();

- // Enhanced sanitization function with caching and pre-compiled regex
  const sanitizeFileName = (fileName) => {
- // Check cache first
  if (sanitizationCache.has(fileName)) {
  return sanitizationCache.get(fileName);
  }

- // Get file extension
  const ext = path.extname(fileName);
  const nameWithoutExt = path.basename(fileName, ext);

- // Fast path for already clean filenames
  if (/^[a-zA-Z0-9._-]+$/.test(nameWithoutExt)) {
  const result = fileName;
  sanitizationCache.set(fileName, result);
  return result;
  }

- // Normalize unicode first (more efficient to do once)
  let sanitized = nameWithoutExt.normalize('NFD');

- // Apply all sanitization patterns
  for (const [pattern, replacement] of SANITIZATION_PATTERNS) {
  sanitized = sanitized.replace(pattern, replacement);
  }

- // Ensure the filename is not empty
+ // Additional sanitization for problematic characters
+ sanitized = sanitized
+ .replace(/~/g, '-') // Replace tildes
+ .replace(/\s+/g, '-') // Replace spaces with dashes
+ .replace(/\.+/g, '-') // Replace multiple dots with dashes
+ .replace(/-+/g, '-') // Collapse multiple dashes
+ .replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes
+
  if (!sanitized) {
  sanitized = 'unnamed_file';
  }

  const result = sanitized + ext;
-
- // Cache the result for future use
  sanitizationCache.set(fileName, result);
-
  return result;
  };

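// A hand-traced illustration of the sanitization pipeline above (worked by
// hand against the patterns, not output captured from the package):
//
//   sanitizeFileName('Año 2024 (copia).pdf')
//   // NFD normalization plus diacritic stripping, parentheses removed,
//   // spaces collapsed to dashes → 'Ano-2024-copia.pdf'
//
//   sanitizeFileName('report_v1.pdf')
//   // fast path: an already-clean name is returned unchanged and cached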
- // Pre-compiled regex patterns for path sanitization
- const PATH_SANITIZATION_PATTERNS = [
- [/[\\?%*:|"<>[\]~]/g, '-'],
- [/ +/g, ' '],
- [/^\.+/, ''],
- [/\/+/g, '/'],
- ];
+ const checkCredentials = async (forceSupabase = false) => {
+ // Force Supabase mode if explicitly requested
+ if (forceSupabase) {
+ console.log('🔧 Force Supabase mode enabled - skipping API');
+ apiMode = false;
+ } else if (API_BASE_URL && API_TOKEN) {
+ console.log(
+ '🌐 API mode enabled - files will be uploaded to Arela API with automatic processing',
+ );
+ apiMode = true;

- // Cache for sanitized paths
- const pathSanitizationCache = new Map();
-
- // Batch logging system for performance
- class LogBatcher {
- constructor(batchSize = 50, flushInterval = 5000) {
- this.batch = [];
- this.batchSize = batchSize;
- this.flushInterval = flushInterval;
- this.lastFlush = Date.now();
- this.flushTimer = null;
- }
-
- add(logEntry) {
- this.batch.push({
- filename: path.basename(logEntry.file),
- path: logEntry.uploadPath,
- status: logEntry.status,
- message: logEntry.message,
- });
+ try {
+ const response = await fetch(`${API_BASE_URL}/api/health`, {
+ headers: {
+ 'x-api-key': API_TOKEN,
+ },
+ });

- // Auto-flush if batch is full or enough time has passed
- if (
- this.batch.length >= this.batchSize ||
- Date.now() - this.lastFlush > this.flushInterval
- ) {
- this.flush();
+ if (!response.ok) {
+ console.warn(
+ '⚠️ API connection failed, falling back to direct Supabase upload',
+ );
+ apiMode = false;
+ } else {
+ console.log('✅ Connected to Arela API');
+ return;
+ }
+ } catch (err) {
+ console.warn(
+ '⚠️ API connection failed, falling back to direct Supabase upload',
+ );
+ apiMode = false;
  }
  }

- async flush() {
- if (this.batch.length === 0) return;
-
- const logsToSend = [...this.batch];
- this.batch = [];
- this.lastFlush = Date.now();
-
- // Clear any pending timer
- if (this.flushTimer) {
- clearTimeout(this.flushTimer);
- this.flushTimer = null;
+ // Initialize Supabase client if not in API mode or if forced
+ if (!apiMode || forceSupabase) {
+ if (!supabaseUrl || !supabaseKey || !bucket) {
+ console.error(
+ '⚠️ Missing credentials. Please set either:\n' +
+ ' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
+ ' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
+ );
+ process.exit(1);
  }

+ supabase = createClient(supabaseUrl, supabaseKey);
+
  try {
- const { error } = await supabase.from('upload_logs').insert(logsToSend);
+ const { error } = await supabase.storage.from(bucket).list('');
  if (error) {
- console.error(
- `⚠️ Error saving batch of ${logsToSend.length} logs to Supabase: ${error.message}`,
- );
- // Re-add failed logs to batch for retry (optional)
- this.batch.unshift(...logsToSend);
- } else {
- // Only show verbose output if requested
- if (process.env.LOG_BATCH_VERBOSE === 'true') {
- console.log(`📊 Flushed ${logsToSend.length} logs to Supabase`);
- }
+ console.error('⚠️ Error connecting to Supabase:', error.message);
+ process.exit(1);
  }
+ console.log('✅ Connected to Supabase (direct mode)');
  } catch (err) {
- console.error(`⚠️ Error during batch flush: ${err.message}`);
- // Re-add failed logs to batch for retry (optional)
- this.batch.unshift(...logsToSend);
- }
- }
-
- // Schedule auto-flush if not already scheduled
- scheduleFlush() {
- if (!this.flushTimer && this.batch.length > 0) {
- this.flushTimer = setTimeout(() => {
- this.flush();
- }, this.flushInterval);
+ console.error('⚠️ Error:', err.message);
+ process.exit(1);
  }
  }
+ };

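// The call site for checkCredentials is outside this hunk; presumably the
// CLI invokes it once before any transfer, along these lines:
//
//   await checkCredentials(options.forceSupabase);
//   // forceSupabase: true → skip the API, go straight to Supabase
//   // ARELA_API_URL + ARELA_API_TOKEN set and /api/health OK → API mode
//   // otherwise → direct Supabase mode, exiting if those creds are missing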
- // Force flush all pending logs (called at end of process)
- async forceFlush() {
- if (this.flushTimer) {
- clearTimeout(this.flushTimer);
- this.flushTimer = null;
- }
- await this.flush();
- }
- }
+ const logFilePath = path.resolve(process.cwd(), 'arela-upload.log');

- // Global log batcher instance
- const logBatcher = new LogBatcher();
+ /**
+ * OPTIMIZED: Log buffer to reduce I/O operations
+ */
+ let logBuffer = [];
+ const LOG_BUFFER_SIZE = 100; // Flush every 100 log entries
+ let lastFlushTime = Date.now();
+ const LOG_FLUSH_INTERVAL = 5000; // Flush every 5 seconds

- // Function to manage cache size (prevent memory issues in long sessions)
- const manageCaches = () => {
- const MAX_CACHE_SIZE = 1000;
+ const flushLogBuffer = () => {
+ if (logBuffer.length === 0) return;

- if (sanitizationCache.size > MAX_CACHE_SIZE) {
- // Keep only the most recent 500 entries
- const entries = Array.from(sanitizationCache.entries());
- sanitizationCache.clear();
- entries.slice(-500).forEach(([key, value]) => {
- sanitizationCache.set(key, value);
- });
+ try {
+ const logContent = logBuffer.join('\n') + '\n';
+ fs.appendFileSync(logFilePath, logContent);
+ logBuffer = [];
+ lastFlushTime = Date.now();
+ } catch (error) {
+ console.error(`❌ Error writing to log file: ${error.code} | ${error.message} | path: ${logFilePath}`);
  }
+ };

- if (pathSanitizationCache.size > MAX_CACHE_SIZE) {
- const entries = Array.from(pathSanitizationCache.entries());
- pathSanitizationCache.clear();
- entries.slice(-500).forEach(([key, value]) => {
- pathSanitizationCache.set(key, value);
- });
+ const writeLog = (message) => {
+ try {
+ const timestamp = new Date().toISOString();
+ logBuffer.push(`[${timestamp}] ${message}`);
+
+ // Flush if buffer is full or enough time has passed
+ const now = Date.now();
+ if (logBuffer.length >= LOG_BUFFER_SIZE || (now - lastFlushTime) >= LOG_FLUSH_INTERVAL) {
+ flushLogBuffer();
+ }
+ } catch (error) {
+ console.error(`❌ Error buffering log message: ${error.message}`);
  }
  };

- const sanitizePath = (inputPath) => {
- // Check cache first
- if (pathSanitizationCache.has(inputPath)) {
- return pathSanitizationCache.get(inputPath);
+ // Ensure logs are flushed on process exit
+ process.on('exit', flushLogBuffer);
+ process.on('SIGINT', () => {
+ flushLogBuffer();
+ process.exit(0);
+ });
+ process.on('SIGTERM', () => {
+ flushLogBuffer();
+ process.exit(0);
+ });
+
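// Behavior sketch for the buffered logger (illustration only): entries are
// held in memory and written in bulk, trading a little durability for far
// fewer fs.appendFileSync calls.
//
//   writeLog('SUCCESS: /data/a.pdf -> 2024/4023260/a.pdf'); // buffered
//   // flushLogBuffer() runs once 100 entries accumulate or 5 s pass since
//   // the last flush; exit/SIGINT/SIGTERM handlers flush the remainder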
+ /**
+ * OPTIMIZED: Conditional logging to reduce console overhead
+ */
+ const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true';
+ const BATCH_DELAY = parseInt(process.env.BATCH_DELAY) || 100; // Configurable delay between batches
+ const PROGRESS_UPDATE_INTERVAL = parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10; // Update progress every N items
+
+ const logVerbose = (message) => {
+ if (VERBOSE_LOGGING) {
+ console.log(message);
  }
+ };
+ const batchReadFileStats = (filePaths) => {
+ const results = [];

- // Fast path for already clean paths
- if (!/[\\?%*:|"<>[\]~]|^ +|^\.+|\/\/+/.test(inputPath)) {
- pathSanitizationCache.set(inputPath, inputPath);
- return inputPath;
+ for (const filePath of filePaths) {
+ try {
+ const stats = fs.statSync(filePath);
+ results.push({ path: filePath, stats, error: null });
+ } catch (error) {
+ results.push({ path: filePath, stats: null, error: error.message });
+ }
  }

- let sanitized = inputPath;
+ return results;
+ };
+
+ /**
+ * OPTIMIZED: Cache for year/pedimento detection results to avoid redundant parsing
+ */
+ const pathDetectionCache = new Map();

- // Apply path sanitization patterns
- for (const [pattern, replacement] of PATH_SANITIZATION_PATTERNS) {
- sanitized = sanitized.replace(pattern, replacement);
+ /**
+ * OPTIMIZED: Clear the path detection cache (useful for testing or long-running processes)
+ */
+ const clearPathDetectionCache = () => {
+ pathDetectionCache.clear();
+ };
+
+ /**
+ * OPTIMIZED: Get detection results with caching
+ */
+ const getCachedPathDetection = (filePath, basePath) => {
+ const cacheKey = `${filePath}|${basePath}`;
+
+ if (pathDetectionCache.has(cacheKey)) {
+ return pathDetectionCache.get(cacheKey);
  }

- // Cache the result
- pathSanitizationCache.set(inputPath, sanitized);
+ const detection = extractYearAndPedimentoFromPath(filePath, basePath);
+ pathDetectionCache.set(cacheKey, detection);

- return sanitized;
+ return detection;
  };

- const sendLogToSupabase = async ({ file, uploadPath, status, message }) => {
- // Add to batch instead of sending immediately
- logBatcher.add({ file, uploadPath, status, message });
+ /**
+ * Extracts year and pedimento number from file path
+ * Supports patterns like:
+ * - /path/to/2024/4023260/file.pdf
+ * - /path/to/pedimentos/2024/4023260/file.pdf
+ * - /path/to/docs/año2024/ped4023260/file.pdf
+ */
+ const extractYearAndPedimentoFromPath = (filePath, basePath) => {
+ try {
+ const relativePath = path.relative(basePath, filePath);
+ const pathParts = relativePath.split(path.sep);
+
+ let year = null;
+ let pedimento = null;
+
+ // Pattern 1: Direct year/pedimento structure (2024/4023260)
+ for (let i = 0; i < pathParts.length - 1; i++) {
+ const part = pathParts[i];
+ const nextPart = pathParts[i + 1];
+
+ // Check if current part looks like a year (2020-2029)
+ const yearMatch = part.match(/^(202[0-9])$/);
+ if (yearMatch && nextPart) {
+ year = yearMatch[1];
+
+ // Check if next part looks like a pedimento (4-8 digits)
+ const pedimentoMatch = nextPart.match(/^(\d{4,8})$/);
+ if (pedimentoMatch) {
+ pedimento = pedimentoMatch[1];
+ break;
+ }
+ }
+ }
+
+ // Pattern 2: Named patterns (año2024, ped4023260)
+ if (!year || !pedimento) {
+ for (const part of pathParts) {
+ if (!year) {
+ const namedYearMatch = part.match(/(?:año|year|anio)(\d{4})/i);
+ if (namedYearMatch) {
+ year = namedYearMatch[1];
+ }
+ }

- // Schedule auto-flush if needed
- logBatcher.scheduleFlush();
- };
+ if (!pedimento) {
+ const namedPedimentoMatch = part.match(
+ /(?:ped|pedimento|pedi)(\d{4,8})/i,
+ );
+ if (namedPedimentoMatch) {
+ pedimento = namedPedimentoMatch[1];
+ }
+ }
+ }
+ }

- // Enhanced version for immediate sending (used for critical errors)
- const sendLogToSupabaseImmediate = async ({
- file,
- uploadPath,
- status,
- message,
- }) => {
- const { error } = await supabase.from('upload_logs').insert([
- {
- filename: path.basename(file),
- path: uploadPath,
- status,
- message,
- },
- ]);
+ // Pattern 3: Loose year detection in any part
+ if (!year) {
+ for (const part of pathParts) {
+ const yearMatch = part.match(/(202[0-9])/);
+ if (yearMatch) {
+ year = yearMatch[1];
+ break;
+ }
+ }
+ }

- if (error) {
- console.error(
- `⚠️ Error saving immediate log to Supabase: ${error.message}`,
- );
+ // Pattern 4: Loose pedimento detection (4-8 consecutive digits)
+ if (!pedimento) {
+ for (const part of pathParts) {
+ const pedimentoMatch = part.match(/(\d{4,8})/);
+ if (pedimentoMatch && pedimentoMatch[1].length >= 4) {
+ pedimento = pedimentoMatch[1];
+ break;
+ }
+ }
+ }
+
+ return { year, pedimento, detected: !!(year && pedimento) };
+ } catch (error) {
+ return {
+ year: null,
+ pedimento: null,
+ detected: false,
+ error: error.message,
+ };
  }
  };

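// Hand-traced examples of the detection heuristics above (paths invented):
//
//   extractYearAndPedimentoFromPath('/data/2024/4023260/factura.pdf', '/data')
//   // Pattern 1, adjacent '2024' and '4023260' segments:
//   // → { year: '2024', pedimento: '4023260', detected: true }
//
//   extractYearAndPedimentoFromPath('/data/año2024/ped4023260/f.pdf', '/data')
//   // Pattern 2, named segments → same result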
- const checkCredentials = async () => {
- if (!supabaseUrl || !supabaseKey || !bucket) {
- console.error(
- '⚠️ Missing Supabase credentials. Please set SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET in your environment variables.',
- );
- writeLog('⚠️ Missing Supabase credentials.');
- await sendLogToSupabaseImmediate({
- file: 'Error',
- uploadPath: 'Error',
- status: 'error',
- message: 'Missing Supabase credentials.',
- });
- process.exit(1);
- }
+ /**
+ * OPTIMIZED: Get processed paths with caching and buffered log reading
+ */
+ let processedPathsCache = null;
+ let lastLogModTime = 0;

+ const getProcessedPaths = () => {
  try {
- const { error } = await supabase.storage.from(bucket).list('');
- if (error) {
- console.error('⚠️ Error connecting to Supabase:', error.message);
- writeLog(`⚠️ Error connecting to Supabase: ${error.message}`);
- await sendLogToSupabaseImmediate({
- file: 'Error',
- uploadPath: 'Error',
- status: 'error',
- message: error.message,
- });
- process.exit(1);
+ // Check if log file exists
+ if (!fs.existsSync(logFilePath)) {
+ return new Set();
+ }
+
+ // Check if cache is still valid
+ const logStats = fs.statSync(logFilePath);
+ if (processedPathsCache && logStats.mtime.getTime() === lastLogModTime) {
+ return processedPathsCache;
  }
- } catch (err) {
- console.error('⚠️ Error:', err.message);
- writeLog(`⚠️ Error: ${err.message}`);
- await sendLogToSupabaseImmediate({
- file: 'Error',
- uploadPath: 'Error',
- status: 'error',
- message: err.message,
+
+ // Read and parse log file
+ const processed = new Set();
+ const content = fs.readFileSync(logFilePath, 'utf-8');
+
+ // Use more efficient regex with global flag
+ const regex = /(SUCCESS|SKIPPED): .*? -> (.+)/g;
+ let match;
+
+ while ((match = regex.exec(content)) !== null) {
+ const path = match[2];
+ if (path) {
+ processed.add(path.trim());
+ }
+ }
+
+ // Update cache
+ processedPathsCache = processed;
+ lastLogModTime = logStats.mtime.getTime();
+
+ return processed;
+ } catch (error) {
+ console.error(`⚠️ Error reading processed paths: ${error.message}`);
+ return new Set();
+ }
+ };
+
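// The resume logic keys off the log lines writeLog produces elsewhere in
// this file; a made-up entry shows what the regex extracts:
//
//   const line =
//     '[2024-05-01T12:00:00.000Z] SUCCESS: /data/a.pdf -> 2024/4023260/a.pdf';
//   /(SUCCESS|SKIPPED): .*? -> (.+)/.exec(line)[2]
//   // → '2024/4023260/a.pdf', so a re-run skips that uploadPath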
+ /**
+ * Upload files to Arela API with automatic detection and organization
+ */
+ const uploadToApi = async (files, options) => {
+ const formData = new FormData();
+
+ files.forEach((file) => {
+ const fileBuffer = fs.readFileSync(file.path);
+ formData.append('files', fileBuffer, {
+ filename: file.name,
+ contentType: file.contentType,
  });
- process.exit(1);
+ });
+
+ if (bucket) formData.append('bucket', bucket);
+ if (options.prefix) formData.append('prefix', options.prefix);
+
+ // New feature: custom folder structure
+ let combinedStructure = null;
+ let cachedDetection = null; // Cache detection result to avoid redundant calls
+
+ if (
+ options.folderStructure &&
+ options.autoDetectStructure &&
+ files.length > 0
+ ) {
+ // Combine custom folder structure with auto-detection
+ const firstFile = files[0];
+ cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
+
+ if (cachedDetection.detected) {
+ const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
+ combinedStructure = `${options.folderStructure}/${autoStructure}`;
+ formData.append('folderStructure', combinedStructure);
+ console.log(
+ `📁 Combined folder structure: ${options.folderStructure} + ${autoStructure} = ${combinedStructure}`,
+ );
+ } else {
+ // Fallback to just custom structure if auto-detection fails
+ formData.append('folderStructure', options.folderStructure);
+ console.log(
+ `📁 Using custom folder structure (auto-detection failed): ${options.folderStructure}`,
+ );
+ }
+ } else if (options.folderStructure) {
+ formData.append('folderStructure', options.folderStructure);
+ console.log(`📁 Using custom folder structure: ${options.folderStructure}`);
+ } else if (options.autoDetectStructure && files.length > 0) {
+ // Try to auto-detect from the first file if no explicit structure is provided
+ const firstFile = files[0];
+ cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
+
+ if (cachedDetection.detected) {
+ const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
+ formData.append('folderStructure', autoStructure);
+ }
+ }
+
+ // If clientPath is specified, pass it along for user_metadata
+ if (options.clientPath) {
+ formData.append('clientPath', options.clientPath);
+ }
+
+ formData.append('autoDetect', String(options.autoDetect ?? true));
+ formData.append('autoOrganize', String(options.autoOrganize ?? true));
+ formData.append('batchSize', String(options.batchSize || 10));
+ formData.append('clientVersion', packageVersion);
+
+ const response = await fetch(
+ `${API_BASE_URL}/api/storage/batch-upload-and-process`,
+ {
+ method: 'POST',
+ headers: {
+ 'x-api-key': API_TOKEN,
+ },
+ body: formData,
+ },
+ );
+
+ if (!response.ok) {
+ const errorText = await response.text();
+ throw new Error(
+ `API request failed: ${response.status} ${response.statusText} - ${errorText}`,
+ );
  }
+
+ return response.json();
  };

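// A sketch of a single call; file objects carry { path, name, contentType }
// and the values here are illustrative, not taken from the package:
//
//   const result = await uploadToApi(
//     [{ path: '/data/2024/4023260/a.pdf', name: 'a.pdf',
//        contentType: 'application/pdf' }],
//     { folderStructure: 'clients', autoDetectStructure: true, batchSize: 10 },
//   );
//   // POSTs multipart fields (files, bucket?, prefix?, folderStructure,
//   // clientPath?, autoDetect, autoOrganize, batchSize, clientVersion) to
//   // /api/storage/batch-upload-and-process; when detection succeeds the
//   // folderStructure becomes 'clients/2024/4023260', and callers read
//   // counters back from result.stats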
- await checkCredentials();
+ /**
+ * Upload file directly to Supabase (fallback method)
+ */
+ const uploadToSupabase = async (file, uploadPath) => {
+ const content = fs.readFileSync(file);
+ const contentType = mime.lookup(file) || 'application/octet-stream';

- const fileExistsInBucket = async (pathInBucket) => {
- const dir = path.dirname(pathInBucket);
- const filename = path.basename(pathInBucket);
  const { data, error } = await supabase.storage
  .from(bucket)
- .list(dir === '.' ? '' : dir, { limit: 1000 });
- if (error) {
- console.error(`⚠️ Could not verify duplicate: ${error.message}`);
- writeLog(`⚠️ Could not verify duplicate: ${error.message}`);
- await sendLogToSupabaseImmediate({
- file: 'Error',
- uploadPath: 'Error',
- status: 'error',
- message: error.message,
+ .upload(uploadPath.replace(/\\/g, '/'), content, {
+ upsert: true,
+ contentType,
  });
- return false;
+
+ if (error) {
+ throw new Error(error.message);
  }
- return data.some((file) => file.name === filename);
+
+ return data;
  };

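// Worth noting: the storage key is normalized before upload, so a
// Windows-style uploadPath never produces literal backslashes in the
// bucket (illustration only):
//
//   await uploadToSupabase('C:\\docs\\a.pdf', 'clients\\2024\\a.pdf');
//   // stored under 'clients/2024/a.pdf' via uploadPath.replace(/\\/g, '/')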
- const logFilePath = path.resolve(process.cwd(), 'upload.log');
- const writeLog = (message) => {
- try {
- const timestamp = new Date().toISOString();
- fs.appendFileSync(logFilePath, `[${timestamp}] ${message}\n`);
- } catch (error) {
- console.error(`❌ Error writing to log file: ${error.message}`);
+ /**
+ * Insert file stats into uploader table with document detection
+ */
+ const insertStatsToUploaderTable = async (files, options) => {
+ if (!supabase) {
+ throw new Error('Supabase client not initialized. Stats mode requires Supabase connection.');
  }
- };

- // Modified to fetch from Supabase first, then fallback to local log
- const getProcessedPaths = async () => {
- const processed = new Set();
+ const detectionService = new FileDetectionService();
+ const records = [];

- // Try to fetch from Supabase first
- try {
- const { data, error } = await supabase
- .from('upload_logs')
- .select('path')
- .in('status', ['success', 'skipped']);
+ for (const file of files) {
+ // OPTIMIZED: Use pre-computed stats if available, otherwise call fs.statSync
+ const stats = file.stats || fs.statSync(file.path);
+ const originalPath = options.clientPath || file.path;

- if (error) {
- console.warn(
- `⚠️ Could not fetch processed paths from Supabase: ${error.message}. Falling back to local log.`,
- );
- // Fallback to local log if Supabase fetch fails
- const lines = fs.existsSync(logFilePath)
- ? fs.readFileSync(logFilePath, 'utf-8').split('\\n')
- : [];
- for (const line of lines) {
- const match = line.match(/(SUCCESS|SKIPPED): .*? -> (.+)/);
- if (match) {
- const [, , path] = match;
- processed.add(path.trim());
- }
- }
- } else if (data) {
- data.forEach((log) => {
- if (log.path) {
- processed.add(log.path.trim());
- }
- });
- // Also read from local log to ensure any paths logged before this change or during a Supabase outage are included
- const lines = fs.existsSync(logFilePath)
- ? fs.readFileSync(logFilePath, 'utf-8').split('\\n')
- : [];
- for (const line of lines) {
- const match = line.match(/(SUCCESS|SKIPPED): .*? -> (.+)/);
- if (match) {
- const [, , pathValue] = match;
- if (pathValue) {
- processed.add(pathValue.trim());
+ // Check if record already exists
+ const { data: existingRecords, error: checkError } = await supabase
+ .from('uploader')
+ .select('id, original_path')
+ .eq('original_path', originalPath)
+ .limit(1);
+
+ if (checkError) {
+ console.error(`❌ Error checking for existing record: ${checkError.message}`);
+ continue;
+ }
+
+ if (existingRecords && existingRecords.length > 0) {
+ console.log(`⏭️ Skipping duplicate: ${path.basename(file.path)}`);
+ continue;
+ }
+
+ // Initialize record with basic file stats
+ const record = {
+ document_type: null,
+ size: stats.size,
+ num_pedimento: null,
+ filename: file.originalName || path.basename(file.path),
+ original_path: originalPath,
+ arela_path: null,
+ status: 'stats',
+ rfc: null,
+ message: null
+ };
+
+ // Try to detect document type for supported files
+ if (detectionService.isSupportedFileType(file.path)) {
+ try {
+ const detection = await detectionService.detectFile(file.path);
+
+ if (detection.detectedType) {
+ record.document_type = detection.detectedType;
+ record.num_pedimento = detection.detectedPedimento;
+ record.status = 'detected';
+
+ // Set arela_path for pedimento_simplificado documents
+ if (detection.arelaPath) {
+ record.arela_path = detection.arelaPath;
+ }
+
+ // Extract RFC from fields if available
+ const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
+ if (rfcField) {
+ record.rfc = rfcField.value;
+ }
+ } else {
+ record.status = 'not-detected';
+ if (detection.error) {
+ record.message = detection.error;
  }
  }
+ } catch (error) {
+ console.error(`❌ Error detecting ${record.filename}:`, error.message);
+ record.status = 'detection-error';
+ record.message = error.message;
  }
+ } else {
+ record.status = 'unsupported';
+ record.message = 'File type not supported for detection';
  }
- } catch (e) {
- console.warn(
- `⚠️ Error fetching from Supabase or reading local log: ${e.message}. Proceeding with an empty set of processed paths initially.`,
- );
- // Ensure local log is still attempted if Supabase connection itself fails
- const lines = fs.existsSync(logFilePath)
- ? fs.readFileSync(logFilePath, 'utf-8').split('\\n')
- : [];
- for (const line of lines) {
- const match = line.match(/(SUCCESS|SKIPPED): .*? -> (.+)/);
- if (match) {
- const [, , path] = match;
- processed.add(path.trim());
- }
- }
+
+ records.push(record);
  }
- return processed;
+
+ if (records.length === 0) {
+ console.log('📝 No new records to insert (all were duplicates or errors)');
+ return [];
+ }
+
+ console.log(`💾 Inserting ${records.length} new records into uploader table...`);
+
+ const { data, error } = await supabase
+ .from('uploader')
+ .insert(records)
+ .select();
+
+ if (error) {
+ throw new Error(`Failed to insert stats records: ${error.message}`);
+ }
+
+ return data;
  };

- const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
+ /**
+ * OPTIMIZED: Insert ONLY file stats into uploader table (Phase 1)
+ * No file reading, no detection - just filesystem metadata
+ * Returns summary statistics instead of full records for better performance
+ */
+ const insertStatsOnlyToUploaderTable = async (files, options) => {
+ if (!supabase) {
+ throw new Error('Supabase client not initialized. Stats mode requires Supabase connection.');
+ }

- const uploadWithRetry = async (uploadFn, maxRetries = 5, delayMs = 2000) => {
- let attempt = 0;
- let lastError;
+ const batchSize = 1000; // Large batch size for performance
+ const allRecords = [];

- while (attempt < maxRetries) {
+ // Prepare all file stats data first - OPTIMIZED to use pre-computed stats
+ console.log('📊 Collecting filesystem stats...');
+ for (const file of files) {
  try {
- const result = await uploadFn();
- if (!result.error) return result;
- lastError = result.error;
- attempt++;
-
- // Exponential backoff with jitter
- if (attempt < maxRetries) {
- const backoffDelay =
- delayMs * Math.pow(2, attempt - 1) + Math.random() * 1000;
- console.log(
- `Retry ${attempt}/${maxRetries} after ${Math.round(backoffDelay)}ms...`,
- );
- await delay(backoffDelay);
- }
+ // Use pre-computed stats if available, otherwise call fs.statSync
+ const stats = file.stats || fs.statSync(file.path);
+ const originalPath = options.clientPath || file.path;
+ const fileExtension = path.extname(file.path).toLowerCase().replace('.', '');
+
+ const record = {
+ document_type: null,
+ size: stats.size,
+ num_pedimento: null,
+ filename: file.originalName || path.basename(file.path),
+ original_path: originalPath,
+ arela_path: null,
+ status: 'fs-stats',
+ rfc: null,
+ message: null,
+ file_extension: fileExtension,
+ created_at: new Date().toISOString(),
+ modified_at: stats.mtime.toISOString()
+ };
+
+ allRecords.push(record);
  } catch (error) {
- lastError = error;
- attempt++;
+ console.error(`❌ Error reading stats for ${file.path}:`, error.message);
+ }
+ }

- if (attempt < maxRetries) {
- const backoffDelay =
- delayMs * Math.pow(2, attempt - 1) + Math.random() * 1000;
- console.log(
- `Retry ${attempt}/${maxRetries} after ${Math.round(backoffDelay)}ms due to exception...`,
- );
- await delay(backoffDelay);
+ if (allRecords.length === 0) {
+ console.log('📝 No file stats to insert');
+ return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
+ }
+
+ console.log(`💾 Bulk inserting ${allRecords.length} file stats in batches of ${batchSize}...`);
+
+ let totalInserted = 0;
+ let totalSkipped = 0;
+
+ // Process in batches for optimal performance
+ for (let i = 0; i < allRecords.length; i += batchSize) {
+ const batch = allRecords.slice(i, i + batchSize);
+
+ try {
+ // OPTIMIZED: Use upsert without select to avoid unnecessary data transfer
+ const { error, count } = await supabase
+ .from('uploader')
+ .upsert(batch, {
+ onConflict: 'original_path',
+ ignoreDuplicates: false,
+ count: 'exact'
+ });
+
+ if (error) {
+ console.error(`❌ Error inserting batch ${Math.floor(i / batchSize) + 1}:`, error.message);
+ continue;
  }
+
+ // For upsert operations, we can't easily distinguish between inserts and updates
+ // from the count alone, but we can estimate based on the assumption that most
+ // operations in --stats-only mode are likely new inserts
+ const batchProcessed = batch.length;
+
+ // Since we're using upsert with ignoreDuplicates: false, the count represents
+ // the actual number of rows affected (both inserts and updates)
+ const affected = count || batchProcessed;
+
+ // For simplicity and performance, we'll assume most are new inserts in stats-only mode
+ // This is reasonable since stats-only is typically run on new file sets
+ totalInserted += affected;
+
+ console.log(`✅ Batch ${Math.floor(i / batchSize) + 1}: ${affected} rows processed`);
+ } catch (error) {
+ console.error(`❌ Unexpected error in batch ${Math.floor(i / batchSize) + 1}:`, error.message);
  }
  }

+ // Calculate skipped as difference between total records and inserted
+ totalSkipped = allRecords.length - totalInserted;
+
+ console.log(`📊 Phase 1 Summary: ${totalInserted} records processed, estimated ${totalSkipped} were updates`);
+
  return {
- error: new Error(
- `Max retries exceeded. Last error: ${lastError?.message || 'Unknown error'}`,
- ),
+ totalInserted,
+ totalSkipped,
+ totalProcessed: allRecords.length
  };
  };

458
- const processFile = async (
459
- file,
460
- options,
461
- basePath,
462
- folder,
463
- sourcePath,
464
- processedPaths,
465
- ) => {
466
- let currentFile = file;
467
- let result = {
468
- success: false,
469
- skipped: false,
470
- error: null,
471
- message: '',
472
- };
736
+ /**
737
+ * PHASE 2: Process PDF files for pedimento-simplificado detection
738
+ * Only processes files with status 'fs-stats' and file_extension 'pdf'
739
+ */
740
+ const detectPedimentosInDatabase = async (options = {}) => {
741
+ if (!supabase) {
742
+ throw new Error('Supabase client not initialized.');
743
+ }
473
744
 
474
- try {
475
- // Check if we need to rename the file
476
- if (options.renameFiles) {
477
- const originalName = path.basename(file);
478
- const sanitizedName = sanitizeFileName(originalName);
479
-
480
- if (originalName !== sanitizedName) {
481
- const newFilePath = path.join(path.dirname(file), sanitizedName);
482
-
483
- if (options.dryRun) {
484
- result.message = `Would rename: ${originalName} → ${sanitizedName}`;
485
- result.skipped = true;
486
- return result;
487
- } else {
488
- try {
489
- fs.renameSync(file, newFilePath);
490
- currentFile = newFilePath;
491
- writeLog(`RENAMED: ${originalName} → ${sanitizedName}`);
492
- await sendLogToSupabase({
493
- file: originalName,
494
- uploadPath: sanitizedName,
495
- status: 'renamed',
496
- message: `Renamed from ${originalName}`,
497
- });
498
- } catch (renameError) {
499
- result.error = `Failed to rename ${originalName}: ${renameError.message}`;
500
- writeLog(`RENAME_ERROR: ${originalName} | ${renameError.message}`);
501
- return result;
502
- }
503
- }
504
- }
505
- }
745
+ console.log('🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...');
506
746
 
507
- const content = fs.readFileSync(currentFile);
508
- const relativePathRaw = path
509
- .relative(basePath, currentFile)
510
- .replace(/^[\\/]+/, '')
511
- .replace(/\\/g, '/');
512
-
513
- // Always sanitize the filename for upload path
514
- const pathParts = relativePathRaw.split('/');
515
- const originalFileName = pathParts[pathParts.length - 1];
516
- const sanitizedFileName = sanitizeFileName(originalFileName);
517
- pathParts[pathParts.length - 1] = sanitizedFileName;
518
- const sanitizedRelativePath = pathParts.join('/');
519
-
520
- const uploadPathRaw = options.prefix
521
- ? path.posix.join(options.prefix, sanitizedRelativePath)
522
- : sanitizedRelativePath;
523
- const uploadPath = sanitizePath(uploadPathRaw);
524
-
525
- if (
526
- uploadPath !== uploadPathRaw ||
527
- originalFileName !== sanitizedFileName
528
- ) {
529
- writeLog(`SANITIZED: ${relativePathRaw} → ${uploadPath}`);
530
- await sendLogToSupabase({
531
- file: currentFile,
532
- uploadPath: relativePathRaw,
533
- status: 'sanitized',
534
- message: `Sanitized to ${uploadPath} (Arela Version: ${version})`,
535
- });
747
+ // Get all PDF files that need detection (status = 'fs-stats' and extension = 'pdf')
748
+ let allPdfRecords = [];
749
+ let hasMore = true;
750
+ let offset = 0;
751
+ const queryBatchSize = 1000;
752
+
753
+ console.log('📥 Fetching PDF files from database...');
754
+
755
+ while (hasMore) {
756
+ const { data: batch, error: queryError } = await supabase
757
+ .from('uploader')
758
+ .select('id, original_path, filename, file_extension, status')
759
+ .eq('status', 'fs-stats')
760
+ .eq('file_extension', 'pdf')
761
+ .ilike('filename', '%simp%')
762
+ .range(offset, offset + queryBatchSize - 1);
763
+
764
+ if (queryError) {
765
+ throw new Error(`Failed to fetch PDF records: ${queryError.message}`);
536
766
  }
537
767
 
538
- if (processedPaths.has(uploadPath)) {
539
- result.skipped = true;
540
- result.message = `Already processed (log): ${currentFile}`;
541
- return result;
768
+ if (!batch || batch.length === 0) {
769
+ hasMore = false;
770
+ } else {
771
+ allPdfRecords.push(...batch);
772
+ offset += queryBatchSize;
773
+ console.log(`📄 Fetched ${batch.length} PDF records (total: ${allPdfRecords.length})`);
542
774
  }
775
+ }
543
776
 
544
- const contentType = mime.lookup(currentFile) || 'application/octet-stream';
777
+ if (allPdfRecords.length === 0) {
778
+ console.log('📝 No PDF files found for detection');
779
+ return { detectedCount: 0, processedCount: 0, errorCount: 0 };
780
+ }
545
781
 
546
- const exists = await fileExistsInBucket(uploadPath);
782
+ console.log(`🔍 Processing ${allPdfRecords.length} PDF files for detection...`);
547
783
 
548
- if (exists) {
549
- result.skipped = true;
550
- result.message = `Skipped (already exists): ${currentFile}`;
551
- writeLog(`SKIPPED: ${currentFile} -> ${uploadPath}`);
552
- await sendLogToSupabase({
553
- file: currentFile,
554
- uploadPath,
555
- status: 'skipped',
556
- message: 'Already exists in bucket',
557
- });
558
- return result;
559
- }
560
-
561
- const { error } = await uploadWithRetry(() =>
562
- supabase.storage.from(bucket).upload(uploadPath, content, {
563
- upsert: true,
564
- contentType,
565
- metadata: {
566
- originalName: path.basename(currentFile),
567
- sanitizedName: path.basename(uploadPath),
568
- clientPath: path.posix.join(
569
- basePath,
570
- folder,
571
- path.relative(sourcePath, currentFile).replace(/\\/g, '/'),
572
- ),
573
- arelaVersion: version,
574
- },
575
- }),
576
- );
784
+ const detectionService = new FileDetectionService();
785
+ const batchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
786
+ let totalDetected = 0;
787
+ let totalProcessed = 0;
788
+ let totalErrors = 0;
577
789
 
578
- if (error) {
579
- result.error = error.message || JSON.stringify(error);
580
- writeLog(`ERROR: ${currentFile} -> ${uploadPath} | ${result.error}`);
581
- await sendLogToSupabase({
582
- file: currentFile,
583
- uploadPath,
584
- status: 'error',
585
- message: result.error,
586
- });
587
- } else {
588
- result.success = true;
589
- result.message = `Uploaded ${currentFile} -> ${uploadPath}`;
590
- writeLog(`SUCCESS: ${currentFile} -> ${uploadPath}`);
591
- await sendLogToSupabase({
592
- file: currentFile,
593
- uploadPath,
594
- status: 'success',
595
- message: 'Uploaded successfully',
596
- });
790
+ // Create progress bar
791
+ const progressBar = new cliProgress.SingleBar({
792
+ format: '🔍 PDF Detection |{bar}| {percentage}% | {value}/{total} | Detected: {detected} | Errors: {errors}',
793
+ barCompleteChar: '█',
794
+ barIncompleteChar: '░',
795
+ hideCursor: true,
796
+ });
797
+
798
+ progressBar.start(allPdfRecords.length, 0, { detected: 0, errors: 0 });
799
+
800
+ // Process files in smaller batches to avoid overwhelming the system
801
+ for (let i = 0; i < allPdfRecords.length; i += batchSize) {
802
+ const batch = allPdfRecords.slice(i, i + batchSize);
803
+ const updatePromises = [];
804
+
805
+ for (const record of batch) {
806
+ try {
807
+ // Check if file still exists
808
+ if (!fs.existsSync(record.original_path)) {
809
+ updatePromises.push(
810
+ supabase
811
+ .from('uploader')
812
+ .update({
813
+ status: 'file-not-found',
814
+ message: 'File no longer exists at original path'
815
+ })
816
+ .eq('id', record.id)
817
+ );
818
+ totalErrors++;
819
+ continue;
820
+ }
821
+
822
+ // Perform detection
823
+ const detection = await detectionService.detectFile(record.original_path);
824
+ totalProcessed++;
825
+
826
+ const updateData = {
827
+ status: detection.detectedType ? 'detected' : 'not-detected',
828
+ document_type: detection.detectedType,
829
+ num_pedimento: detection.detectedPedimento,
830
+ arela_path: detection.arelaPath,
831
+ message: detection.error || null
832
+ };
833
+
834
+ // Extract RFC from fields if available
835
+ if (detection.fields) {
836
+ const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
837
+ if (rfcField) {
838
+ updateData.rfc = rfcField.value;
839
+ }
840
+ }
841
+
842
+ if (detection.detectedType) {
843
+ totalDetected++;
844
+ }
845
+
846
+ updatePromises.push(
847
+ supabase
848
+ .from('uploader')
849
+ .update(updateData)
850
+ .eq('id', record.id)
851
+ );
852
+
853
+ } catch (error) {
854
+ console.error(`❌ Error detecting ${record.filename}:`, error.message);
855
+ totalErrors++;
856
+
857
+ updatePromises.push(
858
+ supabase
859
+ .from('uploader')
860
+ .update({
861
+ status: 'detection-error',
862
+ message: error.message
863
+ })
864
+ .eq('id', record.id)
865
+ );
866
+ }
867
+ }
868
+
869
+ // Execute all updates in parallel for this batch
870
+ try {
871
+ await Promise.all(updatePromises);
872
+ } catch (error) {
873
+ console.error(`❌ Error updating batch:`, error.message);
597
874
  }
598
- } catch (err) {
599
- result.error = err.message || JSON.stringify(err);
600
- writeLog(`ERROR: ${currentFile} | ${result.error}`);
601
- await sendLogToSupabase({
602
- file: currentFile,
603
- uploadPath: currentFile,
604
- status: 'error',
605
- message: result.error,
875
+
876
+ // Update progress
877
+ progressBar.update(Math.min(i + batchSize, allPdfRecords.length), {
878
+ detected: totalDetected,
879
+ errors: totalErrors
606
880
  });
607
881
  }
608
882
 
609
- return result;
883
+ progressBar.stop();
884
+
885
+ console.log(`📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`);
886
+ return {
887
+ detectedCount: totalDetected,
888
+ processedCount: totalProcessed,
889
+ errorCount: totalErrors
890
+ };
610
891
  };
611
892
 
612
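// Taken together, the table-driven flow is two passes over the uploader
// table; the CLI wiring that triggers each phase sits outside this diff:
//
//   await insertStatsOnlyToUploaderTable(files, options); // Phase 1:
//   // rows land with status 'fs-stats' from filesystem metadata alone
//   await detectPedimentosInDatabase({ batchSize: 10 });  // Phase 2:
//   // only 'fs-stats' PDFs whose filename matches '%simp%' are re-read
//   // and move to 'detected' / 'not-detected' / 'detection-error' /
//   // 'file-not-found'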
- // Function to process files in parallel batches
613
893
  const processFilesInBatches = async (
614
894
  files,
615
895
  batchSize,
@@ -619,13 +899,20 @@ const processFilesInBatches = async (
619
899
  sourcePath,
620
900
  processedPaths,
621
901
  ) => {
622
- let successCount = 0;
623
- let failureCount = 0;
624
- let skippedCount = 0;
902
+ let totalUploaded = 0;
903
+ let totalDetected = 0;
904
+ let totalOrganized = 0;
905
+ let totalErrors = 0;
906
+ let totalSkipped = 0;
907
+
908
+ const messageBuffer = [];
909
+
910
+ const progressBarFormat = options.statsOnly
911
+ ? '📊 Processing [{bar}] {percentage}% | {value}/{total} files | Stats: {successCount} | Errors: {failureCount} | Duplicates: {skippedCount}'
912
+ : '📂 Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}';
625
913
 
626
914
  const progressBar = new cliProgress.SingleBar({
627
- format:
628
- '📂 Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}',
915
+ format: progressBarFormat,
629
916
  barCompleteChar: '█',
630
917
  barIncompleteChar: '░',
631
918
  hideCursor: true,
@@ -637,101 +924,928 @@ const processFilesInBatches = async (
637
924
  skippedCount: 0,
638
925
  });
639
926
 
640
- for (let i = 0; i < files.length; i += batchSize) {
641
- const batch = files.slice(i, i + batchSize);
927
+ if (options.statsOnly) {
928
+ // OPTIMIZED Stats-only mode - Only read filesystem stats, no file detection
929
+ console.log('📊 Phase 1: Processing files in optimized stats-only mode (no detection)...');
930
+
931
+ for (let i = 0; i < files.length; i += batchSize) {
932
+ const batch = files.slice(i, i + batchSize);
933
+
934
+ // OPTIMIZED: Batch read file stats to reduce I/O overhead
935
+ const fileStatsResults = batchReadFileStats(batch);
936
+ const statsFiles = fileStatsResults
937
+ .filter(result => result.stats !== null) // Only include files with valid stats
938
+ .map((result) => {
939
+ const originalFileName = path.basename(result.path);
940
+
941
+ return {
942
+ path: result.path,
943
+ originalName: originalFileName,
944
+ stats: result.stats, // Pass pre-computed stats to avoid redundant calls
945
+ };
946
+ });
642
947
 
643
- // Process batch in parallel
644
- const batchResults = await Promise.all(
645
- batch.map((file) =>
646
- processFile(
647
- file,
648
- options,
649
- basePath,
650
- folder,
651
- sourcePath,
652
- processedPaths,
653
- ),
654
- ),
655
- );
948
+ // Log any files that couldn't be read
949
+ const failedFiles = fileStatsResults.filter(result => result.error !== null);
950
+ if (failedFiles.length > 0) {
951
+ console.log(`⚠️ Could not read stats for ${failedFiles.length} files in batch`);
952
+ failedFiles.forEach(failed => {
953
+ console.error(` ❌ ${failed.path}: ${failed.error}`);
954
+ });
955
+ }
956
+
957
+ try {
958
+ const result = await insertStatsOnlyToUploaderTable(statsFiles, options);
959
+
960
+ totalUploaded += result.totalInserted;
961
+ totalSkipped += result.totalSkipped;
962
+ totalErrors += failedFiles.length; // Count failed file reads as errors
963
+
964
+ progressBar.update(Math.min(i + batch.length, files.length), {
965
+ successCount: totalUploaded,
966
+ failureCount: totalErrors,
967
+ skippedCount: totalSkipped,
968
+ });
969
+
970
+ } catch (error) {
971
+ console.error(`❌ Error processing stats batch:`, error.message);
972
+ totalErrors += batch.length;
973
+
974
+ progressBar.update(Math.min(i + batch.length, files.length), {
975
+ successCount: totalUploaded,
976
+ failureCount: totalErrors,
977
+ skippedCount: totalSkipped,
978
+ });
979
+ }
980
+ }
981
+ } else if (apiMode && !options.forceSupabase) {
982
+ // API Mode - Process in batches
983
+ for (let i = 0; i < files.length; i += batchSize) {
984
+ const batch = files.slice(i, i + batchSize);
985
+ let sanitizedRelativePath;
986
+
987
+ const apiFiles = batch
988
+ .map((file) => {
989
+ const relativePathRaw = path
990
+ .relative(basePath, file)
991
+ .replace(/^[\\/]+/, '')
992
+ .replace(/\\/g, '/');
993
+
994
+ const pathParts = relativePathRaw.split('/');
995
+ const originalFileName = pathParts[pathParts.length - 1];
996
+ const sanitizedFileName = sanitizeFileName(originalFileName);
997
+ pathParts[pathParts.length - 1] = sanitizedFileName;
998
+ sanitizedRelativePath = pathParts.join('/');
999
+
1000
+ let uploadPath;
1001
+
1002
+ // Handle combined folder structure + auto-detection
1003
+ if (options.folderStructure && options.autoDetectStructure) {
1004
+ // OPTIMIZED: Use cached detection to avoid redundant parsing
1005
+ const detection = getCachedPathDetection(file, basePath);
1006
+ if (detection.detected) {
1007
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
1008
+ const combinedStructure = `${options.folderStructure}/${autoStructure}`;
1009
+ uploadPath = path.posix.join(
1010
+ combinedStructure,
1011
+ sanitizedFileName,
1012
+ );
1013
+ logVerbose(
1014
+ `📁 Combined structure: ${options.folderStructure}/${autoStructure} for ${originalFileName} -> ${uploadPath}`,
1015
+ );
1016
+ } else {
1017
+ // Fallback to just custom structure if auto-detection fails
1018
+ uploadPath = path.posix.join(
1019
+ options.folderStructure,
1020
+ sanitizedFileName,
1021
+ );
1022
+ logVerbose(
1023
+ `📁 Custom structure (auto-detection failed): ${uploadPath}`,
1024
+ );
1025
+ }
1026
+ } else if (options.folderStructure) {
1027
+ // Use custom folder structure only
1028
+ uploadPath = path.posix.join(
1029
+ options.folderStructure,
1030
+ sanitizedFileName,
1031
+ );
1032
+ logVerbose(`📁 Custom structure: ${uploadPath}`);
1033
+ } else if (options.autoDetectStructure) {
1034
+ // Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
1035
+ const detection = getCachedPathDetection(file, basePath);
1036
+ if (detection.detected) {
1037
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
1038
+ uploadPath = path.posix.join(autoStructure, sanitizedFileName);
1039
+ console.log(
1040
+ `🔍 Auto-detected: ${autoStructure} for ${originalFileName} -> ${uploadPath}`,
1041
+ );
1042
+ } else {
1043
+ uploadPath = options.prefix
1044
+ ? path.posix.join(options.prefix, sanitizedRelativePath)
1045
+ : sanitizedRelativePath;
1046
+ console.log(`📁 Using relative path: ${uploadPath}`);
1047
+ }
1048
+ } else {
1049
+ uploadPath = options.prefix
1050
+ ? path.posix.join(options.prefix, sanitizedRelativePath)
1051
+ : sanitizedRelativePath;
1052
+ console.log(`📁 Using standard path: ${uploadPath}`);
1053
+ }
1054
+
1055
+ if (processedPaths.has(uploadPath)) {
1056
+ totalSkipped++;
1057
+ writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
1058
+ return null;
1059
+ }
1060
+
1061
+ return {
1062
+ path: file,
1063
+ name: sanitizedFileName,
1064
+ originalName: originalFileName,
1065
+ uploadPath: uploadPath.replace(/\\/g, '/'), // Ensure forward slashes
1066
+ contentType: mime.lookup(file) || 'application/octet-stream',
1067
+ };
1068
+ })
1069
+ .filter(Boolean);
1070
+
1071
+ if (apiFiles.length > 0) {
1072
+ // console.log(`🔄 Processing batch of ${apiFiles.length} files`);
1073
+ // apiFiles.forEach(f => console.log(` 📄 ${f.name} -> ${f.uploadPath}`));
1074
+
1075
+ try {
1076
+ // Use clientPath from options if specified, otherwise construct from detection or folder
1077
+ let clientPath = options.clientPath;
1078
+
1079
+ if (!clientPath && apiFiles.length > 0) {
1080
+ const firstFile = apiFiles[0];
1081
+ // OPTIMIZED: Use cached detection to avoid redundant parsing
1082
+ const detection = getCachedPathDetection(firstFile.path, basePath);
1083
+ if (detection.detected) {
1084
+ // clientPath = `${detection.year}/${detection.pedimento}/`;
1085
+ clientPath = path
1086
+ .resolve(basePath, sanitizedRelativePath)
1087
+ .replace(/\\/g, '/');
1088
+ } else {
1089
+ // Fallback to folder structure if no year/pedimento detected
1090
+ clientPath = path.resolve(basePath, folder).replace(/\\/g, '/');
1091
+ }
1092
+ }
1093
+
1094
+ const result = await uploadToApi(apiFiles, {
1095
+ ...options,
1096
+ clientPath: clientPath,
1097
+ });
1098
+
1099
+ totalUploaded += result.stats.uploadedCount;
1100
+ totalDetected += result.stats.detectedCount;
1101
+ totalOrganized += result.stats.organizedCount;
1102
+ totalErrors += result.stats.errorCount;
1103
+
1104
+ result.uploaded.forEach((upload) => {
1105
+ const apiFile = apiFiles.find(
1106
+ (f) => f.name === upload.originalName,
1107
+ );
1108
+ if (apiFile) {
1109
+ writeLog(`SUCCESS: ${apiFile.path} -> ${apiFile.uploadPath}`);
1110
+ processedPaths.add(apiFile.uploadPath);
1111
+ }
1112
+ });
1113
+
1114
+ result.errors.forEach((error) => {
1115
+ writeLog(
1116
+ `ERROR: ${error.fileName}: ${error.error} (${error.step})`,
1117
+ );
1118
+ messageBuffer.push(
1119
+ `❌ ${error.fileName}: ${error.error} (${error.step})`,
1120
+ );
1121
+ });
1122
+ } catch (error) {
1123
+ totalErrors += apiFiles.length;
1124
+ apiFiles.forEach((file) => {
1125
+ writeLog(`ERROR: ${file.path}: ${error.message}`);
1126
+ messageBuffer.push(`❌ ${file.name}: ${error.message}`);
1127
+ });
1128
+ }
1129
+ }
1130
+
1131
+ progressBar.update(i + batch.length, {
1132
+ successCount: totalUploaded,
1133
+ failureCount: totalErrors,
1134
+ skippedCount: totalSkipped,
1135
+ });
1136
+
1137
+ if (i + batchSize < files.length) {
1138
+ await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
1139
+ }
1140
+ }
1141
+ } else {
1142
+ // Direct Supabase mode
1143
+ for (let i = 0; i < files.length; i++) {
1144
+ const file = files[i];
1145
+ try {
1146
+ const relativePath = path.relative(basePath, file);
1147
+ let uploadPath;
1148
+
1149
+ // Handle combined folder structure + auto-detection
1150
+ if (options.folderStructure && options.autoDetectStructure) {
1151
+ const detection = getCachedPathDetection(file, basePath);
1152
+ if (detection.detected) {
1153
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
1154
+ const combinedStructure = `${options.folderStructure}/${autoStructure}`;
1155
+ const fileName = path.basename(file);
1156
+ uploadPath = path.join(combinedStructure, fileName);
1157
+ console.log(
1158
+ `📁 Combined structure: ${options.folderStructure}/${autoStructure} for ${fileName}`,
1159
+ );
1160
+ } else {
1161
+ // Fallback to just custom structure if auto-detection fails
1162
+ const fileName = path.basename(file);
1163
+ uploadPath = path.join(options.folderStructure, fileName);
1164
+ console.log(
1165
+ `📁 Custom structure (auto-detection failed): ${uploadPath}`,
1166
+ );
1167
+ }
1168
+ } else if (options.folderStructure) {
1169
+ // Use custom folder structure only
1170
+ const fileName = path.basename(file);
1171
+ uploadPath = path.join(options.folderStructure, fileName);
1172
+ console.log(`📁 Custom structure: ${uploadPath}`);
1173
+ } else if (options.autoDetectStructure) {
1174
+ // Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
1175
+ const detection = getCachedPathDetection(file, basePath);
1176
+ if (detection.detected) {
1177
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
1178
+ const fileName = path.basename(file);
1179
+ uploadPath = path.join(autoStructure, fileName);
1180
+ } else {
1181
+ uploadPath = options.prefix
1182
+ ? path.join(options.prefix, relativePath)
1183
+ : relativePath;
1184
+ }
1185
+ } else {
1186
+ uploadPath = options.prefix
1187
+ ? path.join(options.prefix, relativePath)
1188
+ : relativePath;
1189
+ }
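The four-way branch above encodes a precedence order for the destination path: custom folder structure combined with the detected year/pedimento, then the custom structure alone, then the detected structure alone, then the plain prefix fallback. Restated as a small pure function for reference — a sketch only, since `buildUploadPath` is not a name used by the package:

// Illustrative restatement of the uploadPath precedence above (not package code).
const buildUploadPath = (file, relativePath, options, detection) => {
  const fileName = path.basename(file);
  const auto = detection.detected
    ? `${detection.year}/${detection.pedimento}`
    : null;
  if (options.folderStructure && options.autoDetectStructure && auto) {
    return path.join(`${options.folderStructure}/${auto}`, fileName);
  }
  if (options.folderStructure) {
    return path.join(options.folderStructure, fileName);
  }
  if (options.autoDetectStructure && auto) {
    return path.join(auto, fileName);
  }
  return options.prefix ? path.join(options.prefix, relativePath) : relativePath;
};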
656
1190
 
657
- // Update counters and progress
658
- for (const result of batchResults) {
659
- if (result.success) {
660
- successCount++;
661
- } else if (result.skipped) {
662
- skippedCount++;
663
- } else if (result.error) {
664
- failureCount++;
665
- console.error(`❌ ${result.error}`);
1191
+ if (processedPaths.has(uploadPath)) {
1192
+ totalSkipped++;
1193
+ writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
1194
+ } else {
1195
+ await uploadToSupabase(file, uploadPath);
1196
+ totalUploaded++;
1197
+ writeLog(`SUCCESS: ${file} -> ${uploadPath}`);
1198
+ processedPaths.add(uploadPath);
1199
+ }
1200
+ } catch (error) {
1201
+ totalErrors++;
1202
+ writeLog(`ERROR: ${file}: ${error.message}`);
1203
+ messageBuffer.push(`❌ ${path.basename(file)}: ${error.message}`);
666
1204
  }
667
1205
 
668
- if (result.message && !result.error) {
669
- console.log(`✅ ${result.message}`);
1206
+ progressBar.update(i + 1, {
1207
+ successCount: totalUploaded,
1208
+ failureCount: totalErrors,
1209
+ skippedCount: totalSkipped,
1210
+ });
1211
+ }
1212
+ }
1213
+
1214
+ progressBar.stop();
1215
+
1216
+ const errorMessages = messageBuffer.filter((msg) => msg.startsWith('❌'));
1217
+ if (errorMessages.length > 0) {
1218
+ console.log('\n🚨 Errors encountered during processing:');
1219
+ errorMessages.forEach((msg) => console.error(msg));
1220
+ }
1221
+
1222
+ return {
1223
+ successCount: totalUploaded,
1224
+ detectedCount: totalDetected,
1225
+ organizedCount: totalOrganized,
1226
+ failureCount: totalErrors,
1227
+ skippedCount: totalSkipped,
1228
+ };
1229
+ };
1230
+
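The loop above follows a slice/process/report/pause rhythm that generalizes well. A minimal sketch of the pattern, with `runInBatches` and `delayMs` as illustrative stand-ins for the uploader's own `processFilesInBatches` and `BATCH_DELAY`:

// Sketch of the batch loop pattern used above (illustrative, not package code).
const runInBatches = async (items, batchSize, handler, delayMs = 100) => {
  const results = [];
  for (let i = 0; i < items.length; i += batchSize) {
    // Take the next slice and let the handler process it.
    const batch = items.slice(i, i + batchSize);
    results.push(await handler(batch, i));
    // Pause between batches so the backend is not overwhelmed.
    if (i + batchSize < items.length) {
      await new Promise((resolve) => setTimeout(resolve, delayMs));
    }
  }
  return results;
};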
1231
+ /**
1232
+ * Upload files to Arela API based on specific RFC values
1233
+ */
1234
+ const uploadFilesByRfc = async (options = {}) => {
1235
+ if (!supabase) {
1236
+ console.error('❌ Supabase client not initialized');
1237
+ process.exit(1);
1238
+ }
1239
+
1240
+ if (!API_BASE_URL || !API_TOKEN) {
1241
+ console.error('❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.');
1242
+ process.exit(1);
1243
+ }
1244
+
1245
+ if (!uploadRfcs || uploadRfcs.length === 0) {
1246
+ console.error('❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.');
1247
+ console.error(' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"');
1248
+ process.exit(1);
1249
+ }
1250
+
1251
+ console.log('🎯 RFC-based Upload Mode');
1252
+ console.log(`📋 Target RFCs: ${uploadRfcs.join(', ')}`);
1253
+ console.log('🔍 Searching for files to upload...');
1254
+
1255
+ // Step 1: Get all records that match the specified RFCs and have arela_path
1256
+ const { data: rfcRecords, error: rfcError } = await supabase
1257
+ .from('uploader')
1258
+ .select('arela_path')
1259
+ .in('rfc', uploadRfcs)
1260
+ .not('arela_path', 'is', null);
1261
+
1262
+ if (rfcError) {
1263
+ console.error('❌ Error fetching RFC records:', rfcError.message);
1264
+ return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
1265
+ }
1266
+
1267
+ if (!rfcRecords || rfcRecords.length === 0) {
1268
+ console.log('ℹ️ No files found for the specified RFCs with arela_path');
1269
+ console.log(` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`);
1270
+ return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
1271
+ }
1272
+
1273
+ // Step 2: Get unique arela_paths from the RFC matches
1274
+ const uniqueArelaPaths = [...new Set(rfcRecords.map(r => r.arela_path))];
1275
+ console.log(`📊 Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`);
1276
+
1277
+ // Step 3: Get ALL files that have these arela_paths (including supporting documents)
1278
+ // Use pagination to ensure we get all files, regardless of count
1279
+ let allRelatedFiles = [];
1280
+ let hasMore = true;
1281
+ let offset = 0;
1282
+ const queryBatchSize = 1000;
1283
+
1284
+ console.log('📥 Fetching all related files (with pagination)...');
1285
+
1286
+ while (hasMore) {
1287
+ const { data: batch, error: queryError } = await supabase
1288
+ .from('uploader')
1289
+ .select('id, original_path, arela_path, filename, rfc, document_type')
1290
+ .in('arela_path', uniqueArelaPaths)
1291
+ .not('original_path', 'is', null)
1292
+ .range(offset, offset + queryBatchSize - 1);
1293
+
1294
+ if (queryError) {
1295
+ console.error('❌ Error fetching related files:', queryError.message);
1296
+ return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
1297
+ }
1298
+
1299
+ if (!batch || batch.length === 0) {
1300
+ hasMore = false;
1301
+ } else {
1302
+ allRelatedFiles = allRelatedFiles.concat(batch);
1303
+ offset += queryBatchSize;
1304
+
1305
+ // If we got less than queryBatchSize, we've reached the end
1306
+ if (batch.length < queryBatchSize) {
1307
+ hasMore = false;
670
1308
  }
671
1309
  }
1310
+ }
1311
+
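The `.range()` loop above can be read as a generic pagination helper: request fixed-size pages until a short or empty page comes back. A sketch under the same assumptions (a Supabase query builder and 1000-row pages); `fetchAllRows` is an illustrative name, not an export of this package:

// Illustrative pagination helper over a Supabase query (not package code).
const fetchAllRows = async (buildQuery, pageSize = 1000) => {
  const rows = [];
  for (let offset = 0; ; offset += pageSize) {
    // buildQuery() must return a fresh select builder on each call.
    const { data, error } = await buildQuery().range(offset, offset + pageSize - 1);
    if (error) throw new Error(error.message);
    if (!data || data.length === 0) break;
    rows.push(...data);
    if (data.length < pageSize) break; // short page: nothing left to fetch
  }
  return rows;
};

Called as fetchAllRows(() => supabase.from('uploader').select('id, arela_path').in('arela_path', uniqueArelaPaths)), it would return the same concatenated result the loop above builds by hand.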
1312
+ if (!allRelatedFiles || allRelatedFiles.length === 0) {
1313
+ console.log('ℹ️ No related files found for the arela_paths');
1314
+ return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
1315
+ }
1316
+
1317
+ console.log(`📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`);
1318
+
1319
+ // Group by RFC and arela_path for better organization
1320
+ const filesByRfc = allRelatedFiles.reduce((acc, record) => {
1321
+ const rfc = record.rfc || 'No RFC';
1322
+ if (!acc[rfc]) {
1323
+ acc[rfc] = [];
1324
+ }
1325
+ acc[rfc].push(record);
1326
+ return acc;
1327
+ }, {});
1328
+
1329
+ console.log('📊 Files by RFC (including supporting documents):');
1330
+ for (const [rfc, files] of Object.entries(filesByRfc)) {
1331
+ const documentTypes = [...new Set(files.map(f => f.document_type || 'Unknown'))];
1332
+ console.log(` ${rfc}: ${files.length} files (${documentTypes.join(', ')})`);
1333
+ }
1334
+
1335
+ // Group by arela_path for upload organization
1336
+ const filesByPath = allRelatedFiles.reduce((acc, record) => {
1337
+ const arelaPath = record.arela_path;
1338
+ if (!acc[arelaPath]) {
1339
+ acc[arelaPath] = [];
1340
+ }
1341
+ acc[arelaPath].push(record);
1342
+ return acc;
1343
+ }, {});
1344
+
1345
+ console.log('📁 Files grouped by arela_path:');
1346
+ for (const [arelaPath, files] of Object.entries(filesByPath)) {
1347
+ console.log(` ${arelaPath}: ${files.length} files`);
1348
+ }
1349
+
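Both groupings above are the same reduce-into-object pattern keyed by a different field. A generic sketch; `groupBy` is an illustrative helper, not part of the package:

// Illustrative grouping helper (not package code).
const groupBy = (items, keyFn) =>
  items.reduce((acc, item) => {
    const key = keyFn(item);
    (acc[key] = acc[key] || []).push(item);
    return acc;
  }, {});

// e.g. groupBy(allRelatedFiles, (r) => r.rfc || 'No RFC')
// or   groupBy(allRelatedFiles, (r) => r.arela_path)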
1350
+ let totalProcessed = 0;
1351
+ let totalUploaded = 0;
1352
+ let totalErrors = 0;
1353
+ let totalSkipped = 0;
1354
+
1355
+ // Create progress bar
1356
+ const progressBar = new cliProgress.SingleBar({
1357
+ format: '🚀 Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
1358
+ barCompleteChar: '█',
1359
+ barIncompleteChar: '░',
1360
+ hideCursor: true,
1361
+ });
672
1362
 
673
- progressBar.update(i + batch.length, {
674
- successCount,
675
- failureCount,
676
- skippedCount,
1363
+ if (options.showProgress !== false) {
1364
+ progressBar.start(allRelatedFiles.length, 0, {
1365
+ uploaded: 0,
1366
+ errors: 0,
1367
+ skipped: 0,
677
1368
  });
1369
+ }
1370
+
1371
+ const batchSize = parseInt(options.batchSize) || 10;
1372
+ console.log(`📦 Processing in batches of ${batchSize} files`);
1373
+
1374
+ // Process files in batches
1375
+ for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
1376
+ const batch = allRelatedFiles.slice(i, i + batchSize);
1377
+ const batchNumber = Math.floor(i / batchSize) + 1;
1378
+ const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
1379
+
1380
+ console.log(`\n📦 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`);
1381
+
1382
+ // Prepare files for upload
1383
+ const filesToUpload = [];
1384
+
1385
+ for (const record of batch) {
1386
+ totalProcessed++;
1387
+
1388
+ try {
1389
+ const originalPath = record.original_path;
1390
+
1391
+ // Check if file exists
1392
+ if (!fs.existsSync(originalPath)) {
1393
+ console.log(` ⚠️ File not found: ${originalPath}`);
1394
+ totalSkipped++;
1395
+ continue;
1396
+ }
1397
+
1398
+ // OPTIMIZED: Read file and get size from buffer instead of separate fs.statSync call
1399
+ const fileBuffer = fs.readFileSync(originalPath);
1400
+
1401
+ filesToUpload.push({
1402
+ path: originalPath,
1403
+ buffer: fileBuffer,
1404
+ size: fileBuffer.length, // Get size from buffer instead of fs.statSync
1405
+ name: record.filename,
1406
+ arelaPath: record.arela_path,
1407
+ rfc: record.rfc,
1408
+ documentType: record.document_type,
1409
+ });
1410
+
1411
+ } catch (error) {
1412
+ console.error(` ❌ Error reading file ${record.original_path}:`, error.message);
1413
+ totalErrors++;
1414
+ }
1415
+
1416
+ if (options.showProgress !== false) {
1417
+ progressBar.update(totalProcessed, {
1418
+ uploaded: totalUploaded,
1419
+ errors: totalErrors,
1420
+ skipped: totalSkipped,
1421
+ });
1422
+ }
1423
+ }
1424
+
1425
+ // Upload the batch if we have files
1426
+ if (filesToUpload.length > 0) {
1427
+ try {
1428
+ console.log(` 🚀 Uploading ${filesToUpload.length} files to Arela API...`);
1429
+
1440
+ // Group files by their arela_path so each group is uploaded with its correct folder structure
1442
+ const filesByPath = filesToUpload.reduce((acc, file) => {
1443
+ const groupPath = file.arelaPath.replace(/\/$/, '');
1444
+ if (!acc[groupPath]) {
1445
+ acc[groupPath] = [];
1446
+ }
1447
+ acc[groupPath].push(file);
1448
+ return acc;
1449
+ }, {});
1450
+
1451
+ // Upload each group separately with its folder structure
1452
+ for (const [arelaPath, pathFiles] of Object.entries(filesByPath)) {
1453
+ const pathFormData = new FormData();
1454
+
1455
+ pathFiles.forEach((file) => {
1456
+ pathFormData.append('files', file.buffer, {
1457
+ filename: file.name,
1458
+ contentType: mime.lookup(file.name) || 'application/octet-stream',
1459
+ });
1460
+ });
1461
+
1462
+ // Set folder structure for this group - concatenate custom prefix with arela_path
1463
+ const folderStructure = options.folderStructure
1464
+ ? `${options.folderStructure}/${arelaPath}`.replace(/\/+/g, '/').replace(/\/$/, '')
1465
+ : arelaPath;
1466
+ pathFormData.append('folderStructure', folderStructure);
1467
+ pathFormData.append('autoDetect', 'true');
1468
+ pathFormData.append('autoOrganize', 'false');
1469
+ pathFormData.append('batchSize', String(pathFiles.length));
1470
+ pathFormData.append('clientVersion', packageVersion);
1471
+ if (bucket) {
1472
+ pathFormData.append('bucket', bucket);
1473
+ }
678
1474
 
679
- // Manage cache size periodically (every 100 files processed)
680
- if ((i + batch.length) % 100 === 0) {
681
- manageCaches();
682
- // Also flush logs every 100 files to maintain responsiveness
683
- await logBatcher.flush();
1475
+ console.log(` 📁 Uploading ${pathFiles.length} files to: ${folderStructure}`);
1476
+
1477
+ const response = await fetch(`${API_BASE_URL}/api/storage/batch-upload-and-process`, {
1478
+ method: 'POST',
1479
+ headers: {
1480
+ 'x-api-key': API_TOKEN,
1481
+ },
1482
+ body: pathFormData,
1483
+ });
1484
+
1485
+ if (!response.ok) {
1486
+ const errorText = await response.text();
1487
+ throw new Error(`HTTP ${response.status}: ${errorText}`);
1488
+ }
1489
+
1490
+ const result = await response.json();
1491
+
1492
+ // Check if upload was successful based on stats rather than success field
1493
+ const isSuccessful = result.stats && result.stats.uploadedCount > 0 && result.stats.errorCount === 0;
1494
+
1495
+ if (isSuccessful) {
1496
+ console.log(` ✅ Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`);
1497
+ totalUploaded += result.stats.uploadedCount;
1498
+
1499
+ if (result.stats.detectedCount > 0) {
1500
+ console.log(` 🔍 Files detected: ${result.stats.detectedCount}`);
1501
+ }
1502
+ if (result.stats.organizedCount > 0) {
1503
+ console.log(` 📁 Files organized: ${result.stats.organizedCount}`);
1504
+ }
1505
+ } else {
1506
+ console.error(` ❌ Upload failed for ${folderStructure}:`);
1507
+ if (result.errors && result.errors.length > 0) {
1508
+ result.errors.forEach(error => {
1509
+ console.error(` - ${error.fileName}: ${error.error}`);
1510
+ });
1511
+ }
1512
+ totalErrors += pathFiles.length;
1513
+ }
1514
+
1515
+ // Small delay between path groups
1516
+ await new Promise(resolve => setTimeout(resolve, 100));
1517
+ }
1518
+
1519
+ } catch (error) {
1520
+ console.error(` ❌ Error uploading batch ${batchNumber}:`, error.message);
1521
+ totalErrors += filesToUpload.length;
1522
+ }
684
1523
  }
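Each per-group request above boils down to one pattern: append the buffers and option fields to a multipart form, POST it with the API key header, and treat a non-2xx status as an error. A stripped-down sketch assuming the same endpoint and header scheme shown above (only the `folderStructure` field is carried over; the real request also sends autoDetect, autoOrganize, batchSize, clientVersion, and bucket):

// Minimal sketch of one group upload (illustrative, not package code).
const uploadGroup = async (files, folderStructure) => {
  const form = new FormData(); // 'form-data' package, as imported above
  for (const file of files) {
    form.append('files', file.buffer, {
      filename: file.name,
      contentType: mime.lookup(file.name) || 'application/octet-stream',
    });
  }
  form.append('folderStructure', folderStructure);
  const response = await fetch(
    `${API_BASE_URL}/api/storage/batch-upload-and-process`,
    { method: 'POST', headers: { 'x-api-key': API_TOKEN }, body: form },
  );
  if (!response.ok) {
    throw new Error(`HTTP ${response.status}: ${await response.text()}`);
  }
  return response.json(); // { stats, errors, ... } as consumed above
};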
685
1524
 
686
- // Small delay between batches to prevent overwhelming the server
687
- if (i + batchSize < files.length) {
688
- await delay(100);
1525
+ // Small delay between batches
1526
+ if (i + batchSize < allRelatedFiles.length) {
1527
+ await new Promise(resolve => setTimeout(resolve, BATCH_DELAY));
689
1528
  }
690
1529
  }
691
1530
 
692
- progressBar.stop();
1531
+ if (options.showProgress !== false) {
1532
+ progressBar.stop();
1533
+ }
1534
+
1535
+ console.log(`\n${'='.repeat(60)}`);
1536
+ console.log(`🎯 RFC-BASED UPLOAD COMPLETED`);
1537
+ console.log(`${'='.repeat(60)}`);
1538
+ console.log(` 📋 Files processed: ${totalProcessed}`);
1539
+ console.log(` ✅ Files uploaded: ${totalUploaded}`);
1540
+ console.log(` ⏭️ Files skipped: ${totalSkipped}`);
1541
+ console.log(` ❌ Errors: ${totalErrors}`);
1542
+ console.log(`${'='.repeat(60)}\n`);
1543
+
1544
+ return {
1545
+ processedCount: totalProcessed,
1546
+ uploadedCount: totalUploaded,
1547
+ skippedCount: totalSkipped,
1548
+ errorCount: totalErrors,
1549
+ };
1550
+ };
1551
+
1552
+ /**
1553
+ * Propagate arela_path from pedimento_simplificado records to related files with same base path
1554
+ */
1555
+ const propagateArelaPath = async (options = {}) => {
1556
+ if (!supabase) {
1557
+ console.error('❌ Supabase client not initialized');
1558
+ process.exit(1);
1559
+ }
1560
+
1561
+ console.log('🔍 Finding pedimento_simplificado records with arela_path...');
1562
+
1563
+ // Get all pedimento_simplificado records that have arela_path
1564
+ const { data: pedimentoRecords, error: pedimentoError } = await supabase
1565
+ .from('uploader')
1566
+ .select('id, original_path, arela_path, filename')
1567
+ .eq('document_type', 'pedimento_simplificado')
1568
+ .not('arela_path', 'is', null);
1569
+
1570
+ if (pedimentoError) {
1571
+ console.error('❌ Error fetching pedimento records:', pedimentoError.message);
1572
+ return { processedCount: 0, updatedCount: 0, errorCount: 1 };
1573
+ }
1574
+
1575
+ if (!pedimentoRecords || pedimentoRecords.length === 0) {
1576
+ console.log('ℹ️ No pedimento_simplificado records with arela_path found');
1577
+ return { processedCount: 0, updatedCount: 0, errorCount: 0 };
1578
+ }
1579
+
1580
+ console.log(`📋 Found ${pedimentoRecords.length} pedimento records with arela_path`);
1581
+
1582
+ let totalProcessed = 0;
1583
+ let totalUpdated = 0;
1584
+ let totalErrors = 0;
1585
+
1586
+ // Create progress bar
1587
+ const progressBar = new cliProgress.SingleBar({
1588
+ format: '🔄 Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
1589
+ barCompleteChar: '█',
1590
+ barIncompleteChar: '░',
1591
+ hideCursor: true,
1592
+ });
1593
+
1594
+ if (options.showProgress !== false) {
1595
+ progressBar.start(pedimentoRecords.length, 0, {
1596
+ updated: 0,
1597
+ errors: 0,
1598
+ });
1599
+ }
1600
+
1601
+ // Process each pedimento record
1602
+ for (const pedimento of pedimentoRecords) {
1603
+ try {
1604
+ totalProcessed++;
1605
+
1606
+ // Extract base path from original_path (remove filename)
1607
+ const recordBasePath = path.dirname(pedimento.original_path);
1608
+
1609
+ console.log(`\n🔍 Processing: ${pedimento.filename}`);
1610
+ console.log(` 📁 Base path: ${recordBasePath}`);
1611
+
1612
+ // Extract folder part from existing arela_path by removing the filename (e.g. '2024/4023260/pedimento.pdf' -> '2024/4023260/')
1613
+ const existingPath = pedimento.arela_path;
1614
+ const folderArelaPath = existingPath.includes('/') ?
1615
+ existingPath.substring(0, existingPath.lastIndexOf('/')) + '/' :
1616
+ existingPath.endsWith('/') ? existingPath : existingPath + '/';
1617
+
1618
+ console.log(` 🎯 Original arela path: ${existingPath}`);
1619
+ console.log(` 📁 Folder arela path: ${folderArelaPath}`);
1620
+
1621
+ // Find all files with the same base path that don't have arela_path yet
1622
+ const { data: relatedFiles, error: relatedError } = await supabase
1623
+ .from('uploader')
1624
+ .select('id, filename, original_path')
1625
+ .like('original_path', `${recordBasePath}%`)
1626
+ .is('arela_path', null)
1627
+ .neq('id', pedimento.id); // Exclude the pedimento itself
1628
+
1629
+ if (relatedError) {
1630
+ console.error(`❌ Error finding related files for ${pedimento.filename}:`, relatedError.message);
1631
+ totalErrors++;
1632
+ continue;
1633
+ }
1634
+
1635
+ if (!relatedFiles || relatedFiles.length === 0) {
1636
+ console.log(` ℹ️ No related files found needing arela_path update`);
1637
+ continue;
1638
+ }
1639
+
1640
+ console.log(` 📄 Found ${relatedFiles.length} related files to update:`);
1641
+
1642
+ // Show first 10 files, then indicate if there are more
1643
+ const filesToShow = relatedFiles.slice(0, 10);
1644
+ filesToShow.forEach(file => {
1645
+ console.log(` - ${file.filename}`);
1646
+ });
1647
+
1648
+ if (relatedFiles.length > 10) {
1649
+ console.log(` ... and ${relatedFiles.length - 10} more files`);
1650
+ }
1651
+
1652
+ // Process files in batches to avoid URI length limitations
1653
+ const BATCH_SIZE = 50; // Process 50 files at a time
1654
+ const fileIds = relatedFiles.map(f => f.id);
1655
+ let batchErrors = 0;
1656
+ let batchUpdated = 0;
1657
+
1658
+ console.log(` 🔄 Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`);
1659
+
1660
+ for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
1661
+ const batchIds = fileIds.slice(i, i + BATCH_SIZE);
1662
+ const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
1663
+ const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
1664
+
1665
+ console.log(` 📦 Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`);
1666
+
1667
+ try {
1668
+ const { error: updateError } = await supabase
1669
+ .from('uploader')
1670
+ .update({ arela_path: folderArelaPath })
1671
+ .in('id', batchIds);
1672
+
1673
+ if (updateError) {
1674
+ console.error(` ❌ Error in batch ${batchNumber}:`, updateError.message);
1675
+ batchErrors++;
1676
+ } else {
1677
+ console.log(` ✅ Batch ${batchNumber} completed: ${batchIds.length} files updated`);
1678
+ batchUpdated += batchIds.length;
1679
+ }
1680
+ } catch (error) {
1681
+ console.error(` ❌ Exception in batch ${batchNumber}:`, error.message);
1682
+ batchErrors++;
1683
+ }
1684
+
1685
+ // Small delay between batches to avoid overwhelming the database
1686
+ if (i + BATCH_SIZE < fileIds.length) {
1687
+ await new Promise(resolve => setTimeout(resolve, 100));
1688
+ }
1689
+ }
1690
+
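Updating in chunks of 50 keeps the `IN (...)` filter that gets encoded into the request URL below typical URI length limits. The same idea as a reusable helper — `updateInChunks` is an illustrative name, not package code:

// Illustrative chunked-update helper (not package code).
const updateInChunks = async (table, ids, patch, chunkSize = 50) => {
  let updated = 0;
  for (let i = 0; i < ids.length; i += chunkSize) {
    const chunk = ids.slice(i, i + chunkSize);
    const { error } = await supabase.from(table).update(patch).in('id', chunk);
    if (error) throw new Error(`chunk ${Math.floor(i / chunkSize) + 1}: ${error.message}`);
    updated += chunk.length;
  }
  return updated;
};

// e.g. await updateInChunks('uploader', fileIds, { arela_path: folderArelaPath })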
1691
+ if (batchErrors > 0) {
1692
+ console.error(`❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`);
1693
+ totalErrors++;
1694
+ } else {
1695
+ console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
1696
+ totalUpdated += batchUpdated;
1697
+ }
1698
+
1699
+ } catch (error) {
1700
+ console.error(`❌ Error processing ${pedimento.filename}:`, error.message);
1701
+ totalErrors++;
1702
+ }
1703
+
1704
+ if (options.showProgress !== false) {
1705
+ progressBar.update(totalProcessed, {
1706
+ updated: totalUpdated,
1707
+ errors: totalErrors,
1708
+ });
1709
+ }
1710
+ }
1711
+
1712
+ if (options.showProgress !== false) {
1713
+ progressBar.stop();
1714
+ }
1715
+
1716
+ console.log(`\n${'='.repeat(60)}`);
1717
+ console.log(`🎯 ARELA PATH PROPAGATION COMPLETED`);
1718
+ console.log(`${'='.repeat(60)}`);
1719
+ console.log(` 📋 Pedimento records processed: ${totalProcessed}`);
1720
+ console.log(` ✅ Related files updated: ${totalUpdated}`);
1721
+ console.log(` ❌ Errors: ${totalErrors}`);
1722
+ console.log(`${'='.repeat(60)}\n`);
693
1723
 
694
1724
  return {
695
- successCount,
696
- failureCount,
697
- skippedCount,
1725
+ processedCount: totalProcessed,
1726
+ updatedCount: totalUpdated,
1727
+ errorCount: totalErrors,
698
1728
  };
699
1729
  };
700
1730
 
701
1731
  program
702
- .name('supabase-uploader')
703
- .description('CLI to upload folders from a base path to Supabase Storage')
1732
+ .name('arela-uploader')
1733
+ .description(
1734
+ 'CLI to upload folders to Arela API or Supabase Storage with automatic processing',
1735
+ )
704
1736
  .option('-v, --version', 'output the version number')
705
1737
  .option('-p, --prefix <prefix>', 'Prefix path in bucket', '')
1738
+ .option('-b, --bucket <bucket>', 'Bucket name override')
1739
+ .option('--force-supabase', 'Force direct Supabase upload (skip API)')
706
1740
  .option(
707
- '-r, --rename-files',
708
- 'Rename files with problematic characters before uploading',
1741
+ '--no-auto-detect',
1742
+ 'Disable automatic file detection (API mode only)',
709
1743
  )
710
1744
  .option(
711
- '--dry-run',
712
- 'Show what files would be renamed without actually renaming them',
1745
+ '--no-auto-organize',
1746
+ 'Disable automatic file organization (API mode only)',
713
1747
  )
714
1748
  .option(
715
1749
  '-c, --concurrency <number>',
716
- 'Number of files to process concurrently (default: 3)',
717
- '3',
1750
+ 'Files per batch for processing (default: 10)',
1751
+ '10',
718
1752
  )
1753
+ .option('--batch-size <number>', 'API batch size (default: 10)', '10')
1754
+ .option('--show-stats', 'Show detailed processing statistics')
719
1755
  .option(
720
- '--show-cache-stats',
721
- 'Show cache statistics for performance analysis',
1756
+ '--folder-structure <structure>',
1757
+ 'Custom folder structure (e.g., "2024/4023260" or "cliente1/pedimentos")',
722
1758
  )
723
1759
  .option(
724
- '--batch-size <number>',
725
- 'Number of logs to batch before sending to Supabase (default: 50)',
726
- '50',
1760
+ '--auto-detect-structure',
1761
+ 'Automatically detect year/pedimento from file paths',
727
1762
  )
1763
+ .option('--client-path <path>', 'Client path for metadata tracking')
1764
+ .option('--stats-only', 'Phase 1: Read filesystem stats and insert records to the database (no uploads; use --no-detect to skip document type detection)')
1765
+ .option('--no-detect', 'Disable document type detection in stats-only mode')
1766
+ .option('--detect-pdfs', 'Phase 2: Process PDF files in database for pedimento-simplificado detection')
1767
+ .option('--propagate-arela-path', 'Phase 3: Propagate arela_path from pedimento_simplificado records to related files with same base path')
1768
+ .option('--upload-by-rfc', 'Phase 4: Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable')
1769
+ .option('--run-all-phases', 'Run all 4 phases in sequence: stats → detect → propagate → upload')
728
1770
  .action(async (options) => {
729
- // Handle version option
730
1771
  if (options.version) {
731
- console.log(version);
1772
+ console.log(packageVersion);
732
1773
  process.exit(0);
733
1774
  }
734
1775
 
1776
+ // Handle detect-pdfs option (Phase 2)
1777
+ if (options.detectPdfs) {
1778
+ console.log('🔍 Starting Phase 2: PDF Detection');
1779
+ await checkCredentials(true); // Force Supabase mode
1780
+
1781
+ const result = await detectPedimentosInDatabase({
1782
+ batchSize: parseInt(options.batchSize) || 10,
1783
+ });
1784
+
1785
+ console.log(`✅ Phase 2 Complete: ${result.detectedCount} detected, ${result.errorCount} errors`);
1786
+ return;
1787
+ }
1788
+
1789
+ // Handle run-all-phases option
1790
+ if (options.runAllPhases) {
1791
+ console.log('🚀 Starting all 4 phases in sequence...');
1792
+ await checkCredentials(true); // Force Supabase mode
1793
+
1794
+ // Phase 1: Stats collection
1795
+ console.log('\n📊 === PHASE 1: Filesystem Stats ===');
1796
+ options.statsOnly = true;
1797
+ // Continue with normal processing to run Phase 1
1798
+
1799
+ // The rest will be handled after Phase 1 completes
1800
+ }
1801
+
1802
+ // Handle propagate-arela-path option
1803
+ if (options.propagateArelaPath) {
1804
+ // Initialize Supabase credentials for propagation
1805
+ await checkCredentials(true); // Force Supabase mode
1806
+
1807
+ const result = await propagateArelaPath({
1808
+ showProgress: true,
1809
+ });
1810
+
1811
+ if (result.errorCount > 0) {
1812
+ process.exit(1);
1813
+ }
1814
+ return;
1815
+ }
1816
+
1817
+ // Handle upload-by-rfc option
1818
+ if (options.uploadByRfc) {
1819
+ // RFC upload needs both Supabase (for database queries) and API (for uploads)
1820
+ await checkCredentials(false); // Initialize API mode
1821
+
1822
+ // Also initialize Supabase for database queries
1823
+ if (!supabase) {
1824
+ if (!supabaseUrl || !supabaseKey) {
1825
+ console.error('❌ RFC upload requires Supabase credentials for database queries.');
1826
+ console.error(' Please set SUPABASE_URL and SUPABASE_KEY environment variables.');
1827
+ process.exit(1);
1828
+ }
1829
+
1830
+ supabase = createClient(supabaseUrl, supabaseKey);
1831
+ console.log('✅ Connected to Supabase for database queries');
1832
+ }
1833
+
1834
+ const result = await uploadFilesByRfc({
1835
+ showProgress: true,
1836
+ batchSize: parseInt(options.batchSize) || 10,
1837
+ folderStructure: options.folderStructure,
1838
+ });
1839
+
1840
+ if (result.errorCount > 0) {
1841
+ process.exit(1);
1842
+ }
1843
+ return;
1844
+ }
1845
+
1846
+ // Initialize credentials with force supabase flag (for stats mode, always need Supabase)
1847
+ await checkCredentials(options.forceSupabase || options.statsOnly);
1848
+
735
1849
  if (!basePath || !sources || sources.length === 0) {
736
1850
  console.error(
737
1851
  '⚠️ UPLOAD_BASE_PATH or UPLOAD_SOURCES not defined in environment variables.',
@@ -739,18 +1853,31 @@ program
739
1853
  process.exit(1);
740
1854
  }
741
1855
 
742
- const concurrency = parseInt(options.concurrency) || 3;
743
- const batchSize = parseInt(options.batchSize) || 50;
744
-
745
- // Configure log batcher with custom batch size
746
- logBatcher.batchSize = batchSize;
1856
+ const batchSize = parseInt(options.batchSize) || 10;
1857
+ const concurrency = parseInt(options.concurrency) || 10;
747
1858
 
748
- console.log(`🚀 Using concurrency level: ${concurrency}`);
749
- console.log(`📦 Using log batch size: ${batchSize}`);
1859
+ if (options.statsOnly) {
1860
+ console.log('📊 Mode: Stats Only - Reading file stats and inserting to uploader table');
1861
+ console.log('🚫 Files will NOT be uploaded');
1862
+ if (options.detect !== false) {
1863
+ console.log('🔍 Document type detection ENABLED for supported files');
1864
+ } else {
1865
+ console.log('🔍 Document type detection DISABLED');
1866
+ }
1867
+ } else {
1868
+ console.log(
1869
+ `🚀 Mode: ${apiMode ? 'Arela API with auto-processing' : 'Direct Supabase'}`,
1870
+ );
1871
+ }
1872
+ console.log(`📦 Batch size: ${batchSize}`);
1873
+ console.log(`⚡ Concurrency: ${concurrency}`);
750
1874
 
751
- const processedPaths = await getProcessedPaths();
1875
+ const processedPaths = getProcessedPaths();
752
1876
  let globalSuccess = 0;
1877
+ let globalDetected = 0;
1878
+ let globalOrganized = 0;
753
1879
  let globalFailure = 0;
1880
+ let globalSkipped = 0;
754
1881
 
755
1882
  for (const folder of sources) {
756
1883
  const sourcePath = path.resolve(basePath, folder).replace(/\\/g, '/');
@@ -764,78 +1891,131 @@ program
764
1891
 
765
1892
  console.log(`📊 Found ${files.length} files to process`);
766
1893
 
767
- // Process files in parallel batches
768
- const { successCount, failureCount, skippedCount } =
769
- await processFilesInBatches(
770
- files,
771
- concurrency,
772
- options,
773
- basePath,
774
- folder,
775
- sourcePath,
776
- processedPaths,
777
- );
778
-
779
- globalSuccess += successCount;
780
- globalFailure += failureCount;
781
-
782
- console.log(`\n📦 Upload Summary for ${folder}:`);
783
- console.log(` ✅ Successfully uploaded files: ${successCount}`);
784
- console.log(` ❌ Files with errors: ${failureCount}`);
785
- console.log(` ⏭️ Files skipped (already exist): ${skippedCount}`);
786
- console.log(` 📜 Log file: ${logFilePath} \n`);
1894
+ const result = await processFilesInBatches(
1895
+ files,
1896
+ batchSize,
1897
+ options,
1898
+ basePath,
1899
+ folder,
1900
+ sourcePath,
1901
+ processedPaths,
1902
+ );
787
1903
 
788
- // Show cache statistics if requested
789
- if (options.showCacheStats) {
790
- console.log(`📊 Cache Statistics:`);
791
- console.log(
792
- ` 🗂️ Filename sanitization cache: ${sanitizationCache.size} entries`,
793
- );
794
- console.log(
795
- ` 📁 Path sanitization cache: ${pathSanitizationCache.size} entries`,
796
- );
797
- console.log(
798
- ` 📋 Log batch pending: ${logBatcher.batch.length} entries`,
799
- );
1904
+ globalSuccess += result.successCount;
1905
+ globalDetected += result.detectedCount || 0;
1906
+ globalOrganized += result.organizedCount || 0;
1907
+ globalFailure += result.failureCount;
1908
+ globalSkipped += result.skippedCount;
800
1909
 
801
- // Calculate cache hit rate (rough estimation)
802
- const totalProcessed = successCount + failureCount + skippedCount;
803
- const estimatedCacheHitRate =
804
- totalProcessed > 0
805
- ? Math.round(
806
- ((totalProcessed - sanitizationCache.size) / totalProcessed) *
807
- 100,
808
- )
809
- : 0;
810
- console.log(
811
- ` 🎯 Estimated cache hit rate: ${Math.max(0, estimatedCacheHitRate)}%\n`,
812
- );
1910
+ console.log(`\n📦 Summary for ${folder}:`);
1911
+ if (options.statsOnly) {
1912
+ console.log(` 📊 Stats recorded: ${result.successCount}`);
1913
+ } else {
1914
+ console.log(` ✅ Uploaded: ${result.successCount}`);
1915
+ if (apiMode) {
1916
+ console.log(` 🔍 Detected: ${result.detectedCount || 0}`);
1917
+ console.log(` 📁 Organized: ${result.organizedCount || 0}`);
1918
+ }
1919
+ }
1920
+ console.log(` ❌ Errors: ${result.failureCount}`);
1921
+ if (options.statsOnly) {
1922
+ console.log(` ⏭️ Duplicates: ${result.skippedCount}`);
1923
+ } else {
1924
+ console.log(` ⏭️ Skipped: ${result.skippedCount}`);
813
1925
  }
814
1926
 
815
1927
  writeLog(
816
- `📦 Upload Summary for folder ${folder}: Success: ${successCount}, Errors: ${failureCount}, Skipped: ${skippedCount}`,
1928
+ `📦 Summary for ${folder}: Success: ${result.successCount}, Detected: ${result.detectedCount || 0}, Organized: ${result.organizedCount || 0}, Errors: ${result.failureCount}, ${options.statsOnly ? 'Duplicates' : 'Skipped'}: ${result.skippedCount}`,
817
1929
  );
818
1930
  } catch (err) {
819
1931
  console.error(`⚠️ Error processing folder ${folder}:`, err.message);
820
1932
  writeLog(`⚠️ Error processing folder ${folder}: ${err.message}`);
821
- await sendLogToSupabase({
822
- file: folder,
823
- uploadPath: folder,
824
- status: 'error',
825
- message: err.message,
826
- });
827
1933
  globalFailure++;
828
1934
  }
829
1935
  }
830
1936
 
831
- // Force flush any remaining logs before finishing
832
- console.log(`📤 Flushing remaining logs...`);
833
- await logBatcher.forceFlush();
834
-
835
- console.log(`🎯 Upload completed.`);
836
- console.log(` ✅ Total uploaded: ${globalSuccess}`);
837
- console.log(` ❌ Total with errors: ${globalFailure}`);
1937
+ console.log(`\n${'='.repeat(60)}`);
1938
+ if (options.statsOnly) {
1939
+ console.log(`📊 STATS COLLECTION COMPLETED`);
1940
+ console.log(`${'='.repeat(60)}`);
1941
+ console.log(` 📊 Total stats recorded: ${globalSuccess}`);
1942
+ } else {
1943
+ console.log(`🎯 ${apiMode ? 'ARELA API' : 'SUPABASE'} UPLOAD COMPLETED`);
1944
+ console.log(`${'='.repeat(60)}`);
1945
+ console.log(` ✅ Total uploaded: ${globalSuccess}`);
1946
+ if (apiMode) {
1947
+ console.log(` 🔍 Total detected: ${globalDetected}`);
1948
+ console.log(` 📁 Total organized: ${globalOrganized}`);
1949
+ }
1950
+ }
1951
+ if (options.statsOnly) {
1952
+ console.log(` ⏭️ Total duplicates: ${globalSkipped}`);
1953
+ } else {
1954
+ console.log(` ⏭️ Total skipped: ${globalSkipped}`);
1955
+ }
1956
+ console.log(` ❌ Total errors: ${globalFailure}`);
838
1957
  console.log(` 📜 Log file: ${logFilePath}`);
1958
+ console.log(`${'='.repeat(60)}\n`);
1959
+
1960
+ // Continue with remaining phases if running all phases
1961
+ if (options.runAllPhases && options.statsOnly) {
1962
+ try {
1963
+ // Phase 2: PDF Detection
1964
+ console.log('\n🔍 === PHASE 2: PDF Detection ===');
1965
+ const detectionResult = await detectPedimentosInDatabase({
1966
+ batchSize: parseInt(options.batchSize) || 10,
1967
+ });
1968
+ console.log(`✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`);
1969
+
1970
+ // Phase 3: Propagate arela_path
1971
+ console.log('\n📁 === PHASE 3: Propagate Arela Paths ===');
1972
+ const propagateResult = await propagateArelaPath({
1973
+ showProgress: true,
1974
+ });
1975
+ console.log(`✅ Phase 3 Complete: ${propagateResult.updatedCount || 0} paths propagated`);
1976
+
1977
+ // Phase 4: Upload by RFC
1978
+ if (uploadRfcs && uploadRfcs.length > 0) {
1979
+ console.log('\n🚀 === PHASE 4: Upload by RFC ===');
1980
+
1981
+ // Initialize API mode for uploads
1982
+ await checkCredentials(false);
1983
+
1984
+ const uploadResult = await uploadFilesByRfc({
1985
+ showProgress: true,
1986
+ batchSize: parseInt(options.batchSize) || 10,
1987
+ folderStructure: options.folderStructure,
1988
+ });
1989
+ console.log(`✅ Phase 4 Complete: ${uploadResult.uploadedCount} uploaded, ${uploadResult.errorCount} errors`);
1990
+ } else {
1991
+ console.log('\n⚠️ === PHASE 4: Upload by RFC ===');
1992
+ console.log('⚠️ UPLOAD_RFCS environment variable not configured, skipping Phase 4');
1993
+ }
1994
+
1995
+ console.log('\n🎉 All 4 phases completed successfully!');
1996
+
1997
+ } catch (error) {
1998
+ console.error(`❌ Error in multi-phase execution:`, error.message);
1999
+ process.exit(1);
2000
+ }
2001
+ }
2002
+
2003
+ if (options.showStats && (sanitizationCache.size > 0 || pathDetectionCache.size > 0)) {
2004
+ console.log(`📊 Performance Statistics:`);
2005
+ if (sanitizationCache.size > 0) {
2006
+ console.log(
2007
+ ` 🗂️ Sanitization cache entries: ${sanitizationCache.size}`,
2008
+ );
2009
+ }
2010
+ if (pathDetectionCache.size > 0) {
2011
+ console.log(
2012
+ ` 📁 Path detection cache entries: ${pathDetectionCache.size}`,
2013
+ );
2014
+ }
2015
+ }
2016
+
2017
+ // OPTIMIZED: Ensure log buffer is flushed before exit
2018
+ flushLogBuffer();
839
2019
  });
840
2020
 
841
2021
  program.parse();
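Taken together, 0.2.1 turns the CLI into a four-phase pipeline. Assuming the package's bin is invoked as `arela-uploader` (the `name()` set above; the actual bin entry lives in package.json, which this diff does not show), a full run would be `arela-uploader --run-all-phases` with SUPABASE_URL, SUPABASE_KEY, ARELA_API_URL, ARELA_API_TOKEN, UPLOAD_BASE_PATH, UPLOAD_SOURCES, and UPLOAD_RFCS set in the environment or a .env file; the individual phases remain runnable on their own via --stats-only, --detect-pdfs, --propagate-arela-path, and --upload-by-rfc.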