@arela/uploader 0.0.12 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/index.js CHANGED
@@ -3,32 +3,55 @@ import { createClient } from '@supabase/supabase-js';
3
3
  import cliProgress from 'cli-progress';
4
4
  import { Command } from 'commander';
5
5
  import { config } from 'dotenv';
6
+ import FormData from 'form-data';
6
7
  import fs from 'fs';
7
8
  import { globby } from 'globby';
8
9
  import mime from 'mime-types';
9
- import { createRequire } from 'module';
10
+ import fetch from 'node-fetch';
10
11
  import path from 'path';
11
-
12
- const require = createRequire(import.meta.url);
13
- const { version } = require('../package.json');
12
+ import { FileDetectionService } from './file-detection.js';
14
13
 
15
14
  config();
16
15
 
17
16
  const program = new Command();
18
17
 
18
+ // Read package.json version at startup
19
+ let packageVersion = '1.0.0'; // fallback
20
+ try {
21
+ const __filename = new URL(import.meta.url).pathname;
22
+ const __dirname = path.dirname(__filename);
23
+ const packageJsonPath = path.resolve(__dirname, '../package.json');
24
+ const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
25
+ packageVersion = packageJson.version || '1.0.0';
26
+ } catch (error) {
27
+ console.warn('⚠️ Could not read package.json version, using fallback');
28
+ }
29
+
30
+ // Configuración de Supabase (original)
19
31
  const supabaseUrl = process.env.SUPABASE_URL;
20
32
  const supabaseKey = process.env.SUPABASE_KEY;
21
33
  const bucket = process.env.SUPABASE_BUCKET;
34
+
35
+ // Configuración de API (nueva)
36
+ const API_BASE_URL = process.env.ARELA_API_URL;
37
+ const API_TOKEN = process.env.ARELA_API_TOKEN;
38
+
39
+ // Configuración del uploader mejorado
22
40
  const basePath = process.env.UPLOAD_BASE_PATH;
23
41
  const sources = process.env.UPLOAD_SOURCES?.split('|')
24
42
  .map((s) => s.trim())
25
43
  .filter(Boolean);
26
44
 
27
- const supabase = createClient(supabaseUrl, supabaseKey);
45
+ // Configuración de RFCs para upload
46
+ const uploadRfcs = process.env.UPLOAD_RFCS?.split('|')
47
+ .map((s) => s.trim())
48
+ .filter(Boolean);
28
49
 
29
- // Pre-compiled regex patterns for better performance
50
+ let supabase;
51
+ let apiMode = false;
52
+
53
+ // Pre-compiled regex patterns for better performance (from original complex uploader)
30
54
  const SANITIZATION_PATTERNS = [
31
- // Character replacements (grouped for efficiency)
32
55
  [/[áàâäãåāăą]/gi, 'a'],
33
56
  [/[éèêëēĕėę]/gi, 'e'],
34
57
  [/[íìîïīĭį]/gi, 'i'],
@@ -37,730 +60,1425 @@ const SANITIZATION_PATTERNS = [
37
60
  [/[ñň]/gi, 'n'],
38
61
  [/[ç]/gi, 'c'],
39
62
  [/[ý]/gi, 'y'],
40
- // Korean characters (compiled once)
41
63
  [/[멕]/g, 'meok'],
42
64
  [/[시]/g, 'si'],
43
65
  [/[코]/g, 'ko'],
44
66
  [/[용]/g, 'yong'],
45
67
  [/[가-힣]/g, 'kr'],
46
- // Unicode diacritics (after normalize)
47
68
  [/[\u0300-\u036f]/g, ''],
48
- // Problematic symbols
49
69
  [/[\\?%*:|"<>[\]~`^]/g, '-'],
50
70
  [/[{}]/g, '-'],
51
71
  [/[&]/g, 'and'],
52
- [/[()]/g, ''], // Remove parentheses
53
- // Cleanup patterns
54
- [/\s+/g, '-'], // Replace spaces with dashes
55
- [/-+/g, '-'], // Replace multiple dashes with single dash
56
- [/^-+|-+$/g, ''], // Remove leading/trailing dashes
57
- [/^\.+/, ''], // Remove leading dots
58
- [/[^\w.-]/g, ''], // Remove any remaining non-alphanumeric chars
72
+ [/[()]/g, ''],
73
+ [/\s+/g, '-'],
74
+ [/-+/g, '-'],
75
+ [/^-+|-+$/g, ''],
76
+ [/^\.+/, ''],
77
+ [/[^\w.-]/g, ''],
59
78
  ];
60
79
 
61
- // Cache for sanitized filenames to avoid repeated processing
62
80
  const sanitizationCache = new Map();
63
81
 
64
- // Enhanced sanitization function with caching and pre-compiled regex
65
82
  const sanitizeFileName = (fileName) => {
66
- // Check cache first
67
83
  if (sanitizationCache.has(fileName)) {
68
84
  return sanitizationCache.get(fileName);
69
85
  }
70
86
 
71
- // Get file extension
72
87
  const ext = path.extname(fileName);
73
88
  const nameWithoutExt = path.basename(fileName, ext);
74
89
 
75
- // Fast path for already clean filenames
76
90
  if (/^[a-zA-Z0-9._-]+$/.test(nameWithoutExt)) {
77
91
  const result = fileName;
78
92
  sanitizationCache.set(fileName, result);
79
93
  return result;
80
94
  }
81
95
 
82
- // Normalize unicode first (more efficient to do once)
83
96
  let sanitized = nameWithoutExt.normalize('NFD');
84
97
 
85
- // Apply all sanitization patterns
86
98
  for (const [pattern, replacement] of SANITIZATION_PATTERNS) {
87
99
  sanitized = sanitized.replace(pattern, replacement);
88
100
  }
89
101
 
90
- // Ensure the filename is not empty
102
+ // Additional sanitization for problematic characters
103
+ sanitized = sanitized
104
+ .replace(/~/g, '-') // Replace tildes
105
+ .replace(/\s+/g, '-') // Replace spaces with dashes
106
+ .replace(/\.+/g, '-') // Replace multiple dots with dashes
107
+ .replace(/-+/g, '-') // Collapse multiple dashes
108
+ .replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes
109
+
91
110
  if (!sanitized) {
92
111
  sanitized = 'unnamed_file';
93
112
  }
94
113
 
95
114
  const result = sanitized + ext;
96
-
97
- // Cache the result for future use
98
115
  sanitizationCache.set(fileName, result);
99
-
100
116
  return result;
101
117
  };
102
118
 
103
- // Pre-compiled regex patterns for path sanitization
104
- const PATH_SANITIZATION_PATTERNS = [
105
- [/[\\?%*:|"<>[\]~]/g, '-'],
106
- [/ +/g, ' '],
107
- [/^\.+/, ''],
108
- [/\/+/g, '/'],
109
- ];
119
+ const checkCredentials = async (forceSupabase = false) => {
120
+ // Force Supabase mode if explicitly requested
121
+ if (forceSupabase) {
122
+ console.log('🔧 Force Supabase mode enabled - skipping API');
123
+ apiMode = false;
124
+ } else if (API_BASE_URL && API_TOKEN) {
125
+ console.log(
126
+ '🌐 API mode enabled - files will be uploaded to Arela API with automatic processing',
127
+ );
128
+ apiMode = true;
110
129
 
111
- // Cache for sanitized paths
112
- const pathSanitizationCache = new Map();
113
-
114
- // Batch logging system for performance
115
- class LogBatcher {
116
- constructor(batchSize = 50, flushInterval = 5000) {
117
- this.batch = [];
118
- this.batchSize = batchSize;
119
- this.flushInterval = flushInterval;
120
- this.lastFlush = Date.now();
121
- this.flushTimer = null;
122
- }
123
-
124
- add(logEntry) {
125
- this.batch.push({
126
- filename: path.basename(logEntry.file),
127
- path: logEntry.uploadPath,
128
- status: logEntry.status,
129
- message: logEntry.message,
130
- });
130
+ try {
131
+ const response = await fetch(`${API_BASE_URL}/api/health`, {
132
+ headers: {
133
+ 'x-api-key': API_TOKEN,
134
+ },
135
+ });
131
136
 
132
- // Auto-flush if batch is full or enough time has passed
133
- if (
134
- this.batch.length >= this.batchSize ||
135
- Date.now() - this.lastFlush > this.flushInterval
136
- ) {
137
- this.flush();
137
+ if (!response.ok) {
138
+ console.warn(
139
+ '⚠️ API connection failed, falling back to direct Supabase upload',
140
+ );
141
+ apiMode = false;
142
+ } else {
143
+ console.log('✅ Connected to Arela API');
144
+ return;
145
+ }
146
+ } catch (err) {
147
+ console.warn(
148
+ '⚠️ API connection failed, falling back to direct Supabase upload',
149
+ );
150
+ apiMode = false;
138
151
  }
139
152
  }
140
153
 
141
- async flush() {
142
- if (this.batch.length === 0) return;
143
-
144
- const logsToSend = [...this.batch];
145
- this.batch = [];
146
- this.lastFlush = Date.now();
147
-
148
- // Clear any pending timer
149
- if (this.flushTimer) {
150
- clearTimeout(this.flushTimer);
151
- this.flushTimer = null;
154
+ // Initialize Supabase client if not in API mode or if forced
155
+ if (!apiMode || forceSupabase) {
156
+ if (!supabaseUrl || !supabaseKey || !bucket) {
157
+ console.error(
158
+ '⚠️ Missing credentials. Please set either:\n' +
159
+ ' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
160
+ ' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
161
+ );
162
+ process.exit(1);
152
163
  }
153
164
 
165
+ supabase = createClient(supabaseUrl, supabaseKey);
166
+
154
167
  try {
155
- const { error } = await supabase.from('upload_logs').insert(logsToSend);
168
+ const { error } = await supabase.storage.from(bucket).list('');
156
169
  if (error) {
157
- console.error(
158
- `⚠️ Error saving batch of ${logsToSend.length} logs to Supabase: ${error.message}`,
159
- );
160
- // Re-add failed logs to batch for retry (optional)
161
- this.batch.unshift(...logsToSend);
162
- } else {
163
- // Only show verbose output if requested
164
- if (process.env.LOG_BATCH_VERBOSE === 'true') {
165
- console.log(`📊 Flushed ${logsToSend.length} logs to Supabase`);
166
- }
170
+ console.error('⚠️ Error connecting to Supabase:', error.message);
171
+ process.exit(1);
167
172
  }
173
+ console.log('✅ Connected to Supabase (direct mode)');
168
174
  } catch (err) {
169
- console.error(`⚠️ Error during batch flush: ${err.message}`);
170
- // Re-add failed logs to batch for retry (optional)
171
- this.batch.unshift(...logsToSend);
175
+ console.error('⚠️ Error:', err.message);
176
+ process.exit(1);
172
177
  }
173
178
  }
179
+ };
174
180
 
175
- // Schedule auto-flush if not already scheduled
176
- scheduleFlush() {
177
- if (!this.flushTimer && this.batch.length > 0) {
178
- this.flushTimer = setTimeout(() => {
179
- this.flush();
180
- }, this.flushInterval);
181
- }
181
+ const logFilePath = path.resolve(process.cwd(), 'arela-upload.log');
182
+ const writeLog = (message) => {
183
+ try {
184
+ const timestamp = new Date().toISOString();
185
+ fs.appendFileSync(logFilePath, `[${timestamp}] ${message}\n`);
186
+ } catch (error) {
187
+ console.error(`❌ Error writing to log file: ${error.code} | ${error.message} | path: ${logFilePath}`);
182
188
  }
189
+ };
183
190
 
184
- // Force flush all pending logs (called at end of process)
185
- async forceFlush() {
186
- if (this.flushTimer) {
187
- clearTimeout(this.flushTimer);
188
- this.flushTimer = null;
191
+ /**
192
+ * Extracts year and pedimento number from file path
193
+ * Supports patterns like:
194
+ * - /path/to/2024/4023260/file.pdf
195
+ * - /path/to/pedimentos/2024/4023260/file.pdf
196
+ * - /path/to/docs/año2024/ped4023260/file.pdf
197
+ */
198
+ const extractYearAndPedimentoFromPath = (filePath, basePath) => {
199
+ try {
200
+ const relativePath = path.relative(basePath, filePath);
201
+ const pathParts = relativePath.split(path.sep);
202
+
203
+ let year = null;
204
+ let pedimento = null;
205
+
206
+ // Pattern 1: Direct year/pedimento structure (2024/4023260)
207
+ for (let i = 0; i < pathParts.length - 1; i++) {
208
+ const part = pathParts[i];
209
+ const nextPart = pathParts[i + 1];
210
+
211
+ // Check if current part looks like a year (2020-2030)
212
+ const yearMatch = part.match(/^(202[0-9])$/);
213
+ if (yearMatch && nextPart) {
214
+ year = yearMatch[1];
215
+
216
+ // Check if next part looks like a pedimento (4-8 digits)
217
+ const pedimentoMatch = nextPart.match(/^(\d{4,8})$/);
218
+ if (pedimentoMatch) {
219
+ pedimento = pedimentoMatch[1];
220
+ break;
221
+ }
222
+ }
189
223
  }
190
- await this.flush();
191
- }
192
- }
193
224
 
194
- // Global log batcher instance
195
- const logBatcher = new LogBatcher();
225
+ // Pattern 2: Named patterns (año2024, ped4023260)
226
+ if (!year || !pedimento) {
227
+ for (const part of pathParts) {
228
+ if (!year) {
229
+ const namedYearMatch = part.match(/(?:año|year|anio)(\d{4})/i);
230
+ if (namedYearMatch) {
231
+ year = namedYearMatch[1];
232
+ }
233
+ }
196
234
 
197
- // Function to manage cache size (prevent memory issues in long sessions)
198
- const manageCaches = () => {
199
- const MAX_CACHE_SIZE = 1000;
235
+ if (!pedimento) {
236
+ const namedPedimentoMatch = part.match(
237
+ /(?:ped|pedimento|pedi)(\d{4,8})/i,
238
+ );
239
+ if (namedPedimentoMatch) {
240
+ pedimento = namedPedimentoMatch[1];
241
+ }
242
+ }
243
+ }
244
+ }
200
245
 
201
- if (sanitizationCache.size > MAX_CACHE_SIZE) {
202
- // Keep only the most recent 500 entries
203
- const entries = Array.from(sanitizationCache.entries());
204
- sanitizationCache.clear();
205
- entries.slice(-500).forEach(([key, value]) => {
206
- sanitizationCache.set(key, value);
207
- });
208
- }
246
+ // Pattern 3: Loose year detection in any part
247
+ if (!year) {
248
+ for (const part of pathParts) {
249
+ const yearMatch = part.match(/(202[0-9])/);
250
+ if (yearMatch) {
251
+ year = yearMatch[1];
252
+ break;
253
+ }
254
+ }
255
+ }
209
256
 
210
- if (pathSanitizationCache.size > MAX_CACHE_SIZE) {
211
- const entries = Array.from(pathSanitizationCache.entries());
212
- pathSanitizationCache.clear();
213
- entries.slice(-500).forEach(([key, value]) => {
214
- pathSanitizationCache.set(key, value);
215
- });
257
+ // Pattern 4: Loose pedimento detection (4-8 consecutive digits)
258
+ if (!pedimento) {
259
+ for (const part of pathParts) {
260
+ const pedimentoMatch = part.match(/(\d{4,8})/);
261
+ if (pedimentoMatch && pedimentoMatch[1].length >= 4) {
262
+ pedimento = pedimentoMatch[1];
263
+ break;
264
+ }
265
+ }
266
+ }
267
+
268
+ return { year, pedimento, detected: !!(year && pedimento) };
269
+ } catch (error) {
270
+ return {
271
+ year: null,
272
+ pedimento: null,
273
+ detected: false,
274
+ error: error.message,
275
+ };
216
276
  }
217
277
  };
218
278
 
219
- const sanitizePath = (inputPath) => {
220
- // Check cache first
221
- if (pathSanitizationCache.has(inputPath)) {
222
- return pathSanitizationCache.get(inputPath);
279
+ const getProcessedPaths = () => {
280
+ const processed = new Set();
281
+ const lines = fs.existsSync(logFilePath)
282
+ ? fs.readFileSync(logFilePath, 'utf-8').split('\n')
283
+ : [];
284
+
285
+ for (const line of lines) {
286
+ const match = line.match(/(SUCCESS|SKIPPED): .*? -> (.+)/);
287
+ if (match) {
288
+ const [, , path] = match;
289
+ if (path) {
290
+ processed.add(path.trim());
291
+ }
292
+ }
223
293
  }
224
294
 
225
- // Fast path for already clean paths
226
- if (!/[\\?%*:|"<>[\]~]|^ +|^\.+|\/\/+/.test(inputPath)) {
227
- pathSanitizationCache.set(inputPath, inputPath);
228
- return inputPath;
229
- }
295
+ return processed;
296
+ };
230
297
 
231
- let sanitized = inputPath;
298
+ /**
299
+ * Upload files to Arela API with automatic detection and organization
300
+ */
301
+ const uploadToApi = async (files, options) => {
302
+ const formData = new FormData();
303
+
304
+ files.forEach((file) => {
305
+ const fileBuffer = fs.readFileSync(file.path);
306
+ formData.append('files', fileBuffer, {
307
+ filename: file.name,
308
+ contentType: file.contentType,
309
+ });
310
+ });
232
311
 
233
- // Apply path sanitization patterns
234
- for (const [pattern, replacement] of PATH_SANITIZATION_PATTERNS) {
235
- sanitized = sanitized.replace(pattern, replacement);
312
+ if (bucket) formData.append('bucket', bucket);
313
+ if (options.prefix) formData.append('prefix', options.prefix);
314
+
315
+ // Nueva funcionalidad: estructura de carpetas personalizada
316
+ let combinedStructure = null;
317
+
318
+ if (
319
+ options.folderStructure &&
320
+ options.autoDetectStructure &&
321
+ files.length > 0
322
+ ) {
323
+ // Combine custom folder structure with auto-detection
324
+ const firstFile = files[0];
325
+ const detection = extractYearAndPedimentoFromPath(
326
+ firstFile.path,
327
+ process.cwd(),
328
+ );
329
+ if (detection.detected) {
330
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
331
+ combinedStructure = `${options.folderStructure}/${autoStructure}`;
332
+ formData.append('folderStructure', combinedStructure);
333
+ console.log(
334
+ `📁 Combined folder structure: ${options.folderStructure} + ${autoStructure} = ${combinedStructure}`,
335
+ );
336
+ } else {
337
+ // Fallback to just custom structure if auto-detection fails
338
+ formData.append('folderStructure', options.folderStructure);
339
+ console.log(
340
+ `📁 Using custom folder structure (auto-detection failed): ${options.folderStructure}`,
341
+ );
342
+ }
343
+ } else if (options.folderStructure) {
344
+ formData.append('folderStructure', options.folderStructure);
345
+ console.log(`📁 Using custom folder structure: ${options.folderStructure}`);
346
+ } else if (options.autoDetectStructure && files.length > 0) {
347
+ // Try to auto-detect from the first file if no explicit structure is provided
348
+ const firstFile = files[0];
349
+ const detection = extractYearAndPedimentoFromPath(
350
+ firstFile.path,
351
+ process.cwd(),
352
+ );
353
+ if (detection.detected) {
354
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
355
+ formData.append('folderStructure', autoStructure);
356
+ }
236
357
  }
237
358
 
238
- // Cache the result
239
- pathSanitizationCache.set(inputPath, sanitized);
240
-
241
- return sanitized;
242
- };
243
-
244
- const sendLogToSupabase = async ({ file, uploadPath, status, message }) => {
245
- // Add to batch instead of sending immediately
246
- logBatcher.add({ file, uploadPath, status, message });
359
+ // Si se especifica clientPath para user_metadata
360
+ if (options.clientPath) {
361
+ formData.append('clientPath', options.clientPath);
362
+ }
247
363
 
248
- // Schedule auto-flush if needed
249
- logBatcher.scheduleFlush();
250
- };
364
+ formData.append('autoDetect', String(options.autoDetect ?? true));
365
+ formData.append('autoOrganize', String(options.autoOrganize ?? true));
366
+ formData.append('batchSize', String(options.batchSize || 10));
367
+ formData.append('clientVersion', packageVersion);
251
368
 
252
- // Enhanced version for immediate sending (used for critical errors)
253
- const sendLogToSupabaseImmediate = async ({
254
- file,
255
- uploadPath,
256
- status,
257
- message,
258
- }) => {
259
- const { error } = await supabase.from('upload_logs').insert([
369
+ const response = await fetch(
370
+ `${API_BASE_URL}/api/storage/batch-upload-and-process`,
260
371
  {
261
- filename: path.basename(file),
262
- path: uploadPath,
263
- status,
264
- message,
372
+ method: 'POST',
373
+ headers: {
374
+ 'x-api-key': API_TOKEN,
375
+ },
376
+ body: formData,
265
377
  },
266
- ]);
267
-
268
- if (error) {
269
- console.error(
270
- `⚠️ Error saving immediate log to Supabase: ${error.message}`,
271
- );
272
- }
273
- };
378
+ );
274
379
 
275
- const checkCredentials = async () => {
276
- if (!supabaseUrl || !supabaseKey || !bucket) {
277
- console.error(
278
- '⚠️ Missing Supabase credentials. Please set SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET in your environment variables.',
380
+ if (!response.ok) {
381
+ const errorText = await response.text();
382
+ throw new Error(
383
+ `API request failed: ${response.status} ${response.statusText} - ${errorText}`,
279
384
  );
280
- writeLog('⚠️ Missing Supabase credentials.');
281
- await sendLogToSupabaseImmediate({
282
- file: 'Error',
283
- uploadPath: 'Error',
284
- status: 'error',
285
- message: 'Missing Supabase credentials.',
286
- });
287
- process.exit(1);
288
385
  }
289
386
 
290
- try {
291
- const { error } = await supabase.storage.from(bucket).list('');
292
- if (error) {
293
- console.error('⚠️ Error connecting to Supabase:', error.message);
294
- writeLog(`⚠️ Error connecting to Supabase: ${error.message}`);
295
- await sendLogToSupabaseImmediate({
296
- file: 'Error',
297
- uploadPath: 'Error',
298
- status: 'error',
299
- message: error.message,
300
- });
301
- process.exit(1);
302
- }
303
- } catch (err) {
304
- console.error('⚠️ Error:', err.message);
305
- writeLog(`⚠️ Error: ${err.message}`);
306
- await sendLogToSupabaseImmediate({
307
- file: 'Error',
308
- uploadPath: 'Error',
309
- status: 'error',
310
- message: err.message,
311
- });
312
- process.exit(1);
313
- }
387
+ return response.json();
314
388
  };
315
389
 
316
- await checkCredentials();
390
+ /**
391
+ * Upload file directly to Supabase (fallback method)
392
+ */
393
+ const uploadToSupabase = async (file, uploadPath) => {
394
+ const content = fs.readFileSync(file);
395
+ const contentType = mime.lookup(file) || 'application/octet-stream';
317
396
 
318
- const fileExistsInBucket = async (pathInBucket) => {
319
- const dir = path.dirname(pathInBucket);
320
- const filename = path.basename(pathInBucket);
321
397
  const { data, error } = await supabase.storage
322
398
  .from(bucket)
323
- .list(dir === '.' ? '' : dir, { limit: 1000 });
324
- if (error) {
325
- console.error(`⚠️ Could not verify duplicate: ${error.message}`);
326
- writeLog(`⚠️ Could not verify duplicate: ${error.message}`);
327
- await sendLogToSupabaseImmediate({
328
- file: 'Error',
329
- uploadPath: 'Error',
330
- status: 'error',
331
- message: error.message,
399
+ .upload(uploadPath.replace(/\\/g, '/'), content, {
400
+ upsert: true,
401
+ contentType,
332
402
  });
333
- return false;
334
- }
335
- return data.some((file) => file.name === filename);
336
- };
337
403
 
338
- const logFilePath = path.resolve(process.cwd(), 'upload.log');
339
- const writeLog = (message) => {
340
- try {
341
- const timestamp = new Date().toISOString();
342
- fs.appendFileSync(logFilePath, `[${timestamp}] ${message}\n`);
343
- } catch (error) {
344
- console.error(`❌ Error writing to log file: ${error.code} | ${error.message} | path: ${logFilePath}`);
404
+ if (error) {
405
+ throw new Error(error.message);
345
406
  }
407
+
408
+ return data;
346
409
  };
347
410
 
348
- // Modified to fetch from Supabase first, then fallback to local log
349
- const getProcessedPaths = async () => {
350
- const processed = new Set();
411
+ /**
412
+ * Insert file stats into uploader table with document detection
413
+ */
414
+ const insertStatsToUploaderTable = async (files, options) => {
415
+ if (!supabase) {
416
+ throw new Error('Supabase client not initialized. Stats mode requires Supabase connection.');
417
+ }
351
418
 
352
- // Try to fetch from Supabase first
353
- try {
354
- const { data, error } = await supabase
355
- .from('upload_logs')
356
- .select('path')
357
- .in('status', ['success', 'skipped']);
419
+ const detectionService = new FileDetectionService();
420
+ const records = [];
421
+
422
+ for (const file of files) {
423
+ const stats = fs.statSync(file.path);
424
+ const originalPath = options.clientPath || file.path;
425
+
426
+ // Check if record already exists
427
+ const { data: existingRecords, error: checkError } = await supabase
428
+ .from('uploader')
429
+ .select('id, original_path')
430
+ .eq('original_path', originalPath)
431
+ .limit(1);
432
+
433
+ if (checkError) {
434
+ console.error(`❌ Error checking for existing record: ${checkError.message}`);
435
+ continue;
436
+ }
358
437
 
359
- if (error) {
360
- console.warn(
361
- `⚠️ Could not fetch processed paths from Supabase: ${error.message}. Falling back to local log.`,
362
- );
363
- // Fallback to local log if Supabase fetch fails
364
- const lines = fs.existsSync(logFilePath)
365
- ? fs.readFileSync(logFilePath, 'utf-8').split('\\n')
366
- : [];
367
- for (const line of lines) {
368
- const match = line.match(/(SUCCESS|SKIPPED): .*? -> (.+)/);
369
- if (match) {
370
- const [, , path] = match;
371
- processed.add(path.trim());
372
- }
373
- }
374
- } else if (data) {
375
- data.forEach((log) => {
376
- if (log.path) {
377
- processed.add(log.path.trim());
378
- }
379
- });
380
- // Also read from local log to ensure any paths logged before this change or during a Supabase outage are included
381
- const lines = fs.existsSync(logFilePath)
382
- ? fs.readFileSync(logFilePath, 'utf-8').split('\\n')
383
- : [];
384
- for (const line of lines) {
385
- const match = line.match(/(SUCCESS|SKIPPED): .*? -> (.+)/);
386
- if (match) {
387
- const [, , pathValue] = match;
388
- if (pathValue) {
389
- processed.add(pathValue.trim());
438
+ if (existingRecords && existingRecords.length > 0) {
439
+ console.log(`⏭️ Skipping duplicate: ${path.basename(file.path)}`);
440
+ continue;
441
+ }
442
+
443
+ // Initialize record with basic file stats
444
+ const record = {
445
+ document_type: null,
446
+ size: stats.size,
447
+ num_pedimento: null,
448
+ filename: file.originalName || path.basename(file.path),
449
+ original_path: originalPath,
450
+ arela_path: null,
451
+ status: 'stats',
452
+ rfc: null,
453
+ message: null
454
+ };
455
+
456
+ // Try to detect document type for supported files
457
+ if (detectionService.isSupportedFileType(file.path)) {
458
+ try {
459
+ const detection = await detectionService.detectFile(file.path);
460
+
461
+ if (detection.detectedType) {
462
+ record.document_type = detection.detectedType;
463
+ record.num_pedimento = detection.detectedPedimento;
464
+ record.status = 'detected';
465
+
466
+ // Set arela_path for pedimento_simplificado documents
467
+ if (detection.arelaPath) {
468
+ record.arela_path = detection.arelaPath;
469
+ }
470
+
471
+ // Extract RFC from fields if available
472
+ const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
473
+ if (rfcField) {
474
+ record.rfc = rfcField.value;
475
+ }
476
+ } else {
477
+ record.status = 'not-detected';
478
+ if (detection.error) {
479
+ record.message = detection.error;
390
480
  }
391
481
  }
482
+ } catch (error) {
483
+ console.error(`❌ Error detecting ${record.filename}:`, error.message);
484
+ record.status = 'detection-error';
485
+ record.message = error.message;
392
486
  }
487
+ } else {
488
+ record.status = 'unsupported';
489
+ record.message = 'File type not supported for detection';
393
490
  }
394
- } catch (e) {
395
- console.warn(
396
- `⚠️ Error fetching from Supabase or reading local log: ${e.message}. Proceeding with an empty set of processed paths initially.`,
397
- );
398
- // Ensure local log is still attempted if Supabase connection itself fails
399
- const lines = fs.existsSync(logFilePath)
400
- ? fs.readFileSync(logFilePath, 'utf-8').split('\\n')
401
- : [];
402
- for (const line of lines) {
403
- const match = line.match(/(SUCCESS|SKIPPED): .*? -> (.+)/);
404
- if (match) {
405
- const [, , path] = match;
406
- processed.add(path.trim());
407
- }
408
- }
491
+
492
+ records.push(record);
409
493
  }
410
- return processed;
411
- };
412
494
 
413
- const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
495
+ if (records.length === 0) {
496
+ console.log('📝 No new records to insert (all were duplicates or errors)');
497
+ return [];
498
+ }
414
499
 
415
- const uploadWithRetry = async (uploadFn, maxRetries = 5, delayMs = 2000) => {
416
- let attempt = 0;
417
- let lastError;
500
+ console.log(`💾 Inserting ${records.length} new records into uploader table...`);
501
+
502
+ const { data, error } = await supabase
503
+ .from('uploader')
504
+ .insert(records)
505
+ .select();
418
506
 
419
- while (attempt < maxRetries) {
420
- try {
421
- const result = await uploadFn();
422
- if (!result.error) return result;
423
- lastError = result.error;
424
- attempt++;
425
-
426
- // Exponential backoff with jitter
427
- if (attempt < maxRetries) {
428
- const backoffDelay =
429
- delayMs * Math.pow(2, attempt - 1) + Math.random() * 1000;
430
- console.log(
431
- `Retry ${attempt}/${maxRetries} after ${Math.round(backoffDelay)}ms...`,
432
- );
433
- await delay(backoffDelay);
434
- }
435
- } catch (error) {
436
- lastError = error;
437
- attempt++;
438
-
439
- if (attempt < maxRetries) {
440
- const backoffDelay =
441
- delayMs * Math.pow(2, attempt - 1) + Math.random() * 1000;
442
- console.log(
443
- `Retry ${attempt}/${maxRetries} after ${Math.round(backoffDelay)}ms due to exception...`,
444
- );
445
- await delay(backoffDelay);
446
- }
447
- }
507
+ if (error) {
508
+ throw new Error(`Failed to insert stats records: ${error.message}`);
448
509
  }
449
510
 
450
- return {
451
- error: new Error(
452
- `Max retries exceeded. Last error: ${lastError?.message || 'Unknown error'}`,
453
- ),
454
- };
511
+ return data;
455
512
  };
456
513
 
457
- // Function to process a single file
458
- const processFile = async (
459
- file,
514
+ const processFilesInBatches = async (
515
+ files,
516
+ batchSize,
460
517
  options,
461
518
  basePath,
462
519
  folder,
463
520
  sourcePath,
464
521
  processedPaths,
465
522
  ) => {
466
- let currentFile = file;
467
- let result = {
468
- success: false,
469
- skipped: false,
470
- error: null,
471
- message: '',
472
- };
523
+ let totalUploaded = 0;
524
+ let totalDetected = 0;
525
+ let totalOrganized = 0;
526
+ let totalErrors = 0;
527
+ let totalSkipped = 0;
473
528
 
474
- try {
475
- // Check if we need to rename the file
476
- if (options.renameFiles) {
477
- const originalName = path.basename(file);
478
- const sanitizedName = sanitizeFileName(originalName);
479
-
480
- if (originalName !== sanitizedName) {
481
- const newFilePath = path.join(path.dirname(file), sanitizedName);
482
-
483
- if (options.dryRun) {
484
- result.message = `Would rename: ${originalName} → ${sanitizedName}`;
485
- result.skipped = true;
486
- return result;
487
- } else {
488
- try {
489
- fs.renameSync(file, newFilePath);
490
- currentFile = newFilePath;
491
- writeLog(`RENAMED: ${originalName} → ${sanitizedName}`);
492
- await sendLogToSupabase({
493
- file: originalName,
494
- uploadPath: sanitizedName,
495
- status: 'renamed',
496
- message: `Renamed from ${originalName}`,
497
- });
498
- } catch (renameError) {
499
- result.error = `Failed to rename ${originalName}: ${renameError.message}`;
500
- writeLog(`RENAME_ERROR: ${originalName} | ${renameError.message}`);
501
- return result;
529
+ const messageBuffer = [];
530
+
531
+ const progressBarFormat = options.statsOnly
532
+ ? '📊 Processing [{bar}] {percentage}% | {value}/{total} files | Stats: {successCount} | Errors: {failureCount} | Duplicates: {skippedCount}'
533
+ : '📂 Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}';
534
+
535
+ const progressBar = new cliProgress.SingleBar({
536
+ format: progressBarFormat,
537
+ barCompleteChar: '█',
538
+ barIncompleteChar: '░',
539
+ hideCursor: true,
540
+ });
541
+
542
+ progressBar.start(files.length, 0, {
543
+ successCount: 0,
544
+ failureCount: 0,
545
+ skippedCount: 0,
546
+ });
547
+
548
+ if (options.statsOnly) {
549
+ // Stats-only mode - Read file stats and insert to uploader table
550
+ console.log('📊 Processing files in stats-only mode...');
551
+
552
+ let totalDetected = 0;
553
+ let totalNotDetected = 0;
554
+ let totalUnsupported = 0;
555
+ let totalDetectionErrors = 0;
556
+
557
+ for (let i = 0; i < files.length; i += batchSize) {
558
+ const batch = files.slice(i, i + batchSize);
559
+
560
+ const statsFiles = batch.map((file) => {
561
+ const originalFileName = path.basename(file);
562
+
563
+ return {
564
+ path: file,
565
+ originalName: originalFileName,
566
+ };
567
+ });
568
+
569
+ try {
570
+ const insertedRecords = await insertStatsToUploaderTable(statsFiles, options);
571
+ const actualInserted = insertedRecords.length;
572
+ const skippedDuplicates = statsFiles.length - actualInserted;
573
+
574
+ totalUploaded += actualInserted;
575
+ totalSkipped += skippedDuplicates;
576
+
577
+ // Count detection results from inserted records
578
+ insertedRecords.forEach(record => {
579
+ switch (record.status) {
580
+ case 'detected':
581
+ totalDetected++;
582
+ break;
583
+ case 'not-detected':
584
+ totalNotDetected++;
585
+ break;
586
+ case 'unsupported':
587
+ totalUnsupported++;
588
+ break;
589
+ case 'detection-error':
590
+ totalDetectionErrors++;
591
+ break;
592
+ }
593
+ });
594
+
595
+ statsFiles.forEach((file) => {
596
+ const wasInserted = insertedRecords.some(record =>
597
+ record.original_path === (options.clientPath || file.path)
598
+ );
599
+ if (wasInserted) {
600
+ writeLog(`STATS: ${file.path} -> uploader table`);
601
+ } else {
602
+ writeLog(`DUPLICATE: ${file.path} -> already exists in uploader table`);
502
603
  }
604
+ });
605
+
606
+ if (actualInserted > 0) {
607
+ console.log(`📈 Inserted ${actualInserted} stats records`);
608
+ }
609
+ if (skippedDuplicates > 0) {
610
+ console.log(`⏭️ Skipped ${skippedDuplicates} duplicates`);
503
611
  }
612
+ if (options.detect !== false) {
613
+ console.log(` 🔍 Detected: ${totalDetected}, Not detected: ${totalNotDetected}, Unsupported: ${totalUnsupported}, Errors: ${totalDetectionErrors}`);
614
+ }
615
+
616
+ } catch (error) {
617
+ totalErrors += statsFiles.length;
618
+ statsFiles.forEach((file) => {
619
+ writeLog(`ERROR: ${file.path}: ${error.message}`);
620
+ messageBuffer.push(`❌ ${file.originalName}: ${error.message}`);
621
+ });
504
622
  }
505
- }
506
623
 
507
- const content = fs.readFileSync(currentFile);
508
- const relativePathRaw = path
509
- .relative(basePath, currentFile)
510
- .replace(/^[\\/]+/, '')
511
- .replace(/\\/g, '/');
512
-
513
- // Always sanitize the filename for upload path
514
- const pathParts = relativePathRaw.split('/');
515
- const originalFileName = pathParts[pathParts.length - 1];
516
- const sanitizedFileName = sanitizeFileName(originalFileName);
517
- pathParts[pathParts.length - 1] = sanitizedFileName;
518
- const sanitizedRelativePath = pathParts.join('/');
519
-
520
- const uploadPathRaw = options.prefix
521
- ? path.posix.join(options.prefix, sanitizedRelativePath)
522
- : sanitizedRelativePath;
523
- const uploadPath = sanitizePath(uploadPathRaw);
524
-
525
- if (
526
- uploadPath !== uploadPathRaw ||
527
- originalFileName !== sanitizedFileName
528
- ) {
529
- writeLog(`SANITIZED: ${relativePathRaw} → ${uploadPath}`);
530
- await sendLogToSupabase({
531
- file: currentFile,
532
- uploadPath: relativePathRaw,
533
- status: 'sanitized',
534
- message: `Sanitized to ${uploadPath} (Arela Version: ${version})`,
624
+ progressBar.update(i + batch.length, {
625
+ successCount: totalUploaded,
626
+ failureCount: totalErrors,
627
+ skippedCount: totalSkipped,
535
628
  });
536
- }
537
629
 
538
- if (processedPaths.has(uploadPath)) {
539
- result.skipped = true;
540
- result.message = `Already processed (log): ${currentFile}`;
541
- return result;
630
+ if (i + batchSize < files.length) {
631
+ await new Promise((resolve) => setTimeout(resolve, 200));
632
+ }
542
633
  }
634
+
635
+ // Store detection stats for summary
636
+ totalDetected = totalDetected || 0;
637
+ totalNotDetected = totalNotDetected || 0;
638
+ totalUnsupported = totalUnsupported || 0;
639
+ totalDetectionErrors = totalDetectionErrors || 0;
640
+
641
+ } else if (apiMode && !options.forceSupabase) {
642
+ // API Mode - Process in batches
643
+ for (let i = 0; i < files.length; i += batchSize) {
644
+ const batch = files.slice(i, i + batchSize);
645
+ let sanitizedRelativePath;
646
+
647
+ const apiFiles = batch
648
+ .map((file) => {
649
+ const relativePathRaw = path
650
+ .relative(basePath, file)
651
+ .replace(/^[\\/]+/, '')
652
+ .replace(/\\/g, '/');
653
+
654
+ const pathParts = relativePathRaw.split('/');
655
+ const originalFileName = pathParts[pathParts.length - 1];
656
+ const sanitizedFileName = sanitizeFileName(originalFileName);
657
+ pathParts[pathParts.length - 1] = sanitizedFileName;
658
+ sanitizedRelativePath = pathParts.join('/');
659
+
660
+ let uploadPath;
661
+
662
+ // Handle combined folder structure + auto-detection
663
+ if (options.folderStructure && options.autoDetectStructure) {
664
+ const detection = extractYearAndPedimentoFromPath(file, basePath);
665
+ if (detection.detected) {
666
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
667
+ const combinedStructure = `${options.folderStructure}/${autoStructure}`;
668
+ uploadPath = path.posix.join(
669
+ combinedStructure,
670
+ sanitizedFileName,
671
+ );
672
+ console.log(
673
+ `📁 Combined structure: ${options.folderStructure}/${autoStructure} for ${originalFileName} -> ${uploadPath}`,
674
+ );
675
+ } else {
676
+ // Fallback to just custom structure if auto-detection fails
677
+ uploadPath = path.posix.join(
678
+ options.folderStructure,
679
+ sanitizedFileName,
680
+ );
681
+ console.log(
682
+ `📁 Custom structure (auto-detection failed): ${uploadPath}`,
683
+ );
684
+ }
685
+ } else if (options.folderStructure) {
686
+ // Use custom folder structure only
687
+ uploadPath = path.posix.join(
688
+ options.folderStructure,
689
+ sanitizedFileName,
690
+ );
691
+ console.log(`📁 Custom structure: ${uploadPath}`);
692
+ } else if (options.autoDetectStructure) {
693
+ // Auto-detect structure from path if enabled
694
+ const detection = extractYearAndPedimentoFromPath(file, basePath);
695
+ if (detection.detected) {
696
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
697
+ uploadPath = path.posix.join(autoStructure, sanitizedFileName);
698
+ console.log(
699
+ `🔍 Auto-detected: ${autoStructure} for ${originalFileName} -> ${uploadPath}`,
700
+ );
701
+ } else {
702
+ uploadPath = options.prefix
703
+ ? path.posix.join(options.prefix, sanitizedRelativePath)
704
+ : sanitizedRelativePath;
705
+ console.log(`📁 Using relative path: ${uploadPath}`);
706
+ }
707
+ } else {
708
+ uploadPath = options.prefix
709
+ ? path.posix.join(options.prefix, sanitizedRelativePath)
710
+ : sanitizedRelativePath;
711
+ console.log(`📁 Using standard path: ${uploadPath}`);
712
+ }
543
713
 
544
- const contentType = mime.lookup(currentFile) || 'application/octet-stream';
714
+ if (processedPaths.has(uploadPath)) {
715
+ totalSkipped++;
716
+ writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
717
+ return null;
718
+ }
719
+
720
+ return {
721
+ path: file,
722
+ name: sanitizedFileName,
723
+ originalName: originalFileName,
724
+ uploadPath: uploadPath.replace(/\\/g, '/'), // Ensure forward slashes
725
+ contentType: mime.lookup(file) || 'application/octet-stream',
726
+ };
727
+ })
728
+ .filter(Boolean);
729
+
730
+ if (apiFiles.length > 0) {
731
+ // console.log(`🔄 Processing batch of ${apiFiles.length} files`);
732
+ // apiFiles.forEach(f => console.log(` 📄 ${f.name} -> ${f.uploadPath}`));
733
+
734
+ try {
735
+ // Use clientPath from options if specified, otherwise construct from detection or folder
736
+ let clientPath = options.clientPath;
737
+
738
+ if (!clientPath && apiFiles.length > 0) {
739
+ const firstFile = apiFiles[0];
740
+ const detection = extractYearAndPedimentoFromPath(
741
+ firstFile.path,
742
+ basePath,
743
+ );
744
+ if (detection.detected) {
745
+ // clientPath = `${detection.year}/${detection.pedimento}/`;
746
+ clientPath = path
747
+ .resolve(basePath, sanitizedRelativePath)
748
+ .replace(/\\/g, '/');
749
+ } else {
750
+ // Fallback to folder structure if no year/pedimento detected
751
+ clientPath = path.resolve(basePath, folder).replace(/\\/g, '/');
752
+ }
753
+ }
545
754
 
546
- const exists = await fileExistsInBucket(uploadPath);
755
+ const result = await uploadToApi(apiFiles, {
756
+ ...options,
757
+ clientPath: clientPath,
758
+ });
759
+
760
+ totalUploaded += result.stats.uploadedCount;
761
+ totalDetected += result.stats.detectedCount;
762
+ totalOrganized += result.stats.organizedCount;
763
+ totalErrors += result.stats.errorCount;
764
+
765
+ result.uploaded.forEach((upload) => {
766
+ const apiFile = apiFiles.find(
767
+ (f) => f.name === upload.originalName,
768
+ );
769
+ if (apiFile) {
770
+ writeLog(`SUCCESS: ${apiFile.path} -> ${apiFile.uploadPath}`);
771
+ processedPaths.add(apiFile.uploadPath);
772
+ }
773
+ });
774
+
775
+ result.errors.forEach((error) => {
776
+ writeLog(
777
+ `ERROR: ${error.fileName}: ${error.error} (${error.step})`,
778
+ );
779
+ messageBuffer.push(
780
+ `❌ ${error.fileName}: ${error.error} (${error.step})`,
781
+ );
782
+ });
783
+ } catch (error) {
784
+ totalErrors += apiFiles.length;
785
+ apiFiles.forEach((file) => {
786
+ writeLog(`ERROR: ${file.path}: ${error.message}`);
787
+ messageBuffer.push(`❌ ${file.name}: ${error.message}`);
788
+ });
789
+ }
790
+ }
547
791
 
548
- if (exists) {
549
- result.skipped = true;
550
- result.message = `Skipped (already exists): ${currentFile}`;
551
- writeLog(`SKIPPED: ${currentFile} -> ${uploadPath}`);
552
- await sendLogToSupabase({
553
- file: currentFile,
554
- uploadPath,
555
- status: 'skipped',
556
- message: 'Already exists in bucket',
792
+ progressBar.update(i + batch.length, {
793
+ successCount: totalUploaded,
794
+ failureCount: totalErrors,
795
+ skippedCount: totalSkipped,
557
796
  });
558
- return result;
559
- }
560
-
561
- const { error } = await uploadWithRetry(() =>
562
- supabase.storage.from(bucket).upload(uploadPath, content, {
563
- upsert: true,
564
- contentType,
565
- metadata: {
566
- originalName: path.basename(currentFile),
567
- sanitizedName: path.basename(uploadPath),
568
- clientPath: path.posix.join(
569
- basePath,
570
- folder,
571
- path.relative(sourcePath, currentFile).replace(/\\/g, '/'),
572
- ),
573
- arelaVersion: version,
574
- },
575
- }),
576
- );
577
797
 
578
- if (error) {
579
- result.error = error.message || JSON.stringify(error);
580
- writeLog(`ERROR: ${currentFile} -> ${uploadPath} | ${result.error}`);
581
- await sendLogToSupabase({
582
- file: currentFile,
583
- uploadPath,
584
- status: 'error',
585
- message: result.error,
798
+ if (i + batchSize < files.length) {
799
+ await new Promise((resolve) => setTimeout(resolve, 200));
800
+ }
801
+ }
802
+ } else {
803
+ // Direct Supabase mode
804
+ for (let i = 0; i < files.length; i++) {
805
+ const file = files[i];
806
+ try {
807
+ const relativePath = path.relative(basePath, file);
808
+ let uploadPath;
809
+
810
+ // Handle combined folder structure + auto-detection
811
+ if (options.folderStructure && options.autoDetectStructure) {
812
+ const detection = extractYearAndPedimentoFromPath(file, basePath);
813
+ if (detection.detected) {
814
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
815
+ const combinedStructure = `${options.folderStructure}/${autoStructure}`;
816
+ const fileName = path.basename(file);
817
+ uploadPath = path.join(combinedStructure, fileName);
818
+ console.log(
819
+ `📁 Combined structure: ${options.folderStructure}/${autoStructure} for ${fileName}`,
820
+ );
821
+ } else {
822
+ // Fallback to just custom structure if auto-detection fails
823
+ const fileName = path.basename(file);
824
+ uploadPath = path.join(options.folderStructure, fileName);
825
+ console.log(
826
+ `📁 Custom structure (auto-detection failed): ${uploadPath}`,
827
+ );
828
+ }
829
+ } else if (options.folderStructure) {
830
+ // Use custom folder structure only
831
+ const fileName = path.basename(file);
832
+ uploadPath = path.join(options.folderStructure, fileName);
833
+ console.log(`📁 Custom structure: ${uploadPath}`);
834
+ } else if (options.autoDetectStructure) {
835
+ // Auto-detect structure from path if enabled
836
+ const detection = extractYearAndPedimentoFromPath(file, basePath);
837
+ if (detection.detected) {
838
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
839
+ const fileName = path.basename(file);
840
+ uploadPath = path.join(autoStructure, fileName);
841
+ } else {
842
+ uploadPath = options.prefix
843
+ ? path.join(options.prefix, relativePath)
844
+ : relativePath;
845
+ }
846
+ } else {
847
+ uploadPath = options.prefix
848
+ ? path.join(options.prefix, relativePath)
849
+ : relativePath;
850
+ }
851
+
852
+ if (processedPaths.has(uploadPath)) {
853
+ totalSkipped++;
854
+ writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
855
+ } else {
856
+ await uploadToSupabase(file, uploadPath);
857
+ totalUploaded++;
858
+ writeLog(`SUCCESS: ${file} -> ${uploadPath}`);
859
+ processedPaths.add(uploadPath);
860
+ }
861
+ } catch (error) {
862
+ totalErrors++;
863
+ writeLog(`ERROR: ${file}: ${error.message}`);
864
+ messageBuffer.push(`❌ ${path.basename(file)}: ${error.message}`);
865
+ }
866
+
867
+ progressBar.update(i + 1, {
868
+ successCount: totalUploaded,
869
+ failureCount: totalErrors,
870
+ skippedCount: totalSkipped,
586
871
  });
872
+ }
873
+ }
874
+
875
+ progressBar.stop();
876
+
877
+ const errorMessages = messageBuffer.filter((msg) => msg.startsWith('❌'));
878
+ if (errorMessages.length > 0) {
879
+ console.log('\n🚨 Errors encountered during processing:');
880
+ errorMessages.forEach((msg) => console.error(msg));
881
+ }
882
+
883
+ return {
884
+ successCount: totalUploaded,
885
+ detectedCount: totalDetected,
886
+ organizedCount: totalOrganized,
887
+ failureCount: totalErrors,
888
+ skippedCount: totalSkipped,
889
+ };
890
+ };
891
+
892
/**
 * Upload files to the Arela API based on specific RFC values.
 *
 * Steps:
 *  1. Query the `uploader` table for rows whose `rfc` is in `uploadRfcs` and
 *     whose `arela_path` is set.
 *  2. Fetch every row sharing one of those `arela_path` values (so supporting
 *     documents are included), paginated with a stable ordering.
 *  3. Read each row's `original_path` from disk and upload the files in
 *     batches, sending one request per `arela_path` group.
 *
 * Exits the process (code 1) when the Supabase client, the API configuration
 * or the RFC list is missing.
 *
 * @param {object} [options]
 * @param {number|string} [options.batchSize=10] - Records read per batch;
 *   non-numeric or non-positive values fall back to 10.
 * @param {string} [options.folderStructure] - Optional prefix prepended to
 *   each arela_path when building the remote folder.
 * @param {boolean} [options.showProgress] - Pass `false` to disable the
 *   progress bar.
 * @returns {Promise<{processedCount: number, uploadedCount: number,
 *   skippedCount: number, errorCount: number}>} Totals for the run.
 */
const uploadFilesByRfc = async (options = {}) => {
  if (!supabase) {
    console.error('❌ Supabase client not initialized');
    process.exit(1);
  }

  if (!API_BASE_URL || !API_TOKEN) {
    console.error('❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.');
    process.exit(1);
  }

  if (!uploadRfcs || uploadRfcs.length === 0) {
    console.error('❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.');
    console.error(' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"');
    process.exit(1);
  }

  // Small local helpers, kept inside the function so the block is self-contained.
  const emptyResult = (errorCount) => ({
    processedCount: 0,
    uploadedCount: 0,
    skippedCount: 0,
    errorCount,
  });
  const pause = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
  const groupBy = (items, keyOf) => {
    const groups = new Map();
    for (const item of items) {
      const key = keyOf(item);
      if (!groups.has(key)) {
        groups.set(key, []);
      }
      groups.get(key).push(item);
    }
    return groups;
  };
  const showProgress = options.showProgress !== false;

  console.log('🎯 RFC-based Upload Mode');
  console.log(`📋 Target RFCs: ${uploadRfcs.join(', ')}`);
  console.log('🔍 Searching for files to upload...');

  // Step 1: Get all records that match the specified RFCs and have arela_path
  const { data: rfcRecords, error: rfcError } = await supabase
    .from('uploader')
    .select('arela_path')
    .in('rfc', uploadRfcs)
    .not('arela_path', 'is', null);

  if (rfcError) {
    console.error('❌ Error fetching RFC records:', rfcError.message);
    return emptyResult(1);
  }

  if (!rfcRecords || rfcRecords.length === 0) {
    console.log('ℹ️ No files found for the specified RFCs with arela_path');
    console.log(` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`);
    return emptyResult(0);
  }

  // Step 2: Get unique arela_paths from the RFC matches
  const uniqueArelaPaths = [...new Set(rfcRecords.map((r) => r.arela_path))];
  console.log(`📋 Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`);

  // Step 3: Get ALL files that have these arela_paths (including supporting documents).
  // Pages are ordered by id: without an explicit order, consecutive range()
  // windows are not guaranteed to be disjoint or complete.
  // NOTE(review): a very large uniqueArelaPaths list may produce an over-long
  // request URL; consider chunking the `.in()` filter if that is observed.
  const allRelatedFiles = [];
  const queryBatchSize = 1000;

  console.log('📥 Fetching all related files (with pagination)...');

  for (let offset = 0; ; offset += queryBatchSize) {
    const { data: page, error: queryError } = await supabase
      .from('uploader')
      .select('id, original_path, arela_path, filename, rfc, document_type')
      .in('arela_path', uniqueArelaPaths)
      .not('original_path', 'is', null)
      .order('id', { ascending: true })
      .range(offset, offset + queryBatchSize - 1);

    if (queryError) {
      console.error('❌ Error fetching related files:', queryError.message);
      return emptyResult(1);
    }

    if (!page || page.length === 0) {
      break;
    }

    allRelatedFiles.push(...page);

    // A short page means we've reached the end
    if (page.length < queryBatchSize) {
      break;
    }
  }

  if (allRelatedFiles.length === 0) {
    console.log('ℹ️ No related files found for the arela_paths');
    return emptyResult(0);
  }

  console.log(`📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`);

  // Informational summaries: by RFC, then by arela_path
  console.log('📊 Files by RFC (including supporting documents):');
  for (const [rfc, rfcFiles] of groupBy(allRelatedFiles, (record) => record.rfc || 'No RFC')) {
    const documentTypes = [...new Set(rfcFiles.map((f) => f.document_type || 'Unknown'))];
    console.log(` ${rfc}: ${rfcFiles.length} files (${documentTypes.join(', ')})`);
  }

  console.log('📂 Files grouped by arela_path:');
  for (const [arelaPath, groupedFiles] of groupBy(allRelatedFiles, (record) => record.arela_path)) {
    console.log(` ${arelaPath}: ${groupedFiles.length} files`);
  }

  let totalProcessed = 0;
  let totalUploaded = 0;
  let totalErrors = 0;
  let totalSkipped = 0;

  // Create progress bar
  const progressBar = new cliProgress.SingleBar({
    format: '🚀 Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
    barCompleteChar: '█',
    barIncompleteChar: '░',
    hideCursor: true,
  });

  const reportProgress = () => {
    if (showProgress) {
      progressBar.update(totalProcessed, {
        uploaded: totalUploaded,
        errors: totalErrors,
        skipped: totalSkipped,
      });
    }
  };

  if (showProgress) {
    progressBar.start(allRelatedFiles.length, 0, {
      uploaded: 0,
      errors: 0,
      skipped: 0,
    });
  }

  // A non-positive batch size would keep the loop below from ever advancing.
  const parsedBatchSize = Number.parseInt(options.batchSize, 10);
  const batchSize = parsedBatchSize > 0 ? parsedBatchSize : 10;
  const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
  console.log(`📦 Processing in batches of ${batchSize} files`);

  // Sends one arela_path group to the API and returns the parsed JSON response.
  const uploadGroup = async (folderStructure, pathFiles) => {
    const pathFormData = new FormData();

    pathFiles.forEach((file) => {
      pathFormData.append('files', file.buffer, {
        filename: file.name,
        contentType: mime.lookup(file.name) || 'application/octet-stream',
      });
    });

    pathFormData.append('folderStructure', folderStructure);
    pathFormData.append('autoDetect', 'true');
    pathFormData.append('autoOrganize', 'false');
    pathFormData.append('batchSize', String(pathFiles.length));
    pathFormData.append('clientVersion', packageVersion);
    if (bucket) {
      pathFormData.append('bucket', bucket);
    }

    const response = await fetch(`${API_BASE_URL}/api/storage/batch-upload-and-process`, {
      method: 'POST',
      headers: {
        'x-api-key': API_TOKEN,
      },
      body: pathFormData,
    });

    if (!response.ok) {
      const errorText = await response.text();
      throw new Error(`HTTP ${response.status}: ${errorText}`);
    }

    return response.json();
  };

  // Process files in batches
  for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
    const batch = allRelatedFiles.slice(i, i + batchSize);
    const batchNumber = Math.floor(i / batchSize) + 1;

    console.log(`\n📦 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`);

    // Read the batch from disk; missing files are skipped, unreadable ones are errors
    const filesToUpload = [];

    for (const record of batch) {
      totalProcessed++;

      try {
        const originalPath = record.original_path;

        if (!fs.existsSync(originalPath)) {
          console.log(` ⚠️ File not found: ${originalPath}`);
          totalSkipped++;
        } else {
          const fileBuffer = fs.readFileSync(originalPath);

          filesToUpload.push({
            path: originalPath,
            buffer: fileBuffer,
            size: fileBuffer.length,
            // Fall back to the on-disk name when the row has no filename
            name: record.filename || path.basename(originalPath),
            arelaPath: record.arela_path,
            rfc: record.rfc,
            documentType: record.document_type,
          });
        }
      } catch (error) {
        console.error(` ❌ Error reading file ${record.original_path}:`, error.message);
        totalErrors++;
      }

      reportProgress();
    }

    if (filesToUpload.length > 0) {
      console.log(` 🚀 Uploading ${filesToUpload.length} files to Arela API...`);

      // Group files by arela_path (trailing slash removed) so each group is
      // uploaded with its own folder structure.
      const groups = groupBy(filesToUpload, (file) => file.arelaPath.replace(/\/$/, ''));
      let groupIndex = 0;

      for (const [arelaPath, pathFiles] of groups) {
        groupIndex++;

        // Concatenate the optional custom prefix with the arela_path
        const folderStructure = options.folderStructure
          ? `${options.folderStructure}/${arelaPath}`.replace(/\/+/g, '/').replace(/\/$/, '')
          : arelaPath;

        console.log(` 📁 Uploading ${pathFiles.length} files to: ${folderStructure}`);

        // Errors are handled per group so one failing group neither aborts the
        // remaining groups nor re-counts files that were already uploaded.
        try {
          const result = await uploadGroup(folderStructure, pathFiles);
          const stats = result.stats ?? {};
          const uploadedCount = stats.uploadedCount ?? 0;
          const errorCount = stats.errorCount ?? 0;

          if (uploadedCount > 0 && errorCount === 0) {
            console.log(` ✅ Group uploaded: ${uploadedCount} files to ${folderStructure}`);
            totalUploaded += uploadedCount;

            if (stats.detectedCount > 0) {
              console.log(` 🔍 Files detected: ${stats.detectedCount}`);
            }
            if (stats.organizedCount > 0) {
              console.log(` 📁 Files organized: ${stats.organizedCount}`);
            }
          } else {
            console.error(` ❌ Upload failed for ${folderStructure}:`);
            if (result.errors && result.errors.length > 0) {
              result.errors.forEach((error) => {
                console.error(` - ${error.fileName}: ${error.error}`);
              });
            }
            // Keep whatever the API reports as uploaded; everything else in
            // the group counts as failed.
            totalUploaded += uploadedCount;
            totalErrors += Math.max(errorCount, pathFiles.length - uploadedCount);
          }
        } catch (error) {
          console.error(` ❌ Error uploading ${folderStructure} (batch ${batchNumber}):`, error.message);
          totalErrors += pathFiles.length;
        }

        reportProgress();

        // Small delay between path groups
        if (groupIndex < groups.size) {
          await pause(100);
        }
      }
    }

    // Small delay between batches
    if (i + batchSize < allRelatedFiles.length) {
      await pause(200);
    }
  }

  if (showProgress) {
    progressBar.stop();
  }

  console.log(`\n${'='.repeat(60)}`);
  console.log(`🎯 RFC-BASED UPLOAD COMPLETED`);
  console.log(`${'='.repeat(60)}`);
  console.log(` 📋 Files processed: ${totalProcessed}`);
  console.log(` ✅ Files uploaded: ${totalUploaded}`);
  console.log(` ⏭️ Files skipped: ${totalSkipped}`);
  console.log(` ❌ Errors: ${totalErrors}`);
  console.log(`${'='.repeat(60)}\n`);

  return {
    processedCount: totalProcessed,
    uploadedCount: totalUploaded,
    skippedCount: totalSkipped,
    errorCount: totalErrors,
  };
};
611
1212
 
612
- // Function to process files in parallel batches
613
- const processFilesInBatches = async (
614
- files,
615
- batchSize,
616
- options,
617
- basePath,
618
- folder,
619
- sourcePath,
620
- processedPaths,
621
- ) => {
622
- let successCount = 0;
623
- let failureCount = 0;
624
- let skippedCount = 0;
1213
+ /**
1214
+ * Propagate arela_path from pedimento_simplificado records to related files with same base path
1215
+ */
1216
+ const propagateArelaPath = async (options = {}) => {
1217
+ if (!supabase) {
1218
+ console.error('❌ Supabase client not initialized');
1219
+ process.exit(1);
1220
+ }
625
1221
 
626
- // Buffer for messages to show after progress bar completes
627
- const messageBuffer = [];
1222
+ console.log('🔍 Finding pedimento_simplificado records with arela_path...');
1223
+
1224
+ // Get all pedimento_simplificado records that have arela_path
1225
+ const { data: pedimentoRecords, error: pedimentoError } = await supabase
1226
+ .from('uploader')
1227
+ .select('id, original_path, arela_path, filename')
1228
+ .eq('document_type', 'pedimento_simplificado')
1229
+ .not('arela_path', 'is', null);
628
1230
 
1231
+ if (pedimentoError) {
1232
+ console.error('❌ Error fetching pedimento records:', pedimentoError.message);
1233
+ return { processedCount: 0, updatedCount: 0, errorCount: 1 };
1234
+ }
1235
+
1236
+ if (!pedimentoRecords || pedimentoRecords.length === 0) {
1237
+ console.log('ℹ️ No pedimento_simplificado records with arela_path found');
1238
+ return { processedCount: 0, updatedCount: 0, errorCount: 0 };
1239
+ }
1240
+
1241
+ console.log(`📋 Found ${pedimentoRecords.length} pedimento records with arela_path`);
1242
+
1243
+ let totalProcessed = 0;
1244
+ let totalUpdated = 0;
1245
+ let totalErrors = 0;
1246
+
1247
+ // Create progress bar
629
1248
  const progressBar = new cliProgress.SingleBar({
630
- format:
631
- '📂 Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}',
1249
+ format: '🔄 Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
632
1250
  barCompleteChar: '█',
633
1251
  barIncompleteChar: '░',
634
1252
  hideCursor: true,
635
1253
  });
636
1254
 
637
- progressBar.start(files.length, 0, {
638
- successCount: 0,
639
- failureCount: 0,
640
- skippedCount: 0,
641
- });
1255
+ if (options.showProgress !== false) {
1256
+ progressBar.start(pedimentoRecords.length, 0, {
1257
+ updated: 0,
1258
+ errors: 0,
1259
+ });
1260
+ }
642
1261
 
643
- for (let i = 0; i < files.length; i += batchSize) {
644
- const batch = files.slice(i, i + batchSize);
1262
+ // Process each pedimento record
1263
+ for (const pedimento of pedimentoRecords) {
1264
+ try {
1265
+ totalProcessed++;
1266
+
1267
+ // Extract base path from original_path (remove filename)
1268
+ const basePath = path.dirname(pedimento.original_path);
1269
+
1270
+ console.log(`\n🔍 Processing: ${pedimento.filename}`);
1271
+ console.log(` 📁 Base path: ${basePath}`);
1272
+
1273
+ // Extract folder part from existing arela_path by removing the filename
1274
+ const existingPath = pedimento.arela_path;
1275
+ const folderArelaPath = existingPath.includes('/') ?
1276
+ existingPath.substring(0, existingPath.lastIndexOf('/')) + '/' :
1277
+ existingPath.endsWith('/') ? existingPath : existingPath + '/';
1278
+
1279
+ console.log(` 🎯 Original arela path: ${existingPath}`);
1280
+ console.log(` 📁 Folder arela path: ${folderArelaPath}`);
1281
+
1282
+ // Find all files with the same base path that don't have arela_path yet
1283
+ const { data: relatedFiles, error: relatedError } = await supabase
1284
+ .from('uploader')
1285
+ .select('id, filename, original_path')
1286
+ .like('original_path', `${basePath}%`)
1287
+ .is('arela_path', null)
1288
+ .neq('id', pedimento.id); // Exclude the pedimento itself
1289
+
1290
+ if (relatedError) {
1291
+ console.error(`❌ Error finding related files for ${pedimento.filename}:`, relatedError.message);
1292
+ totalErrors++;
1293
+ continue;
1294
+ }
645
1295
 
646
- // Process batch in parallel
647
- const batchResults = await Promise.all(
648
- batch.map((file) =>
649
- processFile(
650
- file,
651
- options,
652
- basePath,
653
- folder,
654
- sourcePath,
655
- processedPaths,
656
- ),
657
- ),
658
- );
1296
+ if (!relatedFiles || relatedFiles.length === 0) {
1297
+ console.log(` ℹ️ No related files found needing arela_path update`);
1298
+ continue;
1299
+ }
1300
+
1301
+ console.log(` 📄 Found ${relatedFiles.length} related files to update:`);
1302
+
1303
+ // Show first 10 files, then indicate if there are more
1304
+ const filesToShow = relatedFiles.slice(0, 10);
1305
+ filesToShow.forEach(file => {
1306
+ console.log(` - ${file.filename}`);
1307
+ });
1308
+
1309
+ if (relatedFiles.length > 10) {
1310
+ console.log(` ... and ${relatedFiles.length - 10} more files`);
1311
+ }
659
1312
 
660
- // Update counters and buffer messages (don't print them yet)
661
- for (const result of batchResults) {
662
- if (result.success) {
663
- successCount++;
664
- // Only buffer verbose success messages if needed
665
- if (
666
- process.env.UPLOAD_VERBOSE === 'true' &&
667
- result.message &&
668
- !result.error
669
- ) {
670
- messageBuffer.push(`✅ ${result.message}`);
1313
+ // Process files in batches to avoid URI length limitations
1314
+ const BATCH_SIZE = 50; // Process 50 files at a time
1315
+ const fileIds = relatedFiles.map(f => f.id);
1316
+ let batchErrors = 0;
1317
+ let batchUpdated = 0;
1318
+
1319
+ console.log(` 🔄 Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`);
1320
+
1321
+ for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
1322
+ const batchIds = fileIds.slice(i, i + BATCH_SIZE);
1323
+ const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
1324
+ const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
1325
+
1326
+ console.log(` 📦 Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`);
1327
+
1328
+ try {
1329
+ const { error: updateError } = await supabase
1330
+ .from('uploader')
1331
+ .update({ arela_path: folderArelaPath })
1332
+ .in('id', batchIds);
1333
+
1334
+ if (updateError) {
1335
+ console.error(` ❌ Error in batch ${batchNumber}:`, updateError.message);
1336
+ batchErrors++;
1337
+ } else {
1338
+ console.log(` ✅ Batch ${batchNumber} completed: ${batchIds.length} files updated`);
1339
+ batchUpdated += batchIds.length;
1340
+ }
1341
+ } catch (error) {
1342
+ console.error(` ❌ Exception in batch ${batchNumber}:`, error.message);
1343
+ batchErrors++;
671
1344
  }
672
- } else if (result.skipped) {
673
- skippedCount++;
674
- // Only buffer verbose skip messages if needed
675
- if (process.env.UPLOAD_VERBOSE === 'true' && result.message) {
676
- messageBuffer.push(`⏭️ ${result.message}`);
1345
+
1346
+ // Small delay between batches to avoid overwhelming the database
1347
+ if (i + BATCH_SIZE < fileIds.length) {
1348
+ await new Promise(resolve => setTimeout(resolve, 100));
677
1349
  }
678
- } else if (result.error) {
679
- failureCount++;
680
- // Always buffer error messages to show later
681
- messageBuffer.push(`❌ ${result.error}`);
682
1350
  }
683
- }
684
1351
 
685
- progressBar.update(i + batch.length, {
686
- successCount,
687
- failureCount,
688
- skippedCount,
689
- });
1352
+ if (batchErrors > 0) {
1353
+ console.error(`❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`);
1354
+ totalErrors++;
1355
+ } else {
1356
+ console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
1357
+ totalUpdated += batchUpdated;
1358
+ }
690
1359
 
691
- // Manage cache size periodically (every 100 files processed)
692
- if ((i + batch.length) % 100 === 0) {
693
- manageCaches();
694
- // Also flush logs every 100 files to maintain responsiveness
695
- await logBatcher.flush();
1360
+ } catch (error) {
1361
+ console.error(`❌ Error processing ${pedimento.filename}:`, error.message);
1362
+ totalErrors++;
696
1363
  }
697
1364
 
698
- // Small delay between batches to prevent overwhelming the server
699
- if (i + batchSize < files.length) {
700
- await delay(100);
1365
+ if (options.showProgress !== false) {
1366
+ progressBar.update(totalProcessed, {
1367
+ updated: totalUpdated,
1368
+ errors: totalErrors,
1369
+ });
701
1370
  }
702
1371
  }
703
1372
 
704
- // Stop progress bar cleanly before showing any messages
705
- progressBar.stop();
706
-
707
- // Now show buffered messages if there are any important ones to show
708
- const errorMessages = messageBuffer.filter((msg) => msg.startsWith('❌'));
709
- if (errorMessages.length > 0) {
710
- console.log('\n🚨 Errors encountered during processing:');
711
- errorMessages.forEach((msg) => console.error(msg));
1373
+ if (options.showProgress !== false) {
1374
+ progressBar.stop();
712
1375
  }
713
1376
 
714
- // Show verbose messages only if requested
715
- if (process.env.UPLOAD_VERBOSE === 'true') {
716
- const otherMessages = messageBuffer.filter((msg) => !msg.startsWith('❌'));
717
- if (otherMessages.length > 0) {
718
- console.log('\n📝 Detailed processing log:');
719
- otherMessages.forEach((msg) => console.log(msg));
720
- }
721
- }
1377
+ console.log(`\n${'='.repeat(60)}`);
1378
+ console.log(`🎯 ARELA PATH PROPAGATION COMPLETED`);
1379
+ console.log(`${'='.repeat(60)}`);
1380
+ console.log(` 📋 Pedimento records processed: ${totalProcessed}`);
1381
+ console.log(` ✅ Related files updated: ${totalUpdated}`);
1382
+ console.log(` ❌ Errors: ${totalErrors}`);
1383
+ console.log(`${'='.repeat(60)}\n`);
722
1384
 
723
1385
  return {
724
- successCount,
725
- failureCount,
726
- skippedCount,
1386
+ processedCount: totalProcessed,
1387
+ updatedCount: totalUpdated,
1388
+ errorCount: totalErrors,
727
1389
  };
728
1390
  };
729
1391
 
730
1392
  program
731
- .name('supabase-uploader')
732
- .description('CLI to upload folders from a base path to Supabase Storage')
1393
+ .name('arela-uploader')
1394
+ .description(
1395
+ 'CLI to upload folders to Arela API or Supabase Storage with automatic processing',
1396
+ )
733
1397
  .option('-v, --version', 'output the version number')
734
1398
  .option('-p, --prefix <prefix>', 'Prefix path in bucket', '')
1399
+ .option('-b, --bucket <bucket>', 'Bucket name override')
1400
+ .option('--force-supabase', 'Force direct Supabase upload (skip API)')
735
1401
  .option(
736
- '-r, --rename-files',
737
- 'Rename files with problematic characters before uploading',
1402
+ '--no-auto-detect',
1403
+ 'Disable automatic file detection (API mode only)',
738
1404
  )
739
1405
  .option(
740
- '--dry-run',
741
- 'Show what files would be renamed without actually renaming them',
1406
+ '--no-auto-organize',
1407
+ 'Disable automatic file organization (API mode only)',
742
1408
  )
743
1409
  .option(
744
1410
  '-c, --concurrency <number>',
745
- 'Number of files to process concurrently (default: 3)',
746
- '3',
1411
+ 'Files per batch for processing (default: 10)',
1412
+ '10',
747
1413
  )
1414
+ .option('--batch-size <number>', 'API batch size (default: 10)', '10')
1415
+ .option('--show-stats', 'Show detailed processing statistics')
748
1416
  .option(
749
- '--show-cache-stats',
750
- 'Show cache statistics for performance analysis',
1417
+ '--folder-structure <structure>',
1418
+ 'Custom folder structure (e.g., "2024/4023260" or "cliente1/pedimentos")',
751
1419
  )
752
1420
  .option(
753
- '--batch-size <number>',
754
- 'Number of logs to batch before sending to Supabase (default: 50)',
755
- '50',
1421
+ '--auto-detect-structure',
1422
+ 'Automatically detect year/pedimento from file paths',
756
1423
  )
1424
+ .option('--client-path <path>', 'Client path for metadata tracking')
1425
+ .option('--stats-only', 'Only read file stats and insert to uploader table, skip file upload')
1426
+ .option('--no-detect', 'Disable document type detection in stats-only mode')
1427
+ .option('--propagate-arela-path', 'Propagate arela_path from pedimento_simplificado records to related files with same base path')
1428
+ .option('--upload-by-rfc', 'Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable')
757
1429
  .action(async (options) => {
758
- // Handle version option
759
1430
  if (options.version) {
760
- console.log(version);
1431
+ console.log(packageVersion);
761
1432
  process.exit(0);
762
1433
  }
763
1434
 
1435
+ // Handle propagate-arela-path option
1436
+ if (options.propagateArelaPath) {
1437
+ // Initialize Supabase credentials for propagation
1438
+ await checkCredentials(true); // Force Supabase mode
1439
+
1440
+ const result = await propagateArelaPath({
1441
+ showProgress: options.showStats || true,
1442
+ });
1443
+
1444
+ if (result.errorCount > 0) {
1445
+ process.exit(1);
1446
+ }
1447
+ return;
1448
+ }
1449
+
1450
+ // Handle upload-by-rfc option
1451
+ if (options.uploadByRfc) {
1452
+ // RFC upload needs both Supabase (for database queries) and API (for uploads)
1453
+ await checkCredentials(false); // Initialize API mode
1454
+
1455
+ // Also initialize Supabase for database queries
1456
+ if (!supabase) {
1457
+ if (!supabaseUrl || !supabaseKey) {
1458
+ console.error('❌ RFC upload requires Supabase credentials for database queries.');
1459
+ console.error(' Please set SUPABASE_URL and SUPABASE_KEY environment variables.');
1460
+ process.exit(1);
1461
+ }
1462
+
1463
+ supabase = createClient(supabaseUrl, supabaseKey);
1464
+ console.log('✅ Connected to Supabase for database queries');
1465
+ }
1466
+
1467
+ const result = await uploadFilesByRfc({
1468
+ showProgress: options.showStats || true,
1469
+ batchSize: parseInt(options.batchSize) || 10,
1470
+ folderStructure: options.folderStructure,
1471
+ });
1472
+
1473
+ if (result.errorCount > 0) {
1474
+ process.exit(1);
1475
+ }
1476
+ return;
1477
+ }
1478
+
1479
+ // Initialize credentials with force supabase flag (for stats mode, always need Supabase)
1480
+ await checkCredentials(options.forceSupabase || options.statsOnly);
1481
+
764
1482
  if (!basePath || !sources || sources.length === 0) {
765
1483
  console.error(
766
1484
  '⚠️ UPLOAD_BASE_PATH or UPLOAD_SOURCES not defined in environment variables.',
@@ -768,18 +1486,31 @@ program
768
1486
  process.exit(1);
769
1487
  }
770
1488
 
771
- const concurrency = parseInt(options.concurrency) || 3;
772
- const batchSize = parseInt(options.batchSize) || 50;
773
-
774
- // Configure log batcher with custom batch size
775
- logBatcher.batchSize = batchSize;
1489
+ const batchSize = parseInt(options.batchSize) || 10;
1490
+ const concurrency = parseInt(options.concurrency) || 10;
776
1491
 
777
- console.log(`🚀 Using concurrency level: ${concurrency}`);
778
- console.log(`📦 Using log batch size: ${batchSize}`);
1492
+ if (options.statsOnly) {
1493
+ console.log('📊 Mode: Stats Only - Reading file stats and inserting to uploader table');
1494
+ console.log('🚫 Files will NOT be uploaded');
1495
+ if (options.detect !== false) {
1496
+ console.log('🔍 Document type detection ENABLED for supported files');
1497
+ } else {
1498
+ console.log('🔍 Document type detection DISABLED');
1499
+ }
1500
+ } else {
1501
+ console.log(
1502
+ `🚀 Mode: ${apiMode ? 'Arela API with auto-processing' : 'Direct Supabase'}`,
1503
+ );
1504
+ }
1505
+ console.log(`📦 Batch size: ${batchSize}`);
1506
+ console.log(`⚡ Concurrency: ${concurrency}`);
779
1507
 
780
- const processedPaths = await getProcessedPaths();
1508
+ const processedPaths = getProcessedPaths();
781
1509
  let globalSuccess = 0;
1510
+ let globalDetected = 0;
1511
+ let globalOrganized = 0;
782
1512
  let globalFailure = 0;
1513
+ let globalSkipped = 0;
783
1514
 
784
1515
  for (const folder of sources) {
785
1516
  const sourcePath = path.resolve(basePath, folder).replace(/\\/g, '/');
@@ -793,85 +1524,78 @@ program
793
1524
 
794
1525
  console.log(`📊 Found ${files.length} files to process`);
795
1526
 
796
- // Process files in parallel batches
797
- const { successCount, failureCount, skippedCount } =
798
- await processFilesInBatches(
799
- files,
800
- concurrency,
801
- options,
802
- basePath,
803
- folder,
804
- sourcePath,
805
- processedPaths,
806
- );
807
-
808
- globalSuccess += successCount;
809
- globalFailure += failureCount;
810
-
811
- // Small delay to ensure progress bar is fully cleared
812
- await delay(100);
813
-
814
- console.log(`\n📦 Upload Summary for ${folder}:`);
815
- console.log(` ✅ Successfully uploaded files: ${successCount}`);
816
- console.log(` ❌ Files with errors: ${failureCount}`);
817
- console.log(` ⏭️ Files skipped (already exist): ${skippedCount}`);
818
- console.log(` 📜 Log file: ${logFilePath} \n`);
1527
+ const result = await processFilesInBatches(
1528
+ files,
1529
+ batchSize,
1530
+ options,
1531
+ basePath,
1532
+ folder,
1533
+ sourcePath,
1534
+ processedPaths,
1535
+ );
819
1536
 
820
- // Show cache statistics if requested
821
- if (options.showCacheStats) {
822
- console.log(`📊 Cache Statistics:`);
823
- console.log(
824
- ` 🗂️ Filename sanitization cache: ${sanitizationCache.size} entries`,
825
- );
826
- console.log(
827
- ` 📁 Path sanitization cache: ${pathSanitizationCache.size} entries`,
828
- );
829
- console.log(
830
- ` 📋 Log batch pending: ${logBatcher.batch.length} entries`,
831
- );
1537
+ globalSuccess += result.successCount;
1538
+ globalDetected += result.detectedCount || 0;
1539
+ globalOrganized += result.organizedCount || 0;
1540
+ globalFailure += result.failureCount;
1541
+ globalSkipped += result.skippedCount;
832
1542
 
833
- // Calculate cache hit rate (rough estimation)
834
- const totalProcessed = successCount + failureCount + skippedCount;
835
- const estimatedCacheHitRate =
836
- totalProcessed > 0
837
- ? Math.round(
838
- ((totalProcessed - sanitizationCache.size) / totalProcessed) *
839
- 100,
840
- )
841
- : 0;
842
- console.log(
843
- ` 🎯 Estimated cache hit rate: ${Math.max(0, estimatedCacheHitRate)}%\n`,
844
- );
1543
+ console.log(`\n📦 Summary for ${folder}:`);
1544
+ if (options.statsOnly) {
1545
+ console.log(` 📊 Stats recorded: ${result.successCount}`);
1546
+ } else {
1547
+ console.log(` ✅ Uploaded: ${result.successCount}`);
1548
+ if (apiMode) {
1549
+ console.log(` 🔍 Detected: ${result.detectedCount || 0}`);
1550
+ console.log(` 📁 Organized: ${result.organizedCount || 0}`);
1551
+ }
1552
+ }
1553
+ console.log(` Errors: ${result.failureCount}`);
1554
+ if (options.statsOnly) {
1555
+ console.log(` ⏭️ Duplicates: ${result.skippedCount}`);
1556
+ } else {
1557
+ console.log(` ⏭️ Skipped: ${result.skippedCount}`);
845
1558
  }
846
1559
 
847
1560
  writeLog(
848
- `📦 Upload Summary for folder ${folder}: Success: ${successCount}, Errors: ${failureCount}, Skipped: ${skippedCount}`,
1561
+ `📦 Summary for ${folder}: Success: ${result.successCount}, Detected: ${result.detectedCount || 0}, Organized: ${result.organizedCount || 0}, Errors: ${result.failureCount}, ${options.statsOnly ? 'Duplicates' : 'Skipped'}: ${result.skippedCount}`,
849
1562
  );
850
1563
  } catch (err) {
851
1564
  console.error(`⚠️ Error processing folder ${folder}:`, err.message);
852
1565
  writeLog(`⚠️ Error processing folder ${folder}: ${err.message}`);
853
- await sendLogToSupabase({
854
- file: folder,
855
- uploadPath: folder,
856
- status: 'error',
857
- message: err.message,
858
- });
859
1566
  globalFailure++;
860
1567
  }
861
1568
  }
862
1569
 
863
- // Force flush any remaining logs before finishing
864
- console.log(`\n📤 Flushing remaining logs...`);
865
- await logBatcher.forceFlush();
866
-
867
- // Final summary with clear separation
868
- console.log(`\n${'='.repeat(50)}`);
869
- console.log(`🎯 UPLOAD COMPLETED`);
870
- console.log(`${'='.repeat(50)}`);
871
- console.log(` ✅ Total uploaded: ${globalSuccess}`);
872
- console.log(` ❌ Total with errors: ${globalFailure}`);
1570
+ console.log(`\n${'='.repeat(60)}`);
1571
+ if (options.statsOnly) {
1572
+ console.log(`📊 STATS COLLECTION COMPLETED`);
1573
+ console.log(`${'='.repeat(60)}`);
1574
+ console.log(` 📊 Total stats recorded: ${globalSuccess}`);
1575
+ } else {
1576
+ console.log(`🎯 ${apiMode ? 'ARELA API' : 'SUPABASE'} UPLOAD COMPLETED`);
1577
+ console.log(`${'='.repeat(60)}`);
1578
+ console.log(` ✅ Total uploaded: ${globalSuccess}`);
1579
+ if (apiMode) {
1580
+ console.log(` 🔍 Total detected: ${globalDetected}`);
1581
+ console.log(` 📁 Total organized: ${globalOrganized}`);
1582
+ }
1583
+ }
1584
+ if (options.statsOnly) {
1585
+ console.log(` ⏭️ Total duplicates: ${globalSkipped}`);
1586
+ } else {
1587
+ console.log(` ⏭️ Total skipped: ${globalSkipped}`);
1588
+ }
1589
+ console.log(` ❌ Total errors: ${globalFailure}`);
873
1590
  console.log(` 📜 Log file: ${logFilePath}`);
874
- console.log(`${'='.repeat(50)}\n`);
1591
+ console.log(`${'='.repeat(60)}\n`);
1592
+
1593
+ if (options.showStats && sanitizationCache.size > 0) {
1594
+ console.log(`📊 Performance Statistics:`);
1595
+ console.log(
1596
+ ` 🗂️ Sanitization cache entries: ${sanitizationCache.size}`,
1597
+ );
1598
+ }
875
1599
  });
876
1600
 
877
1601
  program.parse();