@arela/uploader 0.1.0 → 0.2.1
This diff shows the contents of publicly released package versions as they appear in their public registries. It is provided for informational purposes only and reflects the changes between the two published versions.
- package/.env.template +20 -0
- package/OPTIMIZATION_SUMMARY.md +154 -0
- package/PERFORMANCE_OPTIMIZATIONS.md +270 -0
- package/README.md +412 -24
- package/arela-upload.log +0 -0
- package/commands.md +6 -0
- package/package.json +12 -9
- package/src/document-type-shared.js +80 -0
- package/src/document-types/pedimento-simplificado.js +289 -0
- package/src/file-detection.js +194 -0
- package/src/index.js +1755 -575
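Note: the new .env.template file is not expanded in this diff. As a rough sketch only (variable names taken from the environment reads in package/src/index.js below; values are placeholders and not from the template itself), it likely covers:

    # Direct Supabase mode
    SUPABASE_URL=
    SUPABASE_KEY=
    SUPABASE_BUCKET=
    # Arela API mode
    ARELA_API_URL=
    ARELA_API_TOKEN=
    # Uploader configuration
    UPLOAD_BASE_PATH=
    UPLOAD_SOURCES=folder1|folder2
    UPLOAD_RFCS=RFC1|RFC2
    VERBOSE_LOGGING=false
    BATCH_DELAY=100
    PROGRESS_UPDATE_INTERVAL=10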
package/src/index.js
CHANGED
|
@@ -3,32 +3,56 @@ import { createClient } from '@supabase/supabase-js';
|
|
|
3
3
|
import cliProgress from 'cli-progress';
|
|
4
4
|
import { Command } from 'commander';
|
|
5
5
|
import { config } from 'dotenv';
|
|
6
|
+
import FormData from 'form-data';
|
|
6
7
|
import fs from 'fs';
|
|
7
8
|
import { globby } from 'globby';
|
|
8
9
|
import mime from 'mime-types';
|
|
9
|
-
import
|
|
10
|
+
import fetch from 'node-fetch';
|
|
10
11
|
import path from 'path';
|
|
11
|
-
|
|
12
|
-
const require = createRequire(import.meta.url);
|
|
13
|
-
const { version } = require('../package.json');
|
|
12
|
+
import { FileDetectionService } from './file-detection.js';
|
|
14
13
|
|
|
15
14
|
config();
|
|
16
15
|
|
|
17
16
|
const program = new Command();
|
|
18
17
|
|
|
18
|
+
// Read package.json version at startup
|
|
19
|
+
let packageVersion = '1.0.0'; // fallback
|
|
20
|
+
try {
|
|
21
|
+
const __filename = new URL(import.meta.url).pathname;
|
|
22
|
+
const __dirname = path.dirname(__filename);
|
|
23
|
+
const packageJsonPath = path.resolve(__dirname, '../package.json');
|
|
24
|
+
const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
|
|
25
|
+
packageVersion = packageJson.version || '1.0.0';
|
|
26
|
+
} catch (error) {
|
|
27
|
+
console.warn('⚠️ Could not read package.json version, using fallback');
|
|
28
|
+
}
|
|
29
|
+
|
|
30
|
+
// Configuración de Supabase (original)
|
|
19
31
|
const supabaseUrl = process.env.SUPABASE_URL;
|
|
20
32
|
const supabaseKey = process.env.SUPABASE_KEY;
|
|
21
33
|
const bucket = process.env.SUPABASE_BUCKET;
|
|
34
|
+
|
|
35
|
+
// Configuración de API (nueva)
|
|
36
|
+
const API_BASE_URL = process.env.ARELA_API_URL;
|
|
37
|
+
const API_TOKEN = process.env.ARELA_API_TOKEN;
|
|
38
|
+
|
|
39
|
+
// Configuración del uploader mejorado
|
|
22
40
|
const basePath = process.env.UPLOAD_BASE_PATH;
|
|
23
41
|
const sources = process.env.UPLOAD_SOURCES?.split('|')
|
|
24
42
|
.map((s) => s.trim())
|
|
25
43
|
.filter(Boolean);
|
|
26
44
|
|
|
27
|
-
|
|
45
|
+
// Configuración de RFCs para upload
|
|
46
|
+
console.log('🔧 Configured RFCs for upload:', process.env.UPLOAD_RFCS);
|
|
47
|
+
const uploadRfcs = process.env.UPLOAD_RFCS?.split('|')
|
|
48
|
+
.map((s) => s.trim())
|
|
49
|
+
.filter(Boolean);
|
|
50
|
+
|
|
51
|
+
let supabase;
|
|
52
|
+
let apiMode = false;
|
|
28
53
|
|
|
29
|
-
// Pre-compiled regex patterns for better performance
|
|
54
|
+
// Pre-compiled regex patterns for better performance (from original complex uploader)
|
|
30
55
|
const SANITIZATION_PATTERNS = [
|
|
31
|
-
// Character replacements (grouped for efficiency)
|
|
32
56
|
[/[áàâäãåāăą]/gi, 'a'],
|
|
33
57
|
[/[éèêëēĕėę]/gi, 'e'],
|
|
34
58
|
[/[íìîïīĭį]/gi, 'i'],
|
|
@@ -37,579 +61,835 @@ const SANITIZATION_PATTERNS = [
|
|
|
37
61
|
[/[ñň]/gi, 'n'],
|
|
38
62
|
[/[ç]/gi, 'c'],
|
|
39
63
|
[/[ý]/gi, 'y'],
|
|
40
|
-
// Korean characters (compiled once)
|
|
41
64
|
[/[멕]/g, 'meok'],
|
|
42
65
|
[/[시]/g, 'si'],
|
|
43
66
|
[/[코]/g, 'ko'],
|
|
44
67
|
[/[용]/g, 'yong'],
|
|
45
68
|
[/[가-힣]/g, 'kr'],
|
|
46
|
-
// Unicode diacritics (after normalize)
|
|
47
69
|
[/[\u0300-\u036f]/g, ''],
|
|
48
|
-
// Problematic symbols
|
|
49
70
|
[/[\\?%*:|"<>[\]~`^]/g, '-'],
|
|
50
71
|
[/[{}]/g, '-'],
|
|
51
72
|
[/[&]/g, 'and'],
|
|
52
|
-
[/[()]/g, ''],
|
|
53
|
-
|
|
54
|
-
[
|
|
55
|
-
[
|
|
56
|
-
[
|
|
57
|
-
[
|
|
58
|
-
[/[^\w.-]/g, ''], // Remove any remaining non-alphanumeric chars
|
|
73
|
+
[/[()]/g, ''],
|
|
74
|
+
[/\s+/g, '-'],
|
|
75
|
+
[/-+/g, '-'],
|
|
76
|
+
[/^-+|-+$/g, ''],
|
|
77
|
+
[/^\.+/, ''],
|
|
78
|
+
[/[^\w.-]/g, ''],
|
|
59
79
|
];
|
|
60
80
|
|
|
61
|
-
// Cache for sanitized filenames to avoid repeated processing
|
|
62
81
|
const sanitizationCache = new Map();
|
|
63
82
|
|
|
64
|
-
// Enhanced sanitization function with caching and pre-compiled regex
|
|
65
83
|
const sanitizeFileName = (fileName) => {
|
|
66
|
-
// Check cache first
|
|
67
84
|
if (sanitizationCache.has(fileName)) {
|
|
68
85
|
return sanitizationCache.get(fileName);
|
|
69
86
|
}
|
|
70
87
|
|
|
71
|
-
// Get file extension
|
|
72
88
|
const ext = path.extname(fileName);
|
|
73
89
|
const nameWithoutExt = path.basename(fileName, ext);
|
|
74
90
|
|
|
75
|
-
// Fast path for already clean filenames
|
|
76
91
|
if (/^[a-zA-Z0-9._-]+$/.test(nameWithoutExt)) {
|
|
77
92
|
const result = fileName;
|
|
78
93
|
sanitizationCache.set(fileName, result);
|
|
79
94
|
return result;
|
|
80
95
|
}
|
|
81
96
|
|
|
82
|
-
// Normalize unicode first (more efficient to do once)
|
|
83
97
|
let sanitized = nameWithoutExt.normalize('NFD');
|
|
84
98
|
|
|
85
|
-
// Apply all sanitization patterns
|
|
86
99
|
for (const [pattern, replacement] of SANITIZATION_PATTERNS) {
|
|
87
100
|
sanitized = sanitized.replace(pattern, replacement);
|
|
88
101
|
}
|
|
89
102
|
|
|
90
|
-
//
|
|
103
|
+
// Additional sanitization for problematic characters
|
|
104
|
+
sanitized = sanitized
|
|
105
|
+
.replace(/~/g, '-') // Replace tildes
|
|
106
|
+
.replace(/\s+/g, '-') // Replace spaces with dashes
|
|
107
|
+
.replace(/\.+/g, '-') // Replace multiple dots with dashes
|
|
108
|
+
.replace(/-+/g, '-') // Collapse multiple dashes
|
|
109
|
+
.replace(/^-+|-+$/g, ''); // Remove leading/trailing dashes
|
|
110
|
+
|
|
91
111
|
if (!sanitized) {
|
|
92
112
|
sanitized = 'unnamed_file';
|
|
93
113
|
}
|
|
94
114
|
|
|
95
115
|
const result = sanitized + ext;
|
|
96
|
-
|
|
97
|
-
// Cache the result for future use
|
|
98
116
|
sanitizationCache.set(fileName, result);
|
|
99
|
-
|
|
100
117
|
return result;
|
|
101
118
|
};
|
|
102
119
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
120
|
+
const checkCredentials = async (forceSupabase = false) => {
|
|
121
|
+
// Force Supabase mode if explicitly requested
|
|
122
|
+
if (forceSupabase) {
|
|
123
|
+
console.log('🔧 Force Supabase mode enabled - skipping API');
|
|
124
|
+
apiMode = false;
|
|
125
|
+
} else if (API_BASE_URL && API_TOKEN) {
|
|
126
|
+
console.log(
|
|
127
|
+
'🌐 API mode enabled - files will be uploaded to Arela API with automatic processing',
|
|
128
|
+
);
|
|
129
|
+
apiMode = true;
|
|
110
130
|
|
|
111
|
-
|
|
112
|
-
const
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
this.batch = [];
|
|
118
|
-
this.batchSize = batchSize;
|
|
119
|
-
this.flushInterval = flushInterval;
|
|
120
|
-
this.lastFlush = Date.now();
|
|
121
|
-
this.flushTimer = null;
|
|
122
|
-
}
|
|
123
|
-
|
|
124
|
-
add(logEntry) {
|
|
125
|
-
this.batch.push({
|
|
126
|
-
filename: path.basename(logEntry.file),
|
|
127
|
-
path: logEntry.uploadPath,
|
|
128
|
-
status: logEntry.status,
|
|
129
|
-
message: logEntry.message,
|
|
130
|
-
});
|
|
131
|
+
try {
|
|
132
|
+
const response = await fetch(`${API_BASE_URL}/api/health`, {
|
|
133
|
+
headers: {
|
|
134
|
+
'x-api-key': API_TOKEN,
|
|
135
|
+
},
|
|
136
|
+
});
|
|
131
137
|
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
+
if (!response.ok) {
|
|
139
|
+
console.warn(
|
|
140
|
+
'⚠️ API connection failed, falling back to direct Supabase upload',
|
|
141
|
+
);
|
|
142
|
+
apiMode = false;
|
|
143
|
+
} else {
|
|
144
|
+
console.log('✅ Connected to Arela API');
|
|
145
|
+
return;
|
|
146
|
+
}
|
|
147
|
+
} catch (err) {
|
|
148
|
+
console.warn(
|
|
149
|
+
'⚠️ API connection failed, falling back to direct Supabase upload',
|
|
150
|
+
);
|
|
151
|
+
apiMode = false;
|
|
138
152
|
}
|
|
139
153
|
}
|
|
140
154
|
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
clearTimeout(this.flushTimer);
|
|
151
|
-
this.flushTimer = null;
|
|
155
|
+
// Initialize Supabase client if not in API mode or if forced
|
|
156
|
+
if (!apiMode || forceSupabase) {
|
|
157
|
+
if (!supabaseUrl || !supabaseKey || !bucket) {
|
|
158
|
+
console.error(
|
|
159
|
+
'⚠️ Missing credentials. Please set either:\n' +
|
|
160
|
+
' - ARELA_API_URL and ARELA_API_TOKEN for API mode, or\n' +
|
|
161
|
+
' - SUPABASE_URL, SUPABASE_KEY, and SUPABASE_BUCKET for direct mode',
|
|
162
|
+
);
|
|
163
|
+
process.exit(1);
|
|
152
164
|
}
|
|
153
165
|
|
|
166
|
+
supabase = createClient(supabaseUrl, supabaseKey);
|
|
167
|
+
|
|
154
168
|
try {
|
|
155
|
-
const { error } = await supabase.from(
|
|
169
|
+
const { error } = await supabase.storage.from(bucket).list('');
|
|
156
170
|
if (error) {
|
|
157
|
-
console.error(
|
|
158
|
-
|
|
159
|
-
);
|
|
160
|
-
// Re-add failed logs to batch for retry (optional)
|
|
161
|
-
this.batch.unshift(...logsToSend);
|
|
162
|
-
} else {
|
|
163
|
-
// Only show verbose output if requested
|
|
164
|
-
if (process.env.LOG_BATCH_VERBOSE === 'true') {
|
|
165
|
-
console.log(`📊 Flushed ${logsToSend.length} logs to Supabase`);
|
|
166
|
-
}
|
|
171
|
+
console.error('⚠️ Error connecting to Supabase:', error.message);
|
|
172
|
+
process.exit(1);
|
|
167
173
|
}
|
|
174
|
+
console.log('✅ Connected to Supabase (direct mode)');
|
|
168
175
|
} catch (err) {
|
|
169
|
-
console.error(
|
|
170
|
-
|
|
171
|
-
this.batch.unshift(...logsToSend);
|
|
172
|
-
}
|
|
173
|
-
}
|
|
174
|
-
|
|
175
|
-
// Schedule auto-flush if not already scheduled
|
|
176
|
-
scheduleFlush() {
|
|
177
|
-
if (!this.flushTimer && this.batch.length > 0) {
|
|
178
|
-
this.flushTimer = setTimeout(() => {
|
|
179
|
-
this.flush();
|
|
180
|
-
}, this.flushInterval);
|
|
176
|
+
console.error('⚠️ Error:', err.message);
|
|
177
|
+
process.exit(1);
|
|
181
178
|
}
|
|
182
179
|
}
|
|
180
|
+
};
|
|
183
181
|
|
|
184
|
-
|
|
185
|
-
async forceFlush() {
|
|
186
|
-
if (this.flushTimer) {
|
|
187
|
-
clearTimeout(this.flushTimer);
|
|
188
|
-
this.flushTimer = null;
|
|
189
|
-
}
|
|
190
|
-
await this.flush();
|
|
191
|
-
}
|
|
192
|
-
}
|
|
182
|
+
const logFilePath = path.resolve(process.cwd(), 'arela-upload.log');
|
|
193
183
|
|
|
194
|
-
|
|
195
|
-
|
|
184
|
+
/**
|
|
185
|
+
* OPTIMIZED: Log buffer to reduce I/O operations
|
|
186
|
+
*/
|
|
187
|
+
let logBuffer = [];
|
|
188
|
+
const LOG_BUFFER_SIZE = 100; // Flush every 100 log entries
|
|
189
|
+
let lastFlushTime = Date.now();
|
|
190
|
+
const LOG_FLUSH_INTERVAL = 5000; // Flush every 5 seconds
|
|
196
191
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
const MAX_CACHE_SIZE = 1000;
|
|
192
|
+
const flushLogBuffer = () => {
|
|
193
|
+
if (logBuffer.length === 0) return;
|
|
200
194
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
});
|
|
195
|
+
try {
|
|
196
|
+
const logContent = logBuffer.join('\n') + '\n';
|
|
197
|
+
fs.appendFileSync(logFilePath, logContent);
|
|
198
|
+
logBuffer = [];
|
|
199
|
+
lastFlushTime = Date.now();
|
|
200
|
+
} catch (error) {
|
|
201
|
+
console.error(`❌ Error writing to log file: ${error.code} | ${error.message} | path: ${logFilePath}`);
|
|
208
202
|
}
|
|
203
|
+
};
|
|
209
204
|
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
205
|
+
const writeLog = (message) => {
|
|
206
|
+
try {
|
|
207
|
+
const timestamp = new Date().toISOString();
|
|
208
|
+
logBuffer.push(`[${timestamp}] ${message}`);
|
|
209
|
+
|
|
210
|
+
// Flush if buffer is full or enough time has passed
|
|
211
|
+
const now = Date.now();
|
|
212
|
+
if (logBuffer.length >= LOG_BUFFER_SIZE || (now - lastFlushTime) >= LOG_FLUSH_INTERVAL) {
|
|
213
|
+
flushLogBuffer();
|
|
214
|
+
}
|
|
215
|
+
} catch (error) {
|
|
216
|
+
console.error(`❌ Error buffering log message: ${error.message}`);
|
|
216
217
|
}
|
|
217
218
|
};
|
|
218
219
|
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
220
|
+
// Ensure logs are flushed on process exit
|
|
221
|
+
process.on('exit', flushLogBuffer);
|
|
222
|
+
process.on('SIGINT', () => {
|
|
223
|
+
flushLogBuffer();
|
|
224
|
+
process.exit(0);
|
|
225
|
+
});
|
|
226
|
+
process.on('SIGTERM', () => {
|
|
227
|
+
flushLogBuffer();
|
|
228
|
+
process.exit(0);
|
|
229
|
+
});
|
|
230
|
+
|
|
231
|
+
/**
|
|
232
|
+
* OPTIMIZED: Conditional logging to reduce console overhead
|
|
233
|
+
*/
|
|
234
|
+
const VERBOSE_LOGGING = process.env.VERBOSE_LOGGING === 'true';
|
|
235
|
+
const BATCH_DELAY = parseInt(process.env.BATCH_DELAY) || 100; // Configurable delay between batches
|
|
236
|
+
const PROGRESS_UPDATE_INTERVAL = parseInt(process.env.PROGRESS_UPDATE_INTERVAL) || 10; // Update progress every N items
|
|
237
|
+
|
|
238
|
+
const logVerbose = (message) => {
|
|
239
|
+
if (VERBOSE_LOGGING) {
|
|
240
|
+
console.log(message);
|
|
223
241
|
}
|
|
242
|
+
};
|
|
243
|
+
const batchReadFileStats = (filePaths) => {
|
|
244
|
+
const results = [];
|
|
224
245
|
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
246
|
+
for (const filePath of filePaths) {
|
|
247
|
+
try {
|
|
248
|
+
const stats = fs.statSync(filePath);
|
|
249
|
+
results.push({ path: filePath, stats, error: null });
|
|
250
|
+
} catch (error) {
|
|
251
|
+
results.push({ path: filePath, stats: null, error: error.message });
|
|
252
|
+
}
|
|
229
253
|
}
|
|
230
254
|
|
|
231
|
-
|
|
255
|
+
return results;
|
|
256
|
+
};
|
|
257
|
+
|
|
258
|
+
/**
|
|
259
|
+
* OPTIMIZED: Cache for year/pedimento detection results to avoid redundant parsing
|
|
260
|
+
*/
|
|
261
|
+
const pathDetectionCache = new Map();
|
|
232
262
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
263
|
+
/**
|
|
264
|
+
* OPTIMIZED: Clear the path detection cache (useful for testing or long-running processes)
|
|
265
|
+
*/
|
|
266
|
+
const clearPathDetectionCache = () => {
|
|
267
|
+
pathDetectionCache.clear();
|
|
268
|
+
};
|
|
269
|
+
|
|
270
|
+
/**
|
|
271
|
+
* OPTIMIZED: Get detection results with caching
|
|
272
|
+
*/
|
|
273
|
+
const getCachedPathDetection = (filePath, basePath) => {
|
|
274
|
+
const cacheKey = `${filePath}|${basePath}`;
|
|
275
|
+
|
|
276
|
+
if (pathDetectionCache.has(cacheKey)) {
|
|
277
|
+
return pathDetectionCache.get(cacheKey);
|
|
236
278
|
}
|
|
237
279
|
|
|
238
|
-
|
|
239
|
-
|
|
280
|
+
const detection = extractYearAndPedimentoFromPath(filePath, basePath);
|
|
281
|
+
pathDetectionCache.set(cacheKey, detection);
|
|
240
282
|
|
|
241
|
-
return
|
|
283
|
+
return detection;
|
|
242
284
|
};
|
|
243
285
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
286
|
+
/**
|
|
287
|
+
* Extracts year and pedimento number from file path
|
|
288
|
+
* Supports patterns like:
|
|
289
|
+
* - /path/to/2024/4023260/file.pdf
|
|
290
|
+
* - /path/to/pedimentos/2024/4023260/file.pdf
|
|
291
|
+
* - /path/to/docs/año2024/ped4023260/file.pdf
|
|
292
|
+
*/
|
|
293
|
+
const extractYearAndPedimentoFromPath = (filePath, basePath) => {
|
|
294
|
+
try {
|
|
295
|
+
const relativePath = path.relative(basePath, filePath);
|
|
296
|
+
const pathParts = relativePath.split(path.sep);
|
|
297
|
+
|
|
298
|
+
let year = null;
|
|
299
|
+
let pedimento = null;
|
|
300
|
+
|
|
301
|
+
// Pattern 1: Direct year/pedimento structure (2024/4023260)
|
|
302
|
+
for (let i = 0; i < pathParts.length - 1; i++) {
|
|
303
|
+
const part = pathParts[i];
|
|
304
|
+
const nextPart = pathParts[i + 1];
|
|
305
|
+
|
|
306
|
+
// Check if current part looks like a year (2020-2030)
|
|
307
|
+
const yearMatch = part.match(/^(202[0-9])$/);
|
|
308
|
+
if (yearMatch && nextPart) {
|
|
309
|
+
year = yearMatch[1];
|
|
310
|
+
|
|
311
|
+
// Check if next part looks like a pedimento (4-8 digits)
|
|
312
|
+
const pedimentoMatch = nextPart.match(/^(\d{4,8})$/);
|
|
313
|
+
if (pedimentoMatch) {
|
|
314
|
+
pedimento = pedimentoMatch[1];
|
|
315
|
+
break;
|
|
316
|
+
}
|
|
317
|
+
}
|
|
318
|
+
}
|
|
319
|
+
|
|
320
|
+
// Pattern 2: Named patterns (año2024, ped4023260)
|
|
321
|
+
if (!year || !pedimento) {
|
|
322
|
+
for (const part of pathParts) {
|
|
323
|
+
if (!year) {
|
|
324
|
+
const namedYearMatch = part.match(/(?:año|year|anio)(\d{4})/i);
|
|
325
|
+
if (namedYearMatch) {
|
|
326
|
+
year = namedYearMatch[1];
|
|
327
|
+
}
|
|
328
|
+
}
|
|
247
329
|
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
}
|
|
330
|
+
if (!pedimento) {
|
|
331
|
+
const namedPedimentoMatch = part.match(
|
|
332
|
+
/(?:ped|pedimento|pedi)(\d{4,8})/i,
|
|
333
|
+
);
|
|
334
|
+
if (namedPedimentoMatch) {
|
|
335
|
+
pedimento = namedPedimentoMatch[1];
|
|
336
|
+
}
|
|
337
|
+
}
|
|
338
|
+
}
|
|
339
|
+
}
|
|
251
340
|
|
|
252
|
-
//
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
261
|
-
|
|
262
|
-
path: uploadPath,
|
|
263
|
-
status,
|
|
264
|
-
message,
|
|
265
|
-
},
|
|
266
|
-
]);
|
|
341
|
+
// Pattern 3: Loose year detection in any part
|
|
342
|
+
if (!year) {
|
|
343
|
+
for (const part of pathParts) {
|
|
344
|
+
const yearMatch = part.match(/(202[0-9])/);
|
|
345
|
+
if (yearMatch) {
|
|
346
|
+
year = yearMatch[1];
|
|
347
|
+
break;
|
|
348
|
+
}
|
|
349
|
+
}
|
|
350
|
+
}
|
|
267
351
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
352
|
+
// Pattern 4: Loose pedimento detection (4-8 consecutive digits)
|
|
353
|
+
if (!pedimento) {
|
|
354
|
+
for (const part of pathParts) {
|
|
355
|
+
const pedimentoMatch = part.match(/(\d{4,8})/);
|
|
356
|
+
if (pedimentoMatch && pedimentoMatch[1].length >= 4) {
|
|
357
|
+
pedimento = pedimentoMatch[1];
|
|
358
|
+
break;
|
|
359
|
+
}
|
|
360
|
+
}
|
|
361
|
+
}
|
|
362
|
+
|
|
363
|
+
return { year, pedimento, detected: !!(year && pedimento) };
|
|
364
|
+
} catch (error) {
|
|
365
|
+
return {
|
|
366
|
+
year: null,
|
|
367
|
+
pedimento: null,
|
|
368
|
+
detected: false,
|
|
369
|
+
error: error.message,
|
|
370
|
+
};
|
|
272
371
|
}
|
|
273
372
|
};
|
|
274
373
|
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
278
|
-
|
|
279
|
-
|
|
280
|
-
writeLog('⚠️ Missing Supabase credentials.');
|
|
281
|
-
await sendLogToSupabaseImmediate({
|
|
282
|
-
file: 'Error',
|
|
283
|
-
uploadPath: 'Error',
|
|
284
|
-
status: 'error',
|
|
285
|
-
message: 'Missing Supabase credentials.',
|
|
286
|
-
});
|
|
287
|
-
process.exit(1);
|
|
288
|
-
}
|
|
374
|
+
/**
|
|
375
|
+
* OPTIMIZED: Get processed paths with caching and buffered log reading
|
|
376
|
+
*/
|
|
377
|
+
let processedPathsCache = null;
|
|
378
|
+
let lastLogModTime = 0;
|
|
289
379
|
|
|
380
|
+
const getProcessedPaths = () => {
|
|
290
381
|
try {
|
|
291
|
-
|
|
292
|
-
if (
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
});
|
|
301
|
-
process.exit(1);
|
|
382
|
+
// Check if log file exists
|
|
383
|
+
if (!fs.existsSync(logFilePath)) {
|
|
384
|
+
return new Set();
|
|
385
|
+
}
|
|
386
|
+
|
|
387
|
+
// Check if cache is still valid
|
|
388
|
+
const logStats = fs.statSync(logFilePath);
|
|
389
|
+
if (processedPathsCache && logStats.mtime.getTime() === lastLogModTime) {
|
|
390
|
+
return processedPathsCache;
|
|
302
391
|
}
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
392
|
+
|
|
393
|
+
// Read and parse log file
|
|
394
|
+
const processed = new Set();
|
|
395
|
+
const content = fs.readFileSync(logFilePath, 'utf-8');
|
|
396
|
+
|
|
397
|
+
// Use more efficient regex with global flag
|
|
398
|
+
const regex = /(SUCCESS|SKIPPED): .*? -> (.+)/g;
|
|
399
|
+
let match;
|
|
400
|
+
|
|
401
|
+
while ((match = regex.exec(content)) !== null) {
|
|
402
|
+
const path = match[2];
|
|
403
|
+
if (path) {
|
|
404
|
+
processed.add(path.trim());
|
|
405
|
+
}
|
|
406
|
+
}
|
|
407
|
+
|
|
408
|
+
// Update cache
|
|
409
|
+
processedPathsCache = processed;
|
|
410
|
+
lastLogModTime = logStats.mtime.getTime();
|
|
411
|
+
|
|
412
|
+
return processed;
|
|
413
|
+
} catch (error) {
|
|
414
|
+
console.error(`⚠️ Error reading processed paths: ${error.message}`);
|
|
415
|
+
return new Set();
|
|
416
|
+
}
|
|
417
|
+
};
|
|
418
|
+
|
|
419
|
+
/**
|
|
420
|
+
* Upload files to Arela API with automatic detection and organization
|
|
421
|
+
*/
|
|
422
|
+
const uploadToApi = async (files, options) => {
|
|
423
|
+
const formData = new FormData();
|
|
424
|
+
|
|
425
|
+
files.forEach((file) => {
|
|
426
|
+
const fileBuffer = fs.readFileSync(file.path);
|
|
427
|
+
formData.append('files', fileBuffer, {
|
|
428
|
+
filename: file.name,
|
|
429
|
+
contentType: file.contentType,
|
|
311
430
|
});
|
|
312
|
-
|
|
431
|
+
});
|
|
432
|
+
|
|
433
|
+
if (bucket) formData.append('bucket', bucket);
|
|
434
|
+
if (options.prefix) formData.append('prefix', options.prefix);
|
|
435
|
+
|
|
436
|
+
// Nueva funcionalidad: estructura de carpetas personalizada
|
|
437
|
+
let combinedStructure = null;
|
|
438
|
+
let cachedDetection = null; // Cache detection result to avoid redundant calls
|
|
439
|
+
|
|
440
|
+
if (
|
|
441
|
+
options.folderStructure &&
|
|
442
|
+
options.autoDetectStructure &&
|
|
443
|
+
files.length > 0
|
|
444
|
+
) {
|
|
445
|
+
// Combine custom folder structure with auto-detection
|
|
446
|
+
const firstFile = files[0];
|
|
447
|
+
cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
|
|
448
|
+
|
|
449
|
+
if (cachedDetection.detected) {
|
|
450
|
+
const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
|
|
451
|
+
combinedStructure = `${options.folderStructure}/${autoStructure}`;
|
|
452
|
+
formData.append('folderStructure', combinedStructure);
|
|
453
|
+
console.log(
|
|
454
|
+
`📁 Combined folder structure: ${options.folderStructure} + ${autoStructure} = ${combinedStructure}`,
|
|
455
|
+
);
|
|
456
|
+
} else {
|
|
457
|
+
// Fallback to just custom structure if auto-detection fails
|
|
458
|
+
formData.append('folderStructure', options.folderStructure);
|
|
459
|
+
console.log(
|
|
460
|
+
`📁 Using custom folder structure (auto-detection failed): ${options.folderStructure}`,
|
|
461
|
+
);
|
|
462
|
+
}
|
|
463
|
+
} else if (options.folderStructure) {
|
|
464
|
+
formData.append('folderStructure', options.folderStructure);
|
|
465
|
+
console.log(`📁 Using custom folder structure: ${options.folderStructure}`);
|
|
466
|
+
} else if (options.autoDetectStructure && files.length > 0) {
|
|
467
|
+
// Try to auto-detect from the first file if no explicit structure is provided
|
|
468
|
+
const firstFile = files[0];
|
|
469
|
+
cachedDetection = getCachedPathDetection(firstFile.path, process.cwd());
|
|
470
|
+
|
|
471
|
+
if (cachedDetection.detected) {
|
|
472
|
+
const autoStructure = `${cachedDetection.year}/${cachedDetection.pedimento}`;
|
|
473
|
+
formData.append('folderStructure', autoStructure);
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
// Si se especifica clientPath para user_metadata
|
|
478
|
+
if (options.clientPath) {
|
|
479
|
+
formData.append('clientPath', options.clientPath);
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
formData.append('autoDetect', String(options.autoDetect ?? true));
|
|
483
|
+
formData.append('autoOrganize', String(options.autoOrganize ?? true));
|
|
484
|
+
formData.append('batchSize', String(options.batchSize || 10));
|
|
485
|
+
formData.append('clientVersion', packageVersion);
|
|
486
|
+
|
|
487
|
+
const response = await fetch(
|
|
488
|
+
`${API_BASE_URL}/api/storage/batch-upload-and-process`,
|
|
489
|
+
{
|
|
490
|
+
method: 'POST',
|
|
491
|
+
headers: {
|
|
492
|
+
'x-api-key': API_TOKEN,
|
|
493
|
+
},
|
|
494
|
+
body: formData,
|
|
495
|
+
},
|
|
496
|
+
);
|
|
497
|
+
|
|
498
|
+
if (!response.ok) {
|
|
499
|
+
const errorText = await response.text();
|
|
500
|
+
throw new Error(
|
|
501
|
+
`API request failed: ${response.status} ${response.statusText} - ${errorText}`,
|
|
502
|
+
);
|
|
313
503
|
}
|
|
504
|
+
|
|
505
|
+
return response.json();
|
|
314
506
|
};
|
|
315
507
|
|
|
316
|
-
|
|
508
|
+
/**
|
|
509
|
+
* Upload file directly to Supabase (fallback method)
|
|
510
|
+
*/
|
|
511
|
+
const uploadToSupabase = async (file, uploadPath) => {
|
|
512
|
+
const content = fs.readFileSync(file);
|
|
513
|
+
const contentType = mime.lookup(file) || 'application/octet-stream';
|
|
317
514
|
|
|
318
|
-
const fileExistsInBucket = async (pathInBucket) => {
|
|
319
|
-
const dir = path.dirname(pathInBucket);
|
|
320
|
-
const filename = path.basename(pathInBucket);
|
|
321
515
|
const { data, error } = await supabase.storage
|
|
322
516
|
.from(bucket)
|
|
323
|
-
.
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
writeLog(`⚠️ Could not verify duplicate: ${error.message}`);
|
|
327
|
-
await sendLogToSupabaseImmediate({
|
|
328
|
-
file: 'Error',
|
|
329
|
-
uploadPath: 'Error',
|
|
330
|
-
status: 'error',
|
|
331
|
-
message: error.message,
|
|
517
|
+
.upload(uploadPath.replace(/\\/g, '/'), content, {
|
|
518
|
+
upsert: true,
|
|
519
|
+
contentType,
|
|
332
520
|
});
|
|
333
|
-
|
|
521
|
+
|
|
522
|
+
if (error) {
|
|
523
|
+
throw new Error(error.message);
|
|
334
524
|
}
|
|
335
|
-
|
|
525
|
+
|
|
526
|
+
return data;
|
|
336
527
|
};
|
|
337
528
|
|
|
338
|
-
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
console.error(`❌ Error writing to log file: ${error.message}`);
|
|
529
|
+
/**
|
|
530
|
+
* Insert file stats into uploader table with document detection
|
|
531
|
+
*/
|
|
532
|
+
const insertStatsToUploaderTable = async (files, options) => {
|
|
533
|
+
if (!supabase) {
|
|
534
|
+
throw new Error('Supabase client not initialized. Stats mode requires Supabase connection.');
|
|
345
535
|
}
|
|
346
|
-
};
|
|
347
536
|
|
|
348
|
-
|
|
349
|
-
const
|
|
350
|
-
const processed = new Set();
|
|
537
|
+
const detectionService = new FileDetectionService();
|
|
538
|
+
const records = [];
|
|
351
539
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
const
|
|
355
|
-
|
|
356
|
-
.select('path')
|
|
357
|
-
.in('status', ['success', 'skipped']);
|
|
540
|
+
for (const file of files) {
|
|
541
|
+
// OPTIMIZED: Use pre-computed stats if available, otherwise call fs.statSync
|
|
542
|
+
const stats = file.stats || fs.statSync(file.path);
|
|
543
|
+
const originalPath = options.clientPath || file.path;
|
|
358
544
|
|
|
359
|
-
if
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
)
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
}
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
545
|
+
// Check if record already exists
|
|
546
|
+
const { data: existingRecords, error: checkError } = await supabase
|
|
547
|
+
.from('uploader')
|
|
548
|
+
.select('id, original_path')
|
|
549
|
+
.eq('original_path', originalPath)
|
|
550
|
+
.limit(1);
|
|
551
|
+
|
|
552
|
+
if (checkError) {
|
|
553
|
+
console.error(`❌ Error checking for existing record: ${checkError.message}`);
|
|
554
|
+
continue;
|
|
555
|
+
}
|
|
556
|
+
|
|
557
|
+
if (existingRecords && existingRecords.length > 0) {
|
|
558
|
+
console.log(`⏭️ Skipping duplicate: ${path.basename(file.path)}`);
|
|
559
|
+
continue;
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
// Initialize record with basic file stats
|
|
563
|
+
const record = {
|
|
564
|
+
document_type: null,
|
|
565
|
+
size: stats.size,
|
|
566
|
+
num_pedimento: null,
|
|
567
|
+
filename: file.originalName || path.basename(file.path),
|
|
568
|
+
original_path: originalPath,
|
|
569
|
+
arela_path: null,
|
|
570
|
+
status: 'stats',
|
|
571
|
+
rfc: null,
|
|
572
|
+
message: null
|
|
573
|
+
};
|
|
574
|
+
|
|
575
|
+
// Try to detect document type for supported files
|
|
576
|
+
if (detectionService.isSupportedFileType(file.path)) {
|
|
577
|
+
try {
|
|
578
|
+
const detection = await detectionService.detectFile(file.path);
|
|
579
|
+
|
|
580
|
+
if (detection.detectedType) {
|
|
581
|
+
record.document_type = detection.detectedType;
|
|
582
|
+
record.num_pedimento = detection.detectedPedimento;
|
|
583
|
+
record.status = 'detected';
|
|
584
|
+
|
|
585
|
+
// Set arela_path for pedimento_simplificado documents
|
|
586
|
+
if (detection.arelaPath) {
|
|
587
|
+
record.arela_path = detection.arelaPath;
|
|
588
|
+
}
|
|
589
|
+
|
|
590
|
+
// Extract RFC from fields if available
|
|
591
|
+
const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
|
|
592
|
+
if (rfcField) {
|
|
593
|
+
record.rfc = rfcField.value;
|
|
594
|
+
}
|
|
595
|
+
} else {
|
|
596
|
+
record.status = 'not-detected';
|
|
597
|
+
if (detection.error) {
|
|
598
|
+
record.message = detection.error;
|
|
390
599
|
}
|
|
391
600
|
}
|
|
601
|
+
} catch (error) {
|
|
602
|
+
console.error(`❌ Error detecting ${record.filename}:`, error.message);
|
|
603
|
+
record.status = 'detection-error';
|
|
604
|
+
record.message = error.message;
|
|
392
605
|
}
|
|
606
|
+
} else {
|
|
607
|
+
record.status = 'unsupported';
|
|
608
|
+
record.message = 'File type not supported for detection';
|
|
393
609
|
}
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
`⚠️ Error fetching from Supabase or reading local log: ${e.message}. Proceeding with an empty set of processed paths initially.`,
|
|
397
|
-
);
|
|
398
|
-
// Ensure local log is still attempted if Supabase connection itself fails
|
|
399
|
-
const lines = fs.existsSync(logFilePath)
|
|
400
|
-
? fs.readFileSync(logFilePath, 'utf-8').split('\\n')
|
|
401
|
-
: [];
|
|
402
|
-
for (const line of lines) {
|
|
403
|
-
const match = line.match(/(SUCCESS|SKIPPED): .*? -> (.+)/);
|
|
404
|
-
if (match) {
|
|
405
|
-
const [, , path] = match;
|
|
406
|
-
processed.add(path.trim());
|
|
407
|
-
}
|
|
408
|
-
}
|
|
610
|
+
|
|
611
|
+
records.push(record);
|
|
409
612
|
}
|
|
410
|
-
|
|
613
|
+
|
|
614
|
+
if (records.length === 0) {
|
|
615
|
+
console.log('📝 No new records to insert (all were duplicates or errors)');
|
|
616
|
+
return [];
|
|
617
|
+
}
|
|
618
|
+
|
|
619
|
+
console.log(`💾 Inserting ${records.length} new records into uploader table...`);
|
|
620
|
+
|
|
621
|
+
const { data, error } = await supabase
|
|
622
|
+
.from('uploader')
|
|
623
|
+
.insert(records)
|
|
624
|
+
.select();
|
|
625
|
+
|
|
626
|
+
if (error) {
|
|
627
|
+
throw new Error(`Failed to insert stats records: ${error.message}`);
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
return data;
|
|
411
631
|
};
|
|
412
632
|
|
|
413
|
-
|
|
633
|
+
/**
|
|
634
|
+
* OPTIMIZED: Insert ONLY file stats into uploader table (Phase 1)
|
|
635
|
+
* No file reading, no detection - just filesystem metadata
|
|
636
|
+
* Returns summary statistics instead of full records for better performance
|
|
637
|
+
*/
|
|
638
|
+
const insertStatsOnlyToUploaderTable = async (files, options) => {
|
|
639
|
+
if (!supabase) {
|
|
640
|
+
throw new Error('Supabase client not initialized. Stats mode requires Supabase connection.');
|
|
641
|
+
}
|
|
414
642
|
|
|
415
|
-
const
|
|
416
|
-
|
|
417
|
-
let lastError;
|
|
643
|
+
const batchSize = 1000; // Large batch size for performance
|
|
644
|
+
const allRecords = [];
|
|
418
645
|
|
|
419
|
-
|
|
646
|
+
// Prepare all file stats data first - OPTIMIZED to use pre-computed stats
|
|
647
|
+
console.log('📊 Collecting filesystem stats...');
|
|
648
|
+
for (const file of files) {
|
|
420
649
|
try {
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
|
|
429
|
-
|
|
430
|
-
|
|
431
|
-
|
|
432
|
-
|
|
433
|
-
|
|
434
|
-
|
|
650
|
+
// Use pre-computed stats if available, otherwise call fs.statSync
|
|
651
|
+
const stats = file.stats || fs.statSync(file.path);
|
|
652
|
+
const originalPath = options.clientPath || file.path;
|
|
653
|
+
const fileExtension = path.extname(file.path).toLowerCase().replace('.', '');
|
|
654
|
+
|
|
655
|
+
const record = {
|
|
656
|
+
document_type: null,
|
|
657
|
+
size: stats.size,
|
|
658
|
+
num_pedimento: null,
|
|
659
|
+
filename: file.originalName || path.basename(file.path),
|
|
660
|
+
original_path: originalPath,
|
|
661
|
+
arela_path: null,
|
|
662
|
+
status: 'fs-stats',
|
|
663
|
+
rfc: null,
|
|
664
|
+
message: null,
|
|
665
|
+
file_extension: fileExtension,
|
|
666
|
+
created_at: new Date().toISOString(),
|
|
667
|
+
modified_at: stats.mtime.toISOString()
|
|
668
|
+
};
|
|
669
|
+
|
|
670
|
+
allRecords.push(record);
|
|
435
671
|
} catch (error) {
|
|
436
|
-
|
|
437
|
-
|
|
672
|
+
console.error(`❌ Error reading stats for ${file.path}:`, error.message);
|
|
673
|
+
}
|
|
674
|
+
}
|
|
438
675
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
|
|
444
|
-
|
|
445
|
-
|
|
676
|
+
if (allRecords.length === 0) {
|
|
677
|
+
console.log('📝 No file stats to insert');
|
|
678
|
+
return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
console.log(`💾 Bulk inserting ${allRecords.length} file stats in batches of ${batchSize}...`);
|
|
682
|
+
|
|
683
|
+
let totalInserted = 0;
|
|
684
|
+
let totalSkipped = 0;
|
|
685
|
+
|
|
686
|
+
// Process in batches for optimal performance
|
|
687
|
+
for (let i = 0; i < allRecords.length; i += batchSize) {
|
|
688
|
+
const batch = allRecords.slice(i, i + batchSize);
|
|
689
|
+
|
|
690
|
+
try {
|
|
691
|
+
// OPTIMIZED: Use upsert without select to avoid unnecessary data transfer
|
|
692
|
+
const { error, count } = await supabase
|
|
693
|
+
.from('uploader')
|
|
694
|
+
.upsert(batch, {
|
|
695
|
+
onConflict: 'original_path',
|
|
696
|
+
ignoreDuplicates: false,
|
|
697
|
+
count: 'exact'
|
|
698
|
+
});
|
|
699
|
+
|
|
700
|
+
if (error) {
|
|
701
|
+
console.error(`❌ Error inserting batch ${Math.floor(i / batchSize) + 1}:`, error.message);
|
|
702
|
+
continue;
|
|
446
703
|
}
|
|
704
|
+
|
|
705
|
+
// For upsert operations, we can't easily distinguish between inserts and updates
|
|
706
|
+
// from the count alone, but we can estimate based on the assumption that most
|
|
707
|
+
// operations in --stats-only mode are likely new inserts
|
|
708
|
+
const batchProcessed = batch.length;
|
|
709
|
+
|
|
710
|
+
// Since we're using upsert with ignoreDuplicates: false, the count represents
|
|
711
|
+
// the actual number of rows affected (both inserts and updates)
|
|
712
|
+
const affected = count || batchProcessed;
|
|
713
|
+
|
|
714
|
+
// For simplicity and performance, we'll assume most are new inserts in stats-only mode
|
|
715
|
+
// This is reasonable since stats-only is typically run on new file sets
|
|
716
|
+
totalInserted += affected;
|
|
717
|
+
|
|
718
|
+
console.log(`✅ Batch ${Math.floor(i / batchSize) + 1}: ${affected} rows processed`);
|
|
719
|
+
} catch (error) {
|
|
720
|
+
console.error(`❌ Unexpected error in batch ${Math.floor(i / batchSize) + 1}:`, error.message);
|
|
447
721
|
}
|
|
448
722
|
}
|
|
449
723
|
|
|
724
|
+
// Calculate skipped as difference between total records and inserted
|
|
725
|
+
totalSkipped = allRecords.length - totalInserted;
|
|
726
|
+
|
|
727
|
+
console.log(`📊 Phase 1 Summary: ${totalInserted} records processed, estimated ${totalSkipped} were updates`);
|
|
728
|
+
|
|
450
729
|
return {
|
|
451
|
-
|
|
452
|
-
|
|
453
|
-
|
|
730
|
+
totalInserted,
|
|
731
|
+
totalSkipped,
|
|
732
|
+
totalProcessed: allRecords.length
|
|
454
733
|
};
|
|
455
734
|
};
|
|
456
735
|
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
) => {
|
|
466
|
-
let currentFile = file;
|
|
467
|
-
let result = {
|
|
468
|
-
success: false,
|
|
469
|
-
skipped: false,
|
|
470
|
-
error: null,
|
|
471
|
-
message: '',
|
|
472
|
-
};
|
|
736
|
+
/**
|
|
737
|
+
* PHASE 2: Process PDF files for pedimento-simplificado detection
|
|
738
|
+
* Only processes files with status 'fs-stats' and file_extension 'pdf'
|
|
739
|
+
*/
|
|
740
|
+
const detectPedimentosInDatabase = async (options = {}) => {
|
|
741
|
+
if (!supabase) {
|
|
742
|
+
throw new Error('Supabase client not initialized.');
|
|
743
|
+
}
|
|
473
744
|
|
|
474
|
-
|
|
475
|
-
// Check if we need to rename the file
|
|
476
|
-
if (options.renameFiles) {
|
|
477
|
-
const originalName = path.basename(file);
|
|
478
|
-
const sanitizedName = sanitizeFileName(originalName);
|
|
479
|
-
|
|
480
|
-
if (originalName !== sanitizedName) {
|
|
481
|
-
const newFilePath = path.join(path.dirname(file), sanitizedName);
|
|
482
|
-
|
|
483
|
-
if (options.dryRun) {
|
|
484
|
-
result.message = `Would rename: ${originalName} → ${sanitizedName}`;
|
|
485
|
-
result.skipped = true;
|
|
486
|
-
return result;
|
|
487
|
-
} else {
|
|
488
|
-
try {
|
|
489
|
-
fs.renameSync(file, newFilePath);
|
|
490
|
-
currentFile = newFilePath;
|
|
491
|
-
writeLog(`RENAMED: ${originalName} → ${sanitizedName}`);
|
|
492
|
-
await sendLogToSupabase({
|
|
493
|
-
file: originalName,
|
|
494
|
-
uploadPath: sanitizedName,
|
|
495
|
-
status: 'renamed',
|
|
496
|
-
message: `Renamed from ${originalName}`,
|
|
497
|
-
});
|
|
498
|
-
} catch (renameError) {
|
|
499
|
-
result.error = `Failed to rename ${originalName}: ${renameError.message}`;
|
|
500
|
-
writeLog(`RENAME_ERROR: ${originalName} | ${renameError.message}`);
|
|
501
|
-
return result;
|
|
502
|
-
}
|
|
503
|
-
}
|
|
504
|
-
}
|
|
505
|
-
}
|
|
745
|
+
console.log('🔍 Phase 2: Starting PDF detection for pedimento-simplificado documents...');
|
|
506
746
|
|
|
507
|
-
|
|
508
|
-
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
const
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
uploadPath !== uploadPathRaw ||
|
|
527
|
-
originalFileName !== sanitizedFileName
|
|
528
|
-
) {
|
|
529
|
-
writeLog(`SANITIZED: ${relativePathRaw} → ${uploadPath}`);
|
|
530
|
-
await sendLogToSupabase({
|
|
531
|
-
file: currentFile,
|
|
532
|
-
uploadPath: relativePathRaw,
|
|
533
|
-
status: 'sanitized',
|
|
534
|
-
message: `Sanitized to ${uploadPath} (Arela Version: ${version})`,
|
|
535
|
-
});
|
|
747
|
+
// Get all PDF files that need detection (status = 'fs-stats' and extension = 'pdf')
|
|
748
|
+
let allPdfRecords = [];
|
|
749
|
+
let hasMore = true;
|
|
750
|
+
let offset = 0;
|
|
751
|
+
const queryBatchSize = 1000;
|
|
752
|
+
|
|
753
|
+
console.log('📥 Fetching PDF files from database...');
|
|
754
|
+
|
|
755
|
+
while (hasMore) {
|
|
756
|
+
const { data: batch, error: queryError } = await supabase
|
|
757
|
+
.from('uploader')
|
|
758
|
+
.select('id, original_path, filename, file_extension, status')
|
|
759
|
+
.eq('status', 'fs-stats')
|
|
760
|
+
.eq('file_extension', 'pdf')
|
|
761
|
+
.ilike('filename', '%simp%')
|
|
762
|
+
.range(offset, offset + queryBatchSize - 1);
|
|
763
|
+
|
|
764
|
+
if (queryError) {
|
|
765
|
+
throw new Error(`Failed to fetch PDF records: ${queryError.message}`);
|
|
536
766
|
}
|
|
537
767
|
|
|
538
|
-
if (
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
768
|
+
if (!batch || batch.length === 0) {
|
|
769
|
+
hasMore = false;
|
|
770
|
+
} else {
|
|
771
|
+
allPdfRecords.push(...batch);
|
|
772
|
+
offset += queryBatchSize;
|
|
773
|
+
console.log(`📄 Fetched ${batch.length} PDF records (total: ${allPdfRecords.length})`);
|
|
542
774
|
}
|
|
775
|
+
}
|
|
543
776
|
|
|
544
|
-
|
|
777
|
+
if (allPdfRecords.length === 0) {
|
|
778
|
+
console.log('📝 No PDF files found for detection');
|
|
779
|
+
return { detectedCount: 0, processedCount: 0, errorCount: 0 };
|
|
780
|
+
}
|
|
545
781
|
|
|
546
|
-
|
|
782
|
+
console.log(`🔍 Processing ${allPdfRecords.length} PDF files for detection...`);
|
|
547
783
|
|
|
548
|
-
|
|
549
|
-
|
|
550
|
-
|
|
551
|
-
|
|
552
|
-
|
|
553
|
-
file: currentFile,
|
|
554
|
-
uploadPath,
|
|
555
|
-
status: 'skipped',
|
|
556
|
-
message: 'Already exists in bucket',
|
|
557
|
-
});
|
|
558
|
-
return result;
|
|
559
|
-
}
|
|
560
|
-
|
|
561
|
-
const { error } = await uploadWithRetry(() =>
|
|
562
|
-
supabase.storage.from(bucket).upload(uploadPath, content, {
|
|
563
|
-
upsert: true,
|
|
564
|
-
contentType,
|
|
565
|
-
metadata: {
|
|
566
|
-
originalName: path.basename(currentFile),
|
|
567
|
-
sanitizedName: path.basename(uploadPath),
|
|
568
|
-
clientPath: path.posix.join(
|
|
569
|
-
basePath,
|
|
570
|
-
folder,
|
|
571
|
-
path.relative(sourcePath, currentFile).replace(/\\/g, '/'),
|
|
572
|
-
),
|
|
573
|
-
arelaVersion: version,
|
|
574
|
-
},
|
|
575
|
-
}),
|
|
576
|
-
);
|
|
784
|
+
const detectionService = new FileDetectionService();
|
|
785
|
+
const batchSize = parseInt(options.batchSize) || 10; // Smaller batches for file I/O
|
|
786
|
+
let totalDetected = 0;
|
|
787
|
+
let totalProcessed = 0;
|
|
788
|
+
let totalErrors = 0;
|
|
577
789
|
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
591
|
-
|
|
592
|
-
|
|
593
|
-
|
|
594
|
-
|
|
595
|
-
|
|
596
|
-
|
|
790
|
+
// Create progress bar
|
|
791
|
+
const progressBar = new cliProgress.SingleBar({
|
|
792
|
+
format: '🔍 PDF Detection |{bar}| {percentage}% | {value}/{total} | Detected: {detected} | Errors: {errors}',
|
|
793
|
+
barCompleteChar: '█',
|
|
794
|
+
barIncompleteChar: '░',
|
|
795
|
+
hideCursor: true,
|
|
796
|
+
});
|
|
797
|
+
|
|
798
|
+
progressBar.start(allPdfRecords.length, 0, { detected: 0, errors: 0 });
|
|
799
|
+
|
|
800
|
+
// Process files in smaller batches to avoid overwhelming the system
|
|
801
|
+
for (let i = 0; i < allPdfRecords.length; i += batchSize) {
|
|
802
|
+
const batch = allPdfRecords.slice(i, i + batchSize);
|
|
803
|
+
const updatePromises = [];
|
|
804
|
+
|
|
805
|
+
for (const record of batch) {
|
|
806
|
+
try {
|
|
807
|
+
// Check if file still exists
|
|
808
|
+
if (!fs.existsSync(record.original_path)) {
|
|
809
|
+
updatePromises.push(
|
|
810
|
+
supabase
|
|
811
|
+
.from('uploader')
|
|
812
|
+
.update({
|
|
813
|
+
status: 'file-not-found',
|
|
814
|
+
message: 'File no longer exists at original path'
|
|
815
|
+
})
|
|
816
|
+
.eq('id', record.id)
|
|
817
|
+
);
|
|
818
|
+
totalErrors++;
|
|
819
|
+
continue;
|
|
820
|
+
}
|
|
821
|
+
|
|
822
|
+
// Perform detection
|
|
823
|
+
const detection = await detectionService.detectFile(record.original_path);
|
|
824
|
+
totalProcessed++;
|
|
825
|
+
|
|
826
|
+
const updateData = {
|
|
827
|
+
status: detection.detectedType ? 'detected' : 'not-detected',
|
|
828
|
+
document_type: detection.detectedType,
|
|
829
|
+
num_pedimento: detection.detectedPedimento,
|
|
830
|
+
arela_path: detection.arelaPath,
|
|
831
|
+
message: detection.error || null
|
|
832
|
+
};
|
|
833
|
+
|
|
834
|
+
// Extract RFC from fields if available
|
|
835
|
+
if (detection.fields) {
|
|
836
|
+
const rfcField = detection.fields.find(f => f.name === 'rfc' && f.found);
|
|
837
|
+
if (rfcField) {
|
|
838
|
+
updateData.rfc = rfcField.value;
|
|
839
|
+
}
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
if (detection.detectedType) {
|
|
843
|
+
totalDetected++;
|
|
844
|
+
}
|
|
845
|
+
|
|
846
|
+
updatePromises.push(
|
|
847
|
+
supabase
|
|
848
|
+
.from('uploader')
|
|
849
|
+
.update(updateData)
|
|
850
|
+
.eq('id', record.id)
|
|
851
|
+
);
|
|
852
|
+
|
|
853
|
+
} catch (error) {
|
|
854
|
+
console.error(`❌ Error detecting ${record.filename}:`, error.message);
|
|
855
|
+
totalErrors++;
|
|
856
|
+
|
|
857
|
+
updatePromises.push(
|
|
858
|
+
supabase
|
|
859
|
+
.from('uploader')
|
|
860
|
+
.update({
|
|
861
|
+
status: 'detection-error',
|
|
862
|
+
message: error.message
|
|
863
|
+
})
|
|
864
|
+
.eq('id', record.id)
|
|
865
|
+
);
|
|
866
|
+
}
|
|
867
|
+
}
|
|
868
|
+
|
|
869
|
+
// Execute all updates in parallel for this batch
|
|
870
|
+
try {
|
|
871
|
+
await Promise.all(updatePromises);
|
|
872
|
+
} catch (error) {
|
|
873
|
+
console.error(`❌ Error updating batch:`, error.message);
|
|
597
874
|
}
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
uploadPath: currentFile,
|
|
604
|
-
status: 'error',
|
|
605
|
-
message: result.error,
|
|
875
|
+
|
|
876
|
+
// Update progress
|
|
877
|
+
progressBar.update(Math.min(i + batchSize, allPdfRecords.length), {
|
|
878
|
+
detected: totalDetected,
|
|
879
|
+
errors: totalErrors
|
|
606
880
|
});
|
|
607
881
|
}
|
|
608
882
|
|
|
609
|
-
|
|
883
|
+
progressBar.stop();
|
|
884
|
+
|
|
885
|
+
console.log(`📊 Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`);
|
|
886
|
+
return {
|
|
887
|
+
detectedCount: totalDetected,
|
|
888
|
+
processedCount: totalProcessed,
|
|
889
|
+
errorCount: totalErrors
|
|
890
|
+
};
|
|
610
891
|
};
|
|
611
892
|
|
|
612
|
-
// Function to process files in parallel batches
|
|
613
893
|
const processFilesInBatches = async (
|
|
614
894
|
files,
|
|
615
895
|
batchSize,
|
|
@@ -619,13 +899,20 @@ const processFilesInBatches = async (
|
|
|
619
899
|
sourcePath,
|
|
620
900
|
processedPaths,
|
|
621
901
|
) => {
|
|
622
|
-
let
|
|
623
|
-
let
|
|
624
|
-
let
|
|
902
|
+
let totalUploaded = 0;
|
|
903
|
+
let totalDetected = 0;
|
|
904
|
+
let totalOrganized = 0;
|
|
905
|
+
let totalErrors = 0;
|
|
906
|
+
let totalSkipped = 0;
|
|
907
|
+
|
|
908
|
+
const messageBuffer = [];
|
|
909
|
+
|
|
910
|
+
const progressBarFormat = options.statsOnly
|
|
911
|
+
? '📊 Processing [{bar}] {percentage}% | {value}/{total} files | Stats: {successCount} | Errors: {failureCount} | Duplicates: {skippedCount}'
|
|
912
|
+
: '📂 Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}';
|
|
625
913
|
|
|
626
914
|
const progressBar = new cliProgress.SingleBar({
|
|
627
|
-
format:
|
|
628
|
-
'📂 Processing [{bar}] {percentage}% | {value}/{total} files | Success: {successCount} | Errors: {failureCount} | Skipped: {skippedCount}',
|
|
915
|
+
format: progressBarFormat,
|
|
629
916
|
barCompleteChar: '█',
|
|
630
917
|
barIncompleteChar: '░',
|
|
631
918
|
hideCursor: true,
|
|
@@ -637,101 +924,928 @@ const processFilesInBatches = async (
|
|
|
637
924
|
skippedCount: 0,
|
|
638
925
|
});
|
|
639
926
|
|
|
640
|
-
|
|
641
|
-
|
|
927
|
+
if (options.statsOnly) {
|
|
928
|
+
// OPTIMIZED Stats-only mode - Only read filesystem stats, no file detection
|
|
929
|
+
console.log('📊 Phase 1: Processing files in optimized stats-only mode (no detection)...');
|
|
930
|
+
|
|
931
|
+
for (let i = 0; i < files.length; i += batchSize) {
|
|
932
|
+
const batch = files.slice(i, i + batchSize);
|
|
933
|
+
|
|
934
|
+
// OPTIMIZED: Batch read file stats to reduce I/O overhead
|
|
935
|
+
const fileStatsResults = batchReadFileStats(batch);
|
|
936
|
+
const statsFiles = fileStatsResults
|
|
937
|
+
.filter(result => result.stats !== null) // Only include files with valid stats
|
|
938
|
+
.map((result) => {
|
|
939
|
+
const originalFileName = path.basename(result.path);
|
|
940
|
+
|
|
941
|
+
return {
|
|
942
|
+
path: result.path,
|
|
943
|
+
originalName: originalFileName,
|
|
944
|
+
stats: result.stats, // Pass pre-computed stats to avoid redundant calls
|
|
945
|
+
};
|
|
946
|
+
});
|
|
642
947
|
|
|
643
|
-
|
|
644
|
-
|
|
645
|
-
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
651
|
-
|
|
652
|
-
|
|
653
|
-
)
|
|
654
|
-
|
|
655
|
-
|
|
948
|
+
// Log any files that couldn't be read
|
|
949
|
+
const failedFiles = fileStatsResults.filter(result => result.error !== null);
|
|
950
|
+
if (failedFiles.length > 0) {
|
|
951
|
+
console.log(`⚠️ Could not read stats for ${failedFiles.length} files in batch`);
|
|
952
|
+
failedFiles.forEach(failed => {
|
|
953
|
+
console.error(` ❌ ${failed.path}: ${failed.error}`);
|
|
954
|
+
});
|
|
955
|
+
}
|
|
956
|
+
|
|
957
|
+
try {
|
|
958
|
+
const result = await insertStatsOnlyToUploaderTable(statsFiles, options);
|
|
959
|
+
|
|
960
|
+
totalUploaded += result.totalInserted;
|
|
961
|
+
totalSkipped += result.totalSkipped;
|
|
962
|
+
totalErrors += failedFiles.length; // Count failed file reads as errors
|
|
963
|
+
|
|
964
|
+
progressBar.update(Math.min(i + batch.length, files.length), {
|
|
965
|
+
successCount: totalUploaded,
|
|
966
|
+
failureCount: totalErrors,
|
|
967
|
+
skippedCount: totalSkipped,
|
|
968
|
+
});
|
|
969
|
+
|
|
970
|
+
} catch (error) {
|
|
971
|
+
console.error(`❌ Error processing stats batch:`, error.message);
|
|
972
|
+
totalErrors += batch.length;
|
|
973
|
+
|
|
974
|
+
progressBar.update(Math.min(i + batch.length, files.length), {
|
|
975
|
+
successCount: totalUploaded,
|
|
976
|
+
failureCount: totalErrors,
|
|
977
|
+
skippedCount: totalSkipped,
|
|
978
|
+
});
|
|
979
|
+
}
|
|
980
|
+
}
|
|
981
|
+
} else if (apiMode && !options.forceSupabase) {
|
|
982
|
+
// API Mode - Process in batches
|
|
983
|
+
for (let i = 0; i < files.length; i += batchSize) {
|
|
984
|
+
const batch = files.slice(i, i + batchSize);
|
|
985
|
+
let sanitizedRelativePath;
|
|
986
|
+
|
|
987
|
+
const apiFiles = batch
|
|
988
|
+
.map((file) => {
|
|
989
|
+
const relativePathRaw = path
|
|
990
|
+
.relative(basePath, file)
|
|
991
|
+
.replace(/^[\\/]+/, '')
|
|
992
|
+
.replace(/\\/g, '/');
|
|
993
|
+
|
|
994
|
+
const pathParts = relativePathRaw.split('/');
|
|
995
|
+
const originalFileName = pathParts[pathParts.length - 1];
|
|
996
|
+
const sanitizedFileName = sanitizeFileName(originalFileName);
|
|
997
|
+
pathParts[pathParts.length - 1] = sanitizedFileName;
|
|
998
|
+
sanitizedRelativePath = pathParts.join('/');
|
|
999
|
+
|
|
1000
|
+
let uploadPath;
|
|
1001
|
+
|
|
1002
|
+
// Handle combined folder structure + auto-detection
|
|
1003
|
+
if (options.folderStructure && options.autoDetectStructure) {
|
|
1004
|
+
// OPTIMIZED: Use cached detection to avoid redundant parsing
|
|
1005
|
+
const detection = getCachedPathDetection(file, basePath);
|
|
1006
|
+
if (detection.detected) {
|
|
1007
|
+
const autoStructure = `${detection.year}/${detection.pedimento}`;
|
|
1008
|
+
const combinedStructure = `${options.folderStructure}/${autoStructure}`;
|
|
1009
|
+
uploadPath = path.posix.join(
|
|
1010
|
+
combinedStructure,
|
|
1011
|
+
sanitizedFileName,
|
|
1012
|
+
);
|
|
1013
|
+
logVerbose(
|
|
1014
|
+
`📁 Combined structure: ${options.folderStructure}/${autoStructure} for ${originalFileName} -> ${uploadPath}`,
|
|
1015
|
+
);
|
|
1016
|
+
} else {
|
|
1017
|
+
// Fallback to just custom structure if auto-detection fails
|
|
1018
|
+
uploadPath = path.posix.join(
|
|
1019
|
+
options.folderStructure,
|
|
1020
|
+
sanitizedFileName,
|
|
1021
|
+
);
|
|
1022
|
+
logVerbose(
|
|
1023
|
+
`📁 Custom structure (auto-detection failed): ${uploadPath}`,
|
|
1024
|
+
);
|
|
1025
|
+
}
|
|
1026
|
+
} else if (options.folderStructure) {
|
|
1027
|
+
// Use custom folder structure only
|
|
1028
|
+
uploadPath = path.posix.join(
|
|
1029
|
+
options.folderStructure,
|
|
1030
|
+
sanitizedFileName,
|
|
1031
|
+
);
|
|
1032
|
+
logVerbose(`📁 Custom structure: ${uploadPath}`);
|
|
1033
|
+
} else if (options.autoDetectStructure) {
|
|
1034
|
+
// Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
|
|
1035
|
+
const detection = getCachedPathDetection(file, basePath);
|
|
1036
|
+
if (detection.detected) {
|
|
1037
|
+
const autoStructure = `${detection.year}/${detection.pedimento}`;
|
|
1038
|
+
uploadPath = path.posix.join(autoStructure, sanitizedFileName);
|
|
1039
|
+
console.log(
|
|
1040
|
+
`🔍 Auto-detected: ${autoStructure} for ${originalFileName} -> ${uploadPath}`,
|
|
1041
|
+
);
|
|
1042
|
+
} else {
|
|
1043
|
+
uploadPath = options.prefix
|
|
1044
|
+
? path.posix.join(options.prefix, sanitizedRelativePath)
|
|
1045
|
+
: sanitizedRelativePath;
|
|
1046
|
+
console.log(`📁 Using relative path: ${uploadPath}`);
|
|
1047
|
+
}
|
|
1048
|
+
} else {
|
|
1049
|
+
uploadPath = options.prefix
|
|
1050
|
+
? path.posix.join(options.prefix, sanitizedRelativePath)
|
|
1051
|
+
: sanitizedRelativePath;
|
|
1052
|
+
console.log(`📁 Using standard path: ${uploadPath}`);
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
if (processedPaths.has(uploadPath)) {
|
|
1056
|
+
totalSkipped++;
|
|
1057
|
+
writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
|
|
1058
|
+
return null;
|
|
1059
|
+
}
|
|
1060
|
+
|
|
1061
|
+
return {
|
|
1062
|
+
path: file,
|
|
1063
|
+
name: sanitizedFileName,
|
|
1064
|
+
originalName: originalFileName,
|
|
1065
|
+
uploadPath: uploadPath.replace(/\\/g, '/'), // Ensure forward slashes
|
|
1066
|
+
contentType: mime.lookup(file) || 'application/octet-stream',
|
|
1067
|
+
};
|
|
1068
|
+
})
|
|
1069
|
+
.filter(Boolean);
|
|
1070
|
+
|
|
1071
|
+
if (apiFiles.length > 0) {
|
|
1072
|
+
// console.log(`🔄 Processing batch of ${apiFiles.length} files`);
|
|
1073
|
+
// apiFiles.forEach(f => console.log(` 📄 ${f.name} -> ${f.uploadPath}`));
|
|
1074
|
+
|
|
1075
|
+
try {
|
|
1076
|
+
+ // Use clientPath from options if specified, otherwise construct from detection or folder
+ let clientPath = options.clientPath;
+
+ if (!clientPath && apiFiles.length > 0) {
+ const firstFile = apiFiles[0];
+ // OPTIMIZED: Use cached detection to avoid redundant parsing
+ const detection = getCachedPathDetection(firstFile.path, basePath);
+ if (detection.detected) {
+ // clientPath = `${detection.year}/${detection.pedimento}/`;
+ clientPath = path
+ .resolve(basePath, sanitizedRelativePath)
+ .replace(/\\/g, '/');
+ } else {
+ // Fallback to folder structure if no year/pedimento detected
+ clientPath = path.resolve(basePath, folder).replace(/\\/g, '/');
+ }
+ }
+
+ const result = await uploadToApi(apiFiles, {
+ ...options,
+ clientPath: clientPath,
+ });
+
+ totalUploaded += result.stats.uploadedCount;
+ totalDetected += result.stats.detectedCount;
+ totalOrganized += result.stats.organizedCount;
+ totalErrors += result.stats.errorCount;
+
+ result.uploaded.forEach((upload) => {
+ const apiFile = apiFiles.find(
+ (f) => f.name === upload.originalName,
+ );
+ if (apiFile) {
+ writeLog(`SUCCESS: ${apiFile.path} -> ${apiFile.uploadPath}`);
+ processedPaths.add(apiFile.uploadPath);
+ }
+ });
+
+ result.errors.forEach((error) => {
+ writeLog(
+ `ERROR: ${error.fileName}: ${error.error} (${error.step})`,
+ );
+ messageBuffer.push(
+ `❌ ${error.fileName}: ${error.error} (${error.step})`,
+ );
+ });
+ } catch (error) {
+ totalErrors += apiFiles.length;
+ apiFiles.forEach((file) => {
+ writeLog(`ERROR: ${file.path}: ${error.message}`);
+ messageBuffer.push(`❌ ${file.name}: ${error.message}`);
+ });
+ }
+ }
+
+ progressBar.update(i + batch.length, {
+ successCount: totalUploaded,
+ failureCount: totalErrors,
+ skippedCount: totalSkipped,
+ });
+
+ if (i + batchSize < files.length) {
+ await new Promise((resolve) => setTimeout(resolve, BATCH_DELAY));
+ }
+ }
+ } else {
+ // Direct Supabase mode
+ for (let i = 0; i < files.length; i++) {
+ const file = files[i];
+ try {
+ const relativePath = path.relative(basePath, file);
+ let uploadPath;
+
+ // Handle combined folder structure + auto-detection
+ if (options.folderStructure && options.autoDetectStructure) {
+ const detection = getCachedPathDetection(file, basePath);
+ if (detection.detected) {
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
+ const combinedStructure = `${options.folderStructure}/${autoStructure}`;
+ const fileName = path.basename(file);
+ uploadPath = path.join(combinedStructure, fileName);
+ console.log(
+ `📁 Combined structure: ${options.folderStructure}/${autoStructure} for ${fileName}`,
+ );
+ } else {
+ // Fallback to just custom structure if auto-detection fails
+ const fileName = path.basename(file);
+ uploadPath = path.join(options.folderStructure, fileName);
+ console.log(
+ `📁 Custom structure (auto-detection failed): ${uploadPath}`,
+ );
+ }
+ } else if (options.folderStructure) {
+ // Use custom folder structure only
+ const fileName = path.basename(file);
+ uploadPath = path.join(options.folderStructure, fileName);
+ console.log(`📁 Custom structure: ${uploadPath}`);
+ } else if (options.autoDetectStructure) {
+ // Auto-detect structure from path if enabled - OPTIMIZED: Use cached detection
+ const detection = getCachedPathDetection(file, basePath);
+ if (detection.detected) {
+ const autoStructure = `${detection.year}/${detection.pedimento}`;
+ const fileName = path.basename(file);
+ uploadPath = path.join(autoStructure, fileName);
+ } else {
+ uploadPath = options.prefix
+ ? path.join(options.prefix, relativePath)
+ : relativePath;
+ }
+ } else {
+ uploadPath = options.prefix
+ ? path.join(options.prefix, relativePath)
+ : relativePath;
+ }

-
-
-
-
-
-
-
-
-
+ if (processedPaths.has(uploadPath)) {
+ totalSkipped++;
+ writeLog(`SKIPPED: ${file} -> ${uploadPath}`);
+ } else {
+ await uploadToSupabase(file, uploadPath);
+ totalUploaded++;
+ writeLog(`SUCCESS: ${file} -> ${uploadPath}`);
+ processedPaths.add(uploadPath);
+ }
+ } catch (error) {
+ totalErrors++;
+ writeLog(`ERROR: ${file}: ${error.message}`);
+ messageBuffer.push(`❌ ${path.basename(file)}: ${error.message}`);
 }

-
-
+ progressBar.update(i + 1, {
+ successCount: totalUploaded,
+ failureCount: totalErrors,
+ skippedCount: totalSkipped,
+ });
+ }
+ }
+
+ progressBar.stop();
+
+ const errorMessages = messageBuffer.filter((msg) => msg.startsWith('❌'));
+ if (errorMessages.length > 0) {
+ console.log('\n🚨 Errors encountered during processing:');
+ errorMessages.forEach((msg) => console.error(msg));
+ }
+
+ return {
+ successCount: totalUploaded,
+ detectedCount: totalDetected,
+ organizedCount: totalOrganized,
+ failureCount: totalErrors,
+ skippedCount: totalSkipped,
+ };
+ };
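Note, illustration only and not part of the published package contents: the direct-Supabase branch above resolves the destination path through a four-way if/else chain. A minimal sketch of the same precedence, using a hypothetical helper name (pickUploadPath) and a detection object standing in for getCachedPathDetection(file, basePath):

import path from 'path';

// Hypothetical condensation of the branch above: custom structure combined with a
// detected year/pedimento wins, then custom structure alone, then detection alone,
// then prefix plus the path relative to the base directory.
const pickUploadPath = (opts, detection, file, relativePath) => {
  const fileName = path.basename(file);
  if (opts.folderStructure && opts.autoDetectStructure && detection.detected) {
    return path.join(opts.folderStructure, `${detection.year}/${detection.pedimento}`, fileName);
  }
  if (opts.folderStructure) {
    return path.join(opts.folderStructure, fileName);
  }
  if (opts.autoDetectStructure && detection.detected) {
    return path.join(`${detection.year}/${detection.pedimento}`, fileName);
  }
  return opts.prefix ? path.join(opts.prefix, relativePath) : relativePath;
};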
+
+ /**
+ * Upload files to Arela API based on specific RFC values
+ */
+ const uploadFilesByRfc = async (options = {}) => {
+ if (!supabase) {
+ console.error('❌ Supabase client not initialized');
+ process.exit(1);
+ }
+
+ if (!API_BASE_URL || !API_TOKEN) {
+ console.error('❌ Arela API configuration missing. Please set ARELA_API_URL and ARELA_API_TOKEN environment variables.');
+ process.exit(1);
+ }
+
+ if (!uploadRfcs || uploadRfcs.length === 0) {
+ console.error('❌ No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.');
+ console.error(' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"');
+ process.exit(1);
+ }
+
+ console.log('🎯 RFC-based Upload Mode');
+ console.log(`📋 Target RFCs: ${uploadRfcs.join(', ')}`);
+ console.log('🔍 Searching for files to upload...');
+
+ // Step 1: Get all records that match the specified RFCs and have arela_path
+ const { data: rfcRecords, error: rfcError } = await supabase
+ .from('uploader')
+ .select('arela_path')
+ .in('rfc', uploadRfcs)
+ .not('arela_path', 'is', null);
+
+ if (rfcError) {
+ console.error('❌ Error fetching RFC records:', rfcError.message);
+ return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
+ }
+
+ if (!rfcRecords || rfcRecords.length === 0) {
+ console.log('ℹ️ No files found for the specified RFCs with arela_path');
+ console.log(` Make sure files for RFCs [${uploadRfcs.join(', ')}] have been processed and have arela_path values`);
+ return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+ }
+
+ // Step 2: Get unique arela_paths from the RFC matches
+ const uniqueArelaPaths = [...new Set(rfcRecords.map(r => r.arela_path))];
+ console.log(`� Found ${uniqueArelaPaths.length} unique arela_path(s) for the specified RFCs`);
+
+ // Step 3: Get ALL files that have these arela_paths (including supporting documents)
+ // Use pagination to ensure we get all files, regardless of count
+ let allRelatedFiles = [];
+ let hasMore = true;
+ let offset = 0;
+ const queryBatchSize = 1000;
+
+ console.log('📥 Fetching all related files (with pagination)...');
+
+ while (hasMore) {
+ const { data: batch, error: queryError } = await supabase
+ .from('uploader')
+ .select('id, original_path, arela_path, filename, rfc, document_type')
+ .in('arela_path', uniqueArelaPaths)
+ .not('original_path', 'is', null)
+ .range(offset, offset + queryBatchSize - 1);
+
+ if (queryError) {
+ console.error('❌ Error fetching related files:', queryError.message);
+ return { processedCount: 0, uploadedCount: 0, errorCount: 1 };
+ }
+
+ if (!batch || batch.length === 0) {
+ hasMore = false;
+ } else {
+ allRelatedFiles = allRelatedFiles.concat(batch);
+ offset += queryBatchSize;
+
+ // If we got less than queryBatchSize, we've reached the end
+ if (batch.length < queryBatchSize) {
+ hasMore = false;
 }
 }
+ }
+
+ if (!allRelatedFiles || allRelatedFiles.length === 0) {
+ console.log('ℹ️ No related files found for the arela_paths');
+ return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+ }
+
+ console.log(`📁 Found ${allRelatedFiles.length} total files to upload (including supporting documents)`);
+
+ // Group by RFC and arela_path for better organization
+ const filesByRfc = allRelatedFiles.reduce((acc, record) => {
+ const rfc = record.rfc || 'No RFC';
+ if (!acc[rfc]) {
+ acc[rfc] = [];
+ }
+ acc[rfc].push(record);
+ return acc;
+ }, {});
+
+ console.log('📊 Files by RFC (including supporting documents):');
+ for (const [rfc, files] of Object.entries(filesByRfc)) {
+ const documentTypes = [...new Set(files.map(f => f.document_type || 'Unknown'))];
+ console.log(` ${rfc}: ${files.length} files (${documentTypes.join(', ')})`);
+ }
+
+ // Group by arela_path for upload organization
+ const filesByPath = allRelatedFiles.reduce((acc, record) => {
+ const path = record.arela_path;
+ if (!acc[path]) {
+ acc[path] = [];
+ }
+ acc[path].push(record);
+ return acc;
+ }, {});
+
+ console.log('� Files grouped by arela_path:');
+ for (const [path, files] of Object.entries(filesByPath)) {
+ console.log(` ${path}: ${files.length} files`);
+ }
+
+ let totalProcessed = 0;
+ let totalUploaded = 0;
+ let totalErrors = 0;
+ let totalSkipped = 0;
+
+ // Create progress bar
+ const progressBar = new cliProgress.SingleBar({
+ format: '🚀 Uploading files |{bar}| {percentage}% | {value}/{total} | Uploaded: {uploaded} | Errors: {errors} | Skipped: {skipped}',
+ barCompleteChar: '█',
+ barIncompleteChar: '░',
+ hideCursor: true,
+ });

-
-
-
-
+ if (options.showProgress !== false) {
+ progressBar.start(allRelatedFiles.length, 0, {
+ uploaded: 0,
+ errors: 0,
+ skipped: 0,
 });
+ }
+
+ const batchSize = parseInt(options.batchSize) || 10;
+ console.log(`📦 Processing in batches of ${batchSize} files`);
+
+ // Process files in batches
+ for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
+ const batch = allRelatedFiles.slice(i, i + batchSize);
+ const batchNumber = Math.floor(i / batchSize) + 1;
+ const totalBatches = Math.ceil(allRelatedFiles.length / batchSize);
+
+ console.log(`\n📦 Processing batch ${batchNumber}/${totalBatches} (${batch.length} files)`);
+
+ // Prepare files for upload
+ const filesToUpload = [];
+
+ for (const record of batch) {
+ totalProcessed++;
+
+ try {
+ const originalPath = record.original_path;
+
+ // Check if file exists
+ if (!fs.existsSync(originalPath)) {
+ console.log(` ⚠️ File not found: ${originalPath}`);
+ totalSkipped++;
+ continue;
+ }
+
+ // OPTIMIZED: Read file and get size from buffer instead of separate fs.statSync call
+ const fileBuffer = fs.readFileSync(originalPath);
+
+ filesToUpload.push({
+ path: originalPath,
+ buffer: fileBuffer,
+ size: fileBuffer.length, // Get size from buffer instead of fs.statSync
+ name: record.filename,
+ arelaPath: record.arela_path,
+ rfc: record.rfc,
+ documentType: record.document_type,
+ });
+
+ } catch (error) {
+ console.error(` ❌ Error reading file ${record.original_path}:`, error.message);
+ totalErrors++;
+ }
+
+ if (options.showProgress !== false) {
+ progressBar.update(totalProcessed, {
+ uploaded: totalUploaded,
+ errors: totalErrors,
+ skipped: totalSkipped,
+ });
+ }
+ }
+
+ // Upload the batch if we have files
+ if (filesToUpload.length > 0) {
+ try {
+ console.log(` 🚀 Uploading ${filesToUpload.length} files to Arela API...`);
+
+ const formData = new FormData();
+
+ // Add files to form data
+ filesToUpload.forEach((file, index) => {
+ formData.append(`files`, file.buffer, {
+ filename: file.name,
+ contentType: mime.lookup(file.name) || 'application/octet-stream',
+ });
+ });
+
+ // Instead of using per-file folder structures, we'll group by arela_path and upload separately
+ // Group files by their arela_path to upload them in correct structure
+ const filesByPath = filesToUpload.reduce((acc, file) => {
+ const path = file.arelaPath.replace(/\/$/, '');
+ if (!acc[path]) {
+ acc[path] = [];
+ }
+ acc[path].push(file);
+ return acc;
+ }, {});
+
+ // Upload each group separately with its folder structure
+ for (const [arelaPath, pathFiles] of Object.entries(filesByPath)) {
+ const pathFormData = new FormData();
+
+ pathFiles.forEach((file) => {
+ pathFormData.append('files', file.buffer, {
+ filename: file.name,
+ contentType: mime.lookup(file.name) || 'application/octet-stream',
+ });
+ });
+
+ // Set folder structure for this group - concatenate custom prefix with arela_path
+ const folderStructure = options.folderStructure
+ ? `${options.folderStructure}/${arelaPath}`.replace(/\/+/g, '/').replace(/\/$/, '')
+ : arelaPath;
+ pathFormData.append('folderStructure', folderStructure);
+ pathFormData.append('autoDetect', 'true');
+ pathFormData.append('autoOrganize', 'false');
+ pathFormData.append('batchSize', String(pathFiles.length));
+ pathFormData.append('clientVersion', packageVersion);
+ if (bucket) {
+ pathFormData.append('bucket', bucket);
+ }

-
-
-
-
-
+ console.log(` 📁 Uploading ${pathFiles.length} files to: ${folderStructure}`);
+
+ const response = await fetch(`${API_BASE_URL}/api/storage/batch-upload-and-process`, {
+ method: 'POST',
+ headers: {
+ 'x-api-key': API_TOKEN,
+ },
+ body: pathFormData,
+ });
+
+ if (!response.ok) {
+ const errorText = await response.text();
+ throw new Error(`HTTP ${response.status}: ${errorText}`);
+ }
+
+ const result = await response.json();
+
+ // Check if upload was successful based on stats rather than success field
+ const isSuccessful = result.stats && result.stats.uploadedCount > 0 && result.stats.errorCount === 0;
+
+ if (isSuccessful) {
+ console.log(` ✅ Group uploaded: ${result.stats.uploadedCount} files to ${folderStructure}`);
+ totalUploaded += result.stats.uploadedCount;
+
+ if (result.stats.detectedCount > 0) {
+ console.log(` 🔍 Files detected: ${result.stats.detectedCount}`);
+ }
+ if (result.stats.organizedCount > 0) {
+ console.log(` 📁 Files organized: ${result.stats.organizedCount}`);
+ }
+ } else {
+ console.error(` ❌ Upload failed for ${folderStructure}:`);
+ if (result.errors && result.errors.length > 0) {
+ result.errors.forEach(error => {
+ console.error(` - ${error.fileName}: ${error.error}`);
+ });
+ }
+ totalErrors += pathFiles.length;
+ }
+
+ // Small delay between path groups
+ await new Promise(resolve => setTimeout(resolve, 100));
+ }
+
+ } catch (error) {
+ console.error(` ❌ Error uploading batch ${batchNumber}:`, error.message);
+ totalErrors += filesToUpload.length;
+ }
 }

- // Small delay between batches
- if (i + batchSize <
- await
+ // Small delay between batches
+ if (i + batchSize < allRelatedFiles.length) {
+ await new Promise(resolve => setTimeout(resolve, BATCH_DELAY));
 }
 }

-
+ if (options.showProgress !== false) {
+ progressBar.stop();
+ }
+
+ console.log(`\n${'='.repeat(60)}`);
+ console.log(`🎯 RFC-BASED UPLOAD COMPLETED`);
+ console.log(`${'='.repeat(60)}`);
+ console.log(` 📋 Files processed: ${totalProcessed}`);
+ console.log(` ✅ Files uploaded: ${totalUploaded}`);
+ console.log(` ⏭️ Files skipped: ${totalSkipped}`);
+ console.log(` ❌ Errors: ${totalErrors}`);
+ console.log(`${'='.repeat(60)}\n`);
+
+ return {
+ processedCount: totalProcessed,
+ uploadedCount: totalUploaded,
+ skippedCount: totalSkipped,
+ errorCount: totalErrors,
+ };
+ };
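Note, illustration only and not part of the published package contents: uploadFilesByRfc above posts each arela_path group as one multipart request. A minimal sketch of that request shape, reduced to a single file; the endpoint path, the x-api-key header and the form field names come from the code above, while the file name, folder value and environment variables are hypothetical placeholders:

import fs from 'fs';
import FormData from 'form-data';
import fetch from 'node-fetch';

// Build the same multipart body the uploader builds per arela_path group.
const form = new FormData();
form.append('files', fs.readFileSync('./example.pdf'), {
  filename: 'example.pdf',
  contentType: 'application/pdf',
});
form.append('folderStructure', '2024/4023260'); // year/pedimento group, as above
form.append('autoDetect', 'true');
form.append('autoOrganize', 'false');
form.append('batchSize', '1');

// Authenticate with the API key header used by the code above.
const response = await fetch(
  `${process.env.ARELA_API_URL}/api/storage/batch-upload-and-process`,
  { method: 'POST', headers: { 'x-api-key': process.env.ARELA_API_TOKEN }, body: form },
);
const result = await response.json();
console.log(result.stats); // { uploadedCount, detectedCount, organizedCount, errorCount }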
+
+ /**
+ * Propagate arela_path from pedimento_simplificado records to related files with same base path
+ */
+ const propagateArelaPath = async (options = {}) => {
+ if (!supabase) {
+ console.error('❌ Supabase client not initialized');
+ process.exit(1);
+ }
+
+ console.log('🔍 Finding pedimento_simplificado records with arela_path...');
+
+ // Get all pedimento_simplificado records that have arela_path
+ const { data: pedimentoRecords, error: pedimentoError } = await supabase
+ .from('uploader')
+ .select('id, original_path, arela_path, filename')
+ .eq('document_type', 'pedimento_simplificado')
+ .not('arela_path', 'is', null);
+
+ if (pedimentoError) {
+ console.error('❌ Error fetching pedimento records:', pedimentoError.message);
+ return { processedCount: 0, updatedCount: 0, errorCount: 1 };
+ }
+
+ if (!pedimentoRecords || pedimentoRecords.length === 0) {
+ console.log('ℹ️ No pedimento_simplificado records with arela_path found');
+ return { processedCount: 0, updatedCount: 0, errorCount: 0 };
+ }
+
+ console.log(`📋 Found ${pedimentoRecords.length} pedimento records with arela_path`);
+
+ let totalProcessed = 0;
+ let totalUpdated = 0;
+ let totalErrors = 0;
+
+ // Create progress bar
+ const progressBar = new cliProgress.SingleBar({
+ format: '🔄 Propagating paths |{bar}| {percentage}% | {value}/{total} | Updated: {updated} | Errors: {errors}',
+ barCompleteChar: '█',
+ barIncompleteChar: '░',
+ hideCursor: true,
+ });
+
+ if (options.showProgress !== false) {
+ progressBar.start(pedimentoRecords.length, 0, {
+ updated: 0,
+ errors: 0,
+ });
+ }
+
+ // Process each pedimento record
+ for (const pedimento of pedimentoRecords) {
+ try {
+ totalProcessed++;
+
+ // Extract base path from original_path (remove filename)
+ const basePath = path.dirname(pedimento.original_path);
+
+ console.log(`\n🔍 Processing: ${pedimento.filename}`);
+ console.log(` 📁 Base path: ${basePath}`);
+
+ // Extract folder part from existing arela_path by removing the filename
+ const existingPath = pedimento.arela_path;
+ const folderArelaPath = existingPath.includes('/') ?
+ existingPath.substring(0, existingPath.lastIndexOf('/')) + '/' :
+ existingPath.endsWith('/') ? existingPath : existingPath + '/';
+
+ console.log(` 🎯 Original arela path: ${existingPath}`);
+ console.log(` 📁 Folder arela path: ${folderArelaPath}`);
+
+ // Find all files with the same base path that don't have arela_path yet
+ const { data: relatedFiles, error: relatedError } = await supabase
+ .from('uploader')
+ .select('id, filename, original_path')
+ .like('original_path', `${basePath}%`)
+ .is('arela_path', null)
+ .neq('id', pedimento.id); // Exclude the pedimento itself
+
+ if (relatedError) {
+ console.error(`❌ Error finding related files for ${pedimento.filename}:`, relatedError.message);
+ totalErrors++;
+ continue;
+ }
+
+ if (!relatedFiles || relatedFiles.length === 0) {
+ console.log(` ℹ️ No related files found needing arela_path update`);
+ continue;
+ }
+
+ console.log(` 📄 Found ${relatedFiles.length} related files to update:`);
+
+ // Show first 10 files, then indicate if there are more
+ const filesToShow = relatedFiles.slice(0, 10);
+ filesToShow.forEach(file => {
+ console.log(` - ${file.filename}`);
+ });
+
+ if (relatedFiles.length > 10) {
+ console.log(` ... and ${relatedFiles.length - 10} more files`);
+ }
+
+ // Process files in batches to avoid URI length limitations
+ const BATCH_SIZE = 50; // Process 50 files at a time
+ const fileIds = relatedFiles.map(f => f.id);
+ let batchErrors = 0;
+ let batchUpdated = 0;
+
+ console.log(` 🔄 Processing ${relatedFiles.length} files in batches of ${BATCH_SIZE}...`);
+
+ for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
+ const batchIds = fileIds.slice(i, i + BATCH_SIZE);
+ const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
+ const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
+
+ console.log(` 📦 Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`);
+
+ try {
+ const { error: updateError } = await supabase
+ .from('uploader')
+ .update({ arela_path: folderArelaPath })
+ .in('id', batchIds);
+
+ if (updateError) {
+ console.error(` ❌ Error in batch ${batchNumber}:`, updateError.message);
+ batchErrors++;
+ } else {
+ console.log(` ✅ Batch ${batchNumber} completed: ${batchIds.length} files updated`);
+ batchUpdated += batchIds.length;
+ }
+ } catch (error) {
+ console.error(` ❌ Exception in batch ${batchNumber}:`, error.message);
+ batchErrors++;
+ }
+
+ // Small delay between batches to avoid overwhelming the database
+ if (i + BATCH_SIZE < fileIds.length) {
+ await new Promise(resolve => setTimeout(resolve, 100));
+ }
+ }
+
+ if (batchErrors > 0) {
+ console.error(`❌ ${batchErrors} batch(es) failed for ${pedimento.filename}`);
+ totalErrors++;
+ } else {
+ console.log(` 🎯 Successfully updated ${batchUpdated} related files`);
+ totalUpdated += batchUpdated;
+ }
+
+ } catch (error) {
+ console.error(`❌ Error processing ${pedimento.filename}:`, error.message);
+ totalErrors++;
+ }
+
+ if (options.showProgress !== false) {
+ progressBar.update(totalProcessed, {
+ updated: totalUpdated,
+ errors: totalErrors,
+ });
+ }
+ }
+
+ if (options.showProgress !== false) {
+ progressBar.stop();
+ }
+
+ console.log(`\n${'='.repeat(60)}`);
+ console.log(`🎯 ARELA PATH PROPAGATION COMPLETED`);
+ console.log(`${'='.repeat(60)}`);
+ console.log(` 📋 Pedimento records processed: ${totalProcessed}`);
+ console.log(` ✅ Related files updated: ${totalUpdated}`);
+ console.log(` ❌ Errors: ${totalErrors}`);
+ console.log(`${'='.repeat(60)}\n`);

 return {
-
-
-
+ processedCount: totalProcessed,
+ updatedCount: totalUpdated,
+ errorCount: totalErrors,
 };
 };
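Note, illustration only and not part of the published package contents: the nested ternary in propagateArelaPath above reduces an existing arela_path to its folder portion before copying it onto related records. A small sketch of that behavior with hypothetical sample values:

// Mirrors the ternary above; the sample paths are hypothetical.
const folderOf = (existingPath) =>
  existingPath.includes('/')
    ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
    : existingPath.endsWith('/')
      ? existingPath
      : existingPath + '/';

console.log(folderOf('2024/4023260/pedimento.pdf')); // -> '2024/4023260/'
console.log(folderOf('2024/4023260/'));              // -> '2024/4023260/'
console.log(folderOf('4023260'));                    // -> '4023260/'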

 program
- .name('
- .description(
+ .name('arela-uploader')
+ .description(
+ 'CLI to upload folders to Arela API or Supabase Storage with automatic processing',
+ )
 .option('-v, --version', 'output the version number')
 .option('-p, --prefix <prefix>', 'Prefix path in bucket', '')
+ .option('-b, --bucket <bucket>', 'Bucket name override')
+ .option('--force-supabase', 'Force direct Supabase upload (skip API)')
 .option(
- '
- '
+ '--no-auto-detect',
+ 'Disable automatic file detection (API mode only)',
 )
 .option(
- '--
- '
+ '--no-auto-organize',
+ 'Disable automatic file organization (API mode only)',
 )
 .option(
 '-c, --concurrency <number>',
- '
- '
+ 'Files per batch for processing (default: 10)',
+ '10',
 )
+ .option('--batch-size <number>', 'API batch size (default: 10)', '10')
+ .option('--show-stats', 'Show detailed processing statistics')
 .option(
- '--
- '
+ '--folder-structure <structure>',
+ 'Custom folder structure (e.g., "2024/4023260" or "cliente1/pedimentos")',
 )
 .option(
- '--
- '
- '50',
+ '--auto-detect-structure',
+ 'Automatically detect year/pedimento from file paths',
 )
+ .option('--client-path <path>', 'Client path for metadata tracking')
+ .option('--stats-only', 'Phase 1: Only read filesystem stats and insert to database (no file reading or detection)')
+ .option('--no-detect', 'Disable document type detection in stats-only mode')
+ .option('--detect-pdfs', 'Phase 2: Process PDF files in database for pedimento-simplificado detection')
+ .option('--propagate-arela-path', 'Phase 3: Propagate arela_path from pedimento_simplificado records to related files with same base path')
+ .option('--upload-by-rfc', 'Phase 4: Upload files to Arela API based on RFC values from UPLOAD_RFCS environment variable')
+ .option('--run-all-phases', 'Run all 4 phases in sequence: stats → detect → propagate → upload')
 .action(async (options) => {
- // Handle version option
 if (options.version) {
- console.log(
+ console.log(packageVersion);
 process.exit(0);
 }

+ // Handle detect-pdfs option (Phase 2)
+ if (options.detectPdfs) {
+ console.log('🔍 Starting Phase 2: PDF Detection');
+ await checkCredentials(true); // Force Supabase mode
+
+ const result = await detectPedimentosInDatabase({
+ batchSize: parseInt(options.batchSize) || 10,
+ });
+
+ console.log(`✅ Phase 2 Complete: ${result.detectedCount} detected, ${result.errorCount} errors`);
+ return;
+ }
+
+ // Handle run-all-phases option
+ if (options.runAllPhases) {
+ console.log('🚀 Starting all 4 phases in sequence...');
+ await checkCredentials(true); // Force Supabase mode
+
+ // Phase 1: Stats collection
+ console.log('\n📊 === PHASE 1: Filesystem Stats ===');
+ options.statsOnly = true;
+ // Continue with normal processing to run Phase 1
+
+ // The rest will be handled after Phase 1 completes
+ }
+
+ // Handle propagate-arela-path option
+ if (options.propagateArelaPath) {
+ // Initialize Supabase credentials for propagation
+ await checkCredentials(true); // Force Supabase mode
+
+ const result = await propagateArelaPath({
+ showProgress: options.showStats || true,
+ });
+
+ if (result.errorCount > 0) {
+ process.exit(1);
+ }
+ return;
+ }
+
+ // Handle upload-by-rfc option
+ if (options.uploadByRfc) {
+ // RFC upload needs both Supabase (for database queries) and API (for uploads)
+ await checkCredentials(false); // Initialize API mode
+
+ // Also initialize Supabase for database queries
+ if (!supabase) {
+ if (!supabaseUrl || !supabaseKey) {
+ console.error('❌ RFC upload requires Supabase credentials for database queries.');
+ console.error(' Please set SUPABASE_URL and SUPABASE_KEY environment variables.');
+ process.exit(1);
+ }
+
+ supabase = createClient(supabaseUrl, supabaseKey);
+ console.log('✅ Connected to Supabase for database queries');
+ }
+
+ const result = await uploadFilesByRfc({
+ showProgress: options.showStats || true,
+ batchSize: parseInt(options.batchSize) || 10,
+ folderStructure: options.folderStructure,
+ });
+
+ if (result.errorCount > 0) {
+ process.exit(1);
+ }
+ return;
+ }
+
+ // Initialize credentials with force supabase flag (for stats mode, always need Supabase)
+ await checkCredentials(options.forceSupabase || options.statsOnly);
+
 if (!basePath || !sources || sources.length === 0) {
 console.error(
 '⚠️ UPLOAD_BASE_PATH or UPLOAD_SOURCES not defined in environment variables.',
@@ -739,18 +1853,31 @@ program
 process.exit(1);
 }

- const
- const
-
- // Configure log batcher with custom batch size
- logBatcher.batchSize = batchSize;
+ const batchSize = parseInt(options.batchSize) || 10;
+ const concurrency = parseInt(options.concurrency) || 10;

-
-
+ if (options.statsOnly) {
+ console.log('📊 Mode: Stats Only - Reading file stats and inserting to uploader table');
+ console.log('🚫 Files will NOT be uploaded');
+ if (options.detect !== false) {
+ console.log('🔍 Document type detection ENABLED for supported files');
+ } else {
+ console.log('🔍 Document type detection DISABLED');
+ }
+ } else {
+ console.log(
+ `🚀 Mode: ${apiMode ? 'Arela API with auto-processing' : 'Direct Supabase'}`,
+ );
+ }
+ console.log(`📦 Batch size: ${batchSize}`);
+ console.log(`⚡ Concurrency: ${concurrency}`);

- const processedPaths =
+ const processedPaths = getProcessedPaths();
 let globalSuccess = 0;
+ let globalDetected = 0;
+ let globalOrganized = 0;
 let globalFailure = 0;
+ let globalSkipped = 0;

 for (const folder of sources) {
 const sourcePath = path.resolve(basePath, folder).replace(/\\/g, '/');
@@ -764,78 +1891,131 @@ program

 console.log(`📊 Found ${files.length} files to process`);

-
-
-
-
-
-
-
-
-
- processedPaths,
- );
-
- globalSuccess += successCount;
- globalFailure += failureCount;
-
- console.log(`\n📦 Upload Summary for ${folder}:`);
- console.log(` ✅ Successfully uploaded files: ${successCount}`);
- console.log(` ❌ Files with errors: ${failureCount}`);
- console.log(` ⏭️ Files skipped (already exist): ${skippedCount}`);
- console.log(` 📜 Log file: ${logFilePath} \n`);
+ const result = await processFilesInBatches(
+ files,
+ batchSize,
+ options,
+ basePath,
+ folder,
+ sourcePath,
+ processedPaths,
+ );

-
-
-
-
-
- );
- console.log(
- ` 📁 Path sanitization cache: ${pathSanitizationCache.size} entries`,
- );
- console.log(
- ` 📋 Log batch pending: ${logBatcher.batch.length} entries`,
- );
+ globalSuccess += result.successCount;
+ globalDetected += result.detectedCount || 0;
+ globalOrganized += result.organizedCount || 0;
+ globalFailure += result.failureCount;
+ globalSkipped += result.skippedCount;

-
-
-
-
-
-
-
-
-
-
-
-
+ console.log(`\n📦 Summary for ${folder}:`);
+ if (options.statsOnly) {
+ console.log(` 📊 Stats recorded: ${result.successCount}`);
+ } else {
+ console.log(` ✅ Uploaded: ${result.successCount}`);
+ if (apiMode) {
+ console.log(` 🔍 Detected: ${result.detectedCount || 0}`);
+ console.log(` 📁 Organized: ${result.organizedCount || 0}`);
+ }
+ }
+ console.log(` ❌ Errors: ${result.failureCount}`);
+ if (options.statsOnly) {
+ console.log(` ⏭️ Duplicates: ${result.skippedCount}`);
+ } else {
+ console.log(` ⏭️ Skipped: ${result.skippedCount}`);
 }

 writeLog(
- `📦
+ `📦 Summary for ${folder}: Success: ${result.successCount}, Detected: ${result.detectedCount || 0}, Organized: ${result.organizedCount || 0}, Errors: ${result.failureCount}, ${options.statsOnly ? 'Duplicates' : 'Skipped'}: ${result.skippedCount}`,
 );
 } catch (err) {
 console.error(`⚠️ Error processing folder ${folder}:`, err.message);
 writeLog(`⚠️ Error processing folder ${folder}: ${err.message}`);
- await sendLogToSupabase({
- file: folder,
- uploadPath: folder,
- status: 'error',
- message: err.message,
- });
 globalFailure++;
 }
 }

-
-
-
-
-
-
-
+ console.log(`\n${'='.repeat(60)}`);
+ if (options.statsOnly) {
+ console.log(`📊 STATS COLLECTION COMPLETED`);
+ console.log(`${'='.repeat(60)}`);
+ console.log(` 📊 Total stats recorded: ${globalSuccess}`);
+ } else {
+ console.log(`🎯 ${apiMode ? 'ARELA API' : 'SUPABASE'} UPLOAD COMPLETED`);
+ console.log(`${'='.repeat(60)}`);
+ console.log(` ✅ Total uploaded: ${globalSuccess}`);
+ if (apiMode) {
+ console.log(` 🔍 Total detected: ${globalDetected}`);
+ console.log(` 📁 Total organized: ${globalOrganized}`);
+ }
+ }
+ if (options.statsOnly) {
+ console.log(` ⏭️ Total duplicates: ${globalSkipped}`);
+ } else {
+ console.log(` ⏭️ Total skipped: ${globalSkipped}`);
+ }
+ console.log(` ❌ Total errors: ${globalFailure}`);
 console.log(` 📜 Log file: ${logFilePath}`);
+ console.log(`${'='.repeat(60)}\n`);
+
+ // Continue with remaining phases if running all phases
+ if (options.runAllPhases && options.statsOnly) {
+ try {
+ // Phase 2: PDF Detection
+ console.log('\n🔍 === PHASE 2: PDF Detection ===');
+ const detectionResult = await detectPedimentosInDatabase({
+ batchSize: parseInt(options.batchSize) || 10,
+ });
+ console.log(`✅ Phase 2 Complete: ${detectionResult.detectedCount} detected, ${detectionResult.errorCount} errors`);
+
+ // Phase 3: Propagate arela_path
+ console.log('\n📁 === PHASE 3: Propagate Arela Paths ===');
+ const propagateResult = await propagateArelaPath({
+ showProgress: options.showStats || true,
+ });
+ console.log(`✅ Phase 3 Complete: ${propagateResult.updatedCount || 0} paths propagated`);
+
+ // Phase 4: Upload by RFC
+ if (uploadRfcs && uploadRfcs.length > 0) {
+ console.log('\n🚀 === PHASE 4: Upload by RFC ===');
+
+ // Initialize API mode for uploads
+ await checkCredentials(false);
+
+ const uploadResult = await uploadFilesByRfc({
+ showProgress: options.showStats || true,
+ batchSize: parseInt(options.batchSize) || 10,
+ folderStructure: options.folderStructure,
+ });
+ console.log(`✅ Phase 4 Complete: Upload finished`);
+ } else {
+ console.log('\n⚠️ === PHASE 4: Upload by RFC ===');
+ console.log('⚠️ UPLOAD_RFCS environment variable not configured, skipping Phase 4');
+ }
+
+ console.log('\n🎉 All 4 phases completed successfully!');
+
+ } catch (error) {
+ console.error(`❌ Error in multi-phase execution:`, error.message);
+ process.exit(1);
+ }
+ }
+
+ if (options.showStats && (sanitizationCache.size > 0 || pathDetectionCache.size > 0)) {
+ console.log(`📊 Performance Statistics:`);
+ if (sanitizationCache.size > 0) {
+ console.log(
+ ` 🗂️ Sanitization cache entries: ${sanitizationCache.size}`,
+ );
+ }
+ if (pathDetectionCache.size > 0) {
+ console.log(
+ ` 📁 Path detection cache entries: ${pathDetectionCache.size}`,
+ );
+ }
+ }
+
+ // OPTIMIZED: Ensure log buffer is flushed before exit
+ flushLogBuffer();
 });

 program.parse();
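Note, illustration only and not part of the published package contents: the new flags above wire the uploader into a four-phase pipeline. A sketch of running the phases in order from a Node script, assuming the package exposes its bin as arela-uploader (the name passed to program.name(); the actual bin entry is an assumption) and that the SUPABASE_*, ARELA_API_* and UPLOAD_* variables described above are set. The --run-all-phases flag performs the same sequence in a single invocation.

import { execSync } from 'child_process';

// Phase order matches the flags defined above; the bin name is an assumption.
for (const flag of [
  '--stats-only',           // Phase 1: record filesystem stats in the uploader table
  '--detect-pdfs',          // Phase 2: detect pedimento-simplificado PDFs in the database
  '--propagate-arela-path', // Phase 3: copy arela_path to related rows sharing a base path
  '--upload-by-rfc',        // Phase 4: upload files for the RFCs listed in UPLOAD_RFCS
]) {
  execSync(`arela-uploader ${flag}`, { stdio: 'inherit' });
}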