@arela/uploader 0.2.4 → 0.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +3 -2
- package/src/commands/UploadCommand.js +446 -0
- package/src/config/config.js +178 -0
- package/src/errors/ErrorHandler.js +278 -0
- package/src/errors/ErrorTypes.js +104 -0
- package/src/index-old.js +2658 -0
- package/src/index.js +302 -2573
- package/src/services/DatabaseService.js +1140 -0
- package/src/services/LoggingService.js +194 -0
- package/src/services/upload/ApiUploadService.js +153 -0
- package/src/services/upload/BaseUploadService.js +36 -0
- package/src/services/upload/SupabaseUploadService.js +126 -0
- package/src/services/upload/UploadServiceFactory.js +76 -0
- package/src/utils/FileOperations.js +148 -0
- package/src/utils/FileSanitizer.js +99 -0
- package/src/utils/PathDetector.js +198 -0
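
Note: the largest addition, package/src/services/DatabaseService.js (reproduced below), appears to absorb Supabase logic that previously lived in package/src/index.js, with the old entry point seemingly preserved as index-old.js. The service drives a four-phase pipeline against an uploader table: Phase 1 records filesystem stats, Phase 2 detects pedimento-simplificado PDFs, Phase 3 propagates each pedimento's arela_path to sibling files, and Phase 4 uploads the matched files by RFC. A usage sketch follows the diff.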
package/src/services/DatabaseService.js
@@ -0,0 +1,1140 @@
+import fs from 'fs';
+import path from 'path';
+
+import appConfig from '../config/config.js';
+import { FileDetectionService } from '../file-detection.js';
+import logger from './LoggingService.js';
+import uploadServiceFactory from './upload/UploadServiceFactory.js';
+
+/**
+ * Database Service
+ * Handles all Supabase database operations for the uploader table
+ */
+export class DatabaseService {
+  constructor() {
+    this.detectionService = new FileDetectionService();
+  }
+
+  /**
+   * Get Supabase client
+   * @private
+   * @returns {Promise<Object>} Supabase client
+   */
+  async #getSupabaseClient() {
+    const supabaseService = uploadServiceFactory.getSupabaseService();
+    return await supabaseService.getClient();
+  }
+
+  /**
+   * Execute database query with retry logic and exponential backoff
+   * @private
+   * @param {Function} queryFn - Query function to execute
+   * @param {string} operation - Description of the operation for logging
+   * @param {number} maxRetries - Maximum number of retry attempts (default: 3)
+   * @returns {Promise<Object>} Query result
+   */
+  async #queryWithRetry(queryFn, operation, maxRetries = 3) {
+    let lastError;
+
+    for (let attempt = 1; attempt <= maxRetries; attempt++) {
+      try {
+        const result = await queryFn();
+        if (attempt > 1) {
+          logger.info(`${operation} succeeded on attempt ${attempt}`);
+        }
+        return result;
+      } catch (error) {
+        lastError = error;
+
+        // Check if it's a timeout or connection error
+        const isRetriableError =
+          error.message?.includes('timeout') ||
+          error.message?.includes('canceling statement') ||
+          error.message?.includes('connection') ||
+          error.code === 'PGRST301'; // PostgREST timeout
+
+        if (!isRetriableError || attempt === maxRetries) {
+          throw error;
+        }
+
+        const backoffDelay = Math.min(1000 * Math.pow(2, attempt - 1), 30000); // Cap at 30 seconds
+        logger.warn(
+          `${operation} failed on attempt ${attempt}/${maxRetries}: ${error.message}`,
+        );
+        logger.info(`Retrying in ${backoffDelay}ms...`);
+
+        await new Promise((resolve) => setTimeout(resolve, backoffDelay));
+      }
+    }
+
+    throw lastError;
+  }
+
+  /**
+   * Insert file stats with document detection into uploader table
+   * @param {Array} files - Array of file objects
+   * @param {Object} options - Options including clientPath
+   * @returns {Promise<Array>} Inserted records
+   */
+  async insertStatsToUploaderTable(files, options) {
+    const supabase = await this.#getSupabaseClient();
+    const records = [];
+
+    for (const file of files) {
+      const stats = file.stats || fs.statSync(file.path);
+      const originalPath = options.clientPath || file.path;
+
+      // Check if record already exists
+      const { data: existingRecords, error: checkError } = await supabase
+        .from('uploader')
+        .select('id, original_path')
+        .eq('original_path', originalPath)
+        .limit(1);
+
+      if (checkError) {
+        logger.error(
+          `Error checking for existing record: ${checkError.message}`,
+        );
+        continue;
+      }
+
+      if (existingRecords && existingRecords.length > 0) {
+        logger.info(`Skipping duplicate: ${path.basename(file.path)}`);
+        continue;
+      }
+
+      // Initialize record with basic file stats
+      const record = {
+        document_type: null,
+        size: stats.size,
+        num_pedimento: null,
+        filename: file.originalName || path.basename(file.path),
+        original_path: originalPath,
+        arela_path: null,
+        status: 'stats',
+        rfc: null,
+        message: null,
+      };
+
+      // Try to detect document type for supported files
+      if (this.detectionService.isSupportedFileType(file.path)) {
+        try {
+          const detection = await this.detectionService.detectFile(file.path);
+
+          if (detection.detectedType) {
+            record.document_type = detection.detectedType;
+            record.num_pedimento = detection.detectedPedimento;
+            record.status = 'detected';
+
+            if (detection.arelaPath) {
+              record.arela_path = detection.arelaPath;
+            }
+
+            const rfcField = detection.fields.find(
+              (f) => f.name === 'rfc' && f.found,
+            );
+            if (rfcField) {
+              record.rfc = rfcField.value;
+            }
+          } else {
+            record.status = 'not-detected';
+            if (detection.error) {
+              record.message = detection.error;
+            }
+          }
+        } catch (error) {
+          logger.error(`Error detecting ${record.filename}: ${error.message}`);
+          record.status = 'detection-error';
+          record.message = error.message;
+        }
+      } else {
+        record.status = 'unsupported';
+        record.message = 'File type not supported for detection';
+      }
+
+      records.push(record);
+    }
+
+    if (records.length === 0) {
+      logger.info('No new records to insert (all were duplicates or errors)');
+      return [];
+    }
+
+    logger.info(
+      `Inserting ${records.length} new records into uploader table...`,
+    );
+
+    const { data, error } = await supabase
+      .from('uploader')
+      .insert(records)
+      .select();
+
+    if (error) {
+      throw new Error(`Failed to insert stats records: ${error.message}`);
+    }
+
+    return data;
+  }
+
+  /**
+   * Insert file stats only (no detection) into uploader table
+   * @param {Array} files - Array of file objects
+   * @param {Object} options - Options including clientPath
+   * @returns {Promise<Object>} Statistics about the operation
+   */
+  async insertStatsOnlyToUploaderTable(files, options) {
+    const supabase = await this.#getSupabaseClient();
+    const batchSize = 1000;
+    const allRecords = [];
+
+    logger.info('Collecting filesystem stats...');
+    for (const file of files) {
+      try {
+        const stats = file.stats || fs.statSync(file.path);
+        const originalPath = options.clientPath || file.path;
+        const fileExtension = path
+          .extname(file.path)
+          .toLowerCase()
+          .replace('.', '');
+
+        const record = {
+          document_type: null,
+          size: stats.size,
+          num_pedimento: null,
+          filename: file.originalName || path.basename(file.path),
+          original_path: originalPath,
+          arela_path: null,
+          status: 'fs-stats',
+          rfc: null,
+          message: null,
+          file_extension: fileExtension,
+          created_at: new Date().toISOString(),
+          modified_at: stats.mtime.toISOString(),
+        };
+
+        allRecords.push(record);
+      } catch (error) {
+        logger.error(`Error reading stats for ${file.path}: ${error.message}`);
+      }
+    }
+
+    if (allRecords.length === 0) {
+      logger.info('No file stats to insert');
+      return { totalInserted: 0, totalSkipped: 0, totalProcessed: 0 };
+    }
+
+    logger.info(
+      `Processing ${allRecords.length} file stats in batches of ${batchSize}...`,
+    );
+
+    let totalInserted = 0;
+    let totalUpdated = 0;
+
+    for (let i = 0; i < allRecords.length; i += batchSize) {
+      const batch = allRecords.slice(i, i + batchSize);
+
+      try {
+        // Check which records already exist
+        const originalPaths = batch.map((r) => r.original_path);
+        const { data: existingRecords, error: checkError } = await supabase
+          .from('uploader')
+          .select('original_path')
+          .in('original_path', originalPaths);
+
+        if (checkError) {
+          logger.error(
+            `Error checking existing records: ${checkError.message}`,
+          );
+          continue;
+        }
+
+        const existingPaths = new Set(
+          existingRecords?.map((r) => r.original_path) || [],
+        );
+        const newRecords = batch.filter(
+          (r) => !existingPaths.has(r.original_path),
+        );
+        const updateRecords = batch.filter((r) =>
+          existingPaths.has(r.original_path),
+        );
+
+        logger.info(
+          `Batch ${Math.floor(i / batchSize) + 1}: ${newRecords.length} new, ${updateRecords.length} updates`,
+        );
+
+        // Insert new records
+        if (newRecords.length > 0) {
+          const { error: insertError } = await supabase
+            .from('uploader')
+            .insert(newRecords);
+
+          if (insertError) {
+            logger.error(`Error inserting new records: ${insertError.message}`);
+          } else {
+            totalInserted += newRecords.length;
+            logger.success(`Inserted ${newRecords.length} new records`);
+          }
+        }
+
+        // Update existing records
+        if (updateRecords.length > 0) {
+          let batchUpdated = 0;
+          for (const record of updateRecords) {
+            const { error: updateError } = await supabase
+              .from('uploader')
+              .update({
+                size: record.size,
+                modified_at: record.modified_at,
+                filename: record.filename,
+                file_extension: record.file_extension,
+              })
+              .eq('original_path', record.original_path);
+
+            if (!updateError) {
+              batchUpdated++;
+            }
+          }
+          totalUpdated += batchUpdated;
+          logger.info(`Updated ${batchUpdated} existing records`);
+        }
+      } catch (error) {
+        logger.error(
+          `Unexpected error in batch ${Math.floor(i / batchSize) + 1}: ${error.message}`,
+        );
+      }
+    }
+
+    logger.success(
+      `Phase 1 Summary: ${totalInserted} new records inserted, ${totalUpdated} existing records updated`,
+    );
+
+    return {
+      totalInserted,
+      totalSkipped: totalUpdated,
+      totalProcessed: allRecords.length,
+    };
+  }
+
+  /**
+   * Process PDF files for pedimento-simplificado detection
+   * @param {Object} options - Processing options
+   * @returns {Promise<Object>} Processing result
+   */
+  async detectPedimentosInDatabase(options = {}) {
+    const supabase = await this.#getSupabaseClient();
+
+    logger.info(
+      'Phase 2: Starting PDF detection for pedimento-simplificado documents...',
+    );
+
+    const processingBatchSize = parseInt(options.batchSize) || 10;
+    // Reduced query batch size to avoid timeouts
+    const queryBatchSize = 500; // Reduced from 1000 to 500
+
+    let totalDetected = 0;
+    let totalProcessed = 0;
+    let totalErrors = 0;
+    let offset = 0;
+    let chunkNumber = 1;
+
+    logger.info(
+      `Processing PDF files in chunks of ${queryBatchSize} records...`,
+    );
+
+    while (true) {
+      logger.info(
+        `Fetching chunk ${chunkNumber} (records ${offset + 1} to ${offset + queryBatchSize})...`,
+      );
+
+      try {
+        // Split the query to make it more efficient with retry logic
+        const { data: pdfRecords, error: queryError } =
+          await this.#queryWithRetry(async () => {
+            return await supabase
+              .from('uploader')
+              .select('id, original_path, filename, file_extension, status')
+              .eq('status', 'fs-stats')
+              .eq('file_extension', 'pdf')
+              .ilike('filename', '%simp%')
+              .range(offset, offset + queryBatchSize - 1)
+              .order('id'); // Add explicit ordering for consistent pagination
+          }, `fetch PDF records chunk ${chunkNumber}`);
+
+        if (queryError) {
+          throw new Error(
+            `Failed to fetch PDF records chunk ${chunkNumber}: ${queryError.message}`,
+          );
+        }
+
+        if (!pdfRecords || pdfRecords.length === 0) {
+          logger.info('No more PDF files found. Processing completed.');
+          break;
+        }
+
+        logger.info(
+          `Processing chunk ${chunkNumber}: ${pdfRecords.length} PDF records`,
+        );
+
+        let chunkDetected = 0;
+        let chunkProcessed = 0;
+        let chunkErrors = 0;
+
+        // Process files in smaller batches
+        for (let i = 0; i < pdfRecords.length; i += processingBatchSize) {
+          const batch = pdfRecords.slice(i, i + processingBatchSize);
+          const updatePromises = [];
+
+          for (const record of batch) {
+            try {
+              if (!fs.existsSync(record.original_path)) {
+                logger.warn(
+                  `File not found: ${record.filename} at ${record.original_path}`,
+                );
+                updatePromises.push(
+                  supabase
+                    .from('uploader')
+                    .update({
+                      status: 'file-not-found',
+                      message: 'File no longer exists at original path',
+                    })
+                    .eq('id', record.id),
+                );
+                chunkErrors++;
+                totalErrors++;
+                continue;
+              }
+
+              const detection = await this.detectionService.detectFile(
+                record.original_path,
+              );
+              chunkProcessed++;
+              totalProcessed++;
+
+              const updateData = {
+                status: detection.detectedType ? 'detected' : 'not-detected',
+                document_type: detection.detectedType,
+                num_pedimento: detection.detectedPedimento,
+                arela_path: detection.arelaPath,
+                message: detection.error || null,
+              };
+
+              if (detection.fields) {
+                const rfcField = detection.fields.find(
+                  (f) => f.name === 'rfc' && f.found,
+                );
+                if (rfcField) {
+                  updateData.rfc = rfcField.value;
+                }
+              }
+
+              if (detection.detectedType) {
+                chunkDetected++;
+                totalDetected++;
+                logger.success(
+                  `Detected: ${record.filename} -> ${detection.detectedType} | Pedimento: ${detection.detectedPedimento || 'N/A'} | RFC: ${detection.fields?.rfc || 'N/A'}`,
+                );
+              } else {
+                logger.info(
+                  `Not detected: ${record.filename} - No pedimento-simplificado pattern found`,
+                );
+              }
+
+              updatePromises.push(
+                supabase
+                  .from('uploader')
+                  .update(updateData)
+                  .eq('id', record.id),
+              );
+            } catch (error) {
+              logger.error(
+                `Error detecting ${record.filename}: ${error.message}`,
+              );
+              chunkErrors++;
+              totalErrors++;
+
+              updatePromises.push(
+                supabase
+                  .from('uploader')
+                  .update({
+                    status: 'detection-error',
+                    message: error.message,
+                  })
+                  .eq('id', record.id),
+              );
+            }
+          }
+
+          try {
+            await Promise.all(updatePromises);
+          } catch (error) {
+            logger.error(
+              `Error updating batch in chunk ${chunkNumber}: ${error.message}`,
+            );
+          }
+        }
+
+        logger.success(
+          `Chunk ${chunkNumber} completed: ${chunkDetected} detected, ${chunkProcessed} processed, ${chunkErrors} errors`,
+        );
+
+        offset += queryBatchSize;
+        chunkNumber++;
+
+        if (pdfRecords.length < queryBatchSize) {
+          logger.info(
+            `Reached end of records (chunk had ${pdfRecords.length} records).`,
+          );
+          break;
+        }
+
+        // Small delay between chunks
+        await new Promise((resolve) => setTimeout(resolve, 500));
+      } catch (chunkError) {
+        logger.error(
+          `Error processing chunk ${chunkNumber}: ${chunkError.message}`,
+        );
+        // Continue to next chunk after error
+        offset += queryBatchSize;
+        chunkNumber++;
+        totalErrors++;
+        continue;
+      }
+    }
+
+    const result = {
+      detectedCount: totalDetected,
+      processedCount: totalProcessed,
+      errorCount: totalErrors,
+    };
+
+    logger.success(
+      `Phase 2 Summary: ${totalDetected} detected, ${totalProcessed} processed, ${totalErrors} errors`,
+    );
+
+    return result;
+  }
+
+  /**
+   * Propagate arela_path from pedimento_simplificado records to related files
+   * @param {Object} options - Options for propagation
+   * @returns {Promise<Object>} Processing result
+   */
+  async propagateArelaPath(options = {}) {
+    const supabase = await this.#getSupabaseClient();
+
+    logger.info('Phase 3: Starting arela_path propagation process...');
+    console.log('🔍 Finding pedimento_simplificado records with arela_path...');
+
+    // Get all pedimento_simplificado records that have arela_path
+    const { data: pedimentoRecords, error: pedimentoError } = await supabase
+      .from('uploader')
+      .select('id, original_path, arela_path, filename')
+      .eq('document_type', 'pedimento_simplificado')
+      .not('arela_path', 'is', null);
+
+    if (pedimentoError) {
+      const errorMsg = `Error fetching pedimento records: ${pedimentoError.message}`;
+      logger.error(errorMsg);
+      throw new Error(errorMsg);
+    }
+
+    if (!pedimentoRecords || pedimentoRecords.length === 0) {
+      logger.info('No pedimento_simplificado records with arela_path found');
+      console.log(
+        'ℹ️ No pedimento_simplificado records with arela_path found',
+      );
+      return { processedCount: 0, updatedCount: 0, errorCount: 0 };
+    }
+
+    console.log(
+      `📊 Found ${pedimentoRecords.length} pedimento records with arela_path`,
+    );
+    logger.info(
+      `Found ${pedimentoRecords.length} pedimento records with arela_path to process`,
+    );
+
+    let totalProcessed = 0;
+    let totalUpdated = 0;
+    let totalErrors = 0;
+    const BATCH_SIZE = 50; // Process files in batches
+
+    // Process each pedimento record
+    for (const pedimento of pedimentoRecords) {
+      try {
+        totalProcessed++;
+
+        // Extract base path from original_path (remove filename)
+        const basePath = path.dirname(pedimento.original_path);
+
+        logger.info(
+          `Processing pedimento: ${pedimento.filename} | Base path: ${basePath}`,
+        );
+
+        // Extract folder part from existing arela_path
+        const existingPath = pedimento.arela_path;
+        const folderArelaPath = existingPath.includes('/')
+          ? existingPath.substring(0, existingPath.lastIndexOf('/')) + '/'
+          : existingPath.endsWith('/')
+            ? existingPath
+            : existingPath + '/';
+
+        // Find all files with the same base path that don't have arela_path yet
+        const { data: relatedFiles, error: relatedError } = await supabase
+          .from('uploader')
+          .select('id, filename, original_path')
+          .like('original_path', `${basePath}%`)
+          .is('arela_path', null)
+          .neq('id', pedimento.id); // Exclude the pedimento itself
+
+        if (relatedError) {
+          logger.error(
+            `Error finding related files for ${pedimento.filename}: ${relatedError.message}`,
+          );
+          totalErrors++;
+          continue;
+        }
+
+        if (!relatedFiles || relatedFiles.length === 0) {
+          logger.info(`No related files found for ${pedimento.filename}`);
+          continue;
+        }
+
+        logger.info(
+          `Found ${relatedFiles.length} related files to update for ${pedimento.filename}`,
+        );
+
+        // Process files in batches
+        const fileIds = relatedFiles.map((f) => f.id);
+
+        for (let i = 0; i < fileIds.length; i += BATCH_SIZE) {
+          const batchIds = fileIds.slice(i, i + BATCH_SIZE);
+          const batchNumber = Math.floor(i / BATCH_SIZE) + 1;
+          const totalBatches = Math.ceil(fileIds.length / BATCH_SIZE);
+
+          logger.info(
+            `Batch ${batchNumber}/${totalBatches}: Updating ${batchIds.length} files...`,
+          );
+
+          try {
+            const { error: updateError } = await supabase
+              .from('uploader')
+              .update({ arela_path: folderArelaPath })
+              .in('id', batchIds);
+
+            if (updateError) {
+              logger.error(
+                `Error in batch ${batchNumber}: ${updateError.message}`,
+              );
+              totalErrors++;
+            } else {
+              totalUpdated += batchIds.length;
+              logger.info(
+                `Successfully updated batch ${batchNumber}: ${batchIds.length} files`,
+              );
+            }
+          } catch (batchError) {
+            logger.error(
+              `Exception in batch ${batchNumber}: ${batchError.message}`,
+            );
+            totalErrors++;
+          }
+        }
+      } catch (error) {
+        logger.error(
+          `Error processing pedimento ${pedimento.filename}: ${error.message}`,
+        );
+        totalErrors++;
+      }
+    }
+
+    const result = {
+      processedCount: totalProcessed,
+      updatedCount: totalUpdated,
+      errorCount: totalErrors,
+    };
+
+    logger.success(
+      `Phase 3 Summary: ${totalProcessed} pedimentos processed, ${totalUpdated} files updated, ${totalErrors} errors`,
+    );
+
+    return result;
+  }
+
+  /**
+   * Upload files to Arela API based on specific RFC values
+   * @param {Object} options - Upload options
+   * @returns {Promise<Object>} Processing result
+   */
+  async uploadFilesByRfc(options = {}) {
+    const supabase = await this.#getSupabaseClient();
+    const uploadService = await uploadServiceFactory.getUploadService();
+
+    // Get configuration
+    const appConfig = await import('../config/config.js').then(
+      (m) => m.appConfig,
+    );
+
+    if (!appConfig.upload.rfcs || appConfig.upload.rfcs.length === 0) {
+      const errorMsg =
+        'No RFCs specified. Please set UPLOAD_RFCS environment variable with pipe-separated RFC values.';
+      logger.error(errorMsg);
+      throw new Error(errorMsg);
+    }
+
+    logger.info('Phase 4: Starting RFC-based upload process...');
+    console.log('🎯 RFC-based Upload Mode');
+    console.log(`📋 Target RFCs: ${appConfig.upload.rfcs.join(', ')}`);
+    console.log('🔍 Searching for files to upload...');
+
+    // First, count total files for the RFCs to show filtering effect
+    const { count: totalRfcFiles, error: countError } = await supabase
+      .from('uploader')
+      .select('*', { count: 'exact', head: true })
+      .in('rfc', appConfig.upload.rfcs)
+      .not('arela_path', 'is', null);
+
+    if (countError) {
+      logger.warn(`Could not count total RFC files: ${countError.message}`);
+    } else {
+      console.log(`📊 Total files for specified RFCs: ${totalRfcFiles || 0}`);
+      logger.info(`Total files for specified RFCs: ${totalRfcFiles || 0}`);
+    }
+
+    // Step 1: Get all pedimento_simplificado records that match the specified RFCs and have arela_path
+    console.log(
+      '🎯 Finding pedimento_simplificado records for specified RFCs...',
+    );
+    const { data: pedimentoRfcRecords, error: pedimentoRfcError } =
+      await supabase
+        .from('uploader')
+        .select('arela_path')
+        .eq('document_type', 'pedimento_simplificado')
+        .in('rfc', appConfig.upload.rfcs)
+        .not('arela_path', 'is', null);
+
+    if (pedimentoRfcError) {
+      const errorMsg = `Error fetching pedimento RFC records: ${pedimentoRfcError.message}`;
+      logger.error(errorMsg);
+      throw new Error(errorMsg);
+    }
+
+    if (!pedimentoRfcRecords || pedimentoRfcRecords.length === 0) {
+      console.log(
+        'ℹ️ No pedimento_simplificado records found for the specified RFCs with arela_path',
+      );
+      logger.info('No pedimento_simplificado records found for specified RFCs');
+      return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+    }
+
+    // Get unique arela_paths from pedimento records
+    const uniqueArelaPaths = [
+      ...new Set(pedimentoRfcRecords.map((r) => r.arela_path)),
+    ];
+    console.log(
+      `📊 Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths for specified RFCs`,
+    );
+    logger.info(
+      `Found ${pedimentoRfcRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
+    );
+
+    // Step 2: Get all files with these arela_paths that haven't been uploaded yet
+    let rfcRecords = [];
+    const chunkSize = 50;
+
+    for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
+      const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
+
+      const { data: chunkFiles, error: chunkError } = await supabase
+        .from('uploader')
+        .select('arela_path')
+        .in('arela_path', pathChunk)
+        .neq('status', 'file-uploaded')
+        .not('arela_path', 'is', null);
+
+      if (chunkError) {
+        const errorMsg = `Error fetching files for arela_paths chunk: ${chunkError.message}`;
+        logger.error(errorMsg);
+        throw new Error(errorMsg);
+      }
+
+      if (chunkFiles && chunkFiles.length > 0) {
+        rfcRecords = rfcRecords.concat(chunkFiles);
+      }
+    }
+
+    if (!rfcRecords || rfcRecords.length === 0) {
+      if (totalRfcFiles && totalRfcFiles > 0) {
+        console.log(
+          `ℹ️ All ${totalRfcFiles} files for the specified RFCs are already uploaded (status: file-uploaded)`,
+        );
+        console.log(' No new files to upload.');
+        logger.info(
+          `All ${totalRfcFiles} files for specified RFCs already uploaded`,
+        );
+      } else {
+        console.log(
+          'ℹ️ No files found for the specified RFCs with arela_path',
+        );
+        console.log(
+          ` Make sure files for RFCs [${appConfig.upload.rfcs.join(', ')}] have been processed and have arela_path values`,
+        );
+        logger.info('No files found for specified RFCs with arela_path');
+      }
+      return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+    }
+
+    // Show filtering effect
+    const uploadableArelaPaths = [
+      ...new Set(rfcRecords.map((r) => r.arela_path)),
+    ];
+    const skipped = (totalRfcFiles || 0) - rfcRecords.length;
+    if (skipped > 0) {
+      console.log(
+        `📊 Found ${rfcRecords.length} files ready for upload (${skipped} already uploaded, skipped)`,
+      );
+    } else {
+      console.log(`📊 Found ${rfcRecords.length} files ready for upload`);
+    }
+    logger.info(
+      `Found ${rfcRecords.length} files ready for upload, ${skipped} skipped`,
+    );
+
+    // Step 3: Get ALL files that have these arela_paths (including supporting documents)
+    let allRelatedFiles = [];
+    const arelaPathChunkSize = 50;
+    const queryBatchSize = 1000;
+
+    console.log(
+      '📥 Fetching all related files (processing arela_paths in chunks to avoid URI limits)...',
+    );
+
+    // Process arela_paths in chunks
+    for (let i = 0; i < uploadableArelaPaths.length; i += arelaPathChunkSize) {
+      const arelaPathChunk = uploadableArelaPaths.slice(
+        i,
+        i + arelaPathChunkSize,
+      );
+      const chunkNumber = Math.floor(i / arelaPathChunkSize) + 1;
+      const totalChunks = Math.ceil(
+        uploadableArelaPaths.length / arelaPathChunkSize,
+      );
+
+      console.log(
+        ` Processing arela_path chunk ${chunkNumber}/${totalChunks} (${arelaPathChunk.length} paths)`,
+      );
+
+      // For each chunk of arela_paths, use pagination to get all related files
+      let hasMore = true;
+      let offset = 0;
+
+      while (hasMore) {
+        const { data: batch, error: queryError } = await supabase
+          .from('uploader')
+          .select('id, original_path, arela_path, filename, rfc, document_type')
+          .in('arela_path', arelaPathChunk)
+          .not('original_path', 'is', null)
+          .neq('status', 'file-uploaded')
+          .range(offset, offset + queryBatchSize - 1);
+
+        if (queryError) {
+          const errorMsg = `Error fetching related files for chunk ${chunkNumber}: ${queryError.message}`;
+          logger.error(errorMsg);
+          throw new Error(errorMsg);
+        }
+
+        if (batch && batch.length > 0) {
+          allRelatedFiles = allRelatedFiles.concat(batch);
+        }
+
+        hasMore = batch && batch.length === queryBatchSize;
+        offset += queryBatchSize;
+      }
+    }
+
+    if (!allRelatedFiles || allRelatedFiles.length === 0) {
+      console.log('ℹ️ No related files found to upload');
+      logger.info('No related files found to upload');
+      return { processedCount: 0, uploadedCount: 0, errorCount: 0 };
+    }
+
+    console.log(`📊 Total files to upload: ${allRelatedFiles.length}`);
+    logger.info(`Total files to upload: ${allRelatedFiles.length}`);
+
+    // Step 4: Upload all related files
+    let totalProcessed = 0;
+    let totalUploaded = 0;
+    let totalErrors = 0;
+    const batchSize = parseInt(options.batchSize) || 10;
+
+    for (let i = 0; i < allRelatedFiles.length; i += batchSize) {
+      const batch = allRelatedFiles.slice(i, i + batchSize);
+
+      for (const file of batch) {
+        try {
+          totalProcessed++;
+
+          // Check if file exists
+          if (!fs.existsSync(file.original_path)) {
+            logger.warn(
+              `File not found: ${file.filename} at ${file.original_path}`,
+            );
+            await supabase
+              .from('uploader')
+              .update({
+                status: 'file-not-found',
+                message: 'File no longer exists at original path',
+              })
+              .eq('id', file.id);
+            totalErrors++;
+            continue;
+          }
+
+          // Upload the file (handle both API and Supabase services)
+          let uploadResult;
+          if (uploadService.getServiceName() === 'Supabase') {
+            // Supabase requires single file upload with uploadPath
+            let uploadPath;
+            if (options.folderStructure && file.arela_path) {
+              // Combine folder structure with arela_path: palco/RFC/Year/Patente/Aduana/Pedimento/filename
+              uploadPath = `uploads/${options.folderStructure}/${file.arela_path}${file.filename}`;
+            } else if (file.arela_path) {
+              // Use existing arela_path: RFC/Year/Patente/Aduana/Pedimento/filename
+              uploadPath = `uploads/${file.arela_path}${file.filename}`;
+            } else {
+              // Fallback to RFC folder
+              uploadPath = `uploads/${file.rfc}/${file.filename}`;
+            }
+
+            uploadResult = await uploadService.upload(
+              [
+                {
+                  path: file.original_path,
+                  name: file.filename,
+                  contentType: 'application/octet-stream',
+                },
+              ],
+              {
+                uploadPath: uploadPath,
+              },
+            );
+            uploadResult = { success: true, data: uploadResult };
+          } else {
+            // API service supports batch uploads and returns normalized response
+            let fullFolderStructure;
+            if (options.folderStructure && file.arela_path) {
+              // Combine folder structure with arela_path: palco/RFC/Year/Patente/Aduana/Pedimento/
+              fullFolderStructure = `${options.folderStructure}/${file.arela_path}`;
+            } else if (file.arela_path) {
+              // Use existing arela_path: RFC/Year/Patente/Aduana/Pedimento/
+              fullFolderStructure = file.arela_path;
+            } else {
+              // Fallback to RFC folder
+              fullFolderStructure = `${file.rfc}/`;
+            }
+
+            uploadResult = await uploadService.upload(
+              [
+                {
+                  path: file.original_path,
+                  name: file.filename,
+                  contentType: 'application/octet-stream',
+                },
+              ],
+              {
+                folderStructure: fullFolderStructure,
+              },
+            );
+          }
+
+          if (uploadResult.success) {
+            // Update database status
+            await supabase
+              .from('uploader')
+              .update({
+                status: 'file-uploaded',
+                message: 'Successfully uploaded to Arela API',
+              })
+              .eq('id', file.id);
+
+            totalUploaded++;
+            logger.info(`Uploaded: ${file.filename}`);
+          } else {
+            await supabase
+              .from('uploader')
+              .update({
+                status: 'upload-error',
+                message: uploadResult.error || 'Upload failed',
+              })
+              .eq('id', file.id);
+
+            totalErrors++;
+            logger.error(
+              `Upload failed: ${file.filename} - ${uploadResult.error}`,
+            );
+          }
+        } catch (error) {
+          totalErrors++;
+          logger.error(
+            `Error processing file ${file.filename}: ${error.message}`,
+          );
+
+          await supabase
+            .from('uploader')
+            .update({
+              status: 'upload-error',
+              message: `Processing error: ${error.message}`,
+            })
+            .eq('id', file.id);
+        }
+      }
+    }
+
+    const result = {
+      processedCount: totalProcessed,
+      uploadedCount: totalUploaded,
+      errorCount: totalErrors,
+    };
+
+    logger.success(
+      `Phase 4 Summary: ${totalProcessed} files processed, ${totalUploaded} uploaded, ${totalErrors} errors`,
+    );
+
+    return result;
+  }
+
+  /**
+   * Get processed file paths from log
+   * @returns {Set<string>} Set of processed file paths
+   */
+  getProcessedPaths() {
+    // This would need to be adapted to work with the LoggingService
+    // For now, return empty set
+    return new Set();
+  }
+
+  /**
+   * Query files that are ready for upload
+   * These are files that have been detected but not yet uploaded
+   * Uses the same RFC filtering logic as uploadFilesByRfc for consistency
+   * @param {Object} options - Query options
+   * @returns {Promise<Array>} Array of files ready for upload
+   */
+  async getFilesReadyForUpload(options = {}) {
+    const supabase = await this.#getSupabaseClient();
+
+    logger.info('Querying files ready for upload...');
+    console.log('🔍 Querying files ready for upload...');
+
+    // Check if UPLOAD_RFCS is configured
+    const uploadRfcs = appConfig.upload.rfcs;
+    if (!uploadRfcs || uploadRfcs.length === 0) {
+      console.log(
+        'ℹ️ No UPLOAD_RFCS configured. Please set UPLOAD_RFCS environment variable to see files ready for upload.',
+      );
+      console.log(
+        ' Example: UPLOAD_RFCS="RFC123456789|RFC987654321|RFC555444333"',
+      );
+      return [];
+    }
+
+    console.log(`🎯 Using RFC filter: ${uploadRfcs.join(', ')}`);
+
+    // Step 1: Find pedimento_simplificado documents for the specified RFCs that have arela_path
+    console.log(
+      '🎯 Finding pedimento_simplificado documents for specified RFCs with arela_path...',
+    );
+    const { data: pedimentoRecords, error: pedimentoError } = await supabase
+      .from('uploader')
+      .select('arela_path')
+      .eq('document_type', 'pedimento_simplificado')
+      .in('rfc', uploadRfcs)
+      .not('arela_path', 'is', null);
+
+    if (pedimentoError) {
+      throw new Error(
+        `Error querying pedimento_simplificado records: ${pedimentoError.message}`,
+      );
+    }
+
+    if (!pedimentoRecords || pedimentoRecords.length === 0) {
+      console.log(
+        'ℹ️ No pedimento_simplificado records with arela_path found',
+      );
+      return [];
+    }
+
+    // Get unique arela_paths
+    const uniqueArelaPaths = [
+      ...new Set(pedimentoRecords.map((r) => r.arela_path)),
+    ];
+    console.log(
+      `📊 Found ${pedimentoRecords.length} pedimento records with ${uniqueArelaPaths.length} unique arela_paths`,
+    );
+
+    // Step 2: Find all related files with these arela_paths that haven't been uploaded yet
+    console.log('🔍 Finding all related files that need to be uploaded...');
+
+    // Process arela_paths in chunks to avoid URI length limits
+    let allReadyFiles = [];
+    const chunkSize = 50;
+
+    for (let i = 0; i < uniqueArelaPaths.length; i += chunkSize) {
+      const pathChunk = uniqueArelaPaths.slice(i, i + chunkSize);
+
+      const { data: chunkFiles, error: chunkError } = await supabase
+        .from('uploader')
+        .select(
+          'id, original_path, arela_path, filename, rfc, document_type, status',
+        )
+        .in('arela_path', pathChunk)
+        .neq('status', 'file-uploaded')
+        .not('original_path', 'is', null);
+
+      if (chunkError) {
+        throw new Error(
+          `Error querying files for arela_paths chunk: ${chunkError.message}`,
+        );
+      }
+
+      if (chunkFiles && chunkFiles.length > 0) {
+        allReadyFiles = allReadyFiles.concat(chunkFiles);
+      }
+    }
+
+    const readyFiles = allReadyFiles;
+
+    console.log(`📊 Found ${readyFiles?.length || 0} files ready for upload`);
+
+    if (readyFiles && readyFiles.length > 0) {
+      // Group by document type for summary
+      const byDocType = readyFiles.reduce((acc, file) => {
+        const docType = file.document_type || 'Unknown';
+        acc[docType] = (acc[docType] || 0) + 1;
+        return acc;
+      }, {});
+
+      console.log('📋 Files by document type:');
+      for (const [docType, count] of Object.entries(byDocType)) {
+        console.log(` ${docType}: ${count} files`);
+      }
+
+      // Group by RFC
+      const byRfc = readyFiles.reduce((acc, file) => {
+        const rfc = file.rfc || 'No RFC';
+        acc[rfc] = (acc[rfc] || 0) + 1;
+        return acc;
+      }, {});
+
+      console.log('📋 Files by RFC:');
+      for (const [rfc, count] of Object.entries(byRfc)) {
+        console.log(` ${rfc}: ${count} files`);
+      }
+    }
+
+    return readyFiles || [];
+  }
+}
+
+// Export singleton instance
+export const databaseService = new DatabaseService();
+export default databaseService;
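
Usage sketch: DatabaseService is exported as a singleton and each phase is a separate method, so a caller is expected to run them in order. The driver below is a minimal, hypothetical sketch rather than documented package API; the import path and sample file are placeholders, the file-object shape ({ path, originalName?, stats? }) is read off insertStatsToUploaderTable, and Phase 4 requires the UPLOAD_RFCS environment variable (pipe-separated RFC values).

// Hypothetical driver for the four-phase pipeline (ES module, top-level await).
import databaseService from './src/services/DatabaseService.js';

// File objects as the service consumes them; stats falls back to fs.statSync(path).
const files = [{ path: '/data/inbox/pedimento-simp-0123.pdf' }];

// Phase 1: record filesystem stats (rows inserted with status 'fs-stats').
await databaseService.insertStatsOnlyToUploaderTable(files, {});

// Phase 2: detect pedimento-simplificado PDFs among 'fs-stats' rows whose
// filename contains 'simp'; rows become 'detected' or 'not-detected'.
await databaseService.detectPedimentosInDatabase({ batchSize: 10 });

// Phase 3: copy each detected pedimento's arela_path folder onto sibling
// rows from the same directory that still lack an arela_path.
await databaseService.propagateArelaPath();

// Phase 4: upload every not-yet-uploaded file under those arela_paths for
// the configured RFCs; successful rows become 'file-uploaded'.
const summary = await databaseService.uploadFilesByRfc({ batchSize: 10 });
console.log(summary); // { processedCount, uploadedCount, errorCount }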
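
Two details are worth noting in the diff itself. First, only the Phase 2 chunk fetches go through #queryWithRetry, which retries errors whose message mentions timeout, canceling statement, or connection (or whose code is PGRST301) with a delay of min(1000 * 2^(attempt - 1), 30000) ms, i.e. 1 s and then 2 s at the default maxRetries of 3; all other Supabase calls fail without retry. Second, uploadFilesByRfc re-imports the configuration as a named export (m.appConfig) while the top of the file imports config.js as a default export, so the two references only agree if config.js provides both.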