@arela/uploader 1.0.5 → 1.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.template +5 -0
- package/package.json +1 -1
- package/src/commands/IdentifyCommand.js +7 -7
- package/src/commands/PushCommand.js +176 -7
- package/src/config/config.js +8 -2
- package/src/index.js +4 -0
- package/src/utils/PathNormalizer.js +9 -7
- package/tests/commands/IdentifyCommand.test.js +2 -2
- package/.env.local +0 -316
package/.env.template
CHANGED

@@ -77,6 +77,11 @@ PUSH_UPLOAD_BATCH_SIZE=10
 # Examples: "archivos", "documents", "storage"
 PUSH_BUCKET=arela
 
+# Folder structure prefix for uploaded files (optional)
+# This prefix is prepended to the arela_path when uploading files
+# Examples: "agencia/cliente", "2024/docs", "imports/batch1"
+PUSH_FOLDER_STRUCTURE=
+
 # =============================================================================
 # PERFORMANCE OPTIMIZATION FOR MULTIPLE API REPLICAS
 # =============================================================================
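The new variable acts as a bucket-level prefix: PushCommand prepends it to each file's arela_path when building the storage destination. A minimal sketch of the composition, using hypothetical values (the arela_path format RFC/Year/Patente/Aduana/Pedimento/ comes from the PushCommand diff below):

// Sketch only - values below are hypothetical, not from a real scan.
const folderStructure = 'agencia/cliente'; // PUSH_FOLDER_STRUCTURE
const arelaPath = 'XAXX010101000/2024/3456/240/4012345/'; // RFC/Year/Patente/Aduana/Pedimento/

const trimmed = arelaPath.endsWith('/') ? arelaPath.slice(0, -1) : arelaPath;
const destination = folderStructure ? `${folderStructure}/${trimmed}` : trimmed;

console.log(destination);
// agencia/cliente/XAXX010101000/2024/3456/240/4012345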
package/package.json
CHANGED

-  "version": "1.0.5",
+  "version": "1.0.7",

package/src/commands/IdentifyCommand.js
CHANGED
@@ -273,7 +273,7 @@ export class IdentifyCommand {
          arelaPath: null,
          detectionError:
            'FILE_NOT_FOUND: File does not exist on filesystem. May have been moved or deleted after scan.',
-
+          isPedimento: null, // Unknown - can't determine
        };
      }

@@ -289,7 +289,7 @@ export class IdentifyCommand {
          rfc: null,
          arelaPath: null,
          detectionError: `FILE_TOO_LARGE: File size ${(stats.size / 1024 / 1024).toFixed(2)}MB exceeds ${maxSizeBytes / 1024 / 1024}MB limit.`,
-
+          isPedimento: null, // Unknown - can't determine
        };
      }

@@ -306,19 +306,19 @@ export class IdentifyCommand {
          rfc: result.rfc,
          arelaPath: result.arelaPath,
          detectionError: result.error,
-
+          isPedimento: true, // Confirmed pedimento
        };
      }

      // If no detection, determine if it's definitely not a pedimento
      // This helps avoid re-processing files we know aren't pedimentos
-      const
+      const isDefinitelyNotPedimento = this.#isDefinitelyNotPedimento(result, file);

      // Build descriptive error message
      let detectionError = null;
      if (result.error) {
        detectionError = `DETECTION_ERROR: ${result.error}`;
-      } else if (
+      } else if (isDefinitelyNotPedimento) {
        detectionError =
          'NOT_PEDIMENTO: File does not match pedimento-simplificado pattern. Missing key markers: "FORMA SIMPLIFICADA DE PEDIMENTO".';
      } else {

@@ -340,7 +340,7 @@ export class IdentifyCommand {
          rfc: result.rfc,
          arelaPath: result.arelaPath,
          detectionError,
-
+          isPedimento: isDefinitelyNotPedimento ? false : null, // false = not pedimento, null = unknown
        };
      } catch (error) {
        logger.warn(

@@ -367,7 +367,7 @@ export class IdentifyCommand {
          rfc: null,
          arelaPath: null,
          detectionError: `${errorCategory}: ${error.message}`,
-
+          isPedimento: null, // Unknown - error occurred
        };
      }
    }),
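Net effect of these hunks: every identify result row now carries a tri-state isPedimento flag - true (confirmed pedimento), false (confirmed not), null (unknown: missing file, size limit, or detection error). A small sketch of how a caller might use the tri-state to pick files worth re-processing (rows are hypothetical):

// Sketch - hypothetical result rows illustrating the tri-state flag.
const results = [
  { id: 1, isPedimento: true },  // confirmed pedimento
  { id: 2, isPedimento: false }, // confirmed NOT a pedimento - skip on re-runs
  { id: 3, isPedimento: null },  // unknown - safe to retry detection
];

const retryable = results.filter((r) => r.isPedimento === null);
console.log(retryable.map((r) => r.id)); // [ 3 ]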
package/src/commands/PushCommand.js
CHANGED

@@ -39,6 +39,15 @@ export class PushCommand {
     const pushConfig = appConfig.getPushConfig();
     const tableName = scanConfig.tableName;
 
+    // Override folderStructure from command option if provided
+    if (options.folderStructure) {
+      // Clean the folder structure: remove leading/trailing slashes
+      pushConfig.folderStructure = options.folderStructure
+        .trim()
+        .replace(/^\/+/, '')
+        .replace(/\/+$/, '');
+    }
+
     // Set API target for scan/push operations
     const scanApiTarget = options.api || options.scanApi || 'default';
     const pushApiTarget = options.pushApi || scanApiTarget;
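The normalization tolerates sloppy input; a quick sketch of what the trim plus the two replace() calls produce:

// Sketch of the cleanup applied to --folder-structure values.
const clean = (s) => s.trim().replace(/^\/+/, '').replace(/\/+$/, '');

console.log(clean(' /agencia/cliente/ ')); // 'agencia/cliente'
console.log(clean('///2024/docs///'));     // '2024/docs'
console.log(clean('imports/batch1'));      // 'imports/batch1' (already clean)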
@@ -57,6 +66,9 @@
     );
     console.log(`📦 Fetch Batch Size: ${options.batchSize}`);
     console.log(`📤 Upload Batch Size: ${options.uploadBatchSize}`);
+    if (pushConfig.folderStructure) {
+      console.log(`📁 Folder Structure Prefix: ${pushConfig.folderStructure}`);
+    }
 
     // Apply filters
     const filters = {

@@ -272,18 +284,17 @@
           break;
         }
 
-        // Upload files in smaller batches
+        // Upload files in smaller batches using new CLI upload endpoint
         for (let i = 0; i < files.length; i += uploadBatchSize) {
           const uploadBatch = files.slice(i, i + uploadBatchSize);
-          const batchResults = await this.#
+          const batchResults = await this.#uploadBatchViaCli(
+            tableName,
             uploadBatch,
             uploadApiConfig,
           );
 
-          // Update
-
-
-          // Update counters
+          // Update counters from API response
+          // Note: The CLI endpoint now handles updating the scan table directly
           batchResults.forEach((result) => {
             results.processed++;
             if (result.uploaded) {

@@ -317,8 +328,166 @@
   }
 
   /**
-   * Upload a batch of files
+   * Upload a batch of files using the new CLI upload endpoint
+   * The endpoint updates the CLI scan table directly
+   * @private
+   */
+  async #uploadBatchViaCli(tableName, files, uploadApiConfig) {
+    const pushConfig = appConfig.getPushConfig();
+    const results = [];
+
+    // Process files one by one (simpler for now, can optimize to true batch later)
+    for (const file of files) {
+      const result = await this.#uploadFileViaCli(
+        tableName,
+        file,
+        uploadApiConfig,
+        pushConfig,
+      );
+      results.push(result);
+    }
+
+    return results;
+  }
+
+  /**
+   * Upload a single file using the CLI upload endpoint
+   * @private
+   */
+  async #uploadFileViaCli(tableName, file, uploadApiConfig, pushConfig) {
+    const result = {
+      id: file.id,
+      uploaded: false,
+      uploadError: null,
+      uploadPath: null,
+      uploadedToStorageId: null,
+    };
+
+    try {
+      // Check if file exists
+      if (!fs.existsSync(file.absolute_path)) {
+        result.uploadError =
+          'FILE_NOT_FOUND: File does not exist on filesystem';
+        // Update the scan table with the error
+        await this.scanApiService.batchUpdateUpload(tableName, [result]);
+        return result;
+      }
+
+      // Get file stats
+      const stats = fs.statSync(file.absolute_path);
+      if (!stats.isFile()) {
+        result.uploadError = 'NOT_A_FILE: Path is not a regular file';
+        await this.scanApiService.batchUpdateUpload(tableName, [result]);
+        return result;
+      }
+
+      // Construct upload path using arela_path
+      // arela_path format: RFC/Year/Patente/Aduana/Pedimento/
+      const uploadPath = `${file.arela_path}${file.file_name}`;
+      result.uploadPath = uploadPath;
+
+      // Create form data for CLI upload endpoint
+      const form = new FormData();
+
+      // Build folder structure: optionally prefix with PUSH_FOLDER_STRUCTURE env var
+      // arela_path already contains the logical path (RFC/Year/Patente/Aduana/Pedimento)
+      // folderStructure from config is the bucket prefix (e.g., "documents" or "uploads/2024")
+      let arelaPath = file.arela_path.endsWith('/')
+        ? file.arela_path.slice(0, -1)
+        : file.arela_path;
+
+      // Prepend folder structure prefix if configured
+      const folderStructure = pushConfig.folderStructure
+        ? `${pushConfig.folderStructure}/${arelaPath}`
+        : arelaPath;
+
+      // Create a read stream with the original filename (no encoding needed)
+      const fileStream = fs.createReadStream(file.absolute_path);
+      form.append('files', fileStream, {
+        filename: file.file_name,
+        contentType: this.#getMimeType(file.file_extension),
+      });
+
+      // Add required fields for CLI upload
+      form.append('tableName', tableName);
+      form.append('fileId', file.id);
+      form.append('folderStructure', folderStructure);
+      form.append('rfc', file.rfc);
+      form.append('bucket', pushConfig.bucket);
+      form.append('autoDetect', 'true');
+      form.append('autoOrganize', 'false');
+      form.append('batchSize', '1');
+      form.append('clientVersion', appConfig.packageVersion);
+
+      // Upload file using new CLI upload endpoint
+      const response = await fetch(
+        `${uploadApiConfig.baseUrl}/api/storage/cli-upload`,
+        {
+          method: 'POST',
+          headers: {
+            'x-api-key': uploadApiConfig.token,
+            ...form.getHeaders(),
+          },
+          body: form,
+        },
+      );
+
+      if (!response.ok) {
+        const errorText = await response.text();
+        result.uploadError = `HTTP ${response.status}: ${errorText}`;
+        logger.error(`✗ Failed: ${file.file_name} - ${result.uploadError}`);
+        return result;
+      }
+
+      const apiResult = await response.json();
+
+      // Check response from CLI upload endpoint
+      if (apiResult.uploaded && apiResult.uploaded.length > 0) {
+        const uploadedFile = apiResult.uploaded[0];
+        result.uploaded = true;
+        result.uploadedToStorageId = uploadedFile.storageId;
+        logger.info(`✓ Uploaded: ${file.file_name} → ${uploadPath}`);
+      } else if (apiResult.errors && apiResult.errors.length > 0) {
+        const error = apiResult.errors[0];
+        result.uploadError = `UPLOAD_FAILED: ${error.error || 'Upload failed'}`;
+        logger.error(`✗ Failed: ${file.file_name} - ${result.uploadError}`);
+      } else {
+        result.uploadError = 'Unknown upload error - no files uploaded';
+        logger.error(`✗ Failed: ${file.file_name} - ${result.uploadError}`);
+      }
+    } catch (error) {
+      result.uploadError = `UPLOAD_ERROR: ${error.message}`;
+      logger.error(`✗ Error uploading ${file.file_name}:`, error.message);
+    }
+
+    return result;
+  }
+
+  /**
+   * Get MIME type from file extension
+   * @private
+   */
+  #getMimeType(extension) {
+    const mimeTypes = {
+      pdf: 'application/pdf',
+      doc: 'application/msword',
+      docx: 'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
+      xls: 'application/vnd.ms-excel',
+      xlsx: 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
+      txt: 'text/plain',
+      jpg: 'image/jpeg',
+      jpeg: 'image/jpeg',
+      png: 'image/png',
+      gif: 'image/gif',
+      xml: 'application/xml',
+    };
+    return mimeTypes[extension?.toLowerCase()] || 'application/octet-stream';
+  }
+
+  /**
+   * Upload a batch of files (legacy - kept for compatibility)
    * @private
+   * @deprecated Use #uploadBatchViaCli instead
    */
   async #uploadBatch(files, uploadApiConfig) {
     const uploadPromises = files.map((file) =>
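Taken together, #uploadFileViaCli defines a small multipart contract with POST {baseUrl}/api/storage/cli-upload: one 'files' part plus metadata fields, answered by JSON of the shape { uploaded: [...], errors: [...] }. A standalone sketch of that request, assuming the form-data npm package and a fetch that accepts its streams (e.g. node-fetch), as the diff itself does; only a subset of the fields is shown, and the file name, table name, and prefix are hypothetical:

// Standalone sketch of the request #uploadFileViaCli builds.
import fs from 'fs';
import FormData from 'form-data';

async function cliUpload(baseUrl, apiKey, filePath) {
  const form = new FormData();
  form.append('files', fs.createReadStream(filePath), {
    filename: 'pedimento.pdf',       // hypothetical
    contentType: 'application/pdf',
  });
  form.append('tableName', 'scan_palco_local_abc123'); // hypothetical scan table
  form.append('bucket', 'arela');
  form.append('folderStructure', 'agencia/cliente/XAXX010101000/2024'); // hypothetical

  const response = await fetch(`${baseUrl}/api/storage/cli-upload`, {
    method: 'POST',
    headers: { 'x-api-key': apiKey, ...form.getHeaders() },
    body: form,
  });
  if (!response.ok) throw new Error(`HTTP ${response.status}`);
  return response.json(); // expected shape: { uploaded: [...], errors: [...] }
}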
package/src/config/config.js
CHANGED

@@ -34,10 +34,10 @@ class Config {
     const __dirname = path.dirname(__filename);
     const packageJsonPath = path.resolve(__dirname, '../../package.json');
     const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
-      return packageJson.version || '1.0.
+      return packageJson.version || '1.0.7';
     } catch (error) {
       console.warn('⚠️ Could not read package.json version, using fallback');
-      return '1.0.
+      return '1.0.7';
     }
   }
 
@@ -313,6 +313,11 @@
       .map((s) => parseInt(s.trim(), 10))
       .filter((y) => !isNaN(y));
 
+    // Clean folder structure: remove leading/trailing slashes
+    const folderStructure = process.env.PUSH_FOLDER_STRUCTURE?.trim()
+      .replace(/^\/+/, '')
+      .replace(/\/+$/, '');
+
     return {
       rfcs: pushRfcs,
       years: pushYears,

@@ -320,6 +325,7 @@
       uploadBatchSize: parseInt(process.env.PUSH_UPLOAD_BATCH_SIZE) || 10,
       bucket:
         process.env.PUSH_BUCKET || process.env.SUPABASE_BUCKET || 'archivos',
+      folderStructure: folderStructure || '', // Prefix for storage path (e.g., "documents" or "uploads/2024")
     };
   }
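One subtlety worth noting: when PUSH_FOLDER_STRUCTURE is unset, the optional chaining leaves folderStructure undefined, and the || '' in the returned object normalizes it to an empty string, so downstream truthiness checks simply skip the prefix. A quick sketch:

// Sketch of the env-to-config normalization.
const raw = undefined; // process.env.PUSH_FOLDER_STRUCTURE when unset
const folderStructure = raw?.trim().replace(/^\/+/, '').replace(/\/+$/, '');
console.log(folderStructure || ''); // '' - no prefix applied
// With PUSH_FOLDER_STRUCTURE='/documents/', the same chain yields 'documents'.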
package/src/index.js
CHANGED

@@ -399,6 +399,10 @@ class ArelaUploaderCLI {
         '--years <years>',
         'Comma-separated years to filter (overrides PUSH_YEARS env var)',
       )
+      .option(
+        '--folder-structure <path>',
+        'Storage path prefix (overrides PUSH_FOLDER_STRUCTURE env var)',
+      )
       .option('--show-stats', 'Show performance statistics')
       .action(async (options) => {
         try {
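Usage then looks like the following (hypothetical prefix; the flag overrides PUSH_FOLDER_STRUCTURE for a single run):

arela push --folder-structure agencia/cliente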
package/src/utils/PathNormalizer.js
CHANGED

@@ -189,7 +189,7 @@ export class PathNormalizer {
 
     const basePrefix = 'scan_';
     const prefix = `${basePrefix}${sanitizedCompany}_${sanitizedServer}_`;
-
+
     // Check if even prefix + hash exceeds limit
     if (prefix.length + hash.length > 63) {
       // Company/server names are too long, need to truncate them too

@@ -197,15 +197,15 @@
       const halfLength = Math.floor(maxCompanyServerLength / 2);
       const companyLength = halfLength;
       const serverLength = maxCompanyServerLength - companyLength;
-
+
       const truncatedCompany = sanitizedCompany.substring(0, companyLength);
       const truncatedServer = sanitizedServer.substring(0, serverLength);
-
+
       tableName = `${basePrefix}${truncatedCompany}_${truncatedServer}_${hash}`;
     } else {
       // Preserve start and end of path, put hash in middle
       const availableSpace = 63 - prefix.length - hash.length - 2; // -2 for underscores around hash
-
+
       if (availableSpace <= 0 || !sanitizedPath) {
         // If no space for path or path is empty, just use hash
         tableName = `${prefix}${hash}`;

@@ -217,11 +217,13 @@
       const halfSpace = Math.floor(availableSpace / 2);
       const startLength = halfSpace;
       const endLength = availableSpace - startLength;
-
+
       // Extract start and end portions of the sanitized path
       const pathStart = sanitizedPath.substring(0, startLength);
-      const pathEnd = sanitizedPath.substring(
-
+      const pathEnd = sanitizedPath.substring(
+        sanitizedPath.length - endLength,
+      );
+
       // Build table name with start, hash, and end
       tableName = `${prefix}${pathStart}_${hash}_${pathEnd}`;
     }
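These hunks are whitespace cleanup plus a reflow of the pathEnd slice; the naming budget itself is unchanged. Table names must fit in 63 characters (presumably PostgreSQL's identifier limit, given the Supabase backend), so the generated name is prefix + pathStart + '_' + hash + '_' + pathEnd. A worked sketch of the arithmetic with hypothetical lengths:

// Worked sketch of the 63-char table-name budget (values hypothetical).
const prefix = 'scan_palco_local_';                          // 17 chars
const hash = 'a1b2c3d4';                                     // 8 chars
const availableSpace = 63 - prefix.length - hash.length - 2; // 36 (-2 for the underscores around the hash)

const sanitizedPath = 'a_very_long_sanitized_path_segment_that_would_overflow_the_limit';
const startLength = Math.floor(availableSpace / 2);          // 18
const endLength = availableSpace - startLength;              // 18
const pathStart = sanitizedPath.substring(0, startLength);
const pathEnd = sanitizedPath.substring(sanitizedPath.length - endLength);

const tableName = `${prefix}${pathStart}_${hash}_${pathEnd}`;
console.log(tableName.length); // 63 - exactly at the limit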
package/tests/commands/IdentifyCommand.test.js
CHANGED

@@ -539,7 +539,7 @@ describe('IdentifyCommand', () => {
         expect.any(String),
         expect.arrayContaining([
           expect.objectContaining({
-
+            isPedimento: false, // Confirmed NOT a pedimento
           }),
         ])
       );

@@ -561,7 +561,7 @@ describe('IdentifyCommand', () => {
         expect.any(String),
         expect.arrayContaining([
           expect.objectContaining({
-
+            isPedimento: null, // Unknown - might be pedimento but missing fields
           }),
         ])
       );
package/.env.local
DELETED

@@ -1,316 +0,0 @@
-# ============================================
-# ARELA UPLOADER CONFIGURATION
-# ============================================
-
-# Localhost Arela API Configuration
-ARELA_API_URL=http://localhost:3010
-ARELA_API_TOKEN=555f1d5c1b5020a132002a6fa201e0074e1b057895776bd33619db0cd26b259b
-
-# Localhost Supabase Configuration
-SUPABASE_URL=http://127.0.0.1:54321
-SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU
-SUPABASE_BUCKET=arela
-
-# ARELA_API_TARGET=default
-
-# Localhost Upload Configuration
-UPLOAD_BASE_PATH=./sample
-UPLOAD_SOURCES=2023|2024
-UPLOAD_RFCS=DTM090831LF0|AKS151005E46|IMS030409FZ0|RDG1107154L7|SHP031226BV2|CSM9301219B4|LIN960124HT8|LME971009SW4|AKM9707151B6|FEL000822AG2|FDM060802J54|MTM9807279B4|AUM9207011CA|MMJ0810145N1|ACC010328EQ6|PED781129JT6|CAD890407NK7|SME140411IK7|JME1903121C2|EIJ110429NF9|PTJ080414TM6|TME050503BM4
-# UPLOAD_RFCS=FDM060802J54|CSM9301219B4
-UPLOAD_YEARS=2023|2024|2025
-
-# =============================================================================
-# SCAN CONFIGURATION (for arela scan command)
-# =============================================================================
-
-# Company identifier for this CLI instance (required)
-# Use a short, descriptive slug for your company/agency/client
-# Examples: "acme_corp", "cliente_123", "agencia_xyz"
-ARELA_COMPANY_SLUG=palco
-
-# Server identifier (required)
-# Use a unique ID for each server/NAS where arela-cli is installed
-# Examples: "nas01", "server-mx", "storage-01"
-ARELA_SERVER_ID=local
-
-# Base path label (optional, auto-derived from UPLOAD_BASE_PATH if not set)
-# Short label describing the base path being scanned
-# Examples: "data", "documents", "archive"
-ARELA_BASE_PATH_LABEL=
-
-# System file patterns to exclude from scan (comma-separated)
-# These files will be filtered before uploading stats to reduce payload
-SCAN_EXCLUDE_PATTERNS=.DS_Store,Thumbs.db,desktop.ini,__pycache__,.pyc,.tmp,.swp,$RECYCLE.BIN,System Volume Information,~$*
-
-# Batch size for scan operations (default: 2000 records per API call)
-SCAN_BATCH_SIZE=2000
-
-# Directory depth level for creating separate tables (default: 0)
-# 0 = single table for entire base path
-# 1 = one table per first-level subdirectory
-# 2 = one table per second-level subdirectory, etc.
-# Example: with level=1 and base=/data, creates tables for /data/folder1, /data/folder2, etc.
-SCAN_DIRECTORY_LEVEL=0
-
-# =============================================================================
-# PUSH CONFIGURATION (for arela push command)
-# =============================================================================
-
-# Filter files to upload by RFC (pipe-separated, optional)
-# If not set, all files with arela_path will be uploaded
-# Examples: "RFC123456ABC|RFC789012DEF"
-PUSH_RFCS=
-
-# Filter files to upload by year (pipe-separated, optional)
-# If not set, all files with arela_path will be uploaded
-# Examples: "2023|2024|2025"
-PUSH_YEARS=
-
-# Batch size for fetching files from database (default: 100)
-PUSH_BATCH_SIZE=100
-
-# Concurrent upload batch size (default: 10)
-# Number of files to upload simultaneously
-PUSH_UPLOAD_BATCH_SIZE=10
-
-# Storage bucket for uploaded files (optional, defaults to SUPABASE_BUCKET)
-# Examples: "archivos", "documents", "storage"
-PUSH_BUCKET=cli
-
-
-# =============================================================================
-# PERFORMANCE OPTIMIZATION FOR MULTIPLE API REPLICAS
-# =============================================================================
-
-# API Connection Configuration
-# Set this to match your number of API replicas (e.g., if you have 10 API instances, set to 10)
-MAX_API_CONNECTIONS=10
-
-# API Connection Timeout (milliseconds)
-API_CONNECTION_TIMEOUT=60000
-
-# Batch Processing Configuration
-# Files processed concurrently per batch (should be >= MAX_API_CONNECTIONS for best performance)
-BATCH_SIZE=100
-
-# Delay between batches (0 for maximum speed)
-BATCH_DELAY=0
-
-# Source Processing Concurrency
-# Number of upload sources/folders to process simultaneously
-MAX_CONCURRENT_SOURCES=2
-
-# API Retry Configuration
-# Maximum number of retry attempts for failed API requests
-API_MAX_RETRIES=3
-
-# Enable exponential backoff for retries (true/false)
-# When true, retry delays increase: 1s, 2s, 4s, 8s, 16s
-# When false, uses fixed delay (API_RETRY_DELAY)
-API_RETRY_EXPONENTIAL_BACKOFF=true
-
-# Fixed retry delay in milliseconds (only used if exponential backoff is disabled)
-API_RETRY_DELAY=1000
-
-# =============================================================================
-# EXAMPLE CONFIGURATIONS FOR DIFFERENT SCENARIOS
-# =============================================================================
-
-# For 10 API Replicas (High Performance Setup):
-# MAX_API_CONNECTIONS=10
-# BATCH_SIZE=100
-# MAX_CONCURRENT_SOURCES=3
-# BATCH_DELAY=0
-
-# For 5 API Replicas (Medium Performance Setup):
-# MAX_API_CONNECTIONS=5
-# BATCH_SIZE=50
-# MAX_CONCURRENT_SOURCES=2
-# BATCH_DELAY=0
-
-# For 1 API Instance (Single Instance Setup):
-# MAX_API_CONNECTIONS=5
-# BATCH_SIZE=20
-# MAX_CONCURRENT_SOURCES=1
-# BATCH_DELAY=100
-
-# =============================================================================
-# LOGGING AND MONITORING
-# =============================================================================
-
-# Progress bar update frequency
-PROGRESS_UPDATE_INTERVAL=10
-
-# Enable verbose logging (true/false)
-VERBOSE_LOGGING=false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-# ============================================
-
-# # Cloud Service Arela API Configuration
-# # ARELA_API_URL=https://api.aws.arela.com.mx
-# # ARELA_API_TOKEN=6bd75c5b3699ecf19e6726c10ae88ae0528f0b72d6c10f8b284f92563d3822a7
-
-# # # Cloud Service Supabase Configuration
-# SUPABASE_URL=https://qlospyfsbwvkskivmsgq.supabase.co
-# SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InFsb3NweWZzYnd2a3NraXZtc2dxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NTU5MDg2NjUsImV4cCI6MjA3MTQ4NDY2NX0.BrqcCLxTmpU6Swl7h3gam6TeW4jVf4WssMbRm0sH7l4
-# SUPABASE_BUCKET=zips
-
-# # # Cloud Service Upload Configuration
-# UPLOAD_BASE_PATH=./sample
-# UPLOAD_SOURCES=zips
-# # UPLOAD_RFCS=AKS151005E46|IMS030409FZ0|RDG1107154L7|SHP031226BV2|CSM9301219B4|LIN960124HT8|LME971009SW4|AKM9707151B6|FEL000822AG2|FDM060802J54|MTM9807279B4|AUM9207011CA|MMJ0810145N1|ACC010328EQ6|PED781129JT6|CAD890407NK7|SME140411IK7|JME1903121C2|EIJ110429NF9|PTJ080414TM6|TME050503BM4
-# UPLOAD_RFCS=KTJ931117P55|AUM9207011CA
-# UPLOAD_YEARS=2023|
-
-# # =============================================================================
-# # PERFORMANCE OPTIMIZATION FOR MULTIPLE API REPLICAS
-# # =============================================================================
-
-# # API Connection Configuration
-# # Set this to match your number of API replicas (e.g., if you have 10 API instances, set to 10)
-# MAX_API_CONNECTIONS=10
-
-# # API Connection Timeout (milliseconds)
-# API_CONNECTION_TIMEOUT=60000
-
-# # Batch Processing Configuration
-# # Files processed concurrently per batch (should be >= MAX_API_CONNECTIONS for best performance)
-# BATCH_SIZE=100
-
-# # Delay between batches (0 for maximum speed)
-# BATCH_DELAY=0
-
-# # Source Processing Concurrency
-# # Number of upload sources/folders to process simultaneously
-# MAX_CONCURRENT_SOURCES=2
-
-# # =============================================================================
-# # EXAMPLE CONFIGURATIONS FOR DIFFERENT SCENARIOS
-# # =============================================================================
-
-# # For 10 API Replicas (High Performance Setup):
-# # MAX_API_CONNECTIONS=10
-# # BATCH_SIZE=100
-# # MAX_CONCURRENT_SOURCES=3
-# # BATCH_DELAY=0
-
-# # For 5 API Replicas (Medium Performance Setup):
-# # MAX_API_CONNECTIONS=5
-# # BATCH_SIZE=50
-# # MAX_CONCURRENT_SOURCES=2
-# # BATCH_DELAY=0
-
-# # For 1 API Instance (Single Instance Setup):
-# # MAX_API_CONNECTIONS=5
-# # BATCH_SIZE=20
-# # MAX_CONCURRENT_SOURCES=1
-# # BATCH_DELAY=100
-
-# # =============================================================================
-# # LOGGING AND MONITORING
-# # =============================================================================
-
-# # Progress bar update frequency
-# PROGRESS_UPDATE_INTERVAL=10
-
-# # Enable verbose logging (true/false)
-# VERBOSE_LOGGING=false
-
-# # ============================================
-
-
-
-# =============================================================================
-# WATCH MODE CONFIGURATION
-# =============================================================================
-
-# Enable watch mode (true/false)
-WATCH_ENABLED=true
-
-# Configuration of directories to watch (JSON format)
-# Each directory can have its own folderStructure to organize files in the bucket
-# Format: {"path/dir1":"structure-1","path/dir2":"structure-2"}
-WATCH_DIRECTORY_CONFIGS={"./sample/watcher":"prueba-watcher"}
-
-# Upload strategy (options: individual|batch|full-structure)
-# - individual: Uploads only the most recently modified file
-# - batch: Uploads a batch of N recent files
-# - full-structure: Uploads the complete folder structure
-WATCH_STRATEGY=batch
-
-# Debouncing in milliseconds (wait between events before processing)
-WATCH_DEBOUNCE_MS=1000
-
-# Batch size for the batch strategy
-WATCH_BATCH_SIZE=10
-
-# Use polling instead of native filesystem events
-# Useful for remote filesystems or NFS
-WATCH_USE_POLLING=false
-
-# Polling interval in milliseconds (only if WATCH_USE_POLLING=true)
-WATCH_POLL_INTERVAL=100
-
-# Stability threshold in ms (wait until the file stops changing)
-WATCH_STABILITY_THRESHOLD=300
-
-# Patterns to ignore (comma-separated, used as regex)
-WATCH_IGNORE_PATTERNS=.tmp,.bak,*.swp
-
-# Automatic document type detection
-WATCH_AUTO_DETECT=true
-
-# Automatic file organization
-WATCH_AUTO_ORGANIZE=true
-
-# =============================================================================
-# WATCH MODE - AUTOMATIC PROCESSING PIPELINE
-# =============================================================================
-#
-# The automatic pipeline runs the following sequence when a new file is detected:
-# 1. Stats Collection → stats --stats-only (collects file information)
-# 2. PDF Detection → detect --detect-pdfs (identifies simplified pedimentos)
-# 3. Path Propagation → detect --propagate-arela-path (propagates to related documents)
-# 4. RFC Upload → upload --upload-by-rfc --folder-structure (uploads with structure)
-#
-# The pipeline is enabled automatically in watch mode and uses the folderStructure
-# defined for each WATCH_DIRECTORY_CONFIGS entry
-#
-# To disable it from the CLI, use: arela watch --no-auto-processing
-
-# =============================================================================
-# LOGGING AND MONITORING
-# =============================================================================
-
-# Progress bar update frequency
-PROGRESS_UPDATE_INTERVAL=10
-
-# Enable verbose logging (true/false)
-VERBOSE_LOGGING=false
-
-# ============================================