@arela/uploader 1.0.6 → 1.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.template CHANGED
@@ -77,6 +77,11 @@ PUSH_UPLOAD_BATCH_SIZE=10
  # Examples: "archivos", "documents", "storage"
  PUSH_BUCKET=arela

+ # Folder structure prefix for uploaded files (optional)
+ # This prefix is prepended to the arela_path when uploading files
+ # Examples: "agencia/cliente", "2024/docs", "imports/batch1"
+ PUSH_FOLDER_STRUCTURE=
+
  # =============================================================================
  # PERFORMANCE OPTIMIZATION FOR MULTIPLE API REPLICAS
  # =============================================================================
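
Note: the new PUSH_FOLDER_STRUCTURE value acts as a bucket-side prefix that is joined in front of each file's arela_path at upload time. A minimal sketch of the resulting storage path, with hypothetical sample values:

// Sketch: how the optional prefix shapes the final storage path.
// The sample arela_path (RFC/Year/Patente/Aduana/Pedimento) is hypothetical.
const prefix = 'agencia/cliente'; // PUSH_FOLDER_STRUCTURE
const arelaPath = 'RFC123456ABC/2024/3840/070/24-1234567';
const storagePath = prefix ? `${prefix}/${arelaPath}` : arelaPath;
console.log(storagePath); // agencia/cliente/RFC123456ABC/2024/3840/070/24-1234567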
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
    "name": "@arela/uploader",
-   "version": "1.0.6",
+   "version": "1.0.7",
    "description": "CLI to upload files/directories to Arela",
    "bin": {
      "arela": "./src/index.js"
@@ -273,7 +273,7 @@ export class IdentifyCommand {
        arelaPath: null,
        detectionError:
          'FILE_NOT_FOUND: File does not exist on filesystem. May have been moved or deleted after scan.',
-       isNotPedimento: false,
+       isPedimento: null, // Unknown - can't determine
      };
    }

@@ -289,7 +289,7 @@ export class IdentifyCommand {
        rfc: null,
        arelaPath: null,
        detectionError: `FILE_TOO_LARGE: File size ${(stats.size / 1024 / 1024).toFixed(2)}MB exceeds ${maxSizeBytes / 1024 / 1024}MB limit.`,
-       isNotPedimento: false,
+       isPedimento: null, // Unknown - can't determine
      };
    }

@@ -306,19 +306,19 @@ export class IdentifyCommand {
          rfc: result.rfc,
          arelaPath: result.arelaPath,
          detectionError: result.error,
-         isNotPedimento: false,
+         isPedimento: true, // Confirmed pedimento
        };
      }

      // If no detection, determine if it's definitely not a pedimento
      // This helps avoid re-processing files we know aren't pedimentos
-     const isNotPedimento = this.#isDefinitelyNotPedimento(result, file);
+     const isDefinitelyNotPedimento = this.#isDefinitelyNotPedimento(result, file);

      // Build descriptive error message
      let detectionError = null;
      if (result.error) {
        detectionError = `DETECTION_ERROR: ${result.error}`;
-     } else if (isNotPedimento) {
+     } else if (isDefinitelyNotPedimento) {
        detectionError =
          'NOT_PEDIMENTO: File does not match pedimento-simplificado pattern. Missing key markers: "FORMA SIMPLIFICADA DE PEDIMENTO".';
      } else {
@@ -340,7 +340,7 @@ export class IdentifyCommand {
        rfc: result.rfc,
        arelaPath: result.arelaPath,
        detectionError,
-       isNotPedimento,
+       isPedimento: isDefinitelyNotPedimento ? false : null, // false = not pedimento, null = unknown
      };
    } catch (error) {
      logger.warn(
@@ -367,7 +367,7 @@ export class IdentifyCommand {
        rfc: null,
        arelaPath: null,
        detectionError: `${errorCategory}: ${error.message}`,
-       isNotPedimento: false,
+       isPedimento: null, // Unknown - error occurred
      };
    }
  }),
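
Taken together, these hunks replace the boolean isNotPedimento with a tri-state isPedimento: true for a confirmed pedimento, false when the file definitely does not match, and null when detection could not run (missing file, size limit, or an error). A minimal sketch of how a consumer could use the flag; the shouldRetryDetection helper is hypothetical and not part of the package:

// Hypothetical helper illustrating the tri-state semantics:
//   true  -> detection succeeded; confirmed pedimento
//   false -> confirmed NOT a pedimento; safe to skip on future runs
//   null  -> unknown (file missing, too large, or detection error); worth retrying
function shouldRetryDetection(record) {
  return record.isPedimento === null;
}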
@@ -39,6 +39,15 @@ export class PushCommand {
    const pushConfig = appConfig.getPushConfig();
    const tableName = scanConfig.tableName;

+   // Override folderStructure from command option if provided
+   if (options.folderStructure) {
+     // Clean the folder structure: remove leading/trailing slashes
+     pushConfig.folderStructure = options.folderStructure
+       .trim()
+       .replace(/^\/+/, '')
+       .replace(/\/+$/, '');
+   }
+
    // Set API target for scan/push operations
    const scanApiTarget = options.api || options.scanApi || 'default';
    const pushApiTarget = options.pushApi || scanApiTarget;
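
The CLI override applies the same normalization as Config#getPushConfig below (trim, then strip leading and trailing slashes), so the prefix always joins cleanly with arela_path. A quick sketch with hypothetical inputs:

// Sketch of the prefix normalization (inputs are hypothetical):
const clean = (s) => s.trim().replace(/^\/+/, '').replace(/\/+$/, '');
console.log(clean('/agencia/cliente/')); // 'agencia/cliente'
console.log(clean('  uploads/2024 '));   // 'uploads/2024'
console.log(clean('///'));               // '' (treated as no prefix)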
@@ -57,6 +66,9 @@ export class PushCommand {
    );
    console.log(`📦 Fetch Batch Size: ${options.batchSize}`);
    console.log(`📤 Upload Batch Size: ${options.uploadBatchSize}`);
+   if (pushConfig.folderStructure) {
+     console.log(`📁 Folder Structure Prefix: ${pushConfig.folderStructure}`);
+   }

    // Apply filters
    const filters = {
@@ -377,23 +389,29 @@ export class PushCommand {
    // Create form data for CLI upload endpoint
    const form = new FormData();

-   // Encode fileId and folderStructure in the filename
-   // Format: [fileId][folderStructure]filename
-   const folderStructure = file.arela_path.endsWith('/')
+   // Build folder structure: optionally prefix with PUSH_FOLDER_STRUCTURE env var
+   // arela_path already contains the logical path (RFC/Year/Patente/Aduana/Pedimento)
+   // folderStructure from config is the bucket prefix (e.g., "documents" or "uploads/2024")
+   let arelaPath = file.arela_path.endsWith('/')
      ? file.arela_path.slice(0, -1)
      : file.arela_path;

-   const encodedFilename = `[${file.id}][${folderStructure}]${file.file_name}`;
+   // Prepend folder structure prefix if configured
+   const folderStructure = pushConfig.folderStructure
+     ? `${pushConfig.folderStructure}/${arelaPath}`
+     : arelaPath;

-   // Create a read stream with the encoded filename
+   // Create a read stream with the original filename (no encoding needed)
    const fileStream = fs.createReadStream(file.absolute_path);
    form.append('files', fileStream, {
-     filename: encodedFilename,
+     filename: file.file_name,
      contentType: this.#getMimeType(file.file_extension),
    });

    // Add required fields for CLI upload
    form.append('tableName', tableName);
+   form.append('fileId', file.id);
+   form.append('folderStructure', folderStructure);
    form.append('rfc', file.rfc);
    form.append('bucket', pushConfig.bucket);
    form.append('autoDetect', 'true');
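
This hunk changes the upload contract: instead of smuggling the file id and storage path inside the filename as [fileId][folderStructure]filename, the CLI now sends them as explicit multipart fields and leaves the filename untouched. A minimal sketch of the new payload, using the form-data package as the diff does; the sample file object and values are hypothetical:

import fs from 'fs';
import FormData from 'form-data';

// Hypothetical file record; real records come from the scan table.
const file = { id: 42, file_name: 'pedimento.pdf', absolute_path: '/data/pedimento.pdf' };

const form = new FormData();
form.append('files', fs.createReadStream(file.absolute_path), {
  filename: file.file_name, // plain filename - no [fileId][path] encoding
  contentType: 'application/pdf',
});
form.append('fileId', String(file.id));
form.append('folderStructure', 'agencia/cliente/RFC123456ABC/2024');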
@@ -34,10 +34,10 @@ class Config {
      const __dirname = path.dirname(__filename);
      const packageJsonPath = path.resolve(__dirname, '../../package.json');
      const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
-     return packageJson.version || '1.0.6';
+     return packageJson.version || '1.0.7';
    } catch (error) {
      console.warn('⚠️ Could not read package.json version, using fallback');
-     return '1.0.6';
+     return '1.0.7';
    }
  }

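Only the fallback version strings change here; the context lines rely on the standard ESM replacement for __dirname. For reference, a sketch of that shim as commonly written (the fileURLToPath line sits above the visible hunk and is assumed):

import path from 'path';
import { fileURLToPath } from 'url';

// Standard ESM shim for __filename/__dirname (assumed from the context lines):
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
const packageJsonPath = path.resolve(__dirname, '../../package.json');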
@@ -313,6 +313,11 @@ class Config {
        .map((s) => parseInt(s.trim(), 10))
        .filter((y) => !isNaN(y));

+     // Clean folder structure: remove leading/trailing slashes
+     const folderStructure = process.env.PUSH_FOLDER_STRUCTURE?.trim()
+       .replace(/^\/+/, '')
+       .replace(/\/+$/, '');
+
      return {
        rfcs: pushRfcs,
        years: pushYears,
@@ -320,6 +325,7 @@ class Config {
        uploadBatchSize: parseInt(process.env.PUSH_UPLOAD_BATCH_SIZE) || 10,
        bucket:
          process.env.PUSH_BUCKET || process.env.SUPABASE_BUCKET || 'archivos',
+       folderStructure: folderStructure || '', // Prefix for storage path (e.g., "documents" or "uploads/2024")
      };
    }

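Note the optional chaining: when PUSH_FOLDER_STRUCTURE is unset, the whole ?.trim()… chain evaluates to undefined, and the folderStructure || '' fallback keeps the config value a string. A quick sketch of both branches with hypothetical values:

// Unset: the chain short-circuits to undefined, and the config falls back to ''.
delete process.env.PUSH_FOLDER_STRUCTURE;
let prefix = process.env.PUSH_FOLDER_STRUCTURE?.trim()
  .replace(/^\/+/, '')
  .replace(/\/+$/, '');
console.log(prefix || ''); // ''

// Set with stray slashes and whitespace: normalized to a clean prefix.
process.env.PUSH_FOLDER_STRUCTURE = ' /imports/batch1/ ';
prefix = process.env.PUSH_FOLDER_STRUCTURE.trim()
  .replace(/^\/+/, '')
  .replace(/\/+$/, '');
console.log(prefix); // 'imports/batch1'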
package/src/index.js CHANGED
@@ -399,6 +399,10 @@ class ArelaUploaderCLI {
        '--years <years>',
        'Comma-separated years to filter (overrides PUSH_YEARS env var)',
      )
+     .option(
+       '--folder-structure <path>',
+       'Storage path prefix (overrides PUSH_FOLDER_STRUCTURE env var)',
+     )
      .option('--show-stats', 'Show performance statistics')
      .action(async (options) => {
        try {
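
Commander camel-cases the new flag, which is why PushCommand reads it as options.folderStructure. A minimal sketch of that mapping, assuming the commander package this CLI already uses:

import { Command } from 'commander';

const program = new Command();
program.option('--folder-structure <path>', 'Storage path prefix');
program.parse(['node', 'arela', '--folder-structure', '/uploads/2024/']);
console.log(program.opts().folderStructure); // '/uploads/2024/' (cleaned later by PushCommand)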
@@ -539,7 +539,7 @@ describe('IdentifyCommand', () => {
      expect.any(String),
      expect.arrayContaining([
        expect.objectContaining({
-         isNotPedimento: true,
+         isPedimento: false, // Confirmed NOT a pedimento
        }),
      ])
    );
@@ -561,7 +561,7 @@ describe('IdentifyCommand', () => {
      expect.any(String),
      expect.arrayContaining([
        expect.objectContaining({
-         isNotPedimento: false,
+         isPedimento: null, // Unknown - might be pedimento but missing fields
        }),
      ])
    );
package/.env.local DELETED
@@ -1,316 +0,0 @@
- # ============================================
- # ARELA UPLOADER CONFIGURATION
- # ============================================
-
- # Localhost Arela API Configuration
- ARELA_API_URL=http://localhost:3010
- ARELA_API_TOKEN=555f1d5c1b5020a132002a6fa201e0074e1b057895776bd33619db0cd26b259b
-
- # Localhost Supabase Configuration
- SUPABASE_URL=http://127.0.0.1:54321
- SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZS1kZW1vIiwicm9sZSI6InNlcnZpY2Vfcm9sZSIsImV4cCI6MTk4MzgxMjk5Nn0.EGIM96RAZx35lJzdJsyH-qQwv8Hdp7fsn3W0YpN81IU
- SUPABASE_BUCKET=arela
-
- # ARELA_API_TARGET=default
-
- # Localhost Upload Configuration
- UPLOAD_BASE_PATH=./sample
- UPLOAD_SOURCES=2023|2024
- UPLOAD_RFCS=DTM090831LF0|AKS151005E46|IMS030409FZ0|RDG1107154L7|SHP031226BV2|CSM9301219B4|LIN960124HT8|LME971009SW4|AKM9707151B6|FEL000822AG2|FDM060802J54|MTM9807279B4|AUM9207011CA|MMJ0810145N1|ACC010328EQ6|PED781129JT6|CAD890407NK7|SME140411IK7|JME1903121C2|EIJ110429NF9|PTJ080414TM6|TME050503BM4
- # UPLOAD_RFCS=FDM060802J54|CSM9301219B4
- UPLOAD_YEARS=2023|2024|2025
-
- # =============================================================================
- # SCAN CONFIGURATION (for arela scan command)
- # =============================================================================
-
- # Company identifier for this CLI instance (required)
- # Use a short, descriptive slug for your company/agency/client
- # Examples: "acme_corp", "cliente_123", "agencia_xyz"
- ARELA_COMPANY_SLUG=palco
-
- # Server identifier (required)
- # Use a unique ID for each server/NAS where arela-cli is installed
- # Examples: "nas01", "server-mx", "storage-01"
- ARELA_SERVER_ID=local
-
- # Base path label (optional, auto-derived from UPLOAD_BASE_PATH if not set)
- # Short label describing the base path being scanned
- # Examples: "data", "documents", "archive"
- ARELA_BASE_PATH_LABEL=
-
- # System file patterns to exclude from scan (comma-separated)
- # These files will be filtered before uploading stats to reduce payload
- SCAN_EXCLUDE_PATTERNS=.DS_Store,Thumbs.db,desktop.ini,__pycache__,.pyc,.tmp,.swp,$RECYCLE.BIN,System Volume Information,~$*
-
- # Batch size for scan operations (default: 2000 records per API call)
- SCAN_BATCH_SIZE=2000
-
- # Directory depth level for creating separate tables (default: 0)
- # 0 = single table for entire base path
- # 1 = one table per first-level subdirectory
- # 2 = one table per second-level subdirectory, etc.
- # Example: with level=1 and base=/data, creates tables for /data/folder1, /data/folder2, etc.
- SCAN_DIRECTORY_LEVEL=0
-
- # =============================================================================
- # PUSH CONFIGURATION (for arela push command)
- # =============================================================================
-
- # Filter files to upload by RFC (pipe-separated, optional)
- # If not set, all files with arela_path will be uploaded
- # Examples: "RFC123456ABC|RFC789012DEF"
- PUSH_RFCS=
-
- # Filter files to upload by year (pipe-separated, optional)
- # If not set, all files with arela_path will be uploaded
- # Examples: "2023|2024|2025"
- PUSH_YEARS=
-
- # Batch size for fetching files from database (default: 100)
- PUSH_BATCH_SIZE=100
-
- # Concurrent upload batch size (default: 10)
- # Number of files to upload simultaneously
- PUSH_UPLOAD_BATCH_SIZE=10
-
- # Storage bucket for uploaded files (optional, defaults to SUPABASE_BUCKET)
- # Examples: "archivos", "documents", "storage"
- PUSH_BUCKET=cli
-
-
- # =============================================================================
- # PERFORMANCE OPTIMIZATION FOR MULTIPLE API REPLICAS
- # =============================================================================
-
- # API Connection Configuration
- # Set this to match your number of API replicas (e.g., if you have 10 API instances, set to 10)
- MAX_API_CONNECTIONS=10
-
- # API Connection Timeout (milliseconds)
- API_CONNECTION_TIMEOUT=60000
-
- # Batch Processing Configuration
- # Files processed concurrently per batch (should be >= MAX_API_CONNECTIONS for best performance)
- BATCH_SIZE=100
-
- # Delay between batches (0 for maximum speed)
- BATCH_DELAY=0
-
- # Source Processing Concurrency
- # Number of upload sources/folders to process simultaneously
- MAX_CONCURRENT_SOURCES=2
-
- # API Retry Configuration
- # Maximum number of retry attempts for failed API requests
- API_MAX_RETRIES=3
-
- # Enable exponential backoff for retries (true/false)
- # When true, retry delays increase: 1s, 2s, 4s, 8s, 16s
- # When false, uses fixed delay (API_RETRY_DELAY)
- API_RETRY_EXPONENTIAL_BACKOFF=true
-
- # Fixed retry delay in milliseconds (only used if exponential backoff is disabled)
- API_RETRY_DELAY=1000
-
- # =============================================================================
- # EXAMPLE CONFIGURATIONS FOR DIFFERENT SCENARIOS
- # =============================================================================
-
- # For 10 API Replicas (High Performance Setup):
- # MAX_API_CONNECTIONS=10
- # BATCH_SIZE=100
- # MAX_CONCURRENT_SOURCES=3
- # BATCH_DELAY=0
-
- # For 5 API Replicas (Medium Performance Setup):
- # MAX_API_CONNECTIONS=5
- # BATCH_SIZE=50
- # MAX_CONCURRENT_SOURCES=2
- # BATCH_DELAY=0
-
- # For 1 API Instance (Single Instance Setup):
- # MAX_API_CONNECTIONS=5
- # BATCH_SIZE=20
- # MAX_CONCURRENT_SOURCES=1
- # BATCH_DELAY=100
-
- # =============================================================================
- # LOGGING AND MONITORING
- # =============================================================================
-
- # Progress bar update frequency
- PROGRESS_UPDATE_INTERVAL=10
-
- # Enable verbose logging (true/false)
- VERBOSE_LOGGING=false
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
- # ============================================
-
- # # Cloud Service Arela API Configuration
- # # ARELA_API_URL=https://api.aws.arela.com.mx
- # # ARELA_API_TOKEN=6bd75c5b3699ecf19e6726c10ae88ae0528f0b72d6c10f8b284f92563d3822a7
-
- # # # Cloud Service Supabase Configuration
- # SUPABASE_URL=https://qlospyfsbwvkskivmsgq.supabase.co
- # SUPABASE_KEY=eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJpc3MiOiJzdXBhYmFzZSIsInJlZiI6InFsb3NweWZzYnd2a3NraXZtc2dxIiwicm9sZSI6ImFub24iLCJpYXQiOjE3NTU5MDg2NjUsImV4cCI6MjA3MTQ4NDY2NX0.BrqcCLxTmpU6Swl7h3gam6TeW4jVf4WssMbRm0sH7l4
- # SUPABASE_BUCKET=zips
-
- # # # Cloud Service Upload Configuration
- # UPLOAD_BASE_PATH=./sample
- # UPLOAD_SOURCES=zips
- # # UPLOAD_RFCS=AKS151005E46|IMS030409FZ0|RDG1107154L7|SHP031226BV2|CSM9301219B4|LIN960124HT8|LME971009SW4|AKM9707151B6|FEL000822AG2|FDM060802J54|MTM9807279B4|AUM9207011CA|MMJ0810145N1|ACC010328EQ6|PED781129JT6|CAD890407NK7|SME140411IK7|JME1903121C2|EIJ110429NF9|PTJ080414TM6|TME050503BM4
- # UPLOAD_RFCS=KTJ931117P55|AUM9207011CA
- # UPLOAD_YEARS=2023|
-
- # # =============================================================================
- # # PERFORMANCE OPTIMIZATION FOR MULTIPLE API REPLICAS
- # # =============================================================================
-
- # # API Connection Configuration
- # # Set this to match your number of API replicas (e.g., if you have 10 API instances, set to 10)
- # MAX_API_CONNECTIONS=10
-
- # # API Connection Timeout (milliseconds)
- # API_CONNECTION_TIMEOUT=60000
-
- # # Batch Processing Configuration
- # # Files processed concurrently per batch (should be >= MAX_API_CONNECTIONS for best performance)
- # BATCH_SIZE=100
-
- # # Delay between batches (0 for maximum speed)
- # BATCH_DELAY=0
-
- # # Source Processing Concurrency
- # # Number of upload sources/folders to process simultaneously
- # MAX_CONCURRENT_SOURCES=2
-
- # # =============================================================================
- # # EXAMPLE CONFIGURATIONS FOR DIFFERENT SCENARIOS
- # # =============================================================================
-
- # # For 10 API Replicas (High Performance Setup):
- # # MAX_API_CONNECTIONS=10
- # # BATCH_SIZE=100
- # # MAX_CONCURRENT_SOURCES=3
- # # BATCH_DELAY=0
-
- # # For 5 API Replicas (Medium Performance Setup):
- # # MAX_API_CONNECTIONS=5
- # # BATCH_SIZE=50
- # # MAX_CONCURRENT_SOURCES=2
- # # BATCH_DELAY=0
-
- # # For 1 API Instance (Single Instance Setup):
- # # MAX_API_CONNECTIONS=5
- # # BATCH_SIZE=20
- # # MAX_CONCURRENT_SOURCES=1
- # # BATCH_DELAY=100
-
- # # =============================================================================
- # # LOGGING AND MONITORING
- # # =============================================================================
-
- # # Progress bar update frequency
- # PROGRESS_UPDATE_INTERVAL=10
-
- # # Enable verbose logging (true/false)
- # VERBOSE_LOGGING=false
-
- # # ============================================
-
-
-
- # =============================================================================
- # WATCH MODE CONFIGURATION
- # =============================================================================
-
- # Enable watch mode (true/false)
- WATCH_ENABLED=true
-
- # Directories to watch (JSON format)
- # Each directory can have its own folderStructure to organize files in the bucket
- # Format: {"path/dir1":"structure-1","path/dir2":"structure-2"}
- WATCH_DIRECTORY_CONFIGS={"./sample/watcher":"prueba-watcher"}
-
- # Upload strategy (options: individual|batch|full-structure)
- # - individual: uploads only the most recently modified file
- # - batch: uploads a batch of N recent files
- # - full-structure: uploads the complete folder structure
- WATCH_STRATEGY=batch
-
- # Debounce interval in milliseconds (wait between events before processing)
- WATCH_DEBOUNCE_MS=1000
-
- # Batch size for the batch strategy
- WATCH_BATCH_SIZE=10
-
- # Use polling instead of native filesystem events
- # Useful for remote filesystems or NFS
- WATCH_USE_POLLING=false
-
- # Polling interval in milliseconds (only if WATCH_USE_POLLING=true)
- WATCH_POLL_INTERVAL=100
-
- # Stability threshold in ms (wait for the file to stop changing)
- WATCH_STABILITY_THRESHOLD=300
-
- # Patterns to ignore (comma-separated, used as regexes)
- WATCH_IGNORE_PATTERNS=.tmp,.bak,*.swp
-
- # Automatic document type detection
- WATCH_AUTO_DETECT=true
-
- # Automatic file organization
- WATCH_AUTO_ORGANIZE=true
-
- # =============================================================================
- # WATCH MODE - AUTOMATIC PROCESSING PIPELINE
- # =============================================================================
- #
- # The automatic pipeline runs the following sequence when a new file is detected:
- # 1. Stats Collection → stats --stats-only (collects file information)
- # 2. PDF Detection → detect --detect-pdfs (identifies simplified pedimentos)
- # 3. Path Propagation → detect --propagate-arela-path (propagates to related documents)
- # 4. RFC Upload → upload --upload-by-rfc --folder-structure (uploads with folder structure)
- #
- # The pipeline is enabled automatically in watch mode and uses the folderStructure
- # defined for each entry in WATCH_DIRECTORY_CONFIGS
- #
- # To disable it from the CLI, use: arela watch --no-auto-processing
-
- # =============================================================================
- # LOGGING AND MONITORING
- # =============================================================================
-
- # Progress bar update frequency
- PROGRESS_UPDATE_INTERVAL=10
-
- # Enable verbose logging (true/false)
- VERBOSE_LOGGING=false
-
- # ============================================