@arela/uploader 1.0.13 → 1.0.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "1.0.13",
3
+ "version": "1.0.15",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -83,6 +83,7 @@ export class IdentifyCommand {
83
83
  let totalStats = {
84
84
  processed: 0,
85
85
  detected: 0,
86
+ proformas: 0,
86
87
  errors: 0,
87
88
  pending: 0,
88
89
  };
@@ -99,6 +100,7 @@ export class IdentifyCommand {
99
100
 
100
101
  totalStats.processed += stats.processed;
101
102
  totalStats.detected += stats.detected;
103
+ totalStats.proformas += stats.proformas;
102
104
  totalStats.errors += stats.errors;
103
105
  }
104
106
 
@@ -110,7 +112,8 @@ export class IdentifyCommand {
110
112
  logger.info(`\n📊 Total Results:`);
111
113
  logger.info(` Tables Processed: ${tables.length}`);
112
114
  logger.info(` Files Processed: ${totalStats.processed}`);
113
- logger.info(` Pedimentos Detected: ${totalStats.detected}`);
115
+ logger.info(` Pedimentos Detected (paid): ${totalStats.detected}`);
116
+ logger.info(` Proformas Detected (unpaid): ${totalStats.proformas}`);
114
117
  logger.info(` Errors: ${totalStats.errors}`);
115
118
  logger.info(` Duration: ${duration}s`);
116
119
  logger.info(` Speed: ${avgSpeed} files/sec`);
@@ -188,6 +191,7 @@ export class IdentifyCommand {
188
191
 
189
192
  let processedCount = 0;
190
193
  let detectedCount = 0;
194
+ let proformaCount = 0;
191
195
  let errorCount = 0;
192
196
  let hasMore = true;
193
197
  let batchNumber = 0;
@@ -223,6 +227,9 @@ export class IdentifyCommand {
223
227
  detectedCount += detectionResults.filter(
224
228
  (r) => r.detectedType === 'pedimento_simplificado',
225
229
  ).length;
230
+ proformaCount += detectionResults.filter(
231
+ (r) => r.detectedType === 'proforma',
232
+ ).length;
226
233
  errorCount += detectionResults.filter((r) => r.detectionError).length;
227
234
 
228
235
  // Update progress bar
@@ -239,6 +246,7 @@ export class IdentifyCommand {
239
246
  return {
240
247
  processed: processedCount,
241
248
  detected: detectedCount,
249
+ proformas: proformaCount,
242
250
  errors: errorCount,
243
251
  };
244
252
  }
@@ -293,7 +301,7 @@ export class IdentifyCommand {
293
301
  // Detect using existing FileDetectionService
294
302
  const result = await this.detectionService.detectFile(absolutePath);
295
303
 
296
- // If detection succeeded and found a pedimento
304
+ // If detection succeeded and found a pedimento_simplificado (paid)
297
305
  if (result.detectedType === 'pedimento_simplificado') {
298
306
  return {
299
307
  id: file.id,
@@ -303,7 +311,21 @@ export class IdentifyCommand {
303
311
  rfc: result.rfc,
304
312
  arelaPath: result.arelaPath,
305
313
  detectionError: result.error,
306
- isPedimento: true, // Confirmed pedimento
314
+ isPedimento: true, // Confirmed paid pedimento
315
+ };
316
+ }
317
+
318
+ // If detection succeeded and found a proforma (unpaid pedimento)
319
+ if (result.detectedType === 'proforma') {
320
+ return {
321
+ id: file.id,
322
+ detectedType: result.detectedType,
323
+ detectedPedimento: result.detectedPedimento,
324
+ detectedPedimentoYear: result.detectedPedimentoYear,
325
+ rfc: result.rfc,
326
+ arelaPath: null, // Proformas don't get arela_path (not paid yet)
327
+ detectionError: result.error,
328
+ isPedimento: false, // Not a paid pedimento — proforma only
307
329
  };
308
330
  }
309
331
 
@@ -377,18 +399,23 @@ export class IdentifyCommand {
377
399
  }
378
400
 
379
401
  /**
380
- * Determine if a file is definitely not a pedimento
402
+ * Determine if a file is definitely not a pedimento/proforma document
381
403
  * @private
382
404
  * @param {Object} result - Detection result
383
405
  * @param {Object} file - File metadata
384
- * @returns {boolean} True if definitely not a pedimento
406
+ * @returns {boolean} True if definitely not a pedimento/proforma
385
407
  */
386
408
  #isDefinitelyNotPedimento(result, file) {
387
- // If we got any pedimento-related fields, it might be a pedimento
409
+ // If we got any pedimento-related fields, it might be a pedimento or proforma
388
410
  if (result.detectedPedimento || result.rfc || result.arelaPath) {
389
411
  return false;
390
412
  }
391
413
 
414
+ // If it was detected as a proforma, it's related to a pedimento structure
415
+ if (result.detectedType === 'proforma') {
416
+ return false;
417
+ }
418
+
392
419
  // Check if the text contains the required pedimento marker
393
420
  // This must match the criteria in pedimento-simplificado.js match function
394
421
  const text = result.text || '';
@@ -283,6 +283,11 @@ export class PushCommand {
283
283
  // After each batch upload, those files are no longer "pending", so the next query
284
284
  // at offset=0 will naturally return the next batch of unprocessed files
285
285
 
286
+ // Track seen file IDs to detect infinite loops (scan table update failures)
287
+ const seenFileIds = new Set();
288
+ let consecutiveRepeats = 0;
289
+ const MAX_CONSECUTIVE_REPEATS = 3;
290
+
286
291
  // Start progress bar with known total
287
292
  progressBar.start(totalToProcess, 0, {
288
293
  speed: 0,
@@ -303,6 +308,23 @@ export class PushCommand {
303
308
  break;
304
309
  }
305
310
 
311
+ // Infinite loop protection: if the same files keep coming back,
312
+ // the scan table update is failing and they stay "pending" forever.
313
+ const allSeen = files.every((f) => seenFileIds.has(f.id));
314
+ if (allSeen) {
315
+ consecutiveRepeats++;
316
+ if (consecutiveRepeats >= MAX_CONSECUTIVE_REPEATS) {
317
+ const msg = `Aborting: same ${files.length} files returned ${MAX_CONSECUTIVE_REPEATS} times — scan table updates are likely failing.`;
318
+ logger.error(msg);
319
+ console.error(`\n⚠ ${msg}`);
320
+ hasMore = false;
321
+ break;
322
+ }
323
+ } else {
324
+ consecutiveRepeats = 0;
325
+ }
326
+ files.forEach((f) => seenFileIds.add(f.id));
327
+
306
328
  // Upload files in smaller batches using new CLI upload endpoint
307
329
  for (let i = 0; i < files.length; i += uploadBatchSize) {
308
330
  const uploadBatch = files.slice(i, i + uploadBatchSize);
@@ -328,9 +350,9 @@ export class PushCommand {
328
350
  try {
329
351
  await this.scanApiService.batchUpdateUpload(tableName, batchResults);
330
352
  } catch (updateError) {
331
- logger.error(
332
- `Failed to update scan table for batch: ${updateError.message}`,
333
- );
353
+ const msg = `Failed to update scan table for batch: ${updateError.message}`;
354
+ logger.error(msg);
355
+ console.error(`\n⚠ ${msg}`);
334
356
  // Don't fail the entire process, just log the error
335
357
  }
336
358
 
@@ -482,7 +504,13 @@ export class PushCommand {
482
504
  if (apiResult.uploaded && apiResult.uploaded.length > 0) {
483
505
  const uploadedFile = apiResult.uploaded[0];
484
506
  result.uploaded = true;
485
- result.uploadedToStorageId = uploadedFile.storageId;
507
+ // Only assign storageId if it is a valid UUID; ignore placeholder values
508
+ const UUID_RE =
509
+ /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
510
+ result.uploadedToStorageId =
511
+ uploadedFile.storageId && UUID_RE.test(uploadedFile.storageId)
512
+ ? uploadedFile.storageId
513
+ : null;
486
514
  logger.info(`✓ Uploaded: ${file.file_name} → ${uploadPath}`);
487
515
  } else if (apiResult.errors && apiResult.errors.length > 0) {
488
516
  const error = apiResult.errors[0];
@@ -34,10 +34,10 @@ class Config {
34
34
  const __dirname = path.dirname(__filename);
35
35
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
36
36
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
37
- return packageJson.version || '1.0.13';
37
+ return packageJson.version || '1.0.15';
38
38
  } catch (error) {
39
39
  console.warn('⚠️ Could not read package.json version, using fallback');
40
- return '1.0.13';
40
+ return '1.0.15';
41
41
  }
42
42
  }
43
43
 
@@ -1,5 +1,6 @@
1
1
  // Import all document type definitions
2
2
  import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
3
+ import { proformaDefinition } from './document-types/proforma.js';
3
4
  import { supportDocumentDefinition } from './document-types/support-document.js';
4
5
 
5
6
  // Document type definitions and extraction utilities
@@ -32,6 +33,9 @@ export class DocumentTypeDefinition {
32
33
  }
33
34
 
34
35
  // Registry of all document types
36
+ // pedimentoSimplificadoDefinition is the primary matcher for "FORMA SIMPLIFICADA" documents.
37
+ // After field extraction, resolveType() differentiates between pedimento_simplificado (paid) and proforma (unpaid).
38
+ // proformaDefinition is kept as reference but not used directly in the registry since resolution is handled post-extraction.
35
39
  const documentTypes = [
36
40
  pedimentoSimplificadoDefinition,
37
41
  supportDocumentDefinition,
@@ -79,6 +83,13 @@ export function extractDocumentFields(source, fileExtension, filePath) {
79
83
  }
80
84
  }
81
85
 
86
+ // Resolve final type if the definition supports it (e.g., pedimento_simplificado vs proforma)
87
+ const resolvedType = docType.resolveType
88
+ ? docType.resolveType(fields)
89
+ : docType.type;
90
+
91
+ console.log(` → Resolved type: ${resolvedType}`);
92
+
82
93
  // Extract pedimento number and year
83
94
  const pedimento = docType.extractNumPedimento
84
95
  ? docType.extractNumPedimento(source, fields)
@@ -87,7 +98,7 @@ export function extractDocumentFields(source, fileExtension, filePath) {
87
98
  ? docType.extractPedimentoYear(source, fields)
88
99
  : null;
89
100
 
90
- return [docType.type, fields, pedimento, year];
101
+ return [resolvedType, fields, pedimento, year];
91
102
  }
92
103
  }
93
104
 
@@ -4,15 +4,37 @@ export const pedimentoSimplificadoDefinition = {
4
4
  type: 'pedimento_simplificado',
5
5
  extensions: ['pdf'],
6
6
  match: (source) => {
7
- const clues = [/FORMA SIMPLIFICADA DE PEDIMENTO/i];
7
+ return /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source);
8
+ },
8
9
 
9
- const found = clues.filter((clue) => clue.test(source));
10
+ /**
11
+ * Resolve the final document type after fields have been extracted.
12
+ * A document is a "pedimento_simplificado" only if it has been paid:
13
+ * - For clavePedimento = 'R1': must have fechaPagoRectificacion
14
+ * - Otherwise: must have paymentDate
15
+ * If no payment evidence is found, it's a "proforma".
16
+ *
17
+ * @param {FieldResult[]} fields - Extracted fields
18
+ * @returns {string} - 'pedimento_simplificado' or 'proforma'
19
+ */
20
+ resolveType: (fields) => {
21
+ const clavePedimento =
22
+ fields?.find((f) => f.name === 'clavePedimento')?.value ?? null;
23
+ const paymentDate =
24
+ fields?.find((f) => f.name === 'paymentDate' && f.found)?.value ?? null;
25
+ const fechaPagoRectificacion =
26
+ fields?.find((f) => f.name === 'fechaPagoRectificacion' && f.found)
27
+ ?.value ?? null;
10
28
 
11
- if (found.length > clues.length / 2) {
12
- return true;
29
+ if (clavePedimento === 'R1') {
30
+ // Rectification pedimentos require fechaPagoRectificacion
31
+ return fechaPagoRectificacion ? 'pedimento_simplificado' : 'proforma';
13
32
  }
14
- return false;
33
+
34
+ // Regular pedimentos require paymentDate
35
+ return paymentDate ? 'pedimento_simplificado' : 'proforma';
15
36
  },
37
+
16
38
  extractNumPedimento: (source, fields) => {
17
39
  return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
18
40
  },
@@ -121,37 +143,37 @@ export const pedimentoSimplificadoDefinition = {
121
143
  },
122
144
 
123
145
  // 7) Num. E-Document: exactamente 13 caracteres tras la etiqueta (puede haber múltiples líneas)
124
- {
125
- field: 'numEDocumento',
126
- extract: (source) => {
127
- // Split into lines and find all lines containing NUM. E-DOCUMENT
128
- const lines = source.split(/\r?\n/);
129
- const edocLines = lines.filter((line) =>
130
- /NUM\.?\s*E-DOCUMENT/i.test(line),
131
- );
146
+ // {
147
+ // field: 'numEDocumento',
148
+ // extract: (source) => {
149
+ // // Split into lines and find all lines containing NUM. E-DOCUMENT
150
+ // const lines = source.split(/\r?\n/);
151
+ // const edocLines = lines.filter((line) =>
152
+ // /NUM\.?\s*E-DOCUMENT/i.test(line),
153
+ // );
132
154
 
133
- if (edocLines.length === 0) {
134
- return new FieldResult('numEDocumento', false, null);
135
- }
155
+ // if (edocLines.length === 0) {
156
+ // return new FieldResult('numEDocumento', false, null);
157
+ // }
136
158
 
137
- // Extract all 13-character alphanumeric codes from all NUM. E-DOCUMENT lines
138
- const extractedCodes = [];
139
- edocLines.forEach((line) => {
140
- const afterEdoc = line.replace(/.*NUM\.?\s*E-DOCUMENT\s*/i, '');
141
- const codes = afterEdoc.match(/[A-Z0-9]{13}/g) || [];
142
- extractedCodes.push(...codes);
143
- });
159
+ // // Extract all 13-character alphanumeric codes from all NUM. E-DOCUMENT lines
160
+ // const extractedCodes = [];
161
+ // edocLines.forEach((line) => {
162
+ // const afterEdoc = line.replace(/.*NUM\.?\s*E-DOCUMENT\s*/i, '');
163
+ // const codes = afterEdoc.match(/[A-Z0-9]{13}/g) || [];
164
+ // extractedCodes.push(...codes);
165
+ // });
144
166
 
145
- if (extractedCodes.length === 0) {
146
- return new FieldResult('numEDocumento', false, null);
147
- }
167
+ // if (extractedCodes.length === 0) {
168
+ // return new FieldResult('numEDocumento', false, null);
169
+ // }
148
170
 
149
- // Remove duplicates using Set
150
- const uniqueCodes = [...new Set(extractedCodes)];
151
- const formattedValue = `[${uniqueCodes.join(',')}]`;
152
- return new FieldResult('numEDocumento', true, formattedValue);
153
- },
154
- },
171
+ // // Remove duplicates using Set
172
+ // const uniqueCodes = [...new Set(extractedCodes)];
173
+ // const formattedValue = `[${uniqueCodes.join(',')}]`;
174
+ // return new FieldResult('numEDocumento', true, formattedValue);
175
+ // },
176
+ // },
155
177
 
156
178
  // 8) Fecha de Pago: Look for various payment date patterns
157
179
  {
@@ -170,49 +192,49 @@ export const pedimentoSimplificadoDefinition = {
170
192
  },
171
193
 
172
194
  // 9) COVE: NUMERO DE ACUSE DE VALOR (puede haber múltiples líneas)
173
- {
174
- field: 'cove',
175
- extract: (source) => {
176
- // Split into lines and find all lines containing NUMERO DE ACUSE DE VALOR or COVE
177
- const lines = source.split(/\r?\n/);
178
- const coveLines = lines.filter(
179
- (line) =>
180
- /COVE/i.test(line) || /NUMERO DE ACUSE DE VALOR/i.test(line),
181
- );
195
+ // {
196
+ // field: 'cove',
197
+ // extract: (source) => {
198
+ // // Split into lines and find all lines containing NUMERO DE ACUSE DE VALOR or COVE
199
+ // const lines = source.split(/\r?\n/);
200
+ // const coveLines = lines.filter(
201
+ // (line) =>
202
+ // /COVE/i.test(line) || /NUMERO DE ACUSE DE VALOR/i.test(line),
203
+ // );
182
204
 
183
- if (coveLines.length === 0) {
184
- return new FieldResult('cove', false, null);
185
- }
205
+ // if (coveLines.length === 0) {
206
+ // return new FieldResult('cove', false, null);
207
+ // }
186
208
 
187
- // Extract all COVE values from all matching lines
188
- const coveValues = [];
189
- coveLines.forEach((line) => {
190
- const coveMatches = line.match(/COVE([A-Z0-9]+)/gi) || [];
191
- // Extract just the alphanumeric parts (remove the "COVE" prefix)
192
- const codes = coveMatches.map((match) => match);
193
- coveValues.push(...codes);
194
- });
195
-
196
- if (coveValues.length === 0) {
197
- return new FieldResult('cove', false, null);
198
- }
209
+ // // Extract all COVE values from all matching lines
210
+ // const coveValues = [];
211
+ // coveLines.forEach((line) => {
212
+ // const coveMatches = line.match(/COVE([A-Z0-9]+)/gi) || [];
213
+ // // Extract just the alphanumeric parts (remove the "COVE" prefix)
214
+ // const codes = coveMatches.map((match) => match);
215
+ // coveValues.push(...codes);
216
+ // });
199
217
 
200
- // Remove duplicates using Set
201
- const uniqueCoveValues = [...new Set(coveValues)];
202
- const formattedValue = `[${uniqueCoveValues.join(',')}]`;
203
- return new FieldResult('cove', true, formattedValue);
204
- },
205
- },
218
+ // if (coveValues.length === 0) {
219
+ // return new FieldResult('cove', false, null);
220
+ // }
221
+
222
+ // // Remove duplicates using Set
223
+ // const uniqueCoveValues = [...new Set(coveValues)];
224
+ // const formattedValue = `[${uniqueCoveValues.join(',')}]`;
225
+ // return new FieldResult('cove', true, formattedValue);
226
+ // },
227
+ // },
206
228
 
207
229
  // 10) Peso Bruto: Extract weight value
208
- {
209
- field: 'pesoBruto',
210
- extract: (source) => {
211
- // Look for the peso bruto value with decimal format
212
- const match = source.match(/(\d+\.\d+)\d{3}/);
213
- return new FieldResult('pesoBruto', !!match, match ? match[1] : null);
214
- },
215
- },
230
+ // {
231
+ // field: 'pesoBruto',
232
+ // extract: (source) => {
233
+ // // Look for the peso bruto value with decimal format
234
+ // const match = source.match(/(\d+\.\d+)\d{3}/);
235
+ // return new FieldResult('pesoBruto', !!match, match ? match[1] : null);
236
+ // },
237
+ // },
216
238
 
217
239
  // 11) Patente: Extract patent number
218
240
  {
@@ -241,30 +263,60 @@ export const pedimentoSimplificadoDefinition = {
241
263
  },
242
264
 
243
265
  // 12) Numero de Operacion Bancaria
244
- {
245
- field: 'numeroOperacionBancaria',
246
- extract: (source) => {
247
- const match = source.match(
248
- /NUMERO DE OPERACION BANCARIA:\s*([A-Z0-9]+)/i,
249
- );
250
- return new FieldResult(
251
- 'numeroOperacionBancaria',
252
- !!match,
253
- match ? match[1] : null,
254
- );
255
- },
256
- },
266
+ // {
267
+ // field: 'numeroOperacionBancaria',
268
+ // extract: (source) => {
269
+ // const match = source.match(
270
+ // /NUMERO DE OPERACION BANCARIA:\s*([A-Z0-9]+)/i,
271
+ // );
272
+ // return new FieldResult(
273
+ // 'numeroOperacionBancaria',
274
+ // !!match,
275
+ // match ? match[1] : null,
276
+ // );
277
+ // },
278
+ // },
257
279
 
258
280
  // 13) Numero de Transaccion SAT
281
+ // {
282
+ // field: 'numeroTransaccionSAT',
283
+ // extract: (source) => {
284
+ // const match = source.match(/NUMERO DE TRANSACCION SAT:\s*([A-Z0-9]+)/i);
285
+ // return new FieldResult(
286
+ // 'numeroTransaccionSAT',
287
+ // !!match,
288
+ // match ? match[1] : null,
289
+ // );
290
+ // },
291
+ // },
292
+
293
+ // 14) Fecha de Pago Rectificación
259
294
  {
260
- field: 'numeroTransaccionSAT',
295
+ field: 'fechaPagoRectificacion',
261
296
  extract: (source) => {
262
- const match = source.match(/NUMERO DE TRANSACCION SAT:\s*([A-Z0-9]+)/i);
263
- return new FieldResult(
264
- 'numeroTransaccionSAT',
265
- !!match,
266
- match ? match[1] : null,
297
+ // Capture the first dd/mm/yyyy date found within 500 characters after a RECTIFICACION marker (case-insensitive)
298
+ const rectSectionMatch = source.match(
299
+ /RECTIFICACION[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
267
300
  );
301
+
302
+ if (rectSectionMatch) {
303
+ return new FieldResult(
304
+ 'fechaPagoRectificacion',
305
+ true,
306
+ rectSectionMatch[1],
307
+ );
308
+ }
309
+
310
+ // Fallback: look for any date after FECHA PAGO RECT
311
+ const fechaMatch = source.match(
312
+ /FECHA PAGO RECT[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
313
+ );
314
+
315
+ if (fechaMatch) {
316
+ return new FieldResult('fechaPagoRectificacion', true, fechaMatch[1]);
317
+ }
318
+
319
+ return new FieldResult('fechaPagoRectificacion', false, null);
268
320
  },
269
321
  },
270
322
  ],
@@ -0,0 +1,29 @@
1
+ import { FieldResult } from '../document-type-shared.js';
2
+ import { pedimentoSimplificadoDefinition } from './pedimento-simplificado.js';
3
+
4
+ /**
5
+ * Proforma Document Type Definition
6
+ *
7
+ * A proforma is essentially the same document as a pedimento simplificado,
8
+ * but it has NOT been paid yet. It shares the same extractors and structure.
9
+ *
10
+ * Classification logic:
11
+ * - If clavePedimento = 'R1': needs fechaPagoRectificacion to be pedimento_simplificado
12
+ * - Otherwise: needs paymentDate to be pedimento_simplificado
13
+ * - If the payment field required for that case is not found → it's a proforma
14
+ */
15
+ export const proformaDefinition = {
16
+ type: 'proforma',
17
+ extensions: ['pdf'],
18
+
19
+ // Same content marker as pedimento simplificado
20
+ match: (source) => {
21
+ return /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source);
22
+ },
23
+
24
+ extractNumPedimento: pedimentoSimplificadoDefinition.extractNumPedimento,
25
+ extractPedimentoYear: pedimentoSimplificadoDefinition.extractPedimentoYear,
26
+
27
+ // Reuse all extractors from pedimento simplificado
28
+ extractors: pedimentoSimplificadoDefinition.extractors,
29
+ };