@arela/uploader 1.0.20 → 1.0.22

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -0,0 +1,99 @@
1
+ import {
2
+ pedimentoYearFromFields,
3
+ sharedPedimentoExtractors,
4
+ } from './_pedimento-shared-extractors.js';
5
+
6
+ /**
7
+ * "Pedimento Completo" matcher — the 7-page DEF / SEGUNDA / TERCERA copy
8
+ * (and the "CoveFact" variant). This is distinct from `pedimento_simplificado`
9
+ * which uses the "FORMA SIMPLIFICADA DE PEDIMENTO" header.
10
+ *
11
+ * Match strategy: require the structural fields that the long-form pedimento
12
+ * always carries (`NUM. PEDIMENTO:`, `CVE.PEDIMENTO:`, `T.OPER:`) plus at
13
+ * least one of the printed copy markers, while explicitly excluding any
14
+ * document that already declares itself as a "FORMA SIMPLIFICADA".
15
+ */
16
+ export const pedimentoCompletoDefinition = {
17
+ type: 'pedimento_completo',
18
+ extensions: ['pdf'],
19
+ match: (source) => {
20
+ // Hard exclude: "FORMA SIMPLIFICADA" is handled by pedimento_simplificado.
21
+ if (/FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source)) return false;
22
+
23
+ // Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
24
+ // different document type handled by aviso_consolidado.
25
+ if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
26
+
27
+ const hasHeaderFields =
28
+ /NUM\.?\s*PEDIMENTO:/i.test(source) &&
29
+ /CVE\.?\s*PEDIMENTO:/i.test(source) &&
30
+ /T\.?\s*OPER:/i.test(source);
31
+ if (hasHeaderFields) {
32
+ const hasCopyMarker =
33
+ /ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
34
+ /SEGUNDA\s+COPIA/i.test(source) ||
35
+ /TERCERA\s+COPIA/i.test(source) ||
36
+ /COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
37
+ /DEFINITIVO/i.test(source) ||
38
+ /ANEXO\s+DEL\s+PEDIMENTO/i.test(source) ||
39
+ /\*+FIN\s+DE\s+PEDIMENTO\s*\*+/i.test(source);
40
+ if (hasCopyMarker) return true;
41
+ }
42
+
43
+ // Fallback clue-counting heuristic for exotic layouts.
44
+ const clues = [
45
+ /\bPEDIMENTO\s*\n.*NUM\.\s*PEDIMENTO:/i,
46
+ /NUM\.\s*PEDIMENTO:\s*T\.OPER:\s*CVE\.PEDIMENTO:\s*REGIMEN:/i,
47
+ /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+[A-Z]{3}/i,
48
+ /CERTIFICACIONES/i,
49
+ /DESTINO\/ORIGEN\s+TIPO\s+CAMBIO\s+PESO\s+BRUTO\s+ADUANA\s+E\/S/i,
50
+ /MEDIOS\s+DE\s+TRANSPORTE/i,
51
+ /DATOS\s+DEL\s+IMPORTADOR\/EXPORTADOR/i,
52
+ /RFC:\s+NOMBRE,\s+DENOMINACION\s+O\s+RAZON\s+SOCIAL:/i,
53
+ /CUADRO\s+DE\s+LIQUIDACION/i,
54
+ /\*\*\*\s+PAGO\s+ELECTRONICO\s+\*\*\*/i,
55
+ /PATENTE:\s+PEDIMENTO:\s+ADUANA:/i,
56
+ /LINEA\s+DE\s+CAPTURA:/i,
57
+ /DATOS\s+DEL\s+PROVEEDOR\s+O\s+COMPRADOR/i,
58
+ /CLAVE\/COMPL\.\s+IDENTIFICADOR/i,
59
+ /ANEXO\s+DEL\s+PEDIMENTO/i,
60
+ /\*+FIN\s+DE\s+PEDIMENTO\s+\*+NUM\.\s+TOTAL\s+DE\s+PARTIDAS:/i,
61
+ /DECLARO\s+BAJO\s+PROTESTA\s+DE\s+DECIR\s+VERDAD/i,
62
+ /PEDIMENTO\s+ELABORADO\s+DE\s+CONFORMIDAD/i,
63
+ ];
64
+
65
+ const found = clues.filter((clue) => clue.test(source));
66
+ return found.length > clues.length * 0.25;
67
+ },
68
+
69
+ /**
70
+ * Resolve the final document type after fields have been extracted.
71
+ * Mirrors the simplificado logic:
72
+ * - R1 rectifications require fechaPagoRectificacion
73
+ * - Everything else requires paymentDate
74
+ * No payment evidence ⇒ proforma_completo.
75
+ */
76
+ resolveType: (fields) => {
77
+ const clavePedimento =
78
+ fields?.find((f) => f.name === 'clavePedimento')?.value ?? null;
79
+ const paymentDate =
80
+ fields?.find((f) => f.name === 'paymentDate' && f.found)?.value ?? null;
81
+ const fechaPagoRectificacion =
82
+ fields?.find((f) => f.name === 'fechaPagoRectificacion' && f.found)
83
+ ?.value ?? null;
84
+
85
+ if (clavePedimento === 'R1') {
86
+ return fechaPagoRectificacion
87
+ ? 'pedimento_completo'
88
+ : 'proforma_completo';
89
+ }
90
+ return paymentDate ? 'pedimento_completo' : 'proforma_completo';
91
+ },
92
+
93
+ extractNumPedimento: (source, fields) => {
94
+ return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
95
+ },
96
+ extractPedimentoYear: (source, fields) => pedimentoYearFromFields(fields),
97
+
98
+ extractors: sharedPedimentoExtractors,
99
+ };
@@ -1,10 +1,42 @@
1
- import { FieldResult } from '../document-type-shared.js';
1
+ import {
2
+ pedimentoYearFromFields,
3
+ sharedPedimentoExtractors,
4
+ } from './_pedimento-shared-extractors.js';
2
5
 
3
6
  export const pedimentoSimplificadoDefinition = {
4
7
  type: 'pedimento_simplificado',
5
8
  extensions: ['pdf'],
6
9
  match: (source) => {
7
- return /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source);
10
+ // Hard exclude: "AVISO CONSOLIDADO" shares the header trio but is a
11
+ // different document type handled by aviso_consolidado.
12
+ if (/AVISO\s+CONSOLIDADO/i.test(source)) return false;
13
+
14
+ // Fast path: the literal title appears on standard SIMP layouts.
15
+ if (/FORMA SIMPLIFICADA DE PEDIMENTO/i.test(source)) return true;
16
+
17
+ // Some PDFs (single-page anchors) lack that title but still carry the
18
+ // three pedimento header fields. Treat them as simplificado UNLESS they
19
+ // have the multi-page copy markers that uniquely identify a completo.
20
+ const hasHeaderFields =
21
+ /NUM\.?\s*PEDIMENTO:/i.test(source) &&
22
+ /CVE\.?\s*PEDIMENTO:/i.test(source) &&
23
+ /T\.?\s*OPER:/i.test(source);
24
+ if (!hasHeaderFields) return false;
25
+
26
+ const hasCompletoCopyMarker =
27
+ /ORIGINAL:\s*ADMINISTRACION GENERAL DE ADUANAS/i.test(source) ||
28
+ /SEGUNDA\s+COPIA/i.test(source) ||
29
+ /TERCERA\s+COPIA/i.test(source) ||
30
+ /COPIA\s+(SIMPLIFICAD[AO])?\s*TRANSPORTISTA/i.test(source) ||
31
+ /ANEXO\s+DEL\s+PEDIMENTO/i.test(source) ||
32
+ /\*+FIN\s+DE\s+PEDIMENTO\s*\*+/i.test(source);
33
+ if (hasCompletoCopyMarker) return false;
34
+
35
+ // Exclude COVE/eDocument forms that may reference a pedimento in their body.
36
+ if (/COMPROBANTE\s+DE\s+VALOR\s+ELECTR[ÓO]NICO/i.test(source)) return false;
37
+ if (/\bCOVE\b\s*:/i.test(source) && !/PAGO/i.test(source)) return false;
38
+
39
+ return true;
8
40
  },
9
41
 
10
42
  /**
@@ -14,7 +46,7 @@ export const pedimentoSimplificadoDefinition = {
14
46
  * - Otherwise: must have paymentDate
15
47
  * If no payment evidence is found, it's a "proforma".
16
48
  *
17
- * @param {FieldResult[]} fields - Extracted fields
49
+ * @param {import('../document-type-shared.js').FieldResult[]} fields
18
50
  * @returns {string} - 'pedimento_simplificado' or 'proforma'
19
51
  */
20
52
  resolveType: (fields) => {
@@ -27,297 +59,15 @@ export const pedimentoSimplificadoDefinition = {
27
59
  ?.value ?? null;
28
60
 
29
61
  if (clavePedimento === 'R1') {
30
- // Rectification pedimentos require fechaPagoRectificacion
31
62
  return fechaPagoRectificacion ? 'pedimento_simplificado' : 'proforma';
32
63
  }
33
-
34
- // Regular pedimentos require paymentDate
35
64
  return paymentDate ? 'pedimento_simplificado' : 'proforma';
36
65
  },
37
66
 
38
67
  extractNumPedimento: (source, fields) => {
39
68
  return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
40
69
  },
41
- extractPedimentoYear: (source, fields) => {
42
- const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
43
- if (!numPedimento) {
44
- return null;
45
- }
46
- const year = parseInt(numPedimento.substring(0, 2), 10);
47
- return year < 50 ? year + 2000 : year + 1900;
48
- },
49
- extractors: [
50
- // 1) Número de Pedimento (15 dígitos)
51
- {
52
- field: 'numPedimento',
53
- extract: (source) => {
54
- const match = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
55
- return new FieldResult(
56
- 'numPedimento',
57
- !!match,
58
- match ? match[0].replace(/\s/g, '') : null,
59
- );
60
- },
61
- },
62
-
63
- // 2) Tipo de Operación: los 3 caracteres justo después del número
64
- {
65
- field: 'tipoOperacion',
66
- extract: (source) => {
67
- // Look for the pedimento number pattern followed by operation type
68
- // Pattern matches: "22 07 3429 2002089 EXP RT"
69
- const match = source.match(
70
- /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+([A-Z]{3})/,
71
- );
72
- return new FieldResult(
73
- 'tipoOperacion',
74
- !!match,
75
- match ? match[1] : null,
76
- );
77
- },
78
- },
79
-
80
- // 3) Clave de Pedimento: los 2 caracteres justo después de la operación
81
- {
82
- field: 'clavePedimento',
83
- extract: (source) => {
84
- // Look for the pedimento number pattern followed by operation type and then the key
85
- // Pattern matches: "22 07 3429 2002089 EXP RT" to capture "RT"
86
- const match = source.match(
87
- /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})/,
88
- );
89
- return new FieldResult(
90
- 'clavePedimento',
91
- !!match,
92
- match ? match[1] : null,
93
- );
94
- },
95
- },
96
-
97
- // 4) Aduana E/S: Extract the 3-digit aduana code that appears at the end of the peso bruto line
98
- {
99
- field: 'aduanaEntradaSalida',
100
- extract: (source) => {
101
- // Look for the peso bruto line format: number followed by decimal amount followed by 3-digit aduana code
102
- // Pattern matches formats like: "7 1.100 071" or "7 19,834.260 071" to capture "071"
103
- // Uses multiline flag to match line boundaries precisely
104
- const match = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m);
105
- return new FieldResult(
106
- 'aduanaEntradaSalida',
107
- !!match,
108
- match ? match[1] : null,
109
- );
110
- },
111
- },
112
-
113
- // 5) RFC: línea con 12-13 caracteres alfanuméricos
114
- {
115
- field: 'rfc',
116
- extract: (source) => {
117
- const match = source.match(/\n\s*([A-Z0-9]{12,13})\s*\n/);
118
- return new FieldResult('rfc', !!match, match ? match[1] : null);
119
- },
120
- },
121
-
122
- // 6) Código de Aceptación: línea con 8 caracteres alfanuméricos justo después del RFC
123
- {
124
- field: 'codigoAceptacion',
125
- extract: (source) => {
126
- // 1) split into trimmed, non-empty lines
127
- const lines = source
128
- .split(/\r?\n/)
129
- .map((l) => l.trim())
130
- .filter((l) => l.length > 0);
131
-
132
- // 2) find the index of an RFC line (12–13 alnum chars)
133
- const rfcIndex = lines.findIndex((l) => /^[A-Z0-9]{12,13}$/.test(l));
134
- let code = null;
135
-
136
- // 3) if next line exists and is exactly 8 alnum chars, that's the code
137
- if (rfcIndex >= 0 && /^[A-Z0-9]{8}$/.test(lines[rfcIndex + 1] || '')) {
138
- code = lines[rfcIndex + 1];
139
- }
140
-
141
- return new FieldResult('codigoAceptacion', code !== null, code);
142
- },
143
- },
144
-
145
- // 7) Num. E-Document: exactamente 13 caracteres tras la etiqueta (puede haber múltiples líneas)
146
- // {
147
- // field: 'numEDocumento',
148
- // extract: (source) => {
149
- // // Split into lines and find all lines containing NUM. E-DOCUMENT
150
- // const lines = source.split(/\r?\n/);
151
- // const edocLines = lines.filter((line) =>
152
- // /NUM\.?\s*E-DOCUMENT/i.test(line),
153
- // );
154
-
155
- // if (edocLines.length === 0) {
156
- // return new FieldResult('numEDocumento', false, null);
157
- // }
158
-
159
- // // Extract all 13-character alphanumeric codes from all NUM. E-DOCUMENT lines
160
- // const extractedCodes = [];
161
- // edocLines.forEach((line) => {
162
- // const afterEdoc = line.replace(/.*NUM\.?\s*E-DOCUMENT\s*/i, '');
163
- // const codes = afterEdoc.match(/[A-Z0-9]{13}/g) || [];
164
- // extractedCodes.push(...codes);
165
- // });
166
-
167
- // if (extractedCodes.length === 0) {
168
- // return new FieldResult('numEDocumento', false, null);
169
- // }
170
-
171
- // // Remove duplicates using Set
172
- // const uniqueCodes = [...new Set(extractedCodes)];
173
- // const formattedValue = `[${uniqueCodes.join(',')}]`;
174
- // return new FieldResult('numEDocumento', true, formattedValue);
175
- // },
176
- // },
177
-
178
- // 8) Fecha de Pago: Look for various payment date patterns
179
- {
180
- field: 'paymentDate',
181
- extract: (source) => {
182
- // Try multiple patterns for payment dates
183
- let match = source.match(/2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/);
184
- if (!match) {
185
- match = source.match(/FECHA DE PAGO:\s*(\d{4}\/\d{2}\/\d{2})/);
186
- }
187
- if (!match) {
188
- match = source.match(/PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/);
189
- }
190
- return new FieldResult('paymentDate', !!match, match ? match[1] : null);
191
- },
192
- },
193
-
194
- // 9) COVE: NUMERO DE ACUSE DE VALOR (puede haber múltiples líneas)
195
- // {
196
- // field: 'cove',
197
- // extract: (source) => {
198
- // // Split into lines and find all lines containing NUMERO DE ACUSE DE VALOR or COVE
199
- // const lines = source.split(/\r?\n/);
200
- // const coveLines = lines.filter(
201
- // (line) =>
202
- // /COVE/i.test(line) || /NUMERO DE ACUSE DE VALOR/i.test(line),
203
- // );
204
-
205
- // if (coveLines.length === 0) {
206
- // return new FieldResult('cove', false, null);
207
- // }
208
-
209
- // // Extract all COVE values from all matching lines
210
- // const coveValues = [];
211
- // coveLines.forEach((line) => {
212
- // const coveMatches = line.match(/COVE([A-Z0-9]+)/gi) || [];
213
- // // Extract just the alphanumeric parts (remove the "COVE" prefix)
214
- // const codes = coveMatches.map((match) => match);
215
- // coveValues.push(...codes);
216
- // });
217
-
218
- // if (coveValues.length === 0) {
219
- // return new FieldResult('cove', false, null);
220
- // }
221
-
222
- // // Remove duplicates using Set
223
- // const uniqueCoveValues = [...new Set(coveValues)];
224
- // const formattedValue = `[${uniqueCoveValues.join(',')}]`;
225
- // return new FieldResult('cove', true, formattedValue);
226
- // },
227
- // },
228
-
229
- // 10) Peso Bruto: Extract weight value
230
- // {
231
- // field: 'pesoBruto',
232
- // extract: (source) => {
233
- // // Look for the peso bruto value with decimal format
234
- // const match = source.match(/(\d+\.\d+)\d{3}/);
235
- // return new FieldResult('pesoBruto', !!match, match ? match[1] : null);
236
- // },
237
- // },
238
-
239
- // 11) Patente: Extract patent number
240
- {
241
- field: 'patente',
242
- extract: (source) => {
243
- // Look for the PATENTE: PEDIMENTO: ADUANA: header line
244
- // Then find the corresponding data line with three numbers
245
- const lines = source.split(/\r?\n/);
246
- const patenteHeaderIndex = lines.findIndex((line) =>
247
- /PATENTE:.*PEDIMENTO:.*ADUANA:/i.test(line),
248
- );
249
-
250
- if (patenteHeaderIndex >= 0) {
251
- // Look for the data line after the header (format: "3429 2002089 07")
252
- for (let i = patenteHeaderIndex + 1; i < lines.length; i++) {
253
- const line = lines[i].trim();
254
- if (/^\d+\s+\d+\s+\d+$/.test(line)) {
255
- const parts = line.split(/\s+/);
256
- return new FieldResult('patente', true, parts[0]); // First number is the PATENTE
257
- }
258
- }
259
- }
260
-
261
- return new FieldResult('patente', false, null);
262
- },
263
- },
264
-
265
- // 12) Numero de Operacion Bancaria
266
- // {
267
- // field: 'numeroOperacionBancaria',
268
- // extract: (source) => {
269
- // const match = source.match(
270
- // /NUMERO DE OPERACION BANCARIA:\s*([A-Z0-9]+)/i,
271
- // );
272
- // return new FieldResult(
273
- // 'numeroOperacionBancaria',
274
- // !!match,
275
- // match ? match[1] : null,
276
- // );
277
- // },
278
- // },
279
-
280
- // 13) Numero de Transaccion SAT
281
- // {
282
- // field: 'numeroTransaccionSAT',
283
- // extract: (source) => {
284
- // const match = source.match(/NUMERO DE TRANSACCION SAT:\s*([A-Z0-9]+)/i);
285
- // return new FieldResult(
286
- // 'numeroTransaccionSAT',
287
- // !!match,
288
- // match ? match[1] : null,
289
- // );
290
- // },
291
- // },
292
-
293
- // 14) Fecha de Pago Rectificación
294
- {
295
- field: 'fechaPagoRectificacion',
296
- extract: (source) => {
297
- // Look for the RECTIFICACION section header
298
- const rectSectionMatch = source.match(
299
- /RECTIFICACION[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
300
- );
301
-
302
- if (rectSectionMatch) {
303
- return new FieldResult(
304
- 'fechaPagoRectificacion',
305
- true,
306
- rectSectionMatch[1],
307
- );
308
- }
309
-
310
- // Fallback: look for any date after FECHA PAGO RECT
311
- const fechaMatch = source.match(
312
- /FECHA PAGO RECT[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
313
- );
314
-
315
- if (fechaMatch) {
316
- return new FieldResult('fechaPagoRectificacion', true, fechaMatch[1]);
317
- }
70
+ extractPedimentoYear: (source, fields) => pedimentoYearFromFields(fields),
318
71
 
319
- return new FieldResult('fechaPagoRectificacion', false, null);
320
- },
321
- },
322
- ],
72
+ extractors: sharedPedimentoExtractors,
323
73
  };
@@ -4,6 +4,35 @@ import { PDFParse } from 'pdf-parse';
4
4
 
5
5
  import { extractDocumentFields } from './document-type-shared.js';
6
6
 
7
+ // Document types that participate in arela_path composition. The XML type is
8
+ // kept here even though its matcher is currently disabled — once re-enabled
9
+ // in document-type-shared.js no further changes are needed here.
10
+ const ARELA_PATH_TYPES = new Set([
11
+ 'pedimento_simplificado',
12
+ 'pedimento_completo',
13
+ 'pedimento_completo_xml',
14
+ ]);
15
+
16
+ /**
17
+ * For `pedimento_completo_xml` the patente is not present in the XML body —
18
+ * it must be parsed from the filename. Three known patterns are tried.
19
+ */
20
+ function patenteFromXmlFilename(filePath) {
21
+ if (!filePath) return null;
22
+ const fileName = path.basename(filePath);
23
+
24
+ let m = fileName.match(/^VU_(\d{4})_\d{3}_\d{7}\.xml$/i);
25
+ if (m) return m[1];
26
+
27
+ m = fileName.match(/^\d{3}-(\d{4})-\d{7}\.xml$/i);
28
+ if (m) return m[1];
29
+
30
+ m = fileName.match(/^\d{4}(\d{4})\d{7}(?:_\d{15})?\.xml$/i);
31
+ if (m) return m[1];
32
+
33
+ return null;
34
+ }
35
+
7
36
  /**
8
37
  * Compose arela_path from extracted pedimento fields
9
38
  * Format: RFC/Year/Patente/Aduana/Pedimento/
@@ -15,16 +44,21 @@ function composeArelaPath(
15
44
  detectedPedimentoYear,
16
45
  filePath,
17
46
  ) {
18
- if (detectedType !== 'pedimento_simplificado') {
47
+ if (!ARELA_PATH_TYPES.has(detectedType)) {
19
48
  return null;
20
49
  }
21
50
 
22
51
  const rfc = fields?.find((f) => f.name === 'rfc')?.value;
23
- const patente = fields?.find((f) => f.name === 'patente')?.value;
52
+ let patente = fields?.find((f) => f.name === 'patente')?.value;
24
53
  const aduana = fields?.find((f) => f.name === 'aduanaEntradaSalida')?.value;
25
54
  const pedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
26
55
  const year = detectedPedimentoYear;
27
56
 
57
+ // XML matcher does not extract patente from the body — derive from filename.
58
+ if (!patente && detectedType === 'pedimento_completo_xml') {
59
+ patente = patenteFromXmlFilename(filePath);
60
+ }
61
+
28
62
  // All components are required for a valid arela_path
29
63
  if (!rfc || !year || !patente || !aduana || !pedimento) {
30
64
  console.log('⚠️ Missing required fields for arela_path composition:', {
package/src/index.js CHANGED
@@ -1,6 +1,8 @@
1
1
  #!/usr/bin/env node
2
2
  import { Command } from 'commander';
3
3
 
4
+ import datastageCommand from './commands/DatastageCommand.js';
5
+ import gdriveSyncCommand from './commands/GDriveSyncCommand.js';
4
6
  import identifyCommand from './commands/IdentifyCommand.js';
5
7
  import pollWorkerCommand from './commands/PollWorkerCommand.js';
6
8
  import PropagateCommand from './commands/PropagateCommand.js';
@@ -25,6 +27,7 @@ class ArelaUploaderCLI {
25
27
  this.scanCommand = scanCommand;
26
28
  this.uploadCommand = new UploadCommand();
27
29
  this.watchCommand = watchCommand;
30
+ this.datastageCommand = datastageCommand;
28
31
 
29
32
  this.#setupProgram();
30
33
  this.#setupCommands();
@@ -203,6 +206,34 @@ class ArelaUploaderCLI {
203
206
  }
204
207
  });
205
208
 
209
+ // Datastage command — upload monthly datastage *.zip files from a directory
210
+ this.program
211
+ .command('datastage')
212
+ .description(
213
+ '📦 Upload monthly datastage *.zip files from a directory to Arela',
214
+ )
215
+ .requiredOption(
216
+ '--dir <path>',
217
+ 'Directory containing *.zip files (non-recursive)',
218
+ )
219
+ .option(
220
+ '--api <target>',
221
+ 'API target: default|agencia|cliente',
222
+ 'default',
223
+ )
224
+ .option('--retry-failed', 'Re-attempt files in failed status')
225
+ .option('--show-stats', 'Print final stats summary')
226
+ .action(async (options) => {
227
+ try {
228
+ if (options.api && options.api !== 'default') {
229
+ appConfig.setApiTarget(options.api);
230
+ }
231
+ await this.datastageCommand.execute(options);
232
+ } catch (error) {
233
+ this.errorHandler.handleFatalError(error, { command: 'datastage' });
234
+ }
235
+ });
236
+
206
237
  // Detection command
207
238
  this.program
208
239
  .command('detect')
@@ -334,6 +365,18 @@ class ArelaUploaderCLI {
334
365
  'Number of files to process in each batch',
335
366
  '100',
336
367
  )
368
+ .option(
369
+ '--table <tableName>',
370
+ 'Process only this scan table (instead of all instance tables)',
371
+ )
372
+ .option(
373
+ '--reset-attempts',
374
+ 'Reset detection_attempts to 0 before processing so previously-failed files are retried',
375
+ )
376
+ .option(
377
+ '--path-prefix <mapping>',
378
+ 'Remap file path prefix for cross-platform access. Format: FROM:TO e.g. "O:/=/Volumes/nas/"',
379
+ )
337
380
  .option('--show-stats', 'Show performance statistics')
338
381
  .action(async (options) => {
339
382
  try {
@@ -458,6 +501,32 @@ class ArelaUploaderCLI {
458
501
  // END OF NEW SIMPLIFIED COMMANDS
459
502
  // ============================================================================
460
503
 
504
+ // GDrive sync command - mirror a Google Drive folder to local before scan
505
+ this.program
506
+ .command('gdrive-sync')
507
+ .description(
508
+ '☁️ Mirror a Google Drive folder to local filesystem (pre-scan source)',
509
+ )
510
+ .option(
511
+ '--root-folder <id>',
512
+ 'Drive folder ID to sync (overrides GDRIVE_ROOT_FOLDER_ID)',
513
+ )
514
+ .option(
515
+ '--dest <path>',
516
+ 'Local mirror destination (overrides GDRIVE_LOCAL_MIRROR_PATH)',
517
+ )
518
+ .option('--full', 'Ignore state file and re-verify all files')
519
+ .option('--dry-run', 'List/plan only, no downloads or writes')
520
+ .action(async (options) => {
521
+ try {
522
+ await gdriveSyncCommand.execute(options);
523
+ } catch (error) {
524
+ this.errorHandler.handleFatalError(error, {
525
+ command: 'gdrive-sync',
526
+ });
527
+ }
528
+ });
529
+
461
530
  // Watch command
462
531
  this.program
463
532
  .command('watch')
@@ -144,7 +144,9 @@ export class DatabaseService {
144
144
  rfc: null,
145
145
  message: null,
146
146
  file_extension: fileExtension,
147
- is_like_simplificado: filename.toLowerCase().includes('simp'),
147
+ // Flag any PDF whose filename hints at a pedimento (simplificado,
148
+ // completo, or CoveFact). Column name preserved; semantics broadened.
149
+ is_like_simplificado: /(simp|pedim|covefact)/i.test(filename),
148
150
  year: null,
149
151
  created_at: new Date().toISOString(),
150
152
  updated_at: new Date().toISOString(),