@arela/uploader 1.0.20 → 1.0.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -24,6 +24,7 @@ class Config {
24
24
  this.watch = this.#loadWatchConfig();
25
25
  this.redis = this.#loadRedisConfig();
26
26
  this.worker = this.#loadWorkerConfig();
27
+ this.gdrive = this.#loadGDriveConfig();
27
28
  }
28
29
 
29
30
  /**
@@ -36,10 +37,10 @@ class Config {
36
37
  const __dirname = path.dirname(__filename);
37
38
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
38
39
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
39
- return packageJson.version || '1.0.20';
40
+ return packageJson.version || '1.0.21';
40
41
  } catch (error) {
41
42
  console.warn('⚠️ Could not read package.json version, using fallback');
42
- return '1.0.20';
43
+ return '1.0.21';
43
44
  }
44
45
  }
45
46
 
@@ -579,6 +580,91 @@ class Config {
579
580
  return process.env.ARELA_SERVER_ID || null;
580
581
  }
581
582
 
583
+ /**
584
+ * Load Google Drive sync configuration
585
+ * @private
586
+ */
587
+ #loadGDriveConfig() {
588
+ const rootFolderId = process.env.GDRIVE_ROOT_FOLDER_ID || null;
589
+
590
+ // Default mirror destination: <UPLOAD_BASE_PATH>/_gdrive_mirror
591
+ let localMirrorPath = process.env.GDRIVE_LOCAL_MIRROR_PATH || null;
592
+ if (!localMirrorPath && process.env.UPLOAD_BASE_PATH) {
593
+ const base = process.env.UPLOAD_BASE_PATH;
594
+ if (base !== '*') {
595
+ localMirrorPath = path.resolve(
596
+ PathNormalizer.toAbsolutePath(base),
597
+ '_gdrive_mirror',
598
+ );
599
+ }
600
+ } else if (localMirrorPath) {
601
+ localMirrorPath = PathNormalizer.toAbsolutePath(localMirrorPath);
602
+ }
603
+
604
+ return {
605
+ rootFolderId,
606
+ localMirrorPath,
607
+ serviceAccountFile: process.env.GDRIVE_SERVICE_ACCOUNT_FILE || null,
608
+ serviceAccountJson: process.env.GDRIVE_SERVICE_ACCOUNT_JSON || null,
609
+ skipNativeDocs: process.env.GDRIVE_SKIP_NATIVE_DOCS !== 'false',
610
+ followShortcuts: process.env.GDRIVE_FOLLOW_SHORTCUTS !== 'false',
611
+ concurrency: parseInt(process.env.GDRIVE_CONCURRENCY) || 5,
612
+ pageSize: parseInt(process.env.GDRIVE_PAGE_SIZE) || 1000,
613
+ maxFileSizeBytes:
614
+ parseInt(process.env.GDRIVE_MAX_FILE_SIZE_BYTES) ||
615
+ 2 * 1024 * 1024 * 1024, // 2GB default
616
+ };
617
+ }
618
+
619
+ /**
620
+ * Get Google Drive configuration
621
+ * @returns {Object} GDrive sync settings
622
+ */
623
+ getGDriveConfig() {
624
+ return this.gdrive;
625
+ }
626
+
627
+ /**
628
+ * Validate Google Drive configuration
629
+ * @throws {Error} If required gdrive configuration is missing
630
+ */
631
+ validateGDriveConfig() {
632
+ const errors = [];
633
+
634
+ if (!this.gdrive.rootFolderId) {
635
+ errors.push(
636
+ 'GDRIVE_ROOT_FOLDER_ID is required (Drive folder ID to sync)',
637
+ );
638
+ }
639
+
640
+ if (!this.gdrive.localMirrorPath) {
641
+ errors.push(
642
+ 'Could not determine local mirror path. Set GDRIVE_LOCAL_MIRROR_PATH or UPLOAD_BASE_PATH',
643
+ );
644
+ }
645
+
646
+ if (!this.gdrive.serviceAccountFile && !this.gdrive.serviceAccountJson) {
647
+ errors.push(
648
+ 'Either GDRIVE_SERVICE_ACCOUNT_FILE (path to JSON) or GDRIVE_SERVICE_ACCOUNT_JSON (inline JSON) is required',
649
+ );
650
+ }
651
+
652
+ if (this.gdrive.serviceAccountFile) {
653
+ const resolved = PathNormalizer.toAbsolutePath(
654
+ this.gdrive.serviceAccountFile,
655
+ );
656
+ if (!fs.existsSync(resolved)) {
657
+ errors.push(`GDRIVE_SERVICE_ACCOUNT_FILE not found: ${resolved}`);
658
+ }
659
+ }
660
+
661
+ if (errors.length > 0) {
662
+ throw new Error(
663
+ '⚠️ Google Drive configuration errors:\n - ' + errors.join('\n - '),
664
+ );
665
+ }
666
+ }
667
+
582
668
  /**
583
669
  * Check if worker mode is available (Redis configured)
584
670
  * @returns {boolean}
@@ -2,6 +2,9 @@
2
2
  import { dodaPdfDefinition } from './document-types/doda-pdf.js';
3
3
  import { dodaXmlDefinition } from './document-types/doda-xml.js';
4
4
  import { facturasComerciales } from './document-types/facturas-comerciales.js';
5
+ import { pedimentoCompletoDefinition } from './document-types/pedimento-completo.js';
6
+ // TODO: enable XML pedimento detection — implementation ready in pedimento-completo-xml.js
7
+ // import { pedimentoCompletoXmlDefinition } from './document-types/pedimento-completo-xml.js';
5
8
  import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
6
9
  import { proformaDefinition } from './document-types/proforma.js';
7
10
  import { supportDocumentDefinition } from './document-types/support-document.js';
@@ -41,6 +44,12 @@ export class DocumentTypeDefinition {
41
44
  // proformaDefinition is kept as reference but not used directly in the registry since resolution is handled post-extraction.
42
45
  const documentTypes = [
43
46
  pedimentoSimplificadoDefinition,
47
+ pedimentoCompletoDefinition,
48
+ // TODO: enable XML pedimento detection — uncomment the next line and the
49
+ // matching import at the top of this file. All downstream code
50
+ // (composeArelaPath, arela-api SQL filters, IdentifyCommand counters)
51
+ // already accepts `pedimento_completo_xml`.
52
+ // pedimentoCompletoXmlDefinition,
44
53
  supportDocumentDefinition,
45
54
  dodaPdfDefinition,
46
55
  dodaXmlDefinition,
@@ -96,12 +105,13 @@ export function extractDocumentFields(source, fileExtension, filePath) {
96
105
 
97
106
  console.log(` → Resolved type: ${resolvedType}`);
98
107
 
99
- // Extract pedimento number and year
108
+ // Extract pedimento number and year. `filePath` is forwarded so XML
109
+ // matchers (which compose numPedimento from the filename) can use it.
100
110
  const pedimento = docType.extractNumPedimento
101
- ? docType.extractNumPedimento(source, fields)
111
+ ? docType.extractNumPedimento(source, fields, filePath)
102
112
  : null;
103
113
  const year = docType.extractPedimentoYear
104
- ? docType.extractPedimentoYear(source, fields)
114
+ ? docType.extractPedimentoYear(source, fields, filePath)
105
115
  : null;
106
116
 
107
117
  return [resolvedType, fields, pedimento, year];
@@ -0,0 +1,226 @@
1
+ // Shared PDF extractors used by both `pedimento_simplificado` and
2
+ // `pedimento_completo` matchers. Keeping the regexes in a single module
3
+ // prevents drift between the two pedimento variants.
4
+ import { FieldResult } from '../document-type-shared.js';
5
+
6
// 1) Pedimento number: 15 digits grouped 2-2-4-7, where each group may be
//    separated from the next by a single whitespace character. The value is
//    returned with all whitespace removed.
export const numPedimentoExtractor = {
  field: 'numPedimento',
  extract: (source) => {
    const hit = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
    if (!hit) {
      return new FieldResult('numPedimento', false, null);
    }
    return new FieldResult('numPedimento', true, hit[0].replace(/\s/g, ''));
  },
};
18
+
19
// 2) Operation type: the three uppercase letters that follow the
//    whitespace-separated pedimento number (2-2-4-7 digit groups).
export const tipoOperacionExtractor = {
  field: 'tipoOperacion',
  extract: (source) => {
    const hit = source.match(/\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+([A-Z]{3})/);
    const value = hit ? hit[1] : null;
    return new FieldResult('tipoOperacion', Boolean(hit), value);
  },
};
27
+
28
// 3) Pedimento key: the two [A-Z0-9] characters that come right after the
//    pedimento number and the three-letter operation type.
export const clavePedimentoExtractor = {
  field: 'clavePedimento',
  extract: (source) => {
    const pattern = /\d{2}\s+\d{2}\s+\d{4}\s+\d{7}\s+[A-Z]{3}\s+([A-Z0-9]{2})/;
    const hit = source.match(pattern);
    const value = hit ? hit[1] : null;
    return new FieldResult('clavePedimento', Boolean(hit), value);
  },
};
38
+
39
// 4) Entry/exit customs office: the 3-digit code that closes a line shaped
//    like `<digits> <number made of digits , .> <3 digits>` (described in the
//    original note as the peso-bruto line).
export const aduanaEntradaSalidaExtractor = {
  field: 'aduanaEntradaSalida',
  extract: (source) => {
    const hit = source.match(/^\s*\d+\s+[\d,.]+\s+(\d{3})\s*$/m);
    if (!hit) {
      return new FieldResult('aduanaEntradaSalida', false, null);
    }
    return new FieldResult('aduanaEntradaSalida', true, hit[1]);
  },
};
51
+
52
// 5) RFC: 12 or 13 characters from [A-Z0-9] alone on a line. The line must
//    have a newline both before and after it to be recognised.
export const rfcExtractor = {
  field: 'rfc',
  extract: (source) => {
    const hit = source.match(/\n\s*([A-Z0-9]{12,13})\s*\n/);
    const value = hit ? hit[1] : null;
    return new FieldResult('rfc', Boolean(hit), value);
  },
};
60
+
61
// 6) Acceptance code: 8 characters from [A-Z0-9] on the first non-blank line
//    after the first RFC-shaped line (12-13 chars from [A-Z0-9]).
export const codigoAceptacionExtractor = {
  field: 'codigoAceptacion',
  extract: (source) => {
    // Blank lines are dropped so "next line" means next line with content.
    const nonBlank = source
      .split(/\r?\n/)
      .map((text) => text.trim())
      .filter((text) => text.length > 0);

    const rfcAt = nonBlank.findIndex((text) => /^[A-Z0-9]{12,13}$/.test(text));
    const candidate = rfcAt >= 0 ? nonBlank[rfcAt + 1] || '' : '';
    const code = /^[A-Z0-9]{8}$/.test(candidate) ? candidate : null;

    return new FieldResult('codigoAceptacion', code !== null, code);
  },
};
78
+
79
// 7) E-Document numbers: every 13-character [A-Z0-9] code found after a
//    `NUM. E-DOCUMENT` label (dot and spacing optional, case-insensitive) on
//    the same line. Codes are de-duplicated in first-seen order and returned
//    as a bracketed, comma-separated string.
export const numEDocumentoExtractor = {
  field: 'numEDocumento',
  extract: (source) => {
    const codes = source
      .split(/\r?\n/)
      .filter((row) => /NUM\.?\s*E-DOCUMENT/i.test(row))
      .flatMap((row) => {
        // Drop everything up to and including the (last) label on the row.
        const tail = row.replace(/.*NUM\.?\s*E-DOCUMENT\s*/i, '');
        return tail.match(/[A-Z0-9]{13}/g) || [];
      });

    if (codes.length === 0) {
      return new FieldResult('numEDocumento', false, null);
    }

    const distinct = Array.from(new Set(codes));
    return new FieldResult('numEDocumento', true, `[${distinct.join(',')}]`);
  },
};
107
+
108
// 8) Payment date: the known label variants are tried in priority order and
//    the first one that matches supplies the date, kept in the format it was
//    printed in (dd/mm/yyyy or yyyy/mm/dd depending on the label).
export const paymentDateExtractor = {
  field: 'paymentDate',
  extract: (source) => {
    const labelPatterns = [
      /2\s+PAGO:\s*(\d{2}\/\d{2}\/\d{4})/,
      /FECHA DE PAGO:\s*(\d{4}\/\d{2}\/\d{2})/,
      /PRESENTACION:\s*(\d{2}\/\d{2}\/\d{4})/,
    ];

    for (const pattern of labelPatterns) {
      const hit = source.match(pattern);
      if (hit) {
        return new FieldResult('paymentDate', true, hit[1]);
      }
    }
    return new FieldResult('paymentDate', false, null);
  },
};
122
+
123
// 9) COVE: every `COVE<alphanum>` token in the text (case-insensitive),
//    upper-cased, de-duplicated in first-seen order and returned as a
//    bracketed, comma-separated string. For a CoveFact row such as
//    `COVE257W76NF2 / ID250230` only the leading COVE token is kept, because
//    the token pattern stops at the space before `/`.
//
//    The token pattern cannot span a line break and can only match on a line
//    that contains `COVE`, so scanning the whole text yields the same tokens,
//    in the same order, as scanning only the lines that mention `COVE` or
//    `NUMERO DE ACUSE DE VALOR`.
export const coveExtractor = {
  field: 'cove',
  extract: (source) => {
    const tokens = source.match(/COVE[A-Z0-9]+/gi) || [];
    if (tokens.length === 0) {
      return new FieldResult('cove', false, null);
    }

    const distinct = Array.from(
      new Set(tokens.map((token) => token.toUpperCase())),
    );
    return new FieldResult('cove', true, `[${distinct.join(',')}]`);
  },
};
153
+
154
// 10) Patente: locate the header line carrying `PATENTE:`, `PEDIMENTO:` and
//     `ADUANA:` in that order, then take the first later line made of exactly
//     three whitespace-separated digit groups; its first group is the patente.
export const patenteExtractor = {
  field: 'patente',
  extract: (source) => {
    const rows = source.split(/\r?\n/);
    const headerAt = rows.findIndex((row) =>
      /PATENTE:.*PEDIMENTO:.*ADUANA:/i.test(row),
    );
    if (headerAt < 0) {
      return new FieldResult('patente', false, null);
    }

    const dataRow = rows
      .slice(headerAt + 1)
      .map((row) => row.trim())
      .find((row) => /^\d+\s+\d+\s+\d+$/.test(row));
    if (dataRow === undefined) {
      return new FieldResult('patente', false, null);
    }

    const [patente] = dataRow.split(/\s+/);
    return new FieldResult('patente', true, patente);
  },
};
175
+
176
// 11) Rectification payment date (used when clavePedimento is a
//     rectification): the first dd/mm/yyyy date appearing within 500
//     characters after `RECTIFICACION`; if that finds nothing, the same
//     search is repeated after `FECHA PAGO RECT`. Both are case-insensitive.
export const fechaPagoRectificacionExtractor = {
  field: 'fechaPagoRectificacion',
  extract: (source) => {
    const anchoredPatterns = [
      /RECTIFICACION[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
      /FECHA PAGO RECT[\s\S]{0,500}?(\d{2}\/\d{2}\/\d{4})/i,
    ];

    for (const pattern of anchoredPatterns) {
      const hit = source.match(pattern);
      if (hit) {
        return new FieldResult('fechaPagoRectificacion', true, hit[1]);
      }
    }
    return new FieldResult('fechaPagoRectificacion', false, null);
  },
};
199
+
200
/**
 * The eleven PDF extractors defined in this module, gathered into one list
 * for the pedimento_simplificado and pedimento_completo PDF matchers.
 * The order only matters to downstream tooling that inspects the result
 * array positionally.
 */
export const sharedPedimentoExtractors = [
  numPedimentoExtractor,
  tipoOperacionExtractor,
  clavePedimentoExtractor,
  aduanaEntradaSalidaExtractor,
  rfcExtractor,
  codigoAceptacionExtractor,
  numEDocumentoExtractor,
  paymentDateExtractor,
  coveExtractor,
  patenteExtractor,
  fechaPagoRectificacionExtractor,
];
217
+
218
/**
 * Shared helper used by both PDF matchers' `extractPedimentoYear`.
 *
 * Reads the two leading digits of the `numPedimento` field value and expands
 * them to a four-digit year: 00-49 map to 2000-2049, 50-99 to 1950-1999.
 *
 * @param {Array<{name: string, value: *}>} [fields] - extracted field results
 * @returns {number|null} the four-digit year, or null when `numPedimento` is
 *   missing, empty, or does not start with two digits
 */
export function pedimentoYearFromFields(fields) {
  const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
  if (!numPedimento) return null;

  // Require two real digits so a malformed value yields null instead of NaN.
  const leading = /^\d{2}/.exec(String(numPedimento));
  if (!leading) return null;

  const year = Number.parseInt(leading[0], 10);
  return year < 50 ? year + 2000 : year + 1900;
}
@@ -0,0 +1,322 @@
1
+ // VUCEM "consultarPedimentoCompleto" XML matcher.
2
+ //
3
+ // STATUS: implemented but NOT registered in `document-type-shared.js`. To
4
+ // activate, uncomment the import + registration in that file. All downstream
5
+ // code (composeArelaPath, arela-api propagation SQL, IdentifyCommand
6
+ // counters) already includes `pedimento_completo_xml`, so re-enabling is a
7
+ // single-line change.
8
+ //
9
+ // Filename patterns recognized (try in order — patente extraction):
10
+ // 1) VU_PATENTE_ADUANA_PEDIMENTO.xml → e.g. VU_3429_070_5016101.xml
11
+ // 2) ADUANA-PATENTE-PEDIMENTO.xml → e.g. 670-3806-2002487.xml
12
+ // 3) {15-digit}[_{15-digit}].xml → e.g. 260734296016642_260734296016642.xml
13
+ // If none match, arela_path is left null and propagation fills it from a
14
+ // sibling PDF in the same directory.
15
+ //
16
+ // numPedimento is composed from XML body + filename because the body never
17
+ // carries the 15-digit form: YY|AA|PPPP|NNNNNNN.
18
+ import { FieldResult } from '../document-type-shared.js';
19
+
20
+ // --------------------------- helpers ---------------------------------------
21
+
22
/**
 * Return the trimmed text of the first `<tag>value</tag>` element in
 * `source`, with or without a namespace prefix such as `ns2:`. Only elements
 * whose content has no nested markup are matched. Matching ignores case.
 *
 * @param {string} source - XML text to search
 * @param {string} tag - local element name (inserted into the pattern as-is)
 * @returns {string|null} trimmed element text, or null when nothing matches
 */
function firstTag(source, tag) {
  const prefix = '(?:[a-z0-9]+:)?';
  const pattern = new RegExp(
    '<' + prefix + tag + '>([^<]*)</' + prefix + tag + '>',
    'i',
  );
  const hit = source.match(pattern);
  if (!hit) {
    return null;
  }
  return hit[1].trim();
}
31
+
32
/**
 * Collect the inner content of every `<tag>…</tag>` element in `source`, in
 * document order. The namespace prefix is optional, matching ignores case,
 * and each element ends at the nearest closing tag (non-greedy).
 *
 * @param {string} source - XML text to search
 * @param {string} tag - local element name (inserted into the pattern as-is)
 * @returns {string[]} inner contents, possibly empty
 */
function allTagBlocks(source, tag) {
  const prefix = '(?:[a-z0-9]+:)?';
  const pattern = new RegExp(
    '<' + prefix + tag + '>([\\s\\S]*?)</' + prefix + tag + '>',
    'gi',
  );
  const contents = [];
  for (let hit = pattern.exec(source); hit !== null; hit = pattern.exec(source)) {
    contents.push(hit[1]);
  }
  return contents;
}
44
+
45
/**
 * Left-pad `value` with zeros up to `length` characters. Values already at
 * least that long are returned unchanged (never truncated).
 *
 * @param {*} value - value to stringify and pad
 * @param {number} length - minimum length of the result
 * @returns {string|null} padded string, or null for null/undefined input
 */
function pad(value, length) {
  return value == null ? null : String(value).padStart(length, '0');
}
49
+
50
/**
 * Split the file name (last path segment, `/` or `\` separated) into
 * pedimento components using the three known naming schemes, tried in order:
 *
 *   1) VU_PATENTE_ADUANA_PEDIMENTO.xml  (4, 3 and 7 digits)
 *   2) ADUANA-PATENTE-PEDIMENTO.xml     (3, 4 and 7 digits)
 *   3) 15 digits laid out as YY|AA|PPPP|NNNNNNN, optionally followed by
 *      `_` and 15 more digits
 *
 * For schemes 1 and 2 only the first two digits of the 3-digit aduana are
 * kept and `year` is null, since the name carries no year.
 *
 * @param {string} filePath - path or bare file name
 * @returns {{patente: string, aduana: string, pedimento: string,
 *   year: (string|null)}|null} null when the path is empty or no scheme fits
 */
function parseFilenameParts(filePath) {
  if (!filePath) return null;
  const fileName = filePath.split(/[\\/]/).pop();
  if (!fileName) return null;

  // 1) VU_PATENTE_ADUANA_PEDIMENTO.xml
  const vu = fileName.match(/^VU_(\d{4})_(\d{3})_(\d{7})\.xml$/i);
  if (vu) {
    const [, patente, aduana3, pedimento] = vu;
    return { patente, aduana: aduana3.substring(0, 2), pedimento, year: null };
  }

  // 2) ADUANA-PATENTE-PEDIMENTO.xml
  const dashed = fileName.match(/^(\d{3})-(\d{4})-(\d{7})\.xml$/i);
  if (dashed) {
    const [, aduana3, patente, pedimento] = dashed;
    return { patente, aduana: aduana3.substring(0, 2), pedimento, year: null };
  }

  // 3) 15-digit form YY|AA|PPPP|NNNNNNN
  const full = fileName.match(/^(\d{15})(?:_\d{15})?\.xml$/i);
  if (full) {
    const digits = full[1];
    return {
      year: digits.substring(0, 2),
      aduana: digits.substring(2, 4),
      patente: digits.substring(4, 8),
      pedimento: digits.substring(8, 15),
    };
  }

  return null;
}
97
+
98
// Take the two-digit year (YY) from a string that starts with a four-digit
// year and a dash, e.g. "2026-05-08-06:00" gives "26". Returns null for
// empty input or any other shape.
function yyFromIsoDate(iso) {
  if (!iso) return null;
  const hit = iso.match(/^(\d{4})-/);
  if (!hit) return null;
  const [, fullYear] = hit;
  return fullYear.slice(2);
}
104
+
105
// Scan every <fechas> block, keep those whose first <clave> is exactly "2",
// and return the first non-empty <fecha> among them (null if there is none).
function findPaymentDate(source) {
  const paymentFecha = allTagBlocks(source, 'fechas')
    .filter((block) => firstTag(block, 'clave') === '2')
    .map((block) => firstTag(block, 'fecha'))
    .find((fecha) => Boolean(fecha));
  return paymentFecha === undefined ? null : paymentFecha;
}
117
+
118
+ // --------------------------- extractors ------------------------------------
119
+
120
// RFC: text of the first <rfc> element in the document.
const rfcExtractor = {
  field: 'rfc',
  extract: (source) => {
    const rfc = firstTag(source, 'rfc');
    return new FieldResult('rfc', Boolean(rfc), rfc);
  },
};
127
+
128
// Pedimento key: the <clave> inside the first <claveDocumento> block, e.g.
// <ns2:claveDocumento><ns2:clave>R1</ns2:clave></ns2:claveDocumento>.
const clavePedimentoExtractor = {
  field: 'clavePedimento',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'claveDocumento');
    const clave = firstBlock === undefined ? null : firstTag(firstBlock, 'clave');
    return new FieldResult('clavePedimento', Boolean(clave), clave);
  },
};
137
+
138
// Operation type: the <descripcion> inside the first <tipoOperacion> block.
const tipoOperacionExtractor = {
  field: 'tipoOperacion',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'tipoOperacion');
    const descripcion =
      firstBlock === undefined ? null : firstTag(firstBlock, 'descripcion');
    return new FieldResult('tipoOperacion', Boolean(descripcion), descripcion);
  },
};
146
+
147
// Entry/exit customs office: the <clave> inside the first
// <aduanaEntradaSalida> block, zero-padded on the left to at least 2 chars.
const aduanaEntradaSalidaExtractor = {
  field: 'aduanaEntradaSalida',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'aduanaEntradaSalida');
    const clave = firstBlock === undefined ? null : firstTag(firstBlock, 'clave');
    if (!clave) {
      return new FieldResult('aduanaEntradaSalida', false, null);
    }
    return new FieldResult('aduanaEntradaSalida', true, pad(clave, 2));
  },
};
159
+
160
// Payment date: whatever findPaymentDate resolves for the document (the
// <fecha> of a <fechas> block whose <clave> is "2"), or not-found.
const paymentDateExtractor = {
  field: 'paymentDate',
  extract: (source) => {
    const paymentFecha = findPaymentDate(source);
    return new FieldResult('paymentDate', Boolean(paymentFecha), paymentFecha);
  },
};
167
+
168
// Rectification payment date: the <fechaPago> inside the first
// <rectificacion> block; not-found when the document has no such block.
const fechaPagoRectificacionExtractor = {
  field: 'fechaPagoRectificacion',
  extract: (source) => {
    const [firstBlock] = allTagBlocks(source, 'rectificacion');
    if (firstBlock === undefined) {
      return new FieldResult('fechaPagoRectificacion', false, null);
    }
    const fechaPago = firstTag(firstBlock, 'fechaPago');
    return new FieldResult(
      'fechaPagoRectificacion',
      Boolean(fechaPago),
      fechaPago,
    );
  },
};
179
+
180
// COVE: the first non-empty <numero> of each <facturas> block, de-duplicated
// in first-seen order and returned as a bracketed, comma-separated string.
const coveExtractor = {
  field: 'cove',
  extract: (source) => {
    const numeros = allTagBlocks(source, 'facturas')
      .map((block) => firstTag(block, 'numero'))
      .filter((numero) => Boolean(numero));

    if (numeros.length === 0) {
      return new FieldResult('cove', false, null);
    }

    const distinct = Array.from(new Set(numeros));
    return new FieldResult('cove', true, `[${distinct.join(',')}]`);
  },
};
197
+
198
// E-Document numbers: for every <identificadores> block whose first
// <claveIdentificador> carries <clave>ED</clave>, collect the block's
// <complemento1> value. Values are de-duplicated in first-seen order and
// returned as a bracketed, comma-separated string.
const numEDocumentoExtractor = {
  field: 'numEDocumento',
  extract: (source) => {
    const isEDocument = (block) => {
      const [claveBlock] = allTagBlocks(block, 'claveIdentificador');
      return claveBlock !== undefined && firstTag(claveBlock, 'clave') === 'ED';
    };

    const codes = allTagBlocks(source, 'identificadores')
      .filter(isEDocument)
      .map((block) => firstTag(block, 'complemento1'))
      .filter((complemento) => Boolean(complemento));

    if (codes.length === 0) {
      return new FieldResult('numEDocumento', false, null);
    }

    const distinct = Array.from(new Set(codes));
    return new FieldResult('numEDocumento', true, `[${distinct.join(',')}]`);
  },
};
222
+
223
+ // numPedimento is deliberately NOT produced by one of the extractors
224
+ // above. Extractors only receive `source`, whereas the 15-digit number
225
+ // also needs the filename and already-extracted fields (payment dates
226
+ // and aduanaEntradaSalida). It is therefore composed by
227
+ // `extractNumPedimento` on the definition below, which receives
228
+ // `(source, fields, filePath)` from the caller. There is no closure or
229
+ // hidden state involved. `extractNumPedimento` is the canonical place
230
+ // numPedimento is composed for XML.
231
+
232
+ // --------------------------- definition ------------------------------------
233
+
234
/**
 * Document-type definition for VUCEM "consultarPedimentoCompleto" XML files.
 * Exposes the matcher, the type resolver, the pedimento number/year
 * composers and the list of field extractors run against the XML body.
 */
export const pedimentoCompletoXmlDefinition = {
  type: 'pedimento_completo_xml',
  extensions: ['xml'],

  /**
   * A source matches when it mentions `consultarPedimentoCompletoRespuesta`
   * anywhere (case-insensitive).
   */
  match: (source) => /consultarPedimentoCompletoRespuesta/i.test(source),

  /**
   * Resolve to `pedimento_completo_xml` only if payment evidence exists:
   *   - a found `fechaPagoRectificacion` field is sufficient on its own;
   *   - otherwise a found `paymentDate` field with a non-empty value is
   *     required.
   * Without either, the document resolves to `proforma_completo_xml`.
   *
   * Only the extracted fields are inspected, so a document that has a
   * <rectificacion> block without a fechaPago is still judged by paymentDate.
   */
  resolveType: (fields) => {
    const hasRect = Boolean(
      fields?.find((f) => f.name === 'fechaPagoRectificacion' && f.found),
    );
    if (hasRect) return 'pedimento_completo_xml';

    const paymentDate =
      fields?.find((f) => f.name === 'paymentDate' && f.found)?.value ?? null;
    return paymentDate ? 'pedimento_completo_xml' : 'proforma_completo_xml';
  },

  /**
   * Compose the pedimento number YY|AA|PPPP|NNNNNNN from XML body + filename.
   *   YY:      from the rectification fechaPago if found, else from the
   *            payment date, else from the filename (15-digit pattern only).
   *   AA:      from the extracted aduanaEntradaSalida field, else from the
   *            filename; padded to 2.
   *   PPPP:    from the filename (any of the three patterns); padded to 4.
   *   NNNNNNN: from the first text-only <pedimento> element padded to 7,
   *            else from the filename.
   * Returns null if any component cannot be resolved.
   *
   * NOTE(review): components are padded but never truncated, so an aduana
   * value longer than 2 characters produces a result longer than 15 chars —
   * confirm the XML <clave> is always at most 2 digits.
   */
  extractNumPedimento: (source, fields, filePath) => {
    const parts = parseFilenameParts(filePath);
    const foundValue = (name) =>
      fields?.find((f) => f.name === name && f.found)?.value;

    const yy =
      yyFromIsoDate(foundValue('fechaPagoRectificacion')) ||
      yyFromIsoDate(foundValue('paymentDate')) ||
      parts?.year ||
      null;

    const aduanaField = fields?.find(
      (f) => f.name === 'aduanaEntradaSalida',
    )?.value;
    const aduana = aduanaField || parts?.aduana || null;

    const patente = parts?.patente || null;

    // An empty <pedimento></pedimento> counts as absent and defers to the
    // filename, same as a missing element.
    const pedimentoBody = firstTag(source, 'pedimento');
    const pedimento =
      (pedimentoBody ? pad(pedimentoBody, 7) : null) ||
      parts?.pedimento ||
      null;

    if (!yy || !aduana || !patente || !pedimento) return null;

    return `${pad(yy, 2)}${pad(aduana, 2)}${pad(patente, 4)}${pedimento}`;
  },

  /**
   * Four-digit year derived from the two leading digits of the composed
   * pedimento number: 00-49 map to 2000-2049, 50-99 to 1950-1999.
   * Returns null when the number cannot be composed.
   */
  extractPedimentoYear: (source, fields, filePath) => {
    const num = pedimentoCompletoXmlDefinition.extractNumPedimento(
      source,
      fields,
      filePath,
    );
    if (!num) return null;
    const year = Number.parseInt(num.substring(0, 2), 10);
    return year < 50 ? year + 2000 : year + 1900;
  },

  extractors: [
    rfcExtractor,
    clavePedimentoExtractor,
    tipoOperacionExtractor,
    aduanaEntradaSalidaExtractor,
    paymentDateExtractor,
    fechaPagoRectificacionExtractor,
    coveExtractor,
    numEDocumentoExtractor,
  ],
};