@arela/uploader 1.0.21 → 1.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "1.0.21",
3
+ "version": "1.0.23",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -0,0 +1,164 @@
1
+ import fs from 'fs';
2
+ import path from 'path';
3
+
4
+ import { DatastageApiService } from '../services/DatastageApiService.js';
5
+ import logger from '../services/LoggingService.js';
6
+
7
+ import appConfig from '../config/config.js';
8
+ import ErrorHandler from '../errors/ErrorHandler.js';
9
+
10
/**
 * Datastage Command Handler
 *
 * Uploads monthly Datastage *.zip files from a directory to the API, one file
 * at a time. Every zip found is first registered with the API, then the API's
 * pending list for the directory decides which files are actually uploaded.
 * Per the original author's note, idempotency comes from the CLI
 * `datastage_uploads` tracking table behind those API calls.
 */
export class DatastageCommand {
  constructor() {
    this.errorHandler = new ErrorHandler(logger);
  }

  /**
   * Run the datastage upload for one directory.
   *
   * @param {Object} options
   * @param {string} options.dir - directory containing *.zip files (required)
   * @param {string} [options.api] - 'default'|'agencia'|'cliente'
   * @param {boolean} [options.retryFailed] - re-attempt files in 'failed' status.
   *   NOTE(review): this flag is not read anywhere in this method; whether
   *   failed rows are retried depends on what `getPending` returns — confirm.
   * @param {boolean} [options.showStats] - print final stats from API
   * @returns {Promise<{uploaded: number, failed: number, skipped: number}>}
   *   Counters for this run. `skipped` is never negative.
   * @throws {Error} when `options.dir` is missing, does not exist, or is not a
   *   directory, and when registering a file or fetching the pending list fails.
   */
  async execute(options = {}) {
    const startTime = Date.now();
    const sourceDirectory = this.#resolveSourceDirectory(options.dir);

    const apiTarget = options.api || 'default';
    const api = new DatastageApiService(apiTarget);

    logger.info('📦 Starting arela datastage command');
    logger.info(`🎯 API Target: ${apiTarget}`);
    logger.info(`📂 Source: ${sourceDirectory}`);

    // 1. Enumerate *.zip in root directory (non-recursive)
    const zipFiles = this.#listZipFiles(sourceDirectory);
    if (zipFiles.length === 0) {
      logger.warn('No *.zip files found in directory. Nothing to do.');
      return { uploaded: 0, failed: 0, skipped: 0 };
    }
    logger.info(`🗂 Found ${zipFiles.length} zip file(s)`);

    // 2. Register each file (idempotent upsert)
    logger.info('📝 Registering files...');
    for (const zipPath of zipFiles) {
      const fileName = path.basename(zipPath);
      try {
        // stat is inside the try so a file that vanished after the directory
        // listing is reported with its name before the run aborts.
        const stats = fs.statSync(zipPath);
        await api.registerUpload({
          absolutePath: zipPath,
          fileName,
          sizeBytes: stats.size,
          fileModifiedAt: stats.mtime.toISOString(),
          sourceDirectory,
        });
      } catch (err) {
        logger.error(` ✗ register failed for ${fileName}: ${err.message}`);
        throw err;
      }
    }

    // 3. Fetch pending list scoped to this directory
    const pending = await api.getPending(sourceDirectory);
    const pendingPaths = new Set(pending.map((p) => p.absolutePath));

    // The pending list can contain rows whose file is no longer on disk (the
    // upload loop handles exactly that case), so it may be larger than the
    // local listing. Clamp so the reported count never goes negative.
    const alreadyUploaded = Math.max(0, zipFiles.length - pendingPaths.size);
    if (alreadyUploaded > 0) {
      logger.info(`⏭ Skipping ${alreadyUploaded} already uploaded file(s)`);
    }

    if (pending.length === 0) {
      logger.success('✅ All files already uploaded. Nothing to do.');
      if (options.showStats) {
        const s = await api.getStats(sourceDirectory);
        logger.info(`📊 Stats: ${JSON.stringify(s)}`);
      }
      return { uploaded: 0, failed: 0, skipped: alreadyUploaded };
    }

    // 4. Sequential upload loop
    logger.info(`🚀 Uploading ${pending.length} file(s) sequentially...`);
    let uploaded = 0;
    let failed = 0;

    for (let i = 0; i < pending.length; i++) {
      const row = pending[i];
      const label = `[${i + 1}/${pending.length}] ${row.fileName}`;
      if (await this.#uploadOne(api, row, label)) {
        uploaded++;
      } else {
        failed++;
      }
    }

    const elapsed = ((Date.now() - startTime) / 1000).toFixed(1);
    logger.info('—'.repeat(60));
    logger.info(
      `Done in ${elapsed}s — uploaded=${uploaded} failed=${failed} skipped=${alreadyUploaded}`,
    );

    if (options.showStats) {
      const s = await api.getStats(sourceDirectory);
      logger.info(`📊 Final stats: ${JSON.stringify(s)}`);
    }

    return { uploaded, failed, skipped: alreadyUploaded };
  }

  /**
   * Validate the --dir option and return it as an absolute path.
   * @param {string} dir
   * @returns {string}
   */
  #resolveSourceDirectory(dir) {
    if (!dir) {
      throw new Error('--dir <path> is required');
    }
    const sourceDirectory = path.resolve(dir);
    if (!fs.existsSync(sourceDirectory)) {
      throw new Error(`Directory not found: ${sourceDirectory}`);
    }
    if (!fs.statSync(sourceDirectory).isDirectory()) {
      throw new Error(`Not a directory: ${sourceDirectory}`);
    }
    return sourceDirectory;
  }

  /**
   * List the absolute paths of regular files ending in .zip (any case) that
   * sit directly inside the directory; subdirectories are not visited.
   * @param {string} sourceDirectory
   * @returns {string[]}
   */
  #listZipFiles(sourceDirectory) {
    return fs
      .readdirSync(sourceDirectory, { withFileTypes: true })
      .filter((entry) => entry.isFile() && /\.zip$/i.test(entry.name))
      .map((entry) => path.join(sourceDirectory, entry.name));
  }

  /**
   * Upload one pending row and record the outcome through the API.
   * Never throws: every failure is logged and reported via mark-failed.
   * @param {Object} api - DatastageApiService instance
   * @param {Object} row - pending row; `id`, `absolutePath` and `fileName` are read
   * @param {string} label - progress prefix used in log lines
   * @returns {Promise<boolean>} true when the file was uploaded and marked
   */
  async #uploadOne(api, row, label) {
    const localPath = row.absolutePath;

    if (!fs.existsSync(localPath)) {
      const reason = `File missing on disk: ${localPath}`;
      logger.error(`✗ ${label}: ${reason}`);
      await this.#markFailedSafely(api, row.id, reason);
      return false;
    }

    try {
      logger.info(`⬆ ${label}: uploading...`);
      const result = await api.uploadZip(localPath);
      // The id/folio may sit at the top level or under `data`.
      const datastageId = result?.id || result?.data?.id;
      const folio = result?.folio || result?.data?.folio;
      if (!datastageId) {
        throw new Error(
          `API returned no datastage id in response: ${DatastageCommand.#previewResponse(result)}`,
        );
      }
      await api.markUploaded(row.id, { datastageId, folio });
      logger.success(
        `✓ ${label}: folio=${folio || 'n/a'} datastageId=${datastageId}`,
      );
      return true;
    } catch (err) {
      logger.error(`✗ ${label}: ${err.message}`);
      await this.#markFailedSafely(api, row.id, err.message);
      return false;
    }
  }

  /**
   * Report a failure to the API. Best effort: an error from the mark-failed
   * call itself is logged and not propagated, so the run continues.
   * @param {Object} api - DatastageApiService instance
   * @param {*} rowId
   * @param {string} message
   */
  async #markFailedSafely(api, rowId, message) {
    try {
      await api.markFailed(rowId, message);
    } catch (e) {
      logger.error(` mark-failed error: ${e.message}`);
    }
  }

  /**
   * Render at most 300 characters of an API response for an error message.
   * JSON.stringify returns undefined for an undefined input and throws on
   * values it cannot serialize; both cases fall back to String().
   * @param {*} value
   * @returns {string}
   */
  static #previewResponse(value) {
    let text;
    try {
      text = JSON.stringify(value);
    } catch {
      text = undefined;
    }
    return (text ?? String(value)).slice(0, 300);
  }
}

const datastageCommand = new DatastageCommand();
export default datastageCommand;
@@ -10,9 +10,7 @@ import { ConfigurationError } from '../errors/ErrorTypes.js';
10
10
  import FileDetectionService from '../file-detection.js';
11
11
 
12
12
  /**
13
- * Paid pedimento detected_type values. `pedimento_completo_xml` is included
14
- * even though the XML matcher is currently disabled in the registry so that
15
- * re-enabling it requires no changes here.
13
+ * Paid pedimento detected_type values.
16
14
  */
17
15
  const DETECTED_PEDIMENTO_TYPES = new Set([
18
16
  'pedimento_simplificado',
@@ -74,29 +72,95 @@ export class IdentifyCommand {
74
72
  const scanConfig = appConfig.getScanConfig();
75
73
  const batchSize = parseInt(options.batchSize) || 100;
76
74
 
75
+ // Parse optional path prefix mapping: "O:/=/Volumes/nas/"
76
+ const pathPrefixMap = options.pathPrefix
77
+ ? this.#parsePathPrefix(options.pathPrefix)
78
+ : null;
79
+
77
80
  logger.info('πŸ” Starting arela identify command');
78
81
  logger.info(`🎯 API Target: ${apiTarget}`);
79
82
  logger.info(`πŸ“¦ Batch Size: ${batchSize}`);
83
+ if (options.table) logger.info(`πŸ“Œ Target table: ${options.table}`);
84
+ if (options.resetAttempts)
85
+ logger.info('♻️ Reset detection attempts: ON');
86
+ if (pathPrefixMap)
87
+ logger.info(
88
+ `πŸ—Ί Path prefix map: ${pathPrefixMap.from} β†’ ${pathPrefixMap.to}`,
89
+ );
80
90
 
81
- // Fetch all tables for this instance
82
- logger.info('\nπŸ“Š Fetching instance tables...');
83
- const tables = await this.scanApiService.getInstanceTables(
84
- scanConfig.companySlug,
85
- scanConfig.serverId,
86
- scanConfig.basePathFull,
87
- );
91
+ // Resolve the list of tables to process
92
+ let tables;
93
+ if (options.fileId && options.table) {
94
+ // Single-file mode β€” identify exactly one file record
95
+ logger.info(
96
+ `\n🎯 Single-file mode: ${options.table} / ${options.fileId}`,
97
+ );
98
+ this.#reportProgress(0, `Fetching file record ${options.fileId}...`);
99
+ const record = await this.scanApiService.getFileRecord(
100
+ options.table,
101
+ options.fileId,
102
+ );
103
+ const results = await this.#detectFilesLocally(
104
+ [record],
105
+ 1,
106
+ pathPrefixMap,
107
+ );
108
+ const updates = results.filter((r) => r !== null);
109
+ if (updates.length > 0) {
110
+ await this.scanApiService.batchUpdateDetection(
111
+ options.table,
112
+ updates,
113
+ );
114
+ }
115
+ this.#reportProgress(100, `Single-file identification complete`);
116
+ logger.success(`\nβœ… Single-file identification complete`);
117
+ const firstUpdate = updates[0];
118
+ return {
119
+ processed: 1,
120
+ detected: updates.length,
121
+ proformas: 0,
122
+ errors: 1 - updates.length,
123
+ detectedType: firstUpdate?.detectedType ?? null,
124
+ detectedPedimento: firstUpdate?.detectedPedimento ?? null,
125
+ rfc: firstUpdate?.rfc ?? null,
126
+ arelaPath: firstUpdate?.arelaPath ?? null,
127
+ };
128
+ } else if (options.table) {
129
+ // Single-table mode β€” no need to match instance tables
130
+ tables = [{ tableName: options.table }];
131
+ logger.info(`\nπŸ“Œ Single-table mode: ${options.table}`);
132
+ } else {
133
+ logger.info('\nπŸ“Š Fetching instance tables...');
134
+ tables = await this.scanApiService.getInstanceTables(
135
+ scanConfig.companySlug,
136
+ scanConfig.serverId,
137
+ scanConfig.basePathFull,
138
+ );
139
+
140
+ if (tables.length === 0) {
141
+ throw new ConfigurationError(
142
+ 'No tables found for this instance. Run "arela scan" first.',
143
+ );
144
+ }
88
145
 
89
- if (tables.length === 0) {
90
- throw new ConfigurationError(
91
- 'No tables found for this instance. Run "arela scan" first.',
146
+ logger.info(
147
+ `πŸ“‹ Found ${tables.length} table${tables.length === 1 ? '' : 's'} to process`,
92
148
  );
149
+ for (const table of tables) {
150
+ logger.info(` - ${table.tableName}`);
151
+ }
93
152
  }
94
153
 
95
- logger.info(
96
- `πŸ“‹ Found ${tables.length} table${tables.length === 1 ? '' : 's'} to process`,
97
- );
98
- for (const table of tables) {
99
- logger.info(` - ${table.tableName}`);
154
+ // Optionally reset detection attempts so previously-failed files are retried
155
+ if (options.resetAttempts) {
156
+ for (const table of tables) {
157
+ const { reset } = await this.scanApiService.resetDetectionAttempts(
158
+ table.tableName,
159
+ );
160
+ logger.info(
161
+ `♻️ Reset ${reset} detection attempt(s) in ${table.tableName}`,
162
+ );
163
+ }
100
164
  }
101
165
 
102
166
  // Process each table
@@ -127,6 +191,7 @@ export class IdentifyCommand {
127
191
  table.tableName,
128
192
  batchSize,
129
193
  startTime,
194
+ pathPrefixMap,
130
195
  );
131
196
 
132
197
  totalStats.processed += stats.processed;
@@ -183,7 +248,7 @@ export class IdentifyCommand {
183
248
  * @param {number} startTime - Start time for speed calculation
184
249
  * @returns {Promise<Object>} Processing statistics
185
250
  */
186
- async #processTable(tableName, batchSize, startTime) {
251
+ async #processTable(tableName, batchSize, startTime, pathPrefixMap = null) {
187
252
  // Get detection statistics first (allTypes=true to count all supported file types)
188
253
  const initialStats = await this.scanApiService.getDetectionStats(
189
254
  tableName,
@@ -255,7 +320,11 @@ export class IdentifyCommand {
255
320
  const files = response.data;
256
321
 
257
322
  // Detect files locally with concurrent processing
258
- const detectionResults = await this.#detectFilesLocally(files, 10);
323
+ const detectionResults = await this.#detectFilesLocally(
324
+ files,
325
+ 10,
326
+ pathPrefixMap,
327
+ );
259
328
 
260
329
  // Batch update to API
261
330
  const updateResult = await this.scanApiService.batchUpdateDetection(
@@ -301,7 +370,7 @@ export class IdentifyCommand {
301
370
  * @param {number} concurrency - Maximum concurrent detections
302
371
  * @returns {Promise<Array>} Detection results
303
372
  */
304
- async #detectFilesLocally(files, concurrency = 10) {
373
+ async #detectFilesLocally(files, concurrency = 10, pathPrefixMap = null) {
305
374
  const limit = pLimit(concurrency);
306
375
  const basePath = appConfig.getBasePath();
307
376
 
@@ -309,7 +378,17 @@ export class IdentifyCommand {
309
378
  limit(async () => {
310
379
  try {
311
380
  // Check if file exists on filesystem
312
- const absolutePath = file.absolute_path;
381
+ let absolutePath = file.absolute_path;
382
+
383
+ // Apply cross-platform path prefix mapping if configured
384
+ if (
385
+ pathPrefixMap &&
386
+ absolutePath &&
387
+ absolutePath.startsWith(pathPrefixMap.from)
388
+ ) {
389
+ absolutePath =
390
+ pathPrefixMap.to + absolutePath.slice(pathPrefixMap.from.length);
391
+ }
313
392
 
314
393
  if (!fs.existsSync(absolutePath)) {
315
394
  return {
@@ -462,13 +541,15 @@ export class IdentifyCommand {
462
541
 
463
542
  // Check if the text contains any required pedimento marker. This must
464
543
  // stay aligned with the `match()` predicates in pedimento-simplificado.js
465
- // and pedimento-completo.js.
544
+ // and pedimento-completo.js (which accept both "DE" and "DEL" in the
545
+ // title, and treat the colon after "T. OPER" as optional).
466
546
  const text = result.text || '';
467
- const hasSimplificadoMarker = /FORMA SIMPLIFICADA DE PEDIMENTO/i.test(text);
547
+ const hasSimplificadoMarker =
548
+ /FORMA\s+SIMPLIFICADA\s+DEL?\s+PEDIMENTO/i.test(text);
468
549
  const hasCompletoMarkers =
469
550
  /NUM\.?\s*PEDIMENTO:/i.test(text) &&
470
551
  /CVE\.?\s*PEDIMENTO:/i.test(text) &&
471
- /T\.?\s*OPER:/i.test(text);
552
+ /T\.?\s*OPER:?/i.test(text);
472
553
 
473
554
  return !hasSimplificadoMarker && !hasCompletoMarkers;
474
555
  }
@@ -543,6 +624,44 @@ export class IdentifyCommand {
543
624
  }
544
625
  }
545
626
  }
627
+
628
+ /**
629
+ * Parse a path prefix mapping string such as "O:/=/Volumes/nas/" into { from, to }.
630
+ * Supports both "FROM=TO" and "FROM:TO" separators.
631
+ * @private
632
+ * @param {string} mapping
633
+ * @returns {{ from: string, to: string }}
634
+ */
635
+ #parsePathPrefix(mapping) {
636
+ // Support either "FROM=TO" or "FROM:TO" as separator
637
+ const eqIdx = mapping.indexOf('=');
638
+ const colonIdx = mapping.indexOf(':');
639
+
640
+ let sep = -1;
641
+ // "O:/=/Volumes" β€” the colon inside "O:/" is part of a Windows drive letter; prefer '=' separator
642
+ if (eqIdx !== -1) {
643
+ sep = eqIdx;
644
+ } else if (colonIdx !== -1) {
645
+ sep = colonIdx;
646
+ }
647
+
648
+ if (sep === -1) {
649
+ throw new Error(
650
+ `Invalid --path-prefix format: "${mapping}". Expected "FROM=TO" e.g. "O:/=/Volumes/nas/"`,
651
+ );
652
+ }
653
+
654
+ const from = mapping.slice(0, sep);
655
+ const to = mapping.slice(sep + 1);
656
+
657
+ if (!from || !to) {
658
+ throw new Error(
659
+ `Invalid --path-prefix format: "${mapping}". Both FROM and TO parts must be non-empty.`,
660
+ );
661
+ }
662
+
663
+ return { from, to };
664
+ }
546
665
  }
547
666
 
548
667
  // Export singleton instance
@@ -342,6 +342,8 @@ export class PollWorkerCommand {
342
342
  batchSize: 100,
343
343
  showStats: false,
344
344
  onProgress,
345
+ ...(job.fileId && { fileId: job.fileId }),
346
+ ...(job.table && { table: job.table }),
345
347
  };
346
348
 
347
349
  return identifyCommand.execute(options);
@@ -579,6 +579,9 @@ export class ScanCommand {
579
579
  * Normalize file record for database insertion
580
580
  * Stores paths with forward slashes for consistency but keeps them absolute
581
581
  * Sets likelySimplificado to true if file is a PDF and filename contains 'simp'
582
+ * Sets likelyInterAgencia to true if filename matches an inter-agency CFDI
583
+ * pattern (e.g. SICINGR*), so the API forces these XML/PDF through detection
584
+ * even though they lack the 'simp/pedim/covefact' heuristic.
582
585
  * @private
583
586
  */
584
587
  #normalizeFileRecord(filePath, fileStats, basePath, scanTimestamp) {
@@ -600,6 +603,17 @@ export class ScanCommand {
600
603
  const likelySimplificado =
601
604
  fileExtension === 'pdf' && /(simp|pedim|covefact)/i.test(fileName);
602
605
 
606
+ // Flag inter-agency CFDIs by filename so detection picks them up.
607
+ // Patterns are configurable via SCAN_INTER_AGENCIA_PATTERNS env var
608
+ // (see config.js). Only meaningful for PDF and XML.
609
+ let likelyInterAgencia = false;
610
+ if (fileExtension === 'pdf' || fileExtension === 'xml') {
611
+ const patterns = appConfig.scan.interAgenciaPatterns;
612
+ if (patterns && patterns.length > 0) {
613
+ likelyInterAgencia = patterns.some((re) => re.test(fileName));
614
+ }
615
+ }
616
+
603
617
  return {
604
618
  fileName,
605
619
  fileExtension,
@@ -610,6 +624,7 @@ export class ScanCommand {
610
624
  modifiedAt: fileStats.mtime.toISOString(),
611
625
  scanTimestamp,
612
626
  likelySimplificado,
627
+ likelyInterAgencia,
613
628
  };
614
629
  }
615
630
 
@@ -37,10 +37,10 @@ class Config {
37
37
  const __dirname = path.dirname(__filename);
38
38
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
39
39
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
40
- return packageJson.version || '1.0.21';
40
+ return packageJson.version || '1.0.23';
41
41
  } catch (error) {
42
42
  console.warn('⚠️ Could not read package.json version, using fallback');
43
- return '1.0.21';
43
+ return '1.0.23';
44
44
  }
45
45
  }
46
46
 
@@ -294,6 +294,31 @@ class Config {
294
294
  .map((p) => p.trim())
295
295
  .filter(Boolean);
296
296
 
297
+ // Parse inter-agency CFDI filename patterns. Files whose basename matches
298
+ // any of these regex patterns are flagged at scan time (likelyInterAgencia)
299
+ // so the API forces them through detection and the factura_inter_agencia
300
+ // matcher can classify them. The push pipeline then excludes them (see
301
+ // NON_PUSHABLE_TYPES_SQL in arela-api). Comma-separated regex source list.
302
+ // Default: ^SICINGR β€” covers NORCOM's SICINGR70-NNNNNN(...).pdf/.XML files.
303
+ const defaultInterAgenciaPatterns = '^SICINGR';
304
+ const interAgenciaPatterns = (
305
+ process.env.SCAN_INTER_AGENCIA_PATTERNS || defaultInterAgenciaPatterns
306
+ )
307
+ .split(',')
308
+ .map((p) => p.trim())
309
+ .filter(Boolean)
310
+ .map((p) => {
311
+ try {
312
+ return new RegExp(p, 'i');
313
+ } catch (err) {
314
+ console.warn(
315
+ `⚠️ Invalid SCAN_INTER_AGENCIA_PATTERNS regex "${p}": ${err.message}`,
316
+ );
317
+ return null;
318
+ }
319
+ })
320
+ .filter(Boolean);
321
+
297
322
  // Generate table name if all components are available
298
323
  // Note: This is just for reference; actual table names are generated dynamically
299
324
  // in ScanCommand based on discovered directories and levels
@@ -312,6 +337,7 @@ class Config {
312
337
  basePathFull: basePathLabel, // Renamed for consistency
313
338
  tableName,
314
339
  excludePatterns,
340
+ interAgenciaPatterns,
315
341
  batchSize: parseInt(process.env.SCAN_BATCH_SIZE) || 2000,
316
342
  directoryLevel: parseInt(process.env.SCAN_DIRECTORY_LEVEL) || 0,
317
343
  };
@@ -1,10 +1,10 @@
1
1
  // Import all document type definitions
2
2
  import { dodaPdfDefinition } from './document-types/doda-pdf.js';
3
3
  import { dodaXmlDefinition } from './document-types/doda-xml.js';
4
+ import { facturaInterAgenciaDefinition } from './document-types/factura-inter-agencia.js';
4
5
  import { facturasComerciales } from './document-types/facturas-comerciales.js';
6
+ import { pedimentoCompletoXmlDefinition } from './document-types/pedimento-completo-xml.js';
5
7
  import { pedimentoCompletoDefinition } from './document-types/pedimento-completo.js';
6
- // TODO: enable XML pedimento detection β€” implementation ready in pedimento-completo-xml.js
7
- // import { pedimentoCompletoXmlDefinition } from './document-types/pedimento-completo-xml.js';
8
8
  import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
9
9
  import { proformaDefinition } from './document-types/proforma.js';
10
10
  import { supportDocumentDefinition } from './document-types/support-document.js';
@@ -45,14 +45,14 @@ export class DocumentTypeDefinition {
45
45
  const documentTypes = [
46
46
  pedimentoSimplificadoDefinition,
47
47
  pedimentoCompletoDefinition,
48
- // TODO: enable XML pedimento detection β€” uncomment the next line and the
49
- // matching import at the top of this file. All downstream code
50
- // (composeArelaPath, arela-api SQL filters, IdentifyCommand counters)
51
- // already accepts `pedimento_completo_xml`.
52
- // pedimentoCompletoXmlDefinition,
48
+ pedimentoCompletoXmlDefinition,
53
49
  supportDocumentDefinition,
54
50
  dodaPdfDefinition,
55
51
  dodaXmlDefinition,
52
+ // factura_inter_agencia MUST be evaluated BEFORE facturasComerciales
53
+ // because a NORCOM↔PALCO CFDI would also match the generic commercial
54
+ // invoice matcher. First match wins (see extractDocumentFields).
55
+ facturaInterAgenciaDefinition,
56
56
  facturasComerciales,
57
57
  // Add more document types here as needed
58
58
  ];
@@ -114,6 +114,14 @@ export function extractDocumentFields(source, fileExtension, filePath) {
114
114
  ? docType.extractPedimentoYear(source, fields, filePath)
115
115
  : null;
116
116
 
117
+ // Ensure downstream code (composeArelaPath) sees `numPedimento` as a
118
+ // field. PDF matchers add it via an explicit extractor; XML matchers
119
+ // compose it externally via extractNumPedimento. Backfill so both paths
120
+ // expose the same shape.
121
+ if (pedimento && !fields.some((f) => f.name === 'numPedimento')) {
122
+ fields.push(new FieldResult('numPedimento', true, pedimento));
123
+ }
124
+
117
125
  return [resolvedType, fields, pedimento, year];
118
126
  }
119
127
  }