npm - @arela/uploader - Versions diffs - 1.0.17 → 1.0.19 - Mend

@arela/uploader 1.0.17 → 1.0.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13)

package/package.json +1 -1
package/src/commands/IdentifyCommand.js +6 -2
package/src/commands/PollWorkerCommand.js +65 -1
package/src/commands/PropagateCommand.js +121 -0
package/src/commands/PushCommand.js +4 -1
package/src/commands/ScanCommand.js +19 -5
package/src/commands/WorkerCommand.js +3 -0
package/src/config/config.js +26 -8
package/src/document-type-shared.js +6 -0
package/src/document-types/doda-pdf.js +121 -0
package/src/document-types/doda-xml.js +118 -0
package/src/document-types/facturas-comerciales.js +233 -0
package/src/services/ScanApiService.js +85 -13

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@arela/uploader",
-  "version": "1.0.17",
+  "version": "1.0.19",
   "description": "CLI to upload files/directories to Arela",
   "bin": {
     "arela": "./src/index.js"

package/src/commands/IdentifyCommand.js CHANGED Viewed

@@ -166,8 +166,11 @@ export class IdentifyCommand {
    * @returns {Promise<Object>} Processing statistics
    */
   async #processTable(tableName, batchSize, startTime) {
-    // Get detection statistics first
-    const initialStats = await this.scanApiService.getDetectionStats(tableName);
+    // Get detection statistics first (allTypes=true to count all supported file types)
+    const initialStats = await this.scanApiService.getDetectionStats(
+      tableName,
+      true,
+    );
     logger.info(`   Total PDFs: ${initialStats.totalPdfs}`);
     logger.info(`   Detected: ${initialStats.detected}`);
     logger.info(`   Pending: ${initialStats.pending}`);
@@ -224,6 +227,7 @@ export class IdentifyCommand {
         tableName,
         0,
         batchSize,
+        true, // allTypes: fetch all supported file types, not just likely-simplificado PDFs
       );
       if (!response.data || response.data.length === 0) {

package/src/commands/PollWorkerCommand.js CHANGED Viewed

@@ -1,8 +1,11 @@
+import path from 'path';
 import logger from '../services/LoggingService.js';
 import { PipelineApiService } from '../services/PipelineApiService.js';
 import appConfig from '../config/config.js';
 import ErrorHandler from '../errors/ErrorHandler.js';
+import { PathNormalizer } from '../utils/PathNormalizer.js';
 /**
  * Poll Worker Command Handler
@@ -234,13 +237,74 @@ export class PollWorkerCommand {
     // Override scan directories if provided
     if (job.scanDirectories && job.scanDirectories.length > 0) {
-      process.env.UPLOAD_SOURCES = job.scanDirectories.join('|');
+      const allAbsolute = job.scanDirectories.every((d) =>
+        PathNormalizer.isAbsolutePath(d),
+      );
+      if (allAbsolute) {
+        const ancestor = this.#commonAncestor(job.scanDirectories);
+        // Check if ancestor is meaningful (not just root or a drive letter)
+        const isUseful =
+          ancestor.length > 1 && !/^[a-zA-Z]:[/\\]?$/.test(ancestor);
+        if (isUseful) {
+          // Common ancestor found — set as base path, make sources relative
+          process.env.UPLOAD_BASE_PATH = ancestor;
+          process.env.ARELA_BASE_PATH_LABEL = ancestor;
+          const relativeSources = job.scanDirectories.map(
+            (d) => path.relative(ancestor, d) || '.',
+          );
+          process.env.UPLOAD_SOURCES = relativeSources.join('|');
+        } else {
+          // Cross-drive or no common ancestor — wildcard base, absolute sources
+          process.env.UPLOAD_BASE_PATH = '*';
+          process.env.ARELA_BASE_PATH_LABEL = '*';
+          process.env.UPLOAD_SOURCES = job.scanDirectories.join('|');
+        }
+      } else {
+        process.env.UPLOAD_SOURCES = job.scanDirectories.join('|');
+      }
     }
     // Override file extensions if provided
     if (job.fileExtensions && job.fileExtensions.length > 0) {
       process.env.UPLOAD_FILE_EXTENSIONS = job.fileExtensions.join(',');
     }
+    // Reload cached config from the updated env vars
+    appConfig.reloadScanConfig();
+  }
+  /**
+   * Compute the longest common ancestor directory of a list of absolute paths.
+   * Uses '/' as separator (PathNormalizer normalizes Windows \\ to /).
+   *
+   * Segments are compared with strict equality, so the comparison is
+   * case-sensitive (e.g. 'O:' and 'o:' do not match).
+   *
+   * @param {string[]} paths - Absolute paths to compare
+   * @returns {string} '/' for an empty list or when no leading segment is
+   *   shared; the single input unchanged (separators not normalized) when only
+   *   one path is given; otherwise the shared prefix, rooted at '/' or at the
+   *   drive letter (e.g. 'O:/exp', or 'O:/' when only the drive is shared)
+   */
+  #commonAncestor(paths) {
+    if (paths.length === 0) return '/';
+    if (paths.length === 1) return paths[0]; // returned as-is, not normalized
+    // Normalize separators so O:\exp\... becomes O:/exp/...
+    const normalized = paths.map((p) => PathNormalizer.normalizeSeparators(p));
+    const split = normalized.map((p) => p.split('/').filter(Boolean)); // filter(Boolean) drops empty segments (leading or repeated '/')
+    const minLen = Math.min(...split.map((s) => s.length)); // the shortest path bounds the shared prefix
+    const common = [];
+    for (let i = 0; i < minLen; i++) {
+      const seg = split[0][i];
+      if (split.every((s) => s[i] === seg)) {
+        common.push(seg);
+      } else {
+        break; // first mismatch ends the shared prefix
+      }
+    }
+    // Preserve drive letter format (e.g., 'O:' → 'O:/')
+    if (common.length > 0 && /^[a-zA-Z]:$/.test(common[0])) {
+      return common[0] + '/' + common.slice(1).join('/');
+    }
+    return '/' + common.join('/'); // yields '/' when nothing is shared
   }
   /**

package/src/commands/PropagateCommand.js CHANGED Viewed

@@ -100,6 +100,15 @@ export class PropagateCommand {
         totalStats.directoriesProcessed += stats.directoriesProcessed;
       }
+      // Step 5: Cross-table propagation
+      // Match files with detected_pedimento in one table to pedimento sources in other tables
+      const crossTableStats = await this.#processCrossTablePropagation(
+        scanConfig,
+        tables,
+      );
+      totalStats.filesUpdated += crossTableStats.filesUpdated;
+      totalStats.filesFailed += crossTableStats.filesFailed;
       // Show combined results
       const duration = ((Date.now() - this.stats.startTime) / 1000).toFixed(2);
       const filesPerSec =
@@ -439,6 +448,118 @@ export class PropagateCommand {
     };
   }
+  /**
+   * Cross-table propagation phase
+   * Matches files with detected_pedimento in one table to pedimento sources in other tables.
+   * This enables facturas (in a different directory/table) to get arela_path from their pedimento.
+   *
+   * Flow: fetch every pedimento source once, index the sources by
+   * detected_pedimento, then page through each table's orphan files (pedimento
+   * detected, no arela_path) and batch-update the ones whose pedimento is in
+   * the index. Orphans without a matching source are counted but left untouched.
+   * When several sources share a detected_pedimento, the last one returned wins.
+   * A batch update that throws is logged and every update in that batch is
+   * counted as failed; processing then continues with the next page.
+   *
+   * NOTE(review): offset always advances by the full page size, even when rows
+   * of that page were just given an arela_path. If the orphan query excludes
+   * such rows (as its name suggests), later pages shift and some orphans may be
+   * skipped in this run — confirm against the API's pagination semantics.
+   *
+   * @private
+   * @param {Object} scanConfig - Scan configuration with companySlug, serverId, basePathFull
+   * @param {Array} tables - All tables for this instance; each entry is read via table.tableName
+   * @returns {Promise<Object>} { filesUpdated, filesFailed }
+   */
+  async #processCrossTablePropagation(scanConfig, tables) {
+    console.log('\n🔗 Cross-table propagation phase...\n');
+    const stats = { filesUpdated: 0, filesFailed: 0 };
+    // Step 1: Fetch all pedimento sources across all tables (single request, not paginated here)
+    const pedimentoSources =
+      await this.scanApiService.fetchCrossTablePedimentoSources(
+        scanConfig.companySlug,
+        scanConfig.serverId,
+        scanConfig.basePathFull,
+      );
+    if (pedimentoSources.length === 0) {
+      console.log(
+        '   ℹ️  No pedimento sources found across tables. Skipping cross-table phase.\n',
+      );
+      return stats;
+    }
+    // Build a map: detected_pedimento → source info (later duplicates overwrite earlier ones)
+    const sourceMap = new Map();
+    for (const source of pedimentoSources) {
+      sourceMap.set(source.detected_pedimento, source);
+    }
+    console.log(
+      `   📋 Found ${sourceMap.size} unique pedimento sources across ${tables.length} tables`,
+    );
+    // Step 2: For each table, find orphan files (have pedimento, no arela_path)
+    let totalOrphans = 0;
+    for (const table of tables) {
+      let offset = 0;
+      let hasMore = true;
+      while (hasMore) {
+        const orphanFiles =
+          await this.scanApiService.fetchFilesWithPedimentoNoArelaPath(
+            table.tableName,
+            offset,
+            this.options.batchSize,
+          );
+        if (orphanFiles.length === 0) {
+          hasMore = false;
+          break;
+        }
+        // Step 3: Match orphans against pedimento source map; unmatched orphans produce no update
+        const updates = [];
+        for (const file of orphanFiles) {
+          const source = sourceMap.get(file.detected_pedimento);
+          if (source) {
+            updates.push({
+              id: file.id,
+              arelaPath: source.arela_path,
+              rfc: source.rfc,
+              detectedPedimento: file.detected_pedimento,
+              detectedPedimentoYear: source.detected_pedimento_year,
+              propagatedFromId: source.source_id,
+              propagatedFromTable: source.source_table,
+              propagationError: null,
+            });
+          }
+        }
+        totalOrphans += orphanFiles.length; // counts every orphan inspected, matched or not
+        // Step 4: Batch update matched files
+        if (updates.length > 0) {
+          try {
+            const result = await this.scanApiService.batchUpdatePropagation(
+              table.tableName,
+              updates,
+            );
+            stats.filesUpdated += result.updated;
+            stats.filesFailed += result.errors;
+          } catch (error) {
+            logger.error(
+              `Failed cross-table update on ${table.tableName}:`,
+              error,
+            );
+            stats.filesFailed += updates.length; // the whole batch is counted as failed
+          }
+        }
+        offset += orphanFiles.length;
+        if (orphanFiles.length < this.options.batchSize) {
+          hasMore = false; // short page: treated as the last page for this table
+        }
+      }
+    }
+    console.log(`   📊 Cross-table results:`);
+    console.log(`      Orphan files checked: ${totalOrphans}`);
+    console.log(`      Files updated: ${stats.filesUpdated}`);
+    console.log(`      Files failed: ${stats.filesFailed}\n`);
+    return stats;
+  }
   /**
    * Show final propagation statistics
    * @private

package/src/commands/PushCommand.js CHANGED Viewed

@@ -222,7 +222,10 @@ export class PushCommand {
     }
     const scanConfig = appConfig.getScanConfig();
-    if (!scanConfig.tableName) {
+    // When basePathFull is '*' (cross-directory wildcard), tableName is intentionally null.
+    // The push command fetches tables dynamically via getInstanceTables, so a static
+    // tableName is not required — only companySlug, serverId, and basePathFull matter.
+    if (!scanConfig.tableName && scanConfig.basePathFull !== '*') {
       errors.push('Could not generate table name from configuration');
     }

package/src/commands/ScanCommand.js CHANGED Viewed

@@ -49,7 +49,10 @@ export class ScanCommand {
       const scanConfig = appConfig.getScanConfig();
       // Ensure basePath is absolute for scan operations
-      const basePath = PathNormalizer.toAbsolutePath(appConfig.getBasePath());
+      // '*' is a wildcard sentinel for cross-drive pipelines — no real basePath
+      const rawBasePath = appConfig.getBasePath();
+      const basePath =
+        rawBasePath === '*' ? '*' : PathNormalizer.toAbsolutePath(rawBasePath);
       logger.info('🔍 Starting arela scan command');
       logger.info(`🎯 API Target: ${apiTarget}`);
@@ -213,8 +216,16 @@ export class ScanCommand {
     if (level === 0) {
       // Level 0: Create one entry per source
       return sources.map((source) => {
-        const sourcePath =
-          source === '.' ? basePath : path.resolve(basePath, source);
+        let sourcePath;
+        if (source === '.') {
+          sourcePath = basePath;
+        } else if (source.startsWith('..') || path.isAbsolute(source)) {
+          // Source is a relative-to-CWD path (e.g., from pipeline UI) or absolute
+          sourcePath = PathNormalizer.toAbsolutePath(source);
+        } else {
+          // Source is a subdirectory of basePath
+          sourcePath = path.resolve(basePath, source);
+        }
         // Label is relative path for display purposes only
         const label = source === '.' ? '' : source;
         return { path: sourcePath, label };
@@ -238,8 +249,11 @@ export class ScanCommand {
             // Source is current directory, use discovered path as-is
             directories.push(levelDir);
           } else {
-            // Append source to path
-            const combinedPath = path.resolve(levelDir.path, source);
+            // Resolve source: if it starts with ".." it's relative to CWD, not levelDir
+            const combinedPath =
+              source.startsWith('..') || path.isAbsolute(source)
+                ? PathNormalizer.toAbsolutePath(source)
+                : path.resolve(levelDir.path, source);
             // Only add if the combined path actually exists
             try {

package/src/commands/WorkerCommand.js CHANGED Viewed

@@ -290,6 +290,9 @@ export class WorkerCommand {
     if (scanConfig.directoryLevel !== undefined) {
       process.env.SCAN_DIRECTORY_LEVEL = String(scanConfig.directoryLevel);
     }
+    // Reload cached config from the updated env vars
+    appConfig.reloadScanConfig();
   }
   /**

package/src/config/config.js CHANGED Viewed

@@ -36,10 +36,10 @@ class Config {
       const __dirname = path.dirname(__filename);
       const packageJsonPath = path.resolve(__dirname, '../../package.json');
       const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
-      return packageJson.version || '1.0.17';
+      return packageJson.version || '1.0.19';
     } catch (error) {
       console.warn('⚠️ Could not read package.json version, using fallback');
-      return '1.0.17';
+      return '1.0.19';
     }
   }
@@ -263,14 +263,23 @@ class Config {
     // IMPORTANT: Always resolve to absolute path for uniqueness
     if (!basePathLabel && process.env.UPLOAD_BASE_PATH) {
       const basePath = process.env.UPLOAD_BASE_PATH;
-      // Resolve to absolute path (handles ../sample vs ./sample correctly)
-      // Note: toAbsolutePath handles Windows paths (O:\...) even on macOS/Linux
-      basePathLabel = PathNormalizer.toAbsolutePath(basePath);
+      // '*' is a wildcard sentinel for cross-drive scenarios — keep as-is
+      if (basePath === '*') {
+        basePathLabel = '*';
+      } else {
+        // Resolve to absolute path (handles ../sample vs ./sample correctly)
+        // Note: toAbsolutePath handles Windows paths (O:\...) even on macOS/Linux
+        basePathLabel = PathNormalizer.toAbsolutePath(basePath);
+      }
     }
-    // If basePathLabel is provided, ensure it's absolute
+    // If basePathLabel is provided, ensure it's absolute (skip wildcard)
     // Use PathNormalizer.isAbsolutePath for cross-platform Windows path detection
-    if (basePathLabel && !PathNormalizer.isAbsolutePath(basePathLabel)) {
+    if (
+      basePathLabel &&
+      basePathLabel !== '*' &&
+      !PathNormalizer.isAbsolutePath(basePathLabel)
+    ) {
       basePathLabel = PathNormalizer.toAbsolutePath(basePathLabel);
     }
@@ -288,7 +297,7 @@ class Config {
     // Note: This is just for reference; actual table names are generated dynamically
     // in ScanCommand based on discovered directories and levels
     let tableName = null;
-    if (companySlug && serverId && basePathLabel) {
+    if (companySlug && serverId && basePathLabel && basePathLabel !== '*') {
       tableName = PathNormalizer.generateTableName({
         companySlug,
         serverId,
@@ -658,6 +667,15 @@ class Config {
     };
   }
+  /**
+   * Reload upload and scan config from current process.env values.
+   * Must be called after modifying env vars at runtime (e.g., PollWorkerCommand).
+   *
+   * Replaces this.upload and this.scan with the objects returned by the
+   * private loaders; the upload config is reloaded first, then the scan config.
+   * No other config sections are touched.
+   */
+  reloadScanConfig() {
+    this.upload = this.#loadUploadConfig();
+    this.scan = this.#loadScanConfig();
+  }
   /**
    * Validate watch configuration
    * @param {string[]} directories - Directories to validate

package/src/document-type-shared.js CHANGED Viewed

@@ -1,4 +1,7 @@
 // Import all document type definitions
+import { dodaPdfDefinition } from './document-types/doda-pdf.js';
+import { dodaXmlDefinition } from './document-types/doda-xml.js';
+import { facturasComerciales } from './document-types/facturas-comerciales.js';
 import { pedimentoSimplificadoDefinition } from './document-types/pedimento-simplificado.js';
 import { proformaDefinition } from './document-types/proforma.js';
 import { supportDocumentDefinition } from './document-types/support-document.js';
@@ -39,6 +42,9 @@ export class DocumentTypeDefinition {
 const documentTypes = [
   pedimentoSimplificadoDefinition,
   supportDocumentDefinition,
+  dodaPdfDefinition,
+  dodaXmlDefinition,
+  facturasComerciales,
   // Add more document types here as needed
 ];

package/src/document-types/doda-pdf.js ADDED Viewed

@@ -0,0 +1,121 @@
+import { FieldResult } from '../document-type-shared.js';
+/**
+ * DODA PDF Document Type Definition
+ * Detects DODA (Documento de Operación para Despacho Aduanero) in PDF format.
+ * DODAs are validation documents generated by VUCEM for customs clearance.
+ * They reside in the SAME directory as the pedimento_simplificado,
+ * so within-table propagation handles arela_path assignment.
+ *
+ * Members:
+ * - match(source): true when the full document title is present, or when
+ *   a pedimento-like 15-digit number appears together with either both
+ *   secondary markers ("DODA" and "VUCEM") or one of them plus a
+ *   customs-clearance phrase.
+ * - extractNumPedimento(source, fields): value of the 'numPedimento' entry in
+ *   fields, or null when absent.
+ * - extractPedimentoYear(source, fields): four-digit year derived from the
+ *   first two digits of numPedimento (below 50 → 20xx, otherwise 19xx), or
+ *   null when there is no numPedimento.
+ * - extractors: numPedimento, rfc and aduana; each returns a FieldResult built
+ *   from (field name, boolean, extracted value or null).
+ */
+export const dodaPdfDefinition = {
+  type: 'doda_pdf',
+  extensions: ['pdf'],
+  match: (source) => {
+    // DODA PDFs contain specific markers from VUCEM/customs systems
+    const markers = [
+      /DOCUMENTO DE OPERACI[OÓ]N PARA DESPACHO ADUANERO/i,
+      /DODA/i,
+      /VUCEM/i,
+    ];
+    // The primary marker (full document title) is sufficient on its own; secondary markers need the extra evidence below
+    const primaryMatch = markers[0].test(source);
+    if (primaryMatch) return true;
+    const secondaryMatches = markers
+      .slice(1)
+      .filter((m) => m.test(source)).length;
+    // Also check for pedimento number + DODA-specific context
+    const hasPedimentoNumber = /\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/.test(source); // 15 digits, optionally split 2-2-4-7 by single whitespace
+    const hasDodaContext =
+      /despacho aduanero|operaci[oó]n aduanera|validaci[oó]n/i.test(source);
+    return (
+      (secondaryMatches >= 2 && hasPedimentoNumber) ||
+      (hasDodaContext && hasPedimentoNumber && secondaryMatches >= 1)
+    );
+  },
+  extractNumPedimento: (source, fields) => {
+    return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
+  },
+  extractPedimentoYear: (source, fields) => {
+    const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
+    if (!numPedimento) return null;
+    const year = parseInt(numPedimento.substring(0, 2), 10);
+    return year < 50 ? year + 2000 : year + 1900; // two-digit year pivot at 50
+  },
+  extractors: [
+    {
+      field: 'numPedimento',
+      extract: (source) => {
+        // Try cadena original format: ||aduana|patente|...|pedimentos|integracion|...
+        // e.g. ||070|3429|2|4009029,4008062|109335668|A231|
+        const cadenaMatch = source.match(
+          /\|\|(\d{2,3})\|(\d{4})\|\d\|([\d,]+)\|(\d+)\|/,
+        );
+        if (cadenaMatch) {
+          const rawAduana = cadenaMatch[1];
+          // 3-digit code = aduana(2) + section(1), e.g. 070 → aduana 07
+          const aduana =
+            rawAduana.length === 3
+              ? rawAduana.slice(0, 2)
+              : rawAduana.padStart(2, '0');
+          const patente = cadenaMatch[2];
+          const pedNums = cadenaMatch[3].split(',');
+          // Use first pedimento number, pad to 7 digits
+          const pedNum = pedNums[0].padStart(7, '0');
+          // Year comes from the first YYYY-MM-DD date found anywhere in source (not only in the cadena); falls back to the current year
+          const yearMatch = source.match(/(\d{4})-\d{2}-\d{2}/);
+          const year = yearMatch
+            ? yearMatch[1].slice(-2)
+            : new Date().getFullYear().toString().slice(-2);
+          const full = `${year}${aduana}${patente}${pedNum}`;
+          if (full.length === 15) { // otherwise fall through to the other formats
+            return new FieldResult('numPedimento', true, full);
+          }
+        }
+        // Try dash-separated format: YY-AA-PPPP-NNNNNNN
+        const dashMatch = source.match(/(\d{2})-(\d{2})-(\d{4})-(\d{7})/);
+        if (dashMatch) {
+          const full =
+            dashMatch[1] + dashMatch[2] + dashMatch[3] + dashMatch[4];
+          return new FieldResult('numPedimento', true, full);
+        }
+        // Try 15-digit near pedimento keyword (avoid matching sello digital)
+        const contextMatch = source.match(
+          /pedimento[^\d]{0,30}(\d{2}\s?\d{2}\s?\d{4}\s?\d{7})/i,
+        );
+        if (contextMatch) {
+          return new FieldResult(
+            'numPedimento',
+            true,
+            contextMatch[1].replace(/\s/g, ''),
+          );
+        }
+        return new FieldResult('numPedimento', false, null);
+      },
+    },
+    {
+      field: 'rfc',
+      extract: (source) => {
+        // Mexican RFC: 3-4 letters + 6 digits + 3 alphanumeric (first match in source; uppercase only)
+        const match = source.match(/\b([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})\b/);
+        return new FieldResult('rfc', !!match, match ? match[1] : null);
+      },
+    },
+    {
+      field: 'aduana',
+      extract: (source) => {
+        const match = source.match(/aduana[:\s]*(\d{2,4})/i); // 2-4 digits right after the word "aduana"
+        return new FieldResult('aduana', !!match, match ? match[1] : null);
+      },
+    },
+  ],
+};

package/src/document-types/doda-xml.js ADDED Viewed

@@ -0,0 +1,118 @@
+import { FieldResult } from '../document-type-shared.js';
+/**
+ * DODA XML Document Type Definition
+ * Detects DODA (Documento de Operación para Despacho Aduanero) in XML format.
+ * XML DODAs contain structured data from VUCEM/customs systems.
+ * They reside in the SAME directory as the pedimento_simplificado,
+ * so within-table propagation handles arela_path assignment.
+ *
+ * Members:
+ * - match(source): true when any single DODA marker is present (a bare
+ *   "VUCEM" mention is enough), or when the text has an XML declaration plus
+ *   at least three of the pedimento-related tag names.
+ * - extractNumPedimento(source, fields): value of the 'numPedimento' entry in
+ *   fields, or null when absent.
+ * - extractPedimentoYear(source, fields): year from the first two digits of
+ *   numPedimento (below 50 → 20xx, otherwise 19xx); without a numPedimento,
+ *   the year of the first YYYY-MM-DD date in source, or null.
+ * - extractors: numPedimento, rfc, patente and aduana; each returns a
+ *   FieldResult built from (field name, boolean, extracted value or null).
+ */
+export const dodaXmlDefinition = {
+  type: 'doda_xml',
+  extensions: ['xml'],
+  match: (source) => {
+    // DODA XML files contain specific XML tags/namespaces
+    const xmlMarkers = [
+      /documentoOperacion/i,
+      /despachoAduanero/i,
+      /<doda\b/i,
+      /xmlns[^"]*doda/i,
+      /VUCEM/i,
+    ];
+    // Also check for pedimento-related XML structure
+    const pedimentoXmlMarkers = [
+      /numPedimento/i,
+      /patenteAduanal/i,
+      /aduanaDespacho/i,
+      /tipoOperacion/i,
+    ];
+    const dodaMatches = xmlMarkers.filter((m) => m.test(source)).length;
+    const pedimentoMatches = pedimentoXmlMarkers.filter((m) =>
+      m.test(source),
+    ).length;
+    // Match if: has DODA-specific markers, or combination of pedimento markers with XML structure
+    return (
+      dodaMatches >= 1 || (pedimentoMatches >= 3 && /<\?xml/i.test(source))
+    );
+  },
+  extractNumPedimento: (source, fields) => {
+    return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
+  },
+  extractPedimentoYear: (source, fields) => {
+    const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
+    if (!numPedimento) {
+      // Try to extract year from date in XML (first YYYY-MM-DD occurrence)
+      const dateMatch = source.match(/(\d{4})-\d{2}-\d{2}/);
+      if (dateMatch) {
+        return parseInt(dateMatch[1], 10);
+      }
+      return null;
+    }
+    const year = parseInt(numPedimento.substring(0, 2), 10);
+    return year < 50 ? year + 2000 : year + 1900; // two-digit year pivot at 50
+  },
+  extractors: [
+    {
+      field: 'numPedimento',
+      extract: (source) => {
+        // Try XML tag format first (exactly 15 digits as the element text)
+        const xmlMatch = source.match(/numPedimento[^>]*>(\d{15})<\/[^>]+>/i);
+        if (xmlMatch) {
+          return new FieldResult('numPedimento', true, xmlMatch[1]);
+        }
+        // Try attribute format
+        const attrMatch = source.match(
+          /numPedimento[=:"]\s*["']?(\d{15})["']?/i,
+        );
+        if (attrMatch) {
+          return new FieldResult('numPedimento', true, attrMatch[1]);
+        }
+        // Fallback: first 15-digit pattern anywhere in source (2-2-4-7, optional single whitespace between groups)
+        const fallback = source.match(/\d{2}\s?\d{2}\s?\d{4}\s?\d{7}/);
+        return new FieldResult(
+          'numPedimento',
+          !!fallback,
+          fallback ? fallback[0].replace(/\s/g, '') : null,
+        );
+      },
+    },
+    {
+      field: 'rfc',
+      extract: (source) => {
+        // Try XML tag format
+        const xmlMatch = source.match(
+          /rfc[^>]*>([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})<\/[^>]+>/i,
+        );
+        if (xmlMatch) {
+          return new FieldResult('rfc', true, xmlMatch[1]);
+        }
+        // Fallback: generic RFC pattern (uppercase only, first match in source)
+        const match = source.match(/\b([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})\b/);
+        return new FieldResult('rfc', !!match, match ? match[1] : null);
+      },
+    },
+    {
+      field: 'patente',
+      extract: (source) => {
+        const match = source.match(/patenteAduanal[^>]*>(\d{4})<\/[^>]+>/i); // exactly 4 digits as the element text
+        return new FieldResult('patente', !!match, match ? match[1] : null);
+      },
+    },
+    {
+      field: 'aduana',
+      extract: (source) => {
+        const match = source.match(/aduanaDespacho[^>]*>(\d{2,4})<\/[^>]+>/i); // 2-4 digits as the element text
+        return new FieldResult('aduana', !!match, match ? match[1] : null);
+      },
+    },
+  ],
+};

package/src/document-types/facturas-comerciales.js ADDED Viewed

@@ -0,0 +1,233 @@
+import { FieldResult } from '../document-type-shared.js';
+/**
+ * Facturas Comerciales Document Type Definition
+ * Detects commercial invoices (facturas) related to customs operations.
+ *
+ * CRITICAL: Facturas reside in a DIFFERENT directory than the pedimento_simplificado,
+ * creating a different CLI scan table. Cross-table propagation uses `detected_pedimento`
+ * to link facturas to their corresponding pedimento and assign arela_path.
+ *
+ * Supported formats: PDF (scanned invoices), XML (CFDI/electronic invoices)
+ */
+export const facturasComerciales = {
+  type: 'factura_comercial',
+  extensions: ['pdf', 'xml'],
+  match: (source) => {
+    // CFDI / electronic invoice markers (XML)
+    const cfdiMarkers = [
+      /cfdi:Comprobante/i,
+      /xmlns:cfdi/i,
+      /TipoDeComprobante/i,
+      /timbreFiscalDigital/i,
+      /SelloSAT/i,
+    ];
+    // PDF invoice markers
+    const invoiceMarkers = [
+      /factura\s*(comercial|de\s*venta|de\s*exportaci[oó]n)?/i,
+      /commercial\s*invoice/i,
+      /invoice\s*number/i,
+      /n[uú]mero\s*de\s*factura/i,
+    ];
+    // Customs-related invoice context
+    const customsContext = [
+      /pedimento/i,
+      /aduana/i,
+      /importaci[oó]n|exportaci[oó]n/i,
+      /despacho\s*aduanero/i,
+      /fracci[oó]n\s*arancelaria/i,
+    ];
+    const cfdiMatches = cfdiMarkers.filter((m) => m.test(source)).length;
+    const invoiceMatches = invoiceMarkers.filter((m) => m.test(source)).length;
+    const customsMatches = customsContext.filter((m) => m.test(source)).length;
+    // Match if: CFDI structure (>=2 markers), or invoice + customs context
+    return cfdiMatches >= 2 || (invoiceMatches >= 1 && customsMatches >= 1);
+  },
+  extractNumPedimento: (source, fields) => {
+    return fields?.find((f) => f.name === 'numPedimento')?.value ?? null;
+  },
+  extractPedimentoYear: (source, fields) => {
+    const numPedimento = fields?.find((f) => f.name === 'numPedimento')?.value;
+    if (numPedimento) {
+      const year = parseInt(numPedimento.substring(0, 2), 10);
+      return year < 50 ? year + 2000 : year + 1900;
+    }
+    // Try invoice date field
+    const invoiceDate = fields?.find((f) => f.name === 'invoiceDate')?.value;
+    if (invoiceDate) {
+      const yearMatch = invoiceDate.match(/^(\d{4})/);
+      if (yearMatch) return parseInt(yearMatch[1], 10);
+    }
+    // Try to extract year from any date in source
+    const dateMatch = source.match(/(\d{4})-\d{2}-\d{2}/);
+    if (dateMatch) return parseInt(dateMatch[1], 10);
+    const mmmMatch = source.match(
+      /\d{1,2}\/(?:ene|feb|mar|abr|may|jun|jul|ago|sep|oct|nov|dic)\/(\d{4})/i,
+    );
+    if (mmmMatch) return parseInt(mmmMatch[1], 10);
+    return null;
+  },
+  extractors: [
+    {
+      field: 'numPedimento',
+      extract: (source) => {
+        // Try dash-separated format: YY-AA-PPPP-NNNNNNN (most common in Mexican import invoices)
+        // e.g. "26-07-3429-6016477" — may be wrapped across lines in PDF text
+        const dashMatch = source.match(
+          /(\d{2})-(\d{2})-(\d{4})-(\d{1,7})\s*(\d*)/,
+        );
+        if (dashMatch) {
+          const lastPart = (dashMatch[4] + dashMatch[5]).substring(0, 7);
+          if (lastPart.length === 7) {
+            const full = dashMatch[1] + dashMatch[2] + dashMatch[3] + lastPart;
+            return new FieldResult('numPedimento', true, full);
+          }
+        }
+        // Try CFDI XML: NumPedimento attribute or InformacionAduanera
+        const cfdiMatch = source.match(
+          /(?:NumPedimento|NumeroPedimento)[=:"]\s*["']?(\d{15})["']?/i,
+        );
+        if (cfdiMatch) {
+          return new FieldResult('numPedimento', true, cfdiMatch[1]);
+        }
+        // Try InformacionAduanera tag
+        const aduaneraMatch = source.match(
+          /InformacionAduanera[^>]*NumeroPedimento[=:"]\s*["']?(\d{15})["']?/i,
+        );
+        if (aduaneraMatch) {
+          return new FieldResult('numPedimento', true, aduaneraMatch[1]);
+        }
+        // Try generic XML tag
+        const xmlMatch = source.match(/pedimento[^>]*>(\d{15})<\/[^>]+>/i);
+        if (xmlMatch) {
+          return new FieldResult('numPedimento', true, xmlMatch[1]);
+        }
+        // Try space-separated near pedimento keyword
+        const textMatch = source.match(
+          /pedimento[^\d]{0,30}(\d{2}\s?\d{2}\s?\d{4}\s?\d{7})/i,
+        );
+        if (textMatch) {
+          return new FieldResult(
+            'numPedimento',
+            true,
+            textMatch[1].replace(/\s/g, ''),
+          );
+        }
+        return new FieldResult('numPedimento', false, null);
+      },
+    },
+    {
+      field: 'rfc',
+      extract: (source) => {
+        // Try CFDI Rfc attribute (emisor)
+        const cfdiMatch = source.match(
+          /Emisor[^>]*Rfc[=:"]\s*["']?([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})["']?/i,
+        );
+        if (cfdiMatch) {
+          return new FieldResult('rfc', true, cfdiMatch[1]);
+        }
+        // Try Receptor Rfc (for import invoices, receptor is the importer)
+        const receptorMatch = source.match(
+          /Receptor[^>]*Rfc[=:"]\s*["']?([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})["']?/i,
+        );
+        if (receptorMatch) {
+          return new FieldResult('rfc', true, receptorMatch[1]);
+        }
+        // Fallback: generic RFC pattern
+        const match = source.match(/\b([A-ZÑ&]{3,4}\d{6}[A-Z0-9]{3})\b/);
+        return new FieldResult('rfc', !!match, match ? match[1] : null);
+      },
+    },
+    {
+      field: 'invoiceNumber',
+      extract: (source) => {
+        // CFDI Folio
+        const folioMatch = source.match(/Folio[=:"]\s*["']?([A-Z0-9-]+)["']?/i);
+        if (folioMatch) {
+          return new FieldResult('invoiceNumber', true, folioMatch[1]);
+        }
+        // Header line followed by invoice number on next line
+        // e.g. "FACTURA FECHA ADUANA...\nMIB260064 02/mar/2026..."
+        const headerMatch = source.match(
+          /FACTURA\s+FECHA[^\n]*\n([A-Z]{2,5}\d{4,10})/i,
+        );
+        if (headerMatch) {
+          return new FieldResult('invoiceNumber', true, headerMatch[1]);
+        }
+        // Text-based invoice number
+        const textMatch = source.match(
+          /(?:factura|invoice)\s*(?:no\.?|number|#|num\.?)?[:\s]*([A-Z]{2,5}\d{4,10})/i,
+        );
+        return new FieldResult(
+          'invoiceNumber',
+          !!textMatch,
+          textMatch ? textMatch[1] : null,
+        );
+      },
+    },
+    {
+      field: 'invoiceDate',
+      extract: (source) => {
+        // CFDI Fecha attribute
+        const cfdiMatch = source.match(
+          /Fecha[=:"]\s*["']?(\d{4}-\d{2}-\d{2})/i,
+        );
+        if (cfdiMatch) {
+          return new FieldResult('invoiceDate', true, cfdiMatch[1]);
+        }
+        // DD/MMM/YYYY format (e.g. "02/mar/2026")
+        const mmmMonths = {
+          ene: '01',
+          feb: '02',
+          mar: '03',
+          abr: '04',
+          may: '05',
+          jun: '06',
+          jul: '07',
+          ago: '08',
+          sep: '09',
+          oct: '10',
+          nov: '11',
+          dic: '12',
+        };
+        const mmmMatch = source.match(
+          /(\d{1,2})\/(ene|feb|mar|abr|may|jun|jul|ago|sep|oct|nov|dic)\/(\d{4})/i,
+        );
+        if (mmmMatch) {
+          const day = mmmMatch[1].padStart(2, '0');
+          const month = mmmMonths[mmmMatch[2].toLowerCase()];
+          return new FieldResult(
+            'invoiceDate',
+            true,
+            `${mmmMatch[3]}-${month}-${day}`,
+          );
+        }
+        // ISO date
+        const isoMatch = source.match(/(\d{4}-\d{2}-\d{2})/);
+        return new FieldResult(
+          'invoiceDate',
+          !!isoMatch,
+          isoMatch ? isoMatch[1] : null,
+        );
+      },
+    },
+  ],
+};

package/src/services/ScanApiService.js CHANGED Viewed

@@ -368,24 +368,32 @@ export class ScanApiService {
   // ============================================================================
   /**
-   * Fetch PDF files for detection
+   * Fetch files for detection
+   * Sends a GET to /api/uploader/scan/pdfs-for-detection and returns the response unchanged; allTypes=true is appended to the query string only when the flag is truthy
    * @param {string} tableName - Target table name
    * @param {number} offset - Pagination offset
    * @param {number} limit - Number of records to fetch
+   * @param {boolean} allTypes - When true, fetch all supported file types instead of just likely-simplificado PDFs
    * @returns {Promise<Object>} { data: Array, hasMore: boolean }
    */
-  async fetchPdfsForDetection(tableName, offset = 0, limit = 100) {
+  async fetchPdfsForDetection(
+    tableName,
+    offset = 0,
+    limit = 100,
+    allTypes = false,
+  ) {
     logger.debug(
-      `Fetching PDFs for detection (offset: ${offset}, limit: ${limit})...`,
+      `Fetching files for detection (offset: ${offset}, limit: ${limit}, allTypes: ${allTypes})...`,
     );
-    const result = await this.#request(
-      `/api/uploader/scan/pdfs-for-detection?tableName=${encodeURIComponent(tableName)}&offset=${offset}&limit=${limit}`,
-      'GET',
-    );
+    let url = `/api/uploader/scan/pdfs-for-detection?tableName=${encodeURIComponent(tableName)}&offset=${offset}&limit=${limit}`;
+    if (allTypes) {
+      url += '&allTypes=true';
+    }
+    const result = await this.#request(url, 'GET');
     logger.debug(
-      `Fetched ${result.data.length} PDFs, hasMore: ${result.hasMore}`,
+      `Fetched ${result.data.length} files, hasMore: ${result.hasMore}`,
     );
     return result;
   }
@@ -420,13 +428,15 @@ export class ScanApiService {
    * @param {string} tableName - Target table name
    * @returns {Promise<Object>} { totalPdfs, detected, pending, errors }
    */
-  async getDetectionStats(tableName) {
+  async getDetectionStats(tableName, allTypes = false) {
     logger.debug('Fetching detection statistics...');
-    const result = await this.#request(
-      `/api/uploader/scan/detection-stats?tableName=${encodeURIComponent(tableName)}`,
-      'GET',
-    );
+    let url = `/api/uploader/scan/detection-stats?tableName=${encodeURIComponent(tableName)}`;
+    if (allTypes) {
+      url += '&allTypes=true';
+    }
+    const result = await this.#request(url, 'GET');
     logger.debug(
       `Detection stats: ${result.detected}/${result.totalPdfs} detected, ${result.pending} pending`,
@@ -554,6 +564,68 @@ export class ScanApiService {
     return result;
   }
+  // ============================================================================
+  // CROSS-TABLE PROPAGATION
+  // ============================================================================
+  /**
+   * Fetch pedimento sources across all tables for cross-table propagation.
+   *
+   * Sends a GET request to /api/uploader/scan/cross-table-pedimento-sources with
+   * the three arguments URL-encoded as query parameters. A response that is not
+   * an array is logged as an error and replaced by an empty array, so a call
+   * that resolves always yields an array. Errors raised by the request helper
+   * are not caught here.
+   * @param {string} companySlug - Company slug
+   * @param {string} serverId - Server ID
+   * @param {string} basePathFull - Base path
+   * @returns {Promise<Array>} Array of pedimento sources with source_table info; empty when the response is not an array
+   */
+  async fetchCrossTablePedimentoSources(companySlug, serverId, basePathFull) {
+    logger.debug('Fetching cross-table pedimento sources...');
+    const result = await this.#request(
+      `/api/uploader/scan/cross-table-pedimento-sources?companySlug=${encodeURIComponent(companySlug)}&serverId=${encodeURIComponent(serverId)}&basePathFull=${encodeURIComponent(basePathFull)}`,
+      'GET',
+    );
+    if (!Array.isArray(result)) {
+      logger.error(
+        'fetchCrossTablePedimentoSources: Expected array, got:',
+        typeof result,
+      );
+      return [];
+    }
+    logger.debug(`Fetched ${result.length} cross-table pedimento sources`);
+    return result;
+  }
+  /**
+   * Fetch files with detected_pedimento but no arela_path (candidates for cross-table propagation).
+   *
+   * Sends a GET request to /api/uploader/scan/files-with-pedimento-no-arela-path.
+   * Only tableName is URL-encoded; offset and limit are interpolated into the
+   * query string as given. A response that is not an array is logged as an
+   * error and replaced by an empty array. Errors raised by the request helper
+   * are not caught here.
+   * @param {string} tableName - Target table name
+   * @param {number} offset - Pagination offset
+   * @param {number} limit - Number of records to fetch
+   * @returns {Promise<Array>} Array of files needing cross-table propagation; empty when the response is not an array
+   */
+  async fetchFilesWithPedimentoNoArelaPath(tableName, offset = 0, limit = 100) {
+    logger.debug(
+      `Fetching files with pedimento but no arela_path (offset: ${offset}, limit: ${limit})...`,
+    );
+    const result = await this.#request(
+      `/api/uploader/scan/files-with-pedimento-no-arela-path?tableName=${encodeURIComponent(tableName)}&offset=${offset}&limit=${limit}`,
+      'GET',
+    );
+    if (!Array.isArray(result)) {
+      logger.error(
+        'fetchFilesWithPedimentoNoArelaPath: Expected array, got:',
+        typeof result,
+      );
+      return [];
+    }
+    logger.debug(
+      `Fetched ${result.length} files needing cross-table propagation`,
+    );
+    return result;
+  }
   // ============================================================================
   // PUSH OPERATIONS
   // ============================================================================