@arela/uploader 1.0.23 → 1.0.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@arela/uploader",
3
- "version": "1.0.23",
3
+ "version": "1.0.24",
4
4
  "description": "CLI to upload files/directories to Arela",
5
5
  "bin": {
6
6
  "arela": "./src/index.js"
@@ -0,0 +1,243 @@
1
+ /**
2
+ * Scoring engine validation harness (PROTOTYPE).
3
+ *
4
+ * Runs the CURRENT first-match-wins engine (`extractDocumentFields`) and the new
5
+ * best-match scoring engine (`classifyDocument`) over the same corpus and prints
6
+ * a side-by-side comparison so we can confirm best-match reproduces (or
7
+ * improves on) the current behaviour before wiring it into the pipeline.
8
+ *
9
+ * Usage:
10
+ * node scripts/scoring-compare.js # built-in synthetic samples
11
+ * node scripts/scoring-compare.js <folder> # + real .pdf/.xml/.txt files
12
+ *
13
+ * The built-in samples include the `factura_inter_agencia` vs
14
+ * `factura_comercial` case, which the current engine only resolves via
15
+ * registration order — the harness shows best-match resolving it by score,
16
+ * independent of matcher order.
17
+ */
18
+ import fs from 'fs';
19
+ import path from 'path';
20
+
21
+ import { extractDocumentFields } from '../src/document-type-shared.js';
22
+ import FileDetectionService from '../src/file-detection.js';
23
+ import { classifyDocument, scoreAll } from '../src/scoring/scoring-engine.js';
24
+ import { scoringMatchers } from '../src/scoring/matchers-seed.js';
25
+
26
+ // --------------------------- synthetic corpus -------------------------------
27
+ // Compact, representative texts that trigger the relevant clues. Real pdf-parse
28
+ // output is messier — pass a folder to validate against production documents.
29
+ const SAMPLES = [
30
+ {
31
+ name: 'simplificado-paid',
32
+ extension: 'pdf',
33
+ expected: 'pedimento_simplificado',
34
+ text: `FORMA SIMPLIFICADA DEL PEDIMENTO
35
+ NUM. PEDIMENTO: 26 07 3429 6000079
36
+ CVE. PEDIMENTO: A1
37
+ T. OPER: IMP
38
+ RFC: CSM9204097Q1
39
+ FECHA DE PAGO: 04/03/2026
40
+ *** PAGO ELECTRONICO ***`,
41
+ },
42
+ {
43
+ name: 'simplificado-unpaid (proforma)',
44
+ extension: 'pdf',
45
+ expected: 'proforma',
46
+ text: `FORMA SIMPLIFICADA DE PEDIMENTO
47
+ NUM. PEDIMENTO: 26 07 3429 6000080
48
+ CVE. PEDIMENTO: A1
49
+ T. OPER: IMP
50
+ RFC: CSM9204097Q1
51
+ *** NO PAGADO ***`,
52
+ },
53
+ {
54
+ name: 'completo',
55
+ extension: 'pdf',
56
+ expected: 'pedimento_completo',
57
+ text: `NUM. PEDIMENTO: 26 07 3429 2002089
58
+ CVE. PEDIMENTO: A1
59
+ T. OPER: IMP
60
+ SEGUNDA COPIA TRANSPORTISTA
61
+ CERTIFICACIONES
62
+ CUADRO DE LIQUIDACION
63
+ *** PAGO ELECTRONICO ***
64
+ FECHA DE PAGO: 02/03/2026`,
65
+ },
66
+ {
67
+ name: 'completo-xml',
68
+ extension: 'xml',
69
+ filePath: '/tmp/260734296016642.xml',
70
+ expected: 'pedimento_completo_xml',
71
+ text: `<?xml version="1.0"?>
72
+ <ns2:consultarPedimentoCompletoRespuesta>
73
+ <ns2:pedimento>6016642</ns2:pedimento>
74
+ <ns2:aduanaEntradaSalida><ns2:clave>70</ns2:clave></ns2:aduanaEntradaSalida>
75
+ <ns2:fechas><ns2:clave>2</ns2:clave><ns2:fecha>2026-03-02-06:00</ns2:fecha></ns2:fechas>
76
+ <ns2:fechas><ns2:clave>5</ns2:clave><ns2:fecha>2026-02-20-06:00</ns2:fecha></ns2:fechas>
77
+ <ns2:rfc>CSM9204097Q1</ns2:rfc>
78
+ </ns2:consultarPedimentoCompletoRespuesta>`,
79
+ },
80
+ {
81
+ name: 'doda-pdf',
82
+ extension: 'pdf',
83
+ expected: 'doda_pdf',
84
+ text: `DOCUMENTO DE OPERACION PARA DESPACHO ADUANERO
85
+ DODA
86
+ VUCEM
87
+ ||070|3429|2|4009029|109335668|A231|
88
+ 2026-03-02`,
89
+ },
90
+ {
91
+ name: 'doda-xml',
92
+ extension: 'xml',
93
+ expected: 'doda_xml',
94
+ text: `<?xml version="1.0"?>
95
+ <documentoOperacion>
96
+ <numPedimento>260734292002089</numPedimento>
97
+ <patenteAduanal>3429</patenteAduanal>
98
+ <aduanaDespacho>07</aduanaDespacho>
99
+ </documentoOperacion>`,
100
+ },
101
+ {
102
+ name: 'inter-agencia (vs comercial)',
103
+ extension: 'xml',
104
+ expected: 'factura_inter_agencia',
105
+ text: `<cfdi:Comprobante xmlns:cfdi="..." TipoDeComprobante="I">
106
+ <cfdi:Emisor Rfc="NAA120215F20"/>
107
+ <cfdi:Receptor Rfc="PCC1008161WA"/>
108
+ <cfdi:Concepto ClaveProdServ="78141502" Descripcion="Servicios de agente aduanal"/>
109
+ </cfdi:Comprobante>`,
110
+ },
111
+ {
112
+ name: 'factura-comercial',
113
+ extension: 'xml',
114
+ expected: 'factura_comercial',
115
+ text: `<cfdi:Comprobante xmlns:cfdi="..." TipoDeComprobante="I">
116
+ <cfdi:Emisor Rfc="ABC010101AB1"/>
117
+ <cfdi:Receptor Rfc="XYZ020202CD2"/>
118
+ <tfd:TimbreFiscalDigital/>
119
+ pedimento 26 07 3429 6016477
120
+ </cfdi:Comprobante>`,
121
+ },
122
+ {
123
+ name: 'support-document',
124
+ extension: 'xml',
125
+ expected: 'support_document',
126
+ text: `<?xml version="1.0"?>
127
+ <soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/">
128
+ <oxml:tipoOperacion>IMP</oxml:tipoOperacion>
129
+ <oxml:patenteAduanal>3429</oxml:patenteAduanal>
130
+ </soapenv:Envelope>`,
131
+ },
132
+ ];
133
+
134
+ // --------------------------- comparison -------------------------------------
135
/**
 * Classify a document with the current first-match-wins engine.
 *
 * @param {string} source - Document text.
 * @param {string} extension - File extension without the dot.
 * @param {string} [filePath] - Path of the file, when there is one.
 * @returns {*} The first element of the `extractDocumentFields` result (the detected type).
 */
function firstMatchType(source, extension, filePath) {
  const legacyResult = extractDocumentFields(source, extension, filePath);
  return legacyResult[0];
}
139
+
140
/**
 * Classify a document with the best-match scoring engine over the local seed matchers.
 *
 * @param {string} source - Document text.
 * @param {string} extension - File extension without the dot.
 * @param {string} [filePath] - Path of the file, when there is one.
 * @returns {*} Whatever `classifyDocument` returns for this document.
 */
function bestMatchResult(source, extension, filePath) {
  const documentContext = { source, extension, filePath };
  return classifyDocument(scoringMatchers, documentContext);
}
143
+
144
/**
 * Format the first `n` candidates returned by `scoreAll` as "type:score" pairs.
 *
 * @param {string} source - Document text.
 * @param {string} extension - File extension without the dot.
 * @param {string} [filePath] - Path of the file; only its base name is passed on.
 * @param {number} [n=3] - How many candidates to include.
 * @returns {string} Comma-separated `documentType:score` list.
 */
function topCandidates(source, extension, filePath, n = 3) {
  const fileName = filePath ? path.basename(filePath) : '';
  const candidates = scoreAll(scoringMatchers, { source, extension, fileName });

  const labels = [];
  for (const candidate of candidates.slice(0, n)) {
    labels.push(`${candidate.documentType}:${candidate.score}`);
  }
  return labels.join(', ');
}
154
+
155
/**
 * Build one fixed-width comparison line for the report.
 *
 * @param {string} name - Sample or file name (padded to 34 columns).
 * @param {*} first - Type detected by the first-match engine.
 * @param {*} best - Type detected by the best-match engine.
 * @param {string|null} expected - Expected type, or a falsy value when unknown.
 * @returns {string} The formatted line.
 */
function row(name, first, best, expected) {
  let agreement = 'NO ';
  if (first === best) agreement = 'sí ';

  // The expectation column stays blank when no expected type was given.
  let expectation = ' ';
  if (expected) expectation = best === expected ? 'ok ' : '⚠️ ';

  const cells = [
    name.padEnd(34),
    `first=${String(first).padEnd(24)}`,
    `best=${String(best).padEnd(24)}`,
    `coinciden=${agreement}`,
    `esperado=${expectation}`,
  ];
  return cells.join(' ');
}
163
+
164
/**
 * Run both engines over the built-in samples (and, optionally, over the real
 * files found under the folder given as first CLI argument) and print a
 * side-by-side comparison plus a summary of divergences.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const folder = process.argv[2];
  let total = 0;
  let disagreements = 0;

  console.log('\n=== Muestras sintéticas ===');
  for (const s of SAMPLES) {
    const first = firstMatchType(s.text, s.extension, s.filePath);
    const best = bestMatchResult(s.text, s.extension, s.filePath).detectedType;
    total++;
    if (first !== best) disagreements++;
    console.log(row(s.name, first, best, s.expected));
  }

  // Order-independence demonstration for the inter-agencia/comercial case:
  // classify the same sample with the seed in its normal and reversed order.
  const ia = SAMPLES.find((s) => s.name.startsWith('inter-agencia'));
  const reversed = [...scoringMatchers].reverse();
  const normalWinner = classifyDocument(scoringMatchers, {
    source: ia.text,
    extension: ia.extension,
  }).detectedType;
  const reversedWinner = classifyDocument(reversed, {
    source: ia.text,
    extension: ia.extension,
  }).detectedType;
  console.log('\n=== Independencia de orden (inter-agencia) ===');
  console.log(`candidatos (por score): ${topCandidates(ia.text, ia.extension)}`);
  console.log(`seed normal -> ${normalWinner}`);
  console.log(`seed invertido-> ${reversedWinner}`);
  console.log(
    `order-independent: ${normalWinner === reversedWinner ? 'sí ✅' : 'NO ❌'}`,
  );

  // Optional: real files from a folder.
  if (folder) {
    if (!fs.existsSync(folder)) {
      console.error(`\nCarpeta no existe: ${folder}`);
    } else if (!fs.statSync(folder).isDirectory()) {
      // existsSync() is also true for regular files; walking one would crash
      // with ENOTDIR, so report it the same way as a missing folder.
      console.error(`\nNo es una carpeta: ${folder}`);
    } else {
      console.log(`\n=== Archivos reales (${folder}) ===`);
      const detection = new FileDetectionService();
      const files = walk(folder).filter((f) =>
        ['.pdf', '.xml', '.txt'].includes(path.extname(f).toLowerCase()),
      );
      for (const file of files) {
        const ext = path.extname(file).toLowerCase().replace('.', '');
        let text = '';
        try {
          text =
            ext === 'pdf'
              ? await detection.extractTextFromPDF(file)
              : fs.readFileSync(file, 'utf8');
        } catch (err) {
          // Unreadable files are reported and excluded from the totals.
          console.log(`${path.basename(file).padEnd(34)} ERROR: ${err.message}`);
          continue;
        }
        const first = firstMatchType(text, ext, file);
        const best = bestMatchResult(text, ext, file).detectedType;
        total++;
        if (first !== best) disagreements++;
        console.log(row(path.basename(file), first, best, null));
      }
    }
  }

  console.log(
    `\n=== Resumen: ${total} documentos, ${disagreements} divergencias first-vs-best ===\n`,
  );
}
232
+
233
/**
 * Recursively list every non-directory entry under `dir`.
 *
 * Entries are appended to a single shared array instead of spreading each
 * subtree into `push(...)`: spreading a very large array as call arguments
 * can throw a RangeError, which would abort the walk on big corpora.
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]} Paths of all files found, depth-first in readdir order.
 */
function walk(dir) {
  const out = [];
  const visit = (current) => {
    for (const entry of fs.readdirSync(current, { withFileTypes: true })) {
      const full = path.join(current, entry.name);
      if (entry.isDirectory()) visit(full);
      else out.push(full);
    }
  };
  visit(dir);
  return out;
}
242
+
243
// Entry point: surface any unexpected failure and exit non-zero instead of
// leaving the returned promise unhandled.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
@@ -0,0 +1,96 @@
1
+ /**
2
+ * Phase 4 validation: runs the REAL runtime path the uploader now uses
3
+ * (DB-shape matchers -> adaptDbMatchers -> classifyDocument with rich extraction)
4
+ * against a corpus and compares it to the legacy engine (extractDocumentFields).
5
+ *
6
+ * Usage: node scripts/scoring-phase4-check.js <folder>
7
+ */
8
+ import fs from 'fs';
9
+ import path from 'path';
10
+
11
+ import { extractDocumentFields } from '../src/document-type-shared.js';
12
+ import FileDetectionService from '../src/file-detection.js';
13
+ import { adaptDbMatchers } from '../src/scoring/db-matcher-adapter.js';
14
+ import { scoringMatchers } from '../src/scoring/matchers-seed.js';
15
+ import { classifyDocument } from '../src/scoring/scoring-engine.js';
16
+
17
/**
 * Serialize the local seed matchers into the plain-data shape the adapter is
 * fed (per the original note: the shape the API `/resolved` endpoint returns),
 * so the adapter is exercised the same way as with API data.
 *
 * RegExp clue patterns are split into `pattern` (source) and `flags` strings;
 * missing optional values are filled with explicit defaults.
 *
 * @param {Array<object>} matchers - Seed matchers.
 * @returns {Array<object>} Serializable matcher records with empty `fieldExtractors`.
 */
function toDbShape(matchers) {
  const serializeClue = (clue) => {
    const isRegExp = clue.pattern instanceof RegExp;
    return {
      kind: clue.kind,
      pattern: isRegExp ? clue.pattern.source : clue.pattern,
      flags: isRegExp ? clue.pattern.flags : clue.flags || '',
      weight: clue.weight ?? 1,
      group: clue.group ?? null,
      required: Boolean(clue.required),
      negative: Boolean(clue.negative),
    };
  };

  return matchers.map((matcher) => {
    const clues = (matcher.clues || []).map((clue) => serializeClue(clue));
    return {
      documentType: matcher.documentType,
      extensions: matcher.extensions,
      minScore: matcher.minScore ?? null,
      priority: matcher.priority ?? 0,
      qualify: matcher.qualify ?? null,
      clues,
      // Rich extraction comes from the registry keyed by documentType.
      fieldExtractors: [],
    };
  });
}
38
+
39
+ const adapted = adaptDbMatchers(toDbShape(scoringMatchers));
40
+
41
/**
 * Recursively list every non-directory entry under `dir`.
 *
 * Entries are appended to a single shared array instead of spreading each
 * subtree into `push(...)`: spreading a very large array as call arguments
 * can throw a RangeError, which would abort the walk on big corpora.
 *
 * @param {string} dir - Directory to walk.
 * @returns {string[]} Paths of all files found, depth-first in readdir order.
 */
function walk(dir) {
  const out = [];
  const visit = (current) => {
    for (const e of fs.readdirSync(current, { withFileTypes: true })) {
      const full = path.join(current, e.name);
      if (e.isDirectory()) visit(full);
      else out.push(full);
    }
  };
  visit(dir);
  return out;
}
50
+
51
/**
 * Compare the legacy engine with the adapted-matcher scoring path over every
 * .pdf/.xml/.txt file under the folder given as first CLI argument, printing
 * each divergence and a summary grouped by "legacy -> phase4" pattern.
 *
 * Files whose text cannot be read are reported and counted separately, so the
 * summary makes clear how much of the corpus was actually checked.
 *
 * @returns {Promise<void>}
 */
async function run() {
  const folder = process.argv[2];
  if (!folder) {
    console.error('Pass a folder: node scripts/scoring-phase4-check.js <folder>');
    process.exit(1);
  }
  const detection = new FileDetectionService();
  const files = walk(folder).filter((f) =>
    ['.pdf', '.xml', '.txt'].includes(path.extname(f).toLowerCase()),
  );

  let total = 0;
  let diverge = 0;
  let skipped = 0;
  const patterns = {};

  for (const file of files) {
    const ext = path.extname(file).toLowerCase().replace('.', '');
    let text = '';
    try {
      text =
        ext === 'pdf'
          ? await detection.extractTextFromPDF(file)
          : fs.readFileSync(file, 'utf8');
    } catch (err) {
      // Do not drop unreadable files silently: they are not part of `total`,
      // so the reader must be told they were left out.
      skipped++;
      console.error(`ERROR ${path.basename(file).padEnd(40)} ${err.message}`);
      continue;
    }
    const legacy = extractDocumentFields(text, ext, file)[0];
    const phase4 = classifyDocument(adapted, {
      source: text,
      extension: ext,
      filePath: file,
    }).detectedType;
    total++;
    if (legacy !== phase4) {
      diverge++;
      const key = `${legacy} -> ${phase4}`;
      patterns[key] = (patterns[key] || 0) + 1;
      console.log(`NO ${path.basename(file).padEnd(40)} ${key}`);
    }
  }

  console.log(`\n=== Fase 4 vs legacy: ${total} docs, ${diverge} divergencias ===`);
  for (const [k, n] of Object.entries(patterns)) console.log(` ${n}× ${k}`);
  if (skipped > 0) console.log(` ${skipped} archivos omitidos por error de lectura`);
}
95
+
96
// Entry point: surface any unexpected failure and exit non-zero instead of
// leaving the returned promise unhandled.
run().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
@@ -8,6 +8,8 @@ import appConfig from '../config/config.js';
8
8
  import ErrorHandler from '../errors/ErrorHandler.js';
9
9
  import { ConfigurationError } from '../errors/ErrorTypes.js';
10
10
  import FileDetectionService from '../file-detection.js';
11
+ import { adaptDbMatchers } from '../scoring/db-matcher-adapter.js';
12
+ import { scoringMatchers } from '../scoring/matchers-seed.js';
11
13
 
12
14
  /**
13
15
  * Paid pedimento detected_type values.
@@ -69,6 +71,32 @@ export class IdentifyCommand {
69
71
  );
70
72
  this.scanApiService = new ScanApiService(apiTarget);
71
73
 
74
+ // Load matchers for best-match classification (phase 4 hybrid). Prefer the
75
+ // DB-resolved set (this RFC + globals); fall back to the validated local
76
+ // seed; set DISABLE_SCORING_MATCHERS=true to force legacy first-match.
77
+ if (process.env.DISABLE_SCORING_MATCHERS === 'true') {
78
+ logger.info('🧩 Scoring matchers disabled — legacy detection');
79
+ } else {
80
+ let matchers = null;
81
+ try {
82
+ const rfc = process.env.MATCHER_RFC || null;
83
+ const dbMatchers = await this.scanApiService.getResolvedMatchers(rfc);
84
+ if (dbMatchers.length) {
85
+ matchers = adaptDbMatchers(dbMatchers);
86
+ logger.info(`🧩 Loaded ${matchers.length} matchers from API`);
87
+ }
88
+ } catch (err) {
89
+ logger.warn(`🧩 Could not load matchers from API: ${err.message}`);
90
+ }
91
+ if (!matchers) {
92
+ matchers = scoringMatchers;
93
+ logger.info(`🧩 Using local seed matchers (${matchers.length})`);
94
+ }
95
+ if (typeof this.detectionService.setMatchers === 'function') {
96
+ this.detectionService.setMatchers(matchers);
97
+ }
98
+ }
99
+
72
100
  const scanConfig = appConfig.getScanConfig();
73
101
  const batchSize = parseInt(options.batchSize) || 100;
74
102
 
@@ -37,10 +37,10 @@ class Config {
37
37
  const __dirname = path.dirname(__filename);
38
38
  const packageJsonPath = path.resolve(__dirname, '../../package.json');
39
39
  const packageJson = JSON.parse(fs.readFileSync(packageJsonPath, 'utf-8'));
40
- return packageJson.version || '1.0.23';
40
+ return packageJson.version || '1.0.24';
41
41
  } catch (error) {
42
42
  console.warn('⚠️ Could not read package.json version, using fallback');
43
- return '1.0.23';
43
+ return '1.0.24';
44
44
  }
45
45
  }
46
46
 
@@ -3,6 +3,7 @@ import path from 'path';
3
3
  import { PDFParse } from 'pdf-parse';
4
4
 
5
5
  import { extractDocumentFields } from './document-type-shared.js';
6
+ import { classifyDocument } from './scoring/scoring-engine.js';
6
7
 
7
8
  // Document types that participate in arela_path composition.
8
9
  const ARELA_PATH_TYPES = new Set([
@@ -84,6 +85,17 @@ function composeArelaPath(
84
85
  * Detects document types and extracts metadata from files
85
86
  */
86
87
  export class FileDetectionService {
88
+ constructor() {
89
+ // Best-match matchers (adapted from the API). When set, classification uses
90
+ // the scoring engine; otherwise it falls back to legacy first-match-wins.
91
+ this.matchers = null;
92
+ }
93
+
94
+ /** Provide the resolved+adapted matcher set for scoring-based classification. */
95
+ setMatchers(matchers) {
96
+ this.matchers = matchers && matchers.length ? matchers : null;
97
+ }
98
+
87
99
  /**
88
100
  * Detect document type from a file
89
101
  * @param {string} filePath - Path to the file to analyze
@@ -140,9 +152,23 @@ export class FileDetectionService {
140
152
  };
141
153
  }
142
154
 
143
- // Extract document fields and detect type
144
- const [detectedType, fields, detectedPedimento, detectedPedimentoYear] =
145
- extractDocumentFields(text, fileExtension, filePath);
155
+ // Extract document fields and detect type. Use the best-match scoring
156
+ // engine when matchers are configured; otherwise legacy first-match-wins.
157
+ let detectedType, fields, detectedPedimento, detectedPedimentoYear;
158
+ if (this.matchers) {
159
+ const r = classifyDocument(this.matchers, {
160
+ source: text,
161
+ extension: fileExtension,
162
+ filePath,
163
+ });
164
+ detectedType = r.detectedType;
165
+ fields = r.fields;
166
+ detectedPedimento = r.detectedPedimento;
167
+ detectedPedimentoYear = r.detectedPedimentoYear;
168
+ } else {
169
+ [detectedType, fields, detectedPedimento, detectedPedimentoYear] =
170
+ extractDocumentFields(text, fileExtension, filePath);
171
+ }
146
172
 
147
173
  // Extract RFC from fields
148
174
  const rfc = fields?.find((f) => f.name === 'rfc')?.value ?? null;
@@ -0,0 +1,98 @@
1
+ /**
2
+ * Adapt DB matchers (from arela-api `GET /document-matcher/resolved`) into the
3
+ * shape the scoring engine consumes — the HYBRID model:
4
+ *
5
+ * - SELECTION comes from the DB matcher's clues / qualify (per-RFC + globals).
6
+ * - EXTRACTION uses the rich JS extractors keyed by `documentType` when one
7
+ * exists (resolveType, multi-pattern field extractors, pedimento composition);
8
+ * otherwise it falls back to building simple regex extractors from the DB
9
+ * matcher's `fieldExtractors`.
10
+ *
11
+ * This keeps per-client matching configurable from the UI while preserving the
12
+ * robust field extraction that already ships in the uploader.
13
+ */
14
+ // IMPORTANT: load document-type-shared FIRST so it becomes the root of the
15
+ // shared<->definitions import cycle and fully evaluates before the individual
16
+ // definitions are referenced (otherwise: "Cannot access X before initialization").
17
+ import { FieldResult } from '../document-type-shared.js';
18
+ import { dodaPdfDefinition } from '../document-types/doda-pdf.js';
19
+ import { dodaXmlDefinition } from '../document-types/doda-xml.js';
20
+ import { facturaInterAgenciaDefinition } from '../document-types/factura-inter-agencia.js';
21
+ import { facturasComerciales } from '../document-types/facturas-comerciales.js';
22
+ import { pedimentoCompletoXmlDefinition } from '../document-types/pedimento-completo-xml.js';
23
+ import { pedimentoCompletoDefinition } from '../document-types/pedimento-completo.js';
24
+ import { pedimentoSimplificadoDefinition } from '../document-types/pedimento-simplificado.js';
25
+ import { supportDocumentDefinition } from '../document-types/support-document.js';
26
+
27
/**
 * Pick the extraction-related members of a JS document-type definition
 * (the "rich extraction half" stored in the registry per documentType).
 *
 * @param {object} def - Document-type definition.
 * @returns {{extractors: *, resolveType: *, extractNumPedimento: *, extractPedimentoYear: *}}
 */
function extractionOf(def) {
  const {
    extractors,
    resolveType,
    extractNumPedimento,
    extractPedimentoYear,
  } = def;
  return { extractors, resolveType, extractNumPedimento, extractPedimentoYear };
}
36
+
37
+ const EXTRACTION_REGISTRY = {
38
+ pedimento_simplificado: extractionOf(pedimentoSimplificadoDefinition),
39
+ pedimento_completo: extractionOf(pedimentoCompletoDefinition),
40
+ pedimento_completo_xml: extractionOf(pedimentoCompletoXmlDefinition),
41
+ doda_pdf: extractionOf(dodaPdfDefinition),
42
+ doda_xml: extractionOf(dodaXmlDefinition),
43
+ factura_inter_agencia: extractionOf(facturaInterAgenciaDefinition),
44
+ factura_comercial: extractionOf(facturasComerciales),
45
+ support_document: extractionOf(supportDocumentDefinition),
46
+ };
47
+
48
/**
 * Build a scoring-engine extractor from a DB fieldExtractor (regex + capture).
 *
 * The extracted value is capture group 1 when the pattern has one, otherwise
 * the whole match. An invalid pattern or a non-string source yields a
 * "not found" result instead of throwing.
 *
 * @param {{field: string, extractor: string, flags?: string}} fe - DB field extractor.
 * @returns {{field: string, extract: function(string): FieldResult}}
 */
function regexExtractor(fe) {
  // Drop the global flag: with `g`, String.prototype.match returns every full
  // match and NO capture groups, so index 1 would be the second match rather
  // than the captured value.
  const flags = String(fe.flags || '').replace(/g/g, '');

  // Compile once per extractor rather than once per document.
  let compiled = null;
  try {
    compiled = new RegExp(fe.extractor, flags);
  } catch {
    compiled = null; // invalid pattern/flags: this extractor never matches
  }

  return {
    field: fe.field,
    extract: (source) => {
      if (!compiled) return new FieldResult(fe.field, false, null);
      try {
        // A sticky (`y`) regex keeps state between calls; always start at 0.
        compiled.lastIndex = 0;
        const m = source.match(compiled);
        return new FieldResult(fe.field, !!m, m ? (m[1] ?? m[0]) : null);
      } catch {
        return new FieldResult(fe.field, false, null);
      }
    },
  };
}
62
+
63
/**
 * Convert resolved DB matchers into scoring-engine matchers.
 *
 * Selection data (extensions, clues, qualify, minScore, priority) is taken
 * from the DB matcher. Extraction comes from the rich JS registry when the
 * matcher's `documentType` is registered there; otherwise simple regex
 * extractors are built from the DB matcher's `fieldExtractors`.
 *
 * @param {Array} dbMatchers - matchers from the API (with clues + fieldExtractors)
 * @returns {Array} scoring matchers
 */
export function adaptDbMatchers(dbMatchers) {
  return (dbMatchers || []).map((m) => {
    // Only own registry keys count: `documentType` is external data, and a
    // value such as "constructor" or "toString" would otherwise resolve to an
    // inherited Object.prototype member and discard the DB field extractors.
    const isRegistered = Object.prototype.hasOwnProperty.call(
      EXTRACTION_REGISTRY,
      m.documentType,
    );
    const rich = isRegistered ? EXTRACTION_REGISTRY[m.documentType] : null;
    const extraction = rich
      ? rich
      : { extractors: (m.fieldExtractors || []).map(regexExtractor) };

    return {
      documentType: m.documentType,
      // Accept either an array or a comma-separated string of extensions.
      extensions: Array.isArray(m.extensions)
        ? m.extensions
        : String(m.extensions || '')
            .split(',')
            .map((s) => s.trim())
            .filter(Boolean),
      minScore: m.minScore ?? undefined,
      priority: m.priority ?? 0,
      qualify: m.qualify ?? undefined,
      clues: (m.clues || []).map((c) => ({
        kind: c.kind,
        pattern: c.pattern,
        flags: c.flags || undefined,
        weight: c.weight ?? 1,
        group: c.group || undefined,
        required: !!c.required,
        negative: !!c.negative,
      })),
      ...extraction,
    };
  });
}