@arela/uploader 1.0.23 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (85)
  1. package/docs/AUTO_PROCESSING_PIPELINE.md +258 -0
  2. package/docs/COMPLETE_USAGE_GUIDE.md +1363 -0
  3. package/docs/DATABASESERVICE_IMPROVEMENTS.md +546 -0
  4. package/docs/PASO_2_TEST_RESULTS.md +298 -0
  5. package/docs/PASO_3_PLAN.md +385 -0
  6. package/docs/PHASE_1_FILE_DETECTION.md +366 -0
  7. package/docs/PHASE_2_API_INTEGRATION.md +426 -0
  8. package/docs/PHASE_3_DATABASE_MANAGEMENT.md +480 -0
  9. package/docs/PHASE_4_FILE_OPERATIONS.md +448 -0
  10. package/docs/PHASE_5_WATCH_MODE.md +450 -0
  11. package/docs/PHASE_6_SIGNAL_HANDLING.md +472 -0
  12. package/docs/PHASE_7_ADVANCED_FEATURES.md +560 -0
  13. package/docs/PLAN_WATCH_FEATURE.md +417 -0
  14. package/docs/README.md +480 -0
  15. package/docs/SCHEMA_ALIGNMENT_SUMMARY.md +301 -0
  16. package/docs/SMARTWATCH_DATABASE_REFACTORING.md +181 -0
  17. package/docs/SMART_WATCH_DATABASE_CHANGES.md +502 -0
  18. package/docs/TESTING_WATCH_MODE.md +212 -0
  19. package/docs/WATCHER_API_IMPLEMENTATION.md +520 -0
  20. package/docs/WATCHER_API_INTEGRATION.md +562 -0
  21. package/docs/WATCHER_SETUP_GUIDE.md +614 -0
  22. package/docs/WATCH_ARCHITECTURE.md +395 -0
  23. package/docs/WATCH_AUTO_PIPELINE.md +334 -0
  24. package/docs/WATCH_CONFIGURATION.md +267 -0
  25. package/docs/WATCH_USAGE_GUIDE.md +567 -0
  26. package/docs/commands.md +14 -0
  27. package/package.json +1 -1
  28. package/scripts/scoring-compare.js +243 -0
  29. package/scripts/scoring-phase4-check.js +96 -0
  30. package/src/commands/IdentifyCommand.js +36 -0
  31. package/src/config/config.js +2 -2
  32. package/src/file-detection.js +71 -4
  33. package/src/scoring/db-matcher-adapter.js +98 -0
  34. package/src/scoring/matchers-seed.js +386 -0
  35. package/src/scoring/scoring-engine.js +246 -0
  36. package/src/services/ScanApiService.js +14 -0
  37. package/tests/unit/scoring-engine.test.js +221 -0
  38. package/.vscode/settings.json +0 -1
  39. package/coverage/IdentifyCommand.js.html +0 -1462
  40. package/coverage/PropagateCommand.js.html +0 -1507
  41. package/coverage/PushCommand.js.html +0 -1504
  42. package/coverage/ScanCommand.js.html +0 -1654
  43. package/coverage/UploadCommand.js.html +0 -1846
  44. package/coverage/WatchCommand.js.html +0 -4111
  45. package/coverage/base.css +0 -224
  46. package/coverage/block-navigation.js +0 -87
  47. package/coverage/favicon.png +0 -0
  48. package/coverage/index.html +0 -191
  49. package/coverage/lcov-report/IdentifyCommand.js.html +0 -1462
  50. package/coverage/lcov-report/PropagateCommand.js.html +0 -1507
  51. package/coverage/lcov-report/PushCommand.js.html +0 -1504
  52. package/coverage/lcov-report/ScanCommand.js.html +0 -1654
  53. package/coverage/lcov-report/UploadCommand.js.html +0 -1846
  54. package/coverage/lcov-report/WatchCommand.js.html +0 -4111
  55. package/coverage/lcov-report/base.css +0 -224
  56. package/coverage/lcov-report/block-navigation.js +0 -87
  57. package/coverage/lcov-report/favicon.png +0 -0
  58. package/coverage/lcov-report/index.html +0 -191
  59. package/coverage/lcov-report/prettify.css +0 -1
  60. package/coverage/lcov-report/prettify.js +0 -2
  61. package/coverage/lcov-report/sort-arrow-sprite.png +0 -0
  62. package/coverage/lcov-report/sorter.js +0 -210
  63. package/coverage/lcov.info +0 -1937
  64. package/coverage/prettify.css +0 -1
  65. package/coverage/prettify.js +0 -2
  66. package/coverage/sort-arrow-sprite.png +0 -0
  67. package/coverage/sorter.js +0 -210
  68. package/docs/API_ENDPOINTS_FOR_DETECTION.md +0 -647
  69. package/docs/API_RETRY_MECHANISM.md +0 -338
  70. package/docs/ARELA_IDENTIFY_IMPLEMENTATION.md +0 -489
  71. package/docs/ARELA_IDENTIFY_QUICKREF.md +0 -186
  72. package/docs/ARELA_PROPAGATE_IMPLEMENTATION.md +0 -581
  73. package/docs/ARELA_PROPAGATE_QUICKREF.md +0 -272
  74. package/docs/ARELA_PUSH_IMPLEMENTATION.md +0 -577
  75. package/docs/ARELA_PUSH_QUICKREF.md +0 -322
  76. package/docs/ARELA_SCAN_IMPLEMENTATION.md +0 -373
  77. package/docs/ARELA_SCAN_QUICKREF.md +0 -139
  78. package/docs/CROSS_PLATFORM_PATH_HANDLING.md +0 -597
  79. package/docs/DETECTION_ATTEMPT_TRACKING.md +0 -414
  80. package/docs/MIGRATION_UPLOADER_TO_FILE_STATS.md +0 -1020
  81. package/docs/MULTI_LEVEL_DIRECTORY_SCANNING.md +0 -494
  82. package/docs/QUICK_REFERENCE_API_DETECTION.md +0 -264
  83. package/docs/REFACTORING_SUMMARY_DETECT_PEDIMENTOS.md +0 -200
  84. package/docs/STATS_COMMAND_SEQUENCE_DIAGRAM.md +0 -287
  85. package/docs/STATS_COMMAND_SIMPLE.md +0 -93
@@ -0,0 +1,221 @@
1
+ /**
2
+ * Tests for the best-match scoring engine (PROTOTYPE).
3
+ *
4
+ * Two layers:
5
+ * 1. Engine mechanics (pure): extension filter, required/negative
6
+ * disqualification, weighted scoring, threshold, tie-breaking.
7
+ * 2. Faithfulness on representative documents: the seed matchers select the
8
+ * same document type best-match would, including the key
9
+ * `factura_inter_agencia` vs `factura_comercial` case resolved by SCORE
10
+ * (not registration order).
11
+ */
12
+ import { describe, it, expect } from '@jest/globals';
13
+
14
+ import {
15
+ classifyDocument,
16
+ scoreMatcher,
17
+ selectBestMatch,
18
+ } from '../../src/scoring/scoring-engine.js';
19
+ import { scoringMatchers } from '../../src/scoring/matchers-seed.js';
20
+
21
+ // ----------------------------- engine mechanics -----------------------------
22
+ describe('scoreMatcher (mechanics)', () => {
23
+ const base = {
24
+ documentType: 't',
25
+ extensions: ['pdf'],
26
+ minScore: 2,
27
+ clues: [
28
+ { kind: 'CONTENT_REGEX', pattern: /alpha/i, weight: 2 },
29
+ { kind: 'CONTENT_REGEX', pattern: /beta/i, weight: 1 },
30
+ ],
31
+ };
32
+
33
+ it('returns null when the extension does not apply', () => {
34
+ expect(scoreMatcher(base, { source: 'alpha', extension: 'xml' })).toBeNull();
35
+ });
36
+
37
+ it('sums the weights of matched clues', () => {
38
+ const r = scoreMatcher(base, { source: 'alpha beta', extension: 'pdf' });
39
+ expect(r.score).toBe(3);
40
+ expect(r.totalWeight).toBe(3);
41
+ expect(r.fraction).toBeCloseTo(1);
42
+ expect(r.passed).toBe(true);
43
+ });
44
+
45
+ it('does not pass below minScore', () => {
46
+ const r = scoreMatcher(base, { source: 'beta', extension: 'pdf' });
47
+ expect(r.score).toBe(1);
48
+ expect(r.passed).toBe(false);
49
+ });
50
+
51
+ it('disqualifies on a negative clue hit', () => {
52
+ const m = {
53
+ ...base,
54
+ clues: [
55
+ { kind: 'CONTENT_REGEX', pattern: /alpha/i, weight: 2 },
56
+ { kind: 'CONTENT_REGEX', pattern: /excluded/i, negative: true },
57
+ ],
58
+ };
59
+ const r = scoreMatcher(m, { source: 'alpha excluded', extension: 'pdf' });
60
+ expect(r.disqualified).toBe(true);
61
+ });
62
+
63
+ it('disqualifies when a required clue is missing', () => {
64
+ const m = {
65
+ ...base,
66
+ clues: [
67
+ { kind: 'CONTENT_REGEX', pattern: /alpha/i, weight: 2 },
68
+ { kind: 'CONTENT_REGEX', pattern: /mandatory/i, required: true, weight: 1 },
69
+ ],
70
+ };
71
+ const r = scoreMatcher(m, { source: 'alpha', extension: 'pdf' });
72
+ expect(r.disqualified).toBe(true);
73
+ });
74
+ });
75
+
76
+ describe('selectBestMatch (ordering)', () => {
77
+ const ctx = { source: 'alpha beta gamma', extension: 'pdf' };
78
+ const low = {
79
+ documentType: 'low',
80
+ extensions: ['pdf'],
81
+ minScore: 1,
82
+ clues: [{ kind: 'CONTENT_REGEX', pattern: /alpha/i, weight: 1 }],
83
+ };
84
+ const high = {
85
+ documentType: 'high',
86
+ extensions: ['pdf'],
87
+ minScore: 1,
88
+ clues: [
89
+ { kind: 'CONTENT_REGEX', pattern: /alpha/i, weight: 1 },
90
+ { kind: 'CONTENT_REGEX', pattern: /beta/i, weight: 1 },
91
+ { kind: 'CONTENT_REGEX', pattern: /gamma/i, weight: 1 },
92
+ ],
93
+ };
94
+
95
+ it('picks the highest score regardless of array order', () => {
96
+ expect(selectBestMatch([low, high], ctx).documentType).toBe('high');
97
+ expect(selectBestMatch([high, low], ctx).documentType).toBe('high');
98
+ });
99
+
100
+ it('breaks score ties by priority', () => {
101
+ const a = {
102
+ documentType: 'a',
103
+ extensions: ['pdf'],
104
+ minScore: 1,
105
+ priority: 0,
106
+ clues: [{ kind: 'CONTENT_REGEX', pattern: /alpha/i, weight: 2 }],
107
+ };
108
+ const b = { ...a, documentType: 'b', priority: 5 };
109
+ expect(selectBestMatch([a, b], ctx).documentType).toBe('b');
110
+ });
111
+ });
112
+
113
+ describe('qualify rules (group gate, OR-of-ANDs)', () => {
114
+ // mirrors factura_comercial: (cfdi>=2) OR (invoice>=1 AND customs>=1)
115
+ const m = {
116
+ documentType: 'q',
117
+ extensions: ['xml'],
118
+ qualify: [{ cfdi: 2 }, { invoice: 1, customs: 1 }],
119
+ clues: [
120
+ { kind: 'CONTENT_REGEX', pattern: /cfdiA/i, group: 'cfdi' },
121
+ { kind: 'CONTENT_REGEX', pattern: /cfdiB/i, group: 'cfdi' },
122
+ { kind: 'CONTENT_REGEX', pattern: /factura/i, group: 'invoice' },
123
+ { kind: 'CONTENT_REGEX', pattern: /aduana/i, group: 'customs' },
124
+ ],
125
+ };
126
+
127
+ it('qualifies when a single rule is fully satisfied (cfdi>=2)', () => {
128
+ expect(scoreMatcher(m, { source: 'cfdiA cfdiB', extension: 'xml' }).passed).toBe(true);
129
+ });
130
+
131
+ it('qualifies on the AND rule (invoice AND customs)', () => {
132
+ expect(scoreMatcher(m, { source: 'factura aduana', extension: 'xml' }).passed).toBe(true);
133
+ });
134
+
135
+ it('does NOT qualify on a partial AND rule (customs alone)', () => {
136
+ // This is the COVE-acuse false-positive case the flat model produced.
137
+ const r = scoreMatcher(m, { source: 'aduana pedimento aduana', extension: 'xml' });
138
+ expect(r.passed).toBe(false);
139
+ });
140
+
141
+ it('does NOT qualify with only one cfdi marker', () => {
142
+ expect(scoreMatcher(m, { source: 'cfdiA', extension: 'xml' }).passed).toBe(false);
143
+ });
144
+ });
145
+
146
+ // --------------------------- faithfulness on docs ---------------------------
147
+ const DOCS = {
148
+ simplificadoPaid: {
149
+ extension: 'pdf',
150
+ source: `FORMA SIMPLIFICADA DEL PEDIMENTO
151
+ NUM. PEDIMENTO: 26 07 3429 6000079
152
+ CVE. PEDIMENTO: A1
153
+ T. OPER: IMP
154
+ RFC: CSM9204097Q1
155
+ FECHA DE PAGO: 04/03/2026`,
156
+ },
157
+ simplificadoUnpaid: {
158
+ extension: 'pdf',
159
+ source: `FORMA SIMPLIFICADA DE PEDIMENTO
160
+ NUM. PEDIMENTO: 26 07 3429 6000080
161
+ CVE. PEDIMENTO: A1
162
+ T. OPER: IMP
163
+ RFC: CSM9204097Q1
164
+ *** NO PAGADO ***`,
165
+ },
166
+ completo: {
167
+ extension: 'pdf',
168
+ source: `NUM. PEDIMENTO: 26 07 3429 2002089
169
+ CVE. PEDIMENTO: A1
170
+ T. OPER: IMP
171
+ SEGUNDA COPIA TRANSPORTISTA
172
+ CUADRO DE LIQUIDACION
173
+ *** PAGO ELECTRONICO ***
174
+ FECHA DE PAGO: 02/03/2026`,
175
+ },
176
+ interAgencia: {
177
+ extension: 'xml',
178
+ source: `<cfdi:Comprobante xmlns:cfdi="..." TipoDeComprobante="I">
179
+ <cfdi:Emisor Rfc="NAA120215F20"/>
180
+ <cfdi:Receptor Rfc="PCC1008161WA"/>
181
+ <cfdi:Concepto ClaveProdServ="78141502"/>
182
+ </cfdi:Comprobante>`,
183
+ },
184
+ };
185
+
186
+ describe('seed matchers select the correct document type', () => {
187
+ it('classifies a paid simplificado', () => {
188
+ const r = classifyDocument(scoringMatchers, DOCS.simplificadoPaid);
189
+ expect(r.detectedType).toBe('pedimento_simplificado');
190
+ });
191
+
192
+ it('resolves an unpaid simplificado to proforma (resolveType reused)', () => {
193
+ const r = classifyDocument(scoringMatchers, DOCS.simplificadoUnpaid);
194
+ expect(r.detectedType).toBe('proforma');
195
+ });
196
+
197
+ it('separates completo from simplificado without registration order', () => {
198
+ const r = classifyDocument(scoringMatchers, DOCS.completo);
199
+ expect(r.detectedType).toBe('pedimento_completo');
200
+ });
201
+ });
202
+
203
+ describe('inter-agencia precedence is by score, not order', () => {
204
+ it('wins over factura_comercial with the default seed', () => {
205
+ const r = classifyDocument(scoringMatchers, DOCS.interAgencia);
206
+ expect(r.detectedType).toBe('factura_inter_agencia');
207
+ // factura_comercial may also appear among the candidates; when it does, the winner must outscore it
208
+ const comercial = r.candidates.find(
209
+ (c) => c.documentType === 'factura_comercial',
210
+ );
211
+ const winner = r.candidates[0];
212
+ expect(winner.documentType).toBe('factura_inter_agencia');
213
+ if (comercial) expect(winner.score).toBeGreaterThan(comercial.score);
214
+ });
215
+
216
+ it('still wins when the matcher array is reversed (order-independent)', () => {
217
+ const reversed = [...scoringMatchers].reverse();
218
+ const r = classifyDocument(reversed, DOCS.interAgencia);
219
+ expect(r.detectedType).toBe('factura_inter_agencia');
220
+ });
221
+ });
@@ -1 +0,0 @@
1
- {}