goldenmatch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/README.md +140 -0
  2. package/dist/cli.cjs +6079 -0
  3. package/dist/cli.cjs.map +1 -0
  4. package/dist/cli.d.cts +1 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.js +6076 -0
  7. package/dist/cli.js.map +1 -0
  8. package/dist/core/index.cjs +8449 -0
  9. package/dist/core/index.cjs.map +1 -0
  10. package/dist/core/index.d.cts +1972 -0
  11. package/dist/core/index.d.ts +1972 -0
  12. package/dist/core/index.js +8318 -0
  13. package/dist/core/index.js.map +1 -0
  14. package/dist/index.cjs +8449 -0
  15. package/dist/index.cjs.map +1 -0
  16. package/dist/index.d.cts +2 -0
  17. package/dist/index.d.ts +2 -0
  18. package/dist/index.js +8318 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/node/backends/score-worker.cjs +934 -0
  21. package/dist/node/backends/score-worker.cjs.map +1 -0
  22. package/dist/node/backends/score-worker.d.cts +14 -0
  23. package/dist/node/backends/score-worker.d.ts +14 -0
  24. package/dist/node/backends/score-worker.js +932 -0
  25. package/dist/node/backends/score-worker.js.map +1 -0
  26. package/dist/node/index.cjs +11430 -0
  27. package/dist/node/index.cjs.map +1 -0
  28. package/dist/node/index.d.cts +554 -0
  29. package/dist/node/index.d.ts +554 -0
  30. package/dist/node/index.js +11277 -0
  31. package/dist/node/index.js.map +1 -0
  32. package/dist/types-DhUdX5Rc.d.cts +304 -0
  33. package/dist/types-DhUdX5Rc.d.ts +304 -0
  34. package/examples/01-basic-dedupe.ts +60 -0
  35. package/examples/02-match-two-datasets.ts +48 -0
  36. package/examples/03-csv-file-pipeline.ts +62 -0
  37. package/examples/04-string-scoring.ts +63 -0
  38. package/examples/05-custom-config.ts +94 -0
  39. package/examples/06-probabilistic-fs.ts +72 -0
  40. package/examples/07-pprl-privacy.ts +76 -0
  41. package/examples/08-streaming.ts +79 -0
  42. package/examples/09-llm-scorer.ts +79 -0
  43. package/examples/10-explain.ts +60 -0
  44. package/examples/11-evaluate.ts +61 -0
  45. package/examples/README.md +53 -0
  46. package/package.json +66 -0
  47. package/src/cli.ts +372 -0
  48. package/src/core/ann-blocker.ts +593 -0
  49. package/src/core/api.ts +220 -0
  50. package/src/core/autoconfig.ts +363 -0
  51. package/src/core/autofix.ts +102 -0
  52. package/src/core/blocker.ts +655 -0
  53. package/src/core/cluster.ts +699 -0
  54. package/src/core/compare-clusters.ts +176 -0
  55. package/src/core/config/loader.ts +869 -0
  56. package/src/core/cross-encoder.ts +614 -0
  57. package/src/core/data.ts +430 -0
  58. package/src/core/domain.ts +277 -0
  59. package/src/core/embedder.ts +562 -0
  60. package/src/core/evaluate.ts +156 -0
  61. package/src/core/explain.ts +352 -0
  62. package/src/core/golden.ts +524 -0
  63. package/src/core/graph-er.ts +371 -0
  64. package/src/core/index.ts +314 -0
  65. package/src/core/ingest.ts +112 -0
  66. package/src/core/learned-blocking.ts +305 -0
  67. package/src/core/lineage.ts +221 -0
  68. package/src/core/llm/budget.ts +258 -0
  69. package/src/core/llm/cluster.ts +542 -0
  70. package/src/core/llm/scorer.ts +396 -0
  71. package/src/core/match-one.ts +95 -0
  72. package/src/core/matchkey.ts +97 -0
  73. package/src/core/memory/corrections.ts +179 -0
  74. package/src/core/memory/learner.ts +218 -0
  75. package/src/core/memory/store.ts +114 -0
  76. package/src/core/pipeline.ts +366 -0
  77. package/src/core/pprl/protocol.ts +216 -0
  78. package/src/core/probabilistic.ts +511 -0
  79. package/src/core/profiler.ts +212 -0
  80. package/src/core/quality.ts +197 -0
  81. package/src/core/review-queue.ts +177 -0
  82. package/src/core/scorer.ts +855 -0
  83. package/src/core/sensitivity.ts +196 -0
  84. package/src/core/standardize.ts +279 -0
  85. package/src/core/streaming.ts +128 -0
  86. package/src/core/transforms.ts +599 -0
  87. package/src/core/types.ts +570 -0
  88. package/src/core/validate.ts +243 -0
  89. package/src/index.ts +8 -0
  90. package/src/node/a2a/server.ts +470 -0
  91. package/src/node/api/server.ts +412 -0
  92. package/src/node/backends/duckdb.ts +130 -0
  93. package/src/node/backends/score-worker.ts +41 -0
  94. package/src/node/backends/workers.ts +212 -0
  95. package/src/node/config-file.ts +66 -0
  96. package/src/node/connectors/base.ts +57 -0
  97. package/src/node/connectors/bigquery.ts +61 -0
  98. package/src/node/connectors/databricks.ts +69 -0
  99. package/src/node/connectors/file.ts +350 -0
  100. package/src/node/connectors/hubspot.ts +62 -0
  101. package/src/node/connectors/index.ts +43 -0
  102. package/src/node/connectors/salesforce.ts +93 -0
  103. package/src/node/connectors/snowflake.ts +73 -0
  104. package/src/node/db/postgres.ts +173 -0
  105. package/src/node/db/sync.ts +103 -0
  106. package/src/node/dedupe-file.ts +156 -0
  107. package/src/node/index.ts +89 -0
  108. package/src/node/mcp/server.ts +940 -0
  109. package/src/node/tui/app.ts +756 -0
  110. package/src/node/tui/index.ts +6 -0
  111. package/src/node/tui/widgets.ts +128 -0
  112. package/tests/parity/scorer-ground-truth.test.ts +118 -0
  113. package/tests/smoke.test.ts +46 -0
  114. package/tests/unit/a2a-server.test.ts +175 -0
  115. package/tests/unit/ann-blocker.test.ts +117 -0
  116. package/tests/unit/api-server.test.ts +239 -0
  117. package/tests/unit/api.test.ts +77 -0
  118. package/tests/unit/autoconfig.test.ts +103 -0
  119. package/tests/unit/autofix.test.ts +71 -0
  120. package/tests/unit/blocker.test.ts +164 -0
  121. package/tests/unit/buildBlocksAsync.test.ts +63 -0
  122. package/tests/unit/cluster.test.ts +213 -0
  123. package/tests/unit/compare-clusters.test.ts +42 -0
  124. package/tests/unit/config-loader.test.ts +301 -0
  125. package/tests/unit/connectors-base.test.ts +48 -0
  126. package/tests/unit/cross-encoder-model.test.ts +198 -0
  127. package/tests/unit/cross-encoder.test.ts +173 -0
  128. package/tests/unit/db-connectors.test.ts +37 -0
  129. package/tests/unit/domain.test.ts +80 -0
  130. package/tests/unit/embedder.test.ts +151 -0
  131. package/tests/unit/evaluate.test.ts +85 -0
  132. package/tests/unit/explain.test.ts +73 -0
  133. package/tests/unit/golden.test.ts +97 -0
  134. package/tests/unit/graph-er.test.ts +173 -0
  135. package/tests/unit/hnsw-ann.test.ts +283 -0
  136. package/tests/unit/hubspot-connector.test.ts +118 -0
  137. package/tests/unit/ingest.test.ts +97 -0
  138. package/tests/unit/learned-blocking.test.ts +134 -0
  139. package/tests/unit/lineage.test.ts +135 -0
  140. package/tests/unit/match-one.test.ts +129 -0
  141. package/tests/unit/matchkey.test.ts +97 -0
  142. package/tests/unit/mcp-server.test.ts +183 -0
  143. package/tests/unit/memory.test.ts +119 -0
  144. package/tests/unit/pipeline.test.ts +118 -0
  145. package/tests/unit/pprl-protocol.test.ts +381 -0
  146. package/tests/unit/probabilistic.test.ts +494 -0
  147. package/tests/unit/profiler.test.ts +68 -0
  148. package/tests/unit/review-queue.test.ts +68 -0
  149. package/tests/unit/salesforce-connector.test.ts +148 -0
  150. package/tests/unit/scorer.test.ts +301 -0
  151. package/tests/unit/sensitivity.test.ts +154 -0
  152. package/tests/unit/standardize.test.ts +84 -0
  153. package/tests/unit/streaming.test.ts +82 -0
  154. package/tests/unit/transforms.test.ts +208 -0
  155. package/tests/unit/tui-widgets.test.ts +42 -0
  156. package/tests/unit/tui.test.ts +24 -0
  157. package/tests/unit/validate.test.ts +145 -0
  158. package/tests/unit/workers-parallel.test.ts +99 -0
  159. package/tests/unit/workers.test.ts +74 -0
  160. package/tsconfig.json +25 -0
  161. package/tsup.config.ts +37 -0
  162. package/vitest.config.ts +11 -0
@@ -0,0 +1,179 @@
1
+ /**
2
+ * memory/corrections.ts — Apply stored corrections to scored pairs.
3
+ * Edge-safe: no `node:` imports.
4
+ *
5
+ * Ports goldenmatch/core/memory/corrections.py. A correction is only
6
+ * applied if both rows still hash to the values seen when the correction
7
+ * was recorded (dual-hash staleness detection).
8
+ */
9
+
10
+ import type { Row, ScoredPair } from "../types.js";
11
+ import { makeScoredPair } from "../types.js";
12
+ import type { Correction, MemoryStore } from "./store.js";
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // Row hashing
16
+ // ---------------------------------------------------------------------------
17
+
18
/**
 * 32-bit FNV-1a hash of a string, rendered as lowercase hexadecimal.
 *
 * The input is consumed one UTF-16 code unit at a time, so the same string
 * always produces the same digest.
 */
function hashString(s: string): string {
  const FNV_OFFSET_BASIS = 2166136261;
  const FNV_PRIME = 16777619;

  let acc = FNV_OFFSET_BASIS;
  let idx = 0;
  while (idx < s.length) {
    // XOR in the code unit, then multiply by the prime with 32-bit wraparound.
    acc = Math.imul(acc ^ s.charCodeAt(idx), FNV_PRIME);
    idx += 1;
  }
  // `>>> 0` reinterprets the signed 32-bit state as unsigned before formatting.
  return (acc >>> 0).toString(16);
}
30
+
31
/**
 * Fingerprint a row from its non-internal fields.
 *
 * Keys starting with "__" are skipped and the remaining keys are sorted. Each
 * field is serialized as `key=value`, where null/undefined use a sentinel and
 * every other value goes through `String`. The "|"-joined text is then hashed
 * with `hashString`.
 */
export function hashRow(row: Row): string {
  const serialized = Object.keys(row)
    .filter((name) => !name.startsWith("__"))
    .sort()
    .map((name) => {
      const value = row[name];
      const text = value === null || value === undefined ? "\u0000null" : String(value);
      return `${name}=${text}`;
    })
    .join("|");
  return hashString(serialized);
}
44
+
45
+ // ---------------------------------------------------------------------------
46
+ // Helpers
47
+ // ---------------------------------------------------------------------------
48
+
49
/** Order-independent key for a pair of row ids, formatted as "smaller|larger". */
function pairKey(a: number, b: number): string {
  if (a < b) {
    return `${a}|${b}`;
  }
  return `${b}|${a}`;
}
52
+
53
/**
 * Read the numeric id stored under the row's `__row_id__` field.
 *
 * Accepts a finite number, or a non-blank string that converts to a finite
 * number. Anything else (missing field, other types, blank or non-numeric
 * strings, NaN/Infinity) yields null.
 *
 * @param row - Row to inspect.
 * @returns The row id, or null when the row carries no usable id.
 */
function getRowId(row: Row): number | null {
  const raw = row["__row_id__"];
  if (typeof raw === "number") {
    // Mirror the string branch: a non-finite number is not a usable id.
    return Number.isFinite(raw) ? raw : null;
  }
  if (typeof raw === "string") {
    // Number("") and Number("   ") are 0, which would alias a blank id onto row 0.
    if (raw.trim() === "") return null;
    const parsed = Number(raw);
    return Number.isFinite(parsed) ? parsed : null;
  }
  return null;
}
62
+
63
+ // ---------------------------------------------------------------------------
64
+ // Stored-correction metadata
65
+ // ---------------------------------------------------------------------------
66
+
67
/**
 * Row hashes recorded for the two sides of a correction.
 *
 * `applyCorrections` compares these against fresh `hashRow` values of the
 * current rows, accepting either A/B order.
 */
export interface StoredRowHashes {
  /** Hash recorded for the correction's A-side row. */
  readonly rowIdAHash: string;
  /** Hash recorded for the correction's B-side row. */
  readonly rowIdBHash: string;
}
71
+
72
/**
 * Optional settings for `applyCorrections`.
 *
 * Staleness detection only runs for pairs that have an entry in
 * `originalHashes`. When the map, or the entry for a given pair, is absent,
 * the correction is applied without any hash comparison.
 */
export interface ApplyCorrectionsOptions {
  /** Recorded row hashes, keyed by the canonical "minId|maxId" pair key. */
  readonly originalHashes?: ReadonlyMap<string, StoredRowHashes>;
  /** Score assigned to a pair whose correction verdict is "match". Defaults to 1.0. */
  readonly matchScore?: number;
  /** Score assigned to a pair whose correction verdict is "no_match". Defaults to 0.0. */
  readonly noMatchScore?: number;
}
83
+
84
+ // ---------------------------------------------------------------------------
85
+ // Apply corrections
86
+ // ---------------------------------------------------------------------------
87
+
88
/**
 * Overlay the corrections held in `store` onto a list of scored pairs.
 *
 * Only one correction is used per pair key: the one with the highest trust,
 * or the latest timestamp among corrections of equal trust. For every scored
 * pair that has such a correction:
 *   - If `options.originalHashes` has an entry for the pair, both current rows
 *     are re-hashed and compared with the recorded hashes (in either order).
 *     A missing row or a mismatch counts the pair as stale and leaves it
 *     untouched.
 *   - Otherwise the pair is replaced by a new scored pair whose score is
 *     `matchScore` (default 1.0) for a "match" verdict and `noMatchScore`
 *     (default 0.0) for any other verdict.
 *
 * Pairs without a correction pass through unchanged; output order follows
 * input order.
 *
 * @returns The resulting pairs, the number of pairs overridden (`applied`),
 *          and the number skipped as stale (`stale`).
 */
export function applyCorrections(
  pairs: readonly ScoredPair[],
  rows: readonly Row[],
  store: MemoryStore,
  options?: ApplyCorrectionsOptions,
): { pairs: readonly ScoredPair[]; applied: number; stale: number } {
  const scoreForMatch = options?.matchScore ?? 1.0;
  const scoreForNoMatch = options?.noMatchScore ?? 0.0;

  // Current rows by id, used to re-hash both sides of a corrected pair.
  const currentRows = new Map<number, Row>();
  rows.forEach((row) => {
    const rowId = getRowId(row);
    if (rowId !== null) currentRows.set(rowId, row);
  });

  // Winning correction per canonical pair key.
  const winners = new Map<string, Correction>();
  for (const candidate of store.list()) {
    const pairId = pairKey(candidate.rowIdA, candidate.rowIdB);
    const incumbent = winners.get(pairId);
    if (incumbent !== undefined) {
      const outranks =
        candidate.trust > incumbent.trust ||
        (candidate.trust === incumbent.trust && candidate.timestamp > incumbent.timestamp);
      if (!outranks) continue;
    }
    winners.set(pairId, candidate);
  }

  const recordedHashes = options?.originalHashes;

  // True when recorded hashes exist for the pair and no longer describe the current rows.
  const isStale = (pair: ScoredPair, pairId: string): boolean => {
    const recorded = recordedHashes ? recordedHashes.get(pairId) : undefined;
    if (!recorded) return false;

    const left = currentRows.get(pair.idA);
    const right = currentRows.get(pair.idB);
    if (!left || !right) return true;

    const leftHash = hashRow(left);
    const rightHash = hashRow(right);
    const sameOrder = leftHash === recorded.rowIdAHash && rightHash === recorded.rowIdBHash;
    const swappedOrder = leftHash === recorded.rowIdBHash && rightHash === recorded.rowIdAHash;
    return !(sameOrder || swappedOrder);
  };

  let applied = 0;
  let stale = 0;
  const result: ScoredPair[] = [];

  for (const pair of pairs) {
    const pairId = pairKey(pair.idA, pair.idB);
    const correction = winners.get(pairId);

    if (!correction) {
      result.push(pair);
    } else if (isStale(pair, pairId)) {
      stale += 1;
      result.push(pair);
    } else {
      applied += 1;
      const overriddenScore = correction.verdict === "match" ? scoreForMatch : scoreForNoMatch;
      result.push(makeScoredPair(pair.idA, pair.idB, overriddenScore));
    }
  }

  return { pairs: result, applied, stale };
}
@@ -0,0 +1,218 @@
1
+ /**
2
+ * memory/learner.ts — Threshold tuning & weight learning from corrections.
3
+ * Edge-safe: no `node:` imports.
4
+ *
5
+ * Ports goldenmatch/core/memory/learner.py. Given ≥10 corrections, sweep
6
+ * thresholds and pick the one maximizing F1 on the correction set. Given
7
+ * ≥50 corrections with per-field subscores, fit a simple logistic-
8
+ * regression-like weight update.
9
+ */
10
+
11
+ import type { LearningConfig, MatchkeyConfig } from "../types.js";
12
+ import type { Correction } from "./store.js";
13
+
14
+ // ---------------------------------------------------------------------------
15
+ // Types
16
+ // ---------------------------------------------------------------------------
17
+
18
/** Result of a `MemoryLearner.learn` run; optional members are set only when that stage produced a value. */
export interface LearnedParams {
  /** Tuned score threshold, when threshold tuning ran and returned a value. */
  readonly threshold?: number;
  /** Learned per-field weights keyed by field name, when weight tuning ran and returned a value. */
  readonly fieldWeights?: Readonly<Record<string, number>>;
  /** Number of corrections passed to the learner. */
  readonly correctionCount: number;
}
23
+
24
/**
 * Per-field subscores for one corrected pair.
 *
 * Weight learning joins these to corrections through `pairKey` and reads one
 * value per baseline matchkey field name; a field with no subscore is
 * treated as 0. Values are intended to lie in [0,1].
 */
export interface CorrectionSubscores {
  /** Canonical pair key in the form "minId|maxId". */
  readonly pairKey: string;
  /** Subscore per matchkey field name. */
  readonly subscores: Readonly<Record<string, number>>;
}
34
+
35
+ // ---------------------------------------------------------------------------
36
+ // Learner
37
+ // ---------------------------------------------------------------------------
38
+
39
/** Sample-size gates used when a `MemoryLearner` is constructed without a config. */
const DEFAULT_LEARNING_CONFIG: LearningConfig = {
  // Fewest corrections for which threshold tuning is attempted.
  thresholdMinCorrections: 10,
  // Fewest corrections (and subscore entries) for which weight tuning is attempted.
  weightsMinCorrections: 50,
};
43
+
44
/**
 * Derives tuned matching parameters from user corrections.
 *
 * The learning config supplies the minimum sample sizes that gate each stage.
 */
export class MemoryLearner {
  constructor(
    private readonly config: LearningConfig = DEFAULT_LEARNING_CONFIG,
  ) {}

  /**
   * Learn a threshold and, when possible, field weights from corrections.
   *
   * Threshold stage: runs `tuneThreshold` once at least
   * `thresholdMinCorrections` corrections are supplied, and records the
   * result unless it is null.
   *
   * Weight stage: runs `tuneWeights` only when `subscores` is provided and
   * both the correction count and the subscore count reach
   * `weightsMinCorrections`; the result is recorded when non-null.
   *
   * @param corrections - Corrections to learn from.
   * @param baseline - Matchkey whose fields and weights seed the weight stage.
   * @param subscores - Optional per-pair field subscores for the weight stage.
   * @returns The learned values together with the number of corrections seen.
   */
  learn(
    corrections: readonly Correction[],
    baseline: MatchkeyConfig,
    subscores?: readonly CorrectionSubscores[],
  ): LearnedParams {
    const total = corrections.length;
    const learned: {
      threshold?: number;
      fieldWeights?: Record<string, number>;
      correctionCount: number;
    } = { correctionCount: total };

    if (total >= this.config.thresholdMinCorrections) {
      const threshold = tuneThreshold(corrections);
      if (threshold !== null) {
        learned.threshold = threshold;
      }
    }

    // Without subscores there is nothing to fit weights against.
    if (!subscores) {
      return learned;
    }

    const minForWeights = this.config.weightsMinCorrections;
    const enoughSamples = total >= minForWeights && subscores.length >= minForWeights;
    if (enoughSamples) {
      const fitted = tuneWeights(corrections, subscores, baseline);
      if (fitted) {
        learned.fieldWeights = fitted;
      }
    }

    return learned;
  }
}
87
+
88
+ // ---------------------------------------------------------------------------
89
+ // Threshold tuning
90
+ // ---------------------------------------------------------------------------
91
+
92
/**
 * Pick the score threshold in [0.50, 0.95] (step 0.05) that maximizes F1 over
 * the corrections, treating `score >= threshold` as a predicted match.
 *
 * A correction counts as a positive when its verdict is "match" and as a
 * negative otherwise. Ties in F1 go to the higher threshold.
 *
 * @param corrections - Corrections carrying the score and the user verdict.
 * @returns The best threshold, or null when the corrections do not contain at
 *          least one "match" and one "no_match" verdict.
 */
function tuneThreshold(corrections: readonly Correction[]): number | null {
  const hasMatch = corrections.some((c) => c.verdict === "match");
  const hasNoMatch = corrections.some((c) => c.verdict === "no_match");
  if (!hasMatch || !hasNoMatch) return null;

  let bestThreshold = 0.85;
  let bestF1 = -1;

  // Step in whole percentage points and divide once per candidate. Repeatedly
  // adding 0.05 drifts (0.5 + 0.05 + 0.05 is 0.6000000000000001), which made a
  // score of exactly 0.6 fail the `>=` test at the nominal 0.6 threshold.
  for (let pct = 50; pct <= 95; pct += 5) {
    const t = pct / 100;
    let tp = 0;
    let fp = 0;
    let fn = 0;
    for (const c of corrections) {
      const predicted = c.score >= t;
      if (c.verdict === "match") {
        if (predicted) tp++;
        else fn++;
      } else if (predicted) {
        fp++;
      }
    }
    const precision = tp + fp === 0 ? 0 : tp / (tp + fp);
    const recall = tp + fn === 0 ? 0 : tp / (tp + fn);
    const f1 =
      precision + recall === 0 ? 0 : (2 * precision * recall) / (precision + recall);
    // Prefer strictly better F1; on a tie prefer the higher threshold.
    if (f1 > bestF1 || (f1 === bestF1 && t > bestThreshold)) {
      bestF1 = f1;
      bestThreshold = t;
    }
  }

  return Number(bestThreshold.toFixed(3));
}
130
+
131
+ // ---------------------------------------------------------------------------
132
+ // Weight tuning (simple gradient pass)
133
+ // ---------------------------------------------------------------------------
134
+
135
/**
 * Logistic function 1 / (1 + e^-x).
 *
 * Branches on the sign of `x` so that `Math.exp` is only ever called with a
 * non-positive argument.
 */
function sigmoid(x: number): number {
  if (!(x >= 0)) {
    const expX = Math.exp(x);
    return expX / (1 + expX);
  }
  return 1 / (1 + Math.exp(-x));
}
143
+
144
/**
 * Fit per-field weights to the corrections.
 *
 * Starting from the baseline weights, runs 50 full-batch gradient steps
 * (learning rate 0.1) on a sigmoid of the weighted subscore sum, with target
 * 1 for a "match" verdict and 0 otherwise. Only corrections whose canonical
 * "minId|maxId" key has a subscore entry take part.
 *
 * Afterwards negative weights are clipped to 0 and all weights are rescaled
 * so their total equals the baseline's total, then rounded to 4 decimals.
 *
 * @returns Weights keyed by field name, or null when the baseline has no
 *          fields, fewer than 10 samples could be joined, or the clipped
 *          total is not positive.
 */
function tuneWeights(
  corrections: readonly Correction[],
  subscores: readonly CorrectionSubscores[],
  baseline: MatchkeyConfig,
): Record<string, number> | null {
  // Subscores by pair key (shallow-copied; a later duplicate key wins).
  const subscoresByKey = new Map<string, Record<string, number>>(
    subscores.map((entry): [string, Record<string, number>] => [
      entry.pairKey,
      { ...entry.subscores },
    ]),
  );

  const fieldNames = baseline.fields.map((spec) => spec.field);
  if (fieldNames.length === 0) return null;

  // Working weights start at the baseline values.
  const weights = new Map<string, number>(
    baseline.fields.map((spec): [string, number] => [spec.field, spec.weight]),
  );

  // Join each correction to its subscores; corrections without any are dropped.
  const samples: Array<{ y: number; x: Record<string, number> }> = [];
  for (const correction of corrections) {
    const ordered = correction.rowIdA < correction.rowIdB;
    const low = ordered ? correction.rowIdA : correction.rowIdB;
    const high = ordered ? correction.rowIdB : correction.rowIdA;
    const features = subscoresByKey.get(`${low}|${high}`);
    if (!features) continue;
    samples.push({ y: correction.verdict === "match" ? 1 : 0, x: features });
  }
  if (samples.length < 10) return null;

  const STEP_SIZE = 0.1;
  const STEPS = 50;
  for (let step = 0; step < STEPS; step += 1) {
    const gradient = new Map<string, number>();
    for (const name of fieldNames) gradient.set(name, 0);

    for (const { x, y } of samples) {
      const z = fieldNames.reduce(
        (sum, name) => sum + (weights.get(name) ?? 0) * (x[name] ?? 0),
        0,
      );
      const residual = sigmoid(z) - y;
      for (const name of fieldNames) {
        gradient.set(name, (gradient.get(name) ?? 0) + residual * (x[name] ?? 0));
      }
    }

    // Descend along the mean gradient.
    for (const name of fieldNames) {
      const meanGradient = (gradient.get(name) ?? 0) / samples.length;
      weights.set(name, (weights.get(name) ?? 0) - STEP_SIZE * meanGradient);
    }
  }

  // Clip negatives, then rescale so the total matches the baseline's weight budget.
  const clipped = (name: string): number => Math.max(0, weights.get(name) ?? 0);
  const baselineTotal = baseline.fields.reduce((sum, spec) => sum + spec.weight, 0);
  const learnedTotal = fieldNames.reduce((sum, name) => sum + clipped(name), 0);
  if (learnedTotal <= 0) return null;
  const scale = baselineTotal / learnedTotal;

  const out: Record<string, number> = {};
  for (const name of fieldNames) {
    out[name] = Number((clipped(name) * scale).toFixed(4));
  }
  return out;
}
@@ -0,0 +1,114 @@
1
+ /**
2
+ * memory/store.ts — Learning Memory store (in-memory backend).
3
+ * Edge-safe: no `node:` imports.
4
+ *
5
+ * Ports goldenmatch/core/memory/store.py. SQLite / Postgres backends are
6
+ * deferred (they require host-specific drivers); the in-memory backend
7
+ * keeps all corrections in a plain array with trust-based upsert.
8
+ */
9
+
10
+ // ---------------------------------------------------------------------------
11
+ // Types
12
+ // ---------------------------------------------------------------------------
13
+
14
/** A user verdict about whether two rows refer to the same entity. */
export interface Correction {
  /** Id of one row of the pair; the A/B order carries no meaning for keying. */
  readonly rowIdA: number;
  /** Id of the other row of the pair. */
  readonly rowIdB: number;
  /** The verdict: the rows match, or they do not. */
  readonly verdict: "match" | "no_match";
  /** Feature label; together with the row pair it forms the upsert key. */
  readonly feature: string;
  /** Pair score stored with the correction. */
  readonly score: number;
  /** Recording time; larger means more recent (unit is not fixed by this file). */
  readonly timestamp: number;
  /** Trust level; a higher value wins when corrections conflict. */
  readonly trust: number;
  /** Free-form origin label for the correction. */
  readonly source: string;
}
24
+
25
/** Configuration accepted by `MemoryStore`. */
export interface MemoryStoreConfig {
  /** Requested backend; `MemoryStore` keeps corrections in memory whichever value is given. */
  readonly backend: "memory" | "sqlite" | "postgres";
  /** Optional storage location; not read by `MemoryStore` itself. */
  readonly path?: string;
  /** Optional default trust value; not read by `MemoryStore` itself. */
  readonly trustDefault?: number;
}
30
+
31
+ // ---------------------------------------------------------------------------
32
+ // Helpers
33
+ // ---------------------------------------------------------------------------
34
+
35
/** Key identifying a correction by its unordered row pair and feature: "minId|maxId|feature". */
function pairFeatureKey(c: Correction): string {
  const alreadyOrdered = c.rowIdA < c.rowIdB;
  const low = alreadyOrdered ? c.rowIdA : c.rowIdB;
  const high = alreadyOrdered ? c.rowIdB : c.rowIdA;
  return `${low}|${high}|${c.feature}`;
}
39
+
40
+ // ---------------------------------------------------------------------------
41
+ // MemoryStore
42
+ // ---------------------------------------------------------------------------
43
+
44
/**
 * In-memory collection of corrections, kept in insertion order.
 *
 * `add`/`addBatch` append without de-duplication; `upsert` keeps at most one
 * winning entry for the (pair, feature) key it touches.
 */
export class MemoryStore {
  private corrections: Correction[] = [];

  constructor(private readonly config: MemoryStoreConfig = { backend: "memory" }) {
    if (config.backend !== "memory") {
      // Deliberately a no-op: non-memory backends are not implemented here,
      // and the constructor does not throw so the class stays usable for tests.
      // Callers needing persistence are expected to wrap this class.
    }
  }

  /** Append one correction without checking for an existing entry. */
  add(correction: Correction): void {
    this.corrections.push(correction);
  }

  /** Append every correction in the given order, without checking for existing entries. */
  addBatch(corrections: readonly Correction[]): void {
    for (let i = 0; i < corrections.length; i += 1) {
      this.corrections.push(corrections[i]!);
    }
  }

  /** The stored corrections in insertion order (the live backing array, typed read-only). */
  list(): readonly Correction[] {
    return this.corrections;
  }

  /** New array holding the corrections whose verdict is "match". */
  listMatches(): readonly Correction[] {
    const matches: Correction[] = [];
    for (const entry of this.corrections) {
      if (entry.verdict === "match") matches.push(entry);
    }
    return matches;
  }

  /** New array holding the corrections whose verdict is "no_match". */
  listNonMatches(): readonly Correction[] {
    const nonMatches: Correction[] = [];
    for (const entry of this.corrections) {
      if (entry.verdict === "no_match") nonMatches.push(entry);
    }
    return nonMatches;
  }

  /** Number of stored corrections. */
  count(): number {
    return this.corrections.length;
  }

  /** Drop all corrections by switching to a fresh backing array. */
  clear(): void {
    this.corrections = [];
  }

  /**
   * Insert or replace by (pair, feature).
   *
   * Only the first stored entry with the same key is considered. The incoming
   * correction replaces it when it has strictly higher trust, or equal trust
   * and a timestamp that is not older; otherwise the stored entry is kept.
   * With no entry for the key, the correction is appended.
   */
  upsert(correction: Correction): void {
    const key = pairFeatureKey(correction);
    for (const [slot, held] of this.corrections.entries()) {
      if (pairFeatureKey(held) !== key) continue;

      const outranks = correction.trust > held.trust;
      const tiesAndNotOlder =
        correction.trust === held.trust && correction.timestamp >= held.timestamp;
      if (outranks || tiesAndNotOlder) {
        this.corrections[slot] = correction;
      }
      return;
    }
    this.corrections.push(correction);
  }

  /** The config object this store was constructed with. */
  getConfig(): MemoryStoreConfig {
    return this.config;
  }
}