goldenmatch 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162) hide show
  1. package/README.md +140 -0
  2. package/dist/cli.cjs +6079 -0
  3. package/dist/cli.cjs.map +1 -0
  4. package/dist/cli.d.cts +1 -0
  5. package/dist/cli.d.ts +1 -0
  6. package/dist/cli.js +6076 -0
  7. package/dist/cli.js.map +1 -0
  8. package/dist/core/index.cjs +8449 -0
  9. package/dist/core/index.cjs.map +1 -0
  10. package/dist/core/index.d.cts +1972 -0
  11. package/dist/core/index.d.ts +1972 -0
  12. package/dist/core/index.js +8318 -0
  13. package/dist/core/index.js.map +1 -0
  14. package/dist/index.cjs +8449 -0
  15. package/dist/index.cjs.map +1 -0
  16. package/dist/index.d.cts +2 -0
  17. package/dist/index.d.ts +2 -0
  18. package/dist/index.js +8318 -0
  19. package/dist/index.js.map +1 -0
  20. package/dist/node/backends/score-worker.cjs +934 -0
  21. package/dist/node/backends/score-worker.cjs.map +1 -0
  22. package/dist/node/backends/score-worker.d.cts +14 -0
  23. package/dist/node/backends/score-worker.d.ts +14 -0
  24. package/dist/node/backends/score-worker.js +932 -0
  25. package/dist/node/backends/score-worker.js.map +1 -0
  26. package/dist/node/index.cjs +11430 -0
  27. package/dist/node/index.cjs.map +1 -0
  28. package/dist/node/index.d.cts +554 -0
  29. package/dist/node/index.d.ts +554 -0
  30. package/dist/node/index.js +11277 -0
  31. package/dist/node/index.js.map +1 -0
  32. package/dist/types-DhUdX5Rc.d.cts +304 -0
  33. package/dist/types-DhUdX5Rc.d.ts +304 -0
  34. package/examples/01-basic-dedupe.ts +60 -0
  35. package/examples/02-match-two-datasets.ts +48 -0
  36. package/examples/03-csv-file-pipeline.ts +62 -0
  37. package/examples/04-string-scoring.ts +63 -0
  38. package/examples/05-custom-config.ts +94 -0
  39. package/examples/06-probabilistic-fs.ts +72 -0
  40. package/examples/07-pprl-privacy.ts +76 -0
  41. package/examples/08-streaming.ts +79 -0
  42. package/examples/09-llm-scorer.ts +79 -0
  43. package/examples/10-explain.ts +60 -0
  44. package/examples/11-evaluate.ts +61 -0
  45. package/examples/README.md +53 -0
  46. package/package.json +66 -0
  47. package/src/cli.ts +372 -0
  48. package/src/core/ann-blocker.ts +593 -0
  49. package/src/core/api.ts +220 -0
  50. package/src/core/autoconfig.ts +363 -0
  51. package/src/core/autofix.ts +102 -0
  52. package/src/core/blocker.ts +655 -0
  53. package/src/core/cluster.ts +699 -0
  54. package/src/core/compare-clusters.ts +176 -0
  55. package/src/core/config/loader.ts +869 -0
  56. package/src/core/cross-encoder.ts +614 -0
  57. package/src/core/data.ts +430 -0
  58. package/src/core/domain.ts +277 -0
  59. package/src/core/embedder.ts +562 -0
  60. package/src/core/evaluate.ts +156 -0
  61. package/src/core/explain.ts +352 -0
  62. package/src/core/golden.ts +524 -0
  63. package/src/core/graph-er.ts +371 -0
  64. package/src/core/index.ts +314 -0
  65. package/src/core/ingest.ts +112 -0
  66. package/src/core/learned-blocking.ts +305 -0
  67. package/src/core/lineage.ts +221 -0
  68. package/src/core/llm/budget.ts +258 -0
  69. package/src/core/llm/cluster.ts +542 -0
  70. package/src/core/llm/scorer.ts +396 -0
  71. package/src/core/match-one.ts +95 -0
  72. package/src/core/matchkey.ts +97 -0
  73. package/src/core/memory/corrections.ts +179 -0
  74. package/src/core/memory/learner.ts +218 -0
  75. package/src/core/memory/store.ts +114 -0
  76. package/src/core/pipeline.ts +366 -0
  77. package/src/core/pprl/protocol.ts +216 -0
  78. package/src/core/probabilistic.ts +511 -0
  79. package/src/core/profiler.ts +212 -0
  80. package/src/core/quality.ts +197 -0
  81. package/src/core/review-queue.ts +177 -0
  82. package/src/core/scorer.ts +855 -0
  83. package/src/core/sensitivity.ts +196 -0
  84. package/src/core/standardize.ts +279 -0
  85. package/src/core/streaming.ts +128 -0
  86. package/src/core/transforms.ts +599 -0
  87. package/src/core/types.ts +570 -0
  88. package/src/core/validate.ts +243 -0
  89. package/src/index.ts +8 -0
  90. package/src/node/a2a/server.ts +470 -0
  91. package/src/node/api/server.ts +412 -0
  92. package/src/node/backends/duckdb.ts +130 -0
  93. package/src/node/backends/score-worker.ts +41 -0
  94. package/src/node/backends/workers.ts +212 -0
  95. package/src/node/config-file.ts +66 -0
  96. package/src/node/connectors/base.ts +57 -0
  97. package/src/node/connectors/bigquery.ts +61 -0
  98. package/src/node/connectors/databricks.ts +69 -0
  99. package/src/node/connectors/file.ts +350 -0
  100. package/src/node/connectors/hubspot.ts +62 -0
  101. package/src/node/connectors/index.ts +43 -0
  102. package/src/node/connectors/salesforce.ts +93 -0
  103. package/src/node/connectors/snowflake.ts +73 -0
  104. package/src/node/db/postgres.ts +173 -0
  105. package/src/node/db/sync.ts +103 -0
  106. package/src/node/dedupe-file.ts +156 -0
  107. package/src/node/index.ts +89 -0
  108. package/src/node/mcp/server.ts +940 -0
  109. package/src/node/tui/app.ts +756 -0
  110. package/src/node/tui/index.ts +6 -0
  111. package/src/node/tui/widgets.ts +128 -0
  112. package/tests/parity/scorer-ground-truth.test.ts +118 -0
  113. package/tests/smoke.test.ts +46 -0
  114. package/tests/unit/a2a-server.test.ts +175 -0
  115. package/tests/unit/ann-blocker.test.ts +117 -0
  116. package/tests/unit/api-server.test.ts +239 -0
  117. package/tests/unit/api.test.ts +77 -0
  118. package/tests/unit/autoconfig.test.ts +103 -0
  119. package/tests/unit/autofix.test.ts +71 -0
  120. package/tests/unit/blocker.test.ts +164 -0
  121. package/tests/unit/buildBlocksAsync.test.ts +63 -0
  122. package/tests/unit/cluster.test.ts +213 -0
  123. package/tests/unit/compare-clusters.test.ts +42 -0
  124. package/tests/unit/config-loader.test.ts +301 -0
  125. package/tests/unit/connectors-base.test.ts +48 -0
  126. package/tests/unit/cross-encoder-model.test.ts +198 -0
  127. package/tests/unit/cross-encoder.test.ts +173 -0
  128. package/tests/unit/db-connectors.test.ts +37 -0
  129. package/tests/unit/domain.test.ts +80 -0
  130. package/tests/unit/embedder.test.ts +151 -0
  131. package/tests/unit/evaluate.test.ts +85 -0
  132. package/tests/unit/explain.test.ts +73 -0
  133. package/tests/unit/golden.test.ts +97 -0
  134. package/tests/unit/graph-er.test.ts +173 -0
  135. package/tests/unit/hnsw-ann.test.ts +283 -0
  136. package/tests/unit/hubspot-connector.test.ts +118 -0
  137. package/tests/unit/ingest.test.ts +97 -0
  138. package/tests/unit/learned-blocking.test.ts +134 -0
  139. package/tests/unit/lineage.test.ts +135 -0
  140. package/tests/unit/match-one.test.ts +129 -0
  141. package/tests/unit/matchkey.test.ts +97 -0
  142. package/tests/unit/mcp-server.test.ts +183 -0
  143. package/tests/unit/memory.test.ts +119 -0
  144. package/tests/unit/pipeline.test.ts +118 -0
  145. package/tests/unit/pprl-protocol.test.ts +381 -0
  146. package/tests/unit/probabilistic.test.ts +494 -0
  147. package/tests/unit/profiler.test.ts +68 -0
  148. package/tests/unit/review-queue.test.ts +68 -0
  149. package/tests/unit/salesforce-connector.test.ts +148 -0
  150. package/tests/unit/scorer.test.ts +301 -0
  151. package/tests/unit/sensitivity.test.ts +154 -0
  152. package/tests/unit/standardize.test.ts +84 -0
  153. package/tests/unit/streaming.test.ts +82 -0
  154. package/tests/unit/transforms.test.ts +208 -0
  155. package/tests/unit/tui-widgets.test.ts +42 -0
  156. package/tests/unit/tui.test.ts +24 -0
  157. package/tests/unit/validate.test.ts +145 -0
  158. package/tests/unit/workers-parallel.test.ts +99 -0
  159. package/tests/unit/workers.test.ts +74 -0
  160. package/tsconfig.json +25 -0
  161. package/tsup.config.ts +37 -0
  162. package/vitest.config.ts +11 -0
@@ -0,0 +1,756 @@
1
+ /**
2
+ * app.ts -- GoldenMatch interactive TUI built on `ink` (React for CLIs).
3
+ *
4
+ * This module loads `ink` and `react` lazily via `createRequire` so the rest
5
+ * of the package stays usable without those optional peer dependencies.
6
+ *
7
+ * The UI mirrors the Python Textual TUI: 6 tabs (Data, Config, Matches,
8
+ * Golden, Boost, Export) with keyboard navigation [1..6], [Tab] to cycle,
9
+ * [r] to run dedupe, [q] / [Esc] to quit.
10
+ *
11
+ * Richer ink-ecosystem addons (ink-table, ink-select-input, ink-text-input,
12
+ * ink-spinner, ink-gradient) are optional peer deps loaded lazily via
13
+ * ./widgets.js. Each tab degrades gracefully to plain text when an addon is
14
+ * not installed.
15
+ *
16
+ * Implementation notes:
17
+ * - Uses React.createElement directly (no JSX) so we don't need a JSX
18
+ * transform in the existing tsup build.
19
+ * - The `ink` / `react` modules are typed as `any` at the boundary because
20
+ * they're optional peer deps; we don't want to require `@types/react`
21
+ * just to satisfy strict typecheck.
22
+ */
23
+
24
+ import { createRequire } from "node:module";
25
+ import type { Row, GoldenMatchConfig, DedupeResult } from "../../core/types.js";
26
+ import { loadAddons, type LoadedAddons } from "./widgets.js";
27
+
28
+ const require = createRequire(import.meta.url);
29
+
30
+ // ---------------------------------------------------------------------------
31
+ // Optional peer dependency loaders
32
+ // ---------------------------------------------------------------------------
33
+
34
+ /* eslint-disable @typescript-eslint/no-explicit-any */
35
+
36
/**
 * Load the optional `ink` peer dependency through the module-level `require`.
 *
 * @returns The `ink` module object (typed `any` because it is an optional
 *   peer dependency).
 * @throws Error carrying installation instructions when the module cannot be
 *   loaded. The underlying failure is attached as `cause` so that problems
 *   other than "not installed" remain diagnosable.
 */
function loadInk(): any {
  try {
    return require("ink");
  } catch (err) {
    const failure = new Error(
      "'ink' and 'react' are required for the TUI. Install with: npm install ink react",
    );
    // Keep the real reason reachable: `require` can fail for reasons other
    // than a missing package (for instance a module format that `require`
    // cannot load), and the install hint alone would hide that.
    (failure as Error & { cause?: unknown }).cause = err;
    throw failure;
  }
}
45
+
46
/**
 * Load the optional `react` peer dependency through the module-level `require`.
 *
 * @returns The `react` module object (typed `any` because it is an optional
 *   peer dependency).
 * @throws Error carrying installation instructions when the module cannot be
 *   loaded. The underlying failure is attached as `cause`.
 */
function loadReact(): any {
  try {
    return require("react");
  } catch (err) {
    const failure = new Error(
      "'react' is required for the TUI. Install with: npm install react",
    );
    // Preserve the original error so a failure that is not simply
    // "package missing" is not masked by the install hint.
    (failure as Error & { cause?: unknown }).cause = err;
    throw failure;
  }
}
55
+
56
+ // ---------------------------------------------------------------------------
57
+ // Public API
58
+ // ---------------------------------------------------------------------------
59
+
60
/** Options accepted by `startTui`. Both members are optional. */
export interface TuiOptions {
  /**
   * Paths of the input files. They are read once when the app mounts and each
   * row is tagged with `__source__: "file_<index>"`.
   */
  readonly files?: ReadonlyArray<string>;
  /** Configuration shown on the Config tab and passed to `dedupe` on [r]. */
  readonly config?: GoldenMatchConfig;
}
64
+
65
+ /**
66
+ * Launch the GoldenMatch TUI. Resolves once the user quits.
67
+ */
68
+ export async function startTui(options: TuiOptions = {}): Promise<void> {
69
+ const ink = loadInk();
70
+ const React = loadReact();
71
+ const h = React.createElement;
72
+
73
+ // Load optional ink-ecosystem addons (each may be null).
74
+ const addons: LoadedAddons = await loadAddons();
75
+
76
+ // -------------------------------------------------------------------------
77
+ // File loader (lazy import so the TUI module stays light)
78
+ // -------------------------------------------------------------------------
79
+
80
/**
 * Read every file in `files` and return all rows in file order, each row
 * copied with a `__source__` field of the form `file_<index>`.
 *
 * The file connector is imported on demand so the TUI module stays light.
 */
const loadFiles = async (files: readonly string[]): Promise<Row[]> => {
  const { readFile: readRows } = await import("../connectors/file.js");
  const tagged: Row[] = [];
  for (const [fileIndex, path] of files.entries()) {
    const sourceTag = `file_${fileIndex}`;
    for (const record of readRows(path!)) {
      tagged.push({ ...record, __source__: sourceTag });
    }
  }
  return tagged;
};
92
+
93
+ // -------------------------------------------------------------------------
94
+ // Tab components
95
+ // -------------------------------------------------------------------------
96
+
97
// Upper bounds on how many columns / rows the tabular tabs render.
const MAX_TABLE_COLS = 5;
const MAX_TABLE_ROWS = 10;

/** Keys of `row` in insertion order, skipping internal ones prefixed "__". */
const visibleCols = (row: Row): string[] => {
  const names: string[] = [];
  for (const name of Object.keys(row)) {
    if (name.startsWith("__")) continue;
    names.push(name);
  }
  return names;
};
102
+
103
/**
 * Data tab: previews the loaded rows.
 *
 * Shows at most MAX_TABLE_ROWS rows and MAX_TABLE_COLS columns; the columns
 * are taken from the first row. Uses the optional table addon when present
 * and falls back to one `col=value | ...` text line per row otherwise.
 */
const DataTab = (props: { rows: readonly Row[] }) => {
  const allRows = props.rows;
  const total = allRows.length;
  if (total === 0) {
    return h(
      ink.Text,
      { dimColor: true },
      "No data loaded. Pass files as CLI args.",
    );
  }

  const shownCols = visibleCols(allRows[0]!).slice(0, MAX_TABLE_COLS);
  const preview = allRows.slice(0, MAX_TABLE_ROWS);
  const read = (row: Row, col: string): unknown =>
    (row as Record<string, unknown>)[col];

  // Plain-text fallback when the table addon is not installed.
  if (!addons.Table) {
    const colSummary = shownCols.length > 0 ? shownCols.join(", ") : "-";
    const lines = preview.map((row, n) => {
      const text = shownCols
        .map((col) => `${col}=${read(row, col) ?? ""}`)
        .join(" | ");
      return h(ink.Text, { key: `row-${n}`, dimColor: true }, text);
    });
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, {}, `${total} rows loaded`),
      h(ink.Text, { dimColor: true }, "Columns: " + colSummary),
      ...lines,
    );
  }

  // The table addon is given string cells only; null/undefined become "".
  const tableData = preview.map((row) => {
    const cells: Record<string, string> = {};
    for (const col of shownCols) {
      const raw = read(row, col);
      cells[col] = raw === undefined || raw === null ? "" : String(raw);
    }
    return cells;
  });
  const caption = `${total} rows, showing first ${preview.length} (cols: ${shownCols.join(", ")})`;
  return h(
    ink.Box,
    { flexDirection: "column" },
    h(ink.Text, {}, caption),
    h(addons.Table, { data: tableData }),
  );
};
161
+
162
/**
 * Config tab: summarises matchkeys and blocking settings.
 *
 * With the select-input addon the user can pick a matchkey to inspect; with
 * the text-input addon as well, the threshold of a non-exact matchkey can be
 * edited. Without the addons the matchkeys are listed as plain text.
 *
 * Editing writes the new threshold onto the matchkey object inside the config
 * that was passed in (in-place mutation), so a later dedupe run sees it.
 *
 * NOTE(review): the App-level key handler in this file also receives every
 * keystroke typed into the threshold field (digits 1-6 switch tabs, q/Esc
 * quit). Fixing that needs an "input captured" flag shared with App.
 */
const ConfigTab = (props: { config: GoldenMatchConfig | null }) => {
  const { config } = props;
  const mks = config?.matchkeys ?? config?.matchSettings ?? [];
  const blockingDesc = config?.blocking?.strategy ?? "-";
  const blockingKeys =
    config?.blocking?.keys?.map((k) => k.fields.join("+")).join(", ") ?? "-";

  // Hooks are declared unconditionally, before any early return, so their
  // order is the same on every render.
  const [selectedMk, setSelectedMk] = React.useState(null) as [
    number | null,
    (v: number | null) => void,
  ];
  const [thresholdDraft, setThresholdDraft] = React.useState("") as [
    string,
    (v: string) => void,
  ];

  const header = h(
    ink.Box,
    { flexDirection: "column" },
    h(ink.Text, { bold: true }, "Config"),
    h(ink.Text, {}, `Matchkeys: ${mks.length}`),
    h(ink.Text, {}, `Blocking: ${blockingDesc}, keys: ${blockingKeys}`),
  );

  if (mks.length === 0) {
    return header;
  }

  // Display form of a matchkey threshold; exact matchkeys show "-".
  const mkThreshold = (mk: { readonly type: string }): string => {
    if (mk.type === "exact") return "-";
    const t = (mk as { threshold?: number }).threshold;
    return t === undefined ? "-" : String(t);
  };

  // A stale or out-of-range selection is treated as "nothing selected", so
  // no state update is needed while rendering.
  const activeMk = selectedMk === null ? undefined : mks[selectedMk];

  if (addons.SelectInput && activeMk === undefined) {
    const items = mks.map((mk, i) => ({
      label: `${mk.name} (${mk.type}) threshold=${mkThreshold(mk)}`,
      value: String(i),
    }));
    return h(
      ink.Box,
      { flexDirection: "column" },
      header,
      h(ink.Text, { dimColor: true }, "Select a matchkey to inspect:"),
      h(addons.SelectInput, {
        items,
        onSelect: (item: { value: string }) => {
          const idx = Number(item.value);
          setSelectedMk(idx);
          const picked = mks[idx];
          const thr =
            picked && picked.type !== "exact"
              ? ((picked as { threshold?: number }).threshold ?? "")
              : "";
          setThresholdDraft(String(thr));
        },
      }),
    );
  }

  if (addons.SelectInput && activeMk !== undefined) {
    const mk = activeMk;
    const fields = mk.fields.map((f) => f.field).join(", ");
    // Exact matchkeys are displayed without a threshold ("-"), so they are
    // not offered for editing either.
    const editable = Boolean(addons.TextInput) && mk.type !== "exact";
    const backToList = () => setSelectedMk(null);

    const thresholdRow = editable
      ? h(
          ink.Box,
          {},
          h(ink.Text, {}, " threshold: "),
          h(addons.TextInput, {
            value: thresholdDraft,
            onChange: setThresholdDraft,
            onSubmit: (value: string) => {
              const text = value.trim();
              // Number("") and Number("  ") are 0, so blank input has to be
              // rejected explicitly; non-finite values are rejected as well.
              const n = text === "" ? Number.NaN : Number(text);
              if (Number.isFinite(n)) {
                (mk as { threshold?: number }).threshold = n;
              }
              backToList();
            },
          }),
        )
      : h(
          ink.Text,
          { dimColor: true },
          addons.TextInput
            ? ` threshold: ${mkThreshold(mk)}`
            : ` threshold: ${mkThreshold(mk)} (install ink-text-input to edit)`,
        );

    // Esc is handled by the App-level handler (it quits), so the way back is
    // either submitting the field or an explicit "back" entry.
    const footer = editable
      ? h(ink.Text, { dimColor: true }, "[Enter] save and return to list")
      : h(addons.SelectInput, {
          items: [{ label: "Back to matchkey list", value: "back" }],
          onSelect: backToList,
        });

    return h(
      ink.Box,
      { flexDirection: "column" },
      header,
      h(ink.Text, { bold: true }, `Editing matchkey: ${mk.name}`),
      h(ink.Text, {}, ` type: ${mk.type}`),
      h(ink.Text, {}, ` fields: ${fields}`),
      thresholdRow,
      footer,
    );
  }

  // Fallback: plain listing
  return h(
    ink.Box,
    { flexDirection: "column" },
    header,
    ...mks.map((mk, i) =>
      h(
        ink.Text,
        { key: `mk-${i}`, dimColor: true },
        ` - ${mk.name} (${mk.type}), threshold=${mkThreshold(mk)}, fields: ${mk.fields
          .map((f) => f.field)
          .join(", ")}`,
      ),
    ),
  );
};
281
+
282
/**
 * Matches tab: lists the first MAX_TABLE_ROWS scored pairs.
 *
 * With the table addon the pairs are shown as a table, followed by a selector
 * (when the select-input addon is present) that opens a detail view for one
 * pair. Without the table addon the pairs are printed one per line.
 */
const MatchesTab = (props: { result: DedupeResult | null }) => {
  const outcome = props.result;
  const [selectedPair, setSelectedPair] = React.useState(null) as [
    number | null,
    (v: number | null) => void,
  ];

  if (!outcome) {
    return h(
      ink.Text,
      { dimColor: true },
      "No results yet. Press 'r' to run dedupe.",
    );
  }

  const shown = outcome.scoredPairs.slice(0, MAX_TABLE_ROWS);
  if (shown.length === 0) {
    return h(ink.Text, {}, "No scored pairs");
  }

  // Shared selector callback: the item value is the pair's index as a string.
  const openPair = (item: { value: string }) =>
    setSelectedPair(Number(item.value));

  // Drill-in view
  if (selectedPair !== null && addons.SelectInput) {
    const current = shown[selectedPair];
    if (!current) {
      // The stored index no longer points at a pair; drop the selection.
      setSelectedPair(null);
      return h(ink.Text, {}, "");
    }
    const choices = shown.map((candidate, n) => ({
      label: `${candidate.idA} <-> ${candidate.idB} (${candidate.score.toFixed(3)})`,
      value: String(n),
    }));
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(
        ink.Text,
        { bold: true },
        `Pair detail ${selectedPair + 1}/${shown.length}`,
      ),
      h(ink.Text, {}, ` idA: ${current.idA}`),
      h(ink.Text, {}, ` idB: ${current.idB}`),
      h(ink.Text, {}, ` score: ${current.score.toFixed(4)}`),
      h(ink.Text, { dimColor: true }, "(select another pair from list)"),
      h(addons.SelectInput, { items: choices, onSelect: openPair }),
    );
  }

  // Plain-text listing when the table addon is missing.
  if (!addons.Table) {
    const lines = shown.map((entry, n) =>
      h(
        ink.Text,
        { key: `pair-${n}` },
        ` ${entry.idA} <-> ${entry.idB}: ${entry.score.toFixed(3)}`,
      ),
    );
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(
        ink.Text,
        { bold: true },
        `Scored pairs: ${outcome.scoredPairs.length}`,
      ),
      ...lines,
    );
  }

  const tableRows = shown.map((entry) => ({
    idA: String(entry.idA),
    idB: String(entry.idB),
    score: entry.score.toFixed(4),
  }));
  let picker;
  if (addons.SelectInput) {
    picker = h(addons.SelectInput, {
      items: shown.map((entry, n) => ({
        label: `Inspect ${entry.idA} <-> ${entry.idB}`,
        value: String(n),
      })),
      onSelect: openPair,
    });
  } else {
    picker = h(
      ink.Text,
      { dimColor: true },
      "(install ink-select-input for drill-in)",
    );
  }
  return h(
    ink.Box,
    { flexDirection: "column" },
    h(
      ink.Text,
      { bold: true },
      `Scored pairs: ${outcome.scoredPairs.length} (showing first ${shown.length})`,
    ),
    h(addons.Table, { data: tableRows }),
    picker,
  );
};
376
+
377
/**
 * Golden tab: shows the total number of golden records and a preview of the
 * first MAX_TABLE_ROWS of them, as a table when the table addon is installed
 * and otherwise as JSON text cut to 100 characters per record.
 */
const GoldenTab = (props: { result: DedupeResult | null }) => {
  const outcome = props.result;
  if (!outcome) {
    return h(ink.Text, { dimColor: true }, "No results yet.");
  }

  const heading = h(
    ink.Text,
    { bold: true },
    `Golden records: ${outcome.goldenRecords.length}`,
  );
  const preview = outcome.goldenRecords.slice(0, MAX_TABLE_ROWS);
  if (preview.length === 0) {
    return heading;
  }

  if (!addons.Table) {
    const lines = preview.map((record, n) =>
      h(
        ink.Text,
        { key: `g-${n}`, dimColor: true },
        JSON.stringify(record).slice(0, 100),
      ),
    );
    return h(ink.Box, { flexDirection: "column" }, heading, ...lines);
  }

  // Columns come from the first record; cells are stringified for the table.
  const shownCols = visibleCols(preview[0]!).slice(0, MAX_TABLE_COLS);
  const tableRows = preview.map((record) => {
    const cells: Record<string, string> = {};
    for (const col of shownCols) {
      const raw = (record as Record<string, unknown>)[col];
      cells[col] = raw === undefined || raw === null ? "" : String(raw);
    }
    return cells;
  });
  return h(
    ink.Box,
    { flexDirection: "column" },
    heading,
    h(addons.Table, { data: tableRows }),
  );
};
429
+
430
/**
 * Boost tab: walks the user through up to 20 "borderline" pairs (score in
 * [0.7, 0.9)) and records a y / n / s label for each. Labels are kept in
 * component state only; once every pair is labelled a tally is shown.
 * Labelling requires the select-input addon.
 */
const BoostTab = (props: { result: DedupeResult | null }) => {
  const outcome = props.result;
  const [idx, setIdx] = React.useState(0) as [
    number,
    (v: number | ((prev: number) => number)) => void,
  ];
  const [labels, setLabels] = React.useState({}) as [
    Record<number, string>,
    (v: Record<number, string>) => void,
  ];

  if (!outcome) {
    return h(
      ink.Text,
      { dimColor: true },
      "No results yet. Press 'r' to run dedupe.",
    );
  }

  const isBorderline = (candidate: { score: number }): boolean =>
    candidate.score >= 0.7 && candidate.score < 0.9;
  const candidates = outcome.scoredPairs.filter(isBorderline);
  const borderline = candidates.slice(0, 20);
  const queueLength = borderline.length;

  if (queueLength === 0) {
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, { bold: true }, "Boost - active learning"),
      h(ink.Text, {}, "No borderline pairs (0.7-0.9 score) to label."),
    );
  }

  // Past the end of the queue: show how many of each label were given.
  if (idx >= queueLength) {
    const tally: Record<string, number> = {};
    for (const verdict of Object.values(labels)) {
      tally[verdict] = (tally[verdict] ?? 0) + 1;
    }
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, { color: "green", bold: true }, "All pairs labeled!"),
      h(
        ink.Text,
        {},
        `y=${tally["y"] ?? 0} n=${tally["n"] ?? 0} s=${tally["s"] ?? 0}`,
      ),
    );
  }

  const pair = borderline[idx]!;
  const position = `${idx + 1}/${queueLength}`;
  const scoreText = pair.score.toFixed(3);

  if (!addons.SelectInput) {
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, { bold: true }, "Boost - active learning"),
      h(ink.Text, { dimColor: true }, "Label borderline pairs: y/n/s (skip)"),
      h(
        ink.Text,
        {},
        `Pair ${position}: ${pair.idA} <-> ${pair.idB} (${scoreText})`,
      ),
      h(
        ink.Text,
        { dimColor: true },
        "Install ink-select-input for interactive labeling",
      ),
    );
  }

  const verdictChoices = [
    { label: "Yes, this is a match", value: "y" },
    { label: "No, different entities", value: "n" },
    { label: "Skip", value: "s" },
  ];
  // Store the label under the current position, then advance to the next pair.
  const recordVerdict = (item: { value: string }) => {
    setLabels({ ...labels, [idx]: item.value });
    setIdx((prev) => prev + 1);
  };
  return h(
    ink.Box,
    { flexDirection: "column" },
    h(ink.Text, { bold: true }, `Pair ${position} - Score: ${scoreText}`),
    h(ink.Text, {}, ` Record ${pair.idA}`),
    h(ink.Text, {}, ` Record ${pair.idB}`),
    h(addons.SelectInput, { items: verdictChoices, onSelect: recordVerdict }),
  );
};
526
+
527
/**
 * Export tab: lets the user pick an output format (CSV / JSON) through the
 * select-input addon and reports progress through `setStatus`.
 *
 * NOTE(review): the export is simulated with a 400 ms timer. Nothing is
 * written anywhere, although the status line reports "Export complete".
 *
 * @param props.result    Dedupe result to summarise, or null before a run.
 * @param props.setStatus Setter for the App footer status text.
 */
const ExportTab = (props: {
  result: DedupeResult | null;
  setStatus: (s: string) => void;
}) => {
  const { result, setStatus } = props;
  const [exporting, setExporting] = React.useState(false) as [
    boolean,
    (v: boolean) => void,
  ];
  const [done, setDone] = React.useState(null) as [
    string | null,
    (v: string | null) => void,
  ];

  // Tracks whether this tab is still mounted. The user can switch tabs
  // (unmounting this component) while the simulated write is pending, and
  // local state must not be updated after that.
  const mountedRef = React.useRef(true) as { current: boolean };
  React.useEffect(() => {
    mountedRef.current = true;
    return () => {
      mountedRef.current = false;
    };
  }, []);

  if (!result) {
    return h(ink.Text, { dimColor: true }, "No results yet.");
  }

  const doExport = (format: string) => {
    setExporting(true);
    setDone(null);
    setStatus(`Exporting as ${format}...`);
    // Simulate async write. Real impl would dispatch to a writer.
    setTimeout(() => {
      if (mountedRef.current) {
        setExporting(false);
        setDone(format);
      }
      // The status line belongs to App, so it is updated even when this tab
      // has been left in the meantime.
      setStatus(`Export complete (${format})`);
    }, 400);
  };

  if (exporting) {
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, { bold: true }, "Export"),
      addons.Spinner
        ? h(
            ink.Box,
            {},
            h(addons.Spinner, { type: "dots" }),
            h(ink.Text, {}, " writing..."),
          )
        : h(ink.Text, {}, "writing..."),
    );
  }

  const summary = h(
    ink.Text,
    {},
    `Ready: ${result.goldenRecords.length} golden, ${result.dupes.length} dupes, ${result.unique.length} unique`,
  );

  if (addons.SelectInput) {
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, { bold: true }, "Export"),
      summary,
      done
        ? h(
            ink.Text,
            { color: "green" },
            `Last export: ${done}. Choose another format to export again.`,
          )
        : h(ink.Text, { dimColor: true }, "Choose output format:"),
      h(addons.SelectInput, {
        items: [
          { label: "CSV", value: "csv" },
          { label: "JSON", value: "json" },
        ],
        onSelect: (item: { value: string }) => doExport(item.value),
      }),
    );
  }

  // Without the select-input addon there is no way to trigger an export: no
  // key handler in this file reacts to g / d / u, so the hint points at the
  // missing addon instead (matching the fallback hints of the other tabs).
  return h(
    ink.Box,
    { flexDirection: "column" },
    h(ink.Text, { bold: true }, "Export"),
    h(
      ink.Text,
      { dimColor: true },
      "Install ink-select-input to choose an export format",
    ),
    summary,
  );
};
614
+
615
+ // -------------------------------------------------------------------------
616
+ // Top-level App
617
+ // -------------------------------------------------------------------------
618
+
619
/**
 * Top-level TUI component: owns the active tab, the loaded rows, the dedupe
 * result and the footer status line, and wires up global keyboard handling.
 *
 * Keys: [q] / [Esc] quit, [1..N] jump to a tab, [Tab] cycle, [r] run dedupe.
 * Files from `props.options.files` are loaded once on mount.
 */
const App = (props: { options: TuiOptions }) => {
  const [tab, setTab] = React.useState(0) as [
    number,
    (v: number | ((prev: number) => number)) => void,
  ];
  const [rows, setRows] = React.useState([]) as [
    readonly Row[],
    (v: readonly Row[]) => void,
  ];
  const [result, setResult] = React.useState(null) as [
    DedupeResult | null,
    (v: DedupeResult | null) => void,
  ];
  const [config] = React.useState(props.options.config ?? null) as [
    GoldenMatchConfig | null,
    (v: GoldenMatchConfig | null) => void,
  ];
  const [status, setStatus] = React.useState("Ready") as [
    string,
    (v: string) => void,
  ];

  const { exit } = ink.useApp();

  // Single source of truth for the tab bar, the digit shortcuts and the
  // [Tab] cycle length.
  const tabs = ["Data", "Config", "Matches", "Golden", "Boost", "Export"];

  // Runs dedupe over the loaded rows and reports the outcome in the status
  // line. The core API is imported on demand.
  const runDedupe = React.useCallback(async () => {
    if (rows.length === 0) {
      setStatus("No rows loaded");
      return;
    }
    setStatus("Running dedupe...");
    try {
      const { dedupe } = await import("../../core/api.js");
      const r = dedupe(rows, config ? { config } : {});
      setResult(r);
      setStatus(`Complete: ${r.stats.totalClusters} clusters`);
    } catch (err) {
      const msg = err instanceof Error ? err.message : String(err);
      setStatus(`Error: ${msg}`);
    }
  }, [rows, config]);

  // NOTE(review): this handler also receives keystrokes typed into the
  // Config tab's threshold text input, so digits 1-6 switch tabs and q/Esc
  // quit while the user is typing. A fix needs ConfigTab to report when it
  // is capturing input so this handler can be deactivated.
  ink.useInput((input: string, key: any) => {
    if (key.escape || input === "q") {
      exit();
      return;
    }
    // "1" selects tabs[0], "2" selects tabs[1], and so on.
    const shortcut = tabs.findIndex((_name, i) => input === String(i + 1));
    if (shortcut !== -1) setTab(shortcut);
    else if (key.tab) setTab((t: number) => (t + 1) % tabs.length);
    else if (input === "r") {
      void runDedupe();
    }
  });

  React.useEffect(() => {
    const files = props.options.files;
    if (!files || files.length === 0) {
      return undefined;
    }
    // Guards against state updates after unmount, e.g. when the user quits
    // while the files are still being read.
    let active = true;
    loadFiles(files)
      .then((rs) => {
        if (!active) return;
        setRows(rs);
        setStatus(`Loaded ${rs.length} rows from ${files.length} file(s)`);
      })
      .catch((err: unknown) => {
        if (!active) return;
        const msg = err instanceof Error ? err.message : String(err);
        setStatus(`Error: ${msg}`);
      });
    return () => {
      active = false;
    };
    // Intentionally run once on mount with the initial options.
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  let body: any = null;
  if (tab === 0) body = h(DataTab, { rows });
  else if (tab === 1) body = h(ConfigTab, { config });
  else if (tab === 2) body = h(MatchesTab, { result });
  else if (tab === 3) body = h(GoldenTab, { result });
  else if (tab === 4) body = h(BoostTab, { result });
  else if (tab === 5) body = h(ExportTab, { result, setStatus });

  const titleText = "GoldenMatch TUI - v0.1.0";
  const title = addons.Gradient
    ? h(
        addons.Gradient,
        { name: "rainbow" },
        h(ink.Text, { bold: true }, titleText),
      )
    : h(ink.Text, { bold: true, color: "cyan" }, titleText);

  return h(
    ink.Box,
    { flexDirection: "column", padding: 1 },
    // Header
    h(ink.Box, { borderStyle: "double", paddingX: 1 }, title),
    // Tab bar
    h(
      ink.Box,
      { marginTop: 1 },
      ...tabs.map((name: string, i: number) =>
        h(
          ink.Box,
          { key: `tab-${i}`, marginRight: 2 },
          h(
            ink.Text,
            { color: tab === i ? "green" : "gray", bold: tab === i },
            `[${i + 1}] ${name}`,
          ),
        ),
      ),
    ),
    // Tab content
    h(
      ink.Box,
      { marginTop: 1, flexDirection: "column", minHeight: 10 },
      body,
    ),
    // Footer
    h(
      ink.Box,
      { marginTop: 1, borderStyle: "single", paddingX: 1 },
      h(
        ink.Text,
        { dimColor: true },
        `[q]uit [1-${tabs.length}] tabs [Tab] cycle [r]un dedupe * ${status}`,
      ),
    ),
  );
};
751
+
752
+ const { waitUntilExit } = ink.render(h(App, { options }));
753
+ await waitUntilExit();
754
+ }
755
+
756
+ /* eslint-enable @typescript-eslint/no-explicit-any */