goldenmatch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -0
- package/dist/cli.cjs +6079 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.d.cts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +6076 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/index.cjs +8449 -0
- package/dist/core/index.cjs.map +1 -0
- package/dist/core/index.d.cts +1972 -0
- package/dist/core/index.d.ts +1972 -0
- package/dist/core/index.js +8318 -0
- package/dist/core/index.js.map +1 -0
- package/dist/index.cjs +8449 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +8318 -0
- package/dist/index.js.map +1 -0
- package/dist/node/backends/score-worker.cjs +934 -0
- package/dist/node/backends/score-worker.cjs.map +1 -0
- package/dist/node/backends/score-worker.d.cts +14 -0
- package/dist/node/backends/score-worker.d.ts +14 -0
- package/dist/node/backends/score-worker.js +932 -0
- package/dist/node/backends/score-worker.js.map +1 -0
- package/dist/node/index.cjs +11430 -0
- package/dist/node/index.cjs.map +1 -0
- package/dist/node/index.d.cts +554 -0
- package/dist/node/index.d.ts +554 -0
- package/dist/node/index.js +11277 -0
- package/dist/node/index.js.map +1 -0
- package/dist/types-DhUdX5Rc.d.cts +304 -0
- package/dist/types-DhUdX5Rc.d.ts +304 -0
- package/examples/01-basic-dedupe.ts +60 -0
- package/examples/02-match-two-datasets.ts +48 -0
- package/examples/03-csv-file-pipeline.ts +62 -0
- package/examples/04-string-scoring.ts +63 -0
- package/examples/05-custom-config.ts +94 -0
- package/examples/06-probabilistic-fs.ts +72 -0
- package/examples/07-pprl-privacy.ts +76 -0
- package/examples/08-streaming.ts +79 -0
- package/examples/09-llm-scorer.ts +79 -0
- package/examples/10-explain.ts +60 -0
- package/examples/11-evaluate.ts +61 -0
- package/examples/README.md +53 -0
- package/package.json +66 -0
- package/src/cli.ts +372 -0
- package/src/core/ann-blocker.ts +593 -0
- package/src/core/api.ts +220 -0
- package/src/core/autoconfig.ts +363 -0
- package/src/core/autofix.ts +102 -0
- package/src/core/blocker.ts +655 -0
- package/src/core/cluster.ts +699 -0
- package/src/core/compare-clusters.ts +176 -0
- package/src/core/config/loader.ts +869 -0
- package/src/core/cross-encoder.ts +614 -0
- package/src/core/data.ts +430 -0
- package/src/core/domain.ts +277 -0
- package/src/core/embedder.ts +562 -0
- package/src/core/evaluate.ts +156 -0
- package/src/core/explain.ts +352 -0
- package/src/core/golden.ts +524 -0
- package/src/core/graph-er.ts +371 -0
- package/src/core/index.ts +314 -0
- package/src/core/ingest.ts +112 -0
- package/src/core/learned-blocking.ts +305 -0
- package/src/core/lineage.ts +221 -0
- package/src/core/llm/budget.ts +258 -0
- package/src/core/llm/cluster.ts +542 -0
- package/src/core/llm/scorer.ts +396 -0
- package/src/core/match-one.ts +95 -0
- package/src/core/matchkey.ts +97 -0
- package/src/core/memory/corrections.ts +179 -0
- package/src/core/memory/learner.ts +218 -0
- package/src/core/memory/store.ts +114 -0
- package/src/core/pipeline.ts +366 -0
- package/src/core/pprl/protocol.ts +216 -0
- package/src/core/probabilistic.ts +511 -0
- package/src/core/profiler.ts +212 -0
- package/src/core/quality.ts +197 -0
- package/src/core/review-queue.ts +177 -0
- package/src/core/scorer.ts +855 -0
- package/src/core/sensitivity.ts +196 -0
- package/src/core/standardize.ts +279 -0
- package/src/core/streaming.ts +128 -0
- package/src/core/transforms.ts +599 -0
- package/src/core/types.ts +570 -0
- package/src/core/validate.ts +243 -0
- package/src/index.ts +8 -0
- package/src/node/a2a/server.ts +470 -0
- package/src/node/api/server.ts +412 -0
- package/src/node/backends/duckdb.ts +130 -0
- package/src/node/backends/score-worker.ts +41 -0
- package/src/node/backends/workers.ts +212 -0
- package/src/node/config-file.ts +66 -0
- package/src/node/connectors/base.ts +57 -0
- package/src/node/connectors/bigquery.ts +61 -0
- package/src/node/connectors/databricks.ts +69 -0
- package/src/node/connectors/file.ts +350 -0
- package/src/node/connectors/hubspot.ts +62 -0
- package/src/node/connectors/index.ts +43 -0
- package/src/node/connectors/salesforce.ts +93 -0
- package/src/node/connectors/snowflake.ts +73 -0
- package/src/node/db/postgres.ts +173 -0
- package/src/node/db/sync.ts +103 -0
- package/src/node/dedupe-file.ts +156 -0
- package/src/node/index.ts +89 -0
- package/src/node/mcp/server.ts +940 -0
- package/src/node/tui/app.ts +756 -0
- package/src/node/tui/index.ts +6 -0
- package/src/node/tui/widgets.ts +128 -0
- package/tests/parity/scorer-ground-truth.test.ts +118 -0
- package/tests/smoke.test.ts +46 -0
- package/tests/unit/a2a-server.test.ts +175 -0
- package/tests/unit/ann-blocker.test.ts +117 -0
- package/tests/unit/api-server.test.ts +239 -0
- package/tests/unit/api.test.ts +77 -0
- package/tests/unit/autoconfig.test.ts +103 -0
- package/tests/unit/autofix.test.ts +71 -0
- package/tests/unit/blocker.test.ts +164 -0
- package/tests/unit/buildBlocksAsync.test.ts +63 -0
- package/tests/unit/cluster.test.ts +213 -0
- package/tests/unit/compare-clusters.test.ts +42 -0
- package/tests/unit/config-loader.test.ts +301 -0
- package/tests/unit/connectors-base.test.ts +48 -0
- package/tests/unit/cross-encoder-model.test.ts +198 -0
- package/tests/unit/cross-encoder.test.ts +173 -0
- package/tests/unit/db-connectors.test.ts +37 -0
- package/tests/unit/domain.test.ts +80 -0
- package/tests/unit/embedder.test.ts +151 -0
- package/tests/unit/evaluate.test.ts +85 -0
- package/tests/unit/explain.test.ts +73 -0
- package/tests/unit/golden.test.ts +97 -0
- package/tests/unit/graph-er.test.ts +173 -0
- package/tests/unit/hnsw-ann.test.ts +283 -0
- package/tests/unit/hubspot-connector.test.ts +118 -0
- package/tests/unit/ingest.test.ts +97 -0
- package/tests/unit/learned-blocking.test.ts +134 -0
- package/tests/unit/lineage.test.ts +135 -0
- package/tests/unit/match-one.test.ts +129 -0
- package/tests/unit/matchkey.test.ts +97 -0
- package/tests/unit/mcp-server.test.ts +183 -0
- package/tests/unit/memory.test.ts +119 -0
- package/tests/unit/pipeline.test.ts +118 -0
- package/tests/unit/pprl-protocol.test.ts +381 -0
- package/tests/unit/probabilistic.test.ts +494 -0
- package/tests/unit/profiler.test.ts +68 -0
- package/tests/unit/review-queue.test.ts +68 -0
- package/tests/unit/salesforce-connector.test.ts +148 -0
- package/tests/unit/scorer.test.ts +301 -0
- package/tests/unit/sensitivity.test.ts +154 -0
- package/tests/unit/standardize.test.ts +84 -0
- package/tests/unit/streaming.test.ts +82 -0
- package/tests/unit/transforms.test.ts +208 -0
- package/tests/unit/tui-widgets.test.ts +42 -0
- package/tests/unit/tui.test.ts +24 -0
- package/tests/unit/validate.test.ts +145 -0
- package/tests/unit/workers-parallel.test.ts +99 -0
- package/tests/unit/workers.test.ts +74 -0
- package/tsconfig.json +25 -0
- package/tsup.config.ts +37 -0
- package/vitest.config.ts +11 -0
|
@@ -0,0 +1,756 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* app.ts -- GoldenMatch interactive TUI built on `ink` (React for CLIs).
|
|
3
|
+
*
|
|
4
|
+
* This module loads `ink` and `react` lazily via `createRequire` so the rest
|
|
5
|
+
* of the package stays usable without those optional peer dependencies.
|
|
6
|
+
*
|
|
7
|
+
* The UI mirrors the Python Textual TUI: 6 tabs (Data, Config, Matches,
|
|
8
|
+
* Golden, Boost, Export) with keyboard navigation [1..6], [Tab] to cycle,
|
|
9
|
+
* [r] to run dedupe, [q] / [Esc] to quit.
|
|
10
|
+
*
|
|
11
|
+
* Richer ink-ecosystem addons (ink-table, ink-select-input, ink-text-input,
|
|
12
|
+
* ink-spinner, ink-gradient) are optional peer deps loaded lazily via
|
|
13
|
+
* ./widgets.js. Each tab degrades gracefully to plain text when an addon is
|
|
14
|
+
* not installed.
|
|
15
|
+
*
|
|
16
|
+
* Implementation notes:
|
|
17
|
+
* - Uses React.createElement directly (no JSX) so we don't need a JSX
|
|
18
|
+
* transform in the existing tsup build.
|
|
19
|
+
* - The `ink` / `react` modules are typed as `any` at the boundary because
|
|
20
|
+
* they're optional peer deps; we don't want to require `@types/react`
|
|
21
|
+
* just to satisfy strict typecheck.
|
|
22
|
+
*/
|
|
23
|
+
|
|
24
|
+
import { createRequire } from "node:module";
|
|
25
|
+
import type { Row, GoldenMatchConfig, DedupeResult } from "../../core/types.js";
|
|
26
|
+
import { loadAddons, type LoadedAddons } from "./widgets.js";
|
|
27
|
+
|
|
28
|
+
const require = createRequire(import.meta.url);
|
|
29
|
+
|
|
30
|
+
// ---------------------------------------------------------------------------
|
|
31
|
+
// Optional peer dependency loaders
|
|
32
|
+
// ---------------------------------------------------------------------------
|
|
33
|
+
|
|
34
|
+
/* eslint-disable @typescript-eslint/no-explicit-any */
|
|
35
|
+
|
|
36
|
+
/**
 * Resolve the optional `ink` peer dependency at call time.
 *
 * Whatever `require("ink")` throws is replaced by an Error carrying an
 * installation hint.
 */
function loadInk(): any {
  let inkModule: any;
  try {
    inkModule = require("ink");
  } catch {
    const hint =
      "'ink' and 'react' are required for the TUI. Install with: npm install ink react";
    throw new Error(hint);
  }
  return inkModule;
}
|
|
45
|
+
|
|
46
|
+
/**
 * Resolve the optional `react` peer dependency at call time.
 *
 * Whatever `require("react")` throws is replaced by an Error carrying an
 * installation hint.
 */
function loadReact(): any {
  try {
    const reactModule: any = require("react");
    return reactModule;
  } catch {
    const hint = "'react' is required for the TUI. Install with: npm install react";
    throw new Error(hint);
  }
}
|
|
55
|
+
|
|
56
|
+
// ---------------------------------------------------------------------------
|
|
57
|
+
// Public API
|
|
58
|
+
// ---------------------------------------------------------------------------
|
|
59
|
+
|
|
60
|
+
/** Options accepted by `startTui`. Both fields are optional. */
export interface TuiOptions {
  /** Paths of input files; when non-empty they are loaded once the UI starts. */
  readonly files?: ReadonlyArray<string>;
  /** Configuration shown on the Config tab and passed to the dedupe run. */
  readonly config?: GoldenMatchConfig;
}
|
|
64
|
+
|
|
65
|
+
/**
|
|
66
|
+
* Launch the GoldenMatch TUI. Resolves once the user quits.
|
|
67
|
+
*/
|
|
68
|
+
export async function startTui(options: TuiOptions = {}): Promise<void> {
|
|
69
|
+
const ink = loadInk();
|
|
70
|
+
const React = loadReact();
|
|
71
|
+
const h = React.createElement;
|
|
72
|
+
|
|
73
|
+
// Load optional ink-ecosystem addons (each may be null).
|
|
74
|
+
const addons: LoadedAddons = await loadAddons();
|
|
75
|
+
|
|
76
|
+
// -------------------------------------------------------------------------
|
|
77
|
+
// File loader (lazy import so the TUI module stays light)
|
|
78
|
+
// -------------------------------------------------------------------------
|
|
79
|
+
|
|
80
|
+
/**
 * Read every file in `files` and return all rows in one array.
 *
 * Each row is copied and tagged with `__source__ = "file_<index>"`, where
 * `<index>` is the position of its file in `files`. The file connector is
 * imported on first use rather than at module load.
 */
const loadFiles = async (files: readonly string[]): Promise<Row[]> => {
  const { readFile } = await import("../connectors/file.js");
  const merged: Row[] = [];
  for (const [fileIndex, path] of files.entries()) {
    const sourceTag = `file_${fileIndex}`;
    for (const record of readFile(path!)) {
      merged.push({ ...record, __source__: sourceTag });
    }
  }
  return merged;
};
|
|
92
|
+
|
|
93
|
+
// -------------------------------------------------------------------------
|
|
94
|
+
// Tab components
|
|
95
|
+
// -------------------------------------------------------------------------
|
|
96
|
+
|
|
97
|
+
// Display limits used by the tabs that render tabular previews.
const MAX_TABLE_COLS = 5;
const MAX_TABLE_ROWS = 10;

/** Keys of `row` in object order, skipping any that start with `__`. */
const visibleCols = (row: Row): string[] => {
  const names: string[] = [];
  for (const name of Object.keys(row)) {
    if (name.startsWith("__")) continue;
    names.push(name);
  }
  return names;
};
|
|
102
|
+
|
|
103
|
+
/**
 * "Data" tab: previews the loaded rows.
 *
 * At most MAX_TABLE_ROWS rows and MAX_TABLE_COLS columns are shown, and the
 * column set is taken from the first row only. Renders through the optional
 * Table addon when present, otherwise as plain `col=value` lines.
 */
const DataTab = (props: { rows: readonly Row[] }) => {
  const total = props.rows.length;
  if (total === 0) {
    return h(
      ink.Text,
      { dimColor: true },
      "No data loaded. Pass files as CLI args.",
    );
  }

  const columns = visibleCols(props.rows[0]!).slice(0, MAX_TABLE_COLS);
  const preview = props.rows.slice(0, MAX_TABLE_ROWS);

  if (!addons.Table) {
    // Plain-text fallback: one dimmed line per previewed row.
    const columnList = columns.length > 0 ? columns.join(", ") : "-";
    const lines = preview.map((record, position) => {
      const parts = columns.map((name) => {
        const raw = (record as Record<string, unknown>)[name];
        return `${name}=${raw ?? ""}`;
      });
      return h(
        ink.Text,
        { key: `row-${position}`, dimColor: true },
        parts.join(" | "),
      );
    });
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, {}, `${total} rows loaded`),
      h(ink.Text, { dimColor: true }, "Columns: " + columnList),
      ...lines,
    );
  }

  // Table addon: every cell is stringified; null/undefined become "".
  const tableData = preview.map((record) =>
    columns.reduce<Record<string, string>>((cells, name) => {
      const raw = (record as Record<string, unknown>)[name];
      cells[name] = raw === undefined || raw === null ? "" : String(raw);
      return cells;
    }, {}),
  );
  const caption = `${total} rows, showing first ${preview.length} (cols: ${columns.join(", ")})`;
  return h(
    ink.Box,
    { flexDirection: "column" },
    h(ink.Text, {}, caption),
    h(addons.Table, { data: tableData }),
  );
};
|
|
161
|
+
|
|
162
|
+
/**
 * "Config" tab: summarises the active configuration (matchkey count and
 * blocking settings) and lists the matchkeys.
 *
 * With the optional SelectInput addon the user can pick a matchkey to
 * inspect; with TextInput as well, the threshold can be edited. A submitted
 * threshold is written directly onto the matchkey object inside `config`,
 * i.e. the configuration is mutated in place.
 *
 * NOTE(review): the app-level key handler in this file reacts to Esc, "q",
 * "r" and the digits 1-6. It looks like those keys also reach it while the
 * threshold field is being edited, and Esc quits rather than going "back" as
 * the hint below says — confirm against ink's `useInput` semantics.
 */
const ConfigTab = (props: { config: GoldenMatchConfig | null }) => {
  const { config } = props;
  const mks = config?.matchkeys ?? config?.matchSettings ?? [];
  const blockingDesc = config?.blocking?.strategy ?? "-";
  const blockingKeys =
    config?.blocking?.keys?.map((k) => k.fields.join("+")).join(", ") ?? "-";

  // Hooks are declared unconditionally, ahead of every early return, so the
  // hook order is the same on each render.
  const [selectedMk, setSelectedMk] = React.useState(null) as [
    number | null,
    (v: number | null) => void,
  ];
  const [thresholdDraft, setThresholdDraft] = React.useState("") as [
    string,
    (v: string) => void,
  ];

  const header = h(
    ink.Box,
    { flexDirection: "column" },
    h(ink.Text, { bold: true }, "Config"),
    h(ink.Text, {}, `Matchkeys: ${mks.length}`),
    h(ink.Text, {}, `Blocking: ${blockingDesc}, keys: ${blockingKeys}`),
  );

  if (mks.length === 0) {
    return header;
  }

  // Threshold as display text; "-" for exact matchkeys and unset thresholds.
  const mkThreshold = (mk: { readonly type: string }): string => {
    if (mk.type === "exact") return "-";
    const t = (mk as { threshold?: number }).threshold;
    return t === undefined ? "-" : String(t);
  };

  // Parse user-entered threshold text. Returns null for blank or non-finite
  // input. `Number("")` and `Number("   ")` evaluate to 0, so a bare
  // `Number()` + NaN check would store a threshold of 0 when Enter is pressed
  // on an empty field (the draft is always empty for exact matchkeys).
  const parseThreshold = (raw: string): number | null => {
    const text = raw.trim();
    if (text === "") return null;
    const n = Number(text);
    return Number.isFinite(n) ? n : null;
  };

  // List view: choose a matchkey to inspect.
  if (addons.SelectInput && selectedMk === null) {
    const items = mks.map((mk, i) => ({
      label: `${mk.name} (${mk.type}) threshold=${mkThreshold(mk)}`,
      value: String(i),
    }));
    return h(
      ink.Box,
      { flexDirection: "column" },
      header,
      h(ink.Text, { dimColor: true }, "Select a matchkey to inspect:"),
      h(addons.SelectInput, {
        items,
        onSelect: (item: { value: string }) => {
          const idx = Number(item.value);
          setSelectedMk(idx);
          // Seed the edit field with the current threshold ("" when none).
          const picked = mks[idx];
          const thr =
            picked && picked.type !== "exact"
              ? ((picked as { threshold?: number }).threshold ?? "")
              : "";
          setThresholdDraft(String(thr));
        },
      }),
    );
  }

  // Detail view for the selected matchkey.
  if (addons.SelectInput && selectedMk !== null) {
    const mk = mks[selectedMk];
    if (!mk) {
      // Selection no longer points at a matchkey: drop it and show the summary.
      setSelectedMk(null);
      return header;
    }
    const fields = mk.fields.map((f) => f.field).join(", ");
    return h(
      ink.Box,
      { flexDirection: "column" },
      header,
      h(ink.Text, { bold: true }, `Editing matchkey: ${mk.name}`),
      h(ink.Text, {}, ` type: ${mk.type}`),
      h(ink.Text, {}, ` fields: ${fields}`),
      addons.TextInput
        ? h(
            ink.Box,
            {},
            h(ink.Text, {}, " threshold: "),
            h(addons.TextInput, {
              value: thresholdDraft,
              onChange: setThresholdDraft,
              onSubmit: (value: string) => {
                const n = parseThreshold(value);
                if (n !== null) {
                  // In-place mutation of the config's matchkey.
                  (mk as { threshold?: number }).threshold = n;
                }
                // Valid or not, submitting returns to the list view.
                setSelectedMk(null);
              },
            }),
          )
        : h(
            ink.Text,
            { dimColor: true },
            ` threshold: ${mkThreshold(mk)} (install ink-text-input to edit)`,
          ),
      h(ink.Text, { dimColor: true }, "[Enter] save [Esc] back"),
    );
  }

  // Fallback: plain listing
  return h(
    ink.Box,
    { flexDirection: "column" },
    header,
    ...mks.map((mk, i) =>
      h(
        ink.Text,
        { key: `mk-${i}`, dimColor: true },
        ` - ${mk.name} (${mk.type}), threshold=${mkThreshold(mk)}, fields: ${mk.fields
          .map((f) => f.field)
          .join(", ")}`,
      ),
    ),
  );
};
|
|
281
|
+
|
|
282
|
+
/**
 * "Matches" tab: lists the first MAX_TABLE_ROWS scored pairs of the result.
 *
 * With the optional SelectInput addon a pair can be chosen to show its ids
 * and score in a detail view; the Table addon, when present, renders the
 * list as a table. Without either addon the pairs are printed as text lines.
 */
const MatchesTab = (props: { result: DedupeResult | null }) => {
  const outcome = props.result;
  // Index (into the previewed pairs) of the pair shown in the detail view.
  const [selectedPair, setSelectedPair] = React.useState(null) as [
    number | null,
    (v: number | null) => void,
  ];

  if (!outcome) {
    return h(
      ink.Text,
      { dimColor: true },
      "No results yet. Press 'r' to run dedupe.",
    );
  }

  const shown = outcome.scoredPairs.slice(0, MAX_TABLE_ROWS);
  if (shown.length === 0) {
    return h(ink.Text, {}, "No scored pairs");
  }

  const Picker = addons.SelectInput;
  const openPair = (item: { value: string }) =>
    setSelectedPair(Number(item.value));

  // Drill-in view
  if (Picker && selectedPair !== null) {
    const current = shown[selectedPair];
    if (!current) {
      // Stored index is out of range for the current list: clear it.
      setSelectedPair(null);
      return h(ink.Text, {}, "");
    }
    const choices = shown.map((entry, position) => ({
      label: `${entry.idA} <-> ${entry.idB} (${entry.score.toFixed(3)})`,
      value: String(position),
    }));
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(
        ink.Text,
        { bold: true },
        `Pair detail ${selectedPair + 1}/${shown.length}`,
      ),
      h(ink.Text, {}, ` idA: ${current.idA}`),
      h(ink.Text, {}, ` idB: ${current.idB}`),
      h(ink.Text, {}, ` score: ${current.score.toFixed(4)}`),
      h(ink.Text, { dimColor: true }, "(select another pair from list)"),
      h(Picker, { items: choices, onSelect: openPair }),
    );
  }

  // Table view, with a picker underneath when SelectInput is installed.
  if (addons.Table) {
    const tableRows = shown.map((entry) => ({
      idA: String(entry.idA),
      idB: String(entry.idB),
      score: entry.score.toFixed(4),
    }));
    const belowTable = Picker
      ? h(Picker, {
          items: shown.map((entry, position) => ({
            label: `Inspect ${entry.idA} <-> ${entry.idB}`,
            value: String(position),
          })),
          onSelect: openPair,
        })
      : h(
          ink.Text,
          { dimColor: true },
          "(install ink-select-input for drill-in)",
        );
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(
        ink.Text,
        { bold: true },
        `Scored pairs: ${outcome.scoredPairs.length} (showing first ${shown.length})`,
      ),
      h(addons.Table, { data: tableRows }),
      belowTable,
    );
  }

  // Plain-text fallback.
  const lines = shown.map((entry, position) =>
    h(
      ink.Text,
      { key: `pair-${position}` },
      ` ${entry.idA} <-> ${entry.idB}: ${entry.score.toFixed(3)}`,
    ),
  );
  return h(
    ink.Box,
    { flexDirection: "column" },
    h(ink.Text, { bold: true }, `Scored pairs: ${outcome.scoredPairs.length}`),
    ...lines,
  );
};
|
|
376
|
+
|
|
377
|
+
/**
 * "Golden" tab: shows the golden-record count and a preview of the first
 * MAX_TABLE_ROWS golden records.
 *
 * With the optional Table addon the preview is a table of the first
 * MAX_TABLE_COLS visible columns (taken from the first record); otherwise
 * each record is printed as JSON truncated to 100 characters.
 */
const GoldenTab = (props: { result: DedupeResult | null }) => {
  const outcome = props.result;
  if (!outcome) {
    return h(ink.Text, { dimColor: true }, "No results yet.");
  }

  const heading = h(
    ink.Text,
    { bold: true },
    `Golden records: ${outcome.goldenRecords.length}`,
  );
  const shown = outcome.goldenRecords.slice(0, MAX_TABLE_ROWS);
  if (shown.length === 0) {
    return heading;
  }

  if (!addons.Table) {
    // Plain-text fallback: truncated JSON, one line per record.
    const lines = shown.map((record, position) =>
      h(
        ink.Text,
        { key: `g-${position}`, dimColor: true },
        JSON.stringify(record).slice(0, 100),
      ),
    );
    return h(ink.Box, { flexDirection: "column" }, heading, ...lines);
  }

  // Table addon: every cell is stringified; null/undefined become "".
  const columns = visibleCols(shown[0]!).slice(0, MAX_TABLE_COLS);
  const tableData = shown.map((record) =>
    columns.reduce<Record<string, string>>((cells, name) => {
      const raw = (record as Record<string, unknown>)[name];
      cells[name] = raw === undefined || raw === null ? "" : String(raw);
      return cells;
    }, {}),
  );
  return h(
    ink.Box,
    { flexDirection: "column" },
    heading,
    h(addons.Table, { data: tableData }),
  );
};
|
|
429
|
+
|
|
430
|
+
/**
 * "Boost" tab: walks the user through labeling borderline scored pairs.
 *
 * Borderline pairs are those with 0.7 <= score < 0.9; at most the first 20
 * are offered. With the optional SelectInput addon each pair is labeled
 * "y" (match), "n" (different) or "s" (skip); once all are labeled a tally is
 * shown. Without the addon the current pair is displayed read-only.
 *
 * Labels live only in this component's state and are keyed by the pair's
 * position in the borderline list.
 */
const BoostTab = (props: { result: DedupeResult | null }) => {
  const { result } = props;
  const [idx, setIdx] = React.useState(0) as [
    number,
    (v: number | ((prev: number) => number)) => void,
  ];
  const [labels, setLabels] = React.useState({}) as [
    Record<number, string>,
    (
      v:
        | Record<number, string>
        | ((prev: Record<number, string>) => Record<number, string>),
    ) => void,
  ];

  // `idx` and `labels` are positions in a list derived from `result`. When a
  // different result arrives (e.g. dedupe is re-run while this tab is open)
  // those positions would refer to other pairs, so restart the session.
  // Declared before the early returns to keep hook order stable; on mount
  // both updates resolve to the current values.
  React.useEffect(() => {
    setIdx(0);
    setLabels((prev) => (Object.keys(prev).length === 0 ? prev : {}));
  }, [result]);

  if (!result) {
    return h(
      ink.Text,
      { dimColor: true },
      "No results yet. Press 'r' to run dedupe.",
    );
  }

  const borderline = result.scoredPairs
    .filter((p) => p.score >= 0.7 && p.score < 0.9)
    .slice(0, 20);

  if (borderline.length === 0) {
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, { bold: true }, "Boost - active learning"),
      h(ink.Text, {}, "No borderline pairs (0.7-0.9 score) to label."),
    );
  }

  // Every offered pair has been handled: show the tally per label.
  if (idx >= borderline.length) {
    const counts: Record<string, number> = {};
    for (const label of Object.values(labels)) {
      counts[label] = (counts[label] ?? 0) + 1;
    }
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, { color: "green", bold: true }, "All pairs labeled!"),
      h(
        ink.Text,
        {},
        `y=${counts["y"] ?? 0} n=${counts["n"] ?? 0} s=${counts["s"] ?? 0}`,
      ),
    );
  }

  const pair = borderline[idx]!;

  if (addons.SelectInput) {
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(
        ink.Text,
        { bold: true },
        `Pair ${idx + 1}/${borderline.length} - Score: ${pair.score.toFixed(3)}`,
      ),
      h(ink.Text, {}, ` Record ${pair.idA}`),
      h(ink.Text, {}, ` Record ${pair.idB}`),
      h(addons.SelectInput, {
        items: [
          { label: "Yes, this is a match", value: "y" },
          { label: "No, different entities", value: "n" },
          { label: "Skip", value: "s" },
        ],
        onSelect: (item: { value: string }) => {
          // Functional update: merge into the latest labels rather than the
          // snapshot captured when this render's callback was created.
          setLabels((prev) => ({ ...prev, [idx]: item.value }));
          setIdx((prev) => prev + 1);
        },
      }),
    );
  }

  // No SelectInput addon: read-only view of the current pair.
  return h(
    ink.Box,
    { flexDirection: "column" },
    h(ink.Text, { bold: true }, "Boost - active learning"),
    h(ink.Text, { dimColor: true }, "Label borderline pairs: y/n/s (skip)"),
    h(
      ink.Text,
      {},
      `Pair ${idx + 1}/${borderline.length}: ${pair.idA} <-> ${pair.idB} (${pair.score.toFixed(3)})`,
    ),
    h(
      ink.Text,
      { dimColor: true },
      "Install ink-select-input for interactive labeling",
    ),
  );
};
|
|
526
|
+
|
|
527
|
+
/**
 * "Export" tab: shows result counts and, with the optional SelectInput
 * addon, lets the user choose an export format (CSV or JSON).
 *
 * NOTE: the export is simulated. Choosing a format only flips UI state and
 * the status line after a 400 ms timer; nothing is written anywhere.
 *
 * @param props.result    Dedupe result to summarise, or null before a run.
 * @param props.setStatus Setter for the app-level status line.
 */
const ExportTab = (props: {
  result: DedupeResult | null;
  setStatus: (s: string) => void;
}) => {
  const { result, setStatus } = props;
  const [exporting, setExporting] = React.useState(false) as [
    boolean,
    (v: boolean) => void,
  ];
  const [done, setDone] = React.useState(null) as [
    string | null,
    (v: string | null) => void,
  ];

  // Tracks whether this tab is still mounted. The export timer can fire
  // after the user has switched tabs; in that case only the app-level status
  // is updated and the local setters are skipped.
  const mounted = React.useRef(true) as { current: boolean };
  React.useEffect(() => {
    mounted.current = true;
    return () => {
      mounted.current = false;
    };
  }, []);

  if (!result) {
    return h(ink.Text, { dimColor: true }, "No results yet.");
  }

  const doExport = (format: string) => {
    setExporting(true);
    setDone(null);
    setStatus(`Exporting as ${format}...`);
    // Simulate async write. Real impl would dispatch to a writer.
    setTimeout(() => {
      if (mounted.current) {
        setExporting(false);
        setDone(format);
      }
      setStatus(`Export complete (${format})`);
    }, 400);
  };

  if (exporting) {
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, { bold: true }, "Export"),
      addons.Spinner
        ? h(
            ink.Box,
            {},
            h(addons.Spinner, { type: "dots" }),
            h(ink.Text, {}, " writing..."),
          )
        : h(ink.Text, {}, "writing..."),
    );
  }

  const summary = h(
    ink.Text,
    {},
    `Ready: ${result.goldenRecords.length} golden, ${result.dupes.length} dupes, ${result.unique.length} unique`,
  );

  if (addons.SelectInput) {
    return h(
      ink.Box,
      { flexDirection: "column" },
      h(ink.Text, { bold: true }, "Export"),
      summary,
      done
        ? h(
            ink.Text,
            { color: "green" },
            `Last export: ${done}. Choose another format to export again.`,
          )
        : h(ink.Text, { dimColor: true }, "Choose output format:"),
      h(addons.SelectInput, {
        items: [
          { label: "CSV", value: "csv" },
          { label: "JSON", value: "json" },
        ],
        onSelect: (item: { value: string }) => doExport(item.value),
      }),
    );
  }

  // No SelectInput addon: there is no other way to trigger an export from
  // this tab, so tell the user what to install rather than advertising key
  // bindings that no input handler implements.
  return h(
    ink.Box,
    { flexDirection: "column" },
    h(ink.Text, { bold: true }, "Export"),
    h(
      ink.Text,
      { dimColor: true },
      "Install ink-select-input to choose an export format",
    ),
    summary,
  );
};
|
|
614
|
+
|
|
615
|
+
// -------------------------------------------------------------------------
|
|
616
|
+
// Top-level App
|
|
617
|
+
// -------------------------------------------------------------------------
|
|
618
|
+
|
|
619
|
+
/**
 * Root component: owns the tab index, loaded rows, dedupe result, config and
 * status line, handles the global key bindings, and renders the title, tab
 * bar, active tab and footer.
 */
const App = (props: { options: TuiOptions }) => {
  const tabNames = ["Data", "Config", "Matches", "Golden", "Boost", "Export"];

  const [tab, setTab] = React.useState(0) as [
    number,
    (v: number | ((prev: number) => number)) => void,
  ];
  const [rows, setRows] = React.useState([]) as [
    readonly Row[],
    (v: readonly Row[]) => void,
  ];
  const [result, setResult] = React.useState(null) as [
    DedupeResult | null,
    (v: DedupeResult | null) => void,
  ];
  // The config is captured once from the options and never replaced.
  const [config] = React.useState(props.options.config ?? null) as [
    GoldenMatchConfig | null,
    (v: GoldenMatchConfig | null) => void,
  ];
  const [status, setStatus] = React.useState("Ready") as [
    string,
    (v: string) => void,
  ];

  const { exit } = ink.useApp();

  // Runs dedupe over the loaded rows and reports the outcome on the status
  // line. The core API is imported on first use.
  const runDedupe = React.useCallback(async () => {
    if (rows.length === 0) {
      setStatus("No rows loaded");
      return;
    }
    setStatus("Running dedupe...");
    try {
      const { dedupe } = await import("../../core/api.js");
      const outcome = dedupe(rows, config ? { config } : {});
      setResult(outcome);
      setStatus(`Complete: ${outcome.stats.totalClusters} clusters`);
    } catch (err) {
      const reason = err instanceof Error ? err.message : String(err);
      setStatus(`Error: ${reason}`);
    }
  }, [rows, config]);

  // Global key bindings.
  // NOTE(review): this handler presumably also fires while the SelectInput /
  // TextInput addons inside a tab are in use, so typing "q", "r" or a digit
  // 1-6 there would quit, run dedupe or switch tabs — confirm against ink's
  // `useInput` behaviour.
  ink.useInput((input: string, key: any) => {
    if (key.escape || input === "q") {
      exit();
      return;
    }
    const shortcut = ["1", "2", "3", "4", "5", "6"].indexOf(input);
    if (shortcut !== -1) {
      setTab(shortcut);
      return;
    }
    if (key.tab) {
      setTab((t: number) => (t + 1) % tabNames.length);
      return;
    }
    if (input === "r") {
      void runDedupe();
    }
  });

  // Load the files given in the options once, on mount.
  React.useEffect(() => {
    const files = props.options.files;
    if (!files || files.length === 0) return;
    void (async () => {
      try {
        const loaded = await loadFiles(files);
        setRows(loaded);
        setStatus(`Loaded ${loaded.length} rows from ${files.length} file(s)`);
      } catch (err: unknown) {
        const reason = err instanceof Error ? err.message : String(err);
        setStatus(`Error: ${reason}`);
      }
    })();
    // eslint-disable-next-line react-hooks/exhaustive-deps
  }, []);

  // Element for the active tab.
  let body: any = null;
  switch (tab) {
    case 0:
      body = h(DataTab, { rows });
      break;
    case 1:
      body = h(ConfigTab, { config });
      break;
    case 2:
      body = h(MatchesTab, { result });
      break;
    case 3:
      body = h(GoldenTab, { result });
      break;
    case 4:
      body = h(BoostTab, { result });
      break;
    case 5:
      body = h(ExportTab, { result, setStatus });
      break;
  }

  // Title: rainbow gradient when the Gradient addon exists, else cyan text.
  const titleText = "GoldenMatch TUI - v0.1.0";
  const title = addons.Gradient
    ? h(
        addons.Gradient,
        { name: "rainbow" },
        h(ink.Text, { bold: true }, titleText),
      )
    : h(ink.Text, { bold: true, color: "cyan" }, titleText);

  const headerBox = h(ink.Box, { borderStyle: "double", paddingX: 1 }, title);

  const tabBar = h(
    ink.Box,
    { marginTop: 1 },
    ...tabNames.map((name: string, i: number) => {
      const active = tab === i;
      return h(
        ink.Box,
        { key: `tab-${i}`, marginRight: 2 },
        h(
          ink.Text,
          { color: active ? "green" : "gray", bold: active },
          `[${i + 1}] ${name}`,
        ),
      );
    }),
  );

  const content = h(
    ink.Box,
    { marginTop: 1, flexDirection: "column", minHeight: 10 },
    body,
  );

  const footer = h(
    ink.Box,
    { marginTop: 1, borderStyle: "single", paddingX: 1 },
    h(
      ink.Text,
      { dimColor: true },
      `[q]uit [1-6] tabs [Tab] cycle [r]un dedupe * ${status}`,
    ),
  );

  return h(
    ink.Box,
    { flexDirection: "column", padding: 1 },
    headerBox,
    tabBar,
    content,
    footer,
  );
};
|
|
751
|
+
|
|
752
|
+
const { waitUntilExit } = ink.render(h(App, { options }));
|
|
753
|
+
await waitUntilExit();
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
/* eslint-enable @typescript-eslint/no-explicit-any */
|