goldenmatch 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +140 -0
- package/dist/cli.cjs +6079 -0
- package/dist/cli.cjs.map +1 -0
- package/dist/cli.d.cts +1 -0
- package/dist/cli.d.ts +1 -0
- package/dist/cli.js +6076 -0
- package/dist/cli.js.map +1 -0
- package/dist/core/index.cjs +8449 -0
- package/dist/core/index.cjs.map +1 -0
- package/dist/core/index.d.cts +1972 -0
- package/dist/core/index.d.ts +1972 -0
- package/dist/core/index.js +8318 -0
- package/dist/core/index.js.map +1 -0
- package/dist/index.cjs +8449 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +2 -0
- package/dist/index.d.ts +2 -0
- package/dist/index.js +8318 -0
- package/dist/index.js.map +1 -0
- package/dist/node/backends/score-worker.cjs +934 -0
- package/dist/node/backends/score-worker.cjs.map +1 -0
- package/dist/node/backends/score-worker.d.cts +14 -0
- package/dist/node/backends/score-worker.d.ts +14 -0
- package/dist/node/backends/score-worker.js +932 -0
- package/dist/node/backends/score-worker.js.map +1 -0
- package/dist/node/index.cjs +11430 -0
- package/dist/node/index.cjs.map +1 -0
- package/dist/node/index.d.cts +554 -0
- package/dist/node/index.d.ts +554 -0
- package/dist/node/index.js +11277 -0
- package/dist/node/index.js.map +1 -0
- package/dist/types-DhUdX5Rc.d.cts +304 -0
- package/dist/types-DhUdX5Rc.d.ts +304 -0
- package/examples/01-basic-dedupe.ts +60 -0
- package/examples/02-match-two-datasets.ts +48 -0
- package/examples/03-csv-file-pipeline.ts +62 -0
- package/examples/04-string-scoring.ts +63 -0
- package/examples/05-custom-config.ts +94 -0
- package/examples/06-probabilistic-fs.ts +72 -0
- package/examples/07-pprl-privacy.ts +76 -0
- package/examples/08-streaming.ts +79 -0
- package/examples/09-llm-scorer.ts +79 -0
- package/examples/10-explain.ts +60 -0
- package/examples/11-evaluate.ts +61 -0
- package/examples/README.md +53 -0
- package/package.json +66 -0
- package/src/cli.ts +372 -0
- package/src/core/ann-blocker.ts +593 -0
- package/src/core/api.ts +220 -0
- package/src/core/autoconfig.ts +363 -0
- package/src/core/autofix.ts +102 -0
- package/src/core/blocker.ts +655 -0
- package/src/core/cluster.ts +699 -0
- package/src/core/compare-clusters.ts +176 -0
- package/src/core/config/loader.ts +869 -0
- package/src/core/cross-encoder.ts +614 -0
- package/src/core/data.ts +430 -0
- package/src/core/domain.ts +277 -0
- package/src/core/embedder.ts +562 -0
- package/src/core/evaluate.ts +156 -0
- package/src/core/explain.ts +352 -0
- package/src/core/golden.ts +524 -0
- package/src/core/graph-er.ts +371 -0
- package/src/core/index.ts +314 -0
- package/src/core/ingest.ts +112 -0
- package/src/core/learned-blocking.ts +305 -0
- package/src/core/lineage.ts +221 -0
- package/src/core/llm/budget.ts +258 -0
- package/src/core/llm/cluster.ts +542 -0
- package/src/core/llm/scorer.ts +396 -0
- package/src/core/match-one.ts +95 -0
- package/src/core/matchkey.ts +97 -0
- package/src/core/memory/corrections.ts +179 -0
- package/src/core/memory/learner.ts +218 -0
- package/src/core/memory/store.ts +114 -0
- package/src/core/pipeline.ts +366 -0
- package/src/core/pprl/protocol.ts +216 -0
- package/src/core/probabilistic.ts +511 -0
- package/src/core/profiler.ts +212 -0
- package/src/core/quality.ts +197 -0
- package/src/core/review-queue.ts +177 -0
- package/src/core/scorer.ts +855 -0
- package/src/core/sensitivity.ts +196 -0
- package/src/core/standardize.ts +279 -0
- package/src/core/streaming.ts +128 -0
- package/src/core/transforms.ts +599 -0
- package/src/core/types.ts +570 -0
- package/src/core/validate.ts +243 -0
- package/src/index.ts +8 -0
- package/src/node/a2a/server.ts +470 -0
- package/src/node/api/server.ts +412 -0
- package/src/node/backends/duckdb.ts +130 -0
- package/src/node/backends/score-worker.ts +41 -0
- package/src/node/backends/workers.ts +212 -0
- package/src/node/config-file.ts +66 -0
- package/src/node/connectors/base.ts +57 -0
- package/src/node/connectors/bigquery.ts +61 -0
- package/src/node/connectors/databricks.ts +69 -0
- package/src/node/connectors/file.ts +350 -0
- package/src/node/connectors/hubspot.ts +62 -0
- package/src/node/connectors/index.ts +43 -0
- package/src/node/connectors/salesforce.ts +93 -0
- package/src/node/connectors/snowflake.ts +73 -0
- package/src/node/db/postgres.ts +173 -0
- package/src/node/db/sync.ts +103 -0
- package/src/node/dedupe-file.ts +156 -0
- package/src/node/index.ts +89 -0
- package/src/node/mcp/server.ts +940 -0
- package/src/node/tui/app.ts +756 -0
- package/src/node/tui/index.ts +6 -0
- package/src/node/tui/widgets.ts +128 -0
- package/tests/parity/scorer-ground-truth.test.ts +118 -0
- package/tests/smoke.test.ts +46 -0
- package/tests/unit/a2a-server.test.ts +175 -0
- package/tests/unit/ann-blocker.test.ts +117 -0
- package/tests/unit/api-server.test.ts +239 -0
- package/tests/unit/api.test.ts +77 -0
- package/tests/unit/autoconfig.test.ts +103 -0
- package/tests/unit/autofix.test.ts +71 -0
- package/tests/unit/blocker.test.ts +164 -0
- package/tests/unit/buildBlocksAsync.test.ts +63 -0
- package/tests/unit/cluster.test.ts +213 -0
- package/tests/unit/compare-clusters.test.ts +42 -0
- package/tests/unit/config-loader.test.ts +301 -0
- package/tests/unit/connectors-base.test.ts +48 -0
- package/tests/unit/cross-encoder-model.test.ts +198 -0
- package/tests/unit/cross-encoder.test.ts +173 -0
- package/tests/unit/db-connectors.test.ts +37 -0
- package/tests/unit/domain.test.ts +80 -0
- package/tests/unit/embedder.test.ts +151 -0
- package/tests/unit/evaluate.test.ts +85 -0
- package/tests/unit/explain.test.ts +73 -0
- package/tests/unit/golden.test.ts +97 -0
- package/tests/unit/graph-er.test.ts +173 -0
- package/tests/unit/hnsw-ann.test.ts +283 -0
- package/tests/unit/hubspot-connector.test.ts +118 -0
- package/tests/unit/ingest.test.ts +97 -0
- package/tests/unit/learned-blocking.test.ts +134 -0
- package/tests/unit/lineage.test.ts +135 -0
- package/tests/unit/match-one.test.ts +129 -0
- package/tests/unit/matchkey.test.ts +97 -0
- package/tests/unit/mcp-server.test.ts +183 -0
- package/tests/unit/memory.test.ts +119 -0
- package/tests/unit/pipeline.test.ts +118 -0
- package/tests/unit/pprl-protocol.test.ts +381 -0
- package/tests/unit/probabilistic.test.ts +494 -0
- package/tests/unit/profiler.test.ts +68 -0
- package/tests/unit/review-queue.test.ts +68 -0
- package/tests/unit/salesforce-connector.test.ts +148 -0
- package/tests/unit/scorer.test.ts +301 -0
- package/tests/unit/sensitivity.test.ts +154 -0
- package/tests/unit/standardize.test.ts +84 -0
- package/tests/unit/streaming.test.ts +82 -0
- package/tests/unit/transforms.test.ts +208 -0
- package/tests/unit/tui-widgets.test.ts +42 -0
- package/tests/unit/tui.test.ts +24 -0
- package/tests/unit/validate.test.ts +145 -0
- package/tests/unit/workers-parallel.test.ts +99 -0
- package/tests/unit/workers.test.ts +74 -0
- package/tsconfig.json +25 -0
- package/tsup.config.ts +37 -0
- package/vitest.config.ts +11 -0
|
@@ -0,0 +1,940 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* mcp/server.ts -- GoldenMatch MCP server (stdio transport, JSON-RPC).
|
|
3
|
+
*
|
|
4
|
+
* Node-only: uses node:fs, node:path, node:readline. NOT edge-safe.
|
|
5
|
+
*
|
|
6
|
+
* Exposes ~20 tools covering dedupe, match, scoring, explanation,
|
|
7
|
+
* profiling, auto-config (shorthand), evaluation, and listings.
|
|
8
|
+
*
|
|
9
|
+
* Every tool dispatch is wrapped in try/catch so a single failure never
|
|
10
|
+
* crashes the JSON-RPC loop; errors come back as `{ error: "<msg>" }`.
|
|
11
|
+
*
|
|
12
|
+
* Ports ideas from goldenmatch/mcp/server.py.
|
|
13
|
+
*/
|
|
14
|
+
|
|
15
|
+
import { readFileSync } from "node:fs";
|
|
16
|
+
import { resolve, isAbsolute, sep } from "node:path";
|
|
17
|
+
import { createInterface } from "node:readline";
|
|
18
|
+
|
|
19
|
+
import { dedupe, match, scoreStrings } from "../../core/api.js";
|
|
20
|
+
import { readFile, writeCsv, writeJson } from "../connectors/file.js";
|
|
21
|
+
import { loadConfigFile } from "../config-file.js";
|
|
22
|
+
import type { Row, MatchkeyField } from "../../core/types.js";
|
|
23
|
+
import {
|
|
24
|
+
makeMatchkeyConfig,
|
|
25
|
+
makeMatchkeyField,
|
|
26
|
+
VALID_SCORERS,
|
|
27
|
+
VALID_TRANSFORMS,
|
|
28
|
+
VALID_STRATEGIES,
|
|
29
|
+
} from "../../core/types.js";
|
|
30
|
+
import {
|
|
31
|
+
scoreField,
|
|
32
|
+
findExactMatches,
|
|
33
|
+
findFuzzyMatches,
|
|
34
|
+
scorePair,
|
|
35
|
+
} from "../../core/scorer.js";
|
|
36
|
+
import { addRowIds } from "../../core/matchkey.js";
|
|
37
|
+
import { buildClusters } from "../../core/cluster.js";
|
|
38
|
+
import { explainPair, explainCluster } from "../../core/explain.js";
|
|
39
|
+
import { profileRows } from "../../core/profiler.js";
|
|
40
|
+
import { evaluatePairs, loadGroundTruthPairs } from "../../core/evaluate.js";
|
|
41
|
+
|
|
42
|
+
// ---------------------------------------------------------------------------
|
|
43
|
+
// Tool definitions
|
|
44
|
+
// ---------------------------------------------------------------------------
|
|
45
|
+
|
|
46
|
+
/**
 * Shape of one entry in the MCP tool catalogue exported as `TOOLS`.
 * All members are read-only.
 */
interface Tool {
  /** Identifier of the tool (e.g. "dedupe"). */
  readonly name: string;

  /** Human-readable summary of what the tool does. */
  readonly description: string;

  /** JSON-schema-style object describing the arguments the tool accepts. */
  readonly inputSchema: Readonly<Record<string, unknown>>;
}
|
|
51
|
+
|
|
52
|
+
// Reusable JSON-schema fragments shared by several tool definitions.

/** Schema fragment for an argument holding a data-file path. */
const pathArg = {
  type: "string",
  description: "File path (csv/tsv/json/jsonl)",
};

/** Schema fragment for the optional YAML config path argument. */
const optionalConfigArg = { type: "string", description: "Optional path to YAML config file" };

/** Schema fragment for an argument holding a list of column names. */
const optionalFieldsArg = { type: "array", items: { type: "string" }, description: "Column names" };

/** Schema fragment for a plain string argument. */
const stringArg = {
  type: "string",
};

/** Schema fragment for a free-form record argument (any properties allowed). */
const rowArg = { type: "object", additionalProperties: true, description: "Record object (column -> value)" };
|
|
68
|
+
|
|
69
|
+
/**
 * Catalogue of the tools this server advertises.
 *
 * Each entry carries a name, a description and a JSON-schema-style
 * `inputSchema`. The catalogue is assembled once at module load through a
 * small local factory so that repeated schema fragments (fuzzy threshold map,
 * field-config list, ...) are written a single time. Fragments are shared by
 * reference between entries, so treat the exported structure as immutable.
 */
export const TOOLS: readonly Tool[] = (() => {
  type SchemaProps = Record<string, unknown>;

  // Small fragments reused by several tools.
  const numberArg = { type: "number" };
  const fuzzyArg = { type: "object", additionalProperties: { type: "number" } };
  const transformsArg = { type: "array", items: { type: "string" } };
  const fieldConfigsArg = {
    type: "array",
    items: {
      type: "object",
      properties: {
        field: stringArg,
        scorer: stringArg,
        weight: numberArg,
        transforms: transformsArg,
      },
      required: ["field"],
    },
  };

  // Builds one catalogue entry; `required` is only emitted when provided.
  const tool = (
    name: string,
    description: string,
    properties: SchemaProps,
    required?: readonly string[],
  ): Tool => ({
    name,
    description,
    inputSchema:
      required === undefined
        ? { type: "object", properties }
        : { type: "object", properties, required },
  });

  return [
    tool(
      "dedupe",
      "Deduplicate records in a file. Returns cluster counts and optional output path.",
      {
        path: pathArg,
        config: optionalConfigArg,
        exact: optionalFieldsArg,
        fuzzy: { ...fuzzyArg, description: "Map of field -> fuzzy threshold" },
        blocking: optionalFieldsArg,
        threshold: { type: "number", description: "Overall fuzzy threshold" },
        output: { type: "string", description: "Optional output path for golden records" },
      },
      ["path"],
    ),
    tool(
      "match",
      "Match a target file against a reference file. Returns matched/unmatched counts.",
      {
        target: pathArg,
        reference: pathArg,
        config: optionalConfigArg,
        exact: optionalFieldsArg,
        fuzzy: fuzzyArg,
        blocking: optionalFieldsArg,
        threshold: numberArg,
        output: stringArg,
      },
      ["target", "reference"],
    ),
    tool(
      "score_strings",
      "Score similarity between two strings using the requested scorer.",
      {
        a: stringArg,
        b: stringArg,
        scorer: {
          type: "string",
          description:
            "Scorer name (exact, jaro_winkler, levenshtein, token_sort, soundex_match, dice, jaccard, ensemble)",
        },
      },
      ["a", "b"],
    ),
    tool(
      "score_pair",
      "Score two record objects across weighted fields. Returns a combined score.",
      {
        row_a: rowArg,
        row_b: rowArg,
        fields: fieldConfigsArg,
      },
      ["row_a", "row_b", "fields"],
    ),
    tool(
      "explain_pair",
      "Explain why two records match (or don't) using a matchkey definition.",
      {
        row_a: rowArg,
        row_b: rowArg,
        fields: fieldConfigsArg,
        threshold: numberArg,
      },
      ["row_a", "row_b", "fields"],
    ),
    tool(
      "explain_cluster",
      "Run dedupe on a file and explain the cluster containing the given row id.",
      {
        path: pathArg,
        config: optionalConfigArg,
        exact: optionalFieldsArg,
        fuzzy: fuzzyArg,
        blocking: optionalFieldsArg,
        row_id: numberArg,
      },
      ["path", "row_id"],
    ),
    tool(
      "profile",
      "Profile a dataset: per-column null rate, cardinality, inferred type, samples.",
      { path: pathArg },
      ["path"],
    ),
    tool(
      "suggest_config",
      "Suggest a shorthand dedupe config based on a profile of the dataset.",
      { path: pathArg },
      ["path"],
    ),
    tool(
      "evaluate",
      "Evaluate predicted pairs from a dedupe run against ground truth pairs.",
      {
        path: pathArg,
        ground_truth: pathArg,
        id_col_a: { type: "string", description: "Ground truth id column A (default id_a)" },
        id_col_b: { type: "string", description: "Ground truth id column B (default id_b)" },
        config: optionalConfigArg,
        exact: optionalFieldsArg,
        fuzzy: fuzzyArg,
        blocking: optionalFieldsArg,
        threshold: numberArg,
      },
      ["path", "ground_truth"],
    ),
    tool(
      "find_exact_matches",
      "Find exact matches on a field in a file. Returns pairs.",
      {
        path: pathArg,
        field: stringArg,
        transforms: {
          ...transformsArg,
          description: "Transforms applied before matching (default lowercase, strip)",
        },
      },
      ["path", "field"],
    ),
    tool(
      "find_fuzzy_matches",
      "Find fuzzy matches in a block of rows. Returns scored pairs.",
      {
        path: pathArg,
        field: stringArg,
        scorer: { type: "string", description: "Scorer (default jaro_winkler)" },
        threshold: { type: "number", description: "Threshold (default 0.85)" },
        transforms: transformsArg,
      },
      ["path", "field"],
    ),
    tool(
      "build_clusters",
      "Group records into clusters given a file and matchkey definition.",
      {
        path: pathArg,
        exact: optionalFieldsArg,
        fuzzy: fuzzyArg,
        blocking: optionalFieldsArg,
        threshold: numberArg,
      },
      ["path"],
    ),
    // Argument-less listing / metadata tools.
    tool("list_scorers", "List all available similarity scorers.", {}),
    tool("list_transforms", "List all available field transforms.", {}),
    tool("list_strategies", "List all golden-record survivorship strategies.", {}),
    tool("list_blocking_strategies", "List all blocking strategy names.", {}),
    tool("server_info", "Return metadata about this GoldenMatch MCP server.", {}),
    tool(
      "read_file",
      "Read a CSV/JSON file and return the first N records.",
      {
        path: pathArg,
        limit: { type: "number", description: "Max rows to return (default 100)" },
      },
      ["path"],
    ),
    tool(
      "write_csv",
      "Write a list of record objects to a CSV file.",
      {
        path: pathArg,
        rows: { type: "array", items: { type: "object", additionalProperties: true } },
      },
      ["path", "rows"],
    ),
  ];
})();
|
|
349
|
+
|
|
350
|
+
// ---------------------------------------------------------------------------
|
|
351
|
+
// Helpers
|
|
352
|
+
// ---------------------------------------------------------------------------
|
|
353
|
+
|
|
354
|
+
/**
 * Resolve a caller-supplied path and confine it to the process working
 * directory.
 *
 * Relative input is resolved against `process.cwd()`; absolute input is
 * normalised. The result must be the working directory itself or lie below
 * it, otherwise the call is rejected.
 *
 * NOTE(review): the check is purely lexical. Symbolic links inside the
 * working directory that point elsewhere are not resolved here.
 *
 * @param raw - Path taken from tool arguments.
 * @returns The absolute, normalised path.
 * @throws Error if `raw` is not a non-empty string or resolves outside the
 *   working directory.
 */
function sanitizePath(raw: string): string {
  if (typeof raw !== "string" || raw.length === 0) {
    throw new Error("path must be a non-empty string");
  }
  const cwd = resolve(process.cwd());
  const resolved = isAbsolute(raw) ? resolve(raw) : resolve(cwd, raw);
  // Compare against "cwd + separator" so that cwd="/app/foo" does NOT accept
  // "/app/foobar". A filesystem root ("/" or "C:\") already ends with the
  // separator; appending another one would reject every path below the root.
  const cwdPrefix = cwd.endsWith(sep) ? cwd : cwd + sep;
  if (resolved !== cwd && !resolved.startsWith(cwdPrefix)) {
    throw new Error(`Path '${raw}' is outside the working directory`);
  }
  return resolved;
}
|
|
366
|
+
|
|
367
|
+
/**
 * Coerce an untyped argument into a list of strings.
 *
 * @param v - Raw argument value.
 * @returns Every element passed through `String(...)` when `v` is an array;
 *   `undefined` for anything else (including `null` and `undefined`).
 */
function asStringArray(v: unknown): string[] | undefined {
  return Array.isArray(v) ? v.map((item) => String(item)) : undefined;
}
|
|
372
|
+
|
|
373
|
+
/**
 * Coerce an untyped argument into a `key -> number` map.
 *
 * Only finite numbers and non-blank numeric strings are kept. Values such as
 * `null`, `""`, booleans or arrays are dropped instead of being run through
 * `Number(...)`, which would silently turn them into 0 or 1.
 *
 * @param v - Raw argument value.
 * @returns A new map with the usable entries, or `undefined` when `v` is
 *   `null`, `undefined`, an array, or not an object.
 */
function asNumberMap(v: unknown): Record<string, number> | undefined {
  if (v === undefined || v === null) return undefined;
  if (typeof v !== "object" || Array.isArray(v)) return undefined;
  const out: Record<string, number> = {};
  for (const [key, value] of Object.entries(v as Record<string, unknown>)) {
    let parsed: number;
    if (typeof value === "number") {
      parsed = value;
    } else if (typeof value === "string" && value.trim() !== "") {
      // Lenient: accept numeric strings such as "0.9".
      parsed = Number(value);
    } else {
      // null, booleans, blank strings, nested objects: not a usable number.
      continue;
    }
    if (Number.isFinite(parsed)) out[key] = parsed;
  }
  return out;
}
|
|
383
|
+
|
|
384
|
+
/**
 * Shorthand matching arguments as a typed shape.
 *
 * NOTE(review): nothing in the visible part of this file references this
 * interface — confirm it is still needed.
 */
interface ShorthandArgs {
  /** Column names listed under the `exact` argument. */
  exact?: readonly string[];

  /** Map of column name to a numeric value from the `fuzzy` argument. */
  fuzzy?: Readonly<Record<string, number>>;

  /** Column names listed under the `blocking` argument. */
  blocking?: readonly string[];

  /** Numeric threshold. */
  threshold?: number;

  /** Path to a config file. */
  configPath?: string;
}
|
|
391
|
+
|
|
392
|
+
/**
 * Translate raw tool arguments into the options object handed to
 * `dedupe` / `match`.
 *
 * Only keys that are present and well-formed are copied over:
 * - `config`: a non-empty string is sanitised and loaded via `loadConfigFile`;
 * - `exact`, `blocking`: copied when `asStringArray` yields a list;
 * - `fuzzy`: copied when `asNumberMap` yields a map;
 * - `threshold`: copied when it is a number.
 *
 * @param args - Raw arguments of the tool call.
 * @returns Options containing just the keys that were supplied.
 * @throws Whatever `sanitizePath` or `loadConfigFile` throw for a bad config path.
 */
function buildDedupeOptions(args: Record<string, unknown>): {
  config?: ReturnType<typeof loadConfigFile>;
  exact?: readonly string[];
  fuzzy?: Readonly<Record<string, number>>;
  blocking?: readonly string[];
  threshold?: number;
} {
  const options: ReturnType<typeof buildDedupeOptions> = {};

  // The config file is loaded first so a bad path fails before anything else.
  const configArg = args["config"];
  if (typeof configArg === "string" && configArg !== "") {
    options.config = loadConfigFile(sanitizePath(configArg));
  }

  const exactFields = asStringArray(args["exact"]);
  if (exactFields !== undefined) {
    options.exact = exactFields;
  }

  const fuzzyThresholds = asNumberMap(args["fuzzy"]);
  if (fuzzyThresholds !== undefined) {
    options.fuzzy = fuzzyThresholds;
  }

  const blockingFields = asStringArray(args["blocking"]);
  if (blockingFields !== undefined) {
    options.blocking = blockingFields;
  }

  const thresholdArg = args["threshold"];
  if (typeof thresholdArg === "number") {
    options.threshold = thresholdArg;
  }

  return options;
}
|
|
420
|
+
|
|
421
|
+
/**
 * Convert the `fields` tool argument into matchkey field definitions.
 *
 * Entries that are `null` or not objects are skipped. Every remaining entry
 * must carry a string `field`; missing pieces fall back to defaults:
 * transforms `["lowercase", "strip"]`, scorer `"jaro_winkler"`, weight `1.0`.
 *
 * @param raw - Raw `fields` argument of the tool call.
 * @returns One field definition per usable entry, in input order.
 * @throws Error if `raw` is not an array or an entry lacks a string `field`.
 */
function buildFieldsFromArg(raw: unknown): MatchkeyField[] {
  if (!Array.isArray(raw)) {
    throw new Error("fields must be an array of field configs");
  }

  const fields: MatchkeyField[] = [];
  for (const candidate of raw as unknown[]) {
    // Non-object entries are ignored rather than rejected.
    if (typeof candidate !== "object" || candidate === null) {
      continue;
    }
    const spec = candidate as Record<string, unknown>;

    const fieldName = spec["field"];
    if (typeof fieldName !== "string") {
      throw new Error("each field entry needs a string 'field' property");
    }

    const scorerArg = spec["scorer"];
    const weightArg = spec["weight"];
    fields.push(
      makeMatchkeyField({
        field: fieldName,
        transforms: asStringArray(spec["transforms"]) ?? ["lowercase", "strip"],
        scorer: typeof scorerArg === "string" ? scorerArg : "jaro_winkler",
        weight: typeof weightArg === "number" ? weightArg : 1.0,
      }),
    );
  }
  return fields;
}
|
|
446
|
+
|
|
447
|
+
// ---------------------------------------------------------------------------
|
|
448
|
+
// Tool dispatch
|
|
449
|
+
// ---------------------------------------------------------------------------
|
|
450
|
+
|
|
451
|
+
export async function handleTool(
|
|
452
|
+
name: string,
|
|
453
|
+
rawArgs: Record<string, unknown>,
|
|
454
|
+
): Promise<unknown> {
|
|
455
|
+
const args = rawArgs ?? {};
|
|
456
|
+
try {
|
|
457
|
+
switch (name) {
|
|
458
|
+
case "dedupe": {
|
|
459
|
+
const path = sanitizePath(String(args["path"]));
|
|
460
|
+
const rows = readFile(path);
|
|
461
|
+
const options = buildDedupeOptions(args);
|
|
462
|
+
const result = dedupe(rows, options);
|
|
463
|
+
let output_written: string | null = null;
|
|
464
|
+
if (typeof args["output"] === "string" && args["output"]) {
|
|
465
|
+
const outPath = sanitizePath(args["output"] as string);
|
|
466
|
+
try {
|
|
467
|
+
writeCsv(outPath, result.goldenRecords);
|
|
468
|
+
output_written = outPath;
|
|
469
|
+
} catch (err) {
|
|
470
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
471
|
+
return {
|
|
472
|
+
stats: result.stats,
|
|
473
|
+
total_clusters: result.stats.totalClusters,
|
|
474
|
+
total_records: result.stats.totalRecords,
|
|
475
|
+
match_rate: result.stats.matchRate,
|
|
476
|
+
output_error: msg,
|
|
477
|
+
};
|
|
478
|
+
}
|
|
479
|
+
}
|
|
480
|
+
return {
|
|
481
|
+
total_records: result.stats.totalRecords,
|
|
482
|
+
total_clusters: result.stats.totalClusters,
|
|
483
|
+
match_rate: result.stats.matchRate,
|
|
484
|
+
matched_records: result.stats.matchedRecords,
|
|
485
|
+
unique_records: result.stats.uniqueRecords,
|
|
486
|
+
golden_records_count: result.goldenRecords.length,
|
|
487
|
+
output_written,
|
|
488
|
+
};
|
|
489
|
+
}
|
|
490
|
+
|
|
491
|
+
case "match": {
|
|
492
|
+
const targetPath = sanitizePath(String(args["target"]));
|
|
493
|
+
const referencePath = sanitizePath(String(args["reference"]));
|
|
494
|
+
const targetRows = readFile(targetPath);
|
|
495
|
+
const referenceRows = readFile(referencePath);
|
|
496
|
+
const options = buildDedupeOptions(args);
|
|
497
|
+
const result = match(
|
|
498
|
+
targetRows.map((r) => ({ ...r, __source__: "target" })),
|
|
499
|
+
referenceRows.map((r) => ({ ...r, __source__: "reference" })),
|
|
500
|
+
options,
|
|
501
|
+
);
|
|
502
|
+
let output_written: string | null = null;
|
|
503
|
+
if (typeof args["output"] === "string" && args["output"]) {
|
|
504
|
+
const outPath = sanitizePath(args["output"] as string);
|
|
505
|
+
try {
|
|
506
|
+
writeCsv(outPath, result.matched);
|
|
507
|
+
output_written = outPath;
|
|
508
|
+
} catch (err) {
|
|
509
|
+
return {
|
|
510
|
+
matched: result.matched.length,
|
|
511
|
+
unmatched: result.unmatched.length,
|
|
512
|
+
output_error: err instanceof Error ? err.message : String(err),
|
|
513
|
+
};
|
|
514
|
+
}
|
|
515
|
+
}
|
|
516
|
+
return {
|
|
517
|
+
matched: result.matched.length,
|
|
518
|
+
unmatched: result.unmatched.length,
|
|
519
|
+
output_written,
|
|
520
|
+
};
|
|
521
|
+
}
|
|
522
|
+
|
|
523
|
+
case "score_strings": {
|
|
524
|
+
const a = String(args["a"] ?? "");
|
|
525
|
+
const b = String(args["b"] ?? "");
|
|
526
|
+
const scorer =
|
|
527
|
+
typeof args["scorer"] === "string" ? (args["scorer"] as string) : "jaro_winkler";
|
|
528
|
+
const score = scoreStrings(a, b, scorer);
|
|
529
|
+
return { scorer, score };
|
|
530
|
+
}
|
|
531
|
+
|
|
532
|
+
case "score_pair": {
|
|
533
|
+
const rowA = args["row_a"] as Row;
|
|
534
|
+
const rowB = args["row_b"] as Row;
|
|
535
|
+
if (!rowA || !rowB) throw new Error("row_a and row_b are required");
|
|
536
|
+
const fields = buildFieldsFromArg(args["fields"]);
|
|
537
|
+
const score = scorePair(rowA, rowB, fields);
|
|
538
|
+
return { score, field_count: fields.length };
|
|
539
|
+
}
|
|
540
|
+
|
|
541
|
+
case "explain_pair": {
|
|
542
|
+
const rowA = args["row_a"] as Row;
|
|
543
|
+
const rowB = args["row_b"] as Row;
|
|
544
|
+
if (!rowA || !rowB) throw new Error("row_a and row_b are required");
|
|
545
|
+
const fields = buildFieldsFromArg(args["fields"]);
|
|
546
|
+
const threshold =
|
|
547
|
+
typeof args["threshold"] === "number" ? (args["threshold"] as number) : 0.85;
|
|
548
|
+
const mk = makeMatchkeyConfig({
|
|
549
|
+
name: "adhoc",
|
|
550
|
+
type: "weighted",
|
|
551
|
+
fields,
|
|
552
|
+
threshold,
|
|
553
|
+
});
|
|
554
|
+
const explanation = explainPair(rowA, rowB, mk);
|
|
555
|
+
return {
|
|
556
|
+
score: explanation.score,
|
|
557
|
+
confidence: explanation.confidence,
|
|
558
|
+
explanation: explanation.explanation,
|
|
559
|
+
field_scores: explanation.fieldScores,
|
|
560
|
+
};
|
|
561
|
+
}
|
|
562
|
+
|
|
563
|
+
case "explain_cluster": {
|
|
564
|
+
const path = sanitizePath(String(args["path"]));
|
|
565
|
+
const rowId = Number(args["row_id"]);
|
|
566
|
+
if (!Number.isFinite(rowId)) {
|
|
567
|
+
throw new Error("row_id must be a number");
|
|
568
|
+
}
|
|
569
|
+
const rows = readFile(path);
|
|
570
|
+
const options = buildDedupeOptions(args);
|
|
571
|
+
const result = dedupe(rows, options);
|
|
572
|
+
// Find cluster containing rowId
|
|
573
|
+
let foundId: number | null = null;
|
|
574
|
+
let found: typeof result.clusters extends ReadonlyMap<number, infer V> ? V : never;
|
|
575
|
+
found = undefined as unknown as typeof found;
|
|
576
|
+
for (const [cid, info] of result.clusters.entries()) {
|
|
577
|
+
if (info.members.includes(rowId)) {
|
|
578
|
+
foundId = cid;
|
|
579
|
+
found = info;
|
|
580
|
+
break;
|
|
581
|
+
}
|
|
582
|
+
}
|
|
583
|
+
if (foundId === null || !found) {
|
|
584
|
+
return { error: `row_id ${rowId} not found in any cluster` };
|
|
585
|
+
}
|
|
586
|
+
// Get matchkey
|
|
587
|
+
const mks = (result.config.matchkeys ?? []) as readonly ReturnType<
|
|
588
|
+
typeof makeMatchkeyConfig
|
|
589
|
+
>[];
|
|
590
|
+
const mk =
|
|
591
|
+
mks.length > 0
|
|
592
|
+
? mks[0]!
|
|
593
|
+
: makeMatchkeyConfig({
|
|
594
|
+
name: "placeholder",
|
|
595
|
+
type: "weighted",
|
|
596
|
+
fields: [
|
|
597
|
+
makeMatchkeyField({
|
|
598
|
+
field: Object.keys(rows[0] ?? {})[0] ?? "",
|
|
599
|
+
transforms: ["lowercase", "strip"],
|
|
600
|
+
scorer: "jaro_winkler",
|
|
601
|
+
}),
|
|
602
|
+
],
|
|
603
|
+
});
|
|
604
|
+
const withIds = addRowIds(rows);
|
|
605
|
+
const explanation = explainCluster(foundId, found, withIds, mk);
|
|
606
|
+
return {
|
|
607
|
+
cluster_id: explanation.clusterId,
|
|
608
|
+
size: explanation.size,
|
|
609
|
+
confidence: explanation.confidence,
|
|
610
|
+
quality: explanation.quality,
|
|
611
|
+
summary: explanation.summary,
|
|
612
|
+
};
|
|
613
|
+
}
|
|
614
|
+
|
|
615
|
+
case "profile": {
|
|
616
|
+
const path = sanitizePath(String(args["path"]));
|
|
617
|
+
const rows = readFile(path);
|
|
618
|
+
const profile = profileRows(rows);
|
|
619
|
+
return {
|
|
620
|
+
row_count: profile.rowCount,
|
|
621
|
+
columns: profile.columns.map((c) => ({
|
|
622
|
+
name: c.name,
|
|
623
|
+
inferred_type: c.inferredType,
|
|
624
|
+
null_count: c.nullCount,
|
|
625
|
+
null_rate: c.nullRate,
|
|
626
|
+
distinct_count: c.distinctCount,
|
|
627
|
+
cardinality_ratio: c.cardinalityRatio,
|
|
628
|
+
avg_length: c.avgLength,
|
|
629
|
+
max_length: c.maxLength,
|
|
630
|
+
sample_values: c.sampleValues,
|
|
631
|
+
})),
|
|
632
|
+
};
|
|
633
|
+
}
|
|
634
|
+
|
|
635
|
+
case "suggest_config": {
|
|
636
|
+
const path = sanitizePath(String(args["path"]));
|
|
637
|
+
const rows = readFile(path);
|
|
638
|
+
const profile = profileRows(rows);
|
|
639
|
+
const exact: string[] = [];
|
|
640
|
+
const fuzzy: Record<string, number> = {};
|
|
641
|
+
const blocking: string[] = [];
|
|
642
|
+
|
|
643
|
+
for (const col of profile.columns) {
|
|
644
|
+
if (col.nullRate > 0.2) continue;
|
|
645
|
+
if (col.inferredType === "email") {
|
|
646
|
+
if (col.cardinalityRatio >= 0.5) exact.push(col.name);
|
|
647
|
+
} else if (col.inferredType === "zip") {
|
|
648
|
+
blocking.push(col.name);
|
|
649
|
+
} else if (col.inferredType === "name") {
|
|
650
|
+
fuzzy[col.name] = 0.85;
|
|
651
|
+
} else if (col.inferredType === "phone") {
|
|
652
|
+
if (col.cardinalityRatio >= 0.5) exact.push(col.name);
|
|
653
|
+
} else if (col.inferredType === "geo") {
|
|
654
|
+
blocking.push(col.name);
|
|
655
|
+
} else if (col.inferredType === "text" && col.avgLength > 4) {
|
|
656
|
+
fuzzy[col.name] = 0.8;
|
|
657
|
+
}
|
|
658
|
+
}
|
|
659
|
+
|
|
660
|
+
return {
|
|
661
|
+
row_count: profile.rowCount,
|
|
662
|
+
suggested: {
|
|
663
|
+
exact,
|
|
664
|
+
fuzzy,
|
|
665
|
+
blocking,
|
|
666
|
+
threshold: 0.85,
|
|
667
|
+
},
|
|
668
|
+
};
|
|
669
|
+
}
|
|
670
|
+
|
|
671
|
+
case "evaluate": {
|
|
672
|
+
const path = sanitizePath(String(args["path"]));
|
|
673
|
+
const gtPath = sanitizePath(String(args["ground_truth"]));
|
|
674
|
+
const idColA =
|
|
675
|
+
typeof args["id_col_a"] === "string" ? (args["id_col_a"] as string) : "id_a";
|
|
676
|
+
const idColB =
|
|
677
|
+
typeof args["id_col_b"] === "string" ? (args["id_col_b"] as string) : "id_b";
|
|
678
|
+
const rows = readFile(path);
|
|
679
|
+
const gtRows = readFile(gtPath);
|
|
680
|
+
const options = buildDedupeOptions(args);
|
|
681
|
+
const result = dedupe(rows, options);
|
|
682
|
+
const truth = loadGroundTruthPairs(gtRows, idColA, idColB);
|
|
683
|
+
const metrics = evaluatePairs(result.scoredPairs, truth);
|
|
684
|
+
return {
|
|
685
|
+
tp: metrics.truePositives,
|
|
686
|
+
fp: metrics.falsePositives,
|
|
687
|
+
fn: metrics.falseNegatives,
|
|
688
|
+
precision: metrics.precision,
|
|
689
|
+
recall: metrics.recall,
|
|
690
|
+
f1: metrics.f1,
|
|
691
|
+
total_predicted: result.scoredPairs.length,
|
|
692
|
+
total_truth: truth.length,
|
|
693
|
+
};
|
|
694
|
+
}
|
|
695
|
+
|
|
696
|
+
case "find_exact_matches": {
|
|
697
|
+
const path = sanitizePath(String(args["path"]));
|
|
698
|
+
const field = String(args["field"]);
|
|
699
|
+
const transforms = asStringArray(args["transforms"]) ?? ["lowercase", "strip"];
|
|
700
|
+
const rows = addRowIds(readFile(path));
|
|
701
|
+
const mk = makeMatchkeyConfig({
|
|
702
|
+
name: "adhoc_exact",
|
|
703
|
+
type: "exact",
|
|
704
|
+
fields: [makeMatchkeyField({ field, transforms, scorer: "exact" })],
|
|
705
|
+
});
|
|
706
|
+
const pairs = findExactMatches(rows, mk);
|
|
707
|
+
return {
|
|
708
|
+
pair_count: pairs.length,
|
|
709
|
+
pairs: pairs.slice(0, 100).map((p) => [p.idA, p.idB, p.score]),
|
|
710
|
+
};
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
case "find_fuzzy_matches": {
|
|
714
|
+
const path = sanitizePath(String(args["path"]));
|
|
715
|
+
const field = String(args["field"]);
|
|
716
|
+
const scorer =
|
|
717
|
+
typeof args["scorer"] === "string" ? (args["scorer"] as string) : "jaro_winkler";
|
|
718
|
+
const threshold =
|
|
719
|
+
typeof args["threshold"] === "number" ? (args["threshold"] as number) : 0.85;
|
|
720
|
+
const transforms = asStringArray(args["transforms"]) ?? ["lowercase", "strip"];
|
|
721
|
+
const rows = addRowIds(readFile(path));
|
|
722
|
+
const mk = makeMatchkeyConfig({
|
|
723
|
+
name: "adhoc_fuzzy",
|
|
724
|
+
type: "weighted",
|
|
725
|
+
fields: [makeMatchkeyField({ field, transforms, scorer })],
|
|
726
|
+
threshold,
|
|
727
|
+
});
|
|
728
|
+
const pairs = findFuzzyMatches(rows, mk);
|
|
729
|
+
return {
|
|
730
|
+
pair_count: pairs.length,
|
|
731
|
+
pairs: pairs.slice(0, 100).map((p) => [p.idA, p.idB, p.score]),
|
|
732
|
+
};
|
|
733
|
+
}
|
|
734
|
+
|
|
735
|
+
case "build_clusters": {
|
|
736
|
+
const path = sanitizePath(String(args["path"]));
|
|
737
|
+
const options = buildDedupeOptions(args);
|
|
738
|
+
const rows = readFile(path);
|
|
739
|
+
const result = dedupe(rows, options);
|
|
740
|
+
const clusters: Array<{
|
|
741
|
+
cluster_id: number;
|
|
742
|
+
size: number;
|
|
743
|
+
confidence: number;
|
|
744
|
+
quality: string;
|
|
745
|
+
members: readonly number[];
|
|
746
|
+
}> = [];
|
|
747
|
+
for (const [cid, info] of result.clusters.entries()) {
|
|
748
|
+
clusters.push({
|
|
749
|
+
cluster_id: cid,
|
|
750
|
+
size: info.size,
|
|
751
|
+
confidence: info.confidence,
|
|
752
|
+
quality: info.clusterQuality,
|
|
753
|
+
members: info.members,
|
|
754
|
+
});
|
|
755
|
+
}
|
|
756
|
+
return {
|
|
757
|
+
cluster_count: clusters.length,
|
|
758
|
+
clusters: clusters.slice(0, 200),
|
|
759
|
+
};
|
|
760
|
+
}
|
|
761
|
+
|
|
762
|
+
case "list_scorers":
|
|
763
|
+
return { scorers: [...VALID_SCORERS] };
|
|
764
|
+
|
|
765
|
+
case "list_transforms":
|
|
766
|
+
return { transforms: [...VALID_TRANSFORMS] };
|
|
767
|
+
|
|
768
|
+
case "list_strategies":
|
|
769
|
+
return { strategies: [...VALID_STRATEGIES] };
|
|
770
|
+
|
|
771
|
+
case "list_blocking_strategies":
|
|
772
|
+
return {
|
|
773
|
+
strategies: [
|
|
774
|
+
"static",
|
|
775
|
+
"adaptive",
|
|
776
|
+
"sorted_neighborhood",
|
|
777
|
+
"multi_pass",
|
|
778
|
+
"ann",
|
|
779
|
+
"canopy",
|
|
780
|
+
"ann_pairs",
|
|
781
|
+
"learned",
|
|
782
|
+
],
|
|
783
|
+
};
|
|
784
|
+
|
|
785
|
+
case "server_info":
|
|
786
|
+
return {
|
|
787
|
+
name: "goldenmatch-js",
|
|
788
|
+
version: "0.1.0",
|
|
789
|
+
tool_count: TOOLS.length,
|
|
790
|
+
description:
|
|
791
|
+
"Node-only GoldenMatch MCP server over stdio (JSON-RPC 2.0)",
|
|
792
|
+
};
|
|
793
|
+
|
|
794
|
+
case "read_file": {
|
|
795
|
+
const path = sanitizePath(String(args["path"]));
|
|
796
|
+
const limit =
|
|
797
|
+
typeof args["limit"] === "number" ? Math.max(0, Math.floor(args["limit"] as number)) : 100;
|
|
798
|
+
const rows = readFile(path);
|
|
799
|
+
return {
|
|
800
|
+
total: rows.length,
|
|
801
|
+
returned: Math.min(rows.length, limit),
|
|
802
|
+
rows: rows.slice(0, limit),
|
|
803
|
+
};
|
|
804
|
+
}
|
|
805
|
+
|
|
806
|
+
case "write_csv": {
|
|
807
|
+
const path = sanitizePath(String(args["path"]));
|
|
808
|
+
const rowsArg = args["rows"];
|
|
809
|
+
if (!Array.isArray(rowsArg)) {
|
|
810
|
+
throw new Error("rows must be an array of objects");
|
|
811
|
+
}
|
|
812
|
+
writeCsv(path, rowsArg as Row[]);
|
|
813
|
+
return { written: rowsArg.length, path };
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
default:
|
|
817
|
+
return { error: `Unknown tool: ${name}` };
|
|
818
|
+
}
|
|
819
|
+
} catch (err) {
|
|
820
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
821
|
+
return { error: msg };
|
|
822
|
+
}
|
|
823
|
+
}
|
|
824
|
+
|
|
825
|
+
// ---------------------------------------------------------------------------
|
|
826
|
+
// JSON-RPC over stdio
|
|
827
|
+
// ---------------------------------------------------------------------------
|
|
828
|
+
|
|
829
|
+
/**
 * Loose shape of one incoming JSON-RPC message, as assumed right after
 * `JSON.parse`. Every member is optional because the payload comes from the
 * client unvalidated; readers must tolerate any of them being absent.
 */
interface JsonRpcRequest {
  /** Protocol version tag sent by the client (responses use "2.0"). */
  jsonrpc?: string;
  /** Correlation id copied into the response; a missing id is treated as null. */
  id?: string | number | null;
  /** Name of the method to dispatch on, e.g. "initialize" or "tools/call". */
  method?: string;
  /** Method arguments; for "tools/call" this holds `name` and `arguments`. */
  params?: Record<string, unknown>;
}
|
|
835
|
+
|
|
836
|
+
/**
 * Emit one message on stdout as a single line: the JSON serialization of
 * `msg` followed by a newline terminator.
 */
function writeMessage(msg: Record<string, unknown>): void {
  const frame = `${JSON.stringify(msg)}\n`;
  process.stdout.write(frame);
}
|
|
839
|
+
|
|
840
|
+
/**
 * Start the MCP server: read JSON-RPC 2.0 messages, one per line, from stdin
 * and write one JSON response per line to stdout. Intended for Claude
 * Desktop / any MCP client using the stdio transport.
 *
 * Handled methods are `initialize`, `tools/list` and `tools/call`. Any other
 * request is answered with a JSON-RPC "Method not found" error (-32601), and
 * an exception raised while handling a request is reported as an error
 * response with code -32603.
 *
 * Messages that carry no `id` are JSON-RPC notifications: they are processed
 * but never answered, not even with an error. Lines that are not valid JSON,
 * or whose JSON value is not an object, are logged to stderr (via
 * console.warn) and skipped without crashing the loop.
 *
 * When stdin closes, the process exits with code 0 once every request that
 * is still being handled has settled.
 */
export function startMcpServer(): void {
  const rl = createInterface({ input: process.stdin, terminal: false });

  // Handlers that have started but not settled yet; drained before exiting so
  // a response is not lost when the client closes stdin right after a request.
  const inFlight = new Set<Promise<void>>();

  rl.on("line", (line: string) => {
    if (line.trim() === "") return;

    let parsed: unknown;
    try {
      parsed = JSON.parse(line);
    } catch (err) {
      console.warn(
        "MCP parse error:",
        err instanceof Error ? err.message : String(err),
      );
      return;
    }

    // JSON.parse also accepts `null`, primitives and arrays. Reading `.id`
    // from `null` would throw outside any try block and take the process
    // down, so anything that is not a plain object is skipped like bad JSON.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      console.warn("MCP parse error:", "message is not a JSON object");
      return;
    }
    const req = parsed as JsonRpcRequest;

    // JSON-RPC 2.0: a message without an `id` member is a notification and
    // the server must not reply to it. An explicit `id: null` is still a
    // request and is answered.
    const expectsReply = req.id !== undefined;
    const id = req.id ?? null;
    const reply = (body: Record<string, unknown>): void => {
      if (expectsReply) writeMessage({ jsonrpc: "2.0", id, ...body });
    };

    const task: Promise<void> = (async () => {
      try {
        if (req.method === "initialize") {
          reply({
            result: {
              protocolVersion: "2024-11-05",
              serverInfo: { name: "goldenmatch-js", version: "0.1.0" },
              capabilities: { tools: {} },
            },
          });
          return;
        }

        if (req.method === "tools/list") {
          reply({ result: { tools: TOOLS } });
          return;
        }

        if (req.method === "tools/call") {
          const params = req.params ?? {};
          const toolName = String(params["name"] ?? "");
          const toolArgs =
            (params["arguments"] as Record<string, unknown> | undefined) ?? {};
          const result = await handleTool(toolName, toolArgs);
          // Tool output is returned as one text content item holding JSON.
          reply({
            result: {
              content: [{ type: "text", text: JSON.stringify(result) }],
            },
          });
          return;
        }

        if (
          req.method === "notifications/initialized" ||
          req.method === "notifications/cancelled"
        ) {
          // Known notifications: nothing to do and nothing to send back,
          // even if a client mistakenly attached an id.
          return;
        }

        reply({
          error: { code: -32601, message: `Method not found: ${req.method}` },
        });
      } catch (err) {
        const msg = err instanceof Error ? err.message : String(err);
        reply({ error: { code: -32603, message: msg } });
      }
    })();

    inFlight.add(task);
    // Deregister outside the async body: the body may finish synchronously,
    // before `task` has been assigned.
    void task.finally(() => {
      inFlight.delete(task);
    });
  });

  rl.on("close", () => {
    // Clean exit when stdin closes, but only after pending handlers have had
    // the chance to write their responses.
    void Promise.allSettled([...inFlight]).then(() => {
      process.exit(0);
    });
  });
}
|
|
937
|
+
|
|
938
|
+
// Re-export for callers that want to pre-warm / test
|
|
939
|
+
export { readFileSync, isAbsolute };
|
|
940
|
+
export { writeJson };
|