@samesake/cli 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 octalpixel
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in
13
+ all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21
+ THE SOFTWARE.
package/README.md ADDED
@@ -0,0 +1,9 @@
1
+ # @samesake/cli
2
+
3
+ Operations CLI for samesake — apply schemas, seed data, match queries, calibrate thresholds, and manage aliases over HTTP.
4
+
5
+ ```bash
6
+ bun add -g @samesake/cli
7
+ ```
8
+
9
+ See the [samesake README](https://github.com/asyncdotengineering/samesake#readme) for setup, examples, and docs.
package/dist/index.js ADDED
@@ -0,0 +1,808 @@
1
+ #!/usr/bin/env node
2
+
3
+ // src/index.ts
4
+ import { createMatcher, prepareMigrations } from "@samesake/server";
5
+ import { readFileSync, existsSync, writeFileSync, watch } from "fs";
6
+ import { basename, dirname, join, resolve } from "path";
7
+ import { pathToFileURL } from "url";
8
// Arguments after the runtime binary and script path; the first one names the subcommand.
var args = process.argv.slice(2);
var [cmd] = args;

// Connection settings come from the environment, with local-development fallbacks.
// Note: this module-level `URL` shadows the global URL class for the whole file.
var URL = process.env.SAMESAKE_URL ?? "http://localhost:3030";
var KEY = process.env.SAMESAKE_API_KEY ?? "dev-key-please-change";
// Optional default for --project; stays undefined when the variable is not set.
var PROJECT = process.env.SAMESAKE_PROJECT;
13
/**
 * Build the HTTP headers that authenticate a request.
 *
 * @returns {{Authorization: string}} Bearer-token header built from the module-level KEY.
 */
function header() {
  const authorization = `Bearer ${KEY}`;
  return { Authorization: authorization };
}
16
/**
 * Report a fatal CLI error on stderr and terminate the process with exit code 1.
 *
 * @param {unknown} msg - Text appended after the "error: " prefix.
 */
function fail(msg) {
  const line = `error: ${msg}`;
  console.error(line);
  return process.exit(1);
}
20
/**
 * Parse `--name=value`, `--name value` and bare `--name` arguments into a map.
 *
 * Arguments that do not start with `--` are ignored unless they are consumed
 * as the value of the preceding flag. A bare flag is stored as the string "true".
 *
 * @param {string[]} rest - Raw command-line arguments.
 * @returns {Record<string, string>} Flag names (without the leading dashes) mapped to values.
 */
function parseFlags(rest) {
  const flags = {};
  for (let i = 0; i < rest.length; i++) {
    const arg = rest[i];
    if (!arg.startsWith("--")) continue;

    const eq = arg.indexOf("=");
    if (eq > 0) {
      // Only the first "=" separates name from value, so values may contain "=".
      flags[arg.slice(2, eq)] = arg.slice(eq + 1);
      continue;
    }

    const next = rest[i + 1];
    // Compare against undefined rather than truthiness: an explicit empty
    // string (`--text ""`) is a real value and must not be turned into "true".
    if (next !== undefined && !next.startsWith("--")) {
      flags[arg.slice(2)] = next;
      i++;
    } else {
      flags[arg.slice(2)] = "true";
    }
  }
  return flags;
}
41
/**
 * Collect every `--scope k=v` / `--scope=k=v` argument into a scope object.
 *
 * Pairs without "=" or with an empty key are ignored; an empty value (`k=`)
 * is kept as the empty string.
 *
 * @param {string[]} rest - Raw command-line arguments.
 * @returns {Record<string, string>} Scope keys mapped to their values.
 */
function parseScopeArgs(rest) {
  const scope = {};
  const prefix = "--scope=";
  for (let i = 0; i < rest.length; i++) {
    const arg = rest[i];
    let pair;
    if (arg === "--scope") {
      pair = rest[++i];
    } else if (arg.startsWith(prefix)) {
      pair = arg.slice(prefix.length);
    } else {
      continue;
    }
    if (!pair) continue;

    // Split on the FIRST "=" only. `split("=", 2)` would silently drop
    // everything after a second "=", truncating values such as "k=a=b".
    const eq = pair.indexOf("=");
    if (eq <= 0) continue;
    scope[pair.slice(0, eq)] = pair.slice(eq + 1);
  }
  return scope;
}
55
/**
 * Issue an authenticated GET against the matcher and return the parsed JSON body.
 *
 * A non-2xx response terminates the CLI through fail(). The body is read as
 * text first so that a non-JSON error page (for example from a proxy) is
 * reported as the HTTP failure it is, instead of a JSON SyntaxError.
 *
 * @param {string} path - Path (and query) appended to the configured base URL.
 * @returns {Promise<any>} Parsed JSON response body.
 * @throws {SyntaxError} When a successful response is not valid JSON.
 */
async function get(path) {
  const res = await fetch(`${URL}${path}`, { headers: header() });
  const raw = await res.text();

  let body;
  let parsed = true;
  try {
    body = JSON.parse(raw);
  } catch {
    parsed = false;
  }

  if (!res.ok) {
    const detail = parsed ? JSON.stringify(body) : `HTTP ${res.status} ${raw.slice(0, 200)}`;
    fail(`GET ${path} failed: ${detail}`);
  }
  if (!parsed) {
    throw new SyntaxError(`GET ${path} returned a non-JSON response (HTTP ${res.status})`);
  }
  return body;
}
61
/**
 * Issue an authenticated JSON POST against the matcher and return the parsed JSON body.
 *
 * A non-2xx response terminates the CLI through fail(). The response is read
 * as text first so that a non-JSON error page is reported as the HTTP failure
 * it is, instead of a JSON SyntaxError.
 *
 * @param {string} path - Path appended to the configured base URL.
 * @param {unknown} body - Value serialised with JSON.stringify as the request body.
 * @returns {Promise<any>} Parsed JSON response body.
 * @throws {SyntaxError} When a successful response is not valid JSON.
 */
async function post(path, body) {
  const res = await fetch(`${URL}${path}`, {
    method: "POST",
    headers: { ...header(), "Content-Type": "application/json" },
    body: JSON.stringify(body)
  });
  const raw = await res.text();

  let resp;
  let parsed = true;
  try {
    resp = JSON.parse(raw);
  } catch {
    parsed = false;
  }

  if (!res.ok) {
    const detail = parsed ? JSON.stringify(resp) : `HTTP ${res.status} ${raw.slice(0, 200)}`;
    fail(`POST ${path} failed: ${detail}`);
  }
  if (!parsed) {
    throw new SyntaxError(`POST ${path} returned a non-JSON response (HTTP ${res.status})`);
  }
  return resp;
}
71
// cmdHelp: prints the CLI usage text (commands, options, environment variables
// and examples) to stdout with a single console.log call. It takes no
// arguments and awaits nothing; `async` only makes it return a promise.
// NOTE(review): the `[--limit=N]` line that follows the `review-correct`
// entry looks misplaced (review-correct reads no limit) — confirm which
// command it was meant to document.
+ async function cmdHelp() {
72
+ console.log(`
73
+ samesake \u2014 commerce search and entity resolution CLI
74
+
75
+ USAGE
76
+ samesake <command> [options]
77
+
78
+ PROJECT LIFECYCLE
79
+ init --name=NAME [--out=PATH] Scaffold a new samesake.config.ts
80
+ apply --project=NAME --config=PATH Apply schema to a project
81
+ list-projects List every applied project
82
+ seed --project=NAME --file=PATH Load JSON test data
83
+
84
+ MATCHING & FEEDBACK
85
+ match --project=NAME --kind=K --text=T --scope k=v
86
+ [--limit=N] [--json] Run a single match
87
+ explain --project=NAME --kind=K --query-text=T --candidate-id=ID
88
+ --scope k=v [--phone=P] [--json] Per-channel scoring breakdown
89
+ confirm --project=NAME --kind=K --query-text=T --chosen=ID --scope k=v
90
+ Mark a candidate as correct (writes alias)
91
+ decline --project=NAME --kind=K --query-text=T --declined=ID --scope k=v
92
+ Mark a candidate as wrong (penalty)
93
+
94
+ ANALYSIS
95
+ calibrate --project=NAME --kind=K --scope k=v [--min-sample=N] [--json]
96
+ F1-optimise the auto-link threshold
97
+ duplicates --project=NAME [--kind=K] [--scope k=v]
98
+ [--score-floor=0.95] [--min-cluster=2] [--limit=100] [--json]
99
+ List dedup clusters
100
+ variants --project=NAME [--kind=K] [--scope k=v]
101
+ [--min-cluster=3] [--limit=50] [--json]
102
+ List variant suggestions (parse-shape only)
103
+
104
+ DEV & EVAL
105
+ dev --config=PATH --project=NAME [--port=8788]
106
+ Load config, migrate+apply, serve matcher on port, watch+re-apply on change
107
+ eval --golden=FILE --project=NAME --collection=COL [--base=URL]
108
+ Run golden queries against search (retrieval only \u2014 no LLM judge)
109
+
110
+ OPERATIONS
111
+ healthz Check matcher health
112
+ doctor Full env + service + projects health report
113
+ migrate --db=URL [--schema=public] Apply system DDL directly to Postgres (no matcher needed).
114
+ Run BEFORE booting the app \u2014 the prisma-migrate-deploy /
115
+ drizzle-kit-push pattern. Idempotent, safe in CI.
116
+ migrate --project=NAME --config=PATH --plan Show collection schema migration plan (dry-run).
117
+ migrate --project=NAME --config=PATH --apply Apply collection schema migrations.
118
+ [--allow-destructive] [--db=URL]
119
+
120
+ SEARCH PIPELINE
121
+ ingest --project=NAME --collection=COL Pull configured sources into collection
122
+ enrich --project=NAME --collection=COL Run enrichment pipeline on pending docs
123
+ [--concurrency=N] [--limit=N]
124
+ index --project=NAME --collection=COL Embed + populate filter columns
125
+ search-explain --project=NAME --collection=COL --q=QUERY [--json]
126
+ Per-channel search ranking breakdown
127
+ rotate-key --project=NAME Issue a new per-project API key (master only)
128
+ review-list --project=NAME --collection=COL [--limit=20] [--max-confidence=0.7]
129
+ List low-confidence enrichments for review
130
+ review-correct --project=NAME --collection=COL --id=DOC --field=value [...]
131
+ Apply human corrections (arrays comma-separated)
132
+ [--limit=N]
133
+
134
+ GLOBAL ENV
135
+ SAMESAKE_URL (default http://localhost:3030)
136
+ SAMESAKE_API_KEY (default dev-key-please-change)
137
+ SAMESAKE_PROJECT default --project for every command
138
+ SAMESAKE_DATABASE_URL used by 'migrate' if --db is omitted
139
+ SAMESAKE_SCHEMA used by 'migrate' if --schema is omitted (default "public")
140
+
141
+ EXAMPLES
142
+ # Deploy pipeline: migrate first, then start the app.
143
+ samesake migrate --db=$DATABASE_URL --schema=public
144
+ bun apps/matcher/src/index.ts &
145
+
146
+ # Author + use a project
147
+ samesake init --name=mystore --out=./samesake.config.ts
148
+ samesake apply --project=hello --config=examples/hello/samesake.config.ts
149
+ samesake seed --project=hello --file=examples/hello/seed.json
150
+ samesake match --project=hello --kind=customer --text="Smyth" --scope tenantId=acme
151
+ samesake explain --project=hello --kind=customer --query-text=Smyth --candidate-id=1 --scope tenantId=acme
152
+ samesake calibrate --project=hello --kind=customer --scope tenantId=acme
153
+ samesake doctor
154
+ `);
155
+ }
156
/**
 * `healthz` command: fetch /v1/healthz (no Authorization header is sent) and
 * pretty-print the JSON body.
 *
 * A non-2xx status sets a non-zero exit code so scripts and CI can rely on
 * the command's exit status; the body is still printed for diagnosis.
 */
async function cmdHealthz() {
  const res = await fetch(`${URL}/v1/healthz`);
  const payload = await res.json();
  console.log(JSON.stringify(payload, null, 2));
  if (!res.ok) process.exitCode = 1;
}
161
/**
 * `apply` command: import a config module, collect its exported entities and
 * POST them to the project's schema/apply endpoint.
 *
 * An export counts as an entity when it is an object with `name`, `fields`
 * and `scopes` properties.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project` and `config`.
 */
async function cmdApply(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const configPath = flags.config ?? fail("--config is required");

  const abs = resolve(configPath);
  if (!existsSync(abs)) fail(`config not found: ${abs}`);

  const mod = await import(pathToFileURL(abs).href);
  const looksLikeEntity = (v) =>
    v && typeof v === "object" && "name" in v && "fields" in v && "scopes" in v;
  const entities = Object.values(mod).filter(looksLikeEntity);
  if (entities.length === 0) fail("no entities exported from config file");

  const suffix = entities.length === 1 ? "y" : "ies";
  console.log(`Applying ${entities.length} entit${suffix} to project '${project}'...`);

  const body = await post(`/v1/projects/${project}/schema/apply`, { entities });
  console.log(`\u2713 Applied schema to ${body.schema}`);
  console.log(` - ${body.appliedStatements} DDL statements`);
  console.log(` - entities: ${body.entities.join(", ")}`);
}
180
/**
 * `seed` command: load a JSON seed file and upsert its items in one batch.
 *
 * The file is read as `{ entityType, items }`: `entityType` becomes part of
 * the request path and `items` is sent as the batch. Unparseable or
 * mis-shaped files are reported through fail() instead of surfacing as a raw
 * SyntaxError/TypeError.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project` and `file`.
 */
async function cmdSeed(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const file = flags.file ?? fail("--file is required");
  const abs = resolve(file);
  if (!existsSync(abs)) fail(`file not found: ${abs}`);

  let data;
  try {
    data = JSON.parse(readFileSync(abs, "utf8"));
  } catch (e) {
    fail(`could not read seed file ${abs}: ${e instanceof Error ? e.message : e}`);
  }
  if (!data || typeof data.entityType !== "string" || !Array.isArray(data.items)) {
    fail(`seed file ${abs} must be a JSON object with "entityType" (string) and "items" (array)`);
  }

  console.log(`Seeding ${data.items.length} ${data.entityType} into '${project}'...`);
  const start = Date.now();
  const body = await post(
    `/v1/projects/${project}/entities/${data.entityType}/upsert-batch`,
    { items: data.items }
  );
  const dur = ((Date.now() - start) / 1e3).toFixed(2);
  console.log(`\u2713 ${body.ids.length} rows seeded in ${dur}s`);
}
195
/**
 * `match` command: run one match query and print the ranked candidates.
 *
 * With `--json` the raw response is printed instead of the table.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`, `kind`, `text`, `limit`, `json`.
 * @param {string[]} rest - Raw arguments, scanned for `--scope k=v` pairs.
 */
async function cmdMatch(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? fail("--kind is required");
  const text = flags.text ?? fail("--text is required");
  const scope = parseScopeArgs(rest);

  // Default to five candidates when --limit is absent.
  let limit = 5;
  if (flags.limit) limit = Number(flags.limit);

  const payload = { kind, text, scope, opts: { limit } };
  const m = await post(`/v1/projects/${project}/match`, payload);

  if (flags.json === "true") {
    console.log(JSON.stringify(m, null, 2));
    return;
  }

  const tick = (hit) => (hit ? "\u2713" : "\xB7");
  console.log(`Top ${m.candidates.length} candidates for "${text}" (scope: ${JSON.stringify(scope)})`);
  console.log("");
  for (const [idx, c] of m.candidates.entries()) {
    // Names longer than 36 characters are cut to 33 plus an ellipsis.
    const name = c.name.length > 36 ? c.name.slice(0, 33) + "..." : c.name;
    const parts = c.components;
    console.log(
      ` ${idx + 1}. [id=${c.entityId.padStart(3)}] ${name.padEnd(36)} combined: ${c.combined.toFixed(3)} cos: ${parts.cosSim?.toFixed(2) ?? " --"} trgm: ${parts.trgmSim.toFixed(2)} phon: ${tick(parts.phonEq)} alias: ${tick(parts.aliasHit)}`
    );
  }

  if (!m.resolved) return;
  console.log("");
  console.log(`Resolved: ${m.resolved.entityId} (auto-link, confidence ${m.resolved.confidence.toFixed(3)})`);
}
225
/**
 * `explain` command: print the per-channel scoring breakdown for one
 * query/candidate pair.
 *
 * With `--json` the raw response is printed instead of the table.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`, `kind`,
 *   `query-text` (or `text`), `candidate-id` (or `candidate`), `phone`, `json`.
 * @param {string[]} rest - Raw arguments, scanned for `--scope k=v` pairs.
 */
async function cmdExplain(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? fail("--kind is required");
  const queryText = flags["query-text"] ?? flags.text ?? fail("--query-text is required");
  const candidateId = flags["candidate-id"] ?? flags.candidate ?? fail("--candidate-id is required");
  const scope = parseScopeArgs(rest);
  const { phone } = flags;

  const r = await post(`/v1/projects/${project}/explain`, {
    kind,
    queryText,
    candidateId,
    scope,
    phone
  });

  if (flags.json === "true") {
    console.log(JSON.stringify(r, null, 2));
    return;
  }

  console.log(`Query: "${r.query.text}" \u2192 normalised "${r.query.normalised}"`);
  console.log(`Candidate: ${r.candidate.name} (id=${r.candidate.entityId})`);
  console.log("");
  console.log(`Channel Value Weight Contribution`);
  console.log(`\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500 \u2500\u2500\u2500\u2500\u2500\u2500 \u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500\u2500`);

  // Channel values may be null, boolean or numeric.
  const formatValue = (value) => {
    if (value === null) return "(null)";
    return typeof value === "boolean" ? String(value) : value.toFixed(3);
  };
  const printRow = (label, s) => {
    const cells = [
      label.padEnd(14),
      formatValue(s.value).padEnd(14),
      s.weight.toFixed(2).padEnd(8),
      s.contribution.toFixed(3)
    ];
    console.log(cells.join(" "));
  };

  const channels = [
    ["cosine", r.scores.cosSim],
    ["trigram", r.scores.trgmSim],
    ["phonetic-eq", r.scores.phonEq],
    ["phone-exact", r.scores.phoneEq],
    ["alias-hit", r.scores.aliasHit]
  ];
  for (const [label, score] of channels) printRow(label, score);

  console.log("");
  console.log(`Combined: ${r.combined.toFixed(3)}`);
  console.log(`Decision: ${r.decision} (auto-link \u2265 ${r.thresholds.autoLink}, suggest \u2265 ${r.thresholds.suggest})`);
  if (r.decisiveChannels.length > 0) {
    console.log(`Decisive channels: ${r.decisiveChannels.join(", ")}`);
  }
}
268
/**
 * `confirm` command: POST a confirmation for a query text and print the response.
 *
 * `--chosen` is optional here; when it is absent `chosenEntityId` is sent as null.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`, `kind`,
 *   `query-text` (or `text`) and `chosen`.
 * @param {string[]} rest - Raw arguments, scanned for `--scope k=v` pairs.
 */
async function cmdConfirm(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? fail("--kind is required");
  const queryText = flags["query-text"] ?? flags.text ?? fail("--query-text is required");
  const chosenEntityId = flags.chosen ?? null;
  const scope = parseScopeArgs(rest);

  const payload = { kind, queryText, scope, chosenEntityId };
  const body = await post(`/v1/projects/${project}/confirm`, payload);
  console.log(`\u2713 ${JSON.stringify(body)}`);
}
282
/**
 * `decline` command: POST a rejection of a candidate for a query text and
 * print the response.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`, `kind`,
 *   `query-text` (or `text`) and `declined` (or `declinedId`).
 * @param {string[]} rest - Raw arguments, scanned for `--scope k=v` pairs.
 */
async function cmdDecline(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? fail("--kind is required");
  const queryText = flags["query-text"] ?? flags.text ?? fail("--query-text is required");
  const declinedEntityId = flags.declined ?? flags.declinedId ?? fail("--declined=ID is required");
  const scope = parseScopeArgs(rest);

  const payload = { kind, queryText, scope, declinedEntityId };
  const body = await post(`/v1/projects/${project}/decline`, payload);
  console.log(`\u2713 ${JSON.stringify(body)}`);
}
296
/**
 * `calibrate` command: request a calibrated auto-link threshold and print
 * the resulting metrics.
 *
 * With `--json` the raw response is printed instead of the summary.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`, `kind`, `min-sample`, `json`.
 * @param {string[]} rest - Raw arguments, scanned for `--scope k=v` pairs.
 */
async function cmdCalibrate(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? fail("--kind is required");
  const scope = parseScopeArgs(rest);

  // Left undefined when --min-sample is absent, so JSON.stringify omits the key.
  let minSampleSize;
  if (flags["min-sample"]) minSampleSize = Number(flags["min-sample"]);

  const r = await post(`/v1/projects/${project}/calibrate`, { kind, scope, minSampleSize });

  if (flags.json === "true") {
    console.log(JSON.stringify(r, null, 2));
    return;
  }

  console.log(`Calibrated auto-link threshold for ${kind} @ ${JSON.stringify(scope)}`);
  console.log("");
  console.log(` threshold: ${r.threshold.toFixed(3)}`);
  console.log(` F1: ${r.f1.toFixed(3)}`);
  console.log(` precision: ${r.precision.toFixed(3)}`);
  console.log(` recall: ${r.recall.toFixed(3)}`);
  console.log(` sample size: ${r.sampleSize} (${r.positives} positives, ${r.negatives} negatives)`);
}
318
/**
 * `duplicates` command: list duplicate clusters for an entity kind.
 *
 * `--kind` defaults to "customer". Optional flags are forwarded as query
 * parameters only when present. With `--json` the raw response is printed.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`, `kind`,
 *   `score-floor`, `min-cluster`, `limit`, `json`.
 * @param {string[]} rest - Raw arguments, scanned for `--scope k=v` pairs.
 */
async function cmdDuplicates(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? "customer";
  const scope = parseScopeArgs(rest);

  const params = new URLSearchParams();
  params.set("kind", kind);
  if (Object.keys(scope).length > 0) params.set("scope", JSON.stringify(scope));
  // Flag name on the left, query-parameter name on the right.
  const forwarded = [
    ["score-floor", "scoreFloor"],
    ["min-cluster", "minClusterSize"],
    ["limit", "limit"]
  ];
  for (const [flagName, paramName] of forwarded) {
    if (flags[flagName]) params.set(paramName, flags[flagName]);
  }

  const r = await get(`/v1/projects/${project}/duplicates?${params.toString()}`);

  if (flags.json === "true") {
    console.log(JSON.stringify(r, null, 2));
    return;
  }

  const count = r.clusters.length;
  if (count === 0) {
    console.log("No duplicate clusters above floor.");
    return;
  }

  console.log(`${count} cluster${count === 1 ? "" : "s"} for ${kind}:`);
  for (const c of r.clusters) {
    console.log(`\ncluster (n=${c.totalCount}, min-score=${c.estimatedConfidence.toFixed(3)})`);
    for (const m of c.members) {
      console.log(` [id=${m.entityId.padStart(3)}] ${m.name}`);
    }
  }
}
346
/**
 * `variants` command: list variant suggestions for an entity kind.
 *
 * `--kind` defaults to "asset". With `--json` the raw response is printed.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`, `kind`,
 *   `min-cluster`, `limit`, `json`.
 * @param {string[]} rest - Raw arguments, scanned for `--scope k=v` pairs.
 */
async function cmdVariants(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? "asset";
  const scope = parseScopeArgs(rest);

  const params = new URLSearchParams();
  params.set("kind", kind);
  if (Object.keys(scope).length > 0) params.set("scope", JSON.stringify(scope));
  if (flags["min-cluster"]) params.set("minClusterSize", flags["min-cluster"]);
  if (flags.limit) params.set("limit", flags.limit);

  const r = await get(`/v1/projects/${project}/variant-suggestions?${params.toString()}`);

  if (flags.json === "true") {
    console.log(JSON.stringify(r, null, 2));
    return;
  }

  const count = r.suggestions.length;
  if (count === 0) {
    console.log("No variant suggestions. (Only parse-shape entities produce these.)");
    return;
  }

  console.log(`${count} variant suggestion${count === 1 ? "" : "s"}:`);
  for (const s of r.suggestions) {
    console.log(`\n${s.proposedBase.suggestedName} (${s.totalCount} members)`);

    const axes = s.detectedAxes
      .map((a) => `${a.axis}:[${a.distinctValues.join(",")}]`)
      .join(" ");
    if (axes) console.log(` axes: ${axes}`);

    for (const m of s.members) {
      // Size renders as value plus optional unit, or stays empty when the value is null.
      let sz = "";
      if (m.size.value !== null) sz = `${m.size.value}${m.size.unit ?? ""}`;
      console.log(` [id=${m.entityId.padStart(3)}] ${m.name.padEnd(40)} variant=${m.variant ?? "\xB7"} size=${sz || "\xB7"}`);
    }
  }
}
376
/**
 * `list-projects` command: print every project returned by GET /v1/projects.
 *
 * With `--json` the raw response is printed instead of the table.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `json`.
 */
async function cmdListProjects(flags) {
  const r = await get(`/v1/projects`);

  if (flags.json === "true") {
    console.log(JSON.stringify(r, null, 2));
    return;
  }

  const total = r.projects.length;
  if (total === 0) {
    console.log("No projects applied yet. Use `samesake apply` to create one.");
    return;
  }

  console.log(`${total} project${total === 1 ? "" : "s"} applied:`);
  for (const p of r.projects) {
    // Only the first ten characters of updatedAt are shown.
    const date = p.updatedAt.slice(0, 10);
    const entityCount = p.entities.length;
    const ents = entityCount > 0 ? p.entities.join(", ") : "(no entities)";
    const suffix = entityCount === 1 ? "y" : "ies";
    console.log(` ${p.slug.padEnd(24)} ${date} ${entityCount} entit${suffix}: ${ents}`);
  }
}
393
/**
 * `search-explain` command: print per-channel ranks for the first ten
 * documents of a search query.
 *
 * With `--json` the raw response is printed instead of the table.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`, `collection`, `q`, `limit`, `json`.
 */
async function cmdSearchExplain(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");
  const q = flags.q ?? fail("--q is required");

  let limit;
  if (flags.limit) limit = Number(flags.limit);

  const body = await post(
    `/v1/projects/${project}/collections/${collection}/search/explain`,
    { q, limit }
  );

  if (flags.json === "true") {
    console.log(JSON.stringify(body, null, 2));
    return;
  }

  console.log(`explain: ${q}`);
  const shown = (body.docs ?? []).slice(0, 10);
  shown.forEach((d) => {
    console.log(
      ` id=${d.id} rrf=${Number(d.rrf_score).toFixed(4)} fts=${d.fts_rank ?? "\xB7"} cos=${d.cosine_rank ?? "\xB7"} spc=${d.spaces_rank ?? "\xB7"}`
    );
  });
}
413
/**
 * `rotate-key` command: POST to the project's rotate-key endpoint and print
 * the returned `apiKey` on its own line.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`.
 */
async function cmdRotateKey(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const endpoint = `/v1/projects/${project}/rotate-key`;
  const { apiKey } = await post(endpoint, {});
  console.log(apiKey);
}
418
/**
 * `ingest` command: trigger ingestion for a collection and report how many
 * documents were upserted (and from which connectors, when listed).
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project` and `collection`.
 */
async function cmdIngest(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");

  const endpoint = `/v1/projects/${project}/collections/${collection}/ingest`;
  const body = await post(endpoint, {});

  let origin = "";
  if (body.connectors) origin = ` from ${body.connectors.join(", ")}`;
  console.log(`\u2713 ingested ${body.upserted} documents${origin}`);
}
427
/**
 * `enrich` command: trigger the enrichment run for a collection and print
 * the enriched / skipped / failed counts.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`,
 *   `collection`, `concurrency`, `limit`.
 */
async function cmdEnrich(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");

  // Absent flags stay undefined so JSON.stringify leaves them out of the body.
  const numberOrUndefined = (raw) => (raw ? Number(raw) : undefined);
  const options = {
    concurrency: numberOrUndefined(flags.concurrency),
    limit: numberOrUndefined(flags.limit)
  };

  const endpoint = `/v1/projects/${project}/collections/${collection}/enrich`;
  const body = await post(endpoint, options);
  console.log(`\u2713 enriched ${body.enriched} (skipped ${body.skipped}, failed ${body.failed})`);
}
439
/**
 * `review-list` command: print the rows returned by the collection's review
 * endpoint, one line per document.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`,
 *   `collection`, `limit`, `max-confidence`.
 */
async function cmdReviewList(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");

  const qs = new URLSearchParams();
  if (flags.limit) qs.set("limit", flags.limit);
  if (flags["max-confidence"]) qs.set("max_confidence", flags["max-confidence"]);

  const rows = await get(`/v1/projects/${project}/collections/${collection}/review?${qs}`);
  if (!rows.length) {
    console.log("no low-confidence enrichments \u2014 nothing to review");
    return;
  }

  rows.forEach((r) => {
    const conf = r.confidence == null ? " n/a" : r.confidence.toFixed(2);
    let unc = "";
    if (r.uncertain_fields.length) unc = ` uncertain: ${r.uncertain_fields.join(",")}`;
    const category = String(r.category).padEnd(12);
    const title = (r.title ?? "").slice(0, 50);
    const corrected = r.corrected ? " [corrected]" : "";
    console.log(`${r.id.padEnd(10)} conf=${conf} ${category} ${title}${unc}${corrected}`);
  });
}
456
/**
 * `review-correct` command: send corrections for one document.
 *
 * Every flag other than `project`, `collection` and `id` is treated as a
 * field correction; a value containing a comma is split into a trimmed array.
 *
 * @param {Record<string, string>} flags - Parsed flags.
 */
async function cmdReviewCorrect(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");
  const id = flags.id ?? fail("--id is required");

  const reserved = new Set(["project", "collection", "id"]);
  const fields = {};
  Object.entries(flags).forEach(([key, raw]) => {
    if (reserved.has(key)) return;
    fields[key] = raw.includes(",") ? raw.split(",").map((part) => part.trim()) : raw;
  });
  if (Object.keys(fields).length === 0) {
    fail("supply corrections as --field=value (arrays comma-separated)");
  }

  const endpoint = `/v1/projects/${project}/collections/${collection}/review/${id}`;
  const body = await post(endpoint, { fields });
  console.log(`\u2713 corrected ${body.corrected.join(", ")} on ${id} (doc re-indexes on next \`index\` run)`);
}
472
/**
 * `index` command: trigger indexing for a collection and print how many
 * documents were indexed.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`, `collection`, `limit`.
 */
async function cmdIndex(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");

  // Left undefined when --limit is absent, so the key is omitted from the JSON body.
  let limit;
  if (flags.limit) limit = Number(flags.limit);

  const endpoint = `/v1/projects/${project}/collections/${collection}/index`;
  const body = await post(endpoint, { limit });
  console.log(`\u2713 indexed ${body.indexed} documents`);
}
481
/**
 * Deterministic placeholder embedding for local development.
 *
 * Each UTF-16 code unit is scaled by 1e-3 and accumulated (modulo 1) into the
 * slot `index % dim`; the result is divided by its Euclidean norm, or by 1
 * when the norm is zero.
 *
 * @param {string | null | undefined} text - Input text; nullish is treated as "".
 * @param {number} dim - Length of the returned vector.
 * @returns {number[]} Vector of `dim` numbers.
 */
function stubDevEmbed(text, dim) {
  const source = text ?? "";
  const buckets = new Array(dim).fill(0);

  for (let pos = 0; pos < source.length; pos += 1) {
    const slot = pos % dim;
    buckets[slot] = (buckets[slot] + source.charCodeAt(pos) * 1e-3) % 1;
  }

  let sumOfSquares = 0;
  for (const value of buckets) {
    sumOfSquares = sumOfSquares + value * value;
  }
  // Fall back to 1 so an all-zero vector is returned unchanged instead of dividing by zero.
  const norm = Math.sqrt(sumOfSquares) || 1;

  return buckets.map((value) => value / norm);
}
490
/**
 * Pick the embedding function used by the `dev` command.
 *
 * Resolution order:
 *   1. an `embed` (or `embedFn`) function exported by the config module;
 *   2. a `stubEmbed` function exported by `stub-embed.ts` next to the config;
 *   3. the built-in stubDevEmbed.
 *
 * @param {string} configPath - Path to the config module.
 * @returns {Promise<Function>} The resolved embedding function.
 */
async function resolveDevEmbed(configPath) {
  const abs = resolve(configPath);
  const configModule = await import(pathToFileURL(abs).href);

  const exported = configModule.embed ?? configModule.embedFn;
  if (typeof exported === "function") return exported;

  const siblingStub = join(dirname(abs), "stub-embed.ts");
  if (existsSync(siblingStub)) {
    const stubModule = await import(pathToFileURL(siblingStub).href);
    if (typeof stubModule.stubEmbed === "function") {
      return async ({ text, dim }) => stubModule.stubEmbed(text ?? "", dim);
    }
  }

  return async ({ text, dim }) => stubDevEmbed(text, dim);
}
504
/**
 * Print the migration plan for a config, then apply it.
 *
 * Calls `matcher.apply` twice: first with `dryRun: true` to obtain the plan,
 * then with `dryRun: false` to execute it.
 *
 * @param {{apply: Function}} matcher - Object exposing `apply(project, config, opts)`.
 * @param {string} project - Project name passed through to `apply`.
 * @param {object} config - Config passed through to `apply`.
 * @param {string} label - Tag shown in the plan heading.
 */
async function applyDevConfig(matcher, project, config, label) {
  const preview = await matcher.apply(project, config, { dryRun: true });
  const planJson = JSON.stringify(preview.plan, null, 2);
  console.log(`[dev] migration plan (${label}):`);
  console.log(planJson);

  const { appliedStatements, schema } = await matcher.apply(project, config, { dryRun: false });
  console.log(`[dev] applied ${appliedStatements} statements \u2192 ${schema}`);
}
511
/**
 * `dev` command: migrate, apply the config, serve the matcher over HTTP and
 * re-apply the config whenever the config file changes.
 *
 * The HTTP server is started with `Bun.serve`, so the command verifies that
 * the Bun runtime is present before it touches the database; otherwise the
 * failure would only appear as a ReferenceError after migrations had run.
 * The function never resolves on its own; SIGINT stops the server, closes
 * the matcher and exits.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `config`,
 *   `project`, `port`, `db`, `api-key`.
 */
async function cmdDev(flags) {
  const configPath = flags.config ?? fail("--config is required");
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const port = flags.port ? Number(flags.port) : 8788;
  if (!Number.isInteger(port) || port < 0 || port > 65535) {
    fail(`--port must be an integer between 0 and 65535 (got '${flags.port}')`);
  }
  const databaseUrl = flags.db ?? process.env.DATABASE_URL ?? process.env.SAMESAKE_DATABASE_URL;
  if (!databaseUrl) fail("DATABASE_URL required (or --db= / SAMESAKE_DATABASE_URL)");
  if (typeof Bun === "undefined" || typeof Bun.serve !== "function") {
    fail("'dev' requires the Bun runtime (Bun.serve is unavailable) \u2014 run it with bun / bunx");
  }

  const configAbs = resolve(configPath);
  const embed = await resolveDevEmbed(configPath);
  const matcher = createMatcher({
    databaseUrl,
    apiKey: flags["api-key"] ?? KEY,
    embed,
    migrate: "eager"
  });
  await matcher.migrate();
  const config = await loadProjectConfig(configPath);
  await applyDevConfig(matcher, project, config, "boot");

  const server = Bun.serve({
    port,
    fetch: matcher.fetch
  });
  console.log(`[dev] listening on http://localhost:${port} (project=${project})`);
  console.log(`[dev] watching ${configAbs}`);

  const configBase = basename(configAbs);
  let debounce = null;
  // Collapse bursts of file-system events into one re-apply after 300 ms of quiet.
  const scheduleReapply = () => {
    if (debounce) clearTimeout(debounce);
    debounce = setTimeout(async () => {
      try {
        console.log("[dev] config changed \u2014 re-applying...");
        // NOTE(review): this re-imports the same file URL; ES module caching may
        // hand back the previously loaded module, so edits might not be picked
        // up — confirm against the runtime in use.
        const next = await loadProjectConfig(configPath);
        await applyDevConfig(matcher, project, next, "watch");
      } catch (e) {
        // A broken edit must not take the dev server down; report and keep watching.
        console.error(`[dev] re-apply failed: ${e instanceof Error ? e.message : e}`);
      }
    }, 300);
  };

  if ("watch" in Bun && typeof Bun.watch === "function") {
    Bun.watch(configAbs, { persistent: true }, () => scheduleReapply());
  } else {
    // Watch the containing directory and filter by file name.
    watch(dirname(configAbs), (_event, filename) => {
      if (filename === configBase) scheduleReapply();
    });
  }

  process.on("SIGINT", async () => {
    server.stop();
    await matcher.close();
    process.exit(0);
  });

  // Keep the command alive until it is interrupted.
  await new Promise(() => {
  });
}
563
/**
 * `eval` command: run every query from a golden file against the search
 * endpoint and print latency, hit count and the top hit for each.
 *
 * The golden file is read as `{ queries: [{ id, query }, ...] }`. Query ids
 * are stringified before formatting so numeric ids do not crash the table
 * output. The first failing request aborts the run through fail().
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `golden`, `base`,
 *   `project`, `collection`, `api-key`.
 */
async function cmdEval(flags) {
  const goldenPath = flags.golden ?? fail("--golden is required");
  // Drop a single trailing slash so the joined URL has no "//".
  const base = (flags.base ?? URL).replace(/\/$/, "");
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");
  const apiKey = flags["api-key"] ?? KEY;

  const abs = resolve(goldenPath);
  if (!existsSync(abs)) fail(`golden file not found: ${abs}`);
  const golden = JSON.parse(readFileSync(abs, "utf8"));
  if (!golden.queries?.length) fail("golden file has no queries");

  console.log(`eval: ${golden.queries.length} queries \u2192 ${base}/v1/projects/${project}/collections/${collection}/search`);
  console.log("");
  console.log(`${"id".padEnd(12)} ${"ms".padStart(6)} ${"hits".padStart(5)} top`);
  console.log(`${"\u2500".repeat(12)} ${"\u2500".repeat(6)} ${"\u2500".repeat(5)} ${"\u2500".repeat(24)}`);

  for (const gq of golden.queries) {
    const url = `${base}/v1/projects/${project}/collections/${collection}/search?q=${encodeURIComponent(gq.query)}&limit=10`;
    const start = Date.now();
    const r = await fetch(url, { headers: { Authorization: `Bearer ${apiKey}` } });
    const ms = Date.now() - start;
    if (!r.ok) {
      const errorText = await r.text();
      fail(`query ${gq.id} failed (${r.status}): ${errorText.slice(0, 200)}`);
    }
    const body = await r.json();
    const hits = body.hits ?? [];
    const top = hits[0]?.title ?? hits[0]?.id ?? "\u2014";
    console.log(`${String(gq.id).padEnd(12)} ${String(ms).padStart(6)} ${String(hits.length).padStart(5)} ${String(top).slice(0, 40)}`);
  }

  console.log("");
  console.log("Retrieval only \u2014 no LLM judge. Graded evals (ESCI, mean@10) belong in your consumer harness.");
  console.log("Reference: docs/context/spike/eval-search.js");
}
595
/**
 * Import a config module and sort its exports into entities and collections.
 *
 * Only exported objects with both `name` and `fields` are considered. Those
 * with `scopes` are entities; otherwise those with `search`, `embeddings` or
 * `enrich` are collections. Everything else is ignored.
 *
 * @param {string} configPath - Path to the config module.
 * @returns {Promise<{entities: object[], collections: object[]}>}
 */
async function loadProjectConfig(configPath) {
  const abs = resolve(configPath);
  if (!existsSync(abs)) fail(`config not found: ${abs}`);
  const mod = await import(pathToFileURL(abs).href);

  const entities = [];
  const collections = [];
  const collectionMarkers = ["search", "embeddings", "enrich"];

  Object.values(mod).forEach((candidate) => {
    const isSchemaObject =
      candidate && typeof candidate === "object" && "name" in candidate && "fields" in candidate;
    if (!isSchemaObject) return;

    if ("scopes" in candidate) {
      entities.push(candidate);
      return;
    }
    if (collectionMarkers.some((key) => key in candidate)) {
      collections.push(candidate);
    }
  });

  return { entities, collections };
}
611
/**
 * `migrate` command. Two modes:
 *
 *  - Project mode (`--project`, `--config` and `--plan` or `--apply`): builds
 *    an in-process matcher, migrates, then plans (dry run) or applies the
 *    config and prints the result as JSON.
 *  - System mode (everything else): applies the system DDL to the given
 *    schema through prepareMigrations.
 *
 * In project mode the matcher is closed in a `finally` block so the
 * connection is released even when migrate() or apply() throws.
 *
 * @param {Record<string, string>} flags - Parsed flags; reads `project`, `config`,
 *   `plan`, `apply`, `db`, `api-key`, `allow-destructive`, `schema`.
 */
async function cmdMigrate(flags) {
  const project = flags.project ?? PROJECT;
  const configPath = flags.config;
  const isProjectMigrate = !!(project && configPath && (flags.plan === "true" || flags.apply === "true"));

  if (isProjectMigrate) {
    const databaseUrl2 = flags.db ?? process.env.DATABASE_URL ?? process.env.SAMESAKE_DATABASE_URL;
    if (!databaseUrl2) {
      fail("--db=postgres://... required (or set DATABASE_URL / SAMESAKE_DATABASE_URL)");
    }
    // Anything other than an explicit --apply is a dry run.
    const dryRun = flags.apply !== "true";
    const config = await loadProjectConfig(configPath);
    const matcher = createMatcher({
      databaseUrl: databaseUrl2,
      apiKey: flags["api-key"] ?? KEY,
      migrate: "eager",
      // Placeholder embed function; this path supplies a constant vector.
      embed: async () => [0]
    });

    let r;
    try {
      await matcher.migrate();
      r = await matcher.apply(project, config, {
        dryRun,
        allowDestructive: flags["allow-destructive"] === "true"
      });
    } finally {
      await matcher.close();
    }

    console.log(JSON.stringify({ schema: r.schema, dryRun: r.dryRun ?? dryRun, plan: r.plan, appliedStatements: r.appliedStatements }, null, 2));
    if (!dryRun) {
      console.log(`\u2713 applied ${r.appliedStatements} statements to ${r.schema}`);
    }
    return;
  }

  const databaseUrl = flags.db ?? process.env.SAMESAKE_DATABASE_URL;
  if (!databaseUrl) {
    fail("--db=postgres://... required (or set SAMESAKE_DATABASE_URL)");
  }
  const schema = flags.schema ?? process.env.SAMESAKE_SCHEMA ?? "public";
  console.log(`Applying samesake system DDL to schema '${schema}'...`);
  const start = Date.now();
  await prepareMigrations({ databaseUrl, schema });
  console.log(`\u2713 migrations applied in ${Date.now() - start}ms`);
}
650
/**
 * `doctor` command: print the effective environment, the matcher's health
 * report and the list of applied projects.
 *
 * If the health request throws, the report stops after the health section.
 * A health payload without `extensions` is rendered as "(none)" rather than
 * throwing inside the try block, which would be misreported as the matcher
 * being unreachable.
 */
async function cmdDoctor() {
  console.log("samesake doctor\n");
  console.log("Environment:");
  console.log(` SAMESAKE_URL ${URL}`);
  console.log(` SAMESAKE_API_KEY ${process.env.SAMESAKE_API_KEY ? "set" : "MISSING (using default \u2014 set it for production)"}`);
  console.log(` SAMESAKE_PROJECT ${PROJECT ?? "(unset)"}\n`);

  console.log("Matcher health:");
  try {
    const h = await get("/v1/healthz");
    console.log(` Status: ${h.status}`);
    // Only the part of the postgres string before the first comma is shown.
    console.log(` Postgres: ${(h.postgres ?? "?").split(",")[0]}`);
    console.log(` Extensions: ${(h.extensions ?? []).join(", ") || "(none)"}`);
    console.log(` Uptime: ${h.uptime_seconds}s\n`);
  } catch (e) {
    console.log(` \u2717 Could not reach matcher at ${URL}`);
    console.log(` ${e instanceof Error ? e.message : e}\n`);
    return;
  }

  try {
    const r = await get("/v1/projects");
    console.log(`Projects applied: ${r.projects.length}`);
    for (const p of r.projects) {
      console.log(` - ${p.slug.padEnd(24)} (${p.entities.length} entit${p.entities.length === 1 ? "y" : "ies"})`);
    }
  } catch (e) {
    console.log(` \u2717 Could not list projects: ${e instanceof Error ? e.message : e}`);
  }
}
681
/**
 * Builds the text of a starter `samesake.config.ts` for the given project.
 *
 * The project name is interpolated into the header comment only (the title
 * line and the suggested `apply` command); the entity definition itself is
 * fixed text. Lines are emitted verbatim and the result ends with a newline.
 *
 * @param {string} name - project name to mention in the header comment.
 * @returns {string} the generated file contents.
 */
function INIT_TEMPLATE(name) {
  const headerLines = [
    `// samesake.config.ts \u2014 entities for project '${name}'.`,
    "//",
    "// Apply via:",
    `// bunx samesake apply --project=${name} --config=./samesake.config.ts`,
  ];
  const entityLines = [
    'import { entity, fields, Scorers, providers } from "@samesake/core";',
    "",
    'export const customer = entity("customer", {',
    "fields: {",
    "name: fields.text({ required: true }),",
    "phone: fields.text({ optional: true }),",
    "},",
    'scopes: ["tenantId"],',
    "embeddings: {",
    'name_emb: { source: "name", model: providers.gemini.embed001({ dim: 768 }) },',
    "},",
    "phonetic: {",
    'name_phon: { source: "name", algorithm: "indic-soundex" },',
    "},",
    "scoring: {",
    "channels: [",
    'Scorers.phoneExact({ field: "phone", weight: 1.0 }),',
    'Scorers.cosine({ embedding: "name_emb", weight: 0.6 }),',
    'Scorers.trigram({ field: "name", weight: 0.25, latinOnlyPartial: true }),',
    "Scorers.aliasHit({ weight: 0.4 }),",
    'Scorers.phoneticEq({ phonetic: "name_phon", weight: 0.2 }),',
    "],",
    "},",
    "});",
  ];
  // Every line, including the last, is newline-terminated.
  return `${[...headerLines, ...entityLines].join("\n")}\n`;
}
710
/**
 * `samesake init` — writes a starter config file for a new project.
 *
 * Flags read:
 *   --name   (required) project name; must start with a letter and may
 *            continue with up to 62 letters, digits, '_' or '-'.
 *   --out    destination path (default `./samesake.config.ts`), resolved
 *            against the current working directory.
 *   --force  pass `--force=true` to overwrite an existing file.
 *
 * Without `--force` the file is created with the exclusive `wx` flag, so the
 * "already exists" check and the write are a single atomic step rather than
 * a separate existence test followed by a write.
 *
 * NOTE(review): `fail` is defined outside this block and is assumed not to
 * return (it is used as the right-hand side of `??`) — confirm.
 *
 * @param {Record<string, string>} flags - parsed command-line flags.
 * @returns {Promise<void>}
 * @throws rethrows any filesystem error other than EEXIST.
 */
async function cmdInit(flags) {
  const name = flags.name ?? fail("--name is required (e.g. --name=mystore)");
  if (!/^[a-z][a-z0-9_-]{0,62}$/i.test(name)) {
    // The message describes the rule actually enforced by the regex above.
    fail(`invalid project name: ${name} (must start with a letter, followed by up to 62 letters, digits, '_' or '-')`);
  }
  const out = resolve(flags.out ?? "./samesake.config.ts");
  const overwrite = flags.force === "true";
  try {
    // "wx" makes the write fail with EEXIST instead of clobbering the file.
    writeFileSync(out, INIT_TEMPLATE(name), { flag: overwrite ? "w" : "wx" });
  } catch (err) {
    if (err?.code === "EEXIST") {
      fail(`${out} already exists \u2014 pass --force to overwrite`);
    }
    throw err;
  }
  console.log(`\u2713 Wrote ${out}`);
  console.log("\nNext steps:");
  console.log(` 1. Adjust the import path at the top of ${out}`);
  console.log(` 2. samesake apply --project=${name} --config=${out}`);
  console.log(` 3. samesake seed --project=${name} --file=seed.json`);
}
727
/**
 * CLI entry point: routes the sub-command held in the module-level `cmd`
 * to its handler.
 *
 * With no command, or with `help` / `--help` / `-h`, the help text is shown.
 * Otherwise everything after the command word is parsed into flags and the
 * matching handler is awaited. Handlers are registered as thunks, so only
 * the selected one is ever invoked. An unrecognised command is reported
 * through `fail`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const helpWords = ["help", "--help", "-h"];
  if (!cmd || helpWords.includes(cmd)) {
    await cmdHelp();
    return;
  }

  const rest = args.slice(1);
  const flags = parseFlags(rest);

  // A Map (rather than a plain object) keeps lookups free of inherited keys.
  const handlers = new Map([
    // Commands that take no arguments.
    ["healthz", () => cmdHealthz()],
    ["doctor", () => cmdDoctor()],
    // Commands driven by flags only.
    ["init", () => cmdInit(flags)],
    ["migrate", () => cmdMigrate(flags)],
    ["apply", () => cmdApply(flags)],
    ["seed", () => cmdSeed(flags)],
    ["list-projects", () => cmdListProjects(flags)],
    ["ingest", () => cmdIngest(flags)],
    ["enrich", () => cmdEnrich(flags)],
    ["index", () => cmdIndex(flags)],
    ["search-explain", () => cmdSearchExplain(flags)],
    ["rotate-key", () => cmdRotateKey(flags)],
    ["review-list", () => cmdReviewList(flags)],
    ["review-correct", () => cmdReviewCorrect(flags)],
    // Commands that also receive the raw remaining arguments.
    ["match", () => cmdMatch(flags, rest)],
    ["explain", () => cmdExplain(flags, rest)],
    ["confirm", () => cmdConfirm(flags, rest)],
    ["decline", () => cmdDecline(flags, rest)],
    ["calibrate", () => cmdCalibrate(flags, rest)],
    ["duplicates", () => cmdDuplicates(flags, rest)],
    ["variants", () => cmdVariants(flags, rest)],
    ["dev", () => cmdDev(flags)],
    ["eval", () => cmdEval(flags)],
  ]);

  const run = handlers.get(cmd);
  if (run === undefined) {
    fail(`unknown command: ${cmd}. try 'samesake help'`);
    return;
  }
  await run();
}
808
+ await main();
package/package.json ADDED
@@ -0,0 +1,31 @@
1
+ {
2
+ "name": "@samesake/cli",
3
+ "version": "1.0.0",
4
+ "repository": {
5
+ "type": "git",
6
+ "url": "https://github.com/asyncdotengineering/samesake"
7
+ },
8
+ "description": "Operations CLI for samesake. Apply schemas, seed data, run match queries, explain scoring, confirm/decline active-learning feedback, calibrate thresholds, list duplicates and variant clusters. Talks to a @samesake/server matcher over HTTP.",
9
+ "type": "module",
10
+ "license": "MIT",
11
+ "main": "./dist/index.js",
12
+ "module": "./dist/index.js",
13
+ "bin": {
14
+ "samesake": "./dist/index.js"
15
+ },
16
+ "files": ["dist", "README.md", "LICENSE"],
17
+ "publishConfig": {
18
+ "access": "public"
19
+ },
20
+ "scripts": {
21
+ "build": "tsup"
22
+ },
23
+ "keywords": ["entity-resolution", "cli", "samesake"],
24
+ "dependencies": {
25
+ "@samesake/core": "^1.0.0",
26
+ "@samesake/server": "^1.0.0"
27
+ },
28
+ "devDependencies": {
29
+ "tsup": "^8.5.1"
30
+ }
31
+ }