@samesake/cli 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +9 -0
- package/dist/index.js +808 -0
- package/package.json +31 -0
package/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 octalpixel
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
|
13
|
+
all copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
21
|
+
THE SOFTWARE.
|
package/README.md
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
# @samesake/cli
|
|
2
|
+
|
|
3
|
+
Operations CLI for samesake — apply schemas, seed data, match queries, calibrate thresholds, and manage aliases over HTTP.
|
|
4
|
+
|
|
5
|
+
```bash
|
|
6
|
+
bun add -g @samesake/cli
|
|
7
|
+
```
|
|
8
|
+
|
|
9
|
+
See the [samesake README](https://github.com/asyncdotengineering/samesake#readme) for setup, examples, and docs.
|
package/dist/index.js
ADDED
|
@@ -0,0 +1,808 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
// src/index.ts
|
|
4
|
+
import { createMatcher, prepareMigrations } from "@samesake/server";
|
|
5
|
+
import { readFileSync, existsSync, writeFileSync, watch } from "fs";
|
|
6
|
+
import { basename, dirname, join, resolve } from "path";
|
|
7
|
+
import { pathToFileURL } from "url";
|
|
8
|
+
// Raw CLI tokens after the runtime binary and script path; the first token is the sub-command.
var args = process.argv.slice(2);
var [cmd] = args;

// Connection settings are read from the environment once at start-up, with local-dev fallbacks.
// NOTE(review): `URL` shadows the global WHATWG URL constructor for this whole module.
var URL = process.env.SAMESAKE_URL ?? "http://localhost:3030";
var KEY = process.env.SAMESAKE_API_KEY ?? "dev-key-please-change";

// Optional default for --project; stays undefined when the variable is unset.
var PROJECT = process.env.SAMESAKE_PROJECT;
|
|
13
|
+
/**
 * Build the authentication headers attached to matcher requests.
 * @returns {{ Authorization: string }} Bearer header carrying the module-level KEY.
 */
function header() {
  const bearer = `Bearer ${KEY}`;
  return { Authorization: bearer };
}
|
|
16
|
+
/**
 * Print an `error: …` line to stderr and terminate the process with exit code 1.
 * @param {unknown} msg - Text appended after the `error: ` prefix.
 */
function fail(msg) {
  const line = `error: ${msg}`;
  console.error(line);
  process.exit(1);
}
|
|
20
|
+
/**
 * Collect `--name=value`, `--name value` and bare `--name` tokens into a flat object.
 *
 * Tokens that do not start with `--` are ignored unless they are consumed as the value
 * of the preceding flag. A bare flag is recorded as the string "true".
 *
 * @param {string[]} rest - CLI tokens to scan.
 * @returns {Record<string, string>} Flag names (without the leading dashes) mapped to string values.
 */
function parseFlags(rest) {
  const flags = {};
  let cursor = 0;
  while (cursor < rest.length) {
    const token = rest[cursor];
    cursor += 1;
    if (!token.startsWith("--")) continue;

    const eqAt = token.indexOf("=");
    if (eqAt > 0) {
      // Inline form: everything after the first "=" is the value.
      flags[token.slice(2, eqAt)] = token.slice(eqAt + 1);
      continue;
    }

    const name = token.slice(2);
    const following = rest[cursor];
    if (following && !following.startsWith("--")) {
      // Space-separated form: the next token is consumed as this flag's value.
      flags[name] = following;
      cursor += 1;
    } else {
      flags[name] = "true";
    }
  }
  return flags;
}
|
|
41
|
+
/**
 * Collect every `--scope k=v` / `--scope=k=v` occurrence into one object.
 *
 * The pair is split at the FIRST "=" only, so a value may itself contain "="
 * (`--scope token=a=b` yields `{ token: "a=b" }`). `String.prototype.split("=", 2)`
 * cannot be used for this: its limit discards the remainder instead of keeping it.
 * Pairs with an empty key or without "=" are ignored; a later pair overrides an earlier
 * one with the same key.
 *
 * @param {string[]} rest - CLI tokens to scan.
 * @returns {Record<string, string>} Scope keys mapped to their string values.
 */
function parseScopeArgs(rest) {
  const INLINE_PREFIX = "--scope=";
  const out = {};
  for (let i = 0; i < rest.length; i++) {
    const a = rest[i];
    let pair;
    if (a === "--scope") {
      // Space-separated form consumes the following token.
      pair = rest[++i];
    } else if (a.startsWith(INLINE_PREFIX)) {
      pair = a.slice(INLINE_PREFIX.length);
    } else {
      continue;
    }
    if (!pair) continue;
    const eq = pair.indexOf("=");
    if (eq <= 0) continue; // no "=" at all, or an empty key
    out[pair.slice(0, eq)] = pair.slice(eq + 1);
  }
  return out;
}
|
|
55
|
+
/**
 * Read a matcher response body and decode it as JSON.
 *
 * The body is read as text first so that a non-JSON payload (for example an HTML error
 * page from a proxy) is reported through `fail` with the HTTP status, instead of
 * surfacing as a bare SyntaxError from `Response.json()`.
 *
 * @param {Response} r - Response returned by `fetch`.
 * @param {string} label - Request description used in error messages, e.g. `GET /v1/projects`.
 * @returns {Promise<any>} The decoded JSON body (only reached for 2xx responses).
 */
async function readJsonBody(r, label) {
  const raw = await r.text();
  let parsed;
  try {
    parsed = JSON.parse(raw);
  } catch {
    fail(`${label} failed: HTTP ${r.status} with a non-JSON body: ${raw.slice(0, 200)}`);
  }
  if (!r.ok) fail(`${label} failed: ${JSON.stringify(parsed)}`);
  return parsed;
}

/**
 * Issue an authenticated GET against the matcher and return the decoded JSON body.
 * @param {string} path - Path (with optional query string) appended to the base URL.
 * @returns {Promise<any>} Decoded response body; the process exits via `fail` on errors.
 */
async function get(path) {
  const r = await fetch(`${URL}${path}`, { headers: header() });
  return readJsonBody(r, `GET ${path}`);
}

/**
 * Issue an authenticated JSON POST against the matcher and return the decoded JSON body.
 * @param {string} path - Path appended to the base URL.
 * @param {unknown} body - Payload serialised with JSON.stringify.
 * @returns {Promise<any>} Decoded response body; the process exits via `fail` on errors.
 */
async function post(path, body) {
  const r = await fetch(`${URL}${path}`, {
    method: "POST",
    headers: { ...header(), "Content-Type": "application/json" },
    body: JSON.stringify(body)
  });
  return readJsonBody(r, `POST ${path}`);
}
|
|
71
|
+
/**
 * Print the CLI usage text to stdout.
 *
 * The stray `[--limit=N]` that used to trail `review-correct` is listed under `index`
 * instead: `index` reads `--limit`, whereas `review-correct` forwards every extra flag
 * as a field correction, so advertising `--limit` there was misleading.
 */
async function cmdHelp() {
  console.log(`
samesake \u2014 commerce search and entity resolution CLI

USAGE
  samesake <command> [options]

PROJECT LIFECYCLE
  init            --name=NAME [--out=PATH]              Scaffold a new samesake.config.ts
  apply           --project=NAME --config=PATH          Apply schema to a project
  list-projects                                         List every applied project
  seed            --project=NAME --file=PATH            Load JSON test data

MATCHING & FEEDBACK
  match           --project=NAME --kind=K --text=T --scope k=v
                  [--limit=N] [--json]                  Run a single match
  explain         --project=NAME --kind=K --query-text=T --candidate-id=ID
                  --scope k=v [--phone=P] [--json]      Per-channel scoring breakdown
  confirm         --project=NAME --kind=K --query-text=T --chosen=ID --scope k=v
                                                        Mark a candidate as correct (writes alias)
  decline         --project=NAME --kind=K --query-text=T --declined=ID --scope k=v
                                                        Mark a candidate as wrong (penalty)

ANALYSIS
  calibrate       --project=NAME --kind=K --scope k=v [--min-sample=N] [--json]
                                                        F1-optimise the auto-link threshold
  duplicates      --project=NAME [--kind=K] [--scope k=v]
                  [--score-floor=0.95] [--min-cluster=2] [--limit=100] [--json]
                                                        List dedup clusters
  variants        --project=NAME [--kind=K] [--scope k=v]
                  [--min-cluster=3] [--limit=50] [--json]
                                                        List variant suggestions (parse-shape only)

DEV & EVAL
  dev             --config=PATH --project=NAME [--port=8788]
                                                        Load config, migrate+apply, serve matcher on port, watch+re-apply on change
  eval            --golden=FILE --project=NAME --collection=COL [--base=URL]
                                                        Run golden queries against search (retrieval only \u2014 no LLM judge)

OPERATIONS
  healthz                                               Check matcher health
  doctor                                                Full env + service + projects health report
  migrate         --db=URL [--schema=public]            Apply system DDL directly to Postgres (no matcher needed).
                                                        Run BEFORE booting the app \u2014 the prisma-migrate-deploy /
                                                        drizzle-kit-push pattern. Idempotent, safe in CI.
  migrate         --project=NAME --config=PATH --plan   Show collection schema migration plan (dry-run).
  migrate         --project=NAME --config=PATH --apply  Apply collection schema migrations.
                  [--allow-destructive] [--db=URL]

SEARCH PIPELINE
  ingest          --project=NAME --collection=COL       Pull configured sources into collection
  enrich          --project=NAME --collection=COL       Run enrichment pipeline on pending docs
                  [--concurrency=N] [--limit=N]
  index           --project=NAME --collection=COL       Embed + populate filter columns
                  [--limit=N]
  search-explain  --project=NAME --collection=COL --q=QUERY [--json]
                                                        Per-channel search ranking breakdown
  rotate-key      --project=NAME                        Issue a new per-project API key (master only)
  review-list     --project=NAME --collection=COL [--limit=20] [--max-confidence=0.7]
                                                        List low-confidence enrichments for review
  review-correct  --project=NAME --collection=COL --id=DOC --field=value [...]
                                                        Apply human corrections (arrays comma-separated)

GLOBAL ENV
  SAMESAKE_URL            (default http://localhost:3030)
  SAMESAKE_API_KEY        (default dev-key-please-change)
  SAMESAKE_PROJECT        default --project for every command
  SAMESAKE_DATABASE_URL   used by 'migrate' if --db is omitted
  SAMESAKE_SCHEMA         used by 'migrate' if --schema is omitted (default "public")

EXAMPLES
  # Deploy pipeline: migrate first, then start the app.
  samesake migrate --db=$DATABASE_URL --schema=public
  bun apps/matcher/src/index.ts &

  # Author + use a project
  samesake init --name=mystore --out=./samesake.config.ts
  samesake apply --project=hello --config=examples/hello/samesake.config.ts
  samesake seed --project=hello --file=examples/hello/seed.json
  samesake match --project=hello --kind=customer --text="Smyth" --scope tenantId=acme
  samesake explain --project=hello --kind=customer --query-text=Smyth --candidate-id=1 --scope tenantId=acme
  samesake calibrate --project=hello --kind=customer --scope tenantId=acme
  samesake doctor
`);
}
|
|
156
|
+
/**
 * `healthz` command: query the matcher's unauthenticated health endpoint and print the result.
 *
 * A JSON body is pretty-printed to stdout as before. The exit code is set to 1 when the
 * service answers with a non-2xx status or a body that is not JSON, so scripts and CI
 * can rely on the command's status instead of parsing its output.
 */
async function cmdHealthz() {
  const r = await fetch(`${URL}/v1/healthz`);
  const raw = await r.text();
  let body;
  try {
    body = JSON.parse(raw);
  } catch {
    // Not a matcher response (wrong URL, proxy error page, ...): report it instead of throwing.
    console.error(`error: healthz returned HTTP ${r.status} with a non-JSON body: ${raw.slice(0, 200)}`);
    process.exitCode = 1;
    return;
  }
  console.log(JSON.stringify(body, null, 2));
  if (!r.ok) process.exitCode = 1;
}
|
|
161
|
+
/**
 * `apply` command: load entity definitions from a config module and apply them to a project.
 *
 * Config loading is delegated to `loadProjectConfig`, which performs the same existence
 * check, dynamic import and entity detection (exports with `name`, `fields` and `scopes`)
 * that used to be duplicated here. Only entities are sent; collections are not part of
 * this request.
 *
 * @param {Record<string, string>} flags - Parsed flags; uses `project` (or SAMESAKE_PROJECT) and `config`.
 */
async function cmdApply(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const configPath = flags.config ?? fail("--config is required");
  const { entities } = await loadProjectConfig(configPath);
  if (entities.length === 0) fail("no entities exported from config file");
  console.log(`Applying ${entities.length} entit${entities.length === 1 ? "y" : "ies"} to project '${project}'...`);
  // The project name is user input placed in a URL path segment, so it must be encoded.
  const body = await post(`/v1/projects/${encodeURIComponent(project)}/schema/apply`, { entities });
  console.log(`\u2713 Applied schema to ${body.schema}`);
  console.log(` - ${body.appliedStatements} DDL statements`);
  console.log(` - entities: ${body.entities.join(", ")}`);
}
|
|
180
|
+
/**
 * `seed` command: upload the items of a JSON seed file in one upsert-batch request.
 *
 * The file must contain `{ "entityType": string, "items": array }`. Unreadable JSON or a
 * wrong shape is reported through `fail` with the file path, rather than crashing later
 * with a TypeError on `data.items.length`.
 *
 * @param {Record<string, string>} flags - Parsed flags; uses `project` (or SAMESAKE_PROJECT) and `file`.
 */
async function cmdSeed(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const file = flags.file ?? fail("--file is required");
  const abs = resolve(file);
  if (!existsSync(abs)) fail(`file not found: ${abs}`);
  let data;
  try {
    data = JSON.parse(readFileSync(abs, "utf8"));
  } catch (e) {
    fail(`could not parse ${abs}: ${e instanceof Error ? e.message : e}`);
  }
  if (!data || typeof data.entityType !== "string" || !Array.isArray(data.items)) {
    fail(`seed file must look like { "entityType": "...", "items": [...] }: ${abs}`);
  }
  console.log(`Seeding ${data.items.length} ${data.entityType} into '${project}'...`);
  const start = Date.now();
  // Both path segments come from user-controlled input, so they are encoded.
  const body = await post(
    `/v1/projects/${encodeURIComponent(project)}/entities/${encodeURIComponent(data.entityType)}/upsert-batch`,
    { items: data.items }
  );
  const dur = ((Date.now() - start) / 1e3).toFixed(2);
  console.log(`\u2713 ${body.ids.length} rows seeded in ${dur}s`);
}
|
|
195
|
+
/**
 * `match` command: run one match query and print the ranked candidates.
 *
 * `--limit` must be a positive integer; previously a non-numeric value became NaN and was
 * serialised as `null` in the request. Entity ids are coerced with `String()` before
 * padding so that numeric ids in the response do not crash the table output.
 *
 * @param {Record<string, string>} flags - Parsed flags: `project`, `kind`, `text`, optional `limit` and `json`.
 * @param {string[]} rest - Raw CLI tokens, scanned for `--scope k=v` pairs.
 */
async function cmdMatch(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? fail("--kind is required");
  const text = flags.text ?? fail("--text is required");
  const scope = parseScopeArgs(rest);
  const limit = flags.limit ? Number(flags.limit) : 5;
  if (!Number.isInteger(limit) || limit < 1) {
    fail(`--limit must be a positive integer, got "${flags.limit}"`);
  }
  const m = await post(`/v1/projects/${encodeURIComponent(project)}/match`, {
    kind,
    text,
    scope,
    opts: { limit }
  });
  if (flags.json === "true") {
    console.log(JSON.stringify(m, null, 2));
    return;
  }
  console.log(`Top ${m.candidates.length} candidates for "${text}" (scope: ${JSON.stringify(scope)})`);
  console.log("");
  m.candidates.forEach((c, i) => {
    // Long names are cut to 33 characters plus an ellipsis so the column stays 36 wide.
    const name = c.name.length > 36 ? c.name.slice(0, 33) + "..." : c.name;
    const id = String(c.entityId).padStart(3);
    console.log(
      ` ${i + 1}. [id=${id}] ${name.padEnd(36)} combined: ${c.combined.toFixed(3)} cos: ${c.components.cosSim?.toFixed(2) ?? " --"} trgm: ${c.components.trgmSim.toFixed(2)} phon: ${c.components.phonEq ? "\u2713" : "\xB7"} alias: ${c.components.aliasHit ? "\u2713" : "\xB7"}`
    );
  });
  if (m.resolved) {
    console.log("");
    console.log(`Resolved: ${m.resolved.entityId} (auto-link, confidence ${m.resolved.confidence.toFixed(3)})`);
  }
}
|
|
225
|
+
/**
 * `explain` command: print the per-channel scoring breakdown for one query/candidate pair.
 *
 * The header, rule and data rows all go through the same column formatter so they line
 * up, and a channel whose `value` is missing (undefined) is rendered as "(null)" instead
 * of crashing on `undefined.toFixed`.
 *
 * @param {Record<string, string>} flags - Parsed flags: `project`, `kind`, `query-text` (or `text`),
 *   `candidate-id` (or `candidate`), optional `phone` and `json`.
 * @param {string[]} rest - Raw CLI tokens, scanned for `--scope k=v` pairs.
 */
async function cmdExplain(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? fail("--kind is required");
  const queryText = flags["query-text"] ?? flags.text ?? fail("--query-text is required");
  const candidateId = flags["candidate-id"] ?? flags.candidate ?? fail("--candidate-id is required");
  const scope = parseScopeArgs(rest);
  const phone = flags.phone;
  const r = await post(`/v1/projects/${encodeURIComponent(project)}/explain`, {
    kind,
    queryText,
    candidateId,
    scope,
    phone
  });
  if (flags.json === "true") {
    console.log(JSON.stringify(r, null, 2));
    return;
  }
  console.log(`Query: "${r.query.text}" \u2192 normalised "${r.query.normalised}"`);
  console.log(`Candidate: ${r.candidate.name} (id=${r.candidate.entityId})`);
  console.log("");
  // One formatter for every table line keeps header, rule and rows on the same columns.
  const columns = (a, b, c, d) => `${a.padEnd(14)} ${b.padEnd(14)} ${c.padEnd(8)} ${d}`;
  const rule = (n) => "\u2500".repeat(n);
  console.log(columns("Channel", "Value", "Weight", "Contribution"));
  console.log(columns(rule(12), rule(12), rule(6), rule(12)));
  const fmt = (v) => {
    if (v == null) return "(null)";
    if (typeof v === "boolean") return v ? "true" : "false";
    return typeof v === "number" ? v.toFixed(3) : String(v);
  };
  const row = (label, s) => {
    console.log(columns(label, fmt(s.value), s.weight.toFixed(2), s.contribution.toFixed(3)));
  };
  row("cosine", r.scores.cosSim);
  row("trigram", r.scores.trgmSim);
  row("phonetic-eq", r.scores.phonEq);
  row("phone-exact", r.scores.phoneEq);
  row("alias-hit", r.scores.aliasHit);
  console.log("");
  console.log(`Combined: ${r.combined.toFixed(3)}`);
  console.log(`Decision: ${r.decision} (auto-link \u2265 ${r.thresholds.autoLink}, suggest \u2265 ${r.thresholds.suggest})`);
  if (r.decisiveChannels.length > 0) {
    console.log(`Decisive channels: ${r.decisiveChannels.join(", ")}`);
  }
}
|
|
268
|
+
/**
 * `confirm` command: post feedback that a candidate is the correct match for a query text.
 *
 * `--chosen` is optional in code: when it is absent the request carries
 * `chosenEntityId: null`.
 *
 * @param {Record<string, string>} flags - Parsed flags: `project`, `kind`, `query-text` (or `text`), `chosen`.
 * @param {string[]} rest - Raw CLI tokens, scanned for `--scope k=v` pairs.
 */
async function cmdConfirm(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? fail("--kind is required");
  const queryText = flags["query-text"] ?? flags.text ?? fail("--query-text is required");
  const chosenEntityId = flags.chosen ?? null;
  const payload = {
    kind,
    queryText,
    scope: parseScopeArgs(rest),
    chosenEntityId
  };
  const reply = await post(`/v1/projects/${project}/confirm`, payload);
  console.log(`\u2713 ${JSON.stringify(reply)}`);
}
|
|
282
|
+
/**
 * `decline` command: post feedback that a candidate is NOT the right match for a query text.
 *
 * @param {Record<string, string>} flags - Parsed flags: `project`, `kind`, `query-text` (or `text`),
 *   `declined` (or `declinedId`).
 * @param {string[]} rest - Raw CLI tokens, scanned for `--scope k=v` pairs.
 */
async function cmdDecline(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? fail("--kind is required");
  const queryText = flags["query-text"] ?? flags.text ?? fail("--query-text is required");
  const declinedEntityId = flags.declined ?? flags.declinedId ?? fail("--declined=ID is required");
  const payload = {
    kind,
    queryText,
    scope: parseScopeArgs(rest),
    declinedEntityId
  };
  const reply = await post(`/v1/projects/${project}/decline`, payload);
  console.log(`\u2713 ${JSON.stringify(reply)}`);
}
|
|
296
|
+
/**
 * `calibrate` command: request a calibrated auto-link threshold and print the metrics.
 *
 * @param {Record<string, string>} flags - Parsed flags: `project`, `kind`, optional `min-sample` and `json`.
 * @param {string[]} rest - Raw CLI tokens, scanned for `--scope k=v` pairs.
 */
async function cmdCalibrate(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? fail("--kind is required");
  const scope = parseScopeArgs(rest);
  const rawMinSample = flags["min-sample"];
  // Left undefined when the flag is absent, so the key is dropped from the JSON payload.
  const minSampleSize = rawMinSample ? Number(rawMinSample) : void 0;
  const report = await post(`/v1/projects/${project}/calibrate`, { kind, scope, minSampleSize });

  const wantsJson = flags.json === "true";
  if (wantsJson) {
    console.log(JSON.stringify(report, null, 2));
    return;
  }

  console.log(`Calibrated auto-link threshold for ${kind} @ ${JSON.stringify(scope)}`);
  console.log("");
  console.log(` threshold: ${report.threshold.toFixed(3)}`);
  console.log(` F1: ${report.f1.toFixed(3)}`);
  console.log(` precision: ${report.precision.toFixed(3)}`);
  console.log(` recall: ${report.recall.toFixed(3)}`);
  console.log(` sample size: ${report.sampleSize} (${report.positives} positives, ${report.negatives} negatives)`);
}
|
|
318
|
+
/**
 * `duplicates` command: fetch dedup clusters for a project and list their members.
 *
 * `--kind` defaults to "customer". Scope pairs are sent as a JSON-encoded `scope` query
 * parameter; `--score-floor`, `--min-cluster` and `--limit` are forwarded verbatim.
 *
 * @param {Record<string, string>} flags - Parsed flags.
 * @param {string[]} rest - Raw CLI tokens, scanned for `--scope k=v` pairs.
 */
async function cmdDuplicates(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? "customer";
  const scope = parseScopeArgs(rest);

  const query = new URLSearchParams();
  query.set("kind", kind);
  if (Object.keys(scope).length > 0) query.set("scope", JSON.stringify(scope));
  if (flags["score-floor"]) query.set("scoreFloor", flags["score-floor"]);
  if (flags["min-cluster"]) query.set("minClusterSize", flags["min-cluster"]);
  if (flags.limit) query.set("limit", flags.limit);

  const result = await get(`/v1/projects/${project}/duplicates?${query.toString()}`);
  if (flags.json === "true") {
    console.log(JSON.stringify(result, null, 2));
    return;
  }

  const total = result.clusters.length;
  if (total === 0) {
    console.log("No duplicate clusters above floor.");
    return;
  }
  console.log(`${total} cluster${total === 1 ? "" : "s"} for ${kind}:`);
  for (const cluster of result.clusters) {
    console.log(`\ncluster (n=${cluster.totalCount}, min-score=${cluster.estimatedConfidence.toFixed(3)})`);
    for (const member of cluster.members) {
      console.log(` [id=${member.entityId.padStart(3)}] ${member.name}`);
    }
  }
}
|
|
346
|
+
/**
 * `variants` command: fetch variant suggestions and print each proposed base with its members.
 *
 * `--kind` defaults to "asset". Scope pairs are sent as a JSON-encoded `scope` query
 * parameter; `--min-cluster` and `--limit` are forwarded verbatim.
 *
 * @param {Record<string, string>} flags - Parsed flags.
 * @param {string[]} rest - Raw CLI tokens, scanned for `--scope k=v` pairs.
 */
async function cmdVariants(flags, rest) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const kind = flags.kind ?? "asset";
  const scope = parseScopeArgs(rest);

  const query = new URLSearchParams();
  query.set("kind", kind);
  if (Object.keys(scope).length > 0) query.set("scope", JSON.stringify(scope));
  if (flags["min-cluster"]) query.set("minClusterSize", flags["min-cluster"]);
  if (flags.limit) query.set("limit", flags.limit);

  const result = await get(`/v1/projects/${project}/variant-suggestions?${query.toString()}`);
  if (flags.json === "true") {
    console.log(JSON.stringify(result, null, 2));
    return;
  }

  const total = result.suggestions.length;
  if (total === 0) {
    console.log("No variant suggestions. (Only parse-shape entities produce these.)");
    return;
  }
  console.log(`${total} variant suggestion${total === 1 ? "" : "s"}:`);
  for (const suggestion of result.suggestions) {
    console.log(`\n${suggestion.proposedBase.suggestedName} (${suggestion.totalCount} members)`);
    const axes = suggestion.detectedAxes
      .map((axis) => `${axis.axis}:[${axis.distinctValues.join(",")}]`)
      .join(" ");
    if (axes) console.log(` axes: ${axes}`);
    for (const member of suggestion.members) {
      // A null size value prints as the placeholder dot; the unit is optional.
      let size = "";
      if (member.size.value !== null) size = `${member.size.value}${member.size.unit ?? ""}`;
      console.log(` [id=${member.entityId.padStart(3)}] ${member.name.padEnd(40)} variant=${member.variant ?? "\xB7"} size=${size || "\xB7"}`);
    }
  }
}
|
|
376
|
+
/**
 * `list-projects` command: print every project known to the matcher, one per line.
 * @param {Record<string, string>} flags - Parsed flags; `json` switches to raw JSON output.
 */
async function cmdListProjects(flags) {
  const listing = await get(`/v1/projects`);
  if (flags.json === "true") {
    console.log(JSON.stringify(listing, null, 2));
    return;
  }

  const count = listing.projects.length;
  if (count === 0) {
    console.log("No projects applied yet. Use `samesake apply` to create one.");
    return;
  }

  console.log(`${count} project${count === 1 ? "" : "s"} applied:`);
  for (const proj of listing.projects) {
    // The first ten characters of `updatedAt` are printed as the date column.
    const day = proj.updatedAt.slice(0, 10);
    const entityCount = proj.entities.length;
    const names = entityCount > 0 ? proj.entities.join(", ") : "(no entities)";
    console.log(` ${proj.slug.padEnd(24)} ${day} ${entityCount} entit${entityCount === 1 ? "y" : "ies"}: ${names}`);
  }
}
|
|
393
|
+
/**
 * `search-explain` command: print per-channel ranks for the documents returned by a search query.
 *
 * At most the first ten documents of the response are printed.
 *
 * @param {Record<string, string>} flags - Parsed flags: `project`, `collection`, `q`, optional `limit` and `json`.
 */
async function cmdSearchExplain(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");
  const q = flags.q ?? fail("--q is required");
  const limit = flags.limit ? Number(flags.limit) : void 0;
  const endpoint = `/v1/projects/${project}/collections/${collection}/search/explain`;
  const body = await post(endpoint, { q, limit });

  if (flags.json === "true") {
    console.log(JSON.stringify(body, null, 2));
    return;
  }

  console.log(`explain: ${q}`);
  const shown = (body.docs ?? []).slice(0, 10);
  shown.forEach((doc) => {
    // Channels without a rank for this document are shown as a middle dot.
    console.log(
      ` id=${doc.id} rrf=${Number(doc.rrf_score).toFixed(4)} fts=${doc.fts_rank ?? "\xB7"} cos=${doc.cosine_rank ?? "\xB7"} spc=${doc.spaces_rank ?? "\xB7"}`
    );
  });
}
|
|
413
|
+
/**
 * `rotate-key` command: request a new API key for a project and print it on its own line.
 * @param {Record<string, string>} flags - Parsed flags; uses `project` (or SAMESAKE_PROJECT).
 */
async function cmdRotateKey(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const { apiKey } = await post(`/v1/projects/${project}/rotate-key`, {});
  console.log(apiKey);
}
|
|
418
|
+
/**
 * `ingest` command: trigger ingestion for a collection and print how many documents were upserted.
 * @param {Record<string, string>} flags - Parsed flags: `project` (or SAMESAKE_PROJECT) and `collection`.
 */
async function cmdIngest(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");
  const endpoint = `/v1/projects/${project}/collections/${collection}/ingest`;
  const body = await post(endpoint, {});
  // The connector list is appended only when the response includes one.
  const sources = body.connectors ? ` from ${body.connectors.join(", ")}` : "";
  console.log(`\u2713 ingested ${body.upserted} documents${sources}`);
}
|
|
427
|
+
/**
 * `enrich` command: trigger the enrichment run for a collection and print the outcome counters.
 * @param {Record<string, string>} flags - Parsed flags: `project`, `collection`, optional `concurrency` and `limit`.
 */
async function cmdEnrich(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");
  // Absent flags stay undefined so JSON.stringify omits them from the request body.
  const options = {
    concurrency: flags.concurrency ? Number(flags.concurrency) : void 0,
    limit: flags.limit ? Number(flags.limit) : void 0
  };
  const endpoint = `/v1/projects/${project}/collections/${collection}/enrich`;
  const { enriched, skipped, failed } = await post(endpoint, options);
  console.log(`\u2713 enriched ${enriched} (skipped ${skipped}, failed ${failed})`);
}
|
|
439
|
+
/**
 * `review-list` command: list enrichments returned by the collection's review endpoint.
 *
 * `--limit` and `--max-confidence` are forwarded as the `limit` and `max_confidence`
 * query parameters when present.
 *
 * @param {Record<string, string>} flags - Parsed flags: `project`, `collection`, optional `limit`, `max-confidence`.
 */
async function cmdReviewList(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");

  const query = new URLSearchParams();
  if (flags.limit) query.set("limit", flags.limit);
  if (flags["max-confidence"]) query.set("max_confidence", flags["max-confidence"]);

  const rows = await get(`/v1/projects/${project}/collections/${collection}/review?${query}`);
  if (!rows.length) {
    console.log("no low-confidence enrichments \u2014 nothing to review");
    return;
  }

  rows.forEach((entry) => {
    const conf = entry.confidence == null ? " n/a" : entry.confidence.toFixed(2);
    const uncertain = entry.uncertain_fields.length ? ` uncertain: ${entry.uncertain_fields.join(",")}` : "";
    const correctedTag = entry.corrected ? " [corrected]" : "";
    console.log(`${entry.id.padEnd(10)} conf=${conf} ${String(entry.category).padEnd(12)} ${(entry.title ?? "").slice(0, 50)}${uncertain}${correctedTag}`);
  });
}
|
|
456
|
+
/**
 * `review-correct` command: submit human corrections for one document.
 *
 * Every flag other than `project`, `collection` and `id` is treated as a field
 * correction. A value containing a comma is split into a trimmed array.
 *
 * @param {Record<string, string>} flags - Parsed flags.
 */
async function cmdReviewCorrect(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");
  const id = flags.id ?? fail("--id is required");

  const routingFlags = ["project", "collection", "id"];
  const fields = {};
  Object.entries(flags).forEach(([name, raw]) => {
    if (routingFlags.includes(name)) return;
    fields[name] = raw.includes(",") ? raw.split(",").map((part) => part.trim()) : raw;
  });
  if (!Object.keys(fields).length) fail("supply corrections as --field=value (arrays comma-separated)");

  const endpoint = `/v1/projects/${project}/collections/${collection}/review/${id}`;
  const body = await post(endpoint, { fields });
  console.log(`\u2713 corrected ${body.corrected.join(", ")} on ${id} (doc re-indexes on next \`index\` run)`);
}
|
|
472
|
+
/**
 * `index` command: trigger indexing for a collection and print the number of indexed documents.
 * @param {Record<string, string>} flags - Parsed flags: `project`, `collection`, optional `limit`.
 */
async function cmdIndex(flags) {
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");
  // An absent --limit stays undefined so the key is omitted from the JSON body.
  const limit = flags.limit ? Number(flags.limit) : void 0;
  const endpoint = `/v1/projects/${project}/collections/${collection}/index`;
  const { indexed } = await post(endpoint, { limit });
  console.log(`\u2713 indexed ${indexed} documents`);
}
|
|
481
|
+
/**
 * Deterministic placeholder embedding derived from character codes.
 *
 * Each UTF-16 code unit adds `code * 0.001` (modulo 1) to the slot `index % dim`; the
 * vector is then divided by its Euclidean norm. An all-zero vector is returned unchanged
 * because the norm falls back to 1.
 *
 * @param {string | null | undefined} text - Input text; null/undefined is treated as "".
 * @param {number} dim - Length of the vector to produce.
 * @returns {number[]} Vector with `dim` entries.
 */
function stubDevEmbed(text, dim) {
  const source = text ?? "";
  const vec = new Array(dim).fill(0);
  let pos = 0;
  while (pos < source.length) {
    const slot = pos % dim;
    vec[slot] = (vec[slot] + source.charCodeAt(pos) * 1e-3) % 1;
    pos += 1;
  }
  let sumOfSquares = 0;
  for (const component of vec) {
    sumOfSquares = sumOfSquares + component * component;
  }
  const norm = Math.sqrt(sumOfSquares) || 1;
  return vec.map((component) => component / norm);
}
|
|
490
|
+
/**
 * Pick the embedding function used by the `dev` command.
 *
 * Resolution order:
 *   1. an `embed` (or `embedFn`) function exported by the config module;
 *   2. `stubEmbed` exported by a `stub-embed.ts` file next to the config;
 *   3. the built-in `stubDevEmbed` placeholder.
 *
 * @param {string} configPath - Path to the config module (resolved against the cwd).
 * @returns {Promise<Function>} The embedding function to hand to the matcher.
 */
async function resolveDevEmbed(configPath) {
  const configFile = resolve(configPath);
  const configModule = await import(pathToFileURL(configFile).href);
  const exported = configModule.embed ?? configModule.embedFn;
  if (typeof exported === "function") return exported;

  const builtIn = async ({ text, dim }) => stubDevEmbed(text, dim);

  const siblingStub = join(dirname(configFile), "stub-embed.ts");
  if (!existsSync(siblingStub)) return builtIn;

  const stubModule = await import(pathToFileURL(siblingStub).href);
  if (typeof stubModule.stubEmbed !== "function") return builtIn;
  return async ({ text, dim }) => stubModule.stubEmbed(text ?? "", dim);
}
|
|
504
|
+
/**
 * Apply a project config through the in-process matcher: print the dry-run plan first,
 * then apply for real and print a one-line summary.
 *
 * @param {{ apply: Function }} matcher - Object exposing `apply(project, config, { dryRun })`.
 * @param {string} project - Project name.
 * @param {object} config - Config object forwarded unchanged to `matcher.apply`.
 * @param {string} label - Tag shown in the plan heading (the callers pass "boot" or "watch").
 */
async function applyDevConfig(matcher, project, config, label) {
  const preview = await matcher.apply(project, config, { dryRun: true });
  console.log(`[dev] migration plan (${label}):`);
  console.log(JSON.stringify(preview.plan, null, 2));

  const outcome = await matcher.apply(project, config, { dryRun: false });
  console.log(`[dev] applied ${outcome.appliedStatements} statements \u2192 ${outcome.schema}`);
}
|
|
511
|
+
/**
 * `dev` command: migrate, apply the config, serve the matcher locally and re-apply the
 * config whenever the file changes.
 *
 * All preconditions are checked before anything touches the database:
 *   - `--port` must be an integer in 0..65535;
 *   - the Bun runtime must be present, because the server is started with `Bun.serve`
 *     (the file's shebang is `node`, where `Bun` is undefined and the command used to
 *     die with a ReferenceError only after migrations and the schema apply had run);
 *   - the config file must exist (it used to be imported before the existence check).
 *
 * The function never resolves on its own; SIGINT stops the server, closes the matcher
 * and exits with code 0.
 *
 * @param {Record<string, string>} flags - Parsed flags: `config`, `project`, optional `port`, `db`, `api-key`.
 */
async function cmdDev(flags) {
  const configPath = flags.config ?? fail("--config is required");
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const port = flags.port ? Number(flags.port) : 8788;
  if (!Number.isInteger(port) || port < 0 || port > 65535) {
    fail(`--port must be an integer between 0 and 65535, got "${flags.port}"`);
  }
  const databaseUrl = flags.db ?? process.env.DATABASE_URL ?? process.env.SAMESAKE_DATABASE_URL;
  if (!databaseUrl) fail("DATABASE_URL required (or --db= / SAMESAKE_DATABASE_URL)");
  if (typeof Bun === "undefined" || typeof Bun.serve !== "function") {
    fail("'dev' requires the Bun runtime (the server is started with Bun.serve); run the CLI with bun");
  }
  const configAbs = resolve(configPath);
  if (!existsSync(configAbs)) fail(`config not found: ${configAbs}`);

  const embed = await resolveDevEmbed(configPath);
  const matcher = createMatcher({
    databaseUrl,
    apiKey: flags["api-key"] ?? KEY,
    embed,
    migrate: "eager"
  });
  await matcher.migrate();
  const config = await loadProjectConfig(configPath);
  await applyDevConfig(matcher, project, config, "boot");

  const server = Bun.serve({
    port,
    fetch: matcher.fetch
  });
  console.log(`[dev] listening on http://localhost:${port} (project=${project})`);
  console.log(`[dev] watching ${configAbs}`);

  const configBase = basename(configAbs);
  let debounce = null;
  // Change notifications within 300 ms of each other are coalesced into a single re-apply.
  const scheduleReapply = () => {
    if (debounce) clearTimeout(debounce);
    debounce = setTimeout(async () => {
      try {
        console.log("[dev] config changed \u2014 re-applying...");
        const next = await loadProjectConfig(configPath);
        await applyDevConfig(matcher, project, next, "watch");
      } catch (e) {
        // A broken config must not take the dev server down; report it and keep watching.
        console.error(`[dev] re-apply failed: ${e instanceof Error ? e.message : e}`);
      }
    }, 300);
  };

  // NOTE(review): `Bun.watch` is feature-detected exactly as before; confirm the API exists in
  // the targeted Bun version. Otherwise the fs.watch fallback below is what actually runs.
  if (typeof Bun.watch === "function") {
    Bun.watch(configAbs, { persistent: true }, () => scheduleReapply());
  } else {
    // Watch the directory and filter by file name.
    watch(dirname(configAbs), (_event, filename) => {
      if (filename === configBase) scheduleReapply();
    });
  }

  process.on("SIGINT", async () => {
    server.stop();
    await matcher.close();
    process.exit(0);
  });

  // Park this async function forever; the process ends through the SIGINT handler.
  await new Promise(() => {
  });
}
|
|
563
|
+
/**
 * `eval` command: run every query of a golden file against the collection search endpoint
 * and print one row per query (id, latency in ms, hit count, title or id of the top hit).
 *
 * The run aborts through `fail` on the first non-2xx response.
 *
 * @param {Record<string, string>} flags - Parsed flags: `golden`, `project`, `collection`,
 *   optional `base` (overrides the base URL) and `api-key`.
 */
async function cmdEval(flags) {
  const goldenPath = flags.golden ?? fail("--golden is required");
  // A single trailing slash is dropped so the joined URL has no "//".
  const base = (flags.base ?? URL).replace(/\/$/, "");
  const project = flags.project ?? PROJECT ?? fail("--project is required");
  const collection = flags.collection ?? fail("--collection is required");
  const apiKey = flags["api-key"] ?? KEY;

  const goldenFile = resolve(goldenPath);
  if (!existsSync(goldenFile)) fail(`golden file not found: ${goldenFile}`);
  const golden = JSON.parse(readFileSync(goldenFile, "utf8"));
  if (!golden.queries?.length) fail("golden file has no queries");

  const endpoint = `${base}/v1/projects/${project}/collections/${collection}/search`;
  const bar = (width) => "\u2500".repeat(width);
  console.log(`eval: ${golden.queries.length} queries \u2192 ${endpoint}`);
  console.log("");
  console.log(`${"id".padEnd(12)} ${"ms".padStart(6)} ${"hits".padStart(5)} top`);
  console.log(`${bar(12)} ${bar(6)} ${bar(5)} ${bar(24)}`);

  for (const gq of golden.queries) {
    const url = `${endpoint}?q=${encodeURIComponent(gq.query)}&limit=10`;
    const startedAt = Date.now();
    const res = await fetch(url, { headers: { Authorization: `Bearer ${apiKey}` } });
    const ms = Date.now() - startedAt;
    if (!res.ok) {
      const errorText = await res.text();
      fail(`query ${gq.id} failed (${res.status}): ${errorText.slice(0, 200)}`);
    }
    const payload = await res.json();
    const hits = payload.hits ?? [];
    const top = hits[0]?.title ?? hits[0]?.id ?? "\u2014";
    console.log(`${gq.id.padEnd(12)} ${String(ms).padStart(6)} ${String(hits.length).padStart(5)} ${String(top).slice(0, 40)}`);
  }

  console.log("");
  console.log("Retrieval only \u2014 no LLM judge. Graded evals (ESCI, mean@10) belong in your consumer harness.");
  console.log("Reference: docs/context/spike/eval-search.js");
}
|
|
595
|
+
/**
 * Imports a project config module and sorts its exports into entity and
 * collection definitions.
 *
 * An export is considered only if it is an object with both `name` and
 * `fields`. It is an entity when it has `scopes`; otherwise it is a
 * collection when it has `search`, `embeddings` or `enrich`. Anything else
 * is ignored.
 *
 * @param {string} configPath Path to the config module (resolved against cwd).
 * @returns {Promise<{ entities: object[], collections: object[] }>}
 */
async function loadProjectConfig(configPath) {
  const abs = resolve(configPath);
  if (!existsSync(abs)) fail(`config not found: ${abs}`);

  // ES module imports are cached per URL, so re-importing the same path
  // (as the dev watcher does after an edit) would return the stale module.
  // A unique query string makes the loader evaluate the file again. Each
  // reload keeps the previous module instance alive, which is acceptable for
  // a short-lived CLI/dev process.
  const plainHref = pathToFileURL(abs).href;
  const freshUrl = pathToFileURL(abs);
  freshUrl.searchParams.set("t", `${Date.now()}-${Math.random().toString(36).slice(2)}`);

  let mod;
  try {
    mod = await import(freshUrl.href);
  } catch {
    // NOTE(review): fallback for runtimes that reject query strings on file
    // URLs; a genuine config error simply throws again from this import.
    mod = await import(plainHref);
  }

  const entities = [];
  const collections = [];
  for (const v of Object.values(mod)) {
    if (!v || typeof v !== "object" || !("name" in v) || !("fields" in v)) continue;
    // `scopes` is checked first: a definition carrying both `scopes` and
    // `embeddings` is classified as an entity.
    if ("scopes" in v) {
      entities.push(v);
    } else if ("search" in v || "embeddings" in v || "enrich" in v) {
      collections.push(v);
    }
  }
  return { entities, collections };
}
|
|
611
|
+
/**
 * `migrate` command. Runs in one of two modes:
 *
 * - Project mode, when a project, `--config` and `--plan=true` or
 *   `--apply=true` are all present: loads the config, creates a matcher,
 *   runs its migrations and applies the project schema. Without
 *   `--apply=true` the apply call is a dry run.
 * - System mode, otherwise: applies the samesake system DDL to a schema via
 *   `prepareMigrations`.
 *
 * @param {object} flags Parsed CLI flags (`project`, `config`, `plan`,
 *   `apply`, `db`, `api-key`, `allow-destructive`, `schema`).
 * @returns {Promise<void>}
 */
async function cmdMigrate(flags) {
  const project = flags.project ?? PROJECT;
  const configPath = flags.config;
  const isProjectMigrate = !!(project && configPath && (flags.plan === "true" || flags.apply === "true"));

  if (isProjectMigrate) {
    const databaseUrl2 = flags.db ?? process.env.DATABASE_URL ?? process.env.SAMESAKE_DATABASE_URL;
    if (!databaseUrl2) {
      fail("--db=postgres://... required (or set DATABASE_URL / SAMESAKE_DATABASE_URL)");
    }
    // Anything other than an explicit --apply=true only plans.
    const dryRun = flags.apply !== "true";
    const config = await loadProjectConfig(configPath);
    const matcher = createMatcher({
      databaseUrl: databaseUrl2,
      apiKey: flags["api-key"] ?? KEY,
      migrate: "eager",
      // Placeholder embedder — presumably never invoked during migration;
      // TODO confirm against createMatcher.
      embed: async () => [0]
    });

    let r;
    try {
      await matcher.migrate();
      r = await matcher.apply(project, config, {
        dryRun,
        allowDestructive: flags["allow-destructive"] === "true"
      });
    } finally {
      // Release the matcher even when migrate/apply throws, so a failed run
      // does not leave it open.
      await matcher.close();
    }

    console.log(JSON.stringify({ schema: r.schema, dryRun: r.dryRun ?? dryRun, plan: r.plan, appliedStatements: r.appliedStatements }, null, 2));
    if (!dryRun) {
      console.log(`\u2713 applied ${r.appliedStatements} statements to ${r.schema}`);
    }
    return;
  }

  const databaseUrl = flags.db ?? process.env.SAMESAKE_DATABASE_URL;
  if (!databaseUrl) {
    fail("--db=postgres://... required (or set SAMESAKE_DATABASE_URL)");
  }
  const schema = flags.schema ?? process.env.SAMESAKE_SCHEMA ?? "public";
  console.log(`Applying samesake system DDL to schema '${schema}'...`);
  const start = Date.now();
  await prepareMigrations({ databaseUrl, schema });
  console.log(`\u2713 migrations applied in ${Date.now() - start}ms`);
}
|
|
650
|
+
/**
 * `doctor` command: prints the CLI's environment settings, then queries the
 * matcher's `/v1/healthz` and `/v1/projects` endpoints and prints a summary.
 *
 * Stops after the health section when the health request fails; a failure
 * while listing projects is reported but does not throw.
 *
 * @returns {Promise<void>}
 */
async function cmdDoctor() {
  console.log("samesake doctor\n");
  console.log("Environment:");
  console.log(` SAMESAKE_URL ${URL}`);
  console.log(` SAMESAKE_API_KEY ${process.env.SAMESAKE_API_KEY ? "set" : "MISSING (using default \u2014 set it for production)"}`);
  console.log(` SAMESAKE_PROJECT ${PROJECT ?? "(unset)"}\n`);

  console.log("Matcher health:");
  try {
    const h = await get("/v1/healthz");
    console.log(` Status: ${h.status}`);
    console.log(` Postgres: ${(h.postgres ?? "?").split(",")[0]}`);
    // Tolerate a payload without `extensions`; otherwise the TypeError would
    // land in the catch below and be misreported as an unreachable matcher.
    console.log(` Extensions: ${(h.extensions ?? []).join(", ") || "(none)"}`);
    console.log(` Uptime: ${h.uptime_seconds}s\n`);
  } catch (e) {
    console.log(` \u2717 Could not reach matcher at ${URL}`);
    console.log(` ${e instanceof Error ? e.message : e}\n`);
    return;
  }

  try {
    const r = await get("/v1/projects");
    console.log(`Projects applied: ${r.projects.length}`);
    for (const p of r.projects) {
      // A project without an `entities` array counts as zero entities rather
      // than aborting the whole listing.
      const entityCount = p.entities?.length ?? 0;
      console.log(` - ${p.slug.padEnd(24)} (${entityCount} entit${entityCount === 1 ? "y" : "ies"})`);
    }
  } catch (e) {
    console.log(` \u2717 Could not list projects: ${e instanceof Error ? e.message : e}`);
  }
}
|
|
681
|
+
/**
 * Renders the starter config source for a project: a header comment that
 * names the project and shows the apply command, followed by a sample
 * `customer` entity definition.
 *
 * @param {string} name Project name interpolated into the header comment.
 * @returns {string} Config file contents, terminated by a newline.
 */
var INIT_TEMPLATE = (name) => {
  const header = [
    `// samesake.config.ts \u2014 entities for project '${name}'.`,
    "//",
    "// Apply via:",
    `// bunx samesake apply --project=${name} --config=./samesake.config.ts`,
    'import { entity, fields, Scorers, providers } from "@samesake/core";',
    ""
  ];
  const sampleEntity = [
    'export const customer = entity("customer", {',
    "fields: {",
    "name: fields.text({ required: true }),",
    "phone: fields.text({ optional: true }),",
    "},",
    'scopes: ["tenantId"],',
    "embeddings: {",
    'name_emb: { source: "name", model: providers.gemini.embed001({ dim: 768 }) },',
    "},",
    "phonetic: {",
    'name_phon: { source: "name", algorithm: "indic-soundex" },',
    "},",
    "scoring: {",
    "channels: [",
    'Scorers.phoneExact({ field: "phone", weight: 1.0 }),',
    'Scorers.cosine({ embedding: "name_emb", weight: 0.6 }),',
    'Scorers.trigram({ field: "name", weight: 0.25, latinOnlyPartial: true }),',
    "Scorers.aliasHit({ weight: 0.4 }),",
    'Scorers.phoneticEq({ phonetic: "name_phon", weight: 0.2 }),',
    "],",
    "},",
    "});"
  ];
  // Every line, including the last, ends with a newline.
  return `${[...header, ...sampleEntity].join("\n")}\n`;
};
|
|
710
|
+
/**
 * `init` command: writes a starter config file for a new project and prints
 * the follow-up steps.
 *
 * @param {object} flags Parsed CLI flags. `name` is required and must start
 *   with a letter followed by up to 62 letters, digits, `_` or `-`
 *   (case-insensitive). `out` selects the target path (default
 *   `./samesake.config.ts`); `force=true` allows overwriting an existing file.
 * @returns {Promise<void>}
 */
async function cmdInit(flags) {
  const name = flags.name ?? fail("--name is required (e.g. --name=mystore)");
  if (!/^[a-z][a-z0-9_-]{0,62}$/i.test(name)) {
    // The message quotes the pattern that is actually enforced: single-letter
    // names are valid, length is capped at 63, and case is ignored.
    fail(`invalid project name: ${name} (must match /^[a-z][a-z0-9_-]{0,62}$/i)`);
  }

  const out = resolve(flags.out ?? "./samesake.config.ts");
  // Refuse to clobber an existing config unless explicitly forced.
  if (existsSync(out) && flags.force !== "true") {
    fail(`${out} already exists \u2014 pass --force to overwrite`);
  }

  writeFileSync(out, INIT_TEMPLATE(name));
  console.log(`\u2713 Wrote ${out}`);
  console.log("\nNext steps:");
  console.log(` 1. Adjust the import path at the top of ${out}`);
  console.log(` 2. samesake apply --project=${name} --config=${out}`);
  console.log(` 3. samesake seed --project=${name} --file=seed.json`);
}
|
|
727
|
+
/**
 * CLI dispatcher: shows help when no command (or a help alias) was given,
 * otherwise parses the remaining arguments into flags and runs the handler
 * registered for the command. Unknown commands are reported through `fail`.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const wantsHelp = !cmd || ["help", "--help", "-h"].includes(cmd);
  if (wantsHelp) {
    await cmdHelp();
    return;
  }

  // Flags are parsed before the command lookup, so parse errors surface even
  // for unknown commands.
  const rest = args.slice(1);
  const flags = parseFlags(rest);

  // A Map (not an object literal) so names such as "constructor" can never
  // resolve to an inherited property.
  const handlers = new Map([
    // Commands that take no arguments.
    ["healthz", () => cmdHealthz()],
    ["doctor", () => cmdDoctor()],
    // Commands driven by flags only.
    ["init", () => cmdInit(flags)],
    ["migrate", () => cmdMigrate(flags)],
    ["apply", () => cmdApply(flags)],
    ["seed", () => cmdSeed(flags)],
    ["list-projects", () => cmdListProjects(flags)],
    ["ingest", () => cmdIngest(flags)],
    ["enrich", () => cmdEnrich(flags)],
    ["index", () => cmdIndex(flags)],
    ["search-explain", () => cmdSearchExplain(flags)],
    ["rotate-key", () => cmdRotateKey(flags)],
    ["review-list", () => cmdReviewList(flags)],
    ["review-correct", () => cmdReviewCorrect(flags)],
    ["dev", () => cmdDev(flags)],
    ["eval", () => cmdEval(flags)],
    // Commands that also receive the raw remaining arguments.
    ["match", () => cmdMatch(flags, rest)],
    ["explain", () => cmdExplain(flags, rest)],
    ["confirm", () => cmdConfirm(flags, rest)],
    ["decline", () => cmdDecline(flags, rest)],
    ["calibrate", () => cmdCalibrate(flags, rest)],
    ["duplicates", () => cmdDuplicates(flags, rest)],
    ["variants", () => cmdVariants(flags, rest)]
  ]);

  const run = handlers.get(cmd);
  if (run === undefined) {
    fail(`unknown command: ${cmd}. try 'samesake help'`);
    return;
  }
  await run();
}
|
|
808
|
+
// CLI entry point: run the command dispatcher via top-level await.
await main();
|
package/package.json
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@samesake/cli",
|
|
3
|
+
"version": "1.0.0",
|
|
4
|
+
"repository": {
|
|
5
|
+
"type": "git",
|
|
6
|
+
"url": "https://github.com/asyncdotengineering/samesake"
|
|
7
|
+
},
|
|
8
|
+
"description": "Operations CLI for samesake. Apply schemas, seed data, run match queries, explain scoring, confirm/decline active-learning feedback, calibrate thresholds, list duplicates and variant clusters. Talks to a @samesake/server matcher over HTTP.",
|
|
9
|
+
"type": "module",
|
|
10
|
+
"license": "MIT",
|
|
11
|
+
"main": "./dist/index.js",
|
|
12
|
+
"module": "./dist/index.js",
|
|
13
|
+
"bin": {
|
|
14
|
+
"samesake": "./dist/index.js"
|
|
15
|
+
},
|
|
16
|
+
"files": ["dist", "README.md", "LICENSE"],
|
|
17
|
+
"publishConfig": {
|
|
18
|
+
"access": "public"
|
|
19
|
+
},
|
|
20
|
+
"scripts": {
|
|
21
|
+
"build": "tsup"
|
|
22
|
+
},
|
|
23
|
+
"keywords": ["entity-resolution", "cli", "samesake"],
|
|
24
|
+
"dependencies": {
|
|
25
|
+
"@samesake/core": "^1.0.0",
|
|
26
|
+
"@samesake/server": "^1.0.0"
|
|
27
|
+
},
|
|
28
|
+
"devDependencies": {
|
|
29
|
+
"tsup": "^8.5.1"
|
|
30
|
+
}
|
|
31
|
+
}
|