@geravant/sinain 1.22.9 → 1.23.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/launcher.js CHANGED
@@ -56,6 +56,26 @@ const children = []; // { name, proc, pid }
56
56
  await main();
57
57
 
58
58
  async function main() {
59
+ // ── Platform guard (ENG-05, CONTEXT.md D-03) ─────────────────────────────
60
+ // Friendly blocker for non-macOS platforms. SINAIN_FAKE_PLATFORM enables
61
+ // env-var spoof testing without an actual Windows/Linux host.
62
+ const platform = process.env.SINAIN_FAKE_PLATFORM || os.platform();
63
+ if (platform !== "darwin") {
64
+ const isWindows = platform === "win32";
65
+ console.log("");
66
+ console.log(" ┌─────────────────────────────────────────────────────────┐");
67
+ console.log(" │ Sinain is macOS-only for this launch │");
68
+ console.log(" │ │");
69
+ console.log(" │ " + (isWindows
70
+ ? "Windows support is in progress — star the repo for updates."
71
+ : "Linux support is planned — star the repo for updates. ") + " │");
72
+ console.log(" │ https://github.com/geravant/sinain-hud │");
73
+ console.log(" └─────────────────────────────────────────────────────────┘");
74
+ console.log("");
75
+ process.exit(0);
76
+ }
77
+ // ── End platform guard ────────────────────────────────────────────────────
78
+
59
79
  setupSignalHandlers();
60
80
 
61
81
  log("Preflight checks...");
@@ -0,0 +1,572 @@
1
+ /**
2
+ * sinain mcp-register — detect and register the sinain MCP server
3
+ * into MCP-aware agents (Claude Code, Claude Desktop, Cursor, Codex, Goose, Junie).
4
+ *
5
+ * Designed to be called from two surfaces:
6
+ * - the wizard (`stepMcpInstall` from onboard.js)
7
+ * - a standalone CLI (`runMcpCli` from cli.js)
8
+ *
9
+ * Idempotency is the design contract for every backend: read existing config,
10
+ * upsert the `sinain` entry only, never duplicate. Re-running this module
11
+ * after upgrading the npm package is the canonical "re-point my paths" flow.
12
+ */
13
+
14
+ import * as p from "@clack/prompts";
15
+ import fs from "fs";
16
+ import path from "path";
17
+ import os from "os";
18
+ import { execFileSync, execSync } from "child_process";
19
+ import { c, guard, cmdExists, PKG_DIR, HOME, SINAIN_DIR, ENV_PATH, IS_WINDOWS, IS_MAC } from "./config-shared.js";
20
+
21
+ // ── Paths to the bundled MCP server ─────────────────────────────────────────
22
+
23
// Root directory of the MCP server bundled inside this package.
const MCP_SERVER_DIR = path.join(PKG_DIR, "sinain-mcp-server");

// TypeScript entry point of the server; registrations launch it through tsx.
const MCP_ENTRY = path.join(MCP_SERVER_DIR, "index.ts");

// The locally installed tsx launcher is named `tsx.cmd` on Windows, `tsx` elsewhere.
const tsxLauncherName = IS_WINDOWS ? "tsx.cmd" : "tsx";
const TSX_BIN = path.join(MCP_SERVER_DIR, "node_modules", ".bin", tsxLauncherName);

// Environment variables attached to the sinain entry by the registration backends.
const DEFAULT_ENV = {
  SINAIN_CORE_URL: "http://localhost:9500",
  SINAIN_WORKSPACE: path.join(HOME, ".openclaw", "workspace"),
};
31
+
32
/**
 * Report whether the bundled MCP server can be launched right now.
 *
 * @returns {boolean} true when both the server entry file and the tsx
 *   launcher exist on disk.
 */
function mcpServerReady() {
  const requiredFiles = [MCP_ENTRY, TSX_BIN];
  return requiredFiles.every((file) => fs.existsSync(file));
}
35
+
36
/**
 * Make sure the bundled MCP server has its dependencies installed,
 * running `npm install` in the server directory when they are missing.
 *
 * @returns {boolean} true if the server is ready afterwards; false if the
 *   entry file is absent or the install attempt failed.
 */
function ensureMcpServerDeps() {
  if (mcpServerReady()) {
    return true;
  }

  // Without the entry file there is nothing an install could fix.
  const entryPresent = fs.existsSync(MCP_ENTRY);
  if (!entryPresent) {
    return false;
  }

  let ready = false;
  try {
    execSync("npm install --silent", { cwd: MCP_SERVER_DIR, stdio: "pipe" });
    ready = mcpServerReady();
  } catch {
    ready = false;
  }
  return ready;
}
46
+
47
/**
 * Build the `mcpServers.sinain` entry written into JSON-config agents.
 *
 * @returns {{command: string, args: string[], env: Object}} a fresh object
 *   on every call; `env` is a shallow copy of DEFAULT_ENV.
 */
function mcpServerPayload() {
  const env = Object.assign({}, DEFAULT_ENV);
  const payload = { command: TSX_BIN, args: [MCP_ENTRY], env };
  return payload;
}
54
+
55
+ // ── Backend: Claude Code (CLI-managed via `claude mcp`) ─────────────────────
56
+
57
/**
 * Run `claude mcp list --json` and parse its output.
 *
 * @returns {*} the parsed JSON value, or null when the command fails or its
 *   output is not valid JSON.
 */
function claudeListJson() {
  const cliArgs = ["mcp", "list", "--json"];
  try {
    const raw = execFileSync("claude", cliArgs, { stdio: "pipe", encoding: "utf-8" });
    const parsed = JSON.parse(raw);
    return parsed;
  } catch {
    return null;
  }
}
65
+
66
/**
 * Check whether a `sinain` MCP server is already known to the Claude CLI.
 *
 * The JSON listing is consulted first (either an object keyed by server name
 * or an array of `{ name }` entries). When that is inconclusive, the plain
 * text listing is scanned for the word `sinain`.
 *
 * @returns {boolean} true when a sinain entry is found.
 */
function claudeAlreadyRegistered() {
  const listing = claudeListJson();
  const isObjectLike = Boolean(listing) && typeof listing === "object";

  if (isObjectLike) {
    if (listing.sinain) {
      return true;
    }
    if (Array.isArray(listing)) {
      // An array listing is authoritative: no text fallback afterwards.
      return listing.some((entry) => entry?.name === "sinain");
    }
  }

  let text;
  try {
    text = execFileSync("claude", ["mcp", "list"], { stdio: "pipe", encoding: "utf-8" });
  } catch {
    return false;
  }
  return /\bsinain\b/.test(text);
}
79
+
80
/**
 * Register the sinain MCP server with Claude Code via `claude mcp add`.
 *
 * Any existing `sinain` entry is removed first so repeated calls replace
 * the entry instead of duplicating it.
 *
 * @param {Object} [options]
 * @param {Object} [options.extraEnv={}] extra environment variables merged
 *   over DEFAULT_ENV (later keys win).
 * @throws {Error} when the MCP server dependencies cannot be prepared, or
 *   when `claude mcp add` fails.
 */
function claudeRegister({ extraEnv = {} } = {}) {
  const depsOk = ensureMcpServerDeps();
  if (!depsOk) {
    throw new Error(`MCP server not built. Run: cd ${MCP_SERVER_DIR} && npm install`);
  }

  // Removing a missing entry fails; that failure is expected and ignored.
  try {
    execFileSync("claude", ["mcp", "remove", "sinain"], { stdio: "pipe" });
  } catch {
    /* not registered */
  }

  const mergedEnv = { ...DEFAULT_ENV, ...extraEnv };
  const envFlags = Object.entries(mergedEnv).flatMap(([key, value]) => ["--env", `${key}=${value}`]);
  const cliArgs = ["mcp", "add", "sinain", "--scope", "user", ...envFlags, "--", TSX_BIN, MCP_ENTRY];

  execFileSync("claude", cliArgs, { stdio: "pipe", env: { ...process.env, ...mergedEnv } });
}
92
+
93
/**
 * Remove the sinain entry from Claude Code. Failures (for example when the
 * entry does not exist) are ignored so the call is safe to repeat.
 */
function claudeUnregister() {
  const cliArgs = ["mcp", "remove", "sinain"];
  try {
    execFileSync("claude", cliArgs, { stdio: "pipe" });
  } catch {
    /* idempotent */
  }
}
96
+
97
+ // ── Backend: Codex (CLI-managed via `codex mcp`) ────────────────────────────
98
+
99
/**
 * Check whether `codex mcp list` mentions a `sinain` server.
 *
 * @returns {boolean} true when the word `sinain` appears in the listing;
 *   false when it does not or the command fails.
 */
function codexAlreadyRegistered() {
  let listing;
  try {
    listing = execFileSync("codex", ["mcp", "list"], { stdio: "pipe", encoding: "utf-8" });
  } catch {
    return false;
  }
  return /\bsinain\b/.test(listing);
}
107
+
108
/**
 * Register the sinain MCP server with Codex via `codex mcp add`, replacing
 * any previous `sinain` entry.
 *
 * @throws {Error} when the MCP server dependencies cannot be prepared, or
 *   when `codex mcp add` fails.
 */
function codexRegister() {
  const depsOk = ensureMcpServerDeps();
  if (!depsOk) {
    throw new Error(`MCP server not built. Run: cd ${MCP_SERVER_DIR} && npm install`);
  }

  // Remove-then-add keeps the operation repeatable.
  try {
    execFileSync("codex", ["mcp", "remove", "sinain"], { stdio: "pipe" });
  } catch {
    /* not registered */
  }

  const envFlags = Object.entries(DEFAULT_ENV).flatMap(([key, value]) => ["--env", `${key}=${value}`]);
  const cliArgs = ["mcp", "add", "sinain", ...envFlags, "--", TSX_BIN, MCP_ENTRY];
  execFileSync("codex", cliArgs, { stdio: "pipe" });
}
118
+
119
/**
 * Remove the sinain entry from Codex. Failures (for example when the entry
 * does not exist) are ignored so the call is safe to repeat.
 */
function codexUnregister() {
  const cliArgs = ["mcp", "remove", "sinain"];
  try {
    execFileSync("codex", cliArgs, { stdio: "pipe" });
  } catch {
    /* idempotent */
  }
}
122
+
123
+ // ── Backend: JSON-config agents (Claude Desktop, Cursor, Junie) ─────────────
124
+ //
125
+ // All three use the same `mcpServers` object shape. The only thing that
126
+ // varies is the file path. Read-merge-write keeps untouched entries intact.
127
+
128
/**
 * Resolve the MCP config file path for a JSON-config agent.
 *
 * @param {string} agentId one of "claude-desktop", "cursor", "junie".
 * @returns {string} absolute path of the agent's config file.
 * @throws {Error} for any other agent id.
 */
function jsonConfigPath(agentId) {
  if (agentId === "claude-desktop") {
    const fileName = "claude_desktop_config.json";
    if (IS_MAC) {
      return path.join(HOME, "Library", "Application Support", "Claude", fileName);
    }
    if (IS_WINDOWS) {
      const roaming = process.env.APPDATA || path.join(HOME, "AppData", "Roaming");
      return path.join(roaming, "Claude", fileName);
    }
    return path.join(HOME, ".config", "Claude", fileName);
  }

  if (agentId === "cursor") {
    return path.join(HOME, ".cursor", "mcp.json");
  }

  if (agentId === "junie") {
    return path.join(HOME, ".junie", "mcp", "mcp.json");
  }

  throw new Error(`Unknown JSON-config agent: ${agentId}`);
}
142
+
143
/**
 * Check whether a JSON-config agent already has an `mcpServers.sinain` entry.
 *
 * @param {string} agentId id accepted by jsonConfigPath.
 * @returns {boolean} false when the file is missing, unreadable, not valid
 *   JSON, or has no sinain entry.
 */
function jsonAlreadyRegistered(agentId) {
  const configFile = jsonConfigPath(agentId);
  if (!fs.existsSync(configFile)) {
    return false;
  }

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(configFile, "utf-8"));
  } catch {
    return false;
  }
  return Boolean(parsed?.mcpServers?.sinain);
}
153
+
154
/**
 * Upsert the `sinain` entry in a JSON-config agent's `mcpServers` object,
 * leaving every other key of the file untouched.
 *
 * An existing file that is empty (or whitespace only) is treated as an empty
 * config. A file whose top-level value or `mcpServers` value is not a JSON
 * object is rejected instead of being rewritten, because writing into an
 * array or primitive would either throw or silently drop the new entry.
 *
 * @param {string} agentId id accepted by jsonConfigPath.
 * @throws {Error} when the MCP server dependencies cannot be prepared, when
 *   the existing file is not valid JSON, or when its structure is not the
 *   expected object shape.
 */
function jsonRegister(agentId) {
  if (!ensureMcpServerDeps()) {
    throw new Error(`MCP server not built. Run: cd ${MCP_SERVER_DIR} && npm install`);
  }
  const file = jsonConfigPath(agentId);
  fs.mkdirSync(path.dirname(file), { recursive: true });

  const isPlainObject = (value) =>
    value !== null && typeof value === "object" && !Array.isArray(value);

  let config = {};
  if (fs.existsSync(file)) {
    const raw = fs.readFileSync(file, "utf-8");
    // An empty file carries no settings to preserve; start from {}.
    if (raw.trim() !== "") {
      try {
        config = JSON.parse(raw) || {};
      } catch (err) {
        throw new Error(`Existing config at ${file} is not valid JSON: ${err.message}`);
      }
    }
  }

  if (!isPlainObject(config)) {
    throw new Error(`Existing config at ${file} is not a JSON object; refusing to overwrite it`);
  }

  const servers = config.mcpServers || {};
  if (!isPlainObject(servers)) {
    throw new Error(`"mcpServers" in ${file} is not a JSON object; refusing to overwrite it`);
  }

  servers.sinain = mcpServerPayload();
  config.mcpServers = servers;
  fs.writeFileSync(file, JSON.stringify(config, null, 2) + "\n");
}
172
+
173
/**
 * Delete the `mcpServers.sinain` entry from a JSON-config agent's file.
 * Does nothing when the file is missing, is not valid JSON, or has no
 * sinain entry.
 *
 * @param {string} agentId id accepted by jsonConfigPath.
 */
function jsonUnregister(agentId) {
  const configFile = jsonConfigPath(agentId);
  if (!fs.existsSync(configFile)) {
    return;
  }

  let parsed;
  try {
    parsed = JSON.parse(fs.readFileSync(configFile, "utf-8"));
  } catch {
    return;
  }

  const hasEntry = Boolean(parsed?.mcpServers?.sinain);
  if (!hasEntry) {
    return;
  }

  delete parsed.mcpServers.sinain;
  const serialized = JSON.stringify(parsed, null, 2) + "\n";
  fs.writeFileSync(configFile, serialized);
}
183
+
184
+ // ── Backend: Goose (YAML, hand-rolled splice with snippet fallback) ─────────
185
+ //
186
+ // Goose's config is YAML and we don't want a YAML dep just for this site.
187
+ // Strategy: detect a `sinain:` block under `extensions:` via line scanning;
188
+ // if present, splice replace; if absent, append. On any structural surprise,
189
+ // abort the write and return a printable snippet for the user to paste.
190
+
191
// Location of Goose's YAML configuration file.
const GOOSE_CONFIG = path.join(HOME, ".config", "goose", "config.yaml");

/**
 * Build the YAML text that registers sinain as a Goose extension.
 *
 * The block is nested with 2-space steps (`extensions:` at column 0,
 * `sinain:` at 2, its fields at 4, env entries at 6). The splice logic in
 * this file relies on that: it strips the first line, re-indents from a
 * 2-space base, and treats deeper-indented lines as the body of `sinain:`.
 *
 * Values are emitted with JSON.stringify: a JSON string is a valid YAML
 * double-quoted scalar, so backslashes (Windows paths) and quotes in the
 * paths are escaped instead of corrupting the document.
 *
 * @returns {string} multi-line YAML without a trailing newline.
 */
function gooseSnippet() {
  const quote = (value) => JSON.stringify(String(value));
  return [
    "extensions:",
    "  sinain:",
    "    enabled: true",
    `    cmd: ${quote(TSX_BIN)}`,
    `    args: [${quote(MCP_ENTRY)}]`,
    "    envs:",
    `      SINAIN_CORE_URL: ${quote(DEFAULT_ENV.SINAIN_CORE_URL)}`,
    `      SINAIN_WORKSPACE: ${quote(DEFAULT_ENV.SINAIN_WORKSPACE)}`,
  ].join("\n");
}
205
+
206
/**
 * Check whether Goose's config already contains an indented `sinain:` key.
 *
 * The indentation is matched with `[ \t]+` rather than `\s+`: under the `m`
 * flag `\s+` can consume a preceding newline, which would make a top-level
 * `sinain:` that follows a blank line look like a nested key.
 *
 * @returns {boolean} false when the config file does not exist or has no
 *   indented `sinain:` line.
 */
function gooseAlreadyRegistered() {
  if (!fs.existsSync(GOOSE_CONFIG)) return false;
  const content = fs.readFileSync(GOOSE_CONFIG, "utf-8");
  return /^[ \t]+sinain:/m.test(content);
}
211
+
212
/**
 * Register sinain in Goose's YAML config by line splicing (no YAML parser).
 *
 * Cases, in order:
 *  - file missing or blank: write the full snippet;
 *  - no top-level `extensions:` key: append the full snippet;
 *  - `extensions:` carries an inline value (e.g. `extensions: {}`): abort
 *    without writing, since children cannot be spliced under it;
 *  - no indented `sinain:` key: insert the block right after `extensions:`,
 *    indented like the existing children;
 *  - otherwise: replace the existing `sinain:` block in place.
 *
 * @returns {{ok: true} | {ok: false, reason: string, snippet: string}}
 *   `ok: false` means nothing was written and `snippet` should be shown to
 *   the user for manual pasting.
 * @throws {Error} when the MCP server dependencies cannot be prepared.
 */
function gooseRegister() {
  if (!ensureMcpServerDeps()) {
    throw new Error(`MCP server not built. Run: cd ${MCP_SERVER_DIR} && npm install`);
  }
  fs.mkdirSync(path.dirname(GOOSE_CONFIG), { recursive: true });
  const exists = fs.existsSync(GOOSE_CONFIG);
  const content = exists ? fs.readFileSync(GOOSE_CONFIG, "utf-8") : "";
  const snippet = gooseSnippet();

  // Nothing to preserve: write the snippet as the whole file.
  if (!content.trim()) {
    fs.writeFileSync(GOOSE_CONFIG, snippet + "\n");
    return { ok: true };
  }

  const indentOf = (line) => line.match(/^\s*/)[0].length;
  // Snippet body without its `extensions:` header, re-based to `width` spaces.
  const blockAt = (width) =>
    snippet.split("\n").slice(1).map((l) => l.replace(/^ {2}/, " ".repeat(width)));

  const lines = content.split("\n");
  const extIdx = lines.findIndex((l) => /^extensions:/.test(l));

  if (extIdx === -1) {
    const sep = content.endsWith("\n") ? "" : "\n";
    fs.writeFileSync(GOOSE_CONFIG, content + sep + "\n" + snippet + "\n");
    return { ok: true };
  }

  // Only a bare `extensions:` header (optionally followed by a comment) can
  // take spliced children; anything else is a structural surprise.
  if (!/^extensions:\s*(#.*)?$/.test(lines[extIdx])) {
    return { ok: false, reason: "structural-mismatch", snippet };
  }

  // `[ \t]+` keeps the match on one line; `\s+` could span a newline.
  const start = lines.findIndex((l) => /^[ \t]+sinain:/.test(l));

  if (start === -1) {
    // Match the indentation of the first existing child so the new key is a
    // valid sibling; fall back to 2 spaces when `extensions:` has no children.
    const firstChild = lines
      .slice(extIdx + 1)
      .find((l) => l.trim() !== "" && !l.trim().startsWith("#"));
    const childIndent = firstChild ? indentOf(firstChild) : 0;
    lines.splice(extIdx + 1, 0, ...blockAt(childIndent > 0 ? childIndent : 2));
    fs.writeFileSync(GOOSE_CONFIG, lines.join("\n"));
    return { ok: true };
  }

  // Replace the existing block: the `sinain:` line plus every following line
  // that is indented deeper. `end` only advances past non-blank body lines so
  // blank separator lines after the block are left in place.
  const baseIndent = indentOf(lines[start]);
  let end = start + 1;
  for (let i = start + 1; i < lines.length; i++) {
    if (lines[i].trim() === "") continue;
    if (indentOf(lines[i]) <= baseIndent) break;
    end = i + 1;
  }
  lines.splice(start, end - start, ...blockAt(baseIndent));
  fs.writeFileSync(GOOSE_CONFIG, lines.join("\n"));
  return { ok: true };
}
269
+
270
/**
 * Remove the indented `sinain:` block from Goose's YAML config.
 *
 * The block is the `sinain:` line plus every following line indented deeper
 * than it. Blank lines that trail the block are kept so spacing between the
 * remaining sections is not collapsed. Does nothing when the file or the
 * key is absent.
 */
function gooseUnregister() {
  if (!fs.existsSync(GOOSE_CONFIG)) return;
  const lines = fs.readFileSync(GOOSE_CONFIG, "utf-8").split("\n");

  // `[ \t]+` (not `\s+`) so only a genuinely indented key is matched.
  const start = lines.findIndex((l) => /^[ \t]+sinain:/.test(l));
  if (start === -1) return;

  const indentOf = (line) => line.match(/^\s*/)[0].length;
  const baseIndent = indentOf(lines[start]);

  // `end` is one past the last non-blank line that belongs to the block.
  let end = start + 1;
  for (let i = start + 1; i < lines.length; i++) {
    if (lines[i].trim() === "") continue;
    if (indentOf(lines[i]) <= baseIndent) break;
    end = i + 1;
  }

  lines.splice(start, end - start);
  fs.writeFileSync(GOOSE_CONFIG, lines.join("\n"));
}
288
+
289
+ // ── Agent registry ──────────────────────────────────────────────────────────
290
+
291
/**
 * Build the registry entry for an agent whose MCP servers live in a JSON
 * file handled by the json* backend functions.
 */
function jsonBackedAgent(id, label, detect) {
  return {
    id,
    label,
    detect,
    isRegistered: () => jsonAlreadyRegistered(id),
    register: () => jsonRegister(id),
    unregister: () => jsonUnregister(id),
  };
}

// Supported agents, in display order. Each entry exposes the same five
// operations: detect, isRegistered, register, unregister (plus id/label).
const AGENTS = [
  {
    id: "claude",
    label: "Claude Code",
    detect: () => cmdExists("claude"),
    isRegistered: claudeAlreadyRegistered,
    register: (opts) => claudeRegister(opts),
    unregister: claudeUnregister,
  },
  jsonBackedAgent(
    "claude-desktop",
    "Claude Desktop",
    // Present when the directory that would hold its config file exists.
    () => fs.existsSync(path.dirname(jsonConfigPath("claude-desktop"))),
  ),
  jsonBackedAgent(
    "cursor",
    "Cursor",
    () => cmdExists("cursor") || fs.existsSync(path.join(HOME, ".cursor")),
  ),
  {
    id: "codex",
    label: "Codex",
    detect: () => cmdExists("codex"),
    isRegistered: codexAlreadyRegistered,
    register: codexRegister,
    unregister: codexUnregister,
  },
  {
    id: "goose",
    label: "Goose",
    detect: () => cmdExists("goose") || fs.existsSync(GOOSE_CONFIG),
    isRegistered: gooseAlreadyRegistered,
    register: gooseRegister,
    unregister: gooseUnregister,
  },
  jsonBackedAgent(
    "junie",
    "Junie",
    () => cmdExists("junie") || fs.existsSync(path.join(HOME, ".junie")),
  ),
];
341
+
342
/**
 * Look up a registry entry by id.
 *
 * @param {string} id agent id, e.g. "claude" or "goose".
 * @returns {Object|undefined} the matching AGENTS entry, or undefined.
 */
function findAgent(id) {
  for (const agent of AGENTS) {
    if (agent.id === id) {
      return agent;
    }
  }
  return undefined;
}
345
+
346
+ // ── Public API ──────────────────────────────────────────────────────────────
347
+
348
/**
 * Probe every known agent and report its status.
 *
 * @returns {Promise<Array<{id: string, label: string, present: boolean,
 *   alreadyRegistered: boolean}>>} one entry per agent, in registry order.
 *   `alreadyRegistered` is only checked for agents that are present, and a
 *   failing check counts as "not registered".
 */
export async function detectMcpAgents() {
  const report = [];
  for (const agent of AGENTS) {
    const present = Boolean(agent.detect());
    let alreadyRegistered = false;
    if (present) {
      try {
        alreadyRegistered = Boolean(agent.isRegistered());
      } catch {
        /* ignore */
      }
    }
    report.push({ id: agent.id, label: agent.label, present, alreadyRegistered });
  }
  return report;
}
358
+
359
/**
 * Register the sinain MCP server with one agent.
 *
 * @param {string} agentId id of a known agent.
 * @param {Object} [opts={}] passed through to the agent's register function.
 * @returns {Promise<*>} whatever the backend returns. Most backends return
 *   nothing; the Goose backend returns `{ ok: true }` or
 *   `{ ok: false, reason, snippet }` when it declined to modify the config,
 *   so callers can surface that instead of assuming success.
 * @throws {Error} for an unknown agent id, or when the backend fails.
 */
export async function registerSinainMcp(agentId, opts = {}) {
  const a = findAgent(agentId);
  if (!a) throw new Error(`Unknown agent: ${agentId}`);
  return await a.register(opts);
}
364
+
365
/**
 * Remove the sinain MCP server from one agent.
 *
 * @param {string} agentId id of a known agent.
 * @throws {Error} for an unknown agent id, or when the backend fails.
 */
export async function unregisterSinainMcp(agentId) {
  const backend = findAgent(agentId);
  if (backend === undefined) {
    throw new Error(`Unknown agent: ${agentId}`);
  }
  await backend.unregister();
}
370
+
371
+ // ── Wizard step ─────────────────────────────────────────────────────────────
372
+
373
/**
 * Interactive wizard step: prepare the bundled MCP server, detect agents,
 * let the user pick which ones to register, and register each in turn.
 *
 * A backend that returns `{ ok: false }` (the Goose backend does this when
 * it declines to touch the config) is reported as a failure rather than as
 * "registered", and the Goose snippet is shown for manual pasting.
 *
 * @param {*} _existing unused by this step.
 * @param {string} [label="MCP agents"] heading logged for the step.
 * @returns {Promise<void>}
 */
export async function stepMcpInstall(_existing, label = "MCP agents") {
  p.log.step(label);

  if (!mcpServerReady()) {
    const s = p.spinner();
    s.start("Preparing sinain MCP server...");
    const ok = ensureMcpServerDeps();
    s.stop(ok ? c.green("MCP server ready.") : c.yellow("MCP server deps not installed."));
    if (!ok) {
      p.note(
        `Could not install dependencies for the bundled MCP server.\nRun manually: cd ${MCP_SERVER_DIR} && npm install`,
        "MCP server",
      );
      return;
    }
  }

  const detectSpinner = p.spinner();
  detectSpinner.start("Detecting MCP-aware agents...");
  const agents = await detectMcpAgents();
  detectSpinner.stop(c.green("Detection done."));

  const detected = agents.filter((a) => a.present);
  if (detected.length === 0) {
    p.note(
      "No MCP-aware agents detected on this machine.\nInstall Claude Code, Cursor, Codex, Goose, or Junie first.\nThen re-run: npx @geravant/sinain mcp install",
      "Skipped",
    );
    return;
  }

  // Status summary
  const summary = detected
    .map((a) => `${a.label}: ${a.alreadyRegistered ? c.dim("registered") : c.green("detected")}`)
    .join("\n");
  p.note(summary, "Agents found");

  // Multi-select: pre-check unregistered detected agents
  const choice = guard(await p.multiselect({
    message: "Register sinain MCP for:",
    options: detected.map((a) => ({
      value: a.id,
      label: a.label,
      hint: a.alreadyRegistered ? "already registered (re-register to refresh paths)" : undefined,
    })),
    initialValues: detected.filter((a) => !a.alreadyRegistered).map((a) => a.id),
    required: false,
  }));

  if (!choice || choice.length === 0) {
    p.log.info("No agents selected — skipping MCP registration.");
    return;
  }

  for (const id of choice) {
    const a = findAgent(id);
    const s = p.spinner();
    s.start(`Registering for ${a.label}...`);
    try {
      const result = await a.register();
      // A soft failure means the backend wrote nothing; route it through the
      // same reporting path as a thrown error.
      if (result?.ok === false) {
        throw new Error(`config not modified (${result.reason || "unknown reason"})`);
      }
      s.stop(c.green(`${a.label}: registered.`));
    } catch (err) {
      s.stop(c.yellow(`${a.label}: failed — ${err.message}`));
      if (id === "goose") {
        p.note(`Paste this into ${GOOSE_CONFIG}:\n\n${gooseSnippet()}`, "Goose snippet");
      }
    }
  }

  // Bonus: alternate Claude config dir (pclaude / CLAUDE_CONFIG_DIR)
  // NOTE(review): the registration above also runs with the inherited
  // process.env, so when CLAUDE_CONFIG_DIR is set it presumably already
  // targeted this directory — confirm whether this second pass is needed.
  if (process.env.CLAUDE_CONFIG_DIR && choice.includes("claude")) {
    const altDir = process.env.CLAUDE_CONFIG_DIR;
    const alsoRegister = guard(await p.confirm({
      message: `Also register sinain for CLAUDE_CONFIG_DIR=${altDir}?`,
      initialValue: true,
    }));
    if (alsoRegister) {
      const s = p.spinner();
      s.start(`Registering for Claude (${altDir})...`);
      try {
        const env = { ...process.env, CLAUDE_CONFIG_DIR: altDir };
        const args = ["mcp", "remove", "sinain"];
        try { execFileSync("claude", args, { stdio: "pipe", env }); } catch { /* not registered */ }
        const addArgs = ["mcp", "add", "sinain", "--scope", "user"];
        for (const [k, v] of Object.entries(DEFAULT_ENV)) addArgs.push("--env", `${k}=${v}`);
        addArgs.push("--", TSX_BIN, MCP_ENTRY);
        execFileSync("claude", addArgs, { stdio: "pipe", env });
        s.stop(c.green(`Claude (${altDir}): registered.`));
      } catch (err) {
        s.stop(c.yellow(`Alt config dir: failed — ${err.message}`));
      }
    }
  }
}
467
+
468
+ // ── Standalone CLI ──────────────────────────────────────────────────────────
469
+
470
/**
 * Entry point for `sinain mcp <sub> [...args]`.
 *
 * @param {string} sub subcommand: "install", "list" or "remove".
 * @param {string[]} [args=[]] remaining command-line arguments.
 * @returns {Promise<*>} result of the selected subcommand. For any other
 *   value the usage text is printed and the process exits — with status 1
 *   when a subcommand was given, 0 when none was.
 */
export async function runMcpCli(sub, args = []) {
  if (sub === "install") {
    return cliInstall(args);
  }
  if (sub === "list") {
    return cliList();
  }
  if (sub === "remove") {
    return cliRemove(args);
  }

  printMcpUsage();
  const exitCode = sub ? 1 : 0;
  process.exit(exitCode);
}
483
+
484
/**
 * `sinain mcp install` — three modes:
 *  - `--agent=<id>`: register one agent, exiting with status 1 on failure;
 *  - `--all`: register every detected agent, reporting each outcome;
 *  - neither: run the interactive wizard step.
 *
 * A backend result of `{ ok: false }` (Goose declining to modify its config)
 * is treated as a failure instead of being printed as "registered".
 *
 * @param {string[]} args command-line arguments after `install`.
 * @returns {Promise<void>}
 */
async function cliInstall(args) {
  const all = args.includes("--all");
  const agentArg = args.find((a) => a.startsWith("--agent="));
  const agentId = agentArg ? agentArg.slice("--agent=".length) : null;

  // Run one backend and turn a soft `{ ok: false }` result into an error so
  // both failure styles share the same reporting path.
  const registerOrThrow = async (backend) => {
    const result = await backend.register();
    if (result?.ok === false) {
      throw new Error(`config not modified (${result.reason || "unknown reason"})`);
    }
  };
  const printGooseSnippet = () => {
    console.log(`\nPaste this into ${GOOSE_CONFIG}:\n\n${gooseSnippet()}`);
  };

  if (agentId) {
    const a = findAgent(agentId);
    if (!a) { console.error(c.red(`Unknown agent: ${agentId}`)); process.exit(1); }
    if (!a.detect()) { console.error(c.yellow(`${a.label} not detected on this machine.`)); process.exit(1); }
    try {
      await registerOrThrow(a);
      console.log(c.green(`✓ ${a.label}: registered.`));
    } catch (err) {
      console.error(c.red(`✗ ${a.label}: ${err.message}`));
      if (agentId === "goose") printGooseSnippet();
      process.exit(1);
    }
    return;
  }

  if (all) {
    if (!mcpServerReady() && !ensureMcpServerDeps()) {
      console.error(c.red(`MCP server deps not installed. Run: cd ${MCP_SERVER_DIR} && npm install`));
      process.exit(1);
    }
    const agents = await detectMcpAgents();
    const detected = agents.filter((a) => a.present);
    if (detected.length === 0) {
      console.log(c.yellow("No MCP-aware agents detected."));
      return;
    }
    // One failing agent does not stop the others.
    for (const a of detected) {
      const backend = findAgent(a.id);
      try {
        await registerOrThrow(backend);
        console.log(c.green(`✓ ${a.label}: registered.`));
      } catch (err) {
        console.error(c.yellow(`! ${a.label}: ${err.message}`));
        if (a.id === "goose") printGooseSnippet();
      }
    }
    return;
  }

  // Interactive
  await stepMcpInstall({}, "MCP agents");
}
530
+
531
/**
 * `sinain mcp list` — print one status line per known agent: whether it is
 * present on this machine and whether sinain is registered with it.
 *
 * @returns {Promise<void>}
 */
async function cliList() {
  const agents = await detectMcpAgents();

  const registrationText = (agent) => {
    if (agent.alreadyRegistered) {
      return c.green("✓ registered");
    }
    return agent.present ? c.dim("· not registered") : c.dim("·");
  };

  console.log();
  console.log(c.bold(" Sinain MCP — agent status"));
  console.log();
  agents.forEach((agent) => {
    const present = agent.present ? c.green("present") : c.dim("absent ");
    const reg = registrationText(agent);
    console.log(` ${present} ${agent.label.padEnd(18)} ${reg}`);
  });
  console.log();
}
543
+
544
/**
 * `sinain mcp remove <agent>` — unregister sinain from one agent.
 * Exits with status 1 when the agent argument is missing or unknown, or
 * when the backend's unregister call throws.
 *
 * @param {string[]} args command-line arguments after `remove`; the first
 *   one is the agent id.
 * @returns {Promise<void>}
 */
async function cliRemove(args) {
  const [id] = args;
  if (!id) {
    console.error(c.red("Usage: sinain mcp remove <agent>"));
    process.exit(1);
  }

  const backend = findAgent(id);
  if (!backend) {
    console.error(c.red(`Unknown agent: ${id}`));
    process.exit(1);
  }

  try {
    await backend.unregister();
    console.log(c.green(`✓ ${backend.label}: removed.`));
  } catch (err) {
    console.error(c.red(`✗ ${backend.label}: ${err.message}`));
    process.exit(1);
  }
}
557
+
558
/**
 * Print the usage text for `sinain mcp` to stdout in a single
 * console.log call.
 */
function printMcpUsage() {
  const usageLines = [
    "",
    "sinain mcp — register the sinain MCP server with your agents",
    "",
    "Usage:",
    "sinain mcp install Interactive install (multi-select)",
    "sinain mcp install --all Register for every detected agent",
    "sinain mcp install --agent=<id> Register for one agent",
    "sinain mcp list Show agent status",
    "sinain mcp remove <agent> Unregister sinain from <agent>",
    "",
    "Supported agent IDs:",
    "claude, claude-desktop, cursor, codex, goose, junie",
    "",
  ];
  console.log(usageLines.join("\n"));
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@geravant/sinain",
3
- "version": "1.22.9",
3
+ "version": "1.23.2",
4
4
  "description": "Ambient intelligence that sees what you see, hears what you hear, and acts on your behalf",
5
5
  "type": "module",
6
6
  "bin": {
@@ -19,6 +19,7 @@
19
19
  "config-shared.js",
20
20
  "onboard.js",
21
21
  "launcher.js",
22
+ "mcp-register.js",
22
23
  "setup-overlay.js",
23
24
  "setup-sck-capture.js",
24
25
  "pack-prepare.js",
@@ -212,6 +212,8 @@ export class AudioPipeline extends EventEmitter {
212
212
 
213
213
  let headerSkipped = name !== "sox";
214
214
  let headerBuf = Buffer.alloc(0);
215
+ let stderrAccum = "";
216
+ const spawnTime = Date.now();
215
217
 
216
218
  proc.stdout?.on("data", (data: Buffer) => {
217
219
  if (!this.running) return;
@@ -237,6 +239,12 @@ export class AudioPipeline extends EventEmitter {
237
239
  if (msg && !/^In:.*Out:/.test(msg)) {
238
240
  log(TAG, `${name} stderr: ${msg.slice(0, 200)}`);
239
241
  }
242
+ // Accumulate stderr for TCC detection on exit
243
+ stderrAccum += data.toString();
244
+ // Cap accumulation to avoid unbounded growth (4KB is enough for any TCC message)
245
+ if (stderrAccum.length > 4096) {
246
+ stderrAccum = stderrAccum.slice(-4096);
247
+ }
240
248
  });
241
249
 
242
250
  proc.on("error", (err) => {
@@ -252,6 +260,45 @@ export class AudioPipeline extends EventEmitter {
252
260
  if (this.running && code !== 0) {
253
261
  this.errorCount++;
254
262
  this.profiler?.gauge("audio.errors", this.errorCount);
263
+
264
+ // Detect TCC (macOS Screen Recording / Microphone) permission denial.
265
+ // sck-capture logs "declined TCCs" to stderr when the entitlement is
266
+ // missing. The chicken-and-egg: clicking "Allow" on the prompt doesn't
267
+ // apply to a running process — the user must restart Terminal and
268
+ // re-run. We print a prominent banner and request graceful shutdown
269
+ // so users aren't left wondering why the agent never escalates.
270
+ const elapsedMs = Date.now() - spawnTime;
271
+ const isTccDenial = stderrAccum.includes("declined TCCs");
272
+ if (isTccDenial && elapsedMs < 5000) {
273
+ process.stdout.write([
274
+ "",
275
+ "=======================================================================",
276
+ " WARNING: Screen Recording permission needed",
277
+ "=======================================================================",
278
+ "",
279
+ " sck-capture cannot access screen capture and audio without",
280
+ " TCC (Screen Recording) permission from macOS.",
281
+ "",
282
+ " If you just clicked Allow -- that is normal! macOS does not apply",
283
+ " the permission to processes that are already running. To fix:",
284
+ "",
285
+ " 1. Press Ctrl+C to stop sinain",
286
+ " 2. Quit and restart your Terminal app (Cmd+Q, then reopen)",
287
+ " 3. Run again: npx @geravant/sinain@latest start",
288
+ "",
289
+ " Already declined? Re-grant permission:",
290
+ " System Settings > Privacy & Security > Screen Recording",
291
+ " > enable Terminal (or your terminal app)",
292
+ "",
293
+ "=======================================================================",
294
+ "",
295
+ ].join("\n"));
296
+
297
+ // Emit TCC-specific error so index.ts can initiate graceful shutdown
298
+ this.emit("tcc-denied");
299
+ return;
300
+ }
301
+
255
302
  warn(TAG, `${name} exited unexpectedly, stopping pipeline`);
256
303
  this.stop();
257
304
  }
@@ -543,7 +543,14 @@ async function importKnowledgeToLocal(data: string): Promise<string> {
543
543
  const dbPath = `${localDir}/knowledge-graph.db`;
544
544
 
545
545
  const __dir = dirname(fileURLToPath(import.meta.url));
546
- const scriptsDir = resolve(__dir, "..", "..", "sinain-hud-plugin", "sinain-memory");
546
+ // Two package layouts are supported:
547
+ // dev/monorepo: <repo>/sinain-core/src/ → ../../sinain-hud-plugin/sinain-memory
548
+ // npm-published flat: <pkg>/sinain-core/src/ → ../../sinain-memory
549
+ const scriptsDir = [
550
+ resolve(__dir, "..", "..", "sinain-hud-plugin", "sinain-memory"), // dev/monorepo layout
551
+ resolve(__dir, "..", "..", "sinain-memory"), // npm-published flat layout
552
+ resolve(__dir, "..", "sinain-memory"), // legacy alt
553
+ ].find(p => existsSync(`${p}/triplestore.py`)) || resolve(__dir, "..", "..", "sinain-memory");
547
554
 
548
555
  // Convert facts to graph ops for knowledge_integrator
549
556
  const graphOps = facts.map((f: any) => ({
@@ -834,6 +841,12 @@ async function main() {
834
841
  wsHandler.updateState({ audio: "muted" });
835
842
  });
836
843
 
844
+ systemAudioPipeline.on("tcc-denied", () => {
845
+ // Banner already printed by pipeline.ts. Initiate graceful shutdown so
846
+ // sinain exits cleanly rather than continuing with audio dead.
847
+ shutdown("TCC-DENIED").catch(() => process.exit(1));
848
+ });
849
+
837
850
  systemAudioPipeline.on("muted", () => {
838
851
  log(TAG, "system audio muted (capture process still running)");
839
852
  wsHandler.updateState({ audio: "muted" });
@@ -55,10 +55,14 @@ export class EntityCache {
55
55
  return;
56
56
  }
57
57
 
58
- // Query entity names directly via SQLite
58
+ // Query entity names directly via SQLite.
59
+ // Two package layouts are supported:
60
+ // dev/monorepo: <repo>/sinain-core/src/learning/ → ../../../sinain-hud-plugin/sinain-memory
61
+ // npm-published flat: <pkg>/sinain-core/src/learning/ → ../../../sinain-memory
59
62
  const scriptCandidates = [
60
- resolve(__dir, "..", "..", "sinain-hud-plugin", "sinain-memory", "graph_query.py"),
61
- resolve(__dir, "..", "sinain-memory", "graph_query.py"),
63
+ resolve(__dir, "..", "..", "..", "sinain-hud-plugin", "sinain-memory", "graph_query.py"), // dev/monorepo layout
64
+ resolve(__dir, "..", "..", "..", "sinain-memory", "graph_query.py"), // npm-published flat layout
65
+ resolve(__dir, "..", "..", "sinain-memory", "graph_query.py"), // legacy alt
62
66
  ];
63
67
  const scriptPath = scriptCandidates.find(p => existsSync(p));
64
68
  if (!scriptPath) return;
@@ -23,18 +23,26 @@ const __dirname = dirname(fileURLToPath(import.meta.url));
23
23
 
24
24
  /** Resolve the sinain-memory Python scripts directory. */
25
25
  function resolveScriptsDir(): string {
26
- // Look for sinain-memory scripts in known locations
26
+ // Look for sinain-memory scripts in known locations.
27
+ // Two package layouts are supported:
28
+ // dev/monorepo: <repo>/sinain-core/src/learning/ → ../../../sinain-hud-plugin/sinain-memory
29
+ // npm-published flat: <pkg>/sinain-core/src/learning/ → ../../../sinain-memory
27
30
  const candidates = [
28
- resolve(__dirname, "..", "..", "..", "sinain-hud-plugin", "sinain-memory"),
29
- resolve(__dirname, "..", "..", "sinain-memory"),
30
- resolve(process.env.HOME || "", ".sinain", "sinain-memory"),
31
+ resolve(__dirname, "..", "..", "..", "sinain-hud-plugin", "sinain-memory"), // dev/monorepo layout
32
+ resolve(__dirname, "..", "..", "..", "sinain-memory"), // npm-published flat layout
33
+ resolve(__dirname, "..", "..", "sinain-memory"), // legacy alt
34
+ resolve(process.env.HOME || "", ".sinain", "sinain-memory"), // user-local fallback
31
35
  ];
32
36
  for (const dir of candidates) {
33
37
  if (existsSync(resolve(dir, "session_distiller.py"))) {
34
38
  return dir;
35
39
  }
36
40
  }
37
- return candidates[0]; // Fallback
41
+ error(TAG, `sinain-memory scripts not found. Searched ${candidates.length} locations:`);
42
+ for (const dir of candidates) {
43
+ error(TAG, ` - ${dir}`);
44
+ }
45
+ return candidates[candidates.length - 1]; // Return user-local path as sentinel
38
46
  }
39
47
 
40
48
  /** Resolve the local memory directory. */
@@ -942,14 +942,16 @@ function setupSearch() {
942
942
  const q = input.value.trim();
943
943
  if (!q) { dropdown.classList.remove("open"); dropdown.innerHTML = ""; return; }
944
944
  const result = await api("/knowledge/search?q=" + encodeURIComponent(q) + "&limit=15");
945
+ // Always show "Search: query" as first option → topic page with combined recall
946
+ const topicLink = \`
947
+ <div class="search-result" onclick="navigate('/knowledge/ui/topic/' + encodeURIComponent('\${esc(q)}'))" style="border-bottom:1px solid rgba(255,255,255,0.1)">
948
+ <div class="entity">🔍 Search: \${esc(q)}</div>
949
+ <div class="snippet">Combined query — find facts across multiple entities</div>
950
+ </div>\`;
945
951
  if (!result.results || result.results.length === 0) {
946
- dropdown.innerHTML = \`
947
- <div class="search-result" onclick="navigate('/knowledge/ui/topic/' + encodeURIComponent('\${esc(q)}'))">
948
- <div class="entity">View as topic page</div>
949
- <div class="snippet">No matching entities — synthesize from search hits.</div>
950
- </div>\`;
952
+ dropdown.innerHTML = topicLink;
951
953
  } else {
952
- dropdown.innerHTML = result.results.map(r => \`
954
+ dropdown.innerHTML = topicLink + result.results.map(r => \`
953
955
  <div class="search-result" onclick="navigate('/knowledge/ui/entity/' + encodeURIComponent('\${esc(r.entity)}'))">
954
956
  <div class="entity">\${esc(r.entity)}</div>
955
957
  <div class="meta">\${esc(r.type)} · \${r.fact_count} fact\${r.fact_count === 1 ? "" : "s"}</div>
@@ -960,6 +962,12 @@ function setupSearch() {
960
962
  }, 220);
961
963
  input.addEventListener("input", handleQuery);
962
964
  input.addEventListener("focus", () => { if (input.value) handleQuery(); });
965
+ input.addEventListener("keydown", (e) => {
966
+ if (e.key === "Enter" && input.value.trim()) {
967
+ dropdown.classList.remove("open");
968
+ navigate("/knowledge/ui/topic/" + encodeURIComponent(input.value.trim()));
969
+ }
970
+ });
963
971
  document.addEventListener("click", (e) => {
964
972
  if (!e.target.closest(".search-wrap")) dropdown.classList.remove("open");
965
973
  });
@@ -1339,25 +1347,109 @@ function renderMissingConcept(entity, root) {
1339
1347
 
1340
1348
  // ── Topic page (simple, v1) ───────────────────────────────────────────────
1341
1349
  async function renderTopicPage(q) {
1342
- document.title = "Topic: " + q;
1350
+ document.title = "Topic: " + q + " · Sinain";
1343
1351
  const root = $("#root");
1344
- root.innerHTML = \`
1345
- <h1>Topic: \${esc(q)}</h1>
1346
- <div class="loading-block"><span class="spinner"></span> Searching…</div>\`;
1347
- const r = await api("/knowledge/search?q=" + encodeURIComponent(q) + "&limit=50");
1348
- if (!r.results || r.results.length === 0) {
1349
- root.innerHTML = \`<h1>Topic: \${esc(q)}</h1>
1352
+ root.innerHTML = \`<div class="loading-block"><span class="spinner"></span> Searching…</div>\`;
1353
+
1354
+ // Parallel: get combined facts + matching entities
1355
+ const [qr, sr] = await Promise.all([
1356
+ api("/knowledge/query?q=" + encodeURIComponent(q) + "&max=30"),
1357
+ api("/knowledge/search?q=" + encodeURIComponent(q) + "&limit=10"),
1358
+ ]);
1359
+ const factsText = qr.facts_text || "";
1360
+ const entities = sr.results || [];
1361
+
1362
+ if (!factsText && entities.length === 0) {
1363
+ root.innerHTML = \`
1364
+ <div class="page-header"><div class="title">Topic: \${esc(q)}</div></div>
1350
1365
  <div class="error-block">No matching facts.</div>\`;
1351
1366
  return;
1352
1367
  }
1368
+
1369
+ // Parse compact facts into structured items, group by entity
1370
+ const factItems = factsText ? factsText.split("; ").filter(Boolean) : [];
1371
+ const grouped = {};
1372
+ const ungrouped = [];
1373
+ for (const f of factItems) {
1374
+ const m = f.match(/^([^:]*?):\\s*(.+?)\\s*\\(([^)]+)\\)$/);
1375
+ if (m) {
1376
+ const ent = m[1].trim() || "general";
1377
+ (grouped[ent] = grouped[ent] || []).push({text: m[2], meta: m[3], raw: f});
1378
+ } else {
1379
+ ungrouped.push({text: f, meta: "", raw: f});
1380
+ }
1381
+ }
1382
+
1383
+ // Build summary from top entities
1384
+ const topEnts = Object.keys(grouped).slice(0, 5).join(", ");
1385
+ const summary = factItems.length > 0
1386
+ ? \`\${factItems.length} facts retrieved across \${Object.keys(grouped).length} entities\${topEnts ? ": " + topEnts : ""}\`
1387
+ : "No facts found for this query.";
1388
+
1353
1389
  root.innerHTML = \`
1354
- <h1>Topic: \${esc(q)}</h1>
1355
- <div class="summary">Top \${r.results.length} matches across the knowledge graph.</div>
1356
- \${r.results.map(rr => \`
1357
- <div class="bullet" onclick="navigate('/knowledge/ui/entity/' + encodeURIComponent('\${esc(rr.entity)}'))" style="cursor:pointer">
1358
- <span class="text"><strong>\${esc(rr.entity)}</strong> \${esc(rr.snippet || "")}</span>
1359
- <span class="conf">\${rr.fact_count} fact\${rr.fact_count === 1 ? "" : "s"}</span>
1360
- </div>\`).join("")}\`;
1390
+ <div class="page-header">
1391
+ <div class="title">Topic: \${esc(q)}</div>
1392
+ <div class="badges">
1393
+ <span class="badge">\${factItems.length} fact\${factItems.length === 1 ? "" : "s"}</span>
1394
+ <span class="badge">\${Object.keys(grouped).length} entit\${Object.keys(grouped).length === 1 ? "y" : "ies"}</span>
1395
+ </div>
1396
+ <div class="page-actions">
1397
+ <button id="topicCopyLink" class="icon" title="Copy topic URL">🔗</button>
1398
+ <button id="topicShare" class="icon" title="Share topic (auto-imports for recipient)">📤</button>
1399
+ </div>
1400
+ </div>
1401
+ <div class="summary">\${esc(summary)}</div>
1402
+ <div id="topicSections">
1403
+ \${Object.entries(grouped).map(([ent, facts], i) => \`
1404
+ <div class="section" id="sec-\${i}">
1405
+ <div class="section-heading" onclick="this.parentElement.classList.toggle('collapsed')">
1406
+ \${esc(ent)}
1407
+ <span style="opacity:0.5;font-size:0.85em;margin-left:8px">\${facts.length} fact\${facts.length === 1 ? "" : "s"}</span>
1408
+ </div>
1409
+ <ul class="bullets">\${facts.map(f => \`
1410
+ <li class="bullet">
1411
+ <span class="text">\${esc(f.text)}</span>
1412
+ <span class="conf">\${esc(f.meta)}</span>
1413
+ </li>\`).join("")}</ul>
1414
+ </div>\`).join("")}
1415
+ \${ungrouped.length > 0 ? \`
1416
+ <div class="section">
1417
+ <div class="section-heading">Other</div>
1418
+ <ul class="bullets">\${ungrouped.map(f => \`
1419
+ <li class="bullet"><span class="text">\${esc(f.text)}</span></li>\`).join("")}</ul>
1420
+ </div>\` : ""}
1421
+ </div>
1422
+ \${entities.length > 0 ? \`
1423
+ <div class="section" style="margin-top:16px">
1424
+ <div class="section-heading" onclick="this.parentElement.classList.toggle('collapsed')">
1425
+ Related Entities
1426
+ </div>
1427
+ <ul class="bullets">\${entities.map(rr => \`
1428
+ <li class="bullet" onclick="navigate('/knowledge/ui/entity/' + encodeURIComponent('\${esc(rr.entity)}'))" style="cursor:pointer">
1429
+ <span class="text"><strong>\${esc(rr.entity)}</strong> — \${esc(rr.snippet || "")}</span>
1430
+ <span class="conf">\${rr.fact_count} fact\${rr.fact_count === 1 ? "" : "s"}</span>
1431
+ </li>\`).join("")}</ul>
1432
+ </div>\` : ""}\`;
1433
+
1434
+ // Wire actions
1435
+ $("#topicCopyLink").onclick = () => {
1436
+ const url = location.origin + "/knowledge/ui/topic/" + encodeURIComponent(q);
1437
+ navigator.clipboard.writeText(url);
1438
+ showToast("✓ Link copied");
1439
+ };
1440
+ $("#topicShare").onclick = async () => {
1441
+ // Share all entities mentioned in the query
1442
+ const ents = (qr.entities || q.split(/[\\s,+]+/)).filter(Boolean);
1443
+ if (ents.length === 0) { showToast("No entities to share"); return; }
1444
+ showToast('<span class="spinner"></span> Preparing share…', 30_000);
1445
+ try {
1446
+ for (const ent of ents.slice(0, 3)) {
1447
+ await ShareManager.createShare(ent);
1448
+ }
1449
+ } catch (e) {
1450
+ showToast("Share failed: " + (e && e.message ? e.message : String(e)));
1451
+ }
1452
+ };
1361
1453
  }
1362
1454
 
1363
1455
  // ── Dropzone wiring (shared) ──────────────────────────────────────────────
@@ -1816,6 +1908,30 @@ export function createAppServer(deps: ServerDeps) {
1816
1908
  return;
1817
1909
  }
1818
1910
 
1911
// ── /knowledge/query ── (combined entity recall — used by topic page) ──
// GET /knowledge/query?q=<text>&max=<n>
//   q   — required; split on whitespace, "," and "+" into entity keywords.
//   max — optional fact limit; defaults to 20 and is capped at 50.
// Responds with { ok, query, facts_text, entities }. facts_text is "" when no
// queryKnowledgeFacts dependency is configured.
if (req.method === "GET" && url.pathname === "/knowledge/query") {
  const q = url.searchParams.get("q") || "";
  // Parse with an explicit radix and fall back to the default when the value is
  // missing, non-numeric or not positive, so NaN or a negative limit never
  // reaches queryKnowledgeFacts.
  const requestedMax = Number.parseInt(url.searchParams.get("max") || "", 10);
  const maxFacts = Number.isFinite(requestedMax) && requestedMax > 0
    ? Math.min(requestedMax, 50)
    : 20;
  if (!q.trim()) {
    res.writeHead(400);
    res.end(JSON.stringify({ ok: false, error: "q parameter required" }));
    return;
  }
  // Split query into entity keywords for queryKnowledgeFacts
  const entities = q.trim().split(/[\s,+]+/).filter(Boolean);
  if (deps.queryKnowledgeFacts) {
    try {
      const factsText = await deps.queryKnowledgeFacts(entities, maxFacts);
      res.end(JSON.stringify({ ok: true, query: q, facts_text: factsText, entities }));
    } catch (err) {
      // Status is deliberately left at the default: the failure is reported in
      // the JSON body via ok:false, as before.
      res.end(JSON.stringify({ ok: false, error: String(err) }));
    }
  } else {
    res.end(JSON.stringify({ ok: true, query: q, facts_text: "", entities }));
  }
  return;
}
1934
+
1819
1935
  // ── /knowledge/search ── (entity-prioritized) ──
1820
1936
  if (req.method === "GET" && url.pathname === "/knowledge/search") {
1821
1937
  const q = url.searchParams.get("q") || "";
@@ -328,6 +328,70 @@ def _cooccurring_entities(
328
328
  return ranked[:max_entities]
329
329
 
330
330
 
331
_SEMANTIC_CACHE: dict = {}  # {db_path: {"names": [...], "embs": ndarray, "ts": float}}


def _expand_keywords_semantic(
    keywords: list[str],
    db_path: str,
    threshold: float = 0.50,
    max_expansions: int = 3,
) -> list[str]:
    """Expand keywords with semantically similar entity names from the graph.

    For each keyword of at least 4 characters, up to ``max_expansions`` entity
    names whose embedding cosine similarity to the keyword is ``>= threshold``
    are appended, most similar first. Shorter keywords are kept but never
    expanded. Names already in the result are not added again.

    The sentence-transformers model is cached on the function object, and the
    entity names and embeddings are cached per ``db_path`` in
    ``_SEMANTIC_CACHE`` and rebuilt once they are older than 300 seconds.

    Args:
        keywords: Query keywords; the list itself is never mutated.
        db_path: Path of the triple store to read entity names from.
        threshold: Minimum cosine similarity for an entity name to be added.
        max_expansions: Maximum number of names added per keyword.

    Returns:
        The original keywords followed by any expansions. This is best-effort:
        if a dependency is missing, the store has no eligible entity names, or
        anything else fails, the keywords are returned without expansions.
    """
    import time as _t

    # Nothing can be expanded, so skip loading the model and opening the store.
    if not any(len(kw) >= 4 for kw in keywords):
        return list(keywords)

    try:
        from sentence_transformers import SentenceTransformer
        import numpy as np
        from triplestore import TripleStore

        if not hasattr(_expand_keywords_semantic, "_model"):
            _expand_keywords_semantic._model = SentenceTransformer("all-MiniLM-L6-v2")
        model = _expand_keywords_semantic._model

        # Cache entity names + embeddings (refresh every 5 min)
        cache = _SEMANTIC_CACHE.get(db_path)
        if not cache or _t.time() - cache["ts"] > 300:
            store = TripleStore(db_path)
            try:
                entity_names = [n for eid, n in store.entities_with_attr("name")
                                if eid.startswith("entity:") and len(n) >= 4]
            finally:
                # Release the store handle even if the lookup raises.
                store.close()
            if not entity_names:
                return keywords
            entity_embs = model.encode(entity_names, show_progress_bar=False)
            cache = {"names": entity_names, "embs": entity_embs, "ts": _t.time()}
            _SEMANTIC_CACHE[db_path] = cache

        entity_names = cache["names"]
        entity_embs = np.asarray(cache["embs"])
        entity_norms = np.linalg.norm(entity_embs, axis=1)

        # Skip expansion for very short keywords — embeddings are unreliable
        # for abbreviations like "ml", "ai" (use community detection instead)
        eligible = [kw for kw in keywords if len(kw) >= 4]
        kw_embs = np.asarray(model.encode(eligible, show_progress_bar=False))

        expanded = list(keywords)
        seen = set(expanded)
        for kw_emb in kw_embs:
            # Cosine similarity of this keyword against every entity at once.
            sims = (entity_embs @ kw_emb) / (entity_norms * np.linalg.norm(kw_emb) + 1e-9)
            picked: list[str] = []
            # Stable sort keeps the store's order among equally similar names.
            for j in np.argsort(-sims, kind="stable"):
                if len(picked) >= max_expansions or not sims[j] >= threshold:
                    break
                name = entity_names[j]
                if name in seen:
                    continue
                seen.add(name)
                picked.append(name)
            expanded.extend(picked)

        return expanded
    except Exception:
        # Deliberately best-effort: expansion is optional, so any failure
        # (missing packages, model load, store access) leaves keywords as-is.
        return keywords
393
+
394
+
331
395
  def query_facts_hybrid(
332
396
  db_path: str,
333
397
  query: str,
@@ -342,15 +406,32 @@ def query_facts_hybrid(
342
406
  import time
343
407
  keywords = [w.lower() for w in re.findall(r"[a-zA-Z][a-zA-Z0-9-]+", query) if len(w) > 2]
344
408
 
345
- # Entity graph pre-filter: find facts linked to mentioned entities via backrefs.
346
- # Used to BOOST relevant facts in RRF, not as a separate tier (avoids dilution).
409
+ # Change 0: Semantic entity expansion — append graph entity names similar to each keyword (keywords shorter than 4 chars are left unexpanded)
410
+ expanded_keywords = keywords
411
+ if len(keywords) >= 1:
412
+ expanded_keywords = _expand_keywords_semantic(keywords, db_path)
413
+
414
+ # Entity graph pre-filter with per-entity tracking for intersection (Change A)
347
415
  graph_fact_ids: set[str] = set()
416
+ graph_intersection: set[str] = set()
348
417
  community_fact_ids: set[str] = set()
349
- for kw in keywords:
418
+ per_entity_facts: dict[str, set[str]] = {}
419
+ for kw in expanded_keywords:
420
+ kw_facts: set[str] = set()
350
421
  for f in query_facts_by_entity_graph(db_path, kw, max_facts=50):
351
422
  eid = f.get("entity_id", "")
352
423
  if eid:
424
+ kw_facts.add(eid)
353
425
  graph_fact_ids.add(eid)
426
+ if kw_facts:
427
+ per_entity_facts[kw] = kw_facts
428
+
429
+ # Compute intersection: facts linked to EVERY keyword in per_entity_facts (expanded keywords that returned graph facts — not only the original query keywords)
430
+ if len(per_entity_facts) >= 2:
431
+ try:
432
+ graph_intersection = set.intersection(*per_entity_facts.values())
433
+ except TypeError:
434
+ pass
354
435
 
355
436
  # Community expansion: follow mentions edges to find related entities
356
437
  t0 = time.monotonic()
@@ -359,14 +440,14 @@ def query_facts_hybrid(
359
440
  store = TripleStore(db_path)
360
441
 
361
442
  matched_entities = set()
362
- for kw in keywords:
443
+ for kw in expanded_keywords:
363
444
  node_id = f"entity:{kw}"
364
445
  if store.entity(node_id):
365
446
  matched_entities.add(kw)
366
447
 
367
448
  for ent in matched_entities:
368
449
  if time.monotonic() - t0 > 0.5:
369
- break # timing guard
450
+ break
370
451
  community = expand_entity_community(store, ent, max_related=3)
371
452
  for related_name, _count in community:
372
453
  for f in query_facts_by_entity_graph(db_path, related_name, max_facts=20):
@@ -378,12 +459,50 @@ def query_facts_hybrid(
378
459
  except Exception:
379
460
  pass
380
461
 
381
- # Run three retrieval methods independently
462
+ # Run retrieval methods independently
382
463
  candidate_limit = max_facts * 3
383
- fts_results = query_facts_fts(db_path, query, max_facts=candidate_limit)
384
- tag_results = query_facts_by_entities(db_path, keywords, max_facts=candidate_limit) if keywords else []
464
+
465
+ # Change C: FTS5 AND mode for multi-keyword queries
466
+ if len(keywords) > 1:
467
+ fts_and_query = " AND ".join(keywords)
468
+ fts_results = query_facts_fts(db_path, fts_and_query, max_facts=candidate_limit)
469
+ if len(fts_results) < candidate_limit:
470
+ fts_or = query_facts_fts(db_path, " OR ".join(keywords), max_facts=candidate_limit)
471
+ fts_results.extend(fts_or)
472
+ else:
473
+ fts_results = query_facts_fts(db_path, query, max_facts=candidate_limit)
474
+
475
+ tag_results = query_facts_by_entities(db_path, expanded_keywords, max_facts=candidate_limit) if expanded_keywords else []
385
476
  top_results = query_top_facts(db_path, limit=candidate_limit)
386
477
 
478
+ # Change B: Tag intersection tier (facts tagged with ALL keywords)
479
+ intersection_results: list[dict] = []
480
+ if len(keywords) >= 2:
481
+ try:
482
+ from triplestore import TripleStore
483
+ _istore = TripleStore(db_path)
484
+ placeholders = ",".join("?" for _ in keywords)
485
+ rows = _istore._conn.execute(
486
+ f"""SELECT entity_id, COUNT(DISTINCT value) as matches
487
+ FROM triples WHERE attribute = 'tag' AND NOT retracted
488
+ AND value IN ({placeholders})
489
+ GROUP BY entity_id HAVING COUNT(DISTINCT value) >= ?
490
+ ORDER BY matches DESC LIMIT ?""",
491
+ (*keywords, len(keywords), candidate_limit),
492
+ ).fetchall()
493
+ for r in rows:
494
+ fid = r["entity_id"]
495
+ attrs = _istore.entity(fid)
496
+ if attrs and "value" in attrs:
497
+ fact = {"entity_id": fid}
498
+ for attr_name, values in attrs.items():
499
+ if attr_name != "tag":
500
+ fact[attr_name] = values[0] if len(values) == 1 else values
501
+ intersection_results.append(fact)
502
+ _istore.close()
503
+ except Exception:
504
+ pass
505
+
387
506
  # Build ranked lists by entity_id
388
507
  def _ranked_ids(facts: list[dict]) -> list[str]:
389
508
  seen = set()
@@ -398,41 +517,58 @@ def query_facts_hybrid(
398
517
  fts_ranked = _ranked_ids(fts_results)
399
518
  tag_ranked = _ranked_ids(tag_results)
400
519
  top_ranked = _ranked_ids(top_results)
520
+ intersection_ranked = _ranked_ids(intersection_results)
401
521
 
402
522
  # Reciprocal Rank Fusion: RRF(d) = Σ 1/(k + rank_i(d))
403
- K = 60 # standard RRF constant
523
+ K = 60
404
524
  rrf_scores: dict[str, float] = {}
405
- for ranked_list in [fts_ranked, tag_ranked, top_ranked]:
525
+ tiers = [fts_ranked, tag_ranked, top_ranked]
526
+ if intersection_ranked:
527
+ tiers.append(intersection_ranked)
528
+ for ranked_list in tiers:
406
529
  for rank, eid in enumerate(ranked_list):
407
530
  rrf_scores[eid] = rrf_scores.get(eid, 0.0) + 1.0 / (K + rank)
408
531
 
409
- # Co-occurrence boost: use FTS/tag results to find temporally related entities
410
- import time as _time
411
- _t_cooccur = _time.monotonic()
412
- query_matched_ids = {f.get("entity_id", "") for f in fts_results + tag_results if f.get("entity_id")}
413
- if query_matched_ids and _time.monotonic() - _t_cooccur < 0.3:
532
+ # Change D: Session co-occurrence for multi-entity queries
533
+ if len(keywords) >= 2 and time.monotonic() - t0 < 1.0:
414
534
  try:
415
535
  from triplestore import TripleStore
416
- _store = TripleStore(db_path)
417
- cooccur = _cooccurring_entities(_store, query_matched_ids, max_entities=5)
418
- for ent_name in cooccur:
419
- for f in query_facts_by_entity_graph(db_path, ent_name, max_facts=10):
420
- eid = f.get("entity_id", "")
421
- if eid and eid not in graph_fact_ids:
536
+ _sstore = TripleStore(db_path)
537
+ # Find sessions where facts about BOTH keywords exist
538
+ kw_a, kw_b = keywords[0], keywords[1]
539
+ sess_rows = _sstore._conn.execute(
540
+ """SELECT DISTINCT t1.value as ts FROM triples t1
541
+ JOIN triples t2 ON t2.attribute='first_seen' AND t2.value=t1.value AND t2.retracted=0
542
+ WHERE t1.attribute='first_seen' AND t1.retracted=0
543
+ AND t1.entity_id IN (SELECT entity_id FROM triples WHERE attribute='tag' AND value=? AND NOT retracted)
544
+ AND t2.entity_id IN (SELECT entity_id FROM triples WHERE attribute='tag' AND value=? AND NOT retracted)
545
+ LIMIT 10""",
546
+ (kw_a, kw_b),
547
+ ).fetchall()
548
+ if sess_rows:
549
+ ts_values = [r[0] for r in sess_rows]
550
+ ph = ",".join("?" for _ in ts_values)
551
+ fact_rows = _sstore._conn.execute(
552
+ f"SELECT DISTINCT entity_id FROM triples WHERE attribute='first_seen' AND value IN ({ph}) AND NOT retracted AND entity_id LIKE 'fact:%' LIMIT 30",
553
+ ts_values,
554
+ ).fetchall()
555
+ for r in fact_rows:
556
+ eid = r[0]
557
+ if eid not in graph_fact_ids:
422
558
  community_fact_ids.add(eid)
423
- _store.close()
559
+ _sstore.close()
424
560
  except Exception:
425
561
  pass
426
562
 
427
- # Graph boost: facts linked to mentioned entities via backrefs get priority
428
- # +0.05 is significant vs RRF scores of ~0.015-0.033 — ensures entity-linked facts
429
- # rank above FTS noise in large graphs (100K+ triples)
430
- if graph_fact_ids or community_fact_ids:
563
+ # Graph boost with intersection bonus (Change A continued)
564
+ if graph_fact_ids or community_fact_ids or graph_intersection:
431
565
  for eid in rrf_scores:
432
- if eid in graph_fact_ids:
566
+ if eid in graph_intersection:
567
+ rrf_scores[eid] += 0.10 # intersection: linked to ALL queried entities
568
+ elif eid in graph_fact_ids:
433
569
  rrf_scores[eid] += 0.05 # direct graph-linked facts
434
570
  elif eid in community_fact_ids:
435
- rrf_scores[eid] += 0.025 # community-expanded facts (half weight)
571
+ rrf_scores[eid] += 0.025 # community-expanded facts
436
572
 
437
573
  # Apply confidence decay as secondary signal (fresh facts rank above stale ones)
438
574
  from triplestore import decayed_confidence
@@ -462,11 +598,30 @@ def query_facts_hybrid(
462
598
  if eid and eid not in fact_map:
463
599
  fact_map[eid] = f
464
600
 
465
- # Return top RRF candidates. Embedding re-ranking is done by the caller
466
- # (sinain-core Node.js) to avoid deadlock — the Python subprocess can't call
467
- # back to sinain-core's /embed endpoint while sinain-core is blocked waiting
468
- # for the subprocess.
469
- results = [fact_map[eid] for eid in sorted_ids[:max_facts] if eid in fact_map]
601
+ # Return top RRF candidates, optionally re-ranked by embedding similarity.
602
+ # When called from sinain-core subprocess, embedding re-ranking happens in
603
+ # Node.js (to avoid deadlock). When called standalone (benchmark, CLI),
604
+ # we re-rank in-process if sentence-transformers is available.
605
+ rrf_candidates = [fact_map[eid] for eid in sorted_ids[:max_facts * 2] if eid in fact_map]
606
+
607
+ results = rrf_candidates[:max_facts]
608
+ try:
609
+ from sentence_transformers import SentenceTransformer
610
+ import numpy as np
611
+ if not hasattr(query_facts_hybrid, "_embed_model"):
612
+ query_facts_hybrid._embed_model = SentenceTransformer("all-MiniLM-L6-v2")
613
+ model = query_facts_hybrid._embed_model
614
+ texts = [query] + [f.get("value", "") for f in rrf_candidates]
615
+ embs = model.encode(texts, show_progress_bar=False)
616
+ q_emb = embs[0]
617
+ scored = []
618
+ for i, f in enumerate(rrf_candidates):
619
+ sim = float(np.dot(q_emb, embs[i + 1]) / (np.linalg.norm(q_emb) * np.linalg.norm(embs[i + 1]) + 1e-9))
620
+ scored.append((sim, f))
621
+ scored.sort(key=lambda x: -x[0])
622
+ results = [f for _, f in scored[:max_facts]]
623
+ except ImportError:
624
+ pass # sentence-transformers not installed — use RRF order
470
625
 
471
626
  # Expand top results with 1-hop graph neighbors
472
627
  if results and len(results) < max_facts: