kyp-mem 0.7.1 → 0.7.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/cli.mjs CHANGED
@@ -3,8 +3,9 @@
3
3
  import { spawnSync } from "child_process";
4
4
  import { appendFileSync, mkdirSync } from "fs";
5
5
  import { homedir } from "os";
6
- import { delimiter, dirname, join, resolve } from "path";
6
+ import { delimiter, dirname, resolve, join } from "path";
7
7
  import { fileURLToPath } from "url";
8
+ import { ensureVenv, resolvePython, venvDir } from "./python-env.mjs";
8
9
 
9
10
  const args = process.argv.slice(2);
10
11
  const __dirname = dirname(fileURLToPath(import.meta.url));
@@ -21,45 +22,29 @@ function run(command, cmdArgs, stdio = "ignore") {
21
22
  return spawnSync(command, cmdArgs, { stdio, env });
22
23
  }
23
24
 
24
- function pythonCandidates() {
25
- if (process.env.KYP_MEM_PYTHON) {
26
- return [[process.env.KYP_MEM_PYTHON, []]];
27
- }
28
-
29
- const candidates = [
30
- ["python3", []],
31
- ["python", []],
32
- ];
33
-
34
- if (process.platform === "win32") {
35
- candidates.unshift(["py", ["-3"]]);
36
- }
37
-
38
- return candidates;
39
- }
40
-
41
- function findPython() {
42
- for (const [command, prefixArgs] of pythonCandidates()) {
43
- const result = run(command, [...prefixArgs, "--version"]);
44
- if (result.status === 0) {
45
- return [command, prefixArgs];
46
- }
47
- }
48
-
49
- return null;
50
- }
51
-
52
25
  // --- Hook fast path (pure Node, no Python startup) ---
53
26
  if (args[0] === "hook") {
54
27
  const hookType = args[1];
55
28
  const sessionDir = join(homedir(), ".kyp-mem", "sessions");
56
- const sessionFile = join(sessionDir, "current.jsonl");
57
29
 
58
30
  const chunks = [];
59
31
  process.stdin.on("data", (chunk) => chunks.push(chunk));
60
32
  await new Promise((r) => process.stdin.on("end", r));
61
33
  const raw = Buffer.concat(chunks).toString();
62
34
 
35
+ // Partition the activity log per Claude session id. Previously every session
36
+ // (across all projects) appended to one shared current.jsonl, so concurrent
37
+ // sessions interleaved and the Stop hook filed the whole batch under whichever
38
+ // project logged first — leaking foreign summaries into unrelated projects.
39
+ let sessionId = "";
40
+ try {
41
+ sessionId = ((JSON.parse(raw) || {}).session_id || "").toString();
42
+ } catch (_) {}
43
+ const safeId = sessionId.replace(/[^A-Za-z0-9_-]/g, "");
44
+ const sessionFile = safeId
45
+ ? join(sessionDir, `current-${safeId}.jsonl`)
46
+ : join(sessionDir, "current.jsonl");
47
+
63
48
  if (hookType === "user-prompt") {
64
49
  try {
65
50
  const data = JSON.parse(raw);
@@ -86,7 +71,7 @@ if (args[0] === "hook") {
86
71
  const input = data.tool_input || {};
87
72
  const rawResp = data.tool_response || "";
88
73
  const resp = (typeof rawResp === "string" ? rawResp : JSON.stringify(rawResp)).slice(0, 2000);
89
- const entry = { ts: new Date().toISOString(), tool, cwd: process.cwd() };
74
+ const entry = { ts: new Date().toISOString(), tool, cwd: process.env.CLAUDE_PROJECT_DIR || process.cwd() };
90
75
 
91
76
  if (tool === "Edit" || tool === "Write") {
92
77
  entry.file = input.file_path || "";
@@ -116,17 +101,19 @@ if (args[0] === "hook") {
116
101
  }
117
102
 
118
103
  if (hookType === "stop") {
119
- const py = findPython();
104
+ // Bootstrap is fine here — the Stop hook is not latency-critical.
105
+ const py = resolvePython();
120
106
  if (py) {
121
107
  const [cmd, pre] = py;
122
- const r = run(cmd, [...pre, "-m", "kyp_mem.hooks", "stop"], "inherit");
108
+ const r = run(cmd, [...pre, "-m", "kyp_mem.hooks", "stop", safeId], "inherit");
123
109
  process.exit(r.status ?? 0);
124
110
  }
125
111
  process.exit(0);
126
112
  }
127
113
 
128
114
  if (hookType === "session-start") {
129
- const py = findPython();
115
+ // Good place to self-heal the venv at the start of a session.
116
+ const py = resolvePython();
130
117
  if (py) {
131
118
  const [cmd, pre] = py;
132
119
  const r = run(cmd, [...pre, "-m", "kyp_mem.cli", "hook", "session-start"], "inherit");
@@ -139,7 +126,19 @@ if (args[0] === "hook") {
139
126
  process.exit(1);
140
127
  }
141
128
 
142
- const python = findPython();
129
+ // --- doctor: (re)provision the managed venv, then run the Python health check ---
130
+ // Done in Node so it self-heals even when the venv is too broken to run Python.
131
+ if (args[0] === "doctor") {
132
+ console.log(" Checking kyp-mem Python environment...");
133
+ if (!ensureVenv({ stdio: "inherit", force: true })) {
134
+ console.error(" \x1b[31m✗\x1b[0m Could not build the environment. Is Python 3.10+ installed?");
135
+ process.exit(1);
136
+ }
137
+ console.log(` \x1b[32m✓\x1b[0m Environment ready (${venvDir()})`);
138
+ // fall through to run `kyp_mem.cli doctor` for the full health report
139
+ }
140
+
141
+ const python = resolvePython();
143
142
 
144
143
  if (python) {
145
144
  const [command, prefixArgs] = python;
package/bin/install.mjs CHANGED
@@ -3,11 +3,8 @@
3
3
  import { spawnSync } from "child_process";
4
4
  import { mkdirSync } from "fs";
5
5
  import { homedir } from "os";
6
- import { fileURLToPath } from "url";
7
- import { dirname, join, resolve } from "path";
8
-
9
- const __dirname = dirname(fileURLToPath(import.meta.url));
10
- const root = resolve(__dirname, "..");
6
+ import { join } from "path";
7
+ import { ensureVenv, findSystemPython, resolvePython, venvDir } from "./python-env.mjs";
11
8
 
12
9
  const G = "\x1b[32m";
13
10
  const Y = "\x1b[33m";
@@ -17,74 +14,33 @@ const R = "\x1b[0m";
17
14
 
18
15
  function run(command, args, options = {}) {
19
16
  return spawnSync(command, args, {
20
- cwd: root,
21
17
  stdio: options.stdio ?? "ignore",
22
- env: {
23
- ...process.env,
24
- PIP_DISABLE_PIP_VERSION_CHECK: "1",
25
- },
18
+ env: { ...process.env, PIP_DISABLE_PIP_VERSION_CHECK: "1" },
26
19
  });
27
20
  }
28
21
 
29
- function pythonCandidates() {
30
- if (process.env.KYP_MEM_PYTHON) {
31
- return [[process.env.KYP_MEM_PYTHON, []]];
32
- }
33
-
34
- const candidates = [
35
- ["python3", []],
36
- ["python", []],
37
- ];
38
-
39
- if (process.platform === "win32") {
40
- candidates.unshift(["py", ["-3"]]);
41
- }
42
-
43
- return candidates;
44
- }
45
-
46
- function findPython() {
47
- for (const [command, prefixArgs] of pythonCandidates()) {
48
- const result = run(command, [...prefixArgs, "--version"]);
49
- if (result.status === 0) {
50
- return [command, prefixArgs];
51
- }
52
- }
53
-
54
- return null;
55
- }
56
-
57
22
  if (process.env.KYP_MEM_SKIP_PYTHON_INSTALL === "1") {
58
23
  process.exit(0);
59
24
  }
60
25
 
61
- const python = findPython();
62
-
63
- if (!python) {
26
+ if (!findSystemPython()) {
64
27
  console.log(` ${Y}!${R} Python 3 was not found.`);
65
- console.log(` ${Y}!${R} Install Python 3.10+ and run: python3 -m pip install --user .`);
28
+ console.log(` ${Y}!${R} Install Python 3.10+ and re-run: ${C}npm rebuild kyp-mem${R}`);
66
29
  process.exit(0);
67
30
  }
68
31
 
69
- const [pythonCommand, pythonPrefixArgs] = python;
32
+ // Step 1: Provision kyp-mem's own virtualenv with all dependencies.
33
+ // A dedicated venv works even when the system Python is externally managed
34
+ // (PEP 668), so users never have to create or manage one themselves.
35
+ console.log(` Setting up kyp-mem Python environment...`);
70
36
 
71
- // Step 1: Install Python package
72
- console.log(` Installing kyp-mem Python package...`);
73
-
74
- const pipResult = run(
75
- pythonCommand,
76
- [...pythonPrefixArgs, "-m", "pip", "install", "--user", "."],
77
- { stdio: "inherit" },
78
- );
79
-
80
- if (pipResult.status !== 0) {
81
- console.log(` ${Y}!${R} Could not auto-install the Python package.`);
82
- console.log(` ${Y}!${R} Run manually from ${root}:`);
83
- console.log(" python3 -m pip install --user .");
37
+ if (!ensureVenv({ stdio: "inherit", force: true })) {
38
+ console.log(` ${Y}!${R} Could not provision the Python environment automatically.`);
39
+ console.log(` ${Y}!${R} kyp-mem will retry on first run, or run it now: ${C}kyp-mem doctor${R}`);
84
40
  process.exit(0);
85
41
  }
86
42
 
87
- console.log(` ${G}✓${R} Python package installed`);
43
+ console.log(` ${G}✓${R} Python environment ready ${D}(${venvDir()})${R}`);
88
44
 
89
45
  // Step 2: Create default vault directory
90
46
  const vaultDir = join(homedir(), ".kyp-mem", "vault");
@@ -95,12 +51,12 @@ try {
95
51
  console.log(` ${Y}!${R} Could not create vault at ${vaultDir}`);
96
52
  }
97
53
 
54
+ const [py, pre] = resolvePython({ allowBootstrap: false });
55
+
98
56
  // Step 3: Register MCP server with Claude Code (global)
99
- const setupResult = run(
100
- pythonCommand,
101
- [...pythonPrefixArgs, "-m", "kyp_mem.cli", "setup-claude", "--global"],
102
- { stdio: "inherit" },
103
- );
57
+ const setupResult = run(py, [...pre, "-m", "kyp_mem.cli", "setup-claude", "--global"], {
58
+ stdio: "inherit",
59
+ });
104
60
 
105
61
  if (setupResult.status === 0) {
106
62
  console.log(` ${G}✓${R} MCP server registered with Claude Code`);
@@ -109,11 +65,9 @@ if (setupResult.status === 0) {
109
65
  }
110
66
 
111
67
  // Step 4: Install hooks (global)
112
- const hooksResult = run(
113
- pythonCommand,
114
- [...pythonPrefixArgs, "-m", "kyp_mem.cli", "install-hooks", "--global"],
115
- { stdio: "inherit" },
116
- );
68
+ const hooksResult = run(py, [...pre, "-m", "kyp_mem.cli", "install-hooks", "--global"], {
69
+ stdio: "inherit",
70
+ });
117
71
 
118
72
  if (hooksResult.status === 0) {
119
73
  console.log(` ${G}✓${R} Session capture hooks installed`);
@@ -0,0 +1,115 @@
1
+ // Shared Python environment management for kyp-mem.
2
+ //
3
+ // kyp-mem ships a Python backend, but modern system interpreters (Homebrew,
4
+ // recent Debian/Ubuntu) are "externally managed" (PEP 668), so installing
5
+ // dependencies into them with pip is blocked. Rather than ask every user to
6
+ // create and manage a virtualenv, kyp-mem owns one: it is created at install
7
+ // time, auto-detected at runtime, and lazily rebuilt if it ever goes missing.
8
+ // Users never have to create, activate, or even know about it.
9
+
10
+ import { spawnSync } from "child_process";
11
+ import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
12
+ import { homedir } from "os";
13
+ import { dirname, join, resolve } from "path";
14
+ import { fileURLToPath } from "url";
15
+
16
+ const __dirname = dirname(fileURLToPath(import.meta.url));
17
+ export const projectRoot = resolve(__dirname, "..");
18
+ const isWin = process.platform === "win32";
19
+
20
+ export function packageVersion() {
21
+ try {
22
+ const pkg = JSON.parse(readFileSync(join(projectRoot, "package.json"), "utf8"));
23
+ return pkg.version || "0";
24
+ } catch (_) {
25
+ return "0";
26
+ }
27
+ }
28
+
29
+ export function venvDir() {
30
+ return join(homedir(), ".kyp-mem", "venv");
31
+ }
32
+
33
+ export function venvPython() {
34
+ return isWin
35
+ ? join(venvDir(), "Scripts", "python.exe")
36
+ : join(venvDir(), "bin", "python");
37
+ }
38
+
39
+ // Records the package version the venv was last provisioned for, so a kyp-mem
40
+ // upgrade transparently reinstalls dependencies on next run.
41
+ function stampFile() {
42
+ return join(venvDir(), ".kyp-installed");
43
+ }
44
+
45
+ function run(command, cmdArgs, stdio = "ignore") {
46
+ return spawnSync(command, cmdArgs, {
47
+ stdio,
48
+ env: { ...process.env, PIP_DISABLE_PIP_VERSION_CHECK: "1" },
49
+ });
50
+ }
51
+
52
+ // Find a system Python to build the venv from (or to fall back to).
53
+ export function findSystemPython() {
54
+ const candidates = [];
55
+ if (process.env.KYP_MEM_PYTHON) candidates.push([process.env.KYP_MEM_PYTHON, []]);
56
+ if (isWin) candidates.push(["py", ["-3"]]);
57
+ candidates.push(["python3", []], ["python", []]);
58
+
59
+ for (const [command, prefixArgs] of candidates) {
60
+ const result = run(command, [...prefixArgs, "--version"]);
61
+ if (result.status === 0) return [command, prefixArgs];
62
+ }
63
+ return null;
64
+ }
65
+
66
+ // The venv exists and was provisioned for the current package version.
67
+ export function venvReady() {
68
+ if (!existsSync(venvPython())) return false;
69
+ try {
70
+ return readFileSync(stampFile(), "utf8").trim() === packageVersion();
71
+ } catch (_) {
72
+ return false;
73
+ }
74
+ }
75
+
76
+ // Create the venv (if needed) and install kyp-mem + its dependencies into it.
77
+ // Returns true on success. `stdio` controls pip/venv output; pass "inherit"
78
+ // during `npm install` so users see progress, "ignore" for runtime bootstrap.
79
+ export function ensureVenv({ stdio = "ignore", force = false } = {}) {
80
+ if (!force && venvReady()) return true;
81
+
82
+ const sys = findSystemPython();
83
+ if (!sys) return false;
84
+ const [cmd, pre] = sys;
85
+
86
+ if (!existsSync(venvPython())) {
87
+ mkdirSync(venvDir(), { recursive: true });
88
+ const created = run(cmd, [...pre, "-m", "venv", venvDir()], stdio);
89
+ if (created.status !== 0 || !existsSync(venvPython())) return false;
90
+ }
91
+
92
+ const py = venvPython();
93
+ run(py, ["-m", "pip", "install", "--upgrade", "pip"], stdio);
94
+ const installed = run(py, ["-m", "pip", "install", projectRoot], stdio);
95
+ if (installed.status !== 0) return false;
96
+
97
+ try {
98
+ writeFileSync(stampFile(), packageVersion());
99
+ } catch (_) {}
100
+ return true;
101
+ }
102
+
103
+ // Resolve the Python interpreter kyp-mem should run with.
104
+ // 1. KYP_MEM_PYTHON, if set, is an explicit override (power-user escape hatch).
105
+ // 2. The managed venv — bootstrapped on demand when `allowBootstrap`.
106
+ // 3. Whatever venv/system Python exists, even if not fully provisioned.
107
+ // `allowBootstrap: false` skips the (slow) install step for latency-sensitive
108
+ // callers so they never block on a pip run.
109
+ export function resolvePython({ allowBootstrap = true } = {}) {
110
+ if (process.env.KYP_MEM_PYTHON) return [process.env.KYP_MEM_PYTHON, []];
111
+ if (venvReady()) return [venvPython(), []];
112
+ if (allowBootstrap && ensureVenv()) return [venvPython(), []];
113
+ if (existsSync(venvPython())) return [venvPython(), []];
114
+ return findSystemPython();
115
+ }
package/kyp_mem/cli.py CHANGED
@@ -191,11 +191,29 @@ def _run_setup_claude(global_config: bool = False):
191
191
  print()
192
192
 
193
193
 
194
+ def _is_in_venv(bin_path: Path) -> bool:
195
+ import sys
196
+ if sys.prefix == sys.base_prefix:
197
+ return False
198
+ try:
199
+ bin_path.resolve().relative_to(Path(sys.prefix).resolve())
200
+ return True
201
+ except ValueError:
202
+ return False
203
+
204
+
194
205
  def _get_mcp_command() -> tuple[str, list[str]]:
195
206
  kyp_mem_bin = shutil.which("kyp-mem")
196
207
  npx_bin = shutil.which("npx")
197
208
 
198
209
  if kyp_mem_bin and "_npx" not in Path(kyp_mem_bin).parts:
210
+ bin_path = Path(kyp_mem_bin)
211
+ if _is_in_venv(bin_path):
212
+ print(f" {Y}Warning:{R} kyp-mem found inside a virtual env — this path won't work outside it.")
213
+ if npx_bin:
214
+ print(f" {D}Using npx for a stable path instead.{R}")
215
+ return npx_bin, ["-y", "kyp-mem", "serve"]
216
+ print(f" {D}Install globally: npm install -g kyp-mem{R}")
199
217
  return kyp_mem_bin, ["serve"]
200
218
  if npx_bin:
201
219
  return npx_bin, ["-y", "kyp-mem", "serve"]
@@ -326,7 +344,7 @@ def _run_uninstall(purge: bool = False):
326
344
  print()
327
345
 
328
346
 
329
-
347
+ def _run_install_hooks(global_config: bool = False, remove: bool = False):
330
348
  mcp_command, _ = _get_mcp_command()
331
349
 
332
350
  if global_config:
package/kyp_mem/hooks.py CHANGED
@@ -10,6 +10,37 @@ from pathlib import Path
10
10
  SESSION_DIR = Path.home() / ".kyp-mem" / "sessions"
11
11
  CURRENT_SESSION = SESSION_DIR / "current.jsonl"
12
12
 
13
+
14
+ def _session_file(session_id):
15
+ """Per-Claude-session activity log path.
16
+
17
+ Each Claude session gets its own ``current-<session_id>.jsonl`` so that
18
+ concurrent sessions in different projects never share one file. Falls back
19
+ to the legacy shared ``current.jsonl`` when no session id is available.
20
+ """
21
+ if session_id:
22
+ safe = "".join(c for c in str(session_id) if c.isalnum() or c in "_-")
23
+ if safe:
24
+ return SESSION_DIR / f"current-{safe}.jsonl"
25
+ return CURRENT_SESSION
26
+
27
+
28
+ def _prune_stale_logs(max_age_days=3):
29
+ """Remove orphaned activity logs left by sessions that never fired Stop
30
+ (crashes, kills) plus the legacy shared current.jsonl once it goes idle."""
31
+ import time
32
+
33
+ cutoff = time.time() - max_age_days * 86400
34
+ try:
35
+ for f in SESSION_DIR.glob("current*.jsonl"):
36
+ try:
37
+ if f.stat().st_mtime < cutoff:
38
+ f.unlink(missing_ok=True)
39
+ except OSError:
40
+ pass
41
+ except Exception:
42
+ pass
43
+
13
44
  MIN_ACTIONS = 5
14
45
  CHARS_PER_TOKEN = 4
15
46
 
@@ -123,20 +154,26 @@ def handle_session_start():
123
154
  cwd = os.environ.get("CLAUDE_PROJECT_DIR", os.getcwd())
124
155
  project_name = Path(cwd).name
125
156
 
157
+ _prune_stale_logs()
158
+
126
159
  try:
127
160
  from .config import get_vault_path
128
161
  from .vault import Vault
129
162
 
130
163
  vault = Vault(get_vault_path())
131
164
 
132
- project_notes = [p for p in vault.index.notes if p.startswith(f"{project_name}/")]
165
+ # Match case-insensitively: the project dir may be stored in the vault
166
+ # with different casing than the cwd basename (e.g. on case-insensitive
167
+ # filesystems "KYP-MEM" and "kyp-mem" are the same directory).
168
+ prefix = f"{project_name}/".lower()
169
+ project_notes = [p for p in vault.index.notes if p.lower().startswith(prefix)]
133
170
  if not project_notes:
134
171
  return
135
172
 
136
173
  sessions = sorted(
137
- (p for p in project_notes if "/Sessions/" in p),
174
+ (p for p in project_notes if "/sessions/" in p.lower()),
138
175
  reverse=True,
139
- )[:3]
176
+ )[:10]
140
177
  if not sessions:
141
178
  return
142
179
 
@@ -193,7 +230,7 @@ def handle_user_prompt():
193
230
  }
194
231
 
195
232
  SESSION_DIR.mkdir(parents=True, exist_ok=True)
196
- with open(CURRENT_SESSION, "a") as f:
233
+ with open(_session_file(data.get("session_id", "")), "a") as f:
197
234
  f.write(json.dumps(entry) + "\n")
198
235
 
199
236
 
@@ -255,7 +292,7 @@ def handle_post_tool_use():
255
292
  return
256
293
 
257
294
  SESSION_DIR.mkdir(parents=True, exist_ok=True)
258
- with open(CURRENT_SESSION, "a") as f:
295
+ with open(_session_file(data.get("session_id", "")), "a") as f:
259
296
  f.write(json.dumps(entry) + "\n")
260
297
 
261
298
 
@@ -513,13 +550,14 @@ Raw session data:
513
550
  return None
514
551
 
515
552
 
516
- def handle_stop():
517
- if _is_subprocess() or not CURRENT_SESSION.exists():
553
+ def handle_stop(session_id=""):
554
+ session_file = _session_file(session_id)
555
+ if _is_subprocess() or not session_file.exists():
518
556
  return
519
557
 
520
- text = CURRENT_SESSION.read_text().strip()
558
+ text = session_file.read_text().strip()
521
559
  if not text:
522
- CURRENT_SESSION.unlink(missing_ok=True)
560
+ session_file.unlink(missing_ok=True)
523
561
  return
524
562
 
525
563
  entries = []
@@ -530,12 +568,12 @@ def handle_stop():
530
568
  continue
531
569
 
532
570
  if not entries:
533
- CURRENT_SESSION.unlink(missing_ok=True)
571
+ session_file.unlink(missing_ok=True)
534
572
  return
535
573
 
536
574
  write_actions = [e for e in entries if e.get("action") in ("edit", "create", "command")]
537
575
  if len(write_actions) < MIN_ACTIONS:
538
- CURRENT_SESSION.unlink(missing_ok=True)
576
+ session_file.unlink(missing_ok=True)
539
577
  return
540
578
 
541
579
  project_dir = entries[0].get("cwd", "unknown")
@@ -691,8 +729,8 @@ def handle_stop():
691
729
  pass
692
730
 
693
731
  # Delete session file BEFORE summarization so the spawned claude subprocess
694
- # doesn't pollute it via hooks writing back into current.jsonl
695
- CURRENT_SESSION.unlink(missing_ok=True)
732
+ # doesn't pollute it via hooks writing back into the session log
733
+ session_file.unlink(missing_ok=True)
696
734
 
697
735
  # Try Claude summarization, fall back to raw sections
698
736
  summarized = _summarize_with_claude(raw_note, project_name)
@@ -765,7 +803,8 @@ def handle_stop():
765
803
 
766
804
  def main():
767
805
  if len(sys.argv) > 1 and sys.argv[1] == "stop":
768
- handle_stop()
806
+ session_id = sys.argv[2] if len(sys.argv) > 2 else ""
807
+ handle_stop(session_id)
769
808
  else:
770
809
  raw = sys.stdin.read().strip()
771
810
  if not raw:
@@ -775,7 +814,7 @@ def main():
775
814
  except json.JSONDecodeError:
776
815
  return
777
816
  if "stop_reason" in data:
778
- handle_stop()
817
+ handle_stop(data.get("session_id", ""))
779
818
 
780
819
 
781
820
  if __name__ == "__main__":
package/kyp_mem/vault.py CHANGED
@@ -233,10 +233,14 @@ class Vault:
233
233
 
234
234
  def _sync_vector_db(self):
235
235
  mem = get_session_memory()
236
- for path, note in self.index.notes.items():
237
- if "/Sessions/" in path or path.startswith("Sessions/"):
238
- folder = note.folder
239
- mem.upsert_session(path, folder, note.content)
236
+ if mem is None:
237
+ return
238
+ items = {
239
+ path: (note.folder, note.content)
240
+ for path, note in self.index.notes.items()
241
+ if "/Sessions/" in path or path.startswith("Sessions/")
242
+ }
243
+ mem.sync_sessions(items)
240
244
 
241
245
  def _load_all(self):
242
246
  notes = {}
package/kyp_mem/vector.py CHANGED
@@ -1,43 +1,212 @@
1
+ import sys
2
+ import shutil
3
+ import hashlib
1
4
  import chromadb
2
5
  from pathlib import Path
6
+ from contextlib import contextmanager
7
+
8
+
9
+ def _content_hash(content: str) -> str:
10
+ return hashlib.sha256(content.encode("utf-8")).hexdigest()
11
+
12
+ try:
13
+ import fcntl # POSIX (macOS/Linux)
14
+ except ImportError: # pragma: no cover - Windows fallback
15
+ fcntl = None
16
+
17
+
18
+ def _log(msg: str):
19
+ print(f"[kyp-mem vector] {msg}", file=sys.stderr)
20
+
21
+
22
+ def _clear_chroma_cache():
23
+ """Drop Chroma's process-wide PersistentClient cache so the next
24
+ PersistentClient(path=...) re-reads from disk instead of returning a stale
25
+ cached instance."""
26
+ try:
27
+ from chromadb.api.shared_system_client import SharedSystemClient
28
+ SharedSystemClient.clear_system_cache()
29
+ except Exception:
30
+ pass
31
+
3
32
 
4
33
  class SessionMemory:
34
+ """Semantic session store backed by ChromaDB.
35
+
36
+ Several processes touch the same on-disk Chroma directory at once: the web
37
+ UI, the MCP server, and the short-lived Claude Code hooks. ChromaDB's
38
+ PersistentClient is not built for concurrent multi-process writes, so
39
+ interleaved writes can corrupt the HNSW segment's pickle on disk. We guard
40
+ against that two ways:
41
+
42
+ 1. A cross-process file lock serializes writes (and isolates them from
43
+ reads) so concurrent processes don't clobber each other.
44
+ 2. If the index is already corrupt, we detect it and rebuild from the
45
+ markdown vault, which is the source of truth.
46
+ """
47
+
5
48
  def __init__(self, vault_path: str):
6
49
  self.db_path = Path(vault_path).parent / "chroma"
7
50
  self.db_path.mkdir(parents=True, exist_ok=True)
51
+ self._lock_path = self.db_path / ".kyp.lock"
52
+ self._open()
53
+ self._heal_if_corrupt()
54
+
55
+ # --- connection / recovery -------------------------------------------------
56
+
57
+ def _open(self):
8
58
  self.client = chromadb.PersistentClient(path=str(self.db_path))
9
59
  self.collection = self.client.get_or_create_collection(name="sessions")
10
60
 
61
+ def _rebuild(self):
62
+ """Drop the corrupted index and start fresh.
63
+
64
+ Safe because every session is re-embedded from its markdown note on the
65
+ next sync (see Vault._sync_vector_db).
66
+
67
+ We first try dropping the collection through Chroma's API (which orphans
68
+ the bad segment and creates a clean one). If that fails, we wipe the
69
+ directory on disk. Either way we must clear Chroma's process-wide client
70
+ cache: PersistentClient instances are cached by path, so without this a
71
+ re-created client would keep pointing at the deleted files (surfacing as
72
+ "attempt to write a readonly database")."""
73
+ _log("index appears corrupt — rebuilding chroma store from the vault")
74
+ try:
75
+ self.client.delete_collection(name="sessions")
76
+ self.collection = self.client.get_or_create_collection(name="sessions")
77
+ return
78
+ except Exception as e:
79
+ _log(f"in-place collection reset failed ({e!r}); wiping store on disk")
80
+
81
+ self.client = None
82
+ self.collection = None
83
+ _clear_chroma_cache()
84
+ shutil.rmtree(self.db_path, ignore_errors=True)
85
+ self.db_path.mkdir(parents=True, exist_ok=True)
86
+ self._open()
87
+
88
+ def _heal_if_corrupt(self):
89
+ """Force the write/compaction path that surfaces a corrupt segment.
90
+
91
+ Corruption only throws when Chroma deserializes the HNSW segment during
92
+ a write. We run a sentinel upsert+delete under the lock; if that raises,
93
+ we rebuild before any real sync runs, so the rebuilt store fills cleanly
94
+ in one pass."""
95
+ sentinel = "__kyp_healthcheck__"
96
+ try:
97
+ with self._locked(write=True):
98
+ # upsert+delete exercises the write/compaction path; the query
99
+ # forces the HNSW segment to load (the read path). Between them
100
+ # they surface both ways a corrupt segment manifests.
101
+ self.collection.upsert(documents=["ok"], ids=[sentinel])
102
+ self.collection.query(query_texts=["ok"], n_results=1)
103
+ self.collection.delete(ids=[sentinel])
104
+ except Exception as e:
105
+ _log(f"health check failed: {e!r}")
106
+ with self._locked(write=True):
107
+ self._rebuild()
108
+
109
+ # --- locking ---------------------------------------------------------------
110
+
111
+ @contextmanager
112
+ def _locked(self, write: bool):
113
+ if fcntl is None:
114
+ yield
115
+ return
116
+ mode = fcntl.LOCK_EX if write else fcntl.LOCK_SH
117
+ with open(self._lock_path, "a+") as lf:
118
+ fcntl.flock(lf, mode)
119
+ try:
120
+ yield
121
+ finally:
122
+ fcntl.flock(lf, fcntl.LOCK_UN)
123
+
124
+ # --- operations ------------------------------------------------------------
125
+
126
+ def _write_with_recovery(self, op):
127
+ """Run a write op under the lock. If it fails (e.g. a corrupt segment
128
+ slipped past the init health check), rebuild the store once and retry.
129
+
130
+ ``op`` must be self-contained — after a rebuild the collection is empty,
131
+ so an op that derives its work from the current collection state
132
+ naturally repopulates everything on the retry."""
133
+ for attempt in (1, 2):
134
+ try:
135
+ with self._locked(write=True):
136
+ op()
137
+ return
138
+ except Exception as e:
139
+ if attempt == 1:
140
+ _log(f"write failed ({e!r}); rebuilding and retrying")
141
+ with self._locked(write=True):
142
+ self._rebuild()
143
+ else:
144
+ _log(f"write failed after rebuild: {e!r}")
145
+
11
146
  def upsert_session(self, path: str, project: str, content: str):
12
- self.collection.upsert(
13
- documents=[content],
14
- metadatas=[{"project": project}],
15
- ids=[path]
16
- )
147
+ meta = {"project": project, "hash": _content_hash(content)}
148
+
149
+ def op():
150
+ self.collection.upsert(documents=[content], metadatas=[meta], ids=[path])
151
+
152
+ self._write_with_recovery(op)
153
+
154
+ def sync_sessions(self, items: dict):
155
+ """Reconcile the store with the current set of session notes.
156
+
157
+ ``items`` maps note path -> (project, content). New and changed notes
158
+ (by content hash) are (re)embedded, deleted notes are pruned, and
159
+ unchanged notes are skipped so we don't re-embed the whole corpus on
160
+ every Vault init/refresh."""
161
+ desired = {p: (proj, c, _content_hash(c)) for p, (proj, c) in items.items()}
162
+
163
+ def op():
164
+ existing = self.collection.get(include=["metadatas"])
165
+ existing_hash = {
166
+ i: (m or {}).get("hash")
167
+ for i, m in zip(existing["ids"], existing["metadatas"])
168
+ }
169
+ up_ids, up_docs, up_meta = [], [], []
170
+ for p, (proj, c, h) in desired.items():
171
+ if existing_hash.get(p) != h:
172
+ up_ids.append(p)
173
+ up_docs.append(c)
174
+ up_meta.append({"project": proj, "hash": h})
175
+ stale = [i for i in existing_hash if i not in desired]
176
+ if up_ids:
177
+ self.collection.upsert(documents=up_docs, metadatas=up_meta, ids=up_ids)
178
+ if stale:
179
+ self.collection.delete(ids=stale)
180
+
181
+ self._write_with_recovery(op)
17
182
 
18
183
  def delete_session(self, path: str):
19
184
  try:
20
- self.collection.delete(ids=[path])
185
+ with self._locked(write=True):
186
+ self.collection.delete(ids=[path])
21
187
  except Exception:
22
188
  pass
23
189
 
24
190
  def search_sessions(self, query: str, project: str = None, n_results: int = 5):
25
191
  where = {"project": project} if project else None
26
192
  try:
27
- results = self.collection.query(
28
- query_texts=[query],
29
- n_results=n_results,
30
- where=where
31
- )
32
- return results
193
+ with self._locked(write=False):
194
+ return self.collection.query(
195
+ query_texts=[query],
196
+ n_results=n_results,
197
+ where=where,
198
+ )
33
199
  except Exception:
34
200
  return {"ids": [], "documents": [], "metadatas": [], "distances": []}
35
201
 
202
+
36
203
  session_memory = None
37
204
 
205
+
38
206
  def init_vector_db(vault_path: str):
39
207
  global session_memory
40
208
  session_memory = SessionMemory(vault_path)
41
209
 
210
+
42
211
  def get_session_memory():
43
212
  return session_memory
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kyp-mem",
3
- "version": "0.7.1",
3
+ "version": "0.7.4",
4
4
  "description": "Know Your Project — Persistent & Session level knowledge base for AI agents. MCP-powered with wikilinks, backlinks, auto-learning, and neon web UI.",
5
5
  "bin": {
6
6
  "kyp-mem": "bin/cli.mjs"
package/pyproject.toml CHANGED
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "kyp-mem"
7
- version = "0.7.1"
7
+ version = "0.7.4"
8
8
  description = "Know Your Project — Persistent knowledge base for AI agents. MCP-powered with wikilinks, backlinks, auto-learning, and neon web UI."
9
9
  readme = "README.md"
10
10
  license = {text = "MIT"}