kyp-mem 0.7.1 → 0.7.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/cli.mjs +19 -32
- package/bin/install.mjs +21 -67
- package/bin/python-env.mjs +115 -0
- package/kyp_mem/cli.py +19 -1
- package/kyp_mem/vault.py +8 -4
- package/kyp_mem/vector.py +181 -12
- package/package.json +1 -1
- package/pyproject.toml +1 -1
package/bin/cli.mjs
CHANGED
|
@@ -3,8 +3,9 @@
|
|
|
3
3
|
import { spawnSync } from "child_process";
|
|
4
4
|
import { appendFileSync, mkdirSync } from "fs";
|
|
5
5
|
import { homedir } from "os";
|
|
6
|
-
import { delimiter, dirname,
|
|
6
|
+
import { delimiter, dirname, resolve, join } from "path";
|
|
7
7
|
import { fileURLToPath } from "url";
|
|
8
|
+
import { ensureVenv, resolvePython, venvDir } from "./python-env.mjs";
|
|
8
9
|
|
|
9
10
|
const args = process.argv.slice(2);
|
|
10
11
|
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
@@ -21,34 +22,6 @@ function run(command, cmdArgs, stdio = "ignore") {
|
|
|
21
22
|
return spawnSync(command, cmdArgs, { stdio, env });
|
|
22
23
|
}
|
|
23
24
|
|
|
24
|
-
function pythonCandidates() {
|
|
25
|
-
if (process.env.KYP_MEM_PYTHON) {
|
|
26
|
-
return [[process.env.KYP_MEM_PYTHON, []]];
|
|
27
|
-
}
|
|
28
|
-
|
|
29
|
-
const candidates = [
|
|
30
|
-
["python3", []],
|
|
31
|
-
["python", []],
|
|
32
|
-
];
|
|
33
|
-
|
|
34
|
-
if (process.platform === "win32") {
|
|
35
|
-
candidates.unshift(["py", ["-3"]]);
|
|
36
|
-
}
|
|
37
|
-
|
|
38
|
-
return candidates;
|
|
39
|
-
}
|
|
40
|
-
|
|
41
|
-
function findPython() {
|
|
42
|
-
for (const [command, prefixArgs] of pythonCandidates()) {
|
|
43
|
-
const result = run(command, [...prefixArgs, "--version"]);
|
|
44
|
-
if (result.status === 0) {
|
|
45
|
-
return [command, prefixArgs];
|
|
46
|
-
}
|
|
47
|
-
}
|
|
48
|
-
|
|
49
|
-
return null;
|
|
50
|
-
}
|
|
51
|
-
|
|
52
25
|
// --- Hook fast path (pure Node, no Python startup) ---
|
|
53
26
|
if (args[0] === "hook") {
|
|
54
27
|
const hookType = args[1];
|
|
@@ -116,7 +89,8 @@ if (args[0] === "hook") {
|
|
|
116
89
|
}
|
|
117
90
|
|
|
118
91
|
if (hookType === "stop") {
|
|
119
|
-
|
|
92
|
+
// Bootstrap is fine here — the Stop hook is not latency-critical.
|
|
93
|
+
const py = resolvePython();
|
|
120
94
|
if (py) {
|
|
121
95
|
const [cmd, pre] = py;
|
|
122
96
|
const r = run(cmd, [...pre, "-m", "kyp_mem.hooks", "stop"], "inherit");
|
|
@@ -126,7 +100,8 @@ if (args[0] === "hook") {
|
|
|
126
100
|
}
|
|
127
101
|
|
|
128
102
|
if (hookType === "session-start") {
|
|
129
|
-
|
|
103
|
+
// Good place to self-heal the venv at the start of a session.
|
|
104
|
+
const py = resolvePython();
|
|
130
105
|
if (py) {
|
|
131
106
|
const [cmd, pre] = py;
|
|
132
107
|
const r = run(cmd, [...pre, "-m", "kyp_mem.cli", "hook", "session-start"], "inherit");
|
|
@@ -139,7 +114,19 @@ if (args[0] === "hook") {
|
|
|
139
114
|
process.exit(1);
|
|
140
115
|
}
|
|
141
116
|
|
|
142
|
-
|
|
117
|
+
// --- doctor: (re)provision the managed venv, then run the Python health check ---
|
|
118
|
+
// Done in Node so it self-heals even when the venv is too broken to run Python.
|
|
119
|
+
if (args[0] === "doctor") {
|
|
120
|
+
console.log(" Checking kyp-mem Python environment...");
|
|
121
|
+
if (!ensureVenv({ stdio: "inherit", force: true })) {
|
|
122
|
+
console.error(" \x1b[31m✗\x1b[0m Could not build the environment. Is Python 3.10+ installed?");
|
|
123
|
+
process.exit(1);
|
|
124
|
+
}
|
|
125
|
+
console.log(` \x1b[32m✓\x1b[0m Environment ready (${venvDir()})`);
|
|
126
|
+
// fall through to run `kyp_mem.cli doctor` for the full health report
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
const python = resolvePython();
|
|
143
130
|
|
|
144
131
|
if (python) {
|
|
145
132
|
const [command, prefixArgs] = python;
|
package/bin/install.mjs
CHANGED
|
@@ -3,11 +3,8 @@
|
|
|
3
3
|
import { spawnSync } from "child_process";
|
|
4
4
|
import { mkdirSync } from "fs";
|
|
5
5
|
import { homedir } from "os";
|
|
6
|
-
import {
|
|
7
|
-
import {
|
|
8
|
-
|
|
9
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
10
|
-
const root = resolve(__dirname, "..");
|
|
6
|
+
import { join } from "path";
|
|
7
|
+
import { ensureVenv, findSystemPython, resolvePython, venvDir } from "./python-env.mjs";
|
|
11
8
|
|
|
12
9
|
const G = "\x1b[32m";
|
|
13
10
|
const Y = "\x1b[33m";
|
|
@@ -17,74 +14,33 @@ const R = "\x1b[0m";
|
|
|
17
14
|
|
|
18
15
|
function run(command, args, options = {}) {
|
|
19
16
|
return spawnSync(command, args, {
|
|
20
|
-
cwd: root,
|
|
21
17
|
stdio: options.stdio ?? "ignore",
|
|
22
|
-
env: {
|
|
23
|
-
...process.env,
|
|
24
|
-
PIP_DISABLE_PIP_VERSION_CHECK: "1",
|
|
25
|
-
},
|
|
18
|
+
env: { ...process.env, PIP_DISABLE_PIP_VERSION_CHECK: "1" },
|
|
26
19
|
});
|
|
27
20
|
}
|
|
28
21
|
|
|
29
|
-
function pythonCandidates() {
|
|
30
|
-
if (process.env.KYP_MEM_PYTHON) {
|
|
31
|
-
return [[process.env.KYP_MEM_PYTHON, []]];
|
|
32
|
-
}
|
|
33
|
-
|
|
34
|
-
const candidates = [
|
|
35
|
-
["python3", []],
|
|
36
|
-
["python", []],
|
|
37
|
-
];
|
|
38
|
-
|
|
39
|
-
if (process.platform === "win32") {
|
|
40
|
-
candidates.unshift(["py", ["-3"]]);
|
|
41
|
-
}
|
|
42
|
-
|
|
43
|
-
return candidates;
|
|
44
|
-
}
|
|
45
|
-
|
|
46
|
-
function findPython() {
|
|
47
|
-
for (const [command, prefixArgs] of pythonCandidates()) {
|
|
48
|
-
const result = run(command, [...prefixArgs, "--version"]);
|
|
49
|
-
if (result.status === 0) {
|
|
50
|
-
return [command, prefixArgs];
|
|
51
|
-
}
|
|
52
|
-
}
|
|
53
|
-
|
|
54
|
-
return null;
|
|
55
|
-
}
|
|
56
|
-
|
|
57
22
|
if (process.env.KYP_MEM_SKIP_PYTHON_INSTALL === "1") {
|
|
58
23
|
process.exit(0);
|
|
59
24
|
}
|
|
60
25
|
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
if (!python) {
|
|
26
|
+
if (!findSystemPython()) {
|
|
64
27
|
console.log(` ${Y}!${R} Python 3 was not found.`);
|
|
65
|
-
console.log(` ${Y}!${R} Install Python 3.10+ and run:
|
|
28
|
+
console.log(` ${Y}!${R} Install Python 3.10+ and re-run: ${C}npm rebuild kyp-mem${R}`);
|
|
66
29
|
process.exit(0);
|
|
67
30
|
}
|
|
68
31
|
|
|
69
|
-
|
|
32
|
+
// Step 1: Provision kyp-mem's own virtualenv with all dependencies.
|
|
33
|
+
// A dedicated venv works even when the system Python is externally managed
|
|
34
|
+
// (PEP 668), so users never have to create or manage one themselves.
|
|
35
|
+
console.log(` Setting up kyp-mem Python environment...`);
|
|
70
36
|
|
|
71
|
-
|
|
72
|
-
console.log(`
|
|
73
|
-
|
|
74
|
-
const pipResult = run(
|
|
75
|
-
pythonCommand,
|
|
76
|
-
[...pythonPrefixArgs, "-m", "pip", "install", "--user", "."],
|
|
77
|
-
{ stdio: "inherit" },
|
|
78
|
-
);
|
|
79
|
-
|
|
80
|
-
if (pipResult.status !== 0) {
|
|
81
|
-
console.log(` ${Y}!${R} Could not auto-install the Python package.`);
|
|
82
|
-
console.log(` ${Y}!${R} Run manually from ${root}:`);
|
|
83
|
-
console.log(" python3 -m pip install --user .");
|
|
37
|
+
if (!ensureVenv({ stdio: "inherit", force: true })) {
|
|
38
|
+
console.log(` ${Y}!${R} Could not provision the Python environment automatically.`);
|
|
39
|
+
console.log(` ${Y}!${R} kyp-mem will retry on first run, or run it now: ${C}kyp-mem doctor${R}`);
|
|
84
40
|
process.exit(0);
|
|
85
41
|
}
|
|
86
42
|
|
|
87
|
-
console.log(` ${G}✓${R} Python
|
|
43
|
+
console.log(` ${G}✓${R} Python environment ready ${D}(${venvDir()})${R}`);
|
|
88
44
|
|
|
89
45
|
// Step 2: Create default vault directory
|
|
90
46
|
const vaultDir = join(homedir(), ".kyp-mem", "vault");
|
|
@@ -95,12 +51,12 @@ try {
|
|
|
95
51
|
console.log(` ${Y}!${R} Could not create vault at ${vaultDir}`);
|
|
96
52
|
}
|
|
97
53
|
|
|
54
|
+
const [py, pre] = resolvePython({ allowBootstrap: false });
|
|
55
|
+
|
|
98
56
|
// Step 3: Register MCP server with Claude Code (global)
|
|
99
|
-
const setupResult = run(
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
{ stdio: "inherit" },
|
|
103
|
-
);
|
|
57
|
+
const setupResult = run(py, [...pre, "-m", "kyp_mem.cli", "setup-claude", "--global"], {
|
|
58
|
+
stdio: "inherit",
|
|
59
|
+
});
|
|
104
60
|
|
|
105
61
|
if (setupResult.status === 0) {
|
|
106
62
|
console.log(` ${G}✓${R} MCP server registered with Claude Code`);
|
|
@@ -109,11 +65,9 @@ if (setupResult.status === 0) {
|
|
|
109
65
|
}
|
|
110
66
|
|
|
111
67
|
// Step 4: Install hooks (global)
|
|
112
|
-
const hooksResult = run(
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
{ stdio: "inherit" },
|
|
116
|
-
);
|
|
68
|
+
const hooksResult = run(py, [...pre, "-m", "kyp_mem.cli", "install-hooks", "--global"], {
|
|
69
|
+
stdio: "inherit",
|
|
70
|
+
});
|
|
117
71
|
|
|
118
72
|
if (hooksResult.status === 0) {
|
|
119
73
|
console.log(` ${G}✓${R} Session capture hooks installed`);
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
// Shared Python environment management for kyp-mem.
|
|
2
|
+
//
|
|
3
|
+
// kyp-mem ships a Python backend, but modern system interpreters (Homebrew,
|
|
4
|
+
// recent Debian/Ubuntu) are "externally managed" (PEP 668), so installing
|
|
5
|
+
// dependencies into them with pip is blocked. Rather than ask every user to
|
|
6
|
+
// create and manage a virtualenv, kyp-mem owns one: it is created at install
|
|
7
|
+
// time, auto-detected at runtime, and lazily rebuilt if it ever goes missing.
|
|
8
|
+
// Users never have to create, activate, or even know about it.
|
|
9
|
+
|
|
10
|
+
import { spawnSync } from "child_process";
|
|
11
|
+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from "fs";
|
|
12
|
+
import { homedir } from "os";
|
|
13
|
+
import { dirname, join, resolve } from "path";
|
|
14
|
+
import { fileURLToPath } from "url";
|
|
15
|
+
|
|
16
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
17
|
+
export const projectRoot = resolve(__dirname, "..");
|
|
18
|
+
const isWin = process.platform === "win32";
|
|
19
|
+
|
|
20
|
+
// Read the `version` field from the package.json that sits in `projectRoot`.
// Returns the string "0" when the manifest cannot be read or parsed, or when
// it carries no (truthy) version, so callers always get a comparable string.
export function packageVersion() {
  const fallback = "0";
  try {
    const manifestPath = join(projectRoot, "package.json");
    const { version } = JSON.parse(readFileSync(manifestPath, "utf8"));
    return version || fallback;
  } catch (_) {
    // Missing file, unreadable file, or invalid JSON all map to the fallback.
    return fallback;
  }
}
|
|
28
|
+
|
|
29
|
+
// Absolute path of the kyp-mem managed virtualenv: <home>/.kyp-mem/venv.
export function venvDir() {
  const home = homedir();
  return join(home, ".kyp-mem", "venv");
}
|
|
32
|
+
|
|
33
|
+
// Path of the Python executable inside the managed venv.
// Windows venvs place it under Scripts/python.exe; everywhere else it is
// bin/python.
export function venvPython() {
  const relative = isWin ? ["Scripts", "python.exe"] : ["bin", "python"];
  return join(venvDir(), ...relative);
}
|
|
38
|
+
|
|
39
|
+
// Path of the stamp file kept inside the venv directory. It stores the package
// version the venv was last provisioned for; venvReady() compares it with the
// current package version, so an upgrade triggers a reinstall on the next run.
function stampFile() {
  const stampName = ".kyp-installed";
  return join(venvDir(), stampName);
}
|
|
44
|
+
|
|
45
|
+
// Run `command` synchronously with `cmdArgs` and return spawnSync's result.
// The child inherits the current environment plus
// PIP_DISABLE_PIP_VERSION_CHECK=1. `stdio` is forwarded to spawnSync and
// defaults to "ignore".
function run(command, cmdArgs, stdio = "ignore") {
  const env = { ...process.env, PIP_DISABLE_PIP_VERSION_CHECK: "1" };
  const options = { stdio, env };
  return spawnSync(command, cmdArgs, options);
}
|
|
51
|
+
|
|
52
|
+
// Find a system Python to build the venv from (or to fall back to).
//
// Candidates are probed in order with `--version`:
//   1. $KYP_MEM_PYTHON, when set
//   2. `py -3` on Windows
//   3. `python3`
//   4. `python`
// Returns the first `[command, prefixArgs]` pair whose probe exits with
// status 0, or null when none does.
export function findSystemPython() {
  const override = process.env.KYP_MEM_PYTHON;
  const candidates = [
    ...(override ? [[override, []]] : []),
    ...(isWin ? [["py", ["-3"]]] : []),
    ["python3", []],
    ["python", []],
  ];

  // Array#find stops at the first hit, so later candidates are never spawned.
  const working = candidates.find(
    ([command, prefixArgs]) => run(command, [...prefixArgs, "--version"]).status === 0,
  );
  return working ?? null;
}
|
|
65
|
+
|
|
66
|
+
// True when the managed venv's Python executable exists AND the stamp file
// records the current package version. A missing or unreadable stamp counts
// as "not ready".
export function venvReady() {
  if (existsSync(venvPython())) {
    try {
      const stamped = readFileSync(stampFile(), "utf8").trim();
      return stamped === packageVersion();
    } catch (_) {
      return false;
    }
  }
  return false;
}
|
|
75
|
+
|
|
76
|
+
// Create the venv (if needed) and install kyp-mem + its dependencies into it.
//
// Options:
//   stdio - forwarded to the venv/pip child processes; pass "inherit" during
//           `npm install` so users see progress, "ignore" for runtime bootstrap.
//   force - when true, re-run the install even if venvReady() already holds.
//
// Returns true on success and false on any failure; it never throws, because
// callers branch on the boolean to print recovery guidance.
export function ensureVenv({ stdio = "ignore", force = false } = {}) {
  if (!force && venvReady()) return true;

  const sys = findSystemPython();
  if (!sys) return false;
  const [cmd, pre] = sys;

  if (!existsSync(venvPython())) {
    try {
      mkdirSync(venvDir(), { recursive: true });
    } catch (_) {
      // mkdirSync throws (e.g. EACCES, or a file sitting where the directory
      // should be). Report failure through the return value instead of
      // crashing the install script or the CLI.
      return false;
    }
    const created = run(cmd, [...pre, "-m", "venv", venvDir()], stdio);
    if (created.status !== 0 || !existsSync(venvPython())) return false;
  }

  const py = venvPython();
  // Best effort: the result of upgrading pip is deliberately not checked.
  run(py, ["-m", "pip", "install", "--upgrade", "pip"], stdio);
  const installed = run(py, ["-m", "pip", "install", projectRoot], stdio);
  if (installed.status !== 0) return false;

  try {
    writeFileSync(stampFile(), packageVersion());
  } catch (_) {
    // The install itself succeeded. Without the stamp venvReady() stays false,
    // so the next resolve simply attempts provisioning again.
  }
  return true;
}
|
|
102
|
+
|
|
103
|
+
// Resolve the Python interpreter kyp-mem should run with, as a
// `[command, prefixArgs]` pair (or null when nothing usable is found).
//
// Resolution order:
//   1. $KYP_MEM_PYTHON, if set — an explicit override that is returned as-is.
//   2. The managed venv, when it is already provisioned for this version.
//   3. The managed venv after bootstrapping it, when `allowBootstrap` is true
//      and ensureVenv() succeeds.
//   4. The managed venv's interpreter if it merely exists, even when it is not
//      fully provisioned.
//   5. Whatever findSystemPython() returns.
//
// Pass `allowBootstrap: false` from latency-sensitive callers so they never
// block on a pip run.
export function resolvePython({ allowBootstrap = true } = {}) {
  const override = process.env.KYP_MEM_PYTHON;
  if (override) return [override, []];

  // Short-circuit evaluation keeps the original probing order: ready check,
  // then optional bootstrap, then bare existence.
  const useManagedVenv =
    venvReady() || (allowBootstrap && ensureVenv()) || existsSync(venvPython());
  if (useManagedVenv) return [venvPython(), []];

  return findSystemPython();
}
|
package/kyp_mem/cli.py
CHANGED
|
@@ -191,11 +191,29 @@ def _run_setup_claude(global_config: bool = False):
|
|
|
191
191
|
print()
|
|
192
192
|
|
|
193
193
|
|
|
194
|
+
def _is_in_venv(bin_path: Path) -> bool:
|
|
195
|
+
import sys
|
|
196
|
+
if sys.prefix == sys.base_prefix:
|
|
197
|
+
return False
|
|
198
|
+
try:
|
|
199
|
+
bin_path.resolve().relative_to(Path(sys.prefix).resolve())
|
|
200
|
+
return True
|
|
201
|
+
except ValueError:
|
|
202
|
+
return False
|
|
203
|
+
|
|
204
|
+
|
|
194
205
|
def _get_mcp_command() -> tuple[str, list[str]]:
|
|
195
206
|
kyp_mem_bin = shutil.which("kyp-mem")
|
|
196
207
|
npx_bin = shutil.which("npx")
|
|
197
208
|
|
|
198
209
|
if kyp_mem_bin and "_npx" not in Path(kyp_mem_bin).parts:
|
|
210
|
+
bin_path = Path(kyp_mem_bin)
|
|
211
|
+
if _is_in_venv(bin_path):
|
|
212
|
+
print(f" {Y}Warning:{R} kyp-mem found inside a virtual env — this path won't work outside it.")
|
|
213
|
+
if npx_bin:
|
|
214
|
+
print(f" {D}Using npx for a stable path instead.{R}")
|
|
215
|
+
return npx_bin, ["-y", "kyp-mem", "serve"]
|
|
216
|
+
print(f" {D}Install globally: npm install -g kyp-mem{R}")
|
|
199
217
|
return kyp_mem_bin, ["serve"]
|
|
200
218
|
if npx_bin:
|
|
201
219
|
return npx_bin, ["-y", "kyp-mem", "serve"]
|
|
@@ -326,7 +344,7 @@ def _run_uninstall(purge: bool = False):
|
|
|
326
344
|
print()
|
|
327
345
|
|
|
328
346
|
|
|
329
|
-
|
|
347
|
+
def _run_install_hooks(global_config: bool = False, remove: bool = False):
|
|
330
348
|
mcp_command, _ = _get_mcp_command()
|
|
331
349
|
|
|
332
350
|
if global_config:
|
package/kyp_mem/vault.py
CHANGED
|
@@ -233,10 +233,14 @@ class Vault:
|
|
|
233
233
|
|
|
234
234
|
def _sync_vector_db(self):
    """Push the vault's session notes into the session-memory store.

    Does nothing when no session memory is available
    (``get_session_memory()`` returns ``None``). Otherwise every indexed note
    whose path starts with ``Sessions/`` or contains ``/Sessions/`` is passed
    to ``sync_sessions`` as ``path -> (folder, content)``.
    """
    store = get_session_memory()
    if store is None:
        return

    session_notes = {}
    for note_path, note in self.index.notes.items():
        is_session_note = note_path.startswith("Sessions/") or "/Sessions/" in note_path
        if is_session_note:
            session_notes[note_path] = (note.folder, note.content)

    store.sync_sessions(session_notes)
|
|
240
244
|
|
|
241
245
|
def _load_all(self):
|
|
242
246
|
notes = {}
|
package/kyp_mem/vector.py
CHANGED
|
@@ -1,43 +1,212 @@
|
|
|
1
|
+
import sys
|
|
2
|
+
import shutil
|
|
3
|
+
import hashlib
|
|
1
4
|
import chromadb
|
|
2
5
|
from pathlib import Path
|
|
6
|
+
from contextlib import contextmanager
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def _content_hash(content: str) -> str:
|
|
10
|
+
return hashlib.sha256(content.encode("utf-8")).hexdigest()
|
|
11
|
+
|
|
12
|
+
try:
|
|
13
|
+
import fcntl # POSIX (macOS/Linux)
|
|
14
|
+
except ImportError: # pragma: no cover - Windows fallback
|
|
15
|
+
fcntl = None
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _log(msg: str):
|
|
19
|
+
print(f"[kyp-mem vector] {msg}", file=sys.stderr)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _clear_chroma_cache():
|
|
23
|
+
"""Drop Chroma's process-wide PersistentClient cache so the next
|
|
24
|
+
PersistentClient(path=...) re-reads from disk instead of returning a stale
|
|
25
|
+
cached instance."""
|
|
26
|
+
try:
|
|
27
|
+
from chromadb.api.shared_system_client import SharedSystemClient
|
|
28
|
+
SharedSystemClient.clear_system_cache()
|
|
29
|
+
except Exception:
|
|
30
|
+
pass
|
|
31
|
+
|
|
3
32
|
|
|
4
33
|
class SessionMemory:
    """Semantic session store backed by ChromaDB.

    Several processes touch the same on-disk Chroma directory at once: the web
    UI, the MCP server, and the short-lived Claude Code hooks. ChromaDB's
    PersistentClient is not built for concurrent multi-process writes, so
    interleaved writes can corrupt the HNSW segment's pickle on disk. We guard
    against that two ways:

    1. A cross-process file lock serializes writes (and isolates them from
       reads) so concurrent processes don't clobber each other.
    2. If the index is already corrupt, we detect it and rebuild from the
       markdown vault, which is the source of truth.
    """

    def __init__(self, vault_path: str):
        """Open (or create) the store in a ``chroma`` directory next to the vault.

        The store is health-checked immediately and rebuilt if the check fails.
        """
        self.db_path = Path(vault_path).parent / "chroma"
        self.db_path.mkdir(parents=True, exist_ok=True)
        self._lock_path = self.db_path / ".kyp.lock"
        self._open()
        self._heal_if_corrupt()

    # --- connection / recovery -------------------------------------------------

    def _open(self):
        """(Re)create the Chroma client and the ``sessions`` collection handle."""
        self.client = chromadb.PersistentClient(path=str(self.db_path))
        self.collection = self.client.get_or_create_collection(name="sessions")

    def _wipe_store(self):
        """Delete everything inside ``db_path`` except the lock file.

        The lock file lives inside the store directory. Removing it while a
        process holds the lock would let another process re-create the path
        and lock a different file, so the two would no longer exclude each
        other. Keeping the file in place preserves the lock across a rebuild.
        Deletion is best effort: entries that cannot be removed are skipped.
        """
        try:
            entries = list(self.db_path.iterdir())
        except OSError:
            # Directory missing or unreadable: nothing we can wipe.
            return
        for entry in entries:
            if entry == self._lock_path:
                continue
            if entry.is_dir() and not entry.is_symlink():
                shutil.rmtree(entry, ignore_errors=True)
            else:
                try:
                    entry.unlink()
                except OSError:
                    pass

    def _rebuild(self):
        """Drop the corrupted index and start fresh.

        Safe because every session is re-embedded from its markdown note on the
        next sync (see Vault._sync_vector_db).

        We first try dropping the collection through Chroma's API (which orphans
        the bad segment and creates a clean one). If that fails, we wipe the
        store's files on disk (keeping the lock file). In the wipe path we must
        clear Chroma's process-wide client cache: PersistentClient instances are
        cached by path, so without this a re-created client would keep pointing
        at the deleted files (surfacing as "attempt to write a readonly
        database").

        Callers are expected to hold the write lock.
        """
        _log("index appears corrupt — rebuilding chroma store from the vault")
        try:
            self.client.delete_collection(name="sessions")
            self.collection = self.client.get_or_create_collection(name="sessions")
            return
        except Exception as e:
            _log(f"in-place collection reset failed ({e!r}); wiping store on disk")

        self.client = None
        self.collection = None
        _clear_chroma_cache()
        self._wipe_store()
        self.db_path.mkdir(parents=True, exist_ok=True)
        self._open()

    def _heal_if_corrupt(self):
        """Probe the store and rebuild it if the probe raises.

        Under the write lock we upsert a sentinel document, query for it, and
        delete it again. The upsert+delete exercises the write/compaction path;
        the query forces the HNSW segment to load (the read path). Between them
        they surface both ways a corrupt segment manifests. If anything raises,
        we rebuild before any real sync runs, so the rebuilt store fills cleanly
        in one pass.
        """
        sentinel = "__kyp_healthcheck__"
        try:
            with self._locked(write=True):
                self.collection.upsert(documents=["ok"], ids=[sentinel])
                self.collection.query(query_texts=["ok"], n_results=1)
                self.collection.delete(ids=[sentinel])
        except Exception as e:
            _log(f"health check failed: {e!r}")
            with self._locked(write=True):
                self._rebuild()

    # --- locking ---------------------------------------------------------------

    @contextmanager
    def _locked(self, write: bool):
        """Hold the cross-process lock for the duration of the ``with`` body.

        ``write=True`` takes an exclusive lock, ``write=False`` a shared one.
        When ``fcntl`` is unavailable (the Windows fallback) this is a no-op
        and provides no cross-process protection.
        """
        if fcntl is None:
            yield
            return
        mode = fcntl.LOCK_EX if write else fcntl.LOCK_SH
        with open(self._lock_path, "a+") as lf:
            fcntl.flock(lf, mode)
            try:
                yield
            finally:
                fcntl.flock(lf, fcntl.LOCK_UN)

    # --- operations ------------------------------------------------------------

    def _write_with_recovery(self, op):
        """Run a write op under the lock. If it fails (e.g. a corrupt segment
        slipped past the init health check), rebuild the store once and retry.

        ``op`` must be self-contained — after a rebuild the collection is empty,
        so an op that derives its work from the current collection state
        naturally repopulates everything on the retry.

        A failure on the second attempt is logged and swallowed.
        NOTE(review): any exception from ``op`` triggers a rebuild, not only
        corruption errors — confirm that is intended for input-validation
        failures.
        """
        for attempt in (1, 2):
            try:
                with self._locked(write=True):
                    op()
                return
            except Exception as e:
                if attempt == 1:
                    _log(f"write failed ({e!r}); rebuilding and retrying")
                    with self._locked(write=True):
                        self._rebuild()
                else:
                    _log(f"write failed after rebuild: {e!r}")

    def upsert_session(self, path: str, project: str, content: str):
        """Insert or replace one session note, keyed by its ``path``.

        The stored metadata carries the project name and a content hash that
        ``sync_sessions`` uses to skip unchanged notes.
        """
        meta = {"project": project, "hash": _content_hash(content)}

        def op():
            self.collection.upsert(documents=[content], metadatas=[meta], ids=[path])

        self._write_with_recovery(op)

    def sync_sessions(self, items: dict):
        """Reconcile the store with the current set of session notes.

        ``items`` maps note path -> (project, content). New and changed notes
        (by content hash) are (re)embedded, deleted notes are pruned, and
        unchanged notes are skipped so we don't re-embed the whole corpus on
        every Vault init/refresh.
        """
        desired = {p: (proj, c, _content_hash(c)) for p, (proj, c) in items.items()}

        def op():
            existing = self.collection.get(include=["metadatas"])
            existing_hash = {
                i: (m or {}).get("hash")
                for i, m in zip(existing["ids"], existing["metadatas"])
            }
            up_ids, up_docs, up_meta = [], [], []
            for p, (proj, c, h) in desired.items():
                if existing_hash.get(p) != h:
                    up_ids.append(p)
                    up_docs.append(c)
                    up_meta.append({"project": proj, "hash": h})
            stale = [i for i in existing_hash if i not in desired]
            if up_ids:
                self.collection.upsert(documents=up_docs, metadatas=up_meta, ids=up_ids)
            if stale:
                self.collection.delete(ids=stale)

        self._write_with_recovery(op)

    def delete_session(self, path: str):
        """Remove one session note from the store (best effort).

        Failures are logged rather than raised, so deleting a note never fails
        because of the index.
        """
        try:
            with self._locked(write=True):
                self.collection.delete(ids=[path])
        except Exception as e:
            _log(f"delete failed for {path!r}: {e!r}")

    def search_sessions(self, query: str, project: str = None, n_results: int = 5):
        """Query the store for sessions similar to ``query``.

        When ``project`` is given, results are filtered to that project's
        metadata. Returns Chroma's query result; on any failure the error is
        logged and an empty result with the keys ``ids``, ``documents``,
        ``metadatas`` and ``distances`` is returned instead.
        """
        where = {"project": project} if project else None
        try:
            with self._locked(write=False):
                return self.collection.query(
                    query_texts=[query],
                    n_results=n_results,
                    where=where,
                )
        except Exception as e:
            _log(f"search failed: {e!r}")
            return {"ids": [], "documents": [], "metadatas": [], "distances": []}
|
|
35
201
|
|
|
202
|
+
|
|
36
203
|
# Module-wide SessionMemory instance; stays None until init_vector_db() runs.
session_memory = None


def init_vector_db(vault_path: str):
    """Create the module-wide SessionMemory for ``vault_path``.

    Replaces any previously stored instance.
    """
    global session_memory
    memory = SessionMemory(vault_path)
    session_memory = memory


def get_session_memory():
    """Return the module-wide SessionMemory, or None if it was never initialised."""
    current = session_memory
    return current
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "kyp-mem",
|
|
3
|
-
"version": "0.7.1",
|
|
3
|
+
"version": "0.7.3",
|
|
4
4
|
"description": "Know Your Project — Persistent & Session level knowledge base for AI agents. MCP-powered with wikilinks, backlinks, auto-learning, and neon web UI.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"kyp-mem": "bin/cli.mjs"
|
package/pyproject.toml
CHANGED
|
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "kyp-mem"
|
|
7
|
-
version = "0.7.1"
|
|
7
|
+
version = "0.7.3"
|
|
8
8
|
description = "Know Your Project — Persistent knowledge base for AI agents. MCP-powered with wikilinks, backlinks, auto-learning, and neon web UI."
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
license = {text = "MIT"}
|