@lh8ppl/claude-memory-kit 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
File without changes
File without changes
File without changes
File without changes
File without changes
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lh8ppl/claude-memory-kit",
3
- "version": "0.1.1",
3
+ "version": "0.1.2",
4
4
  "description": "cmk — the CLI for claude-memory-kit. Per-project, in-repo memory system for Claude Code.",
5
5
  "type": "module",
6
6
  "bin": {
@@ -28,6 +28,7 @@
28
28
  // never needs Read either — the turn content arrives in the prompt).
29
29
 
30
30
  import { spawn as defaultSpawn } from 'node:child_process';
31
+ import { spawnBin } from './spawn-bin.mjs';
31
32
  import { writeFileSync, mkdtempSync, rmSync } from 'node:fs';
32
33
  import { tmpdir } from 'node:os';
33
34
  import { join } from 'node:path';
@@ -203,25 +204,24 @@ export class HaikuViaAnthropicApi extends CompressorBackend {
203
204
  const env = { ...process.env };
204
205
  delete env.CLAUDECODE;
205
206
 
206
- // shell:true required on Windows so that .cmd shims (claude.cmd)
207
- // resolve through cmd.exe. Without it, node's spawn fails with
208
- // EINVAL/ENOENT because it won't auto-resolve .cmd extensions
209
- // (CVE-2024-27980 hardening). On Linux/macOS shell:true is a
210
- // no-op for argv-style invocation when the arguments don't contain
211
- // shell metacharacters — ours don't (the prompt goes via stdin).
207
+ // spawnBin handles the Windows .cmd-shim problem WITHOUT the
208
+ // `shell:true` + args-array combo that broke paths with spaces (#4):
209
+ // POSIX spawns argv-style (shell:false); Windows builds a single
210
+ // pre-quoted command string so `--mcp-config C:\Users\First Last\…`
211
+ // survives cmd.exe tokenization. See spawn-bin.mjs. `windowsHide`
212
+ // still suppresses the transient cmd.exe console flash on Windows.
212
213
  // spawn-discipline: caller-managed terminateSubprocess (kit's kill-chain helper) + setTimeout (per design §8.5; PR-A composition-verification instance #4; substance pinned by tests/cli-compressor-timeout.test.js + tests/spawn-smoke-kill-chain.test.js). The function signature `timeoutMs` parameter (line 162) is the caller-supplied bound; the setTimeout below (search "Timeout timer") fires the kill chain.
213
- const child = this._spawn(this._bin, args, {
214
- cwd: tmpdir(), // OS-native temp dir; replaces `/tmp` which fails to resolve on Windows
215
- env,
216
- stdio: ['pipe', 'pipe', 'pipe'],
217
- shell: true,
218
- // Suppress the transient cmd.exe console window on Windows —
219
- // every shell:true spawn flashes a window otherwise (visible
220
- // to the user when auto-extract / compress-session fires
221
- // dozens of times per session). stdio is piped so we still
222
- // capture the child's output through the regular handlers.
223
- windowsHide: true,
224
- });
214
+ const child = spawnBin(
215
+ this._bin,
216
+ args,
217
+ {
218
+ cwd: tmpdir(), // OS-native temp dir; replaces `/tmp` which fails to resolve on Windows
219
+ env,
220
+ stdio: ['pipe', 'pipe', 'pipe'],
221
+ windowsHide: true,
222
+ },
223
+ { spawnImpl: this._spawn },
224
+ );
225
225
 
226
226
  const cleanupSandbox = () => {
227
227
  // Single-use sandbox: the directory and the empty-mcp.json file
package/src/doctor.mjs CHANGED
@@ -38,7 +38,7 @@ import {
38
38
  statSync,
39
39
  writeFileSync,
40
40
  } from 'node:fs';
41
- import { spawnSync } from 'node:child_process';
41
+ import { spawnBinSync } from './spawn-bin.mjs';
42
42
  import { homedir } from 'node:os';
43
43
  import { basename, join } from 'node:path';
44
44
  import { nowIso } from './audit-log.mjs';
@@ -62,7 +62,10 @@ async function hc1Memsearch() {
62
62
  // semantic requires a separate `pip install memsearch[onnx]`.
63
63
  // `requiresInstall: true` so the CLI prompts before auto-installing.
64
64
  try {
65
- const r = spawnSync('memsearch', ['--version'], {
65
+ // spawnBinSync resolves the Windows .cmd shim without `shell:true`+args
66
+ // (no DEP0190; #4). memsearch's only arg is `--version` (no spaces), so
67
+ // the quoting is a no-op here — the win is dropping the deprecated combo.
68
+ const r = spawnBinSync('memsearch', ['--version'], {
66
69
  encoding: 'utf8',
67
70
  // M1 fix (skill-review 2026-05-28): 3.5s tolerates Windows
68
71
  // cold-Python startup (AV scan + .pyc generation on first hit
@@ -71,7 +74,6 @@ async function hc1Memsearch() {
71
74
  // still fits comfortably inside the 5s NFR budget. Timeout →
72
75
  // 'skip' so cmk doctor completes regardless.
73
76
  timeout: 3_500,
74
- shell: process.platform === 'win32',
75
77
  });
76
78
  if (r.status === 0) {
77
79
  return {
@@ -367,14 +367,36 @@ export function reindexBoot({ projectRoot, userDir, db, now }) {
367
367
 
368
368
  for (const source of sources) {
369
369
  filesScanned++;
370
- const content = readFileSync(source.path, 'utf8');
371
- const sha1 = sha1OfContent(content);
372
370
  const relPath = relativeSource(source.path, { projectRoot, userDir });
373
371
  const existing = db
374
- .prepare('SELECT sha1 FROM files WHERE path = ?')
372
+ .prepare('SELECT mtime, sha1 FROM files WHERE path = ?')
375
373
  .get(relPath);
374
+ // Fast path: if the file's mtime matches the checkpoint, the content is
375
+ // unchanged — skip the read + sha1 entirely. This realizes design §9.2's
376
+ // "mtime+sha1 diff" intent (the prior impl sha1'd every file on every
377
+ // call) and is what makes reindexBoot cheap enough to run before every
378
+ // `cmk search` (finding #0) even as the memory corpus grows.
379
+ let mtime = null;
380
+ try {
381
+ mtime = Math.floor(statSync(source.path).mtimeMs);
382
+ } catch {
383
+ // stat failed (file vanished mid-walk); fall through to the read,
384
+ // which surfaces the error naturally.
385
+ }
386
+ if (existing && mtime !== null && existing.mtime === mtime) {
387
+ continue; // unchanged (mtime match — no read needed)
388
+ }
389
+ // Caveat: a content change that PRESERVES the old mtime (e.g. a restore
390
+ // tool that sets --times) is missed until the next real change or a
391
+ // `reindex --full`. Negligible in practice — the kit always writes a
392
+ // fresh mtime after the indexed one — and standard for mtime-based diffs.
393
+ //
394
+ // mtime differs (or no checkpoint) — confirm via sha1 so a mere mtime
395
+ // touch (content identical) doesn't trigger a needless reindex.
396
+ const content = readFileSync(source.path, 'utf8');
397
+ const sha1 = sha1OfContent(content);
376
398
  if (existing && existing.sha1 === sha1) {
377
- continue; // unchanged
399
+ continue; // content unchanged despite mtime touch
378
400
  }
379
401
  const n = txn(source);
380
402
  filesReindexed++;
@@ -84,17 +84,20 @@ const TIER_BUDGETS = Object.freeze({
84
84
  });
85
85
 
86
86
  // Per-tier reading plan. The hook reads the scratchpads allowed at that
87
- // tier (per SCRATCHPADS_BY_TIER) plus the tier's INDEX file, plus — for
88
- // the project tier — the most recent rolling-window day file.
87
+ // tier (per SCRATCHPADS_BY_TIER) plus — for the project tier — the most
88
+ // recent rolling-window day file.
89
+ //
90
+ // INDEX.md is deliberately NOT in the snapshot (#R, 2026-05-30). It is a
91
+ // pointer/reference doc that self-declares "NOT auto-loaded at session
92
+ // start" in its own template body — injecting it both violated that
93
+ // contract and pushed ~2 KB of reference prose into Claude's context,
94
+ // crowding out real facts. It stays on disk for lookup via `cmk search` /
95
+ // the granular archive; it is not session-start content.
89
96
  function plannedFilesForTier(tier, tierRoot) {
90
97
  const files = [];
91
98
  for (const name of SCRATCHPADS_BY_TIER[tier]) {
92
99
  files.push(join(tierRoot, name));
93
100
  }
94
- // INDEX: P/L use memory/INDEX.md; U uses fragments/INDEX.md (per
95
- // resolveFactDir asymmetry in tier-paths.mjs).
96
- const indexDir = tier === 'U' ? 'fragments' : 'memory';
97
- files.push(join(tierRoot, indexDir, 'INDEX.md'));
98
101
  if (tier === 'P') {
99
102
  const sessionsDir = join(tierRoot, 'sessions');
100
103
  const latest = latestDaySession(sessionsDir);
@@ -138,10 +141,145 @@ function tierDirExists(tier, tierRoot) {
138
141
  return existsSync(tierRoot) && statSync(tierRoot).isDirectory();
139
142
  }
140
143
 
// Template-seed sentinel. Scaffolded placeholder bullets (machine-paths,
// overrides, SOUL, USER, HABITS, LESSONS) carry a provenance comment with
// `sha1: 0000…0000` (forty zeros) and `at: 2020-01-01T…`, whereas a captured
// fact carries the sha1 of its real content. Matching the all-zero digest is
// how "scaffolding the user never replaced" is told apart from "a fact worth
// injecting".
const SEED_SHA1_RE = /sha1:\s*0{40}/;
150
+
151
+ // All HTML-comment handling below uses STRING SCANNING (indexOf/startsWith),
152
+ // never a regex tag-filter. Regex-based HTML-comment stripping is fragile by
153
+ // nature (it can't see newlines, leaves partial `<!--`, etc. — flagged by
154
+ // CodeQL's js/bad-tag-filter). String scanning is both more robust and not a
155
+ // tag-filter, so it sidesteps that whole class.
156
+
// True if `line`, ignoring surrounding whitespace, is exactly ONE self-
// contained HTML comment (`<!-- … -->`) — e.g. a per-bullet provenance line.
// Non-string input (such as the `undefined` past the last line) is never a
// comment line.
//
// The first `-->` after the opener must be the one that ends the line.
// Checking only startsWith/endsWith would also accept
// `<!-- a --> real text <!-- b -->`, which is two comments wrapped around
// content, and a caller that skips "comment-only" lines would then drop that
// content. The position check also rejects the too-short `<!-->` / `<!--->`
// forms, because no `-->` can start at or after index 4 in them.
function isCommentOnlyLine(line) {
  if (typeof line !== 'string') return false;
  const trimmed = line.trim();
  if (!trimmed.startsWith('<!--')) return false;
  const firstClose = trimmed.indexOf('-->', 4);
  return firstClose !== -1 && firstClose === trimmed.length - 3;
}
164
+
// Drop each complete `<!-- … -->` span found inside ONE line by pairing
// delimiters left to right. If an opener has no closer on the same line, the
// rest of the line (opener included) is returned untouched so the multi-line
// pass can deal with it.
function stripInlineComments(line) {
  const OPEN = '<!--';
  const CLOSE = '-->';
  const pieces = [];
  let cursor = 0;
  while (true) {
    const start = line.indexOf(OPEN, cursor);
    const end = start === -1 ? -1 : line.indexOf(CLOSE, start + OPEN.length);
    if (end === -1) {
      // No further complete comment: keep everything from the cursor onward.
      pieces.push(line.slice(cursor));
      return pieces.join('');
    }
    pieces.push(line.slice(cursor, start));
    cursor = end + CLOSE.length;
  }
}
180
+
// Decide whether `bulletLine` is template scaffolding that must not be
// injected. Two signals:
//   - the bullet has the template's exact `(P-XXXXXXXX) (example) …` shape,
//     with the marker directly after the citation id, so a real fact that
//     merely mentions "(example)" later in its text is not dropped;
//   - the line after the bullet is a comment-only provenance line carrying
//     the all-zero seed sha1.
function isSeedBullet(bulletLine, nextLine) {
  const templateExampleShape = /^\s*-\s+\([PUL]-[A-Za-z0-9]{8}\)\s+\(example\)/;
  if (templateExampleShape.test(bulletLine)) {
    return true;
  }
  if (!isCommentOnlyLine(nextLine)) {
    return false;
  }
  return SEED_SHA1_RE.test(nextLine);
}
194
+
// Strip HTML comments from `text` using string scanning only. The kit
// templates contain multi-line `<!-- … -->` headers that embed a single-line
// `<!-- source… -->` example; closing the outer block on that inner `-->`
// would orphan the tail. So each line first loses its self-contained inline
// comments, and only then is the multi-line open/close state tracked.
// Trailing spaces/tabs are trimmed and runs of blank lines are collapsed to
// one.
function stripHtmlComments(text) {
  const kept = [];
  let insideBlock = false;
  for (const raw of text.split('\n')) {
    // Inline removal is per-line and stateless, so it can happen here rather
    // than in a separate up-front pass.
    let rest = stripInlineComments(raw);
    if (insideBlock) {
      const closeAt = rest.indexOf('-->');
      if (closeAt === -1) continue; // whole line is comment interior
      insideBlock = false;
      rest = rest.slice(closeAt + 3);
    }
    const openAt = rest.indexOf('<!--');
    if (openAt !== -1) {
      insideBlock = true;
      rest = rest.slice(0, openAt);
    }
    const isBlank = rest.trim() === '';
    const previousIsEmpty = kept.length > 0 && kept[kept.length - 1] === '';
    if (isBlank && previousIsEmpty) continue; // collapse consecutive blanks
    kept.push(rest.replace(/[ \t]+$/, ''));
  }
  return kept.join('\n');
}
225
+
// Prepare a scratchpad body for INJECTION into the session-start snapshot.
// The on-disk file is not modified. Two steps:
//   1. drop placeholder seed bullets (all-zero sha1 or the template
//      `(example)` shape) together with the provenance comment line that
//      follows them;
//   2. strip every remaining `<!-- -->` comment (multi-line format headers
//      and per-bullet provenance), then squeeze the whitespace left behind.
//
// Known limitation: fact text that itself contains a literal `<!--` / `-->`
// loses that fragment in the injected view only.
function cleanScratchpadBody(body) {
  // CRLF is normalized first so Windows-edited files leave no stray \r.
  const rows = body.replace(/\r\n/g, '\n').split('\n');
  const survivors = [];
  let idx = 0;
  while (idx < rows.length) {
    const current = rows[idx];
    const following = rows[idx + 1];
    const isCitedBullet = /^\s*-\s/.test(current) && ID_TOKEN_RE.test(current);
    if (isCitedBullet && isSeedBullet(current, following)) {
      // Skip the bullet, and its provenance line too when one follows.
      idx += isCommentOnlyLine(following) ? 2 : 1;
      continue;
    }
    survivors.push(current);
    idx += 1;
  }
  const uncommented = stripHtmlComments(survivors.join('\n'));
  const squeezed = uncommented.replace(/\n{3,}/g, '\n\n');
  return squeezed.replace(/^\n+|\n+$/g, '');
}
267
+
// After cleaning, does a body carry any real content — i.e. at least one
// non-blank line that is not a markdown (ATX) heading? A body made only of
// headings is pure scaffolding (every bullet was a stripped seed) and must
// NOT contribute a tier block.
//
// Heading detection follows the ATX rule: up to three spaces of indentation,
// one to six `#`, then whitespace OR end of line. The end-of-line case
// matters because a bare `##` (an empty heading whose trailing space was
// trimmed) would otherwise count as content and leak an empty skeleton. Four
// or more leading spaces is an indented code block, so that still counts as
// content, as does `#hashtag` (no space after the `#`).
function hasRealContent(cleaned) {
  const atxHeading = /^ {0,3}#{1,6}(?:\s|$)/;
  for (const line of cleaned.split('\n')) {
    if (line.trim() === '') continue;
    if (atxHeading.test(line)) continue;
    return true;
  }
  return false;
}
277
+
141
278
  // Read the snapshot-eligible content for one tier as a single string. If
142
- // no tier files exist (or the tier dir itself is absent), returns ''. The
143
- // per-file content is wrapped in a fenced header so the snapshot is
144
- // self-describing to whoever reads Claude's context window.
279
+ // no tier files exist (or the tier dir itself is absent), returns ''. Each
280
+ // file body is cleaned for injection (see cleanScratchpadBody); files that
281
+ // reduce to scaffolding-only contribute nothing, and a tier whose every
282
+ // file is scaffolding-only is excluded entirely (no header, no skeleton).
145
283
  function readTierBlock(tier, tierRoot) {
146
284
  if (!tierDirExists(tier, tierRoot)) return '';
147
285
  const sections = [];
@@ -154,7 +292,9 @@ function readTierBlock(tier, tierRoot) {
154
292
  continue;
155
293
  }
156
294
  if (body.trim() === '') continue;
157
- sections.push(body);
295
+ const cleaned = cleanScratchpadBody(body);
296
+ if (!hasRealContent(cleaned)) continue;
297
+ sections.push(cleaned);
158
298
  }
159
299
  if (sections.length === 0) return '';
160
300
  const header = `<!-- cmk: ${TIER_LABELS[tier]} tier (${tier}) -->`;
@@ -34,6 +34,7 @@ import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'
34
34
  import { z } from 'zod';
35
35
  import { resolve as resolvePath, isAbsolute } from 'node:path';
36
36
  import { openIndexDb } from './index-db.mjs';
37
+ import { reindexBoot } from './index-rebuild.mjs';
37
38
  import { search, SEARCH_MODES } from './search.mjs';
38
39
  import { memoryWrite } from './memory-write.mjs';
39
40
  import { ID_PATTERN, resolveTierRoot } from './tier-paths.mjs';
@@ -451,6 +452,22 @@ export function buildMcpServer({ projectRoot, userDir, db, semanticBackend }) {
451
452
  */
452
453
  export async function runMcpServer({ projectRoot, userDir, db: dbOverride, semanticBackend } = {}) {
453
454
  const db = dbOverride ?? openIndexDb({ projectRoot });
455
+ // Refresh the index at server startup so mk_search sees facts already on
456
+ // disk — same fresh-install gap as `cmk search` (self-test finding #0):
457
+ // nothing reindexes for a just-installed project, so without this the
458
+ // model's first mk_search returns empty for facts sitting in the
459
+ // scratchpads. Incremental (mtime/sha1 diff) + best-effort; in-session
460
+ // freshness for facts written AFTER startup is the runtime watcher's job
461
+ // (future). The in-process buildMcpServer tests bypass this path.
462
+ if (projectRoot) {
463
+ try {
464
+ reindexBoot({ projectRoot, userDir, db });
465
+ } catch (err) {
466
+ process.stderr.write(
467
+ `cmk-mcp-server: startup index refresh failed: ${err?.message ?? err}\n`,
468
+ );
469
+ }
470
+ }
454
471
  const server = buildMcpServer({ projectRoot, userDir, db, semanticBackend });
455
472
  const transport = new StdioServerTransport();
456
473
 
@@ -57,6 +57,7 @@ import { appendScratchpadBullet } from './scratchpad.mjs';
57
57
  import { parseBulletProvenance } from './provenance.mjs';
58
58
  import { checkPoisonGuard, logPoisonGuardRejection } from './poison-guard.mjs';
59
59
  import { detectConflicts, writeConflictEntry } from './conflict-queue.mjs';
60
+ import { sanitizeHomePaths } from './sanitize.mjs';
60
61
 
61
62
  const VALID_ACTIONS = new Set(['add', 'replace', 'remove']);
62
63
 
@@ -252,8 +253,20 @@ function doAdd(opts) {
252
253
  if (errors.length > 0) {
253
254
  return errorResult({ category: ERROR_CATEGORIES.SCHEMA, errors });
254
255
  }
256
+ // Privacy (write-path fix #1): abstract home-dir paths to `~` for
257
+ // committed/shared tiers (P/U) BEFORE the bullet is screened, conflict-
258
+ // checked, dedup-keyed, and written — so a captured fact never ships the
259
+ // local username and stays portable. Local tier (L) keeps machine paths
260
+ // verbatim (its purpose). Everything downstream uses `addOpts`.
261
+ const sanitizedText =
262
+ opts.tier === 'P' || opts.tier === 'U'
263
+ ? sanitizeHomePaths(opts.text)
264
+ : opts.text;
265
+ const addOpts =
266
+ sanitizedText === opts.text ? opts : { ...opts, text: sanitizedText };
267
+
255
268
  const poisonResult = runPoisonGuard({
256
- text: opts.text,
269
+ text: addOpts.text,
257
270
  projectRoot: opts.projectRoot,
258
271
  source: opts.source,
259
272
  sessionId: opts.sessionId,
@@ -276,7 +289,7 @@ function doAdd(opts) {
276
289
  userDir: opts.userDir,
277
290
  });
278
291
  const conflict = detectConflicts({
279
- newText: opts.text,
292
+ newText: addOpts.text,
280
293
  newTrust,
281
294
  scratchpadPath,
282
295
  sectionTitle: opts.section,
@@ -296,14 +309,14 @@ function doAdd(opts) {
296
309
  // appendScratchpadBullet would have used, then route to the queue.
297
310
  // (Task 25b fix: generateId is positional `(tier, text)`, not
298
311
  // named-args — Task 25 originally called it as an object.)
299
- const proposedId = generateId(opts.tier, opts.text);
312
+ const proposedId = generateId(addOpts.tier, addOpts.text);
300
313
  const ts = opts.now ?? nowIso();
301
314
  return writeConflictEntry({
302
315
  tier: opts.tier,
303
316
  projectRoot: opts.projectRoot,
304
317
  userDir: opts.userDir,
305
318
  newId: proposedId,
306
- newText: opts.text,
319
+ newText: addOpts.text,
307
320
  newTrust,
308
321
  existingId: conflict.existingId,
309
322
  existingText: conflict.existingText,
@@ -313,7 +326,7 @@ function doAdd(opts) {
313
326
  detectedAt: ts,
314
327
  });
315
328
  }
316
- return appendBulletGuarded(opts);
329
+ return appendBulletGuarded(addOpts);
317
330
  }
318
331
 
319
332
  function appendBulletGuarded(opts) {
@@ -0,0 +1,39 @@
1
+ // sanitize.mjs — privacy sanitizers applied before durable writes to a
2
+ // committed/shared tier. Sibling to poison-guard.mjs, but where Poison_Guard
3
+ // REJECTS a write (secrets/poison), these REWRITE it (privacy abstraction).
4
+ //
5
+ // Write-path fix #1 (the self-test privacy leak): a durable fact written to a
6
+ // committed project tier carried the local username inside an absolute
7
+ // interpreter path (C:\Users\<you>\...\python.exe), shipping it to git and
8
+ // making the fact non-portable. sanitizeHomePaths abstracts the home-dir
9
+ // prefix to `~` — killing the username leak AND making the fact portable
10
+ // across machines — while preserving everything after the home dir.
11
+ //
12
+ // Applied to P (committed) and U (cross-project) tier writes. NOT to L
13
+ // (local, gitignored) — machine-specific absolute paths are the whole point
14
+ // of the local tier, so they stay verbatim there.
15
+
// Each pattern matches an absolute home-directory prefix up to (but not
// including) the next path separator / whitespace / quote, so the remainder
// of the path is preserved. Username char class excludes separators, spaces,
// quotes, and shell/redirect metacharacters.
const USER = "[^\\\\/\\s\"'`<>|]+";
// Case-INSENSITIVE: Windows + macOS filesystems are case-insensitive, so a
// fact may carry `c:\users\you\…` or `/users/you`; the `i` flag keeps the
// privacy abstraction from being bypassed by lowercasing.
const HOME_PATH_PATTERNS = [
  new RegExp(`[A-Za-z]:[\\\\/]Users[\\\\/]${USER}`, 'gi'), // Windows C:\Users\name (either slash)
  new RegExp(`/Users/${USER}`, 'gi'), // macOS /Users/name
  new RegExp(`/home/${USER}`, 'gi'), // Linux /home/name
];

// Matches when a string ENDS inside a web-URL token (scheme://… with no
// whitespace since the scheme). Tested against the text before a candidate
// match to tell `https://host/users/x` (a URL route, not a home directory)
// from a filesystem path. `file://` is deliberately absent: a file URL does
// point at a local home directory and must still be abstracted.
const WEB_URL_TAIL_RE = /\b(?:https?|wss?|ftp):\/\/\S*$/i;

/**
 * Abstract absolute home-directory prefixes to `~`. Returns non-string input
 * unchanged (callers may pass undefined for optional fields).
 *
 * A `/home/<segment>` or `/users/<segment>` that is part of an http(s),
 * ws(s) or ftp URL is left alone: it is a URL route, and rewriting it would
 * corrupt the stored fact without any privacy gain.
 *
 * @param {*} text - Candidate fact text.
 * @returns {*} The text with home-directory prefixes replaced by `~`, or the
 *   original value when it is not a string.
 */
export function sanitizeHomePaths(text) {
  if (typeof text !== 'string') return text;
  let out = text;
  for (const re of HOME_PATH_PATTERNS) {
    // The callback form of replace ignores lastIndex, so sharing the
    // module-level /g regexes across calls is safe.
    out = out.replace(re, (match, offset, whole) =>
      WEB_URL_TAIL_RE.test(whole.slice(0, offset)) ? match : '~',
    );
  }
  return out;
}
@@ -0,0 +1,83 @@
1
+ // spawn-bin.mjs — cross-platform subprocess spawning that AVOIDS the
2
+ // `shell:true` + args-array combination (self-test finding #4).
3
+ //
4
+ // Why this exists
5
+ // ---------------
6
+ // Spawning an npm-global bin (claude, memsearch, cmk-*) needs `shell:true` on
7
+ // Windows so the `.cmd` shim resolves through cmd.exe — Node won't auto-resolve
8
+ // `.cmd`/`.bat` without a shell (CVE-2024-27980 hardening). But `shell:true`
9
+ // WITH an args array is doubly bad:
10
+ // 1. Node emits DEP0190 ("passing args to a child process with shell:true …
11
+ // arguments are not escaped, only concatenated").
12
+ // 2. The args ARE concatenated unescaped, so a path containing a space
13
+ // (e.g. `--mcp-config C:\Users\First Last\…\empty-mcp.json` under tmpdir)
14
+ // is re-tokenized by cmd.exe and breaks parsing — silently failing
15
+ // auto-extract/compression for any Windows user whose profile has a space.
16
+ //
17
+ // The fix: never pass an args array together with shell:true.
18
+ // - POSIX: spawn(bin, args, {shell:false}) — Node resolves PATH directly and
19
+ // passes argv safely. No shell, no concatenation.
20
+ // - Windows: shell:true with a SINGLE pre-quoted command string (no args
21
+ // array) — clears DEP0190 and lets us quote each arg so spaces survive.
22
+ //
23
+ // The kit's spawn args are controlled (flags + filesystem paths, never
24
+ // user-supplied shell text), so cmd.exe double-quoting is sufficient: inside
25
+ // double-quotes cmd.exe treats &|<>^ as literal; `%` is the only residual
26
+ // special char and kit paths/flags never contain it.
27
+
28
+ import { spawn, spawnSync } from 'node:child_process';
29
+
/**
 * Quote one argument for a cmd.exe command line.
 *
 * An argument is wrapped in double quotes when it:
 *   - is the empty string (an unquoted empty arg would vanish and shift the
 *     next token into its value slot — the compressor passes
 *     `--allowed-tools ''`);
 *   - contains whitespace or a double quote;
 *   - contains a cmd.exe metacharacter (`& | < > ^ ( )`). Unquoted, cmd.exe
 *     would treat these as operators / escapes instead of passing them on.
 *
 * Inside the quotes:
 *   - embedded double quotes are doubled, and any backslashes directly
 *     before one are doubled too, so the argv parser does not read `\"` as an
 *     escaped quote and swallow a backslash;
 *   - a trailing backslash run is doubled so it cannot escape the closing
 *     quote (`"C:\dir\"` would otherwise parse as `C:\dir"`).
 *
 * `%` is still expanded by cmd.exe even inside quotes; callers must not pass
 * arguments containing it.
 *
 * @param {*} s - Argument; coerced with String().
 * @returns {string} The argument, quoted when required.
 */
function quoteWinArg(s) {
  const str = String(s);
  const needsQuoting = str === '' || /[\s"&|<>^()]/.test(str);
  if (!needsQuoting) return str;
  const escaped = str
    .replace(/(\\*)"/g, '$1$1""') // backslashes-then-quote: double both
    .replace(/(\\+)$/, '$1$1'); // trailing backslashes before closing quote
  return `"${escaped}"`;
}
48
+
/**
 * Assemble the single command string handed to cmd.exe for a Windows
 * `shell:true` spawn: the bin followed by each argument, every token passed
 * through quoteWinArg and joined with single spaces. Exported so the quoting
 * can be unit-tested on any platform.
 */
export function winCommandLine(bin, args = []) {
  const tokens = [];
  for (const piece of [bin, ...args]) {
    tokens.push(quoteWinArg(piece));
  }
  return tokens.join(' ');
}
56
+
/**
 * Cross-platform spawn that never combines `shell:true` with an args array.
 * On win32 the bin and args are folded into one pre-quoted command string
 * and run with `shell:true`; everywhere else the spawn is argv-style with
 * `shell:false`. Any `shell` value in `opts` is overridden.
 *
 * `deps` exists for injection:
 *   deps.spawnImpl — spawn function to call; defaults to node:child_process
 *                    spawn (the compressor injects its own).
 *   deps.platform  — platform string; defaults to process.platform.
 *
 * Returns whatever the spawn implementation returns (a ChildProcess in
 * production).
 */
export function spawnBin(bin, args = [], opts = {}, deps = {}) {
  const launch = deps.spawnImpl === undefined ? spawn : deps.spawnImpl;
  const os = deps.platform === undefined ? process.platform : deps.platform;
  // spawn-discipline: ignore pass-through helper — timeout/kill is the
  // caller's contract (compressor terminateSubprocess + setTimeout; doctor
  // timeout:3500), not this wrapper's.
  if (os !== 'win32') {
    return launch(bin, args, { ...opts, shell: false });
  }
  return launch(winCommandLine(bin, args), { ...opts, shell: true });
}
75
+
/**
 * Synchronous counterpart of spawnBin, for one-shot checks such as
 * `cmk doctor`'s memsearch probe. Same platform split: a single pre-quoted
 * command string with `shell:true` on win32, argv-style with `shell:false`
 * elsewhere. `deps.spawnImpl` defaults to node:child_process spawnSync and
 * `deps.platform` to process.platform.
 */
export function spawnBinSync(bin, args = [], opts = {}, deps = {}) {
  const run = deps.spawnImpl === undefined ? spawnSync : deps.spawnImpl;
  const os = deps.platform === undefined ? process.platform : deps.platform;
  if (os !== 'win32') {
    return run(bin, args, { ...opts, shell: false });
  }
  return run(winCommandLine(bin, args), { ...opts, shell: true });
}
@@ -19,6 +19,7 @@ import { reindex as reindexAction } from './reindex.mjs';
19
19
  import { openIndexDb } from './index-db.mjs';
20
20
  import { reindexBoot, reindexFull } from './index-rebuild.mjs';
21
21
  import { search as searchAction, SEARCH_MODES } from './search.mjs';
22
+ import { memoryWrite } from './memory-write.mjs';
22
23
  import { runMcpServer } from './mcp-server.mjs';
23
24
  import { dailyDistill } from './daily-distill.mjs';
24
25
  import { weeklyCurate } from './weekly-curate.mjs';
@@ -50,7 +51,7 @@ import { overrideTrust as overrideTrustAction } from './trust.mjs';
50
51
  import { resolveConflictQueue, mergeScratchpadBullets } from './conflict-queue.mjs';
51
52
  import { resolveReviewQueue } from './review-queue.mjs';
52
53
  import { createInterface } from 'node:readline';
53
- import { resolve as resolvePath, join } from 'node:path';
54
+ import { resolve as resolvePath, join, basename } from 'node:path';
54
55
 
55
56
  const NOTICE_PREFIX = 'not yet implemented in v0.1.0';
56
57
 
@@ -64,20 +65,49 @@ const NOTICE_PREFIX = 'not yet implemented in v0.1.0';
64
65
  async function runInstall(options /* , command */) {
65
66
  // commander maps `--no-hooks` to options.hooks === false.
66
67
  const noHooks = !!(options && options.hooks === false);
68
+ const verbose = !!(options && options.verbose);
67
69
  const result = await installAction({ force: !!(options && options.force), noHooks });
68
- const parts = [
69
- `scaffolded ${result.created.length} file(s)`,
70
- result.skipped.length ? `skipped ${result.skipped.length} existing` : null,
71
- `.gitignore=${result.gitignore.action}`,
72
- `CLAUDE.md=${result.claudeMd.action}`,
73
- `hooks=${result.hooks.action}`,
74
- ].filter(Boolean);
75
- console.log('cmk install: ' + parts.join(', '));
76
70
 
77
- if (result.hooks.action === 'wired' || result.hooks.action === 'unchanged') {
71
+ // Outcome over inventory (self-test UX finding): state the resulting state +
72
+ // next action, not a file tally. The old "scaffolded 5, skipped 4 existing"
73
+ // read like a problem on a FRESH folder — the "skipped" are the cross-project
74
+ // user tier at ~/.claude-memory-kit/ (OUTSIDE this folder), already on disk.
75
+ // The full per-tier breakdown is --verbose only.
76
+ const projectName = basename(resolvePath(process.cwd()));
77
+ const wired =
78
+ result.hooks.action === 'wired' || result.hooks.action === 'unchanged';
79
+ const broughtSomethingNew =
80
+ result.created.length > 0 ||
81
+ result.gitignore.action === 'created' ||
82
+ result.claudeMd.action === 'created';
83
+
84
+ if (broughtSomethingNew) {
85
+ console.log(
86
+ `cmk install: ${projectName} ready — context/ scaffolded${
87
+ wired ? ', hooks wired' : ''
88
+ }.`,
89
+ );
90
+ } else {
91
+ console.log(
92
+ `cmk install: ${projectName} already set up (your edits preserved)${
93
+ wired ? ', hooks refreshed' : ''
94
+ }.`,
95
+ );
96
+ }
97
+ if (wired) {
78
98
  console.log(
79
- ' hooks wired into .claude/settings.jsonrestart Claude Code to activate. ' +
80
- 'This is a COMPLETE install; no separate /plugin step is needed.',
99
+ ' Restart Claude Code to activate. Complete install no separate /plugin step needed.',
100
+ );
101
+ }
102
+ if (verbose) {
103
+ console.log(
104
+ ` files: ${result.created.length} created, ${result.skipped.length} already present` +
105
+ (result.skipped.length
106
+ ? ' (incl. the cross-project user tier at ~/.claude-memory-kit/, outside this folder)'
107
+ : ''),
108
+ );
109
+ console.log(
110
+ ` .gitignore=${result.gitignore.action} · CLAUDE.md=${result.claudeMd.action} · hooks=${result.hooks.action}`,
81
111
  );
82
112
  }
83
113
 
@@ -178,9 +208,28 @@ function runTrust(id, level /* , options, command */) {
178
208
  */
179
209
  function runSearch(queryParts, options) {
180
210
  const projectRoot = resolvePath(process.cwd());
211
+ const userDir =
212
+ process.env.MEMORY_KIT_USER_DIR ?? join(homedir(), '.claude-memory-kit');
181
213
  const query = Array.isArray(queryParts) ? queryParts.join(' ') : queryParts;
182
214
  const db = openIndexDb({ projectRoot });
183
215
  try {
216
+ // Refresh the index before querying. On a fresh install the FTS5 index
217
+ // is empty (auto-extract writes facts to MEMORY.md but doesn't reindex,
218
+ // and the runtime chokidar watcher isn't running for a one-shot CLI
219
+ // call), so without this `cmk search` returns "no results" for facts
220
+ // that are sitting right there in the scratchpads (self-test finding
221
+ // #0). reindexBoot is incremental — mtime/sha1 diff, only changed files
222
+ // — so it's cheap to run on every search. Degrade gracefully: a reindex
223
+ // failure falls back to whatever's already indexed rather than crashing
224
+ // the query.
225
+ try {
226
+ reindexBoot({ projectRoot, userDir, db });
227
+ } catch (err) {
228
+ console.error(
229
+ `cmk search: index refresh failed (${err?.message ?? err}); ` +
230
+ 'searching the existing index. Run `cmk reindex --full` if results look stale.',
231
+ );
232
+ }
184
233
  const r = searchAction({
185
234
  db,
186
235
  query,
@@ -235,6 +284,67 @@ function runSearch(queryParts, options) {
235
284
  * (b) keeping it always-current avoids users having to think about which
236
285
  * index to rebuild when.
237
286
  */
287
+ /**
288
+ * `cmk remember <text...>` — explicit durable capture (write-path fix #0b).
289
+ *
290
+ * Writes a provenance-tracked bullet to MEMORY.md (the session-start-recalled
291
+ * layer) through the SAME hardened path as auto-extract: Poison_Guard +
292
+ * home-path abstraction (#1) + conflict detection + dedup. This is the entry
293
+ * the scaffolded CLAUDE.md points the agent at INSTEAD of freehand-writing
294
+ * fact files — which produced wrong-schema, unindexable, username-leaking
295
+ * files in the self-test. Guaranteed-correct because it never touches raw
296
+ * frontmatter.
297
+ *
298
+ * Tier: v0.1.0 writes tier P (project MEMORY.md). U/L need per-tier scratchpad
299
+ * routing (same deferral as mk_remember, design §16) — the always-on home-path
300
+ * abstraction is the privacy net regardless of tier.
301
+ */
302
+ function runRemember(textParts, options) {
303
+ const projectRoot = resolvePath(process.cwd());
304
+ const userDir =
305
+ process.env.MEMORY_KIT_USER_DIR ?? join(homedir(), '.claude-memory-kit');
306
+ const text = Array.isArray(textParts) ? textParts.join(' ') : textParts;
307
+ const tier = options?.tier ?? 'P';
308
+ if (tier !== 'P') {
309
+ console.error(
310
+ `cmk remember: tier '${tier}' not yet supported — v0.1.0 writes the project tier (P). ` +
311
+ 'For machine-only config, edit context.local/machine-paths.md directly (v0.1.x will add --tier routing).',
312
+ );
313
+ process.exitCode = 2;
314
+ return;
315
+ }
316
+ const trust = options?.trust ?? 'high';
317
+ const section = options?.section ?? 'Active Threads';
318
+ const r = memoryWrite({
319
+ action: 'add',
320
+ text,
321
+ tier,
322
+ scratchpad: 'MEMORY.md',
323
+ section,
324
+ trust,
325
+ source: 'user-explicit',
326
+ projectRoot,
327
+ userDir,
328
+ });
329
+ if (r.action === 'error') {
330
+ for (const e of r.errors ?? [`error (${r.errorCategory})`]) {
331
+ console.error(`cmk remember: ${e}`);
332
+ }
333
+ process.exitCode = 2;
334
+ return;
335
+ }
336
+ if (r.action === 'queued') {
337
+ console.log(
338
+ `cmk remember: queued for review — a higher-trust fact already covers this. ` +
339
+ `Resolve with \`cmk queue conflicts\` (${r.path}).`,
340
+ );
341
+ return;
342
+ }
343
+ console.log(
344
+ `cmk remember: saved to P/MEMORY.md (${section})${r.id ? ` [${r.id}]` : ''}`,
345
+ );
346
+ }
347
+
238
348
  function runReindex(options /* , command */) {
239
349
  const projectRoot = resolvePath(process.cwd());
240
350
  const userDir = join(homedir(), '.claude-memory-kit');
@@ -1022,6 +1132,7 @@ export const subcommands = [
1022
1132
  optionSpec: [
1023
1133
  { flags: '--force', description: 'allow downgrade of an existing newer-version CLAUDE.md block' },
1024
1134
  { flags: '--no-hooks', description: 'scaffold only; do NOT wire hooks into .claude/settings.json' },
1135
+ { flags: '--verbose', description: 'show the per-tier created/skipped file breakdown' },
1025
1136
  ],
1026
1137
  action: runInstall,
1027
1138
  },
@@ -1037,6 +1148,18 @@ export const subcommands = [
1037
1148
  milestone: 14,
1038
1149
  action: runInitUserTier,
1039
1150
  },
1151
+ {
1152
+ name: 'remember',
1153
+ description: 'explicitly capture a durable fact to MEMORY.md (Poison_Guard + home-path abstraction + dedup; the safe alternative to hand-writing fact files)',
1154
+ milestone: 24,
1155
+ argSpec: [{ flags: '<text...>', description: 'the fact to remember' }],
1156
+ optionSpec: [
1157
+ { flags: '--tier <tier>', description: 'P (default; U/L are v0.1.x)' },
1158
+ { flags: '--trust <level>', description: 'high | medium | low (default: high)' },
1159
+ { flags: '--section <name>', description: 'MEMORY.md section (default: Active Threads)' },
1160
+ ],
1161
+ action: runRemember,
1162
+ },
1040
1163
  {
1041
1164
  name: 'search',
1042
1165
  description: 'search memory — hybrid keyword + optional semantic',
@@ -19,6 +19,8 @@ import { VALID_TIERS, resolveTierRoot, resolveFactDir } from './tier-paths.mjs';
19
19
  import { parse, format } from './frontmatter.mjs';
20
20
  import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
21
21
  import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
22
+ import { sanitizeHomePaths } from './sanitize.mjs';
23
+ import { checkPoisonGuard, logPoisonGuardRejection } from './poison-guard.mjs';
22
24
 
23
25
  const VALID_TYPES = new Set(['user', 'feedback', 'project', 'reference']);
24
26
  const VALID_WRITE_SOURCES = new Set([
@@ -148,7 +150,48 @@ export function writeFact(opts = {}) {
148
150
  });
149
151
  }
150
152
 
151
- const id = opts.id ?? generateId(opts.tier, opts.body);
153
+ // Privacy (write-path fix #1): abstract absolute home-dir paths to `~` in
154
+ // committed/shared tiers (P/U) so a fact never ships the local username
155
+ // and stays portable. Local tier (L) keeps machine-specific paths verbatim
156
+ // — that's its purpose. The id hashes the SANITIZED body, so dedup keys on
157
+ // what actually lands on disk.
158
+ let { body, title } = opts;
159
+ if (opts.tier === 'P' || opts.tier === 'U') {
160
+ body = sanitizeHomePaths(body);
161
+ title = sanitizeHomePaths(title);
162
+ }
163
+
164
+ // Poison_Guard (write-path fix #1): fact files previously bypassed the
165
+ // secret/poison screen that scratchpad writes get via memoryWrite. Screen
166
+ // the (sanitized) body before any disk write; a rejection logs the redacted
167
+ // excerpt to .locks/poison-guard.log and returns a poison_guard error.
168
+ const guard = checkPoisonGuard(body);
169
+ if (guard.rejected) {
170
+ // Best-effort log; guard on projectRoot so a U-tier write with no
171
+ // project context can't turn a clean rejection into a crash.
172
+ if (guard.pattern_id !== 'schema' && opts.projectRoot) {
173
+ logPoisonGuardRejection({
174
+ projectRoot: opts.projectRoot,
175
+ ts: opts.createdAt ?? nowIso(),
176
+ pattern_id: guard.pattern_id,
177
+ source_file: `write-fact:${opts.type}_${opts.slug}`,
178
+ source_line: 1,
179
+ redacted_excerpt: guard.redacted_excerpt,
180
+ });
181
+ }
182
+ return errorResult({
183
+ category: ERROR_CATEGORIES.POISON_GUARD,
184
+ errors: [`Poison_Guard rejected write: pattern_id=${guard.pattern_id}`],
185
+ pattern_id: guard.pattern_id,
186
+ redacted_excerpt: guard.redacted_excerpt,
187
+ id: null,
188
+ path: null,
189
+ });
190
+ }
191
+
192
+ // Use the sanitized body/title for id, frontmatter, and the file body.
193
+ const factOpts = { ...opts, body, title };
194
+ const id = opts.id ?? generateId(opts.tier, body);
152
195
  const createdAt = opts.createdAt ?? nowIso();
153
196
  const tierRoot = resolveTierRoot(opts);
154
197
  const factDir = resolveFactDir(opts.tier, tierRoot);
@@ -198,8 +241,8 @@ export function writeFact(opts = {}) {
198
241
  }
199
242
 
200
243
  mkdirSync(factDir, { recursive: true });
201
- const frontmatter = buildFrontmatterObject(opts, { id, createdAt });
202
- writeFileSync(path, format({ frontmatter, body: `\n${opts.body}\n` }), 'utf8');
244
+ const frontmatter = buildFrontmatterObject(factOpts, { id, createdAt });
245
+ writeFileSync(path, format({ frontmatter, body: `\n${factOpts.body}\n` }), 'utf8');
203
246
 
204
247
  return { action: 'created', id, path };
205
248
  }
@@ -26,17 +26,21 @@ Health checks (HC-1..HC-8) verify each layer is wired correctly: install integri
26
26
 
27
27
  ### Memory write rules (for Claude)
28
28
 
29
- When you learn something durable about this project or the user:
29
+ Most capture is automatic — the Stop hook extracts durable facts each turn, no action needed. When you want to capture something **explicitly**:
30
30
 
31
- 1. **Working state** (current threads, today's environment, open decisions) → write to `context/MEMORY.md` (≤2,500 chars). Consolidate at the cap.
32
- 2. **Typed durable fact** (user role / project decision / feedback / external reference) → create `context/memory/<type>_<slug>.md` with full YAML frontmatter; add a one-line entry to `context/memory/INDEX.md`.
33
- 3. **Cross-project lesson** → user-tier `~/.claude-memory-kit/LESSONS.md` (via `cmk lessons promote`).
34
- 4. **Never duplicate** between scratchpad and granular archive. If a working-state item becomes durable, MOVE it.
35
- 5. **Confirm silently.** Frozen-snapshot semantics mean the write takes effect next session.
31
+ 1. **Use `cmk remember "<the fact>"`** — do NOT hand-write files under `context/memory/`. The command routes through the kit's safe write path: it screens for secrets (Poison_Guard), abstracts machine-specific home paths to `~` (so a committed fact never leaks the local username), dedups, and writes the correct schema. Hand-writing fact files bypasses all of that and produces files the index can't read.
32
+ ```bash
33
+ cmk remember "We deploy with Kamal to Hetzner; never to Vercel."
34
+ cmk remember "Lior prefers terse responses, no preamble." --trust high
35
+ ```
36
+ 2. **Machine-specific config** (absolute tool paths that only make sense on this machine) → put it in `context.local/machine-paths.md` (gitignored, never committed). `cmk remember` writes the committed project tier; for machine-only paths, edit the local tier directly.
37
+ 3. **Cross-project lesson** (true on every project, not just this one) → `cmk lessons promote <id>` moves a project fact to the user tier (`~/.claude-memory-kit/`).
38
+ 4. **Confirm silently.** Don't announce captures. Frozen-snapshot semantics mean a write takes effect next session.
36
39
 
37
40
  ### Privacy
38
41
 
39
- Anything inside `<private>...</private>` tags in a user prompt is stripped before any disk write — never persisted in any form. Per-fact `private: true` frontmatter excludes a fact from the session-start digest.
42
+ - Anything inside `<private>...</private>` tags in a user prompt is stripped before any disk write — never persisted in any form.
43
+ - `cmk remember` (and auto-extract) abstract absolute home-dir paths (`C:\Users\you\…`, `/home/you/…`, `/Users/you/…`) to `~` before writing to a committed/shared tier, so a fact never ships your username and stays portable across machines. Genuinely machine-specific paths belong in `context.local/` (gitignored).
40
44
 
41
45
  ### Uninstall / remove this block
42
46