npm - @lh8ppl/claude-memory-kit - Versions diffs - 0.3.0 → 0.3.1 - Mend

@lh8ppl/claude-memory-kit 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/README.md +6 -3
package/package.json +1 -1
package/src/audit-log.mjs +1 -0
package/src/auto-drain.mjs +17 -1
package/src/auto-extract.mjs +4 -5
package/src/auto-persona.mjs +86 -1
package/src/capture-prompt.mjs +2 -1
package/src/config-core.mjs +161 -0
package/src/conflict-queue.mjs +2 -2
package/src/content-hash.mjs +30 -0
package/src/doctor.mjs +62 -3
package/src/import-anthropic-memory.mjs +2 -2
package/src/import-claude-md.mjs +333 -0
package/src/index-rebuild.mjs +6 -2
package/src/index.mjs +10 -0
package/src/inject-context.mjs +130 -1
package/src/install.mjs +75 -2
package/src/mcp-server.mjs +6 -1
package/src/memory-health.mjs +229 -0
package/src/memory-write.mjs +32 -10
package/src/native-binding.mjs +142 -0
package/src/poison-guard.mjs +55 -0
package/src/remember-core.mjs +53 -8
package/src/repair.mjs +20 -3
package/src/semantic-backend.mjs +114 -0
package/src/subcommands.mjs +268 -27
package/src/transcript-index.mjs +5 -2
package/src/write-fact.mjs +34 -3
package/template/.claude/skills/memory-search/SKILL.md +1 -1
package/template/.gitattributes.fragment +16 -0
package/template/CLAUDE.md.template +1 -1

package/README.md CHANGED Viewed

@@ -11,8 +11,9 @@
 - **Explicit capture when you want it** — say "remember this" / "from now on" / "we decided" / "forget X" (the `memory-write` skill), or run `cmk remember "<fact>"`. Both dedup, screen for secrets, abstract machine paths to `~`, and write silently. For backtick/quote-heavy rich facts, capture them shell-safe as JSON: `cmk remember --from-file fact.json` (or `--json` from stdin) — content never touches the shell.
 - **Search + MCP — Claude runs every memory op for you, in conversation** — `cmk search "<term>"` (keyword over facts + scratchpads; with the optional local embedder, **semantic + hybrid recall**: ask in your own words and get the fact even with zero keyword overlap — measured R@5 0.941 / paraphrase 1.000 on the kit's benchmark, no API calls). `cmk install` registers the kit's **MCP server**, so Claude can do the whole memory surface as tools without you ever typing `cmk`: capture (`mk_remember`, rich Why/How too), recall (`mk_search` / `mk_get` / `mk_timeline` / `mk_cite`), adjust trust (`mk_trust`), promote a fact across projects (`mk_lessons_promote`), forget (`mk_forget` — previews first, then deletes on confirm), and clear the review/conflict queues (`mk_queue_list` / `mk_queue_resolve`). The tools are allow-listed on install, so they run prompt-free.
 - **Bounded by compression** — session → daily → weekly Haiku rollups (cron or lazy-on-read) keep the snapshot small as history grows. The session-buffer rollup self-heals at session start too, so memory stays bounded even if you never cleanly close the window.
+- **Don't start empty — import the rules you already own** — `cmk import-claude-md` parses an existing `CLAUDE.md` / `.cursorrules` / `AGENTS.md` into typed, searchable facts through the same safe write path (secret screening, sanitization, dedup), with provenance back to source file + line. `--dry-run` previews first.
 - **Per-project, in-repo** — `context/` lives inside your project and travels with `git clone`. Each project keeps its own memory.
-- **7 health checks** — `cmk doctor` validates hook wiring, distill freshness, transcript firing, INDEX consistency, cron registration, native-memory coexistence, and stale locks — each failure with a repair command.
+- **8 health checks** — `cmk doctor` validates hook wiring, distill freshness, transcript firing, INDEX consistency, cron registration, native-memory coexistence, stale locks, and native-binding health (npm 12 readiness) — each failure with a repair command.
 ## Install — pick ONE route
@@ -26,10 +27,11 @@ cd ~/my-project
 cmk install        # scaffolds context/ + the memory-write + memory-search skills AND wires the lifecycle hooks into .claude/settings.json
 cmk install --with-semantic   # (optional) local semantic recall — one-time ~260 MB, search defaults to hybrid
 cmk register-crons            # (optional) scheduled background compression — otherwise self-heals lazily
+cmk import-claude-md --yes    # (optional) seed memory from an existing CLAUDE.md / .cursorrules (--dry-run previews)
 cmk doctor         # verify, then restart Claude Code
 ```
-`cmk install` is a complete entry point: it scaffolds `context/`, drops the `memory-write` + `memory-search` skills into `.claude/skills/` (committed — travels with `git clone`), and writes the 5 lifecycle hooks (PATH-resolved, cross-OS) into the project's `.claude/settings.json`. It also **registers the kit's MCP server** in `.mcp.json` and allow-lists its tools (`mcp__cmk__*`) in `.claude/settings.json`, so Claude can drive memory as tools with no per-call prompt. No separate `/plugin` step needed. Use `cmk install --no-hooks` to skip the hooks + MCP wiring (scaffold-only).
+`cmk install` is a complete entry point: it scaffolds `context/`, drops the `memory-write` + `memory-search` skills into `.claude/skills/` (committed — travels with `git clone`), and writes the 5 lifecycle hooks (PATH-resolved, cross-OS) into the project's `.claude/settings.json`. It also **registers the kit's MCP server** in `.mcp.json` and allow-lists its tools (`mcp__cmk__*`) in `.claude/settings.json`, so Claude can drive memory as tools with no per-call prompt, and writes a `.gitattributes` block pinning committed memory to LF (so a Windows clone can't mangle line endings — your memory stays readable cross-platform). No separate `/plugin` step needed. Use `cmk install --no-hooks` to skip the hooks + MCP wiring (scaffold-only).
 > Installing the package globally adds the `cmk` CLI **and** the installer. It's the `cmk install` *subcommand* that wires the hooks — not the bare `npm install`.
@@ -51,7 +53,7 @@ Most-used commands (full list via `cmk --help`):
 | Command | Purpose |
 | --- | --- |
 | `cmk install` | Scaffold `context/` + the `memory-write`/`memory-search` skills + `.gitignore` + CLAUDE.md block + wire hooks (`--no-hooks` for scaffold-only) |
-| `cmk doctor` | Run HC-1..HC-7 health checks, surface repair commands |
+| `cmk doctor` | Run HC-1..HC-8 health checks, surface repair commands |
 | `cmk repair --hooks` / `--locks` / `--index` / `--all` | Idempotent self-repair |
 | `cmk search "<query>" [--mode keyword\|semantic\|hybrid] [--scope facts\|transcripts]` | Search memory — by meaning with the embedder (hybrid default after `--with-semantic`); `--scope transcripts` = the raw session record |
 | `cmk get <id…>` / `cmk timeline <id>` / `cmk cite <id>` / `cmk recent-activity` | Read the index back — full fact bodies + provenance, sequential context around an observation, a canonical citation link, recent changes (the CLI side of the `mk_*` MCP read tools) |
@@ -63,6 +65,7 @@ Most-used commands (full list via `cmk --help`):
 | `cmk persona generate` | Run cross-project persona synthesis on demand (instead of waiting for the weekly pass) |
 | `cmk persona export <file>` / `import <file>` | Carry your cross-project persona (the user tier) to another of **your** machines — export to one portable bundle, import on the other (overwrites with backup + rollback). The persona stays private (never committed to a project) |
 | `cmk import-anthropic-memory [--dry-run] [--yes]` | Merge bullets from Anthropic's native auto-memory into MEMORY.md |
+| `cmk import-claude-md [file] [--dry-run] [--yes]` | Onboard from the rules you already own — parse an existing `CLAUDE.md` / `.cursorrules` / `AGENTS.md` into typed facts through the safe write path (Poison_Guard + sanitization + dedup) |
 ## Requirements

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lh8ppl/claude-memory-kit",
-  "version": "0.3.0",
+  "version": "0.3.1",
   "description": "cmk — the CLI for claude-memory-kit. Per-project, in-repo memory system for Claude Code.",
   "type": "module",
   "bin": {

package/src/audit-log.mjs CHANGED Viewed

@@ -33,6 +33,7 @@ export const REASON_CODES = Object.freeze({
   FACT_CREATED: 'fact-created', // writeFact: a new fact file was written (Task 123.A — the default create audit; callers emitting a richer code opt out via audit:false)
   DUPLICATE: 'duplicate', // writeFact: same path + same id
   DUPLICATE_ELSEWHERE: 'duplicate-elsewhere', // writeFact: different path + same id
+  INDEX_REBUILD_FAILED: 'index-rebuild-failed', // writeFact: the fact landed on disk but the best-effort INDEX.md rebuild threw (e.g. a detached auto-extract child killed mid-rebuild). Surfaces what was previously a SILENTLY swallowed catch (D-152) so a lagging committed INDEX is diagnosable; the next reindex/cmk reindex self-heals.
   USER_REQUESTED: 'user-requested', // forget: user-initiated tombstone
   CURATED_MERGE: 'curated-merge', // mergeFacts: explicit merge of A + B → C
   SCRATCHPAD_APPEND: 'scratchpad-append', // scratchpad: appendScratchpadBullet (Task 12)

package/src/auto-drain.mjs CHANGED Viewed

@@ -22,6 +22,7 @@
 import { resolveReviewQueue } from './review-queue.mjs';
 import { resolveConflictQueue } from './conflict-queue.mjs';
+import { resolvePersonaReviewQueue } from './auto-persona.mjs';
 import { mergeFacts } from './merge-facts.mjs';
 // Stateless optimistic resolvers (no per-entry judgement — that's the point).
@@ -55,5 +56,20 @@ export async function autoDrainQueues({ tier = 'P', projectRoot, userDir, scratc
     mergeFn: mergeFacts, // never invoked under KEEP_OLD; wired for correctness
   });
-  return { review, conflict };
+  // Persona-review queue (D-154): the medium-confidence cross-project persona
+  // candidates that were ROUTED here with the promise of an auto-drain that was
+  // never implemented — so they stranded (the v0.3.1 cold-open found the user's
+  // architecture philosophy stuck here). Drain it optimistically like the review
+  // queue (sync; userDir-scoped so it runs regardless of `tier`). Best-effort: a
+  // persona-drain hiccup must not fail the project-tier review/conflict drain.
+  let persona = { promoted: 0, drained: 0, queuePath: null };
+  if (userDir) {
+    try {
+      persona = resolvePersonaReviewQueue({ userDir });
+    } catch {
+      // best-effort; the queue file survives for the next pass
+    }
+  }
+  return { review, conflict, persona };
 }

package/src/auto-extract.mjs CHANGED Viewed

@@ -48,8 +48,8 @@ import {
   appendFileSync,
 } from 'node:fs';
 import { join, dirname } from 'node:path';
-import { createHash } from 'node:crypto';
 import { generateId } from '@lh8ppl/cmk-canonicalize';
+import { hashContent } from './content-hash.mjs';
 import { memoryWrite } from './memory-write.mjs';
 import { writeFact } from './write-fact.mjs';
 import { buildRichFactBody, slugifyFact } from './rich-fact.mjs';
@@ -663,10 +663,9 @@ function routeRichFact({ candidate, projectRoot, ts }) {
     sourceFile: 'auto-extract',
     sourceLine: 1,
     // Content fingerprint for the provenance field — NOT a security context.
-    // Matches the kit's sha1-of-content convention (write-fact.mjs caller in
-    // subcommands.runRememberRich, memory-write.mjs); writeFact dedups by the
-    // content-addressed id, this is just source_sha1. // NOSONAR
-    sourceSha1: createHash('sha1').update(body).digest('hex'), // NOSONAR
+    // Routes through the shared hashContent (SHA-256, D-149); writeFact dedups
+    // by the content-addressed id, this is just source_sha1 metadata.
+    sourceSha1: hashContent(body),
     createdAt: ts,
     projectRoot,
   });

package/src/auto-persona.mjs CHANGED Viewed

@@ -38,7 +38,7 @@
 // promotion primitive), audit-log, result-shapes, cooldown, compressor.
 // Per design §16.16 + §6.2 (conflict) + §6.8 (auto-drain) + §8.3 + tasks.md 45.
-import { readFileSync, appendFileSync, mkdirSync, existsSync, readdirSync } from 'node:fs';
+import { readFileSync, writeFileSync, appendFileSync, mkdirSync, existsSync, readdirSync } from 'node:fs';
 import { join, dirname } from 'node:path';
 import { generateId } from '@lh8ppl/cmk-canonicalize';
 import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
@@ -425,6 +425,91 @@ export function appendPersonaReviewQueue({ userDir, entries, now }) {
   return queuePath;
 }
+// persona-review.md is read back as two-line records:
+//   - (U-XXXXXXXX) [TARGET § SECTION] <text>
+//     <!-- target: TARGET, section: SECTION, confidence: C, reason: ..., ... -->
+// When the HTML-comment line is present it wins for target/section/confidence;
+// the bracket prefix on the bullet is only the fallback.
+// Every capture group is a NEGATED character class ("anything except the
+// delimiter that closes me") rather than a lazy `.+?` pair, so a group cannot
+// backtrack across its own delimiter (the canonicalize stripTrailingPunct
+// lesson — Task 140 / D-143).
+const PERSONA_QUEUE_LINE_RE = /^- \([UPL]-[^)]+\)\s+\[([^§\]]+)§([^\]]+)\]\s+(\S.*)$/;
+// `\s*` is never placed next to a `[^,]+` capture: both can match a space, and
+// that overlap is the ambiguity Sonar flags as super-linear backtracking. The
+// `[^,]+` captures swallow surrounding spaces instead and are trimmed by the
+// parser; `,` and the labels act as fixed delimiters.
+const PERSONA_QUEUE_META_RE = /target:([^,]+),\s*section:([^,]+),\s*confidence:\s*(\w+)/;
+/**
+ * Turn the text of persona-review.md into candidate objects.
+ *
+ * @param {string|null|undefined} text - raw queue file content (nullish → no candidates).
+ * @returns {Array<{target: string, section: string, confidence: string, text: string}>}
+ *   one entry per bullet line; `confidence` is lower-cased and defaults to
+ *   'medium' when the following line carries no metadata.
+ */
+export function parsePersonaReviewQueue(text) {
+  const rows = (text ?? '').split(/\r?\n/);
+  const parsed = [];
+  rows.forEach((row, idx) => {
+    const bullet = PERSONA_QUEUE_LINE_RE.exec(row.trim());
+    if (bullet === null) return;
+    // The metadata comment, if any, is expected on the line right after the bullet.
+    const metaMatch = PERSONA_QUEUE_META_RE.exec(rows[idx + 1] ?? '');
+    const target = metaMatch ? metaMatch[1] : bullet[1];
+    const section = metaMatch ? metaMatch[2] : bullet[2];
+    const confidence = metaMatch ? metaMatch[3].trim().toLowerCase() : 'medium';
+    parsed.push({ target: target.trim(), section: section.trim(), confidence, text: bullet[3].trim() });
+  });
+  return parsed;
+}
+/**
+ * Auto-drain the persona-review queue (the down-payment for Task 151 / D-154).
+ *
+ * The medium-confidence persona candidates were ROUTED to persona-review.md with
+ * the documented promise that "the daily/weekly auto-drain acts on them" — but
+ * that drain was never implemented, so they STRANDED. This makes the promise
+ * real: the same optimistic auto-promote the review queue already gets (D-6) —
+ * trust the synthesis, mistakes self-correct via `cmk forget`. NOT a manual
+ * command: runs inside autoDrainQueues on the maintenance passes. The full
+ * recurrence-scored redesign is Task 151 (v0.4); this just stops the stranding.
+ *
+ * Steps: read `<user tier root>/queues/persona-review.md`, parse it, re-submit
+ * every candidate to promoteCandidatesToUserTier with confidence forced to
+ * 'high', then overwrite the queue file with a one-line tombstone header.
+ *
+ * @param {object} [opts]
+ * @param {string} [opts.userDir] - forwarded to resolveTierRoot and the promote call.
+ * @param {string} [opts.now] - timestamp for the tombstone (also forwarded);
+ *   defaults to the current time as ISO-8601 without milliseconds.
+ * @param {object} [opts.settings] - forwarded untouched to the promote call.
+ * @returns {{promoted: number, drained: number, queuePath: string|null}}
+ *   `queuePath` is null only when the queue file could not be read.
+ */
+export function resolvePersonaReviewQueue({ userDir, now, settings } = {}) {
+  const userTierRoot = resolveTierRoot({ tier: 'U', userDir });
+  const queuePath = join(userTierRoot, 'queues', 'persona-review.md');
+  let text;
+  try {
+    text = readFileSync(queuePath, 'utf8');
+  } catch {
+    return { promoted: 0, drained: 0, queuePath: null }; // no queue → nothing to drain
+  }
+  const candidates = parsePersonaReviewQueue(text);
+  if (candidates.length === 0) return { promoted: 0, drained: 0, queuePath };
+  // Re-feed through the SAME promote path the synthesis uses. OPTIMISTIC
+  // AUTO-DRAIN: the drain IS the decision to promote, so force confidence:'high'
+  // to clear promoteCandidatesToUserTier's confidence gate — otherwise the
+  // candidates would re-queue forever (the gate that stranded them originally).
+  const promotable = candidates.map((c) => ({ ...c, confidence: 'high' }));
+  const r = promoteCandidatesToUserTier({ candidates: promotable, userDir, now, settings });
+  const promoted = r.promoted?.length ?? 0;
+  // NOTE(review): if promoteCandidatesToUserTier can RETURN an error result
+  // (rather than throw), the queue below is still cleared and the candidates
+  // are lost — confirm its failure contract and skip the wipe on failure.
+  // Clear the queue — the candidates are now resolved (promoted or de-duped into
+  // existing persona). Leave a tombstone header so the file isn't silently
+  // empty; it reports BOTH counts, because not every drained candidate is
+  // necessarily promoted.
+  const ts = now ?? new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');
+  writeFileSync(
+    queuePath,
+    `<!-- persona-review queue — auto-drained ${ts}: ${candidates.length} candidate(s) drained, ${promoted} promoted to the persona. -->\n`,
+    'utf8',
+  );
+  return { promoted, drained: candidates.length, queuePath };
+}
 export function promoteCandidatesToUserTier({ candidates, userDir, now, settings, trust = 'medium', source = 'persona-synthesis' }) {
   // `trust`/`source` default to the AUTO-persona posture (medium, system-derived
   // — 45.6). The EXPLICIT path (`cmk lessons promote`) passes trust:'high' +

package/src/capture-prompt.mjs CHANGED Viewed

@@ -63,7 +63,8 @@ export function buildMemoryHint({ projectRoot, prompt } = {}) {
   }
   return (
     '[claude-memory-kit] Recorded memory available beyond the session snapshot — ' +
-    'use the memory-search skill when the answer may already be recorded (prior decisions, history, conventions).'
+    'use the memory-search skill when the answer may already be recorded (prior decisions, history, conventions, ' +
+    'project structure/architecture, where things live). Recall it; do not re-read the code to reconstruct it.'
   );
 }

package/src/config-core.mjs ADDED Viewed

@@ -0,0 +1,161 @@
+// `cmk config get/set/--show-origin` core (Task 129, D-121).
+//
+// The v0.1.0 stub became real the day `--with-semantic` shipped:
+// context/settings.json now carries a user-facing setting
+// (search.default_mode) and hand-editing JSON was the only path. This is
+// the read-merge-write surface over the kit's settings files.
+//
+// Settings live in `<tier-root>/settings.json` for each of the three tiers
+// (resolveTierRoot — the shared module, not re-derived). Resolution
+// precedence mirrors the kit's memory model + git config semantics:
+//   local (context.local/) > project (context/) > user (~/.claude-memory-kit/)
+// A `get` returns the highest-precedence tier that defines the dotted key;
+// `--show-origin` lists every tier that defines it (winner + shadowed), the
+// direnv lesson (design §7.2: "without --show-origin, users rage-quit when
+// settings appear from nowhere"). `set` writes one tier (project default),
+// preserving every sibling key (the mergeProjectSettings discipline,
+// generalized per tier).
+//
+// Scope (D-121): the kit's own JSON settings files. NOT the richer
+// settings-or-observation `--show-origin` sketch in design §7.2's example
+// (observations have their own provenance/shadowed_by surface, §6); this is
+// the concrete settings half the semantic default forced into existence.
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+import { dirname, join } from 'node:path';
+import { resolveTierRoot } from './tier-paths.mjs';
+// Settings tiers, highest-precedence first. `name` is the tier label used by
+// this module's callers and results; `tier` is the one-letter code handed to
+// resolveTierRoot. The array order IS the resolution order: configGet returns
+// the first tier that defines a key, and configShowOrigin treats entry 0 as
+// the winner.
+const TIERS = Object.freeze([
+  { name: 'local', tier: 'L' },
+  { name: 'project', tier: 'P' },
+  { name: 'user', tier: 'U' },
+]);
+// Path segments that could reach the prototype chain. A dotted key holding
+// any of them is treated as INVALID on both the read and the write side —
+// `cmk config set __proto__.x y` must never touch Object.prototype
+// (skill-review blocking finding), and it must not be a silent no-op either.
+const FORBIDDEN_KEYS = new Set(['__proto__', 'constructor', 'prototype']);
+/** True when at least one `.`-separated segment of `dottedKey` is forbidden. */
+function hasForbiddenSegment(dottedKey) {
+  for (const segment of dottedKey.split('.')) {
+    if (FORBIDDEN_KEYS.has(segment)) return true;
+  }
+  return false;
+}
+/** Absolute path of `settings.json` under the root of the tier named `tierName`. */
+function settingsPathFor(tierName, { projectRoot, userDir }) {
+  const match = TIERS.find((entry) => entry.name === tierName);
+  // An unknown tier name yields `tier: undefined`; callers validate the name first.
+  const tierRoot = resolveTierRoot({ tier: match?.tier, projectRoot, userDir });
+  return join(tierRoot, 'settings.json');
+}
+/**
+ * Load and JSON-parse a settings file.
+ * @param {string} path
+ * @returns {*} the parsed JSON value, or null when the file is missing,
+ *   unreadable, or not valid JSON.
+ */
+function readSettings(path) {
+  if (existsSync(path)) {
+    try {
+      const raw = readFileSync(path, 'utf8');
+      return JSON.parse(raw);
+    } catch {
+      // For resolution, a malformed file counts as absent: a hand-broken JSON
+      // must not crash `cmk config get`, so reads never throw.
+      return null;
+    }
+  }
+  return null;
+}
+// Walk a dotted path; returns {found, value}. `found` distinguishes a key
+// set to `undefined`-ish from a key that isn't there (the honesty contract).
+// Only OWN properties count: the `in` operator would also match inherited
+// members, so a lookup such as `toString` or `a.hasOwnProperty` would be
+// reported as a defined setting whose value is a built-in function.
+function dig(obj, dottedKey) {
+  if (obj == null || typeof obj !== 'object') return { found: false };
+  let cur = obj;
+  for (const p of dottedKey.split('.')) {
+    if (cur == null || typeof cur !== 'object') return { found: false };
+    if (!Object.prototype.hasOwnProperty.call(cur, p)) return { found: false };
+    cur = cur[p];
+  }
+  return { found: true, value: cur };
+}
+/**
+ * Look a dotted setting key up tier by tier (local, then project, then user)
+ * and report the first tier whose settings.json defines it.
+ *
+ * A blank key, or one with a forbidden segment, is reported as not found.
+ *
+ * @returns {{found: boolean, value?: *, tier?: 'local'|'project'|'user'}}
+ */
+export function configGet(key, { projectRoot, userDir } = {}) {
+  const blank = !key || !String(key).trim();
+  if (blank || hasForbiddenSegment(key)) return { found: false };
+  for (const tierEntry of TIERS) {
+    const tierPath = settingsPathFor(tierEntry.name, { projectRoot, userDir });
+    const { found, value } = dig(readSettings(tierPath), key);
+    if (found) return { found: true, value, tier: tierEntry.name };
+  }
+  return { found: false };
+}
+/** Scalar coercion: true/false/null → primitives, integer/float strings →
+ *  numbers, everything else stays a string. JSON settings are typed, and a
+ *  CLI arg is always a string — `cmk config set x true` should write a bool.
+ *  An integer string outside the safe-integer range stays a string: turning
+ *  it into a Number would silently round it (e.g. a long numeric id).
+ *  @param {string} raw
+ *  @returns {boolean|null|number|string} */
+function coerce(raw) {
+  if (raw === 'true') return true;
+  if (raw === 'false') return false;
+  if (raw === 'null') return null;
+  if (/^-?\d+$/.test(raw)) {
+    const asInt = Number.parseInt(raw, 10);
+    return Number.isSafeInteger(asInt) ? asInt : raw;
+  }
+  if (/^-?\d*\.\d+$/.test(raw)) return Number.parseFloat(raw);
+  return raw;
+}
+/**
+ * Assign `value` at a dotted path inside `obj`, mutating `obj` in place.
+ * Any intermediate segment that is missing, null, a non-object, or an array
+ * is replaced by a fresh `{}` before descending.
+ */
+function setDeep(obj, dottedKey, value) {
+  const segments = dottedKey.split('.');
+  const leaf = segments.pop();
+  let node = obj;
+  for (const seg of segments) {
+    const child = node[seg];
+    const isPlainContainer = child != null && typeof child === 'object' && !Array.isArray(child);
+    if (!isPlainContainer) node[seg] = {};
+    node = node[seg];
+  }
+  node[leaf] = value;
+}
+/**
+ * Set a dotted key in one tier's settings.json (project default), preserving
+ * every sibling key (read-merge-write).
+ *
+ * The value is coerced from its string form (see coerce). A settings file that
+ * exists but cannot be read as a JSON object is NOT overwritten: treating it
+ * as absent would replace the whole file with the single new key and destroy
+ * whatever the user had in it, so the call fails with an explanatory error.
+ * A file that does not exist yet (or holds a literal `null`) starts from `{}`.
+ *
+ * @param {string} key - dotted path; blank keys and forbidden segments are rejected.
+ * @param {*} rawValue - stringified, then coerced.
+ * @param {object} [opts]
+ * @param {string} [opts.projectRoot]
+ * @param {string} [opts.userDir]
+ * @param {'local'|'project'|'user'} [opts.tier='project']
+ * @returns {{ok: boolean, tier?: string, path?: string, error?: string}}
+ */
+export function configSet(key, rawValue, { projectRoot, userDir, tier = 'project' } = {}) {
+  if (!key || !String(key).trim()) return { ok: false, error: 'key is required (dotted path)' };
+  if (hasForbiddenSegment(key)) {
+    return { ok: false, error: `key contains a forbidden segment (${[...FORBIDDEN_KEYS].join('/')}) — prototype-pollution guard` };
+  }
+  if (!TIERS.some((t) => t.name === tier)) {
+    return { ok: false, error: `tier must be one of local/project/user (got ${tier})` };
+  }
+  const path = settingsPathFor(tier, { projectRoot, userDir });
+  try {
+    let current = {};
+    if (existsSync(path)) {
+      let parsed;
+      try {
+        parsed = JSON.parse(readFileSync(path, 'utf8'));
+      } catch (err) {
+        return { ok: false, error: `refusing to overwrite ${path}: it could not be read as JSON (${err?.message ?? String(err)}) — fix or remove the file, then retry` };
+      }
+      if (parsed !== null && (typeof parsed !== 'object' || Array.isArray(parsed))) {
+        return { ok: false, error: `refusing to overwrite ${path}: its top-level JSON value is not an object` };
+      }
+      current = parsed ?? {};
+    }
+    setDeep(current, key, coerce(String(rawValue)));
+    mkdirSync(dirname(path), { recursive: true });
+    writeFileSync(path, JSON.stringify(current, null, 2) + '\n', 'utf8');
+    return { ok: true, tier, path };
+  } catch (err) {
+    return { ok: false, error: err?.message ?? String(err) };
+  }
+}
+/**
+ * List every tier whose settings.json defines the key, highest-precedence
+ * first. Entry 0 is the winner; each later entry is marked as shadowed by the
+ * winning tier (the direnv "where did this come from?" surface).
+ *
+ * A blank key, or one with a forbidden segment, yields no entries.
+ *
+ * @returns {{found: boolean, entries: Array<{tier, value, path, winner, shadowedBy?}>}}
+ */
+export function configShowOrigin(key, { projectRoot, userDir } = {}) {
+  const entries = [];
+  if (!key || !String(key).trim()) return { found: false, entries };
+  if (hasForbiddenSegment(key)) return { found: false, entries };
+  for (const tierEntry of TIERS) {
+    const path = settingsPathFor(tierEntry.name, { projectRoot, userDir });
+    const { found, value } = dig(readSettings(path), key);
+    if (found) entries.push({ tier: tierEntry.name, value, path });
+  }
+  if (entries.length === 0) return { found: false, entries: [] };
+  const [top, ...shadowed] = entries;
+  top.winner = true;
+  for (const entry of shadowed) {
+    entry.winner = false;
+    entry.shadowedBy = top.tier;
+  }
+  return { found: true, entries };
+}

package/src/conflict-queue.mjs CHANGED Viewed

@@ -50,7 +50,7 @@ import {
 import { join } from 'node:path';
 import { resolveTierRoot, VALID_TIERS } from './tier-paths.mjs';
 import { writeBullet } from './provenance.mjs';
-import { createHash } from 'node:crypto';
+import { hashContent } from './content-hash.mjs';
 import { nowIso, appendAuditEntry, REASON_CODES } from './audit-log.mjs';
 import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
 import { generateId } from '@lh8ppl/cmk-canonicalize';
@@ -792,7 +792,7 @@ export function mergeScratchpadBullets({
   // no `write:` key, so the first reindex after a merge-both resolution hit
   // the NOT-NULL observations.write_source constraint. Canonical shape via
   // the shared builder; the merged_from trail lives in the audit entry below.
-  const sha1 = createHash('sha1').update(combinedText, 'utf8').digest('hex');
+  const sha1 = hashContent(combinedText);
   const formatted = writeBullet({
     id: newId,
     text: combinedText,

package/src/content-hash.mjs ADDED Viewed

@@ -0,0 +1,30 @@
+// Content-fingerprint helper — the single home for the kit's content hash.
+//
+// Every "fingerprint this text/file content" site (provenance source_sha1,
+// the `files` checkpoint diff key, transcript dedup, conflict-merge keys)
+// MUST route through hashContent so the algorithm is defined in exactly one
+// place. Eight modules previously rolled their own `createHash('sha1')`,
+// which (a) let the algorithm drift per-site and (b) tripped CodeQL's
+// js/weak-cryptographic-algorithm on each one independently.
+//
+// SHA-256, not SHA-1: the digests are non-cryptographic content fingerprints
+// (dedup + change-detection), so SHA-1 was never a security flaw here — but a
+// weak-hash sink on every site is noise that hides real findings, and the
+// whole-convention move to SHA-256 (the user's call, D-149) removes the sink
+// kit-wide while keeping the digest consistent across writers. The on-disk
+// FIELD name stays `source_sha1` / `sha1` for back-compat (renaming the YAML
+// key + db column would break existing fact files + checkpoints); only the
+// algorithm changes. Existing `files`-table checkpoints mismatch once on the
+// first boot after upgrade and self-heal via the normal reindex.
+import { createHash } from 'node:crypto';
+/**
+ * Fingerprint text/file content for dedup, drift-detection and provenance.
+ * The digest is a content fingerprint, not a security primitive. The input
+ * string is hashed as UTF-8.
+ * @param {string} content
+ * @returns {string} 64-char lowercase hex SHA-256 digest
+ */
+export function hashContent(content) {
+  const hasher = createHash('sha256');
+  hasher.update(content, 'utf8');
+  return hasher.digest('hex');
+}

package/src/doctor.mjs CHANGED Viewed

@@ -1,4 +1,4 @@
-// `cmk doctor` — health checks HC-1..HC-7 (Task 37, T-031; memsearch HC-1/HC-7 removed in Task 120).
+// `cmk doctor` — health checks HC-1..HC-8 (Task 37, T-031; memsearch HC-1/HC-7 removed in Task 120; HC-8 native bindings added in Task 141a).
 //
 // Public boundary:
 //   async runDoctor({projectRoot, userDir, now, promptUser?, ...overrides})
@@ -44,6 +44,8 @@ import { nowIso } from './audit-log.mjs';
 import { detectStaleLocks } from './lock-discipline.mjs';
 import { cronSentinelPath } from './lazy-compress.mjs';
 import { getNativeAutoMemoryState } from './native-memory.mjs';
+import { checkKitBinding, checkEmbedderBinding } from './native-binding.mjs';
+import { resolveDefaultSearchMode } from './semantic-backend.mjs';
 const TWO_DAYS_MS = 2 * 24 * 60 * 60 * 1000;
 const THREE_DAYS_MS = 3 * 24 * 60 * 60 * 1000;
@@ -470,13 +472,67 @@ function hc7StaleLocks({ projectRoot, userDir }) {
   };
 }
+// --- HC-8: native bindings present (npm 12 readiness, Task 141a) -------
+// A BACKSTOP rather than the primary UX: `cmk install` already probes and
+// asks inline (the user's 2026-06-12 steer). HC-8 exists for the states that
+// arise afterwards (npm upgraded later, package reinstalled without the allow
+// flag). Its repair is an `npm install -g`, hence requiresInstall per the
+// design §14 ask-before-install rule.
+//
+// Order of checks: the kit binding first (a failure returns immediately); the
+// embedder is probed only when the project's default search mode is not
+// 'keyword'. `kitBindingProbe` / `embedderBindingProbe` override the real
+// probes when supplied.
+async function hc8NativeBindings({ projectRoot, kitBindingProbe, embedderBindingProbe }) {
+  const base = { id: 'HC-8', name: 'Native bindings present (npm 12 readiness)' };
+  const kit = (kitBindingProbe ?? checkKitBinding)();
+  if (!kit.ok) {
+    return {
+      ...base,
+      status: 'fail',
+      message: `better-sqlite3 native binding unavailable (${kit.reason}) — most common cause: npm 12 blocks dependency install scripts by default, so a fresh install skips the binding build (a Node major upgrade is the other); search/reindex will crash until it is rebuilt`,
+      recoveryCommand: kit.remedy,
+      requiresInstall: true,
+    };
+  }
+  // The embedder matters only when this project actually defaults to it.
+  const mode = resolveDefaultSearchMode({ projectRoot });
+  if (mode === 'keyword') {
+    return {
+      ...base,
+      status: 'pass',
+      message: 'better-sqlite3 binding healthy (semantic not configured — embedder not checked)',
+    };
+  }
+  const embedder = await (embedderBindingProbe ?? checkEmbedderBinding)();
+  if (embedder.ok) {
+    return {
+      ...base,
+      status: 'pass',
+      message: `better-sqlite3 binding healthy; embedder import OK (default mode: ${mode}; the deep pipeline check runs at --with-semantic warm)`,
+    };
+  }
+  let state;
+  if (embedder.installed) {
+    state = `installed but its native binding failed (${embedder.reason}) — npm 12 blocks onnxruntime-node's install script by default`;
+  } else {
+    state = `not installed, but search.default_mode is '${mode}'`;
+  }
+  return {
+    ...base,
+    status: 'fail',
+    message: `semantic embedder ${state}; searches degrade to keyword until fixed`,
+    recoveryCommand: embedder.remedy,
+    requiresInstall: true,
+  };
+}
 /**
- * Run the full 7-check health audit.
+ * Run the full 8-check health audit.
  *
  * @param {object} opts
  * @param {string} opts.projectRoot
  * @param {string} [opts.userDir]
  * @param {string} [opts.now]
+ * @param {Function} [opts.kitBindingProbe] - HC-8 test seam.
+ * @param {Function} [opts.embedderBindingProbe] - HC-8 test seam.
  * @returns {Promise<{action, checks, duration_ms}>}
  *
  * Note: M3 fix (skill-review 2026-05-28) dropped the v0.1.0 `promptUser`
@@ -489,6 +545,8 @@ export async function runDoctor({
   projectRoot,
   userDir,
   now,
+  kitBindingProbe,
+  embedderBindingProbe,
 } = {}) {
   const t0 = Date.now();
   if (!projectRoot) {
@@ -510,10 +568,11 @@ export async function runDoctor({
   const c5 = hc5CronRegistered({ projectRoot });
   const c6 = hc6NativeAutoMemory({ projectRoot, now: ts });
   const c7 = hc7StaleLocks({ projectRoot, userDir: resolvedUserDir });
+  const c8 = await hc8NativeBindings({ projectRoot, kitBindingProbe, embedderBindingProbe });
   return {
     action: 'completed',
-    checks: [c1, c2, c3, c4, c5, c6, c7],
+    checks: [c1, c2, c3, c4, c5, c6, c7, c8],
     duration_ms: Date.now() - t0,
   };
 }

package/src/import-anthropic-memory.mjs CHANGED Viewed

@@ -40,7 +40,7 @@ import {
 } from './audit-log.mjs';
 import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
 import { writeBullet } from './provenance.mjs';
-import { createHash } from 'node:crypto';
+import { hashContent } from './content-hash.mjs';
 const MEMORY_REL = ['context', 'MEMORY.md'];
@@ -236,7 +236,7 @@ export async function importAnthropicMemory({
   // an import failed and search degraded to the stale index (cut-gate9 F-13).
   const bulletLines = proposals
     .map((p) => {
-      const sha1 = createHash('sha1').update(p.text, 'utf8').digest('hex');
+      const sha1 = hashContent(p.text);
       const formatted = writeBullet({
         id: p.id,
         text: p.text,