npm - @lh8ppl/claude-memory-kit - Versions diffs - 0.3.0 → 0.3.1 - Mend

@lh8ppl/claude-memory-kit 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/README.md +6 -3
package/package.json +1 -1
package/src/audit-log.mjs +1 -0
package/src/auto-drain.mjs +17 -1
package/src/auto-extract.mjs +4 -5
package/src/auto-persona.mjs +86 -1
package/src/capture-prompt.mjs +2 -1
package/src/config-core.mjs +161 -0
package/src/conflict-queue.mjs +2 -2
package/src/content-hash.mjs +30 -0
package/src/doctor.mjs +62 -3
package/src/import-anthropic-memory.mjs +2 -2
package/src/import-claude-md.mjs +333 -0
package/src/index-rebuild.mjs +6 -2
package/src/index.mjs +10 -0
package/src/inject-context.mjs +130 -1
package/src/install.mjs +75 -2
package/src/mcp-server.mjs +6 -1
package/src/memory-health.mjs +229 -0
package/src/memory-write.mjs +32 -10
package/src/native-binding.mjs +142 -0
package/src/poison-guard.mjs +55 -0
package/src/remember-core.mjs +53 -8
package/src/repair.mjs +20 -3
package/src/semantic-backend.mjs +114 -0
package/src/subcommands.mjs +268 -27
package/src/transcript-index.mjs +5 -2
package/src/write-fact.mjs +34 -3
package/template/.claude/skills/memory-search/SKILL.md +1 -1
package/template/.gitattributes.fragment +16 -0
package/template/CLAUDE.md.template +1 -1

package/src/memory-health.mjs ADDED Viewed

@@ -0,0 +1,229 @@
+// Memory-health analysis — content quality, not plumbing (Task 144, D-130).
+//
+// Public boundary:
+//   analyzeMemoryHealth({projectRoot, now?, staleDays?, ...seams}) → report
+//   formatMemoryHealth(report) → string (the doctor's informational section)
+//
+// Read-only by contract: pure reads over the fact archive, the audit log,
+// and the queue files. Never mutates, never logs, never affects the doctor
+// exit code — the section is INFORMATIONAL ("42 facts: 3 old-and-untouched,
+// 2 possible duplicates, 1 conflict pending"), making curation visible
+// before Task 95 automates it. Candidates are SURFACED, never auto-acted
+// (the reviewable-not-silent rule).
+//
+// SPEC DEVIATION (recorded in tasks.md 144): the task entry assumed "the
+// audit log has every recall" — it does not. The audit log is
+// MUTATIONS-only by design (glossary: "any mutating operation"); search /
+// get / cite write nothing. "Stale" is therefore defined honestly as
+// OLD-AND-UNTOUCHED — created > staleDays ago with no audit-trail mutation
+// mentioning the fact's id since creation. True recall-tracking is parked
+// for Task 95 (trigger: when curation automation needs recall frequency).
+//
+// Near-dup detection here is the LITERAL tier: normalized-token Jaccard
+// over fact bodies (cheap, embedder-free). Task 143 adds the semantic
+// tier at write time; this section is the batch view over what already
+// landed.
+import { existsSync, readFileSync, readdirSync } from 'node:fs';
+import { join } from 'node:path';
+import { parse as parseFrontmatter } from './frontmatter.mjs';
+import { listConflictQueue } from './conflict-queue.mjs';
+import { listReviewQueue } from './review-queue.mjs';
+import { nowIso } from './audit-log.mjs';
+// Milliseconds in one day; multiplied by staleDays to get the age threshold.
+const DAY_MS = 24 * 60 * 60 * 1000;
+// Default age, in days, beyond which an untouched fact is listed as
+// old-and-untouched (callers may override via the staleDays option).
+const DEFAULT_STALE_DAYS = 60;
+// Jaccard threshold for "possible duplicate" — tuned to catch paraphrase
+// pairs sharing most content words while leaving topically-adjacent facts
+// alone. A candidate list errs slightly eager (a human reviews it).
+const NEAR_DUP_JACCARD = 0.6;
+// Short bodies make Jaccard noisy; require a minimal token set.
+const MIN_TOKENS_FOR_DUP = 4;
+// Above this many facts the O(n²) pair scan is skipped entirely, so the
+// report's nearDupPairs list comes back empty for such an archive.
+const PAIR_SCAN_CAP = 2000;
+// Tokens that tokenize() discards, so they never count toward the Jaccard
+// overlap between two fact bodies.
+const STOPWORDS = new Set([
+  'the', 'a', 'an', 'is', 'are', 'was', 'were', 'be', 'to', 'of', 'in',
+  'on', 'for', 'and', 'or', 'not', 'with', 'at', 'by', 'it', 'this',
+  'that', 'we', 'you', 'always', 'never', 'use', 'from',
+]);
+/**
+ * Split text into its set of distinct, lower-cased content tokens.
+ *
+ * Any run of characters that are not letters, combining marks, or digits
+ * acts as a separator. The classes are Unicode-aware: an ASCII-only class
+ * cuts words at every accented character and drops non-Latin words
+ * altogether, which leaves such fact bodies below MIN_TOKENS_FOR_DUP and
+ * therefore invisible to the near-dup scan. For pure-ASCII text the result
+ * is the same as with an `[^a-z0-9]` separator.
+ *
+ * One-character tokens and STOPWORDS are discarded.
+ *
+ * @param {*} text - coerced with String().
+ * @returns {Set<string>} the unique remaining tokens.
+ */
+function tokenize(text) {
+  const words = String(text)
+    .toLowerCase()
+    .split(/[^\p{L}\p{M}\p{N}]+/u);
+  return new Set(words.filter((w) => w.length > 1 && !STOPWORDS.has(w)));
+}
+/**
+ * Jaccard similarity of two token sets: |A ∩ B| / |A ∪ B|.
+ * A pair of empty sets scores 0 instead of dividing by zero.
+ *
+ * @param {Set<string>} aSet
+ * @param {Set<string>} bSet
+ * @returns {number} a value in [0, 1].
+ */
+function jaccard(aSet, bSet) {
+  const shared = [...aSet].filter((token) => bSet.has(token)).length;
+  const unionSize = aSet.size + bSet.size - shared;
+  if (unionSize === 0) return 0;
+  return shared / unionSize;
+}
+/**
+ * Load every fact file found directly under <projectRoot>/context/memory.
+ *
+ * A file counts as a fact when its name ends in `.md`, it is not INDEX.md,
+ * it parses, and its frontmatter carries an `id`. Anything else is skipped
+ * without error. Missing `type` / `trust` become 'unknown'; a missing
+ * `created_at` becomes null.
+ *
+ * Never throws for filesystem reasons: a missing directory, a path that
+ * exists but cannot be listed, or an unreadable file all degrade to "no
+ * fact from there", so this informational read cannot break its caller.
+ *
+ * @param {string} projectRoot
+ * @returns {Array<{slug: string, id: *, type: *, trust: *, createdAt: *, body: string}>}
+ */
+function readFacts(projectRoot) {
+  const dir = join(projectRoot, 'context', 'memory');
+  const facts = [];
+  if (!existsSync(dir)) return facts;
+  let names;
+  try {
+    names = readdirSync(dir);
+  } catch {
+    // The path exists but is not a listable directory (a plain file,
+    // permissions, a race with deletion) — degrade to "no facts" the same
+    // way readTouchedIds degrades on an unreadable audit log.
+    return facts;
+  }
+  for (const name of names) {
+    if (!name.endsWith('.md') || name === 'INDEX.md') continue;
+    try {
+      const { frontmatter, body } = parseFrontmatter(readFileSync(join(dir, name), 'utf8'));
+      if (!frontmatter?.id) continue;
+      facts.push({
+        slug: name.replace(/\.md$/, ''),
+        id: frontmatter.id,
+        type: frontmatter.type ?? 'unknown',
+        trust: frontmatter.trust ?? 'unknown',
+        createdAt: frontmatter.created_at ?? null,
+        body: String(body ?? ''),
+      });
+    } catch {
+      // unparseable file — content health can't read it; HC-4/reindex own that class
+    }
+  }
+  return facts;
+}
+// Latest audit timestamp per fact id, read from context/.locks/audit.log
+// (one JSON object per line). Any entry with an id and a ts counts as a
+// "touch" (trust override, merge, graduation, tombstone...) except the
+// creation-class actions 'created' and 'import'. A missing or unreadable
+// log yields an empty map; lines that fail to parse are skipped.
+function readTouchedIds(projectRoot) {
+  const latestById = new Map(); // id → latest mutation ts
+  let rawLines;
+  try {
+    const logFile = join(projectRoot, 'context', '.locks', 'audit.log');
+    if (!existsSync(logFile)) return latestById;
+    rawLines = readFileSync(logFile, 'utf8').split(/\r?\n/);
+  } catch {
+    // unreadable log — degrade to "nothing touched"
+    return latestById;
+  }
+  for (const raw of rawLines) {
+    if (raw.trim() === '') continue;
+    let entry;
+    try {
+      entry = JSON.parse(raw);
+    } catch {
+      // torn line
+      continue;
+    }
+    const id = entry?.id;
+    const ts = entry?.ts;
+    if (!id || !ts) continue;
+    // Creation-class entries aren't touches: 'created', and 'import'
+    // (an imported fact's OWN creation writes action:'import' — counting
+    // it would make imported facts permanently un-stale; and a later
+    // skipped-duplicate import entry proves the SOURCE still holds the
+    // text, not that anyone curated the fact). Skill-review finding.
+    const isCreation = entry.action === 'created' || entry.action === 'import';
+    if (isCreation) continue;
+    const known = latestById.get(id);
+    if (known === undefined || ts > known) latestById.set(id, ts);
+  }
+  return latestById;
+}
+/**
+ * Read-only content-health analysis of the project tier.
+ *
+ * Collects four signals:
+ *   - fact counts, overall and grouped by `type` and `trust`;
+ *   - old-and-untouched facts: created more than `staleDays` ago and absent
+ *     from the touched-id map built from the audit log;
+ *   - literal near-duplicate pairs: token-set Jaccard ≥ NEAR_DUP_JACCARD,
+ *     both bodies holding at least MIN_TOKENS_FOR_DUP tokens;
+ *   - sizes of the pending conflict and review queues (tier 'P').
+ *
+ * When the archive holds more than PAIR_SCAN_CAP facts the pair scan is
+ * skipped; `nearDupScanSkipped` is then true so a consumer can tell "not
+ * scanned" apart from "scanned, nothing found".
+ *
+ * @param {object} opts
+ * @param {string} opts.projectRoot
+ * @param {string} [opts.now] - parsed with Date.parse; defaults to nowIso().
+ * @param {number} [opts.staleDays] - age threshold in days.
+ * @param {Function} [opts.listConflictsImpl] - test seam.
+ * @param {Function} [opts.listReviewImpl] - test seam.
+ * @returns {{
+ *   facts: {total: number, byType: object, byTrust: object},
+ *   oldUntouched: Array<{slug: string, id: *, createdAt: *}>,
+ *   nearDupPairs: Array<{a: string, b: string, idA: *, idB: *, score: number}>,
+ *   nearDupScanSkipped: boolean,
+ *   queues: {conflicts: number, review: number},
+ *   staleDays: number,
+ * }}
+ */
+export function analyzeMemoryHealth({
+  projectRoot,
+  now,
+  staleDays = DEFAULT_STALE_DAYS,
+  listConflictsImpl,
+  listReviewImpl,
+} = {}) {
+  const nowMs = Date.parse(now ?? nowIso());
+  const facts = readFacts(projectRoot);
+  const byType = {};
+  const byTrust = {};
+  for (const f of facts) {
+    byType[f.type] = (byType[f.type] ?? 0) + 1;
+    byTrust[f.trust] = (byTrust[f.trust] ?? 0) + 1;
+  }
+  // Old-and-untouched: created > staleDays ago, no recorded mutation.
+  const touched = readTouchedIds(projectRoot);
+  const staleMs = staleDays * DAY_MS;
+  const oldUntouched = facts.filter((f) => {
+    if (!f.createdAt || touched.has(f.id)) return false;
+    // An unparseable date makes the age NaN; NaN > x is false → not stale.
+    return nowMs - Date.parse(f.createdAt) > staleMs;
+  });
+  // Near-dup candidate pairs (literal tier). O(n²) pairwise scan — fine at
+  // memory-archive scale (106 facts ≈ 5.5K pairs on the dogfood); guarded
+  // above PAIR_SCAN_CAP so a pathological archive can't stall the doctor.
+  const nearDupScanSkipped = facts.length > PAIR_SCAN_CAP;
+  // Bodies too short for a meaningful Jaccard score are dropped once here
+  // instead of being re-tested for every pair; relative order is kept, so
+  // the emitted pairs and their order are unchanged.
+  const candidates = nearDupScanSkipped
+    ? []
+    : facts
+        .map((f) => ({ f, tokens: tokenize(f.body) }))
+        .filter(({ tokens }) => tokens.size >= MIN_TOKENS_FOR_DUP);
+  const nearDupPairs = [];
+  for (let i = 0; i < candidates.length; i++) {
+    const { f: fa, tokens: ta } = candidates[i];
+    for (let j = i + 1; j < candidates.length; j++) {
+      const { f: fb, tokens: tb } = candidates[j];
+      const score = jaccard(ta, tb);
+      if (score >= NEAR_DUP_JACCARD) {
+        nearDupPairs.push({ a: fa.slug, b: fb.slug, idA: fa.id, idB: fb.id, score: Number(score.toFixed(2)) });
+      }
+    }
+  }
+  // The detected-contradiction surface = the pending queues.
+  let conflicts = 0;
+  let review = 0;
+  try {
+    conflicts = (listConflictsImpl ?? listConflictQueue)({ tier: 'P', projectRoot }).length;
+  } catch {
+    // queue unreadable — degrade to zero
+  }
+  try {
+    review = (listReviewImpl ?? listReviewQueue)({ tier: 'P', projectRoot }).length;
+  } catch {
+    // queue unreadable — degrade to zero
+  }
+  return {
+    facts: { total: facts.length, byType, byTrust },
+    oldUntouched: oldUntouched.map((f) => ({ slug: f.slug, id: f.id, createdAt: f.createdAt })),
+    nearDupPairs,
+    nearDupScanSkipped,
+    queues: { conflicts, review },
+    staleDays,
+  };
+}
+/**
+ * Turn an analyzeMemoryHealth report into the doctor's informational
+ * section. The headline is always present; each concern (old-and-untouched
+ * facts, possible duplicate pairs, pending queue items) adds one indented
+ * line only when its count is above zero, and lists at most three samples.
+ *
+ * @param {object} report - the object returned by analyzeMemoryHealth.
+ * @returns {string} newline-joined lines, no trailing newline.
+ */
+export function formatMemoryHealth(report) {
+  const { facts } = report;
+  const trustSummary = Object.entries(facts.byTrust)
+    .map(([level, count]) => `${count} ${level}`)
+    .join(' · ');
+  const headline = `Memory health (informational): ${facts.total} fact(s)`;
+  const out = [trustSummary ? `${headline} — trust: ${trustSummary}` : headline];
+
+  const stale = report.oldUntouched;
+  if (stale.length > 0) {
+    const sample = stale
+      .slice(0, 3)
+      .map((f) => f.slug)
+      .join(', ');
+    const more = stale.length > 3 ? ', …' : '';
+    out.push(
+      `  ${stale.length} old-and-untouched (> ${report.staleDays}d, no mutation since creation) — worth a skim: ${sample}${more}`,
+    );
+  }
+
+  const dups = report.nearDupPairs;
+  if (dups.length > 0) {
+    const sample = dups
+      .slice(0, 3)
+      .map((p) => `${p.a} ↔ ${p.b}`)
+      .join('; ');
+    const more = dups.length > 3 ? '; …' : '';
+    out.push(`  ${dups.length} possible duplicate pair(s): ${sample}${more}`);
+  }
+
+  const { conflicts, review } = report.queues;
+  const pending = [];
+  if (conflicts > 0) pending.push(`${conflicts} conflict(s)`);
+  if (review > 0) pending.push(`${review} review item(s)`);
+  if (pending.length > 0) out.push(`  ${pending.join(' + ')} pending — cmk queue`);
+
+  return out.join('\n');
+}

package/src/memory-write.mjs CHANGED Viewed

@@ -43,8 +43,8 @@ import {
   mkdirSync,
 } from 'node:fs';
 import { join, dirname } from 'node:path';
-import { createHash } from 'node:crypto';
 import { generateId } from '@lh8ppl/cmk-canonicalize';
+import { hashContent } from './content-hash.mjs';
 import {
   resolveTierRoot,
   resolveScratchpadPath,
@@ -58,6 +58,7 @@ import { parseBulletProvenance, isProvenanceCommentLine } from './provenance.mjs
 import { checkPoisonGuard, logPoisonGuardRejection } from './poison-guard.mjs';
 import { detectConflicts, writeConflictEntry } from './conflict-queue.mjs';
 import { sanitizeHomePaths } from './sanitize.mjs';
+import { sanitizePrivacyTags } from './privacy.mjs';
 const VALID_ACTIONS = new Set(['add', 'replace', 'remove']);
@@ -253,15 +254,21 @@ function doAdd(opts) {
   if (errors.length > 0) {
     return errorResult({ category: ERROR_CATEGORIES.SCHEMA, errors });
   }
-  // Privacy (write-path fix #1): abstract home-dir paths to `~` for
-  // committed/shared tiers (P/U) BEFORE the bullet is screened, conflict-
-  // checked, dedup-keyed, and written — so a captured fact never ships the
-  // local username and stays portable. Local tier (L) keeps machine paths
-  // verbatim (its purpose). Everything downstream uses `addOpts`.
+  // Privacy: strip <private>…</private> FIRST, on EVERY tier (cut-gate
+  // v0.3.1 finding — the tag was honored only by the UserPromptSubmit hook,
+  // so `cmk remember`/`mk_remember` wrote the secret verbatim). Runs before
+  // home-path sanitization, Poison_Guard, conflict-check, dedup, and the
+  // write — so the redacted text is what everything downstream sees, on
+  // committed AND local tiers (private content must not reach context.local
+  // either). The same single-safe-path philosophy as Poison_Guard.
+  const privacyStripped = sanitizePrivacyTags(opts.text);
+  // Then abstract home-dir paths to `~` for committed/shared tiers (P/U) so a
+  // captured fact never ships the local username + stays portable; local
+  // tier (L) keeps machine paths verbatim (its purpose).
   const sanitizedText =
     opts.tier === 'P' || opts.tier === 'U'
-      ? sanitizeHomePaths(opts.text)
-      : opts.text;
+      ? sanitizeHomePaths(privacyStripped)
+      : privacyStripped;
   const addOpts =
     sanitizedText === opts.text ? opts : { ...opts, text: sanitizedText };
@@ -293,6 +300,11 @@ function doAdd(opts) {
     newTrust,
     scratchpadPath,
     sectionTitle: opts.section,
+    // Task 143 (D-130): the async adapters may inject a semantic similarity
+    // fn (prepareSemanticSimilarity) + its threshold; absent → the literal
+    // tokenJaccard default (graceful degradation).
+    similarityFn: opts.similarityFn,
+    similarityThreshold: opts.similarityThreshold,
   });
   // Defensive guard against a future detectConflicts schema-error
   // path. Today the upstream validator catches bad opts before this
@@ -304,7 +316,17 @@ function doAdd(opts) {
   if (conflict.action === 'error') {
     return conflict;
   }
-  if (conflict.conflict === true && conflict.action === 'queue') {
+  // Task 143 (D-130): near-dup proposals. The pre-143 contract queues only
+  // when new.trust < existing.trust — an EQUAL-trust paraphrase ("use uv not
+  // pip" twice) takes the 'supersede' action and APPENDS, which is exactly
+  // the memory-rot case. When the caller opts in (queueNearDups, set by the
+  // semantic-equipped adapters), ANY above-threshold match routes to the
+  // conflict queue as a reviewable proposal — never auto-dropped, never
+  // silently duplicated. Default behavior unchanged.
+  const routeToQueue =
+    conflict.conflict === true &&
+    (conflict.action === 'queue' || (opts.queueNearDups === true && conflict.action === 'supersede'));
+  if (routeToQueue) {
     // Compute the proposed ID using the same canonical-id derivation
     // appendScratchpadBullet would have used, then route to the queue.
     // (Task 25b fix: generateId is positional `(tier, text)`, not
@@ -333,7 +355,7 @@ function appendBulletGuarded(opts) {
   // Caller MUST have run Poison_Guard already. This is the inner
   // write step — delegates to the existing scratchpad writer which
   // handles dedup + cap + consolidation + audit + ID derivation.
-  const sha1 = createHash('sha1').update(opts.text, 'utf8').digest('hex');
+  const sha1 = hashContent(opts.text);
   const ts = opts.now ?? nowIso();
   return appendScratchpadBullet({
     tier: opts.tier,

package/src/native-binding.mjs ADDED Viewed

@@ -0,0 +1,142 @@
+// Native-binding health probes (Task 141a, D-129/D-133).
+//
+// npm 12 (~July 2026) flips `allowScripts` OFF by default: dependency
+// install scripts — including the IMPLICIT node-gyp build a binding.gyp
+// package gets — silently don't run on a fresh `npm install -g`. The kit's
+// two native deps are exactly that shape:
+//   - better-sqlite3 (core: the search index) — kit-level remedy
+//   - onnxruntime-node (inside the optional @huggingface/transformers
+//     embedder) — semantic-level remedy
+//
+// Without the binding the package LOOKS installed but `cmk search`/reindex
+// crash at first use. These probes detect that state cheaply so:
+//   - `cmk install` can ask the user and fix INLINE (the primary UX — the
+//     user's 2026-06-12 steer: ask at install, not a secondary command);
+//   - `cmk doctor` HC-8 stays as the ongoing backstop;
+//   - the --with-semantic runner passes the allow flag itself.
+//
+// Remediation verified against the primary sources (2026-06-12): GitHub
+// changelog "Upcoming breaking changes for npm v12" + npm v11 config docs —
+// the `allow-scripts` CONFIG (comma-separated package list) is the
+// documented path "for one-off and global contexts: npm exec, npx, and
+// npm install -g"; the project-level `npm approve-scripts` allowlist in
+// package.json does not apply to `-g` installs. Warnings (and the config
+// key) exist from npm 11.16.0.
+import { createRequire } from 'node:module';
+import { spawnSync } from 'node:child_process';
+// Command returned as `remedy` by checkKitBinding when the better-sqlite3
+// probe fails.
+export const KIT_BINDING_REMEDY =
+  'npm install -g @lh8ppl/claude-memory-kit --allow-scripts=better-sqlite3';
+// Command returned as `remedy` by checkEmbedderBinding whenever the embedder
+// import fails (whether the package is absent or present-but-broken).
+export const EMBEDDER_BINDING_REMEDY =
+  'npm install -g @huggingface/transformers --allow-scripts=onnxruntime-node';
+// The `allow-scripts` config key ships (as warnings + config) in 11.16.0.
+// Stored as [major, minor, patch] for component-wise comparison.
+const ALLOW_SCRIPTS_MIN = [11, 16, 0];
+// CommonJS-style require resolved relative to this module; used by the
+// synchronous better-sqlite3 probe.
+const requireFromHere = createRequire(import.meta.url);
+/**
+ * Health probe for the kit's own native dependency, better-sqlite3.
+ *
+ * Requiring the package is not a sufficient check: better-sqlite3 v12 loads
+ * its .node binding lazily, inside `new Database(...)`, so on a
+ * script-blocked install the require succeeds and only instantiation fails
+ * ("Could not locate the bindings file"; live-verified 2026-06-12 against a
+ * real `--ignore-scripts` install). The default probe therefore opens an
+ * in-memory database and closes it again. Kept synchronous (CJS require) so
+ * install and doctor can call it without changing their flow.
+ *
+ * @param {object} [opts] - { requireImpl } test seam; a throw means "broken".
+ * @returns {{ok: true} | {ok: false, reason: string, remedy: string}}
+ */
+export function checkKitBinding({ requireImpl } = {}) {
+  function openInMemoryDb() {
+    const Database = requireFromHere('better-sqlite3');
+    const db = new Database(':memory:');
+    db.close();
+  }
+  const probe = requireImpl ?? openInMemoryDb;
+  try {
+    probe();
+  } catch (err) {
+    const reason = err?.message ?? String(err);
+    return { ok: false, reason, remedy: KIT_BINDING_REMEDY };
+  }
+  return { ok: true };
+}
+/**
+ * Probe the optional semantic embedder. Distinguishes NOT-INSTALLED (the
+ * normal opt-out state — `installed: false`) from INSTALLED-BUT-BROKEN
+ * (`installed: true`), which the semantic backend's own loader collapses
+ * into a single "not installed".
+ *
+ * Not-installed means: the import failed with ERR_MODULE_NOT_FOUND and the
+ * specifier that could not be found is the embedder package itself. Node's
+ * message has the shape "Cannot find package 'X' imported from <path>";
+ * only the quoted X is compared. A plain substring search over the whole
+ * message would also match the importer path when a package required BY the
+ * embedder is missing (".../node_modules/@huggingface/transformers/..."),
+ * and report a present-but-broken install as absent. When the message does
+ * not have that shape the substring check is used as a fallback.
+ *
+ * Honest limitation: this is an IMPORT-level probe. Like better-sqlite3,
+ * onnxruntime may bind lazily — an installed-but-script-blocked embedder
+ * can pass the import and only fail at pipeline construction. The deep
+ * check is `warmEmbedder` (it builds a real pipeline), which runs at
+ * `--with-semantic` install time.
+ *
+ * @param {object} [opts] - { importImpl } test seam.
+ * @returns {Promise<{ok: true} | {ok: false, installed: boolean, reason: string, remedy: string}>}
+ */
+export async function checkEmbedderBinding({ importImpl } = {}) {
+  const pkg = '@huggingface/transformers';
+  const imp = importImpl ?? (() => import('@huggingface/transformers'));
+  try {
+    await imp();
+    return { ok: true };
+  } catch (err) {
+    const message = err?.message ?? String(err);
+    // The quoted specifier right after "Cannot find package/module" is the
+    // thing that is actually missing; the text after "imported from" is not.
+    const missing = /Cannot find (?:package|module) '([^']+)'/.exec(message)?.[1];
+    const namesEmbedder =
+      missing === undefined
+        ? message.includes(pkg)
+        : missing === pkg || missing.startsWith(`${pkg}/`);
+    const notInstalled = err?.code === 'ERR_MODULE_NOT_FOUND' && namesEmbedder;
+    return {
+      ok: false,
+      installed: !notInstalled,
+      reason: notInstalled ? 'not-installed' : message,
+      remedy: EMBEDDER_BINDING_REMEDY,
+    };
+  }
+}
+/**
+ * Report whether the host npm is new enough (≥ ALLOW_SCRIPTS_MIN, i.e.
+ * 11.16.0) to understand the `allow-scripts` config. Every probe failure —
+ * non-zero exit, empty output, a version that does not parse, a thrown
+ * error — is answered with "unsupported", so callers never emit a flag the
+ * host npm would reject as unknown.
+ *
+ * @param {object} [opts] - { spawnSyncImpl } test seam.
+ * @returns {{supported: boolean, version: string | null}}
+ */
+export function npmSupportsAllowScripts({ spawnSyncImpl = spawnSync } = {}) {
+  const unsupported = (version) => ({ supported: false, version });
+  try {
+    // Constant command under shell:true — npm is npm.cmd on Windows; the
+    // shell resolves it cross-platform (the buildDefaultNpmRunner pattern).
+    const result = spawnSyncImpl('npm --version', {
+      encoding: 'utf8',
+      shell: true,
+      timeout: 30_000,
+    });
+    if (result.status !== 0 || !result.stdout) return unsupported(null);
+    const version = String(result.stdout).trim();
+    const parts = version.split('.').map((piece) => Number.parseInt(piece, 10));
+    if (parts.length < 3 || parts.some(Number.isNaN)) return unsupported(version);
+    // The first component that differs from the minimum decides; if all
+    // three match, the host is exactly at the minimum and is supported.
+    const firstDiff = ALLOW_SCRIPTS_MIN.findIndex((min, i) => parts[i] !== min);
+    const supported = firstDiff === -1 || parts[firstDiff] > ALLOW_SCRIPTS_MIN[firstDiff];
+    return { supported, version };
+  } catch {
+    return unsupported(null);
+  }
+}

package/src/poison-guard.mjs CHANGED Viewed

@@ -93,6 +93,61 @@ const SECRET_PATTERNS = [
     category: 'secret',
     re: /\bghp_[A-Za-z0-9]{36}/,
   },
+  // Task 134: the other GitHub token classes — OAuth (gho_), user-to-server
+  // (ghu_), server-to-server (ghs_), refresh (ghr_). Same ghp_ shape (prefix
+  // + 36 alnum); the 36-char floor is what keeps "ghost"/"ghs config" prose
+  // out (a real English word can't reach prefix+36 alphanumerics).
+  {
+    id: 'secret_github_token',
+    category: 'secret',
+    re: /\bgh[ousr]_[A-Za-z0-9]{36}/,
+  },
+  // GitHub fine-grained PAT: github_pat_ + 82 chars of [A-Za-z0-9_]
+  // (GitHub's documented detection regex; the token is the 11-char prefix +
+  // 82-char body = 93 total. The body's internal underscore placement is not
+  // contractually fixed, so match the whole-body class rather than a
+  // prefix_body split — verified against GitHub Docs + GitGuardian's
+  // detector, 2026-06-13).
+  {
+    id: 'secret_github_fine_grained_pat',
+    category: 'secret',
+    re: /\bgithub_pat_[A-Za-z0-9_]{82}/,
+  },
+  // Stripe secret keys: sk_live_ / rk_live_ (restricted) + 24+ alnum. The
+  // _live_ infix + the floor keep benign "sk_" / "Stripe" prose out.
+  {
+    id: 'secret_stripe_key',
+    category: 'secret',
+    re: /\b[sr]k_live_[A-Za-z0-9]{24,}/,
+  },
+  // Google API key: AIza + 35 of [A-Za-z0-9_-] (39 total — the documented
+  // length). The 4-char prefix alone is harmless prose ("AIza" mentioned);
+  // the 35-char body is the gate.
+  {
+    id: 'secret_google_api_key',
+    category: 'secret',
+    re: /\bAIza[A-Za-z0-9_-]{35}\b/,
+  },
+  // GitLab personal access token: glpat- + 20+ alnum/dash/underscore.
+  {
+    id: 'secret_gitlab_pat',
+    category: 'secret',
+    re: /\bglpat-[A-Za-z0-9_-]{20,}/,
+  },
+  // npm access token: npm_ + 36 alnum (the modern granular/automation shape).
+  {
+    id: 'secret_npm_token',
+    category: 'secret',
+    re: /\bnpm_[A-Za-z0-9]{36}/,
+  },
+  // Hugging Face access token: hf_ + 34+ alnum (kit-relevant — the semantic
+  // install pulls models from HF; a leaked hf_ token in a captured fact is
+  // a real risk). The 34-char floor keeps "hf"/"half" prose out.
+  {
+    id: 'secret_huggingface_token',
+    category: 'secret',
+    re: /\bhf_[A-Za-z0-9]{34,}/,
+  },
   // OpenAI / Anthropic style keys. sk- prefix + optional ant-/proj-
   // qualifier + ≥40 chars of alphanumeric/dash/underscore.
   {

package/src/remember-core.mjs CHANGED Viewed

@@ -17,7 +17,8 @@
 // to keep (design §10.1), not the core's.
 import { resolve as resolvePath } from 'node:path';
-import { createHash } from 'node:crypto';
+import { hashContent } from './content-hash.mjs';
+import { sanitizePrivacyTags } from './privacy.mjs';
 import { writeFact as defaultWriteFact } from './write-fact.mjs';
 import { buildRichFactBody, slugifyFact } from './rich-fact.mjs';
@@ -53,8 +54,17 @@ export function rememberRich(text, options = {}, deps = {}) {
   const projectRoot = deps.projectRoot ?? resolvePath(process.cwd());
   const write = deps.writeFact ?? defaultWriteFact;
-  const headline = String(text).trim();
-  const title = (options.title && String(options.title).trim()) || headline.split('\n')[0].slice(0, 80);
+  // Strip <private>…</private> BEFORE deriving/slicing the title (cut-gate
+  // v0.3.1 clean-build finding). writeFact also strips, but it receives a title
+  // already sliced to 80 chars — and an 80-char cut that lands inside a private
+  // span SEVERS the closing tag, so writeFact's `<private>…</private>` regex no
+  // longer matches and the secret survives in the frontmatter title + INDEX.md.
+  // Stripping the intact text here means the slice only ever sees redacted text.
+  const headline = sanitizePrivacyTags(String(text).trim());
+  const safeTitle = options.title
+    ? sanitizePrivacyTags(String(options.title).trim())
+    : '';
+  const title = safeTitle || headline.split('\n')[0].slice(0, 80);
   const body = buildRichFactBody({ text: headline, why: options.why, how: options.how });
   // `links` arrives as an ARRAY from the MCP tool (z.array) and as a
   // comma-STRING from the CLI flag — accept both. The old `String(links)` path
@@ -76,10 +86,10 @@ export function rememberRich(text, options = {}, deps = {}) {
     trust: options.trust ?? 'high',
     sourceFile: 'user-explicit',
     sourceLine: 1,
-    // Content fingerprint for provenance/dedup — NOT a security context. Matches
-    // the kit's sha1-of-content convention (memory-write.mjs, index-rebuild.mjs);
-    // writeFact dedups by content-addressed id, this is the source_sha1 field. // NOSONAR
-    sourceSha1: createHash('sha1').update(body).digest('hex'), // NOSONAR
+    // Content fingerprint for provenance/dedup — NOT a security context.
+    // Routes through the shared hashContent (SHA-256, D-149); writeFact dedups
+    // by content-addressed id, this is the source_sha1 metadata field.
+    sourceSha1: hashContent(body),
     related,
     projectRoot,
   });
@@ -87,5 +97,40 @@ export function rememberRich(text, options = {}, deps = {}) {
 /** The title rememberRich() will derive for `text`/`options` (for caller messages). */
 export function richFactTitle(text, options = {}) {
-  return (options.title && String(options.title).trim()) || String(text).trim().split('\n')[0].slice(0, 80);
+  // Mirror rememberRich: strip <private> before slicing so the preview a caller
+  // echoes to the console never carries private content either (cut-gate v0.3.1).
+  const safeTitle = options.title ? sanitizePrivacyTags(String(options.title).trim()) : '';
+  return safeTitle || sanitizePrivacyTags(String(text).trim()).split('\n')[0].slice(0, 80);
+}
+/**
+ * Task 143 (D-130): write-time near-dup guard for the EXPLICIT terse
+ * capture paths (cmk remember / mk_remember).
+ *
+ * Resolves to extra memoryWrite options:
+ *   - `{similarityFn, similarityThreshold, queueNearDups: true}` when the
+ *     resolved search mode is anything other than 'keyword' AND the
+ *     semantic preparation step reports `ok`;
+ *   - `{}` in every other case, including ANY thrown error (no embedder,
+ *     model error, db hiccup) — capture must proceed on the literal
+ *     pipeline rather than be lost to a similarity upgrade.
+ *
+ * One shared gate for both adapters (the shared-modules rule). The
+ * auto-extract hook path deliberately does NOT call this.
+ *
+ * @param {object} opts - { projectRoot, text, prepareImpl?, resolveModeImpl? } (seams for tests).
+ * @returns {Promise<object>} extra memoryWrite options (possibly empty).
+ */
+export async function prepareNearDupGuard({ projectRoot, text, prepareImpl, resolveModeImpl } = {}) {
+  let extra = {};
+  try {
+    const backend = await import('./semantic-backend.mjs');
+    const resolveMode = resolveModeImpl ?? backend.resolveDefaultSearchMode;
+    if (resolveMode({ projectRoot }) !== 'keyword') {
+      const prepare = prepareImpl ?? backend.prepareSemanticSimilarity;
+      const sem = await prepare({ projectRoot, newText: text });
+      if (sem.ok) {
+        // The MEASURED bge-base threshold (see SEMANTIC_NEARDUP_THRESHOLD) — the
+        // generic 0.85 default would miss the canonical "use uv not pip" pair.
+        extra = {
+          similarityFn: sem.similarityFn,
+          similarityThreshold: backend.SEMANTIC_NEARDUP_THRESHOLD,
+          queueNearDups: true,
+        };
+      }
+    }
+  } catch {
+    // Best-effort by contract: fall through with the empty option set.
+    extra = {};
+  }
+  return extra;
 }

package/src/repair.mjs CHANGED Viewed

@@ -159,12 +159,25 @@ function repairLocks({ projectRoot, userDir, staleLockMs, now, ts }) {
  * @param {Function} [opts.reindexer]  test-injected reindex function; defaults to import('./index-rebuild.mjs').reindexFull
  */
 async function repairIndex({ projectRoot, userDir, reindexer }) {
+  // Production reindexFull requires a `db` (it calls db.exec) — repairIndex
+  // must open + own + close it, exactly like runReindex does. The earlier
+  // code called reindexFull({projectRoot,userDir}) with NO db, so
+  // `cmk repair --index`/`--all` threw "undefined (reading 'exec')" since
+  // Task 49 (cut-gate v0.3.1 finding — every test mocked the reindexer, so
+  // the real call-shape was never exercised). An injected reindexer (tests)
+  // takes whatever args it wants; we only open a db for the real one.
   let reindexFn = reindexer;
+  let db = null;
   if (!reindexFn) {
-    const mod = await import('./index-rebuild.mjs');
-    reindexFn = mod.reindexFull;
+    const [{ reindexFull }, { openIndexDb }] = await Promise.all([
+      import('./index-rebuild.mjs'),
+      import('./index-db.mjs'),
+    ]);
+    reindexFn = reindexFull;
+    db = openIndexDb({ projectRoot });
   }
   if (typeof reindexFn !== 'function') {
+    if (db) db.close();
     return {
       kind: 'index',
       changed: false,
@@ -172,7 +185,9 @@ async function repairIndex({ projectRoot, userDir, reindexer }) {
     };
   }
   try {
-    const r = await reindexFn({ projectRoot, userDir });
+    const r = db
+      ? await reindexFn({ projectRoot, userDir, db })
+      : await reindexFn({ projectRoot, userDir });
     return {
       kind: 'index',
       changed: true,
@@ -184,6 +199,8 @@ async function repairIndex({ projectRoot, userDir, reindexer }) {
       changed: false,
       error: `reindex failed: ${err?.message ?? err}`,
     };
+  } finally {
+    if (db) db.close();
   }
 }