npm - @lh8ppl/claude-memory-kit - Versions diffs - 0.3.0 → 0.3.1 - Mend

@lh8ppl/claude-memory-kit 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31) hide show

package/README.md +6 -3
package/package.json +1 -1
package/src/audit-log.mjs +1 -0
package/src/auto-drain.mjs +17 -1
package/src/auto-extract.mjs +4 -5
package/src/auto-persona.mjs +86 -1
package/src/capture-prompt.mjs +2 -1
package/src/config-core.mjs +161 -0
package/src/conflict-queue.mjs +2 -2
package/src/content-hash.mjs +30 -0
package/src/doctor.mjs +62 -3
package/src/import-anthropic-memory.mjs +2 -2
package/src/import-claude-md.mjs +333 -0
package/src/index-rebuild.mjs +6 -2
package/src/index.mjs +10 -0
package/src/inject-context.mjs +130 -1
package/src/install.mjs +75 -2
package/src/mcp-server.mjs +6 -1
package/src/memory-health.mjs +229 -0
package/src/memory-write.mjs +32 -10
package/src/native-binding.mjs +142 -0
package/src/poison-guard.mjs +55 -0
package/src/remember-core.mjs +53 -8
package/src/repair.mjs +20 -3
package/src/semantic-backend.mjs +114 -0
package/src/subcommands.mjs +268 -27
package/src/transcript-index.mjs +5 -2
package/src/write-fact.mjs +34 -3
package/template/.claude/skills/memory-search/SKILL.md +1 -1
package/template/.gitattributes.fragment +16 -0
package/template/CLAUDE.md.template +1 -1

package/src/import-claude-md.mjs ADDED Viewed

@@ -0,0 +1,333 @@
+// `cmk import-claude-md` (Task 142, D-130).
+//
+// Public boundary:
+//   async importClaudeMd({projectRoot, file?, now?, dryRun?, acceptAll?, writeFactImpl?})
+//     → {action, mode?, reason?, proposals, accepted, skipped, rejected, errors, sourcePath, duration_ms}
+//
+// Onboards a project from the rules file the user already owns (CLAUDE.md,
+// .cursorrules, AGENTS.md, any markdown/plain rules file): parses it into
+// TYPED fact candidates and writes each through writeFact() — the kit's one
+// safe write path. That composition (not re-implementation) is the point:
+// writeFact already gives Poison_Guard screening, home-path sanitization,
+// content-addressed dedup, INDEX reindex, and create-audit. The D-125 bug
+// (import-anthropic hand-rolling its provenance comment and breaking the next
+// reindex) is the precedent this design avoids.
+//
+// Differences from `cmk import-anthropic-memory` (the structural template):
+//   - target is the GRANULAR fact archive (context/memory/), not MEMORY.md
+//     bullets — rules-file content is durable and typed, not scratchpad;
+//   - fact `type` is inferred from the nearest markdown heading
+//     (user / feedback / reference, default project);
+//   - candidates inside the kit's own managed CLAUDE.md block and inside
+//     code fences are never proposed (boilerplate / shell examples).
+//
+// Explicit user action only. Never automatic. `--dry-run` previews; apply
+// requires explicit `--yes` (same confirmation contract as the precedent).
+import { existsSync, readFileSync, readdirSync } from 'node:fs';
+import { isAbsolute, join } from 'node:path';
+import { canonicalize, generateId } from '@lh8ppl/cmk-canonicalize';
+import { hashContent } from './content-hash.mjs';
+import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
+import { ERROR_CATEGORIES, errorResult } from './result-shapes.mjs';
+import { writeFact } from './write-fact.mjs';
+import { slugifyFact } from './rich-fact.mjs';
+import { sanitizeHomePaths } from './sanitize.mjs';
+import { parse as parseFrontmatter } from './frontmatter.mjs';
+const DEFAULT_FILE = 'CLAUDE.md';
+const IMPORT_SOURCE = 'claude-md';
+// Below this length a line is noise ("go", "etc."), not a rule.
+const MIN_CANDIDATE_CHARS = 8;
+const MANAGED_BLOCK_START = /<!--\s*claude-memory-kit:start\b/;
+const MANAGED_BLOCK_END = /<!--\s*claude-memory-kit:end\s*-->/;
+// Linear-time by construction (S5852, the D-128 class): every adjacent
+// pair is disjoint — `[ \t]+` can never donate characters to the `\S` that
+// starts the capture — so the regex engine has no backtracking ambiguity.
+// Captures keep trailing whitespace; every consumer already calls .trim().
+const HEADING = /^(#{1,6})[ \t]+(\S.*)$/;
+const LIST_ITEM = /^[ \t]*(?:[-*+]|\d+[.)])[ \t]+(\S.*)$/;
+const CODE_FENCE = /^\s*(```|~~~)/;
+/**
+ * Map the heading a candidate sits under to one of the kit's fact types.
+ *
+ * The heading is lower-cased and tested against an ordered list of keyword
+ * patterns; the first pattern that matches decides the type. The order is
+ * part of the contract: user-profile phrasing is tried before the link /
+ * reference class, which is tried before the broad rule / style class. The
+ * \b in front of the reference alternation keeps "Preferences" from being
+ * read as "reference". The result is a heuristic — `--dry-run` prints the
+ * inferred type so it can be inspected before anything is written.
+ *
+ * @param {string|null} heading - nearest heading text, or null/empty when none.
+ * @returns {'user'|'feedback'|'project'|'reference'} 'project' when there is
+ *   no heading or no pattern matches.
+ */
+export function inferFactType(heading) {
+  if (!heading) return 'project';
+  const lowered = String(heading).toLowerCase();
+  // First match wins, so the array order is the precedence order.
+  const rules = [
+    ['user', /prefer|about (me|the user)|profile|persona|communicat/],
+    ['reference', /\b(link|reference|resource|url|bookmark)/],
+    [
+      'feedback',
+      /rule|discipline|workflow|convention|anti-pattern|style|verification|review|testing|engineering|working/,
+    ],
+  ];
+  for (const [factType, pattern] of rules) {
+    if (pattern.test(lowered)) return factType;
+  }
+  return 'project';
+}
+/**
+ * Parse a rules file into typed fact candidates.
+ *
+ * Primary shape: markdown list items (-, *, +, 1.) with the nearest heading
+ * as type context. Fallback shape (.cursorrules and other plain-text rules
+ * files): when the file has NO list items at all, every non-empty,
+ * non-heading line outside code fences is a candidate.
+ *
+ * Skipped in both shapes: code-fence content (shell examples, not rules) and
+ * the kit's own managed CLAUDE.md block (importing our boilerplate back into
+ * memory would be noise for every kit user).
+ *
+ * Fence handling: a fence is closed only by the same marker that opened it.
+ * A `~~~` line inside a backtick fence (or the reverse) is fence CONTENT, so
+ * it no longer flips the state and leaks the following code lines out as
+ * rule candidates.
+ *
+ * Candidates shorter than MIN_CANDIDATE_CHARS are dropped in both shapes, and
+ * a too-short list item is never demoted to a plain-text candidate.
+ *
+ * @param {string} text - the rules-file content.
+ * @returns {Array<{text: string, line: number, heading: string|null, type: string}>}
+ *   `line` is 1-based; `heading` is the most recent heading text (null before
+ *   the first heading); `type` is inferFactType(heading).
+ */
+export function parseRulesFile(text) {
+  const lines = String(text).split(/\r?\n/);
+  const bullets = [];
+  const plain = [];
+  let heading = null;
+  // Marker that opened the current code fence; null while outside a fence.
+  let openFence = null;
+  let inManagedBlock = false;
+  for (let i = 0; i < lines.length; i++) {
+    const line = lines[i];
+    if (MANAGED_BLOCK_START.test(line)) {
+      inManagedBlock = true;
+      continue;
+    }
+    if (inManagedBlock) {
+      if (MANAGED_BLOCK_END.test(line)) inManagedBlock = false;
+      continue;
+    }
+    const fence = CODE_FENCE.exec(line);
+    if (fence) {
+      if (openFence === null) {
+        openFence = fence[1];
+      } else if (fence[1] === openFence) {
+        openFence = null;
+      }
+      // A non-matching marker inside an open fence is just fence content.
+      continue;
+    }
+    if (openFence !== null) continue;
+    const h = HEADING.exec(line);
+    if (h) {
+      heading = h[2].trim();
+      continue;
+    }
+    const item = { line: i + 1, heading, type: inferFactType(heading) };
+    const m = LIST_ITEM.exec(line);
+    if (m && m[1].trim().length >= MIN_CANDIDATE_CHARS) {
+      bullets.push({ ...item, text: m[1].trim() });
+      continue;
+    }
+    const t = line.trim();
+    // `!m` keeps a too-short list item out of the plain-text fallback, and
+    // HTML comment openers are never rules.
+    if (!m && t.length >= MIN_CANDIDATE_CHARS && !t.startsWith('<!--')) {
+      plain.push({ ...item, text: t });
+    }
+  }
+  // Any list item at all means the file is markdown-shaped: ignore prose.
+  return bullets.length > 0 ? bullets : plain;
+}
+// Canonical forms already present in memory: every MEMORY.md scratchpad
+// bullet + every granular fact body. Imported fact bodies are the bare rule
+// text, so a re-run canonicalize-matches its own first run here.
+//
+// Best-effort throughout: an unreadable scratchpad, an unlistable fact
+// directory, or an unparseable fact file only reduces the dedup set — it
+// never aborts the import (dry-run included).
+function collectExistingCanonical(projectRoot) {
+  const existing = new Set();
+  const memPath = join(projectRoot, 'context', 'MEMORY.md');
+  if (existsSync(memPath)) {
+    try {
+      for (const line of readFileSync(memPath, 'utf8').split(/\r?\n/)) {
+        const m = LIST_ITEM.exec(line);
+        if (m) {
+          const c = canonicalize(m[1].trim());
+          if (c) existing.add(c);
+        }
+      }
+    } catch {
+      // best-effort: unreadable scratchpad means no dedup hits from it
+    }
+  }
+  const factDir = join(projectRoot, 'context', 'memory');
+  let names = [];
+  try {
+    // readdirSync throws when the path exists but is not a listable
+    // directory (a regular file, or permission denied).
+    if (existsSync(factDir)) names = readdirSync(factDir);
+  } catch {
+    // best-effort: unlistable fact dir means no dedup hits from it
+  }
+  for (const name of names) {
+    if (!name.endsWith('.md') || name === 'INDEX.md') continue;
+    try {
+      const { body } = parseFrontmatter(readFileSync(join(factDir, name), 'utf8'));
+      const c = canonicalize(String(body ?? '').trim());
+      if (c) existing.add(c);
+    } catch {
+      // skip unparseable files; writeFact's own id dedup still backstops
+    }
+  }
+  return existing;
+}
+/**
+ * Run the import pipeline.
+ *
+ * Flow: resolve the source file, parse it into candidates, drop candidates
+ * whose canonical form is already in memory (or repeated earlier in the same
+ * file), then either preview the remaining proposals or write each one
+ * through writeFactImpl.
+ *
+ * Result: a missing projectRoot returns errorResult(...). Every other path
+ * returns an object with `action: 'completed'`, `proposals`, `accepted`,
+ * `skipped`, `rejected`, `errors`, `sourcePath` and `duration_ms`, plus:
+ *   - `reason: 'no-source'` when the source file does not exist;
+ *   - `errors: 1` and `reason: 'read-source-failed: ...'` when it cannot be read;
+ *   - `mode: 'dry-run'` when dryRun is set (nothing written, no audit entries);
+ *   - `mode: 'requires-confirmation'` when there are proposals but acceptAll
+ *     is not set (nothing written, no audit entries);
+ *   - `mode: 'apply'` otherwise, including the nothing-to-propose case.
+ * Each proposal is `{text, line, heading, type, id}`.
+ *
+ * In apply mode a writeFactImpl result counts as `accepted` for action
+ * 'created', `skipped` for action 'skipped', `rejected` for the POISON_GUARD
+ * error category, and `errors` for anything else.
+ *
+ * @param {object} opts
+ * @param {string} opts.projectRoot
+ * @param {string} [opts.file] - rules file, relative to projectRoot or absolute (default CLAUDE.md)
+ * @param {string} [opts.now] - timestamp stamped on audit entries (default: nowIso())
+ * @param {boolean} [opts.dryRun] - preview proposals; no file modified
+ * @param {boolean} [opts.acceptAll] - apply every proposal (the CLI's --yes)
+ * @param {Function} [opts.writeFactImpl] - test seam (default: the real writeFact)
+ * @returns {Promise<object>}
+ */
+export async function importClaudeMd({
+  projectRoot,
+  file,
+  now,
+  dryRun = false,
+  acceptAll = false,
+  writeFactImpl = writeFact,
+} = {}) {
+  const ts = now ?? nowIso();
+  const t0 = Date.now();
+  if (!projectRoot) {
+    return errorResult({
+      category: ERROR_CATEGORIES.MISSING_PROJECT_ROOT,
+      errors: ['projectRoot is required'],
+      duration_ms: Date.now() - t0,
+    });
+  }
+  const fileRel = file && String(file).trim() ? String(file).trim() : DEFAULT_FILE;
+  const sourcePath = isAbsolute(fileRel) ? fileRel : join(projectRoot, fileRel);
+  const done = (extra) => ({
+    action: 'completed',
+    proposals: [],
+    accepted: 0,
+    skipped: 0,
+    rejected: 0,
+    errors: 0,
+    sourcePath,
+    duration_ms: Date.now() - t0,
+    ...extra,
+  });
+  if (!existsSync(sourcePath)) return done({ reason: 'no-source' });
+  let sourceText;
+  try {
+    sourceText = readFileSync(sourcePath, 'utf8');
+  } catch (err) {
+    return done({ errors: 1, reason: `read-source-failed: ${err?.message ?? err}` });
+  }
+  const existingCanonical = collectExistingCanonical(projectRoot);
+  const tierRoot = join(projectRoot, 'context');
+  const proposals = [];
+  let skipped = 0;
+  // Dry-run / requires-confirmation must not touch ANY file — including the
+  // audit log. Skip entries are only audited when the user actually applied.
+  const auditSkips = acceptAll && !dryRun;
+  for (const candidate of parseRulesFile(sourceText)) {
+    // Sanitize BEFORE canonicalizing so the dedup key matches what writeFact
+    // actually lands on disk (it ids the sanitized body).
+    const sanitized = sanitizeHomePaths(candidate.text);
+    const canonical = canonicalize(sanitized);
+    if (!canonical) continue;
+    const id = generateId('P', sanitized);
+    if (existingCanonical.has(canonical)) {
+      skipped += 1;
+      if (auditSkips) {
+        try {
+          appendAuditEntry(tierRoot, {
+            ts,
+            action: 'import',
+            tier: 'P',
+            id,
+            reasonCode: REASON_CODES.IMPORT_SKIPPED_DUPLICATE,
+            extra: { source: IMPORT_SOURCE },
+          });
+        } catch {
+          // best-effort — never block the import flow on audit-log failure
+        }
+      }
+      continue;
+    }
+    existingCanonical.add(canonical); // same-file duplicates collapse to one proposal
+    proposals.push({
+      text: candidate.text,
+      line: candidate.line,
+      heading: candidate.heading,
+      type: candidate.type,
+      id,
+    });
+  }
+  if (dryRun) return done({ mode: 'dry-run', proposals, skipped });
+  if (!acceptAll && proposals.length > 0) {
+    return done({ mode: 'requires-confirmation', proposals, skipped });
+  }
+  if (proposals.length === 0) return done({ mode: 'apply', skipped });
+  let accepted = 0;
+  let rejected = 0;
+  let errors = 0;
+  // Two distinct rules can share a 60-char slug prefix (slugifyFact caps);
+  // the second would hit writeFact's filename-collision error and be lost.
+  // De-collide within the run by suffixing the (unique) source line.
+  const usedSlugs = new Set();
+  // The committed source_file field must never carry a username from an
+  // absolute --file argument (the D-51 name-privacy class).
+  const sourceFileField = sanitizeHomePaths(fileRel);
+  for (const p of proposals) {
+    const title = p.text.split('\n')[0].slice(0, 80);
+    let slug = slugifyFact(title);
+    if (usedSlugs.has(`${p.type}/${slug}`)) slug = `${slug}-l${p.line}`;
+    usedSlugs.add(`${p.type}/${slug}`);
+    const r = writeFactImpl({
+      tier: 'P',
+      type: p.type,
+      slug,
+      title,
+      body: p.text,
+      writeSource: 'imported',
+      trust: 'medium',
+      sourceFile: sourceFileField,
+      sourceLine: p.line,
+      // Content fingerprint for provenance — NOT a security context. Routes
+      // through the shared hashContent (SHA-256, D-149); see remember-core.mjs.
+      sourceSha1: hashContent(p.text),
+      projectRoot,
+      // writeFact's default create-audit is replaced by the richer-semantic
+      // IMPORT_APPLIED entry below (the merge-facts precedent).
+      audit: false,
+    });
+    if (r.action === 'created') {
+      accepted += 1;
+      try {
+        appendAuditEntry(tierRoot, {
+          ts,
+          action: 'import',
+          tier: 'P',
+          id: r.id,
+          reasonCode: REASON_CODES.IMPORT_APPLIED,
+          paths: { after: r.path },
+          extra: { source: IMPORT_SOURCE, trust: 'medium', write_source: 'imported' },
+        });
+      } catch {
+        // best-effort
+      }
+    } else if (r.action === 'skipped') {
+      skipped += 1;
+    } else if (r.errorCategory === ERROR_CATEGORIES.POISON_GUARD) {
+      // writeFact already logged the rejection to poison-guard.log (Door 4);
+      // count it honestly — a rejected secret is not an "error", it's the
+      // guard doing its job.
+      rejected += 1;
+    } else {
+      errors += 1;
+    }
+  }
+  return done({ mode: 'apply', proposals, accepted, skipped, rejected, errors });
+}

package/src/index-rebuild.mjs CHANGED Viewed

@@ -42,11 +42,11 @@
 // established sources of truth and does NOT re-implement bullet/frontmatter
 // parsing or path resolution.
-import { createHash } from 'node:crypto';
 import { existsSync, readdirSync, readFileSync, statSync } from 'node:fs';
 import { basename, join, relative } from 'node:path';
 import chokidar from 'chokidar';
 import { INDEX_DB_SCHEMA } from './index-db.mjs';
+import { hashContent } from './content-hash.mjs';
 import { syncTranscriptChunks } from './transcript-index.mjs';
 import { readBullet, parseBulletProvenance } from './provenance.mjs';
 import { parse as parseFrontmatter } from './frontmatter.mjs';
@@ -95,8 +95,12 @@ export function listObservationSources({ projectRoot, userDir }) {
 // --- Helpers ----------------------------------------------------------
+// Content fingerprint for the `files`-table mtime+sha1 diff key. The column
+// name stays `sha1` for checkpoint back-compat; hashContent is SHA-256 (D-149).
+// On the first boot after the algorithm change every checkpoint mismatches
+// once and self-heals via the normal reindex.
 function sha1OfContent(content) {
-  return createHash('sha1').update(content, 'utf8').digest('hex');
+  return hashContent(content);
 }
 function isoToEpochMs(iso) {

package/src/index.mjs CHANGED Viewed

@@ -70,6 +70,16 @@ export function buildProgram() {
           childCmd.action(() => sub.action(child.name));
         }
       }
+      // Task 129: a parent that has children AND its own action (e.g. `cmk
+      // config --show-origin <key>`, handled by the parent while get/set are
+      // children) must wire the parent action too — otherwise commander
+      // falls to the default "show help, exit 1" on a bare parent invocation
+      // with a flag. (Caught by the Task-129 live-test: `--show-origin`
+      // printed help instead of running.) Children still take precedence
+      // when a subcommand name is given.
+      if (typeof sub.action === 'function') {
+        cmd.action((...cmdArgs) => sub.action(...cmdArgs));
+      }
     } else {
       cmd.action((...cmdArgs) => sub.action(...cmdArgs));
     }

package/src/inject-context.mjs CHANGED Viewed

@@ -26,14 +26,19 @@ import {
   readdirSync,
   appendFileSync,
   statSync,
+  openSync,
+  readSync,
+  closeSync,
 } from 'node:fs';
 import { spawn } from 'node:child_process';
 import { join } from 'node:path';
 import { homedir } from 'node:os';
-import { SCRATCHPADS_BY_TIER, resolveTierRoot } from './tier-paths.mjs';
+import { SCRATCHPADS_BY_TIER, resolveTierRoot, ID_PATTERN } from './tier-paths.mjs';
 import { nowIso } from './audit-log.mjs';
 import { detectStaleness } from './lazy-compress.mjs';
 import { isProvenanceCommentLine, parseBulletProvenance } from './provenance.mjs';
+import { listConflictQueue } from './conflict-queue.mjs';
+import { listReviewQueue } from './review-queue.mjs';
 // Importance ranking for value-ordered inject eviction (Task 93 / design §19.3).
 // When a tier exceeds its budget we drop the LOWEST-value sections first, not the
@@ -800,7 +805,14 @@ export function injectContext({
   // 7. Emit the Anthropic SessionStart hook output shape (design §5.1 +
   // Anthropic hook protocol). When the snapshot is empty, we still emit
   // the shape so downstream tooling can rely on the field's presence.
+  //
+  // Task 145 (D-130): `systemMessage` is the USER-DISPLAY channel (the
+  // D-116 primary-source check: additionalContext is model-facing,
+  // systemMessage is shown to the user) — one status line per session
+  // start, zero model-token cost. The trust loop every silent system
+  // lacks: when the kit works, the user finally SEES it working.
   const hookOutput = {
+    systemMessage: buildStatusLine({ snapshot, projectRoot, now: ts }),
     hookSpecificOutput: {
       hookEventName: HOOK_EVENT_NAME,
       additionalContext: snapshot,
@@ -816,3 +828,120 @@ export function injectContext({
     bytes: Buffer.byteLength(snapshot, 'utf8'),
   };
 }
+// --- Task 145: the session-start status line (user-display) -------------
+// Tail-read budget for audit.log: recency lives at the end; reading the
+// whole file would grow with project age inside a 500ms-budget hook.
+const STATUS_AUDIT_TAIL_BYTES = 64 * 1024;
+const DAY_MS = 24 * 60 * 60 * 1000;
+// Derived from the shared ID_PATTERN (tier-paths.mjs) — strip its ^/$
+// anchors and wrap in the `(id)` bullet form. One alphabet, one source.
+const SNAPSHOT_ID_RE = new RegExp(`\\((${ID_PATTERN.source.slice(1, -1)})\\)`, 'g');
+/**
+ * One user-facing line summarizing what the kit just did for this session.
+ * Best-effort everywhere: a status line must NEVER turn a working hook into
+ * a crash — every data source degrades to its zero independently.
+ *
+ * Every returned line starts with `claude-memory-kit:`. Shapes:
+ *   - no `(id)` token found in the snapshot →
+ *     `memory is empty — capture starts this session` (returned before the
+ *     audit log or the queues are consulted);
+ *   - otherwise comma-joined parts: `N fact(s) in context` (unique ids in
+ *     the snapshot), then — each only when non-zero — `N captured in the
+ *     last 24h` and the pending conflict / review counts followed by
+ *     `pending — cmk queue`;
+ *   - any unexpected throw → `memory loaded`.
+ *
+ * The 24h count reads at most the last STATUS_AUDIT_TAIL_BYTES of
+ * `<projectRoot>/context/.locks/audit.log`, so captures older than that tail
+ * window are not counted even if they fall inside the 24 hours.
+ *
+ * @param {object} opts
+ * @param {string} opts.snapshot - the composed injection snapshot.
+ * @param {string} opts.projectRoot
+ * @param {string} [opts.now] - reference time for the 24h window (default: nowIso()).
+ * @param {Function} [opts.listConflictsImpl] - test seam (default: the real queue lister).
+ * @param {Function} [opts.listReviewImpl] - test seam.
+ * @returns {string} the status line (always a string, never throws).
+ */
+export function buildStatusLine({
+  snapshot,
+  projectRoot,
+  now,
+  listConflictsImpl,
+  listReviewImpl,
+} = {}) {
+  const prefix = 'claude-memory-kit:';
+  try {
+    // 1. Unique injected fact ids — what the model can actually see.
+    const ids = new Set();
+    for (const m of String(snapshot ?? '').matchAll(SNAPSHOT_ID_RE)) ids.add(m[1]);
+    if (ids.size === 0) {
+      return `${prefix} memory is empty — capture starts this session`;
+    }
+    const parts = [`${ids.size} fact(s) in context`];
+    // 2. Captures in the last 24h, from the audit-log tail. A capture is a
+    // `created` entry or an APPLIED import — `action: 'import'` alone also
+    // covers skipped duplicates (reasonCode import-skipped-duplicate), and
+    // counting those would let a re-run import inflate the line by its
+    // whole dup count (skill-review finding, 2026-06-12).
+    const nowMs = Date.parse(now ?? nowIso());
+    let recent = 0;
+    try {
+      const auditPath = join(projectRoot, 'context', '.locks', 'audit.log');
+      if (existsSync(auditPath)) {
+        // Positioned read of the LAST 64KB only — recency lives at the end,
+        // and this runs inside the 500ms-budget SessionStart hook; reading a
+        // months-old multi-MB log in full would pay for history we discard.
+        const size = statSync(auditPath).size;
+        const start = Math.max(0, size - STATUS_AUDIT_TAIL_BYTES);
+        const buf = Buffer.alloc(size - start);
+        const fd = openSync(auditPath, 'r');
+        try {
+          readSync(fd, buf, 0, buf.length, start);
+        } finally {
+          closeSync(fd);
+        }
+        // Drop the (possibly torn) first line when we started mid-file.
+        const tail = start > 0 ? buf.toString('utf8').replace(/^[^\n]*\n/, '') : buf.toString('utf8');
+        for (const line of tail.split(/\r?\n/)) {
+          if (!line.trim()) continue;
+          try {
+            const e = JSON.parse(line);
+            const isCapture =
+              e.action === 'created' ||
+              (e.action === 'import' && e.reasonCode === 'import-applied');
+            if (
+              isCapture &&
+              nowMs - Date.parse(e.ts) <= DAY_MS &&
+              nowMs - Date.parse(e.ts) >= 0
+            ) {
+              recent += 1;
+            }
+          } catch {
+            // torn NDJSON line — skip
+          }
+        }
+      }
+    } catch {
+      // audit log unreadable — the count degrades to absent
+    }
+    if (recent > 0) parts.push(`${recent} captured in the last 24h`);
+    // 3. Pending curation — only mentioned when non-zero (a quiet queue
+    // earns a quiet line).
+    let conflicts = 0;
+    let review = 0;
+    try {
+      conflicts = (listConflictsImpl ?? listConflictQueue)({ tier: 'P', projectRoot }).length;
+    } catch {
+      // queue unreadable — degrade to zero
+    }
+    try {
+      review = (listReviewImpl ?? listReviewQueue)({ tier: 'P', projectRoot }).length;
+    } catch {
+      // queue unreadable — degrade to zero
+    }
+    if (conflicts > 0 || review > 0) {
+      const q = [];
+      if (conflicts > 0) q.push(`${conflicts} conflict(s)`);
+      if (review > 0) q.push(`${review} review item(s)`);
+      parts.push(`${q.join(' + ')} pending — cmk queue`);
+    }
+    return `${prefix} ${parts.join(', ')}`;
+  } catch {
+    // The line is decoration; the snapshot is the cargo. Never crash.
+    return `${prefix} memory loaded`;
+  }
+}

package/src/install.mjs CHANGED Viewed

@@ -43,6 +43,7 @@ import { spawnSync } from 'node:child_process';
 import { basename, dirname, join, relative, resolve } from 'node:path';
 import { fileURLToPath } from 'node:url';
 import { injectClaudeMdBlock } from './claude-md.mjs';
+import { checkKitBinding, npmSupportsAllowScripts } from './native-binding.mjs';
 import { writeKitHooks, writeKitMcpServer } from './settings-hooks.mjs';
 import { appendAuditEntry, nowIso, REASON_CODES } from './audit-log.mjs';
@@ -58,6 +59,13 @@ const CLI_PKG_DIR = resolve(CLI_SRC_DIR, '..');
 // it must not show a stale hardcode (was `v0.1.0` in every install). Built per
 // install from the kit version; see gitignoreStartMarker().
 const GITIGNORE_END = '# claude-memory-kit:gitignore:end';
+// D-126 CRLF-prevention: the .gitattributes managed block uses the SAME
+// marker discipline as .gitignore (version-stamped start, in-place refresh).
+const GITATTRIBUTES_END = '# claude-memory-kit:gitattributes:end';
+function gitattributesStartMarker(version) {
+  return `# claude-memory-kit:gitattributes:start v${version}`;
+}
 function gitignoreStartMarker(version) {
   return `# claude-memory-kit:gitignore:start v${version}`;
@@ -233,6 +241,52 @@ function buildGitignoreBlock(templateDir, version = getKitVersion()) {
   return `${gitignoreStartMarker(version)}\n${fragment}\n${GITIGNORE_END}\n`;
 }
+/**
+ * Assemble the managed .gitattributes block: version-stamped start marker,
+ * the fragment body, the end marker, and a trailing newline.
+ *
+ * The body comes from template/.gitattributes.fragment (trimmed) when that
+ * file exists; otherwise a two-line built-in default that pins LF on the
+ * markdown and JSON files under context/ is used (D-126 CRLF prevention —
+ * default Windows git must not mangle the committed memory tiers at clone).
+ * Same marker discipline as the .gitignore block.
+ *
+ * @param {string} templateDir - directory holding the kit's template files.
+ * @param {string} [version] - kit version for the start marker (default: getKitVersion()).
+ * @returns {string} the complete managed block, ending in a newline.
+ */
+function buildGitattributesBlock(templateDir, version = getKitVersion()) {
+  const fragmentPath = join(templateDir, '.gitattributes.fragment');
+  let fragment = 'context/**/*.md text eol=lf\ncontext/**/*.json text eol=lf';
+  if (existsSync(fragmentPath)) {
+    fragment = readFileSync(fragmentPath, 'utf8').trim();
+  }
+  // The trailing empty element yields the final newline after the end marker.
+  return [gitattributesStartMarker(version), fragment, GITATTRIBUTES_END, ''].join('\n');
+}
+/**
+ * Inject (or refresh) the managed .gitattributes block. Same algorithm as
+ * injectGitignore (create / append-if-no-markers / replace-in-place),
+ * byte-preserving everything outside the markers.
+ *
+ * Cases:
+ *   - no .gitattributes yet → the file is written with just the block;
+ *   - file exists without a well-formed start…end marker pair → the block is
+ *     appended after a blank-line separator (reported as 'created');
+ *   - markers present → the span from the start marker through the end
+ *     marker line is swapped for `block`; nothing is written when the result
+ *     is byte-identical.
+ *
+ * The end-marker pattern also consumes a CRLF line ending, so refreshing a
+ * file that was checked out with CRLF does not leave a stray `\r\n` behind
+ * the block.
+ *
+ * @param {string} projectRoot - directory that holds (or will hold) .gitattributes.
+ * @param {string} block - the full managed block, markers included.
+ * @returns {{action: 'created'|'replaced'|'unchanged', path: string}}
+ */
+function injectGitattributes(projectRoot, block) {
+  const gaPath = join(projectRoot, '.gitattributes');
+  const startRe = /# claude-memory-kit:gitattributes:start[^\n]*\n/;
+  // `\r?` so the whole end line is replaced on CRLF files, not just up to `\r`.
+  const endRe = /# claude-memory-kit:gitattributes:end\r?\n?/;
+  if (!existsSync(gaPath)) {
+    writeFileSync(gaPath, block, 'utf8');
+    return { action: 'created', path: gaPath };
+  }
+  const existing = readFileSync(gaPath, 'utf8');
+  const startMatch = existing.match(startRe);
+  const endMatch = existing.match(endRe);
+  if (!startMatch || !endMatch || startMatch.index > endMatch.index) {
+    // No usable marker pair: append, keeping one blank line before the block.
+    const sep = existing.endsWith('\n') ? '\n' : '\n\n';
+    writeFileSync(gaPath, existing + sep + block, 'utf8');
+    return { action: 'created', path: gaPath };
+  }
+  const before = existing.slice(0, startMatch.index);
+  const after = existing.slice(endMatch.index + endMatch[0].length);
+  const next = before + block + after;
+  if (next === existing) return { action: 'unchanged', path: gaPath };
+  writeFileSync(gaPath, next, 'utf8');
+  return { action: 'replaced', path: gaPath };
+}
 /**
  * Inject (or refresh) the managed .gitignore block in `<projectRoot>/.gitignore`.
  *
@@ -328,6 +382,10 @@ export async function install(options = {}) {
   }
   const gitignore = injectGitignore(projectRoot, buildGitignoreBlock(templateDir, version));
+  // D-126 CRLF prevention: pin LF on the committed memory tiers so default
+  // Windows git can't mangle the bytes at clone (the read-side self-heal
+  // shipped in v0.3.0; this prevents the mangling in the first place).
+  const gitattributes = injectGitattributes(projectRoot, buildGitattributesBlock(templateDir, version));
   // CLAUDE.md loader block — Task 4. Read the block content from the kit's
   // template/ and inject (or refresh) it inside marker delimiters. Never
@@ -433,7 +491,14 @@ export async function install(options = {}) {
     if (!r.ok) errors.push({ path: r.path, error: r.error });
   }
-  return { projectRoot, userTier, created, skipped, gitignore, claudeMd, hooks, mcpServer, semantic, errors };
+  // Task 141a (D-129): probe the kit's native binding so the CLI can ask the
+  // user to fix it INLINE (npm 12 blocks better-sqlite3's binding build on a
+  // fresh install). Reported, never an installer error — scaffold + hooks
+  // are fully functional without it; only search/reindex need the binding.
+  const bindingProbe = options.bindingProbe ?? checkKitBinding;
+  const nativeBinding = bindingProbe();
+  return { projectRoot, userTier, created, skipped, gitignore, gitattributes, claudeMd, hooks, mcpServer, semantic, nativeBinding, errors };
 }
 /**
@@ -470,10 +535,18 @@ export function mergeProjectSettings(projectRoot, patch) {
  */
 export function buildDefaultNpmRunner({ spawnSyncImpl = spawnSync } = {}) {
   return () => {
+    // Task 141a (D-129): on npm ≥ 11.16 the `allow-scripts` config exists
+    // and npm 12 BLOCKS onnxruntime-node's install script without it — the
+    // kit runs this install itself, so it carries the allow flag itself
+    // (no user friction). Older npm: plain command, no unknown-config noise.
+    const { supported } = npmSupportsAllowScripts({ spawnSyncImpl });
+    const cmd = supported
+      ? 'npm install -g @huggingface/transformers --allow-scripts=onnxruntime-node'
+      : 'npm install -g @huggingface/transformers';
     // One constant command string under shell:true (no user input — and
     // an args array + shell:true trips Node's DEP0190). npm is npm.cmd
     // on Windows; the shell resolves it cross-platform.
-    const r = spawnSyncImpl('npm install -g @huggingface/transformers', {
+    const r = spawnSyncImpl(cmd, {
       encoding: 'utf8',
       stdio: 'inherit',
       shell: true,

package/src/mcp-server.mjs CHANGED Viewed

@@ -40,7 +40,7 @@ import { openIndexDb } from './index-db.mjs';
 import { reindexBoot } from './index-rebuild.mjs';
 import { search, SEARCH_MODES } from './search.mjs';
 import { memoryWrite } from './memory-write.mjs';
-import { rememberRich, nonProjectTierNote } from './remember-core.mjs';
+import { rememberRich, nonProjectTierNote, prepareNearDupGuard } from './remember-core.mjs';
 import { forget } from './forget.mjs';
 import { overrideTrust } from './trust.mjs';
 import { lessonsPromote } from './lessons-promote.mjs';
@@ -281,6 +281,10 @@ function makeMkRemember({ projectRoot, userDir }) {
         ],
       };
     }
+    // Task 143 (D-130): the semantic near-dup guard (one embed of the
+    // incoming text when the project is semantic-configured + the embedder
+    // is available; {} = literal pipeline, never blocks capture).
+    const nearDup = await prepareNearDupGuard({ projectRoot, text });
     const r = memoryWrite({
       action: 'add',
       text,
@@ -291,6 +295,7 @@ function makeMkRemember({ projectRoot, userDir }) {
       sessionId: 'mcp-server',
       projectRoot,
       userDir,
+      ...nearDup,
     });
     if (r.action === 'error') {
       return {