npm - @lh8ppl/claude-memory-kit - Versions diffs - 0.2.1 → 0.2.3 - Mend

@lh8ppl/claude-memory-kit 0.2.1 → 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/README.md +7 -6
package/bin/cmk-capture-prompt.mjs +17 -17
package/bin/cmk-capture-turn.mjs +22 -21
package/bin/cmk-compress-session.mjs +2 -2
package/bin/cmk-inject-context.mjs +11 -11
package/bin/cmk-observe-edit.mjs +17 -16
package/package.json +1 -1
package/src/audit-log.mjs +1 -0
package/src/auto-extract.mjs +258 -6
package/src/auto-persona.mjs +40 -8
package/src/capture-turn.mjs +48 -1
package/src/compress-session.mjs +89 -26
package/src/compressor.mjs +1 -1
package/src/conflict-queue.mjs +14 -0
package/src/doctor.mjs +3 -3
package/src/forget.mjs +29 -0
package/src/graduation.mjs +1 -1
package/src/index-rebuild.mjs +42 -0
package/src/inject-context.mjs +5 -1
package/src/install.mjs +29 -6
package/src/lazy-compress.mjs +58 -9
package/src/mcp-server.mjs +353 -124
package/src/merge-facts.mjs +4 -0
package/src/persona-portability.mjs +24 -1
package/src/read-core.mjs +87 -0
package/src/register-crons.mjs +64 -33
package/src/remember-core.mjs +91 -0
package/src/review-queue.mjs +13 -0
package/src/rich-fact.mjs +46 -0
package/src/settings-hooks.mjs +56 -2
package/src/subcommands.mjs +419 -182
package/src/weekly-curate.mjs +5 -0
package/src/write-fact.mjs +25 -1
package/template/.claude/skills/memory-write/SKILL.md +52 -35
package/template/.gitignore.fragment +9 -3
package/template/CLAUDE.md.template +2 -2
package/template/docs/journey/journey-log.md.template +1 -1

package/src/auto-extract.mjs CHANGED Viewed

@@ -48,8 +48,11 @@ import {
   appendFileSync,
 } from 'node:fs';
 import { join, dirname } from 'node:path';
+import { createHash } from 'node:crypto';
 import { generateId } from '@lh8ppl/cmk-canonicalize';
 import { memoryWrite } from './memory-write.mjs';
+import { writeFact } from './write-fact.mjs';
+import { buildRichFactBody, slugifyFact } from './rich-fact.mjs';
 import { HaikuTimeoutError } from './compressor.mjs';
 import { pidIsAlive } from './lock-discipline.mjs';
 import { nowIso } from './audit-log.mjs';
@@ -284,6 +287,21 @@ export function buildExtractionInstructions() {
     '',
     'Note: assistant-origin candidates are auto-demoted one trust level before routing (HIGH → MEDIUM → LOW → discarded). This is intentional — assistant inferences need user review. Emit your honest trust assessment; the routing layer handles demotion.',
     '',
+    'ALSO — rich fact files (durable project KNOWLEDGE). This is a SEPARATE output from the terse TRUST_ lines. When a turn reveals a durable, substantive piece of project knowledge worth a FULL record — a setup/configuration fact (trigger 3), a project convention (trigger 4), a completed multi-step workflow worth recording (trigger 5), or a tool quirk/workaround (trigger 6) — emit a BEGIN_FACT block (below) INSTEAD OF a terse TRUST_ line for it. Keep terse TRUST_ lines for the LIGHTER signals: user corrections and discovered preferences (triggers 1–2) and active threads. Emit each fact EITHER as a rich BEGIN_FACT block OR as a terse TRUST_ line — NEVER both.',
+    'Format (one block per durable fact):',
+    '  BEGIN_FACT',
+    '  type: project',
+    '  title: <short Title-Case headline, ≤ 80 chars>',
+    '  body: <what is true; if it has parts, give a short labelled markdown breakdown over multiple lines, NOT one vague sentence>',
+    '  why: <why it is true / why it matters — the rationale a future session needs>',
+    '  how: <how the next session should apply it>',
+    '  END_FACT',
+    'Rules for BEGIN_FACT blocks:',
+    '  - body may span multiple lines (markdown bullets are encouraged when the knowledge has parts — make the saved fact genuinely useful to a future session, at least as detailed as a careful hand-written note). Write it as plain markdown on the lines after `body:` — do NOT use a YAML block scalar (`|` or `>`).',
+    '  - title AND body are required; why/how are strongly preferred but optional. type defaults to project.',
+    '  - Do NOT invent facts; synthesize only what the turn shows. Never put a secret, token, password, or key in a block.',
+    '  - These facts are saved automatically (no review step), so be selective: only genuinely durable knowledge, at most a few per turn.',
+    '',
     'ALSO — cross-project doctrine. This is a REQUIRED, PER-FACT pass, separate from the TRUST_ lines above. Re-scan the SAME turn for EVERY fact that expresses how this user works in ALL their projects (tooling habits, how they structure their work, communication / process style — NOT specifics that belong to this ONE project, like a particular value, name, or detail that would not carry to their other projects). **For EACH such cross-project fact, emit its OWN PERSONA CANDIDATE line — one line per fact. If the turn states THREE cross-project rules, emit THREE PERSONA CANDIDATE lines. Never collapse several rules into one line, and never skip a rule because the turn is busy or already has TRUST_ lines.** Format (one line per cross-project fact):',
     '  PERSONA CANDIDATE | target=<HABITS.md|LESSONS.md|USER.md> | section=<Section> | confidence=<high|medium|low> | <one-line restatement>',
     '    - HABITS.md  → sections: Iteration Cadence | Destructive Operations | Communication Style',
@@ -310,7 +328,11 @@ function buildExtractionPrompt({ userTurn, assistantTurn, dedupContext }) {
   return sections.join('\n');
 }
-function parseCandidates(haikuOutput) {
+// Exported for the live-Haiku smoke (spawn-smoke-auto-extract-rich.test.js),
+// which asserts the enriched prompt still elicits parseable terse OR rich
+// output from real Haiku. The terse format is the extraction prompt's contract,
+// same as parseRichFacts below.
+export function parseCandidates(haikuOutput) {
   if (!haikuOutput || typeof haikuOutput !== 'string') return [];
   const lines = haikuOutput.split('\n');
   const candidates = [];
@@ -328,6 +350,127 @@ function parseCandidates(haikuOutput) {
   return candidates;
 }
+// --- Rich-fact parser (Task 103) ------------------------------------
+// Durable project KNOWLEDGE (the six triggers' config / convention / workflow /
+// quirk facts) is emitted by Haiku as a fenced block, parsed here into the
+// fields writeFact() needs. Lives next to parseCandidates + buildExtraction-
+// Instructions — the format and its parser stay together (same as the terse
+// TRUST_ surface). See design §6.4.
+//
+//   BEGIN_FACT
+//   type: project
+//   title: <short title>
+//   body: <summary; MAY continue as markdown bullets on following lines>
+//   why: <rationale>
+//   how: <how to apply>
+//   END_FACT
+//
+// A field's value continues across lines until the next recognized key or the
+// block close — so `body` can hold a multi-line structured breakdown (the
+// native-parity bar). type defaults to 'project' when absent/invalid; a block
+// missing title OR body is skipped (writeFact requires both).
+const RICH_FACT_VALID_TYPES = new Set(['user', 'feedback', 'project', 'reference']);
+const RICH_FACT_KEYS = new Set(['type', 'title', 'body', 'why', 'how']);
+// Defensive per-field cap so a runaway block can't write an unbounded fact body.
+const RICH_FACT_FIELD_CAP = 4000;
+// Match a `key: value` field line. String-based (not a regex) — deterministically
+// linear, no backtracking surface. Semantics: the key must be at the START of
+// the line (no leading whitespace, mirroring an `^key` anchor), with optional
+// whitespace before the colon. Returns {key, value} or null (a continuation /
+// non-key line, e.g. a `- bullet:` inside a body).
+function matchRichFactKey(line) {
+  const idx = line.indexOf(':'); // the FIRST colon separates key from value
+  if (idx <= 0) return null; // no colon at all, or a colon with no key text before it
+  const keyPart = line.slice(0, idx);
+  if (keyPart.trimStart().length !== keyPart.length) return null; // leading ws → not a key
+  const key = keyPart.trimEnd().toLowerCase(); // tolerates spaces before the colon and any letter case
+  if (!RICH_FACT_KEYS.has(key)) return null; // not one of the recognized field names → not a field line
+  return { key, value: line.slice(idx + 1).trimStart() }; // only the leading side of the value is trimmed here
+}
+// A YAML block-scalar indicator as a field's entire first-line value (`|`,
+// `|-`, `>`, `>+`, `|2`, …). Live Haiku formats a multi-line body as `body: |`
+// then indents the content — we must not keep the literal `|` or the indent.
+const BLOCK_SCALAR_RE = /^[|>][+-]?\d*$/;
+// Normalize a parsed field value: drop a leading block-scalar indicator line,
+// then dedent (strip the common leading whitespace the block scalar adds). A
+// plain single-line value passes through untouched.
+function cleanFieldValue(raw) {
+  const lines = (raw ?? '').split('\n'); // a null/undefined field is treated as the empty string
+  if (lines.length && BLOCK_SCALAR_RE.test(lines[0].trim())) lines.shift(); // first line is only a block-scalar indicator → drop it
+  const indents = lines
+    .filter((l) => l.trim() !== '') // blank lines are excluded from the common-indent calculation
+    .map((l) => (l.match(/^[ \t]*/)?.[0].length ?? 0));
+  const minIndent = indents.length ? Math.min(...indents) : 0; // 0 when every remaining line is blank
+  return lines.map((l) => l.slice(minIndent)).join('\n').trim(); // dedent each line, rejoin, trim both ends
+}
+function parseRichFactBlock(blockLines) {
+  const fields = {}; // recognized key → raw value, possibly spanning several lines
+  let currentKey = null; // the field that continuation lines are appended to
+  for (const line of blockLines) {
+    const m = matchRichFactKey(line);
+    if (m) {
+      currentKey = m.key;
+      fields[currentKey] = m.value; // first-line value (may be '' or a `|` scalar); a repeated key overwrites the earlier value
+    } else if (currentKey) {
+      // Continuation of the current field — multi-line body / why / how.
+      fields[currentKey] += '\n' + line;
+    }
+    // A non-key line before any key is ignored.
+  }
+  const title = cleanFieldValue(fields.title);
+  const body = cleanFieldValue(fields.body);
+  if (!title || !body) return null; // writeFact requires both
+  let type = cleanFieldValue(fields.type).toLowerCase(); // a missing type cleans to the empty string
+  if (!RICH_FACT_VALID_TYPES.has(type)) type = 'project'; // absent or unrecognized type falls back to the default
+  const why = cleanFieldValue(fields.why);
+  const how = cleanFieldValue(fields.how);
+  return {
+    type,
+    title: title.slice(0, RICH_FACT_FIELD_CAP), // cap is applied after cleaning; slice counts UTF-16 code units
+    body: body.slice(0, RICH_FACT_FIELD_CAP),
+    why: why ? why.slice(0, RICH_FACT_FIELD_CAP) : '', // optional fields are always returned as strings
+    how: how ? how.slice(0, RICH_FACT_FIELD_CAP) : '',
+  };
+}
+// Exported for direct unit-testing (cli-rich-fact.test.js) — the BEGIN_FACT
+// format is the extraction prompt's contract, pinned independently of a live
+// Haiku call.
+export function parseRichFacts(haikuOutput) {
+  if (!haikuOutput || typeof haikuOutput !== 'string') return []; // empty or non-string output yields no facts
+  const lines = haikuOutput.split('\n');
+  const facts = [];
+  let i = 0; // cursor shared by the outer scan and the inner block collector
+  while (i < lines.length) {
+    if (lines[i].trim().toUpperCase() !== 'BEGIN_FACT') { // marker match ignores surrounding whitespace and letter case
+      i++;
+      continue;
+    }
+    // Collect block lines until END_FACT, the next BEGIN_FACT (missing close —
+    // don't let it swallow the following block), or end-of-output.
+    i++; // step past the BEGIN_FACT line itself
+    const blockLines = [];
+    while (i < lines.length) {
+      const marker = lines[i].trim().toUpperCase();
+      if (marker === 'END_FACT') {
+        i++; // consume the closing marker
+        break;
+      }
+      if (marker === 'BEGIN_FACT') break; // close here; leave i for the outer loop
+      blockLines.push(lines[i]); // keep the raw line: the field parser relies on its original indentation
+      i++;
+    }
+    const fact = parseRichFactBlock(blockLines); // null when the block lacks a title or a body
+    if (fact) facts.push(fact);
+  }
+  return facts; // in order of appearance in the output
+}
 // Demote assistant-origin candidates one trust level. User-origin
 // candidates pass through unchanged — they're authoritative.
 // Order: must run BEFORE applyRetainOverride so the override beats
@@ -456,6 +599,45 @@ function routeMedium({ candidate, projectRoot, ts }) {
   return { action: 'queued', id, path: reviewPath };
 }
+// Route a rich fact to the project fact store via writeFact() (Task 103).
+//
+// Direct-to-fact-store (NOT the review queue the terse medium-trust path uses):
+// the point of Task 103 is AUTOMATIC native-parity capture — native writes its
+// fact files with no approval step, so parity requires the same. The fact store
+// is searchable-but-not-full-trust-injected, writeFact already screens every
+// write (home-path sanitize + Poison_Guard + schema + INDEX/reindex), and a
+// later explicit `cmk remember` (trust:high) supersedes. See design §6.4.
+//
+// trust:medium / write_source:auto-extract marks it as a Haiku synthesis
+// (proposal-grade), below the explicit-high tier. The body is built by the SAME
+// rich-fact.mjs helper the explicit path uses, so an auto-extracted fact reads
+// identically to a `cmk remember --why/--how` one.
+function routeRichFact({ candidate, projectRoot, ts }) {
+  const body = buildRichFactBody({ // combines the parsed body/why/how via the rich-fact.mjs helper
+    text: candidate.body,
+    why: candidate.why,
+    how: candidate.how,
+  });
+  return writeFact({ // the writeFact result is returned to the caller unchanged
+    tier: 'P',
+    type: candidate.type,
+    slug: slugifyFact(candidate.title), // slug is derived from the title, not from the body
+    title: candidate.title,
+    body,
+    writeSource: 'auto-extract',
+    trust: 'medium',
+    sourceFile: 'auto-extract',
+    sourceLine: 1, // NOTE(review): looks like a fixed placeholder (no real source line exists) — confirm against write-fact.mjs
+    // Content fingerprint for the provenance field — NOT a security context.
+    // Matches the kit's sha1-of-content convention (write-fact.mjs caller in
+    // subcommands.runRememberRich, memory-write.mjs); writeFact dedups by the
+    // content-addressed id, this is just source_sha1. // NOSONAR
+    sourceSha1: createHash('sha1').update(body).digest('hex'), // NOSONAR
+    createdAt: ts, // timestamp supplied by the caller
+    projectRoot,
+  });
+}
 // --- NDJSON extract.log ---------------------------------------------
 function writeExtractLogEntry({ projectRoot, ts, entry }) {
@@ -614,7 +796,7 @@ export async function runAutoExtract({
         // duration ≈ 25000ms = hitting the cap, not finishing) → automatic
         // capture + persona promotion (F2) silently never ran. This call is
         // DETACHED (fire-and-forget, never blocks the session), so a generous
-        // ceiling is free. Live-test finding (2026-06-01, lior-test-4 baseline).
+        // ceiling is free. Live-test finding (2026-06-01, live-test-4 baseline).
         timeoutMs: 90_000,
       });
       // Touch the cooldown marker IMMEDIATELY after the Haiku call
@@ -668,6 +850,22 @@ export async function runAutoExtract({
     candidates = applyRetainOverride(candidates, retainSegments);
     candidates = dedupByCanonicalId(candidates);
+    // Task 103 — rich fact synthesis on the native-immune Stop-hook path. The
+    // SAME Haiku output may carry BEGIN_FACT blocks (durable project KNOWLEDGE)
+    // alongside the terse TRUST_ lines; route them to the fact store via
+    // writeFact (richer + searchable). No second LLM call — same outputText.
+    const richFacts = parseRichFacts(haikuResult.outputText);
+    // XOR safety net: the prompt asks Haiku to emit a fact as EITHER a rich
+    // block OR a terse line, never both. If it does both for the same fact, the
+    // rich block wins — drop any terse candidate whose canonical id equals the id
+    // of a rich fact's body, so it isn't ALSO written as a MEMORY.md bullet. (Keyed
+    // on the rich fact's raw `body` headline vs the terse `text` — the prompt
+    // enforces the semantic XOR; this catches the exact-restatement case.)
+    if (richFacts.length > 0) {
+      const richIds = new Set(richFacts.map((f) => generateId('P', f.body)));
+      candidates = candidates.filter((c) => !richIds.has(generateId('P', c.text)));
+    }
     // Task 61 — inline cross-project promotion. The SAME Haiku output may
     // carry PERSONA CANDIDATE lines (cross-project doctrine); promote them to
     // the user tier THIS run (vs the weekly auto-persona janitor). No second
@@ -719,10 +917,11 @@ export async function runAutoExtract({
         }
       : {};
-    if (candidates.length === 0 && !personaLanded) {
+    if (candidates.length === 0 && richFacts.length === 0 && !personaLanded) {
       const entry = {
         ...baseEntry,
         ...personaLogFields,
+        rich_facts_written: 0,
         success: true,
         skipped_reason: 'nothing_durable',
         duration_ms: Date.now() - t0,
@@ -735,6 +934,7 @@ export async function runAutoExtract({
         duration_ms: entry.duration_ms,
         logPath,
         candidates: [],
+        richFacts: [],
         persona,
       };
     }
@@ -787,9 +987,57 @@ export async function runAutoExtract({
       }
     }
-    const observation_count = writes.filter(
-      (w) => w.written === 'memory' || w.written === 'review' || w.written === 'conflict',
-    ).length;
+    // 6b. Route rich facts to the fact store (Task 103). Each writeFact is
+    //     isolated in try/catch — a Poison_Guard / schema / collision rejection
+    //     (or an unexpected throw) must NOT take down terse routing or the
+    //     persona pass, exactly like the inline persona isolation above. A
+    //     'created' counts toward observation_count; a 'skipped' (content
+    //     duplicate) is a no-op success that doesn't re-count; anything else is
+    //     'rejected' with its category for analytics (Door 4).
+    const richWrites = [];
+    for (const fact of richFacts) {
+      try {
+        const r = routeRichFact({ candidate: fact, projectRoot, ts });
+        let written;
+        if (r?.action === 'created') written = 'fact';
+        else if (r?.action === 'skipped') written = 'fact-duplicate';
+        else written = 'rejected';
+        const rec = { ...fact, written, result: r };
+        if (written === 'rejected') {
+          rec.rejected_category = r?.errorCategory ?? 'unknown';
+          // Trace the drop (§6.5 don't-lose-without-trace), mirroring the terse
+          // low-discard trace — a rejected rich fact is otherwise invisible once
+          // the detached process exits. TITLE ONLY, never the body: a
+          // poison_guard rejection means the body may carry a secret (the
+          // redacted excerpt is already in poison-guard.log). One NDJSON entry
+          // per rejection (Door 4).
+          writeExtractLogEntry({
+            projectRoot,
+            ts,
+            entry: {
+              event: 'rich_fact_rejected',
+              reason: 'rich_fact_rejected',
+              rejected_category: rec.rejected_category,
+              title: fact.title.slice(0, LOW_DISCARD_EXCERPT_MAX),
+            },
+          });
+        }
+        richWrites.push(rec);
+      } catch (err) {
+        richWrites.push({
+          ...fact,
+          written: 'rejected',
+          rejected_category: 'exception',
+          error: err?.message ?? String(err),
+        });
+      }
+    }
+    const richFactsWritten = richWrites.filter((w) => w.written === 'fact').length;
+    const observation_count =
+      writes.filter(
+        (w) => w.written === 'memory' || w.written === 'review' || w.written === 'conflict',
+      ).length + richFactsWritten;
     // Persona-only turn: no project candidate landed, but cross-project
     // doctrine promoted to the user tier this run. That IS a durable
@@ -799,6 +1047,7 @@ export async function runAutoExtract({
       const entry = {
         ...baseEntry,
         ...personaLogFields,
+        rich_facts_written: richFactsWritten,
         success: true,
         skipped_reason: 'nothing_durable',
         duration_ms: Date.now() - t0,
@@ -811,6 +1060,7 @@ export async function runAutoExtract({
         duration_ms: entry.duration_ms,
         logPath,
         candidates: writes,
+        richFacts: richWrites,
         persona,
       };
     }
@@ -818,6 +1068,7 @@ export async function runAutoExtract({
     const entry = {
       ...baseEntry,
       ...personaLogFields,
+      rich_facts_written: richFactsWritten,
       success: true,
       observation_count,
       duration_ms: Date.now() - t0,
@@ -829,6 +1080,7 @@ export async function runAutoExtract({
       duration_ms: entry.duration_ms,
       logPath,
       candidates: writes,
+      richFacts: richWrites,
       persona,
     };
   } finally {

package/src/auto-persona.mjs CHANGED Viewed

@@ -4,11 +4,11 @@
 // reproduced design §16.16's predicted failure: cross-project doctrine
 // ("how I work everywhere" — venv-3.13, layered-backend) was captured
 // but filed PROJECT-tier; the USER tier stayed empty, collapsing the
-// 3-tier value prop to project+local. Lior won't hand-curate the user
+// 3-tier value prop to project+local. The user won't hand-curate the user
 // tier ("too much of a hassle"), so the user tier must fill itself.
 //
 // Posture (tasks.md 45.6 — supersedes 45.2/45.3's manual gate):
-// OPTIMISTIC AUTO-PROMOTE. Lior 2026-05-30: "i dont want to do
+// OPTIMISTIC AUTO-PROMOTE. The user (2026-05-30): "i dont want to do
 // anything, i want it to be automatic." A synthesized doctrine that
 // applies beyond the current project is auto-promoted to the user tier
 // at trust:medium — no manual `cmk persona accept` step. A confidence
@@ -75,6 +75,11 @@ export const PERSONA_CANDIDATE_RE =
 // userDir is passed through to listObservationSources purely to keep the
 // U-tier resolution sandbox-scoped (never walk the real home dir —
 // design §16.36); we then filter to tier P, the synthesis SOURCE.
+// Byte budget for the `facts` persona corpus (Task 111 / F-2). Bounds the Haiku
+// classifier input so a large project's whole-memory sweep can't blow the timeout.
+// Generous (facts are high-signal) but bounded; whole facts only (see below).
+export const PERSONA_CORPUS_BYTES = 60_000;
 function assembleProjectCorpus({ projectRoot, userDir }) {
   const sources = listObservationSources({ projectRoot, userDir });
   const parts = [];
@@ -94,7 +99,30 @@ function assembleProjectCorpus({ projectRoot, userDir }) {
       parts.push((content ?? '').trim());
     }
   }
-  return parts.filter(Boolean).join('\n\n');
+  // Task 111 (F-2): BOUND the corpus. Previously this joined EVERY tier-P fact
+  // + scratchpad with no cap, so on a real project with substantial memory the
+  // classifier prompt grew unbounded and the Haiku `claude --print` call blew the
+  // timeout (the reported "did not return within 50000ms"). Accumulate WHOLE
+  // facts up to a byte budget (never split a fact mid-body) and mark truncation.
+  // KNOWN LIMITATION (mirrors TRANSCRIPT_WINDOW_BYTES): facts past the budget are
+  // dropped in file-iteration order — a doctrine fact in the tail can be missed
+  // on one pass, but the weekly janitor re-runs, and some doctrine beats a
+  // timed-out zero. A value-ordered (trust/recency-first) accumulation is the
+  // follow-up if a large corpus drops doctrine.
+  const out = [];
+  let used = 0;
+  let truncated = false;
+  for (const part of parts.filter(Boolean)) {
+    const cost = Buffer.byteLength(part, 'utf8') + 2; // +2 for the '\n\n' join
+    if (used + cost > PERSONA_CORPUS_BYTES) {
+      truncated = true;
+      break;
+    }
+    out.push(part);
+    used += cost;
+  }
+  if (truncated) out.push('### …\n(corpus truncated — additional project facts omitted for this pass)');
+  return out.join('\n\n');
 }
 // Default size of the recent-transcript window handed to the SessionEnd persona
@@ -111,7 +139,7 @@ function assembleProjectCorpus({ projectRoot, userDir }) {
 // 40k chars ≈ a long session's worth of turns ≈ ~10k tokens — trivial cost for a
 // once-per-session call, and the classifier prompt's "IGNORE anything specific to
 // this ONE project" instruction guards precision at the larger size (live test:
-// clean 2/2, no false promotes). The exact bound is a lior-test-9 tuning item.
+// clean 2/2, no false promotes). The exact bound is a live-test-9 tuning item.
 // KNOWN LIMITATION (documented, not yet fixed): only the most-recent date-named
 // file is read, so a session spanning midnight loses the pre-midnight turns. Rare;
 // a multi-file read is the follow-up if it bites.
@@ -250,7 +278,7 @@ export function parsePersonaCandidates(outputText) {
  */
 export async function autoPersona(opts = {}) {
   const t0 = Date.now();
-  const { projectRoot, userDir, backend, now, settings, cooldownMs = DEFAULT_COOLDOWN_MS, source = 'facts' } = opts;
+  const { projectRoot, userDir, backend, now, settings, cooldownMs = DEFAULT_COOLDOWN_MS, source = 'facts', timeoutMs = 50_000 } = opts;
   if (!projectRoot) {
     return errorResult({
@@ -302,7 +330,11 @@ export async function autoPersona(opts = {}) {
       instructions: buildClassifierInstructions(source),
       preserveCitationIds: false,
       maxOutputBytes: 4096,
-      timeoutMs: 50_000,
+      // Task 111 (F-2): the timeout is caller-supplied. The SessionEnd hook path
+      // keeps the 50_000 default (it composes with the 60s SessionEnd ceiling per
+      // design §8.5 / D-42). The CLI `cmk persona generate` has NO outer hook
+      // ceiling, so it passes a generous value — the explicit command can wait.
+      timeoutMs,
     });
     // Spent a Haiku call — refresh the shared cooldown marker so the next
     // gated caller backs off. (touch even on cooldownMs:0 cycles: the call
@@ -349,7 +381,7 @@ export async function autoPersona(opts = {}) {
  *     inferred noise. This still holds for every medium/inferred write.
  *   - trust:'high' (explicit path — Task 76 `cmk lessons promote` + Task 78
  *     inline grading of an EXPLICITLY-STATED rule). **45.4 REFINEMENT
- *     (2026-06-02, D-32 — Lior chose "latest explicit wins"):** an explicit,
+ *     (2026-06-02, D-32 — the user chose "latest explicit wins"):** an explicit,
  *     user-attested rule at trust:high MAY supersede an equal-trust same-topic
  *     entry (high >= high → supersede). The newest explicit statement wins,
  *     even over a hand-curated high. The original protection is unchanged for
@@ -359,7 +391,7 @@ export async function autoPersona(opts = {}) {
  */
 // Persist low/medium-confidence (and otherwise-not-promoted) candidates to a
 // durable review-queue FILE at <userDir>/queues/persona-review.md, so they are
-// not lost when only returned in the response (Lior 2026-05-31: "response
+// not lost when only returned in the response (the user, 2026-05-31: "response
 // object can get lost — i dont like it"). Dedup by canonical id against what's
 // already in the file so repeated synthesis passes don't pile up duplicates.
 // Returns the queue path (or null when there's nothing to write).

package/src/capture-turn.mjs CHANGED Viewed

@@ -48,6 +48,9 @@ import {
   appendFileSync,
   readFileSync,
   writeFileSync,
+  readdirSync,
+  statSync,
+  unlinkSync,
 } from 'node:fs';
 import { join } from 'node:path';
 import { spawn } from 'node:child_process';
@@ -57,6 +60,41 @@ function dateFromIso(iso) {
   return String(iso).slice(0, 10);
 }
+// A `.extract-<ts>.tmp` turn-file lives only for the duration of one
+// auto-extract run (bounded by the Stop-hook ceiling, design §8.5). The owning
+// child unlinks it in its `finally`; capture-turn unlinks it here when the spawn
+// fails. But a child KILLED before its finally (hook ceiling), or a Windows
+// unlink refused by a scanner, leaks the temp (cut-gate7 found 2 lingering —
+// D-103 finding E). This janitor sweeps any `.extract-*.tmp` older than the
+// threshold — far longer than any live run, so it can't race an in-flight child.
+// Best-effort: a sweep hiccup must never block the capture.
+const STALE_TURN_FILE_MS = 10 * 60 * 1000; // 10 min — well beyond the hook ceiling
+export function sweepStaleTurnFiles(transcriptsDir, maxAgeMs = STALE_TURN_FILE_MS, now = Date.now()) {
+  let swept = 0; // number of files removed; this is the return value on every path
+  if (!existsSync(transcriptsDir)) return swept; // missing directory → nothing to sweep
+  let entries;
+  try {
+    entries = readdirSync(transcriptsDir);
+  } catch {
+    return swept; // unreadable directory → report zero removals instead of throwing
+  }
+  for (const name of entries) {
+    if (!name.startsWith('.extract-') || !name.endsWith('.tmp')) continue; // only turn-file names are candidates
+    const p = join(transcriptsDir, name);
+    try {
+      if (now - statSync(p).mtimeMs > maxAgeMs) { // age is measured from the last-modified time; strictly older than the threshold
+        unlinkSync(p);
+        swept += 1;
+      }
+    } catch {
+      // best-effort: a stat/unlink failure (already gone, or briefly locked)
+      // must not abort the sweep or the capture.
+    }
+  }
+  return swept;
+}
 // Write a `phase: 'spawn'` NDJSON entry to `<projectRoot>/context/sessions/{date}.extract.log`
 // when the auto-extract spawn fails. This closes PR-A's class-1 audit
 // deferral (capture-turn Door 5 observability gap). Auto-extract's own
@@ -143,7 +181,7 @@ function readLastUserTurnFromTranscript(transcriptPath) {
 // (context/sessions/now.md). Before this, now.md was fed ONLY by observe-edit's
 // file-write lines ("[ts] Write file=X lines=N"), so the SessionEnd compressor
 // summarized a list of filenames and hallucinated content the dialogue never
-// contained (lior-test-6: "Flask app: app.py" — inferred a framework from a
+// contained (live-test-6: "Flask app: app.py" — inferred a framework from a
 // filename). Buffering the actual user+assistant turns here means the summary
 // reflects what was DISCUSSED. Same `## <ts> — speaker` shape as the transcript
 // so the compressor reads it as dialogue; now.md is truncated after each compress
@@ -281,6 +319,10 @@ export function captureTurn({
   // summarizes the DIALOGUE, not observe-edit's filename log. Best-effort.
   appendConversationToNowMd({ projectRoot, ts, userTurn, assistantTurn: sanitized });
+  // Janitor: clear any orphaned turn-files from a prior killed/crashed child
+  // before writing this turn's (D-103 finding E). Best-effort.
+  sweepStaleTurnFiles(transcriptsDir);
   const turnFile = join(transcriptsDir, `.extract-${Date.now()}.tmp`);
   try {
     writeFileSync(
@@ -316,6 +358,11 @@ export function captureTurn({
       reason: spawnResult.reason,
       error: spawnResult.error,
     });
+    // NB: we do NOT unlink the turn-file here. Ownership is clean — auto-extract
+    // owns deletion (its `finally`); when the spawn fails (or a child is killed
+    // before its finally), the file becomes an orphan that the entry-sweep above
+    // reaps once it's stale (D-103 finding E). capture-turn never deletes a file
+    // it handed off, so tests can still inspect the IPC shape on the no-spawn path.
   }
   return {