npm - clawmem - Versions diffs - 0.8.5 → 0.9.0 - Mend

clawmem 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/AGENTS.md +13 -1
package/CLAUDE.md +13 -1
package/README.md +2 -0
package/SKILL.md +14 -1
package/package.json +1 -1
package/src/clawmem.ts +97 -0
package/src/config.ts +14 -3
package/src/hooks/context-surfacing.ts +87 -6
package/src/session-focus.ts +227 -0
package/src/store.ts +5 -0
package/src/vault-facts.ts +506 -0

package/AGENTS.md CHANGED Viewed

@@ -258,7 +258,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
 | Hook | Trigger | Budget | Content |
 |------|---------|--------|---------|
-| `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate → **multi-turn query construction** (v0.8.1: current prompt + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, capped at 2000 chars with current-first preservation — used only for discovery: vector/FTS/expansion, NOT for rerank/scoring/snippet extraction) → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13, raw current prompt) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, and min score all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future multi-turn lookback — except on gated skip paths (slash commands, heartbeats, too-short prompts) where the text is withheld for privacy. |
+| `context-surfacing` | UserPromptSubmit | profile-driven (default 800 + factsTokens sub-budget) | retrieval gate → **multi-turn query construction** (v0.8.1: current prompt + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, capped at 2000 chars with current-first preservation — used only for discovery: vector/FTS/expansion, NOT for rerank/scoring/snippet extraction) → **session focus topic resolution** (v0.9.0 §11.4: reads per-session focus file at `~/.cache/clawmem/sessions/<id>.focus`, threaded as intent hint to `expandQuery` + `rerank` + `extractSnippet`) → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13, raw current prompt) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → composite scoring → **session focus topic boost** (v0.9.0 §11.4: 1.4× match / 0.75× demote floor 50%, NO-OP on zero matches to preserve baseline ordering) → adaptive threshold → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships><vault-facts>…</vault-facts></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget. **v0.9.0 §11.1:** `<vault-facts>` KG injection block appends raw SPO triple lines from entities seeded by the prompt via three-path prompt-only extraction — canonical IDs + proper nouns + longer-first n-grams — with a dedicated `factsTokens` sub-budget per profile (speed=0 disables the stage, balanced=200, deep=250), cross-entity triple dedup, and truncate-at-triple-boundary budget discipline; fail-open on every error path) + optional `<vault-routing>` hint. Budget, max results, vector timeout, min score, and facts sub-budget all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future multi-turn lookback — except on gated skip paths (slash commands, heartbeats, too-short prompts) where the text is withheld for privacy. |
 | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
 | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
 | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
@@ -712,6 +712,18 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
                                 # Uses Jaccard similarity within same collection
 ```
+**Session focus topic (v0.9.0 §11.4):** Per-session topic biasing for context-surfacing. Writes a focus file at `~/.cache/clawmem/sessions/<session_id>.focus` that steers query expansion, reranking, snippet extraction, and post-composite-score topic boost (1.4× match / 0.75× demote, NO-OP on zero matches). Session-isolated — never writes to SQLite or lifecycle columns. The session ID is read from `--session-id <id>`, then `CLAUDE_SESSION_ID`, then `CLAWMEM_SESSION_ID`. When to use: user says "focus on authentication for this session" / "only surface X-related docs right now" / "let's work on Y this session." Clear the focus at the end of the subsession to return to baseline surfacing.
+```bash
+# Set a focus topic for the current session (multi-word OK)
+clawmem focus set "authentication flow"                       # uses CLAUDE_SESSION_ID / CLAWMEM_SESSION_ID env var
+clawmem focus set "authentication flow" --session-id abc123   # explicit
+# Show / clear
+clawmem focus show --session-id abc123
+clawmem focus clear --session-id abc123
+```
 ## Integration Notes
 - **Memory nudge (v0.2.0):** Every N prompts (default 15) without a lifecycle MCP tool call (`memory_pin`/`memory_forget`/`memory_snooze`), context-surfacing appends `<vault-nudge>` prompting proactive memory management. Counter resets on lifecycle tool use. Configure via `CLAWMEM_NUDGE_INTERVAL` (0 to disable).

package/CLAUDE.md CHANGED Viewed

@@ -258,7 +258,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
 | Hook | Trigger | Budget | Content |
 |------|---------|--------|---------|
-| `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate → **multi-turn query construction** (v0.8.1: current prompt + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, capped at 2000 chars with current-first preservation — used only for discovery: vector/FTS/expansion, NOT for rerank/scoring/snippet extraction) → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13, raw current prompt) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, and min score all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future multi-turn lookback — except on gated skip paths (slash commands, heartbeats, too-short prompts) where the text is withheld for privacy. |
+| `context-surfacing` | UserPromptSubmit | profile-driven (default 800 + factsTokens sub-budget) | retrieval gate → **multi-turn query construction** (v0.8.1: current prompt + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, capped at 2000 chars with current-first preservation — used only for discovery: vector/FTS/expansion, NOT for rerank/scoring/snippet extraction) → **session focus topic resolution** (v0.9.0 §11.4: reads per-session focus file at `~/.cache/clawmem/sessions/<id>.focus`, threaded as intent hint to `expandQuery` + `rerank` + `extractSnippet`) → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13, raw current prompt) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → composite scoring → **session focus topic boost** (v0.9.0 §11.4: 1.4× match / 0.75× demote floor 50%, NO-OP on zero matches to preserve baseline ordering) → adaptive threshold → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships><vault-facts>…</vault-facts></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget. **v0.9.0 §11.1:** `<vault-facts>` KG injection block appends raw SPO triple lines from entities seeded by the prompt via three-path prompt-only extraction — canonical IDs + proper nouns + longer-first n-grams — with a dedicated `factsTokens` sub-budget per profile (speed=0 disables the stage, balanced=200, deep=250), cross-entity triple dedup, and truncate-at-triple-boundary budget discipline; fail-open on every error path) + optional `<vault-routing>` hint. Budget, max results, vector timeout, min score, and facts sub-budget all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future multi-turn lookback — except on gated skip paths (slash commands, heartbeats, too-short prompts) where the text is withheld for privacy. |
 | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
 | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
 | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
@@ -712,6 +712,18 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
                                 # Uses Jaccard similarity within same collection
 ```
+**Session focus topic (v0.9.0 §11.4):** Per-session topic biasing for context-surfacing. Writes a focus file at `~/.cache/clawmem/sessions/<session_id>.focus` that steers query expansion, reranking, snippet extraction, and post-composite-score topic boost (1.4× match / 0.75× demote, NO-OP on zero matches). Session-isolated — never writes to SQLite or lifecycle columns. The session ID is read from `--session-id <id>`, then `CLAUDE_SESSION_ID`, then `CLAWMEM_SESSION_ID`. When to use: user says "focus on authentication for this session" / "only surface X-related docs right now" / "let's work on Y this session." Clear the focus at the end of the subsession to return to baseline surfacing.
+```bash
+# Set a focus topic for the current session (multi-word OK)
+clawmem focus set "authentication flow"                       # uses CLAUDE_SESSION_ID / CLAWMEM_SESSION_ID env var
+clawmem focus set "authentication flow" --session-id abc123   # explicit
+# Show / clear
+clawmem focus show --session-id abc123
+clawmem focus clear --session-id abc123
+```
 ## Integration Notes
 - **Memory nudge (v0.2.0):** Every N prompts (default 15) without a lifecycle MCP tool call (`memory_pin`/`memory_forget`/`memory_snooze`), context-surfacing appends `<vault-nudge>` prompting proactive memory management. Counter resets on lifecycle tool use. Configure via `CLAWMEM_NUDGE_INTERVAL` (0 to disable).

package/README.md CHANGED Viewed

@@ -31,6 +31,8 @@ ClawMem turns your markdown notes, project docs, and research dumps into persist
 - **Guards against cross-entity merges** during consolidation — name-aware dual-threshold merge safety compares entity anchors before merging similar observations, preventing "Alice decided X" from merging into "Bob decided X" (v0.7.1)
 - **Prevents context bleed in derived insights** — the Phase 3 deductive synthesis pipeline validates every draft against an anti-contamination wrapper (deterministic entity contamination check + LLM validator + dedupe) before writing cross-session deductive observations (v0.7.1)
 - **Frames surfaced facts as background knowledge** — `context-surfacing` wraps injected content in `<instruction>` + `<facts>` + `<relationships>` blocks, telling the model to treat facts as already-known and exposing memory-graph edges between surfaced docs directly in-prompt (v0.7.1)
+- **Injects knowledge-graph facts as structured triples** — when the user's prompt mentions entities already known to the vault, `context-surfacing` resolves them via a three-path prompt-only extractor (canonical IDs, proper nouns, lowercased n-grams), queries the SPO graph for current-state triples, and appends a `<vault-facts>` block of raw `subject predicate object` lines to `<vault-context>` — off for `speed`, 200 tokens on `balanced`, 250 on `deep`, token-truncated at the triple boundary (v0.9.0)
+- **Session-scoped focus topic boost** — `clawmem focus set "<topic>" --session-id <id>` writes a per-session focus file that steers query expansion, reranking, chunk selection, snippet extraction, and post-composite-score topic boosting (1.4× match / 0.75× demote) for that session only — session-isolated, fail-open, never writes to SQLite or lifecycle columns (v0.9.0)
 - **Scores document quality** using structure, keywords, and metadata richness signals
 - **Boosts co-accessed documents** — notes frequently surfaced together get retrieval reinforcement
 - **Decomposes complex queries** into typed retrieval clauses (BM25/vector/graph) for multi-topic questions

package/SKILL.md CHANGED Viewed

@@ -190,7 +190,7 @@ Hooks handle ~90% of retrieval. Zero agent effort.
 | Hook | Trigger | Budget | Content |
 |------|---------|--------|---------|
-| `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate -> **multi-turn query** (v0.8.1: current + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, 2000-char cap with current-first, used only for discovery — not rerank/scoring/snippet) -> profile-driven hybrid search (vector if `useVector`, timeout from profile) -> FTS supplement -> file-aware search (E13, raw current) -> snooze filter -> noise filter -> spreading activation (E11) -> memory type diversification (E10) -> tiered injection (HOT/WARM/COLD) -> `<vault-context><instruction>...</instruction><facts>...</facts><relationships>...</relationships></vault-context>` (v0.7.1: instruction always prepended; relationships list memory-graph edges where BOTH endpoints are in the surfaced set; relationships truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, min score all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future lookback — gated skip paths (slash commands, heartbeats, too-short prompts) withhold the text for privacy. |
+| `context-surfacing` | UserPromptSubmit | profile-driven (default 800 + factsTokens sub-budget) | retrieval gate -> **multi-turn query** (v0.8.1: current + up to 2 recent same-session priors, discovery only) -> **session focus topic resolution** (v0.9.0 §11.4: reads `~/.cache/clawmem/sessions/<id>.focus`, threaded as intent hint to expansion/rerank/snippet) -> profile-driven hybrid search -> FTS supplement -> file-aware search (E13) -> snooze/noise filters -> spreading activation (E11) -> composite scoring -> **session focus topic boost** (v0.9.0 §11.4: 1.4x match / 0.75x demote, NO-OP on zero matches) -> adaptive threshold -> memory type diversification (E10) -> tiered injection (HOT/WARM/COLD) -> `<vault-context><instruction>...</instruction><facts>...</facts><relationships>...</relationships><vault-facts>...</vault-facts></vault-context>` (v0.7.1: instruction always prepended; relationships = memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget. **v0.9.0 §11.1:** `<vault-facts>` appends raw SPO triple lines when the prompt mentions known entities via three-path extraction (canonical-id regex + proper-noun validation + longer-first n-grams), dedicated `factsTokens` sub-budget per profile (speed=0, balanced=200, deep=250), cross-entity triple dedup, truncate-at-triple-boundary, fail-open on every error path) + optional `<vault-routing>` hint. Budget, max results, vector timeout, min score, facts sub-budget all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future lookback — gated skip paths withhold the text for privacy. |
 | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + decisions (400) + antipatterns (150) + vault context (200) -> `<vault-postcompact>` |
 | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
 | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions -> writes `precompact-state.md`. Query-aware ranking. Reindexes auto-memory. |
@@ -761,6 +761,19 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
                                 # Jaccard similarity within same collection
 ```
+### Session Focus Topic (v0.9.0 §11.4)
+Per-session topic biasing for context-surfacing. Writes a focus file at `~/.cache/clawmem/sessions/<session_id>.focus` that steers query expansion, reranking, snippet extraction, and post-composite-score topic boost (1.4x match / 0.75x demote, NO-OP on zero matches). Session-isolated — never writes to SQLite or lifecycle columns. Session ID resolved from `--session-id <id>` > `CLAUDE_SESSION_ID` env > `CLAWMEM_SESSION_ID` env.
+**When to use:** user says "focus on X for this session" / "only surface Y right now" / "let's work on Z." Clear at end of subsession to return to baseline.
+```bash
+clawmem focus set "authentication flow"                       # uses CLAUDE_SESSION_ID env
+clawmem focus set "authentication flow" --session-id abc123   # explicit session id
+clawmem focus show --session-id abc123
+clawmem focus clear --session-id abc123
+```
 ---

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "clawmem",
-  "version": "0.8.5",
+  "version": "0.9.0",
   "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
   "type": "module",
   "bin": {

package/src/clawmem.ts CHANGED Viewed

@@ -64,6 +64,12 @@ import { precompactExtract } from "./hooks/precompact-extract.ts";
 import { postcompactInject } from "./hooks/postcompact-inject.ts";
 import { pretoolInject } from "./hooks/pretool-inject.ts";
 import { curatorNudge } from "./hooks/curator-nudge.ts";
+import {
+  readSessionFocus,
+  writeSessionFocus,
+  clearSessionFocus,
+  focusFilePath,
+} from "./session-focus.ts";
 enableProductionMode();
@@ -1906,6 +1912,91 @@ async function cmdProfile(args: string[]) {
   }
 }
+// §11.4 (v0.9.0): session-scoped focus topic — read/write/clear the
+// per-session focus file at ~/.cache/clawmem/sessions/<session_id>.focus.
+// The file is the primary signal read by context-surfacing for topic
+// boosting; the CLAWMEM_SESSION_FOCUS env var is a debug-only override
+// that does NOT provide per-session scoping on multi-session hosts.
+async function cmdFocus(args: string[]) {
+  const subCmd = args[0];
+  function resolveSessionId(rest: string[]): string {
+    const sidIdx = rest.indexOf("--session-id");
+    if (sidIdx >= 0 && rest[sidIdx + 1]) return rest[sidIdx + 1]!;
+    const envSid = (
+      process.env.CLAUDE_SESSION_ID ||
+      process.env.CLAWMEM_SESSION_ID ||
+      ""
+    ).trim();
+    if (envSid) return envSid;
+    die(
+      "No session id. Pass --session-id <id>, or set CLAUDE_SESSION_ID " +
+        "(Claude Code exposes this) or CLAWMEM_SESSION_ID env var before " +
+        "invoking this command."
+    );
+  }
+  function stripSessionIdArg(rest: string[]): string[] {
+    const sidIdx = rest.indexOf("--session-id");
+    if (sidIdx < 0) return rest;
+    return [...rest.slice(0, sidIdx), ...rest.slice(sidIdx + 2)];
+  }
+  switch (subCmd) {
+    case "set": {
+      const rest = args.slice(1);
+      const sessionId = resolveSessionId(rest);
+      const positional = stripSessionIdArg(rest);
+      const topic = positional.join(" ").trim();
+      if (!topic) {
+        die("Usage: clawmem focus set <topic> [--session-id <id>]");
+      }
+      try {
+        writeSessionFocus(sessionId, topic);
+      } catch (err: any) {
+        die(`Failed to set focus: ${err?.message ?? err}`);
+      }
+      console.log(
+        `${c.green}Focus set${c.reset} for session ${c.cyan}${sessionId}${c.reset}: ${topic}`
+      );
+      console.log(`${c.dim}File: ${focusFilePath(sessionId)}${c.reset}`);
+      break;
+    }
+    case "show": {
+      const rest = args.slice(1);
+      const sessionId = resolveSessionId(rest);
+      const topic = readSessionFocus(sessionId);
+      if (topic) {
+        console.log(
+          `${c.green}Focus${c.reset} for session ${c.cyan}${sessionId}${c.reset}: ${topic}`
+        );
+        console.log(`${c.dim}File: ${focusFilePath(sessionId)}${c.reset}`);
+      } else {
+        console.log(
+          `${c.yellow}No focus${c.reset} set for session ${c.cyan}${sessionId}${c.reset}.`
+        );
+        console.log(
+          `${c.dim}Expected file: ${focusFilePath(sessionId)}${c.reset}`
+        );
+      }
+      break;
+    }
+    case "clear": {
+      const rest = args.slice(1);
+      const sessionId = resolveSessionId(rest);
+      clearSessionFocus(sessionId);
+      console.log(
+        `${c.green}Focus cleared${c.reset} for session ${c.cyan}${sessionId}${c.reset}.`
+      );
+      break;
+    }
+    default:
+      die(
+        "Usage: clawmem focus <set|show|clear> [<topic>] [--session-id <id>]"
+      );
+  }
+}
 // =============================================================================
 // Main dispatch
 // =============================================================================
@@ -1994,6 +2085,9 @@ async function main() {
       case "profile":
         await cmdProfile(subArgs);
         break;
+      case "focus":
+        await cmdFocus(subArgs);
+        break;
       case "update-context":
         await cmdUpdateContext();
         break;
@@ -2644,6 +2738,9 @@ ${c.bold}Memory:${c.reset}
   clawmem log [--last N]               Session history
   clawmem profile                      Show user profile
   clawmem profile rebuild              Force profile rebuild
+  clawmem focus set <topic> [--session-id ID]   Set per-session focus topic (steers context-surfacing)
+  clawmem focus show [--session-id ID]          Show current focus topic
+  clawmem focus clear [--session-id ID]         Clear focus topic
 ${c.bold}Hooks:${c.reset}
   clawmem hook <name>                  Run hook (stdin JSON)

package/src/config.ts CHANGED Viewed

@@ -84,12 +84,23 @@ export interface ProfileConfig {
   deepEscalation: boolean;
   /** Max time (ms) allowed for the fast path before escalation is considered */
   escalationBudgetMs: number;
+  /**
+   * §11.1 (v0.9.0): sub-budget for the `<vault-facts>` KG injection block.
+   * Dedicated token allowance so `<vault-facts>` cannot steal budget from
+   * the existing `<facts>` / `<relationships>` blocks. `speed` profile is
+   * gated off (factsTokens=0 → stage skipped entirely). `balanced` / `deep`
+   * get 200 / 250 respectively. If the serialized facts would exceed this
+   * sub-budget, truncation happens at the triple boundary. If the total
+   * hook output would push past `tokenBudget + factsTokens`, the whole
+   * `<vault-facts>` block is dropped (established blocks take priority).
+   */
+  factsTokens: number;
 }
 export const PROFILES: Record<PerformanceProfile, ProfileConfig> = {
-  speed:    { tokenBudget: 400,  maxResults: 5,  useVector: false, vectorTimeout: 0,    minScore: 0.55, minScoreRatio: 0.65, absoluteFloor: 0.18, activationFloor: 0.24, thresholdMode: "adaptive", deepEscalation: false, escalationBudgetMs: 0 },
-  balanced: { tokenBudget: 800,  maxResults: 10, useVector: true,  vectorTimeout: 900,  minScore: 0.45, minScoreRatio: 0.55, absoluteFloor: 0.15, activationFloor: 0.20, thresholdMode: "adaptive", deepEscalation: false, escalationBudgetMs: 0 },
-  deep:     { tokenBudget: 1200, maxResults: 15, useVector: true,  vectorTimeout: 2000, minScore: 0.25, minScoreRatio: 0.45, absoluteFloor: 0.12, activationFloor: 0.16, thresholdMode: "adaptive", deepEscalation: true,  escalationBudgetMs: 4000 },
+  speed:    { tokenBudget: 400,  maxResults: 5,  useVector: false, vectorTimeout: 0,    minScore: 0.55, minScoreRatio: 0.65, absoluteFloor: 0.18, activationFloor: 0.24, thresholdMode: "adaptive", deepEscalation: false, escalationBudgetMs: 0,    factsTokens: 0   },
+  balanced: { tokenBudget: 800,  maxResults: 10, useVector: true,  vectorTimeout: 900,  minScore: 0.45, minScoreRatio: 0.55, absoluteFloor: 0.15, activationFloor: 0.20, thresholdMode: "adaptive", deepEscalation: false, escalationBudgetMs: 0,    factsTokens: 200 },
+  deep:     { tokenBudget: 1200, maxResults: 15, useVector: true,  vectorTimeout: 2000, minScore: 0.25, minScoreRatio: 0.45, absoluteFloor: 0.12, activationFloor: 0.16, thresholdMode: "adaptive", deepEscalation: true,  escalationBudgetMs: 4000, factsTokens: 250 },
 };
 export function getActiveProfile(): ProfileConfig {

package/src/hooks/context-surfacing.ts CHANGED Viewed

@@ -31,6 +31,12 @@ import { sanitizeSnippet } from "../promptguard.ts";
 import { shouldSkipRetrieval, isRetrievedNoise } from "../retrieval-gate.ts";
 import { MAX_QUERY_LENGTH } from "../limits.ts";
 import { writeRecallEvents, hashQuery } from "../recall-buffer.ts";
+import { resolveSessionTopic, applyTopicBoost } from "../session-focus.ts";
+import {
+  extractPromptEntities,
+  buildVaultFactsBlock,
+  type VaultFactsTriple,
+} from "../vault-facts.ts";
 // =============================================================================
 // Config
@@ -143,6 +149,20 @@ export async function contextSurfacing(
   const tokenBudget = profile.tokenBudget;
   const startTime = Date.now();
+  // §11.4: Resolve session-scoped focus topic. Primary signal is the
+  // per-session focus file at ~/.cache/clawmem/sessions/<id>.focus
+  // (file > env var precedence via resolveSessionTopic). Env var
+  // CLAWMEM_SESSION_FOCUS is a debug-only override and does NOT
+  // provide per-session scoping on multi-session hosts. Used as
+  // (a) optional `intent` on expandQuery/rerank/extractSnippet call
+  // sites below, and (b) the driver for the post-composite topic
+  // boost stage. Fail-open: missing / unreadable / corrupt / empty /
+  // oversized focus file → undefined → every consumer no-ops.
+  const sessionTopic = resolveSessionTopic(
+    input.sessionId,
+    process.env.CLAWMEM_SESSION_FOCUS
+  );
   const isRecency = hasRecencyIntent(prompt);
   const minScore = isRecency ? MIN_COMPOSITE_SCORE_RECENCY : profile.minScore;
@@ -239,7 +259,7 @@ export async function contextSurfacing(
     if (elapsed < profile.escalationBudgetMs) {
       try {
         // Phase 1: Query expansion — discover candidates BM25+vector missed
-        const expanded = await store.expandQuery(retrievalQuery, DEFAULT_QUERY_MODEL);
+        const expanded = await store.expandQuery(retrievalQuery, DEFAULT_QUERY_MODEL, sessionTopic);
         if (expanded.length > 0) {
           const seen = new Set(results.map(r => r.filepath));
           for (const eq of expanded.slice(0, 3)) {
@@ -263,7 +283,7 @@ export async function contextSurfacing(
             file: r.filepath,
             text: (r.body || "").slice(0, 2000),
           }));
-          const reranked = await store.rerank(prompt, toRerank, DEFAULT_RERANK_MODEL);
+          const reranked = await store.rerank(prompt, toRerank, DEFAULT_RERANK_MODEL, sessionTopic);
           if (reranked.length > 0) {
             const rerankedMap = new Map(reranked.map(r => [r.file, r.score]));
             // Blend: 60% original score + 40% reranker score for stability
@@ -335,6 +355,15 @@ export async function contextSurfacing(
   // Apply composite scoring
   const allScored = applyCompositeScoring(enriched, prompt);
+  // §11.4: Session-scoped topic boost — post-composite, pre-threshold.
+  // Boosts docs whose title/path/body match all tokens of the declared
+  // session focus topic (1.4×); demotes non-matching docs (0.75×, floor
+  // 50%). Mutates compositeScore in place and re-sorts. Fail-open: no
+  // topic set → no-op (byte-identical pre-§11.4 output).
+  if (sessionTopic) {
+    applyTopicBoost(allScored, sessionTopic, { boostFactor: 1.4, demoteFactor: 0.75 });
+  }
   // Threshold filtering — adaptive (ratio-based) or absolute (legacy)
   let scored: typeof allScored;
   if (profile.thresholdMode === "adaptive") {
@@ -400,7 +429,7 @@ export async function contextSurfacing(
   // in afterward using whatever budget remains and are the first thing
   // truncated when the payload would overflow.
   const factsBudget = Math.max(0, tokenBudget - INSTRUCTION_TOKEN_COST);
-  const { context, paths, tokens } = buildContext(scored, prompt, factsBudget);
+  const { context, paths, tokens } = buildContext(scored, prompt, factsBudget, sessionTopic);
   if (!context) {
     logEmptyTurn(store, input, prompt);
@@ -489,9 +518,60 @@ export async function contextSurfacing(
   );
   const vaultInner = buildVaultContextInner(context, relationSnippets, relationBudget);
+  // §11.1 (v0.9.0): `<vault-facts>` KG injection.
+  //
+  // Stage ordering (frozen in BACKLOG.md §11.1): retrieval + rerank +
+  // scoring + topic boost (§11.4) + threshold + diversification → build
+  // <facts>/<relationships> → compute remaining facts-block budget →
+  // inject <vault-facts> if entities resolve AND budget allows.
+  //
+  // Prompt-only seeding (HARD CONSTRAINT): entity seeds come from the
+  // raw user prompt ONLY, never from `surfacedDocs[i].body`, snippets,
+  // or any retrieval-phase field. Without this, a topic-boosted
+  // off-topic doc (§11.4) could pollute the facts block with facts
+  // about entities that have nothing to do with the user's actual
+  // prompt.
+  //
+  // Profile-gated via `profile.factsTokens`: `speed` profile sets this
+  // to 0, which naturally disables the stage. `balanced`/`deep` get a
+  // dedicated sub-budget that cannot steal from <facts>/<relationships>.
+  //
+  // Fail-open: any DB error, empty entity set, empty triple set, or
+  // budget-too-small case returns the baseline `vaultInner` unchanged
+  // (byte-identical pre-§11.1 output).
+  let vaultInnerWithFacts = vaultInner;
+  if (profile.factsTokens > 0) {
+    try {
+      const entities = extractPromptEntities(prompt, store.db, "default");
+      if (entities.length > 0) {
+        const queryTriples = (entityId: string): VaultFactsTriple[] =>
+          store
+            .queryEntityTriples(entityId)
+            .map(t => ({
+              subject: t.subject,
+              predicate: t.predicate,
+              object: t.object,
+              validTo: t.validTo,
+              confidence: t.confidence,
+            }));
+        const factsBlock = buildVaultFactsBlock(
+          entities,
+          queryTriples,
+          profile.factsTokens,
+          { estimateTokens }
+        );
+        if (factsBlock) {
+          vaultInnerWithFacts = `${vaultInner}\n${factsBlock}`;
+        }
+      }
+    } catch {
+      /* fail-open: degraded vault behaves identically to pre-§11.1 */
+    }
+  }
   const parts: string[] = [];
   if (routingHint) parts.push(`<vault-routing>${routingHint}</vault-routing>`);
-  parts.push(`<vault-context>\n${vaultInner}\n</vault-context>`);
+  parts.push(`<vault-context>\n${vaultInnerWithFacts}\n</vault-context>`);
   if (nudge) parts.push(`<vault-nudge>${NUDGE_TEXT}</vault-nudge>`);
   return makeContextOutput("context-surfacing", parts.join("\n"));
@@ -552,7 +632,8 @@ function detectRoutingHint(prompt: string): string | null {
 function buildContext(
   scored: ScoredResult[],
   query: string,
-  budget: number = DEFAULT_TOKEN_BUDGET
+  budget: number = DEFAULT_TOKEN_BUDGET,
+  intent?: string
 ): { context: string; paths: string[]; tokens: number } {
   const lines: string[] = [];
   const paths: string[] = [];
@@ -579,7 +660,7 @@ function buildContext(
       if (sanitized === "[content filtered for security]") continue;
       const snippet = smartTruncate(
-        extractSnippet(sanitized, query, tier.snippetLen, r.chunkPos).snippet,
+        extractSnippet(sanitized, query, tier.snippetLen, r.chunkPos, intent).snippet,
         tier.snippetLen
       );
       entry = `**${safeTitle}**${typeTag}\n${safePath}\n${snippet}`;

package/src/session-focus.ts ADDED Viewed

@@ -0,0 +1,227 @@
+/**
+ * Session-Scoped Focus (§11.4 — v0.9.0)
+ *
+ * Per-session topic primitive that biases context-surfacing ranking toward
+ * docs relevant to the declared working context — WITHOUT persisting any
+ * state to SQLite. Intra-session curation that cannot contaminate other
+ * sessions.
+ *
+ * Primary signal: per-session state file at
+ *   ~/.cache/clawmem/sessions/<session_id>.focus
+ *
+ * The env var CLAWMEM_SESSION_FOCUS is a DEBUG-ONLY fallback: it is
+ * consulted only when the per-session file yields no valid topic
+ * (precedence is file > env var, see `resolveSessionTopic`). Because it
+ * is a single process-wide variable it does NOT provide per-session
+ * scoping in multi-session host processes (e.g. a long-lived MCP server
+ * handling multiple Claude Code sessions). Use the file path for
+ * correctness; use the env var for ad-hoc single-session debugging only.
+ *
+ * All read paths are fail-open. Unreadable, corrupt, empty, missing,
+ * invalid-UTF-8, or oversized focus files return undefined and the
+ * caller proceeds with baseline ranking (byte-identical to pre-§11.4).
+ * The stage must NEVER half-apply a malformed topic.
+ */
+import * as fs from "fs";
+import * as path from "path";
+import * as os from "os";
+import type { ScoredResult } from "./memory.ts";
+/** Maximum accepted topic length after trimming, compared against `String.length` on both the read and write paths. */
+const MAX_TOPIC_LEN = 256;
+/**
+ * Directory that holds the per-session focus files.
+ *
+ * A non-blank `CLAWMEM_FOCUS_ROOT` environment variable wins and is
+ * returned verbatim (the value is only trimmed for the blank check, not
+ * for the result). Otherwise the location is
+ * `<home>/.cache/clawmem/sessions`. The override is mainly a test hook
+ * (so `bun:test` can redirect writes to a tmp dir), but an operator may
+ * also use it to move the focus files out of `$HOME`.
+ *
+ * The environment is consulted on every call, so changes made after
+ * module load take effect without a reload.
+ */
+export function focusRoot(): string {
+  const fromEnv = process.env.CLAWMEM_FOCUS_ROOT ?? "";
+  const hasOverride = fromEnv.trim() !== "";
+  return hasOverride
+    ? fromEnv
+    : path.join(os.homedir(), ".cache", "clawmem", "sessions");
+}
+/**
+ * Build the path of the focus file for one session:
+ * `<focusRoot()>/<sessionId>.focus`.
+ *
+ * The session id becomes a file name, so it must be a single path
+ * segment. Ids containing a path separator (`/` or `\`) or a NUL byte
+ * are rejected; otherwise an id such as `../../x` would resolve to a
+ * file outside the focus root.
+ *
+ * @param sessionId Session identifier used as the file's base name.
+ * @returns Path of the session's `.focus` file under `focusRoot()`.
+ * @throws Error if `sessionId` contains a path separator or NUL byte.
+ */
+export function focusFilePath(sessionId: string): string {
+  if (/[\\/\0]/.test(sessionId)) {
+    throw new Error("focusFilePath: sessionId must not contain path separators");
+  }
+  return path.join(focusRoot(), `${sessionId}.focus`);
+}
+/**
+ * Read the session focus topic. Returns undefined on any failure:
+ * - sessionId missing/empty
+ * - file does not exist
+ * - file unreadable (permissions, etc.)
+ * - file empty or whitespace-only
+ * - trimmed content exceeds MAX_TOPIC_LEN
+ * - file contains invalid UTF-8
+ *
+ * The file is read as raw bytes and decoded with a `fatal` TextDecoder.
+ * `fs.readFileSync` with an encoding does NOT throw on malformed UTF-8
+ * (it substitutes U+FFFD), which would let a corrupt topic through.
+ *
+ * Never throws. Caller treats undefined as "no topic set" and skips
+ * the boost stage entirely.
+ *
+ * @param sessionId Session whose focus file should be read.
+ * @returns The trimmed topic, or undefined when no valid topic exists.
+ */
+export function readSessionFocus(sessionId?: string): string | undefined {
+  if (!sessionId) return undefined;
+  try {
+    const p = focusFilePath(sessionId);
+    if (!fs.existsSync(p)) return undefined;
+    const bytes = fs.readFileSync(p);
+    // Strict decode: malformed byte sequences throw and land in the catch below.
+    const raw = new TextDecoder("utf-8", { fatal: true }).decode(bytes);
+    const topic = raw.trim();
+    if (!topic) return undefined;
+    if (topic.length > MAX_TOPIC_LEN) return undefined;
+    return topic;
+  } catch {
+    return undefined;
+  }
+}
+/**
+ * Persist a focus topic for a session, replacing any previous one.
+ *
+ * The topic is trimmed before it is validated and written. The sessions
+ * directory is created (recursively) when it does not exist yet.
+ *
+ * @param sessionId Session the topic belongs to; must be non-blank.
+ * @param topic Topic text; must be non-blank and at most MAX_TOPIC_LEN
+ *   characters after trimming.
+ * @throws Error on a blank sessionId, a blank topic, or an over-long
+ *   topic. I/O errors from the fs calls propagate as well, because the
+ *   caller is a CLI command that should fail loudly on misuse.
+ */
+export function writeSessionFocus(sessionId: string, topic: string): void {
+  const hasSession = Boolean(sessionId) && sessionId.trim() !== "";
+  if (!hasSession) throw new Error("writeSessionFocus: sessionId required");
+  const cleaned = topic.trim();
+  if (cleaned === "") throw new Error("writeSessionFocus: topic required");
+  if (cleaned.length > MAX_TOPIC_LEN) {
+    throw new Error(`writeSessionFocus: topic exceeds max length ${MAX_TOPIC_LEN}`);
+  }
+  const root = focusRoot();
+  fs.mkdirSync(root, { recursive: true });
+  const target = focusFilePath(sessionId);
+  fs.writeFileSync(target, cleaned, { encoding: "utf-8" });
+}
+/**
+ * Remove a session's focus file so ranking reverts to baseline.
+ *
+ * Does nothing for an empty sessionId or a file that is not there.
+ * Any error raised while locating or deleting the file is swallowed:
+ * clearing is best-effort and this function never throws.
+ */
+export function clearSessionFocus(sessionId: string): void {
+  if (sessionId) {
+    try {
+      const target = focusFilePath(sessionId);
+      const present = fs.existsSync(target);
+      if (present) fs.unlinkSync(target);
+    } catch {
+      /* best-effort: a failed delete is deliberately ignored */
+    }
+  }
+}
+/**
+ * Pick the effective focus topic for a session.
+ *
+ * The per-session focus file is consulted first. Only when it yields no
+ * topic is the supplied env-var value (the CLAWMEM_SESSION_FOCUS debug
+ * override) used, after trimming. The file wins because it is the only
+ * signal that is scoped per session on multi-session host processes.
+ *
+ * Kept as a standalone function so the precedence rule can be
+ * unit-tested without running a full contextSurfacing invocation.
+ *
+ * Never throws and never logs.
+ *
+ * @param sessionId Session whose focus file is checked first.
+ * @param envVar Raw env-var value used as the fallback.
+ * @returns The resolved topic, or undefined when neither source has a
+ *   non-blank value ("no topic set").
+ */
+export function resolveSessionTopic(
+  sessionId: string | undefined,
+  envVar: string | undefined
+): string | undefined {
+  return readSessionFocus(sessionId) || envVar?.trim() || undefined;
+}
+/**
+ * Decide whether a result is "about" the topic.
+ *
+ * The topic is lower-cased and split on whitespace; pieces shorter than
+ * 2 characters are discarded. The result matches only when EVERY
+ * remaining token occurs as a substring of the lower-cased title,
+ * displayPath, and the first 800 characters of the body.
+ * A topic with no usable tokens never matches.
+ */
+function matchesTopic(result: ScoredResult, topic: string): boolean {
+  const needles: string[] = [];
+  for (const piece of topic.toLowerCase().split(/\s+/)) {
+    const word = piece.trim();
+    if (word.length >= 2) needles.push(word);
+  }
+  if (needles.length === 0) return false;
+  const bodyHead = (result.body || "").slice(0, 800);
+  const searchable = `${result.title || ""} ${result.displayPath || ""} ${bodyHead}`.toLowerCase();
+  for (const needle of needles) {
+    if (!searchable.includes(needle)) return false;
+  }
+  return true;
+}
+/** Optional multipliers accepted by `applyTopicBoost`. */
+export interface TopicBoostOptions {
+  /** Multiplier applied to docs whose title/path/body match all topic tokens. Default 1.4. */
+  boostFactor?: number;
+  /**
+   * Multiplier applied to non-matching docs. Default 0.75.
+   * Clamped to a 0.5 floor so the boost is a re-ranker, not a hide —
+   * non-matching docs are demoted but never suppressed to zero.
+   * Only the lower bound is enforced; larger values pass through as given.
+   */
+  demoteFactor?: number;
+}
+/**
+ * Session-topic re-ranking pass over an already-scored result set.
+ * Intended to run after composite scoring and before threshold filtering.
+ *
+ * Outcomes:
+ *   - Blank/undefined topic, or an empty result set → the input array is
+ *     returned untouched.
+ *   - Topic set but no result matches it → the input array is returned
+ *     untouched. This is the fail-open rule: demoting every doc uniformly
+ *     could push some below the downstream threshold and silently shrink
+ *     the result set compared with the no-topic baseline.
+ *   - At least one match → matching results have compositeScore
+ *     multiplied by boostFactor (default 1.4), all others by demoteFactor
+ *     (default 0.75, never below 0.5), and the array is re-sorted by
+ *     descending compositeScore.
+ *
+ * Match flags are computed once per result up front, so the zero-match
+ * short-circuit does not evaluate the token match twice.
+ *
+ * No DB access and no persisted state. The input array and its elements
+ * are mutated in place and the same array instance is returned.
+ */
+export function applyTopicBoost<T extends ScoredResult>(
+  scored: T[],
+  topic: string | undefined,
+  options: TopicBoostOptions = {}
+): T[] {
+  if (!topic || topic.trim() === "" || scored.length === 0) return scored;
+  const boost = options.boostFactor ?? 1.4;
+  const demote = Math.max(options.demoteFactor ?? 0.75, 0.5);
+  const hitFlags = scored.map(doc => matchesTopic(doc, topic));
+  // Fail-open: with no matching doc the baseline ordering is preserved.
+  if (!hitFlags.includes(true)) return scored;
+  scored.forEach((doc, idx) => {
+    doc.compositeScore = doc.compositeScore * (hitFlags[idx] ? boost : demote);
+  });
+  scored.sort((x, y) => y.compositeScore - x.compositeScore);
+  return scored;
+}

package/src/store.ts CHANGED Viewed

@@ -711,6 +711,11 @@ function initializeDatabase(db: Database): void {
   db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_type ON entity_nodes(entity_type)`);
   db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_vault ON entity_nodes(vault)`);
   db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_mentions ON entity_nodes(mention_count DESC)`);
+  // §11.1 (v0.9.0): expression index backing the `LOWER(name) IN (...) AND vault = ?`
+  // batch lookup used by the context-surfacing entity-detection hot path.
+  // Without this index the batch query devolves into a full scan on large vaults.
+  // Idempotent via IF NOT EXISTS — existing vaults pick it up on next open.
+  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_lower_name ON entity_nodes(LOWER(name), vault)`);
   // Entity mentions: entity ↔ document junction table
   db.exec(`

package/src/vault-facts.ts ADDED Viewed

@@ -0,0 +1,506 @@
+/**
+ * §11.1 — `<vault-facts>` KG injection for context-surfacing (v0.9.0)
+ *
+ * Prompt-only entity detection + exact-match validation + triple query +
+ * token-budgeted XML serialization. Wires the SPO knowledge graph
+ * (populated by v0.8.5 decision-extractor + A-MEM enrichment) into the
+ * retrieval hot path without ever reading from ranked documents.
+ *
+ * Hard constraint from the approved design (§11.1, prompt-only seeding):
+ * entity seeds come from `input.prompt` text ONLY, never from ranked
+ * document bodies or snippets. Without this, a topic-boosted off-topic
+ * doc (§11.4) could pollute the `<vault-facts>` block with facts about
+ * entities that have nothing to do with the user's actual prompt.
+ *
+ * Three-path candidate generation (BACKLOG §11.1 "Concrete implementation"):
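+ *   (a) canonical-ID regex: unanchored scan for `vault:type:slug` ids,
+ *       where the slug may contain interior hyphens (see
+ *       `CANONICAL_ID_REGEX` for the exact pattern)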
+ *   (b) proper-noun extraction (capitalized tokens + all-caps acronyms)
+ *   (c) normalized n-gram scan against entity_nodes.name (1-3 grams,
+ *       keep internal hyphens whole, batch SQL lookup via
+ *       `WHERE LOWER(name) IN (?, ?, ...) AND vault = ?` backed by
+ *       the `idx_entity_nodes_lower_name` expression index added in
+ *       the v0.9.0 schema migration)
+ *
+ * Per-path validate-then-count ordering (Codex §11.1 Turn 5):
+ *   - Path (a): validate via direct PK lookup, count immediately.
+ *   - Path (b): validate via `resolveEntityTypeExact` BEFORE counting —
+ *     only non-null results consume budget. Raw capitalized tokens that
+ *     fail validation are dropped silently without starving path (c).
+ *   - Path (c): validated hits fill remaining slots up to the 100-cap.
+ *     Within path (c): 3-grams > 2-grams > 1-grams; prompt order is
+ *     the final tie-breaker within each length class.
+ *
+ * Cross-path dedup: path (b) / (c) hits that resolve to the same
+ * entity_id as an earlier path (a) hit are no-ops — they do not
+ * consume a second cap slot.
+ *
+ * Fail-open discipline (BACKLOG §11.1 "CRITICAL fail-open requirement"):
+ *   - Empty prompt / zero candidates → return [] (caller skips stage).
+ *   - SQLite error during any lookup → caught per-candidate, silent skip.
+ *   - Empty triples for every validated entity → return null from
+ *     buildVaultFactsBlock (caller omits the block entirely).
+ *   - Token budget too small to fit even one triple → return null.
+ *   - Exact-match ambiguity (two entities with the same name) → skip
+ *     that entity via `resolveEntityTypeExact` returning null.
+ */
+import type { Database } from "bun:sqlite";
+import { resolveEntityTypeExact, ensureEntityCanonical } from "./entity.ts";
+// =============================================================================
+// Constants
+// =============================================================================
+/** Hard upper bound on the number of VALIDATED entity candidates per prompt. */
+const CANDIDATE_CAP = 100;
+/** Maximum n-gram length (inclusive). 3-grams provide the best recall
+ *  vs. signal trade-off per the Codex Turn 3 analysis; 4-grams dilute. */
+const MAX_NGRAM_LEN = 3;
+/**
+ * Canonical entity ID shape: `vault:type:slug`. The slug segment can
+ * include hyphens (e.g. `skill:tool:forge-stack`). Use a non-hyphen
+ * boundary on both ends so a trailing `.` or `,` doesn't swallow the
+ * last character but interior hyphens survive intact.
+ *
+ * The lookbehind/lookahead reject a match that is directly preceded or
+ * followed by a letter, digit, `_`, or `-`, and the slug must start and
+ * end with `[a-z0-9_]`. Global flag: every occurrence in the prompt is
+ * collected via `String.prototype.match`.
+ */
+const CANONICAL_ID_REGEX = /(?<![a-zA-Z0-9_-])[a-z][a-z0-9-]*:[a-z_]+:[a-z0-9_](?:[a-z0-9_-]*[a-z0-9_])?(?![a-zA-Z0-9_-])/g;
+/**
+ * Proper-noun shape: capitalized first letter + optional mixed case, OR
+ * all-caps acronyms (2+ chars). Matches `ClawMem`, `OAuth`, `API`, `Bun`,
+ * `PostgreSQL`, `JWT`, etc. Intentionally does NOT match lowercase
+ * technical identifiers like `clawmem`, `forge-stack`, `oauth2` — those
+ * are the job of path (c) n-gram scanning.
+ *
+ * A lone capital letter does not match: the first alternative needs at
+ * least one lowercase letter or digit after the initial capital, and the
+ * second needs two or more capitals. ASCII letters only.
+ */
+const PROPER_NOUN_REGEX = /\b(?:[A-Z][a-z0-9]+(?:[A-Z][a-z0-9]*)*|[A-Z]{2,}[a-z0-9]*)\b/g;
+// =============================================================================
+// Types
+// =============================================================================
+/** An entity detected in the prompt and confirmed to exist in `entity_nodes`. */
+export interface ValidatedEntity {
+  /** Canonical `vault:type:slug` entity id for querying triples. */
+  entityId: string;
+  /** Lowercase name used internally for dedup / audit. For `canonical-id`
+   *  hits this is the entity id itself. */
+  name: string;
+  /** Entity type as stored in `entity_nodes.entity_type`. */
+  type: string;
+  /** Which extraction path surfaced this candidate (for debugging). */
+  sourcePath: "canonical-id" | "proper-noun" | "ngram";
+}
+/** One normalized n-gram window produced by `generateNgramCandidates`. */
+export interface NgramCandidate {
+  /** Lowercase / whitespace-normalized n-gram text. */
+  normalized: string;
+  /** N-gram length: 1, 2, or 3. Used for the longer-first tie-breaker. */
+  length: 1 | 2 | 3;
+  /** Index of the window's first token in the tokenized prompt, for
+   *  stable prompt-order tie-break. */
+  promptOrder: number;
+}
+/**
+ * Lightweight shape of a knowledge-graph triple the caller needs for serialization.
+ * `buildVaultFactsBlock` treats a null/empty `validTo` as "still current"
+ * and does not read `confidence`.
+ */
+export interface VaultFactsTriple {
+  subject: string;
+  predicate: string;
+  object: string;
+  validTo: string | null;
+  confidence: number;
+}
+/** Function shape used to query triples for a single entity id.
+ *  Decoupled from `Store` so unit tests can inject a mock. */
+export type TripleQueryFn = (entityId: string) => VaultFactsTriple[];
+// =============================================================================
+// Path (a) — canonical-ID regex
+// =============================================================================
+/**
+ * Collect every canonical-ID-shaped substring of the prompt.
+ *
+ * Duplicates are removed and the first occurrence of each id keeps its
+ * position. This is a purely syntactic scan with `CANONICAL_ID_REGEX`;
+ * no database lookup happens here.
+ *
+ * @returns The distinct ids in first-occurrence order; empty for an
+ *   empty prompt or when nothing matches.
+ */
+export function extractCanonicalIds(prompt: string): string[] {
+  if (!prompt) return [];
+  const found = prompt.match(CANONICAL_ID_REGEX);
+  // A Set iterates in insertion order, which preserves first-occurrence order.
+  return found === null ? [] : Array.from(new Set(found));
+}
+// =============================================================================
+// Path (b) — proper-noun extraction
+// =============================================================================
+/**
+ * Collect every proper-noun-shaped token of the prompt.
+ *
+ * Duplicates (case-sensitive) are removed and the first occurrence of
+ * each token keeps its position. This is a purely syntactic scan with
+ * `PROPER_NOUN_REGEX`; validation against the database is the job of
+ * the per-path budget-accounting step, not of this function.
+ *
+ * @returns The distinct tokens in first-occurrence order; empty for an
+ *   empty prompt or when nothing matches.
+ */
+export function extractProperNouns(prompt: string): string[] {
+  if (!prompt) return [];
+  const found = prompt.match(PROPER_NOUN_REGEX);
+  // A Set iterates in insertion order, which preserves first-occurrence order.
+  return found === null ? [] : Array.from(new Set(found));
+}
+// =============================================================================
+// Path (c) — normalized n-gram scan
+// =============================================================================
+/**
+ * Tokenize a prompt for n-gram generation. Splits on whitespace and
+ * common punctuation while keeping internal hyphens whole — so
+ * `forge-stack` stays one token, not two. Strips edge punctuation
+ * (quotes, backticks, brackets, dots, ...) from each token boundary.
+ *
+ * Edge stripping is Unicode-aware: any letter (`\p{L}`), number
+ * (`\p{N}`) or hyphen is kept. An ASCII-only class would eat accented or
+ * non-Latin edge characters (`café.` → `caf`), which both loses the real
+ * name and can produce a false match on a different, shorter name.
+ *
+ * @returns The non-empty tokens in prompt order; empty for an empty prompt.
+ */
+function tokenizeForNgrams(prompt: string): string[] {
+  if (!prompt) return [];
+  return prompt
+    .split(/[\s,;:!?"'`()\[\]{}<>]+/)
+    .map(t => t.replace(/^[^\p{L}\p{N}\-]+|[^\p{L}\p{N}\-]+$/gu, ""))
+    .filter(t => t.length > 0);
+}
+/**
+ * Produce every 1-, 2- and 3-token window of the prompt.
+ *
+ * Each window is normalized (lower-cased, trimmed, runs of whitespace
+ * collapsed to a single space) and windows with an already-seen
+ * normalized form are dropped. Output is in generation order: all
+ * 1-grams in prompt order, then all 2-grams, then all 3-grams. Callers
+ * that need the longer-first priority must re-sort by length and
+ * promptOrder themselves.
+ */
+export function generateNgramCandidates(prompt: string): NgramCandidate[] {
+  const words = tokenizeForNgrams(prompt);
+  const emitted = new Set<string>();
+  const candidates: NgramCandidate[] = [];
+  for (let size = 1; size <= MAX_NGRAM_LEN; size += 1) {
+    const lastStart = words.length - size;
+    for (let start = 0; start <= lastStart; start += 1) {
+      const window = words.slice(start, start + size);
+      const normalized = window.join(" ").toLowerCase().trim().replace(/\s+/g, " ");
+      if (normalized === "" || emitted.has(normalized)) continue;
+      emitted.add(normalized);
+      candidates.push({ normalized, length: size as 1 | 2 | 3, promptOrder: start });
+    }
+  }
+  return candidates;
+}
+/**
+ * Look up a batch of normalized candidate names in `entity_nodes` with
+ * one parameterized query (`LOWER(name) IN (...) AND vault = ?`).
+ *
+ * Input names are deduplicated and only the first 500 distinct names
+ * are sent to SQL, so input order decides which names survive an
+ * oversized batch. When several rows share the same `LOWER(name)`, the
+ * last row returned wins in the resulting map.
+ *
+ * Fail-open: any error while preparing or running the query yields an
+ * empty map, and the caller proceeds as if nothing matched.
+ *
+ * @param db Open database handle.
+ * @param candidates Already-normalized (lower-cased) candidate names.
+ * @param vault Vault to restrict the lookup to.
+ * @returns Map from `LOWER(name)` to `{ entityId, entityType }`.
+ */
+export function batchLookupNames(
+  db: Database,
+  candidates: string[],
+  vault: string = "default"
+): Map<string, { entityId: string; entityType: string }> {
+  const found = new Map<string, { entityId: string; entityType: string }>();
+  if (candidates.length === 0) return found;
+  // The 500 bound keeps the IN clause small on pathological prompts while
+  // staying well above the per-prompt candidate cap applied by the caller.
+  const names = Array.from(new Set(candidates)).slice(0, 500);
+  const slots = names.map(() => "?").join(", ");
+  const sql = `
+    SELECT DISTINCT LOWER(name) AS lname, entity_id, entity_type
+    FROM entity_nodes
+    WHERE LOWER(name) IN (${slots})
+      AND vault = ?
+  `;
+  try {
+    const rows = db.prepare(sql).all(...names, vault) as Array<{
+      lname: string;
+      entity_id: string;
+      entity_type: string;
+    }>;
+    rows.forEach(({ lname, entity_id, entity_type }) => {
+      found.set(lname, { entityId: entity_id, entityType: entity_type });
+    });
+  } catch {
+    /* fail-open: whatever was collected so far (normally nothing) is returned */
+  }
+  return found;
+}
+// =============================================================================
+// Main entity extraction — three-path, validate-then-count, 100-cap
+// =============================================================================
+/**
+ * Three-path prompt entity extraction with per-path validate-then-count
+ * ordering, cross-path dedup by resolved entity_id, and the
+ * `CANDIDATE_CAP` limit on validated entities.
+ *
+ * Reads the prompt text ONLY — NEVER touches ranked documents,
+ * surfaced snippets, or any retrieval-phase field. This is the §11.1
+ * prompt-only hard constraint.
+ *
+ * Paths, in priority order:
+ *   (a) canonical ids found by regex, confirmed by a direct lookup on
+ *       `entity_nodes.entity_id`;
+ *   (b) proper-noun tokens, confirmed via `resolveEntityTypeExact` and
+ *       resolved to an id via `ensureEntityCanonical`;
+ *   (c) 1-3 token n-grams, confirmed by a batch name lookup plus
+ *       `resolveEntityTypeExact`, longer n-grams first and prompt order
+ *       as the tie-breaker.
+ *
+ * Every per-candidate lookup is wrapped in its own try/catch, so one
+ * failing query skips that candidate only.
+ *
+ * @param prompt Raw user prompt.
+ * @param db Open database handle.
+ * @param vault Vault to resolve entities in.
+ * @returns Validated entities ready for triple-query seeding. Empty
+ *   array on empty prompt, zero matches, or any fail-open branch.
+ */
+export function extractPromptEntities(
+  prompt: string,
+  db: Database,
+  vault: string = "default"
+): ValidatedEntity[] {
+  if (!prompt) return [];
+  const validated: ValidatedEntity[] = [];
+  const seenEntityIds = new Set<string>();
+  // --------------------------------------------------------------------
+  // Path (a): Canonical-ID regex → direct primary-key lookup
+  // --------------------------------------------------------------------
+  const canonicalIds = extractCanonicalIds(prompt);
+  for (const id of canonicalIds) {
+    if (validated.length >= CANDIDATE_CAP) break;
+    if (seenEntityIds.has(id)) continue;
+    try {
+      const exists = db
+        .prepare(`SELECT entity_id, entity_type FROM entity_nodes WHERE entity_id = ? AND vault = ?`)
+        .get(id, vault) as { entity_id: string; entity_type: string } | undefined;
+      if (!exists) continue;
+      seenEntityIds.add(id);
+      validated.push({
+        entityId: id,
+        name: id,
+        type: exists.entity_type,
+        sourcePath: "canonical-id",
+      });
+    } catch {
+      /* fail-open per candidate */
+    }
+  }
+  // --------------------------------------------------------------------
+  // Path (b): Proper-noun extraction → validate-then-count via
+  // resolveEntityTypeExact. Non-null return means exactly-one match.
+  // After confirming type, use ensureEntityCanonical to get the
+  // canonical `vault:type:slug` entity_id. Note: ensureEntityCanonical
+  // is effectively read-only in production because every entity_nodes
+  // row has a matching entities_fts row inserted at upsert time — the
+  // fallback INSERT OR IGNORE fires only when the FTS index got out
+  // of sync (rare / migration edge case), in which case it self-heals.
+  // --------------------------------------------------------------------
+  const properNouns = extractProperNouns(prompt);
+  for (const name of properNouns) {
+    if (validated.length >= CANDIDATE_CAP) break;
+    try {
+      const type = resolveEntityTypeExact(db, name, vault);
+      if (!type) continue; // null = zero or multi-match → skip silently
+      const entityId = ensureEntityCanonical(db, name, type, vault);
+      if (!entityId) continue;
+      if (seenEntityIds.has(entityId)) continue; // cross-path dedup
+      seenEntityIds.add(entityId);
+      validated.push({
+        entityId,
+        name: name.toLowerCase(),
+        type,
+        sourcePath: "proper-noun",
+      });
+    } catch {
+      /* fail-open per candidate */
+    }
+  }
+  // --------------------------------------------------------------------
+  // Path (c): Normalized n-gram scan → priority sort → batch SQL →
+  // per-candidate validate → fill remaining budget.
+  // --------------------------------------------------------------------
+  if (validated.length < CANDIDATE_CAP) {
+    // Sort by priority BEFORE the batch lookup: longer n-grams first
+    // (3 → 2 → 1), then prompt order within each length class.
+    // `generateNgramCandidates` emits every 1-gram first and
+    // `batchLookupNames` only looks up a bounded prefix of its input, so
+    // on a long prompt the unsorted list would spend the whole lookup
+    // window on 1-grams and the more specific 2-/3-grams would never be
+    // checked at all.
+    const ngrams = generateNgramCandidates(prompt).sort(
+      (a, b) => b.length - a.length || a.promptOrder - b.promptOrder
+    );
+    const hits = batchLookupNames(db, ngrams.map(g => g.normalized), vault);
+    // Candidates are already in priority order, so validated hits can
+    // fill the remaining budget directly and validation stops at the cap.
+    for (const gram of ngrams) {
+      if (validated.length >= CANDIDATE_CAP) break;
+      const hit = hits.get(gram.normalized);
+      if (!hit) continue;
+      if (seenEntityIds.has(hit.entityId)) continue; // dedup within and across paths
+      try {
+        // The batch query only proves that SOME row has this name; the
+        // exact-match validator enforces the exactly-one-entity rule.
+        const confirmedType = resolveEntityTypeExact(db, gram.normalized, vault);
+        if (!confirmedType) continue; // multi-match or zero-match → skip
+        seenEntityIds.add(hit.entityId);
+        validated.push({
+          entityId: hit.entityId,
+          name: gram.normalized,
+          type: confirmedType,
+          sourcePath: "ngram",
+        });
+      } catch {
+        /* fail-open per candidate */
+      }
+    }
+  }
+  return validated;
+}
+// =============================================================================
+// Vault-facts block builder
+// =============================================================================
+/** Optional tuning for `buildVaultFactsBlock`. */
+export interface BuildVaultFactsOptions {
+  /** Cap on current triples taken per entity, applied before
+   *  cross-entity dedup. Default 10. */
+  maxTriplesPerEntity?: number;
+  /** Token estimator. Defaults to ~4 chars per token heuristic. */
+  estimateTokens?: (s: string) => number;
+  /** ISO "now" used to filter `validTo > now` (plain string comparison).
+   *  Defaults to `new Date().toISOString()`. */
+  now?: string;
+}
+/** Fallback estimator: one token per 4 characters, rounded up. */
+const DEFAULT_ESTIMATE_TOKENS = (s: string): number => Math.ceil(s.length / 4);
+/**
+ * Build the `<vault-facts>` XML block for a set of validated entities
+ * and a budget in tokens. Returns null if:
+ *   - No entities (caller: skip the stage entirely).
+ *   - Budget is non-positive, or too small for the XML wrapper alone.
+ *   - Zero current triples after filtering (caller: do NOT emit an
+ *     empty `<vault-facts/>` element).
+ *   - Budget too small to fit even one triple (caller: drop block,
+ *     preserve established blocks' budget).
+ *   - Query callback throws for every entity (fail-open).
+ *
+ * Triples are emitted one per line as `subject predicate object`, in
+ * entity order. A triple is "current" when `validTo` is null/empty or
+ * compares greater than `now` as a string. Identical
+ * (subject, predicate, object) triples reached through several entities
+ * are emitted once — otherwise a prompt that resolves both endpoints of
+ * a fact would print it twice and pay for it twice.
+ *
+ * Truncation rule: lines are added greedily and the block ends at the
+ * first line that would exceed the budget. Never mid-triple, never an
+ * empty block. Collection and budgeting share one pass, so no further
+ * entities are queried once the budget is exhausted.
+ *
+ * This function does NOT query the DB directly — the caller passes a
+ * `TripleQueryFn` so tests can inject a mock query.
+ *
+ * @param entities Validated entities to seed the triple queries.
+ * @param queryTriples Callback returning the triples of one entity id.
+ * @param budgetTokens Token budget for the whole block, wrapper included.
+ * @param options Per-entity cap, token estimator, and "now" override.
+ * @returns The serialized block, or null when nothing should be emitted.
+ */
+export function buildVaultFactsBlock(
+  entities: ValidatedEntity[],
+  queryTriples: TripleQueryFn,
+  budgetTokens: number,
+  options: BuildVaultFactsOptions = {}
+): string | null {
+  if (entities.length === 0) return null;
+  if (budgetTokens <= 0) return null;
+  const maxPerEntity = options.maxTriplesPerEntity ?? 10;
+  const estimate = options.estimateTokens ?? DEFAULT_ESTIMATE_TOKENS;
+  const now = options.now ?? new Date().toISOString();
+  // Structural XML overhead (open + close tag + two newlines). Checked
+  // before any query so a hopeless budget costs no DB work.
+  const OPEN = "<vault-facts>\n";
+  const CLOSE = "\n</vault-facts>";
+  const OVERHEAD = estimate(OPEN + CLOSE);
+  if (OVERHEAD >= budgetTokens) return null;
+  const outLines: string[] = [];
+  const seen = new Set<string>();
+  let runningTokens = OVERHEAD;
+  collect: for (const entity of entities) {
+    let triples: VaultFactsTriple[];
+    try {
+      triples = queryTriples(entity.entityId);
+    } catch {
+      continue; // fail-open per entity
+    }
+    // Current-only filter, then cap per entity so one chatty entity
+    // does not monopolize the shared budget.
+    const current = triples
+      .filter(t => !t.validTo || t.validTo > now)
+      .slice(0, maxPerEntity);
+    for (const t of current) {
+      // NUL-joined key: cannot collide across field boundaries.
+      const key = `${t.subject}\u0000${t.predicate}\u0000${t.object}`;
+      if (seen.has(key)) continue;
+      seen.add(key);
+      const line = `${t.subject} ${t.predicate} ${t.object}`;
+      const lineTokens = estimate(line) + 1; // +1 for the trailing newline
+      // First overflowing line ends the block; later entities are not queried.
+      if (runningTokens + lineTokens > budgetTokens) break collect;
+      outLines.push(line);
+      runningTokens += lineTokens;
+    }
+  }
+  if (outLines.length === 0) return null;
+  return `${OPEN}${outLines.join("\n")}${CLOSE}`;
+}