clawmem 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/AGENTS.md CHANGED
@@ -258,7 +258,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
258
258
 
259
259
  | Hook | Trigger | Budget | Content |
260
260
  |------|---------|--------|---------|
261
- | `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate → **multi-turn query construction** (v0.8.1: current prompt + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, capped at 2000 chars with current-first preservation — used only for discovery: vector/FTS/expansion, NOT for rerank/scoring/snippet extraction) → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13, raw current prompt) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, and min score all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future multi-turn lookback — except on gated skip paths (slash commands, heartbeats, too-short prompts) where the text is withheld for privacy. |
261
+ | `context-surfacing` | UserPromptSubmit | profile-driven (default 800 + factsTokens sub-budget) | retrieval gate → **multi-turn query construction** (v0.8.1: current prompt + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, capped at 2000 chars with current-first preservation — used only for discovery: vector/FTS/expansion, NOT for rerank/scoring/snippet extraction) → **session focus topic resolution** (v0.9.0 §11.4: reads per-session focus file at `~/.cache/clawmem/sessions/<id>.focus`, threaded as intent hint to `expandQuery` + `rerank` + `extractSnippet`) → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13, raw current prompt) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → composite scoring → **session focus topic boost** (v0.9.0 §11.4: 1.4× match / 0.75× demote floor 50%, NO-OP on zero matches to preserve baseline ordering) → adaptive threshold → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships><vault-facts>…</vault-facts></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget. **v0.9.0 §11.1:** `<vault-facts>` KG injection block appends raw SPO triple lines from entities seeded by the prompt via three-path prompt-only extraction — canonical IDs + proper nouns + longer-first n-grams — with a dedicated `factsTokens` sub-budget per profile (speed=0 disables the stage, balanced=200, deep=250), cross-entity triple dedup, and truncate-at-triple-boundary budget discipline; fail-open on every error path) + optional `<vault-routing>` hint. Budget, max results, vector timeout, min score, and facts sub-budget all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future multi-turn lookback — except on gated skip paths (slash commands, heartbeats, too-short prompts) where the text is withheld for privacy. |
262
262
  | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
263
263
  | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
264
264
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
@@ -712,6 +712,18 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
712
712
  # Uses Jaccard similarity within same collection
713
713
  ```
714
714
 
715
+ **Session focus topic (v0.9.0 §11.4):** Per-session topic biasing for context-surfacing. Writes a focus file at `~/.cache/clawmem/sessions/<session_id>.focus` that steers query expansion, reranking, snippet extraction, and post-composite-score topic boost (1.4× match / 0.75× demote, NO-OP on zero matches). Session-isolated — never writes to SQLite or lifecycle columns. The session ID is read from `--session-id <id>`, then `CLAUDE_SESSION_ID`, then `CLAWMEM_SESSION_ID`. When to use: user says "focus on authentication for this session" / "only surface X-related docs right now" / "let's work on Y this session." Clear the focus at the end of the subsession to return to baseline surfacing.
716
+
717
+ ```bash
718
+ # Set a focus topic for the current session (multi-word OK)
719
+ clawmem focus set "authentication flow" # uses CLAUDE_SESSION_ID / CLAWMEM_SESSION_ID env var
720
+ clawmem focus set "authentication flow" --session-id abc123 # explicit
721
+
722
+ # Show / clear
723
+ clawmem focus show --session-id abc123
724
+ clawmem focus clear --session-id abc123
725
+ ```
726
+
715
727
  ## Integration Notes
716
728
 
717
729
  - **Memory nudge (v0.2.0):** Every N prompts (default 15) without a lifecycle MCP tool call (`memory_pin`/`memory_forget`/`memory_snooze`), context-surfacing appends `<vault-nudge>` prompting proactive memory management. Counter resets on lifecycle tool use. Configure via `CLAWMEM_NUDGE_INTERVAL` (0 to disable).
package/CLAUDE.md CHANGED
@@ -258,7 +258,7 @@ ClawMem hooks handle ~90% of retrieval automatically. Agent-initiated MCP calls
258
258
 
259
259
  | Hook | Trigger | Budget | Content |
260
260
  |------|---------|--------|---------|
261
- | `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate → **multi-turn query construction** (v0.8.1: current prompt + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, capped at 2000 chars with current-first preservation — used only for discovery: vector/FTS/expansion, NOT for rerank/scoring/snippet extraction) → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13, raw current prompt) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, and min score all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future multi-turn lookback — except on gated skip paths (slash commands, heartbeats, too-short prompts) where the text is withheld for privacy. |
261
+ | `context-surfacing` | UserPromptSubmit | profile-driven (default 800 + factsTokens sub-budget) | retrieval gate → **multi-turn query construction** (v0.8.1: current prompt + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, capped at 2000 chars with current-first preservation — used only for discovery: vector/FTS/expansion, NOT for rerank/scoring/snippet extraction) → **session focus topic resolution** (v0.9.0 §11.4: reads per-session focus file at `~/.cache/clawmem/sessions/<id>.focus`, threaded as intent hint to `expandQuery` + `rerank` + `extractSnippet`) → profile-driven hybrid search (vector if `useVector`, timeout from profile) → FTS supplement → file-aware supplemental search (E13, raw current prompt) → snooze filter → noise filter → spreading activation (E11: co-activated doc boost) → composite scoring → **session focus topic boost** (v0.9.0 §11.4: 1.4× match / 0.75× demote floor 50%, NO-OP on zero matches to preserve baseline ordering) → adaptive threshold → memory type diversification (E10) → tiered injection (HOT/WARM/COLD snippets) → `<vault-context><instruction>…</instruction><facts>…</facts><relationships>…</relationships><vault-facts>…</vault-facts></vault-context>` (v0.7.1: instruction always prepended when context is returned; relationships block lists memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget. **v0.9.0 §11.1:** `<vault-facts>` KG injection block appends raw SPO triple lines from entities seeded by the prompt via three-path prompt-only extraction — canonical IDs + proper nouns + longer-first n-grams — with a dedicated `factsTokens` sub-budget per profile (speed=0 disables the stage, balanced=200, deep=250), cross-entity triple dedup, and truncate-at-triple-boundary budget discipline; fail-open on every error path) + optional `<vault-routing>` hint. Budget, max results, vector timeout, min score, and facts sub-budget all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future multi-turn lookback — except on gated skip paths (slash commands, heartbeats, too-short prompts) where the text is withheld for privacy. |
262
262
  | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + recent decisions (400) + antipatterns (150) + vault context (200) → `<vault-postcompact>` |
263
263
  | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
264
264
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions → writes `precompact-state.md` to auto-memory. Query-aware decision ranking. Reindexes auto-memory collection. |
@@ -712,6 +712,18 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
712
712
  # Uses Jaccard similarity within same collection
713
713
  ```
714
714
 
715
+ **Session focus topic (v0.9.0 §11.4):** Per-session topic biasing for context-surfacing. Writes a focus file at `~/.cache/clawmem/sessions/<session_id>.focus` that steers query expansion, reranking, snippet extraction, and post-composite-score topic boost (1.4× match / 0.75× demote, NO-OP on zero matches). Session-isolated — never writes to SQLite or lifecycle columns. The session ID is read from `--session-id <id>`, then `CLAUDE_SESSION_ID`, then `CLAWMEM_SESSION_ID`. When to use: user says "focus on authentication for this session" / "only surface X-related docs right now" / "let's work on Y this session." Clear the focus at the end of the subsession to return to baseline surfacing.
716
+
717
+ ```bash
718
+ # Set a focus topic for the current session (multi-word OK)
719
+ clawmem focus set "authentication flow" # uses CLAUDE_SESSION_ID / CLAWMEM_SESSION_ID env var
720
+ clawmem focus set "authentication flow" --session-id abc123 # explicit
721
+
722
+ # Show / clear
723
+ clawmem focus show --session-id abc123
724
+ clawmem focus clear --session-id abc123
725
+ ```
726
+
715
727
  ## Integration Notes
716
728
 
717
729
  - **Memory nudge (v0.2.0):** Every N prompts (default 15) without a lifecycle MCP tool call (`memory_pin`/`memory_forget`/`memory_snooze`), context-surfacing appends `<vault-nudge>` prompting proactive memory management. Counter resets on lifecycle tool use. Configure via `CLAWMEM_NUDGE_INTERVAL` (0 to disable).
package/README.md CHANGED
@@ -31,6 +31,8 @@ ClawMem turns your markdown notes, project docs, and research dumps into persist
31
31
  - **Guards against cross-entity merges** during consolidation — name-aware dual-threshold merge safety compares entity anchors before merging similar observations, preventing "Alice decided X" from merging into "Bob decided X" (v0.7.1)
32
32
  - **Prevents context bleed in derived insights** — the Phase 3 deductive synthesis pipeline validates every draft against an anti-contamination wrapper (deterministic entity contamination check + LLM validator + dedupe) before writing cross-session deductive observations (v0.7.1)
33
33
  - **Frames surfaced facts as background knowledge** — `context-surfacing` wraps injected content in `<instruction>` + `<facts>` + `<relationships>` blocks, telling the model to treat facts as already-known and exposing memory-graph edges between surfaced docs directly in-prompt (v0.7.1)
34
+ - **Injects knowledge-graph facts as structured triples** — when the user's prompt mentions entities already known to the vault, `context-surfacing` resolves them via a three-path prompt-only extractor (canonical IDs, proper nouns, lowercased n-grams), queries the SPO graph for current-state triples, and appends a `<vault-facts>` block of raw `subject predicate object` lines to `<vault-context>` — off for `speed`, 200 tokens on `balanced`, 250 on `deep`, token-truncated at the triple boundary (v0.9.0)
35
+ - **Session-scoped focus topic boost** — `clawmem focus set "<topic>" --session-id <id>` writes a per-session focus file that steers query expansion, reranking, chunk selection, snippet extraction, and post-composite-score topic boosting (1.4× match / 0.75× demote) for that session only — session-isolated, fail-open, never writes to SQLite or lifecycle columns (v0.9.0)
34
36
  - **Scores document quality** using structure, keywords, and metadata richness signals
35
37
  - **Boosts co-accessed documents** — notes frequently surfaced together get retrieval reinforcement
36
38
  - **Decomposes complex queries** into typed retrieval clauses (BM25/vector/graph) for multi-topic questions
package/SKILL.md CHANGED
@@ -190,7 +190,7 @@ Hooks handle ~90% of retrieval. Zero agent effort.
190
190
 
191
191
  | Hook | Trigger | Budget | Content |
192
192
  |------|---------|--------|---------|
193
- | `context-surfacing` | UserPromptSubmit | profile-driven (default 800) | retrieval gate -> **multi-turn query** (v0.8.1: current + up to 2 recent same-session priors from `context_usage.query_text`, 10-min max age, 2000-char cap with current-first, used only for discovery not rerank/scoring/snippet) -> profile-driven hybrid search (vector if `useVector`, timeout from profile) -> FTS supplement -> file-aware search (E13, raw current) -> snooze filter -> noise filter -> spreading activation (E11) -> memory type diversification (E10) -> tiered injection (HOT/WARM/COLD) -> `<vault-context><instruction>...</instruction><facts>...</facts><relationships>...</relationships></vault-context>` (v0.7.1: instruction always prepended; relationships list memory-graph edges where BOTH endpoints are in the surfaced set; relationships truncated first when over budget) + optional `<vault-routing>` hint. Budget, max results, vector timeout, min score all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future lookback — gated skip paths (slash commands, heartbeats, too-short prompts) withhold the text for privacy. |
193
+ | `context-surfacing` | UserPromptSubmit | profile-driven (default 800 + factsTokens sub-budget) | retrieval gate -> **multi-turn query** (v0.8.1: current + up to 2 recent same-session priors, discovery only) -> **session focus topic resolution** (v0.9.0 §11.4: reads `~/.cache/clawmem/sessions/<id>.focus`, threaded as intent hint to expansion/rerank/snippet) -> profile-driven hybrid search -> FTS supplement -> file-aware search (E13) -> snooze/noise filters -> spreading activation (E11) -> composite scoring -> **session focus topic boost** (v0.9.0 §11.4: 1.4x match / 0.75x demote, NO-OP on zero matches) -> adaptive threshold -> memory type diversification (E10) -> tiered injection (HOT/WARM/COLD) -> `<vault-context><instruction>...</instruction><facts>...</facts><relationships>...</relationships><vault-facts>...</vault-facts></vault-context>` (v0.7.1: instruction always prepended; relationships = memory-graph edges where BOTH endpoints are in the surfaced set, truncated first when over budget. **v0.9.0 §11.1:** `<vault-facts>` appends raw SPO triple lines when the prompt mentions known entities via three-path extraction (canonical-id regex + proper-noun validation + longer-first n-grams), dedicated `factsTokens` sub-budget per profile (speed=0, balanced=200, deep=250), cross-entity triple dedup, truncate-at-triple-boundary, fail-open on every error path) + optional `<vault-routing>` hint. Budget, max results, vector timeout, min score, facts sub-budget all driven by `CLAWMEM_PROFILE`. Raw prompt persisted to `context_usage.query_text` for future lookback — gated skip paths withhold the text for privacy. |
194
194
  | `postcompact-inject` | SessionStart (compact) | 1200 tokens | re-injects authoritative context after compaction: precompact state (600) + decisions (400) + antipatterns (150) + vault context (200) -> `<vault-postcompact>` |
195
195
  | `curator-nudge` | SessionStart | 200 tokens | surfaces curator report actions, nudges when report is stale (>7 days) |
196
196
  | `precompact-extract` | PreCompact | — | extracts decisions, file paths, open questions -> writes `precompact-state.md`. Query-aware ranking. Reindexes auto-memory. |
@@ -761,6 +761,19 @@ clawmem consolidate [--dry-run] # Find and archive duplicate low-confidence docu
761
761
  # Jaccard similarity within same collection
762
762
  ```
763
763
 
764
+ ### Session Focus Topic (v0.9.0 §11.4)
765
+
766
+ Per-session topic biasing for context-surfacing. Writes a focus file at `~/.cache/clawmem/sessions/<session_id>.focus` that steers query expansion, reranking, snippet extraction, and post-composite-score topic boost (1.4x match / 0.75x demote, NO-OP on zero matches). Session-isolated — never writes to SQLite or lifecycle columns. Session ID resolved from `--session-id <id>` > `CLAUDE_SESSION_ID` env > `CLAWMEM_SESSION_ID` env.
767
+
768
+ **When to use:** user says "focus on X for this session" / "only surface Y right now" / "let's work on Z." Clear at end of subsession to return to baseline.
769
+
770
+ ```bash
771
+ clawmem focus set "authentication flow" # uses CLAUDE_SESSION_ID / CLAWMEM_SESSION_ID env
772
+ clawmem focus set "authentication flow" --session-id abc123 # explicit session id
773
+ clawmem focus show --session-id abc123
774
+ clawmem focus clear --session-id abc123
775
+ ```
776
+
764
777
 
765
778
  ---
766
779
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "clawmem",
3
- "version": "0.8.5",
3
+ "version": "0.9.0",
4
4
  "description": "On-device context engine and memory for AI agents. Claude Code and OpenClaw. Hooks + MCP server + hybrid RAG search.",
5
5
  "type": "module",
6
6
  "bin": {
package/src/clawmem.ts CHANGED
@@ -64,6 +64,12 @@ import { precompactExtract } from "./hooks/precompact-extract.ts";
64
64
  import { postcompactInject } from "./hooks/postcompact-inject.ts";
65
65
  import { pretoolInject } from "./hooks/pretool-inject.ts";
66
66
  import { curatorNudge } from "./hooks/curator-nudge.ts";
67
+ import {
68
+ readSessionFocus,
69
+ writeSessionFocus,
70
+ clearSessionFocus,
71
+ focusFilePath,
72
+ } from "./session-focus.ts";
67
73
 
68
74
  enableProductionMode();
69
75
 
@@ -1906,6 +1912,91 @@ async function cmdProfile(args: string[]) {
1906
1912
  }
1907
1913
  }
1908
1914
 
1915
+ // §11.4 (v0.9.0): session-scoped focus topic — read/write/clear the
1916
+ // per-session focus file at ~/.cache/clawmem/sessions/<session_id>.focus.
1917
+ // The file is the primary signal read by context-surfacing for topic
1918
+ // boosting; the CLAWMEM_SESSION_FOCUS env var is a debug-only override
1919
+ // that does NOT provide per-session scoping on multi-session hosts.
1920
+ async function cmdFocus(args: string[]) {
1921
+ const subCmd = args[0];
1922
+
1923
+ function resolveSessionId(rest: string[]): string {
1924
+ const sidIdx = rest.indexOf("--session-id");
1925
+ if (sidIdx >= 0 && rest[sidIdx + 1]) return rest[sidIdx + 1]!;
1926
+ const envSid = (
1927
+ process.env.CLAUDE_SESSION_ID ||
1928
+ process.env.CLAWMEM_SESSION_ID ||
1929
+ ""
1930
+ ).trim();
1931
+ if (envSid) return envSid;
1932
+ die(
1933
+ "No session id. Pass --session-id <id>, or set CLAUDE_SESSION_ID " +
1934
+ "(Claude Code exposes this) or CLAWMEM_SESSION_ID env var before " +
1935
+ "invoking this command."
1936
+ );
1937
+ }
1938
+
1939
+ function stripSessionIdArg(rest: string[]): string[] {
1940
+ const sidIdx = rest.indexOf("--session-id");
1941
+ if (sidIdx < 0) return rest;
1942
+ return [...rest.slice(0, sidIdx), ...rest.slice(sidIdx + 2)];
1943
+ }
1944
+
1945
+ switch (subCmd) {
1946
+ case "set": {
1947
+ const rest = args.slice(1);
1948
+ const sessionId = resolveSessionId(rest);
1949
+ const positional = stripSessionIdArg(rest);
1950
+ const topic = positional.join(" ").trim();
1951
+ if (!topic) {
1952
+ die("Usage: clawmem focus set <topic> [--session-id <id>]");
1953
+ }
1954
+ try {
1955
+ writeSessionFocus(sessionId, topic);
1956
+ } catch (err: any) {
1957
+ die(`Failed to set focus: ${err?.message ?? err}`);
1958
+ }
1959
+ console.log(
1960
+ `${c.green}Focus set${c.reset} for session ${c.cyan}${sessionId}${c.reset}: ${topic}`
1961
+ );
1962
+ console.log(`${c.dim}File: ${focusFilePath(sessionId)}${c.reset}`);
1963
+ break;
1964
+ }
1965
+ case "show": {
1966
+ const rest = args.slice(1);
1967
+ const sessionId = resolveSessionId(rest);
1968
+ const topic = readSessionFocus(sessionId);
1969
+ if (topic) {
1970
+ console.log(
1971
+ `${c.green}Focus${c.reset} for session ${c.cyan}${sessionId}${c.reset}: ${topic}`
1972
+ );
1973
+ console.log(`${c.dim}File: ${focusFilePath(sessionId)}${c.reset}`);
1974
+ } else {
1975
+ console.log(
1976
+ `${c.yellow}No focus${c.reset} set for session ${c.cyan}${sessionId}${c.reset}.`
1977
+ );
1978
+ console.log(
1979
+ `${c.dim}Expected file: ${focusFilePath(sessionId)}${c.reset}`
1980
+ );
1981
+ }
1982
+ break;
1983
+ }
1984
+ case "clear": {
1985
+ const rest = args.slice(1);
1986
+ const sessionId = resolveSessionId(rest);
1987
+ clearSessionFocus(sessionId);
1988
+ console.log(
1989
+ `${c.green}Focus cleared${c.reset} for session ${c.cyan}${sessionId}${c.reset}.`
1990
+ );
1991
+ break;
1992
+ }
1993
+ default:
1994
+ die(
1995
+ "Usage: clawmem focus <set|show|clear> [<topic>] [--session-id <id>]"
1996
+ );
1997
+ }
1998
+ }
1999
+
1909
2000
  // =============================================================================
1910
2001
  // Main dispatch
1911
2002
  // =============================================================================
@@ -1994,6 +2085,9 @@ async function main() {
1994
2085
  case "profile":
1995
2086
  await cmdProfile(subArgs);
1996
2087
  break;
2088
+ case "focus":
2089
+ await cmdFocus(subArgs);
2090
+ break;
1997
2091
  case "update-context":
1998
2092
  await cmdUpdateContext();
1999
2093
  break;
@@ -2644,6 +2738,9 @@ ${c.bold}Memory:${c.reset}
2644
2738
  clawmem log [--last N] Session history
2645
2739
  clawmem profile Show user profile
2646
2740
  clawmem profile rebuild Force profile rebuild
2741
+ clawmem focus set <topic> [--session-id ID] Set per-session focus topic (steers context-surfacing)
2742
+ clawmem focus show [--session-id ID] Show current focus topic
2743
+ clawmem focus clear [--session-id ID] Clear focus topic
2647
2744
 
2648
2745
  ${c.bold}Hooks:${c.reset}
2649
2746
  clawmem hook <name> Run hook (stdin JSON)
package/src/config.ts CHANGED
@@ -84,12 +84,23 @@ export interface ProfileConfig {
84
84
  deepEscalation: boolean;
85
85
  /** Max time (ms) allowed for the fast path before escalation is considered */
86
86
  escalationBudgetMs: number;
87
+ /**
88
+ * §11.1 (v0.9.0): sub-budget for the `<vault-facts>` KG injection block.
89
+ * Dedicated token allowance so `<vault-facts>` cannot steal budget from
90
+ * the existing `<facts>` / `<relationships>` blocks. `speed` profile is
91
+ * gated off (factsTokens=0 → stage skipped entirely). `balanced` / `deep`
92
+ * get 200 / 250 respectively. If the serialized facts would exceed this
93
+ * sub-budget, truncation happens at the triple boundary. If the total
94
+ * hook output would push past `tokenBudget + factsTokens`, the whole
95
+ * `<vault-facts>` block is dropped (established blocks take priority).
96
+ */
97
+ factsTokens: number;
87
98
  }
88
99
 
89
100
  export const PROFILES: Record<PerformanceProfile, ProfileConfig> = {
90
- speed: { tokenBudget: 400, maxResults: 5, useVector: false, vectorTimeout: 0, minScore: 0.55, minScoreRatio: 0.65, absoluteFloor: 0.18, activationFloor: 0.24, thresholdMode: "adaptive", deepEscalation: false, escalationBudgetMs: 0 },
91
- balanced: { tokenBudget: 800, maxResults: 10, useVector: true, vectorTimeout: 900, minScore: 0.45, minScoreRatio: 0.55, absoluteFloor: 0.15, activationFloor: 0.20, thresholdMode: "adaptive", deepEscalation: false, escalationBudgetMs: 0 },
92
- deep: { tokenBudget: 1200, maxResults: 15, useVector: true, vectorTimeout: 2000, minScore: 0.25, minScoreRatio: 0.45, absoluteFloor: 0.12, activationFloor: 0.16, thresholdMode: "adaptive", deepEscalation: true, escalationBudgetMs: 4000 },
101
+ speed: { tokenBudget: 400, maxResults: 5, useVector: false, vectorTimeout: 0, minScore: 0.55, minScoreRatio: 0.65, absoluteFloor: 0.18, activationFloor: 0.24, thresholdMode: "adaptive", deepEscalation: false, escalationBudgetMs: 0, factsTokens: 0 },
102
+ balanced: { tokenBudget: 800, maxResults: 10, useVector: true, vectorTimeout: 900, minScore: 0.45, minScoreRatio: 0.55, absoluteFloor: 0.15, activationFloor: 0.20, thresholdMode: "adaptive", deepEscalation: false, escalationBudgetMs: 0, factsTokens: 200 },
103
+ deep: { tokenBudget: 1200, maxResults: 15, useVector: true, vectorTimeout: 2000, minScore: 0.25, minScoreRatio: 0.45, absoluteFloor: 0.12, activationFloor: 0.16, thresholdMode: "adaptive", deepEscalation: true, escalationBudgetMs: 4000, factsTokens: 250 },
93
104
  };
94
105
 
95
106
  export function getActiveProfile(): ProfileConfig {
@@ -31,6 +31,12 @@ import { sanitizeSnippet } from "../promptguard.ts";
31
31
  import { shouldSkipRetrieval, isRetrievedNoise } from "../retrieval-gate.ts";
32
32
  import { MAX_QUERY_LENGTH } from "../limits.ts";
33
33
  import { writeRecallEvents, hashQuery } from "../recall-buffer.ts";
34
+ import { resolveSessionTopic, applyTopicBoost } from "../session-focus.ts";
35
+ import {
36
+ extractPromptEntities,
37
+ buildVaultFactsBlock,
38
+ type VaultFactsTriple,
39
+ } from "../vault-facts.ts";
34
40
 
35
41
  // =============================================================================
36
42
  // Config
@@ -143,6 +149,20 @@ export async function contextSurfacing(
143
149
  const tokenBudget = profile.tokenBudget;
144
150
  const startTime = Date.now();
145
151
 
152
+ // §11.4: Resolve session-scoped focus topic. Primary signal is the
153
+ // per-session focus file at ~/.cache/clawmem/sessions/<id>.focus
154
+ // (file > env var precedence via resolveSessionTopic). Env var
155
+ // CLAWMEM_SESSION_FOCUS is a debug-only override and does NOT
156
+ // provide per-session scoping on multi-session hosts. Used as
157
+ // (a) optional `intent` on expandQuery/rerank/extractSnippet call
158
+ // sites below, and (b) the driver for the post-composite topic
159
+ // boost stage. Fail-open: missing / unreadable / corrupt / empty /
160
+ // oversized focus file → undefined → every consumer no-ops.
161
+ const sessionTopic = resolveSessionTopic(
162
+ input.sessionId,
163
+ process.env.CLAWMEM_SESSION_FOCUS
164
+ );
165
+
146
166
  const isRecency = hasRecencyIntent(prompt);
147
167
  const minScore = isRecency ? MIN_COMPOSITE_SCORE_RECENCY : profile.minScore;
148
168
 
@@ -239,7 +259,7 @@ export async function contextSurfacing(
239
259
  if (elapsed < profile.escalationBudgetMs) {
240
260
  try {
241
261
  // Phase 1: Query expansion — discover candidates BM25+vector missed
242
- const expanded = await store.expandQuery(retrievalQuery, DEFAULT_QUERY_MODEL);
262
+ const expanded = await store.expandQuery(retrievalQuery, DEFAULT_QUERY_MODEL, sessionTopic);
243
263
  if (expanded.length > 0) {
244
264
  const seen = new Set(results.map(r => r.filepath));
245
265
  for (const eq of expanded.slice(0, 3)) {
@@ -263,7 +283,7 @@ export async function contextSurfacing(
263
283
  file: r.filepath,
264
284
  text: (r.body || "").slice(0, 2000),
265
285
  }));
266
- const reranked = await store.rerank(prompt, toRerank, DEFAULT_RERANK_MODEL);
286
+ const reranked = await store.rerank(prompt, toRerank, DEFAULT_RERANK_MODEL, sessionTopic);
267
287
  if (reranked.length > 0) {
268
288
  const rerankedMap = new Map(reranked.map(r => [r.file, r.score]));
269
289
  // Blend: 60% original score + 40% reranker score for stability
@@ -335,6 +355,15 @@ export async function contextSurfacing(
335
355
  // Apply composite scoring
336
356
  const allScored = applyCompositeScoring(enriched, prompt);
337
357
 
358
+ // §11.4: Session-scoped topic boost — post-composite, pre-threshold.
359
+ // Boosts docs whose title/path/body match all tokens of the declared
360
+ // session focus topic (1.4×); demotes non-matching docs (0.75×, floor
361
+ // 50%). Mutates compositeScore in place and re-sorts. Fail-open: no
362
+ // topic set → no-op (byte-identical pre-§11.4 output).
363
+ if (sessionTopic) {
364
+ applyTopicBoost(allScored, sessionTopic, { boostFactor: 1.4, demoteFactor: 0.75 });
365
+ }
366
+
338
367
  // Threshold filtering — adaptive (ratio-based) or absolute (legacy)
339
368
  let scored: typeof allScored;
340
369
  if (profile.thresholdMode === "adaptive") {
@@ -400,7 +429,7 @@ export async function contextSurfacing(
400
429
  // in afterward using whatever budget remains and are the first thing
401
430
  // truncated when the payload would overflow.
402
431
  const factsBudget = Math.max(0, tokenBudget - INSTRUCTION_TOKEN_COST);
403
- const { context, paths, tokens } = buildContext(scored, prompt, factsBudget);
432
+ const { context, paths, tokens } = buildContext(scored, prompt, factsBudget, sessionTopic);
404
433
 
405
434
  if (!context) {
406
435
  logEmptyTurn(store, input, prompt);
@@ -489,9 +518,60 @@ export async function contextSurfacing(
489
518
  );
490
519
  const vaultInner = buildVaultContextInner(context, relationSnippets, relationBudget);
491
520
 
521
+ // §11.1 (v0.9.0): `<vault-facts>` KG injection.
522
+ //
523
+ // Stage ordering (frozen in BACKLOG.md §11.1): retrieval + rerank +
524
+ // scoring + topic boost (§11.4) + threshold + diversification → build
525
+ // <facts>/<relationships> → compute remaining facts-block budget →
526
+ // inject <vault-facts> if entities resolve AND budget allows.
527
+ //
528
+ // Prompt-only seeding (HARD CONSTRAINT): entity seeds come from the
529
+ // raw user prompt ONLY, never from `surfacedDocs[i].body`, snippets,
530
+ // or any retrieval-phase field. Without this, a topic-boosted
531
+ // off-topic doc (§11.4) could pollute the facts block with facts
532
+ // about entities that have nothing to do with the user's actual
533
+ // prompt.
534
+ //
535
+ // Profile-gated via `profile.factsTokens`: `speed` profile sets this
536
+ // to 0, which naturally disables the stage. `balanced`/`deep` get a
537
+ // dedicated sub-budget that cannot steal from <facts>/<relationships>.
538
+ //
539
+ // Fail-open: any DB error, empty entity set, empty triple set, or
540
+ // budget-too-small case returns the baseline `vaultInner` unchanged
541
+ // (byte-identical pre-§11.1 output).
542
+ let vaultInnerWithFacts = vaultInner;
543
+ if (profile.factsTokens > 0) {
544
+ try {
545
+ const entities = extractPromptEntities(prompt, store.db, "default");
546
+ if (entities.length > 0) {
547
+ const queryTriples = (entityId: string): VaultFactsTriple[] =>
548
+ store
549
+ .queryEntityTriples(entityId)
550
+ .map(t => ({
551
+ subject: t.subject,
552
+ predicate: t.predicate,
553
+ object: t.object,
554
+ validTo: t.validTo,
555
+ confidence: t.confidence,
556
+ }));
557
+ const factsBlock = buildVaultFactsBlock(
558
+ entities,
559
+ queryTriples,
560
+ profile.factsTokens,
561
+ { estimateTokens }
562
+ );
563
+ if (factsBlock) {
564
+ vaultInnerWithFacts = `${vaultInner}\n${factsBlock}`;
565
+ }
566
+ }
567
+ } catch {
568
+ /* fail-open: degraded vault behaves identically to pre-§11.1 */
569
+ }
570
+ }
571
+
492
572
  const parts: string[] = [];
493
573
  if (routingHint) parts.push(`<vault-routing>${routingHint}</vault-routing>`);
494
- parts.push(`<vault-context>\n${vaultInner}\n</vault-context>`);
574
+ parts.push(`<vault-context>\n${vaultInnerWithFacts}\n</vault-context>`);
495
575
  if (nudge) parts.push(`<vault-nudge>${NUDGE_TEXT}</vault-nudge>`);
496
576
 
497
577
  return makeContextOutput("context-surfacing", parts.join("\n"));
@@ -552,7 +632,8 @@ function detectRoutingHint(prompt: string): string | null {
552
632
  function buildContext(
553
633
  scored: ScoredResult[],
554
634
  query: string,
555
- budget: number = DEFAULT_TOKEN_BUDGET
635
+ budget: number = DEFAULT_TOKEN_BUDGET,
636
+ intent?: string
556
637
  ): { context: string; paths: string[]; tokens: number } {
557
638
  const lines: string[] = [];
558
639
  const paths: string[] = [];
@@ -579,7 +660,7 @@ function buildContext(
579
660
  if (sanitized === "[content filtered for security]") continue;
580
661
 
581
662
  const snippet = smartTruncate(
582
- extractSnippet(sanitized, query, tier.snippetLen, r.chunkPos).snippet,
663
+ extractSnippet(sanitized, query, tier.snippetLen, r.chunkPos, intent).snippet,
583
664
  tier.snippetLen
584
665
  );
585
666
  entry = `**${safeTitle}**${typeTag}\n${safePath}\n${snippet}`;
@@ -0,0 +1,227 @@
1
+ /**
2
+ * Session-Scoped Focus (§11.4 — v0.9.0)
3
+ *
4
+ * Per-session topic primitive that biases context-surfacing ranking toward
5
+ * docs relevant to the declared working context — WITHOUT persisting any
6
+ * state to SQLite. Intra-session curation that cannot contaminate other
7
+ * sessions.
8
+ *
9
+ * Primary signal: per-session state file at
10
+ * ~/.cache/clawmem/sessions/<session_id>.focus
11
+ *
12
+ * The env var CLAWMEM_SESSION_FOCUS is a DEBUG-ONLY fallback: it is used only
13
+ * when no valid per-session file exists, and because it is a single process-wide
14
+ * variable it does NOT provide per-session scoping in multi-session host
15
+ * processes (e.g. a long-lived MCP server handling multiple Claude Code
16
+ * sessions). Use the file path for correctness; use the env var for
17
+ * ad-hoc single-session debugging only.
18
+ *
19
+ * All read paths are fail-open. Unreadable, corrupt, empty, missing,
20
+ * invalid-UTF-8, or oversized focus files return undefined and the
21
+ * caller proceeds with baseline ranking (byte-identical to pre-§11.4).
22
+ * The stage must NEVER half-apply a malformed topic.
23
+ */
24
+
25
+ import * as fs from "fs";
26
+ import * as path from "path";
27
+ import * as os from "os";
28
+ import type { ScoredResult } from "./memory.ts";
29
+
30
+ const MAX_TOPIC_LEN = 256;
31
+
32
/**
 * Directory that holds the per-session focus files.
 *
 * `CLAWMEM_FOCUS_ROOT` wins when it is set to something other than
 * whitespace; the value is returned exactly as configured (not trimmed).
 * Otherwise the location is `<home>/.cache/clawmem/sessions`.
 *
 * The environment is consulted on every call rather than cached, so a
 * test (or operator) can change the variable without reloading the module.
 *
 * @returns The focus-file root directory.
 */
export function focusRoot(): string {
  const configured = process.env.CLAWMEM_FOCUS_ROOT ?? "";
  const hasOverride = configured.trim() !== "";
  return hasOverride
    ? configured
    : path.join(os.homedir(), ".cache", "clawmem", "sessions");
}
47
+
48
/**
 * Map a session id to the path of its focus file under `focusRoot()`.
 *
 * The id is reduced to a single safe path segment first: every character
 * outside `[A-Za-z0-9._-]` becomes `_`. Without this, an id containing a
 * path separator (for example `../../x`) would be normalized by
 * `path.join` into a location outside the focus root, and the
 * read / write / unlink helpers in this module would then operate on
 * that foreign path.
 *
 * @param sessionId Session identifier supplied by the host.
 * @returns `<focusRoot>/<sanitized id>.focus`
 */
export function focusFilePath(sessionId: string): string {
  const safeId = sessionId.replace(/[^A-Za-z0-9._-]/g, "_");
  return path.join(focusRoot(), `${safeId}.focus`);
}
51
+
52
/**
 * Read the session focus topic. Returns undefined on any failure:
 *   - sessionId missing/empty
 *   - file does not exist, or the path is not a regular file
 *   - file unreadable (permissions, etc.)
 *   - file larger than 64 KiB (checked via stat BEFORE reading, so an
 *     oversized file is never pulled into memory)
 *   - file contains invalid UTF-8 (strict decode throws)
 *   - file empty or whitespace-only
 *   - trimmed topic exceeds MAX_TOPIC_LEN
 *
 * The bytes are decoded with a fatal `TextDecoder` rather than
 * `readFileSync(..., "utf-8")`: the latter silently substitutes U+FFFD
 * for malformed sequences instead of throwing, which would let a
 * corrupt topic be applied.
 *
 * Never throws. Caller treats undefined as "no topic set" and skips
 * the boost stage entirely.
 *
 * @param sessionId Session identifier; falsy values short-circuit to undefined.
 * @returns The trimmed topic, or undefined when no valid topic is available.
 */
export function readSessionFocus(sessionId?: string): string | undefined {
  if (!sessionId) return undefined;
  // Generous on-disk ceiling: far above any valid topic, small enough
  // that a runaway file is rejected without being read.
  const maxFileBytes = 64 * 1024;
  try {
    const p = focusFilePath(sessionId);
    // statSync throws ENOENT for a missing file -> handled by the catch.
    const stat = fs.statSync(p);
    if (!stat.isFile() || stat.size > maxFileBytes) return undefined;
    const bytes = fs.readFileSync(p);
    const topic = new TextDecoder("utf-8", { fatal: true }).decode(bytes).trim();
    if (!topic) return undefined;
    if (topic.length > MAX_TOPIC_LEN) return undefined;
    return topic;
  } catch {
    return undefined;
  }
}
78
+
79
/**
 * Persist a focus topic for a session, replacing any previous value.
 *
 * The topic is trimmed before validation and before being written. The
 * focus root directory is created (recursively) when it is missing.
 *
 * @param sessionId Session identifier; must contain a non-whitespace character.
 * @param topic Topic text; must be non-empty after trimming and at most
 *   MAX_TOPIC_LEN characters.
 * @throws Error when `sessionId` or `topic` is blank, or the trimmed topic
 *   is too long. File-system errors from mkdir/write propagate unchanged.
 */
export function writeSessionFocus(sessionId: string, topic: string): void {
  const hasSession = Boolean(sessionId) && sessionId.trim() !== "";
  if (!hasSession) {
    throw new Error("writeSessionFocus: sessionId required");
  }

  const value = topic.trim();
  if (value.length === 0) {
    throw new Error("writeSessionFocus: topic required");
  }
  if (value.length > MAX_TOPIC_LEN) {
    throw new Error(`writeSessionFocus: topic exceeds max length ${MAX_TOPIC_LEN}`);
  }

  const directory = focusRoot();
  fs.mkdirSync(directory, { recursive: true });
  const destination = focusFilePath(sessionId);
  fs.writeFileSync(destination, value, { encoding: "utf-8" });
}
98
+
99
/**
 * Remove a session's focus file, if there is one.
 *
 * Best-effort: a falsy `sessionId`, a missing file, or any file-system
 * error all result in a silent return. Never throws.
 *
 * @param sessionId Session whose focus should be cleared.
 */
export function clearSessionFocus(sessionId: string): void {
  if (sessionId) {
    try {
      const target = focusFilePath(sessionId);
      if (!fs.existsSync(target)) return;
      fs.unlinkSync(target);
    } catch {
      /* best-effort: nothing to report if the delete fails */
    }
  }
}
112
+
113
/**
 * Resolve the effective session focus topic.
 *
 * Precedence is file > env var: the per-session focus file is consulted
 * first, and the supplied env-var value (the CLAWMEM_SESSION_FOCUS debug
 * knob, passed in by the caller) is used only when the file yields no
 * topic.
 *
 * The env-var value is held to the same contract as the file: it is
 * trimmed, must be non-empty, and must not exceed MAX_TOPIC_LEN. An
 * oversized value is ignored rather than applied, so the two sources
 * cannot disagree about what counts as a valid topic.
 *
 * Never throws. Never logs. Every failure path returns undefined, which
 * the caller treats as "no topic set".
 *
 * @param sessionId Session identifier used to locate the focus file.
 * @param envVar Raw env-var value supplied by the caller, if any.
 * @returns The resolved topic, or undefined when neither source is valid.
 */
export function resolveSessionTopic(
  sessionId: string | undefined,
  envVar: string | undefined
): string | undefined {
  const fromFile = readSessionFocus(sessionId);
  if (fromFile) return fromFile;
  const fromEnv = envVar?.trim();
  if (fromEnv && fromEnv.length <= MAX_TOPIC_LEN) return fromEnv;
  return undefined;
}
139
+
140
/**
 * Decide whether a result is "about" the topic.
 *
 * The topic is lower-cased and split on whitespace; pieces shorter than
 * two characters are discarded. The result matches only when EVERY
 * remaining piece occurs as a substring of the lower-cased concatenation
 * of title, displayPath, and the first 800 characters of body. A topic
 * that yields no usable pieces never matches.
 *
 * @param result Scored result to inspect (title / displayPath / body are read).
 * @param topic Free-text topic.
 * @returns true when all topic pieces are found, otherwise false.
 */
function matchesTopic(result: ScoredResult, topic: string): boolean {
  const needles: string[] = [];
  for (const piece of topic.toLowerCase().split(/\s+/)) {
    const word = piece.trim();
    if (word.length >= 2) needles.push(word);
  }
  if (needles.length === 0) return false;

  const bodyHead = (result.body || "").slice(0, 800);
  const searchable =
    `${result.title || ""} ${result.displayPath || ""} ${bodyHead}`.toLowerCase();

  for (const needle of needles) {
    if (!searchable.includes(needle)) return false;
  }
  return true;
}
163
+
164
+ export interface TopicBoostOptions {
165
+ /** Multiplier applied to docs whose title/path/body match all topic tokens. Default 1.4. */
166
+ boostFactor?: number;
167
+ /**
168
+ * Multiplier applied to non-matching docs. Default 0.75.
169
+ * Clamped to a 0.5 floor so the boost is a re-ranker, not a hide —
170
+ * non-matching docs are demoted but never suppressed to zero.
171
+ */
172
+ demoteFactor?: number;
173
+ }
174
+
175
/**
 * Re-rank a scored result set around a session topic.
 *
 * Outcomes:
 *   - Blank/undefined topic, or an empty result list: the input array is
 *     returned untouched.
 *   - Topic given but no result matches it: the input array is returned
 *     untouched, so ordering and scores stay at baseline instead of every
 *     document being demoted.
 *   - At least one match: each `compositeScore` is multiplied by
 *     `boostFactor` (matching) or `demoteFactor` (non-matching), and the
 *     array is then sorted by `compositeScore`, highest first.
 *
 * `demoteFactor` is raised to 0.5 when a smaller value is supplied.
 * Match flags are computed once per result, before any score is changed.
 *
 * Side effects: mutates `compositeScore` on the elements and sorts
 * `scored` in place; the returned array is the same instance.
 *
 * @param scored Results to re-rank.
 * @param topic Session topic, if any.
 * @param options Optional multipliers (defaults: boost 1.4, demote 0.75).
 * @returns `scored` itself.
 */
export function applyTopicBoost<T extends ScoredResult>(
  scored: T[],
  topic: string | undefined,
  options: TopicBoostOptions = {}
): T[] {
  if (!topic || topic.trim() === "") return scored;
  if (scored.length === 0) return scored;

  const boost = options.boostFactor ?? 1.4;
  const demote = Math.max(options.demoteFactor ?? 0.75, 0.5);

  // One matching pass up front: it drives both the zero-match bail-out
  // and the per-document multiplier below.
  const flags = scored.map(doc => matchesTopic(doc, topic));
  if (!flags.includes(true)) return scored; // baseline ordering preserved

  scored.forEach((doc, idx) => {
    doc.compositeScore *= flags[idx] ? boost : demote;
  });

  scored.sort((lhs, rhs) => rhs.compositeScore - lhs.compositeScore);
  return scored;
}
package/src/store.ts CHANGED
@@ -711,6 +711,11 @@ function initializeDatabase(db: Database): void {
711
711
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_type ON entity_nodes(entity_type)`);
712
712
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_vault ON entity_nodes(vault)`);
713
713
  db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_mentions ON entity_nodes(mention_count DESC)`);
714
+ // §11.1 (v0.9.0): expression index backing the `LOWER(name) IN (...) AND vault = ?`
715
+ // batch lookup used by the context-surfacing entity-detection hot path.
716
+ // Without this index the batch query devolves into a full scan on large vaults.
717
+ // Idempotent via IF NOT EXISTS — existing vaults pick it up on next open.
718
+ db.exec(`CREATE INDEX IF NOT EXISTS idx_entity_nodes_lower_name ON entity_nodes(LOWER(name), vault)`);
714
719
 
715
720
  // Entity mentions: entity ↔ document junction table
716
721
  db.exec(`
@@ -0,0 +1,506 @@
1
+ /**
2
+ * §11.1 — `<vault-facts>` KG injection for context-surfacing (v0.9.0)
3
+ *
4
+ * Prompt-only entity detection + exact-match validation + triple query +
5
+ * token-budgeted XML serialization. Wires the SPO knowledge graph
6
+ * (populated by v0.8.5 decision-extractor + A-MEM enrichment) into the
7
+ * retrieval hot path without ever reading from ranked documents.
8
+ *
9
+ * Hard constraint from the approved design (§11.1, prompt-only seeding):
10
+ * entity seeds come from `input.prompt` text ONLY, never from ranked
11
+ * document bodies or snippets. Without this, a topic-boosted off-topic
12
+ * doc (§11.4) could pollute the `<vault-facts>` block with facts about
13
+ * entities that have nothing to do with the user's actual prompt.
14
+ *
15
+ * Three-path candidate generation (BACKLOG §11.1 "Concrete implementation"):
16
+ * (a) canonical-ID regex (unanchored `vault:type:slug` scan; slug may contain interior hyphens — see `CANONICAL_ID_REGEX`)
17
+ * (b) proper-noun extraction (capitalized tokens + all-caps acronyms)
18
+ * (c) normalized n-gram scan against entity_nodes.name (1-3 grams,
19
+ * keep internal hyphens whole, batch SQL lookup via
20
+ * `WHERE LOWER(name) IN (?, ?, ...) AND vault = ?` backed by
21
+ * the `idx_entity_nodes_lower_name` expression index added in
22
+ * the v0.9.0 schema migration)
23
+ *
24
+ * Per-path validate-then-count ordering (Codex §11.1 Turn 5):
25
+ * - Path (a): validate via direct PK lookup, count immediately.
26
+ * - Path (b): validate via `resolveEntityTypeExact` BEFORE counting —
27
+ * only non-null results consume budget. Raw capitalized tokens that
28
+ * fail validation are dropped silently without starving path (c).
29
+ * - Path (c): validated hits fill remaining slots up to the 100-cap.
30
+ * Within path (c): 3-grams > 2-grams > 1-grams; prompt order is
31
+ * the final tie-breaker within each length class.
32
+ *
33
+ * Cross-path dedup: path (b) / (c) hits that resolve to the same
34
+ * entity_id as an earlier path (a) hit are no-ops — they do not
35
+ * consume a second cap slot.
36
+ *
37
+ * Fail-open discipline (BACKLOG §11.1 "CRITICAL fail-open requirement"):
38
+ * - Empty prompt / zero candidates → return [] (caller skips stage).
39
+ * - SQLite error during any lookup → caught per-candidate, silent skip.
40
+ * - Empty triples for every validated entity → return null from
41
+ * buildVaultFactsBlock (caller omits the block entirely).
42
+ * - Token budget too small to fit even one triple → return null.
43
+ * - Exact-match ambiguity (two entities with the same name) → skip
44
+ * that entity via `resolveEntityTypeExact` returning null.
45
+ */
46
+
47
+ import type { Database } from "bun:sqlite";
48
+ import { resolveEntityTypeExact, ensureEntityCanonical } from "./entity.ts";
49
+
50
+ // =============================================================================
51
+ // Constants
52
+ // =============================================================================
53
+
54
+ /** Hard upper bound on the number of VALIDATED entity candidates per prompt. */
55
+ const CANDIDATE_CAP = 100;
56
+
57
+ /** Maximum n-gram length (inclusive). 3-grams provide the best recall
58
+ * vs. signal trade-off per the Codex Turn 3 analysis; 4-grams dilute. */
59
+ const MAX_NGRAM_LEN = 3;
60
+
61
+ /**
62
+ * Canonical entity ID shape: `vault:type:slug`. The slug segment can
63
+ * include hyphens (e.g. `skill:tool:forge-stack`). Use a non-hyphen
64
+ * boundary on both ends so a trailing `.` or `,` doesn't swallow the
65
+ * last character but interior hyphens survive intact.
66
+ */
67
+ const CANONICAL_ID_REGEX = /(?<![a-zA-Z0-9_-])[a-z][a-z0-9-]*:[a-z_]+:[a-z0-9_](?:[a-z0-9_-]*[a-z0-9_])?(?![a-zA-Z0-9_-])/g;
68
+
69
+ /**
70
+ * Proper-noun shape: capitalized first letter + optional mixed case, OR
71
+ * all-caps acronyms (2+ chars). Matches `ClawMem`, `OAuth`, `API`, `Bun`,
72
+ * `PostgreSQL`, `JWT`, etc. Intentionally does NOT match lowercase
73
+ * technical identifiers like `clawmem`, `forge-stack`, `oauth2` — those
74
+ * are the job of path (c) n-gram scanning.
75
+ */
76
+ const PROPER_NOUN_REGEX = /\b(?:[A-Z][a-z0-9]+(?:[A-Z][a-z0-9]*)*|[A-Z]{2,}[a-z0-9]*)\b/g;
77
+
78
+ // =============================================================================
79
+ // Types
80
+ // =============================================================================
81
+
82
+ export interface ValidatedEntity {
83
+ /** Canonical `vault:type:slug` entity id for querying triples. */
84
+ entityId: string;
85
+ /** Lowercase name used internally for dedup / audit. */
86
+ name: string;
87
+ /** Entity type as stored in `entity_nodes.entity_type`. */
88
+ type: string;
89
+ /** Which extraction path surfaced this candidate (for debugging). */
90
+ sourcePath: "canonical-id" | "proper-noun" | "ngram";
91
+ }
92
+
93
+ export interface NgramCandidate {
94
+ /** Lowercase / whitespace-normalized n-gram text. */
95
+ normalized: string;
96
+ /** N-gram length: 1, 2, or 3. Used for the longer-first tie-breaker. */
97
+ length: 1 | 2 | 3;
98
+ /** First-token index in the prompt, for stable prompt-order tie-break. */
99
+ promptOrder: number;
100
+ }
101
+
102
+ /** Lightweight shape of a knowledge-graph triple the caller needs for serialization. */
103
+ export interface VaultFactsTriple {
104
+ subject: string;
105
+ predicate: string;
106
+ object: string;
107
+ validTo: string | null;
108
+ confidence: number;
109
+ }
110
+
111
+ /** Function shape used to query triples for a single entity id.
112
+ * Decoupled from `Store` so unit tests can inject a mock. */
113
+ export type TripleQueryFn = (entityId: string) => VaultFactsTriple[];
114
+
115
+ // =============================================================================
116
+ // Path (a) — canonical-ID regex
117
+ // =============================================================================
118
+
119
/**
 * Collect every substring of the prompt that matches CANONICAL_ID_REGEX.
 *
 * Duplicates are removed and the first occurrence decides the position
 * in the output. This is a text-only scan; no database access happens here.
 *
 * @param prompt Raw prompt text.
 * @returns Distinct matches in first-seen order; empty for an empty prompt
 *   or when nothing matches.
 */
export function extractCanonicalIds(prompt: string): string[] {
  if (!prompt) return [];
  const found = prompt.match(CANONICAL_ID_REGEX);
  // A Set iterates in insertion order, so first occurrences keep their place.
  return found === null ? [] : Array.from(new Set(found));
}
135
+
136
+ // =============================================================================
137
+ // Path (b) — proper-noun extraction
138
+ // =============================================================================
139
+
140
/**
 * Collect every token of the prompt that matches PROPER_NOUN_REGEX.
 *
 * Duplicates are removed (case-sensitively) and the first occurrence
 * decides the position in the output. This is a text-only scan: no
 * database access and no validation happen here.
 *
 * @param prompt Raw prompt text.
 * @returns Distinct matches in first-seen order; empty for an empty prompt
 *   or when nothing matches.
 */
export function extractProperNouns(prompt: string): string[] {
  if (!prompt) return [];
  const found = prompt.match(PROPER_NOUN_REGEX);
  // A Set iterates in insertion order, so first occurrences keep their place.
  return found === null ? [] : Array.from(new Set(found));
}
158
+
159
+ // =============================================================================
160
+ // Path (c) — normalized n-gram scan
161
+ // =============================================================================
162
+
163
/**
 * Tokenize a prompt for n-gram generation.
 *
 * Splits on whitespace and common punctuation (`, ; : ! ? " ' `` ` ``
 * and bracket characters). Hyphens are not separators, so `forge-stack`
 * stays one token. Each piece then has leading/trailing characters
 * stripped when they are neither a letter, a digit, nor a hyphen.
 *
 * "Letter" and "digit" are the Unicode classes `\p{L}` / `\p{N}`, not
 * ASCII ranges: an ASCII-only edge strip would eat accented or non-Latin
 * characters at token edges (`café` -> `caf`, `über` -> `ber`) and
 * produce names that can never match - or that match the wrong entity.
 *
 * Pieces that contain no letter or digit at all (for example the bare
 * `-` of a markdown bullet) are dropped, since they cannot name an
 * entity and would only pollute 2-/3-gram windows.
 *
 * @param prompt Raw prompt text.
 * @returns Tokens in prompt order; empty for an empty prompt.
 */
function tokenizeForNgrams(prompt: string): string[] {
  if (!prompt) return [];
  const separators = /[\s,;:!?"'`()\[\]{}<>]+/;
  const edgeJunk = /^[^\p{L}\p{N}\-]+|[^\p{L}\p{N}\-]+$/gu;
  const hasWordChar = /[\p{L}\p{N}]/u;
  return prompt
    .split(separators)
    .map(t => t.replace(edgeJunk, ""))
    .filter(t => hasWordChar.test(t));
}
176
+
177
/**
 * Produce the sliding-window n-gram candidates (window sizes 1 through
 * MAX_NGRAM_LEN) for a prompt.
 *
 * Each window's tokens are joined with a single space, lower-cased,
 * trimmed, and whitespace-collapsed. A normalized string is emitted only
 * the first time it is seen. Output is in generation order: every window
 * of size 1 in prompt order, then size 2, then size 3. `promptOrder` is
 * the index of the window's first token.
 *
 * @param prompt Raw prompt text.
 * @returns Distinct n-gram candidates; empty when the prompt has no tokens.
 */
export function generateNgramCandidates(prompt: string): NgramCandidate[] {
  const words = tokenizeForNgrams(prompt);
  const emitted = new Set<string>();
  const candidates: NgramCandidate[] = [];

  for (let width = 1; width <= MAX_NGRAM_LEN; width += 1) {
    const lastStart = words.length - width;
    for (let start = 0; start <= lastStart; start += 1) {
      const normalized = words
        .slice(start, start + width)
        .join(" ")
        .toLowerCase()
        .trim()
        .replace(/\s+/g, " ");
      if (normalized === "" || emitted.has(normalized)) continue;
      emitted.add(normalized);
      candidates.push({
        normalized,
        length: width as 1 | 2 | 3,
        promptOrder: start,
      });
    }
  }
  return candidates;
}
201
+
202
/**
 * Batch-lookup a set of normalized candidate names against entity_nodes.
 *
 * Candidates are deduplicated and then looked up in chunks of at most
 * 500 names, each chunk being one parameterized
 * `LOWER(name) IN (...) AND vault = ?` query. Chunking keeps every
 * individual `IN` list small without discarding candidates. The previous
 * single-query form truncated the deduplicated list to its first 500
 * entries; because the n-gram generator in this module emits all 1-grams
 * before any 2-/3-grams, a prompt with 500+ distinct tokens lost every
 * multi-word candidate before the lookup even ran.
 *
 * Total work stays bounded: at most 20 chunks (10,000 distinct names)
 * are queried; anything beyond that is ignored.
 *
 * Returns a map from `LOWER(name)` to `{ entityId, entityType }`. When
 * several rows share the same lower-cased name, the last row returned
 * wins in the map.
 *
 * Fail-open: if any chunk's query throws, an empty map is returned and
 * the caller proceeds as if there were no hits.
 *
 * @param db Database handle exposing `prepare(sql).all(...params)`.
 * @param candidates Normalized (lower-cased) names to look up.
 * @param vault Vault to restrict the lookup to. Defaults to "default".
 * @returns Map of lower-cased name to entity id and type.
 */
export function batchLookupNames(
  db: Database,
  candidates: string[],
  vault: string = "default"
): Map<string, { entityId: string; entityType: string }> {
  const out = new Map<string, { entityId: string; entityType: string }>();
  if (candidates.length === 0) return out;

  const chunkSize = 500;
  const maxChunks = 20;
  const unique = Array.from(new Set(candidates)).slice(0, chunkSize * maxChunks);

  try {
    for (let offset = 0; offset < unique.length; offset += chunkSize) {
      const chunk = unique.slice(offset, offset + chunkSize);
      const placeholders = chunk.map(() => "?").join(", ");
      const sql = `
    SELECT DISTINCT LOWER(name) AS lname, entity_id, entity_type
    FROM entity_nodes
    WHERE LOWER(name) IN (${placeholders})
      AND vault = ?
  `;
      const rows = db.prepare(sql).all(...chunk, vault) as Array<{
        lname: string;
        entity_id: string;
        entity_type: string;
      }>;
      for (const row of rows) {
        out.set(row.lname, { entityId: row.entity_id, entityType: row.entity_type });
      }
    }
  } catch {
    /* fail-open: discard partial results and report zero hits */
    return new Map<string, { entityId: string; entityType: string }>();
  }
  return out;
}
254
+
255
+ // =============================================================================
256
+ // Main entity extraction — three-path, validate-then-count, 100-cap
257
+ // =============================================================================
258
+
259
/**
 * Resolve the entities a prompt refers to, using three extraction paths
 * in fixed priority order and a shared cap of CANDIDATE_CAP validated
 * entities. Only the prompt text is inspected.
 *
 *   (a) canonical ids found in the prompt, confirmed by a direct
 *       `entity_nodes` lookup on (entity_id, vault);
 *   (b) proper-noun tokens, accepted only when `resolveEntityTypeExact`
 *       returns a type and `ensureEntityCanonical` returns an id;
 *   (c) 1-3 word n-grams confirmed by `batchLookupNames` and then by
 *       `resolveEntityTypeExact`, admitted longest-first with prompt
 *       position as the tie-breaker.
 *
 * An entity id is admitted at most once, whichever path finds it first.
 * Candidates that fail validation do not count toward the cap. Every
 * per-candidate lookup is wrapped so a thrown error skips that candidate
 * only.
 *
 * NOTE(review): path (b) calls `ensureEntityCanonical`; the original
 * author's note says it only writes when the FTS index is out of sync.
 * That is not verifiable from this module - confirm before relying on
 * this function being read-only.
 *
 * @param prompt Raw prompt text.
 * @param db Database handle.
 * @param vault Vault to resolve against. Defaults to "default".
 * @returns Validated entities in admission order; empty for an empty
 *   prompt or when nothing validates.
 */
export function extractPromptEntities(
  prompt: string,
  db: Database,
  vault: string = "default"
): ValidatedEntity[] {
  const validated: ValidatedEntity[] = [];
  if (!prompt) return validated;

  const seenEntityIds = new Set<string>();
  const capReached = (): boolean => validated.length >= CANDIDATE_CAP;
  const admit = (entity: ValidatedEntity): void => {
    seenEntityIds.add(entity.entityId);
    validated.push(entity);
  };

  // ---- Path (a): canonical ids, confirmed by primary-key lookup --------
  for (const id of extractCanonicalIds(prompt)) {
    if (capReached()) break;
    if (seenEntityIds.has(id)) continue;
    try {
      const row = db
        .prepare(`SELECT entity_id, entity_type FROM entity_nodes WHERE entity_id = ? AND vault = ?`)
        .get(id, vault) as { entity_id: string; entity_type: string } | undefined;
      if (row) {
        admit({
          entityId: id,
          name: id,
          type: row.entity_type,
          sourcePath: "canonical-id",
        });
      }
    } catch {
      /* fail-open per candidate */
    }
  }

  // ---- Path (b): proper nouns, validated before they count -------------
  for (const noun of extractProperNouns(prompt)) {
    if (capReached()) break;
    try {
      const type = resolveEntityTypeExact(db, noun, vault);
      if (!type) continue; // no type resolved -> drop without using a slot
      const entityId = ensureEntityCanonical(db, noun, type, vault);
      if (!entityId || seenEntityIds.has(entityId)) continue;
      admit({
        entityId,
        name: noun.toLowerCase(),
        type,
        sourcePath: "proper-noun",
      });
    } catch {
      /* fail-open per candidate */
    }
  }

  // ---- Path (c): n-grams fill whatever room is left --------------------
  if (capReached()) return validated;

  const ngrams = generateNgramCandidates(prompt);
  const hits = batchLookupNames(db, ngrams.map(g => g.normalized), vault);
  if (hits.size === 0) return validated;

  type RankedNgram = ValidatedEntity & { length: 1 | 2 | 3; promptOrder: number };
  const ranked: RankedNgram[] = [];
  const claimed = new Set<string>();

  for (const gram of ngrams) {
    const hit = hits.get(gram.normalized);
    if (!hit || claimed.has(hit.entityId) || seenEntityIds.has(hit.entityId)) {
      continue;
    }
    try {
      const confirmedType = resolveEntityTypeExact(db, gram.normalized, vault);
      if (confirmedType) {
        claimed.add(hit.entityId);
        ranked.push({
          entityId: hit.entityId,
          name: gram.normalized,
          type: confirmedType,
          sourcePath: "ngram",
          length: gram.length,
          promptOrder: gram.promptOrder,
        });
      }
    } catch {
      /* fail-open per candidate */
    }
  }

  // Longer n-grams first; earlier prompt position breaks ties.
  ranked.sort((a, b) => b.length - a.length || a.promptOrder - b.promptOrder);

  for (const { entityId, name, type } of ranked) {
    if (capReached()) break;
    if (seenEntityIds.has(entityId)) continue;
    admit({ entityId, name, type, sourcePath: "ngram" });
  }

  return validated;
}
405
+
406
+ // =============================================================================
407
+ // Vault-facts block builder
408
+ // =============================================================================
409
+
410
+ export interface BuildVaultFactsOptions {
411
+ /** Cap on triples emitted per entity. Default 10. */
412
+ maxTriplesPerEntity?: number;
413
+ /** Token estimator. Defaults to ~4 chars per token heuristic. */
414
+ estimateTokens?: (s: string) => number;
415
+ /** ISO "now" used to filter `validTo > now`. Defaults to `new Date().toISOString()`. */
416
+ now?: string;
417
+ }
418
+
419
/**
 * Fallback token estimator: one token per 4 characters of input,
 * rounded up, so any non-empty string costs at least one token.
 */
const DEFAULT_ESTIMATE_TOKENS = (s: string): number => {
  const charsPerToken = 4;
  return Math.ceil(s.length / charsPerToken);
};
420
+
421
/**
 * Build the `<vault-facts>` XML block for a set of validated entities
 * within a token budget.
 *
 * Triples are fetched entity by entity through `queryTriples` (this
 * function never queries the DB itself, so tests can inject a mock),
 * filtered down to the ones that are still current, deduplicated across
 * entities by (subject, predicate, object), capped per entity, and
 * serialized one per line as `subject predicate object`.
 *
 * Truncation rule (BACKLOG §11.1 budget guidance): lines are added
 * greedily until the next one would overflow the budget. The cut always
 * falls on a triple boundary — never mid-triple — and an empty block is
 * never emitted.
 *
 * @param entities Validated entities whose triples should be surfaced,
 *   in priority order (earlier entities claim budget first).
 * @param queryTriples Callback returning the triples for an entity id.
 *   If it throws, that entity is skipped (fail-open per entity).
 * @param budgetTokens Maximum estimated tokens for the whole block,
 *   including the open/close tags. Non-positive or NaN yields `null`.
 * @param options Optional per-entity cap, token estimator and "now".
 * @returns The serialized block, or `null` when:
 *   - there are no entities (caller: skip the stage entirely);
 *   - the budget is not a positive number;
 *   - no current triples remain after filtering (caller: do NOT emit an
 *     empty `<vault-facts/>` element);
 *   - the budget cannot fit the tags plus at least one triple (caller:
 *     drop the block, preserve established blocks' budget);
 *   - the query callback throws for every entity.
 */
export function buildVaultFactsBlock(
  entities: ValidatedEntity[],
  queryTriples: TripleQueryFn,
  budgetTokens: number,
  options: BuildVaultFactsOptions = {}
): string | null {
  if (entities.length === 0) return null;
  // Written as `!(x > 0)` rather than `x <= 0` so a NaN budget is
  // rejected as well. Every comparison against NaN is false, so a NaN
  // budget would otherwise disable all budget checks below and emit an
  // unbounded block.
  if (!(budgetTokens > 0)) return null;

  // Normalize the cap to a non-negative whole number. A negative value
  // must mean "emit nothing" rather than anything surprising, and NaN
  // is treated the same way.
  const requestedCap = options.maxTriplesPerEntity ?? 10;
  const maxPerEntity = requestedCap > 0 ? Math.floor(requestedCap) : 0;
  const estimate = options.estimateTokens ?? DEFAULT_ESTIMATE_TOKENS;
  const now = options.now ?? new Date().toISOString();

  // Collect current triples from all entities, deduping across entities
  // by (subject, predicate, object). Without this, prompts that resolve
  // both endpoints of a triple (e.g. "ClawMem depends_on Bun" when both
  // `ClawMem` and `Bun` are validated entities) would emit the same
  // fact twice and spend budget twice — once from the outgoing side of
  // one entity's query, once from the incoming side of the other's.
  const lines: string[] = [];
  const seen = new Set<string>();
  for (const entity of entities) {
    let triples: VaultFactsTriple[];
    try {
      triples = queryTriples(entity.entityId);
    } catch {
      continue; // fail-open per entity
    }

    // The per-entity cap counts only triples actually emitted for this
    // entity. Expired triples and duplicates of already-emitted facts
    // do not consume cap slots, so they cannot crowd out unique facts.
    // The cap still keeps one chatty entity from monopolizing the
    // shared budget below.
    let emitted = 0;
    for (const t of triples) {
      if (emitted >= maxPerEntity) break;

      // Current-only filter: validTo IS NULL OR validTo > now.
      // NOTE(review): this is a plain string comparison, so it assumes
      // `validTo` and `now` are ISO timestamps in the same format —
      // confirm against the triple store.
      const isCurrent = !t.validTo || t.validTo > now;
      if (!isCurrent) continue;

      const key = `${t.subject}\u0000${t.predicate}\u0000${t.object}`;
      if (seen.has(key)) continue;
      seen.add(key);
      lines.push(`${t.subject} ${t.predicate} ${t.object}`);
      emitted++;
    }
  }

  if (lines.length === 0) return null;

  // Token-bounded serialization. Start from the structural XML overhead
  // (open + close tag + two newlines) and greedily add triple lines
  // until the next line would overflow the budget. The whole block is
  // dropped if not even one line fits.
  const OPEN = "<vault-facts>\n";
  const CLOSE = "\n</vault-facts>";
  const overhead = estimate(OPEN + CLOSE);
  if (overhead >= budgetTokens) return null;

  const outLines: string[] = [];
  let runningTokens = overhead;
  for (const line of lines) {
    const lineTokens = estimate(line) + 1; // +1 for the trailing newline
    if (runningTokens + lineTokens > budgetTokens) break;
    outLines.push(line);
    runningTokens += lineTokens;
  }

  if (outLines.length === 0) return null;

  return `${OPEN}${outLines.join("\n")}${CLOSE}`;
}