kongbrain 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,11 +23,29 @@ import { getCachedContext, recordPrefetchHit, recordPrefetchMiss } from "./prefe
  import { stageRetrieval, getHistoricalUtilityBatch } from "./retrieval-quality.js";
  import { isACANActive, scoreWithACAN, type ACANCandidate } from "./acan.js";
  import { swallow } from "./errors.js";
+ import { log } from "./log.js";

  // ── Message type guards ────────────────────────────────────────────────────────

  type ContentBlock = TextContent | ThinkingContent | ToolCall | ImageContent;

+ /**
+  * Loose content block type for message stripping — covers the full range of
+  * block shapes that may appear in pi-ai messages beyond the typed union
+  * (e.g., toolResult blocks with nested content, image_url, source).
+  */
+ type AnyContentBlock = {
+   type: string;
+   text?: string;
+   thinking?: string;
+   media_type?: string;
+   content?: AnyContentBlock[];
+   [key: string]: unknown;
+ };
+
+ /** Mutable view of a message for in-place content stripping. */
+ type MutableMessage = { role: string; content: AnyContentBlock[] | string };
+
  function isUser(msg: AgentMessage): msg is UserMessage {
    return (msg as UserMessage).role === "user";
  }
@@ -56,15 +74,28 @@ function msgContentBlocks(msg: AgentMessage): ContentBlock[] {

  // ── Constants ──────────────────────────────────────────────────────────────────

- const CHARS_PER_TOKEN = 3.4;
- const BUDGET_FRACTION = 0.70;
- const CONVERSATION_SHARE = 0.50;
- const RETRIEVAL_SHARE = 0.30;
- const CORE_MEMORY_SHARE = 0.15;
+ // Token estimation ratios (aligned with Claude Code's roughTokenCountEstimation):
+ // - Prose/code: 4 bytes per token (claw-code default)
+ // - JSON (tool results, structured data): 2 bytes per token (denser single-char tokens)
+ // - Safety margin: 4/3 (33%) applied to aggregate estimates
+ const BYTES_PER_TOKEN = 4;
+ const BYTES_PER_TOKEN_JSON = 2;
+ const CHARS_PER_TOKEN = BYTES_PER_TOKEN; // backward compat alias for budget math
+ const TOKEN_SAFETY_MARGIN = 4 / 3;
+ const IMAGE_TOKEN_ESTIMATE = 2000; // claw-code: hardcoded for images/documents
+ const BUDGET_FRACTION = 0.325; // ~65k of 200k window (leaves ~135k for LLM generation + tool results)
+ const CONVERSATION_SHARE = 0.23; // ~15k for recent user/assistant exchanges
+ const RETRIEVAL_SHARE = 0.385; // ~25k for graph-curated context
+ const CORE_MEMORY_SHARE = 0.155; // ~10k for core memory/directives
+ const TOOL_HISTORY_SHARE = 0.23; // ~15k for recent tool results
  const CORE_MEMORY_TTL = 300_000;
- const MAX_ITEM_CHARS = 1200; // ~350 tokens per item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
- const MIN_RELEVANCE_SCORE = 0.35;
- const MIN_COSINE = 0.25;
+ const MAX_ITEM_CHARS = 1200; // ~350 tokens per item (matches claw-code MAX_INSTRUCTION_FILE_CHARS)
+ const MIN_RELEVANCE_SCORE = 0.40; // Floor for graph-scored results after WMR/ACAN (tuned: cosine-heavy weights produce lower absolute scores)
+ const MIN_COSINE = 0.35; // Minimum cosine similarity to consider a result (raised from 0.25)
+
+ // Deduplication thresholds
+ const DEDUP_COSINE_THRESHOLD = 0.88;
+ const DEDUP_JACCARD_THRESHOLD = 0.80;

  // Recency decay
  const RECENCY_DECAY_FAST = 0.99;
@@ -87,20 +118,24 @@ const INTENT_REMINDER_THRESHOLD = 10;

  // ── Budget calculation ─────────────────────────────────────────────────────────

- interface Budgets {
+ /** @internal Exported for testing. */
+ export interface Budgets {
    conversation: number;
    retrieval: number;
    core: number;
+   toolHistory: number;
    maxContextItems: number;
  }

- function calcBudgets(contextWindow: number): Budgets {
+ /** Split the context window into 4 budgets: conversation, retrieval, core memory, and tool history. @internal */
+ export function calcBudgets(contextWindow: number): Budgets {
    const total = contextWindow * BUDGET_FRACTION;
    const retrieval = Math.round(total * RETRIEVAL_SHARE);
    return {
      conversation: Math.round(total * CONVERSATION_SHARE),
      retrieval,
      core: Math.round(total * CORE_MEMORY_SHARE),
+     toolHistory: Math.round(total * TOOL_HISTORY_SHARE),
      maxContextItems: Math.max(20, Math.round(retrieval / 300)),
    };
  }
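A quick sanity check of the new split, assuming the 200k-token window the share comments refer to:

  const b = calcBudgets(200_000);
  // total             = 200_000 * 0.325        = 65_000 tokens
  // b.conversation    = round(65_000 * 0.23)   = 14_950  (~15k)
  // b.retrieval       = round(65_000 * 0.385)  = 25_025  (~25k)
  // b.core            = round(65_000 * 0.155)  = 10_075  (~10k)
  // b.toolHistory     = round(65_000 * 0.23)   = 14_950  (~15k)
  // b.maxContextItems = max(20, round(25_025 / 300)) = 83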
@@ -150,32 +185,43 @@ function extractLastUserText(messages: AgentMessage[]): string | null {
    return null;
  }

- /** Estimate char count for a single content block (claw-code: per-block-type estimation). */
+ /** Estimate char-equivalent count for a single content block (claw-code: per-block-type estimation). */
  function blockCharLen(c: any): number {
    if (c.type === "text") return c.text.length;
    if (c.type === "thinking") return c.thinking.length;
    if (c.type === "toolCall") {
-     // Count tool name + serialized args (claw-code: compact.rs:326-338)
-     return (c.name?.length ?? 0) + (c.args ? JSON.stringify(c.args).length : 0);
+     // Tool name + serialized args — JSON is denser (2 bytes/token vs 4)
+     // Scale JSON args to char-equivalent at prose ratio
+     const argsJson = c.args ? JSON.stringify(c.args) : "";
+     const argsCharEquiv = argsJson.length * (BYTES_PER_TOKEN / BYTES_PER_TOKEN_JSON);
+     return (c.name?.length ?? 0) + argsCharEquiv;
    }
    if (c.type === "toolResult" && Array.isArray(c.content)) {
      let len = 0;
      for (const rc of c.content) {
-       if (rc.type === "text") len += rc.text.length;
-       else len += 100;
+       if (rc.type === "text") {
+         // Detect JSON-heavy tool results and scale accordingly
+         const text = rc.text ?? "";
+         const isJson = text.length > 20 && (text[0] === "{" || text[0] === "[");
+         len += isJson ? text.length * (BYTES_PER_TOKEN / BYTES_PER_TOKEN_JSON) : text.length;
+       } else {
+         // Images/documents: claw-code hardcodes 2000 tokens
+         len += IMAGE_TOKEN_ESTIMATE * BYTES_PER_TOKEN;
+       }
      }
      return len;
    }
-   return 100; // image, etc.
+   return IMAGE_TOKEN_ESTIMATE * BYTES_PER_TOKEN; // image, document, etc.
  }

  function estimateTokens(messages: AgentMessage[]): number {
    let chars = 0;
    for (const msg of messages) {
      for (const c of msgContentBlocks(msg)) chars += blockCharLen(c);
-     chars += 4; // per-message structural overhead
+     chars += 20; // per-message structural overhead (role token, framing, separators)
    }
-   return Math.ceil(chars / CHARS_PER_TOKEN);
+   // Apply safety margin (claw-code: 4/3 multiplier on rough estimates)
+   return Math.ceil((chars / CHARS_PER_TOKEN) * TOKEN_SAFETY_MARGIN);
  }

  function msgCharLen(msg: AgentMessage): number {
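A rough worked example of the new estimator, assuming a message containing a single 4,000-char block:

  // prose text block:  ceil((4_000 + 20) / 4 * (4 / 3))           = 1_340 tokens
  // same chars in a JSON tool result: 4_000 * (4 / 2) = 8_000 char-equivalents,
  //                    ceil((8_000 + 20) / 4 * (4 / 3))           = 2_674 tokens
  // image/document block: 2_000 * 4 = 8_000 char-equivalents → ~2_674 tokens after the margin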
@@ -212,6 +258,7 @@ function accessBoost(accessCount: number | undefined): number {
    return Math.log1p(accessCount ?? 0);
  }

+ /** Dot-product cosine similarity between two equal-length vectors. Returns 0 if either has zero magnitude. */
  export function cosineSimilarity(a: number[], b: number[]): number {
    let dot = 0, magA = 0, magB = 0;
    for (let i = 0; i < a.length; i++) {
@@ -241,35 +288,15 @@ function buildRulesSuffix(session: SessionState): string {
      );
    }

-   // First time — full examples
+   // First time — compact rules (no verbose examples)
    session.injectedSections.add("rules_full");
    return (
      "\n<rules_reminder>" +
      `\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
-     "\n\nYOUR BUDGET IS SMALL. Plan the whole task, not just the next call." +
-     "\n" +
-     "\nTask: Fix broken import" +
-     "\n WASTEFUL (6 calls): grep old read file grep new → read context → edit → read to verify" +
-     "\n DENSE (2 calls):" +
-     "\n 1. grep -n 'oldImport' src/**/*.ts; grep -rn 'newModule' src/" +
-     "\n 2. edit file && npm test -- --grep 'relevant' 2>&1 | tail -20" +
-     "\n" +
-     "\nTask: Debug failing test" +
-     "\n WASTEFUL (8 calls): run test → read output → read test → read source → grep → read more → edit → rerun" +
-     "\n DENSE (3 calls):" +
-     "\n 1. npm test 2>&1 | tail -30" +
-     "\n 2. grep -n 'failingTest\|relevantFn' test/*.ts src/*.ts" +
-     "\n 3. edit fix && npm test 2>&1 | tail -15" +
-     "\n" +
-     "\nTask: Read/understand multiple files" +
-     "\n WASTEFUL (10 calls): cat file1 → cat file2 → cat file3 → ..." +
-     "\n DENSE (1-2 calls):" +
-     "\n 1. head -80 src/a.ts src/b.ts src/c.ts src/d.ts (4 files in ONE call)" +
-     "\n 2. grep -n 'keyPattern' src/*.ts (search all files at once, not one by one)" +
-     "\n" +
-     "\nEvery step still happens — investigation, edit, verification — but COMBINED into fewer calls." +
-     "\nThe answer is often already in context. Don't call if you already know." +
-     "\nAnnounce: task type (LOOKUP=1/EDIT=2/REFACTOR=6), planned calls, what each does." +
+     "\nClassify: LOOKUP(≤3) | EDIT(≤4) | REFACTOR(≤8). Announce type + plan before tools." +
+     "\nCombine: grep+grep in 1 call, edit+test in 1 bash. Read multiple files in 1 call." +
+     "\nSkip: if <graph_context> already answers it, zero calls needed." +
+     "\nBe dense: lead with answer, no filler, no repeating context back." +
      "\n</rules_reminder>"
    );
  }
@@ -303,43 +330,18 @@ function injectRulesSuffix(messages: AgentMessage[], session: SessionState): Age

  async function buildContextualQueryVec(
    queryText: string,
-   messages: AgentMessage[],
+   _messages: AgentMessage[],
    embeddings: EmbeddingService,
+   session?: SessionState,
  ): Promise<number[]> {
-   const queryVec = await embeddings.embed(queryText);
-
-   const recentTexts: string[] = [];
-   for (let i = messages.length - 2; i >= 0 && recentTexts.length < 3; i--) {
-     const msg = messages[i] as UserMessage | AssistantMessage;
-     if (msg.role === "user" || msg.role === "assistant") {
-       const text = extractText(msg);
-       if (text && text.length > 10) {
-         recentTexts.push(text.slice(0, 500));
-       }
-     }
-   }
-
-   if (recentTexts.length === 0) return queryVec;
-
-   try {
-     const recentVecs = await Promise.all(recentTexts.map((t) => embeddings.embed(t)));
-     const dim = queryVec.length;
-     const blended = new Array(dim).fill(0);
-     const queryWeight = 2;
-     const totalWeight = queryWeight + recentVecs.length;
-
-     for (let d = 0; d < dim; d++) {
-       blended[d] = queryVec[d] * queryWeight;
-       for (const rv of recentVecs) {
-         blended[d] += rv[d];
-       }
-       blended[d] /= totalWeight;
-     }
-     return blended;
-   } catch (e) {
-     swallow.warn("graph-context:contextualQuery", e);
-     return queryVec;
+   // Reuse embedding from ingest if available (same user message, already embedded)
+   if (session?.lastUserEmbedding) {
+     return session.lastUserEmbedding;
    }
+   // Fallback: embed the query text (first turn, or ingest didn't fire yet)
+   return embeddings.embed(queryText);
+   // Note: removed the 3-message "blend" — pure query vector is sufficient for retrieval
+   // and saves 1-3 embedding calls per turn (~15-200ms)
  }

  // ── Scoring ────────────────────────────────────────────────────────────────────
@@ -355,7 +357,11 @@ async function scoreResults(
    .filter((r) => r.table === "memory" || r.table === "concept")
    .map((r) => r.id);

- const cacheEntries = await store.getUtilityCacheEntries(eligibleIds);
+ // Parallelize independent DB lookups (utility cache + reflection sessions)
+ const [cacheEntries, reflectedSessions] = await Promise.all([
+   store.getUtilityCacheEntries(eligibleIds),
+   store.getReflectionSessionIds(),
+ ]);

  const preFiltered = results.filter((r) => {
    const entry = cacheEntries.get(r.id);
@@ -371,8 +377,6 @@ async function scoreResults(
  if (utilityMap.size === 0 && eligibleIds.length > 0) {
    utilityMap = await getHistoricalUtilityBatch(eligibleIds);
  }
-
- const reflectedSessions = await store.getReflectionSessionIds();
  const floor = INTENT_SCORE_FLOORS[currentIntent] ?? SCORE_FLOOR_DEFAULT;

  // ACAN path
@@ -413,8 +417,8 @@ async function scoreResults(
    const reflectionBoost = r.sessionId ? (reflectedSessions.has(r.sessionId) ? 1.0 : 0) : 0;

    const finalScore =
-     0.27 * cosine + 0.28 * recency + 0.05 * importance +
-     0.05 * access + 0.10 * neighborBonus + 0.15 * provenUtility +
+     0.35 * cosine + 0.18 * recency + 0.07 * importance +
+     0.02 * access + 0.10 * neighborBonus + 0.18 * provenUtility +
      0.10 * reflectionBoost - utilityPenalty;

    return { ...r, finalScore, fromNeighbor: neighborIds.has(r.id) };
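A worked example of the re-weighted score, using assumed feature values (cosine 0.8, recency 0.5, importance 0.5, access 0.3, no neighbor bonus, provenUtility 0.2, no reflection boost or penalty):

  // finalScore = 0.35*0.8 + 0.18*0.5 + 0.07*0.5 + 0.02*0.3 + 0.10*0 + 0.18*0.2 + 0.10*0 - 0
  //            = 0.280 + 0.090 + 0.035 + 0.006 + 0 + 0.036 + 0 = 0.447
  // 0.447 clears the raised MIN_RELEVANCE_SCORE floor of 0.40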
@@ -426,23 +430,30 @@ async function scoreResults(

  // ── Deduplication ──────────────────────────────────────────────────────────────

  function deduplicateResults(ranked: ScoredResult[]): ScoredResult[] {
+   // Pre-compute word sets to avoid re-splitting in O(n^2) inner loop
+   const wordSets = ranked.map(r =>
+     new Set((r.text ?? "").toLowerCase().split(/\s+/).filter((w) => w.length > 2)),
+   );
    const kept: ScoredResult[] = [];
-   for (const item of ranked) {
+   const keptIndexes: number[] = [];
+   for (let i = 0; i < ranked.length; i++) {
+     const item = ranked[i];
      let isDup = false;
-     for (const existing of kept) {
+     for (const ki of keptIndexes) {
+       const existing = ranked[ki];
        if (item.embedding?.length && existing.embedding?.length
          && item.embedding.length === existing.embedding.length) {
-         if (cosineSimilarity(item.embedding, existing.embedding) > 0.88) { isDup = true; break; }
+         if (cosineSimilarity(item.embedding, existing.embedding) > DEDUP_COSINE_THRESHOLD) { isDup = true; break; }
          continue;
        }
-       const words = new Set((item.text ?? "").toLowerCase().split(/\s+/).filter((w) => w.length > 2));
-       const eWords = new Set((existing.text ?? "").toLowerCase().split(/\s+/).filter((w) => w.length > 2));
+       const words = wordSets[i];
+       const eWords = wordSets[ki];
        let intersection = 0;
        for (const w of words) { if (eWords.has(w)) intersection++; }
        const union = words.size + eWords.size - intersection;
-       if (union > 0 && intersection / union > 0.80) { isDup = true; break; }
+       if (union > 0 && intersection / union > DEDUP_JACCARD_THRESHOLD) { isDup = true; break; }
      }
-     if (!isDup) kept.push(item);
+     if (!isDup) { kept.push(item); keptIndexes.push(i); }
    }
    return kept;
  }
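A small worked example of the Jaccard branch, using two made-up memory texts:

  // A: "graph context budget split"         → {graph, context, budget, split}          (4 words > 2 chars)
  // B: "graph context budget splits window" → {graph, context, budget, splits, window} (5 words > 2 chars)
  // intersection = 3, union = 4 + 5 - 3 = 6, Jaccard = 3/6 = 0.5 → below 0.80, both kept
  // near-verbatim duplicates approach 1.0, so the lower-ranked copy is dropped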
@@ -514,24 +525,17 @@ function formatTierSection(entries: CoreMemoryEntry[], label: string): string {
  function buildSystemPromptSection(session: SessionState, tier0Entries: CoreMemoryEntry[]): string | undefined {
    const parts: string[] = [];

-   // IKONG architecture description (static, ~120 tokens)
+   // Graph pillar IDs (compact: the model doesn't need architecture descriptions)
    const pillarLines: string[] = [];
    if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
    if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
    if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
    if (pillarLines.length > 0) {
-     parts.push(
-       "GRAPH PILLARS (your structural context):\n" +
-       ` ${pillarLines.join(" | ")}\n` +
-       " IKONG cognitive architecture:\n" +
-       " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
-       " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
-       " O(peration): tool execution, skill procedures, causal chain tracking\n" +
-       " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
-       " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
-     );
+     parts.push(`GRAPH PILLARS: ${pillarLines.join(" | ")}`);
    }

+   // Token-density rules are in buildRulesSuffix (injected per-turn) — no duplication here
+
    // Tier 0 core directives (semi-static, changes rarely)
    const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
    if (t0Section) parts.push(t0Section);
@@ -597,7 +601,7 @@ async function formatContextMessage(

    const sections: string[] = [];

-   // Pillar context — structural awareness of who/what/where
+   // Pillar context — structural IDs only (architecture description is unnecessary token spend)
    // Skip if model already has it in the conversation window (claw-code static section dedup)
    if (!session.injectedSections.has("ikong")) {
      const pillarLines: string[] = [];
@@ -605,16 +609,7 @@ async function formatContextMessage(
      if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
      if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
      if (pillarLines.length > 0) {
-       sections.push(
-         "GRAPH PILLARS (your structural context):\n" +
-         ` ${pillarLines.join(" | ")}\n` +
-         " IKONG cognitive architecture:\n" +
-         " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
-         " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
-         " O(peration): tool execution, skill procedures, causal chain tracking\n" +
-         " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
-         " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
-       );
+       sections.push(`GRAPH PILLARS: ${pillarLines.join(" | ")}`);
        session.injectedSections.add("ikong");
      }
    }
@@ -648,8 +643,10 @@ async function formatContextMessage(
      clearPendingDirectives(session);
    }

-   // Fibonacci resurfacing
-   try {
+   // Fibonacci resurfacing — only during conversational intents (noise during deep code work)
+   const RESURFACE_INTENTS = new Set(["simple-question", "meta-session", "unknown"]);
+   const currentIntent = session.currentConfig?.intent ?? "unknown";
+   if (RESURFACE_INTENTS.has(currentIntent)) try {
      const dueMemories = await store.getDueMemories(3);
      if (dueMemories.length > 0) {
        const memLines = dueMemories.map((m: any) => {
@@ -659,14 +656,7 @@ async function formatContextMessage(
          return ` - [${m.id}] (${ageStr}, surfaced ${m.surface_count}x): ${m.text}`;
        }).join("\n");
        sections.push(
-         `RESURFACING MEMORIES (Fibonacci schedule these are due for a mention):\n` +
-         `These memories are important but fading. Bring them up naturally when appropriate:\n` +
-         `- If mid-task on something important, wait until finished\n` +
-         `- During casual interaction: "I was thinking..." or "remember when you mentioned..."\n` +
-         `- If user engages: great! Continue that thread. The memory stays alive.\n` +
-         `- If user ignores or dismisses: let it fade. Don't force it.\n` +
-         `- NEVER say "my memory system scheduled this" — just bring it up like a thought you had.\n` +
-         memLines
+         `RESURFACING MEMORIES (mention naturally during conversation, never reveal scheduling):\n` + memLines
        );
      }
    } catch { /* non-critical */ }
@@ -748,11 +738,20 @@ function truncateToolResult(msg: AgentMessage, maxChars: number): AgentMessage {
    return { ...msg, content };
  }

- function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number, session?: SessionState): AgentMessage[] {
-   const budgetChars = maxTokens * CHARS_PER_TOKEN;
-   const TOOL_RESULT_MAX = Math.round(contextWindow * 0.03);
-
-   // Transform error messages into compact annotations
+ function getRecentTurns(
+   messages: AgentMessage[],
+   convTokens: number,
+   toolTokens: number,
+   contextWindow: number,
+   session?: SessionState,
+ ): AgentMessage[] {
+   const convBudgetChars = convTokens * CHARS_PER_TOKEN;
+   const toolBudgetChars = toolTokens * CHARS_PER_TOKEN;
+   // Per-tool-result char cap (claw-code: DEFAULT_MAX_RESULT_SIZE_CHARS = 50,000)
+   // Scale with context window but floor at 20k, cap at 50k
+   const TOOL_RESULT_MAX = Math.min(50_000, Math.max(20_000, Math.round(contextWindow * 0.10)));
+
+   // ── Phase 1: Transform error messages into compact annotations ──
    const clean = messages.map((m) => {
      if (isAssistant(m) && m.stopReason === "error") {
        const errorText = m.content
@@ -769,12 +768,88 @@ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWind
      return m;
    });

-   // Group messages into structural units
+   // ── Phase 2: Strip token-heavy content from non-recent messages ──
+   // (claw-code patterns: microcompact content-clearing, image stripping, thinking clearing)
+   const RECENT_KEEP = 5; // keep last N groups fully intact
+   const msgCount = clean.length;
+
+   // Find recency boundary: messages in the last RECENT_KEEP groups stay intact
+   // We need to identify which messages are "old" vs "recent"
+   // Count groups from the end to find the boundary index
+   let recentBoundary = msgCount;
+   {
+     let groupsSeen = 0;
+     for (let k = clean.length - 1; k >= 0 && groupsSeen < RECENT_KEEP; k--) {
+       recentBoundary = k;
+       const msg = clean[k];
+       // Each user message or standalone assistant message starts a new group
+       if (isUser(msg) || (isAssistant(msg) && !msg.content.some((c: ContentBlock) => c.type === "toolCall"))) {
+         groupsSeen++;
+       } else if (isAssistant(msg) && msg.content.some((c: ContentBlock) => c.type === "toolCall")) {
+         groupsSeen++;
+         // Skip past associated tool results (they're part of this group)
+       }
+     }
+   }
+
+   // Apply stripping to messages before the recency boundary
+   for (let k = 0; k < recentBoundary; k++) {
+     const msg = clean[k] as MutableMessage;
+     if (!msg.content || !Array.isArray(msg.content)) continue;
+
+     // Collapse old assistant filler text (agentic loop: "I'll now read..." / "Let me check...")
+     // Keep tool calls intact but shrink prose to 1-line summary
+     if (isAssistant(clean[k]) && msg.content.some((c: AnyContentBlock) => c.type === "toolCall")) {
+       msg.content = msg.content.map((c: AnyContentBlock) => {
+         if (c.type === "text" && c.text && c.text.length > 120) {
+           // Keep first line as summary (usually the intent statement)
+           const firstLine = c.text.split("\n")[0].slice(0, 120);
+           return { ...c, text: firstLine };
+         }
+         if (c.type === "thinking") {
+           return { type: "text" as const, text: "[thinking]" };
+         }
+         return c; // preserve toolCall blocks
+       });
+       continue; // skip generic stripping for this message
+     }
+
+     msg.content = msg.content.map((c: AnyContentBlock) => {
+       // Strip thinking blocks → [thinking] marker (often 1-5k tokens each)
+       if (c.type === "thinking") {
+         return { type: "text" as const, text: "[thinking]" };
+       }
+       // Strip images → [image] marker (2000 tokens each)
+       if (c.type === "image" || c.type === "image_url" || (c.type === "source" && c.media_type?.startsWith("image/"))) {
+         return { type: "text" as const, text: "[image]" };
+       }
+       // Content-clear old tool results → stub (claw-code: microcompact pattern)
+       if (c.type === "toolResult" && Array.isArray(c.content)) {
+         const stub = c.content.map((rc: AnyContentBlock) => {
+           if (rc.type === "text" && rc.text && rc.text.length > 200) {
+             return { ...rc, text: `[Old tool result cleared — ${rc.text.length} chars]` };
+           }
+           if (rc.type === "image" || rc.type === "image_url") {
+             return { type: "text" as const, text: "[image]" };
+           }
+           return rc;
+         });
+         return { ...c, content: stub };
+       }
+       // For tool result messages (top-level), clear oversized text blocks
+       if (c.type === "text" && isToolResult(clean[k]) && c.text && c.text.length > 200) {
+         return { ...c, text: `[Old tool result cleared — ${c.text.length} chars]` };
+       }
+       return c;
+     });
+   }
+
+   // ── Phase 3: Group messages into structural units ──
    const groups: AgentMessage[][] = [];
    let i = 0;
    while (i < clean.length) {
      const msg = clean[i];
-     if (isAssistant(msg) && msg.content.some((c: any) => c.type === "toolCall")) {
+     if (isAssistant(msg) && msg.content.some((c: ContentBlock) => c.type === "toolCall")) {
        const group: AgentMessage[] = [clean[i]];
        let j = i + 1;
        while (j < clean.length && isToolResult(clean[j])) {
@@ -800,17 +875,38 @@ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWind
      }
    }

-   // Take groups from end within budget
-   const pinnedLen = pinnedGroup ? pinnedGroup.reduce((s, m) => s + msgCharLen(m), 0) : 0;
-   const remainingBudget = budgetChars - pinnedLen;
-   let used = 0;
+   // Measure pinned group against both budgets
+   let pinnedConv = 0;
+   let pinnedTool = 0;
+   if (pinnedGroup) {
+     for (const m of pinnedGroup) {
+       if (isToolResult(m)) pinnedTool += msgCharLen(m);
+       else pinnedConv += msgCharLen(m);
+     }
+   }
+
+   // Take groups from end within split budgets
+   const remainingConv = convBudgetChars - pinnedConv;
+   const remainingTool = toolBudgetChars - pinnedTool;
+   let convUsed = 0;
+   let toolUsed = 0;
    const selectedGroups: AgentMessage[][] = [];
    for (let g = groups.length - 1; g >= 0; g--) {
      if (g === pinnedGroupIdx) continue;
-     const groupLen = groups[g].reduce((s, m) => s + msgCharLen(m), 0);
-     if (used + groupLen > remainingBudget && selectedGroups.length > 0) break;
+     let groupConv = 0;
+     let groupTool = 0;
+     for (const m of groups[g]) {
+       if (isToolResult(m)) groupTool += msgCharLen(m);
+       else groupConv += msgCharLen(m);
+     }
+     // Stop if either budget would overflow (but always include at least one group)
+     if (selectedGroups.length > 0) {
+       if (convUsed + groupConv > remainingConv) break;
+       if (groupTool > 0 && toolUsed + groupTool > remainingTool) break;
+     }
      selectedGroups.unshift(groups[g]);
-     used += groupLen;
+     convUsed += groupConv;
+     toolUsed += groupTool;
    }

    if (pinnedGroup && pinnedGroupIdx !== -1) {
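For scale, assuming a 200k-token window and the budget shares above, both character budgets land near 60k chars and the per-result cap bottoms out at its floor:

  // convBudgetChars = 14_950 * 4 ≈ 59_800 chars; toolBudgetChars = 14_950 * 4 ≈ 59_800 chars
  // TOOL_RESULT_MAX = min(50_000, max(20_000, round(200_000 * 0.10))) = 20_000 chars per tool result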
@@ -852,8 +948,8 @@ export interface GraphTransformResult {
  }

  /**
-  * Transform conversation messages using graph-based context retrieval.
-  * This is the core "assemble" logic called from ContextEngine.assemble().
+  * Main entry point for graph-based context assembly. Retrieves, scores, deduplicates,
+  * and budget-trims graph nodes, then splices them into the conversation message array.
   */
  export async function graphTransformContext(
    params: GraphTransformParams,
@@ -866,18 +962,24 @@ export async function graphTransformContext(
    // Done here (wrapper) so it attaches to any inner return path.
    // (claw-code pattern: static sections above __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__)
    let systemPromptSection: string | undefined;
+   let tier0ForSys: CoreMemoryEntry[] = [];
    try {
-     const tier0ForSys = store.isAvailable()
+     tier0ForSys = store.isAvailable()
        ? applyCoreBudget(await store.getAllCoreMemory(0), getTier0BudgetChars(budgets))
        : [];
      systemPromptSection = buildSystemPromptSection(session, tier0ForSys);
+     // Mark sections as injected so formatContextMessage() skips them (prevents duplication)
+     if (systemPromptSection) {
+       if (systemPromptSection.includes("GRAPH PILLARS")) session.injectedSections.add("ikong");
+       if (systemPromptSection.includes("CORE DIRECTIVES")) session.injectedSections.add("tier0");
+     }
    } catch { /* non-critical — tier0 will still appear in user message */ }

    // Never throw — return raw messages on any failure
    try {
      const TRANSFORM_TIMEOUT_MS = 10_000;
      const result = await Promise.race([
-       graphTransformInner(messages, session, store, embeddings, contextWindow, budgets, signal),
+       graphTransformInner(messages, session, store, embeddings, contextWindow, budgets, signal, tier0ForSys),
        new Promise<never>((_, reject) =>
          setTimeout(() => reject(new Error("graphTransformContext timed out")), TRANSFORM_TIMEOUT_MS),
        ),
@@ -885,7 +987,7 @@ export async function graphTransformContext(
      result.systemPromptSection = systemPromptSection;
      return result;
    } catch (err) {
-     console.error("graphTransformContext fatal error, returning raw messages:", err);
+     log.error("graphTransformContext fatal error, returning raw messages:", err);
      return {
        messages,
        stats: {
@@ -912,6 +1014,8 @@ async function graphTransformInner(
    contextWindow: number,
    budgets: Budgets,
    _signal?: AbortSignal,
+   /** Tier 0 entries already fetched by wrapper — avoids double DB fetch. */
+   tier0FromWrapper: CoreMemoryEntry[] = [],
  ): Promise<GraphTransformResult> {
    function makeStats(
      sent: AgentMessage[], graphNodes: number, neighborNodes: number,
@@ -940,7 +1044,7 @@ async function graphTransformInner(
    // Skip retrieval fast path — avoid DB queries entirely when model already has core memory
    // (claw-code pattern: simple_mode skips the load, not load-then-discard)
    if (skipRetrieval) {
-     const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+     const recentTurns = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
      // If model already saw core memory, just return recent turns + compressed rules. Zero DB queries.
      if (session.injectedSections.has("tier0")) {
        return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
@@ -956,7 +1060,7 @@ async function graphTransformInner(
      tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
      tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
    } catch (e) {
-     console.warn("[warn] Core memory load failed:", e);
+     log.warn("Core memory load failed:", e);
    }
    if (tier0.length > 0 || tier1.length > 0) {
      const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
@@ -970,14 +1074,13 @@ async function graphTransformInner(
    let tier0: CoreMemoryEntry[] = [];
    let tier1: CoreMemoryEntry[] = [];
    try {
-     [tier0, tier1] = await Promise.all([
-       store.getAllCoreMemory(0),
-       store.getAllCoreMemory(1),
-     ]);
-     tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
-     tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
+     // Tier 0 already fetched by wrapper (avoids double DB query)
+     tier0 = tier0FromWrapper.length > 0
+       ? tier0FromWrapper
+       : applyCoreBudget(await store.getAllCoreMemory(0), getTier0BudgetChars(budgets));
+     tier1 = applyCoreBudget(await store.getAllCoreMemory(1), getTier1BudgetChars(budgets));
    } catch (e) {
-     console.warn("[warn] Core memory load failed:", e);
+     swallow.warn("graph-context:coreMemoryLoad", e);
    }

    // Graceful degradation
@@ -985,7 +1088,7 @@ async function graphTransformInner(
    const surrealUp = store.isAvailable();

    if (!embeddingsUp || !surrealUp) {
-     const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+     const recentTurns = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
      if (tier0.length > 0 || tier1.length > 0) {
        const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
        const result = [coreContext, ...recentTurns];
@@ -1001,7 +1104,7 @@ async function graphTransformInner(

    const currentIntent = config?.intent ?? "unknown";
    const baseLimits = config?.vectorSearchLimits ?? {
-     turn: 25, identity: 10, concept: 20, memory: 20, artifact: 10,
+     turn: 25, identity: 10, concept: 35, memory: 20, artifact: 10,
    };
    // Scale search limits with context window — larger windows can use more results
    const cwScale = Math.max(0.5, Math.min(2.0, contextWindow / 200_000));
@@ -1016,7 +1119,7 @@ async function graphTransformInner(
    let tokenBudget = Math.min(config?.tokenBudget ?? 6000, budgets.retrieval);

    try {
-     const queryVec = await buildContextualQueryVec(queryText, messages, embeddings);
+     const queryVec = await buildContextualQueryVec(queryText, messages, embeddings, session);
      session.lastQueryVec = queryVec; // Stash for redundant recall detection

      // Prefetch cache check
@@ -1042,15 +1145,22 @@ async function graphTransformInner(
        const reflCtx = cached.reflections.length > 0 ? formatReflectionContext(cached.reflections) : "";

        const injectedContext = await formatContextMessage(contextNodes, store, session, skillCtx + reflCtx, tier0, tier1);
-       const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+       const recentTurns = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
        const result = [injectedContext, ...recentTurns];
        return { messages: injectRulesSuffix(result, session), stats: makeStats(result, contextNodes.length, 0, recentTurns.length, "graph", true) };
      }
    }

-   // Vector search (cache miss path)
+   // Vector search + tag-boosted retrieval (cache miss path, run in parallel)
    recordPrefetchMiss();
-   const results = await store.vectorSearch(queryVec, session.sessionId, vectorSearchLimits, isACANActive());
+   const [vectorResults, tagResults] = await Promise.all([
+     store.vectorSearch(queryVec, session.sessionId, vectorSearchLimits, isACANActive()),
+     store.tagBoostedConcepts(queryText, queryVec, 10).catch(e => { swallow.warn("graph-context:tagBoost", e); return [] as VectorSearchResult[]; }),
+   ]);
+   // Merge: dedupe tag results against vector results, then combine
+   const vectorIds = new Set(vectorResults.map(r => r.id));
+   const uniqueTagResults = tagResults.filter(r => !vectorIds.has(r.id));
+   const results = [...vectorResults, ...uniqueTagResults];

    // Graph neighbor expansion
    const topIds = results
@@ -1061,28 +1171,21 @@ async function graphTransformInner(
    const DEEP_INTENTS = new Set(["code-debug", "deep-explore", "multi-step", "reference-prior"]);
    const graphHops = DEEP_INTENTS.has(currentIntent) ? 2 : 1;

+   // Graph expand + causal traversal run in parallel (both depend only on topIds)
    let neighborIds = new Set<string>();
    let neighborResults: VectorSearchResult[] = [];
-   if (topIds.length > 0) {
-     try {
-       neighborResults = await store.graphExpand(topIds, queryVec, graphHops);
-       neighborIds = new Set(neighborResults.map((n) => n.id));
-       const existingIds = new Set(results.map((r) => r.id));
-       neighborResults = neighborResults.filter((n) => !existingIds.has(n.id));
-     } catch (e) {
-       swallow.error("graph-context:graphExpand", e);
-     }
-   }
-
-   // Causal chain traversal
    let causalResults: VectorSearchResult[] = [];
-   if (topIds.length > 0 && queryVec) {
-     try {
-       const causal = await queryCausalContext(topIds, queryVec, 2, 0.4, store);
-       const existingIds = new Set([...results.map((r) => r.id), ...neighborResults.map((r) => r.id)]);
-       causalResults = causal.filter((c) => !existingIds.has(c.id));
-       for (const c of causalResults) { neighborIds.add(c.id); }
-     } catch (e) { swallow("graph-context:causal", e); }
+   if (topIds.length > 0) {
+     const existingIds = new Set(results.map((r) => r.id));
+     const [expandResult, causalResult] = await Promise.all([
+       store.graphExpand(topIds, queryVec, graphHops).catch(e => { swallow.error("graph-context:graphExpand", e); return [] as VectorSearchResult[]; }),
+       queryVec ? queryCausalContext(topIds, queryVec, 2, 0.4, store).catch(e => { swallow("graph-context:causal", e); return [] as VectorSearchResult[]; }) : Promise.resolve([] as VectorSearchResult[]),
+     ]);
+     neighborResults = expandResult.filter((n) => !existingIds.has(n.id));
+     neighborIds = new Set(neighborResults.map((n) => n.id));
+     const allExisting = new Set([...existingIds, ...neighborResults.map((r) => r.id)]);
+     causalResults = causalResult.filter((c) => !allExisting.has(c.id));
+     for (const c of causalResults) { neighborIds.add(c.id); }
    }

    // Combine, filter, score
@@ -1099,7 +1202,7 @@ async function graphTransformInner(
    contextNodes = await ensureRecentTurns(contextNodes, session.sessionId, store);

    if (contextNodes.length === 0) {
-     const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+     const result = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
      return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "graph") };
    }

@@ -1131,7 +1234,7 @@ async function graphTransformInner(
    } catch (e) { swallow("graph-context:reflections", e); }

    const injectedContext = await formatContextMessage(contextNodes, store, session, skillContext + reflectionContext, tier0, tier1);
-   const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+   const recentTurns = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
    const result = [injectedContext, ...recentTurns];
    return {
      messages: injectRulesSuffix(result, session),
@@ -1143,8 +1246,8 @@ async function graphTransformInner(
      ),
    };
  } catch (err) {
-   console.error("Graph context error, falling back:", err);
-   const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+   log.error("Graph context error, falling back:", err);
+   const result = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
    return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "recency-only") };
  }
  }