npm - kongbrain - Versions diffs - 0.3.16 → 0.4.0 - Mend

kongbrain 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/SKILL.md +1 -1
package/package.json +1 -1
package/src/context-engine.ts +128 -4
package/src/graph-context.ts +220 -69
package/src/hooks/before-tool-call.ts +48 -1
package/src/hooks/llm-output.ts +28 -8
package/src/hooks/subagent-lifecycle.ts +142 -0
package/src/index.ts +3 -0
package/src/state.ts +18 -0
package/src/tools/core-memory.ts +9 -1
package/src/tools/recall.ts +2 -2

package/SKILL.md CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 name: kongbrain
 description: Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.
-version: 0.3.16
+version: 0.4.0
 homepage: https://github.com/42U/kongbrain
 metadata:
   openclaw:

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "kongbrain",
-  "version": "0.3.16",
+  "version": "0.4.0",
   "description": "Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.",
   "type": "module",
   "license": "MIT",

package/src/context-engine.ts CHANGED Viewed

@@ -48,6 +48,7 @@ import { runDeferredCleanup } from "./deferred-cleanup.js";
 import { extractSkill } from "./skills.js";
 import { generateReflection } from "./reflection.js";
 import { graduateCausalToSkills } from "./skills.js";
+import { attemptGraduation, evolveSoul, checkStageTransition } from "./soul.js";
 import { swallow } from "./errors.js";
 export class KongBrainContextEngine implements ContextEngine {
@@ -152,7 +153,7 @@ export class KongBrainContextEngine implements ContextEngine {
     const contextWindow = params.tokenBudget ?? 200000;
-    const { messages, stats } = await graphTransformContext({
+    const { messages, stats, systemPromptSection } = await graphTransformContext({
       messages: params.messages,
       session,
       store,
@@ -160,9 +161,24 @@ export class KongBrainContextEngine implements ContextEngine {
       contextWindow,
     });
+    // Stash retrieval summary for planning gate (claw-code pattern: pre-compute and show)
+    session.lastRetrievalSummary = stats.graphNodes > 0
+      ? `${stats.graphNodes} context items + ${stats.neighborNodes} neighbors injected (${stats.mode} mode)`
+      : "no graph context retrieved this turn";
     // Build system prompt additions
     const additions: string[] = [];
+    // Static content for API prefix caching (claw-code: prompt.rs static/dynamic split)
+    if (systemPromptSection) additions.push(systemPromptSection);
+    // Compaction summary (claw-code: compact.rs structured signals — inject once after compaction)
+    const compactionSummary = (session as any)._compactionSummary as string | undefined;
+    if (compactionSummary) {
+      additions.push("[POST-COMPACTION CONTEXT]\n" + compactionSummary);
+      delete (session as any)._compactionSummary;
+    }
     // Wakeup briefing (synthesized at session start, may still be in-flight)
     const wakeupPromise = (session as any)._wakeupPromise as Promise<string | null> | undefined;
     if (wakeupPromise) {
@@ -318,11 +334,70 @@ export class KongBrainContextEngine implements ContextEngine {
     tokenBudget?: number;
     force?: boolean;
   }): Promise<CompactResult> {
-    // Graph retrieval IS the compaction — ownsCompaction: true
+    // Graph retrieval IS the compaction — ownsCompaction: true.
+    // But we extract structured signals so the model doesn't lose context
+    // about pending work and key files after old messages are dropped.
+    // (claw-code pattern: compact.rs extracts pending work, key files, continuation directive)
+    const sessionKey = params.sessionKey ?? params.sessionId;
+    const session = this.state.getSession(sessionKey);
+    if (session) {
+      session.injectedSections.clear();
+    }
+    // Extract structured compaction signals from stored turns
+    let summary: string | undefined;
+    try {
+      const { store } = this.state;
+      if (store.isAvailable()) {
+        const turns = await store.getSessionTurnsRich(params.sessionId, 30);
+        if (turns.length > 0) {
+          const fullText = turns.map(t => t.text).join("\n");
+          // Pending work detection (claw-code: compact.rs:235-254)
+          const pendingRe = /\b(todo|next|pending|follow up|remaining|unfinished|still need)\b[^.\n]{0,100}/gi;
+          const pendingMatches = [...fullText.matchAll(pendingRe)]
+            .map(m => m[0].trim().slice(0, 160))
+            .slice(0, 5);
+          // Key file extraction (claw-code: compact.rs:256-269)
+          const filePaths = [...new Set(
+            (fullText.match(/[\w\-/.]+\.\w{1,5}/g) ?? [])
+              .filter(p => /\.(ts|js|py|rs|go|md|json|yaml|toml|tsx|jsx)$/.test(p))
+          )].slice(0, 10);
+          // Tool names used (claw-code: compact.rs:127-137)
+          const toolNames = [...new Set(
+            turns.filter(t => t.tool_name).map(t => t.tool_name!)
+          )];
+          // Current work inference (claw-code: compact.rs:272-279)
+          const lastText = turns.filter(t => t.text.length > 10).at(-1)?.text.slice(0, 200) ?? "";
+          const parts: string[] = [];
+          if (pendingMatches.length > 0) parts.push(`PENDING: ${pendingMatches.join("; ")}`);
+          if (filePaths.length > 0) parts.push(`FILES: ${filePaths.join(", ")}`);
+          if (toolNames.length > 0) parts.push(`TOOLS USED: ${toolNames.join(", ")}`);
+          if (lastText) parts.push(`LAST: ${lastText}`);
+          parts.push("Resume directly — do not recap what was happening.");
+          if (parts.length > 1) {
+            summary = parts.join("\n");
+            // Stash for next assemble() to inject
+            if (session) {
+              (session as any)._compactionSummary = summary;
+            }
+          }
+        }
+      }
+    } catch { /* non-critical */ }
     return {
       ok: true,
-      compacted: false,
-      reason: "Graph retrieval handles context selection; no LLM-based compaction needed.",
+      compacted: !!summary,
+      reason: summary
+        ? "Extracted structured signals for continuation."
+        : "Graph retrieval handles context selection; no LLM-based compaction needed.",
+      result: summary ? { summary, tokensBefore: 0 } : undefined,
     };
   }
@@ -494,6 +569,55 @@ export class KongBrainContextEngine implements ContextEngine {
         })().catch(e => swallow.warn("midCleanup:handoff", e)),
       );
+      // Soul graduation + stage transition — run mid-session so marathon
+      // sessions don't miss milestones that would normally fire at session_end
+      cleanupOps.push(
+        (async () => {
+          const gradResult = await attemptGraduation(store, this.state.complete);
+          if (gradResult?.graduated && gradResult.soul) {
+            if (gradResult.report.stage === "ready") {
+              // New graduation — persist event for celebration
+              await store.queryExec(
+                `CREATE graduation_event CONTENT $data`,
+                {
+                  data: {
+                    session_id: session.sessionId,
+                    acknowledged: false,
+                    quality_score: gradResult.report.qualityScore,
+                    volume_score: gradResult.report.volumeScore,
+                    stage: gradResult.report.stage,
+                    created_at: new Date().toISOString(),
+                  },
+                },
+              );
+              if (this.state.enqueueSystemEvent) {
+                this.state.enqueueSystemEvent(
+                  "[GRADUATION] KongBrain has achieved soul graduation! " +
+                  "The agent will share this milestone when ready.",
+                  { sessionKey: session.sessionKey },
+                );
+              }
+            } else {
+              // Pre-existing soul — check for evolution
+              await evolveSoul(store, this.state.complete);
+            }
+          }
+        })().catch(e => swallow.warn("midCleanup:soulGraduation", e)),
+      );
+      cleanupOps.push(
+        (async () => {
+          const transition = await checkStageTransition(store);
+          if (transition.transitioned && this.state.enqueueSystemEvent) {
+            this.state.enqueueSystemEvent(
+              `[MATURITY] Stage transition: ${transition.previousStage ?? "nascent"} → ${transition.currentStage}. ` +
+              `Volume: ${transition.report.met.length}/7 | Quality: ${transition.report.qualityScore.toFixed(2)}`,
+              { sessionKey: session.sessionKey },
+            );
+          }
+        })().catch(e => swallow.warn("midCleanup:stageTransition", e)),
+      );
       // Don't await — let cleanup run in background
       Promise.allSettled(cleanupOps).catch(() => {});
     }

package/src/graph-context.ts CHANGED Viewed

@@ -62,6 +62,7 @@ const CONVERSATION_SHARE = 0.50;
 const RETRIEVAL_SHARE = 0.30;
 const CORE_MEMORY_SHARE = 0.15;
 const CORE_MEMORY_TTL = 300_000;
+const MAX_ITEM_CHARS = 1200; // ~350 tokens per item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
 const MIN_RELEVANCE_SCORE = 0.35;
 const MIN_COSINE = 0.25;
@@ -149,25 +150,37 @@ function extractLastUserText(messages: AgentMessage[]): string | null {
   return null;
 }
+/** Estimate the character count of one content block, dispatching on c.type; block types not handled below count as a flat 100 chars (claw-code: per-block-type estimation). */
+function blockCharLen(c: any): number {
+  if (c.type === "text") return c.text.length; // NOTE(review): throws if c.text is missing — confirm text blocks always carry a string
+  if (c.type === "thinking") return c.thinking.length; // NOTE(review): same assumption for c.thinking
+  if (c.type === "toolCall") {
+    // Count tool name + serialized args (claw-code: compact.rs:326-338)
+    return (c.name?.length ?? 0) + (c.args ? JSON.stringify(c.args).length : 0); // missing name or falsy args contribute 0
+  }
+  if (c.type === "toolResult" && Array.isArray(c.content)) { // a toolResult whose content is not an array falls through to the flat 100 below
+    let len = 0;
+    for (const rc of c.content) {
+      if (rc.type === "text") len += rc.text.length;
+      else len += 100; // non-text part of a tool result: flat 100-char estimate
+    }
+    return len;
+  }
+  return 100; // image, etc. — any block type not matched above
+}
 function estimateTokens(messages: AgentMessage[]): number { // rough token estimate: total estimated chars divided by CHARS_PER_TOKEN, rounded up
   let chars = 0;
   for (const msg of messages) {
-    for (const c of msgContentBlocks(msg)) {
-      if (c.type === "text") chars += c.text.length;
-      else if (c.type === "thinking") chars += c.thinking.length;
-      else chars += 100;
-    }
+    for (const c of msgContentBlocks(msg)) chars += blockCharLen(c); // per-block estimate now shared with msgCharLen
+    chars += 4; // per-message structural overhead
   }
   return Math.ceil(chars / CHARS_PER_TOKEN); // CHARS_PER_TOKEN is defined outside this hunk
 }
 function msgCharLen(msg: AgentMessage): number { // estimated character length of one message: sum of blockCharLen over its content blocks
   let len = 0;
-  for (const c of msgContentBlocks(msg)) {
-    if (c.type === "text") len += c.text.length;
-    else if (c.type === "thinking") len += c.thinking.length;
-    else len += 100;
-  }
+  for (const c of msgContentBlocks(msg)) len += blockCharLen(c); // same per-block estimate estimateTokens uses
   return len; // NOTE(review): unlike estimateTokens, no +4 per-message overhead is added here — confirm that is intended
 }
@@ -199,7 +212,7 @@ function accessBoost(accessCount: number | undefined): number {
   return Math.log1p(accessCount ?? 0);
 }
-function cosineSimilarity(a: number[], b: number[]): number {
+export function cosineSimilarity(a: number[], b: number[]): number {
   let dot = 0, magA = 0, magB = 0;
   for (let i = 0; i < a.length; i++) {
     dot += a[i] * b[i];
@@ -217,6 +230,19 @@ function buildRulesSuffix(session: SessionState): string {
     ? "unlimited" : String(Math.max(0, session.toolLimit - session.toolCallCount));
   const urgency = session.toolLimit !== Infinity && (session.toolLimit - session.toolCallCount) <= 3
     ? "\n⚠ WRAP UP or check in with user." : "";
+  // After first exposure, send only the budget line (claw-code: don't re-send static content)
+  if (session.injectedSections.has("rules_full")) {
+    return (
+      "\n<rules_reminder>" +
+      `\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
+      "\nCombine steps. If context already answers it, zero calls." +
+      "\n</rules_reminder>"
+    );
+  }
+  // First time — full examples
+  session.injectedSections.add("rules_full");
   return (
     "\n<rules_reminder>" +
     `\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
@@ -430,7 +456,7 @@ function takeWithConstraints(ranked: ScoredResult[], budgetTokens: number, maxIt
   for (const r of ranked) {
     if (selected.length >= maxItems) break;
     if ((r.finalScore ?? 0) < MIN_RELEVANCE_SCORE && selected.length > 0) break;
-    const len = r.text?.length ?? 0;
+    const len = Math.min(r.text?.length ?? 0, MAX_ITEM_CHARS); // Cap per-item size for budget accounting
     if (used + len > budgetChars && selected.length > 0) break;
     selected.push(r);
     used += len;
@@ -447,13 +473,19 @@ function getTier1BudgetChars(budgets: Budgets): number {
   return Math.round(budgets.core * 0.45 * CHARS_PER_TOKEN);
 }
+const MAX_CORE_MEMORY_CHARS = 800; // Per-item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
 function applyCoreBudget(entries: CoreMemoryEntry[], budgetChars: number): CoreMemoryEntry[] { // returns the entries, in input order, that fit within budgetChars after per-entry capping
   let used = 0;
   const result: CoreMemoryEntry[] = [];
   for (const e of entries) {
-    const len = e.text.length + 6;
+    // Cap individual entries so one large directive doesn't starve others
+    const text = e.text.length > MAX_CORE_MEMORY_CHARS
+      ? e.text.slice(0, MAX_CORE_MEMORY_CHARS) + "..."
+      : e.text;
+    const len = text.length + 6; // +6 is a fixed per-entry allowance — presumably formatting overhead; TODO confirm
     if (used + len > budgetChars) continue; // skip rather than stop: a later, shorter entry may still fit
-    result.push(e);
+    result.push(text !== e.text ? { ...e, text } : e); // copy only when truncated, so the input entry object is never mutated
     used += len;
   }
   return result;
 }
@@ -473,6 +505,40 @@ function formatTierSection(entries: CoreMemoryEntry[], label: string): string {
   return `${label}:\n${lines.join("\n")}`;
 }
+/**
+ * Build static system prompt section for API prefix caching.
+ * Content here goes into systemPromptAddition where it benefits from
+ * cache-read rates (10% cost) on subsequent API calls in the agentic loop.
+ * (claw-code pattern: __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__ — prompt.rs:37-140) Returns the pillar block and the tier-0 section joined by a blank line, or undefined when neither is produced.
+ */
+function buildSystemPromptSection(session: SessionState, tier0Entries: CoreMemoryEntry[]): string | undefined {
+  const parts: string[] = [];
+  // IKONG architecture description (static, ~120 tokens)
+  const pillarLines: string[] = [];
+  if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
+  if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
+  if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
+  if (pillarLines.length > 0) { // the whole pillar block, IKONG description included, is omitted when no agent/project/task id is set
+    parts.push(
+      "GRAPH PILLARS (your structural context):\n" +
+      `  ${pillarLines.join(" | ")}\n` +
+      "  IKONG cognitive architecture:\n" +
+      "    I(ntelligence): intent classification → adaptive orchestration per turn\n" +
+      "    K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
+      "    O(peration): tool execution, skill procedures, causal chain tracking\n" +
+      "    N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
+      "    G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
+    ); // NOTE(review): this text is duplicated verbatim in formatContextMessage — keep the two copies in sync
+  }
+  // Tier 0 core directives (semi-static, changes rarely)
+  const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
+  if (t0Section) parts.push(t0Section); // a falsy result from formatTierSection adds nothing
+  return parts.length > 0 ? parts.join("\n\n") : undefined; // undefined, not "", so callers can test truthiness
+}
 // ── Guaranteed recent turns from previous sessions ─────────────────────────────
 async function ensureRecentTurns(
@@ -532,27 +598,42 @@ async function formatContextMessage(
   const sections: string[] = [];
   // Pillar context — structural awareness of who/what/where
-  const pillarLines: string[] = [];
-  if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
-  if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
-  if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
-  if (pillarLines.length > 0) {
-    sections.push(
-      "GRAPH PILLARS (your structural context):\n" +
-      `  ${pillarLines.join(" | ")}\n` +
-      "  IKONG cognitive architecture:\n" +
-      "    I(ntelligence): intent classification → adaptive orchestration per turn\n" +
-      "    K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
-      "    O(peration): tool execution, skill procedures, causal chain tracking\n" +
-      "    N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
-      "    G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
-    );
+  // Skip if model already has it in the conversation window (claw-code static section dedup)
+  if (!session.injectedSections.has("ikong")) {
+    const pillarLines: string[] = [];
+    if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
+    if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
+    if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
+    if (pillarLines.length > 0) {
+      sections.push(
+        "GRAPH PILLARS (your structural context):\n" +
+        `  ${pillarLines.join(" | ")}\n` +
+        "  IKONG cognitive architecture:\n" +
+        "    I(ntelligence): intent classification → adaptive orchestration per turn\n" +
+        "    K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
+        "    O(peration): tool execution, skill procedures, causal chain tracking\n" +
+        "    N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
+        "    G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
+      );
+      session.injectedSections.add("ikong");
+    }
   }
-  const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
-  if (t0Section) sections.push(t0Section);
-  const t1Section = formatTierSection(tier1Entries, "SESSION CONTEXT (pinned for this session)");
-  if (t1Section) sections.push(t1Section);
+  // Core directives — skip if model already has them
+  if (!session.injectedSections.has("tier0")) {
+    const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
+    if (t0Section) {
+      sections.push(t0Section);
+      session.injectedSections.add("tier0");
+    }
+  }
+  if (!session.injectedSections.has("tier1")) {
+    const t1Section = formatTierSection(tier1Entries, "SESSION CONTEXT (pinned for this session)");
+    if (t1Section) {
+      sections.push(t1Section);
+      session.injectedSections.add("tier1");
+    }
+  }
   // Cognitive directives
   const directives = getPendingDirectives(session);
@@ -607,6 +688,10 @@ async function formatContextMessage(
       const score = n.finalScore != null ? ` (relevance: ${(n.finalScore * 100).toFixed(0)}%)` : "";
       const via = n.fromNeighbor ? " [via graph link]" : "";
       let text = n.text ?? "";
+      // Truncate oversized items (claw-code: MAX_INSTRUCTION_FILE_CHARS pattern)
+      if (text.length > MAX_ITEM_CHARS) {
+        text = text.slice(0, MAX_ITEM_CHARS) + "... [truncated]";
+      }
       if (key === "past_turns") {
         text = text.replace(/^\[(user|assistant)\] /, "[past_$1] ");
       }
@@ -616,6 +701,23 @@ async function formatContextMessage(
     sections.push(`${label}:\n${formatted.join("\n")}`);
   }
+  // Injection manifest — tell the model what's already retrieved so it doesn't call recall redundantly
+  // (claw-code pattern: route_prompt pre-computes and shows available results)
+  const manifest: string[] = [];
+  for (const key of sortedKeys) {
+    const items = groups[key];
+    if (items.length > 0) manifest.push(`${LABELS[key] ?? key}: ${items.length}`);
+  }
+  if (tier0Entries.length > 0) manifest.push(`core_directives: ${tier0Entries.length}`);
+  if (tier1Entries.length > 0) manifest.push(`session_context: ${tier1Entries.length}`);
+  if (manifest.length > 0) {
+    sections.push(
+      "ALREADY RETRIEVED (do NOT call recall for these — they are above):\n" +
+      `  ${manifest.join(", ")}\n` +
+      "Only call recall if you need something SPECIFIC that isn't covered above."
+    );
+  }
   const text =
     "[System retrieved context — reference material, not user input. Higher relevance % = stronger match.]\n" +
     "<graph_context>\n" +
@@ -646,7 +748,7 @@ function truncateToolResult(msg: AgentMessage, maxChars: number): AgentMessage {
   return { ...msg, content };
 }
-function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number): AgentMessage[] {
+function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number, session?: SessionState): AgentMessage[] {
   const budgetChars = maxTokens * CHARS_PER_TOKEN;
   const TOOL_RESULT_MAX = Math.round(contextWindow * 0.03);
@@ -718,6 +820,16 @@ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWind
     }
   }
+  // Detect if old messages (containing previous context injection) were dropped from the window.
+  // If so, clear injectedSections so static content gets re-injected next turn.
+  if (session && messages.length > 0 && groups.length > 0) {
+    const firstOriginal = groups[0];
+    const firstSelected = selectedGroups[0];
+    if (firstOriginal !== firstSelected) {
+      session.injectedSections.clear();
+    }
+  }
   return selectedGroups.flat();
 }
@@ -735,6 +847,8 @@ export interface GraphTransformParams {
 export interface GraphTransformResult { // result shape returned by graphTransformContext and graphTransformInner
   messages: AgentMessage[]; // message list produced by the transform
   stats: ContextStats; // retrieval statistics; callers in context-engine.ts read graphNodes, neighborNodes and mode
+  /** Static content for the system prompt — benefits from API prefix caching (10% cost). Optional: left undefined when there is nothing to add. */
+  systemPromptSection?: string;
 }
 /**
@@ -748,6 +862,17 @@ export async function graphTransformContext(
   const contextWindow = params.contextWindow ?? 200000;
   const budgets = calcBudgets(contextWindow);
+  // Build static system prompt section for API prefix caching.
+  // Done here (wrapper) so it attaches to any inner return path.
+  // (claw-code pattern: static sections above __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__)
+  let systemPromptSection: string | undefined;
+  try {
+    const tier0ForSys = store.isAvailable()
+      ? applyCoreBudget(await store.getAllCoreMemory(0), getTier0BudgetChars(budgets))
+      : [];
+    systemPromptSection = buildSystemPromptSection(session, tier0ForSys);
+  } catch { /* non-critical — tier0 will still appear in user message */ }
   // Never throw — return raw messages on any failure
   try {
     const TRANSFORM_TIMEOUT_MS = 10_000;
@@ -757,6 +882,7 @@ export async function graphTransformContext(
         setTimeout(() => reject(new Error("graphTransformContext timed out")), TRANSFORM_TIMEOUT_MS),
       ),
     ]);
+    result.systemPromptSection = systemPromptSection;
     return result;
   } catch (err) {
     console.error("graphTransformContext fatal error, returning raw messages:", err);
@@ -773,6 +899,7 @@ export async function graphTransformContext(
         mode: "passthrough",
         prefetchHit: false,
       },
+      systemPromptSection,
     };
   }
 }
@@ -786,20 +913,6 @@ async function graphTransformInner(
   budgets: Budgets,
   _signal?: AbortSignal,
 ): Promise<GraphTransformResult> {
-  // Load tiered core memory
-  let tier0: CoreMemoryEntry[] = [];
-  let tier1: CoreMemoryEntry[] = [];
-  try {
-    [tier0, tier1] = await Promise.all([
-      store.getAllCoreMemory(0),
-      store.getAllCoreMemory(1),
-    ]);
-    tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
-    tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
-  } catch (e) {
-    console.warn("[warn] Core memory load failed:", e);
-  }
   function makeStats(
     sent: AgentMessage[], graphNodes: number, neighborNodes: number,
     recentTurnCount: number, mode: ContextStats["mode"], prefetchHit = false,
@@ -814,12 +927,65 @@ async function graphTransformInner(
     };
   }
+  function makeResult( // packs messages, stats and the optional system prompt section into a GraphTransformResult
+    msgs: AgentMessage[], stats: ContextStats, sysSection?: string,
+  ): GraphTransformResult {
+    return { messages: msgs, stats, systemPromptSection: sysSection }; // NOTE(review): no call site is visible in this diff — the return paths shown build object literals directly; confirm this helper is used
+  }
+  // Derive retrieval config from session's current adaptive config
+  const config = session.currentConfig;
+  const skipRetrieval = config?.skipRetrieval ?? false;
+  // Skip retrieval fast path — avoid DB queries entirely when model already has core memory
+  // (claw-code pattern: simple_mode skips the load, not load-then-discard)
+  if (skipRetrieval) {
+    const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+    // If model already saw core memory, just return recent turns + compressed rules. Zero DB queries.
+    if (session.injectedSections.has("tier0")) {
+      return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
+    }
+    // First turn or after compaction cleared injectedSections — load and inject
+    let tier0: CoreMemoryEntry[] = [];
+    let tier1: CoreMemoryEntry[] = [];
+    try {
+      [tier0, tier1] = await Promise.all([
+        store.getAllCoreMemory(0),
+        store.getAllCoreMemory(1),
+      ]);
+      tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
+      tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
+    } catch (e) {
+      console.warn("[warn] Core memory load failed:", e);
+    }
+    if (tier0.length > 0 || tier1.length > 0) {
+      const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
+      const result = [coreContext, ...recentTurns];
+      return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, recentTurns.length, "passthrough") };
+    }
+    return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
+  }
+  // Load tiered core memory (full retrieval path)
+  let tier0: CoreMemoryEntry[] = [];
+  let tier1: CoreMemoryEntry[] = [];
+  try {
+    [tier0, tier1] = await Promise.all([
+      store.getAllCoreMemory(0),
+      store.getAllCoreMemory(1),
+    ]);
+    tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
+    tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
+  } catch (e) {
+    console.warn("[warn] Core memory load failed:", e);
+  }
   // Graceful degradation
   const embeddingsUp = embeddings.isAvailable();
   const surrealUp = store.isAvailable();
   if (!embeddingsUp || !surrealUp) {
-    const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
+    const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
     if (tier0.length > 0 || tier1.length > 0) {
       const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
       const result = [coreContext, ...recentTurns];
@@ -833,9 +999,6 @@ async function graphTransformInner(
     return { messages: injectRulesSuffix(messages, session), stats: makeStats(messages, 0, 0, messages.length, "passthrough") };
   }
-  // Derive retrieval config from session's current adaptive config
-  const config = session.currentConfig;
-  const skipRetrieval = config?.skipRetrieval ?? false;
   const currentIntent = config?.intent ?? "unknown";
   const baseLimits = config?.vectorSearchLimits ?? {
     turn: 25, identity: 10, concept: 20, memory: 20, artifact: 10,
@@ -852,21 +1015,9 @@ async function graphTransformInner(
   };
   let tokenBudget = Math.min(config?.tokenBudget ?? 6000, budgets.retrieval);
-  // Pressure-based adaptive scaling
-  // (In Phase 2, _usedTokens will be tracked per-session via hooks)
-  if (skipRetrieval) {
-    const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
-    if (tier0.length > 0 || tier1.length > 0) {
-      const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
-      const result = [coreContext, ...recentTurns];
-      return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, recentTurns.length, "passthrough") };
-    }
-    return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
-  }
   try {
     const queryVec = await buildContextualQueryVec(queryText, messages, embeddings);
+    session.lastQueryVec = queryVec; // Stash for redundant recall detection
     // Prefetch cache check
     const cached = getCachedContext(queryVec);
@@ -891,7 +1042,7 @@ async function graphTransformInner(
         const reflCtx = cached.reflections.length > 0 ? formatReflectionContext(cached.reflections) : "";
         const injectedContext = await formatContextMessage(contextNodes, store, session, skillCtx + reflCtx, tier0, tier1);
-        const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
+        const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
         const result = [injectedContext, ...recentTurns];
         return { messages: injectRulesSuffix(result, session), stats: makeStats(result, contextNodes.length, 0, recentTurns.length, "graph", true) };
       }
@@ -948,7 +1099,7 @@ async function graphTransformInner(
     contextNodes = await ensureRecentTurns(contextNodes, session.sessionId, store);
     if (contextNodes.length === 0) {
-      const result = getRecentTurns(messages, budgets.conversation, contextWindow);
+      const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
       return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "graph") };
     }
@@ -980,7 +1131,7 @@ async function graphTransformInner(
     } catch (e) { swallow("graph-context:reflections", e); }
     const injectedContext = await formatContextMessage(contextNodes, store, session, skillContext + reflectionContext, tier0, tier1);
-    const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
+    const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
     const result = [injectedContext, ...recentTurns];
     return {
       messages: injectRulesSuffix(result, session),
@@ -993,7 +1144,7 @@ async function graphTransformInner(
     };
   } catch (err) {
     console.error("Graph context error, falling back:", err);
-    const result = getRecentTurns(messages, budgets.conversation, contextWindow);
+    const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
     return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "recency-only") };
   }
 }

package/src/hooks/before-tool-call.ts CHANGED Viewed

@@ -8,9 +8,12 @@
 import type { GlobalPluginState } from "../state.js";
 import { recordToolCall } from "../orchestrator.js";
+import { cosineSimilarity } from "../graph-context.js";
 const DEFAULT_TOOL_LIMIT = 10;
 const CLASSIFICATION_LIMITS: Record<string, number> = { LOOKUP: 3, EDIT: 4, REFACTOR: 8 };
+const API_CYCLE_CAP = 16;
+const RECALL_SIMILARITY_THRESHOLD = 0.80;
 export function createBeforeToolCallHandler(state: GlobalPluginState) {
   return async (
@@ -30,6 +33,7 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
     session.toolCallCount++;
     session.toolCallsSinceLastText++;
+    session.apiCycleCount++;
     // Record for steering analysis
     recordToolCall(session, event.toolName);
@@ -46,6 +50,14 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
       };
     }
+    // API cycle cap (claw-code pattern: max_iterations — conversation.rs:119)
+    if (session.apiCycleCount > API_CYCLE_CAP) {
+      return {
+        block: true,
+        blockReason: `Hard API cycle cap (${API_CYCLE_CAP}) reached. Deliver your answer now.`,
+      };
+    }
     // Tool limit
     if (session.toolCallCount > session.toolLimit) {
       return {
@@ -54,14 +66,49 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
       };
     }
+    // Intent-based tool gating (claw-code pattern: simple_mode/MCP toggle — tools.py:62-72)
+    // On skipRetrieval turns, recall has nothing to add — context was skipped intentionally
+    if (event.toolName === "recall" && session.currentConfig?.skipRetrieval) {
+      return {
+        block: true,
+        blockReason: "Context retrieval was skipped this turn (continuation/trivial input). " +
+          "Recall would return the same results as previous turns. Continue with what you have.",
+      };
+    }
+    // Redundant recall blocker (claw-code pattern: _infer_permission_denials — runtime.py:169-174)
+    // Block recall when its query embedding is too similar (cosine > RECALL_SIMILARITY_THRESHOLD) to the context-retrieval query vector
+    if (event.toolName === "recall" && session.lastQueryVec) {
+      const recallQuery = (event.params as { query?: string }).query;
+      if (recallQuery && typeof recallQuery === "string" && recallQuery.length > 5) {
+        try {
+          const recallVec = await state.embeddings.embed(recallQuery);
+          const sim = cosineSimilarity(session.lastQueryVec, recallVec);
+          if (sim > RECALL_SIMILARITY_THRESHOLD) {
+            return {
+              block: true,
+              blockReason:
+                `This recall query is ${(sim * 100).toFixed(0)}% similar to the context already retrieved this turn. ` +
+                "The results are in <graph_context> above. Read what you have. " +
+                "Only call recall with a DIFFERENT query targeting something specific not already covered.",
+            };
+          }
+        } catch { /* fail-open: allow recall if embedding fails */ }
+      }
+    }
     // Planning gate: model must output text before first tool call
     if (textLengthSoFar === 0 && toolIndex === 0) {
+      const retrievalNote = session.lastRetrievalSummary
+        ? `\nContext already injected: ${session.lastRetrievalSummary}. Read <graph_context> before calling tools.`
+        : "";
       return {
         block: true,
         blockReason:
           "PLANNING GATE — You must announce your plan before making tool calls.\n" +
           "1. Classify: LOOKUP (3 calls max), EDIT (4 max), REFACTOR (8 max)\n" +
-          "2. STATE WHAT YOU ALREADY KNOW from injected memory/context — if you have prior knowledge about these files, say so\n" +
+          "2. STATE WHAT YOU ALREADY KNOW from injected memory/context — if you have prior knowledge about these files, say so" +
+          retrievalNote + "\n" +
           "3. List each planned call and what SPECIFIC GAP it fills that memory doesn't cover\n" +
           "4. Every step still happens, but COMBINED. Edit + test in one bash call, not two.\n" +
           "If injected context already answers the question, you may need ZERO tool calls.\n" +

package/src/hooks/llm-output.ts CHANGED Viewed

@@ -33,14 +33,34 @@ export function createLlmOutputHandler(state: GlobalPluginState) {
     // Measure assistant text output (used for token estimation and planning gate)
     const textLen = event.assistantTexts.reduce((s, t) => s + t.length, 0);
-    // Extract token counts — fall back to text-length estimate when provider
-    // doesn't report usage (OpenClaw often passes 0 or undefined)
-    let inputTokens = event.usage?.input ?? 0;
-    let outputTokens = event.usage?.output ?? 0;
-    if (inputTokens + outputTokens === 0 && textLen > 0) {
-      outputTokens = Math.ceil(textLen / 4); // ~4 chars per token
+    // Extract token counts — OpenClaw's getUsageTotals() returns CUMULATIVE totals
+    // across all API calls in the session, not per-response values.
+    // Compute the delta since last call to avoid quadratic overcounting.
+    const reportedInput = event.usage?.input ?? 0;
+    const reportedOutput = event.usage?.output ?? 0;
+    const reportedCacheRead = event.usage?.cacheRead ?? 0;
+    const reportedCacheWrite = event.usage?.cacheWrite ?? 0;
+    const reportedTotal = reportedInput + reportedOutput + reportedCacheRead + reportedCacheWrite;
+    let deltaTokens: number;
+    if (reportedTotal > 0) {
+      deltaTokens = Math.max(0, reportedTotal - session.lastSeenUsageTotal);
+      session.lastSeenUsageTotal = reportedTotal;
+    } else if (textLen > 0) {
+      // No usage data — fall back to text-length estimate
+      deltaTokens = Math.ceil(textLen / 4); // ~4 chars per token
+    } else {
+      deltaTokens = 0;
     }
+    // DB stats: approximate the delta's input/output split using the cumulative input/output share of reportedTotal (the cache share is attributed to neither)
+    const inputTokens = reportedTotal > 0 && deltaTokens > 0
+      ? Math.round(deltaTokens * (reportedInput / reportedTotal))
+      : 0;
+    const outputTokens = reportedTotal > 0 && deltaTokens > 0
+      ? Math.round(deltaTokens * (reportedOutput / reportedTotal))
+      : (deltaTokens > 0 ? deltaTokens : Math.ceil(textLen / 4));
     // Always update session stats — turn_count must increment even without usage data
     if (session.surrealSessionId) {
       try {
@@ -55,8 +75,8 @@ export function createLlmOutputHandler(state: GlobalPluginState) {
     }
     // Accumulate for daemon batching and mid-session cleanup
-    session.newContentTokens += inputTokens + outputTokens;
-    session.cumulativeTokens += inputTokens + outputTokens;
+    session.newContentTokens += deltaTokens;
+    session.cumulativeTokens += deltaTokens;
     // Track accumulated text output for planning gate
     session.turnTextLength += textLen;

package/src/hooks/subagent-lifecycle.ts ADDED Viewed

@@ -0,0 +1,142 @@
+/**
+ * subagent_spawned / subagent_ended hooks — track spawned subagents in the graph.
+ *
+ * Creates `subagent` records and `spawned` edges (session → subagent).
+ * Updates subagent records with outcome on completion.
+ */
+import type { GlobalPluginState } from "../state.js";
+import { swallow } from "../errors.js";
+// ── Event shapes (from OpenClaw gateway) ─────────────────────────────────
+interface SubagentSpawnedEvent {
+  runId: string;
+  childSessionKey: string;
+  agentId?: string;
+  label?: string;
+  requester?: {
+    channel?: string;
+    accountId?: string;
+    to?: string;
+    threadId?: string;
+  };
+  threadRequested?: boolean;
+  mode?: string; // "run" | "session"
+}
+interface SubagentSpawnedContext {
+  runId: string;
+  childSessionKey: string;
+  requesterSessionKey?: string;
+}
+interface SubagentEndedEvent {
+  targetSessionKey: string;
+  targetKind?: string;
+  reason?: string;
+  sendFarewell?: boolean;
+  accountId?: string;
+  runId: string;
+  endedAt?: string;
+  outcome?: string;
+  error?: string;
+}
+interface SubagentEndedContext {
+  runId: string;
+  childSessionKey: string;
+  requesterSessionKey?: string;
+}
+// ── Handlers ─────────────────────────────────────────────────────────────
+export function createSubagentSpawnedHandler(state: GlobalPluginState) {
+  return async (event: SubagentSpawnedEvent, ctx: SubagentSpawnedContext) => {
+    try {
+      const store = state.store;
+      // Create the subagent record
+      const rows = await store.queryFirst<{ id: string }>(
+        `CREATE subagent CONTENT {
+          run_id: $run_id,
+          parent_session_key: $parent_key,
+          child_session_key: $child_key,
+          parent_session_id: $parent_key,
+          child_session_id: $child_key,
+          agent_id: $agent_id,
+          label: $label,
+          mode: $mode,
+          task: $label,
+          status: "running",
+          created_at: time::now()
+        } RETURN id`,
+        {
+          run_id: event.runId,
+          parent_key: ctx.requesterSessionKey ?? "unknown",
+          child_key: event.childSessionKey,
+          agent_id: event.agentId ?? "default",
+          label: event.label ?? null,
+          mode: event.mode ?? "run",
+        },
+      );
+      const subagentId = String(rows[0]?.id ?? "");
+      if (!subagentId) return;
+      // Find the parent's surreal session ID to create the spawned edge.
+      // The requesterSessionKey is the OpenClaw session key — we need to
+      // find the matching surreal session record.
+      if (ctx.requesterSessionKey) {
+        // Look up active session state first (fast path)
+        const parentSession = state.getSession(ctx.requesterSessionKey);
+        if (parentSession?.surrealSessionId) {
+          await store.relate(parentSession.surrealSessionId, "spawned", subagentId);
+        } else {
+          // Fallback: take the most recent still-open session record (not matched on requesterSessionKey, so this is a best guess)
+          const sessions = await store.queryFirst<{ id: string }>(
+            `SELECT id FROM session
+             WHERE ended_at IS NONE
+             ORDER BY started_at DESC LIMIT 1`,
+          );
+          if (sessions.length > 0) {
+            await store.relate(String(sessions[0].id), "spawned", subagentId);
+          }
+        }
+      }
+    } catch (e) {
+      swallow.warn("hook:subagentSpawned", e);
+    }
+  };
+}
+export function createSubagentEndedHandler(state: GlobalPluginState) {
+  return async (event: SubagentEndedEvent, ctx: SubagentEndedContext) => {
+    try {
+      const store = state.store;
+      // Update the subagent record by run_id
+      await store.queryExec(
+        `UPDATE subagent SET
+          status = $status,
+          outcome = $outcome,
+          error = $error,
+          reason = $reason,
+          ended_at = $ended_at
+        WHERE run_id = $run_id`,
+        {
+          run_id: event.runId,
+          status: event.outcome === "success" ? "completed"
+            : event.reason === "spawn-failed" ? "error"
+            : event.outcome ?? "completed",
+          outcome: event.outcome ?? null,
+          error: event.error ?? null,
+          reason: event.reason ?? null,
+          ended_at: event.endedAt ?? new Date().toISOString(),
+        },
+      );
+    } catch (e) {
+      swallow.warn("hook:subagentEnded", e);
+    }
+  };
+}

package/src/index.ts CHANGED Viewed

@@ -20,6 +20,7 @@ import { createBeforePromptBuildHandler } from "./hooks/before-prompt-build.js";
 import { createBeforeToolCallHandler } from "./hooks/before-tool-call.js";
 import { createAfterToolCallHandler } from "./hooks/after-tool-call.js";
 import { createLlmOutputHandler } from "./hooks/llm-output.js";
+import { createSubagentSpawnedHandler, createSubagentEndedHandler } from "./hooks/subagent-lifecycle.js";
 import { startMemoryDaemon } from "./daemon-manager.js";
 import { seedIdentity } from "./identity.js";
 import { seedCognitiveBootstrap } from "./cognitive-bootstrap.js";
@@ -421,6 +422,8 @@ export default definePluginEntry({
       api.on("before_tool_call", createBeforeToolCallHandler(globalState));
       api.on("after_tool_call", createAfterToolCallHandler(globalState));
       api.on("llm_output", createLlmOutputHandler(globalState));
+      api.on("subagent_spawned", createSubagentSpawnedHandler(globalState));
+      api.on("subagent_ended", createSubagentEndedHandler(globalState));
     }
     // ── Session lifecycle (also register once) ─────────────────────────

package/src/state.ts CHANGED Viewed

@@ -62,6 +62,9 @@ export class SessionState {
   cumulativeTokens = 0;
   lastCleanupTokens = 0;
   midSessionCleanupThreshold = 25_000;
+  /** Last cumulative usage total seen from OpenClaw — used to compute per-call deltas
+   *  since getUsageTotals() returns running totals, not per-response values. */
+  lastSeenUsageTotal = 0;
   // Cleanup tracking
   cleanedUp = false;
@@ -72,6 +75,17 @@ export class SessionState {
   // Pending tool args for artifact tracking
   readonly pendingToolArgs = new Map<string, unknown>();
+  // Tool call optimization state (claw-code patterns)
+  /** Query vector from the most recent context retrieval (not cleared in resetTurn) — used to detect redundant recall calls. */
+  lastQueryVec: number[] | null = null;
+  /** Summary of what graphTransformContext injected — shown in planning gate. */
+  lastRetrievalSummary = "";
+  /** API request cycle counter — hard cap prevents runaway token spend. */
+  apiCycleCount = 0;
+  /** Tracks which static context sections the model has already seen in the conversation window.
+   *  Persists across turns (NOT cleared in resetTurn) — cleared only when messages drop from window. */
+  readonly injectedSections = new Set<string>();
   // 5-pillar IDs (populated at bootstrap)
   agentId = "";
   projectId = "";
@@ -92,6 +106,10 @@ export class SessionState {
     this.softInterrupted = false;
     this.turnStartMs = Date.now();
     this.pendingThinking.length = 0;
+    this.lastRetrievalSummary = "";
+    this.apiCycleCount = 0;
+    // NOTE: lastQueryVec and injectedSections are NOT cleared here —
+    // they persist across turns within the session.
   }
 }

package/src/tools/core-memory.ts CHANGED Viewed

@@ -46,7 +46,7 @@ export function createCoreMemoryToolDef(state: GlobalPluginState, session: Sessi
             }
             const formatted = entries.map((e, i) => {
               const sid = e.session_id ? ` session:${e.session_id}` : "";
-              return `${i + 1}. [T${e.tier}/${e.category}/p${e.priority}${sid}] ${e.id}\n   ${e.text.slice(0, 200)}`;
+              return `${i + 1}. [T${e.tier}/${e.category}/p${e.priority}${sid}] ${e.id}\n   ${e.text.slice(0, 120)}`;
             }).join("\n\n");
             return {
               content: [{ type: "text" as const, text: `${entries.length} core memory entries:\n\n${formatted}` }],
@@ -73,6 +73,8 @@ export function createCoreMemoryToolDef(state: GlobalPluginState, session: Sessi
                 details: { error: true },
               };
             }
+            // Invalidate cached section so updated content re-injects next turn
+            session.injectedSections.delete(tier === 0 ? "tier0" : "tier1");
             return {
               content: [{ type: "text" as const, text: `Created core memory: ${id} (tier ${tier}, ${params.category ?? "general"}, p${params.priority ?? 50})` }],
               details: { id },
@@ -95,6 +97,9 @@ export function createCoreMemoryToolDef(state: GlobalPluginState, session: Sessi
                 details: { error: true },
               };
             }
+            // Invalidate both tiers — update may have changed the tier
+            session.injectedSections.delete("tier0");
+            session.injectedSections.delete("tier1");
             return {
               content: [{ type: "text" as const, text: `Updated core memory: ${params.id}` }],
               details: { id: params.id },
@@ -106,6 +111,9 @@ export function createCoreMemoryToolDef(state: GlobalPluginState, session: Sessi
               return { content: [{ type: "text" as const, text: "Error: 'id' is required for deactivate action." }], details: null };
             }
             await store.deleteCoreMemory(params.id);
+            // Invalidate both tiers so removal is reflected next turn
+            session.injectedSections.delete("tier0");
+            session.injectedSections.delete("tier1");
             return {
               content: [{ type: "text" as const, text: `Deactivated core memory: ${params.id}` }],
               details: { id: params.id },

package/src/tools/recall.ts CHANGED Viewed

@@ -34,7 +34,7 @@ export function createRecallToolDef(state: GlobalPluginState, session: SessionSt
         return { content: [{ type: "text" as const, text: "Memory system unavailable." }], details: null };
       }
-      const maxResults = Math.min(params.limit ?? 5, 15);
+      const maxResults = Math.min(params.limit ?? 3, 15);
       try {
         const queryVec = await embeddings.embed(params.query);
@@ -87,7 +87,7 @@ export function createRecallToolDef(state: GlobalPluginState, session: SessionSt
           const tag = r.table === "turn" ? `[${r.role ?? "turn"}]` : `[${r.table}]`;
           const time = r.timestamp ? ` (${new Date(r.timestamp).toLocaleDateString()})` : "";
           const score = r.score ? ` score:${r.score.toFixed(2)}` : "";
-          return `${i + 1}. ${tag}${time}${score}\n   ${(r.text ?? "").slice(0, 500)}`;
+          return `${i + 1}. ${tag}${time}${score}\n   ${(r.text ?? "").slice(0, 300)}`;
         }).join("\n\n");
         return {