kongbrain 0.3.16 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,6 +62,7 @@ const CONVERSATION_SHARE = 0.50;
62
62
  const RETRIEVAL_SHARE = 0.30;
63
63
  const CORE_MEMORY_SHARE = 0.15;
64
64
  const CORE_MEMORY_TTL = 300_000;
65
+ const MAX_ITEM_CHARS = 1200; // ~350 tokens per item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
65
66
  const MIN_RELEVANCE_SCORE = 0.35;
66
67
  const MIN_COSINE = 0.25;
67
68
 
@@ -149,25 +150,37 @@ function extractLastUserText(messages: AgentMessage[]): string | null {
149
150
  return null;
150
151
  }
151
152
 
153
+ /** Estimate char count for a single content block (claw-code: per-block-type estimation). */
154
+ function blockCharLen(c: any): number {
155
+ if (c.type === "text") return c.text.length;
156
+ if (c.type === "thinking") return c.thinking.length;
157
+ if (c.type === "toolCall") {
158
+ // Count tool name + serialized args (claw-code: compact.rs:326-338)
159
+ return (c.name?.length ?? 0) + (c.args ? JSON.stringify(c.args).length : 0);
160
+ }
161
+ if (c.type === "toolResult" && Array.isArray(c.content)) {
162
+ let len = 0;
163
+ for (const rc of c.content) {
164
+ if (rc.type === "text") len += rc.text.length;
165
+ else len += 100;
166
+ }
167
+ return len;
168
+ }
169
+ return 100; // image, etc.
170
+ }
171
+
152
172
  function estimateTokens(messages: AgentMessage[]): number {
153
173
  let chars = 0;
154
174
  for (const msg of messages) {
155
- for (const c of msgContentBlocks(msg)) {
156
- if (c.type === "text") chars += c.text.length;
157
- else if (c.type === "thinking") chars += c.thinking.length;
158
- else chars += 100;
159
- }
175
+ for (const c of msgContentBlocks(msg)) chars += blockCharLen(c);
176
+ chars += 4; // per-message structural overhead
160
177
  }
161
178
  return Math.ceil(chars / CHARS_PER_TOKEN);
162
179
  }
163
180
 
164
181
  function msgCharLen(msg: AgentMessage): number {
165
182
  let len = 0;
166
- for (const c of msgContentBlocks(msg)) {
167
- if (c.type === "text") len += c.text.length;
168
- else if (c.type === "thinking") len += c.thinking.length;
169
- else len += 100;
170
- }
183
+ for (const c of msgContentBlocks(msg)) len += blockCharLen(c);
171
184
  return len;
172
185
  }
173
186
 
@@ -199,7 +212,7 @@ function accessBoost(accessCount: number | undefined): number {
199
212
  return Math.log1p(accessCount ?? 0);
200
213
  }
201
214
 
202
- function cosineSimilarity(a: number[], b: number[]): number {
215
+ export function cosineSimilarity(a: number[], b: number[]): number {
203
216
  let dot = 0, magA = 0, magB = 0;
204
217
  for (let i = 0; i < a.length; i++) {
205
218
  dot += a[i] * b[i];
@@ -217,6 +230,19 @@ function buildRulesSuffix(session: SessionState): string {
217
230
  ? "unlimited" : String(Math.max(0, session.toolLimit - session.toolCallCount));
218
231
  const urgency = session.toolLimit !== Infinity && (session.toolLimit - session.toolCallCount) <= 3
219
232
  ? "\n⚠ WRAP UP or check in with user." : "";
233
+
234
+ // After first exposure, send only the budget line (claw-code: don't re-send static content)
235
+ if (session.injectedSections.has("rules_full")) {
236
+ return (
237
+ "\n<rules_reminder>" +
238
+ `\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
239
+ "\nCombine steps. If context already answers it, zero calls." +
240
+ "\n</rules_reminder>"
241
+ );
242
+ }
243
+
244
+ // First time — full examples
245
+ session.injectedSections.add("rules_full");
220
246
  return (
221
247
  "\n<rules_reminder>" +
222
248
  `\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
@@ -430,7 +456,7 @@ function takeWithConstraints(ranked: ScoredResult[], budgetTokens: number, maxIt
430
456
  for (const r of ranked) {
431
457
  if (selected.length >= maxItems) break;
432
458
  if ((r.finalScore ?? 0) < MIN_RELEVANCE_SCORE && selected.length > 0) break;
433
- const len = r.text?.length ?? 0;
459
+ const len = Math.min(r.text?.length ?? 0, MAX_ITEM_CHARS); // Cap per-item size for budget accounting
434
460
  if (used + len > budgetChars && selected.length > 0) break;
435
461
  selected.push(r);
436
462
  used += len;
@@ -447,13 +473,19 @@ function getTier1BudgetChars(budgets: Budgets): number {
447
473
  return Math.round(budgets.core * 0.45 * CHARS_PER_TOKEN);
448
474
  }
449
475
 
476
+ const MAX_CORE_MEMORY_CHARS = 800; // Per-item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
477
+
450
478
  function applyCoreBudget(entries: CoreMemoryEntry[], budgetChars: number): CoreMemoryEntry[] {
451
479
  let used = 0;
452
480
  const result: CoreMemoryEntry[] = [];
453
481
  for (const e of entries) {
454
- const len = e.text.length + 6;
482
+ // Cap individual entries so one large directive doesn't starve others
483
+ const text = e.text.length > MAX_CORE_MEMORY_CHARS
484
+ ? e.text.slice(0, MAX_CORE_MEMORY_CHARS) + "..."
485
+ : e.text;
486
+ const len = text.length + 6;
455
487
  if (used + len > budgetChars) continue;
456
- result.push(e);
488
+ result.push(text !== e.text ? { ...e, text } : e);
457
489
  used += len;
458
490
  }
459
491
  return result;
@@ -473,6 +505,40 @@ function formatTierSection(entries: CoreMemoryEntry[], label: string): string {
473
505
  return `${label}:\n${lines.join("\n")}`;
474
506
  }
475
507
 
508
+ /**
509
+ * Build static system prompt section for API prefix caching.
510
+ * Content here goes into systemPromptAddition where it benefits from
511
+ * cache-read rates (10% cost) on subsequent API calls in the agentic loop.
512
+ * (claw-code pattern: __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__ — prompt.rs:37-140)
513
+ */
514
+ function buildSystemPromptSection(session: SessionState, tier0Entries: CoreMemoryEntry[]): string | undefined {
515
+ const parts: string[] = [];
516
+
517
+ // IKONG architecture description (static, ~120 tokens)
518
+ const pillarLines: string[] = [];
519
+ if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
520
+ if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
521
+ if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
522
+ if (pillarLines.length > 0) {
523
+ parts.push(
524
+ "GRAPH PILLARS (your structural context):\n" +
525
+ ` ${pillarLines.join(" | ")}\n` +
526
+ " IKONG cognitive architecture:\n" +
527
+ " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
528
+ " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
529
+ " O(peration): tool execution, skill procedures, causal chain tracking\n" +
530
+ " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
531
+ " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
532
+ );
533
+ }
534
+
535
+ // Tier 0 core directives (semi-static, changes rarely)
536
+ const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
537
+ if (t0Section) parts.push(t0Section);
538
+
539
+ return parts.length > 0 ? parts.join("\n\n") : undefined;
540
+ }
541
+
476
542
  // ── Guaranteed recent turns from previous sessions ─────────────────────────────
477
543
 
478
544
  async function ensureRecentTurns(
@@ -532,27 +598,42 @@ async function formatContextMessage(
532
598
  const sections: string[] = [];
533
599
 
534
600
  // Pillar context — structural awareness of who/what/where
535
- const pillarLines: string[] = [];
536
- if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
537
- if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
538
- if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
539
- if (pillarLines.length > 0) {
540
- sections.push(
541
- "GRAPH PILLARS (your structural context):\n" +
542
- ` ${pillarLines.join(" | ")}\n` +
543
- " IKONG cognitive architecture:\n" +
544
- " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
545
- " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
546
- " O(peration): tool execution, skill procedures, causal chain tracking\n" +
547
- " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
548
- " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
549
- );
601
+ // Skip if model already has it in the conversation window (claw-code static section dedup)
602
+ if (!session.injectedSections.has("ikong")) {
603
+ const pillarLines: string[] = [];
604
+ if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
605
+ if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
606
+ if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
607
+ if (pillarLines.length > 0) {
608
+ sections.push(
609
+ "GRAPH PILLARS (your structural context):\n" +
610
+ ` ${pillarLines.join(" | ")}\n` +
611
+ " IKONG cognitive architecture:\n" +
612
+ " I(ntelligence): intent classification adaptive orchestration per turn\n" +
613
+ " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
614
+ " O(peration): tool execution, skill procedures, causal chain tracking\n" +
615
+ " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
616
+ " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
617
+ );
618
+ session.injectedSections.add("ikong");
619
+ }
550
620
  }
551
621
 
552
- const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
553
- if (t0Section) sections.push(t0Section);
554
- const t1Section = formatTierSection(tier1Entries, "SESSION CONTEXT (pinned for this session)");
555
- if (t1Section) sections.push(t1Section);
622
+ // Core directives — skip if model already has them
623
+ if (!session.injectedSections.has("tier0")) {
624
+ const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
625
+ if (t0Section) {
626
+ sections.push(t0Section);
627
+ session.injectedSections.add("tier0");
628
+ }
629
+ }
630
+ if (!session.injectedSections.has("tier1")) {
631
+ const t1Section = formatTierSection(tier1Entries, "SESSION CONTEXT (pinned for this session)");
632
+ if (t1Section) {
633
+ sections.push(t1Section);
634
+ session.injectedSections.add("tier1");
635
+ }
636
+ }
556
637
 
557
638
  // Cognitive directives
558
639
  const directives = getPendingDirectives(session);
@@ -607,6 +688,10 @@ async function formatContextMessage(
607
688
  const score = n.finalScore != null ? ` (relevance: ${(n.finalScore * 100).toFixed(0)}%)` : "";
608
689
  const via = n.fromNeighbor ? " [via graph link]" : "";
609
690
  let text = n.text ?? "";
691
+ // Truncate oversized items (claw-code: MAX_INSTRUCTION_FILE_CHARS pattern)
692
+ if (text.length > MAX_ITEM_CHARS) {
693
+ text = text.slice(0, MAX_ITEM_CHARS) + "... [truncated]";
694
+ }
610
695
  if (key === "past_turns") {
611
696
  text = text.replace(/^\[(user|assistant)\] /, "[past_$1] ");
612
697
  }
@@ -616,6 +701,23 @@ async function formatContextMessage(
616
701
  sections.push(`${label}:\n${formatted.join("\n")}`);
617
702
  }
618
703
 
704
+ // Injection manifest — tell the model what's already retrieved so it doesn't call recall redundantly
705
+ // (claw-code pattern: route_prompt pre-computes and shows available results)
706
+ const manifest: string[] = [];
707
+ for (const key of sortedKeys) {
708
+ const items = groups[key];
709
+ if (items.length > 0) manifest.push(`${LABELS[key] ?? key}: ${items.length}`);
710
+ }
711
+ if (tier0Entries.length > 0) manifest.push(`core_directives: ${tier0Entries.length}`);
712
+ if (tier1Entries.length > 0) manifest.push(`session_context: ${tier1Entries.length}`);
713
+ if (manifest.length > 0) {
714
+ sections.push(
715
+ "ALREADY RETRIEVED (do NOT call recall for these — they are above):\n" +
716
+ ` ${manifest.join(", ")}\n` +
717
+ "Only call recall if you need something SPECIFIC that isn't covered above."
718
+ );
719
+ }
720
+
619
721
  const text =
620
722
  "[System retrieved context — reference material, not user input. Higher relevance % = stronger match.]\n" +
621
723
  "<graph_context>\n" +
@@ -646,7 +748,7 @@ function truncateToolResult(msg: AgentMessage, maxChars: number): AgentMessage {
646
748
  return { ...msg, content };
647
749
  }
648
750
 
649
- function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number): AgentMessage[] {
751
+ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number, session?: SessionState): AgentMessage[] {
650
752
  const budgetChars = maxTokens * CHARS_PER_TOKEN;
651
753
  const TOOL_RESULT_MAX = Math.round(contextWindow * 0.03);
652
754
 
@@ -718,6 +820,16 @@ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWind
718
820
  }
719
821
  }
720
822
 
823
+ // Detect if old messages (containing previous context injection) were dropped from the window.
824
+ // If so, clear injectedSections so static content gets re-injected next turn.
825
+ if (session && messages.length > 0 && groups.length > 0) {
826
+ const firstOriginal = groups[0];
827
+ const firstSelected = selectedGroups[0];
828
+ if (firstOriginal !== firstSelected) {
829
+ session.injectedSections.clear();
830
+ }
831
+ }
832
+
721
833
  return selectedGroups.flat();
722
834
  }
723
835
 
@@ -735,6 +847,8 @@ export interface GraphTransformParams {
735
847
  export interface GraphTransformResult {
736
848
  messages: AgentMessage[];
737
849
  stats: ContextStats;
850
+ /** Static content for the system prompt — benefits from API prefix caching (10% cost). */
851
+ systemPromptSection?: string;
738
852
  }
739
853
 
740
854
  /**
@@ -748,6 +862,17 @@ export async function graphTransformContext(
748
862
  const contextWindow = params.contextWindow ?? 200000;
749
863
  const budgets = calcBudgets(contextWindow);
750
864
 
865
+ // Build static system prompt section for API prefix caching.
866
+ // Done here (wrapper) so it attaches to any inner return path.
867
+ // (claw-code pattern: static sections above __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__)
868
+ let systemPromptSection: string | undefined;
869
+ try {
870
+ const tier0ForSys = store.isAvailable()
871
+ ? applyCoreBudget(await store.getAllCoreMemory(0), getTier0BudgetChars(budgets))
872
+ : [];
873
+ systemPromptSection = buildSystemPromptSection(session, tier0ForSys);
874
+ } catch { /* non-critical — tier0 will still appear in user message */ }
875
+
751
876
  // Never throw — return raw messages on any failure
752
877
  try {
753
878
  const TRANSFORM_TIMEOUT_MS = 10_000;
@@ -757,6 +882,7 @@ export async function graphTransformContext(
757
882
  setTimeout(() => reject(new Error("graphTransformContext timed out")), TRANSFORM_TIMEOUT_MS),
758
883
  ),
759
884
  ]);
885
+ result.systemPromptSection = systemPromptSection;
760
886
  return result;
761
887
  } catch (err) {
762
888
  console.error("graphTransformContext fatal error, returning raw messages:", err);
@@ -773,6 +899,7 @@ export async function graphTransformContext(
773
899
  mode: "passthrough",
774
900
  prefetchHit: false,
775
901
  },
902
+ systemPromptSection,
776
903
  };
777
904
  }
778
905
  }
@@ -786,20 +913,6 @@ async function graphTransformInner(
786
913
  budgets: Budgets,
787
914
  _signal?: AbortSignal,
788
915
  ): Promise<GraphTransformResult> {
789
- // Load tiered core memory
790
- let tier0: CoreMemoryEntry[] = [];
791
- let tier1: CoreMemoryEntry[] = [];
792
- try {
793
- [tier0, tier1] = await Promise.all([
794
- store.getAllCoreMemory(0),
795
- store.getAllCoreMemory(1),
796
- ]);
797
- tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
798
- tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
799
- } catch (e) {
800
- console.warn("[warn] Core memory load failed:", e);
801
- }
802
-
803
916
  function makeStats(
804
917
  sent: AgentMessage[], graphNodes: number, neighborNodes: number,
805
918
  recentTurnCount: number, mode: ContextStats["mode"], prefetchHit = false,
@@ -814,12 +927,65 @@ async function graphTransformInner(
814
927
  };
815
928
  }
816
929
 
930
+ function makeResult(
931
+ msgs: AgentMessage[], stats: ContextStats, sysSection?: string,
932
+ ): GraphTransformResult {
933
+ return { messages: msgs, stats, systemPromptSection: sysSection };
934
+ }
935
+
936
+ // Derive retrieval config from session's current adaptive config
937
+ const config = session.currentConfig;
938
+ const skipRetrieval = config?.skipRetrieval ?? false;
939
+
940
+ // Skip retrieval fast path — avoid DB queries entirely when model already has core memory
941
+ // (claw-code pattern: simple_mode skips the load, not load-then-discard)
942
+ if (skipRetrieval) {
943
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
944
+ // If model already saw core memory, just return recent turns + compressed rules. Zero DB queries.
945
+ if (session.injectedSections.has("tier0")) {
946
+ return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
947
+ }
948
+ // First turn or after compaction cleared injectedSections — load and inject
949
+ let tier0: CoreMemoryEntry[] = [];
950
+ let tier1: CoreMemoryEntry[] = [];
951
+ try {
952
+ [tier0, tier1] = await Promise.all([
953
+ store.getAllCoreMemory(0),
954
+ store.getAllCoreMemory(1),
955
+ ]);
956
+ tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
957
+ tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
958
+ } catch (e) {
959
+ console.warn("[warn] Core memory load failed:", e);
960
+ }
961
+ if (tier0.length > 0 || tier1.length > 0) {
962
+ const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
963
+ const result = [coreContext, ...recentTurns];
964
+ return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, recentTurns.length, "passthrough") };
965
+ }
966
+ return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
967
+ }
968
+
969
+ // Load tiered core memory (full retrieval path)
970
+ let tier0: CoreMemoryEntry[] = [];
971
+ let tier1: CoreMemoryEntry[] = [];
972
+ try {
973
+ [tier0, tier1] = await Promise.all([
974
+ store.getAllCoreMemory(0),
975
+ store.getAllCoreMemory(1),
976
+ ]);
977
+ tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
978
+ tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
979
+ } catch (e) {
980
+ console.warn("[warn] Core memory load failed:", e);
981
+ }
982
+
817
983
  // Graceful degradation
818
984
  const embeddingsUp = embeddings.isAvailable();
819
985
  const surrealUp = store.isAvailable();
820
986
 
821
987
  if (!embeddingsUp || !surrealUp) {
822
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
988
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
823
989
  if (tier0.length > 0 || tier1.length > 0) {
824
990
  const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
825
991
  const result = [coreContext, ...recentTurns];
@@ -833,9 +999,6 @@ async function graphTransformInner(
833
999
  return { messages: injectRulesSuffix(messages, session), stats: makeStats(messages, 0, 0, messages.length, "passthrough") };
834
1000
  }
835
1001
 
836
- // Derive retrieval config from session's current adaptive config
837
- const config = session.currentConfig;
838
- const skipRetrieval = config?.skipRetrieval ?? false;
839
1002
  const currentIntent = config?.intent ?? "unknown";
840
1003
  const baseLimits = config?.vectorSearchLimits ?? {
841
1004
  turn: 25, identity: 10, concept: 20, memory: 20, artifact: 10,
@@ -852,21 +1015,9 @@ async function graphTransformInner(
852
1015
  };
853
1016
  let tokenBudget = Math.min(config?.tokenBudget ?? 6000, budgets.retrieval);
854
1017
 
855
- // Pressure-based adaptive scaling
856
- // (In Phase 2, _usedTokens will be tracked per-session via hooks)
857
-
858
- if (skipRetrieval) {
859
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
860
- if (tier0.length > 0 || tier1.length > 0) {
861
- const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
862
- const result = [coreContext, ...recentTurns];
863
- return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, recentTurns.length, "passthrough") };
864
- }
865
- return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
866
- }
867
-
868
1018
  try {
869
1019
  const queryVec = await buildContextualQueryVec(queryText, messages, embeddings);
1020
+ session.lastQueryVec = queryVec; // Stash for redundant recall detection
870
1021
 
871
1022
  // Prefetch cache check
872
1023
  const cached = getCachedContext(queryVec);
@@ -891,7 +1042,7 @@ async function graphTransformInner(
891
1042
  const reflCtx = cached.reflections.length > 0 ? formatReflectionContext(cached.reflections) : "";
892
1043
 
893
1044
  const injectedContext = await formatContextMessage(contextNodes, store, session, skillCtx + reflCtx, tier0, tier1);
894
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
1045
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
895
1046
  const result = [injectedContext, ...recentTurns];
896
1047
  return { messages: injectRulesSuffix(result, session), stats: makeStats(result, contextNodes.length, 0, recentTurns.length, "graph", true) };
897
1048
  }
@@ -948,7 +1099,7 @@ async function graphTransformInner(
948
1099
  contextNodes = await ensureRecentTurns(contextNodes, session.sessionId, store);
949
1100
 
950
1101
  if (contextNodes.length === 0) {
951
- const result = getRecentTurns(messages, budgets.conversation, contextWindow);
1102
+ const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
952
1103
  return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "graph") };
953
1104
  }
954
1105
 
@@ -980,7 +1131,7 @@ async function graphTransformInner(
980
1131
  } catch (e) { swallow("graph-context:reflections", e); }
981
1132
 
982
1133
  const injectedContext = await formatContextMessage(contextNodes, store, session, skillContext + reflectionContext, tier0, tier1);
983
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
1134
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
984
1135
  const result = [injectedContext, ...recentTurns];
985
1136
  return {
986
1137
  messages: injectRulesSuffix(result, session),
@@ -993,7 +1144,7 @@ async function graphTransformInner(
993
1144
  };
994
1145
  } catch (err) {
995
1146
  console.error("Graph context error, falling back:", err);
996
- const result = getRecentTurns(messages, budgets.conversation, contextWindow);
1147
+ const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
997
1148
  return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "recency-only") };
998
1149
  }
999
1150
  }
@@ -6,7 +6,7 @@
6
6
  * so the next session's wakeup has context even before deferred
7
7
  * extraction runs.
8
8
  */
9
- import { readFileSync, writeFileSync, unlinkSync, existsSync, chmodSync } from "node:fs";
9
+ import { readFileSync, writeFileSync, unlinkSync, existsSync, renameSync } from "node:fs";
10
10
  import { join } from "node:path";
11
11
 
12
12
  const HANDOFF_FILENAME = ".kongbrain-handoff.json";
@@ -42,14 +42,21 @@ export function readAndDeleteHandoffFile(
42
42
  workspaceDir: string,
43
43
  ): HandoffFileData | null {
44
44
  const path = join(workspaceDir, HANDOFF_FILENAME);
45
+ const processingPath = path + ".processing";
46
+ // Also clean up stale .processing files from prior crashes
47
+ if (existsSync(processingPath) && !existsSync(path)) {
48
+ try { unlinkSync(processingPath); } catch { /* ignore */ }
49
+ }
45
50
  if (!existsSync(path)) return null;
46
51
  try {
47
- const raw = readFileSync(path, "utf-8");
48
- unlinkSync(path);
52
+ // Atomic rename first so a crash between read and delete can't re-process
53
+ renameSync(path, processingPath);
54
+ const raw = readFileSync(processingPath, "utf-8");
55
+ unlinkSync(processingPath);
49
56
  const parsed = JSON.parse(raw);
50
57
  // Runtime validation — reject prototype pollution and malformed data
51
58
  if (parsed == null || typeof parsed !== "object" || Array.isArray(parsed)) return null;
52
- if ("__proto__" in parsed || "constructor" in parsed) return null;
59
+ if (Object.hasOwn(parsed, "__proto__") || Object.hasOwn(parsed, "constructor")) return null;
53
60
  const data: HandoffFileData = {
54
61
  sessionId: typeof parsed.sessionId === "string" ? parsed.sessionId.slice(0, 200) : "",
55
62
  timestamp: typeof parsed.timestamp === "string" ? parsed.timestamp.slice(0, 50) : "",
@@ -61,7 +68,7 @@ export function readAndDeleteHandoffFile(
61
68
  return data;
62
69
  } catch {
63
70
  // Corrupted or deleted between check and read
64
- try { unlinkSync(path); } catch { /* ignore */ }
71
+ try { unlinkSync(processingPath); } catch { /* ignore */ }
65
72
  return null;
66
73
  }
67
74
  }
@@ -54,7 +54,7 @@ export function createAfterToolCallHandler(state: GlobalPluginState) {
54
54
  });
55
55
  if (assistantTurnId) session.lastAssistantTurnId = assistantTurnId;
56
56
  } catch (e) {
57
- swallow("hook:afterToolCall:eagerAssistantTurn", e);
57
+ swallow.warn("hook:afterToolCall:eagerAssistantTurn", e);
58
58
  }
59
59
  }
60
60
  if (session.lastAssistantTurnId) {
@@ -63,12 +63,12 @@ export function createAfterToolCallHandler(state: GlobalPluginState) {
63
63
  }
64
64
  }
65
65
  } catch (e) {
66
- swallow("hook:afterToolCall:store", e);
66
+ swallow.warn("hook:afterToolCall:store", e);
67
67
  }
68
68
 
69
69
  // Auto-track file artifacts from write/edit tools
70
70
  if (!isError) {
71
- trackArtifact(event.toolName, event.params, session.taskId, session.projectId, state)
71
+ await trackArtifact(event.toolName, event.params, session.taskId, session.projectId, state)
72
72
  .catch(e => swallow.warn("hook:afterToolCall:artifact", e));
73
73
  }
74
74
 
@@ -8,9 +8,12 @@
8
8
 
9
9
  import type { GlobalPluginState } from "../state.js";
10
10
  import { recordToolCall } from "../orchestrator.js";
11
+ import { cosineSimilarity } from "../graph-context.js";
11
12
 
12
13
  const DEFAULT_TOOL_LIMIT = 10;
13
14
  const CLASSIFICATION_LIMITS: Record<string, number> = { LOOKUP: 3, EDIT: 4, REFACTOR: 8 };
15
+ const API_CYCLE_CAP = 16;
16
+ const RECALL_SIMILARITY_THRESHOLD = 0.80;
14
17
 
15
18
  export function createBeforeToolCallHandler(state: GlobalPluginState) {
16
19
  return async (
@@ -30,6 +33,7 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
30
33
 
31
34
  session.toolCallCount++;
32
35
  session.toolCallsSinceLastText++;
36
+ session.apiCycleCount++;
33
37
 
34
38
  // Record for steering analysis
35
39
  recordToolCall(session, event.toolName);
@@ -46,6 +50,14 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
46
50
  };
47
51
  }
48
52
 
53
+ // API cycle cap (claw-code pattern: max_iterations — conversation.rs:119)
54
+ if (session.apiCycleCount > API_CYCLE_CAP) {
55
+ return {
56
+ block: true,
57
+ blockReason: `Hard API cycle cap (${API_CYCLE_CAP}) reached. Deliver your answer now.`,
58
+ };
59
+ }
60
+
49
61
  // Tool limit
50
62
  if (session.toolCallCount > session.toolLimit) {
51
63
  return {
@@ -54,14 +66,49 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
54
66
  };
55
67
  }
56
68
 
69
+ // Intent-based tool gating (claw-code pattern: simple_mode/MCP toggle — tools.py:62-72)
70
+ // On skipRetrieval turns, recall has nothing to add — context was skipped intentionally
71
+ if (event.toolName === "recall" && session.currentConfig?.skipRetrieval) {
72
+ return {
73
+ block: true,
74
+ blockReason: "Context retrieval was skipped this turn (continuation/trivial input). " +
75
+ "Recall would return the same results as previous turns. Continue with what you have.",
76
+ };
77
+ }
78
+
79
+ // Redundant recall blocker (claw-code pattern: _infer_permission_denials — runtime.py:169-174)
80
+ // Block recall when its query would return the same results as context retrieval
81
+ if (event.toolName === "recall" && session.lastQueryVec) {
82
+ const recallQuery = (event.params as { query?: string }).query;
83
+ if (recallQuery && typeof recallQuery === "string" && recallQuery.length > 5) {
84
+ try {
85
+ const recallVec = await state.embeddings.embed(recallQuery);
86
+ const sim = cosineSimilarity(session.lastQueryVec, recallVec);
87
+ if (sim > RECALL_SIMILARITY_THRESHOLD) {
88
+ return {
89
+ block: true,
90
+ blockReason:
91
+ `This recall query is ${(sim * 100).toFixed(0)}% similar to the context already retrieved this turn. ` +
92
+ "The results are in <graph_context> above. Read what you have. " +
93
+ "Only call recall with a DIFFERENT query targeting something specific not already covered.",
94
+ };
95
+ }
96
+ } catch { /* fail-open: allow recall if embedding fails */ }
97
+ }
98
+ }
99
+
57
100
  // Planning gate: model must output text before first tool call
58
101
  if (textLengthSoFar === 0 && toolIndex === 0) {
102
+ const retrievalNote = session.lastRetrievalSummary
103
+ ? `\nContext already injected: ${session.lastRetrievalSummary}. Read <graph_context> before calling tools.`
104
+ : "";
59
105
  return {
60
106
  block: true,
61
107
  blockReason:
62
108
  "PLANNING GATE — You must announce your plan before making tool calls.\n" +
63
109
  "1. Classify: LOOKUP (3 calls max), EDIT (4 max), REFACTOR (8 max)\n" +
64
- "2. STATE WHAT YOU ALREADY KNOW from injected memory/context — if you have prior knowledge about these files, say so\n" +
110
+ "2. STATE WHAT YOU ALREADY KNOW from injected memory/context — if you have prior knowledge about these files, say so" +
111
+ retrievalNote + "\n" +
65
112
  "3. List each planned call and what SPECIFIC GAP it fills that memory doesn't cover\n" +
66
113
  "4. Every step still happens, but COMBINED. Edit + test in one bash call, not two.\n" +
67
114
  "If injected context already answers the question, you may need ZERO tool calls.\n" +