kongbrain 0.3.16 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -62,6 +62,7 @@ const CONVERSATION_SHARE = 0.50;
62
62
  const RETRIEVAL_SHARE = 0.30;
63
63
  const CORE_MEMORY_SHARE = 0.15;
64
64
  const CORE_MEMORY_TTL = 300_000;
65
+ const MAX_ITEM_CHARS = 1200; // ~350 tokens per item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
65
66
  const MIN_RELEVANCE_SCORE = 0.35;
66
67
  const MIN_COSINE = 0.25;
67
68
 
@@ -149,25 +150,37 @@ function extractLastUserText(messages: AgentMessage[]): string | null {
149
150
  return null;
150
151
  }
151
152
 
153
+ /** Estimate char count for a single content block (claw-code: per-block-type estimation). */
154
+ function blockCharLen(c: any): number {
155
+ if (c.type === "text") return c.text.length;
156
+ if (c.type === "thinking") return c.thinking.length;
157
+ if (c.type === "toolCall") {
158
+ // Count tool name + serialized args (claw-code: compact.rs:326-338)
159
+ return (c.name?.length ?? 0) + (c.args ? JSON.stringify(c.args).length : 0);
160
+ }
161
+ if (c.type === "toolResult" && Array.isArray(c.content)) {
162
+ let len = 0;
163
+ for (const rc of c.content) {
164
+ if (rc.type === "text") len += rc.text.length;
165
+ else len += 100;
166
+ }
167
+ return len;
168
+ }
169
+ return 100; // image, etc.
170
+ }
171
+
152
172
  function estimateTokens(messages: AgentMessage[]): number {
153
173
  let chars = 0;
154
174
  for (const msg of messages) {
155
- for (const c of msgContentBlocks(msg)) {
156
- if (c.type === "text") chars += c.text.length;
157
- else if (c.type === "thinking") chars += c.thinking.length;
158
- else chars += 100;
159
- }
175
+ for (const c of msgContentBlocks(msg)) chars += blockCharLen(c);
176
+ chars += 4; // per-message structural overhead
160
177
  }
161
178
  return Math.ceil(chars / CHARS_PER_TOKEN);
162
179
  }
163
180
 
164
181
  function msgCharLen(msg: AgentMessage): number {
165
182
  let len = 0;
166
- for (const c of msgContentBlocks(msg)) {
167
- if (c.type === "text") len += c.text.length;
168
- else if (c.type === "thinking") len += c.thinking.length;
169
- else len += 100;
170
- }
183
+ for (const c of msgContentBlocks(msg)) len += blockCharLen(c);
171
184
  return len;
172
185
  }
173
186
 
@@ -199,7 +212,7 @@ function accessBoost(accessCount: number | undefined): number {
199
212
  return Math.log1p(accessCount ?? 0);
200
213
  }
201
214
 
202
- function cosineSimilarity(a: number[], b: number[]): number {
215
+ export function cosineSimilarity(a: number[], b: number[]): number {
203
216
  let dot = 0, magA = 0, magB = 0;
204
217
  for (let i = 0; i < a.length; i++) {
205
218
  dot += a[i] * b[i];
@@ -217,6 +230,19 @@ function buildRulesSuffix(session: SessionState): string {
217
230
  ? "unlimited" : String(Math.max(0, session.toolLimit - session.toolCallCount));
218
231
  const urgency = session.toolLimit !== Infinity && (session.toolLimit - session.toolCallCount) <= 3
219
232
  ? "\n⚠ WRAP UP or check in with user." : "";
233
+
234
+ // After first exposure, send only the budget line (claw-code: don't re-send static content)
235
+ if (session.injectedSections.has("rules_full")) {
236
+ return (
237
+ "\n<rules_reminder>" +
238
+ `\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
239
+ "\nCombine steps. If context already answers it, zero calls." +
240
+ "\n</rules_reminder>"
241
+ );
242
+ }
243
+
244
+ // First time — full examples
245
+ session.injectedSections.add("rules_full");
220
246
  return (
221
247
  "\n<rules_reminder>" +
222
248
  `\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
@@ -430,7 +456,7 @@ function takeWithConstraints(ranked: ScoredResult[], budgetTokens: number, maxIt
430
456
  for (const r of ranked) {
431
457
  if (selected.length >= maxItems) break;
432
458
  if ((r.finalScore ?? 0) < MIN_RELEVANCE_SCORE && selected.length > 0) break;
433
- const len = r.text?.length ?? 0;
459
+ const len = Math.min(r.text?.length ?? 0, MAX_ITEM_CHARS); // Cap per-item size for budget accounting
434
460
  if (used + len > budgetChars && selected.length > 0) break;
435
461
  selected.push(r);
436
462
  used += len;
@@ -447,13 +473,19 @@ function getTier1BudgetChars(budgets: Budgets): number {
447
473
  return Math.round(budgets.core * 0.45 * CHARS_PER_TOKEN);
448
474
  }
449
475
 
476
+ const MAX_CORE_MEMORY_CHARS = 800; // Per-item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
477
+
450
478
  function applyCoreBudget(entries: CoreMemoryEntry[], budgetChars: number): CoreMemoryEntry[] {
451
479
  let used = 0;
452
480
  const result: CoreMemoryEntry[] = [];
453
481
  for (const e of entries) {
454
- const len = e.text.length + 6;
482
+ // Cap individual entries so one large directive doesn't starve others
483
+ const text = e.text.length > MAX_CORE_MEMORY_CHARS
484
+ ? e.text.slice(0, MAX_CORE_MEMORY_CHARS) + "..."
485
+ : e.text;
486
+ const len = text.length + 6;
455
487
  if (used + len > budgetChars) continue;
456
- result.push(e);
488
+ result.push(text !== e.text ? { ...e, text } : e);
457
489
  used += len;
458
490
  }
459
491
  return result;
@@ -473,6 +505,40 @@ function formatTierSection(entries: CoreMemoryEntry[], label: string): string {
473
505
  return `${label}:\n${lines.join("\n")}`;
474
506
  }
475
507
 
508
+ /**
509
+ * Build static system prompt section for API prefix caching.
510
+ * Content here goes into systemPromptAddition where it benefits from
511
+ * cache-read rates (10% cost) on subsequent API calls in the agentic loop.
512
+ * (claw-code pattern: __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__ — prompt.rs:37-140)
513
+ */
514
+ function buildSystemPromptSection(session: SessionState, tier0Entries: CoreMemoryEntry[]): string | undefined {
515
+ const parts: string[] = [];
516
+
517
+ // IKONG architecture description (static, ~120 tokens)
518
+ const pillarLines: string[] = [];
519
+ if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
520
+ if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
521
+ if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
522
+ if (pillarLines.length > 0) {
523
+ parts.push(
524
+ "GRAPH PILLARS (your structural context):\n" +
525
+ ` ${pillarLines.join(" | ")}\n` +
526
+ " IKONG cognitive architecture:\n" +
527
+ " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
528
+ " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
529
+ " O(peration): tool execution, skill procedures, causal chain tracking\n" +
530
+ " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
531
+ " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
532
+ );
533
+ }
534
+
535
+ // Tier 0 core directives (semi-static, changes rarely)
536
+ const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
537
+ if (t0Section) parts.push(t0Section);
538
+
539
+ return parts.length > 0 ? parts.join("\n\n") : undefined;
540
+ }
541
+
476
542
  // ── Guaranteed recent turns from previous sessions ─────────────────────────────
477
543
 
478
544
  async function ensureRecentTurns(
@@ -532,27 +598,42 @@ async function formatContextMessage(
532
598
  const sections: string[] = [];
533
599
 
534
600
  // Pillar context — structural awareness of who/what/where
535
- const pillarLines: string[] = [];
536
- if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
537
- if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
538
- if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
539
- if (pillarLines.length > 0) {
540
- sections.push(
541
- "GRAPH PILLARS (your structural context):\n" +
542
- ` ${pillarLines.join(" | ")}\n` +
543
- " IKONG cognitive architecture:\n" +
544
- " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
545
- " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
546
- " O(peration): tool execution, skill procedures, causal chain tracking\n" +
547
- " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
548
- " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
549
- );
601
+ // Skip if model already has it in the conversation window (claw-code static section dedup)
602
+ if (!session.injectedSections.has("ikong")) {
603
+ const pillarLines: string[] = [];
604
+ if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
605
+ if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
606
+ if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
607
+ if (pillarLines.length > 0) {
608
+ sections.push(
609
+ "GRAPH PILLARS (your structural context):\n" +
610
+ ` ${pillarLines.join(" | ")}\n` +
611
+ " IKONG cognitive architecture:\n" +
612
+ " I(ntelligence): intent classification adaptive orchestration per turn\n" +
613
+ " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
614
+ " O(peration): tool execution, skill procedures, causal chain tracking\n" +
615
+ " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
616
+ " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
617
+ );
618
+ session.injectedSections.add("ikong");
619
+ }
550
620
  }
551
621
 
552
- const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
553
- if (t0Section) sections.push(t0Section);
554
- const t1Section = formatTierSection(tier1Entries, "SESSION CONTEXT (pinned for this session)");
555
- if (t1Section) sections.push(t1Section);
622
+ // Core directives — skip if model already has them
623
+ if (!session.injectedSections.has("tier0")) {
624
+ const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
625
+ if (t0Section) {
626
+ sections.push(t0Section);
627
+ session.injectedSections.add("tier0");
628
+ }
629
+ }
630
+ if (!session.injectedSections.has("tier1")) {
631
+ const t1Section = formatTierSection(tier1Entries, "SESSION CONTEXT (pinned for this session)");
632
+ if (t1Section) {
633
+ sections.push(t1Section);
634
+ session.injectedSections.add("tier1");
635
+ }
636
+ }
556
637
 
557
638
  // Cognitive directives
558
639
  const directives = getPendingDirectives(session);
@@ -607,6 +688,10 @@ async function formatContextMessage(
607
688
  const score = n.finalScore != null ? ` (relevance: ${(n.finalScore * 100).toFixed(0)}%)` : "";
608
689
  const via = n.fromNeighbor ? " [via graph link]" : "";
609
690
  let text = n.text ?? "";
691
+ // Truncate oversized items (claw-code: MAX_INSTRUCTION_FILE_CHARS pattern)
692
+ if (text.length > MAX_ITEM_CHARS) {
693
+ text = text.slice(0, MAX_ITEM_CHARS) + "... [truncated]";
694
+ }
610
695
  if (key === "past_turns") {
611
696
  text = text.replace(/^\[(user|assistant)\] /, "[past_$1] ");
612
697
  }
@@ -616,6 +701,23 @@ async function formatContextMessage(
616
701
  sections.push(`${label}:\n${formatted.join("\n")}`);
617
702
  }
618
703
 
704
+ // Injection manifest — tell the model what's already retrieved so it doesn't call recall redundantly
705
+ // (claw-code pattern: route_prompt pre-computes and shows available results)
706
+ const manifest: string[] = [];
707
+ for (const key of sortedKeys) {
708
+ const items = groups[key];
709
+ if (items.length > 0) manifest.push(`${LABELS[key] ?? key}: ${items.length}`);
710
+ }
711
+ if (tier0Entries.length > 0) manifest.push(`core_directives: ${tier0Entries.length}`);
712
+ if (tier1Entries.length > 0) manifest.push(`session_context: ${tier1Entries.length}`);
713
+ if (manifest.length > 0) {
714
+ sections.push(
715
+ "ALREADY RETRIEVED (do NOT call recall for these — they are above):\n" +
716
+ ` ${manifest.join(", ")}\n` +
717
+ "Only call recall if you need something SPECIFIC that isn't covered above."
718
+ );
719
+ }
720
+
619
721
  const text =
620
722
  "[System retrieved context — reference material, not user input. Higher relevance % = stronger match.]\n" +
621
723
  "<graph_context>\n" +
@@ -646,7 +748,7 @@ function truncateToolResult(msg: AgentMessage, maxChars: number): AgentMessage {
646
748
  return { ...msg, content };
647
749
  }
648
750
 
649
- function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number): AgentMessage[] {
751
+ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number, session?: SessionState): AgentMessage[] {
650
752
  const budgetChars = maxTokens * CHARS_PER_TOKEN;
651
753
  const TOOL_RESULT_MAX = Math.round(contextWindow * 0.03);
652
754
 
@@ -718,6 +820,16 @@ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWind
718
820
  }
719
821
  }
720
822
 
823
+ // Detect if old messages (containing previous context injection) were dropped from the window.
824
+ // If so, clear injectedSections so static content gets re-injected next turn.
825
+ if (session && messages.length > 0 && groups.length > 0) {
826
+ const firstOriginal = groups[0];
827
+ const firstSelected = selectedGroups[0];
828
+ if (firstOriginal !== firstSelected) {
829
+ session.injectedSections.clear();
830
+ }
831
+ }
832
+
721
833
  return selectedGroups.flat();
722
834
  }
723
835
 
@@ -735,6 +847,8 @@ export interface GraphTransformParams {
735
847
  export interface GraphTransformResult {
736
848
  messages: AgentMessage[];
737
849
  stats: ContextStats;
850
+ /** Static content for the system prompt — benefits from API prefix caching (10% cost). */
851
+ systemPromptSection?: string;
738
852
  }
739
853
 
740
854
  /**
@@ -748,6 +862,17 @@ export async function graphTransformContext(
748
862
  const contextWindow = params.contextWindow ?? 200000;
749
863
  const budgets = calcBudgets(contextWindow);
750
864
 
865
+ // Build static system prompt section for API prefix caching.
866
+ // Done here (wrapper) so it attaches to any inner return path.
867
+ // (claw-code pattern: static sections above __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__)
868
+ let systemPromptSection: string | undefined;
869
+ try {
870
+ const tier0ForSys = store.isAvailable()
871
+ ? applyCoreBudget(await store.getAllCoreMemory(0), getTier0BudgetChars(budgets))
872
+ : [];
873
+ systemPromptSection = buildSystemPromptSection(session, tier0ForSys);
874
+ } catch { /* non-critical — tier0 will still appear in user message */ }
875
+
751
876
  // Never throw — return raw messages on any failure
752
877
  try {
753
878
  const TRANSFORM_TIMEOUT_MS = 10_000;
@@ -757,6 +882,7 @@ export async function graphTransformContext(
757
882
  setTimeout(() => reject(new Error("graphTransformContext timed out")), TRANSFORM_TIMEOUT_MS),
758
883
  ),
759
884
  ]);
885
+ result.systemPromptSection = systemPromptSection;
760
886
  return result;
761
887
  } catch (err) {
762
888
  console.error("graphTransformContext fatal error, returning raw messages:", err);
@@ -773,6 +899,7 @@ export async function graphTransformContext(
773
899
  mode: "passthrough",
774
900
  prefetchHit: false,
775
901
  },
902
+ systemPromptSection,
776
903
  };
777
904
  }
778
905
  }
@@ -786,20 +913,6 @@ async function graphTransformInner(
786
913
  budgets: Budgets,
787
914
  _signal?: AbortSignal,
788
915
  ): Promise<GraphTransformResult> {
789
- // Load tiered core memory
790
- let tier0: CoreMemoryEntry[] = [];
791
- let tier1: CoreMemoryEntry[] = [];
792
- try {
793
- [tier0, tier1] = await Promise.all([
794
- store.getAllCoreMemory(0),
795
- store.getAllCoreMemory(1),
796
- ]);
797
- tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
798
- tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
799
- } catch (e) {
800
- console.warn("[warn] Core memory load failed:", e);
801
- }
802
-
803
916
  function makeStats(
804
917
  sent: AgentMessage[], graphNodes: number, neighborNodes: number,
805
918
  recentTurnCount: number, mode: ContextStats["mode"], prefetchHit = false,
@@ -814,12 +927,65 @@ async function graphTransformInner(
814
927
  };
815
928
  }
816
929
 
930
+ function makeResult(
931
+ msgs: AgentMessage[], stats: ContextStats, sysSection?: string,
932
+ ): GraphTransformResult {
933
+ return { messages: msgs, stats, systemPromptSection: sysSection };
934
+ }
935
+
936
+ // Derive retrieval config from session's current adaptive config
937
+ const config = session.currentConfig;
938
+ const skipRetrieval = config?.skipRetrieval ?? false;
939
+
940
+ // Skip retrieval fast path — avoid DB queries entirely when model already has core memory
941
+ // (claw-code pattern: simple_mode skips the load, not load-then-discard)
942
+ if (skipRetrieval) {
943
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
944
+ // If model already saw core memory, just return recent turns + compressed rules. Zero DB queries.
945
+ if (session.injectedSections.has("tier0")) {
946
+ return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
947
+ }
948
+ // First turn or after compaction cleared injectedSections — load and inject
949
+ let tier0: CoreMemoryEntry[] = [];
950
+ let tier1: CoreMemoryEntry[] = [];
951
+ try {
952
+ [tier0, tier1] = await Promise.all([
953
+ store.getAllCoreMemory(0),
954
+ store.getAllCoreMemory(1),
955
+ ]);
956
+ tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
957
+ tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
958
+ } catch (e) {
959
+ console.warn("[warn] Core memory load failed:", e);
960
+ }
961
+ if (tier0.length > 0 || tier1.length > 0) {
962
+ const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
963
+ const result = [coreContext, ...recentTurns];
964
+ return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, recentTurns.length, "passthrough") };
965
+ }
966
+ return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
967
+ }
968
+
969
+ // Load tiered core memory (full retrieval path)
970
+ let tier0: CoreMemoryEntry[] = [];
971
+ let tier1: CoreMemoryEntry[] = [];
972
+ try {
973
+ [tier0, tier1] = await Promise.all([
974
+ store.getAllCoreMemory(0),
975
+ store.getAllCoreMemory(1),
976
+ ]);
977
+ tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
978
+ tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
979
+ } catch (e) {
980
+ console.warn("[warn] Core memory load failed:", e);
981
+ }
982
+
817
983
  // Graceful degradation
818
984
  const embeddingsUp = embeddings.isAvailable();
819
985
  const surrealUp = store.isAvailable();
820
986
 
821
987
  if (!embeddingsUp || !surrealUp) {
822
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
988
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
823
989
  if (tier0.length > 0 || tier1.length > 0) {
824
990
  const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
825
991
  const result = [coreContext, ...recentTurns];
@@ -833,9 +999,6 @@ async function graphTransformInner(
833
999
  return { messages: injectRulesSuffix(messages, session), stats: makeStats(messages, 0, 0, messages.length, "passthrough") };
834
1000
  }
835
1001
 
836
- // Derive retrieval config from session's current adaptive config
837
- const config = session.currentConfig;
838
- const skipRetrieval = config?.skipRetrieval ?? false;
839
1002
  const currentIntent = config?.intent ?? "unknown";
840
1003
  const baseLimits = config?.vectorSearchLimits ?? {
841
1004
  turn: 25, identity: 10, concept: 20, memory: 20, artifact: 10,
@@ -852,21 +1015,9 @@ async function graphTransformInner(
852
1015
  };
853
1016
  let tokenBudget = Math.min(config?.tokenBudget ?? 6000, budgets.retrieval);
854
1017
 
855
- // Pressure-based adaptive scaling
856
- // (In Phase 2, _usedTokens will be tracked per-session via hooks)
857
-
858
- if (skipRetrieval) {
859
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
860
- if (tier0.length > 0 || tier1.length > 0) {
861
- const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
862
- const result = [coreContext, ...recentTurns];
863
- return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, recentTurns.length, "passthrough") };
864
- }
865
- return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
866
- }
867
-
868
1018
  try {
869
1019
  const queryVec = await buildContextualQueryVec(queryText, messages, embeddings);
1020
+ session.lastQueryVec = queryVec; // Stash for redundant recall detection
870
1021
 
871
1022
  // Prefetch cache check
872
1023
  const cached = getCachedContext(queryVec);
@@ -891,7 +1042,7 @@ async function graphTransformInner(
891
1042
  const reflCtx = cached.reflections.length > 0 ? formatReflectionContext(cached.reflections) : "";
892
1043
 
893
1044
  const injectedContext = await formatContextMessage(contextNodes, store, session, skillCtx + reflCtx, tier0, tier1);
894
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
1045
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
895
1046
  const result = [injectedContext, ...recentTurns];
896
1047
  return { messages: injectRulesSuffix(result, session), stats: makeStats(result, contextNodes.length, 0, recentTurns.length, "graph", true) };
897
1048
  }
@@ -948,7 +1099,7 @@ async function graphTransformInner(
948
1099
  contextNodes = await ensureRecentTurns(contextNodes, session.sessionId, store);
949
1100
 
950
1101
  if (contextNodes.length === 0) {
951
- const result = getRecentTurns(messages, budgets.conversation, contextWindow);
1102
+ const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
952
1103
  return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "graph") };
953
1104
  }
954
1105
 
@@ -980,7 +1131,7 @@ async function graphTransformInner(
980
1131
  } catch (e) { swallow("graph-context:reflections", e); }
981
1132
 
982
1133
  const injectedContext = await formatContextMessage(contextNodes, store, session, skillContext + reflectionContext, tier0, tier1);
983
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
1134
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
984
1135
  const result = [injectedContext, ...recentTurns];
985
1136
  return {
986
1137
  messages: injectRulesSuffix(result, session),
@@ -993,7 +1144,7 @@ async function graphTransformInner(
993
1144
  };
994
1145
  } catch (err) {
995
1146
  console.error("Graph context error, falling back:", err);
996
- const result = getRecentTurns(messages, budgets.conversation, contextWindow);
1147
+ const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
997
1148
  return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "recency-only") };
998
1149
  }
999
1150
  }
@@ -6,7 +6,7 @@
6
6
  * so the next session's wakeup has context even before deferred
7
7
  * extraction runs.
8
8
  */
9
- import { readFileSync, writeFileSync, unlinkSync, existsSync, chmodSync } from "node:fs";
9
+ import { readFileSync, writeFileSync, unlinkSync, existsSync, renameSync } from "node:fs";
10
10
  import { join } from "node:path";
11
11
 
12
12
  const HANDOFF_FILENAME = ".kongbrain-handoff.json";
@@ -42,14 +42,21 @@ export function readAndDeleteHandoffFile(
42
42
  workspaceDir: string,
43
43
  ): HandoffFileData | null {
44
44
  const path = join(workspaceDir, HANDOFF_FILENAME);
45
+ const processingPath = path + ".processing";
46
+ // Also clean up stale .processing files from prior crashes
47
+ if (existsSync(processingPath) && !existsSync(path)) {
48
+ try { unlinkSync(processingPath); } catch { /* ignore */ }
49
+ }
45
50
  if (!existsSync(path)) return null;
46
51
  try {
47
- const raw = readFileSync(path, "utf-8");
48
- unlinkSync(path);
52
+ // Atomic rename first so a crash between read and delete can't re-process
53
+ renameSync(path, processingPath);
54
+ const raw = readFileSync(processingPath, "utf-8");
55
+ unlinkSync(processingPath);
49
56
  const parsed = JSON.parse(raw);
50
57
  // Runtime validation — reject prototype pollution and malformed data
51
58
  if (parsed == null || typeof parsed !== "object" || Array.isArray(parsed)) return null;
52
- if ("__proto__" in parsed || "constructor" in parsed) return null;
59
+ if (Object.hasOwn(parsed, "__proto__") || Object.hasOwn(parsed, "constructor")) return null;
53
60
  const data: HandoffFileData = {
54
61
  sessionId: typeof parsed.sessionId === "string" ? parsed.sessionId.slice(0, 200) : "",
55
62
  timestamp: typeof parsed.timestamp === "string" ? parsed.timestamp.slice(0, 50) : "",
@@ -61,7 +68,7 @@ export function readAndDeleteHandoffFile(
61
68
  return data;
62
69
  } catch {
63
70
  // Corrupted or deleted between check and read
64
- try { unlinkSync(path); } catch { /* ignore */ }
71
+ try { unlinkSync(processingPath); } catch { /* ignore */ }
65
72
  return null;
66
73
  }
67
74
  }
@@ -54,7 +54,7 @@ export function createAfterToolCallHandler(state: GlobalPluginState) {
54
54
  });
55
55
  if (assistantTurnId) session.lastAssistantTurnId = assistantTurnId;
56
56
  } catch (e) {
57
- swallow("hook:afterToolCall:eagerAssistantTurn", e);
57
+ swallow.warn("hook:afterToolCall:eagerAssistantTurn", e);
58
58
  }
59
59
  }
60
60
  if (session.lastAssistantTurnId) {
@@ -63,12 +63,12 @@ export function createAfterToolCallHandler(state: GlobalPluginState) {
63
63
  }
64
64
  }
65
65
  } catch (e) {
66
- swallow("hook:afterToolCall:store", e);
66
+ swallow.warn("hook:afterToolCall:store", e);
67
67
  }
68
68
 
69
69
  // Auto-track file artifacts from write/edit tools
70
70
  if (!isError) {
71
- trackArtifact(event.toolName, event.params, session.taskId, session.projectId, state)
71
+ await trackArtifact(event.toolName, event.params, session.taskId, session.projectId, state)
72
72
  .catch(e => swallow.warn("hook:afterToolCall:artifact", e));
73
73
  }
74
74
 
@@ -8,9 +8,12 @@
8
8
 
9
9
  import type { GlobalPluginState } from "../state.js";
10
10
  import { recordToolCall } from "../orchestrator.js";
11
+ import { cosineSimilarity } from "../graph-context.js";
11
12
 
12
13
  const DEFAULT_TOOL_LIMIT = 10;
13
14
  const CLASSIFICATION_LIMITS: Record<string, number> = { LOOKUP: 3, EDIT: 4, REFACTOR: 8 };
15
+ const API_CYCLE_CAP = 16;
16
+ const RECALL_SIMILARITY_THRESHOLD = 0.80;
14
17
 
15
18
  export function createBeforeToolCallHandler(state: GlobalPluginState) {
16
19
  return async (
@@ -30,6 +33,7 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
30
33
 
31
34
  session.toolCallCount++;
32
35
  session.toolCallsSinceLastText++;
36
+ session.apiCycleCount++;
33
37
 
34
38
  // Record for steering analysis
35
39
  recordToolCall(session, event.toolName);
@@ -46,6 +50,14 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
46
50
  };
47
51
  }
48
52
 
53
+ // API cycle cap (claw-code pattern: max_iterations — conversation.rs:119)
54
+ if (session.apiCycleCount > API_CYCLE_CAP) {
55
+ return {
56
+ block: true,
57
+ blockReason: `Hard API cycle cap (${API_CYCLE_CAP}) reached. Deliver your answer now.`,
58
+ };
59
+ }
60
+
49
61
  // Tool limit
50
62
  if (session.toolCallCount > session.toolLimit) {
51
63
  return {
@@ -54,14 +66,49 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
54
66
  };
55
67
  }
56
68
 
69
+ // Intent-based tool gating (claw-code pattern: simple_mode/MCP toggle — tools.py:62-72)
70
+ // On skipRetrieval turns, recall has nothing to add — context was skipped intentionally
71
+ if (event.toolName === "recall" && session.currentConfig?.skipRetrieval) {
72
+ return {
73
+ block: true,
74
+ blockReason: "Context retrieval was skipped this turn (continuation/trivial input). " +
75
+ "Recall would return the same results as previous turns. Continue with what you have.",
76
+ };
77
+ }
78
+
79
+ // Redundant recall blocker (claw-code pattern: _infer_permission_denials — runtime.py:169-174)
80
+ // Block recall when its query would return the same results as context retrieval
81
+ if (event.toolName === "recall" && session.lastQueryVec) {
82
+ const recallQuery = (event.params as { query?: string }).query;
83
+ if (recallQuery && typeof recallQuery === "string" && recallQuery.length > 5) {
84
+ try {
85
+ const recallVec = await state.embeddings.embed(recallQuery);
86
+ const sim = cosineSimilarity(session.lastQueryVec, recallVec);
87
+ if (sim > RECALL_SIMILARITY_THRESHOLD) {
88
+ return {
89
+ block: true,
90
+ blockReason:
91
+ `This recall query is ${(sim * 100).toFixed(0)}% similar to the context already retrieved this turn. ` +
92
+ "The results are in <graph_context> above. Read what you have. " +
93
+ "Only call recall with a DIFFERENT query targeting something specific not already covered.",
94
+ };
95
+ }
96
+ } catch { /* fail-open: allow recall if embedding fails */ }
97
+ }
98
+ }
99
+
57
100
  // Planning gate: model must output text before first tool call
58
101
  if (textLengthSoFar === 0 && toolIndex === 0) {
102
+ const retrievalNote = session.lastRetrievalSummary
103
+ ? `\nContext already injected: ${session.lastRetrievalSummary}. Read <graph_context> before calling tools.`
104
+ : "";
59
105
  return {
60
106
  block: true,
61
107
  blockReason:
62
108
  "PLANNING GATE — You must announce your plan before making tool calls.\n" +
63
109
  "1. Classify: LOOKUP (3 calls max), EDIT (4 max), REFACTOR (8 max)\n" +
64
- "2. STATE WHAT YOU ALREADY KNOW from injected memory/context — if you have prior knowledge about these files, say so\n" +
110
+ "2. STATE WHAT YOU ALREADY KNOW from injected memory/context — if you have prior knowledge about these files, say so" +
111
+ retrievalNote + "\n" +
65
112
  "3. List each planned call and what SPECIFIC GAP it fills that memory doesn't cover\n" +
66
113
  "4. Every step still happens, but COMBINED. Edit + test in one bash call, not two.\n" +
67
114
  "If injected context already answers the question, you may need ZERO tool calls.\n" +