kongbrain 0.3.16 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/SKILL.md CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  name: kongbrain
3
3
  description: Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.
4
- version: 0.3.16
4
+ version: 0.4.0
5
5
  homepage: https://github.com/42U/kongbrain
6
6
  metadata:
7
7
  openclaw:
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "kongbrain",
3
- "version": "0.3.16",
3
+ "version": "0.4.0",
4
4
  "description": "Graph-backed persistent memory engine for OpenClaw. Replaces the default context window with SurrealDB + vector embeddings that learn across sessions.",
5
5
  "type": "module",
6
6
  "license": "MIT",
@@ -48,6 +48,7 @@ import { runDeferredCleanup } from "./deferred-cleanup.js";
48
48
  import { extractSkill } from "./skills.js";
49
49
  import { generateReflection } from "./reflection.js";
50
50
  import { graduateCausalToSkills } from "./skills.js";
51
+ import { attemptGraduation, evolveSoul, checkStageTransition } from "./soul.js";
51
52
  import { swallow } from "./errors.js";
52
53
 
53
54
  export class KongBrainContextEngine implements ContextEngine {
@@ -152,7 +153,7 @@ export class KongBrainContextEngine implements ContextEngine {
152
153
 
153
154
  const contextWindow = params.tokenBudget ?? 200000;
154
155
 
155
- const { messages, stats } = await graphTransformContext({
156
+ const { messages, stats, systemPromptSection } = await graphTransformContext({
156
157
  messages: params.messages,
157
158
  session,
158
159
  store,
@@ -160,9 +161,24 @@ export class KongBrainContextEngine implements ContextEngine {
160
161
  contextWindow,
161
162
  });
162
163
 
164
+ // Stash retrieval summary for planning gate (claw-code pattern: pre-compute and show)
165
+ session.lastRetrievalSummary = stats.graphNodes > 0
166
+ ? `${stats.graphNodes} context items + ${stats.neighborNodes} neighbors injected (${stats.mode} mode)`
167
+ : "no graph context retrieved this turn";
168
+
163
169
  // Build system prompt additions
164
170
  const additions: string[] = [];
165
171
 
172
+ // Static content for API prefix caching (claw-code: prompt.rs static/dynamic split)
173
+ if (systemPromptSection) additions.push(systemPromptSection);
174
+
175
+ // Compaction summary (claw-code: compact.rs structured signals — inject once after compaction)
176
+ const compactionSummary = (session as any)._compactionSummary as string | undefined;
177
+ if (compactionSummary) {
178
+ additions.push("[POST-COMPACTION CONTEXT]\n" + compactionSummary);
179
+ delete (session as any)._compactionSummary;
180
+ }
181
+
166
182
  // Wakeup briefing (synthesized at session start, may still be in-flight)
167
183
  const wakeupPromise = (session as any)._wakeupPromise as Promise<string | null> | undefined;
168
184
  if (wakeupPromise) {
@@ -318,11 +334,70 @@ export class KongBrainContextEngine implements ContextEngine {
318
334
  tokenBudget?: number;
319
335
  force?: boolean;
320
336
  }): Promise<CompactResult> {
321
- // Graph retrieval IS the compaction — ownsCompaction: true
337
+ // Graph retrieval IS the compaction — ownsCompaction: true.
338
+ // But we extract structured signals so the model doesn't lose context
339
+ // about pending work and key files after old messages are dropped.
340
+ // (claw-code pattern: compact.rs extracts pending work, key files, continuation directive)
341
+ const sessionKey = params.sessionKey ?? params.sessionId;
342
+ const session = this.state.getSession(sessionKey);
343
+ if (session) {
344
+ session.injectedSections.clear();
345
+ }
346
+
347
+ // Extract structured compaction signals from stored turns
348
+ let summary: string | undefined;
349
+ try {
350
+ const { store } = this.state;
351
+ if (store.isAvailable()) {
352
+ const turns = await store.getSessionTurnsRich(params.sessionId, 30);
353
+ if (turns.length > 0) {
354
+ const fullText = turns.map(t => t.text).join("\n");
355
+
356
+ // Pending work detection (claw-code: compact.rs:235-254)
357
+ const pendingRe = /\b(todo|next|pending|follow up|remaining|unfinished|still need)\b[^.\n]{0,100}/gi;
358
+ const pendingMatches = [...fullText.matchAll(pendingRe)]
359
+ .map(m => m[0].trim().slice(0, 160))
360
+ .slice(0, 5);
361
+
362
+ // Key file extraction (claw-code: compact.rs:256-269)
363
+ const filePaths = [...new Set(
364
+ (fullText.match(/[\w\-/.]+\.\w{1,5}/g) ?? [])
365
+ .filter(p => /\.(ts|js|py|rs|go|md|json|yaml|toml|tsx|jsx)$/.test(p))
366
+ )].slice(0, 10);
367
+
368
+ // Tool names used (claw-code: compact.rs:127-137)
369
+ const toolNames = [...new Set(
370
+ turns.filter(t => t.tool_name).map(t => t.tool_name!)
371
+ )];
372
+
373
+ // Current work inference (claw-code: compact.rs:272-279)
374
+ const lastText = turns.filter(t => t.text.length > 10).at(-1)?.text.slice(0, 200) ?? "";
375
+
376
+ const parts: string[] = [];
377
+ if (pendingMatches.length > 0) parts.push(`PENDING: ${pendingMatches.join("; ")}`);
378
+ if (filePaths.length > 0) parts.push(`FILES: ${filePaths.join(", ")}`);
379
+ if (toolNames.length > 0) parts.push(`TOOLS USED: ${toolNames.join(", ")}`);
380
+ if (lastText) parts.push(`LAST: ${lastText}`);
381
+ parts.push("Resume directly — do not recap what was happening.");
382
+
383
+ if (parts.length > 1) {
384
+ summary = parts.join("\n");
385
+ // Stash for next assemble() to inject
386
+ if (session) {
387
+ (session as any)._compactionSummary = summary;
388
+ }
389
+ }
390
+ }
391
+ }
392
+ } catch { /* non-critical */ }
393
+
322
394
  return {
323
395
  ok: true,
324
- compacted: false,
325
- reason: "Graph retrieval handles context selection; no LLM-based compaction needed.",
396
+ compacted: !!summary,
397
+ reason: summary
398
+ ? "Extracted structured signals for continuation."
399
+ : "Graph retrieval handles context selection; no LLM-based compaction needed.",
400
+ result: summary ? { summary, tokensBefore: 0 } : undefined,
326
401
  };
327
402
  }
328
403
 
@@ -494,6 +569,55 @@ export class KongBrainContextEngine implements ContextEngine {
494
569
  })().catch(e => swallow.warn("midCleanup:handoff", e)),
495
570
  );
496
571
 
572
+ // Soul graduation + stage transition — run mid-session so marathon
573
+ // sessions don't miss milestones that would normally fire at session_end
574
+ cleanupOps.push(
575
+ (async () => {
576
+ const gradResult = await attemptGraduation(store, this.state.complete);
577
+ if (gradResult?.graduated && gradResult.soul) {
578
+ if (gradResult.report.stage === "ready") {
579
+ // New graduation — persist event for celebration
580
+ await store.queryExec(
581
+ `CREATE graduation_event CONTENT $data`,
582
+ {
583
+ data: {
584
+ session_id: session.sessionId,
585
+ acknowledged: false,
586
+ quality_score: gradResult.report.qualityScore,
587
+ volume_score: gradResult.report.volumeScore,
588
+ stage: gradResult.report.stage,
589
+ created_at: new Date().toISOString(),
590
+ },
591
+ },
592
+ );
593
+ if (this.state.enqueueSystemEvent) {
594
+ this.state.enqueueSystemEvent(
595
+ "[GRADUATION] KongBrain has achieved soul graduation! " +
596
+ "The agent will share this milestone when ready.",
597
+ { sessionKey: session.sessionKey },
598
+ );
599
+ }
600
+ } else {
601
+ // Pre-existing soul — check for evolution
602
+ await evolveSoul(store, this.state.complete);
603
+ }
604
+ }
605
+ })().catch(e => swallow.warn("midCleanup:soulGraduation", e)),
606
+ );
607
+
608
+ cleanupOps.push(
609
+ (async () => {
610
+ const transition = await checkStageTransition(store);
611
+ if (transition.transitioned && this.state.enqueueSystemEvent) {
612
+ this.state.enqueueSystemEvent(
613
+ `[MATURITY] Stage transition: ${transition.previousStage ?? "nascent"} → ${transition.currentStage}. ` +
614
+ `Volume: ${transition.report.met.length}/7 | Quality: ${transition.report.qualityScore.toFixed(2)}`,
615
+ { sessionKey: session.sessionKey },
616
+ );
617
+ }
618
+ })().catch(e => swallow.warn("midCleanup:stageTransition", e)),
619
+ );
620
+
497
621
  // Don't await — let cleanup run in background
498
622
  Promise.allSettled(cleanupOps).catch(() => {});
499
623
  }
@@ -62,6 +62,7 @@ const CONVERSATION_SHARE = 0.50;
62
62
  const RETRIEVAL_SHARE = 0.30;
63
63
  const CORE_MEMORY_SHARE = 0.15;
64
64
  const CORE_MEMORY_TTL = 300_000;
65
+ const MAX_ITEM_CHARS = 1200; // ~350 tokens per item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
65
66
  const MIN_RELEVANCE_SCORE = 0.35;
66
67
  const MIN_COSINE = 0.25;
67
68
 
@@ -149,25 +150,37 @@ function extractLastUserText(messages: AgentMessage[]): string | null {
149
150
  return null;
150
151
  }
151
152
 
153
+ /** Estimate char count for a single content block (claw-code: per-block-type estimation). */
154
+ function blockCharLen(c: any): number {
155
+ if (c.type === "text") return c.text.length;
156
+ if (c.type === "thinking") return c.thinking.length;
157
+ if (c.type === "toolCall") {
158
+ // Count tool name + serialized args (claw-code: compact.rs:326-338)
159
+ return (c.name?.length ?? 0) + (c.args ? JSON.stringify(c.args).length : 0);
160
+ }
161
+ if (c.type === "toolResult" && Array.isArray(c.content)) {
162
+ let len = 0;
163
+ for (const rc of c.content) {
164
+ if (rc.type === "text") len += rc.text.length;
165
+ else len += 100;
166
+ }
167
+ return len;
168
+ }
169
+ return 100; // image, etc.
170
+ }
171
+
152
172
  function estimateTokens(messages: AgentMessage[]): number {
153
173
  let chars = 0;
154
174
  for (const msg of messages) {
155
- for (const c of msgContentBlocks(msg)) {
156
- if (c.type === "text") chars += c.text.length;
157
- else if (c.type === "thinking") chars += c.thinking.length;
158
- else chars += 100;
159
- }
175
+ for (const c of msgContentBlocks(msg)) chars += blockCharLen(c);
176
+ chars += 4; // per-message structural overhead
160
177
  }
161
178
  return Math.ceil(chars / CHARS_PER_TOKEN);
162
179
  }
163
180
 
164
181
  function msgCharLen(msg: AgentMessage): number {
165
182
  let len = 0;
166
- for (const c of msgContentBlocks(msg)) {
167
- if (c.type === "text") len += c.text.length;
168
- else if (c.type === "thinking") len += c.thinking.length;
169
- else len += 100;
170
- }
183
+ for (const c of msgContentBlocks(msg)) len += blockCharLen(c);
171
184
  return len;
172
185
  }
173
186
 
@@ -199,7 +212,7 @@ function accessBoost(accessCount: number | undefined): number {
199
212
  return Math.log1p(accessCount ?? 0);
200
213
  }
201
214
 
202
- function cosineSimilarity(a: number[], b: number[]): number {
215
+ export function cosineSimilarity(a: number[], b: number[]): number {
203
216
  let dot = 0, magA = 0, magB = 0;
204
217
  for (let i = 0; i < a.length; i++) {
205
218
  dot += a[i] * b[i];
@@ -217,6 +230,19 @@ function buildRulesSuffix(session: SessionState): string {
217
230
  ? "unlimited" : String(Math.max(0, session.toolLimit - session.toolCallCount));
218
231
  const urgency = session.toolLimit !== Infinity && (session.toolLimit - session.toolCallCount) <= 3
219
232
  ? "\n⚠ WRAP UP or check in with user." : "";
233
+
234
+ // After first exposure, send only the budget line (claw-code: don't re-send static content)
235
+ if (session.injectedSections.has("rules_full")) {
236
+ return (
237
+ "\n<rules_reminder>" +
238
+ `\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
239
+ "\nCombine steps. If context already answers it, zero calls." +
240
+ "\n</rules_reminder>"
241
+ );
242
+ }
243
+
244
+ // First time — full examples
245
+ session.injectedSections.add("rules_full");
220
246
  return (
221
247
  "\n<rules_reminder>" +
222
248
  `\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
@@ -430,7 +456,7 @@ function takeWithConstraints(ranked: ScoredResult[], budgetTokens: number, maxIt
430
456
  for (const r of ranked) {
431
457
  if (selected.length >= maxItems) break;
432
458
  if ((r.finalScore ?? 0) < MIN_RELEVANCE_SCORE && selected.length > 0) break;
433
- const len = r.text?.length ?? 0;
459
+ const len = Math.min(r.text?.length ?? 0, MAX_ITEM_CHARS); // Cap per-item size for budget accounting
434
460
  if (used + len > budgetChars && selected.length > 0) break;
435
461
  selected.push(r);
436
462
  used += len;
@@ -447,13 +473,19 @@ function getTier1BudgetChars(budgets: Budgets): number {
447
473
  return Math.round(budgets.core * 0.45 * CHARS_PER_TOKEN);
448
474
  }
449
475
 
476
+ const MAX_CORE_MEMORY_CHARS = 800; // Per-item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
477
+
450
478
  function applyCoreBudget(entries: CoreMemoryEntry[], budgetChars: number): CoreMemoryEntry[] {
451
479
  let used = 0;
452
480
  const result: CoreMemoryEntry[] = [];
453
481
  for (const e of entries) {
454
- const len = e.text.length + 6;
482
+ // Cap individual entries so one large directive doesn't starve others
483
+ const text = e.text.length > MAX_CORE_MEMORY_CHARS
484
+ ? e.text.slice(0, MAX_CORE_MEMORY_CHARS) + "..."
485
+ : e.text;
486
+ const len = text.length + 6;
455
487
  if (used + len > budgetChars) continue;
456
- result.push(e);
488
+ result.push(text !== e.text ? { ...e, text } : e);
457
489
  used += len;
458
490
  }
459
491
  return result;
@@ -473,6 +505,40 @@ function formatTierSection(entries: CoreMemoryEntry[], label: string): string {
473
505
  return `${label}:\n${lines.join("\n")}`;
474
506
  }
475
507
 
508
+ /**
509
+ * Build static system prompt section for API prefix caching.
510
+ * Content here goes into systemPromptAddition where it benefits from
511
+ * cache-read rates (10% cost) on subsequent API calls in the agentic loop.
512
+ * (claw-code pattern: __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__ — prompt.rs:37-140)
513
+ */
514
+ function buildSystemPromptSection(session: SessionState, tier0Entries: CoreMemoryEntry[]): string | undefined {
515
+ const parts: string[] = [];
516
+
517
+ // IKONG architecture description (static, ~120 tokens)
518
+ const pillarLines: string[] = [];
519
+ if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
520
+ if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
521
+ if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
522
+ if (pillarLines.length > 0) {
523
+ parts.push(
524
+ "GRAPH PILLARS (your structural context):\n" +
525
+ ` ${pillarLines.join(" | ")}\n` +
526
+ " IKONG cognitive architecture:\n" +
527
+ " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
528
+ " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
529
+ " O(peration): tool execution, skill procedures, causal chain tracking\n" +
530
+ " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
531
+ " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
532
+ );
533
+ }
534
+
535
+ // Tier 0 core directives (semi-static, changes rarely)
536
+ const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
537
+ if (t0Section) parts.push(t0Section);
538
+
539
+ return parts.length > 0 ? parts.join("\n\n") : undefined;
540
+ }
541
+
476
542
  // ── Guaranteed recent turns from previous sessions ─────────────────────────────
477
543
 
478
544
  async function ensureRecentTurns(
@@ -532,27 +598,42 @@ async function formatContextMessage(
532
598
  const sections: string[] = [];
533
599
 
534
600
  // Pillar context — structural awareness of who/what/where
535
- const pillarLines: string[] = [];
536
- if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
537
- if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
538
- if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
539
- if (pillarLines.length > 0) {
540
- sections.push(
541
- "GRAPH PILLARS (your structural context):\n" +
542
- ` ${pillarLines.join(" | ")}\n` +
543
- " IKONG cognitive architecture:\n" +
544
- " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
545
- " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
546
- " O(peration): tool execution, skill procedures, causal chain tracking\n" +
547
- " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
548
- " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
549
- );
601
+ // Skip if model already has it in the conversation window (claw-code static section dedup)
602
+ if (!session.injectedSections.has("ikong")) {
603
+ const pillarLines: string[] = [];
604
+ if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
605
+ if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
606
+ if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
607
+ if (pillarLines.length > 0) {
608
+ sections.push(
609
+ "GRAPH PILLARS (your structural context):\n" +
610
+ ` ${pillarLines.join(" | ")}\n` +
611
+ " IKONG cognitive architecture:\n" +
612
+ " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
613
+ " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
614
+ " O(peration): tool execution, skill procedures, causal chain tracking\n" +
615
+ " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
616
+ " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
617
+ );
618
+ session.injectedSections.add("ikong");
619
+ }
550
620
  }
551
621
 
552
- const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
553
- if (t0Section) sections.push(t0Section);
554
- const t1Section = formatTierSection(tier1Entries, "SESSION CONTEXT (pinned for this session)");
555
- if (t1Section) sections.push(t1Section);
622
+ // Core directives — skip if model already has them
623
+ if (!session.injectedSections.has("tier0")) {
624
+ const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
625
+ if (t0Section) {
626
+ sections.push(t0Section);
627
+ session.injectedSections.add("tier0");
628
+ }
629
+ }
630
+ if (!session.injectedSections.has("tier1")) {
631
+ const t1Section = formatTierSection(tier1Entries, "SESSION CONTEXT (pinned for this session)");
632
+ if (t1Section) {
633
+ sections.push(t1Section);
634
+ session.injectedSections.add("tier1");
635
+ }
636
+ }
556
637
 
557
638
  // Cognitive directives
558
639
  const directives = getPendingDirectives(session);
@@ -607,6 +688,10 @@ async function formatContextMessage(
607
688
  const score = n.finalScore != null ? ` (relevance: ${(n.finalScore * 100).toFixed(0)}%)` : "";
608
689
  const via = n.fromNeighbor ? " [via graph link]" : "";
609
690
  let text = n.text ?? "";
691
+ // Truncate oversized items (claw-code: MAX_INSTRUCTION_FILE_CHARS pattern)
692
+ if (text.length > MAX_ITEM_CHARS) {
693
+ text = text.slice(0, MAX_ITEM_CHARS) + "... [truncated]";
694
+ }
610
695
  if (key === "past_turns") {
611
696
  text = text.replace(/^\[(user|assistant)\] /, "[past_$1] ");
612
697
  }
@@ -616,6 +701,23 @@ async function formatContextMessage(
616
701
  sections.push(`${label}:\n${formatted.join("\n")}`);
617
702
  }
618
703
 
704
+ // Injection manifest — tell the model what's already retrieved so it doesn't call recall redundantly
705
+ // (claw-code pattern: route_prompt pre-computes and shows available results)
706
+ const manifest: string[] = [];
707
+ for (const key of sortedKeys) {
708
+ const items = groups[key];
709
+ if (items.length > 0) manifest.push(`${LABELS[key] ?? key}: ${items.length}`);
710
+ }
711
+ if (tier0Entries.length > 0) manifest.push(`core_directives: ${tier0Entries.length}`);
712
+ if (tier1Entries.length > 0) manifest.push(`session_context: ${tier1Entries.length}`);
713
+ if (manifest.length > 0) {
714
+ sections.push(
715
+ "ALREADY RETRIEVED (do NOT call recall for these — they are above):\n" +
716
+ ` ${manifest.join(", ")}\n` +
717
+ "Only call recall if you need something SPECIFIC that isn't covered above."
718
+ );
719
+ }
720
+
619
721
  const text =
620
722
  "[System retrieved context — reference material, not user input. Higher relevance % = stronger match.]\n" +
621
723
  "<graph_context>\n" +
@@ -646,7 +748,7 @@ function truncateToolResult(msg: AgentMessage, maxChars: number): AgentMessage {
646
748
  return { ...msg, content };
647
749
  }
648
750
 
649
- function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number): AgentMessage[] {
751
+ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number, session?: SessionState): AgentMessage[] {
650
752
  const budgetChars = maxTokens * CHARS_PER_TOKEN;
651
753
  const TOOL_RESULT_MAX = Math.round(contextWindow * 0.03);
652
754
 
@@ -718,6 +820,16 @@ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWind
718
820
  }
719
821
  }
720
822
 
823
+ // Detect if old messages (containing previous context injection) were dropped from the window.
824
+ // If so, clear injectedSections so static content gets re-injected next turn.
825
+ if (session && messages.length > 0 && groups.length > 0) {
826
+ const firstOriginal = groups[0];
827
+ const firstSelected = selectedGroups[0];
828
+ if (firstOriginal !== firstSelected) {
829
+ session.injectedSections.clear();
830
+ }
831
+ }
832
+
721
833
  return selectedGroups.flat();
722
834
  }
723
835
 
@@ -735,6 +847,8 @@ export interface GraphTransformParams {
735
847
  export interface GraphTransformResult {
736
848
  messages: AgentMessage[];
737
849
  stats: ContextStats;
850
+ /** Static content for the system prompt — benefits from API prefix caching (10% cost). */
851
+ systemPromptSection?: string;
738
852
  }
739
853
 
740
854
  /**
@@ -748,6 +862,17 @@ export async function graphTransformContext(
748
862
  const contextWindow = params.contextWindow ?? 200000;
749
863
  const budgets = calcBudgets(contextWindow);
750
864
 
865
+ // Build static system prompt section for API prefix caching.
866
+ // Done here (wrapper) so it attaches to any inner return path.
867
+ // (claw-code pattern: static sections above __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__)
868
+ let systemPromptSection: string | undefined;
869
+ try {
870
+ const tier0ForSys = store.isAvailable()
871
+ ? applyCoreBudget(await store.getAllCoreMemory(0), getTier0BudgetChars(budgets))
872
+ : [];
873
+ systemPromptSection = buildSystemPromptSection(session, tier0ForSys);
874
+ } catch { /* non-critical — tier0 will still appear in user message */ }
875
+
751
876
  // Never throw — return raw messages on any failure
752
877
  try {
753
878
  const TRANSFORM_TIMEOUT_MS = 10_000;
@@ -757,6 +882,7 @@ export async function graphTransformContext(
757
882
  setTimeout(() => reject(new Error("graphTransformContext timed out")), TRANSFORM_TIMEOUT_MS),
758
883
  ),
759
884
  ]);
885
+ result.systemPromptSection = systemPromptSection;
760
886
  return result;
761
887
  } catch (err) {
762
888
  console.error("graphTransformContext fatal error, returning raw messages:", err);
@@ -773,6 +899,7 @@ export async function graphTransformContext(
773
899
  mode: "passthrough",
774
900
  prefetchHit: false,
775
901
  },
902
+ systemPromptSection,
776
903
  };
777
904
  }
778
905
  }
@@ -786,20 +913,6 @@ async function graphTransformInner(
786
913
  budgets: Budgets,
787
914
  _signal?: AbortSignal,
788
915
  ): Promise<GraphTransformResult> {
789
- // Load tiered core memory
790
- let tier0: CoreMemoryEntry[] = [];
791
- let tier1: CoreMemoryEntry[] = [];
792
- try {
793
- [tier0, tier1] = await Promise.all([
794
- store.getAllCoreMemory(0),
795
- store.getAllCoreMemory(1),
796
- ]);
797
- tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
798
- tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
799
- } catch (e) {
800
- console.warn("[warn] Core memory load failed:", e);
801
- }
802
-
803
916
  function makeStats(
804
917
  sent: AgentMessage[], graphNodes: number, neighborNodes: number,
805
918
  recentTurnCount: number, mode: ContextStats["mode"], prefetchHit = false,
@@ -814,12 +927,65 @@ async function graphTransformInner(
814
927
  };
815
928
  }
816
929
 
930
+ function makeResult(
931
+ msgs: AgentMessage[], stats: ContextStats, sysSection?: string,
932
+ ): GraphTransformResult {
933
+ return { messages: msgs, stats, systemPromptSection: sysSection };
934
+ }
935
+
936
+ // Derive retrieval config from session's current adaptive config
937
+ const config = session.currentConfig;
938
+ const skipRetrieval = config?.skipRetrieval ?? false;
939
+
940
+ // Skip retrieval fast path — avoid DB queries entirely when model already has core memory
941
+ // (claw-code pattern: simple_mode skips the load, not load-then-discard)
942
+ if (skipRetrieval) {
943
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
944
+ // If model already saw core memory, just return recent turns + compressed rules. Zero DB queries.
945
+ if (session.injectedSections.has("tier0")) {
946
+ return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
947
+ }
948
+ // First turn or after compaction cleared injectedSections — load and inject
949
+ let tier0: CoreMemoryEntry[] = [];
950
+ let tier1: CoreMemoryEntry[] = [];
951
+ try {
952
+ [tier0, tier1] = await Promise.all([
953
+ store.getAllCoreMemory(0),
954
+ store.getAllCoreMemory(1),
955
+ ]);
956
+ tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
957
+ tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
958
+ } catch (e) {
959
+ console.warn("[warn] Core memory load failed:", e);
960
+ }
961
+ if (tier0.length > 0 || tier1.length > 0) {
962
+ const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
963
+ const result = [coreContext, ...recentTurns];
964
+ return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, recentTurns.length, "passthrough") };
965
+ }
966
+ return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
967
+ }
968
+
969
+ // Load tiered core memory (full retrieval path)
970
+ let tier0: CoreMemoryEntry[] = [];
971
+ let tier1: CoreMemoryEntry[] = [];
972
+ try {
973
+ [tier0, tier1] = await Promise.all([
974
+ store.getAllCoreMemory(0),
975
+ store.getAllCoreMemory(1),
976
+ ]);
977
+ tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
978
+ tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
979
+ } catch (e) {
980
+ console.warn("[warn] Core memory load failed:", e);
981
+ }
982
+
817
983
  // Graceful degradation
818
984
  const embeddingsUp = embeddings.isAvailable();
819
985
  const surrealUp = store.isAvailable();
820
986
 
821
987
  if (!embeddingsUp || !surrealUp) {
822
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
988
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
823
989
  if (tier0.length > 0 || tier1.length > 0) {
824
990
  const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
825
991
  const result = [coreContext, ...recentTurns];
@@ -833,9 +999,6 @@ async function graphTransformInner(
833
999
  return { messages: injectRulesSuffix(messages, session), stats: makeStats(messages, 0, 0, messages.length, "passthrough") };
834
1000
  }
835
1001
 
836
- // Derive retrieval config from session's current adaptive config
837
- const config = session.currentConfig;
838
- const skipRetrieval = config?.skipRetrieval ?? false;
839
1002
  const currentIntent = config?.intent ?? "unknown";
840
1003
  const baseLimits = config?.vectorSearchLimits ?? {
841
1004
  turn: 25, identity: 10, concept: 20, memory: 20, artifact: 10,
@@ -852,21 +1015,9 @@ async function graphTransformInner(
852
1015
  };
853
1016
  let tokenBudget = Math.min(config?.tokenBudget ?? 6000, budgets.retrieval);
854
1017
 
855
- // Pressure-based adaptive scaling
856
- // (In Phase 2, _usedTokens will be tracked per-session via hooks)
857
-
858
- if (skipRetrieval) {
859
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
860
- if (tier0.length > 0 || tier1.length > 0) {
861
- const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
862
- const result = [coreContext, ...recentTurns];
863
- return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, recentTurns.length, "passthrough") };
864
- }
865
- return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
866
- }
867
-
868
1018
  try {
869
1019
  const queryVec = await buildContextualQueryVec(queryText, messages, embeddings);
1020
+ session.lastQueryVec = queryVec; // Stash for redundant recall detection
870
1021
 
871
1022
  // Prefetch cache check
872
1023
  const cached = getCachedContext(queryVec);
@@ -891,7 +1042,7 @@ async function graphTransformInner(
891
1042
  const reflCtx = cached.reflections.length > 0 ? formatReflectionContext(cached.reflections) : "";
892
1043
 
893
1044
  const injectedContext = await formatContextMessage(contextNodes, store, session, skillCtx + reflCtx, tier0, tier1);
894
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
1045
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
895
1046
  const result = [injectedContext, ...recentTurns];
896
1047
  return { messages: injectRulesSuffix(result, session), stats: makeStats(result, contextNodes.length, 0, recentTurns.length, "graph", true) };
897
1048
  }
@@ -948,7 +1099,7 @@ async function graphTransformInner(
948
1099
  contextNodes = await ensureRecentTurns(contextNodes, session.sessionId, store);
949
1100
 
950
1101
  if (contextNodes.length === 0) {
951
- const result = getRecentTurns(messages, budgets.conversation, contextWindow);
1102
+ const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
952
1103
  return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "graph") };
953
1104
  }
954
1105
 
@@ -980,7 +1131,7 @@ async function graphTransformInner(
980
1131
  } catch (e) { swallow("graph-context:reflections", e); }
981
1132
 
982
1133
  const injectedContext = await formatContextMessage(contextNodes, store, session, skillContext + reflectionContext, tier0, tier1);
983
- const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
1134
+ const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
984
1135
  const result = [injectedContext, ...recentTurns];
985
1136
  return {
986
1137
  messages: injectRulesSuffix(result, session),
@@ -993,7 +1144,7 @@ async function graphTransformInner(
993
1144
  };
994
1145
  } catch (err) {
995
1146
  console.error("Graph context error, falling back:", err);
996
- const result = getRecentTurns(messages, budgets.conversation, contextWindow);
1147
+ const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
997
1148
  return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "recency-only") };
998
1149
  }
999
1150
  }
@@ -8,9 +8,12 @@
8
8
 
9
9
  import type { GlobalPluginState } from "../state.js";
10
10
  import { recordToolCall } from "../orchestrator.js";
11
+ import { cosineSimilarity } from "../graph-context.js";
11
12
 
12
13
  const DEFAULT_TOOL_LIMIT = 10;
13
14
  const CLASSIFICATION_LIMITS: Record<string, number> = { LOOKUP: 3, EDIT: 4, REFACTOR: 8 };
15
+ const API_CYCLE_CAP = 16;
16
+ const RECALL_SIMILARITY_THRESHOLD = 0.80;
14
17
 
15
18
  export function createBeforeToolCallHandler(state: GlobalPluginState) {
16
19
  return async (
@@ -30,6 +33,7 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
30
33
 
31
34
  session.toolCallCount++;
32
35
  session.toolCallsSinceLastText++;
36
+ session.apiCycleCount++;
33
37
 
34
38
  // Record for steering analysis
35
39
  recordToolCall(session, event.toolName);
@@ -46,6 +50,14 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
46
50
  };
47
51
  }
48
52
 
53
+ // API cycle cap (claw-code pattern: max_iterations — conversation.rs:119)
54
+ if (session.apiCycleCount > API_CYCLE_CAP) {
55
+ return {
56
+ block: true,
57
+ blockReason: `Hard API cycle cap (${API_CYCLE_CAP}) reached. Deliver your answer now.`,
58
+ };
59
+ }
60
+
49
61
  // Tool limit
50
62
  if (session.toolCallCount > session.toolLimit) {
51
63
  return {
@@ -54,14 +66,49 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
54
66
  };
55
67
  }
56
68
 
69
+ // Intent-based tool gating (claw-code pattern: simple_mode/MCP toggle — tools.py:62-72)
70
+ // On skipRetrieval turns, recall has nothing to add — context was skipped intentionally
71
+ if (event.toolName === "recall" && session.currentConfig?.skipRetrieval) {
72
+ return {
73
+ block: true,
74
+ blockReason: "Context retrieval was skipped this turn (continuation/trivial input). " +
75
+ "Recall would return the same results as previous turns. Continue with what you have.",
76
+ };
77
+ }
78
+
79
+ // Redundant recall blocker (claw-code pattern: _infer_permission_denials — runtime.py:169-174)
80
+ // Block recall when its query embedding is more than RECALL_SIMILARITY_THRESHOLD cosine-similar to the last context-retrieval query vector
81
+ if (event.toolName === "recall" && session.lastQueryVec) {
82
+ const recallQuery = (event.params as { query?: string }).query;
83
+ if (recallQuery && typeof recallQuery === "string" && recallQuery.length > 5) {
84
+ try {
85
+ const recallVec = await state.embeddings.embed(recallQuery);
86
+ const sim = cosineSimilarity(session.lastQueryVec, recallVec);
87
+ if (sim > RECALL_SIMILARITY_THRESHOLD) {
88
+ return {
89
+ block: true,
90
+ blockReason:
91
+ `This recall query is ${(sim * 100).toFixed(0)}% similar to the context already retrieved this turn. ` +
92
+ "The results are in <graph_context> above. Read what you have. " +
93
+ "Only call recall with a DIFFERENT query targeting something specific not already covered.",
94
+ };
95
+ }
96
+ } catch { /* fail-open: allow recall if embedding fails */ }
97
+ }
98
+ }
99
+
57
100
  // Planning gate: model must output text before first tool call
58
101
  if (textLengthSoFar === 0 && toolIndex === 0) {
102
+ const retrievalNote = session.lastRetrievalSummary
103
+ ? `\nContext already injected: ${session.lastRetrievalSummary}. Read <graph_context> before calling tools.`
104
+ : "";
59
105
  return {
60
106
  block: true,
61
107
  blockReason:
62
108
  "PLANNING GATE — You must announce your plan before making tool calls.\n" +
63
109
  "1. Classify: LOOKUP (3 calls max), EDIT (4 max), REFACTOR (8 max)\n" +
64
- "2. STATE WHAT YOU ALREADY KNOW from injected memory/context — if you have prior knowledge about these files, say so\n" +
110
+ "2. STATE WHAT YOU ALREADY KNOW from injected memory/context — if you have prior knowledge about these files, say so" +
111
+ retrievalNote + "\n" +
65
112
  "3. List each planned call and what SPECIFIC GAP it fills that memory doesn't cover\n" +
66
113
  "4. Every step still happens, but COMBINED. Edit + test in one bash call, not two.\n" +
67
114
  "If injected context already answers the question, you may need ZERO tool calls.\n" +
@@ -33,14 +33,34 @@ export function createLlmOutputHandler(state: GlobalPluginState) {
33
33
  // Measure assistant text output (used for token estimation and planning gate)
34
34
  const textLen = event.assistantTexts.reduce((s, t) => s + t.length, 0);
35
35
 
36
- // Extract token counts — fall back to text-length estimate when provider
37
- // doesn't report usage (OpenClaw often passes 0 or undefined)
38
- let inputTokens = event.usage?.input ?? 0;
39
- let outputTokens = event.usage?.output ?? 0;
40
- if (inputTokens + outputTokens === 0 && textLen > 0) {
41
- outputTokens = Math.ceil(textLen / 4); // ~4 chars per token
36
+ // Extract token counts — OpenClaw's getUsageTotals() returns CUMULATIVE totals
37
+ // across all API calls in the session, not per-response values.
38
+ // Compute the delta since last call to avoid quadratic overcounting.
39
+ const reportedInput = event.usage?.input ?? 0;
40
+ const reportedOutput = event.usage?.output ?? 0;
41
+ const reportedCacheRead = event.usage?.cacheRead ?? 0;
42
+ const reportedCacheWrite = event.usage?.cacheWrite ?? 0;
43
+ const reportedTotal = reportedInput + reportedOutput + reportedCacheRead + reportedCacheWrite;
44
+
45
+ let deltaTokens: number;
46
+ if (reportedTotal > 0) {
47
+ deltaTokens = Math.max(0, reportedTotal - session.lastSeenUsageTotal);
48
+ session.lastSeenUsageTotal = reportedTotal;
49
+ } else if (textLen > 0) {
50
+ // No usage data — fall back to text-length estimate
51
+ deltaTokens = Math.ceil(textLen / 4); // ~4 chars per token
52
+ } else {
53
+ deltaTokens = 0;
42
54
  }
43
55
 
56
+ // DB stats: approximate input/output split from the delta
57
+ const inputTokens = reportedTotal > 0 && deltaTokens > 0
58
+ ? Math.round(deltaTokens * (reportedInput / reportedTotal))
59
+ : 0;
60
+ const outputTokens = reportedTotal > 0 && deltaTokens > 0
61
+ ? Math.round(deltaTokens * (reportedOutput / reportedTotal))
62
+ : (deltaTokens > 0 ? deltaTokens : Math.ceil(textLen / 4));
63
+
44
64
  // Always update session stats — turn_count must increment even without usage data
45
65
  if (session.surrealSessionId) {
46
66
  try {
@@ -55,8 +75,8 @@ export function createLlmOutputHandler(state: GlobalPluginState) {
55
75
  }
56
76
 
57
77
  // Accumulate for daemon batching and mid-session cleanup
58
- session.newContentTokens += inputTokens + outputTokens;
59
- session.cumulativeTokens += inputTokens + outputTokens;
78
+ session.newContentTokens += deltaTokens;
79
+ session.cumulativeTokens += deltaTokens;
60
80
 
61
81
  // Track accumulated text output for planning gate
62
82
  session.turnTextLength += textLen;
@@ -0,0 +1,142 @@
1
+ /**
2
+ * subagent_spawned / subagent_ended hooks — track spawned subagents in the graph.
3
+ *
4
+ * Creates `subagent` records and `spawned` edges (session → subagent).
5
+ * Updates subagent records with outcome on completion.
6
+ */
7
+
8
+ import type { GlobalPluginState } from "../state.js";
9
+ import { swallow } from "../errors.js";
10
+
11
+ // ── Event shapes (from OpenClaw gateway) ─────────────────────────────────
12
+
13
+ interface SubagentSpawnedEvent {
14
+ runId: string;
15
+ childSessionKey: string;
16
+ agentId?: string;
17
+ label?: string;
18
+ requester?: {
19
+ channel?: string;
20
+ accountId?: string;
21
+ to?: string;
22
+ threadId?: string;
23
+ };
24
+ threadRequested?: boolean;
25
+ mode?: string; // "run" | "session"
26
+ }
27
+
28
+ interface SubagentSpawnedContext {
29
+ runId: string;
30
+ childSessionKey: string;
31
+ requesterSessionKey?: string;
32
+ }
33
+
34
+ interface SubagentEndedEvent {
35
+ targetSessionKey: string;
36
+ targetKind?: string;
37
+ reason?: string;
38
+ sendFarewell?: boolean;
39
+ accountId?: string;
40
+ runId: string;
41
+ endedAt?: string;
42
+ outcome?: string;
43
+ error?: string;
44
+ }
45
+
46
+ interface SubagentEndedContext {
47
+ runId: string;
48
+ childSessionKey: string;
49
+ requesterSessionKey?: string;
50
+ }
51
+
52
+ // ── Handlers ─────────────────────────────────────────────────────────────
53
+
54
+ export function createSubagentSpawnedHandler(state: GlobalPluginState) {
55
+ return async (event: SubagentSpawnedEvent, ctx: SubagentSpawnedContext) => {
56
+ try {
57
+ const store = state.store;
58
+
59
+ // Create the subagent record
60
+ const rows = await store.queryFirst<{ id: string }>(
61
+ `CREATE subagent CONTENT {
62
+ run_id: $run_id,
63
+ parent_session_key: $parent_key,
64
+ child_session_key: $child_key,
65
+ parent_session_id: $parent_key,
66
+ child_session_id: $child_key,
67
+ agent_id: $agent_id,
68
+ label: $label,
69
+ mode: $mode,
70
+ task: $label,
71
+ status: "running",
72
+ created_at: time::now()
73
+ } RETURN id`,
74
+ {
75
+ run_id: event.runId,
76
+ parent_key: ctx.requesterSessionKey ?? "unknown",
77
+ child_key: event.childSessionKey,
78
+ agent_id: event.agentId ?? "default",
79
+ label: event.label ?? null,
80
+ mode: event.mode ?? "run",
81
+ },
82
+ );
83
+
84
+ const subagentId = String(rows[0]?.id ?? "");
85
+ if (!subagentId) return;
86
+
87
+ // Find the parent's surreal session ID to create the spawned edge.
88
+ // The requesterSessionKey is the OpenClaw session key — we need to
89
+ // find the matching surreal session record.
90
+ if (ctx.requesterSessionKey) {
91
+ // Look up active session state first (fast path)
92
+ const parentSession = state.getSession(ctx.requesterSessionKey);
93
+ if (parentSession?.surrealSessionId) {
94
+ await store.relate(parentSession.surrealSessionId, "spawned", subagentId);
95
+ } else {
96
+ // Fallback: use the most recent session record that's still active (not filtered by requesterSessionKey, so it may not be the actual parent)
97
+ const sessions = await store.queryFirst<{ id: string }>(
98
+ `SELECT id FROM session
99
+ WHERE ended_at IS NONE
100
+ ORDER BY started_at DESC LIMIT 1`,
101
+ );
102
+ if (sessions.length > 0) {
103
+ await store.relate(String(sessions[0].id), "spawned", subagentId);
104
+ }
105
+ }
106
+ }
107
+ } catch (e) {
108
+ swallow.warn("hook:subagentSpawned", e);
109
+ }
110
+ };
111
+ }
112
+
113
+ export function createSubagentEndedHandler(state: GlobalPluginState) {
114
+ return async (event: SubagentEndedEvent, ctx: SubagentEndedContext) => {
115
+ try {
116
+ const store = state.store;
117
+
118
+ // Update the subagent record by run_id
119
+ await store.queryExec(
120
+ `UPDATE subagent SET
121
+ status = $status,
122
+ outcome = $outcome,
123
+ error = $error,
124
+ reason = $reason,
125
+ ended_at = $ended_at
126
+ WHERE run_id = $run_id`,
127
+ {
128
+ run_id: event.runId,
129
+ status: event.outcome === "success" ? "completed"
130
+ : event.reason === "spawn-failed" ? "error"
131
+ : event.outcome ?? "completed",
132
+ outcome: event.outcome ?? null,
133
+ error: event.error ?? null,
134
+ reason: event.reason ?? null,
135
+ ended_at: event.endedAt ?? new Date().toISOString(),
136
+ },
137
+ );
138
+ } catch (e) {
139
+ swallow.warn("hook:subagentEnded", e);
140
+ }
141
+ };
142
+ }
package/src/index.ts CHANGED
@@ -20,6 +20,7 @@ import { createBeforePromptBuildHandler } from "./hooks/before-prompt-build.js";
20
20
  import { createBeforeToolCallHandler } from "./hooks/before-tool-call.js";
21
21
  import { createAfterToolCallHandler } from "./hooks/after-tool-call.js";
22
22
  import { createLlmOutputHandler } from "./hooks/llm-output.js";
23
+ import { createSubagentSpawnedHandler, createSubagentEndedHandler } from "./hooks/subagent-lifecycle.js";
23
24
  import { startMemoryDaemon } from "./daemon-manager.js";
24
25
  import { seedIdentity } from "./identity.js";
25
26
  import { seedCognitiveBootstrap } from "./cognitive-bootstrap.js";
@@ -421,6 +422,8 @@ export default definePluginEntry({
421
422
  api.on("before_tool_call", createBeforeToolCallHandler(globalState));
422
423
  api.on("after_tool_call", createAfterToolCallHandler(globalState));
423
424
  api.on("llm_output", createLlmOutputHandler(globalState));
425
+ api.on("subagent_spawned", createSubagentSpawnedHandler(globalState));
426
+ api.on("subagent_ended", createSubagentEndedHandler(globalState));
424
427
  }
425
428
 
426
429
  // ── Session lifecycle (also register once) ─────────────────────────
package/src/state.ts CHANGED
@@ -62,6 +62,9 @@ export class SessionState {
62
62
  cumulativeTokens = 0;
63
63
  lastCleanupTokens = 0;
64
64
  midSessionCleanupThreshold = 25_000;
65
+ /** Last cumulative usage total seen from OpenClaw — used to compute per-call deltas
66
+ * since getUsageTotals() returns running totals, not per-response values. */
67
+ lastSeenUsageTotal = 0;
65
68
 
66
69
  // Cleanup tracking
67
70
  cleanedUp = false;
@@ -72,6 +75,17 @@ export class SessionState {
72
75
  // Pending tool args for artifact tracking
73
76
  readonly pendingToolArgs = new Map<string, unknown>();
74
77
 
78
+ // Tool call optimization state (claw-code patterns)
79
+ /** Query vector from the most recent context retrieval (persists across turns within the session; not cleared at turn reset) — used to detect redundant recall calls. */
80
+ lastQueryVec: number[] | null = null;
81
+ /** Summary of what graphTransformContext injected — shown in planning gate. */
82
+ lastRetrievalSummary = "";
83
+ /** Per-turn counter, incremented on each before_tool_call and zeroed at turn reset, used as a proxy for API request cycles — hard cap prevents runaway token spend. */
84
+ apiCycleCount = 0;
85
+ /** Tracks which static context sections the model has already seen in the conversation window.
86
+ * Persists across turns (NOT cleared in resetTurn) — cleared only when messages drop from window. */
87
+ readonly injectedSections = new Set<string>();
88
+
75
89
  // 5-pillar IDs (populated at bootstrap)
76
90
  agentId = "";
77
91
  projectId = "";
@@ -92,6 +106,10 @@ export class SessionState {
92
106
  this.softInterrupted = false;
93
107
  this.turnStartMs = Date.now();
94
108
  this.pendingThinking.length = 0;
109
+ this.lastRetrievalSummary = "";
110
+ this.apiCycleCount = 0;
111
+ // NOTE: lastQueryVec and injectedSections are NOT cleared here —
112
+ // they persist across turns within the session.
95
113
  }
96
114
  }
97
115
 
@@ -46,7 +46,7 @@ export function createCoreMemoryToolDef(state: GlobalPluginState, session: Sessi
46
46
  }
47
47
  const formatted = entries.map((e, i) => {
48
48
  const sid = e.session_id ? ` session:${e.session_id}` : "";
49
- return `${i + 1}. [T${e.tier}/${e.category}/p${e.priority}${sid}] ${e.id}\n ${e.text.slice(0, 200)}`;
49
+ return `${i + 1}. [T${e.tier}/${e.category}/p${e.priority}${sid}] ${e.id}\n ${e.text.slice(0, 120)}`;
50
50
  }).join("\n\n");
51
51
  return {
52
52
  content: [{ type: "text" as const, text: `${entries.length} core memory entries:\n\n${formatted}` }],
@@ -73,6 +73,8 @@ export function createCoreMemoryToolDef(state: GlobalPluginState, session: Sessi
73
73
  details: { error: true },
74
74
  };
75
75
  }
76
+ // Invalidate cached section so updated content re-injects next turn
77
+ session.injectedSections.delete(tier === 0 ? "tier0" : "tier1");
76
78
  return {
77
79
  content: [{ type: "text" as const, text: `Created core memory: ${id} (tier ${tier}, ${params.category ?? "general"}, p${params.priority ?? 50})` }],
78
80
  details: { id },
@@ -95,6 +97,9 @@ export function createCoreMemoryToolDef(state: GlobalPluginState, session: Sessi
95
97
  details: { error: true },
96
98
  };
97
99
  }
100
+ // Invalidate both tiers — update may have changed the tier
101
+ session.injectedSections.delete("tier0");
102
+ session.injectedSections.delete("tier1");
98
103
  return {
99
104
  content: [{ type: "text" as const, text: `Updated core memory: ${params.id}` }],
100
105
  details: { id: params.id },
@@ -106,6 +111,9 @@ export function createCoreMemoryToolDef(state: GlobalPluginState, session: Sessi
106
111
  return { content: [{ type: "text" as const, text: "Error: 'id' is required for deactivate action." }], details: null };
107
112
  }
108
113
  await store.deleteCoreMemory(params.id);
114
+ // Invalidate both tiers so removal is reflected next turn
115
+ session.injectedSections.delete("tier0");
116
+ session.injectedSections.delete("tier1");
109
117
  return {
110
118
  content: [{ type: "text" as const, text: `Deactivated core memory: ${params.id}` }],
111
119
  details: { id: params.id },
@@ -34,7 +34,7 @@ export function createRecallToolDef(state: GlobalPluginState, session: SessionSt
34
34
  return { content: [{ type: "text" as const, text: "Memory system unavailable." }], details: null };
35
35
  }
36
36
 
37
- const maxResults = Math.min(params.limit ?? 5, 15);
37
+ const maxResults = Math.min(params.limit ?? 3, 15);
38
38
 
39
39
  try {
40
40
  const queryVec = await embeddings.embed(params.query);
@@ -87,7 +87,7 @@ export function createRecallToolDef(state: GlobalPluginState, session: SessionSt
87
87
  const tag = r.table === "turn" ? `[${r.role ?? "turn"}]` : `[${r.table}]`;
88
88
  const time = r.timestamp ? ` (${new Date(r.timestamp).toLocaleDateString()})` : "";
89
89
  const score = r.score ? ` score:${r.score.toFixed(2)}` : "";
90
- return `${i + 1}. ${tag}${time}${score}\n ${(r.text ?? "").slice(0, 500)}`;
90
+ return `${i + 1}. ${tag}${time}${score}\n ${(r.text ?? "").slice(0, 300)}`;
91
91
  }).join("\n\n");
92
92
 
93
93
  return {