kongbrain 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -23,11 +23,29 @@ import { getCachedContext, recordPrefetchHit, recordPrefetchMiss } from "./prefe
  import { stageRetrieval, getHistoricalUtilityBatch } from "./retrieval-quality.js";
  import { isACANActive, scoreWithACAN, type ACANCandidate } from "./acan.js";
  import { swallow } from "./errors.js";
+ import { log } from "./log.js";

  // ── Message type guards ────────────────────────────────────────────────────────

  type ContentBlock = TextContent | ThinkingContent | ToolCall | ImageContent;

+ /**
+  * Loose content block type for message stripping — covers the full range of
+  * block shapes that may appear in pi-ai messages beyond the typed union
+  * (e.g., toolResult blocks with nested content, image_url, source).
+  */
+ type AnyContentBlock = {
+   type: string;
+   text?: string;
+   thinking?: string;
+   media_type?: string;
+   content?: AnyContentBlock[];
+   [key: string]: unknown;
+ };
+
+ /** Mutable view of a message for in-place content stripping. */
+ type MutableMessage = { role: string; content: AnyContentBlock[] | string };
+
  function isUser(msg: AgentMessage): msg is UserMessage {
    return (msg as UserMessage).role === "user";
  }
@@ -56,15 +74,28 @@ function msgContentBlocks(msg: AgentMessage): ContentBlock[] {

  // ── Constants ──────────────────────────────────────────────────────────────────

- const CHARS_PER_TOKEN = 3.4;
- const BUDGET_FRACTION = 0.70;
- const CONVERSATION_SHARE = 0.50;
- const RETRIEVAL_SHARE = 0.30;
- const CORE_MEMORY_SHARE = 0.15;
+ // Token estimation ratios (aligned with Claude Code's roughTokenCountEstimation):
+ // - Prose/code: 4 bytes per token (claw-code default)
+ // - JSON (tool results, structured data): 2 bytes per token (denser single-char tokens)
+ // - Safety margin: 4/3 (33%) applied to aggregate estimates
+ const BYTES_PER_TOKEN = 4;
+ const BYTES_PER_TOKEN_JSON = 2;
+ const CHARS_PER_TOKEN = BYTES_PER_TOKEN; // backward compat alias for budget math
+ const TOKEN_SAFETY_MARGIN = 4 / 3;
+ const IMAGE_TOKEN_ESTIMATE = 2000; // claw-code: hardcoded for images/documents
+ const BUDGET_FRACTION = 0.325; // ~65k of 200k window (leaves ~135k for LLM generation + tool results)
+ const CONVERSATION_SHARE = 0.23; // ~15k for recent user/assistant exchanges
+ const RETRIEVAL_SHARE = 0.385; // ~25k for graph-curated context
+ const CORE_MEMORY_SHARE = 0.155; // ~10k for core memory/directives
+ const TOOL_HISTORY_SHARE = 0.23; // ~15k for recent tool results
  const CORE_MEMORY_TTL = 300_000;
- const MAX_ITEM_CHARS = 1200; // ~350 tokens per item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
- const MIN_RELEVANCE_SCORE = 0.35;
- const MIN_COSINE = 0.25;
+ const MAX_ITEM_CHARS = 1200; // ~350 tokens per item (matches claw-code MAX_INSTRUCTION_FILE_CHARS)
+ const MIN_RELEVANCE_SCORE = 0.40; // Floor for graph-scored results after WMR/ACAN (tuned: cosine-heavy weights produce lower absolute scores)
+ const MIN_COSINE = 0.35; // Minimum cosine similarity to consider a result (raised from 0.25)
+
+ // Deduplication thresholds
+ const DEDUP_COSINE_THRESHOLD = 0.88;
+ const DEDUP_JACCARD_THRESHOLD = 0.80;

  // Recency decay
  const RECENCY_DECAY_FAST = 0.99;
@@ -87,20 +118,24 @@ const INTENT_REMINDER_THRESHOLD = 10;

  // ── Budget calculation ─────────────────────────────────────────────────────────

- interface Budgets {
+ /** @internal Exported for testing. */
+ export interface Budgets {
    conversation: number;
    retrieval: number;
    core: number;
+   toolHistory: number;
    maxContextItems: number;
  }

- function calcBudgets(contextWindow: number): Budgets {
+ /** Split the context window into 4 budgets: conversation, retrieval, core memory, and tool history. @internal */
+ export function calcBudgets(contextWindow: number): Budgets {
    const total = contextWindow * BUDGET_FRACTION;
    const retrieval = Math.round(total * RETRIEVAL_SHARE);
    return {
      conversation: Math.round(total * CONVERSATION_SHARE),
      retrieval,
      core: Math.round(total * CORE_MEMORY_SHARE),
+     toolHistory: Math.round(total * TOOL_HISTORY_SHARE),
      maxContextItems: Math.max(20, Math.round(retrieval / 300)),
    };
  }
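A quick sanity check of the new split, assuming the 200k-token window the share comments refer to:

  const b = calcBudgets(200_000);
  // total             = 200_000 * 0.325        = 65_000 tokens
  // b.conversation    = round(65_000 * 0.23)   = 14_950  (~15k)
  // b.retrieval       = round(65_000 * 0.385)  = 25_025  (~25k)
  // b.core            = round(65_000 * 0.155)  = 10_075  (~10k)
  // b.toolHistory     = round(65_000 * 0.23)   = 14_950  (~15k)
  // b.maxContextItems = max(20, round(25_025 / 300)) = 83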
@@ -150,32 +185,43 @@ function extractLastUserText(messages: AgentMessage[]): string | null {
    return null;
  }

- /** Estimate char count for a single content block (claw-code: per-block-type estimation). */
+ /** Estimate char-equivalent count for a single content block (claw-code: per-block-type estimation). */
  function blockCharLen(c: any): number {
    if (c.type === "text") return c.text.length;
    if (c.type === "thinking") return c.thinking.length;
    if (c.type === "toolCall") {
-     // Count tool name + serialized args (claw-code: compact.rs:326-338)
-     return (c.name?.length ?? 0) + (c.args ? JSON.stringify(c.args).length : 0);
+     // Tool name + serialized args — JSON is denser (2 bytes/token vs 4)
+     // Scale JSON args to char-equivalent at prose ratio
+     const argsJson = c.args ? JSON.stringify(c.args) : "";
+     const argsCharEquiv = argsJson.length * (BYTES_PER_TOKEN / BYTES_PER_TOKEN_JSON);
+     return (c.name?.length ?? 0) + argsCharEquiv;
    }
    if (c.type === "toolResult" && Array.isArray(c.content)) {
      let len = 0;
      for (const rc of c.content) {
-       if (rc.type === "text") len += rc.text.length;
-       else len += 100;
+       if (rc.type === "text") {
+         // Detect JSON-heavy tool results and scale accordingly
+         const text = rc.text ?? "";
+         const isJson = text.length > 20 && (text[0] === "{" || text[0] === "[");
+         len += isJson ? text.length * (BYTES_PER_TOKEN / BYTES_PER_TOKEN_JSON) : text.length;
+       } else {
+         // Images/documents: claw-code hardcodes 2000 tokens
+         len += IMAGE_TOKEN_ESTIMATE * BYTES_PER_TOKEN;
+       }
      }
      return len;
    }
-   return 100; // image, etc.
+   return IMAGE_TOKEN_ESTIMATE * BYTES_PER_TOKEN; // image, document, etc.
  }

  function estimateTokens(messages: AgentMessage[]): number {
    let chars = 0;
    for (const msg of messages) {
      for (const c of msgContentBlocks(msg)) chars += blockCharLen(c);
-     chars += 4; // per-message structural overhead
+     chars += 20; // per-message structural overhead (role token, framing, separators)
    }
-   return Math.ceil(chars / CHARS_PER_TOKEN);
+   // Apply safety margin (claw-code: 4/3 multiplier on rough estimates)
+   return Math.ceil((chars / CHARS_PER_TOKEN) * TOKEN_SAFETY_MARGIN);
  }

  function msgCharLen(msg: AgentMessage): number {
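A rough worked example of the new estimator, assuming a message containing a single 4,000-char block:

  // prose text block:  ceil((4_000 + 20) / 4 * (4 / 3))           = 1_340 tokens
  // same chars in a JSON tool result: 4_000 * (4 / 2) = 8_000 char-equivalents,
  //                    ceil((8_000 + 20) / 4 * (4 / 3))           = 2_674 tokens
  // image/document block: 2_000 * 4 = 8_000 char-equivalents → ~2_674 tokens after the margin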
@@ -212,6 +258,7 @@ function accessBoost(accessCount: number | undefined): number {
    return Math.log1p(accessCount ?? 0);
  }

+ /** Dot-product cosine similarity between two equal-length vectors. Returns 0 if either has zero magnitude. */
  export function cosineSimilarity(a: number[], b: number[]): number {
    let dot = 0, magA = 0, magB = 0;
    for (let i = 0; i < a.length; i++) {
@@ -241,35 +288,15 @@ function buildRulesSuffix(session: SessionState): string {
      );
    }

-   // First time — full examples
+   // First time — compact rules (no verbose examples)
    session.injectedSections.add("rules_full");
    return (
      "\n<rules_reminder>" +
      `\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
-     "\n\nYOUR BUDGET IS SMALL. Plan the whole task, not just the next call." +
-     "\n" +
-     "\nTask: Fix broken import" +
-     "\n WASTEFUL (6 calls): grep old read file grep new → read context → edit → read to verify" +
-     "\n DENSE (2 calls):" +
-     "\n 1. grep -n 'oldImport' src/**/*.ts; grep -rn 'newModule' src/" +
-     "\n 2. edit file && npm test -- --grep 'relevant' 2>&1 | tail -20" +
-     "\n" +
-     "\nTask: Debug failing test" +
-     "\n WASTEFUL (8 calls): run test → read output → read test → read source → grep → read more → edit → rerun" +
-     "\n DENSE (3 calls):" +
-     "\n 1. npm test 2>&1 | tail -30" +
-     "\n 2. grep -n 'failingTest\|relevantFn' test/*.ts src/*.ts" +
-     "\n 3. edit fix && npm test 2>&1 | tail -15" +
-     "\n" +
-     "\nTask: Read/understand multiple files" +
-     "\n WASTEFUL (10 calls): cat file1 → cat file2 → cat file3 → ..." +
-     "\n DENSE (1-2 calls):" +
-     "\n 1. head -80 src/a.ts src/b.ts src/c.ts src/d.ts (4 files in ONE call)" +
-     "\n 2. grep -n 'keyPattern' src/*.ts (search all files at once, not one by one)" +
-     "\n" +
-     "\nEvery step still happens — investigation, edit, verification — but COMBINED into fewer calls." +
-     "\nThe answer is often already in context. Don't call if you already know." +
-     "\nAnnounce: task type (LOOKUP=1/EDIT=2/REFACTOR=6), planned calls, what each does." +
+     "\nClassify: LOOKUP(≤3) | EDIT(≤4) | REFACTOR(≤8). Announce type + plan before tools." +
+     "\nCombine: grep+grep in 1 call, edit+test in 1 bash. Read multiple files in 1 call." +
+     "\nSkip: if <graph_context> already answers it, zero calls needed." +
+     "\nBe dense: lead with answer, no filler, no repeating context back." +
      "\n</rules_reminder>"
    );
  }
@@ -303,43 +330,18 @@ function injectRulesSuffix(messages: AgentMessage[], session: SessionState): Age

  async function buildContextualQueryVec(
    queryText: string,
-   messages: AgentMessage[],
+   _messages: AgentMessage[],
    embeddings: EmbeddingService,
+   session?: SessionState,
  ): Promise<number[]> {
-   const queryVec = await embeddings.embed(queryText);
-
-   const recentTexts: string[] = [];
-   for (let i = messages.length - 2; i >= 0 && recentTexts.length < 3; i--) {
-     const msg = messages[i] as UserMessage | AssistantMessage;
-     if (msg.role === "user" || msg.role === "assistant") {
-       const text = extractText(msg);
-       if (text && text.length > 10) {
-         recentTexts.push(text.slice(0, 500));
-       }
-     }
-   }
-
-   if (recentTexts.length === 0) return queryVec;
-
-   try {
-     const recentVecs = await Promise.all(recentTexts.map((t) => embeddings.embed(t)));
-     const dim = queryVec.length;
-     const blended = new Array(dim).fill(0);
-     const queryWeight = 2;
-     const totalWeight = queryWeight + recentVecs.length;
-
-     for (let d = 0; d < dim; d++) {
-       blended[d] = queryVec[d] * queryWeight;
-       for (const rv of recentVecs) {
-         blended[d] += rv[d];
-       }
-       blended[d] /= totalWeight;
-     }
-     return blended;
-   } catch (e) {
-     swallow.warn("graph-context:contextualQuery", e);
-     return queryVec;
+   // Reuse embedding from ingest if available (same user message, already embedded)
+   if (session?.lastUserEmbedding) {
+     return session.lastUserEmbedding;
    }
+   // Fallback: embed the query text (first turn, or ingest didn't fire yet)
+   return embeddings.embed(queryText);
+   // Note: removed the 3-message "blend" — pure query vector is sufficient for retrieval
+   // and saves 1-3 embedding calls per turn (~15-200ms)
  }

  // ── Scoring ────────────────────────────────────────────────────────────────────
@@ -355,7 +357,11 @@ async function scoreResults(
    .filter((r) => r.table === "memory" || r.table === "concept")
    .map((r) => r.id);

- const cacheEntries = await store.getUtilityCacheEntries(eligibleIds);
+ // Parallelize independent DB lookups (utility cache + reflection sessions)
+ const [cacheEntries, reflectedSessions] = await Promise.all([
+   store.getUtilityCacheEntries(eligibleIds),
+   store.getReflectionSessionIds(),
+ ]);

  const preFiltered = results.filter((r) => {
    const entry = cacheEntries.get(r.id);
@@ -371,8 +377,6 @@ async function scoreResults(
  if (utilityMap.size === 0 && eligibleIds.length > 0) {
    utilityMap = await getHistoricalUtilityBatch(eligibleIds);
  }
-
- const reflectedSessions = await store.getReflectionSessionIds();
  const floor = INTENT_SCORE_FLOORS[currentIntent] ?? SCORE_FLOOR_DEFAULT;

  // ACAN path
@@ -413,8 +417,8 @@ async function scoreResults(
    const reflectionBoost = r.sessionId ? (reflectedSessions.has(r.sessionId) ? 1.0 : 0) : 0;

    const finalScore =
-     0.27 * cosine + 0.28 * recency + 0.05 * importance +
-     0.05 * access + 0.10 * neighborBonus + 0.15 * provenUtility +
+     0.35 * cosine + 0.18 * recency + 0.07 * importance +
+     0.02 * access + 0.10 * neighborBonus + 0.18 * provenUtility +
      0.10 * reflectionBoost - utilityPenalty;

    return { ...r, finalScore, fromNeighbor: neighborIds.has(r.id) };
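A worked example of the re-weighted score, using assumed feature values (cosine 0.8, recency 0.5, importance 0.5, access 0.3, no neighbor bonus, provenUtility 0.2, no reflection boost or penalty):

  // finalScore = 0.35*0.8 + 0.18*0.5 + 0.07*0.5 + 0.02*0.3 + 0.10*0 + 0.18*0.2 + 0.10*0 - 0
  //            = 0.280 + 0.090 + 0.035 + 0.006 + 0 + 0.036 + 0 = 0.447
  // 0.447 clears the raised MIN_RELEVANCE_SCORE floor of 0.40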
@@ -426,23 +430,30 @@ async function scoreResults(

  // ── Deduplication ──────────────────────────────────────────────────────────────

  function deduplicateResults(ranked: ScoredResult[]): ScoredResult[] {
+   // Pre-compute word sets to avoid re-splitting in O(n^2) inner loop
+   const wordSets = ranked.map(r =>
+     new Set((r.text ?? "").toLowerCase().split(/\s+/).filter((w) => w.length > 2)),
+   );
    const kept: ScoredResult[] = [];
-   for (const item of ranked) {
+   const keptIndexes: number[] = [];
+   for (let i = 0; i < ranked.length; i++) {
+     const item = ranked[i];
      let isDup = false;
-     for (const existing of kept) {
+     for (const ki of keptIndexes) {
+       const existing = ranked[ki];
        if (item.embedding?.length && existing.embedding?.length
          && item.embedding.length === existing.embedding.length) {
-         if (cosineSimilarity(item.embedding, existing.embedding) > 0.88) { isDup = true; break; }
+         if (cosineSimilarity(item.embedding, existing.embedding) > DEDUP_COSINE_THRESHOLD) { isDup = true; break; }
          continue;
        }
-       const words = new Set((item.text ?? "").toLowerCase().split(/\s+/).filter((w) => w.length > 2));
-       const eWords = new Set((existing.text ?? "").toLowerCase().split(/\s+/).filter((w) => w.length > 2));
+       const words = wordSets[i];
+       const eWords = wordSets[ki];
        let intersection = 0;
        for (const w of words) { if (eWords.has(w)) intersection++; }
        const union = words.size + eWords.size - intersection;
-       if (union > 0 && intersection / union > 0.80) { isDup = true; break; }
+       if (union > 0 && intersection / union > DEDUP_JACCARD_THRESHOLD) { isDup = true; break; }
      }
-     if (!isDup) kept.push(item);
+     if (!isDup) { kept.push(item); keptIndexes.push(i); }
    }
    return kept;
  }
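A small worked example of the Jaccard branch, using two made-up memory texts:

  // A: "graph context budget split"         → {graph, context, budget, split}          (4 words > 2 chars)
  // B: "graph context budget splits window" → {graph, context, budget, splits, window} (5 words > 2 chars)
  // intersection = 3, union = 4 + 5 - 3 = 6, Jaccard = 3/6 = 0.5 → below 0.80, both kept
  // near-verbatim duplicates approach 1.0, so the lower-ranked copy is dropped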
@@ -514,24 +525,17 @@ function formatTierSection(entries: CoreMemoryEntry[], label: string): string {
  function buildSystemPromptSection(session: SessionState, tier0Entries: CoreMemoryEntry[]): string | undefined {
    const parts: string[] = [];

-   // IKONG architecture description (static, ~120 tokens)
+   // Graph pillar IDs (compact: the model doesn't need architecture descriptions)
    const pillarLines: string[] = [];
    if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
    if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
    if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
    if (pillarLines.length > 0) {
-     parts.push(
-       "GRAPH PILLARS (your structural context):\n" +
-       ` ${pillarLines.join(" | ")}\n` +
-       " IKONG cognitive architecture:\n" +
-       " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
-       " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
-       " O(peration): tool execution, skill procedures, causal chain tracking\n" +
-       " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
-       " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
-     );
+     parts.push(`GRAPH PILLARS: ${pillarLines.join(" | ")}`);
    }

+   // Token-density rules are in buildRulesSuffix (injected per-turn) — no duplication here
+
    // Tier 0 core directives (semi-static, changes rarely)
    const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
    if (t0Section) parts.push(t0Section);
@@ -597,7 +601,7 @@ async function formatContextMessage(

    const sections: string[] = [];

-   // Pillar context — structural awareness of who/what/where
+   // Pillar context — structural IDs only (architecture description is unnecessary token spend)
    // Skip if model already has it in the conversation window (claw-code static section dedup)
    if (!session.injectedSections.has("ikong")) {
      const pillarLines: string[] = [];
@@ -605,16 +609,7 @@ async function formatContextMessage(
      if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
      if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
      if (pillarLines.length > 0) {
-       sections.push(
-         "GRAPH PILLARS (your structural context):\n" +
-         ` ${pillarLines.join(" | ")}\n` +
-         " IKONG cognitive architecture:\n" +
-         " I(ntelligence): intent classification → adaptive orchestration per turn\n" +
-         " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
-         " O(peration): tool execution, skill procedures, causal chain tracking\n" +
-         " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
-         " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
-       );
+       sections.push(`GRAPH PILLARS: ${pillarLines.join(" | ")}`);
        session.injectedSections.add("ikong");
      }
    }
@@ -648,8 +643,10 @@ async function formatContextMessage(
      clearPendingDirectives(session);
    }

-   // Fibonacci resurfacing
-   try {
+   // Fibonacci resurfacing — only during conversational intents (noise during deep code work)
+   const RESURFACE_INTENTS = new Set(["simple-question", "meta-session", "unknown"]);
+   const currentIntent = session.currentConfig?.intent ?? "unknown";
+   if (RESURFACE_INTENTS.has(currentIntent)) try {
      const dueMemories = await store.getDueMemories(3);
      if (dueMemories.length > 0) {
        const memLines = dueMemories.map((m: any) => {
@@ -659,14 +656,7 @@ async function formatContextMessage(
          return ` - [${m.id}] (${ageStr}, surfaced ${m.surface_count}x): ${m.text}`;
        }).join("\n");
        sections.push(
-         `RESURFACING MEMORIES (Fibonacci schedule these are due for a mention):\n` +
-         `These memories are important but fading. Bring them up naturally when appropriate:\n` +
-         `- If mid-task on something important, wait until finished\n` +
-         `- During casual interaction: "I was thinking..." or "remember when you mentioned..."\n` +
-         `- If user engages: great! Continue that thread. The memory stays alive.\n` +
-         `- If user ignores or dismisses: let it fade. Don't force it.\n` +
-         `- NEVER say "my memory system scheduled this" — just bring it up like a thought you had.\n` +
-         memLines
+         `RESURFACING MEMORIES (mention naturally during conversation, never reveal scheduling):\n` + memLines
        );
      }
    } catch { /* non-critical */ }
@@ -748,11 +738,20 @@ function truncateToolResult(msg: AgentMessage, maxChars: number): AgentMessage {
    return { ...msg, content };
  }

- function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number, session?: SessionState): AgentMessage[] {
-   const budgetChars = maxTokens * CHARS_PER_TOKEN;
-   const TOOL_RESULT_MAX = Math.round(contextWindow * 0.03);
-
-   // Transform error messages into compact annotations
+ function getRecentTurns(
+   messages: AgentMessage[],
+   convTokens: number,
+   toolTokens: number,
+   contextWindow: number,
+   session?: SessionState,
+ ): AgentMessage[] {
+   const convBudgetChars = convTokens * CHARS_PER_TOKEN;
+   const toolBudgetChars = toolTokens * CHARS_PER_TOKEN;
+   // Per-tool-result char cap (claw-code: DEFAULT_MAX_RESULT_SIZE_CHARS = 50,000)
+   // Scale with context window but floor at 20k, cap at 50k
+   const TOOL_RESULT_MAX = Math.min(50_000, Math.max(20_000, Math.round(contextWindow * 0.10)));
+
+   // ── Phase 1: Transform error messages into compact annotations ──
    const clean = messages.map((m) => {
      if (isAssistant(m) && m.stopReason === "error") {
        const errorText = m.content
@@ -769,12 +768,88 @@ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWind
      return m;
    });

-   // Group messages into structural units
+   // ── Phase 2: Strip token-heavy content from non-recent messages ──
+   // (claw-code patterns: microcompact content-clearing, image stripping, thinking clearing)
+   const RECENT_KEEP = 5; // keep last N groups fully intact
+   const msgCount = clean.length;
+
+   // Find recency boundary: messages in the last RECENT_KEEP groups stay intact
+   // We need to identify which messages are "old" vs "recent"
+   // Count groups from the end to find the boundary index
+   let recentBoundary = msgCount;
+   {
+     let groupsSeen = 0;
+     for (let k = clean.length - 1; k >= 0 && groupsSeen < RECENT_KEEP; k--) {
+       recentBoundary = k;
+       const msg = clean[k];
+       // Each user message or standalone assistant message starts a new group
+       if (isUser(msg) || (isAssistant(msg) && !msg.content.some((c: ContentBlock) => c.type === "toolCall"))) {
+         groupsSeen++;
+       } else if (isAssistant(msg) && msg.content.some((c: ContentBlock) => c.type === "toolCall")) {
+         groupsSeen++;
+         // Skip past associated tool results (they're part of this group)
+       }
+     }
+   }
+
+   // Apply stripping to messages before the recency boundary
+   for (let k = 0; k < recentBoundary; k++) {
+     const msg = clean[k] as MutableMessage;
+     if (!msg.content || !Array.isArray(msg.content)) continue;
+
+     // Collapse old assistant filler text (agentic loop: "I'll now read..." / "Let me check...")
+     // Keep tool calls intact but shrink prose to 1-line summary
+     if (isAssistant(clean[k]) && msg.content.some((c: AnyContentBlock) => c.type === "toolCall")) {
+       msg.content = msg.content.map((c: AnyContentBlock) => {
+         if (c.type === "text" && c.text && c.text.length > 120) {
+           // Keep first line as summary (usually the intent statement)
+           const firstLine = c.text.split("\n")[0].slice(0, 120);
+           return { ...c, text: firstLine };
+         }
+         if (c.type === "thinking") {
+           return { type: "text" as const, text: "[thinking]" };
+         }
+         return c; // preserve toolCall blocks
+       });
+       continue; // skip generic stripping for this message
+     }
+
+     msg.content = msg.content.map((c: AnyContentBlock) => {
+       // Strip thinking blocks → [thinking] marker (often 1-5k tokens each)
+       if (c.type === "thinking") {
+         return { type: "text" as const, text: "[thinking]" };
+       }
+       // Strip images → [image] marker (2000 tokens each)
+       if (c.type === "image" || c.type === "image_url" || (c.type === "source" && c.media_type?.startsWith("image/"))) {
+         return { type: "text" as const, text: "[image]" };
+       }
+       // Content-clear old tool results → stub (claw-code: microcompact pattern)
+       if (c.type === "toolResult" && Array.isArray(c.content)) {
+         const stub = c.content.map((rc: AnyContentBlock) => {
+           if (rc.type === "text" && rc.text && rc.text.length > 200) {
+             return { ...rc, text: `[Old tool result cleared — ${rc.text.length} chars]` };
+           }
+           if (rc.type === "image" || rc.type === "image_url") {
+             return { type: "text" as const, text: "[image]" };
+           }
+           return rc;
+         });
+         return { ...c, content: stub };
+       }
+       // For tool result messages (top-level), clear oversized text blocks
+       if (c.type === "text" && isToolResult(clean[k]) && c.text && c.text.length > 200) {
+         return { ...c, text: `[Old tool result cleared — ${c.text.length} chars]` };
+       }
+       return c;
+     });
+   }
+
+   // ── Phase 3: Group messages into structural units ──
    const groups: AgentMessage[][] = [];
    let i = 0;
    while (i < clean.length) {
      const msg = clean[i];
-     if (isAssistant(msg) && msg.content.some((c: any) => c.type === "toolCall")) {
+     if (isAssistant(msg) && msg.content.some((c: ContentBlock) => c.type === "toolCall")) {
        const group: AgentMessage[] = [clean[i]];
        let j = i + 1;
        while (j < clean.length && isToolResult(clean[j])) {
@@ -800,17 +875,38 @@ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWind
      }
    }

-   // Take groups from end within budget
-   const pinnedLen = pinnedGroup ? pinnedGroup.reduce((s, m) => s + msgCharLen(m), 0) : 0;
-   const remainingBudget = budgetChars - pinnedLen;
-   let used = 0;
+   // Measure pinned group against both budgets
+   let pinnedConv = 0;
+   let pinnedTool = 0;
+   if (pinnedGroup) {
+     for (const m of pinnedGroup) {
+       if (isToolResult(m)) pinnedTool += msgCharLen(m);
+       else pinnedConv += msgCharLen(m);
+     }
+   }
+
+   // Take groups from end within split budgets
+   const remainingConv = convBudgetChars - pinnedConv;
+   const remainingTool = toolBudgetChars - pinnedTool;
+   let convUsed = 0;
+   let toolUsed = 0;
    const selectedGroups: AgentMessage[][] = [];
    for (let g = groups.length - 1; g >= 0; g--) {
      if (g === pinnedGroupIdx) continue;
-     const groupLen = groups[g].reduce((s, m) => s + msgCharLen(m), 0);
-     if (used + groupLen > remainingBudget && selectedGroups.length > 0) break;
+     let groupConv = 0;
+     let groupTool = 0;
+     for (const m of groups[g]) {
+       if (isToolResult(m)) groupTool += msgCharLen(m);
+       else groupConv += msgCharLen(m);
+     }
+     // Stop if either budget would overflow (but always include at least one group)
+     if (selectedGroups.length > 0) {
+       if (convUsed + groupConv > remainingConv) break;
+       if (groupTool > 0 && toolUsed + groupTool > remainingTool) break;
+     }
      selectedGroups.unshift(groups[g]);
-     used += groupLen;
+     convUsed += groupConv;
+     toolUsed += groupTool;
    }

    if (pinnedGroup && pinnedGroupIdx !== -1) {
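For scale, assuming a 200k-token window and the budget shares above, both character budgets land near 60k chars and the per-result cap bottoms out at its floor:

  // convBudgetChars = 14_950 * 4 ≈ 59_800 chars; toolBudgetChars = 14_950 * 4 ≈ 59_800 chars
  // TOOL_RESULT_MAX = min(50_000, max(20_000, round(200_000 * 0.10))) = 20_000 chars per tool result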
@@ -852,8 +948,8 @@ export interface GraphTransformResult {
  }

  /**
-  * Transform conversation messages using graph-based context retrieval.
-  * This is the core "assemble" logic called from ContextEngine.assemble().
+  * Main entry point for graph-based context assembly. Retrieves, scores, deduplicates,
+  * and budget-trims graph nodes, then splices them into the conversation message array.
   */
  export async function graphTransformContext(
    params: GraphTransformParams,
@@ -866,18 +962,24 @@ export async function graphTransformContext(
    // Done here (wrapper) so it attaches to any inner return path.
    // (claw-code pattern: static sections above __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__)
    let systemPromptSection: string | undefined;
+   let tier0ForSys: CoreMemoryEntry[] = [];
    try {
-     const tier0ForSys = store.isAvailable()
+     tier0ForSys = store.isAvailable()
        ? applyCoreBudget(await store.getAllCoreMemory(0), getTier0BudgetChars(budgets))
        : [];
      systemPromptSection = buildSystemPromptSection(session, tier0ForSys);
+     // Mark sections as injected so formatContextMessage() skips them (prevents duplication)
+     if (systemPromptSection) {
+       if (systemPromptSection.includes("GRAPH PILLARS")) session.injectedSections.add("ikong");
+       if (systemPromptSection.includes("CORE DIRECTIVES")) session.injectedSections.add("tier0");
+     }
    } catch { /* non-critical — tier0 will still appear in user message */ }

    // Never throw — return raw messages on any failure
    try {
      const TRANSFORM_TIMEOUT_MS = 10_000;
      const result = await Promise.race([
-       graphTransformInner(messages, session, store, embeddings, contextWindow, budgets, signal),
+       graphTransformInner(messages, session, store, embeddings, contextWindow, budgets, signal, tier0ForSys),
        new Promise<never>((_, reject) =>
          setTimeout(() => reject(new Error("graphTransformContext timed out")), TRANSFORM_TIMEOUT_MS),
        ),
@@ -885,7 +987,7 @@ export async function graphTransformContext(
      result.systemPromptSection = systemPromptSection;
      return result;
    } catch (err) {
-     console.error("graphTransformContext fatal error, returning raw messages:", err);
+     log.error("graphTransformContext fatal error, returning raw messages:", err);
      return {
        messages,
        stats: {
@@ -912,6 +1014,8 @@ async function graphTransformInner(
    contextWindow: number,
    budgets: Budgets,
    _signal?: AbortSignal,
+   /** Tier 0 entries already fetched by wrapper — avoids double DB fetch. */
+   tier0FromWrapper: CoreMemoryEntry[] = [],
  ): Promise<GraphTransformResult> {
    function makeStats(
      sent: AgentMessage[], graphNodes: number, neighborNodes: number,
@@ -940,7 +1044,7 @@ async function graphTransformInner(
    // Skip retrieval fast path — avoid DB queries entirely when model already has core memory
    // (claw-code pattern: simple_mode skips the load, not load-then-discard)
    if (skipRetrieval) {
-     const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+     const recentTurns = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
      // If model already saw core memory, just return recent turns + compressed rules. Zero DB queries.
      if (session.injectedSections.has("tier0")) {
        return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
@@ -956,7 +1060,7 @@ async function graphTransformInner(
      tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
      tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
    } catch (e) {
-     console.warn("[warn] Core memory load failed:", e);
+     log.warn("Core memory load failed:", e);
    }
    if (tier0.length > 0 || tier1.length > 0) {
      const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
@@ -970,14 +1074,13 @@ async function graphTransformInner(
    let tier0: CoreMemoryEntry[] = [];
    let tier1: CoreMemoryEntry[] = [];
    try {
-     [tier0, tier1] = await Promise.all([
-       store.getAllCoreMemory(0),
-       store.getAllCoreMemory(1),
-     ]);
-     tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
-     tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
+     // Tier 0 already fetched by wrapper (avoids double DB query)
+     tier0 = tier0FromWrapper.length > 0
+       ? tier0FromWrapper
+       : applyCoreBudget(await store.getAllCoreMemory(0), getTier0BudgetChars(budgets));
+     tier1 = applyCoreBudget(await store.getAllCoreMemory(1), getTier1BudgetChars(budgets));
    } catch (e) {
-     console.warn("[warn] Core memory load failed:", e);
+     swallow.warn("graph-context:coreMemoryLoad", e);
    }

    // Graceful degradation
@@ -985,7 +1088,7 @@ async function graphTransformInner(
    const surrealUp = store.isAvailable();

    if (!embeddingsUp || !surrealUp) {
-     const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+     const recentTurns = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
      if (tier0.length > 0 || tier1.length > 0) {
        const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
        const result = [coreContext, ...recentTurns];
@@ -1001,7 +1104,7 @@ async function graphTransformInner(

    const currentIntent = config?.intent ?? "unknown";
    const baseLimits = config?.vectorSearchLimits ?? {
-     turn: 25, identity: 10, concept: 20, memory: 20, artifact: 10,
+     turn: 25, identity: 10, concept: 35, memory: 20, artifact: 10,
    };
    // Scale search limits with context window — larger windows can use more results
    const cwScale = Math.max(0.5, Math.min(2.0, contextWindow / 200_000));
@@ -1016,7 +1119,7 @@ async function graphTransformInner(
    let tokenBudget = Math.min(config?.tokenBudget ?? 6000, budgets.retrieval);

    try {
-     const queryVec = await buildContextualQueryVec(queryText, messages, embeddings);
+     const queryVec = await buildContextualQueryVec(queryText, messages, embeddings, session);
      session.lastQueryVec = queryVec; // Stash for redundant recall detection

      // Prefetch cache check
@@ -1042,15 +1145,22 @@ async function graphTransformInner(
        const reflCtx = cached.reflections.length > 0 ? formatReflectionContext(cached.reflections) : "";

        const injectedContext = await formatContextMessage(contextNodes, store, session, skillCtx + reflCtx, tier0, tier1);
-       const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+       const recentTurns = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
        const result = [injectedContext, ...recentTurns];
        return { messages: injectRulesSuffix(result, session), stats: makeStats(result, contextNodes.length, 0, recentTurns.length, "graph", true) };
      }
    }

-   // Vector search (cache miss path)
+   // Vector search + tag-boosted retrieval (cache miss path, run in parallel)
    recordPrefetchMiss();
-   const results = await store.vectorSearch(queryVec, session.sessionId, vectorSearchLimits, isACANActive());
+   const [vectorResults, tagResults] = await Promise.all([
+     store.vectorSearch(queryVec, session.sessionId, vectorSearchLimits, isACANActive()),
+     store.tagBoostedConcepts(queryText, queryVec, 10).catch(e => { swallow.warn("graph-context:tagBoost", e); return [] as VectorSearchResult[]; }),
+   ]);
+   // Merge: dedupe tag results against vector results, then combine
+   const vectorIds = new Set(vectorResults.map(r => r.id));
+   const uniqueTagResults = tagResults.filter(r => !vectorIds.has(r.id));
+   const results = [...vectorResults, ...uniqueTagResults];

    // Graph neighbor expansion
    const topIds = results
@@ -1061,28 +1171,21 @@ async function graphTransformInner(
    const DEEP_INTENTS = new Set(["code-debug", "deep-explore", "multi-step", "reference-prior"]);
    const graphHops = DEEP_INTENTS.has(currentIntent) ? 2 : 1;

+   // Graph expand + causal traversal run in parallel (both depend only on topIds)
    let neighborIds = new Set<string>();
    let neighborResults: VectorSearchResult[] = [];
-   if (topIds.length > 0) {
-     try {
-       neighborResults = await store.graphExpand(topIds, queryVec, graphHops);
-       neighborIds = new Set(neighborResults.map((n) => n.id));
-       const existingIds = new Set(results.map((r) => r.id));
-       neighborResults = neighborResults.filter((n) => !existingIds.has(n.id));
-     } catch (e) {
-       swallow.error("graph-context:graphExpand", e);
-     }
-   }
-
-   // Causal chain traversal
    let causalResults: VectorSearchResult[] = [];
-   if (topIds.length > 0 && queryVec) {
-     try {
-       const causal = await queryCausalContext(topIds, queryVec, 2, 0.4, store);
-       const existingIds = new Set([...results.map((r) => r.id), ...neighborResults.map((r) => r.id)]);
-       causalResults = causal.filter((c) => !existingIds.has(c.id));
-       for (const c of causalResults) { neighborIds.add(c.id); }
-     } catch (e) { swallow("graph-context:causal", e); }
+   if (topIds.length > 0) {
+     const existingIds = new Set(results.map((r) => r.id));
+     const [expandResult, causalResult] = await Promise.all([
+       store.graphExpand(topIds, queryVec, graphHops).catch(e => { swallow.error("graph-context:graphExpand", e); return [] as VectorSearchResult[]; }),
+       queryVec ? queryCausalContext(topIds, queryVec, 2, 0.4, store).catch(e => { swallow("graph-context:causal", e); return [] as VectorSearchResult[]; }) : Promise.resolve([] as VectorSearchResult[]),
+     ]);
+     neighborResults = expandResult.filter((n) => !existingIds.has(n.id));
+     neighborIds = new Set(neighborResults.map((n) => n.id));
+     const allExisting = new Set([...existingIds, ...neighborResults.map((r) => r.id)]);
+     causalResults = causalResult.filter((c) => !allExisting.has(c.id));
+     for (const c of causalResults) { neighborIds.add(c.id); }
    }

    // Combine, filter, score
@@ -1099,7 +1202,7 @@ async function graphTransformInner(
    contextNodes = await ensureRecentTurns(contextNodes, session.sessionId, store);

    if (contextNodes.length === 0) {
-     const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+     const result = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
      return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "graph") };
    }

@@ -1131,7 +1234,7 @@ async function graphTransformInner(
    } catch (e) { swallow("graph-context:reflections", e); }

    const injectedContext = await formatContextMessage(contextNodes, store, session, skillContext + reflectionContext, tier0, tier1);
-   const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+   const recentTurns = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
    const result = [injectedContext, ...recentTurns];
    return {
      messages: injectRulesSuffix(result, session),
@@ -1143,8 +1246,8 @@ async function graphTransformInner(
      ),
    };
  } catch (err) {
-   console.error("Graph context error, falling back:", err);
-   const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
+   log.error("Graph context error, falling back:", err);
+   const result = getRecentTurns(messages, budgets.conversation, budgets.toolHistory, contextWindow, session);
    return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "recency-only") };
  }
  }