kongbrain 0.3.16 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SKILL.md +1 -1
- package/package.json +1 -1
- package/src/acan.ts +4 -1
- package/src/cognitive-check.ts +2 -2
- package/src/concept-extract.ts +1 -1
- package/src/context-engine.ts +128 -4
- package/src/daemon-manager.ts +17 -11
- package/src/deferred-cleanup.ts +3 -7
- package/src/graph-context.ts +220 -69
- package/src/handoff-file.ts +12 -5
- package/src/hooks/after-tool-call.ts +3 -3
- package/src/hooks/before-tool-call.ts +48 -1
- package/src/hooks/llm-output.ts +28 -8
- package/src/hooks/subagent-lifecycle.ts +142 -0
- package/src/index.ts +11 -2
- package/src/orchestrator.ts +1 -1
- package/src/reflection.ts +1 -0
- package/src/soul.ts +1 -1
- package/src/state.ts +18 -0
- package/src/surreal.ts +4 -0
- package/src/tools/core-memory.ts +9 -1
- package/src/tools/recall.ts +3 -3
package/src/graph-context.ts
CHANGED
|
@@ -62,6 +62,7 @@ const CONVERSATION_SHARE = 0.50;
|
|
|
62
62
|
const RETRIEVAL_SHARE = 0.30;
|
|
63
63
|
const CORE_MEMORY_SHARE = 0.15;
|
|
64
64
|
const CORE_MEMORY_TTL = 300_000;
|
|
65
|
+
const MAX_ITEM_CHARS = 1200; // ~350 tokens per item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
|
|
65
66
|
const MIN_RELEVANCE_SCORE = 0.35;
|
|
66
67
|
const MIN_COSINE = 0.25;
|
|
67
68
|
|
|
@@ -149,25 +150,37 @@ function extractLastUserText(messages: AgentMessage[]): string | null {
|
|
|
149
150
|
return null;
|
|
150
151
|
}
|
|
151
152
|
|
|
153
|
+
/**
 * Estimate the character count of a single content block
 * (claw-code: per-block-type estimation).
 *
 * - `text` / `thinking`: length of the string payload.
 * - `toolCall`: tool name length plus the length of the JSON-serialized args
 *   (claw-code: compact.rs:326-338).
 * - `toolResult` with an array `content`: sum of its text parts, with a flat
 *   100 chars charged for every non-text part.
 * - anything else (image, etc.): flat 100 chars.
 *
 * Malformed input never throws: a null/undefined block counts as 0, and a
 * missing or non-string payload contributes 0 instead of raising a TypeError.
 *
 * @param c - A content block; only `type` and the per-type payload fields are read.
 * @returns Estimated number of characters the block occupies.
 */
function blockCharLen(c: any): number {
  if (c == null) return 0;

  // Length of a string payload; anything that is not a string contributes nothing.
  const strLen = (value: unknown): number => (typeof value === "string" ? value.length : 0);

  switch (c.type) {
    case "text":
      return strLen(c.text);
    case "thinking":
      return strLen(c.thinking);
    case "toolCall": {
      // JSON.stringify yields undefined for non-serializable values (e.g. functions).
      const argsLen = c.args ? (JSON.stringify(c.args)?.length ?? 0) : 0;
      return strLen(c.name) + argsLen;
    }
    case "toolResult": {
      // Non-array content falls through to the flat estimate below.
      if (!Array.isArray(c.content)) return 100;
      let len = 0;
      for (const rc of c.content) {
        len += rc?.type === "text" ? strLen(rc.text) : 100;
      }
      return len;
    }
    default:
      return 100; // image, etc.
  }
}
|
|
171
|
+
|
|
152
172
|
/**
 * Rough token estimate for a list of messages.
 *
 * Sums the estimated character length of every content block, adds a fixed
 * 4-character structural overhead per message, and converts the total to
 * tokens by dividing by CHARS_PER_TOKEN (rounded up).
 */
function estimateTokens(messages: AgentMessage[]): number {
  const PER_MESSAGE_OVERHEAD = 4; // per-message structural overhead
  let totalChars = 0;
  messages.forEach((message) => {
    for (const block of msgContentBlocks(message)) {
      totalChars += blockCharLen(block);
    }
    totalChars += PER_MESSAGE_OVERHEAD;
  });
  return Math.ceil(totalChars / CHARS_PER_TOKEN);
}
|
|
163
180
|
|
|
164
181
|
/**
 * Estimated character length of one message: the sum of the per-block
 * estimates over all of its content blocks (no per-message overhead).
 */
function msgCharLen(msg: AgentMessage): number {
  let total = 0;
  for (const block of msgContentBlocks(msg)) {
    total = total + blockCharLen(block);
  }
  return total;
}
|
|
173
186
|
|
|
@@ -199,7 +212,7 @@ function accessBoost(accessCount: number | undefined): number {
|
|
|
199
212
|
return Math.log1p(accessCount ?? 0);
|
|
200
213
|
}
|
|
201
214
|
|
|
202
|
-
function cosineSimilarity(a: number[], b: number[]): number {
|
|
215
|
+
export function cosineSimilarity(a: number[], b: number[]): number {
|
|
203
216
|
let dot = 0, magA = 0, magB = 0;
|
|
204
217
|
for (let i = 0; i < a.length; i++) {
|
|
205
218
|
dot += a[i] * b[i];
|
|
@@ -217,6 +230,19 @@ function buildRulesSuffix(session: SessionState): string {
|
|
|
217
230
|
? "unlimited" : String(Math.max(0, session.toolLimit - session.toolCallCount));
|
|
218
231
|
const urgency = session.toolLimit !== Infinity && (session.toolLimit - session.toolCallCount) <= 3
|
|
219
232
|
? "\n⚠ WRAP UP or check in with user." : "";
|
|
233
|
+
|
|
234
|
+
// After first exposure, send only the budget line (claw-code: don't re-send static content)
|
|
235
|
+
if (session.injectedSections.has("rules_full")) {
|
|
236
|
+
return (
|
|
237
|
+
"\n<rules_reminder>" +
|
|
238
|
+
`\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
|
|
239
|
+
"\nCombine steps. If context already answers it, zero calls." +
|
|
240
|
+
"\n</rules_reminder>"
|
|
241
|
+
);
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
// First time — full examples
|
|
245
|
+
session.injectedSections.add("rules_full");
|
|
220
246
|
return (
|
|
221
247
|
"\n<rules_reminder>" +
|
|
222
248
|
`\nBudget: ${session.toolCallCount} used, ${remaining} remaining.${urgency}` +
|
|
@@ -430,7 +456,7 @@ function takeWithConstraints(ranked: ScoredResult[], budgetTokens: number, maxIt
|
|
|
430
456
|
for (const r of ranked) {
|
|
431
457
|
if (selected.length >= maxItems) break;
|
|
432
458
|
if ((r.finalScore ?? 0) < MIN_RELEVANCE_SCORE && selected.length > 0) break;
|
|
433
|
-
const len = r.text?.length ?? 0;
|
|
459
|
+
const len = Math.min(r.text?.length ?? 0, MAX_ITEM_CHARS); // Cap per-item size for budget accounting
|
|
434
460
|
if (used + len > budgetChars && selected.length > 0) break;
|
|
435
461
|
selected.push(r);
|
|
436
462
|
used += len;
|
|
@@ -447,13 +473,19 @@ function getTier1BudgetChars(budgets: Budgets): number {
|
|
|
447
473
|
return Math.round(budgets.core * 0.45 * CHARS_PER_TOKEN);
|
|
448
474
|
}
|
|
449
475
|
|
|
476
|
+
const MAX_CORE_MEMORY_CHARS = 800; // Per-item cap (claw-code: MAX_INSTRUCTION_FILE_CHARS)
|
|
477
|
+
|
|
450
478
|
function applyCoreBudget(entries: CoreMemoryEntry[], budgetChars: number): CoreMemoryEntry[] {
|
|
451
479
|
let used = 0;
|
|
452
480
|
const result: CoreMemoryEntry[] = [];
|
|
453
481
|
for (const e of entries) {
|
|
454
|
-
|
|
482
|
+
// Cap individual entries so one large directive doesn't starve others
|
|
483
|
+
const text = e.text.length > MAX_CORE_MEMORY_CHARS
|
|
484
|
+
? e.text.slice(0, MAX_CORE_MEMORY_CHARS) + "..."
|
|
485
|
+
: e.text;
|
|
486
|
+
const len = text.length + 6;
|
|
455
487
|
if (used + len > budgetChars) continue;
|
|
456
|
-
result.push(e);
|
|
488
|
+
result.push(text !== e.text ? { ...e, text } : e);
|
|
457
489
|
used += len;
|
|
458
490
|
}
|
|
459
491
|
return result;
|
|
@@ -473,6 +505,40 @@ function formatTierSection(entries: CoreMemoryEntry[], label: string): string {
|
|
|
473
505
|
return `${label}:\n${lines.join("\n")}`;
|
|
474
506
|
}
|
|
475
507
|
|
|
508
|
+
/**
 * Assemble the static portion of the system prompt.
 *
 * The returned text is intended for systemPromptAddition, where it can be
 * served from the API prefix cache on later calls in the agentic loop
 * (claw-code pattern: __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__ — prompt.rs:37-140).
 *
 * Two optional parts are produced, separated by a blank line:
 *  1. the graph-pillar header plus the IKONG architecture description, emitted
 *     only when at least one of agentId / projectId / taskId is set;
 *  2. the tier-0 core directives section, when formatTierSection returns a
 *     non-empty string for the given entries.
 *
 * @returns The joined section text, or undefined when neither part applies.
 */
function buildSystemPromptSection(session: SessionState, tier0Entries: CoreMemoryEntry[]): string | undefined {
  // Pillar identifiers that are actually set on the session, in fixed order.
  const pillars = [
    session.agentId ? `Agent: ${session.agentId}` : "",
    session.projectId ? `Project: ${session.projectId}` : "",
    session.taskId ? `Task: ${session.taskId}` : "",
  ].filter((line) => line !== "");

  const blocks: string[] = [];

  // IKONG architecture description (static) — only shown alongside pillar info.
  if (pillars.length > 0) {
    const ikong = [
      "GRAPH PILLARS (your structural context):\n",
      ` ${pillars.join(" | ")}\n`,
      " IKONG cognitive architecture:\n",
      " I(ntelligence): intent classification → adaptive orchestration per turn\n",
      " K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n",
      " O(peration): tool execution, skill procedures, causal chain tracking\n",
      " N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n",
      " G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
    ];
    blocks.push(ikong.join(""));
  }

  // Tier 0 core directives (semi-static, changes rarely).
  const directives = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
  if (directives) {
    blocks.push(directives);
  }

  if (blocks.length === 0) return undefined;
  return blocks.join("\n\n");
}
|
|
541
|
+
|
|
476
542
|
// ── Guaranteed recent turns from previous sessions ─────────────────────────────
|
|
477
543
|
|
|
478
544
|
async function ensureRecentTurns(
|
|
@@ -532,27 +598,42 @@ async function formatContextMessage(
|
|
|
532
598
|
const sections: string[] = [];
|
|
533
599
|
|
|
534
600
|
// Pillar context — structural awareness of who/what/where
|
|
535
|
-
|
|
536
|
-
if (session.
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
547
|
-
|
|
548
|
-
|
|
549
|
-
)
|
|
601
|
+
// Skip if model already has it in the conversation window (claw-code static section dedup)
|
|
602
|
+
if (!session.injectedSections.has("ikong")) {
|
|
603
|
+
const pillarLines: string[] = [];
|
|
604
|
+
if (session.agentId) pillarLines.push(`Agent: ${session.agentId}`);
|
|
605
|
+
if (session.projectId) pillarLines.push(`Project: ${session.projectId}`);
|
|
606
|
+
if (session.taskId) pillarLines.push(`Task: ${session.taskId}`);
|
|
607
|
+
if (pillarLines.length > 0) {
|
|
608
|
+
sections.push(
|
|
609
|
+
"GRAPH PILLARS (your structural context):\n" +
|
|
610
|
+
` ${pillarLines.join(" | ")}\n` +
|
|
611
|
+
" IKONG cognitive architecture:\n" +
|
|
612
|
+
" I(ntelligence): intent classification → adaptive orchestration per turn\n" +
|
|
613
|
+
" K(nowledge): memory graph, concepts, skills, reflections, identity chunks\n" +
|
|
614
|
+
" O(peration): tool execution, skill procedures, causal chain tracking\n" +
|
|
615
|
+
" N(etwork): graph traversal, cross-pillar edges, neighbor expansion\n" +
|
|
616
|
+
" G(raph): SurrealDB persistence, vector search, BGE-M3 embeddings",
|
|
617
|
+
);
|
|
618
|
+
session.injectedSections.add("ikong");
|
|
619
|
+
}
|
|
550
620
|
}
|
|
551
621
|
|
|
552
|
-
|
|
553
|
-
if (
|
|
554
|
-
|
|
555
|
-
|
|
622
|
+
// Core directives — skip if model already has them
|
|
623
|
+
if (!session.injectedSections.has("tier0")) {
|
|
624
|
+
const t0Section = formatTierSection(tier0Entries, "CORE DIRECTIVES (always loaded, never evicted)");
|
|
625
|
+
if (t0Section) {
|
|
626
|
+
sections.push(t0Section);
|
|
627
|
+
session.injectedSections.add("tier0");
|
|
628
|
+
}
|
|
629
|
+
}
|
|
630
|
+
if (!session.injectedSections.has("tier1")) {
|
|
631
|
+
const t1Section = formatTierSection(tier1Entries, "SESSION CONTEXT (pinned for this session)");
|
|
632
|
+
if (t1Section) {
|
|
633
|
+
sections.push(t1Section);
|
|
634
|
+
session.injectedSections.add("tier1");
|
|
635
|
+
}
|
|
636
|
+
}
|
|
556
637
|
|
|
557
638
|
// Cognitive directives
|
|
558
639
|
const directives = getPendingDirectives(session);
|
|
@@ -607,6 +688,10 @@ async function formatContextMessage(
|
|
|
607
688
|
const score = n.finalScore != null ? ` (relevance: ${(n.finalScore * 100).toFixed(0)}%)` : "";
|
|
608
689
|
const via = n.fromNeighbor ? " [via graph link]" : "";
|
|
609
690
|
let text = n.text ?? "";
|
|
691
|
+
// Truncate oversized items (claw-code: MAX_INSTRUCTION_FILE_CHARS pattern)
|
|
692
|
+
if (text.length > MAX_ITEM_CHARS) {
|
|
693
|
+
text = text.slice(0, MAX_ITEM_CHARS) + "... [truncated]";
|
|
694
|
+
}
|
|
610
695
|
if (key === "past_turns") {
|
|
611
696
|
text = text.replace(/^\[(user|assistant)\] /, "[past_$1] ");
|
|
612
697
|
}
|
|
@@ -616,6 +701,23 @@ async function formatContextMessage(
|
|
|
616
701
|
sections.push(`${label}:\n${formatted.join("\n")}`);
|
|
617
702
|
}
|
|
618
703
|
|
|
704
|
+
// Injection manifest — tell the model what's already retrieved so it doesn't call recall redundantly
|
|
705
|
+
// (claw-code pattern: route_prompt pre-computes and shows available results)
|
|
706
|
+
const manifest: string[] = [];
|
|
707
|
+
for (const key of sortedKeys) {
|
|
708
|
+
const items = groups[key];
|
|
709
|
+
if (items.length > 0) manifest.push(`${LABELS[key] ?? key}: ${items.length}`);
|
|
710
|
+
}
|
|
711
|
+
if (tier0Entries.length > 0) manifest.push(`core_directives: ${tier0Entries.length}`);
|
|
712
|
+
if (tier1Entries.length > 0) manifest.push(`session_context: ${tier1Entries.length}`);
|
|
713
|
+
if (manifest.length > 0) {
|
|
714
|
+
sections.push(
|
|
715
|
+
"ALREADY RETRIEVED (do NOT call recall for these — they are above):\n" +
|
|
716
|
+
` ${manifest.join(", ")}\n` +
|
|
717
|
+
"Only call recall if you need something SPECIFIC that isn't covered above."
|
|
718
|
+
);
|
|
719
|
+
}
|
|
720
|
+
|
|
619
721
|
const text =
|
|
620
722
|
"[System retrieved context — reference material, not user input. Higher relevance % = stronger match.]\n" +
|
|
621
723
|
"<graph_context>\n" +
|
|
@@ -646,7 +748,7 @@ function truncateToolResult(msg: AgentMessage, maxChars: number): AgentMessage {
|
|
|
646
748
|
return { ...msg, content };
|
|
647
749
|
}
|
|
648
750
|
|
|
649
|
-
function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number): AgentMessage[] {
|
|
751
|
+
function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWindow: number, session?: SessionState): AgentMessage[] {
|
|
650
752
|
const budgetChars = maxTokens * CHARS_PER_TOKEN;
|
|
651
753
|
const TOOL_RESULT_MAX = Math.round(contextWindow * 0.03);
|
|
652
754
|
|
|
@@ -718,6 +820,16 @@ function getRecentTurns(messages: AgentMessage[], maxTokens: number, contextWind
|
|
|
718
820
|
}
|
|
719
821
|
}
|
|
720
822
|
|
|
823
|
+
// Detect if old messages (containing previous context injection) were dropped from the window.
|
|
824
|
+
// If so, clear injectedSections so static content gets re-injected next turn.
|
|
825
|
+
if (session && messages.length > 0 && groups.length > 0) {
|
|
826
|
+
const firstOriginal = groups[0];
|
|
827
|
+
const firstSelected = selectedGroups[0];
|
|
828
|
+
if (firstOriginal !== firstSelected) {
|
|
829
|
+
session.injectedSections.clear();
|
|
830
|
+
}
|
|
831
|
+
}
|
|
832
|
+
|
|
721
833
|
return selectedGroups.flat();
|
|
722
834
|
}
|
|
723
835
|
|
|
@@ -735,6 +847,8 @@ export interface GraphTransformParams {
|
|
|
735
847
|
/** Value returned by the graph context transform. */
export interface GraphTransformResult {
  /** Messages to send after the transform has run. */
  messages: AgentMessage[];
  /** Statistics describing what the transform did for this call. */
  stats: ContextStats;
  /** Static content for the system prompt — benefits from API prefix caching (10% cost). */
  systemPromptSection?: string;
}
|
|
739
853
|
|
|
740
854
|
/**
|
|
@@ -748,6 +862,17 @@ export async function graphTransformContext(
|
|
|
748
862
|
const contextWindow = params.contextWindow ?? 200000;
|
|
749
863
|
const budgets = calcBudgets(contextWindow);
|
|
750
864
|
|
|
865
|
+
// Build static system prompt section for API prefix caching.
|
|
866
|
+
// Done here (wrapper) so it attaches to any inner return path.
|
|
867
|
+
// (claw-code pattern: static sections above __SYSTEM_PROMPT_DYNAMIC_BOUNDARY__)
|
|
868
|
+
let systemPromptSection: string | undefined;
|
|
869
|
+
try {
|
|
870
|
+
const tier0ForSys = store.isAvailable()
|
|
871
|
+
? applyCoreBudget(await store.getAllCoreMemory(0), getTier0BudgetChars(budgets))
|
|
872
|
+
: [];
|
|
873
|
+
systemPromptSection = buildSystemPromptSection(session, tier0ForSys);
|
|
874
|
+
} catch { /* non-critical — tier0 will still appear in user message */ }
|
|
875
|
+
|
|
751
876
|
// Never throw — return raw messages on any failure
|
|
752
877
|
try {
|
|
753
878
|
const TRANSFORM_TIMEOUT_MS = 10_000;
|
|
@@ -757,6 +882,7 @@ export async function graphTransformContext(
|
|
|
757
882
|
setTimeout(() => reject(new Error("graphTransformContext timed out")), TRANSFORM_TIMEOUT_MS),
|
|
758
883
|
),
|
|
759
884
|
]);
|
|
885
|
+
result.systemPromptSection = systemPromptSection;
|
|
760
886
|
return result;
|
|
761
887
|
} catch (err) {
|
|
762
888
|
console.error("graphTransformContext fatal error, returning raw messages:", err);
|
|
@@ -773,6 +899,7 @@ export async function graphTransformContext(
|
|
|
773
899
|
mode: "passthrough",
|
|
774
900
|
prefetchHit: false,
|
|
775
901
|
},
|
|
902
|
+
systemPromptSection,
|
|
776
903
|
};
|
|
777
904
|
}
|
|
778
905
|
}
|
|
@@ -786,20 +913,6 @@ async function graphTransformInner(
|
|
|
786
913
|
budgets: Budgets,
|
|
787
914
|
_signal?: AbortSignal,
|
|
788
915
|
): Promise<GraphTransformResult> {
|
|
789
|
-
// Load tiered core memory
|
|
790
|
-
let tier0: CoreMemoryEntry[] = [];
|
|
791
|
-
let tier1: CoreMemoryEntry[] = [];
|
|
792
|
-
try {
|
|
793
|
-
[tier0, tier1] = await Promise.all([
|
|
794
|
-
store.getAllCoreMemory(0),
|
|
795
|
-
store.getAllCoreMemory(1),
|
|
796
|
-
]);
|
|
797
|
-
tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
|
|
798
|
-
tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
|
|
799
|
-
} catch (e) {
|
|
800
|
-
console.warn("[warn] Core memory load failed:", e);
|
|
801
|
-
}
|
|
802
|
-
|
|
803
916
|
function makeStats(
|
|
804
917
|
sent: AgentMessage[], graphNodes: number, neighborNodes: number,
|
|
805
918
|
recentTurnCount: number, mode: ContextStats["mode"], prefetchHit = false,
|
|
@@ -814,12 +927,65 @@ async function graphTransformInner(
|
|
|
814
927
|
};
|
|
815
928
|
}
|
|
816
929
|
|
|
930
|
+
function makeResult(
|
|
931
|
+
msgs: AgentMessage[], stats: ContextStats, sysSection?: string,
|
|
932
|
+
): GraphTransformResult {
|
|
933
|
+
return { messages: msgs, stats, systemPromptSection: sysSection };
|
|
934
|
+
}
|
|
935
|
+
|
|
936
|
+
// Derive retrieval config from session's current adaptive config
|
|
937
|
+
const config = session.currentConfig;
|
|
938
|
+
const skipRetrieval = config?.skipRetrieval ?? false;
|
|
939
|
+
|
|
940
|
+
// Skip retrieval fast path — avoid DB queries entirely when model already has core memory
|
|
941
|
+
// (claw-code pattern: simple_mode skips the load, not load-then-discard)
|
|
942
|
+
if (skipRetrieval) {
|
|
943
|
+
const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
|
|
944
|
+
// If model already saw core memory, just return recent turns + compressed rules. Zero DB queries.
|
|
945
|
+
if (session.injectedSections.has("tier0")) {
|
|
946
|
+
return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
|
|
947
|
+
}
|
|
948
|
+
// First turn or after compaction cleared injectedSections — load and inject
|
|
949
|
+
let tier0: CoreMemoryEntry[] = [];
|
|
950
|
+
let tier1: CoreMemoryEntry[] = [];
|
|
951
|
+
try {
|
|
952
|
+
[tier0, tier1] = await Promise.all([
|
|
953
|
+
store.getAllCoreMemory(0),
|
|
954
|
+
store.getAllCoreMemory(1),
|
|
955
|
+
]);
|
|
956
|
+
tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
|
|
957
|
+
tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
|
|
958
|
+
} catch (e) {
|
|
959
|
+
console.warn("[warn] Core memory load failed:", e);
|
|
960
|
+
}
|
|
961
|
+
if (tier0.length > 0 || tier1.length > 0) {
|
|
962
|
+
const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
|
|
963
|
+
const result = [coreContext, ...recentTurns];
|
|
964
|
+
return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, recentTurns.length, "passthrough") };
|
|
965
|
+
}
|
|
966
|
+
return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
|
|
967
|
+
}
|
|
968
|
+
|
|
969
|
+
// Load tiered core memory (full retrieval path)
|
|
970
|
+
let tier0: CoreMemoryEntry[] = [];
|
|
971
|
+
let tier1: CoreMemoryEntry[] = [];
|
|
972
|
+
try {
|
|
973
|
+
[tier0, tier1] = await Promise.all([
|
|
974
|
+
store.getAllCoreMemory(0),
|
|
975
|
+
store.getAllCoreMemory(1),
|
|
976
|
+
]);
|
|
977
|
+
tier0 = applyCoreBudget(tier0, getTier0BudgetChars(budgets));
|
|
978
|
+
tier1 = applyCoreBudget(tier1, getTier1BudgetChars(budgets));
|
|
979
|
+
} catch (e) {
|
|
980
|
+
console.warn("[warn] Core memory load failed:", e);
|
|
981
|
+
}
|
|
982
|
+
|
|
817
983
|
// Graceful degradation
|
|
818
984
|
const embeddingsUp = embeddings.isAvailable();
|
|
819
985
|
const surrealUp = store.isAvailable();
|
|
820
986
|
|
|
821
987
|
if (!embeddingsUp || !surrealUp) {
|
|
822
|
-
const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
|
|
988
|
+
const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
|
|
823
989
|
if (tier0.length > 0 || tier1.length > 0) {
|
|
824
990
|
const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
|
|
825
991
|
const result = [coreContext, ...recentTurns];
|
|
@@ -833,9 +999,6 @@ async function graphTransformInner(
|
|
|
833
999
|
return { messages: injectRulesSuffix(messages, session), stats: makeStats(messages, 0, 0, messages.length, "passthrough") };
|
|
834
1000
|
}
|
|
835
1001
|
|
|
836
|
-
// Derive retrieval config from session's current adaptive config
|
|
837
|
-
const config = session.currentConfig;
|
|
838
|
-
const skipRetrieval = config?.skipRetrieval ?? false;
|
|
839
1002
|
const currentIntent = config?.intent ?? "unknown";
|
|
840
1003
|
const baseLimits = config?.vectorSearchLimits ?? {
|
|
841
1004
|
turn: 25, identity: 10, concept: 20, memory: 20, artifact: 10,
|
|
@@ -852,21 +1015,9 @@ async function graphTransformInner(
|
|
|
852
1015
|
};
|
|
853
1016
|
let tokenBudget = Math.min(config?.tokenBudget ?? 6000, budgets.retrieval);
|
|
854
1017
|
|
|
855
|
-
// Pressure-based adaptive scaling
|
|
856
|
-
// (In Phase 2, _usedTokens will be tracked per-session via hooks)
|
|
857
|
-
|
|
858
|
-
if (skipRetrieval) {
|
|
859
|
-
const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
|
|
860
|
-
if (tier0.length > 0 || tier1.length > 0) {
|
|
861
|
-
const coreContext = await formatContextMessage([], store, session, "", tier0, tier1);
|
|
862
|
-
const result = [coreContext, ...recentTurns];
|
|
863
|
-
return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, recentTurns.length, "passthrough") };
|
|
864
|
-
}
|
|
865
|
-
return { messages: injectRulesSuffix(recentTurns, session), stats: makeStats(recentTurns, 0, 0, recentTurns.length, "passthrough") };
|
|
866
|
-
}
|
|
867
|
-
|
|
868
1018
|
try {
|
|
869
1019
|
const queryVec = await buildContextualQueryVec(queryText, messages, embeddings);
|
|
1020
|
+
session.lastQueryVec = queryVec; // Stash for redundant recall detection
|
|
870
1021
|
|
|
871
1022
|
// Prefetch cache check
|
|
872
1023
|
const cached = getCachedContext(queryVec);
|
|
@@ -891,7 +1042,7 @@ async function graphTransformInner(
|
|
|
891
1042
|
const reflCtx = cached.reflections.length > 0 ? formatReflectionContext(cached.reflections) : "";
|
|
892
1043
|
|
|
893
1044
|
const injectedContext = await formatContextMessage(contextNodes, store, session, skillCtx + reflCtx, tier0, tier1);
|
|
894
|
-
const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
|
|
1045
|
+
const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
|
|
895
1046
|
const result = [injectedContext, ...recentTurns];
|
|
896
1047
|
return { messages: injectRulesSuffix(result, session), stats: makeStats(result, contextNodes.length, 0, recentTurns.length, "graph", true) };
|
|
897
1048
|
}
|
|
@@ -948,7 +1099,7 @@ async function graphTransformInner(
|
|
|
948
1099
|
contextNodes = await ensureRecentTurns(contextNodes, session.sessionId, store);
|
|
949
1100
|
|
|
950
1101
|
if (contextNodes.length === 0) {
|
|
951
|
-
const result = getRecentTurns(messages, budgets.conversation, contextWindow);
|
|
1102
|
+
const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
|
|
952
1103
|
return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "graph") };
|
|
953
1104
|
}
|
|
954
1105
|
|
|
@@ -980,7 +1131,7 @@ async function graphTransformInner(
|
|
|
980
1131
|
} catch (e) { swallow("graph-context:reflections", e); }
|
|
981
1132
|
|
|
982
1133
|
const injectedContext = await formatContextMessage(contextNodes, store, session, skillContext + reflectionContext, tier0, tier1);
|
|
983
|
-
const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow);
|
|
1134
|
+
const recentTurns = getRecentTurns(messages, budgets.conversation, contextWindow, session);
|
|
984
1135
|
const result = [injectedContext, ...recentTurns];
|
|
985
1136
|
return {
|
|
986
1137
|
messages: injectRulesSuffix(result, session),
|
|
@@ -993,7 +1144,7 @@ async function graphTransformInner(
|
|
|
993
1144
|
};
|
|
994
1145
|
} catch (err) {
|
|
995
1146
|
console.error("Graph context error, falling back:", err);
|
|
996
|
-
const result = getRecentTurns(messages, budgets.conversation, contextWindow);
|
|
1147
|
+
const result = getRecentTurns(messages, budgets.conversation, contextWindow, session);
|
|
997
1148
|
return { messages: injectRulesSuffix(result, session), stats: makeStats(result, 0, 0, result.length, "recency-only") };
|
|
998
1149
|
}
|
|
999
1150
|
}
|
package/src/handoff-file.ts
CHANGED
|
@@ -6,7 +6,7 @@
|
|
|
6
6
|
* so the next session's wakeup has context even before deferred
|
|
7
7
|
* extraction runs.
|
|
8
8
|
*/
|
|
9
|
-
import { readFileSync, writeFileSync, unlinkSync, existsSync,
|
|
9
|
+
import { readFileSync, writeFileSync, unlinkSync, existsSync, renameSync } from "node:fs";
|
|
10
10
|
import { join } from "node:path";
|
|
11
11
|
|
|
12
12
|
const HANDOFF_FILENAME = ".kongbrain-handoff.json";
|
|
@@ -42,14 +42,21 @@ export function readAndDeleteHandoffFile(
|
|
|
42
42
|
workspaceDir: string,
|
|
43
43
|
): HandoffFileData | null {
|
|
44
44
|
const path = join(workspaceDir, HANDOFF_FILENAME);
|
|
45
|
+
const processingPath = path + ".processing";
|
|
46
|
+
// Also clean up stale .processing files from prior crashes
|
|
47
|
+
if (existsSync(processingPath) && !existsSync(path)) {
|
|
48
|
+
try { unlinkSync(processingPath); } catch { /* ignore */ }
|
|
49
|
+
}
|
|
45
50
|
if (!existsSync(path)) return null;
|
|
46
51
|
try {
|
|
47
|
-
|
|
48
|
-
|
|
52
|
+
// Atomic rename first so a crash between read and delete can't re-process
|
|
53
|
+
renameSync(path, processingPath);
|
|
54
|
+
const raw = readFileSync(processingPath, "utf-8");
|
|
55
|
+
unlinkSync(processingPath);
|
|
49
56
|
const parsed = JSON.parse(raw);
|
|
50
57
|
// Runtime validation — reject prototype pollution and malformed data
|
|
51
58
|
if (parsed == null || typeof parsed !== "object" || Array.isArray(parsed)) return null;
|
|
52
|
-
if ("__proto__"
|
|
59
|
+
if (Object.hasOwn(parsed, "__proto__") || Object.hasOwn(parsed, "constructor")) return null;
|
|
53
60
|
const data: HandoffFileData = {
|
|
54
61
|
sessionId: typeof parsed.sessionId === "string" ? parsed.sessionId.slice(0, 200) : "",
|
|
55
62
|
timestamp: typeof parsed.timestamp === "string" ? parsed.timestamp.slice(0, 50) : "",
|
|
@@ -61,7 +68,7 @@ export function readAndDeleteHandoffFile(
|
|
|
61
68
|
return data;
|
|
62
69
|
} catch {
|
|
63
70
|
// Corrupted or deleted between check and read
|
|
64
|
-
try { unlinkSync(
|
|
71
|
+
try { unlinkSync(processingPath); } catch { /* ignore */ }
|
|
65
72
|
return null;
|
|
66
73
|
}
|
|
67
74
|
}
|
|
@@ -54,7 +54,7 @@ export function createAfterToolCallHandler(state: GlobalPluginState) {
|
|
|
54
54
|
});
|
|
55
55
|
if (assistantTurnId) session.lastAssistantTurnId = assistantTurnId;
|
|
56
56
|
} catch (e) {
|
|
57
|
-
swallow("hook:afterToolCall:eagerAssistantTurn", e);
|
|
57
|
+
swallow.warn("hook:afterToolCall:eagerAssistantTurn", e);
|
|
58
58
|
}
|
|
59
59
|
}
|
|
60
60
|
if (session.lastAssistantTurnId) {
|
|
@@ -63,12 +63,12 @@ export function createAfterToolCallHandler(state: GlobalPluginState) {
|
|
|
63
63
|
}
|
|
64
64
|
}
|
|
65
65
|
} catch (e) {
|
|
66
|
-
swallow("hook:afterToolCall:store", e);
|
|
66
|
+
swallow.warn("hook:afterToolCall:store", e);
|
|
67
67
|
}
|
|
68
68
|
|
|
69
69
|
// Auto-track file artifacts from write/edit tools
|
|
70
70
|
if (!isError) {
|
|
71
|
-
trackArtifact(event.toolName, event.params, session.taskId, session.projectId, state)
|
|
71
|
+
await trackArtifact(event.toolName, event.params, session.taskId, session.projectId, state)
|
|
72
72
|
.catch(e => swallow.warn("hook:afterToolCall:artifact", e));
|
|
73
73
|
}
|
|
74
74
|
|
|
@@ -8,9 +8,12 @@
|
|
|
8
8
|
|
|
9
9
|
import type { GlobalPluginState } from "../state.js";
|
|
10
10
|
import { recordToolCall } from "../orchestrator.js";
|
|
11
|
+
import { cosineSimilarity } from "../graph-context.js";
|
|
11
12
|
|
|
12
13
|
const DEFAULT_TOOL_LIMIT = 10;
|
|
13
14
|
const CLASSIFICATION_LIMITS: Record<string, number> = { LOOKUP: 3, EDIT: 4, REFACTOR: 8 };
|
|
15
|
+
const API_CYCLE_CAP = 16;
|
|
16
|
+
const RECALL_SIMILARITY_THRESHOLD = 0.80;
|
|
14
17
|
|
|
15
18
|
export function createBeforeToolCallHandler(state: GlobalPluginState) {
|
|
16
19
|
return async (
|
|
@@ -30,6 +33,7 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
|
|
|
30
33
|
|
|
31
34
|
session.toolCallCount++;
|
|
32
35
|
session.toolCallsSinceLastText++;
|
|
36
|
+
session.apiCycleCount++;
|
|
33
37
|
|
|
34
38
|
// Record for steering analysis
|
|
35
39
|
recordToolCall(session, event.toolName);
|
|
@@ -46,6 +50,14 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
|
|
|
46
50
|
};
|
|
47
51
|
}
|
|
48
52
|
|
|
53
|
+
// API cycle cap (claw-code pattern: max_iterations — conversation.rs:119)
|
|
54
|
+
if (session.apiCycleCount > API_CYCLE_CAP) {
|
|
55
|
+
return {
|
|
56
|
+
block: true,
|
|
57
|
+
blockReason: `Hard API cycle cap (${API_CYCLE_CAP}) reached. Deliver your answer now.`,
|
|
58
|
+
};
|
|
59
|
+
}
|
|
60
|
+
|
|
49
61
|
// Tool limit
|
|
50
62
|
if (session.toolCallCount > session.toolLimit) {
|
|
51
63
|
return {
|
|
@@ -54,14 +66,49 @@ export function createBeforeToolCallHandler(state: GlobalPluginState) {
|
|
|
54
66
|
};
|
|
55
67
|
}
|
|
56
68
|
|
|
69
|
+
// Intent-based tool gating (claw-code pattern: simple_mode/MCP toggle — tools.py:62-72)
|
|
70
|
+
// On skipRetrieval turns, recall has nothing to add — context was skipped intentionally
|
|
71
|
+
if (event.toolName === "recall" && session.currentConfig?.skipRetrieval) {
|
|
72
|
+
return {
|
|
73
|
+
block: true,
|
|
74
|
+
blockReason: "Context retrieval was skipped this turn (continuation/trivial input). " +
|
|
75
|
+
"Recall would return the same results as previous turns. Continue with what you have.",
|
|
76
|
+
};
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
// Redundant recall blocker (claw-code pattern: _infer_permission_denials — runtime.py:169-174)
|
|
80
|
+
// Block recall when its query would return the same results as context retrieval
|
|
81
|
+
if (event.toolName === "recall" && session.lastQueryVec) {
|
|
82
|
+
const recallQuery = (event.params as { query?: string }).query;
|
|
83
|
+
if (recallQuery && typeof recallQuery === "string" && recallQuery.length > 5) {
|
|
84
|
+
try {
|
|
85
|
+
const recallVec = await state.embeddings.embed(recallQuery);
|
|
86
|
+
const sim = cosineSimilarity(session.lastQueryVec, recallVec);
|
|
87
|
+
if (sim > RECALL_SIMILARITY_THRESHOLD) {
|
|
88
|
+
return {
|
|
89
|
+
block: true,
|
|
90
|
+
blockReason:
|
|
91
|
+
`This recall query is ${(sim * 100).toFixed(0)}% similar to the context already retrieved this turn. ` +
|
|
92
|
+
"The results are in <graph_context> above. Read what you have. " +
|
|
93
|
+
"Only call recall with a DIFFERENT query targeting something specific not already covered.",
|
|
94
|
+
};
|
|
95
|
+
}
|
|
96
|
+
} catch { /* fail-open: allow recall if embedding fails */ }
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
|
|
57
100
|
// Planning gate: model must output text before first tool call
|
|
58
101
|
if (textLengthSoFar === 0 && toolIndex === 0) {
|
|
102
|
+
const retrievalNote = session.lastRetrievalSummary
|
|
103
|
+
? `\nContext already injected: ${session.lastRetrievalSummary}. Read <graph_context> before calling tools.`
|
|
104
|
+
: "";
|
|
59
105
|
return {
|
|
60
106
|
block: true,
|
|
61
107
|
blockReason:
|
|
62
108
|
"PLANNING GATE — You must announce your plan before making tool calls.\n" +
|
|
63
109
|
"1. Classify: LOOKUP (3 calls max), EDIT (4 max), REFACTOR (8 max)\n" +
|
|
64
|
-
"2. STATE WHAT YOU ALREADY KNOW from injected memory/context — if you have prior knowledge about these files, say so
|
|
110
|
+
"2. STATE WHAT YOU ALREADY KNOW from injected memory/context — if you have prior knowledge about these files, say so" +
|
|
111
|
+
retrievalNote + "\n" +
|
|
65
112
|
"3. List each planned call and what SPECIFIC GAP it fills that memory doesn't cover\n" +
|
|
66
113
|
"4. Every step still happens, but COMBINED. Edit + test in one bash call, not two.\n" +
|
|
67
114
|
"If injected context already answers the question, you may need ZERO tool calls.\n" +
|