kongbrain 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,348 @@
1
+ /**
2
+ * ISMAR-GENT Orchestration Layer
3
+ *
4
+ * Pre/post processing pipeline between user input and agent.prompt().
5
+ * Classifies intent, adapts agent configuration, records metrics.
6
+ * Target: <25ms for non-trivial prompts, <1ms for simple ones.
7
+ *
8
+ * Ported from kongbrain — takes EmbeddingService + SurrealStore as params
9
+ * instead of module-level singletons. Per-session state via SessionState.
10
+ */
11
+
12
+ import { classifyIntent, estimateComplexity } from "./intent.js";
13
+ import type { IntentResult, ComplexityEstimate, ThinkingLevel, IntentCategory } from "./intent.js";
14
+ import type { EmbeddingService } from "./embeddings.js";
15
+ import type { SurrealStore } from "./surreal.js";
16
+ import type { SessionState } from "./state.js";
17
+ import { getRecentUtilizationAvg } from "./retrieval-quality.js";
18
+ import { swallow } from "./errors.js";
19
+
20
// Detects inputs that reference shared memory/history ("we", "yesterday",
// "you said", ...). Used by preflight to keep retrieval enabled for intents
// that would otherwise skip it. Case-insensitive, word-boundary anchored.
const MEMORY_REFERENCE_RE = /\b(we|our|yesterday|earlier|before|last time|prior|remember|recall|previous|discussed|decided|talked about|worked on|you said|you mentioned)\b/i;

// --- Types ---

// Re-exported so orchestrator consumers don't also have to import ./intent.js.
export { type ThinkingLevel, type IntentCategory } from "./intent.js";
26
+
27
/** Per-turn knobs the orchestrator adapts to the classified intent. */
export interface AdaptiveConfig {
  thinkingLevel: ThinkingLevel;
  // Max tool calls allowed this turn; Infinity means unlimited
  // (serialized as -1 in postflight metrics).
  toolLimit: number;
  // Token budget for retrieved context this turn.
  tokenBudget: number;
  // Fraction of the overall retrieval budget to spend; when set and > 0,
  // preflight derives tokenBudget from it.
  retrievalShare?: number;
  // When true, vector retrieval is skipped entirely for this turn.
  skipRetrieval?: boolean;
  // Intent category this config was derived from (set by preflight).
  intent?: string;
  // Per-record-type result limits for vector search.
  vectorSearchLimits: {
    turn: number;
    identity: number;
    concept: number;
    memory: number;
    artifact: number;
  };
}

/** Output of preflight(): classification plus the adapted config and timing. */
export interface PreflightResult {
  intent: IntentResult;
  complexity: ComplexityEstimate;
  config: AdaptiveConfig;
  // Wall-clock time spent in preflight, in milliseconds.
  preflightMs: number;
  // True when a shortcut (trivial/continuation input) bypassed full classification.
  fastPath: boolean;
}

/** A mid-turn condition worth steering the agent about (see recordToolCall). */
interface SteeringCandidate {
  // NOTE(review): "scope_drift" is declared but never produced in this file —
  // presumably emitted elsewhere or reserved; verify before removing.
  type: "runaway" | "budget_warning" | "scope_drift";
  // 1-based count of tool calls made when this candidate was recorded.
  toolCall: number;
  detail: string;
}
56
+
57
// --- Default config ---

// Fallback config: used for the "unknown" intent and as each session's
// initial lastConfig before any preflight has run.
export const DEFAULT_ADAPTIVE_CONFIG: AdaptiveConfig = {
  thinkingLevel: "medium",
  toolLimit: 15,
  tokenBudget: 6000,
  retrievalShare: 0.15,
  vectorSearchLimits: { turn: 25, identity: 10, concept: 20, memory: 20, artifact: 10 },
};
66
+
67
// --- Intent → Config mapping ---

// Baseline config per intent category. preflight() clones the matching entry
// and then adjusts it (confidence gating, complexity overrides, budget
// derivation), so these are starting points, not final values.
const INTENT_CONFIG: Record<IntentCategory, AdaptiveConfig> = {
  // Quick factual answers: few tools, light retrieval.
  "simple-question": {
    thinkingLevel: "low",
    toolLimit: 3,
    tokenBudget: 4000,
    retrievalShare: 0.10,
    vectorSearchLimits: { turn: 15, identity: 5, concept: 12, memory: 12, artifact: 3 },
  },
  // Reading/understanding existing code.
  "code-read": {
    thinkingLevel: "medium",
    toolLimit: 5,
    tokenBudget: 6000,
    retrievalShare: 0.15,
    vectorSearchLimits: { turn: 25, identity: 8, concept: 20, memory: 20, artifact: 10 },
  },
  // Writing new code: more artifacts, deeper thinking.
  "code-write": {
    thinkingLevel: "high",
    toolLimit: 8,
    tokenBudget: 8000,
    retrievalShare: 0.20,
    vectorSearchLimits: { turn: 30, identity: 10, concept: 20, memory: 20, artifact: 15 },
  },
  // Debugging: highest tool allowance of the code intents, extra memory hits.
  "code-debug": {
    thinkingLevel: "high",
    toolLimit: 10,
    tokenBudget: 8000,
    retrievalShare: 0.20,
    vectorSearchLimits: { turn: 30, identity: 8, concept: 20, memory: 25, artifact: 15 },
  },
  // Open-ended exploration: generous tool budget, moderate retrieval.
  "deep-explore": {
    thinkingLevel: "medium",
    toolLimit: 15,
    tokenBudget: 6000,
    retrievalShare: 0.15,
    vectorSearchLimits: { turn: 25, identity: 8, concept: 15, memory: 15, artifact: 8 },
  },
  // References prior conversation: heaviest retrieval share and turn limit.
  "reference-prior": {
    thinkingLevel: "medium",
    toolLimit: 5,
    tokenBudget: 10000,
    retrievalShare: 0.25,
    vectorSearchLimits: { turn: 40, identity: 10, concept: 25, memory: 30, artifact: 10 },
  },
  // Questions about the session itself: minimal everything, retrieval kept on.
  "meta-session": {
    thinkingLevel: "low",
    toolLimit: 2,
    tokenBudget: 3000,
    retrievalShare: 0.07,
    skipRetrieval: false,
    vectorSearchLimits: { turn: 8, identity: 5, concept: 5, memory: 8, artifact: 0 },
  },
  // Multi-step tasks: high thinking, large tool budget.
  "multi-step": {
    thinkingLevel: "high",
    toolLimit: 12,
    tokenBudget: 8000,
    retrievalShare: 0.20,
    vectorSearchLimits: { turn: 30, identity: 10, concept: 20, memory: 20, artifact: 15 },
  },
  // "keep going" follow-ups: no retrieval at all (context is already loaded).
  "continuation": {
    thinkingLevel: "low",
    toolLimit: 8,
    tokenBudget: 4000,
    skipRetrieval: true,
    vectorSearchLimits: { turn: 0, identity: 0, concept: 0, memory: 0, artifact: 0 },
  },
  "unknown": { ...DEFAULT_ADAPTIVE_CONFIG },
};
136
+
137
+ // --- Per-session orchestrator state ---
138
+
139
+ interface OrchestratorSessionState {
140
+ lastConfig: AdaptiveConfig;
141
+ turnIndex: number;
142
+ currentTurnTools: { name: string; args?: string }[];
143
+ steeringCandidates: SteeringCandidate[];
144
+ }
145
+
146
+ const sessionOrchState = new WeakMap<SessionState, OrchestratorSessionState>();
147
+
148
+ function getOrchState(session: SessionState): OrchestratorSessionState {
149
+ let state = sessionOrchState.get(session);
150
+ if (!state) {
151
+ state = {
152
+ lastConfig: { ...DEFAULT_ADAPTIVE_CONFIG },
153
+ turnIndex: 0,
154
+ currentTurnTools: [],
155
+ steeringCandidates: [],
156
+ };
157
+ sessionOrchState.set(session, state);
158
+ }
159
+ return state;
160
+ }
161
+
162
// --- Public API ---

/**
 * Pre-processing pass before agent.prompt(): classify the input's intent and
 * complexity, derive an AdaptiveConfig for the turn, and reset per-turn
 * steering state.
 *
 * Two fast paths skip classification for short inputs (< 20 chars, no "?"):
 * the first turn gets a minimal trivial config; later turns are treated as
 * continuations inheriting the previous config. Otherwise the input is
 * classified via embeddings, the matching INTENT_CONFIG entry is cloned, and
 * then adjusted for low confidence, memory references, the retrieval budget,
 * complexity, and recent retrieval quality.
 *
 * @param input user message text
 * @param session per-session state object (keys the orchestrator WeakMap)
 * @param embeddings embedding service passed through to classifyIntent
 * @param retrievalBudgetTokens overall retrieval budget; when the chosen
 *   config has retrievalShare set, tokenBudget = budget * share
 */
export async function preflight(
  input: string,
  session: SessionState,
  embeddings: EmbeddingService,
  retrievalBudgetTokens = 42000,
): Promise<PreflightResult> {
  const start = performance.now();
  const orch = getOrchState(session);
  orch.turnIndex++;
  // New turn: clear per-turn tool history and steering candidates.
  orch.currentTurnTools = [];
  orch.steeringCandidates = [];

  // Fast path: trivial first-turn inputs
  const isTrivial = orch.turnIndex <= 1 && input.length < 20 && !input.includes("?");
  if (isTrivial) {
    const config: AdaptiveConfig = {
      thinkingLevel: "low", toolLimit: 15, tokenBudget: 300, skipRetrieval: true,
      vectorSearchLimits: { turn: 0, identity: 0, concept: 0, memory: 0, artifact: 0 },
    };
    orch.lastConfig = config;
    return {
      intent: { category: "unknown", confidence: 0, scores: [] },
      complexity: { level: "simple", estimatedToolCalls: 0, suggestedThinking: "low" },
      config,
      preflightMs: performance.now() - start,
      fastPath: true,
    };
  }

  // Non-first-turn short inputs → continuation: inherit last turn's config
  // (tool limit raised to at least 25) and skip retrieval — context from the
  // prior turn is assumed to still be loaded.
  if (orch.turnIndex > 1 && input.length < 20 && !input.includes("?")) {
    const inheritedLimit = Math.max(orch.lastConfig.toolLimit, 25);
    const config: AdaptiveConfig = {
      ...orch.lastConfig, toolLimit: inheritedLimit, skipRetrieval: true,
      vectorSearchLimits: { turn: 0, identity: 0, concept: 0, memory: 0, artifact: 0 },
    };
    orch.lastConfig = config;
    return {
      intent: { category: "continuation", confidence: 0.9, scores: [] },
      complexity: { level: "moderate", estimatedToolCalls: 15, suggestedThinking: "medium" },
      config,
      preflightMs: performance.now() - start,
      fastPath: true,
    };
  }

  // Full classification
  const intent = await classifyIntent(input, embeddings);
  const complexity = estimateComplexity(input, intent);

  // Conservative config used when the classifier isn't sure what this is.
  const LOW_CONFIDENCE_CONFIG: AdaptiveConfig = {
    thinkingLevel: "low", toolLimit: 15, tokenBudget: 3000, retrievalShare: 0.08,
    vectorSearchLimits: { turn: 12, identity: 5, concept: 8, memory: 12, artifact: 3 },
  };

  let config: AdaptiveConfig;
  if (intent.category === "continuation") {
    // Classified continuation: keep last config but ensure a workable limit.
    config = { ...orch.lastConfig };
    config.toolLimit = Math.max(config.toolLimit, 15);
  } else if (intent.confidence < 0.40) {
    config = { ...LOW_CONFIDENCE_CONFIG };
  } else {
    config = { ...(INTENT_CONFIG[intent.category] ?? DEFAULT_ADAPTIVE_CONFIG) };
  }
  config.intent = intent.category;

  // Gate retrieval for trivial intents (unless memory-referencing)
  if (
    (intent.category === "simple-question" || intent.category === "meta-session") &&
    intent.confidence >= 0.70 &&
    !MEMORY_REFERENCE_RE.test(input)
  ) {
    config.skipRetrieval = true;
    config.vectorSearchLimits = { turn: 0, identity: 0, concept: 0, memory: 0, artifact: 0 };
  }

  // Derive tokenBudget from retrieval budget
  if (config.retrievalShare != null && config.retrievalShare > 0) {
    config.tokenBudget = Math.round(retrievalBudgetTokens * config.retrievalShare);
  }

  // Override thinking if complexity demands it
  if (complexity.suggestedThinking === "high" && config.thinkingLevel !== "high") {
    config.thinkingLevel = "high";
  }

  // Override tool limit from complexity estimate (capped at 1.5x, max 20)
  if (complexity.estimatedToolCalls > config.toolLimit) {
    config.toolLimit = Math.min(complexity.estimatedToolCalls, Math.ceil(config.toolLimit * 1.5), 20);
  }

  // Adaptive token budget from rolling retrieval quality: scale the budget by
  // 0.5x–1.3x based on how much of recently-retrieved context was actually
  // used (best-effort; lookup failures leave the budget unchanged).
  if (!config.skipRetrieval) {
    const recentUtil = await getRecentUtilizationAvg(session.sessionId, 10).catch(() => null);
    if (recentUtil !== null) {
      const scale = Math.max(0.5, Math.min(1.3, 0.5 + recentUtil * 0.8));
      config.tokenBudget = Math.round(config.tokenBudget * scale);
    }
  }

  orch.lastConfig = config;
  return {
    intent,
    complexity,
    config,
    preflightMs: performance.now() - start,
    fastPath: false,
  };
}
273
+
274
+ /** Record a tool call for steering analysis. */
275
+ export function recordToolCall(session: SessionState, name: string, args?: string): void {
276
+ const orch = getOrchState(session);
277
+ orch.currentTurnTools.push({ name, args });
278
+
279
+ if (orch.currentTurnTools.length >= 5) {
280
+ const last5 = orch.currentTurnTools.slice(-5);
281
+ if (last5.every((t) => t.name === last5[0].name)) {
282
+ orch.steeringCandidates.push({
283
+ type: "runaway",
284
+ toolCall: orch.currentTurnTools.length,
285
+ detail: `${last5[0].name} called 5+ times consecutively`,
286
+ });
287
+ }
288
+ }
289
+
290
+ const budgetWarnAt = Math.floor(orch.lastConfig.toolLimit * 0.85);
291
+ if (orch.lastConfig.toolLimit !== Infinity && orch.currentTurnTools.length >= budgetWarnAt) {
292
+ orch.steeringCandidates.push({
293
+ type: "budget_warning",
294
+ toolCall: orch.currentTurnTools.length,
295
+ detail: `${orch.currentTurnTools.length}/${orch.lastConfig.toolLimit} tool calls used`,
296
+ });
297
+ }
298
+ }
299
+
300
/**
 * Record per-turn orchestrator metrics to SurrealDB (non-blocking).
 * Best-effort: returns silently when the store is unavailable, and any
 * storage error is swallowed so metrics can never disrupt a turn.
 *
 * @param input user message (only its length is stored)
 * @param result the PreflightResult produced for this turn
 * @param actualToolCalls / actualTokensIn / actualTokensOut observed usage
 * @param turnDurationMs wall-clock duration of the whole turn
 * @param session session whose orchestrator state supplies turn index and
 *   steering candidates
 * @param store SurrealDB store the metrics row is written to
 */
export async function postflight(
  input: string,
  result: PreflightResult,
  actualToolCalls: number,
  actualTokensIn: number,
  actualTokensOut: number,
  turnDurationMs: number,
  session: SessionState,
  store: SurrealStore,
): Promise<void> {
  const orch = getOrchState(session);
  try {
    if (!store.isAvailable()) return;
    await store.queryExec(`CREATE orchestrator_metrics CONTENT $data`, {
      data: {
        session_id: session.sessionId,
        turn_index: orch.turnIndex,
        input_length: input.length,
        intent: result.intent.category,
        intent_confidence: result.intent.confidence,
        complexity: result.complexity.level,
        thinking_level: result.config.thinkingLevel,
        // Infinity isn't serializable — unlimited is stored as -1.
        tool_limit: result.config.toolLimit === Infinity ? -1 : result.config.toolLimit,
        token_budget: result.config.tokenBudget,
        actual_tool_calls: actualToolCalls,
        actual_tokens_in: actualTokensIn,
        actual_tokens_out: actualTokensOut,
        preflight_ms: result.preflightMs,
        turn_duration_ms: turnDurationMs,
        steering_candidates: orch.steeringCandidates.length,
        // Flattened "type: detail" pairs; omitted entirely when none fired.
        steering_details: orch.steeringCandidates.length > 0
          ? orch.steeringCandidates.map((c) => `${c.type}: ${c.detail}`).join("; ")
          : undefined,
        fast_path: result.fastPath,
      },
    });
  } catch (e) {
    swallow("orchestrator:postflight", e);
  }
}
341
+
342
+ export function getLastPreflightConfig(session: SessionState): AdaptiveConfig {
343
+ return getOrchState(session).lastConfig;
344
+ }
345
+
346
+ export function getSteeringCandidates(session: SessionState): SteeringCandidate[] {
347
+ return getOrchState(session).steeringCandidates;
348
+ }
@@ -0,0 +1,200 @@
1
+ /**
2
+ * Predictive Context Prefetching — Phase 7d
3
+ *
4
+ * After preflight classifies intent, predict 2-4 follow-up queries and fire
5
+ * vector searches in the background. Results are cached in an LRU with 5-min TTL.
6
+ *
7
+ * Ported from kongbrain — takes SurrealStore/EmbeddingService as params.
8
+ */
9
+
10
+ import type { EmbeddingService } from "./embeddings.js";
11
+ import type { SurrealStore, VectorSearchResult } from "./surreal.js";
12
+ import { findRelevantSkills, type Skill } from "./skills.js";
13
+ import { retrieveReflections, type Reflection } from "./reflection.js";
14
+ import type { IntentCategory } from "./intent.js";
15
+ import { swallow } from "./errors.js";
16
+
17
// --- Types ---

/** One warmed query: its embedding plus everything prefetched for it. */
interface CacheEntry {
  queryVec: number[];
  results: VectorSearchResult[];
  skills: Skill[];
  reflections: Reflection[];
  // Insertion time (ms since epoch) — drives TTL eviction in evictStale().
  timestamp: number;
}

// --- LRU Cache ---

// Entries older than this are evicted whenever the cache is touched.
const CACHE_TTL_MS = 5 * 60 * 1000;
// Size cap; oldest-inserted entries are evicted first. NOTE(review): lookups
// never refresh recency, so size eviction is FIFO rather than true LRU.
const MAX_CACHE_SIZE = 10;
// Minimum cosine similarity between a live query vector and a cached one
// for getCachedContext to count the entry as a hit.
const CACHE_HIT_THRESHOLD = 0.85;

// Keyed by the raw predicted-query string (module-global, shared by all sessions).
const warmCache = new Map<string, CacheEntry>();
34
+
35
+ // --- Hit rate telemetry ---
36
+ let _prefetchHits = 0;
37
+ let _prefetchMisses = 0;
38
+
39
+ export function recordPrefetchHit(): void { _prefetchHits++; }
40
+ export function recordPrefetchMiss(): void { _prefetchMisses++; }
41
+
42
+ export function getPrefetchHitRate(): { hits: number; misses: number; attempts: number; hitRate: number } {
43
+ const attempts = _prefetchHits + _prefetchMisses;
44
+ return { hits: _prefetchHits, misses: _prefetchMisses, attempts, hitRate: attempts > 0 ? _prefetchHits / attempts : 0 };
45
+ }
46
+
47
+ function evictStale(): void {
48
+ const now = Date.now();
49
+ for (const [key, entry] of warmCache) {
50
+ if (now - entry.timestamp > CACHE_TTL_MS) warmCache.delete(key);
51
+ }
52
+ while (warmCache.size > MAX_CACHE_SIZE) {
53
+ const oldest = warmCache.keys().next().value;
54
+ if (oldest) warmCache.delete(oldest);
55
+ }
56
+ }
57
+
58
+ // --- Query Prediction ---
59
+
60
+ export function predictQueries(input: string, intent: IntentCategory): string[] {
61
+ const queries: string[] = [];
62
+
63
+ const filePaths = input.match(/[\w./\\-]+\.\w{1,10}/g) ?? [];
64
+ for (const fp of filePaths.slice(0, 2)) queries.push(fp);
65
+
66
+ const quoted = input.match(/[`"']([^`"']{3,60})[`"']/g) ?? [];
67
+ for (const q of quoted.slice(0, 2)) queries.push(q.replace(/[`"']/g, ""));
68
+
69
+ switch (intent) {
70
+ case "code-debug":
71
+ queries.push(`error ${extractKeyTerms(input)}`);
72
+ queries.push(`fix ${extractKeyTerms(input)}`);
73
+ break;
74
+ case "code-write":
75
+ queries.push(`implementation pattern ${extractKeyTerms(input)}`);
76
+ queries.push(`test ${extractKeyTerms(input)}`);
77
+ break;
78
+ case "code-read":
79
+ queries.push(`architecture ${extractKeyTerms(input)}`);
80
+ break;
81
+ case "multi-step":
82
+ queries.push(`procedure ${extractKeyTerms(input)}`);
83
+ queries.push(`workflow ${extractKeyTerms(input)}`);
84
+ break;
85
+ case "reference-prior":
86
+ queries.push(extractKeyTerms(input));
87
+ break;
88
+ default:
89
+ break;
90
+ }
91
+
92
+ return [...new Set(queries.filter((q) => q.length > 3))].slice(0, 4);
93
+ }
94
+
95
+ function extractKeyTerms(input: string): string {
96
+ const STOP = new Set(["the", "and", "for", "with", "from", "this", "that", "have", "will", "can", "not", "are", "was", "but"]);
97
+ return input.split(/\s+/)
98
+ .filter((w) => w.length >= 3 && !STOP.has(w.toLowerCase()))
99
+ .slice(0, 6)
100
+ .join(" ");
101
+ }
102
+
103
// --- Prefetching ---

/**
 * Background warm-up: for each predicted query, embed it, run a small vector
 * search plus one hop of graph expansion, fetch related skills/reflections,
 * and cache the bundle in warmCache keyed by the raw query string.
 *
 * Entirely best-effort: returns immediately when either service is down, and
 * per-query failures are swallowed so one bad query doesn't stop the rest.
 * Queries are processed sequentially (deliberately gentle on the store).
 */
export async function prefetchContext(
  queries: string[],
  sessionId: string,
  embeddings: EmbeddingService,
  store: SurrealStore,
): Promise<void> {
  if (!embeddings.isAvailable() || !store.isAvailable()) return;
  if (queries.length === 0) return;

  evictStale();

  for (const query of queries) {
    try {
      const queryVec = await embeddings.embed(query);

      // Small per-type limits: this is a warm-up, not a full retrieval pass.
      const results = await store.vectorSearch(queryVec, sessionId, {
        turn: 5, identity: 2, concept: 3, memory: 3, artifact: 2,
      });

      // NOTE: sort() mutates `results` in place, so the cached entry below
      // stores results in descending-score order.
      const topIds = results
        .sort((a, b) => (b.score ?? 0) - (a.score ?? 0))
        .slice(0, 5)
        .map((r) => r.id);

      // One hop of graph expansion around the top hits, deduped against the
      // direct results. Expansion failure is non-fatal for this query.
      let neighbors: VectorSearchResult[] = [];
      if (topIds.length > 0) {
        try {
          const expanded = await store.graphExpand(topIds, queryVec);
          const existingIds = new Set(results.map((r) => r.id));
          neighbors = expanded.filter((n) => !existingIds.has(n.id));
        } catch (e) { swallow("prefetch:graphExpand", e); }
      }

      // Skills and reflections fetched in parallel; each falls back to [].
      const [skills, reflections] = await Promise.all([
        findRelevantSkills(queryVec, 2, store).catch(() => [] as Skill[]),
        retrieveReflections(queryVec, 2, store).catch(() => [] as Reflection[]),
      ]);

      warmCache.set(query, {
        queryVec,
        results: [...results, ...neighbors],
        skills,
        reflections,
        timestamp: Date.now(),
      });
    } catch (e) {
      // Embedding/search failure: skip this query only, keep going.
      swallow("prefetch:query", e);
    }
  }
}
155
+
156
+ // --- Cache Lookup ---
157
+
158
+ function cosineSimilarity(a: number[], b: number[]): number {
159
+ if (a.length !== b.length || a.length === 0) return 0;
160
+ let dot = 0, normA = 0, normB = 0;
161
+ for (let i = 0; i < a.length; i++) {
162
+ dot += a[i] * b[i];
163
+ normA += a[i] * a[i];
164
+ normB += b[i] * b[i];
165
+ }
166
+ const denom = Math.sqrt(normA) * Math.sqrt(normB);
167
+ return denom > 0 ? dot / denom : 0;
168
+ }
169
+
170
/** Prefetched bundle returned on a warm-cache hit (see getCachedContext). */
export interface CachedContext {
  results: VectorSearchResult[];
  skills: Skill[];
  reflections: Reflection[];
}
175
+
176
+ export function getCachedContext(queryVec: number[]): CachedContext | null {
177
+ evictStale();
178
+
179
+ let bestMatch: CacheEntry | null = null;
180
+ let bestSim = 0;
181
+
182
+ for (const [, entry] of warmCache) {
183
+ const sim = cosineSimilarity(queryVec, entry.queryVec);
184
+ if (sim > bestSim) { bestSim = sim; bestMatch = entry; }
185
+ }
186
+
187
+ if (bestMatch && bestSim >= CACHE_HIT_THRESHOLD) {
188
+ return { results: bestMatch.results, skills: bestMatch.skills, reflections: bestMatch.reflections };
189
+ }
190
+ return null;
191
+ }
192
+
193
+ export function getPrefetchStats(): { entries: number; maxSize: number } {
194
+ evictStale();
195
+ return { entries: warmCache.size, maxSize: MAX_CACHE_SIZE };
196
+ }
197
+
198
/** Drop all prefetched entries (e.g. on reset). Hit/miss counters are untouched. */
export function clearPrefetchCache(): void {
  warmCache.clear();
}