@stackbilt/aegis-core 0.6.2 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +4 -2
- package/src/kernel/dispatch.ts +11 -31
- package/src/kernel/executor-router.ts +95 -0
- package/src/kernel/executors/groq.ts +10 -9
- package/src/kernel/executors/index.ts +24 -7
- package/src/kernel/executors/workers-ai.ts +197 -54
- package/src/kernel/provider-factory.ts +36 -0
- package/src/routes/observability.ts +125 -0
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@stackbilt/aegis-core",
|
|
3
|
-
"version": "0.6.2",
|
|
3
|
+
"version": "0.6.3",
|
|
4
4
|
"description": "Persistent AI agent framework for Cloudflare Workers. Multi-tier memory, autonomous goals, dreaming cycles, MCP native.",
|
|
5
5
|
"license": "Apache-2.0",
|
|
6
6
|
"publishConfig": {
|
|
@@ -33,6 +33,8 @@
|
|
|
33
33
|
"./kernel/argus-correlation": "./src/kernel/argus-correlation.ts",
|
|
34
34
|
"./kernel/port": "./src/kernel/port.ts",
|
|
35
35
|
"./kernel/executor-port": "./src/kernel/executor-port.ts",
|
|
36
|
+
"./kernel/executor-router": "./src/kernel/executor-router.ts",
|
|
37
|
+
"./kernel/provider-factory": "./src/kernel/provider-factory.ts",
|
|
36
38
|
"./kernel/executors": "./src/kernel/executors/index.ts",
|
|
37
39
|
"./kernel/scheduled": "./src/kernel/scheduled/index.ts",
|
|
38
40
|
"./kernel/scheduled/dreaming": "./src/kernel/scheduled/dreaming.ts",
|
|
@@ -84,7 +86,7 @@
|
|
|
84
86
|
"@cloudflare/voice": "^0.1.3",
|
|
85
87
|
"@cloudflare/workers-oauth-provider": "^0.2.4",
|
|
86
88
|
"@stackbilt/contracts": "^0.2.1",
|
|
87
|
-
"@stackbilt/llm-providers": "^1.6.
|
|
89
|
+
"@stackbilt/llm-providers": "^1.6.4",
|
|
88
90
|
"agents": "^0.12.3",
|
|
89
91
|
"hono": "^4.12.12",
|
|
90
92
|
"zod": "^4.4.3"
|
package/src/kernel/dispatch.ts
CHANGED
|
@@ -8,17 +8,14 @@ import { executeComposite } from '../composite.js';
|
|
|
8
8
|
import { buildGroqSystemPrompt } from '../operator/prompt-builder.js';
|
|
9
9
|
import type { KernelIntent, DispatchResult, Executor } from './types.js';
|
|
10
10
|
import {
|
|
11
|
-
executeClaude,
|
|
12
|
-
executeClaudeOpus,
|
|
13
|
-
executeClaudeStream,
|
|
14
|
-
executeGroq,
|
|
15
|
-
executeWorkersAi,
|
|
16
11
|
executeGptOss,
|
|
12
|
+
executeClaudeStream,
|
|
17
13
|
executeDirect,
|
|
18
14
|
executeCodeTask,
|
|
19
15
|
executeWithAnthropicFailover,
|
|
20
16
|
executeTarotScript,
|
|
21
17
|
buildMcpRegistry,
|
|
18
|
+
EXECUTOR_FNS,
|
|
22
19
|
} from './executors/index.js';
|
|
23
20
|
// ─── Edge Environment ────────────────────────────────────────
|
|
24
21
|
|
|
@@ -366,15 +363,6 @@ async function probeAndExecute(
|
|
|
366
363
|
case 'composite':
|
|
367
364
|
result = await executeComposite(intent, env, buildMcpRegistry(env));
|
|
368
365
|
break;
|
|
369
|
-
case 'gpt_oss':
|
|
370
|
-
result = await executeGptOss(intent, env);
|
|
371
|
-
break;
|
|
372
|
-
case 'workers_ai':
|
|
373
|
-
result = await executeWorkersAi(intent, env);
|
|
374
|
-
break;
|
|
375
|
-
case 'groq':
|
|
376
|
-
result = await executeGroq(intent, env);
|
|
377
|
-
break;
|
|
378
366
|
case 'direct':
|
|
379
367
|
result = await executeDirect(intent, env);
|
|
380
368
|
break;
|
|
@@ -384,8 +372,11 @@ async function probeAndExecute(
|
|
|
384
372
|
case 'tarotscript':
|
|
385
373
|
result = await executeTarotScript(intent, env);
|
|
386
374
|
break;
|
|
387
|
-
default:
|
|
388
|
-
|
|
375
|
+
default: {
|
|
376
|
+
const fn = EXECUTOR_FNS[plan.executor as Executor];
|
|
377
|
+
if (!fn) throw new Error(`Unknown executor: ${plan.executor}`);
|
|
378
|
+
result = await fn(intent, env);
|
|
379
|
+
}
|
|
389
380
|
}
|
|
390
381
|
|
|
391
382
|
// For streaming non-Claude executors, emit full text as single delta
|
|
@@ -464,21 +455,10 @@ async function tryShadowExploration(
|
|
|
464
455
|
try {
|
|
465
456
|
// Clone intent to avoid mutation
|
|
466
457
|
const shadowIntent: KernelIntent = { ...intent, classified: shadowExecutor };
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
result = await executeGptOss(shadowIntent, env);
|
|
472
|
-
break;
|
|
473
|
-
case 'workers_ai':
|
|
474
|
-
result = await executeWorkersAi(shadowIntent, env);
|
|
475
|
-
break;
|
|
476
|
-
case 'claude':
|
|
477
|
-
result = await executeClaude(shadowIntent, env);
|
|
478
|
-
break;
|
|
479
|
-
default:
|
|
480
|
-
return;
|
|
481
|
-
}
|
|
458
|
+
|
|
459
|
+
const fn = EXECUTOR_FNS[shadowExecutor];
|
|
460
|
+
if (!fn) return;
|
|
461
|
+
const result = await fn(shadowIntent, env);
|
|
482
462
|
|
|
483
463
|
const passed = shadowQualityPass(primaryText, result.text);
|
|
484
464
|
const outcome = passed ? 'success' : 'failure';
|
package/src/kernel/executor-router.ts
ADDED
|
@@ -0,0 +1,95 @@
|
|
|
1
|
+
import type { EdgeEnv } from './dispatch.js';
|
|
2
|
+
import type { Executor } from './types.js';
|
|
3
|
+
|
|
4
|
+
// ─── Provider Names ──────────────────────────────────────────
|
|
5
|
+
// 'anthropic' and 'cloudflare' are wired in @stackbilt/llm-providers v1.6.0.
|
|
6
|
+
// 'groq' and 'cerebras' are forward-declared — no LLMProviderFactory entry yet.
|
|
7
|
+
// A future session can wire them when provider support lands.
|
|
8
|
+
export type LLMProviderName = 'anthropic' | 'cloudflare' | 'groq' | 'cerebras';
|
|
9
|
+
|
|
10
|
+
// ─── LLM Executor Subset ─────────────────────────────────────
|
|
11
|
+
// These are the executors that call an external LLM provider.
|
|
12
|
+
// Excluded from EXECUTOR_ROUTES (dispatch keeps its own branches):
|
|
13
|
+
// 'direct' — returns a rule-based response without an LLM call
|
|
14
|
+
// 'claude_code' — spins a Claude Code CLI session, not a provider call
|
|
15
|
+
// 'tarotscript' — service-binding fetcher, not an LLM call
|
|
16
|
+
// 'composite' — orchestrates multiple executors; no single provider entry
|
|
17
|
+
export type LLMExecutor = Extract<
|
|
18
|
+
Executor,
|
|
19
|
+
'claude' | 'claude_opus' | 'gpt_oss' | 'workers_ai' | 'groq' | 'cerebras_mid' | 'cerebras_reasoning'
|
|
20
|
+
>;
|
|
21
|
+
|
|
22
|
+
// ─── Route Shape ─────────────────────────────────────────────
|
|
23
|
+
|
|
24
|
+
export interface ExecutorRoute {
|
|
25
|
+
provider: LLMProviderName;
|
|
26
|
+
// Resolves the concrete model string at dispatch time — called with the live
|
|
27
|
+
// EdgeEnv so per-deployment env-var overrides and AI Gateway config are respected.
|
|
28
|
+
model: (env: EdgeEnv) => string;
|
|
29
|
+
// Semantic fallback executor to try when this provider errors (credit, rate-limit, auth).
|
|
30
|
+
// CONSUMER CONTRACT: when a fallback fires, the consumer must propagate actualExecutor
|
|
31
|
+
// back to the telemetry layer. executeWithAnthropicFailover (executors/index.ts:67)
|
|
32
|
+
// returns { actualExecutor } which dispatch.ts:363 uses to mutate plan.executor before
|
|
33
|
+
// the procedure store records the outcome. A routing-layer consumer must preserve this.
|
|
34
|
+
fallback?: LLMExecutor;
|
|
35
|
+
}
|
|
36
|
+
|
|
37
|
+
// ─── Route Table ─────────────────────────────────────────────
|
|
38
|
+
// Covers every LLMExecutor. Non-LLM executors (see above) are intentionally absent.
|
|
39
|
+
//
|
|
40
|
+
// Future consumer sketch (D.2 wiring session):
|
|
41
|
+
// const route = EXECUTOR_ROUTES[plan.executor as LLMExecutor];
|
|
42
|
+
// const provider = factory.get(route.provider); // only 'anthropic'|'cloudflare' today
|
|
43
|
+
// const model = route.model(env);
|
|
44
|
+
// try { result = await provider.generateResponse({ model, messages }); }
|
|
45
|
+
// catch { if (route.fallback) { /* re-dispatch, record actualExecutor */ } }
|
|
46
|
+
|
|
47
|
+
export const EXECUTOR_ROUTES: Record<LLMExecutor, ExecutorRoute> = {
|
|
48
|
+
claude: {
|
|
49
|
+
provider: 'anthropic',
|
|
50
|
+
model: (env) => env.claudeModel,
|
|
51
|
+
fallback: 'gpt_oss',
|
|
52
|
+
},
|
|
53
|
+
claude_opus: {
|
|
54
|
+
provider: 'anthropic',
|
|
55
|
+
model: (env) => env.opusModel,
|
|
56
|
+
// Falls back directly to gpt_oss — mirrors executeWithAnthropicFailover behavior.
|
|
57
|
+
// A two-hop chain (opus → claude → gpt_oss) is a possible future refinement.
|
|
58
|
+
fallback: 'gpt_oss',
|
|
59
|
+
},
|
|
60
|
+
gpt_oss: {
|
|
61
|
+
provider: 'cloudflare',
|
|
62
|
+
model: (env) => env.gptOssModel,
|
|
63
|
+
// Terminal fallback — no further fallback defined.
|
|
64
|
+
},
|
|
65
|
+
workers_ai: {
|
|
66
|
+
provider: 'cloudflare',
|
|
67
|
+
// Hardcoded in executeWorkersAi today; no env override.
|
|
68
|
+
model: () => '@cf/meta/llama-3.3-70b-instruct-fp8-fast',
|
|
69
|
+
},
|
|
70
|
+
groq: {
|
|
71
|
+
provider: 'groq',
|
|
72
|
+
// groqResponseModel = 8B (llama-3.1-8b-instant) — fast/cheap for greetings.
|
|
73
|
+
// Intentionally NOT groqModel (70B). See executors/groq.ts:12.
|
|
74
|
+
model: (env) => env.groqResponseModel,
|
|
75
|
+
},
|
|
76
|
+
cerebras_mid: {
|
|
77
|
+
// TODO: EdgeEnv has no cerebras fields yet. Add cerebrasApiKey + cerebrasModel
|
|
78
|
+
// when executors/cerebras.ts lands. Model name below is a placeholder.
|
|
79
|
+
provider: 'cerebras',
|
|
80
|
+
model: () => 'llama3.1-8b',
|
|
81
|
+
},
|
|
82
|
+
cerebras_reasoning: {
|
|
83
|
+
// TODO: EdgeEnv has no cerebras fields yet. Add cerebrasApiKey + cerebrasReasoningModel
|
|
84
|
+
// when executors/cerebras.ts lands. Model name below is a placeholder.
|
|
85
|
+
provider: 'cerebras',
|
|
86
|
+
model: () => 'qwen-3-32b',
|
|
87
|
+
},
|
|
88
|
+
};
|
|
89
|
+
|
|
90
|
+
// ─── Lookup Helper ────────────────────────────────────────────
|
|
91
|
+
// Returns null for non-LLM executors (direct, claude_code, tarotscript, composite).
|
|
92
|
+
// Dispatch uses the null path to keep its own branches for those cases.
|
|
93
|
+
export function getExecutorRoute(executor: Executor): ExecutorRoute | null {
|
|
94
|
+
return (EXECUTOR_ROUTES as Record<string, ExecutorRoute>)[executor] ?? null;
|
|
95
|
+
}
|
package/src/kernel/executors/groq.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { buildLLMProviderFactory } from '../provider-factory.js';
|
|
2
2
|
import { buildGroqSystemPrompt } from '../../operator/prompt-builder.js';
|
|
3
3
|
import type { KernelIntent } from '../types.js';
|
|
4
4
|
import type { EdgeEnv } from '../dispatch.js';
|
|
@@ -7,12 +7,13 @@ export async function executeGroq(
|
|
|
7
7
|
intent: KernelIntent,
|
|
8
8
|
env: EdgeEnv,
|
|
9
9
|
): Promise<{ text: string; cost: number }> {
|
|
10
|
-
const
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
10
|
+
const factory = buildLLMProviderFactory(env);
|
|
11
|
+
const result = await factory.generateResponse({
|
|
12
|
+
messages: [{ role: 'user', content: intent.raw }],
|
|
13
|
+
model: env.groqResponseModel, // 8B — fast/cheap for greetings
|
|
14
|
+
systemPrompt: buildGroqSystemPrompt(),
|
|
15
|
+
temperature: 0.3,
|
|
16
|
+
maxTokens: 500,
|
|
17
|
+
});
|
|
18
|
+
return { text: result.message || '(no response)', cost: result.usage.cost };
|
|
18
19
|
}
|
package/src/kernel/executors/index.ts
CHANGED
|
@@ -2,14 +2,31 @@ import { McpClient, McpRegistry } from '../../mcp-client.js';
|
|
|
2
2
|
import { operatorConfig } from '../../operator/index.js';
|
|
3
3
|
import type { Executor } from '../types.js';
|
|
4
4
|
import type { EdgeEnv } from '../dispatch.js';
|
|
5
|
-
import {
|
|
5
|
+
import type { KernelIntent } from '../types.js';
|
|
6
6
|
|
|
7
|
-
//
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
7
|
+
// Import then re-export so EXECUTOR_FNS can hold live references
|
|
8
|
+
import { executeClaude, executeClaudeOpus, executeClaudeStream } from './claude.js';
|
|
9
|
+
import { executeGroq } from './groq.js';
|
|
10
|
+
import { executeWorkersAi, executeGptOss } from './workers-ai.js';
|
|
11
|
+
import { executeDirect, executeCodeTask } from './direct.js';
|
|
12
|
+
import { executeTarotScript } from './tarotscript.js';
|
|
13
|
+
export { executeClaude, executeClaudeOpus, executeClaudeStream };
|
|
14
|
+
export { executeGroq };
|
|
15
|
+
export { executeWorkersAi, executeGptOss };
|
|
16
|
+
export { executeDirect, executeCodeTask };
|
|
17
|
+
export { executeTarotScript };
|
|
18
|
+
|
|
19
|
+
// ─── Uniform Executor Dispatch Map ──────────────────────────
|
|
20
|
+
// Executors that share the (intent, env) → {text, cost} signature.
|
|
21
|
+
// Used by dispatch to drive simple cases from the route table,
|
|
22
|
+
// eliminating per-executor switch branches for groq/workers_ai/gpt_oss.
|
|
23
|
+
// claude is included for the shadow exploration path (no failover there).
|
|
24
|
+
export const EXECUTOR_FNS: Partial<Record<Executor, (intent: KernelIntent, env: EdgeEnv) => Promise<{ text: string; cost: number }>>> = {
|
|
25
|
+
groq: executeGroq,
|
|
26
|
+
workers_ai: executeWorkersAi,
|
|
27
|
+
gpt_oss: executeGptOss,
|
|
28
|
+
claude: executeClaude,
|
|
29
|
+
};
|
|
13
30
|
|
|
14
31
|
// ─── MCP Registry ────────────────────────────────────────────
|
|
15
32
|
|
package/src/kernel/executors/workers-ai.ts
CHANGED
|
@@ -1,54 +1,197 @@
|
|
|
1
|
-
import {
|
|
2
|
-
import { McpClient } from '../../mcp-client.js';
|
|
3
|
-
import { operatorConfig } from '../../operator/index.js';
|
|
4
|
-
import { buildGroqSystemPrompt } from '../../operator/prompt-builder.js';
|
|
5
|
-
import
|
|
6
|
-
import
|
|
7
|
-
import {
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
intent.
|
|
53
|
-
|
|
54
|
-
|
|
1
|
+
import type { LLMMessage, ToolResult as LLMToolResult } from '@stackbilt/llm-providers';
|
|
2
|
+
import { McpClient } from '../../mcp-client.js';
|
|
3
|
+
import { operatorConfig } from '../../operator/index.js';
|
|
4
|
+
import { buildGroqSystemPrompt } from '../../operator/prompt-builder.js';
|
|
5
|
+
import { buildContext, handleInProcessTool, callMcpWithRetry, resolveMcpTool } from '../../claude.js';
|
|
6
|
+
import { toOpenAiTools } from '../../workers-ai-chat.js';
|
|
7
|
+
import { getConversationHistory, budgetConversationHistory } from '../memory/index.js';
|
|
8
|
+
import { buildLLMProviderFactory } from '../provider-factory.js';
|
|
9
|
+
import type { KernelIntent } from '../types.js';
|
|
10
|
+
import type { EdgeEnv } from '../dispatch.js';
|
|
11
|
+
import { buildMcpRegistry } from './index.js';
|
|
12
|
+
|
|
13
|
+
export async function executeWorkersAi(
|
|
14
|
+
intent: KernelIntent,
|
|
15
|
+
env: EdgeEnv,
|
|
16
|
+
): Promise<{ text: string; cost: number }> {
|
|
17
|
+
if (!env.ai) throw new Error('Workers AI binding not available');
|
|
18
|
+
const factory = buildLLMProviderFactory(env);
|
|
19
|
+
const result = await factory.generateResponse({
|
|
20
|
+
messages: [{ role: 'user', content: intent.raw }],
|
|
21
|
+
model: '@cf/meta/llama-3.3-70b-instruct-fp8-fast',
|
|
22
|
+
systemPrompt: buildGroqSystemPrompt(),
|
|
23
|
+
});
|
|
24
|
+
return { text: result.message || '(no response)', cost: result.usage.cost };
|
|
25
|
+
}
|
|
26
|
+
|
|
27
|
+
const GPT_OSS_TOOL_ROUNDS = 8; // 10 max − 2 reserved for summary
|
|
28
|
+
|
|
29
|
+
export async function executeGptOss(
|
|
30
|
+
intent: KernelIntent,
|
|
31
|
+
env: EdgeEnv,
|
|
32
|
+
): Promise<{ text: string; cost: number }> {
|
|
33
|
+
if (!env.ai) throw new Error('Workers AI binding not available');
|
|
34
|
+
|
|
35
|
+
const factory = buildLLMProviderFactory(env);
|
|
36
|
+
const registry = buildMcpRegistry(env);
|
|
37
|
+
const mcpClient = new McpClient({
|
|
38
|
+
url: operatorConfig.integrations.bizops.fallbackUrl,
|
|
39
|
+
token: env.bizopsToken,
|
|
40
|
+
prefix: 'bizops',
|
|
41
|
+
fetcher: env.bizopsFetcher,
|
|
42
|
+
rpcPath: '/rpc',
|
|
43
|
+
});
|
|
44
|
+
|
|
45
|
+
const pseudoConfig = {
|
|
46
|
+
apiKey: '',
|
|
47
|
+
model: env.gptOssModel,
|
|
48
|
+
mcpClient,
|
|
49
|
+
mcpRegistry: registry,
|
|
50
|
+
db: env.db,
|
|
51
|
+
channel: 'web',
|
|
52
|
+
conversationId: intent.source.threadId,
|
|
53
|
+
githubToken: env.githubToken,
|
|
54
|
+
githubRepo: env.githubRepo,
|
|
55
|
+
braveApiKey: env.braveApiKey,
|
|
56
|
+
userQuery: intent.raw,
|
|
57
|
+
};
|
|
58
|
+
const { systemPrompt, tools: anthropicTools } = await buildContext(pseudoConfig);
|
|
59
|
+
// toOpenAiTools output matches factory Tool shape exactly
|
|
60
|
+
const tools = toOpenAiTools(anthropicTools) as Parameters<typeof factory.generateResponse>[0]['tools'];
|
|
61
|
+
|
|
62
|
+
const history = intent.source.threadId
|
|
63
|
+
? await getConversationHistory(env.db, intent.source.threadId, 10)
|
|
64
|
+
: [];
|
|
65
|
+
const priorHistory = history.length > 0 && history[history.length - 1]?.role === 'user'
|
|
66
|
+
? history.slice(0, -1)
|
|
67
|
+
: history;
|
|
68
|
+
|
|
69
|
+
const messages: LLMMessage[] = [
|
|
70
|
+
{ role: 'system', content: systemPrompt },
|
|
71
|
+
...budgetConversationHistory(priorHistory).map(m => ({
|
|
72
|
+
role: m.role as 'user' | 'assistant',
|
|
73
|
+
content: m.content,
|
|
74
|
+
})),
|
|
75
|
+
{ role: 'user', content: intent.raw },
|
|
76
|
+
];
|
|
77
|
+
|
|
78
|
+
let totalCost = 0;
|
|
79
|
+
|
|
80
|
+
// Phase 1: tool-calling rounds
|
|
81
|
+
for (let round = 0; round < GPT_OSS_TOOL_ROUNDS; round++) {
|
|
82
|
+
const result = await factory.generateResponse({
|
|
83
|
+
messages,
|
|
84
|
+
model: env.gptOssModel,
|
|
85
|
+
tools,
|
|
86
|
+
maxTokens: 4096,
|
|
87
|
+
temperature: 0.2,
|
|
88
|
+
topP: 0.9,
|
|
89
|
+
frequencyPenalty: 0.3,
|
|
90
|
+
});
|
|
91
|
+
totalCost += result.usage.cost;
|
|
92
|
+
|
|
93
|
+
if (!result.toolCalls || result.toolCalls.length === 0) {
|
|
94
|
+
return { text: result.message || '(no response)', cost: totalCost };
|
|
95
|
+
}
|
|
96
|
+
|
|
97
|
+
const toolResults: LLMToolResult[] = [];
|
|
98
|
+
for (const call of result.toolCalls) {
|
|
99
|
+
let args: Record<string, unknown> = {};
|
|
100
|
+
try { args = JSON.parse(call.function.arguments); } catch { /* empty args */ }
|
|
101
|
+
|
|
102
|
+
let output: string;
|
|
103
|
+
const inProcess = await handleInProcessTool(
|
|
104
|
+
env.db, call.function.name, args,
|
|
105
|
+
env.githubToken, env.githubRepo, env.braveApiKey,
|
|
106
|
+
undefined, undefined, env.memoryBinding,
|
|
107
|
+
{ resendApiKey: env.resendApiKey, resendApiKeyPersonal: env.resendApiKeyPersonal },
|
|
108
|
+
);
|
|
109
|
+
|
|
110
|
+
if (inProcess !== null) {
|
|
111
|
+
output = inProcess;
|
|
112
|
+
} else {
|
|
113
|
+
const resolved = resolveMcpTool(call.function.name, mcpClient, registry);
|
|
114
|
+
if (resolved) {
|
|
115
|
+
output = await callMcpWithRetry(resolved.client, resolved.mcpName, args);
|
|
116
|
+
} else {
|
|
117
|
+
output = `Unknown tool: ${call.function.name}`;
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
toolResults.push({ id: call.id, output });
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// Attach tool results to the assistant message; cloudflare provider expands
|
|
124
|
+
// toolResults into separate role:'tool' messages when serializing the next request
|
|
125
|
+
messages.push({
|
|
126
|
+
role: 'assistant',
|
|
127
|
+
content: result.message,
|
|
128
|
+
toolCalls: result.toolCalls,
|
|
129
|
+
toolResults,
|
|
130
|
+
});
|
|
131
|
+
}
|
|
132
|
+
|
|
133
|
+
// Phase 2: condense tool history and generate a text-only summary.
|
|
134
|
+
// Condensed messages carry no toolCalls/toolResults, so the factory's
|
|
135
|
+
// usesTools check is false and no tool definitions are sent — preserving
|
|
136
|
+
// the GPT-OSS "no tools in Phase 2" invariant.
|
|
137
|
+
const condensed: LLMMessage[] = [messages[0]]; // system prompt
|
|
138
|
+
const toolFindings: string[] = [];
|
|
139
|
+
let lastAssistantText = '';
|
|
140
|
+
|
|
141
|
+
for (let i = 1; i < messages.length; i++) {
|
|
142
|
+
const msg = messages[i];
|
|
143
|
+
if (msg.role === 'user') {
|
|
144
|
+
condensed.push({ role: 'user', content: msg.content });
|
|
145
|
+
} else if (msg.role === 'assistant') {
|
|
146
|
+
if (msg.content?.trim().length) lastAssistantText = msg.content;
|
|
147
|
+
if (msg.content) toolFindings.push(msg.content);
|
|
148
|
+
if (msg.toolResults) {
|
|
149
|
+
for (const tr of msg.toolResults) {
|
|
150
|
+
const truncated = tr.output.length > 2000
|
|
151
|
+
? tr.output.slice(0, 2000) + '... [truncated]'
|
|
152
|
+
: tr.output;
|
|
153
|
+
toolFindings.push(truncated);
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
159
|
+
if (toolFindings.length > 0) {
|
|
160
|
+
const BUDGET = 30_000;
|
|
161
|
+
let accumulated = '';
|
|
162
|
+
for (const finding of toolFindings) {
|
|
163
|
+
if (accumulated.length + finding.length > BUDGET) {
|
|
164
|
+
accumulated += '\n[... additional findings truncated for summary]';
|
|
165
|
+
break;
|
|
166
|
+
}
|
|
167
|
+
accumulated += '\n' + finding;
|
|
168
|
+
}
|
|
169
|
+
condensed.push({ role: 'assistant', content: `Here is what I gathered:\n${accumulated.trim()}` });
|
|
170
|
+
}
|
|
171
|
+
condensed.push({ role: 'user', content: 'Based on everything you have gathered from the tools above, provide your complete answer now. Summarize your findings clearly and concisely.' });
|
|
172
|
+
|
|
173
|
+
let summaryText: string | undefined;
|
|
174
|
+
try {
|
|
175
|
+
const summaryResult = await factory.generateResponse({
|
|
176
|
+
messages: condensed,
|
|
177
|
+
model: env.gptOssModel,
|
|
178
|
+
maxTokens: 4096,
|
|
179
|
+
temperature: 0.2,
|
|
180
|
+
topP: 0.9,
|
|
181
|
+
frequencyPenalty: 0.3,
|
|
182
|
+
});
|
|
183
|
+
totalCost += summaryResult.usage.cost;
|
|
184
|
+
summaryText = summaryResult.message || undefined;
|
|
185
|
+
if (!summaryText) {
|
|
186
|
+
console.warn('[executeGptOss] Summary phase returned no text.');
|
|
187
|
+
}
|
|
188
|
+
} catch (err) {
|
|
189
|
+
console.error('[executeGptOss] Summary phase failed:', err instanceof Error ? err.message : String(err));
|
|
190
|
+
}
|
|
191
|
+
|
|
192
|
+
if (!summaryText && lastAssistantText.length > 20) {
|
|
193
|
+
summaryText = lastAssistantText;
|
|
194
|
+
}
|
|
195
|
+
|
|
196
|
+
return { text: summaryText ?? '(could not generate summary)', cost: totalCost };
|
|
197
|
+
}
|
package/src/kernel/provider-factory.ts
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import { createLLMProviderFactory, type LLMProviderFactory } from '@stackbilt/llm-providers';
|
|
2
|
+
import type { EdgeEnv } from './dispatch.js';
|
|
3
|
+
|
|
4
|
+
// ─── Fallback ownership ──────────────────────────────────────
|
|
5
|
+
// EXECUTOR_ROUTES (executor-router.ts) owns the fallback policy, not this factory.
|
|
6
|
+
// Factory-level fallbackRules are left empty to prevent double-firing:
|
|
7
|
+
// - Router fallback re-dispatches with a different *semantic executor* (different
|
|
8
|
+
// model, cost ceiling, telemetry tag) and must surface actualExecutor to the
|
|
9
|
+
// procedure store (see executeWithAnthropicFailover in executors/index.ts:67).
|
|
10
|
+
// - A factory-level fallback would silently swap providers inside a single call,
|
|
11
|
+
// bypassing actualExecutor tracking and producing wrong telemetry.
|
|
12
|
+
// Circuit breaker and retries operate below the executor boundary and do not
|
|
13
|
+
// interfere with executor-level fallback routing — they are kept enabled.
|
|
14
|
+
|
|
15
|
+
export function buildLLMProviderFactory(env: EdgeEnv): LLMProviderFactory {
|
|
16
|
+
return createLLMProviderFactory({
|
|
17
|
+
anthropic: {
|
|
18
|
+
apiKey: env.anthropicApiKey,
|
|
19
|
+
baseUrl: env.anthropicBaseUrl,
|
|
20
|
+
},
|
|
21
|
+
// Cloudflare Workers AI: wired when the AI binding is present.
|
|
22
|
+
// The factory uses the `ai` binding directly for Workers AI inference;
|
|
23
|
+
// no accountId is required for service-binding usage.
|
|
24
|
+
cloudflare: env.ai ? { ai: env.ai } : undefined,
|
|
25
|
+
groq: {
|
|
26
|
+
apiKey: env.groqApiKey,
|
|
27
|
+
baseUrl: env.groqBaseUrl || undefined,
|
|
28
|
+
},
|
|
29
|
+
// Cerebras: no EdgeEnv fields yet (cerebrasApiKey, cerebrasModel).
|
|
30
|
+
// Add here when executors/cerebras.ts and the corresponding EdgeEnv fields land.
|
|
31
|
+
|
|
32
|
+
fallbackRules: [],
|
|
33
|
+
enableCircuitBreaker: true,
|
|
34
|
+
enableRetries: true,
|
|
35
|
+
});
|
|
36
|
+
}
|
package/src/routes/observability.ts
CHANGED
|
@@ -3,9 +3,16 @@
|
|
|
3
3
|
import { Hono } from 'hono';
|
|
4
4
|
import type { Env } from '../types.js';
|
|
5
5
|
import { getAllProceduresWithDerivedStats, getActiveAgendaItems } from '../kernel/memory/index.js';
|
|
6
|
+
import { detectEntropy } from '../kernel/scheduled/entropy.js';
|
|
7
|
+
import { buildEdgeEnv } from '../edge-env.js';
|
|
6
8
|
|
|
7
9
|
const observability = new Hono<{ Bindings: Env }>();
|
|
8
10
|
|
|
11
|
+
function boundedDays(value: string | undefined, fallback: number, max: number): number {
|
|
12
|
+
const days = parseInt(value ?? String(fallback), 10);
|
|
13
|
+
return Number.isNaN(days) || days < 1 || days > max ? fallback : days;
|
|
14
|
+
}
|
|
15
|
+
|
|
9
16
|
// ─── Shadow Write Stats ─────────────────────────────────────
|
|
10
17
|
|
|
11
18
|
observability.get('/api/shadow-stats', async (c) => {
|
|
@@ -46,6 +53,124 @@ observability.get('/api/shadow-read-stats', async (c) => {
|
|
|
46
53
|
return c.json({ days, summary, by_site: bySite.results, recent: recent.results });
|
|
47
54
|
});
|
|
48
55
|
|
|
56
|
+
// ─── Entropy ────────────────────────────────────────────────
|
|
57
|
+
|
|
58
|
+
observability.get('/api/entropy', async (c) => {
|
|
59
|
+
// detectEntropy needs the full EdgeEnv (API keys, model config) — not just c.env.DB
|
|
60
|
+
const env = buildEdgeEnv(c.env);
|
|
61
|
+
const report = await detectEntropy(env);
|
|
62
|
+
return c.json(report);
|
|
63
|
+
});
|
|
64
|
+
|
|
65
|
+
// ─── Shadow Read Drift ──────────────────────────────────────
|
|
66
|
+
|
|
67
|
+
observability.get('/api/shadow-read-drift', async (c) => {
|
|
68
|
+
const days = boundedDays(c.req.query('days'), 7, 30);
|
|
69
|
+
const reader = c.req.query('reader');
|
|
70
|
+
|
|
71
|
+
const latestWhere = reader
|
|
72
|
+
? "WHERE reader = ? AND sampled_at > datetime('now', '-' || ? || ' days')"
|
|
73
|
+
: "WHERE sampled_at > datetime('now', '-' || ? || ' days')";
|
|
74
|
+
// latestBindings: reader-first to match latestWhere (WHERE reader = ? AND sampled_at...)
|
|
75
|
+
// windowBindings: days-first to match the WHERE sampled_at... AND reader = ? pattern used in distribution/topDrifters
|
|
76
|
+
const latestBindings = reader ? [reader, days] : [days];
|
|
77
|
+
const windowBindings = reader ? [days, reader] : [days];
|
|
78
|
+
|
|
79
|
+
const [distribution, readiness, topDrifters] = await Promise.all([
|
|
80
|
+
c.env.DB.prepare(`
|
|
81
|
+
WITH ranked AS (
|
|
82
|
+
SELECT reader,
|
|
83
|
+
ABS((cached_count - pre_tier_count) - derived_count) AS count_abs_drift,
|
|
84
|
+
ABS(cached_avg_latency_ms - derived_avg_latency_ms) AS latency_abs_drift,
|
|
85
|
+
ABS(cached_avg_cost - derived_avg_cost) AS cost_abs_drift,
|
|
86
|
+
ROW_NUMBER() OVER (PARTITION BY reader ORDER BY ABS((cached_count - pre_tier_count) - derived_count)) AS count_rank,
|
|
87
|
+
ROW_NUMBER() OVER (PARTITION BY reader ORDER BY ABS(cached_avg_latency_ms - derived_avg_latency_ms)) AS latency_rank,
|
|
88
|
+
ROW_NUMBER() OVER (PARTITION BY reader ORDER BY ABS(cached_avg_cost - derived_avg_cost)) AS cost_rank,
|
|
89
|
+
COUNT(*) OVER (PARTITION BY reader) AS n
|
|
90
|
+
FROM shadow_read_drift
|
|
91
|
+
WHERE sampled_at > datetime('now', '-' || ? || ' days')
|
|
92
|
+
${reader ? 'AND reader = ?' : ''}
|
|
93
|
+
)
|
|
94
|
+
SELECT reader,
|
|
95
|
+
MAX(n) AS samples,
|
|
96
|
+
ROUND(AVG(count_abs_drift), 2) AS avg_abs_count_drift,
|
|
97
|
+
ROUND(MAX(count_abs_drift), 2) AS max_abs_count_drift,
|
|
98
|
+
ROUND(MAX(CASE WHEN count_rank = MAX(1, (n + 1) / 2) THEN count_abs_drift END), 2) AS p50_count_drift,
|
|
99
|
+
ROUND(MAX(CASE WHEN count_rank = MAX(1, (n * 19 + 19) / 20) THEN count_abs_drift END), 2) AS p95_count_drift,
|
|
100
|
+
ROUND(MAX(CASE WHEN count_rank = MAX(1, (n * 99 + 99) / 100) THEN count_abs_drift END), 2) AS p99_count_drift,
|
|
101
|
+
ROUND(AVG(latency_abs_drift), 2) AS avg_latency_drift_ms,
|
|
102
|
+
ROUND(MAX(latency_abs_drift), 2) AS max_latency_drift_ms,
|
|
103
|
+
ROUND(MAX(CASE WHEN latency_rank = MAX(1, (n + 1) / 2) THEN latency_abs_drift END), 2) AS p50_latency_drift_ms,
|
|
104
|
+
ROUND(MAX(CASE WHEN latency_rank = MAX(1, (n * 19 + 19) / 20) THEN latency_abs_drift END), 2) AS p95_latency_drift_ms,
|
|
105
|
+
ROUND(MAX(CASE WHEN latency_rank = MAX(1, (n * 99 + 99) / 100) THEN latency_abs_drift END), 2) AS p99_latency_drift_ms,
|
|
106
|
+
ROUND(AVG(cost_abs_drift), 6) AS avg_cost_drift,
|
|
107
|
+
ROUND(MAX(cost_abs_drift), 6) AS max_cost_drift,
|
|
108
|
+
ROUND(MAX(CASE WHEN cost_rank = MAX(1, (n + 1) / 2) THEN cost_abs_drift END), 6) AS p50_cost_drift,
|
|
109
|
+
ROUND(MAX(CASE WHEN cost_rank = MAX(1, (n * 19 + 19) / 20) THEN cost_abs_drift END), 6) AS p95_cost_drift,
|
|
110
|
+
ROUND(MAX(CASE WHEN cost_rank = MAX(1, (n * 99 + 99) / 100) THEN cost_abs_drift END), 6) AS p99_cost_drift
|
|
111
|
+
FROM ranked
|
|
112
|
+
GROUP BY reader
|
|
113
|
+
`).bind(...windowBindings).all(),
|
|
114
|
+
|
|
115
|
+
c.env.DB.prepare(`
|
|
116
|
+
WITH latest AS (
|
|
117
|
+
SELECT reader, task_pattern, cached_count, cached_success_count,
|
|
118
|
+
cached_avg_latency_ms, cached_avg_cost,
|
|
119
|
+
derived_count, derived_success_count,
|
|
120
|
+
derived_avg_latency_ms, derived_avg_cost,
|
|
121
|
+
pre_tier_count,
|
|
122
|
+
ROW_NUMBER() OVER (PARTITION BY reader, task_pattern ORDER BY sampled_at DESC) as rn
|
|
123
|
+
FROM shadow_read_drift
|
|
124
|
+
${latestWhere}
|
|
125
|
+
)
|
|
126
|
+
SELECT
|
|
127
|
+
COUNT(*) as total_pairs,
|
|
128
|
+
COUNT(DISTINCT task_pattern) as distinct_procedures,
|
|
129
|
+
SUM(CASE WHEN pre_tier_count = 0 THEN 1 ELSE 0 END) as clean_pairs,
|
|
130
|
+
SUM(CASE
|
|
131
|
+
WHEN pre_tier_count = 0
|
|
132
|
+
AND cached_count = derived_count
|
|
133
|
+
AND cached_success_count = derived_success_count
|
|
134
|
+
AND ABS(cached_avg_latency_ms - derived_avg_latency_ms) < 10
|
|
135
|
+
AND ABS(cached_avg_cost - derived_avg_cost) < 0.0001
|
|
136
|
+
THEN 1 ELSE 0 END) as ready_pairs
|
|
137
|
+
FROM latest WHERE rn = 1
|
|
138
|
+
`).bind(...latestBindings).first(),
|
|
139
|
+
|
|
140
|
+
c.env.DB.prepare(`
|
|
141
|
+
WITH latest_per_pattern AS (
|
|
142
|
+
SELECT task_pattern, reader,
|
|
143
|
+
cached_count, derived_count, pre_tier_count,
|
|
144
|
+
cached_avg_latency_ms, derived_avg_latency_ms,
|
|
145
|
+
cached_avg_cost, derived_avg_cost,
|
|
146
|
+
sampled_at,
|
|
147
|
+
ROW_NUMBER() OVER (PARTITION BY task_pattern, reader ORDER BY sampled_at DESC) as rn
|
|
148
|
+
FROM shadow_read_drift
|
|
149
|
+
WHERE sampled_at > datetime('now', '-' || ? || ' days')
|
|
150
|
+
${reader ? 'AND reader = ?' : ''}
|
|
151
|
+
)
|
|
152
|
+
SELECT task_pattern, reader,
|
|
153
|
+
cached_count, derived_count, pre_tier_count,
|
|
154
|
+
ABS((cached_count - pre_tier_count) - derived_count) as count_drift,
|
|
155
|
+
ROUND(ABS(cached_avg_latency_ms - derived_avg_latency_ms), 1) as latency_drift,
|
|
156
|
+
ROUND(ABS(cached_avg_cost - derived_avg_cost), 6) as cost_drift,
|
|
157
|
+
sampled_at
|
|
158
|
+
FROM latest_per_pattern
|
|
159
|
+
WHERE rn = 1
|
|
160
|
+
ORDER BY ABS((cached_count - pre_tier_count) - derived_count) DESC
|
|
161
|
+
LIMIT 15
|
|
162
|
+
`).bind(...windowBindings).all(),
|
|
163
|
+
]);
|
|
164
|
+
|
|
165
|
+
return c.json({
|
|
166
|
+
days,
|
|
167
|
+
reader_filter: reader ?? null,
|
|
168
|
+
distribution: distribution.results,
|
|
169
|
+
readiness,
|
|
170
|
+
top_drifters: topDrifters.results,
|
|
171
|
+
});
|
|
172
|
+
});
|
|
173
|
+
|
|
49
174
|
// ─── Agenda ─────────────────────────────────────────────────
|
|
50
175
|
|
|
51
176
|
observability.get('/agenda', async (c) => {
|