@stackbilt/aegis-core 0.6.2 → 0.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@stackbilt/aegis-core",
3
- "version": "0.6.2",
3
+ "version": "0.6.4",
4
4
  "description": "Persistent AI agent framework for Cloudflare Workers. Multi-tier memory, autonomous goals, dreaming cycles, MCP native.",
5
5
  "license": "Apache-2.0",
6
6
  "publishConfig": {
@@ -33,6 +33,8 @@
33
33
  "./kernel/argus-correlation": "./src/kernel/argus-correlation.ts",
34
34
  "./kernel/port": "./src/kernel/port.ts",
35
35
  "./kernel/executor-port": "./src/kernel/executor-port.ts",
36
+ "./kernel/executor-router": "./src/kernel/executor-router.ts",
37
+ "./kernel/provider-factory": "./src/kernel/provider-factory.ts",
36
38
  "./kernel/executors": "./src/kernel/executors/index.ts",
37
39
  "./kernel/scheduled": "./src/kernel/scheduled/index.ts",
38
40
  "./kernel/scheduled/dreaming": "./src/kernel/scheduled/dreaming.ts",
@@ -84,7 +86,7 @@
84
86
  "@cloudflare/voice": "^0.1.3",
85
87
  "@cloudflare/workers-oauth-provider": "^0.2.4",
86
88
  "@stackbilt/contracts": "^0.2.1",
87
- "@stackbilt/llm-providers": "^1.6.0",
89
+ "@stackbilt/llm-providers": "^1.6.4",
88
90
  "agents": "^0.12.3",
89
91
  "hono": "^4.12.12",
90
92
  "zod": "^4.4.3"
package/src/groq.ts CHANGED
@@ -30,11 +30,16 @@ export async function askGroq(
30
30
  }
31
31
 
32
32
  const data = await response.json<{
33
- choices: { message: { content: string } }[];
33
+ choices: { message: { content: unknown } }[];
34
34
  usage?: { total_tokens: number };
35
35
  }>();
36
36
 
37
- return data.choices[0]?.message?.content ?? '';
37
+ const content = data.choices[0]?.message?.content;
38
+ if (typeof content === 'string') return content;
39
+ if (content == null) return '';
40
+ // Some Groq-routed models (notably gpt-oss tool-calling variants) return content
41
+ // as an array of content blocks. Coerce so downstream string operations don't crash.
42
+ return typeof content === 'object' ? JSON.stringify(content) : String(content);
38
43
  }
39
44
 
40
45
  // ─── Logprobs-enabled classification ─────────────────────────
@@ -8,17 +8,14 @@ import { executeComposite } from '../composite.js';
8
8
  import { buildGroqSystemPrompt } from '../operator/prompt-builder.js';
9
9
  import type { KernelIntent, DispatchResult, Executor } from './types.js';
10
10
  import {
11
- executeClaude,
12
- executeClaudeOpus,
13
- executeClaudeStream,
14
- executeGroq,
15
- executeWorkersAi,
16
11
  executeGptOss,
12
+ executeClaudeStream,
17
13
  executeDirect,
18
14
  executeCodeTask,
19
15
  executeWithAnthropicFailover,
20
16
  executeTarotScript,
21
17
  buildMcpRegistry,
18
+ EXECUTOR_FNS,
22
19
  } from './executors/index.js';
23
20
  // ─── Edge Environment ────────────────────────────────────────
24
21
 
@@ -57,6 +54,7 @@ export interface EdgeEnv {
57
54
  codebeastFetcher?: Fetcher;
58
55
  mindspringFetcher?: Fetcher;
59
56
  mindspringToken?: string;
57
+ mindspringIngestToken?: string;
60
58
  devtoApiKey?: string;
61
59
  gaCredentials?: string;
62
60
  blueskyHandle?: string;
@@ -366,15 +364,6 @@ async function probeAndExecute(
366
364
  case 'composite':
367
365
  result = await executeComposite(intent, env, buildMcpRegistry(env));
368
366
  break;
369
- case 'gpt_oss':
370
- result = await executeGptOss(intent, env);
371
- break;
372
- case 'workers_ai':
373
- result = await executeWorkersAi(intent, env);
374
- break;
375
- case 'groq':
376
- result = await executeGroq(intent, env);
377
- break;
378
367
  case 'direct':
379
368
  result = await executeDirect(intent, env);
380
369
  break;
@@ -384,8 +373,11 @@ async function probeAndExecute(
384
373
  case 'tarotscript':
385
374
  result = await executeTarotScript(intent, env);
386
375
  break;
387
- default:
388
- throw new Error(`Unknown executor: ${plan.executor}`);
376
+ default: {
377
+ const fn = EXECUTOR_FNS[plan.executor as Executor];
378
+ if (!fn) throw new Error(`Unknown executor: ${plan.executor}`);
379
+ result = await fn(intent, env);
380
+ }
389
381
  }
390
382
 
391
383
  // For streaming non-Claude executors, emit full text as single delta
@@ -464,21 +456,10 @@ async function tryShadowExploration(
464
456
  try {
465
457
  // Clone intent to avoid mutation
466
458
  const shadowIntent: KernelIntent = { ...intent, classified: shadowExecutor };
467
- let result: { text: string; cost: number };
468
-
469
- switch (shadowExecutor) {
470
- case 'gpt_oss':
471
- result = await executeGptOss(shadowIntent, env);
472
- break;
473
- case 'workers_ai':
474
- result = await executeWorkersAi(shadowIntent, env);
475
- break;
476
- case 'claude':
477
- result = await executeClaude(shadowIntent, env);
478
- break;
479
- default:
480
- return;
481
- }
459
+
460
+ const fn = EXECUTOR_FNS[shadowExecutor];
461
+ if (!fn) return;
462
+ const result = await fn(shadowIntent, env);
482
463
 
483
464
  const passed = shadowQualityPass(primaryText, result.text);
484
465
  const outcome = passed ? 'success' : 'failure';
@@ -0,0 +1,95 @@
1
+ import type { EdgeEnv } from './dispatch.js';
2
+ import type { Executor } from './types.js';
3
+
4
+ // ─── Provider Names ──────────────────────────────────────────
5
+ // 'anthropic' and 'cloudflare' are wired in @stackbilt/llm-providers v1.6.0.
6
+ // 'groq' is configured in provider-factory.ts; 'cerebras' is forward-declared — no LLMProviderFactory entry yet.
7
+ // A future session can wire them when provider support lands.
8
+ export type LLMProviderName = 'anthropic' | 'cloudflare' | 'groq' | 'cerebras';
9
+
10
+ // ─── LLM Executor Subset ─────────────────────────────────────
11
+ // These are the executors that call an external LLM provider.
12
+ // Excluded from EXECUTOR_ROUTES (dispatch keeps its own branches):
13
+ // 'direct' — returns a rule-based response without an LLM call
14
+ // 'claude_code' — spins a Claude Code CLI session, not a provider call
15
+ // 'tarotscript' — service-binding fetcher, not an LLM call
16
+ // 'composite' — orchestrates multiple executors; no single provider entry
17
+ export type LLMExecutor = Extract<
18
+ Executor,
19
+ 'claude' | 'claude_opus' | 'gpt_oss' | 'workers_ai' | 'groq' | 'cerebras_mid' | 'cerebras_reasoning'
20
+ >;
21
+
22
+ // ─── Route Shape ─────────────────────────────────────────────
23
+
24
+ export interface ExecutorRoute {
25
+ provider: LLMProviderName;
26
+ // Resolves the concrete model string at dispatch time — called with the live
27
+ // EdgeEnv so per-deployment env-var overrides and AI Gateway config are respected.
28
+ model: (env: EdgeEnv) => string;
29
+ // Semantic fallback executor to try when this provider errors (credit, rate-limit, auth).
30
+ // CONSUMER CONTRACT: when a fallback fires, the consumer must propagate actualExecutor
31
+ // back to the telemetry layer. executeWithAnthropicFailover (executors/index.ts:67)
32
+ // returns { actualExecutor } which dispatch.ts:363 uses to mutate plan.executor before
33
+ // the procedure store records the outcome. A routing-layer consumer must preserve this.
34
+ fallback?: LLMExecutor;
35
+ }
36
+
37
+ // ─── Route Table ─────────────────────────────────────────────
38
+ // Covers every LLMExecutor. Non-LLM executors (see above) are intentionally absent.
39
+ //
40
+ // Future consumer sketch (D.2 wiring session):
41
+ // const route = EXECUTOR_ROUTES[plan.executor as LLMExecutor];
42
+ // const provider = factory.get(route.provider); // only 'anthropic'|'cloudflare' today
43
+ // const model = route.model(env);
44
+ // try { result = await provider.generateResponse({ model, messages }); }
45
+ // catch { if (route.fallback) { /* re-dispatch, record actualExecutor */ } }
46
+
47
+ export const EXECUTOR_ROUTES: Record<LLMExecutor, ExecutorRoute> = {
48
+ claude: {
49
+ provider: 'anthropic',
50
+ model: (env) => env.claudeModel,
51
+ fallback: 'gpt_oss',
52
+ },
53
+ claude_opus: {
54
+ provider: 'anthropic',
55
+ model: (env) => env.opusModel,
56
+ // Falls back directly to gpt_oss — mirrors executeWithAnthropicFailover behavior.
57
+ // A two-hop chain (opus → claude → gpt_oss) is a possible future refinement.
58
+ fallback: 'gpt_oss',
59
+ },
60
+ gpt_oss: {
61
+ provider: 'cloudflare',
62
+ model: (env) => env.gptOssModel,
63
+ // Terminal fallback — no further fallback defined.
64
+ },
65
+ workers_ai: {
66
+ provider: 'cloudflare',
67
+ // Hardcoded in executeWorkersAi today; no env override.
68
+ model: () => '@cf/meta/llama-3.3-70b-instruct-fp8-fast',
69
+ },
70
+ groq: {
71
+ provider: 'groq',
72
+ // groqResponseModel = 8B (llama-3.1-8b-instant) — fast/cheap for greetings.
73
+ // Intentionally NOT groqModel (70B). See executors/groq.ts:12.
74
+ model: (env) => env.groqResponseModel,
75
+ },
76
+ cerebras_mid: {
77
+ // TODO: EdgeEnv has no cerebras fields yet. Add cerebrasApiKey + cerebrasModel
78
+ // when executors/cerebras.ts lands. Model name below is a placeholder.
79
+ provider: 'cerebras',
80
+ model: () => 'llama3.1-8b',
81
+ },
82
+ cerebras_reasoning: {
83
+ // TODO: EdgeEnv has no cerebras fields yet. Add cerebrasApiKey + cerebrasReasoningModel
84
+ // when executors/cerebras.ts lands. Model name below is a placeholder.
85
+ provider: 'cerebras',
86
+ model: () => 'qwen-3-32b',
87
+ },
88
+ };
89
+
90
+ // ─── Lookup Helper ────────────────────────────────────────────
91
+ // Returns null for non-LLM executors (direct, claude_code, tarotscript, composite).
92
+ // Dispatch uses the null path to keep its own branches for those cases.
93
+ export function getExecutorRoute(executor: Executor): ExecutorRoute | null {
94
+ return (EXECUTOR_ROUTES as Record<string, ExecutorRoute>)[executor] ?? null;
95
+ }
@@ -1,4 +1,4 @@
1
- import { askGroq } from '../../groq.js';
1
+ import { buildLLMProviderFactory } from '../provider-factory.js';
2
2
  import { buildGroqSystemPrompt } from '../../operator/prompt-builder.js';
3
3
  import type { KernelIntent } from '../types.js';
4
4
  import type { EdgeEnv } from '../dispatch.js';
@@ -7,12 +7,13 @@ export async function executeGroq(
7
7
  intent: KernelIntent,
8
8
  env: EdgeEnv,
9
9
  ): Promise<{ text: string; cost: number }> {
10
- const text = await askGroq(
11
- env.groqApiKey,
12
- env.groqResponseModel, // 8B model for greetings — fast + cheap
13
- buildGroqSystemPrompt(),
14
- intent.raw,
15
- env.groqBaseUrl,
16
- );
17
- return { text, cost: 0.0001 };
10
+ const factory = buildLLMProviderFactory(env);
11
+ const result = await factory.generateResponse({
12
+ messages: [{ role: 'user', content: intent.raw }],
13
+ model: env.groqResponseModel, // 8B — fast/cheap for greetings
14
+ systemPrompt: buildGroqSystemPrompt(),
15
+ temperature: 0.3,
16
+ maxTokens: 500,
17
+ });
18
+ return { text: result.message || '(no response)', cost: result.usage.cost };
18
19
  }
@@ -2,14 +2,31 @@ import { McpClient, McpRegistry } from '../../mcp-client.js';
2
2
  import { operatorConfig } from '../../operator/index.js';
3
3
  import type { Executor } from '../types.js';
4
4
  import type { EdgeEnv } from '../dispatch.js';
5
- import { executeGptOss } from './workers-ai.js';
5
+ import type { KernelIntent } from '../types.js';
6
6
 
7
- // Re-export all executors
8
- export { executeClaude, executeClaudeOpus, executeClaudeStream } from './claude.js';
9
- export { executeGroq } from './groq.js';
10
- export { executeWorkersAi, executeGptOss } from './workers-ai.js';
11
- export { executeDirect, executeCodeTask } from './direct.js';
12
- export { executeTarotScript } from './tarotscript.js';
7
+ // Import then re-export so EXECUTOR_FNS can hold live references
8
+ import { executeClaude, executeClaudeOpus, executeClaudeStream } from './claude.js';
9
+ import { executeGroq } from './groq.js';
10
+ import { executeWorkersAi, executeGptOss } from './workers-ai.js';
11
+ import { executeDirect, executeCodeTask } from './direct.js';
12
+ import { executeTarotScript } from './tarotscript.js';
13
+ export { executeClaude, executeClaudeOpus, executeClaudeStream };
14
+ export { executeGroq };
15
+ export { executeWorkersAi, executeGptOss };
16
+ export { executeDirect, executeCodeTask };
17
+ export { executeTarotScript };
18
+
19
+ // ─── Uniform Executor Dispatch Map ──────────────────────────
20
+ // Executors that share the (intent, env) → {text, cost} signature.
21
+ // Used by dispatch to drive simple cases from the route table,
22
+ // eliminating per-executor switch branches for groq/workers_ai/gpt_oss.
23
+ // claude is included for the shadow exploration path (no failover there).
24
+ export const EXECUTOR_FNS: Partial<Record<Executor, (intent: KernelIntent, env: EdgeEnv) => Promise<{ text: string; cost: number }>>> = {
25
+ groq: executeGroq,
26
+ workers_ai: executeWorkersAi,
27
+ gpt_oss: executeGptOss,
28
+ claude: executeClaude,
29
+ };
13
30
 
14
31
  // ─── MCP Registry ────────────────────────────────────────────
15
32
 
@@ -1,54 +1,197 @@
1
- import { executeWorkersAiChat } from '../../workers-ai-chat.js';
2
- import { McpClient } from '../../mcp-client.js';
3
- import { operatorConfig } from '../../operator/index.js';
4
- import { buildGroqSystemPrompt } from '../../operator/prompt-builder.js';
5
- import type { KernelIntent } from '../types.js';
6
- import type { EdgeEnv } from '../dispatch.js';
7
- import { buildMcpRegistry } from './index.js';
8
-
9
- export async function executeWorkersAi(
10
- intent: KernelIntent,
11
- env: EdgeEnv,
12
- ): Promise<{ text: string; cost: number }> {
13
- if (!env.ai) throw new Error('Workers AI binding not available');
14
- const result = await env.ai.run('@cf/meta/llama-3.3-70b-instruct-fp8-fast', {
15
- messages: [
16
- { role: 'system', content: buildGroqSystemPrompt() },
17
- { role: 'user', content: intent.raw },
18
- ],
19
- }) as { response?: string };
20
- return { text: result.response ?? '(no response)', cost: 0.005 };
21
- }
22
-
23
- export async function executeGptOss(
24
- intent: KernelIntent,
25
- env: EdgeEnv,
26
- ): Promise<{ text: string; cost: number }> {
27
- if (!env.ai) throw new Error('Workers AI binding not available');
28
- const registry = buildMcpRegistry(env);
29
- const mcpClient = new McpClient({
30
- url: operatorConfig.integrations.bizops.fallbackUrl,
31
- token: env.bizopsToken,
32
- prefix: 'bizops',
33
- fetcher: env.bizopsFetcher,
34
- rpcPath: '/rpc',
35
- });
36
-
37
- return executeWorkersAiChat(
38
- {
39
- ai: env.ai,
40
- model: env.gptOssModel,
41
- mcpClient,
42
- mcpRegistry: registry,
43
- db: env.db,
44
- channel: 'web',
45
- conversationId: intent.source.threadId,
46
- githubToken: env.githubToken,
47
- githubRepo: env.githubRepo,
48
- braveApiKey: env.braveApiKey,
49
- memoryBinding: env.memoryBinding,
50
- resendApiKeys: { resendApiKey: env.resendApiKey, resendApiKeyPersonal: env.resendApiKeyPersonal },
51
- },
52
- intent.raw,
53
- );
54
- }
1
+ import type { LLMMessage, ToolResult as LLMToolResult } from '@stackbilt/llm-providers';
2
+ import { McpClient } from '../../mcp-client.js';
3
+ import { operatorConfig } from '../../operator/index.js';
4
+ import { buildGroqSystemPrompt } from '../../operator/prompt-builder.js';
5
+ import { buildContext, handleInProcessTool, callMcpWithRetry, resolveMcpTool } from '../../claude.js';
6
+ import { toOpenAiTools } from '../../workers-ai-chat.js';
7
+ import { getConversationHistory, budgetConversationHistory } from '../memory/index.js';
8
+ import { buildLLMProviderFactory } from '../provider-factory.js';
9
+ import type { KernelIntent } from '../types.js';
10
+ import type { EdgeEnv } from '../dispatch.js';
11
+ import { buildMcpRegistry } from './index.js';
12
+
13
+ export async function executeWorkersAi(
14
+ intent: KernelIntent,
15
+ env: EdgeEnv,
16
+ ): Promise<{ text: string; cost: number }> {
17
+ if (!env.ai) throw new Error('Workers AI binding not available');
18
+ const factory = buildLLMProviderFactory(env);
19
+ const result = await factory.generateResponse({
20
+ messages: [{ role: 'user', content: intent.raw }],
21
+ model: '@cf/meta/llama-3.3-70b-instruct-fp8-fast',
22
+ systemPrompt: buildGroqSystemPrompt(),
23
+ });
24
+ return { text: result.message || '(no response)', cost: result.usage.cost };
25
+ }
26
+
27
+ const GPT_OSS_TOOL_ROUNDS = 8; // 10 max minus 2 reserved for summary
28
+
29
+ export async function executeGptOss(
30
+ intent: KernelIntent,
31
+ env: EdgeEnv,
32
+ ): Promise<{ text: string; cost: number }> {
33
+ if (!env.ai) throw new Error('Workers AI binding not available');
34
+
35
+ const factory = buildLLMProviderFactory(env);
36
+ const registry = buildMcpRegistry(env);
37
+ const mcpClient = new McpClient({
38
+ url: operatorConfig.integrations.bizops.fallbackUrl,
39
+ token: env.bizopsToken,
40
+ prefix: 'bizops',
41
+ fetcher: env.bizopsFetcher,
42
+ rpcPath: '/rpc',
43
+ });
44
+
45
+ const pseudoConfig = {
46
+ apiKey: '',
47
+ model: env.gptOssModel,
48
+ mcpClient,
49
+ mcpRegistry: registry,
50
+ db: env.db,
51
+ channel: 'web',
52
+ conversationId: intent.source.threadId,
53
+ githubToken: env.githubToken,
54
+ githubRepo: env.githubRepo,
55
+ braveApiKey: env.braveApiKey,
56
+ userQuery: intent.raw,
57
+ };
58
+ const { systemPrompt, tools: anthropicTools } = await buildContext(pseudoConfig);
59
+ // toOpenAiTools output matches factory Tool shape exactly
60
+ const tools = toOpenAiTools(anthropicTools) as Parameters<typeof factory.generateResponse>[0]['tools'];
61
+
62
+ const history = intent.source.threadId
63
+ ? await getConversationHistory(env.db, intent.source.threadId, 10)
64
+ : [];
65
+ const priorHistory = history.length > 0 && history[history.length - 1]?.role === 'user'
66
+ ? history.slice(0, -1)
67
+ : history;
68
+
69
+ const messages: LLMMessage[] = [
70
+ { role: 'system', content: systemPrompt },
71
+ ...budgetConversationHistory(priorHistory).map(m => ({
72
+ role: m.role as 'user' | 'assistant',
73
+ content: m.content,
74
+ })),
75
+ { role: 'user', content: intent.raw },
76
+ ];
77
+
78
+ let totalCost = 0;
79
+
80
+ // Phase 1: tool-calling rounds
81
+ for (let round = 0; round < GPT_OSS_TOOL_ROUNDS; round++) {
82
+ const result = await factory.generateResponse({
83
+ messages,
84
+ model: env.gptOssModel,
85
+ tools,
86
+ maxTokens: 4096,
87
+ temperature: 0.2,
88
+ topP: 0.9,
89
+ frequencyPenalty: 0.3,
90
+ });
91
+ totalCost += result.usage.cost;
92
+
93
+ if (!result.toolCalls || result.toolCalls.length === 0) {
94
+ return { text: result.message || '(no response)', cost: totalCost };
95
+ }
96
+
97
+ const toolResults: LLMToolResult[] = [];
98
+ for (const call of result.toolCalls) {
99
+ let args: Record<string, unknown> = {};
100
+ try { args = JSON.parse(call.function.arguments); } catch { /* empty args */ }
101
+
102
+ let output: string;
103
+ const inProcess = await handleInProcessTool(
104
+ env.db, call.function.name, args,
105
+ env.githubToken, env.githubRepo, env.braveApiKey,
106
+ undefined, undefined, env.memoryBinding,
107
+ { resendApiKey: env.resendApiKey, resendApiKeyPersonal: env.resendApiKeyPersonal },
108
+ );
109
+
110
+ if (inProcess !== null) {
111
+ output = inProcess;
112
+ } else {
113
+ const resolved = resolveMcpTool(call.function.name, mcpClient, registry);
114
+ if (resolved) {
115
+ output = await callMcpWithRetry(resolved.client, resolved.mcpName, args);
116
+ } else {
117
+ output = `Unknown tool: ${call.function.name}`;
118
+ }
119
+ }
120
+ toolResults.push({ id: call.id, output });
121
+ }
122
+
123
+ // Attach tool results to the assistant message; cloudflare provider expands
124
+ // toolResults into separate role:'tool' messages when serializing the next request
125
+ messages.push({
126
+ role: 'assistant',
127
+ content: result.message,
128
+ toolCalls: result.toolCalls,
129
+ toolResults,
130
+ });
131
+ }
132
+
133
+ // Phase 2: condense tool history and generate a text-only summary.
134
+ // Condensed messages carry no toolCalls/toolResults, so the factory's
135
+ // usesTools check is false and no tool definitions are sent — preserving
136
+ // the GPT-OSS "no tools in Phase 2" invariant.
137
+ const condensed: LLMMessage[] = [messages[0]]; // system prompt
138
+ const toolFindings: string[] = [];
139
+ let lastAssistantText = '';
140
+
141
+ for (let i = 1; i < messages.length; i++) {
142
+ const msg = messages[i];
143
+ if (msg.role === 'user') {
144
+ condensed.push({ role: 'user', content: msg.content });
145
+ } else if (msg.role === 'assistant') {
146
+ if (msg.content?.trim().length) lastAssistantText = msg.content;
147
+ if (msg.content) toolFindings.push(msg.content);
148
+ if (msg.toolResults) {
149
+ for (const tr of msg.toolResults) {
150
+ const truncated = tr.output.length > 2000
151
+ ? tr.output.slice(0, 2000) + '... [truncated]'
152
+ : tr.output;
153
+ toolFindings.push(truncated);
154
+ }
155
+ }
156
+ }
157
+ }
158
+
159
+ if (toolFindings.length > 0) {
160
+ const BUDGET = 30_000;
161
+ let accumulated = '';
162
+ for (const finding of toolFindings) {
163
+ if (accumulated.length + finding.length > BUDGET) {
164
+ accumulated += '\n[... additional findings truncated for summary]';
165
+ break;
166
+ }
167
+ accumulated += '\n' + finding;
168
+ }
169
+ condensed.push({ role: 'assistant', content: `Here is what I gathered:\n${accumulated.trim()}` });
170
+ }
171
+ condensed.push({ role: 'user', content: 'Based on everything you have gathered from the tools above, provide your complete answer now. Summarize your findings clearly and concisely.' });
172
+
173
+ let summaryText: string | undefined;
174
+ try {
175
+ const summaryResult = await factory.generateResponse({
176
+ messages: condensed,
177
+ model: env.gptOssModel,
178
+ maxTokens: 4096,
179
+ temperature: 0.2,
180
+ topP: 0.9,
181
+ frequencyPenalty: 0.3,
182
+ });
183
+ totalCost += summaryResult.usage.cost;
184
+ summaryText = summaryResult.message || undefined;
185
+ if (!summaryText) {
186
+ console.warn('[executeGptOss] Summary phase returned no text.');
187
+ }
188
+ } catch (err) {
189
+ console.error('[executeGptOss] Summary phase failed:', err instanceof Error ? err.message : String(err));
190
+ }
191
+
192
+ if (!summaryText && lastAssistantText.length > 20) {
193
+ summaryText = lastAssistantText;
194
+ }
195
+
196
+ return { text: summaryText ?? '(could not generate summary)', cost: totalCost };
197
+ }
@@ -43,6 +43,8 @@ interface MindSpringResult {
43
43
  title: string;
44
44
  text: string;
45
45
  score: number;
46
+ notebook_id?: string;
47
+ notebook_title?: string;
46
48
  }
47
49
 
48
50
  // ─── RRF (Reciprocal Rank Fusion) ────────────────────────────
@@ -143,8 +145,12 @@ export async function recallForQuery(
143
145
  : query;
144
146
 
145
147
  const msResponse = await env.mindspringFetcher.fetch(
146
- `https://mindspring/api/search?q=${encodeURIComponent(msQuery)}&limit=5&threshold=0.4`,
147
- { headers: { 'Authorization': `Bearer ${env.mindspringToken}` } },
148
+ 'https://mindspring/api/v2/workspaces/aegis-daemon/search',
149
+ {
150
+ method: 'POST',
151
+ headers: { 'Authorization': `Bearer ${env.mindspringToken}`, 'Content-Type': 'application/json' },
152
+ body: JSON.stringify({ query: msQuery, limit: 5, threshold: 0.4 }),
153
+ },
148
154
  );
149
155
 
150
156
  if (msResponse.ok) {
@@ -0,0 +1,36 @@
1
+ import { createLLMProviderFactory, type LLMProviderFactory } from '@stackbilt/llm-providers';
2
+ import type { EdgeEnv } from './dispatch.js';
3
+
4
+ // ─── Fallback ownership ──────────────────────────────────────
5
+ // EXECUTOR_ROUTES (executor-router.ts) owns the fallback policy, not this factory.
6
+ // Factory-level fallbackRules are left empty to prevent double-firing:
7
+ // - Router fallback re-dispatches with a different *semantic executor* (different
8
+ // model, cost ceiling, telemetry tag) and must surface actualExecutor to the
9
+ // procedure store (see executeWithAnthropicFailover in executors/index.ts:67).
10
+ // - A factory-level fallback would silently swap providers inside a single call,
11
+ // bypassing actualExecutor tracking and producing wrong telemetry.
12
+ // Circuit breaker and retries operate below the executor boundary and do not
13
+ // interfere with executor-level fallback routing — they are kept enabled.
14
+
15
+ export function buildLLMProviderFactory(env: EdgeEnv): LLMProviderFactory {
16
+ return createLLMProviderFactory({
17
+ anthropic: {
18
+ apiKey: env.anthropicApiKey,
19
+ baseUrl: env.anthropicBaseUrl,
20
+ },
21
+ // Cloudflare Workers AI: wired when the AI binding is present.
22
+ // The factory uses the `ai` binding directly for Workers AI inference;
23
+ // no accountId is required for service-binding usage.
24
+ cloudflare: env.ai ? { ai: env.ai } : undefined,
25
+ groq: {
26
+ apiKey: env.groqApiKey,
27
+ baseUrl: env.groqBaseUrl || undefined,
28
+ },
29
+ // Cerebras: no EdgeEnv fields yet (cerebrasApiKey, cerebrasModel).
30
+ // Add here when executors/cerebras.ts and the corresponding EdgeEnv fields land.
31
+
32
+ fallbackRules: [],
33
+ enableCircuitBreaker: true,
34
+ enableRetries: true,
35
+ });
36
+ }
@@ -79,8 +79,14 @@ async function classifyWithWorkersAI(
79
79
  ],
80
80
  max_tokens: 200,
81
81
  temperature: 0.1,
82
- }) as { response?: string };
83
- return result.response ?? '';
82
+ }) as { response?: unknown };
83
+ const raw = result.response;
84
+ if (typeof raw === 'string') return raw;
85
+ if (raw == null) return '';
86
+ // Workers AI sometimes returns structured responses (objects with tool_calls,
87
+ // arrays of segments, etc.). Coerce to string so downstream .trim()/JSON.parse
88
+ // callers don't crash on non-string payloads.
89
+ return typeof raw === 'object' ? JSON.stringify(raw) : String(raw);
84
90
  }
85
91
 
86
92
 
@@ -21,7 +21,15 @@ import type { CorrelationResult, IncidentCluster, ArgusDiagnosis } from '../argu
21
21
  // ─── Configuration ───────────────────────────────────────────
22
22
 
23
23
  const RUN_CADENCE_HOURS = 3;
24
- const COOLDOWN_MS = 12 * 60 * 60 * 1000; // 12h cooldown per pattern alert
24
+ const COOLDOWN_MS = 12 * 60 * 60 * 1000; // 12h default cooldown per pattern alert
25
+
26
+ // Per-pattern cooldown overrides. Drought patterns are expected to persist in
27
+ // pre-revenue or low-activity states — a longer cooldown prevents daily noise
28
+ // from a condition that isn't going to self-resolve on a 12h cycle.
29
+ const PATTERN_COOLDOWN_MS: Record<string, number> = {
30
+ drought_stripe: 72 * 60 * 60 * 1000, // 72h — expected in pre-revenue
31
+ drought_github: 48 * 60 * 60 * 1000, // 48h
32
+ };
25
33
 
26
34
  // Pattern thresholds
27
35
  const CI_FAILURE_WINDOW_HOURS = 6;
@@ -179,7 +187,8 @@ async function isOnCooldown(db: D1Database, pattern: string): Promise<boolean> {
179
187
  ).bind(key).first<{ received_at: string }>();
180
188
 
181
189
  if (!last) return false;
182
- return (Date.now() - new Date(last.received_at + 'Z').getTime()) < COOLDOWN_MS;
190
+ const cooldown = PATTERN_COOLDOWN_MS[pattern] ?? COOLDOWN_MS;
191
+ return (Date.now() - new Date(last.received_at + 'Z').getTime()) < cooldown;
183
192
  }
184
193
 
185
194
  async function recordCooldown(db: D1Database, pattern: string): Promise<void> {
@@ -1,11 +1,13 @@
1
1
  // Per-conversation fact extraction (#324)
2
2
  // Complements the dreaming cycle (daily, batch) with near-real-time
3
3
  // fact capture from operator chat sessions. Runs every 2 hours,
4
- // processes conversations updated since last run. Uses Workers AI (free).
4
+ // processes conversations updated since last run. Uses Groq (free) with
5
+ // Workers AI llama-3.1-8b (free tier) as fallback.
5
6
 
6
7
  import { type EdgeEnv } from '../dispatch.js';
7
8
  import { recordMemory as recordMemoryAdapter } from '../memory-adapter.js';
8
9
  import { askGroq } from '../../groq.js';
10
+ import { pushFactsToMindSpring, type FactEntry } from './mindspring-notebook.js';
9
11
 
10
12
  const WATERMARK_KEY = 'conversation_facts_watermark';
11
13
  const MAX_CONVERSATIONS = 5;
@@ -85,16 +87,25 @@ async function askAi(
85
87
  system: string,
86
88
  user: string,
87
89
  ): Promise<string> {
90
+ // Groq first — free, no neuron consumption (groqResponseModel is the 8B model, not the 70B groqModel)
91
+ if (env.groqApiKey) {
92
+ try {
93
+ return await askGroq(env.groqApiKey, env.groqResponseModel, system, user, env.groqBaseUrl);
94
+ } catch {
95
+ // fall through to Workers AI
96
+ }
97
+ }
98
+ // Workers AI fallback — llama-3.1-8b is on the genuine free tier
88
99
  if (env.ai) {
89
100
  const result = await env.ai.run(
90
- '@cf/meta/llama-3.3-70b-instruct-fp8-fast' as Parameters<Ai['run']>[0],
101
+ '@cf/meta/llama-3.1-8b-instruct' as Parameters<Ai['run']>[0],
91
102
  { messages: [{ role: 'system', content: system }, { role: 'user', content: user }] },
92
103
  );
93
104
  if (typeof result === 'string') return result;
94
105
  const obj = result as { response?: string; choices?: Array<{ message?: { content?: string } }> };
95
106
  return obj.choices?.[0]?.message?.content ?? obj.response ?? '';
96
107
  }
97
- return askGroq(env.groqApiKey, env.groqResponseModel, system, user, env.groqBaseUrl);
108
+ throw new Error('[conv-facts] No LLM provider available (groqApiKey and env.ai both missing)');
98
109
  }
99
110
 
100
111
  export async function runConversationFactExtraction(env: EdgeEnv): Promise<void> {
@@ -128,6 +139,7 @@ export async function runConversationFactExtraction(env: EdgeEnv): Promise<void>
128
139
  }
129
140
 
130
141
  let totalFacts = 0;
142
+ const allFacts: FactEntry[] = [];
131
143
 
132
144
  for (const conv of conversations.results) {
133
145
  const messages = await env.db.prepare(`
@@ -185,6 +197,7 @@ export async function runConversationFactExtraction(env: EdgeEnv): Promise<void>
185
197
  fact.confidence ?? 0.8,
186
198
  'conversation_extraction',
187
199
  );
200
+ allFacts.push({ topic: topicLower, fact: fact.fact, confidence: fact.confidence ?? 0.8 });
188
201
  totalFacts++;
189
202
  console.log(`[conv-facts] Extracted: [${topicLower}] ${fact.fact.slice(0, 80)}`);
190
203
  } catch (err) {
@@ -195,6 +208,12 @@ export async function runConversationFactExtraction(env: EdgeEnv): Promise<void>
195
208
 
196
209
  await advanceWatermark(env.db);
197
210
  console.log(`[conv-facts] Processed ${conversations.results.length} conversations, extracted ${totalFacts} facts`);
211
+
212
+ // Push to MindSpring topic notebooks (best-effort: awaited, but never throws)
213
+ if (allFacts.length > 0) {
214
+ const runTag = `conv-facts-${Date.now().toString(36)}`;
215
+ await pushFactsToMindSpring(allFacts, runTag, env);
216
+ }
198
217
  }
199
218
 
200
219
  async function advanceWatermark(db: D1Database): Promise<void> {
@@ -8,11 +8,12 @@ import { type HeartbeatCheck } from './heartbeat.js';
8
8
  export interface CuriosityCandidate {
9
9
  topic: string;
10
10
  reason: string;
11
- source: 'memory_gap' | 'low_confidence' | 'failure_rate' | 'heartbeat_warn' | 'goal_failure' | 'self_interest';
11
+ source: 'memory_gap' | 'low_confidence' | 'failure_rate' | 'heartbeat_warn' | 'goal_failure' | 'self_interest' | 'conversation_gap';
12
12
  }
13
13
 
14
14
  export async function gatherCuriosityTopics(env: EdgeEnv): Promise<CuriosityCandidate[]> {
15
15
  const candidates: CuriosityCandidate[] = [];
16
+ const thinTopicSeeds: string[] = [];
16
17
 
17
18
  // Source 1: Memory gaps — topics with few entries relative to others
18
19
  if (env.memoryBinding) {
@@ -20,6 +21,7 @@ export async function gatherCuriosityTopics(env: EdgeEnv): Promise<CuriosityCand
20
21
  const stats = await env.memoryBinding.stats('aegis');
21
22
  const thinTopics = stats.topics.filter(t => t.count <= 2).slice(0, 5);
22
23
  for (const t of thinTopics) {
24
+ thinTopicSeeds.push(t.topic);
23
25
  candidates.push({
24
26
  topic: `What more should I know about "${t.topic}"?`,
25
27
  reason: `Only ${t.count} memory entries — thin coverage`,
@@ -154,6 +156,38 @@ export async function gatherCuriosityTopics(env: EdgeEnv): Promise<CuriosityCand
154
156
  }
155
157
  }
156
158
 
159
+ // Source 8: MindSpring conversation gap — topics MW barely knows but that appear in
160
+ // conversation history signal a consolidation pipeline failure, not just a knowledge gap.
161
+ const { mindspringFetcher, mindspringToken } = env;
162
+ if (mindspringFetcher && mindspringToken && thinTopicSeeds.length > 0) {
163
+ try {
164
+ const queryResults = await Promise.allSettled(
165
+ thinTopicSeeds.slice(0, 3).map(async (seed) => {
166
+ const res = await mindspringFetcher.fetch('https://mindspring/api/v2/workspaces/aegis-daemon/search', {
167
+ method: 'POST',
168
+ signal: AbortSignal.timeout(1500),
169
+ headers: { Authorization: `Bearer ${mindspringToken}`, 'Content-Type': 'application/json' },
170
+ body: JSON.stringify({ query: seed, limit: 5, threshold: 0.5 }),
171
+ });
172
+ if (!res.ok) return { seed, count: 0 };
173
+ const data = await res.json<{ results: Array<{ title: string; score: number }> }>();
174
+ return { seed, count: (data.results ?? []).length };
175
+ })
176
+ );
177
+ for (const r of queryResults) {
178
+ if (r.status === 'fulfilled' && r.value.count > 0) {
179
+ candidates.push({
180
+ topic: `"${r.value.seed}" appears in conversation history but has thin memory coverage`,
181
+ reason: `${r.value.count} MindSpring matches vs ≤2 memory entries — consolidation gap`,
182
+ source: 'conversation_gap',
183
+ });
184
+ }
185
+ }
186
+ } catch (err) {
187
+ console.warn('[curiosity] MindSpring gap scan failed:', err instanceof Error ? err.message : String(err));
188
+ }
189
+ }
190
+
157
191
  return candidates;
158
192
  }
159
193
 
@@ -1,4 +1,4 @@
1
- // Shared LLM helper — Workers AI with Groq fallback (zero-cost primary, paid fallback)
1
+ // Shared LLM helper — Groq first (free, 70B quality), Workers AI 70B fallback
2
2
 
3
3
  import type { EdgeEnv } from '../../dispatch.js';
4
4
  import { askGroq } from '../../../groq.js';
@@ -9,18 +9,24 @@ export async function askWorkersAiOrGroq(
9
9
  user: string,
10
10
  useResponseModel = false,
11
11
  ): Promise<string> {
12
+ const groqModel = useResponseModel ? env.groqResponseModel : env.groqModel;
13
+ // Groq first — free tier, same 70B quality, eliminates neuron consumption
14
+ if (env.groqApiKey) {
15
+ try {
16
+ return await askGroq(env.groqApiKey, groqModel, system, user, env.groqBaseUrl);
17
+ } catch {
18
+ // fall through to Workers AI
19
+ }
20
+ }
21
+ // Workers AI fallback — only fires if Groq is unavailable or throws
12
22
  if (env.ai) {
13
- const model = useResponseModel
14
- ? '@cf/meta/llama-3.3-70b-instruct-fp8-fast'
15
- : (env.gptOssModel || '@cf/meta/llama-3.3-70b-instruct-fp8-fast');
16
23
  const result = await env.ai.run(
17
- model as Parameters<Ai['run']>[0],
24
+ '@cf/meta/llama-3.3-70b-instruct-fp8-fast' as Parameters<Ai['run']>[0],
18
25
  { messages: [{ role: 'system', content: system }, { role: 'user', content: user }] },
19
26
  ) as { response?: string; choices?: Array<{ message?: { content?: string } }> };
20
27
  return result.choices?.[0]?.message?.content ?? result.response ?? '';
21
28
  }
22
- const groqModel = useResponseModel ? env.groqResponseModel : env.groqModel;
23
- return askGroq(env.groqApiKey, groqModel, system, user, env.groqBaseUrl);
29
+ throw new Error('[dreaming] No LLM provider available (groqApiKey and env.ai both missing)');
24
30
  }
25
31
 
26
32
  export function parseJsonResponse<T>(raw: string): T | null {
@@ -0,0 +1,132 @@
1
+ // MindSpring v2 write pipeline — push extracted facts to topic notebooks
2
+
3
+ import type { EdgeEnv } from '../dispatch.js';
4
+
5
/** MindSpring workspace id used in every notebook/source URL built by this module. */
const WORKSPACE_ID = 'aegis-daemon';

/** Base URL prefix for requests sent through the MindSpring fetcher. */
const MS_BASE = 'https://mindspring';

/** Notebook record as read from the notebook list and create responses. */
interface MsNotebook {
  id: string;
  title: string;
}

/** Response body read from the simple-upload endpoint. */
interface UploadAccepted {
  uploadId: string;
  status: string;
}

/** One extracted fact, keyed by the topic whose notebook it should land in. */
export interface FactEntry {
  /** Topic name; also used as the notebook title. */
  topic: string;
  /** The fact text itself. */
  fact: string;
  /** Confidence carried with the fact; not read by the push pipeline in this module. */
  confidence: number;
}
16
+
17
/**
 * Build the headers for a MindSpring request.
 *
 * @param token - Bearer token placed in the `Authorization` header.
 * @param extra - Optional additional headers to start from.
 * @returns A `Headers` object; `Authorization` is set last, so it replaces
 *          any `Authorization` entry supplied in `extra`.
 */
function msHeaders(token: string, extra?: Record<string, string>): Headers {
  const headers = new Headers(extra);
  headers.set('Authorization', `Bearer ${token}`);
  return headers;
}
22
+
23
/**
 * Resolve the id of the notebook titled `topic`, creating the notebook if no match is found.
 *
 * Lists the workspace's notebooks and returns the first one whose title equals
 * `topic` exactly. When the listing is not OK or contains no match, a notebook
 * of type `research` is created and its id returned.
 *
 * @param topic - Notebook title to look up or create.
 * @param env - Supplies `mindspringFetcher` and `mindspringIngestToken`.
 * @returns The notebook id.
 * @throws Error when the fetcher or token is missing, the create request is
 *         not OK, or the create response carries no usable id.
 */
async function findOrCreateNotebook(topic: string, env: EdgeEnv): Promise<string> {
  const { mindspringIngestToken: token, mindspringFetcher: fetcher } = env;
  if (!token || !fetcher) {
    throw new Error('MindSpring fetcher or ingest token not configured');
  }

  const notebooksUrl = `${MS_BASE}/api/v2/workspaces/${WORKSPACE_ID}/notebooks`;

  const listResp = await fetcher.fetch(notebooksUrl, { headers: msHeaders(token) });
  if (listResp.ok) {
    const data = await listResp.json<{ notebooks: MsNotebook[] }>();
    const existing = data.notebooks?.find((nb) => nb.title === topic);
    if (existing) return existing.id;
  }
  // A non-OK listing is not treated as fatal: creation is attempted instead.

  const createResp = await fetcher.fetch(notebooksUrl, {
    method: 'POST',
    headers: msHeaders(token, { 'Content-Type': 'application/json' }),
    body: JSON.stringify({ title: topic, type: 'research' }),
  });
  if (!createResp.ok) {
    const msg = await createResp.text().catch(() => '');
    throw new Error(`create notebook failed: ${createResp.status} ${msg.slice(0, 120)}`);
  }

  // Validate the id here so a malformed body fails with a clear message
  // instead of handing `undefined` to the caller as a string.
  const created = await createResp.json<Partial<MsNotebook> | null>();
  if (typeof created?.id !== 'string' || created.id.length === 0) {
    throw new Error('create notebook failed: response did not include a notebook id');
  }
  return created.id;
}
52
+
53
/**
 * Upload plain-text content to the simple-upload endpoint.
 *
 * @param content - Text sent as the request body (`Content-Type: text/plain`).
 * @param filename - Name sent in the `X-File-Name` header. Characters outside
 *                   printable ASCII are replaced with `_` before sending.
 * @param env - Supplies `mindspringFetcher` and `mindspringIngestToken`.
 * @returns The `uploadId` from the response body.
 * @throws Error when the fetcher or token is missing, the response is not OK,
 *         or the response carries no usable `uploadId`.
 */
async function uploadContent(content: string, filename: string, env: EdgeEnv): Promise<string> {
  const { mindspringIngestToken: token, mindspringFetcher: fetcher } = env;
  if (!token || !fetcher) {
    throw new Error('MindSpring fetcher or ingest token not configured');
  }

  // Header values cannot hold CR/LF/NUL or code points above 0xFF, and the
  // filename is built from a free-text topic. Replacing everything outside
  // printable ASCII keeps `Headers` from throwing on such names.
  const headerSafeName = filename.replace(/[^\x20-\x7E]/g, '_');

  const resp = await fetcher.fetch(`${MS_BASE}/api/uploads/simple`, {
    method: 'POST',
    headers: msHeaders(token, {
      'Content-Type': 'text/plain',
      'X-File-Name': headerSafeName,
    }),
    body: content,
  });
  if (!resp.ok) {
    const msg = await resp.text().catch(() => '');
    throw new Error(`upload failed: ${resp.status} ${msg.slice(0, 120)}`);
  }

  const accepted = await resp.json<Partial<UploadAccepted> | null>();
  if (typeof accepted?.uploadId !== 'string' || accepted.uploadId.length === 0) {
    throw new Error('upload failed: response did not include an uploadId');
  }
  return accepted.uploadId;
}
72
+
73
/**
 * Attach a previously uploaded text file to a notebook as a source.
 *
 * @param notebookId - Target notebook id (interpolated into the request path).
 * @param title - Title for the new source.
 * @param uploadId - Id of the upload, sent as `sourceUploadId`.
 * @param env - Supplies `mindspringFetcher` and `mindspringIngestToken`.
 * @throws Error when the fetcher or token is missing, or the response is not OK.
 */
async function registerSource(notebookId: string, title: string, uploadId: string, env: EdgeEnv): Promise<void> {
  const { mindspringIngestToken: token, mindspringFetcher: fetcher } = env;
  if (!token || !fetcher) {
    throw new Error('MindSpring fetcher or ingest token not configured');
  }

  const resp = await fetcher.fetch(
    `${MS_BASE}/api/v2/workspaces/${WORKSPACE_ID}/notebooks/${notebookId}/sources`,
    {
      method: 'POST',
      headers: msHeaders(token, { 'Content-Type': 'application/json' }),
      body: JSON.stringify({ title, type: 'txt', sourceUploadId: uploadId, parserType: 'txt' }),
    },
  );
  // `ok` is true for every 2xx status, so 202 Accepted needs no separate check.
  if (!resp.ok) {
    const msg = await resp.text().catch(() => '');
    throw new Error(`register source failed: ${resp.status} ${msg.slice(0, 120)}`);
  }
}
90
+
91
/**
 * Push extracted facts to MindSpring v2 topic notebooks.
 *
 * Facts are bucketed by topic. For each topic, a plain-text document (topic,
 * source tag, date, then one bullet per fact) is uploaded and registered as a
 * source on the notebook titled after the topic; the notebook is created when
 * it does not exist yet.
 *
 * Does nothing when the fetcher binding or ingest token is absent, or when
 * `facts` is empty. Never throws — a failure for one topic is logged as a
 * warning and the remaining topics are still attempted.
 *
 * @param facts - Facts to push; only `topic` and `fact` are read.
 * @param sourceTag - Run tag written into the document; its first 12
 *                    characters also appear in the source title.
 * @param env - Supplies `mindspringFetcher` and `mindspringIngestToken`.
 */
export async function pushFactsToMindSpring(
  facts: FactEntry[],
  sourceTag: string,
  env: EdgeEnv,
): Promise<void> {
  const ready = env.mindspringFetcher && env.mindspringIngestToken && facts.length > 0;
  if (!ready) return;

  // Bucket fact texts under their topic, preserving first-seen topic order.
  const grouped = new Map<string, string[]>();
  for (const entry of facts) {
    const bucket = grouped.get(entry.topic);
    if (bucket) {
      bucket.push(entry.fact);
    } else {
      grouped.set(entry.topic, [entry.fact]);
    }
  }

  // YYYY-MM-DD, computed once for the whole run.
  const day = new Date().toISOString().slice(0, 10);

  for (const [topic, topicFacts] of grouped) {
    try {
      const header = [`Topic: ${topic}`, `Source: ${sourceTag}`, `Date: ${day}`, ''];
      const bullets = topicFacts.map((text) => `- ${text}`);
      const content = header.concat(bullets).join('\n');

      const notebookId = await findOrCreateNotebook(topic, env);
      const uploadId = await uploadContent(content, `${topic}-facts.txt`, env);
      await registerSource(notebookId, `facts-${day}-${sourceTag.slice(0, 12)}`, uploadId, env);

      console.log(`[mindspring-nb] pushed ${topicFacts.length} fact(s) → notebook '${topic}' (${notebookId.slice(0, 8)})`);
    } catch (err) {
      // Isolate failures per topic so one bad topic does not stop the rest.
      const detail = err instanceof Error ? err.message : String(err);
      console.warn(`[mindspring-nb] topic '${topic}' push failed:`, detail);
    }
  }
}
@@ -3,9 +3,16 @@
3
3
  import { Hono } from 'hono';
4
4
  import type { Env } from '../types.js';
5
5
  import { getAllProceduresWithDerivedStats, getActiveAgendaItems } from '../kernel/memory/index.js';
6
+ import { detectEntropy } from '../kernel/scheduled/entropy.js';
7
+ import { buildEdgeEnv } from '../edge-env.js';
6
8
 
7
9
  const observability = new Hono<{ Bindings: Env }>();
8
10
 
11
/**
 * Parse a `days` query value into a whole number within `[1, max]`.
 *
 * Only an all-digits value (surrounding whitespace allowed) is accepted, so
 * inputs such as `"12abc"` or `"3.9"` are rejected rather than partially parsed.
 *
 * @param value - Raw query-string value, or `undefined` when absent.
 * @param fallback - Returned when `value` is absent, malformed, or out of range.
 * @param max - Largest accepted number of days (inclusive).
 * @returns The parsed day count, or `fallback`.
 */
function boundedDays(value: string | undefined, fallback: number, max: number): number {
  if (value === undefined) return fallback;

  const trimmed = value.trim();
  if (!/^\d+$/.test(trimmed)) return fallback;

  const days = Number(trimmed);
  return days >= 1 && days <= max ? days : fallback;
}
15
+
9
16
  // ─── Shadow Write Stats ─────────────────────────────────────
10
17
 
11
18
  observability.get('/api/shadow-stats', async (c) => {
@@ -46,6 +53,124 @@ observability.get('/api/shadow-read-stats', async (c) => {
46
53
  return c.json({ days, summary, by_site: bySite.results, recent: recent.results });
47
54
  });
48
55
 
56
+ // ─── Entropy ────────────────────────────────────────────────
57
+
58
// GET /api/entropy — run entropy detection and return its report as JSON.
observability.get('/api/entropy', async (c) => {
  // detectEntropy needs the full EdgeEnv (API keys, model config), not just
  // c.env.DB, so build one from the raw bindings first.
  const edgeEnv = buildEdgeEnv(c.env);
  return c.json(await detectEntropy(edgeEnv));
});
64
+
65
+ // ─── Shadow Read Drift ──────────────────────────────────────
66
+
67
/**
 * GET /api/shadow-read-drift — summarize `shadow_read_drift` rows over a recent window.
 *
 * Query parameters:
 *  - `days`: window size, 1–30, default 7 (invalid or out-of-range values fall back to 7).
 *  - `reader`: optional exact-match filter on the `reader` column. An empty
 *    value (`?reader=`) is treated as "no filter".
 *
 * Runs three queries in parallel and responds with
 * `{ days, reader_filter, distribution, readiness, top_drifters }`:
 *  - `distribution`: per-reader avg / max / p50 / p95 / p99 of the absolute
 *    count, latency and cost drift across all samples in the window.
 *  - `readiness`: over the latest sample per (reader, task_pattern), how many
 *    pairs have `pre_tier_count = 0` ("clean") and how many of those also have
 *    matching counts and near-zero latency/cost drift ("ready").
 *  - `top_drifters`: the 15 latest-sample pairs with the largest count drift.
 */
observability.get('/api/shadow-read-drift', async (c) => {
  const days = boundedDays(c.req.query('days'), 7, 30);
  // `?reader=` yields ''. Normalize it to undefined so the SQL filter and the
  // reported `reader_filter` agree (no filter applied → null).
  const reader = c.req.query('reader') || undefined;

  const latestWhere = reader
    ? "WHERE reader = ? AND sampled_at > datetime('now', '-' || ? || ' days')"
    : "WHERE sampled_at > datetime('now', '-' || ? || ' days')";
  // latestBindings: reader-first to match latestWhere (WHERE reader = ? AND sampled_at...)
  // windowBindings: days-first to match the WHERE sampled_at... AND reader = ? pattern used in distribution/topDrifters
  const latestBindings = reader ? [reader, days] : [days];
  const windowBindings = reader ? [days, reader] : [days];

  const [distribution, readiness, topDrifters] = await Promise.all([
    // Percentiles select the value at rank (n + 1) / 2, (19n + 19) / 20 and
    // (99n + 99) / 100 within each reader; the rank arithmetic relies on
    // SQLite integer division, and MAX(1, ...) keeps the rank at least 1.
    c.env.DB.prepare(`
      WITH ranked AS (
        SELECT reader,
          ABS((cached_count - pre_tier_count) - derived_count) AS count_abs_drift,
          ABS(cached_avg_latency_ms - derived_avg_latency_ms) AS latency_abs_drift,
          ABS(cached_avg_cost - derived_avg_cost) AS cost_abs_drift,
          ROW_NUMBER() OVER (PARTITION BY reader ORDER BY ABS((cached_count - pre_tier_count) - derived_count)) AS count_rank,
          ROW_NUMBER() OVER (PARTITION BY reader ORDER BY ABS(cached_avg_latency_ms - derived_avg_latency_ms)) AS latency_rank,
          ROW_NUMBER() OVER (PARTITION BY reader ORDER BY ABS(cached_avg_cost - derived_avg_cost)) AS cost_rank,
          COUNT(*) OVER (PARTITION BY reader) AS n
        FROM shadow_read_drift
        WHERE sampled_at > datetime('now', '-' || ? || ' days')
        ${reader ? 'AND reader = ?' : ''}
      )
      SELECT reader,
        MAX(n) AS samples,
        ROUND(AVG(count_abs_drift), 2) AS avg_abs_count_drift,
        ROUND(MAX(count_abs_drift), 2) AS max_abs_count_drift,
        ROUND(MAX(CASE WHEN count_rank = MAX(1, (n + 1) / 2) THEN count_abs_drift END), 2) AS p50_count_drift,
        ROUND(MAX(CASE WHEN count_rank = MAX(1, (n * 19 + 19) / 20) THEN count_abs_drift END), 2) AS p95_count_drift,
        ROUND(MAX(CASE WHEN count_rank = MAX(1, (n * 99 + 99) / 100) THEN count_abs_drift END), 2) AS p99_count_drift,
        ROUND(AVG(latency_abs_drift), 2) AS avg_latency_drift_ms,
        ROUND(MAX(latency_abs_drift), 2) AS max_latency_drift_ms,
        ROUND(MAX(CASE WHEN latency_rank = MAX(1, (n + 1) / 2) THEN latency_abs_drift END), 2) AS p50_latency_drift_ms,
        ROUND(MAX(CASE WHEN latency_rank = MAX(1, (n * 19 + 19) / 20) THEN latency_abs_drift END), 2) AS p95_latency_drift_ms,
        ROUND(MAX(CASE WHEN latency_rank = MAX(1, (n * 99 + 99) / 100) THEN latency_abs_drift END), 2) AS p99_latency_drift_ms,
        ROUND(AVG(cost_abs_drift), 6) AS avg_cost_drift,
        ROUND(MAX(cost_abs_drift), 6) AS max_cost_drift,
        ROUND(MAX(CASE WHEN cost_rank = MAX(1, (n + 1) / 2) THEN cost_abs_drift END), 6) AS p50_cost_drift,
        ROUND(MAX(CASE WHEN cost_rank = MAX(1, (n * 19 + 19) / 20) THEN cost_abs_drift END), 6) AS p95_cost_drift,
        ROUND(MAX(CASE WHEN cost_rank = MAX(1, (n * 99 + 99) / 100) THEN cost_abs_drift END), 6) AS p99_cost_drift
      FROM ranked
      GROUP BY reader
    `).bind(...windowBindings).all(),

    // Readiness is judged on the most recent sample of each (reader, task_pattern) pair.
    c.env.DB.prepare(`
      WITH latest AS (
        SELECT reader, task_pattern, cached_count, cached_success_count,
          cached_avg_latency_ms, cached_avg_cost,
          derived_count, derived_success_count,
          derived_avg_latency_ms, derived_avg_cost,
          pre_tier_count,
          ROW_NUMBER() OVER (PARTITION BY reader, task_pattern ORDER BY sampled_at DESC) as rn
        FROM shadow_read_drift
        ${latestWhere}
      )
      SELECT
        COUNT(*) as total_pairs,
        COUNT(DISTINCT task_pattern) as distinct_procedures,
        SUM(CASE WHEN pre_tier_count = 0 THEN 1 ELSE 0 END) as clean_pairs,
        SUM(CASE
          WHEN pre_tier_count = 0
            AND cached_count = derived_count
            AND cached_success_count = derived_success_count
            AND ABS(cached_avg_latency_ms - derived_avg_latency_ms) < 10
            AND ABS(cached_avg_cost - derived_avg_cost) < 0.0001
          THEN 1 ELSE 0 END) as ready_pairs
      FROM latest WHERE rn = 1
    `).bind(...latestBindings).first(),

    // Top drifters: latest sample per (task_pattern, reader), largest count drift first.
    c.env.DB.prepare(`
      WITH latest_per_pattern AS (
        SELECT task_pattern, reader,
          cached_count, derived_count, pre_tier_count,
          cached_avg_latency_ms, derived_avg_latency_ms,
          cached_avg_cost, derived_avg_cost,
          sampled_at,
          ROW_NUMBER() OVER (PARTITION BY task_pattern, reader ORDER BY sampled_at DESC) as rn
        FROM shadow_read_drift
        WHERE sampled_at > datetime('now', '-' || ? || ' days')
        ${reader ? 'AND reader = ?' : ''}
      )
      SELECT task_pattern, reader,
        cached_count, derived_count, pre_tier_count,
        ABS((cached_count - pre_tier_count) - derived_count) as count_drift,
        ROUND(ABS(cached_avg_latency_ms - derived_avg_latency_ms), 1) as latency_drift,
        ROUND(ABS(cached_avg_cost - derived_avg_cost), 6) as cost_drift,
        sampled_at
      FROM latest_per_pattern
      WHERE rn = 1
      ORDER BY ABS((cached_count - pre_tier_count) - derived_count) DESC
      LIMIT 15
    `).bind(...windowBindings).all(),
  ]);

  return c.json({
    days,
    reader_filter: reader ?? null,
    distribution: distribution.results,
    readiness,
    top_drifters: topDrifters.results,
  });
});
173
+
49
174
  // ─── Agenda ─────────────────────────────────────────────────
50
175
 
51
176
  observability.get('/agenda', async (c) => {