@stackbilt/aegis-core 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +96 -0
- package/schema.sql +586 -0
- package/src/adapters/voice/cloudflare-agent.ts +34 -0
- package/src/auth.ts +124 -0
- package/src/bluesky.ts +464 -0
- package/src/claude-tools/content.ts +188 -0
- package/src/claude-tools/email.ts +69 -0
- package/src/claude-tools/github.ts +440 -0
- package/src/claude-tools/goals.ts +116 -0
- package/src/claude-tools/index.ts +353 -0
- package/src/claude-tools/web.ts +59 -0
- package/src/claude.ts +406 -0
- package/src/codebeast.ts +200 -0
- package/src/composite.ts +715 -0
- package/src/content/column.ts +80 -0
- package/src/content/hero-image.ts +47 -0
- package/src/content/index.ts +27 -0
- package/src/content/journal.ts +91 -0
- package/src/content/roundtable.ts +163 -0
- package/src/core.ts +309 -0
- package/src/dashboard.ts +620 -0
- package/src/decision-docs.ts +284 -0
- package/src/dispatch.ts +13 -0
- package/src/edge-env.ts +58 -0
- package/src/email.ts +850 -0
- package/src/exports.ts +156 -0
- package/src/github-projects.ts +312 -0
- package/src/github.ts +670 -0
- package/src/groq.ts +247 -0
- package/src/health-page.ts +578 -0
- package/src/index.ts +89 -0
- package/src/kernel/argus-actions.ts +397 -0
- package/src/kernel/argus-correlation.ts +639 -0
- package/src/kernel/board.ts +91 -0
- package/src/kernel/briefing.ts +177 -0
- package/src/kernel/classify-memory-topic.ts +166 -0
- package/src/kernel/cognition.ts +377 -0
- package/src/kernel/court-cards.ts +163 -0
- package/src/kernel/dispatch.ts +587 -0
- package/src/kernel/domain.ts +50 -0
- package/src/kernel/dynamic-tools.ts +322 -0
- package/src/kernel/executor-port.ts +45 -0
- package/src/kernel/executors/claude.ts +73 -0
- package/src/kernel/executors/direct.ts +237 -0
- package/src/kernel/executors/groq.ts +18 -0
- package/src/kernel/executors/index.ts +87 -0
- package/src/kernel/executors/tarotscript.ts +104 -0
- package/src/kernel/executors/workers-ai.ts +54 -0
- package/src/kernel/insight-cache.ts +76 -0
- package/src/kernel/memory/agenda.ts +200 -0
- package/src/kernel/memory/blocks.ts +188 -0
- package/src/kernel/memory/consolidation.ts +194 -0
- package/src/kernel/memory/episodic.ts +241 -0
- package/src/kernel/memory/goals.ts +156 -0
- package/src/kernel/memory/graph.ts +290 -0
- package/src/kernel/memory/index.ts +11 -0
- package/src/kernel/memory/insights.ts +316 -0
- package/src/kernel/memory/procedural.ts +467 -0
- package/src/kernel/memory/pruning.ts +67 -0
- package/src/kernel/memory/recall.ts +367 -0
- package/src/kernel/memory/semantic.ts +315 -0
- package/src/kernel/memory/synthesis.ts +161 -0
- package/src/kernel/memory-adapter.ts +369 -0
- package/src/kernel/memory-guardrails.ts +76 -0
- package/src/kernel/port.ts +23 -0
- package/src/kernel/resilience.ts +322 -0
- package/src/kernel/router.ts +471 -0
- package/src/kernel/scheduled/agent-dispatch.ts +252 -0
- package/src/kernel/scheduled/argus-analytics.ts +247 -0
- package/src/kernel/scheduled/argus-heartbeat.ts +320 -0
- package/src/kernel/scheduled/argus-notify.ts +348 -0
- package/src/kernel/scheduled/board-sync.ts +110 -0
- package/src/kernel/scheduled/ci-watcher.ts +125 -0
- package/src/kernel/scheduled/cognitive-metrics.ts +377 -0
- package/src/kernel/scheduled/consolidation.ts +229 -0
- package/src/kernel/scheduled/content-drip.ts +47 -0
- package/src/kernel/scheduled/content.ts +6 -0
- package/src/kernel/scheduled/conversation-facts.ts +204 -0
- package/src/kernel/scheduled/cost-report.ts +84 -0
- package/src/kernel/scheduled/curiosity.ts +219 -0
- package/src/kernel/scheduled/dev-activity.ts +44 -0
- package/src/kernel/scheduled/digest.ts +317 -0
- package/src/kernel/scheduled/dreaming/agenda-triage.ts +115 -0
- package/src/kernel/scheduled/dreaming/facts.ts +239 -0
- package/src/kernel/scheduled/dreaming/index.ts +8 -0
- package/src/kernel/scheduled/dreaming/llm.ts +33 -0
- package/src/kernel/scheduled/dreaming/pattern-synthesis.ts +124 -0
- package/src/kernel/scheduled/dreaming/persona.ts +75 -0
- package/src/kernel/scheduled/dreaming/symbolic.ts +31 -0
- package/src/kernel/scheduled/dreaming/task-proposals.ts +80 -0
- package/src/kernel/scheduled/dreaming.ts +66 -0
- package/src/kernel/scheduled/entropy.ts +149 -0
- package/src/kernel/scheduled/escalation.ts +192 -0
- package/src/kernel/scheduled/feed-watcher.ts +206 -0
- package/src/kernel/scheduled/goals.ts +214 -0
- package/src/kernel/scheduled/governance.ts +41 -0
- package/src/kernel/scheduled/heartbeat.ts +220 -0
- package/src/kernel/scheduled/inbox-processor.ts +174 -0
- package/src/kernel/scheduled/index.ts +245 -0
- package/src/kernel/scheduled/issue-proposer.ts +478 -0
- package/src/kernel/scheduled/issue-watcher.ts +128 -0
- package/src/kernel/scheduled/pr-automerge.ts +213 -0
- package/src/kernel/scheduled/product-health.ts +107 -0
- package/src/kernel/scheduled/reflection.ts +373 -0
- package/src/kernel/scheduled/self-improvement.ts +114 -0
- package/src/kernel/scheduled/social-engage.ts +175 -0
- package/src/kernel/scheduled/task-audit.ts +60 -0
- package/src/kernel/symbolic.ts +156 -0
- package/src/kernel/types.ts +145 -0
- package/src/landing.ts +1190 -0
- package/src/lib/audit-chain/chain.ts +28 -0
- package/src/lib/audit-chain/types.ts +12 -0
- package/src/lib/observability/errors.ts +55 -0
- package/src/markdown.ts +164 -0
- package/src/mcp/handlers.ts +647 -0
- package/src/mcp/server.ts +184 -0
- package/src/mcp/tools.ts +316 -0
- package/src/mcp-client.ts +275 -0
- package/src/mcp-server.ts +2 -0
- package/src/operator/config.example.ts +60 -0
- package/src/operator/config.ts +60 -0
- package/src/operator/index.ts +46 -0
- package/src/operator/persona.example.ts +34 -0
- package/src/operator/persona.ts +34 -0
- package/src/operator/prompt-builder.ts +190 -0
- package/src/operator/types.ts +43 -0
- package/src/pulse.ts +1179 -0
- package/src/routes/bluesky.ts +116 -0
- package/src/routes/cc-tasks.ts +328 -0
- package/src/routes/codebeast.ts +1 -0
- package/src/routes/content.ts +194 -0
- package/src/routes/conversations.ts +25 -0
- package/src/routes/dynamic-tools.ts +111 -0
- package/src/routes/feedback.ts +192 -0
- package/src/routes/health.ts +147 -0
- package/src/routes/messages.ts +228 -0
- package/src/routes/observability.ts +82 -0
- package/src/routes/operator-logs.ts +42 -0
- package/src/routes/pages.ts +96 -0
- package/src/routes/sessions.ts +54 -0
- package/src/sanitize.ts +73 -0
- package/src/schema-enums.ts +155 -0
- package/src/search.ts +112 -0
- package/src/task-intelligence.ts +497 -0
- package/src/types.ts +194 -0
- package/src/ui.ts +5 -0
- package/src/version.ts +3 -0
- package/src/workers-ai-chat.ts +333 -0
package/src/composite.ts
ADDED
|
@@ -0,0 +1,715 @@
|
|
|
1
|
+
// Composite Executor — Asymmetric AI Microservices Pipeline
|
|
2
|
+
// Groq plans → CF Workers AI gathers (tools) → Groq analyzes (parallel) → Claude synthesizes
|
|
3
|
+
//
|
|
4
|
+
// Cost model:
|
|
5
|
+
// Groq GPT-OSS-120B: $0.15/$0.60 per MTok (orchestrate + analyze)
|
|
6
|
+
// CF Workers AI GPT-OSS-120B: $0.35/$0.75 per MTok (gather)
|
|
7
|
+
// Claude Sonnet: $3/$15 per MTok (synthesize)
|
|
8
|
+
// Expected total: $0.01-0.03 per composite query
|
|
9
|
+
|
|
10
|
+
import { askGroqJson } from './groq.js';
|
|
11
|
+
import { buildContext, handleInProcessTool, callMcpWithRetry, resolveMcpTool } from './claude.js';
|
|
12
|
+
import { toOpenAiTools, extractText, extractToolCalls, extractUsage, type AiChatResponse } from './workers-ai-chat.js';
|
|
13
|
+
import { McpClient, McpRegistry } from './mcp-client.js';
|
|
14
|
+
import { operatorConfig } from './operator/index.js';
|
|
15
|
+
import { buildPersonaPreamble } from './operator/prompt-builder.js';
|
|
16
|
+
import { getCognitiveState, formatCognitiveContext } from './kernel/cognition.js';
|
|
17
|
+
import { getAttachedBlocks, assembleBlockContext } from './kernel/memory/blocks.js';
|
|
18
|
+
import { getConversationHistory, budgetConversationHistory } from './kernel/memory/index.js';
|
|
19
|
+
import { classifyCourtCard, type CourtCard, type CourtCardProfile } from './kernel/court-cards.js';
|
|
20
|
+
import type { KernelIntent } from './kernel/types.js';
|
|
21
|
+
import type { EdgeEnv } from './kernel/dispatch.js';
|
|
22
|
+
|
|
23
|
+
// ─── Types ──────────────────────────────────────────────────
|
|
24
|
+
|
|
25
|
+
/**
 * Execution plan produced by the orchestrator model (Phase 1).
 *
 * The value is parsed from model-generated JSON, so at runtime it is only as
 * well-formed as the model's adherence to the schema in the orchestrator prompt.
 */
interface ExecutionDAG {
  /** Units of work; each one is gathered and then analyzed. */
  subtasks: Array<{
    /** Subtask identifier (the prompt's example value is "subtask_1"). */
    id: string;
    /** What the subtask should accomplish. */
    description: string;
    /** Tool names requested for the gather step; empty means analysis-only. */
    tools_needed: string[];
    /** Question the analysis step should answer about the gathered data. */
    analysis_prompt: string;
  }>;
  /** How the subtask results should be combined into the final answer. */
  synthesis_instruction: string;
  /**
   * Optional synthesis model choice. `null` is included because the schema
   * example in the orchestrator prompt literally emits `"model_override": null`.
   */
  model_override?: 'sonnet' | 'opus' | null;
}
|
|
35
|
+
|
|
36
|
+
/** Outcome of one subtask after the gather and analyze phases. */
interface SubtaskResult {
  /** Identifier copied from the planned subtask. */
  id: string;
  /** Description copied from the planned subtask. */
  description: string;
  /** Raw text produced by the gather phase (or an "Error: ..." string on failure). */
  gathered: string;
  /** Text produced by the analyze phase; equals `gathered` when analysis failed. */
  analysis: string;
}
|
|
42
|
+
|
|
43
|
+
// ─── Cost rates ─────────────────────────────────────────────
|
|
44
|
+
|
|
45
|
+
// Prices in USD per million tokens; cost formulas divide by 1_000_000.

/** Groq GPT-OSS-120B (orchestrate + analyze). */
const GROQ_GPT_OSS_RATES = {
  input: 0.15,
  output: 0.60,
};

/** Cloudflare Workers AI GPT-OSS-120B (gather). */
const CF_GPT_OSS_RATES = {
  input: 0.35,
  output: 0.75,
};

/** Claude Sonnet (default synthesis model). */
const CLAUDE_SONNET_RATES = {
  input: 3,
  output: 15,
};

/** Claude Opus (synthesis when the DAG requests the "opus" override). */
const CLAUDE_OPUS_RATES = {
  input: 15,
  output: 75,
};
|
|
49
|
+
|
|
50
|
+
// ─── Phase 1: Orchestrate ───────────────────────────────────
|
|
51
|
+
|
|
52
|
+
// System prompt for the Phase 1 orchestrator call: orchestrate() passes it to
// askGroqJson and parses the reply as an ExecutionDAG. The JSON schema below is the
// contract the rest of the pipeline relies on (subtasks[].tools_needed,
// subtasks[].analysis_prompt, synthesis_instruction, model_override).
// NOTE(review): the schema example emits `"model_override": null` — confirm consumers
// treat null the same as "no override".
const ORCHESTRATOR_SYSTEM = `You are a task decomposition engine. Given a user query and a list of available tools, decompose the query into subtasks that can be executed independently.
|
|
53
|
+
|
|
54
|
+
Return a JSON object with this exact schema:
|
|
55
|
+
{
|
|
56
|
+
"subtasks": [
|
|
57
|
+
{
|
|
58
|
+
"id": "subtask_1",
|
|
59
|
+
"description": "What this subtask should accomplish",
|
|
60
|
+
"tools_needed": ["tool_name_1", "tool_name_2"],
|
|
61
|
+
"analysis_prompt": "After gathering data, analyze: [specific question about the gathered data]"
|
|
62
|
+
}
|
|
63
|
+
],
|
|
64
|
+
"synthesis_instruction": "How to combine all subtask results into a final answer",
|
|
65
|
+
"model_override": null
|
|
66
|
+
}
|
|
67
|
+
|
|
68
|
+
Rules:
|
|
69
|
+
- Each subtask should gather ONE logical group of information
|
|
70
|
+
- Use 1-4 subtasks (prefer fewer, more focused subtasks)
|
|
71
|
+
- tools_needed must reference exact tool names from the available list
|
|
72
|
+
- If no tools are needed, use an empty tools_needed array — the subtask will be analysis-only
|
|
73
|
+
- analysis_prompt should ask a specific question about the gathered data
|
|
74
|
+
- CRITICAL: preserve ALL specific identifiers (UUIDs, IDs, enum values, exact names) from the user query verbatim in subtask descriptions. Never paraphrase identifiers.
|
|
75
|
+
- CRITICAL: your subtasks must address exactly what the user asked. Do NOT reinterpret, reframe, or expand the query into a different topic. If conversation context is provided, use it to understand the user's actual intent.
|
|
76
|
+
- synthesis_instruction should describe how to weave everything together
|
|
77
|
+
- Set model_override to "opus" only for queries requiring deep multi-step reasoning`;
|
|
78
|
+
|
|
79
|
+
/**
 * Assembles the user-role prompt for the orchestrator model.
 *
 * Section order: tool list, optional court-card routing hint, optional
 * conversation context, then the user query.
 *
 * @param userQuery - The raw user query.
 * @param toolDescriptions - Pre-formatted list of available tools.
 * @param conversationContext - Recent turns; omitted from the prompt when empty.
 * @param courtCard - Routing profile; omitted from the prompt when absent.
 * @returns The assembled prompt text.
 */
function buildOrchestratorPrompt(
  userQuery: string,
  toolDescriptions: string,
  conversationContext?: string,
  courtCard?: CourtCardProfile,
): string {
  const sections: string[] = [`Available tools:\n${toolDescriptions}\n\n`];

  if (courtCard) {
    sections.push(`\nRouting orientation (${courtCard.label}): ${courtCard.orchestratorHint}\n\n`);
  }
  if (conversationContext) {
    sections.push(`Conversation context (recent turns):\n${conversationContext}\n\n`);
  }

  sections.push(`User query: ${userQuery}`);
  return sections.join('');
}
|
|
93
|
+
|
|
94
|
+
/**
 * Phase 1: asks the Groq orchestrator model to decompose the query into an
 * ExecutionDAG and returns it together with the call's estimated cost.
 *
 * The DAG comes from model-generated JSON, so it is normalized before being
 * returned: every subtask is guaranteed to have a string `id`, a string
 * `description`, a string-array `tools_needed` and a string `analysis_prompt`.
 * Without this, a subtask that omits `tools_needed` makes later
 * `subtask.tools_needed.length` reads throw.
 *
 * @param intent - Kernel intent; only `intent.raw` (the user query) is read here.
 * @param env - Supplies the Groq API key, model name and base URL.
 * @param toolDescriptions - Pre-formatted tool list embedded in the prompt.
 * @param conversationContext - Optional recent-turns text.
 * @param courtCard - Optional routing profile forwarded to the prompt builder.
 * @returns The normalized DAG and the cost in USD (0 when usage is not reported).
 * @throws Error when the model returns no payload, no subtasks, or a non-object subtask.
 */
async function orchestrate(
  intent: KernelIntent,
  env: EdgeEnv,
  toolDescriptions: string,
  conversationContext?: string,
  courtCard?: CourtCardProfile,
): Promise<{ dag: ExecutionDAG; cost: number }> {
  const userPrompt = buildOrchestratorPrompt(intent.raw, toolDescriptions, conversationContext, courtCard);
  const { parsed, usage } = await askGroqJson<ExecutionDAG>(
    env.groqApiKey,
    env.groqGptOssModel,
    ORCHESTRATOR_SYSTEM,
    userPrompt,
    env.groqBaseUrl,
    { maxTokens: 1500, temperature: 0.2 },
  );

  let cost = 0;
  if (usage) {
    cost = (usage.prompt_tokens * GROQ_GPT_OSS_RATES.input
      + usage.completion_tokens * GROQ_GPT_OSS_RATES.output) / 1_000_000;
  }

  // Validate DAG structure (also guards against a null/non-object payload).
  if (!parsed || !Array.isArray(parsed.subtasks) || parsed.subtasks.length === 0) {
    throw new Error('Orchestrator returned empty DAG');
  }

  // Normalize each subtask so downstream phases can rely on the declared shape.
  const subtasks = parsed.subtasks.map((raw, index) => {
    if (raw === null || typeof raw !== 'object') {
      throw new Error(`Orchestrator returned malformed subtask at index ${index}`);
    }
    const description = typeof raw.description === 'string' ? raw.description : '';
    return {
      ...raw,
      id: typeof raw.id === 'string' && raw.id.length > 0 ? raw.id : `subtask_${index + 1}`,
      description,
      tools_needed: Array.isArray(raw.tools_needed)
        ? raw.tools_needed.filter((name): name is string => typeof name === 'string')
        : [],
      // Fall back to the description so the analyzer never sees "undefined".
      analysis_prompt: typeof raw.analysis_prompt === 'string' && raw.analysis_prompt.length > 0
        ? raw.analysis_prompt
        : description,
    };
  });

  const dag: ExecutionDAG = {
    ...parsed,
    subtasks,
    synthesis_instruction: typeof parsed.synthesis_instruction === 'string'
      ? parsed.synthesis_instruction
      : '',
  };

  return { dag, cost };
}
|
|
124
|
+
|
|
125
|
+
// ─── DAG Intent Validator ────────────────────────────────────
|
|
126
|
+
// Lightweight heuristic: extract significant words from the user query and
|
|
127
|
+
// check that the DAG's synthesis instruction + subtask descriptions share
|
|
128
|
+
// enough lexical overlap. If the orchestrator reframed the task into
|
|
129
|
+
// something unrelated, overlap will be low → fail closed to single-model.
|
|
130
|
+
|
|
131
|
+
/**
 * Heuristic drift check: does the DAG still talk about what the user asked?
 *
 * Words longer than 3 characters are extracted from the query and from the
 * DAG's synthesis instruction plus subtask descriptions; at least 25% of the
 * query words must also occur in the DAG text.
 *
 * Punctuation is treated as a word separator. Deleting it instead would fuse
 * "job-search/pipeline-status" into one token that can never match the DAG's
 * "job search pipeline status", producing a false drift verdict.
 *
 * @param userQuery - The raw user query.
 * @param dag - The orchestrator's plan.
 * @returns `true` when overlap is sufficient or the query has no significant
 *          words; `false` (after logging a warning) when overlap is below 25%.
 */
function validateDagIntent(userQuery: string, dag: ExecutionDAG): boolean {
  const extractWords = (s: string): Set<string> =>
    new Set(
      s
        .toLowerCase()
        .replace(/[^a-z0-9\s]/g, ' ') // punctuation separates words
        .split(/\s+/)
        .filter(w => w.length > 3),
    );

  const queryWords = extractWords(userQuery);
  if (queryWords.size === 0) return true; // trivial query, let it through

  // Combine all DAG text: synthesis instruction + subtask descriptions.
  // Array.prototype.join renders missing (undefined/null) entries as ''.
  const dagText = [
    dag.synthesis_instruction,
    ...dag.subtasks.map(s => s.description),
  ].join(' ');
  const dagWords = extractWords(dagText);

  // Count how many query words appear in the DAG
  let overlap = 0;
  for (const word of queryWords) {
    if (dagWords.has(word)) overlap++;
  }

  const overlapRatio = overlap / queryWords.size;
  // Require at least 25% of query words to appear in the DAG.
  // This catches gross reinterpretation (job search → provider evaluation)
  // while allowing legitimate decomposition that uses different phrasing.
  if (overlapRatio < 0.25) {
    console.warn(`[composite] DAG overlap: ${overlap}/${queryWords.size} (${(overlapRatio * 100).toFixed(0)}%) — below 25% threshold`);
    return false;
  }

  return true;
}
|
|
162
|
+
|
|
163
|
+
// ─── Phase 2: Gather (CF Workers AI tool loop) ─────────────
|
|
164
|
+
|
|
165
|
+
/** Chat message in the OpenAI-style role/content shape, including tool-call turns. */
type ChatMessage =
  | { role: 'system'; content: string }
  | { role: 'user'; content: string }
  | {
      role: 'assistant';
      content: string;
      tool_calls?: Array<{
        id: string;
        type: string;
        function: { name: string; arguments: string };
      }>;
    }
  | { role: 'tool'; tool_call_id: string; content: string };

// NOTE(review): presumably the upper bound on gather tool-loop rounds; the loop
// itself is not in this part of the file — confirm against its user.
const MAX_GATHER_ROUNDS = 6;
|
|
172
|
+
|
|
173
|
+
// ─── Phase 3: Analyze (Groq parallel) ──────────────────────
|
|
174
|
+
|
|
175
|
+
/**
 * Phase 3: asks the Groq model to analyze one subtask's gathered data.
 *
 * @param subtask - Planned subtask; only `analysis_prompt` is read.
 * @param gathered - Raw text from the gather phase.
 * @param env - Supplies the Groq API key, model name and base URL.
 * @param courtCard - Optional profile whose `analysisLens` is appended to the system prompt.
 * @returns The analysis text and the call's cost in USD (0 when usage is not
 *          reported). Falls back to `gathered` when the model returns a blank
 *          or non-string `analysis`, so synthesis never receives an empty section.
 * @throws Error when the model returns no JSON object at all, so the caller
 *         can count the subtask as failed.
 */
async function analyzeSubtask(
  subtask: ExecutionDAG['subtasks'][number],
  gathered: string,
  env: EdgeEnv,
  courtCard?: CourtCardProfile,
): Promise<{ analysis: string; cost: number }> {
  const lensDirective = courtCard
    ? ` ${courtCard.analysisLens}`
    : '';
  const { parsed, usage } = await askGroqJson<{ analysis: string }>(
    env.groqApiKey,
    env.groqGptOssModel,
    `${buildPersonaPreamble()} Analyze the gathered data and answer the analysis prompt. Be direct and specific — reference actual products, numbers, and context.${lensDirective} Return JSON: { "analysis": "your analysis" }`,
    `Analysis prompt: ${subtask.analysis_prompt}\n\nGathered data:\n${gathered}`,
    env.groqBaseUrl,
    { maxTokens: 2000, temperature: 0.2, prefill: '{"analysis":"' },
  );

  let cost = 0;
  if (usage) {
    cost = (usage.prompt_tokens * GROQ_GPT_OSS_RATES.input
      + usage.completion_tokens * GROQ_GPT_OSS_RATES.output) / 1_000_000;
  }

  // Explicit failure instead of an opaque property-of-null TypeError.
  if (parsed === null || typeof parsed !== 'object') {
    throw new Error('Analyzer returned no JSON payload');
  }

  // Model output is untrusted: accept only a non-blank string.
  const candidate: unknown = parsed.analysis;
  const analysis = typeof candidate === 'string' && candidate.trim().length > 0
    ? candidate
    : gathered;

  return { analysis, cost };
}
|
|
201
|
+
|
|
202
|
+
// ─── Phase 4: Synthesize (Claude) ───────────────────────────
|
|
203
|
+
|
|
204
|
+
/**
 * Phase 4: asks Claude (via the Anthropic Messages endpoint) to merge the
 * analyzed subtask results into the final answer.
 *
 * @param intent - Kernel intent; only `intent.raw` is read.
 * @param subtaskResults - Per-subtask analysis plus raw gathered data.
 * @param synthesisInstruction - The orchestrator's instruction for combining results.
 * @param env - Supplies the Anthropic credentials, base URL, model names and DB handle.
 * @param useOpus - Selects `env.opusModel` and Opus rates instead of the default model.
 * @param courtCard - Optional profile whose `synthesisVoice` is appended to the system prompt.
 * @returns The synthesized text (or "(no synthesis)" when no text block is
 *          returned) and the call's cost in USD. Missing usage fields count as
 *          0 tokens, so a successful answer is not lost over cost accounting.
 * @throws Error when the HTTP response status is not OK.
 */
async function synthesize(
  intent: KernelIntent,
  subtaskResults: SubtaskResult[],
  synthesisInstruction: string,
  env: EdgeEnv,
  useOpus: boolean,
  courtCard?: CourtCardProfile,
): Promise<{ text: string; cost: number }> {
  const model = useOpus ? env.opusModel : env.claudeModel;
  const rates = useOpus ? CLAUDE_OPUS_RATES : CLAUDE_SONNET_RATES;

  const subtaskSummary = subtaskResults.map(r => {
    // Include raw gathered data so synthesis can recover structured values the analysis step may have dropped
    const gatheredSection = r.gathered && r.gathered !== r.analysis
      ? `\n**Raw data:**\n${r.gathered.slice(0, 3000)}`
      : '';
    return `### ${r.id}: ${r.description}\n${r.analysis}${gatheredSection}`;
  }).join('\n\n');

  // Inject block context so Claude can reference identity, products, narratives
  let contextSuffix = '';
  try {
    const blocks = await getAttachedBlocks(env.db, 'composite');
    if (blocks.length > 0) {
      contextSuffix = '\n\n' + assembleBlockContext(blocks);
    } else {
      // Fallback: CognitiveState when blocks haven't been seeded yet
      const cogState = await getCognitiveState(env.db);
      if (cogState) contextSuffix = '\n' + formatCognitiveContext(cogState);
    }
  } catch { /* non-fatal — synthesize without cognitive context */ }

  const response = await fetch(`${env.anthropicBaseUrl}/v1/messages`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'x-api-key': env.anthropicApiKey,
      'anthropic-version': '2023-06-01',
    },
    body: JSON.stringify({
      model,
      max_tokens: 4096,
      system: `${buildPersonaPreamble()} Synthesize the analyzed subtask results into a coherent, actionable answer. Speak as AEGIS — the co-founder who knows the business inside-out. Be thorough but concise. Reference specific products, numbers, and context from the portfolio below. Never give generic consultant advice; give the answer a co-founder would give.${courtCard ? ` ${courtCard.synthesisVoice}` : ''}${contextSuffix}`,
      messages: [{
        role: 'user',
        content: `Original query: ${intent.raw}\n\nSynthesis instruction: ${synthesisInstruction}\n\nSubtask results:\n${subtaskSummary}`,
      }],
    }),
  });

  if (!response.ok) {
    const err = await response.text();
    throw new Error(`Anthropic API error ${response.status}: ${err}`);
  }

  // Fields are optional here because the body is external input.
  const data = await response.json<{
    content?: Array<{ type: string; text?: string }>;
    usage?: { input_tokens?: number; output_tokens?: number };
  }>();

  const contentBlocks = Array.isArray(data?.content) ? data.content : [];
  const text = contentBlocks.filter(b => b.type === 'text').map(b => b.text ?? '').join('');

  const inputTokens = data?.usage?.input_tokens ?? 0;
  const outputTokens = data?.usage?.output_tokens ?? 0;
  const cost = (inputTokens * rates.input + outputTokens * rates.output) / 1_000_000;

  return { text: text || '(no synthesis)', cost };
}
|
|
269
|
+
|
|
270
|
+
// ─── Groq synthesis fallback ────────────────────────────────
|
|
271
|
+
|
|
272
|
+
/**
 * Synthesis via the Groq model, as an alternative to the Claude synthesis path.
 *
 * @param intent - Kernel intent; only `intent.raw` is read.
 * @param subtaskResults - Per-subtask analysis plus raw gathered data.
 * @param synthesisInstruction - The orchestrator's instruction for combining results.
 * @param env - Supplies the Groq credentials, model name, base URL and DB handle.
 * @param courtCard - Optional profile whose `synthesisVoice` is appended to the system prompt.
 * @returns The synthesized text and the call's cost in USD (0 when usage is
 *          not reported). A missing or empty `response` yields "(no synthesis)",
 *          matching the placeholder used by the Claude synthesis path.
 */
async function synthesizeGroqFallback(
  intent: KernelIntent,
  subtaskResults: SubtaskResult[],
  synthesisInstruction: string,
  env: EdgeEnv,
  courtCard?: CourtCardProfile,
): Promise<{ text: string; cost: number }> {
  const subtaskSummary = subtaskResults.map(r => {
    const gatheredSection = r.gathered && r.gathered !== r.analysis
      ? `\n**Raw data:**\n${r.gathered.slice(0, 3000)}`
      : '';
    return `### ${r.id}: ${r.description}\n${r.analysis}${gatheredSection}`;
  }).join('\n\n');

  // Inject block context for persona grounding in fallback path too
  let contextSuffix = '';
  try {
    const blocks = await getAttachedBlocks(env.db, 'gpt_oss');
    if (blocks.length > 0) {
      contextSuffix = '\n\n' + assembleBlockContext(blocks);
    } else {
      const cogState = await getCognitiveState(env.db);
      if (cogState) contextSuffix = '\n' + formatCognitiveContext(cogState);
    }
  } catch { /* non-fatal */ }

  const { parsed, usage } = await askGroqJson<{ response: string }>(
    env.groqApiKey,
    env.groqGptOssModel,
    `${buildPersonaPreamble()} Combine the analyzed subtask results into a coherent answer. Speak as AEGIS — the co-founder who knows the business. Reference specific products and context. Never give generic advice.${courtCard ? ` ${courtCard.synthesisVoice}` : ''}${contextSuffix} Return JSON: { "response": "your complete answer" }`,
    `Original query: ${intent.raw}\n\nSynthesis instruction: ${synthesisInstruction}\n\nSubtask results:\n${subtaskSummary}`,
    env.groqBaseUrl,
    { maxTokens: 4000, temperature: 0.3 },
  );

  let cost = 0;
  if (usage) {
    cost = (usage.prompt_tokens * GROQ_GPT_OSS_RATES.input
      + usage.completion_tokens * GROQ_GPT_OSS_RATES.output) / 1_000_000;
  }

  // Model output is untrusted: tolerate a missing payload or non-string field.
  const rawResponse: unknown = parsed?.response;
  let text = '';
  if (typeof rawResponse === 'string') {
    text = rawResponse;
  } else if (rawResponse !== undefined && rawResponse !== null) {
    // Keep structured answers readable instead of "[object Object]".
    text = JSON.stringify(rawResponse);
  }

  return { text: text || '(no synthesis)', cost };
}
|
|
315
|
+
|
|
316
|
+
// ─── Composite meta type ────────────────────────────────────
|
|
317
|
+
|
|
318
|
+
/** Execution metadata returned alongside the composite answer. */
export interface CompositeMeta {
  /** True when the budget ran out or at least one subtask step failed. */
  partialFailure: boolean;
  /** Count of failed gather steps plus failed analyze steps. */
  failedSubtasks: number;
  /** True when gathering stopped early because the cost ceiling was reached. */
  budgetExhausted: boolean;
  /** Number of subtasks in the orchestrator's plan. */
  subtasksPlanned: number;
  /** Number of subtasks for which a gather was attempted (failed attempts included). */
  subtasksExecuted: number;
  /** Court card chosen for this query, when classified. */
  courtCard?: CourtCard;
  /** Subrequest counters per phase. */
  subrequests: {
    /** Gather-phase subrequests; the orchestrator call is counted here too. */
    gather: number;
    /** One per analyzed subtask. */
    analyze: number;
    /** Synthesis-phase subrequests. */
    synthesize: number;
  };
}
|
|
331
|
+
|
|
332
|
+
// ─── Main composite executor ────────────────────────────────
|
|
333
|
+
|
|
334
|
+
export async function executeComposite(
|
|
335
|
+
intent: KernelIntent,
|
|
336
|
+
env: EdgeEnv,
|
|
337
|
+
mcpRegistry?: McpRegistry,
|
|
338
|
+
maxCost = 0.50,
|
|
339
|
+
): Promise<{ text: string; cost: number; meta?: CompositeMeta }> {
|
|
340
|
+
let totalCost = 0;
|
|
341
|
+
|
|
342
|
+
// Subrequest counters per phase
|
|
343
|
+
// Each AI call = 1 subrequest; each tool call inside gather = 1 additional subrequest
|
|
344
|
+
const subrequests = { gather: 0, analyze: 0, synthesize: 0 };
|
|
345
|
+
|
|
346
|
+
// Build context to get tool list
|
|
347
|
+
const mcpClient = new McpClient({
|
|
348
|
+
url: operatorConfig.integrations.bizops.fallbackUrl,
|
|
349
|
+
token: env.bizopsToken,
|
|
350
|
+
prefix: 'bizops',
|
|
351
|
+
fetcher: env.bizopsFetcher,
|
|
352
|
+
rpcPath: '/rpc',
|
|
353
|
+
});
|
|
354
|
+
const { systemPrompt, tools } = await buildContext({
|
|
355
|
+
apiKey: '',
|
|
356
|
+
model: '',
|
|
357
|
+
mcpClient,
|
|
358
|
+
mcpRegistry,
|
|
359
|
+
db: env.db,
|
|
360
|
+
channel: 'web',
|
|
361
|
+
conversationId: intent.source.threadId,
|
|
362
|
+
githubToken: env.githubToken,
|
|
363
|
+
githubRepo: env.githubRepo,
|
|
364
|
+
braveApiKey: env.braveApiKey,
|
|
365
|
+
roundtableDb: env.roundtableDb,
|
|
366
|
+
userQuery: intent.raw,
|
|
367
|
+
}, env.roundtableDb);
|
|
368
|
+
|
|
369
|
+
// Load conversation history for context continuity
|
|
370
|
+
let conversationContext = '';
|
|
371
|
+
if (intent.source.threadId) {
|
|
372
|
+
try {
|
|
373
|
+
const history = await getConversationHistory(env.db, intent.source.threadId, 6);
|
|
374
|
+
if (history.length > 0) {
|
|
375
|
+
const budgeted = budgetConversationHistory(history);
|
|
376
|
+
conversationContext = budgeted.map(m =>
|
|
377
|
+
`${m.role === 'user' ? 'User' : 'Assistant'}: ${m.content.slice(0, 300)}`
|
|
378
|
+
).join('\n');
|
|
379
|
+
}
|
|
380
|
+
} catch { /* non-fatal — orchestrate without history */ }
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
// ─── Court card classification (zero model calls) ──────────
|
|
384
|
+
const courtCard = classifyCourtCard(intent.raw, intent.classified, intent.complexity);
|
|
385
|
+
console.log(`[composite] court card: ${courtCard.label} (${courtCard.orientation})`);
|
|
386
|
+
|
|
387
|
+
// Build tool descriptions for orchestrator (names + descriptions + required params)
|
|
388
|
+
const toolDescriptions = (tools as Array<{ name: string; description: string; input_schema?: { required?: string[]; properties?: Record<string, { type?: string; description?: string }> } }>)
|
|
389
|
+
.map(t => {
|
|
390
|
+
let line = `- ${t.name}: ${t.description}`;
|
|
391
|
+
const schema = t.input_schema;
|
|
392
|
+
if (schema?.required?.length) {
|
|
393
|
+
const params = schema.required.map(p => {
|
|
394
|
+
const prop = schema.properties?.[p];
|
|
395
|
+
return prop?.type ? `${p}: ${prop.type}` : p;
|
|
396
|
+
}).join(', ');
|
|
397
|
+
line += ` [required: ${params}]`;
|
|
398
|
+
}
|
|
399
|
+
return line;
|
|
400
|
+
})
|
|
401
|
+
.join('\n');
|
|
402
|
+
|
|
403
|
+
// Phase 1: Orchestrate
|
|
404
|
+
let dag: ExecutionDAG;
|
|
405
|
+
try {
|
|
406
|
+
const orchResult = await orchestrate(intent, env, toolDescriptions, conversationContext, courtCard);
|
|
407
|
+
dag = orchResult.dag;
|
|
408
|
+
totalCost += orchResult.cost;
|
|
409
|
+
// Orchestrate is 1 Groq API call = 1 subrequest (counted in gather phase budget)
|
|
410
|
+
subrequests.gather += 1;
|
|
411
|
+
console.log(`[composite] orchestrated ${dag.subtasks.length} subtasks`);
|
|
412
|
+
|
|
413
|
+
// ─── DAG intent validator (fail-closed) ─────────────────
|
|
414
|
+
// If the synthesis instruction introduces an objective that doesn't relate
|
|
415
|
+
// to the original query, abort to single-model gpt_oss which preserves
|
|
416
|
+
// thread history and won't reinterpret intent.
|
|
417
|
+
if (!validateDagIntent(intent.raw, dag)) {
|
|
418
|
+
console.warn(`[composite] DAG intent drift detected — aborting to gpt_oss`);
|
|
419
|
+
const { executeWorkersAiChat } = await import('./workers-ai-chat.js');
|
|
420
|
+
return executeWorkersAiChat({
|
|
421
|
+
ai: env.ai!,
|
|
422
|
+
model: env.gptOssModel,
|
|
423
|
+
mcpClient,
|
|
424
|
+
db: env.db,
|
|
425
|
+
channel: 'web',
|
|
426
|
+
conversationId: intent.source.threadId,
|
|
427
|
+
githubToken: env.githubToken,
|
|
428
|
+
githubRepo: env.githubRepo,
|
|
429
|
+
braveApiKey: env.braveApiKey,
|
|
430
|
+
}, intent.raw);
|
|
431
|
+
}
|
|
432
|
+
} catch (err) {
|
|
433
|
+
// Fallback: single-executor GPT-OSS if orchestration fails
|
|
434
|
+
console.warn(`[composite] orchestration failed, falling back to gpt_oss: ${err instanceof Error ? err.message : String(err)}`);
|
|
435
|
+
const { executeWorkersAiChat } = await import('./workers-ai-chat.js');
|
|
436
|
+
return executeWorkersAiChat({
|
|
437
|
+
ai: env.ai!,
|
|
438
|
+
model: env.gptOssModel,
|
|
439
|
+
mcpClient,
|
|
440
|
+
db: env.db,
|
|
441
|
+
channel: 'web',
|
|
442
|
+
conversationId: intent.source.threadId,
|
|
443
|
+
githubToken: env.githubToken,
|
|
444
|
+
githubRepo: env.githubRepo,
|
|
445
|
+
braveApiKey: env.braveApiKey,
|
|
446
|
+
}, intent.raw);
|
|
447
|
+
}
|
|
448
|
+
|
|
449
|
+
// ─── Fast-path: single subtask with tools → skip analyze+synthesize ───
|
|
450
|
+
// When the orchestrator produces exactly 1 subtask, the full 4-model pipeline
|
|
451
|
+
// (orchestrate → gather → analyze → synthesize) is overkill. Run a single gather
|
|
452
|
+
// round with the original query and return the result directly. This eliminates
|
|
453
|
+
// 2 model hops and the parameter-paraphrasing they cause.
|
|
454
|
+
if (dag.subtasks.length === 1 && dag.subtasks[0].tools_needed.length > 0) {
|
|
455
|
+
const subtask = dag.subtasks[0];
|
|
456
|
+
try {
|
|
457
|
+
const { gathered, cost: gatherCost, subrequestCount } = await gatherSubtaskInstrumented(
|
|
458
|
+
subtask, tools, systemPrompt, mcpClient, env, mcpRegistry, intent.raw,
|
|
459
|
+
);
|
|
460
|
+
totalCost += gatherCost;
|
|
461
|
+
subrequests.gather += subrequestCount;
|
|
462
|
+
console.log(`[composite] fast-path: single subtask gathered in ${subrequestCount} subreqs, $${gatherCost.toFixed(4)}`);
|
|
463
|
+
return {
|
|
464
|
+
text: gathered,
|
|
465
|
+
cost: totalCost,
|
|
466
|
+
meta: {
|
|
467
|
+
partialFailure: false,
|
|
468
|
+
failedSubtasks: 0,
|
|
469
|
+
budgetExhausted: false,
|
|
470
|
+
subtasksPlanned: 1,
|
|
471
|
+
subtasksExecuted: 1,
|
|
472
|
+
courtCard: courtCard.card,
|
|
473
|
+
subrequests,
|
|
474
|
+
} satisfies CompositeMeta,
|
|
475
|
+
};
|
|
476
|
+
} catch (err) {
|
|
477
|
+
console.warn(`[composite] fast-path failed, falling through to full pipeline: ${err instanceof Error ? err.message : String(err)}`);
|
|
478
|
+
// Fall through to full pipeline
|
|
479
|
+
}
|
|
480
|
+
}
|
|
481
|
+
|
|
482
|
+
// Phase 2: Gather (sequential — CF Workers AI shared binding)
|
|
483
|
+
// Cost ceiling enforced per subtask — skip remaining if budget exhausted
|
|
484
|
+
const subtasksPlanned = dag.subtasks.length;
|
|
485
|
+
let budgetExhausted = false;
|
|
486
|
+
let failedSubtasks = 0;
|
|
487
|
+
const gatherResults: Array<{ id: string; description: string; gathered: string }> = [];
|
|
488
|
+
|
|
489
|
+
for (const subtask of dag.subtasks) {
|
|
490
|
+
if (totalCost >= maxCost) {
|
|
491
|
+
budgetExhausted = true;
|
|
492
|
+
console.warn(`[composite] budget ceiling $${maxCost.toFixed(2)} hit after ${gatherResults.length}/${subtasksPlanned} subtasks — skipping remaining`);
|
|
493
|
+
break;
|
|
494
|
+
}
|
|
495
|
+
|
|
496
|
+
try {
|
|
497
|
+
const { gathered, cost, subrequestCount } = await gatherSubtaskInstrumented(subtask, tools, systemPrompt, mcpClient, env, mcpRegistry, intent.raw);
|
|
498
|
+
gatherResults.push({ id: subtask.id, description: subtask.description, gathered });
|
|
499
|
+
totalCost += cost;
|
|
500
|
+
subrequests.gather += subrequestCount;
|
|
501
|
+
console.log(`[composite] gathered ${subtask.id}: ${gathered.length} chars, $${cost.toFixed(4)}, ${subrequestCount} subreqs`);
|
|
502
|
+
} catch (err) {
|
|
503
|
+
console.warn(`[composite] gather failed for ${subtask.id}: ${err instanceof Error ? err.message : String(err)}`);
|
|
504
|
+
gatherResults.push({ id: subtask.id, description: subtask.description, gathered: `Error: ${err instanceof Error ? err.message : String(err)}` });
|
|
505
|
+
failedSubtasks += 1;
|
|
506
|
+
}
|
|
507
|
+
}
|
|
508
|
+
|
|
509
|
+
// Phase 3: Analyze (parallel — Groq HTTP calls are independent)
|
|
510
|
+
// Only analyze subtasks that were actually gathered
|
|
511
|
+
const gatheredSubtasks = dag.subtasks.slice(0, gatherResults.length);
|
|
512
|
+
const analyzePromises = gatheredSubtasks.map(async (subtask, i) => {
|
|
513
|
+
const gathered = gatherResults[i]?.gathered ?? '';
|
|
514
|
+
try {
|
|
515
|
+
const { analysis, cost } = await analyzeSubtask(subtask, gathered, env, courtCard);
|
|
516
|
+
return { id: subtask.id, description: subtask.description, gathered, analysis, cost, ok: true };
|
|
517
|
+
} catch (err) {
|
|
518
|
+
console.warn(`[composite] analyze failed for ${subtask.id}: ${err instanceof Error ? err.message : String(err)}`);
|
|
519
|
+
return { id: subtask.id, description: subtask.description, gathered, analysis: gathered, cost: 0, ok: false };
|
|
520
|
+
}
|
|
521
|
+
});
|
|
522
|
+
|
|
523
|
+
const analyzed = await Promise.all(analyzePromises);
|
|
524
|
+
for (const a of analyzed) {
|
|
525
|
+
totalCost += a.cost;
|
|
526
|
+
subrequests.analyze += 1; // 1 Groq call per subtask
|
|
527
|
+
if (!a.ok) failedSubtasks += 1;
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
const subtaskResults: SubtaskResult[] = analyzed.map(a => ({
|
|
531
|
+
id: a.id,
|
|
532
|
+
description: a.description,
|
|
533
|
+
gathered: a.gathered,
|
|
534
|
+
analysis: a.analysis,
|
|
535
|
+
}));
|
|
536
|
+
|
|
537
|
+
console.log(`[composite] analyzed ${subtaskResults.length} subtasks, pre-synthesis cost: $${totalCost.toFixed(4)}`);
|
|
538
|
+
|
|
539
|
+
// Phase 4: Synthesize (Claude with Groq fallback)
|
|
540
|
+
const useOpus = dag.model_override === 'opus';
|
|
541
|
+
const partialFailure = budgetExhausted || failedSubtasks > 0;
|
|
542
|
+
const meta: CompositeMeta = {
|
|
543
|
+
partialFailure,
|
|
544
|
+
failedSubtasks,
|
|
545
|
+
budgetExhausted,
|
|
546
|
+
subtasksPlanned,
|
|
547
|
+
subtasksExecuted: gatherResults.length,
|
|
548
|
+
courtCard: courtCard.card,
|
|
549
|
+
subrequests,
|
|
550
|
+
};
|
|
551
|
+
|
|
552
|
+
const synthesisInstruction = budgetExhausted
|
|
553
|
+
? `${dag.synthesis_instruction} NOTE: Only ${gatherResults.length} of ${subtasksPlanned} subtasks completed due to cost ceiling. Synthesize from available data only.`
|
|
554
|
+
: dag.synthesis_instruction;
|
|
555
|
+
|
|
556
|
+
try {
|
|
557
|
+
const { text, cost } = await synthesize(intent, subtaskResults, synthesisInstruction, env, useOpus, courtCard);
|
|
558
|
+
totalCost += cost;
|
|
559
|
+
subrequests.synthesize += 1;
|
|
560
|
+
return { text, cost: totalCost, meta };
|
|
561
|
+
} catch (err) {
|
|
562
|
+
const msg = err instanceof Error ? err.message : String(err);
|
|
563
|
+
if (msg.includes('Anthropic API error') || msg.includes('credit balance')) {
|
|
564
|
+
console.warn(`[composite] Claude synthesis failed, falling back to Groq: ${msg.slice(0, 120)}`);
|
|
565
|
+
try {
|
|
566
|
+
const { text, cost } = await synthesizeGroqFallback(intent, subtaskResults, synthesisInstruction, env, courtCard);
|
|
567
|
+
totalCost += cost;
|
|
568
|
+
subrequests.synthesize += 1;
|
|
569
|
+
return { text, cost: totalCost, meta };
|
|
570
|
+
} catch (groqErr) {
|
|
571
|
+
console.error('[composite] Groq fallback also failed:', groqErr instanceof Error ? groqErr.message : String(groqErr));
|
|
572
|
+
return { text: `Synthesis failed: both Claude and Groq unavailable. Raw subtask data is available.`, cost: totalCost, meta };
|
|
573
|
+
}
|
|
574
|
+
}
|
|
575
|
+
throw err;
|
|
576
|
+
}
|
|
577
|
+
}
|
|
578
|
+
|
|
579
|
+
// ─── Instrumented gather wrapper ─────────────────────────────
|
|
580
|
+
// Wraps gatherSubtask to count subrequests (AI calls + tool calls)
|
|
581
|
+
|
|
582
|
+
/**
 * Runs the gather phase for one subtask and reports how many subrequests it used.
 *
 * The model is offered only the tools named in `subtask.tools_needed` and is
 * driven through a tool loop of at most `MAX_GATHER_ROUNDS` rounds. Each AI call
 * and each tool call counts as one subrequest. If the model is still requesting
 * tools when the rounds run out, the transcript is condensed and one final
 * tool-less call asks for a summary of what was gathered.
 *
 * @param subtask - DAG subtask to gather for; `tools_needed` scopes the tool list.
 * @param allTools - Full tool list; entries are read as `{ name, description, input_schema }`.
 * @param systemPrompt - Base system prompt; a subtask-specific focus is appended.
 * @param mcpClient - MCP client used to resolve tools not handled in-process.
 * @param env - Edge environment; `env.ai` must be present.
 * @param mcpRegistry - Optional registry passed through to MCP tool resolution.
 * @param originalQuery - Optional original request, included verbatim in the user
 *   message so exact identifiers are available to the model.
 * @returns The gathered text, the accumulated model cost, and the subrequest count.
 * @throws Error if `env.ai` is not available. Errors thrown by the AI binding or
 *   by a tool call are not caught here and propagate to the caller.
 */
async function gatherSubtaskInstrumented(
  subtask: ExecutionDAG['subtasks'][number],
  allTools: unknown[],
  systemPrompt: string,
  mcpClient: McpClient,
  env: EdgeEnv,
  mcpRegistry?: McpRegistry,
  originalQuery?: string,
): Promise<{ gathered: string; cost: number; subrequestCount: number }> {
  if (!env.ai) throw new Error('Workers AI binding not available');

  // Filter tools to only those needed for this subtask
  const anthropicToolDefs = allTools as Array<{ name: string; description: string; input_schema: unknown }>;
  const scopedTools = subtask.tools_needed.length > 0
    ? anthropicToolDefs.filter(t => subtask.tools_needed.includes(t.name))
    : [];
  const openAiTools = toOpenAiTools(scopedTools);

  // Include original query so the gather model has access to exact IDs, UUIDs, and enum values
  const userContent = originalQuery
    ? `Original request: ${originalQuery}\n\nYour subtask: ${subtask.description}\n\nIMPORTANT: Use exact identifiers (UUIDs, IDs, enum values) from the original request when calling tools.`
    : subtask.description;

  const messages: ChatMessage[] = [
    { role: 'system', content: `${systemPrompt}\n\nFocus: ${subtask.description}\nGather the data needed and return your findings.` },
    { role: 'user', content: userContent },
  ];

  let totalCost = 0;
  let subrequestCount = 0;

  // Tool loop — up to MAX_GATHER_ROUNDS
  for (let round = 0; round < MAX_GATHER_ROUNDS; round++) {
    subrequestCount += 1; // 1 AI call per round
    const result = await env.ai.run(env.gptOssModel as Parameters<Ai['run']>[0], {
      messages,
      ...(openAiTools.length > 0 ? { tools: openAiTools } : {}),
      max_tokens: 2048,
      temperature: 0.2,
      top_p: 0.9,
      frequency_penalty: 0.3,
    } as Record<string, unknown>) as AiChatResponse;

    // Rates are applied per million tokens.
    const usage = extractUsage(result);
    if (usage) {
      totalCost += (usage.prompt_tokens * CF_GPT_OSS_RATES.input
        + usage.completion_tokens * CF_GPT_OSS_RATES.output) / 1_000_000;
    }

    const toolCalls = extractToolCalls(result);
    const responseText = extractText(result);

    // No tool requests means the model has produced its final findings.
    if (toolCalls.length === 0) {
      return { gathered: responseText ?? '(no data gathered)', cost: totalCost, subrequestCount };
    }

    messages.push({ role: 'assistant', content: responseText ?? '', tool_calls: toolCalls });

    for (const call of toolCalls) {
      subrequestCount += 1; // 1 subrequest per tool call (external fetch or DB query)

      // Tool arguments arrive as model-generated JSON text. Only a plain JSON
      // object is a usable argument map: `null`, arrays and primitives parse
      // successfully but must not be handed to tool handlers as a Record.
      let args: Record<string, unknown> = {};
      try {
        const parsed: unknown = JSON.parse(call.function.arguments);
        if (parsed !== null && typeof parsed === 'object' && !Array.isArray(parsed)) {
          args = parsed as Record<string, unknown>;
        } else {
          console.warn(`[composite] tool ${call.function.name} arguments were not a JSON object — using empty args`);
        }
      } catch {
        /* empty or unparsable arguments — deliberate best-effort: call the tool with empty args */
      }

      let toolResult: string;
      // In-process tools take precedence; a null result means "not handled here".
      const inProcess = await handleInProcessTool(
        env.db, call.function.name, args,
        env.githubToken, env.githubRepo, env.braveApiKey,
        env.roundtableDb,
        { apiKey: env.anthropicApiKey, model: env.claudeModel, baseUrl: env.anthropicBaseUrl },
        env.memoryBinding,
        { resendApiKey: env.resendApiKey, resendApiKeyPersonal: env.resendApiKeyPersonal },
      );

      if (inProcess !== null) {
        toolResult = inProcess;
      } else {
        const resolved = resolveMcpTool(call.function.name, mcpClient, mcpRegistry);
        if (resolved) {
          toolResult = await callMcpWithRetry(resolved.client, resolved.mcpName, args);
        } else {
          // Reported back to the model as the tool result rather than thrown.
          toolResult = `Unknown tool: ${call.function.name}`;
        }
      }

      messages.push({ role: 'tool', tool_call_id: call.id, content: toolResult });
    }
  }

  // Force summary if tool rounds exhausted
  // Condense messages: strip tool_calls metadata and truncate tool results
  // to prevent context overflow when sending to GPT-OSS without tools definition
  const condensedGather: ChatMessage[] = [messages[0]]; // system
  const gatherFindings: string[] = [];
  for (let i = 1; i < messages.length; i++) {
    const msg = messages[i];
    if (msg.role === 'user' && !('tool_call_id' in msg)) {
      condensedGather.push(msg);
    } else if (msg.role === 'assistant' && msg.content) {
      gatherFindings.push(msg.content);
    } else if (msg.role === 'tool') {
      // Cap each individual tool result at 2000 characters.
      const truncated = msg.content.length > 2000
        ? msg.content.slice(0, 2000) + '... [truncated]'
        : msg.content;
      gatherFindings.push(truncated);
    }
  }
  if (gatherFindings.length > 0) {
    // Concatenate findings in order, stopping at the first one that would
    // push the combined text past 20000 characters.
    let accumulated = '';
    for (const f of gatherFindings) {
      if (accumulated.length + f.length > 20000) {
        accumulated += '\n[... additional data truncated]';
        break;
      }
      accumulated += '\n' + f;
    }
    condensedGather.push({ role: 'assistant', content: `Gathered data:\n${accumulated.trim()}` });
  }
  condensedGather.push({ role: 'user', content: 'Summarize all data gathered so far. Return the raw findings.' });

  subrequestCount += 1; // the final summary call is also an AI subrequest
  const summaryResult = await env.ai.run(env.gptOssModel as Parameters<Ai['run']>[0], {
    messages: condensedGather,
    max_tokens: 2048,
    temperature: 0.2,
  } as Record<string, unknown>) as AiChatResponse;

  const summaryUsage = extractUsage(summaryResult);
  if (summaryUsage) {
    totalCost += (summaryUsage.prompt_tokens * CF_GPT_OSS_RATES.input
      + summaryUsage.completion_tokens * CF_GPT_OSS_RATES.output) / 1_000_000;
  }

  return { gathered: extractText(summaryResult) ?? '(gather exhausted)', cost: totalCost, subrequestCount };
}
|