@jellyos/agent 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.npm.md +212 -0
- package/bin/jellyos-mcp +26 -0
- package/dist/api/ExtensionAPI.d.ts +11 -0
- package/dist/cli.js +127 -49
- package/dist/index.d.ts +15 -2
- package/dist/index.js +13 -3
- package/dist/loader.d.ts +2 -9
- package/dist/loader.js +2 -1
- package/dist/mcp/entry.d.ts +2 -0
- package/dist/mcp/entry.js +71 -0
- package/dist/mcp/server.d.ts +31 -0
- package/dist/mcp/server.js +128 -0
- package/dist/models/ModelRegistry.d.ts +12 -1
- package/dist/models/ModelRegistry.js +105 -9
- package/dist/runner/AgentRunner.d.ts +19 -2
- package/dist/runner/AgentRunner.js +247 -17
- package/dist/runner/ModelClient.d.ts +10 -1
- package/dist/runner/ModelClient.js +79 -6
- package/dist/runner/SwarmRouter.d.ts +6 -6
- package/dist/runner/SwarmRouter.js +73 -24
- package/dist/runner/ToolDispatcher.d.ts +10 -0
- package/dist/runner/ToolDispatcher.js +106 -2
- package/dist/scheduler/AgentScheduler.d.ts +118 -0
- package/dist/scheduler/AgentScheduler.js +253 -0
- package/dist/session/ContextStore.d.ts +96 -0
- package/dist/session/ContextStore.js +207 -0
- package/dist/session/GoalManager.d.ts +101 -0
- package/dist/session/GoalManager.js +167 -0
- package/dist/session/MemoryStore.d.ts +48 -0
- package/dist/session/MemoryStore.js +166 -0
- package/dist/session/SessionManager.d.ts +45 -4
- package/dist/session/SessionManager.js +151 -8
- package/dist/telemetry/Tracer.d.ts +48 -0
- package/dist/telemetry/Tracer.js +102 -0
- package/dist/tools/MarketSentiment.d.ts +166 -0
- package/dist/tools/MarketSentiment.js +209 -0
- package/dist/tools/NewsSentiment.js +40 -13
- package/dist/tools/PriceFeed.d.ts +2 -0
- package/dist/tools/PriceFeed.js +79 -27
- package/dist/tools/TechnicalAnalysis.d.ts +37 -0
- package/dist/tools/TechnicalAnalysis.js +85 -0
- package/dist/tui/App.d.ts +4 -3
- package/dist/tui/App.js +346 -119
- package/dist/tui/ModelSelector.d.ts +22 -0
- package/dist/tui/ModelSelector.js +86 -0
- package/dist/tui/REPL.d.ts +2 -1
- package/dist/tui/REPL.js +11 -6
- package/package.json +10 -6
- package/dist/api/ExtensionAPI.d.ts.map +0 -1
- package/dist/api/ExtensionAPI.js.map +0 -1
- package/dist/api/Registry.d.ts.map +0 -1
- package/dist/api/Registry.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/loader.d.ts.map +0 -1
- package/dist/loader.js.map +0 -1
- package/dist/models/CostTracker.d.ts.map +0 -1
- package/dist/models/CostTracker.js.map +0 -1
- package/dist/models/ModelRegistry.d.ts.map +0 -1
- package/dist/models/ModelRegistry.js.map +0 -1
- package/dist/models/index.d.ts.map +0 -1
- package/dist/models/index.js.map +0 -1
- package/dist/runner/AgentRunner.d.ts.map +0 -1
- package/dist/runner/AgentRunner.js.map +0 -1
- package/dist/runner/ModelClient.d.ts.map +0 -1
- package/dist/runner/ModelClient.js.map +0 -1
- package/dist/runner/SwarmRouter.d.ts.map +0 -1
- package/dist/runner/SwarmRouter.js.map +0 -1
- package/dist/runner/ToolDispatcher.d.ts.map +0 -1
- package/dist/runner/ToolDispatcher.js.map +0 -1
- package/dist/session/SessionManager.d.ts.map +0 -1
- package/dist/session/SessionManager.js.map +0 -1
- package/dist/tools/NewsSentiment.d.ts.map +0 -1
- package/dist/tools/NewsSentiment.js.map +0 -1
- package/dist/tools/PriceFeed.d.ts.map +0 -1
- package/dist/tools/PriceFeed.js.map +0 -1
- package/dist/tools/TechnicalAnalysis.d.ts.map +0 -1
- package/dist/tools/TechnicalAnalysis.js.map +0 -1
- package/dist/tools/index.d.ts.map +0 -1
- package/dist/tools/index.js.map +0 -1
- package/dist/tui/App.d.ts.map +0 -1
- package/dist/tui/App.js.map +0 -1
- package/dist/tui/REPL.d.ts.map +0 -1
- package/dist/tui/REPL.js.map +0 -1
- package/dist/tui/StatusBar.d.ts.map +0 -1
- package/dist/tui/StatusBar.js.map +0 -1
- package/dist/tui/theme.d.ts.map +0 -1
- package/dist/tui/theme.js.map +0 -1
|
@@ -7,9 +7,39 @@
|
|
|
7
7
|
* 4. Emits events so the TUI can render incrementally
|
|
8
8
|
*/
|
|
9
9
|
import { ModelClient, resolveModelChain, } from "./ModelClient.js";
|
|
10
|
-
import { ToolDispatcher } from "./ToolDispatcher.js";
|
|
10
|
+
import { ToolDispatcher, forecastContextGrowth } from "./ToolDispatcher.js";
|
|
11
11
|
import { SwarmRouter } from "./SwarmRouter.js";
|
|
12
|
+
import { priceFeed } from "../tools/PriceFeed.js";
|
|
13
|
+
import { newsFeed } from "../tools/NewsSentiment.js";
|
|
14
|
+
import { Tracer } from "../telemetry/Tracer.js";
|
|
12
15
|
const MAX_TOOL_ROUNDS = 12;
|
|
16
|
+
const REFLECT_AT_ROUND = 6;
|
|
17
|
+
function detectTaskType(message) {
|
|
18
|
+
const m = message.toLowerCase();
|
|
19
|
+
if (/\bhow much|price of|worth|cost of|current price\b/.test(m))
|
|
20
|
+
return "price_check";
|
|
21
|
+
if (/\brsi|macd|bollinger|technical|chart|candle|ohlcv\b/.test(m))
|
|
22
|
+
return "ta_analysis";
|
|
23
|
+
if (/\bcode|script|write|implement|function|typescript|python\b/.test(m))
|
|
24
|
+
return "code";
|
|
25
|
+
if (/\bpredict|forecast|will.*price|going to|expect.*price\b/.test(m))
|
|
26
|
+
return "prediction";
|
|
27
|
+
if (/\bnews|sentiment|latest|headlines|today.*market\b/.test(m))
|
|
28
|
+
return "news_summary";
|
|
29
|
+
if (/\bstrategy|plan|portfolio|risk|position|trade\b/.test(m))
|
|
30
|
+
return "strategy";
|
|
31
|
+
return "general";
|
|
32
|
+
}
|
|
33
|
+
// Task → tier mapping: cheap tasks go to workers, deep tasks go to orchestrators
|
|
34
|
+
const TASK_TIER_MAP = {
|
|
35
|
+
price_check: "worker", // fast cheap answer: $0.02-0.10/M
|
|
36
|
+
news_summary: "worker", // simple text summarization
|
|
37
|
+
code: "worker", // qwen3-coder, deepseek are great
|
|
38
|
+
ta_analysis: "analyst", // needs math accuracy
|
|
39
|
+
general: "analyst", // balanced
|
|
40
|
+
strategy: "orchestrator", // needs deep reasoning
|
|
41
|
+
prediction: "orchestrator", // thinking model for max effect
|
|
42
|
+
};
|
|
13
43
|
/** Effect level → swarm behaviour config */
|
|
14
44
|
const EFFECT_SWARM = {
|
|
15
45
|
eco: { threshold: 999, maxAgents: 0 }, // never swarm
|
|
@@ -23,17 +53,27 @@ export class AgentRunner {
|
|
|
23
53
|
onEvent;
|
|
24
54
|
sessionCtx;
|
|
25
55
|
effectLevel;
|
|
56
|
+
goalManager;
|
|
57
|
+
contextStore;
|
|
26
58
|
modelChain;
|
|
27
59
|
dispatcher;
|
|
28
60
|
swarmRouter;
|
|
29
61
|
modelRegistry;
|
|
30
62
|
costTracker;
|
|
31
|
-
|
|
63
|
+
abortController = null;
|
|
64
|
+
/** #25: Cancel the current in-flight stream immediately */
|
|
65
|
+
abort() {
|
|
66
|
+
this.abortController?.abort();
|
|
67
|
+
this.abortController = null;
|
|
68
|
+
}
|
|
69
|
+
constructor(registry, session, onEvent, sessionCtx, effectLevel = "normal", modelReg, costTracker, goalManager, contextStore) {
|
|
32
70
|
this.registry = registry;
|
|
33
71
|
this.session = session;
|
|
34
72
|
this.onEvent = onEvent;
|
|
35
73
|
this.sessionCtx = sessionCtx;
|
|
36
74
|
this.effectLevel = effectLevel;
|
|
75
|
+
this.goalManager = goalManager;
|
|
76
|
+
this.contextStore = contextStore;
|
|
37
77
|
this.modelRegistry = modelReg;
|
|
38
78
|
this.costTracker = costTracker;
|
|
39
79
|
this.modelChain = resolveModelChain(modelReg);
|
|
@@ -60,20 +100,106 @@ export class AgentRunner {
|
|
|
60
100
|
}
|
|
61
101
|
/** Run one user turn — may invoke multiple tool rounds and model fallbacks internally */
|
|
62
102
|
async run(userMessage) {
|
|
63
|
-
//
|
|
103
|
+
// #30: Start trace for this turn
|
|
104
|
+
const sessionId = `jelly-${Date.now().toString(36)}`;
|
|
105
|
+
const tracer = new Tracer(sessionId, userMessage);
|
|
106
|
+
// 1. Fire before_agent_start hooks
|
|
64
107
|
await this.registry.fireHook("before_agent_start", this.sessionCtx);
|
|
65
|
-
// 2.
|
|
66
|
-
this.
|
|
67
|
-
|
|
108
|
+
// 2. #38: Rebuild dynamic system prompt each turn
|
|
109
|
+
const basePrompt = this.registry.getSystemPrompt();
|
|
110
|
+
const dynamicSuffix = this.buildDynamicSystemSuffix();
|
|
111
|
+
this.session.setSystemPrompt(basePrompt + dynamicSuffix);
|
|
112
|
+
// 3. #40/#32: Pre-flight context pressure check — smart compact if needed
|
|
113
|
+
const pressure = this.session.getContextPressure();
|
|
114
|
+
if (pressure.pct >= 85 && pressure.pct < 95) {
|
|
115
|
+
// #32: Try tier-2 summarization with cheap model before hard-dropping turns
|
|
116
|
+
await this.session.summarizeOldTurns(async (messages) => {
|
|
117
|
+
const chain = resolveModelChain(this.modelRegistry);
|
|
118
|
+
// Use cheapest available model for summarization (worker or free tier)
|
|
119
|
+
const summaryCfg = chain[chain.length - 1] ?? chain[0];
|
|
120
|
+
const client = new ModelClient(summaryCfg, this.modelRegistry);
|
|
121
|
+
const preview = messages
|
|
122
|
+
.map(m => `${m.role}: ${typeof m.content === "string" ? m.content.slice(0, 150) : "[tool call]"}`)
|
|
123
|
+
.join("\n");
|
|
124
|
+
let out = "";
|
|
125
|
+
for await (const chunk of client.stream([
|
|
126
|
+
{ role: "system", content: "Summarize the following conversation in 3-5 bullet points. Be specific about prices, decisions, and findings." },
|
|
127
|
+
{ role: "user", content: preview },
|
|
128
|
+
], [])) {
|
|
129
|
+
if (chunk.type === "delta" && chunk.text)
|
|
130
|
+
out += chunk.text;
|
|
131
|
+
}
|
|
132
|
+
return out || "(summary unavailable)";
|
|
133
|
+
});
|
|
134
|
+
}
|
|
135
|
+
else if (pressure.pct >= 95) {
|
|
136
|
+
this.session.forceCompact();
|
|
137
|
+
}
|
|
138
|
+
// 4. #33: Guard swarm against insufficient context headroom
|
|
68
139
|
if (this.swarmRouter.shouldSwarm(userMessage)) {
|
|
140
|
+
if (!this.session.getContextPressure().turboReady) {
|
|
141
|
+
this.onEvent({ type: "text_delta", text: "\u26a1 Compacting context for turbo mode...\n" });
|
|
142
|
+
this.session.forceCompact();
|
|
143
|
+
}
|
|
69
144
|
await this.runSwarm(userMessage);
|
|
70
145
|
return;
|
|
71
146
|
}
|
|
72
|
-
//
|
|
73
|
-
|
|
74
|
-
|
|
147
|
+
// 5. #16: Inject live market context into the user message
|
|
148
|
+
const enriched = await this.buildLiveContext(userMessage);
|
|
149
|
+
this.session.addMessage({ role: "user", content: enriched });
|
|
150
|
+
await this.runSingleAgent(userMessage, tracer);
|
|
151
|
+
tracer.flush("ok");
|
|
75
152
|
this.onEvent({ type: "turn_done" });
|
|
76
153
|
}
|
|
154
|
+
// ── #16: Live market context injection ─────────────────────────────────────
|
|
155
|
+
async buildLiveContext(message) {
|
|
156
|
+
const parts = [];
|
|
157
|
+
// Extract ticker symbols mentioned in the message
|
|
158
|
+
const tickerRe = /\b(BTC|ETH|SOL|BNB|MATIC|ARB|OP|AVAX|LINK|UNI|DOGE|XRP|ADA|DOT|ATOM|NEAR|SUI|APT|PEPE|AAVE|WIF|BONK)\b/gi;
|
|
159
|
+
const mentioned = [...new Set((message.match(tickerRe) ?? []).map(s => s.toLowerCase()))];
|
|
160
|
+
if (mentioned.length > 0) {
|
|
161
|
+
const ticks = priceFeed.getMultiple(mentioned);
|
|
162
|
+
if (ticks.length > 0) {
|
|
163
|
+
parts.push("Current prices: " + ticks.map(t => priceFeed.formatPrice(t)).join(" | "));
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
// News sentiment badge if message is analysis/sentiment related
|
|
167
|
+
if (/sentiment|news|market|mood|bullish|bearish|fear|greed/i.test(message)) {
|
|
168
|
+
const badge = newsFeed.statusBadge();
|
|
169
|
+
if (badge && badge !== "📰 ?")
|
|
170
|
+
parts.push(`News: ${badge}`);
|
|
171
|
+
}
|
|
172
|
+
if (parts.length === 0)
|
|
173
|
+
return message;
|
|
174
|
+
return `<live_context>\n${parts.join("\n")}\n</live_context>\n\n${message}`;
|
|
175
|
+
}
|
|
176
|
+
// ── #38: Dynamic system prompt suffix ──────────────────────────────────────
|
|
177
|
+
buildDynamicSystemSuffix() {
|
|
178
|
+
const sections = [];
|
|
179
|
+
// Active goals
|
|
180
|
+
const goals = this.goalManager?.getActive() ?? [];
|
|
181
|
+
if (goals.length > 0) {
|
|
182
|
+
sections.push(`\n## Active Goals\n${goals.map(g => `- [${g.id}] ${g.text}`).join("\n")}`);
|
|
183
|
+
}
|
|
184
|
+
// Active task context references
|
|
185
|
+
const activeTasks = this.contextStore?.getActiveTasks() ?? [];
|
|
186
|
+
if (activeTasks.length > 0) {
|
|
187
|
+
sections.push(`\n## Saved Task Context\n` +
|
|
188
|
+
activeTasks.map(t => this.contextStore.getReference(t.taskId)).join("\n"));
|
|
189
|
+
}
|
|
190
|
+
// Context pressure advisory
|
|
191
|
+
const pressure = this.session.getContextPressure();
|
|
192
|
+
if (pressure.level === "red" || pressure.level === "critical") {
|
|
193
|
+
sections.push(`\n## ⚠ Context Window at ${pressure.pct}%\n` +
|
|
194
|
+
`Be concise. Prefer short summaries. Use read_task_context() for historical data rather than repeating it. ` +
|
|
195
|
+
`${pressure.turboReady ? "" : "Swarm mode is temporarily paused to preserve headroom."}`);
|
|
196
|
+
}
|
|
197
|
+
// Effect level advisory
|
|
198
|
+
if (this.effectLevel === "eco") {
|
|
199
|
+
sections.push("\n## Mode: ECO\nBe brief. Minimize tool calls. Prefer one tool per response.");
|
|
200
|
+
}
|
|
201
|
+
return sections.join("");
|
|
202
|
+
}
|
|
77
203
|
// ── Swarm path ─────────────────────────────────────────────────────────────
|
|
78
204
|
async runSwarm(userMessage) {
|
|
79
205
|
const systemPrompt = this.registry.getSystemPrompt();
|
|
@@ -85,7 +211,7 @@ export class AgentRunner {
|
|
|
85
211
|
ms: result.ms,
|
|
86
212
|
remaining,
|
|
87
213
|
});
|
|
88
|
-
});
|
|
214
|
+
}, this.contextStore);
|
|
89
215
|
this.onEvent({ type: "swarm_review", subCount: subResults.length });
|
|
90
216
|
// Stream reviewer synthesis token-by-token (already complete — re-emit as deltas)
|
|
91
217
|
for (const ch of synthesis) {
|
|
@@ -97,25 +223,48 @@ export class AgentRunner {
|
|
|
97
223
|
this.onEvent({ type: "turn_done" });
|
|
98
224
|
}
|
|
99
225
|
// ── Single-agent path (also used for each sub-task in turbo/max) ────────────
|
|
100
|
-
async runSingleAgent() {
|
|
226
|
+
async runSingleAgent(userMessage, tracer) {
|
|
101
227
|
const openAITools = this.registry.toOpenAITools();
|
|
228
|
+
const t0 = Date.now();
|
|
229
|
+
this.abortController = new AbortController();
|
|
230
|
+
const abortSignal = this.abortController.signal;
|
|
102
231
|
let rounds = 0;
|
|
232
|
+
// #37: Route to appropriate model tier based on task type
|
|
233
|
+
let taskModelChain = this.modelChain;
|
|
234
|
+
if (userMessage && this.modelRegistry) {
|
|
235
|
+
const taskType = detectTaskType(userMessage);
|
|
236
|
+
const targetTier = TASK_TIER_MAP[taskType];
|
|
237
|
+
// For max effect + prediction tasks, enable thinking mode
|
|
238
|
+
const useThinking = this.effectLevel === "max" && taskType === "prediction";
|
|
239
|
+
const taskModel = this.modelRegistry.pick(targetTier);
|
|
240
|
+
if (taskModel) {
|
|
241
|
+
const cfg = this.modelRegistry.buildConfig(taskModel.id, this.modelChain[0]?.maxTokens ?? 8192, this.modelChain[0]?.temperature ?? 0.7, targetTier);
|
|
242
|
+
if (cfg) {
|
|
243
|
+
if (useThinking) {
|
|
244
|
+
cfg.thinkingEnabled = true;
|
|
245
|
+
cfg.thinkingBudget = 8000;
|
|
246
|
+
}
|
|
247
|
+
taskModelChain = [cfg, ...this.modelChain.filter(m => m.model !== cfg.model)];
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
103
251
|
while (rounds < MAX_TOOL_ROUNDS) {
|
|
104
252
|
rounds++;
|
|
105
253
|
const messages = this.session.getMessages();
|
|
106
254
|
let assistantText = "";
|
|
107
255
|
let pendingToolCalls = [];
|
|
108
256
|
let modelError = null;
|
|
257
|
+
let usageTokens = null;
|
|
109
258
|
// Try model chain — rotate on 429/5xx
|
|
110
|
-
for (let mi = 0; mi <
|
|
111
|
-
const cfg =
|
|
259
|
+
for (let mi = 0; mi < taskModelChain.length; mi++) {
|
|
260
|
+
const cfg = taskModelChain[mi];
|
|
112
261
|
const client = new ModelClient(cfg, this.modelRegistry);
|
|
113
262
|
assistantText = "";
|
|
114
263
|
pendingToolCalls = [];
|
|
115
264
|
modelError = null;
|
|
116
265
|
let gotError = false;
|
|
117
266
|
let isRateLimit = false;
|
|
118
|
-
for await (const chunk of client.stream(messages, openAITools)) {
|
|
267
|
+
for await (const chunk of client.stream(messages, openAITools, abortSignal)) {
|
|
119
268
|
if (chunk.type === "delta" && chunk.text) {
|
|
120
269
|
assistantText += chunk.text;
|
|
121
270
|
this.onEvent({ type: "text_delta", text: chunk.text });
|
|
@@ -123,9 +272,18 @@ export class AgentRunner {
|
|
|
123
272
|
else if (chunk.type === "tool_call" && chunk.tool_calls) {
|
|
124
273
|
pendingToolCalls = chunk.tool_calls;
|
|
125
274
|
}
|
|
275
|
+
else if (chunk.type === "done" && chunk.finish_reason === "aborted") {
|
|
276
|
+
// #25: Stream was aborted by user — clean exit
|
|
277
|
+
this.onEvent({ type: "turn_done" });
|
|
278
|
+
return;
|
|
279
|
+
}
|
|
280
|
+
else if (chunk.type === "done" && chunk.usage) {
|
|
281
|
+
usageTokens = chunk.usage;
|
|
282
|
+
}
|
|
126
283
|
else if (chunk.type === "error") {
|
|
127
284
|
modelError = chunk.error ?? "Unknown model error";
|
|
128
285
|
gotError = true;
|
|
286
|
+
this.costTracker?.recordError(); // #1: track errors
|
|
129
287
|
// Rotate on 429 rate-limit OR any 5xx server error
|
|
130
288
|
isRateLimit = /429|rate.?limit/i.test(modelError)
|
|
131
289
|
|| (chunk.status !== undefined && chunk.status >= 500);
|
|
@@ -135,7 +293,7 @@ export class AgentRunner {
|
|
|
135
293
|
if (!gotError)
|
|
136
294
|
break; // success — use this model's output
|
|
137
295
|
// Rotate to next model on rate-limit or server errors
|
|
138
|
-
const nextCfg =
|
|
296
|
+
const nextCfg = taskModelChain[mi + 1];
|
|
139
297
|
// Save any partial text the user already saw before rotating
|
|
140
298
|
if (assistantText.trim()) {
|
|
141
299
|
this.session.addMessage({ role: "assistant", content: assistantText + "\n\n[connection interrupted — retrying with fallback model]" });
|
|
@@ -160,6 +318,22 @@ export class AgentRunner {
|
|
|
160
318
|
this.onEvent({ type: "error", message: modelError });
|
|
161
319
|
return;
|
|
162
320
|
}
|
|
321
|
+
// #1: Record cost for this model call
|
|
322
|
+
if (this.costTracker && !modelError) {
|
|
323
|
+
const cfg = this.modelChain[0];
|
|
324
|
+
if (usageTokens) {
|
|
325
|
+
this.costTracker.record(cfg.model, usageTokens.prompt_tokens, usageTokens.completion_tokens, Date.now() - t0);
|
|
326
|
+
}
|
|
327
|
+
else {
|
|
328
|
+
// Fallback: estimate from char counts (~4 chars per token)
|
|
329
|
+
const allMsgs = this.session.getMessages();
|
|
330
|
+
const promptChars = allMsgs.reduce((n, m) => n + (typeof m.content === "string" ? m.content.length : 0), 0);
|
|
331
|
+
const promptTok = Math.ceil(promptChars / 4);
|
|
332
|
+
const completeTok = Math.ceil(assistantText.length / 4);
|
|
333
|
+
this.costTracker.record(cfg.model, promptTok, completeTok, Date.now() - t0);
|
|
334
|
+
}
|
|
335
|
+
usageTokens = null; // reset for next round
|
|
336
|
+
}
|
|
163
337
|
// Save assistant turn
|
|
164
338
|
const assistantMsg = {
|
|
165
339
|
role: "assistant",
|
|
@@ -169,13 +343,69 @@ export class AgentRunner {
|
|
|
169
343
|
this.session.addMessage(assistantMsg);
|
|
170
344
|
if (pendingToolCalls.length === 0)
|
|
171
345
|
break;
|
|
172
|
-
//
|
|
346
|
+
// #9: Reflection — at mid-point, force model to assess progress
|
|
347
|
+
if (rounds === REFLECT_AT_ROUND) {
|
|
348
|
+
this.session.addMessage({
|
|
349
|
+
role: "user",
|
|
350
|
+
content: `[AGENT REFLECTION — round ${rounds}/${MAX_TOOL_ROUNDS}] ` +
|
|
351
|
+
`You have used ${rounds} tool calls. ` +
|
|
352
|
+
`Summarize what you have found so far, then decide: ` +
|
|
353
|
+
`(a) you have enough to answer — do so now, or ` +
|
|
354
|
+
`(b) you need specific additional data — state exactly what and use ONE more tool. ` +
|
|
355
|
+
`Do not call tools unless you have a clear remaining gap.`,
|
|
356
|
+
});
|
|
357
|
+
}
|
|
358
|
+
// #10: Check for tools requiring approval before dispatching
|
|
359
|
+
const approvedCalls = [];
|
|
173
360
|
for (const tc of pendingToolCalls) {
|
|
361
|
+
const toolDef = this.registry.getTool(tc.function.name);
|
|
362
|
+
if (toolDef?.requiresApproval) {
|
|
363
|
+
const approved = await new Promise((resolve) => {
|
|
364
|
+
this.onEvent({
|
|
365
|
+
type: "approval_request",
|
|
366
|
+
toolName: tc.function.name,
|
|
367
|
+
args: tc.function.arguments,
|
|
368
|
+
approve: resolve,
|
|
369
|
+
});
|
|
370
|
+
// Auto-deny after 60 seconds if no response
|
|
371
|
+
setTimeout(() => resolve(false), 60_000);
|
|
372
|
+
});
|
|
373
|
+
if (!approved) {
|
|
374
|
+
// Inject a denial message so model knows it was blocked
|
|
375
|
+
this.session.addMessage({
|
|
376
|
+
role: "tool",
|
|
377
|
+
content: `Tool "${tc.function.name}" was denied by user. Do not retry without asking explicitly.`,
|
|
378
|
+
name: tc.function.name,
|
|
379
|
+
tool_call_id: tc.id,
|
|
380
|
+
});
|
|
381
|
+
this.onEvent({ type: "tool_done", name: tc.function.name, result: "[DENIED by user]", isError: true });
|
|
382
|
+
continue;
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
approvedCalls.push(tc);
|
|
386
|
+
}
|
|
387
|
+
for (const tc of approvedCalls) {
|
|
174
388
|
this.onEvent({ type: "tool_start", name: tc.function.name, args: tc.function.arguments });
|
|
389
|
+
tracer?.startSpan(`tool:${tc.function.name}`, { args: tc.function.arguments.slice(0, 100) });
|
|
390
|
+
}
|
|
391
|
+
if (approvedCalls.length === 0)
|
|
392
|
+
continue;
|
|
393
|
+
// #40: Pre-tool context budget forecast — compact proactively if needed
|
|
394
|
+
const forecastedGrowth = forecastContextGrowth(approvedCalls);
|
|
395
|
+
const currentChars = this.session.charCount();
|
|
396
|
+
const forecastedPct = (currentChars + forecastedGrowth) / 80_000 * 100;
|
|
397
|
+
if (forecastedPct > 90) {
|
|
398
|
+
// Pre-compact before tools add more content
|
|
399
|
+
this.session.forceCompact();
|
|
400
|
+
this.onEvent({ type: "text_delta", text: `\n⚡ Pre-compacting context (forecast: ${forecastedPct.toFixed(0)}% after tools)\n` });
|
|
175
401
|
}
|
|
176
|
-
const results = await this.dispatcher.dispatch(
|
|
402
|
+
const results = await this.dispatcher.dispatch(approvedCalls);
|
|
177
403
|
for (const r of results) {
|
|
178
404
|
this.onEvent({ type: "tool_done", name: r.name, result: r.content, isError: r.isError });
|
|
405
|
+
// #30: End tool span
|
|
406
|
+
const toolSpan = tracer?.trace?.spans?.slice().reverse().find((s) => s.name === `tool:${r.name}` && !s.durationMs);
|
|
407
|
+
if (toolSpan)
|
|
408
|
+
tracer?.endSpan(toolSpan.spanId, r.isError ? "error" : "ok", { resultLen: r.content.length });
|
|
179
409
|
this.session.addMessage({
|
|
180
410
|
role: "tool",
|
|
181
411
|
content: r.content,
|
|
@@ -37,6 +37,11 @@ export interface ChatChunk {
|
|
|
37
37
|
finish_reason?: string;
|
|
38
38
|
/** HTTP status code — used by AgentRunner for rate-limit detection */
|
|
39
39
|
status?: number;
|
|
40
|
+
/** Token usage reported by provider on final chunk */
|
|
41
|
+
usage?: {
|
|
42
|
+
prompt_tokens: number;
|
|
43
|
+
completion_tokens: number;
|
|
44
|
+
};
|
|
40
45
|
}
|
|
41
46
|
export interface ModelConfig {
|
|
42
47
|
baseUrl: string;
|
|
@@ -46,6 +51,10 @@ export interface ModelConfig {
|
|
|
46
51
|
temperature: number;
|
|
47
52
|
siteUrl?: string;
|
|
48
53
|
siteName?: string;
|
|
54
|
+
/** Enable extended thinking for Claude Opus 4.x / o3 / Qwen3 thinking models (#13) */
|
|
55
|
+
thinkingEnabled?: boolean;
|
|
56
|
+
/** Thinking token budget — only applies when thinkingEnabled=true (#13) */
|
|
57
|
+
thinkingBudget?: number;
|
|
49
58
|
}
|
|
50
59
|
/**
|
|
51
60
|
* Build the ordered model fallback chain.
|
|
@@ -83,6 +92,6 @@ export declare class ModelClient {
|
|
|
83
92
|
description: string;
|
|
84
93
|
parameters: unknown;
|
|
85
94
|
};
|
|
86
|
-
}
|
|
95
|
+
}>, abortSignal?: AbortSignal): AsyncGenerator<ChatChunk>;
|
|
87
96
|
}
|
|
88
97
|
//# sourceMappingURL=ModelClient.d.ts.map
|
|
@@ -127,7 +127,7 @@ export class ModelClient {
|
|
|
127
127
|
* chunk and returns — the caller (AgentRunner) decides whether to rotate.
|
|
128
128
|
* Also reports success/failure to the ModelRegistry for tiering and cooldown.
|
|
129
129
|
*/
|
|
130
|
-
async *stream(messages, tools) {
|
|
130
|
+
async *stream(messages, tools, abortSignal) {
|
|
131
131
|
const t0 = Date.now();
|
|
132
132
|
let hadError = false;
|
|
133
133
|
const headers = {
|
|
@@ -139,16 +139,68 @@ export class ModelClient {
|
|
|
139
139
|
headers["HTTP-Referer"] = this.cfg.siteUrl;
|
|
140
140
|
if (this.cfg.siteName)
|
|
141
141
|
headers["X-Title"] = this.cfg.siteName;
|
|
142
|
+
// #13: Detect thinking-capable models
|
|
143
|
+
const THINKING_MODELS = new Set([
|
|
144
|
+
"anthropic/claude-opus-4.7", "anthropic/claude-opus-4.7-fast",
|
|
145
|
+
"anthropic/claude-opus-4.6", "anthropic/claude-opus-4.6-fast",
|
|
146
|
+
"anthropic/claude-opus-4.5", "anthropic/claude-opus-4",
|
|
147
|
+
"openai/o3", "openai/o3-pro", "openai/o3-mini",
|
|
148
|
+
"openai/o4", "openai/o4-mini",
|
|
149
|
+
]);
|
|
150
|
+
const isThinkingModel = THINKING_MODELS.has(this.cfg.model) || /thinking/i.test(this.cfg.model);
|
|
151
|
+
const useThinking = this.cfg.thinkingEnabled && isThinkingModel;
|
|
152
|
+
const isOSeries = /openai\/o[34]/i.test(this.cfg.model);
|
|
153
|
+
const isAnthropicModel = this.cfg.model.startsWith("anthropic/") ||
|
|
154
|
+
this.cfg.baseUrl.includes("anthropic.com");
|
|
155
|
+
// Build request body
|
|
142
156
|
const body = {
|
|
143
157
|
model: this.cfg.model,
|
|
144
|
-
messages,
|
|
145
158
|
max_tokens: this.cfg.maxTokens,
|
|
146
|
-
temperature: this.cfg.temperature,
|
|
147
159
|
stream: true,
|
|
148
160
|
};
|
|
161
|
+
// #13: Temperature handling — o-series does not support temperature
|
|
162
|
+
if (!isOSeries) {
|
|
163
|
+
body.temperature = useThinking ? 1.0 : this.cfg.temperature; // thinking requires 1.0
|
|
164
|
+
}
|
|
165
|
+
// #15: Prompt caching for Anthropic — extract system message, add cache_control
|
|
166
|
+
if (isAnthropicModel) {
|
|
167
|
+
const sysMsg = messages.find(m => m.role === "system");
|
|
168
|
+
const rest = messages.filter(m => m.role !== "system");
|
|
169
|
+
if (sysMsg && typeof sysMsg.content === "string" && sysMsg.content.length > 512) {
|
|
170
|
+
// Cache the system prompt (saves up to 90% on repeated calls)
|
|
171
|
+
body.system = [{
|
|
172
|
+
type: "text",
|
|
173
|
+
text: sysMsg.content,
|
|
174
|
+
cache_control: { type: "ephemeral" },
|
|
175
|
+
}];
|
|
176
|
+
body.messages = rest;
|
|
177
|
+
}
|
|
178
|
+
else {
|
|
179
|
+
body.messages = messages;
|
|
180
|
+
}
|
|
181
|
+
// #13: Extended thinking for Claude Opus 4.x
|
|
182
|
+
if (useThinking) {
|
|
183
|
+
body.thinking = { type: "enabled", budget_tokens: this.cfg.thinkingBudget ?? 8000 };
|
|
184
|
+
headers["anthropic-beta"] = "thinking-v1";
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
else {
|
|
188
|
+
body.messages = messages;
|
|
189
|
+
}
|
|
190
|
+
// #13: o-series reasoning effort
|
|
191
|
+
if (isOSeries && useThinking) {
|
|
192
|
+
body.reasoning_effort = "high";
|
|
193
|
+
}
|
|
149
194
|
if (tools && tools.length > 0) {
|
|
150
|
-
|
|
195
|
+
// strict: true enforces valid JSON on GPT-4o+ and GPT-5.x
|
|
196
|
+
// Skip strict mode for o-series (not supported) and thinking models
|
|
197
|
+
body.tools = tools.map(t => ({
|
|
198
|
+
...t,
|
|
199
|
+
function: isOSeries ? t.function : { ...t.function, strict: true },
|
|
200
|
+
}));
|
|
151
201
|
body.tool_choice = "auto";
|
|
202
|
+
// Disable parallel tool calls — prevents race conditions in tool_call_id map
|
|
203
|
+
body.parallel_tool_calls = false;
|
|
152
204
|
}
|
|
153
205
|
const MAX_RETRIES = 2;
|
|
154
206
|
const RETRY_STATUSES = new Set([429, 500, 502, 503, 504]);
|
|
@@ -156,14 +208,23 @@ export class ModelClient {
|
|
|
156
208
|
let lastError = "";
|
|
157
209
|
for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
|
|
158
210
|
try {
|
|
211
|
+
// #25: Combine user abort signal with 120s timeout
|
|
212
|
+
const timeoutSignal = AbortSignal.timeout(120_000);
|
|
213
|
+
const combinedSignal = abortSignal
|
|
214
|
+
? AbortSignal.any([abortSignal, timeoutSignal])
|
|
215
|
+
: timeoutSignal;
|
|
159
216
|
res = await fetch(`${this.cfg.baseUrl}/chat/completions`, {
|
|
160
217
|
method: "POST",
|
|
161
218
|
headers,
|
|
162
219
|
body: JSON.stringify(body),
|
|
163
|
-
signal:
|
|
220
|
+
signal: combinedSignal,
|
|
164
221
|
});
|
|
165
222
|
}
|
|
166
223
|
catch (e) {
|
|
224
|
+
if (e?.name === "AbortError") {
|
|
225
|
+
yield { type: "done", finish_reason: "aborted" };
|
|
226
|
+
return;
|
|
227
|
+
}
|
|
167
228
|
hadError = true;
|
|
168
229
|
lastError = `Network error: ${e.message}`;
|
|
169
230
|
if (attempt < MAX_RETRIES) {
|
|
@@ -238,6 +299,17 @@ export class ModelClient {
|
|
|
238
299
|
}
|
|
239
300
|
}
|
|
240
301
|
const finish = chunk.choices?.[0]?.finish_reason;
|
|
302
|
+
// Capture usage from final chunk (OpenAI/OpenRouter send this on finish)
|
|
303
|
+
if (chunk.usage) {
|
|
304
|
+
yield {
|
|
305
|
+
type: "done",
|
|
306
|
+
finish_reason: finish ?? "usage",
|
|
307
|
+
usage: {
|
|
308
|
+
prompt_tokens: chunk.usage.prompt_tokens ?? 0,
|
|
309
|
+
completion_tokens: chunk.usage.completion_tokens ?? 0,
|
|
310
|
+
},
|
|
311
|
+
};
|
|
312
|
+
}
|
|
241
313
|
if (finish === "tool_calls" || finish === "stop") {
|
|
242
314
|
if (toolCallMap.size > 0) {
|
|
243
315
|
const tool_calls = [...toolCallMap.values()].map(tc => ({
|
|
@@ -248,7 +320,8 @@ export class ModelClient {
|
|
|
248
320
|
yield { type: "tool_call", tool_calls };
|
|
249
321
|
toolCallMap.clear();
|
|
250
322
|
}
|
|
251
|
-
|
|
323
|
+
if (!chunk.usage)
|
|
324
|
+
yield { type: "done", finish_reason: finish };
|
|
252
325
|
}
|
|
253
326
|
}
|
|
254
327
|
}
|
|
@@ -9,6 +9,7 @@
|
|
|
9
9
|
* the provider; concurrency is capped at Math.min(maxAgents, os.cpus().length).
|
|
10
10
|
*/
|
|
11
11
|
import type { ModelRegistry } from "../models/ModelRegistry.js";
|
|
12
|
+
import type { ContextStore } from "../session/ContextStore.js";
|
|
12
13
|
export interface SwarmConfig {
|
|
13
14
|
/** Maximum parallel workers (hard cap: 5). Default: min(cpuCount, 3). */
|
|
14
15
|
maxAgents?: number;
|
|
@@ -27,11 +28,10 @@ export interface SubTaskResult {
|
|
|
27
28
|
* Tuned so "check ETH price" ≈ 10, "analyze ETH and BTC then predict" ≈ 55.
|
|
28
29
|
*/
|
|
29
30
|
export declare function scoreComplexity(prompt: string): number;
|
|
30
|
-
/**
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
export declare function decompose(prompt: string, maxTasks: number): string[];
|
|
31
|
+
/** Original heuristic decomposer — used as fallback when LLM planner fails */
|
|
32
|
+
export declare function decomposeHeuristic(prompt: string, maxTasks: number): string[];
|
|
33
|
+
/** Exported for tests — heuristic only, no model call */
|
|
34
|
+
export declare const decompose: typeof decomposeHeuristic;
|
|
35
35
|
export declare class SwarmRouter {
|
|
36
36
|
private maxAgents;
|
|
37
37
|
private complexityThreshold;
|
|
@@ -53,7 +53,7 @@ export declare class SwarmRouter {
|
|
|
53
53
|
* @param systemPrompt - Current system prompt (passed to each sub-agent + reviewer)
|
|
54
54
|
* @param onProgress - Called as each sub-task completes
|
|
55
55
|
*/
|
|
56
|
-
run(prompt: string, systemPrompt: string, onProgress: (result: SubTaskResult, remaining: number) => void): Promise<{
|
|
56
|
+
run(prompt: string, systemPrompt: string, onProgress: (result: SubTaskResult, remaining: number) => void, contextStore?: ContextStore): Promise<{
|
|
57
57
|
synthesis: string;
|
|
58
58
|
subResults: SubTaskResult[];
|
|
59
59
|
}>;
|