@jellyos/agent 0.1.3 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +9 -9
- package/README.npm.md +212 -0
- package/bin/jellyos-mcp +26 -0
- package/dist/api/ExtensionAPI.d.ts +6 -0
- package/dist/api/Registry.js +3 -1
- package/dist/cli.js +117 -42
- package/dist/index.d.ts +24 -1
- package/dist/index.js +19 -2
- package/dist/mcp/entry.d.ts +2 -0
- package/dist/mcp/entry.js +71 -0
- package/dist/mcp/server.d.ts +31 -0
- package/dist/mcp/server.js +128 -0
- package/dist/models/CostTracker.d.ts +66 -0
- package/dist/models/CostTracker.js +148 -0
- package/dist/models/ModelRegistry.d.ts +157 -0
- package/dist/models/ModelRegistry.js +496 -0
- package/dist/models/index.d.ts +5 -0
- package/dist/models/index.js +3 -0
- package/dist/runner/AgentRunner.d.ts +23 -2
- package/dist/runner/AgentRunner.js +264 -24
- package/dist/runner/ModelClient.d.ts +26 -6
- package/dist/runner/ModelClient.js +147 -28
- package/dist/runner/SwarmRouter.d.ts +10 -7
- package/dist/runner/SwarmRouter.js +85 -28
- package/dist/runner/ToolDispatcher.d.ts +10 -0
- package/dist/runner/ToolDispatcher.js +106 -2
- package/dist/scheduler/AgentScheduler.d.ts +118 -0
- package/dist/scheduler/AgentScheduler.js +253 -0
- package/dist/session/ContextStore.d.ts +96 -0
- package/dist/session/ContextStore.js +207 -0
- package/dist/session/GoalManager.d.ts +101 -0
- package/dist/session/GoalManager.js +167 -0
- package/dist/session/MemoryStore.d.ts +48 -0
- package/dist/session/MemoryStore.js +166 -0
- package/dist/session/SessionManager.d.ts +45 -4
- package/dist/session/SessionManager.js +151 -8
- package/dist/telemetry/Tracer.d.ts +48 -0
- package/dist/telemetry/Tracer.js +102 -0
- package/dist/tests/ContextStore.test.d.ts +2 -0
- package/dist/tests/ContextStore.test.js +74 -0
- package/dist/tests/ModelRegistry.test.d.ts +2 -0
- package/dist/tests/ModelRegistry.test.js +69 -0
- package/dist/tests/SessionManager.test.d.ts +2 -0
- package/dist/tests/SessionManager.test.js +108 -0
- package/dist/tests/TechnicalAnalysis.test.d.ts +2 -0
- package/dist/tests/TechnicalAnalysis.test.js +109 -0
- package/dist/tools/MarketSentiment.d.ts +166 -0
- package/dist/tools/MarketSentiment.js +209 -0
- package/dist/tools/NewsSentiment.d.ts +67 -0
- package/dist/tools/NewsSentiment.js +226 -0
- package/dist/tools/PriceFeed.d.ts +105 -0
- package/dist/tools/PriceFeed.js +282 -0
- package/dist/tools/TechnicalAnalysis.d.ts +110 -0
- package/dist/tools/TechnicalAnalysis.js +357 -0
- package/dist/tools/index.d.ts +7 -0
- package/dist/tools/index.js +4 -0
- package/dist/tui/App.d.ts +7 -5
- package/dist/tui/App.js +350 -65
- package/dist/tui/REPL.d.ts +2 -1
- package/dist/tui/REPL.js +11 -6
- package/dist/tui/StatusBar.js +1 -1
- package/package.json +9 -4
- package/dist/api/ExtensionAPI.d.ts.map +0 -1
- package/dist/api/ExtensionAPI.js.map +0 -1
- package/dist/api/Registry.d.ts.map +0 -1
- package/dist/api/Registry.js.map +0 -1
- package/dist/cli.d.ts.map +0 -1
- package/dist/cli.js.map +0 -1
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/dist/loader.d.ts.map +0 -1
- package/dist/loader.js.map +0 -1
- package/dist/runner/AgentRunner.d.ts.map +0 -1
- package/dist/runner/AgentRunner.js.map +0 -1
- package/dist/runner/ModelClient.d.ts.map +0 -1
- package/dist/runner/ModelClient.js.map +0 -1
- package/dist/runner/SwarmRouter.d.ts.map +0 -1
- package/dist/runner/SwarmRouter.js.map +0 -1
- package/dist/runner/ToolDispatcher.d.ts.map +0 -1
- package/dist/runner/ToolDispatcher.js.map +0 -1
- package/dist/session/SessionManager.d.ts.map +0 -1
- package/dist/session/SessionManager.js.map +0 -1
- package/dist/tui/App.d.ts.map +0 -1
- package/dist/tui/App.js.map +0 -1
- package/dist/tui/REPL.d.ts.map +0 -1
- package/dist/tui/REPL.js.map +0 -1
- package/dist/tui/StatusBar.d.ts.map +0 -1
- package/dist/tui/StatusBar.js.map +0 -1
- package/dist/tui/theme.d.ts.map +0 -1
- package/dist/tui/theme.js.map +0 -1
|
@@ -7,9 +7,39 @@
|
|
|
7
7
|
* 4. Emits events so the TUI can render incrementally
|
|
8
8
|
*/
|
|
9
9
|
import { ModelClient, resolveModelChain, } from "./ModelClient.js";
|
|
10
|
-
import { ToolDispatcher } from "./ToolDispatcher.js";
|
|
10
|
+
import { ToolDispatcher, forecastContextGrowth } from "./ToolDispatcher.js";
|
|
11
11
|
import { SwarmRouter } from "./SwarmRouter.js";
|
|
12
|
+
import { priceFeed } from "../tools/PriceFeed.js";
|
|
13
|
+
import { newsFeed } from "../tools/NewsSentiment.js";
|
|
14
|
+
import { Tracer } from "../telemetry/Tracer.js";
|
|
12
15
|
const MAX_TOOL_ROUNDS = 12;
|
|
16
|
+
const REFLECT_AT_ROUND = 6;
|
|
17
|
+
/**
 * Ordered keyword rules used by detectTaskType(); the first matching rule wins.
 *
 * Every alternative sits inside one non-capturing group behind a single leading
 * `\b`, so a keyword must start at a word boundary. Without the group, `\b`
 * applies only to the first and last alternative, and keywords such as
 * "script", "plan" or "risk" match inside unrelated words ("description",
 * "explain", "asterisk"). No trailing boundary is used, so inflected forms
 * ("candles", "charts", "trades") still match.
 *
 * "javascript" and "rewrite" are listed explicitly because they used to match
 * only through the embedded "script" / "write".
 */
const TASK_TYPE_RULES = [
    { type: "price_check", pattern: /\b(?:how much|price of|worth|cost of|current price)/ },
    { type: "ta_analysis", pattern: /\b(?:rsi|macd|bollinger|technical|chart|candle|ohlcv)/ },
    { type: "code", pattern: /\b(?:code|script|(?:re)?write|implement|function|typescript|javascript|python)/ },
    { type: "prediction", pattern: /\b(?:predict|forecast|will.*price|going to|expect.*price)/ },
    { type: "news_summary", pattern: /\b(?:news|sentiment|latest|headlines|today.*market)/ },
    { type: "strategy", pattern: /\b(?:strategy|plan|portfolio|risk|position|trade)/ },
];
/**
 * Classify a user message into a coarse task type by keyword matching.
 *
 * @param message Raw user message; matching is case-insensitive.
 * @returns One of "price_check", "ta_analysis", "code", "prediction",
 *          "news_summary", "strategy", or "general" when no rule matches.
 */
function detectTaskType(message) {
    const m = message.toLowerCase();
    for (const rule of TASK_TYPE_RULES) {
        if (rule.pattern.test(m))
            return rule.type;
    }
    return "general";
}
|
|
33
|
+
/**
 * Task type → model tier lookup.
 * Cheap tasks are routed to workers, deeper tasks to analysts/orchestrators.
 * The groups below are flattened in order, so key order is:
 * price_check, news_summary, code, ta_analysis, general, strategy, prediction.
 */
const TASK_TIER_MAP = Object.fromEntries([
    // worker: fast cheap answers ($0.02-0.10/M), simple text summarization,
    // and code (qwen3-coder, deepseek are great)
    { tier: "worker", tasks: ["price_check", "news_summary", "code"] },
    // analyst: ta_analysis needs math accuracy; general is the balanced default
    { tier: "analyst", tasks: ["ta_analysis", "general"] },
    // orchestrator: strategy needs deep reasoning; prediction uses a thinking model for max effect
    { tier: "orchestrator", tasks: ["strategy", "prediction"] },
].flatMap(({ tier, tasks }) => tasks.map((task) => [task, tier])));
|
|
13
43
|
/** Effect level → swarm behaviour config */
|
|
14
44
|
const EFFECT_SWARM = {
|
|
15
45
|
eco: { threshold: 999, maxAgents: 0 }, // never swarm
|
|
@@ -23,22 +53,36 @@ export class AgentRunner {
|
|
|
23
53
|
onEvent;
|
|
24
54
|
sessionCtx;
|
|
25
55
|
effectLevel;
|
|
56
|
+
goalManager;
|
|
57
|
+
contextStore;
|
|
26
58
|
modelChain;
|
|
27
59
|
dispatcher;
|
|
28
60
|
swarmRouter;
|
|
29
|
-
|
|
61
|
+
modelRegistry;
|
|
62
|
+
costTracker;
|
|
63
|
+
abortController = null;
|
|
64
|
+
/** #25: Cancel the current in-flight stream immediately */
|
|
65
|
+
abort() {
|
|
66
|
+
this.abortController?.abort();
|
|
67
|
+
this.abortController = null;
|
|
68
|
+
}
|
|
69
|
+
constructor(registry, session, onEvent, sessionCtx, effectLevel = "normal", modelReg, costTracker, goalManager, contextStore) {
|
|
30
70
|
this.registry = registry;
|
|
31
71
|
this.session = session;
|
|
32
72
|
this.onEvent = onEvent;
|
|
33
73
|
this.sessionCtx = sessionCtx;
|
|
34
74
|
this.effectLevel = effectLevel;
|
|
35
|
-
this.
|
|
75
|
+
this.goalManager = goalManager;
|
|
76
|
+
this.contextStore = contextStore;
|
|
77
|
+
this.modelRegistry = modelReg;
|
|
78
|
+
this.costTracker = costTracker;
|
|
79
|
+
this.modelChain = resolveModelChain(modelReg);
|
|
36
80
|
this.dispatcher = new ToolDispatcher(registry);
|
|
37
81
|
const sc = EFFECT_SWARM[effectLevel] ?? EFFECT_SWARM["normal"];
|
|
38
82
|
this.swarmRouter = new SwarmRouter({
|
|
39
83
|
maxAgents: sc.maxAgents,
|
|
40
84
|
complexityThreshold: sc.threshold,
|
|
41
|
-
});
|
|
85
|
+
}, modelReg);
|
|
42
86
|
}
|
|
43
87
|
/**
|
|
44
88
|
* Live reconfigure effect level without recreating the runner.
|
|
@@ -47,30 +91,115 @@ export class AgentRunner {
|
|
|
47
91
|
*/
|
|
48
92
|
setEffectLevel(level) {
|
|
49
93
|
this.effectLevel = level;
|
|
50
|
-
|
|
51
|
-
this.modelChain = resolveModelChain();
|
|
94
|
+
this.modelChain = resolveModelChain(this.modelRegistry);
|
|
52
95
|
const sc = EFFECT_SWARM[level] ?? EFFECT_SWARM["normal"];
|
|
53
96
|
this.swarmRouter = new SwarmRouter({
|
|
54
97
|
maxAgents: sc.maxAgents,
|
|
55
98
|
complexityThreshold: sc.threshold,
|
|
56
|
-
});
|
|
99
|
+
}, this.modelRegistry);
|
|
57
100
|
}
|
|
58
101
|
/** Run one user turn — may invoke multiple tool rounds and model fallbacks internally */
|
|
59
102
|
async run(userMessage) {
|
|
60
|
-
//
|
|
103
|
+
// #30: Start trace for this turn
|
|
104
|
+
const sessionId = `jelly-${Date.now().toString(36)}`;
|
|
105
|
+
const tracer = new Tracer(sessionId, userMessage);
|
|
106
|
+
// 1. Fire before_agent_start hooks
|
|
61
107
|
await this.registry.fireHook("before_agent_start", this.sessionCtx);
|
|
62
|
-
// 2.
|
|
63
|
-
this.
|
|
64
|
-
|
|
108
|
+
// 2. #38: Rebuild dynamic system prompt each turn
|
|
109
|
+
const basePrompt = this.registry.getSystemPrompt();
|
|
110
|
+
const dynamicSuffix = this.buildDynamicSystemSuffix();
|
|
111
|
+
this.session.setSystemPrompt(basePrompt + dynamicSuffix);
|
|
112
|
+
// 3. #40/#32: Pre-flight context pressure check — smart compact if needed
|
|
113
|
+
const pressure = this.session.getContextPressure();
|
|
114
|
+
if (pressure.pct >= 85 && pressure.pct < 95) {
|
|
115
|
+
// #32: Try tier-2 summarization with cheap model before hard-dropping turns
|
|
116
|
+
await this.session.summarizeOldTurns(async (messages) => {
|
|
117
|
+
const chain = resolveModelChain(this.modelRegistry);
|
|
118
|
+
// Use cheapest available model for summarization (worker or free tier)
|
|
119
|
+
const summaryCfg = chain[chain.length - 1] ?? chain[0];
|
|
120
|
+
const client = new ModelClient(summaryCfg, this.modelRegistry);
|
|
121
|
+
const preview = messages
|
|
122
|
+
.map(m => `${m.role}: ${typeof m.content === "string" ? m.content.slice(0, 150) : "[tool call]"}`)
|
|
123
|
+
.join("\n");
|
|
124
|
+
let out = "";
|
|
125
|
+
for await (const chunk of client.stream([
|
|
126
|
+
{ role: "system", content: "Summarize the following conversation in 3-5 bullet points. Be specific about prices, decisions, and findings." },
|
|
127
|
+
{ role: "user", content: preview },
|
|
128
|
+
], [])) {
|
|
129
|
+
if (chunk.type === "delta" && chunk.text)
|
|
130
|
+
out += chunk.text;
|
|
131
|
+
}
|
|
132
|
+
return out || "(summary unavailable)";
|
|
133
|
+
});
|
|
134
|
+
}
|
|
135
|
+
else if (pressure.pct >= 95) {
|
|
136
|
+
this.session.forceCompact();
|
|
137
|
+
}
|
|
138
|
+
// 4. #33: Guard swarm against insufficient context headroom
|
|
65
139
|
if (this.swarmRouter.shouldSwarm(userMessage)) {
|
|
140
|
+
if (!this.session.getContextPressure().turboReady) {
|
|
141
|
+
this.onEvent({ type: "text_delta", text: "\u26a1 Compacting context for turbo mode...\n" });
|
|
142
|
+
this.session.forceCompact();
|
|
143
|
+
}
|
|
66
144
|
await this.runSwarm(userMessage);
|
|
67
145
|
return;
|
|
68
146
|
}
|
|
69
|
-
//
|
|
70
|
-
|
|
71
|
-
|
|
147
|
+
// 5. #16: Inject live market context into the user message
|
|
148
|
+
const enriched = await this.buildLiveContext(userMessage);
|
|
149
|
+
this.session.addMessage({ role: "user", content: enriched });
|
|
150
|
+
await this.runSingleAgent(userMessage, tracer);
|
|
151
|
+
tracer.flush("ok");
|
|
72
152
|
this.onEvent({ type: "turn_done" });
|
|
73
153
|
}
|
|
154
|
+
// ── #16: Live market context injection ─────────────────────────────────────
|
|
155
|
+
async buildLiveContext(message) {
|
|
156
|
+
const parts = [];
|
|
157
|
+
// Extract ticker symbols mentioned in the message
|
|
158
|
+
const tickerRe = /\b(BTC|ETH|SOL|BNB|MATIC|ARB|OP|AVAX|LINK|UNI|DOGE|XRP|ADA|DOT|ATOM|NEAR|SUI|APT|PEPE|AAVE|WIF|BONK)\b/gi;
|
|
159
|
+
const mentioned = [...new Set((message.match(tickerRe) ?? []).map(s => s.toLowerCase()))];
|
|
160
|
+
if (mentioned.length > 0) {
|
|
161
|
+
const ticks = priceFeed.getMultiple(mentioned);
|
|
162
|
+
if (ticks.length > 0) {
|
|
163
|
+
parts.push("Current prices: " + ticks.map(t => priceFeed.formatPrice(t)).join(" | "));
|
|
164
|
+
}
|
|
165
|
+
}
|
|
166
|
+
// News sentiment badge if message is analysis/sentiment related
|
|
167
|
+
if (/sentiment|news|market|mood|bullish|bearish|fear|greed/i.test(message)) {
|
|
168
|
+
const badge = newsFeed.statusBadge();
|
|
169
|
+
if (badge && badge !== "📰 ?")
|
|
170
|
+
parts.push(`News: ${badge}`);
|
|
171
|
+
}
|
|
172
|
+
if (parts.length === 0)
|
|
173
|
+
return message;
|
|
174
|
+
return `<live_context>\n${parts.join("\n")}\n</live_context>\n\n${message}`;
|
|
175
|
+
}
|
|
176
|
+
// ── #38: Dynamic system prompt suffix ──────────────────────────────────────
|
|
177
|
+
buildDynamicSystemSuffix() {
|
|
178
|
+
const sections = [];
|
|
179
|
+
// Active goals
|
|
180
|
+
const goals = this.goalManager?.getActive() ?? [];
|
|
181
|
+
if (goals.length > 0) {
|
|
182
|
+
sections.push(`\n## Active Goals\n${goals.map(g => `- [${g.id}] ${g.text}`).join("\n")}`);
|
|
183
|
+
}
|
|
184
|
+
// Active task context references
|
|
185
|
+
const activeTasks = this.contextStore?.getActiveTasks() ?? [];
|
|
186
|
+
if (activeTasks.length > 0) {
|
|
187
|
+
sections.push(`\n## Saved Task Context\n` +
|
|
188
|
+
activeTasks.map(t => this.contextStore.getReference(t.taskId)).join("\n"));
|
|
189
|
+
}
|
|
190
|
+
// Context pressure advisory
|
|
191
|
+
const pressure = this.session.getContextPressure();
|
|
192
|
+
if (pressure.level === "red" || pressure.level === "critical") {
|
|
193
|
+
sections.push(`\n## ⚠ Context Window at ${pressure.pct}%\n` +
|
|
194
|
+
`Be concise. Prefer short summaries. Use read_task_context() for historical data rather than repeating it. ` +
|
|
195
|
+
`${pressure.turboReady ? "" : "Swarm mode is temporarily paused to preserve headroom."}`);
|
|
196
|
+
}
|
|
197
|
+
// Effect level advisory
|
|
198
|
+
if (this.effectLevel === "eco") {
|
|
199
|
+
sections.push("\n## Mode: ECO\nBe brief. Minimize tool calls. Prefer one tool per response.");
|
|
200
|
+
}
|
|
201
|
+
return sections.join("");
|
|
202
|
+
}
|
|
74
203
|
// ── Swarm path ─────────────────────────────────────────────────────────────
|
|
75
204
|
async runSwarm(userMessage) {
|
|
76
205
|
const systemPrompt = this.registry.getSystemPrompt();
|
|
@@ -82,7 +211,7 @@ export class AgentRunner {
|
|
|
82
211
|
ms: result.ms,
|
|
83
212
|
remaining,
|
|
84
213
|
});
|
|
85
|
-
});
|
|
214
|
+
}, this.contextStore);
|
|
86
215
|
this.onEvent({ type: "swarm_review", subCount: subResults.length });
|
|
87
216
|
// Stream reviewer synthesis token-by-token (already complete — re-emit as deltas)
|
|
88
217
|
for (const ch of synthesis) {
|
|
@@ -94,25 +223,48 @@ export class AgentRunner {
|
|
|
94
223
|
this.onEvent({ type: "turn_done" });
|
|
95
224
|
}
|
|
96
225
|
// ── Single-agent path (also used for each sub-task in turbo/max) ────────────
|
|
97
|
-
async runSingleAgent() {
|
|
226
|
+
async runSingleAgent(userMessage, tracer) {
|
|
98
227
|
const openAITools = this.registry.toOpenAITools();
|
|
228
|
+
const t0 = Date.now();
|
|
229
|
+
this.abortController = new AbortController();
|
|
230
|
+
const abortSignal = this.abortController.signal;
|
|
99
231
|
let rounds = 0;
|
|
232
|
+
// #37: Route to appropriate model tier based on task type
|
|
233
|
+
let taskModelChain = this.modelChain;
|
|
234
|
+
if (userMessage && this.modelRegistry) {
|
|
235
|
+
const taskType = detectTaskType(userMessage);
|
|
236
|
+
const targetTier = TASK_TIER_MAP[taskType];
|
|
237
|
+
// For max effect + prediction tasks, enable thinking mode
|
|
238
|
+
const useThinking = this.effectLevel === "max" && taskType === "prediction";
|
|
239
|
+
const taskModel = this.modelRegistry.pick(targetTier);
|
|
240
|
+
if (taskModel) {
|
|
241
|
+
const cfg = this.modelRegistry.buildConfig(taskModel.id, this.modelChain[0]?.maxTokens ?? 8192, this.modelChain[0]?.temperature ?? 0.7, targetTier);
|
|
242
|
+
if (cfg) {
|
|
243
|
+
if (useThinking) {
|
|
244
|
+
cfg.thinkingEnabled = true;
|
|
245
|
+
cfg.thinkingBudget = 8000;
|
|
246
|
+
}
|
|
247
|
+
taskModelChain = [cfg, ...this.modelChain.filter(m => m.model !== cfg.model)];
|
|
248
|
+
}
|
|
249
|
+
}
|
|
250
|
+
}
|
|
100
251
|
while (rounds < MAX_TOOL_ROUNDS) {
|
|
101
252
|
rounds++;
|
|
102
253
|
const messages = this.session.getMessages();
|
|
103
254
|
let assistantText = "";
|
|
104
255
|
let pendingToolCalls = [];
|
|
105
256
|
let modelError = null;
|
|
257
|
+
let usageTokens = null;
|
|
106
258
|
// Try model chain — rotate on 429/5xx
|
|
107
|
-
for (let mi = 0; mi <
|
|
108
|
-
const cfg =
|
|
109
|
-
const client = new ModelClient(cfg);
|
|
259
|
+
for (let mi = 0; mi < taskModelChain.length; mi++) {
|
|
260
|
+
const cfg = taskModelChain[mi];
|
|
261
|
+
const client = new ModelClient(cfg, this.modelRegistry);
|
|
110
262
|
assistantText = "";
|
|
111
263
|
pendingToolCalls = [];
|
|
112
264
|
modelError = null;
|
|
113
265
|
let gotError = false;
|
|
114
266
|
let isRateLimit = false;
|
|
115
|
-
for await (const chunk of client.stream(messages, openAITools)) {
|
|
267
|
+
for await (const chunk of client.stream(messages, openAITools, abortSignal)) {
|
|
116
268
|
if (chunk.type === "delta" && chunk.text) {
|
|
117
269
|
assistantText += chunk.text;
|
|
118
270
|
this.onEvent({ type: "text_delta", text: chunk.text });
|
|
@@ -120,9 +272,18 @@ export class AgentRunner {
|
|
|
120
272
|
else if (chunk.type === "tool_call" && chunk.tool_calls) {
|
|
121
273
|
pendingToolCalls = chunk.tool_calls;
|
|
122
274
|
}
|
|
275
|
+
else if (chunk.type === "done" && chunk.finish_reason === "aborted") {
|
|
276
|
+
// #25: Stream was aborted by user — clean exit
|
|
277
|
+
this.onEvent({ type: "turn_done" });
|
|
278
|
+
return;
|
|
279
|
+
}
|
|
280
|
+
else if (chunk.type === "done" && chunk.usage) {
|
|
281
|
+
usageTokens = chunk.usage;
|
|
282
|
+
}
|
|
123
283
|
else if (chunk.type === "error") {
|
|
124
284
|
modelError = chunk.error ?? "Unknown model error";
|
|
125
285
|
gotError = true;
|
|
286
|
+
this.costTracker?.recordError(); // #1: track errors
|
|
126
287
|
// Rotate on 429 rate-limit OR any 5xx server error
|
|
127
288
|
isRateLimit = /429|rate.?limit/i.test(modelError)
|
|
128
289
|
|| (chunk.status !== undefined && chunk.status >= 500);
|
|
@@ -132,7 +293,11 @@ export class AgentRunner {
|
|
|
132
293
|
if (!gotError)
|
|
133
294
|
break; // success — use this model's output
|
|
134
295
|
// Rotate to next model on rate-limit or server errors
|
|
135
|
-
const nextCfg =
|
|
296
|
+
const nextCfg = taskModelChain[mi + 1];
|
|
297
|
+
// Save any partial text the user already saw before rotating
|
|
298
|
+
if (assistantText.trim()) {
|
|
299
|
+
this.session.addMessage({ role: "assistant", content: assistantText + "\n\n[connection interrupted — retrying with fallback model]" });
|
|
300
|
+
}
|
|
136
301
|
if (nextCfg && isRateLimit) {
|
|
137
302
|
this.onEvent({
|
|
138
303
|
type: "model_fallback",
|
|
@@ -142,7 +307,10 @@ export class AgentRunner {
|
|
|
142
307
|
});
|
|
143
308
|
continue;
|
|
144
309
|
}
|
|
145
|
-
// Non-recoverable error or no more fallbacks
|
|
310
|
+
// Non-recoverable error or no more fallbacks — commit partial text if any
|
|
311
|
+
if (assistantText.trim()) {
|
|
312
|
+
this.session.addMessage({ role: "assistant", content: assistantText });
|
|
313
|
+
}
|
|
146
314
|
this.onEvent({ type: "error", message: modelError ?? "Model error" });
|
|
147
315
|
return;
|
|
148
316
|
}
|
|
@@ -150,6 +318,22 @@ export class AgentRunner {
|
|
|
150
318
|
this.onEvent({ type: "error", message: modelError });
|
|
151
319
|
return;
|
|
152
320
|
}
|
|
321
|
+
// #1: Record cost for this model call
|
|
322
|
+
if (this.costTracker && !modelError) {
|
|
323
|
+
const cfg = this.modelChain[0];
|
|
324
|
+
if (usageTokens) {
|
|
325
|
+
this.costTracker.record(cfg.model, usageTokens.prompt_tokens, usageTokens.completion_tokens, Date.now() - t0);
|
|
326
|
+
}
|
|
327
|
+
else {
|
|
328
|
+
// Fallback: estimate from char counts (~4 chars per token)
|
|
329
|
+
const allMsgs = this.session.getMessages();
|
|
330
|
+
const promptChars = allMsgs.reduce((n, m) => n + (typeof m.content === "string" ? m.content.length : 0), 0);
|
|
331
|
+
const promptTok = Math.ceil(promptChars / 4);
|
|
332
|
+
const completeTok = Math.ceil(assistantText.length / 4);
|
|
333
|
+
this.costTracker.record(cfg.model, promptTok, completeTok, Date.now() - t0);
|
|
334
|
+
}
|
|
335
|
+
usageTokens = null; // reset for next round
|
|
336
|
+
}
|
|
153
337
|
// Save assistant turn
|
|
154
338
|
const assistantMsg = {
|
|
155
339
|
role: "assistant",
|
|
@@ -159,13 +343,69 @@ export class AgentRunner {
|
|
|
159
343
|
this.session.addMessage(assistantMsg);
|
|
160
344
|
if (pendingToolCalls.length === 0)
|
|
161
345
|
break;
|
|
162
|
-
//
|
|
346
|
+
// #9: Reflection — at mid-point, force model to assess progress
|
|
347
|
+
if (rounds === REFLECT_AT_ROUND) {
|
|
348
|
+
this.session.addMessage({
|
|
349
|
+
role: "user",
|
|
350
|
+
content: `[AGENT REFLECTION — round ${rounds}/${MAX_TOOL_ROUNDS}] ` +
|
|
351
|
+
`You have used ${rounds} tool calls. ` +
|
|
352
|
+
`Summarize what you have found so far, then decide: ` +
|
|
353
|
+
`(a) you have enough to answer — do so now, or ` +
|
|
354
|
+
`(b) you need specific additional data — state exactly what and use ONE more tool. ` +
|
|
355
|
+
`Do not call tools unless you have a clear remaining gap.`,
|
|
356
|
+
});
|
|
357
|
+
}
|
|
358
|
+
// #10: Check for tools requiring approval before dispatching
|
|
359
|
+
const approvedCalls = [];
|
|
163
360
|
for (const tc of pendingToolCalls) {
|
|
361
|
+
const toolDef = this.registry.getTool(tc.function.name);
|
|
362
|
+
if (toolDef?.requiresApproval) {
|
|
363
|
+
const approved = await new Promise((resolve) => {
|
|
364
|
+
this.onEvent({
|
|
365
|
+
type: "approval_request",
|
|
366
|
+
toolName: tc.function.name,
|
|
367
|
+
args: tc.function.arguments,
|
|
368
|
+
approve: resolve,
|
|
369
|
+
});
|
|
370
|
+
// Auto-deny after 60 seconds if no response
|
|
371
|
+
setTimeout(() => resolve(false), 60_000);
|
|
372
|
+
});
|
|
373
|
+
if (!approved) {
|
|
374
|
+
// Inject a denial message so model knows it was blocked
|
|
375
|
+
this.session.addMessage({
|
|
376
|
+
role: "tool",
|
|
377
|
+
content: `Tool "${tc.function.name}" was denied by user. Do not retry without asking explicitly.`,
|
|
378
|
+
name: tc.function.name,
|
|
379
|
+
tool_call_id: tc.id,
|
|
380
|
+
});
|
|
381
|
+
this.onEvent({ type: "tool_done", name: tc.function.name, result: "[DENIED by user]", isError: true });
|
|
382
|
+
continue;
|
|
383
|
+
}
|
|
384
|
+
}
|
|
385
|
+
approvedCalls.push(tc);
|
|
386
|
+
}
|
|
387
|
+
for (const tc of approvedCalls) {
|
|
164
388
|
this.onEvent({ type: "tool_start", name: tc.function.name, args: tc.function.arguments });
|
|
389
|
+
tracer?.startSpan(`tool:${tc.function.name}`, { args: tc.function.arguments.slice(0, 100) });
|
|
390
|
+
}
|
|
391
|
+
if (approvedCalls.length === 0)
|
|
392
|
+
continue;
|
|
393
|
+
// #40: Pre-tool context budget forecast — compact proactively if needed
|
|
394
|
+
const forecastedGrowth = forecastContextGrowth(approvedCalls);
|
|
395
|
+
const currentChars = this.session.charCount();
|
|
396
|
+
const forecastedPct = (currentChars + forecastedGrowth) / 80_000 * 100;
|
|
397
|
+
if (forecastedPct > 90) {
|
|
398
|
+
// Pre-compact before tools add more content
|
|
399
|
+
this.session.forceCompact();
|
|
400
|
+
this.onEvent({ type: "text_delta", text: `\n⚡ Pre-compacting context (forecast: ${forecastedPct.toFixed(0)}% after tools)\n` });
|
|
165
401
|
}
|
|
166
|
-
const results = await this.dispatcher.dispatch(
|
|
402
|
+
const results = await this.dispatcher.dispatch(approvedCalls);
|
|
167
403
|
for (const r of results) {
|
|
168
404
|
this.onEvent({ type: "tool_done", name: r.name, result: r.content, isError: r.isError });
|
|
405
|
+
// #30: End tool span
|
|
406
|
+
const toolSpan = tracer?.trace?.spans?.slice().reverse().find((s) => s.name === `tool:${r.name}` && !s.durationMs);
|
|
407
|
+
if (toolSpan)
|
|
408
|
+
tracer?.endSpan(toolSpan.spanId, r.isError ? "error" : "ok", { resultLen: r.content.length });
|
|
169
409
|
this.session.addMessage({
|
|
170
410
|
role: "tool",
|
|
171
411
|
content: r.content,
|
|
@@ -5,10 +5,15 @@
|
|
|
5
5
|
* OpenRouter > Anthropic compat > OpenAI > local (ollama/lm-studio)
|
|
6
6
|
*
|
|
7
7
|
* Model rotation: resolveModelChain() returns up to 5 configs — the AgentRunner
|
|
8
|
-
* walks the chain on 429 (rate limit) or 5xx errors,
|
|
8
|
+
* walks the chain on 429 (rate limit) or 5xx errors, with exponential backoff
|
|
9
|
+
* (up to 2 retries per model) before falling through.
|
|
10
|
+
*
|
|
11
|
+
* When a ModelRegistry is available, chains are dynamically built from the
|
|
12
|
+
* tiered pool, with per-model performance tracking and cost estimation.
|
|
9
13
|
*
|
|
10
14
|
* All outbound, all local — no inbound ports, no server.
|
|
11
15
|
*/
|
|
16
|
+
import type { ModelRegistry } from "../models/ModelRegistry.js";
|
|
12
17
|
export interface Message {
|
|
13
18
|
role: "system" | "user" | "assistant" | "tool";
|
|
14
19
|
content: string | null;
|
|
@@ -32,6 +37,11 @@ export interface ChatChunk {
|
|
|
32
37
|
finish_reason?: string;
|
|
33
38
|
/** HTTP status code — used by AgentRunner for rate-limit detection */
|
|
34
39
|
status?: number;
|
|
40
|
+
/** Token usage reported by provider on final chunk */
|
|
41
|
+
usage?: {
|
|
42
|
+
prompt_tokens: number;
|
|
43
|
+
completion_tokens: number;
|
|
44
|
+
};
|
|
35
45
|
}
|
|
36
46
|
export interface ModelConfig {
|
|
37
47
|
baseUrl: string;
|
|
@@ -41,10 +51,17 @@ export interface ModelConfig {
|
|
|
41
51
|
temperature: number;
|
|
42
52
|
siteUrl?: string;
|
|
43
53
|
siteName?: string;
|
|
54
|
+
/** Enable extended thinking for Claude Opus 4.x / o3 / Qwen3 thinking models (#13) */
|
|
55
|
+
thinkingEnabled?: boolean;
|
|
56
|
+
/** Thinking token budget — only applies when thinkingEnabled=true (#13) */
|
|
57
|
+
thinkingBudget?: number;
|
|
44
58
|
}
|
|
45
59
|
/**
|
|
46
60
|
* Build the ordered model fallback chain.
|
|
47
61
|
*
|
|
62
|
+
* If a ModelRegistry is provided, builds from the tiered pool dynamically.
|
|
63
|
+
* Falls back to static env-var parsing otherwise.
|
|
64
|
+
*
|
|
48
65
|
* User-configurable pool: JELLY_MODEL_1 … JELLY_MODEL_5
|
|
49
66
|
* If any JELLY_MODEL_N vars are set they take priority; up to 5 are used in
|
|
50
67
|
* order. Unset slots are filled with provider-appropriate defaults.
|
|
@@ -54,16 +71,19 @@ export interface ModelConfig {
|
|
|
54
71
|
* JELLY_MODEL_2=openai/gpt-4o
|
|
55
72
|
* JELLY_MODEL_3=google/gemini-2.5-pro
|
|
56
73
|
*/
|
|
57
|
-
export declare function resolveModelChain(): ModelConfig[];
|
|
74
|
+
export declare function resolveModelChain(modelReg?: ModelRegistry): ModelConfig[];
|
|
58
75
|
/** Convenience: returns just the primary (first) model config */
|
|
59
|
-
export declare function resolveModelConfig(): ModelConfig;
|
|
76
|
+
export declare function resolveModelConfig(modelReg?: ModelRegistry): ModelConfig;
|
|
60
77
|
export declare class ModelClient {
|
|
61
78
|
private cfg;
|
|
62
|
-
|
|
79
|
+
private modelRegistry?;
|
|
80
|
+
constructor(cfg: ModelConfig, modelReg?: ModelRegistry);
|
|
63
81
|
/**
|
|
64
82
|
* Stream a chat completion. Yields ChatChunk objects.
|
|
65
|
-
*
|
|
83
|
+
* Retries up to 2 times on 429 / 5xx with exponential backoff (1s, 2s).
|
|
84
|
+
* On persistent HTTP error the generator yields a single { type: "error", status, error }
|
|
66
85
|
* chunk and returns — the caller (AgentRunner) decides whether to rotate.
|
|
86
|
+
* Also reports success/failure to the ModelRegistry for tiering and cooldown.
|
|
67
87
|
*/
|
|
68
88
|
stream(messages: Message[], tools?: Array<{
|
|
69
89
|
type: "function";
|
|
@@ -72,6 +92,6 @@ export declare class ModelClient {
|
|
|
72
92
|
description: string;
|
|
73
93
|
parameters: unknown;
|
|
74
94
|
};
|
|
75
|
-
}
|
|
95
|
+
}>, abortSignal?: AbortSignal): AsyncGenerator<ChatChunk>;
|
|
76
96
|
}
|
|
77
97
|
//# sourceMappingURL=ModelClient.d.ts.map
|