@jellyos/agent 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/README.npm.md +212 -0
  2. package/bin/jellyos-mcp +26 -0
  3. package/dist/api/ExtensionAPI.d.ts +11 -0
  4. package/dist/cli.js +127 -49
  5. package/dist/index.d.ts +15 -2
  6. package/dist/index.js +13 -3
  7. package/dist/loader.d.ts +2 -9
  8. package/dist/loader.js +2 -1
  9. package/dist/mcp/entry.d.ts +2 -0
  10. package/dist/mcp/entry.js +71 -0
  11. package/dist/mcp/server.d.ts +31 -0
  12. package/dist/mcp/server.js +128 -0
  13. package/dist/models/ModelRegistry.d.ts +12 -1
  14. package/dist/models/ModelRegistry.js +105 -9
  15. package/dist/runner/AgentRunner.d.ts +19 -2
  16. package/dist/runner/AgentRunner.js +247 -17
  17. package/dist/runner/ModelClient.d.ts +10 -1
  18. package/dist/runner/ModelClient.js +79 -6
  19. package/dist/runner/SwarmRouter.d.ts +6 -6
  20. package/dist/runner/SwarmRouter.js +73 -24
  21. package/dist/runner/ToolDispatcher.d.ts +10 -0
  22. package/dist/runner/ToolDispatcher.js +106 -2
  23. package/dist/scheduler/AgentScheduler.d.ts +118 -0
  24. package/dist/scheduler/AgentScheduler.js +253 -0
  25. package/dist/session/ContextStore.d.ts +96 -0
  26. package/dist/session/ContextStore.js +207 -0
  27. package/dist/session/GoalManager.d.ts +101 -0
  28. package/dist/session/GoalManager.js +167 -0
  29. package/dist/session/MemoryStore.d.ts +48 -0
  30. package/dist/session/MemoryStore.js +166 -0
  31. package/dist/session/SessionManager.d.ts +45 -4
  32. package/dist/session/SessionManager.js +151 -8
  33. package/dist/telemetry/Tracer.d.ts +48 -0
  34. package/dist/telemetry/Tracer.js +102 -0
  35. package/dist/tools/MarketSentiment.d.ts +166 -0
  36. package/dist/tools/MarketSentiment.js +209 -0
  37. package/dist/tools/NewsSentiment.js +40 -13
  38. package/dist/tools/PriceFeed.d.ts +2 -0
  39. package/dist/tools/PriceFeed.js +79 -27
  40. package/dist/tools/TechnicalAnalysis.d.ts +37 -0
  41. package/dist/tools/TechnicalAnalysis.js +85 -0
  42. package/dist/tui/App.d.ts +4 -3
  43. package/dist/tui/App.js +346 -119
  44. package/dist/tui/ModelSelector.d.ts +22 -0
  45. package/dist/tui/ModelSelector.js +86 -0
  46. package/dist/tui/REPL.d.ts +2 -1
  47. package/dist/tui/REPL.js +11 -6
  48. package/package.json +10 -6
  49. package/dist/api/ExtensionAPI.d.ts.map +0 -1
  50. package/dist/api/ExtensionAPI.js.map +0 -1
  51. package/dist/api/Registry.d.ts.map +0 -1
  52. package/dist/api/Registry.js.map +0 -1
  53. package/dist/cli.d.ts.map +0 -1
  54. package/dist/cli.js.map +0 -1
  55. package/dist/index.d.ts.map +0 -1
  56. package/dist/index.js.map +0 -1
  57. package/dist/loader.d.ts.map +0 -1
  58. package/dist/loader.js.map +0 -1
  59. package/dist/models/CostTracker.d.ts.map +0 -1
  60. package/dist/models/CostTracker.js.map +0 -1
  61. package/dist/models/ModelRegistry.d.ts.map +0 -1
  62. package/dist/models/ModelRegistry.js.map +0 -1
  63. package/dist/models/index.d.ts.map +0 -1
  64. package/dist/models/index.js.map +0 -1
  65. package/dist/runner/AgentRunner.d.ts.map +0 -1
  66. package/dist/runner/AgentRunner.js.map +0 -1
  67. package/dist/runner/ModelClient.d.ts.map +0 -1
  68. package/dist/runner/ModelClient.js.map +0 -1
  69. package/dist/runner/SwarmRouter.d.ts.map +0 -1
  70. package/dist/runner/SwarmRouter.js.map +0 -1
  71. package/dist/runner/ToolDispatcher.d.ts.map +0 -1
  72. package/dist/runner/ToolDispatcher.js.map +0 -1
  73. package/dist/session/SessionManager.d.ts.map +0 -1
  74. package/dist/session/SessionManager.js.map +0 -1
  75. package/dist/tools/NewsSentiment.d.ts.map +0 -1
  76. package/dist/tools/NewsSentiment.js.map +0 -1
  77. package/dist/tools/PriceFeed.d.ts.map +0 -1
  78. package/dist/tools/PriceFeed.js.map +0 -1
  79. package/dist/tools/TechnicalAnalysis.d.ts.map +0 -1
  80. package/dist/tools/TechnicalAnalysis.js.map +0 -1
  81. package/dist/tools/index.d.ts.map +0 -1
  82. package/dist/tools/index.js.map +0 -1
  83. package/dist/tui/App.d.ts.map +0 -1
  84. package/dist/tui/App.js.map +0 -1
  85. package/dist/tui/REPL.d.ts.map +0 -1
  86. package/dist/tui/REPL.js.map +0 -1
  87. package/dist/tui/StatusBar.d.ts.map +0 -1
  88. package/dist/tui/StatusBar.js.map +0 -1
  89. package/dist/tui/theme.d.ts.map +0 -1
  90. package/dist/tui/theme.js.map +0 -1
@@ -7,9 +7,39 @@
7
7
  * 4. Emits events so the TUI can render incrementally
8
8
  */
9
9
  import { ModelClient, resolveModelChain, } from "./ModelClient.js";
10
- import { ToolDispatcher } from "./ToolDispatcher.js";
10
+ import { ToolDispatcher, forecastContextGrowth } from "./ToolDispatcher.js";
11
11
  import { SwarmRouter } from "./SwarmRouter.js";
12
+ import { priceFeed } from "../tools/PriceFeed.js";
13
+ import { newsFeed } from "../tools/NewsSentiment.js";
14
+ import { Tracer } from "../telemetry/Tracer.js";
12
15
  const MAX_TOOL_ROUNDS = 12;
16
+ const REFLECT_AT_ROUND = 6;
17
+ function detectTaskType(message) {
18
+ const m = message.toLowerCase();
19
+ if (/\bhow much|price of|worth|cost of|current price\b/.test(m))
20
+ return "price_check";
21
+ if (/\brsi|macd|bollinger|technical|chart|candle|ohlcv\b/.test(m))
22
+ return "ta_analysis";
23
+ if (/\bcode|script|write|implement|function|typescript|python\b/.test(m))
24
+ return "code";
25
+ if (/\bpredict|forecast|will.*price|going to|expect.*price\b/.test(m))
26
+ return "prediction";
27
+ if (/\bnews|sentiment|latest|headlines|today.*market\b/.test(m))
28
+ return "news_summary";
29
+ if (/\bstrategy|plan|portfolio|risk|position|trade\b/.test(m))
30
+ return "strategy";
31
+ return "general";
32
+ }
33
+ // Task → tier mapping: cheap tasks go to workers, deep tasks go to orchestrators
34
+ const TASK_TIER_MAP = {
35
+ price_check: "worker", // fast cheap answer: $0.02-0.10/M
36
+ news_summary: "worker", // simple text summarization
37
+ code: "worker", // qwen3-coder, deepseek are great
38
+ ta_analysis: "analyst", // needs math accuracy
39
+ general: "analyst", // balanced
40
+ strategy: "orchestrator", // needs deep reasoning
41
+ prediction: "orchestrator", // thinking model for max effect
42
+ };
13
43
  /** Effect level → swarm behaviour config */
14
44
  const EFFECT_SWARM = {
15
45
  eco: { threshold: 999, maxAgents: 0 }, // never swarm
@@ -23,17 +53,27 @@ export class AgentRunner {
23
53
  onEvent;
24
54
  sessionCtx;
25
55
  effectLevel;
56
+ goalManager;
57
+ contextStore;
26
58
  modelChain;
27
59
  dispatcher;
28
60
  swarmRouter;
29
61
  modelRegistry;
30
62
  costTracker;
31
- constructor(registry, session, onEvent, sessionCtx, effectLevel = "normal", modelReg, costTracker) {
63
+ abortController = null;
64
+ /** #25: Cancel the current in-flight stream immediately */
65
+ abort() {
66
+ this.abortController?.abort();
67
+ this.abortController = null;
68
+ }
69
+ constructor(registry, session, onEvent, sessionCtx, effectLevel = "normal", modelReg, costTracker, goalManager, contextStore) {
32
70
  this.registry = registry;
33
71
  this.session = session;
34
72
  this.onEvent = onEvent;
35
73
  this.sessionCtx = sessionCtx;
36
74
  this.effectLevel = effectLevel;
75
+ this.goalManager = goalManager;
76
+ this.contextStore = contextStore;
37
77
  this.modelRegistry = modelReg;
38
78
  this.costTracker = costTracker;
39
79
  this.modelChain = resolveModelChain(modelReg);
@@ -60,20 +100,106 @@ export class AgentRunner {
60
100
  }
61
101
  /** Run one user turn — may invoke multiple tool rounds and model fallbacks internally */
62
102
  async run(userMessage) {
63
- // 1. Fire before_agent_start hooks extension injects live context, system prompt
103
+ // #30: Start trace for this turn
104
+ const sessionId = `jelly-${Date.now().toString(36)}`;
105
+ const tracer = new Tracer(sessionId, userMessage);
106
+ // 1. Fire before_agent_start hooks
64
107
  await this.registry.fireHook("before_agent_start", this.sessionCtx);
65
- // 2. Sync system prompt from registry (extension may have called setSystemPrompt)
66
- this.session.setSystemPrompt(this.registry.getSystemPrompt());
67
- // 3. Check swarm eligibility before adding to history
108
+ // 2. #38: Rebuild dynamic system prompt each turn
109
+ const basePrompt = this.registry.getSystemPrompt();
110
+ const dynamicSuffix = this.buildDynamicSystemSuffix();
111
+ this.session.setSystemPrompt(basePrompt + dynamicSuffix);
112
+ // 3. #40/#32: Pre-flight context pressure check — smart compact if needed
113
+ const pressure = this.session.getContextPressure();
114
+ if (pressure.pct >= 85 && pressure.pct < 95) {
115
+ // #32: Try tier-2 summarization with cheap model before hard-dropping turns
116
+ await this.session.summarizeOldTurns(async (messages) => {
117
+ const chain = resolveModelChain(this.modelRegistry);
118
+ // Use cheapest available model for summarization (worker or free tier)
119
+ const summaryCfg = chain[chain.length - 1] ?? chain[0];
120
+ const client = new ModelClient(summaryCfg, this.modelRegistry);
121
+ const preview = messages
122
+ .map(m => `${m.role}: ${typeof m.content === "string" ? m.content.slice(0, 150) : "[tool call]"}`)
123
+ .join("\n");
124
+ let out = "";
125
+ for await (const chunk of client.stream([
126
+ { role: "system", content: "Summarize the following conversation in 3-5 bullet points. Be specific about prices, decisions, and findings." },
127
+ { role: "user", content: preview },
128
+ ], [])) {
129
+ if (chunk.type === "delta" && chunk.text)
130
+ out += chunk.text;
131
+ }
132
+ return out || "(summary unavailable)";
133
+ });
134
+ }
135
+ else if (pressure.pct >= 95) {
136
+ this.session.forceCompact();
137
+ }
138
+ // 4. #33: Guard swarm against insufficient context headroom
68
139
  if (this.swarmRouter.shouldSwarm(userMessage)) {
140
+ if (!this.session.getContextPressure().turboReady) {
141
+ this.onEvent({ type: "text_delta", text: "\u26a1 Compacting context for turbo mode...\n" });
142
+ this.session.forceCompact();
143
+ }
69
144
  await this.runSwarm(userMessage);
70
145
  return;
71
146
  }
72
- // 4. Single-agent path add user message to history
73
- this.session.addMessage({ role: "user", content: userMessage });
74
- await this.runSingleAgent();
147
+ // 5. #16: Inject live market context into the user message
148
+ const enriched = await this.buildLiveContext(userMessage);
149
+ this.session.addMessage({ role: "user", content: enriched });
150
+ await this.runSingleAgent(userMessage, tracer);
151
+ tracer.flush("ok");
75
152
  this.onEvent({ type: "turn_done" });
76
153
  }
154
+ // ── #16: Live market context injection ─────────────────────────────────────
155
+ async buildLiveContext(message) {
156
+ const parts = [];
157
+ // Extract ticker symbols mentioned in the message
158
+ const tickerRe = /\b(BTC|ETH|SOL|BNB|MATIC|ARB|OP|AVAX|LINK|UNI|DOGE|XRP|ADA|DOT|ATOM|NEAR|SUI|APT|PEPE|AAVE|WIF|BONK)\b/gi;
159
+ const mentioned = [...new Set((message.match(tickerRe) ?? []).map(s => s.toLowerCase()))];
160
+ if (mentioned.length > 0) {
161
+ const ticks = priceFeed.getMultiple(mentioned);
162
+ if (ticks.length > 0) {
163
+ parts.push("Current prices: " + ticks.map(t => priceFeed.formatPrice(t)).join(" | "));
164
+ }
165
+ }
166
+ // News sentiment badge if message is analysis/sentiment related
167
+ if (/sentiment|news|market|mood|bullish|bearish|fear|greed/i.test(message)) {
168
+ const badge = newsFeed.statusBadge();
169
+ if (badge && badge !== "📰 ?")
170
+ parts.push(`News: ${badge}`);
171
+ }
172
+ if (parts.length === 0)
173
+ return message;
174
+ return `<live_context>\n${parts.join("\n")}\n</live_context>\n\n${message}`;
175
+ }
176
+ // ── #38: Dynamic system prompt suffix ──────────────────────────────────────
177
+ buildDynamicSystemSuffix() {
178
+ const sections = [];
179
+ // Active goals
180
+ const goals = this.goalManager?.getActive() ?? [];
181
+ if (goals.length > 0) {
182
+ sections.push(`\n## Active Goals\n${goals.map(g => `- [${g.id}] ${g.text}`).join("\n")}`);
183
+ }
184
+ // Active task context references
185
+ const activeTasks = this.contextStore?.getActiveTasks() ?? [];
186
+ if (activeTasks.length > 0) {
187
+ sections.push(`\n## Saved Task Context\n` +
188
+ activeTasks.map(t => this.contextStore.getReference(t.taskId)).join("\n"));
189
+ }
190
+ // Context pressure advisory
191
+ const pressure = this.session.getContextPressure();
192
+ if (pressure.level === "red" || pressure.level === "critical") {
193
+ sections.push(`\n## ⚠ Context Window at ${pressure.pct}%\n` +
194
+ `Be concise. Prefer short summaries. Use read_task_context() for historical data rather than repeating it. ` +
195
+ `${pressure.turboReady ? "" : "Swarm mode is temporarily paused to preserve headroom."}`);
196
+ }
197
+ // Effect level advisory
198
+ if (this.effectLevel === "eco") {
199
+ sections.push("\n## Mode: ECO\nBe brief. Minimize tool calls. Prefer one tool per response.");
200
+ }
201
+ return sections.join("");
202
+ }
77
203
  // ── Swarm path ─────────────────────────────────────────────────────────────
78
204
  async runSwarm(userMessage) {
79
205
  const systemPrompt = this.registry.getSystemPrompt();
@@ -85,7 +211,7 @@ export class AgentRunner {
85
211
  ms: result.ms,
86
212
  remaining,
87
213
  });
88
- });
214
+ }, this.contextStore);
89
215
  this.onEvent({ type: "swarm_review", subCount: subResults.length });
90
216
  // Stream reviewer synthesis token-by-token (already complete — re-emit as deltas)
91
217
  for (const ch of synthesis) {
@@ -97,25 +223,48 @@ export class AgentRunner {
97
223
  this.onEvent({ type: "turn_done" });
98
224
  }
99
225
  // ── Single-agent path (also used for each sub-task in turbo/max) ────────────
100
- async runSingleAgent() {
226
+ async runSingleAgent(userMessage, tracer) {
101
227
  const openAITools = this.registry.toOpenAITools();
228
+ const t0 = Date.now();
229
+ this.abortController = new AbortController();
230
+ const abortSignal = this.abortController.signal;
102
231
  let rounds = 0;
232
+ // #37: Route to appropriate model tier based on task type
233
+ let taskModelChain = this.modelChain;
234
+ if (userMessage && this.modelRegistry) {
235
+ const taskType = detectTaskType(userMessage);
236
+ const targetTier = TASK_TIER_MAP[taskType];
237
+ // For max effect + prediction tasks, enable thinking mode
238
+ const useThinking = this.effectLevel === "max" && taskType === "prediction";
239
+ const taskModel = this.modelRegistry.pick(targetTier);
240
+ if (taskModel) {
241
+ const cfg = this.modelRegistry.buildConfig(taskModel.id, this.modelChain[0]?.maxTokens ?? 8192, this.modelChain[0]?.temperature ?? 0.7, targetTier);
242
+ if (cfg) {
243
+ if (useThinking) {
244
+ cfg.thinkingEnabled = true;
245
+ cfg.thinkingBudget = 8000;
246
+ }
247
+ taskModelChain = [cfg, ...this.modelChain.filter(m => m.model !== cfg.model)];
248
+ }
249
+ }
250
+ }
103
251
  while (rounds < MAX_TOOL_ROUNDS) {
104
252
  rounds++;
105
253
  const messages = this.session.getMessages();
106
254
  let assistantText = "";
107
255
  let pendingToolCalls = [];
108
256
  let modelError = null;
257
+ let usageTokens = null;
109
258
  // Try model chain — rotate on 429/5xx
110
- for (let mi = 0; mi < this.modelChain.length; mi++) {
111
- const cfg = this.modelChain[mi];
259
+ for (let mi = 0; mi < taskModelChain.length; mi++) {
260
+ const cfg = taskModelChain[mi];
112
261
  const client = new ModelClient(cfg, this.modelRegistry);
113
262
  assistantText = "";
114
263
  pendingToolCalls = [];
115
264
  modelError = null;
116
265
  let gotError = false;
117
266
  let isRateLimit = false;
118
- for await (const chunk of client.stream(messages, openAITools)) {
267
+ for await (const chunk of client.stream(messages, openAITools, abortSignal)) {
119
268
  if (chunk.type === "delta" && chunk.text) {
120
269
  assistantText += chunk.text;
121
270
  this.onEvent({ type: "text_delta", text: chunk.text });
@@ -123,9 +272,18 @@ export class AgentRunner {
123
272
  else if (chunk.type === "tool_call" && chunk.tool_calls) {
124
273
  pendingToolCalls = chunk.tool_calls;
125
274
  }
275
+ else if (chunk.type === "done" && chunk.finish_reason === "aborted") {
276
+ // #25: Stream was aborted by user — clean exit
277
+ this.onEvent({ type: "turn_done" });
278
+ return;
279
+ }
280
+ else if (chunk.type === "done" && chunk.usage) {
281
+ usageTokens = chunk.usage;
282
+ }
126
283
  else if (chunk.type === "error") {
127
284
  modelError = chunk.error ?? "Unknown model error";
128
285
  gotError = true;
286
+ this.costTracker?.recordError(); // #1: track errors
129
287
  // Rotate on 429 rate-limit OR any 5xx server error
130
288
  isRateLimit = /429|rate.?limit/i.test(modelError)
131
289
  || (chunk.status !== undefined && chunk.status >= 500);
@@ -135,7 +293,7 @@ export class AgentRunner {
135
293
  if (!gotError)
136
294
  break; // success — use this model's output
137
295
  // Rotate to next model on rate-limit or server errors
138
- const nextCfg = this.modelChain[mi + 1];
296
+ const nextCfg = taskModelChain[mi + 1];
139
297
  // Save any partial text the user already saw before rotating
140
298
  if (assistantText.trim()) {
141
299
  this.session.addMessage({ role: "assistant", content: assistantText + "\n\n[connection interrupted — retrying with fallback model]" });
@@ -160,6 +318,22 @@ export class AgentRunner {
160
318
  this.onEvent({ type: "error", message: modelError });
161
319
  return;
162
320
  }
321
+ // #1: Record cost for this model call
322
+ if (this.costTracker && !modelError) {
323
+ const cfg = this.modelChain[0];
324
+ if (usageTokens) {
325
+ this.costTracker.record(cfg.model, usageTokens.prompt_tokens, usageTokens.completion_tokens, Date.now() - t0);
326
+ }
327
+ else {
328
+ // Fallback: estimate from char counts (~4 chars per token)
329
+ const allMsgs = this.session.getMessages();
330
+ const promptChars = allMsgs.reduce((n, m) => n + (typeof m.content === "string" ? m.content.length : 0), 0);
331
+ const promptTok = Math.ceil(promptChars / 4);
332
+ const completeTok = Math.ceil(assistantText.length / 4);
333
+ this.costTracker.record(cfg.model, promptTok, completeTok, Date.now() - t0);
334
+ }
335
+ usageTokens = null; // reset for next round
336
+ }
163
337
  // Save assistant turn
164
338
  const assistantMsg = {
165
339
  role: "assistant",
@@ -169,13 +343,69 @@ export class AgentRunner {
169
343
  this.session.addMessage(assistantMsg);
170
344
  if (pendingToolCalls.length === 0)
171
345
  break;
172
- // Dispatch tool calls
346
+ // #9: Reflection — at mid-point, force model to assess progress
347
+ if (rounds === REFLECT_AT_ROUND) {
348
+ this.session.addMessage({
349
+ role: "user",
350
+ content: `[AGENT REFLECTION — round ${rounds}/${MAX_TOOL_ROUNDS}] ` +
351
+ `You have used ${rounds} tool calls. ` +
352
+ `Summarize what you have found so far, then decide: ` +
353
+ `(a) you have enough to answer — do so now, or ` +
354
+ `(b) you need specific additional data — state exactly what and use ONE more tool. ` +
355
+ `Do not call tools unless you have a clear remaining gap.`,
356
+ });
357
+ }
358
+ // #10: Check for tools requiring approval before dispatching
359
+ const approvedCalls = [];
173
360
  for (const tc of pendingToolCalls) {
361
+ const toolDef = this.registry.getTool(tc.function.name);
362
+ if (toolDef?.requiresApproval) {
363
+ const approved = await new Promise((resolve) => {
364
+ this.onEvent({
365
+ type: "approval_request",
366
+ toolName: tc.function.name,
367
+ args: tc.function.arguments,
368
+ approve: resolve,
369
+ });
370
+ // Auto-deny after 60 seconds if no response
371
+ setTimeout(() => resolve(false), 60_000);
372
+ });
373
+ if (!approved) {
374
+ // Inject a denial message so model knows it was blocked
375
+ this.session.addMessage({
376
+ role: "tool",
377
+ content: `Tool "${tc.function.name}" was denied by user. Do not retry without asking explicitly.`,
378
+ name: tc.function.name,
379
+ tool_call_id: tc.id,
380
+ });
381
+ this.onEvent({ type: "tool_done", name: tc.function.name, result: "[DENIED by user]", isError: true });
382
+ continue;
383
+ }
384
+ }
385
+ approvedCalls.push(tc);
386
+ }
387
+ for (const tc of approvedCalls) {
174
388
  this.onEvent({ type: "tool_start", name: tc.function.name, args: tc.function.arguments });
389
+ tracer?.startSpan(`tool:${tc.function.name}`, { args: tc.function.arguments.slice(0, 100) });
390
+ }
391
+ if (approvedCalls.length === 0)
392
+ continue;
393
+ // #40: Pre-tool context budget forecast — compact proactively if needed
394
+ const forecastedGrowth = forecastContextGrowth(approvedCalls);
395
+ const currentChars = this.session.charCount();
396
+ const forecastedPct = (currentChars + forecastedGrowth) / 80_000 * 100;
397
+ if (forecastedPct > 90) {
398
+ // Pre-compact before tools add more content
399
+ this.session.forceCompact();
400
+ this.onEvent({ type: "text_delta", text: `\n⚡ Pre-compacting context (forecast: ${forecastedPct.toFixed(0)}% after tools)\n` });
175
401
  }
176
- const results = await this.dispatcher.dispatch(pendingToolCalls);
402
+ const results = await this.dispatcher.dispatch(approvedCalls);
177
403
  for (const r of results) {
178
404
  this.onEvent({ type: "tool_done", name: r.name, result: r.content, isError: r.isError });
405
+ // #30: End tool span
406
+ const toolSpan = tracer?.trace?.spans?.slice().reverse().find((s) => s.name === `tool:${r.name}` && !s.durationMs);
407
+ if (toolSpan)
408
+ tracer?.endSpan(toolSpan.spanId, r.isError ? "error" : "ok", { resultLen: r.content.length });
179
409
  this.session.addMessage({
180
410
  role: "tool",
181
411
  content: r.content,
@@ -37,6 +37,11 @@ export interface ChatChunk {
37
37
  finish_reason?: string;
38
38
  /** HTTP status code — used by AgentRunner for rate-limit detection */
39
39
  status?: number;
40
+ /** Token usage reported by provider on final chunk */
41
+ usage?: {
42
+ prompt_tokens: number;
43
+ completion_tokens: number;
44
+ };
40
45
  }
41
46
  export interface ModelConfig {
42
47
  baseUrl: string;
@@ -46,6 +51,10 @@ export interface ModelConfig {
46
51
  temperature: number;
47
52
  siteUrl?: string;
48
53
  siteName?: string;
54
+ /** Enable extended thinking for Claude Opus 4.x / o3 / Qwen3 thinking models (#13) */
55
+ thinkingEnabled?: boolean;
56
+ /** Thinking token budget — only applies when thinkingEnabled=true (#13) */
57
+ thinkingBudget?: number;
49
58
  }
50
59
  /**
51
60
  * Build the ordered model fallback chain.
@@ -83,6 +92,6 @@ export declare class ModelClient {
83
92
  description: string;
84
93
  parameters: unknown;
85
94
  };
86
- }>): AsyncGenerator<ChatChunk>;
95
+ }>, abortSignal?: AbortSignal): AsyncGenerator<ChatChunk>;
87
96
  }
88
97
  //# sourceMappingURL=ModelClient.d.ts.map
@@ -127,7 +127,7 @@ export class ModelClient {
127
127
  * chunk and returns — the caller (AgentRunner) decides whether to rotate.
128
128
  * Also reports success/failure to the ModelRegistry for tiering and cooldown.
129
129
  */
130
- async *stream(messages, tools) {
130
+ async *stream(messages, tools, abortSignal) {
131
131
  const t0 = Date.now();
132
132
  let hadError = false;
133
133
  const headers = {
@@ -139,16 +139,68 @@ export class ModelClient {
139
139
  headers["HTTP-Referer"] = this.cfg.siteUrl;
140
140
  if (this.cfg.siteName)
141
141
  headers["X-Title"] = this.cfg.siteName;
142
+ // #13: Detect thinking-capable models
143
+ const THINKING_MODELS = new Set([
144
+ "anthropic/claude-opus-4.7", "anthropic/claude-opus-4.7-fast",
145
+ "anthropic/claude-opus-4.6", "anthropic/claude-opus-4.6-fast",
146
+ "anthropic/claude-opus-4.5", "anthropic/claude-opus-4",
147
+ "openai/o3", "openai/o3-pro", "openai/o3-mini",
148
+ "openai/o4", "openai/o4-mini",
149
+ ]);
150
+ const isThinkingModel = THINKING_MODELS.has(this.cfg.model) || /thinking/i.test(this.cfg.model);
151
+ const useThinking = this.cfg.thinkingEnabled && isThinkingModel;
152
+ const isOSeries = /openai\/o[34]/i.test(this.cfg.model);
153
+ const isAnthropicModel = this.cfg.model.startsWith("anthropic/") ||
154
+ this.cfg.baseUrl.includes("anthropic.com");
155
+ // Build request body
142
156
  const body = {
143
157
  model: this.cfg.model,
144
- messages,
145
158
  max_tokens: this.cfg.maxTokens,
146
- temperature: this.cfg.temperature,
147
159
  stream: true,
148
160
  };
161
+ // #13: Temperature handling — o-series does not support temperature
162
+ if (!isOSeries) {
163
+ body.temperature = useThinking ? 1.0 : this.cfg.temperature; // thinking requires 1.0
164
+ }
165
+ // #15: Prompt caching for Anthropic — extract system message, add cache_control
166
+ if (isAnthropicModel) {
167
+ const sysMsg = messages.find(m => m.role === "system");
168
+ const rest = messages.filter(m => m.role !== "system");
169
+ if (sysMsg && typeof sysMsg.content === "string" && sysMsg.content.length > 512) {
170
+ // Cache the system prompt (saves up to 90% on repeated calls)
171
+ body.system = [{
172
+ type: "text",
173
+ text: sysMsg.content,
174
+ cache_control: { type: "ephemeral" },
175
+ }];
176
+ body.messages = rest;
177
+ }
178
+ else {
179
+ body.messages = messages;
180
+ }
181
+ // #13: Extended thinking for Claude Opus 4.x
182
+ if (useThinking) {
183
+ body.thinking = { type: "enabled", budget_tokens: this.cfg.thinkingBudget ?? 8000 };
184
+ headers["anthropic-beta"] = "thinking-v1";
185
+ }
186
+ }
187
+ else {
188
+ body.messages = messages;
189
+ }
190
+ // #13: o-series reasoning effort
191
+ if (isOSeries && useThinking) {
192
+ body.reasoning_effort = "high";
193
+ }
149
194
  if (tools && tools.length > 0) {
150
- body.tools = tools;
195
+ // strict: true enforces valid JSON on GPT-4o+ and GPT-5.x
196
+ // Skip strict mode for o-series (not supported) and thinking models
197
+ body.tools = tools.map(t => ({
198
+ ...t,
199
+ function: isOSeries ? t.function : { ...t.function, strict: true },
200
+ }));
151
201
  body.tool_choice = "auto";
202
+ // Disable parallel tool calls — prevents race conditions in tool_call_id map
203
+ body.parallel_tool_calls = false;
152
204
  }
153
205
  const MAX_RETRIES = 2;
154
206
  const RETRY_STATUSES = new Set([429, 500, 502, 503, 504]);
@@ -156,14 +208,23 @@ export class ModelClient {
156
208
  let lastError = "";
157
209
  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
158
210
  try {
211
+ // #25: Combine user abort signal with 120s timeout
212
+ const timeoutSignal = AbortSignal.timeout(120_000);
213
+ const combinedSignal = abortSignal
214
+ ? AbortSignal.any([abortSignal, timeoutSignal])
215
+ : timeoutSignal;
159
216
  res = await fetch(`${this.cfg.baseUrl}/chat/completions`, {
160
217
  method: "POST",
161
218
  headers,
162
219
  body: JSON.stringify(body),
163
- signal: AbortSignal.timeout(120_000),
220
+ signal: combinedSignal,
164
221
  });
165
222
  }
166
223
  catch (e) {
224
+ if (e?.name === "AbortError") {
225
+ yield { type: "done", finish_reason: "aborted" };
226
+ return;
227
+ }
167
228
  hadError = true;
168
229
  lastError = `Network error: ${e.message}`;
169
230
  if (attempt < MAX_RETRIES) {
@@ -238,6 +299,17 @@ export class ModelClient {
238
299
  }
239
300
  }
240
301
  const finish = chunk.choices?.[0]?.finish_reason;
302
+ // Capture usage from final chunk (OpenAI/OpenRouter send this on finish)
303
+ if (chunk.usage) {
304
+ yield {
305
+ type: "done",
306
+ finish_reason: finish ?? "usage",
307
+ usage: {
308
+ prompt_tokens: chunk.usage.prompt_tokens ?? 0,
309
+ completion_tokens: chunk.usage.completion_tokens ?? 0,
310
+ },
311
+ };
312
+ }
241
313
  if (finish === "tool_calls" || finish === "stop") {
242
314
  if (toolCallMap.size > 0) {
243
315
  const tool_calls = [...toolCallMap.values()].map(tc => ({
@@ -248,7 +320,8 @@ export class ModelClient {
248
320
  yield { type: "tool_call", tool_calls };
249
321
  toolCallMap.clear();
250
322
  }
251
- yield { type: "done", finish_reason: finish };
323
+ if (!chunk.usage)
324
+ yield { type: "done", finish_reason: finish };
252
325
  }
253
326
  }
254
327
  }
@@ -9,6 +9,7 @@
9
9
  * the provider; concurrency is capped at Math.min(maxAgents, os.cpus().length).
10
10
  */
11
11
  import type { ModelRegistry } from "../models/ModelRegistry.js";
12
+ import type { ContextStore } from "../session/ContextStore.js";
12
13
  export interface SwarmConfig {
13
14
  /** Maximum parallel workers (hard cap: 5). Default: min(cpuCount, 3). */
14
15
  maxAgents?: number;
@@ -27,11 +28,10 @@ export interface SubTaskResult {
27
28
  * Tuned so "check ETH price" ≈ 10, "analyze ETH and BTC then predict" ≈ 55.
28
29
  */
29
30
  export declare function scoreComplexity(prompt: string): number;
30
- /**
31
- * Splits a complex prompt into 2–5 focused sub-task strings.
32
- * Uses simple heuristics so no extra model call is needed.
33
- */
34
- export declare function decompose(prompt: string, maxTasks: number): string[];
31
+ /** Original heuristic decomposer — used as fallback when LLM planner fails */
32
+ export declare function decomposeHeuristic(prompt: string, maxTasks: number): string[];
33
+ /** Exported for tests heuristic only, no model call */
34
+ export declare const decompose: typeof decomposeHeuristic;
35
35
  export declare class SwarmRouter {
36
36
  private maxAgents;
37
37
  private complexityThreshold;
@@ -53,7 +53,7 @@ export declare class SwarmRouter {
53
53
  * @param systemPrompt - Current system prompt (passed to each sub-agent + reviewer)
54
54
  * @param onProgress - Called as each sub-task completes
55
55
  */
56
- run(prompt: string, systemPrompt: string, onProgress: (result: SubTaskResult, remaining: number) => void): Promise<{
56
+ run(prompt: string, systemPrompt: string, onProgress: (result: SubTaskResult, remaining: number) => void, contextStore?: ContextStore): Promise<{
57
57
  synthesis: string;
58
58
  subResults: SubTaskResult[];
59
59
  }>;