@jellyos/agent 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (90)
  1. package/README.md +9 -9
  2. package/README.npm.md +212 -0
  3. package/bin/jellyos-mcp +26 -0
  4. package/dist/api/ExtensionAPI.d.ts +6 -0
  5. package/dist/api/Registry.js +3 -1
  6. package/dist/cli.js +117 -42
  7. package/dist/index.d.ts +24 -1
  8. package/dist/index.js +19 -2
  9. package/dist/mcp/entry.d.ts +2 -0
  10. package/dist/mcp/entry.js +71 -0
  11. package/dist/mcp/server.d.ts +31 -0
  12. package/dist/mcp/server.js +128 -0
  13. package/dist/models/CostTracker.d.ts +66 -0
  14. package/dist/models/CostTracker.js +148 -0
  15. package/dist/models/ModelRegistry.d.ts +157 -0
  16. package/dist/models/ModelRegistry.js +496 -0
  17. package/dist/models/index.d.ts +5 -0
  18. package/dist/models/index.js +3 -0
  19. package/dist/runner/AgentRunner.d.ts +23 -2
  20. package/dist/runner/AgentRunner.js +264 -24
  21. package/dist/runner/ModelClient.d.ts +26 -6
  22. package/dist/runner/ModelClient.js +147 -28
  23. package/dist/runner/SwarmRouter.d.ts +10 -7
  24. package/dist/runner/SwarmRouter.js +85 -28
  25. package/dist/runner/ToolDispatcher.d.ts +10 -0
  26. package/dist/runner/ToolDispatcher.js +106 -2
  27. package/dist/scheduler/AgentScheduler.d.ts +118 -0
  28. package/dist/scheduler/AgentScheduler.js +253 -0
  29. package/dist/session/ContextStore.d.ts +96 -0
  30. package/dist/session/ContextStore.js +207 -0
  31. package/dist/session/GoalManager.d.ts +101 -0
  32. package/dist/session/GoalManager.js +167 -0
  33. package/dist/session/MemoryStore.d.ts +48 -0
  34. package/dist/session/MemoryStore.js +166 -0
  35. package/dist/session/SessionManager.d.ts +45 -4
  36. package/dist/session/SessionManager.js +151 -8
  37. package/dist/telemetry/Tracer.d.ts +48 -0
  38. package/dist/telemetry/Tracer.js +102 -0
  39. package/dist/tests/ContextStore.test.d.ts +2 -0
  40. package/dist/tests/ContextStore.test.js +74 -0
  41. package/dist/tests/ModelRegistry.test.d.ts +2 -0
  42. package/dist/tests/ModelRegistry.test.js +69 -0
  43. package/dist/tests/SessionManager.test.d.ts +2 -0
  44. package/dist/tests/SessionManager.test.js +108 -0
  45. package/dist/tests/TechnicalAnalysis.test.d.ts +2 -0
  46. package/dist/tests/TechnicalAnalysis.test.js +109 -0
  47. package/dist/tools/MarketSentiment.d.ts +166 -0
  48. package/dist/tools/MarketSentiment.js +209 -0
  49. package/dist/tools/NewsSentiment.d.ts +67 -0
  50. package/dist/tools/NewsSentiment.js +226 -0
  51. package/dist/tools/PriceFeed.d.ts +105 -0
  52. package/dist/tools/PriceFeed.js +282 -0
  53. package/dist/tools/TechnicalAnalysis.d.ts +110 -0
  54. package/dist/tools/TechnicalAnalysis.js +357 -0
  55. package/dist/tools/index.d.ts +7 -0
  56. package/dist/tools/index.js +4 -0
  57. package/dist/tui/App.d.ts +7 -5
  58. package/dist/tui/App.js +350 -65
  59. package/dist/tui/REPL.d.ts +2 -1
  60. package/dist/tui/REPL.js +11 -6
  61. package/dist/tui/StatusBar.js +1 -1
  62. package/package.json +9 -4
  63. package/dist/api/ExtensionAPI.d.ts.map +0 -1
  64. package/dist/api/ExtensionAPI.js.map +0 -1
  65. package/dist/api/Registry.d.ts.map +0 -1
  66. package/dist/api/Registry.js.map +0 -1
  67. package/dist/cli.d.ts.map +0 -1
  68. package/dist/cli.js.map +0 -1
  69. package/dist/index.d.ts.map +0 -1
  70. package/dist/index.js.map +0 -1
  71. package/dist/loader.d.ts.map +0 -1
  72. package/dist/loader.js.map +0 -1
  73. package/dist/runner/AgentRunner.d.ts.map +0 -1
  74. package/dist/runner/AgentRunner.js.map +0 -1
  75. package/dist/runner/ModelClient.d.ts.map +0 -1
  76. package/dist/runner/ModelClient.js.map +0 -1
  77. package/dist/runner/SwarmRouter.d.ts.map +0 -1
  78. package/dist/runner/SwarmRouter.js.map +0 -1
  79. package/dist/runner/ToolDispatcher.d.ts.map +0 -1
  80. package/dist/runner/ToolDispatcher.js.map +0 -1
  81. package/dist/session/SessionManager.d.ts.map +0 -1
  82. package/dist/session/SessionManager.js.map +0 -1
  83. package/dist/tui/App.d.ts.map +0 -1
  84. package/dist/tui/App.js.map +0 -1
  85. package/dist/tui/REPL.d.ts.map +0 -1
  86. package/dist/tui/REPL.js.map +0 -1
  87. package/dist/tui/StatusBar.d.ts.map +0 -1
  88. package/dist/tui/StatusBar.js.map +0 -1
  89. package/dist/tui/theme.d.ts.map +0 -1
  90. package/dist/tui/theme.js.map +0 -1
@@ -5,7 +5,11 @@
5
5
  * OpenRouter > Anthropic compat > OpenAI > local (ollama/lm-studio)
6
6
  *
7
7
  * Model rotation: resolveModelChain() returns up to 5 configs — the AgentRunner
8
- * walks the chain on 429 (rate limit) or 5xx errors, providing seamless fallback.
8
+ * walks the chain on 429 (rate limit) or 5xx errors, with exponential backoff
9
+ * (up to 2 retries per model) before falling through.
10
+ *
11
+ * When a ModelRegistry is available, chains are dynamically built from the
12
+ * tiered pool, with per-model performance tracking and cost estimation.
9
13
  *
10
14
  * All outbound, all local — no inbound ports, no server.
11
15
  */
@@ -13,6 +17,9 @@
13
17
  /**
14
18
  * Build the ordered model fallback chain.
15
19
  *
20
+ * If a ModelRegistry is provided, builds from the tiered pool dynamically.
21
+ * Falls back to static env-var parsing otherwise.
22
+ *
16
23
  * User-configurable pool: JELLY_MODEL_1 … JELLY_MODEL_5
17
24
  * If any JELLY_MODEL_N vars are set they take priority; up to 5 are used in
18
25
  * order. Unset slots are filled with provider-appropriate defaults.
@@ -22,7 +29,7 @@
22
29
  * JELLY_MODEL_2=openai/gpt-4o
23
30
  * JELLY_MODEL_3=google/gemini-2.5-pro
24
31
  */
25
- export function resolveModelChain() {
32
+ export function resolveModelChain(modelReg) {
26
33
  const env = process.env;
27
34
  const tokens = parseInt(env.MAX_TOKENS ?? "8192");
28
35
  const temp = parseFloat(env.TEMPERATURE ?? "0.7");
@@ -33,6 +40,11 @@ export function resolveModelChain() {
33
40
  if (m?.trim())
34
41
  userModels.push(m.trim());
35
42
  }
43
+ // ── Use ModelRegistry dynamic pool if available ──────────────────────────
44
+ if (modelReg) {
45
+ return modelReg.buildModelChain(userModels);
46
+ }
47
+ // ── Static fallback (used when ModelRegistry cannot be initialised) ───────
36
48
  // ── OpenRouter — supports all providers via a single key ─────────────────
37
49
  if (env.OPENROUTER_API_KEY) {
38
50
  const base = "https://openrouter.ai/api/v1";
@@ -94,24 +106,30 @@ export function resolveModelChain() {
94
106
  temperature: temp,
95
107
  }];
96
108
  }
97
- throw new Error("No API key found. Set OPENROUTER_API_KEY, ANTHROPIC_API_KEY, or OPENAI_API_KEY in ~/.jellyos/.env");
109
+ throw new Error("No API key found. Set OPENROUTER_API_KEY, ANTHROPIC_API_KEY, or OPENAI_API_KEY in ~/.jelly/.env");
98
110
  }
99
111
  /** Convenience: returns just the primary (first) model config */
100
- export function resolveModelConfig() {
101
- return resolveModelChain()[0];
112
+ export function resolveModelConfig(modelReg) {
113
+ return resolveModelChain(modelReg)[0];
102
114
  }
103
115
  // ── ModelClient ───────────────────────────────────────────────────────────────
104
116
  export class ModelClient {
105
117
  cfg;
106
- constructor(cfg) {
118
+ modelRegistry;
119
+ constructor(cfg, modelReg) {
107
120
  this.cfg = cfg;
121
+ this.modelRegistry = modelReg;
108
122
  }
109
123
  /**
110
124
  * Stream a chat completion. Yields ChatChunk objects.
111
- * On HTTP error the generator yields a single { type: "error", status, error }
125
+ * Retries up to 2 times on 429 / 5xx with exponential backoff (1s, 2s).
126
+ * On persistent HTTP error the generator yields a single { type: "error", status, error }
112
127
  * chunk and returns — the caller (AgentRunner) decides whether to rotate.
128
+ * Also reports success/failure to the ModelRegistry for tiering and cooldown.
113
129
  */
114
- async *stream(messages, tools) {
130
+ async *stream(messages, tools, abortSignal) {
131
+ const t0 = Date.now();
132
+ let hadError = false;
115
133
  const headers = {
116
134
  "Content-Type": "application/json",
117
135
  "Authorization": `Bearer ${this.cfg.apiKey}`,
@@ -121,34 +139,120 @@ export class ModelClient {
121
139
  headers["HTTP-Referer"] = this.cfg.siteUrl;
122
140
  if (this.cfg.siteName)
123
141
  headers["X-Title"] = this.cfg.siteName;
142
+ // #13: Detect thinking-capable models
143
+ const THINKING_MODELS = new Set([
144
+ "anthropic/claude-opus-4.7", "anthropic/claude-opus-4.7-fast",
145
+ "anthropic/claude-opus-4.6", "anthropic/claude-opus-4.6-fast",
146
+ "anthropic/claude-opus-4.5", "anthropic/claude-opus-4",
147
+ "openai/o3", "openai/o3-pro", "openai/o3-mini",
148
+ "openai/o4", "openai/o4-mini",
149
+ ]);
150
+ const isThinkingModel = THINKING_MODELS.has(this.cfg.model) || /thinking/i.test(this.cfg.model);
151
+ const useThinking = this.cfg.thinkingEnabled && isThinkingModel;
152
+ const isOSeries = /openai\/o[34]/i.test(this.cfg.model);
153
+ const isAnthropicModel = this.cfg.model.startsWith("anthropic/") ||
154
+ this.cfg.baseUrl.includes("anthropic.com");
155
+ // Build request body
124
156
  const body = {
125
157
  model: this.cfg.model,
126
- messages,
127
158
  max_tokens: this.cfg.maxTokens,
128
- temperature: this.cfg.temperature,
129
159
  stream: true,
130
160
  };
161
+ // #13: Temperature handling — o-series does not support temperature
162
+ if (!isOSeries) {
163
+ body.temperature = useThinking ? 1.0 : this.cfg.temperature; // thinking requires 1.0
164
+ }
165
+ // #15: Prompt caching for Anthropic — extract system message, add cache_control
166
+ if (isAnthropicModel) {
167
+ const sysMsg = messages.find(m => m.role === "system");
168
+ const rest = messages.filter(m => m.role !== "system");
169
+ if (sysMsg && typeof sysMsg.content === "string" && sysMsg.content.length > 512) {
170
+ // Cache the system prompt (saves up to 90% on repeated calls)
171
+ body.system = [{
172
+ type: "text",
173
+ text: sysMsg.content,
174
+ cache_control: { type: "ephemeral" },
175
+ }];
176
+ body.messages = rest;
177
+ }
178
+ else {
179
+ body.messages = messages;
180
+ }
181
+ // #13: Extended thinking for Claude Opus 4.x
182
+ if (useThinking) {
183
+ body.thinking = { type: "enabled", budget_tokens: this.cfg.thinkingBudget ?? 8000 };
184
+ headers["anthropic-beta"] = "thinking-v1";
185
+ }
186
+ }
187
+ else {
188
+ body.messages = messages;
189
+ }
190
+ // #13: o-series reasoning effort
191
+ if (isOSeries && useThinking) {
192
+ body.reasoning_effort = "high";
193
+ }
131
194
  if (tools && tools.length > 0) {
132
- body.tools = tools;
195
+ // strict: true enforces valid JSON on GPT-4o+ and GPT-5.x
196
+ // Skip strict mode for o-series (not supported) and thinking models
197
+ body.tools = tools.map(t => ({
198
+ ...t,
199
+ function: isOSeries ? t.function : { ...t.function, strict: true },
200
+ }));
133
201
  body.tool_choice = "auto";
202
+ // Disable parallel tool calls — prevents race conditions in tool_call_id map
203
+ body.parallel_tool_calls = false;
134
204
  }
205
+ const MAX_RETRIES = 2;
206
+ const RETRY_STATUSES = new Set([429, 500, 502, 503, 504]);
135
207
  let res;
136
- try {
137
- res = await fetch(`${this.cfg.baseUrl}/chat/completions`, {
138
- method: "POST",
139
- headers,
140
- body: JSON.stringify(body),
141
- signal: AbortSignal.timeout(120_000),
142
- });
143
- }
144
- catch (e) {
145
- yield { type: "error", error: `Network error: ${e.message}`, status: 0 };
146
- return;
147
- }
148
- if (!res.ok) {
149
- const err = await res.text().catch(() => res.statusText);
150
- yield { type: "error", error: `Model API ${res.status}: ${err}`, status: res.status };
151
- return;
208
+ let lastError = "";
209
+ for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
210
+ try {
211
+ // #25: Combine user abort signal with 120s timeout
212
+ const timeoutSignal = AbortSignal.timeout(120_000);
213
+ const combinedSignal = abortSignal
214
+ ? AbortSignal.any([abortSignal, timeoutSignal])
215
+ : timeoutSignal;
216
+ res = await fetch(`${this.cfg.baseUrl}/chat/completions`, {
217
+ method: "POST",
218
+ headers,
219
+ body: JSON.stringify(body),
220
+ signal: combinedSignal,
221
+ });
222
+ }
223
+ catch (e) {
224
+ if (e?.name === "AbortError") {
225
+ yield { type: "done", finish_reason: "aborted" };
226
+ return;
227
+ }
228
+ hadError = true;
229
+ lastError = `Network error: ${e.message}`;
230
+ if (attempt < MAX_RETRIES) {
231
+ await new Promise(r => setTimeout(r, 1000 * (attempt + 1)));
232
+ continue;
233
+ }
234
+ this.modelRegistry?.recordFailure(this.cfg.model);
235
+ yield { type: "error", error: lastError, status: 0 };
236
+ return;
237
+ }
238
+ if (!res.ok && RETRY_STATUSES.has(res.status) && attempt < MAX_RETRIES) {
239
+ hadError = true;
240
+ lastError = await res.text().catch(() => res.statusText);
241
+ await new Promise(r => setTimeout(r, 1000 * (attempt + 1)));
242
+ continue;
243
+ }
244
+ if (!res.ok) {
245
+ hadError = true;
246
+ const err = await res.text().catch(() => res.statusText);
247
+ // 404 → model removed, mark permanently deprecated
248
+ if (res.status === 404)
249
+ this.modelRegistry?.markDeprecated(this.cfg.model);
250
+ else
251
+ this.modelRegistry?.recordFailure(this.cfg.model);
252
+ yield { type: "error", error: `Model API ${res.status}: ${err}`, status: res.status };
253
+ return;
254
+ }
255
+ break; // success — got an ok response
152
256
  }
153
257
  // Accumulate tool calls across chunks (they arrive fragmented)
154
258
  const toolCallMap = new Map();
@@ -195,6 +299,17 @@ export class ModelClient {
195
299
  }
196
300
  }
197
301
  const finish = chunk.choices?.[0]?.finish_reason;
302
+ // Capture usage from final chunk (OpenAI/OpenRouter send this on finish)
303
+ if (chunk.usage) {
304
+ yield {
305
+ type: "done",
306
+ finish_reason: finish ?? "usage",
307
+ usage: {
308
+ prompt_tokens: chunk.usage.prompt_tokens ?? 0,
309
+ completion_tokens: chunk.usage.completion_tokens ?? 0,
310
+ },
311
+ };
312
+ }
198
313
  if (finish === "tool_calls" || finish === "stop") {
199
314
  if (toolCallMap.size > 0) {
200
315
  const tool_calls = [...toolCallMap.values()].map(tc => ({
@@ -205,7 +320,8 @@ export class ModelClient {
205
320
  yield { type: "tool_call", tool_calls };
206
321
  toolCallMap.clear();
207
322
  }
208
- yield { type: "done", finish_reason: finish };
323
+ if (!chunk.usage)
324
+ yield { type: "done", finish_reason: finish };
209
325
  }
210
326
  }
211
327
  }
@@ -219,6 +335,9 @@ export class ModelClient {
219
335
  yield { type: "tool_call", tool_calls };
220
336
  }
221
337
  yield { type: "done", finish_reason: "end" };
338
+ // Report success to model registry
339
+ if (!hadError)
340
+ this.modelRegistry?.recordSuccess(this.cfg.model, Date.now() - t0);
222
341
  }
223
342
  }
224
343
  //# sourceMappingURL=ModelClient.js.map
@@ -8,6 +8,8 @@
8
8
  * Sub-task execution is sequential inside each worker slot to avoid hammering
9
9
  * the provider; concurrency is capped at Math.min(maxAgents, os.cpus().length).
10
10
  */
11
+ import type { ModelRegistry } from "../models/ModelRegistry.js";
12
+ import type { ContextStore } from "../session/ContextStore.js";
11
13
  export interface SwarmConfig {
12
14
  /** Maximum parallel workers (hard cap: 5). Default: min(cpuCount, 3). */
13
15
  maxAgents?: number;
@@ -19,21 +21,22 @@ export interface SubTaskResult {
19
21
  result: string;
20
22
  model: string;
21
23
  ms: number;
24
+ error?: string;
22
25
  }
23
26
  /**
24
27
  * Returns a score 0–100 reflecting prompt complexity.
25
28
  * Tuned so "check ETH price" ≈ 10, "analyze ETH and BTC then predict" ≈ 55.
26
29
  */
27
30
  export declare function scoreComplexity(prompt: string): number;
28
- /**
29
- * Splits a complex prompt into 2–5 focused sub-task strings.
30
- * Uses simple heuristics so no extra model call is needed.
31
- */
32
- export declare function decompose(prompt: string, maxTasks: number): string[];
31
+ /** Original heuristic decomposer — used as fallback when LLM planner fails */
32
+ export declare function decomposeHeuristic(prompt: string, maxTasks: number): string[];
33
+ /** Exported for tests — heuristic only, no model call */
34
+ export declare const decompose: typeof decomposeHeuristic;
33
35
  export declare class SwarmRouter {
34
36
  private maxAgents;
35
37
  private complexityThreshold;
36
- constructor(cfg?: SwarmConfig);
38
+ private modelRegistry?;
39
+ constructor(cfg?: SwarmConfig, modelReg?: ModelRegistry);
37
40
  /** True when the prompt is complex enough to warrant swarm execution. */
38
41
  shouldSwarm(prompt: string): boolean;
39
42
  /**
@@ -50,7 +53,7 @@ export declare class SwarmRouter {
50
53
  * @param systemPrompt - Current system prompt (passed to each sub-agent + reviewer)
51
54
  * @param onProgress - Called as each sub-task completes
52
55
  */
53
- run(prompt: string, systemPrompt: string, onProgress: (result: SubTaskResult, remaining: number) => void): Promise<{
56
+ run(prompt: string, systemPrompt: string, onProgress: (result: SubTaskResult, remaining: number) => void, contextStore?: ContextStore): Promise<{
54
57
  synthesis: string;
55
58
  subResults: SubTaskResult[];
56
59
  }>;
@@ -31,44 +31,83 @@ export function scoreComplexity(prompt) {
31
31
  questions * 5 +
32
32
  Math.floor(wordCount / 8));
33
33
  }
34
- // ── Task decomposition ───────────────────────────────────────────────────────
34
+ // ── Task decomposition (# 29: LLM planner with heuristic fallback) ───────────
35
35
  /**
36
- * Splits a complex prompt into 2–5 focused sub-task strings.
37
- * Uses simple heuristics so no extra model call is needed.
36
+ * LLM-based task planner. Uses a cheap worker model to decompose the prompt
37
+ * into focused sub-tasks as a JSON array. Falls back to heuristics on failure.
38
38
  */
39
- export function decompose(prompt, maxTasks) {
39
+ async function planSubtasks(prompt, maxTasks, modelReg) {
40
+ const cap = Math.max(2, Math.min(maxTasks, 5));
41
+ // Attempt LLM decomposition with a cheap/fast model
42
+ try {
43
+ const chain = resolveModelChain(modelReg);
44
+ // Prefer a worker-tier model for planning (fast + cheap)
45
+ const plannerCfg = chain.find(c => modelReg?.getTier(c.model) === "worker") ?? chain[chain.length - 1] ?? chain[0];
46
+ const client = new ModelClient({ ...plannerCfg, temperature: 0.2 }, modelReg);
47
+ const plannerPrompt = `Split the following request into exactly ${cap} focused, non-overlapping sub-tasks.\n` +
48
+ `Each sub-task must be independently answerable using data tools.\n` +
49
+ `Output ONLY a valid JSON array of strings. No explanation, no markdown.\n\n` +
50
+ `Request: ${prompt}`;
51
+ let output = "";
52
+ for await (const chunk of client.stream([
53
+ { role: "system", content: "You output only valid JSON arrays of strings. No markdown, no explanation." },
54
+ { role: "user", content: plannerPrompt },
55
+ ], [])) {
56
+ if (chunk.type === "delta" && chunk.text)
57
+ output += chunk.text;
58
+ if (chunk.type === "error")
59
+ throw new Error(chunk.error);
60
+ }
61
+ // Extract JSON array from output (model might wrap in markdown)
62
+ const jsonMatch = output.match(/\[\s*"[\s\S]*?"\s*(?:,\s*"[\s\S]*?"\s*)*\]/);
63
+ if (jsonMatch) {
64
+ const tasks = JSON.parse(jsonMatch[0]);
65
+ if (Array.isArray(tasks) && tasks.every((t) => typeof t === "string") && tasks.length >= 2) {
66
+ return tasks.slice(0, cap);
67
+ }
68
+ }
69
+ }
70
+ catch {
71
+ // Fall through to heuristic decomposition
72
+ }
73
+ return decomposeHeuristic(prompt, cap);
74
+ }
75
+ /** Original heuristic decomposer — used as fallback when LLM planner fails */
76
+ export function decomposeHeuristic(prompt, maxTasks) {
40
77
  const cap = Math.max(2, Math.min(maxTasks, 5));
41
- // Split on explicit conjunctions / punctuation
42
78
  const parts = prompt
43
79
  .split(/,\s*| and | also | then | additionally | plus /i)
44
80
  .map(s => s.trim())
45
81
  .filter(s => s.length > 4);
46
- if (parts.length >= 2) {
82
+ if (parts.length >= 2)
47
83
  return parts.slice(0, cap);
48
- }
49
- // Fallback: split action verbs into separate sub-questions
50
84
  const verbMatches = [...prompt.matchAll(/\b(analyze|compare|predict|scan|check|estimate|evaluate)\b[^,.?]*/gi)];
51
- if (verbMatches.length >= 2) {
85
+ if (verbMatches.length >= 2)
52
86
  return verbMatches.slice(0, cap).map(m => m[0].trim());
53
- }
54
- // Cannot decompose meaningfully → return as-is (single task)
55
87
  return [prompt];
56
88
  }
57
- // ── Reviewer synthesis ───────────────────────────────────────────────────────
58
- async function reviewerSynthesize(originalPrompt, results, systemPrompt) {
59
- const chain = resolveModelChain();
89
+ /** Exported for tests — heuristic only, no model call */
90
+ export const decompose = decomposeHeuristic;
91
+ // ── Reviewer synthesis (#39: compact refs via ContextStore) ─────────────────
92
+ async function reviewerSynthesize(originalPrompt, allResults, systemPrompt, modelReg, contextRef) {
93
+ const chain = resolveModelChain(modelReg);
60
94
  const cfg = chain[0];
61
- const client = new ModelClient(cfg);
62
- const context = results
63
- .map((r, i) => `### Sub-task ${i + 1}: ${r.task}\n${r.result}`)
64
- .join("\n\n");
95
+ const client = new ModelClient(cfg, modelReg);
96
+ const results = allResults.filter(r => !r.error);
97
+ // #39: If ContextStore holds the full results, send compact summaries + reference
98
+ const context = contextRef
99
+ ? results.map((r, i) => `Sub-task ${i + 1} (${r.task.slice(0, 50)}): ${r.result.slice(0, 300)}...`).join("\n") + `\n\n${contextRef}`
100
+ : results
101
+ .map((r, i) => `### Sub-task ${i + 1}: ${r.task}\n${r.result}`)
102
+ .join("\n\n");
65
103
  const messages = [
66
104
  { role: "system", content: systemPrompt },
67
105
  {
68
106
  role: "user",
69
- content: `You are a synthesis reviewer. The following sub-tasks were run in response to the user's original request.\n\n` +
70
- `**Original request:** ${originalPrompt}\n\n${context}\n\n` +
71
- `Write a concise, unified answer that directly addresses the original request using all the above findings.`,
107
+ content: `You are a synthesis reviewer. Sub-tasks were executed for the following request.\n\n` +
108
+ `**Original request:** ${originalPrompt}\n\n` +
109
+ `**Sub-task results:**\n${context}\n\n` +
110
+ `Write a concise, unified answer that directly addresses the original request.`,
72
111
  },
73
112
  ];
74
113
  let out = "";
@@ -82,10 +121,12 @@ async function reviewerSynthesize(originalPrompt, results, systemPrompt) {
82
121
  export class SwarmRouter {
83
122
  maxAgents;
84
123
  complexityThreshold;
85
- constructor(cfg = {}) {
124
+ modelRegistry;
125
+ constructor(cfg = {}, modelReg) {
86
126
  const cpus = os.cpus().length;
87
127
  this.maxAgents = Math.min(cfg.maxAgents ?? Math.min(cpus, 3), 5);
88
128
  this.complexityThreshold = cfg.complexityThreshold ?? 40;
129
+ this.modelRegistry = modelReg;
89
130
  }
90
131
  /** True when the prompt is complex enough to warrant swarm execution. */
91
132
  shouldSwarm(prompt) {
@@ -105,10 +146,13 @@ export class SwarmRouter {
105
146
  * @param systemPrompt - Current system prompt (passed to each sub-agent + reviewer)
106
147
  * @param onProgress - Called as each sub-task completes
107
148
  */
108
- async run(prompt, systemPrompt, onProgress) {
109
- const tasks = decompose(prompt, this.maxAgents);
110
- const chain = resolveModelChain();
149
+ async run(prompt, systemPrompt, onProgress, contextStore) {
150
+ // #29: Use LLM planner for task decomposition (falls back to heuristic)
151
+ const tasks = await planSubtasks(prompt, this.maxAgents, this.modelRegistry);
152
+ const chain = resolveModelChain(this.modelRegistry);
111
153
  const subResults = [];
154
+ // #39: Open a task context folder to offload sub-results (saves context window)
155
+ const taskCtx = contextStore?.openTask(`Swarm: ${prompt.slice(0, 60)}`);
112
156
  // Split tasks into groups of 3 (the required "groups-of-3" planner)
113
157
  const GROUP_SIZE = 3;
114
158
  const batches = [];
@@ -118,24 +162,32 @@ export class SwarmRouter {
118
162
  let modelIdx = 1; // reserve chain[0] for reviewer
119
163
  const runOne = async (task, mIdx, remaining) => {
120
164
  const cfg = chain[mIdx % chain.length] ?? chain[0];
121
- const client = new ModelClient(cfg);
165
+ const client = new ModelClient(cfg, this.modelRegistry);
122
166
  const msgs = [
123
167
  { role: "system", content: systemPrompt },
124
168
  { role: "user", content: task },
125
169
  ];
126
170
  const t0 = Date.now();
127
171
  let out = "";
172
+ let error;
128
173
  for await (const chunk of client.stream(msgs, [])) {
129
174
  if (chunk.type === "delta" && chunk.text)
130
175
  out += chunk.text;
176
+ if (chunk.type === "error")
177
+ error = chunk.error ?? "Sub-task model error";
131
178
  }
132
179
  const r = {
133
180
  task,
134
- result: out || "(no output)",
181
+ result: out || (error ? `(error: ${error})` : "(no output)"),
135
182
  model: cfg.model,
136
183
  ms: Date.now() - t0,
184
+ error,
137
185
  };
138
186
  subResults.push(r);
187
+ // #39: Write sub-result to context file instead of keeping raw in memory
188
+ if (taskCtx && contextStore) {
189
+ contextStore.appendFinding(taskCtx.taskId, `Sub-task: ${task.slice(0, 50)}`, r.result);
190
+ }
139
191
  onProgress(r, remaining);
140
192
  };
141
193
  // Execute batches sequentially; within each batch run up to 3 in parallel
@@ -146,7 +198,12 @@ export class SwarmRouter {
146
198
  return runOne(task, modelIdx++, remaining);
147
199
  }));
148
200
  }
149
- const synthesis = await reviewerSynthesize(prompt, subResults, systemPrompt);
201
+ // #39: Pass context reference to reviewer (compact path vs raw dump)
202
+ const contextRef = taskCtx ? contextStore?.getReference(taskCtx.taskId) : undefined;
203
+ const synthesis = await reviewerSynthesize(prompt, subResults, systemPrompt, this.modelRegistry, contextRef);
204
+ // Close the context folder (auto-deletes in 5s)
205
+ if (taskCtx)
206
+ contextStore?.closeTask(taskCtx.taskId);
150
207
  return { synthesis, subResults };
151
208
  }
152
209
  }
@@ -10,10 +10,20 @@ export interface ToolResult {
10
10
  content: string;
11
11
  isError: boolean;
12
12
  }
13
+ /** #40: Estimate chars that will be added to context by dispatching these calls */
14
+ export declare function forecastContextGrowth(calls: {
15
+ function: {
16
+ name: string;
17
+ };
18
+ }[]): number;
13
19
  export declare class ToolDispatcher {
14
20
  private registry;
21
+ private failureCounts;
22
+ private openCircuits;
15
23
  constructor(registry: Registry);
16
24
  dispatch(calls: ToolCall[]): Promise<ToolResult[]>;
17
25
  private execute;
26
+ private executeWithTimeout;
27
+ private executeInner;
18
28
  }
19
29
  //# sourceMappingURL=ToolDispatcher.d.ts.map
@@ -3,8 +3,65 @@
3
3
  * Looks up tool by name in the Registry, validates params, runs execute().
4
4
  */
5
5
  import { Value } from "@sinclair/typebox/value";
6
+ /**
7
+ * Attempt to repair common JSON errors from model output.
8
+ * Handles trailing commas, single quotes, unquoted keys.
9
+ * Returns original string if repair doesn't help.
10
+ */
11
+ function repairJson(raw) {
12
+ try {
13
+ JSON.parse(raw);
14
+ return raw;
15
+ }
16
+ catch { /* fall through to repair */ }
17
+ const repaired = raw
18
+ .replace(/,\s*}/g, "}")
19
+ .replace(/,\s*]/g, "]")
20
+ .replace(/([{,]\s*)(\w+)(\s*:)/g, '$1"$2"$3') // unquoted keys
21
+ .replace(/:\s*'([^']*)'/g, ': "$1"'); // single-quoted values
22
+ try {
23
+ JSON.parse(repaired);
24
+ return repaired;
25
+ }
26
+ catch {
27
+ return raw;
28
+ }
29
+ }
30
+ const TOOL_TIMEOUT_MS = 30_000;
31
+ const CIRCUIT_OPEN_MS = 300_000;
32
+ const CIRCUIT_THRESHOLD = 3;
33
+ // #40: Estimated output sizes per tool (chars) for pre-dispatch budget forecasting
34
+ const TOOL_OUTPUT_ESTIMATES = {
35
+ get_candles: 8_000, // 100 OHLCV + TA = ~8KB
36
+ analyze_ta: 2_000,
37
+ get_prices: 500,
38
+ get_top_movers: 800,
39
+ get_market_overview: 1_000,
40
+ get_news: 4_000,
41
+ get_fear_greed: 400,
42
+ get_funding_rates: 600,
43
+ get_btc_mempool: 400,
44
+ get_defi_tvl: 2_000,
45
+ get_solana_stats: 300,
46
+ list_models: 3_000,
47
+ list_tasks: 500,
48
+ read_task_context: 6_000,
49
+ cost_report: 400,
50
+ list_goals: 600,
51
+ model_summary: 400,
52
+ _default: 2_000,
53
+ };
54
+ /** #40: Estimate chars that will be added to context by dispatching these calls */
55
+ export function forecastContextGrowth(calls) {
56
+ return calls.reduce((sum, tc) => {
57
+ const est = TOOL_OUTPUT_ESTIMATES[tc.function.name] ?? TOOL_OUTPUT_ESTIMATES["_default"];
58
+ return sum + est;
59
+ }, 0);
60
+ }
6
61
  export class ToolDispatcher {
7
62
  registry;
63
+ failureCounts = new Map();
64
+ openCircuits = new Map(); // toolName → openUntil timestamp
8
65
  constructor(registry) {
9
66
  this.registry = registry;
10
67
  }
@@ -12,6 +69,52 @@ export class ToolDispatcher {
12
69
  return Promise.all(calls.map(tc => this.execute(tc)));
13
70
  }
14
71
  async execute(tc) {
72
+ const toolName = tc.function.name;
73
+ // #6: Circuit breaker — fast-fail if tool has been consistently broken
74
+ const openUntil = this.openCircuits.get(toolName) ?? 0;
75
+ if (Date.now() < openUntil) {
76
+ const remainMs = Math.ceil((openUntil - Date.now()) / 1000);
77
+ return {
78
+ tool_call_id: tc.id,
79
+ name: toolName,
80
+ content: `Tool "${toolName}" is temporarily unavailable (circuit open for ${remainMs}s after repeated failures). Use a different approach or try again later.`,
81
+ isError: true,
82
+ };
83
+ }
84
+ try {
85
+ const result = await this.executeWithTimeout(tc);
86
+ // Reset failure count on success
87
+ this.failureCounts.delete(toolName);
88
+ return result;
89
+ }
90
+ catch (e) {
91
+ const errMsg = e instanceof Error ? e.message : String(e);
92
+ const failures = (this.failureCounts.get(toolName) ?? 0) + 1;
93
+ this.failureCounts.set(toolName, failures);
94
+ if (failures >= CIRCUIT_THRESHOLD) {
95
+ this.openCircuits.set(toolName, Date.now() + CIRCUIT_OPEN_MS);
96
+ this.failureCounts.delete(toolName);
97
+ return {
98
+ tool_call_id: tc.id,
99
+ name: toolName,
100
+ content: `Tool "${toolName}" failed ${CIRCUIT_THRESHOLD} times in a row. Circuit opened for 5 minutes. Error: ${errMsg}`,
101
+ isError: true,
102
+ };
103
+ }
104
+ return {
105
+ tool_call_id: tc.id,
106
+ name: toolName,
107
+ content: `Tool error (failure ${failures}/${CIRCUIT_THRESHOLD}): ${errMsg}`,
108
+ isError: true,
109
+ };
110
+ }
111
+ }
112
+ async executeWithTimeout(tc) {
113
+ // Race tool execution against a hard timeout
114
+ const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error(`Tool "${tc.function.name}" timed out after ${TOOL_TIMEOUT_MS / 1000}s`)), TOOL_TIMEOUT_MS));
115
+ return Promise.race([this.executeInner(tc), timeoutPromise]);
116
+ }
117
+ async executeInner(tc) {
15
118
  const tool = this.registry.getTool(tc.function.name);
16
119
  if (!tool) {
17
120
  return {
@@ -23,13 +126,14 @@ export class ToolDispatcher {
23
126
  }
24
127
  let params;
25
128
  try {
26
- params = JSON.parse(tc.function.arguments || "{}");
129
+ // #8: attempt JSON repair before hard-failing on malformed model output
130
+ params = JSON.parse(repairJson(tc.function.arguments || "{}"));
27
131
  }
28
132
  catch {
29
133
  return {
30
134
  tool_call_id: tc.id,
31
135
  name: tc.function.name,
32
- content: `Invalid JSON arguments: ${tc.function.arguments}`,
136
+ content: `Invalid JSON arguments (repair failed): ${tc.function.arguments}`,
33
137
  isError: true,
34
138
  };
35
139
  }