npm - @jellyos/agent - Versions diffs - 0.1.3 → 0.1.5 - Mend

@jellyos/agent 0.1.3 → 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (90)

package/README.md +9 -9
package/README.npm.md +212 -0
package/bin/jellyos-mcp +26 -0
package/dist/api/ExtensionAPI.d.ts +6 -0
package/dist/api/Registry.js +3 -1
package/dist/cli.js +117 -42
package/dist/index.d.ts +24 -1
package/dist/index.js +19 -2
package/dist/mcp/entry.d.ts +2 -0
package/dist/mcp/entry.js +71 -0
package/dist/mcp/server.d.ts +31 -0
package/dist/mcp/server.js +128 -0
package/dist/models/CostTracker.d.ts +66 -0
package/dist/models/CostTracker.js +148 -0
package/dist/models/ModelRegistry.d.ts +157 -0
package/dist/models/ModelRegistry.js +496 -0
package/dist/models/index.d.ts +5 -0
package/dist/models/index.js +3 -0
package/dist/runner/AgentRunner.d.ts +23 -2
package/dist/runner/AgentRunner.js +264 -24
package/dist/runner/ModelClient.d.ts +26 -6
package/dist/runner/ModelClient.js +147 -28
package/dist/runner/SwarmRouter.d.ts +10 -7
package/dist/runner/SwarmRouter.js +85 -28
package/dist/runner/ToolDispatcher.d.ts +10 -0
package/dist/runner/ToolDispatcher.js +106 -2
package/dist/scheduler/AgentScheduler.d.ts +118 -0
package/dist/scheduler/AgentScheduler.js +253 -0
package/dist/session/ContextStore.d.ts +96 -0
package/dist/session/ContextStore.js +207 -0
package/dist/session/GoalManager.d.ts +101 -0
package/dist/session/GoalManager.js +167 -0
package/dist/session/MemoryStore.d.ts +48 -0
package/dist/session/MemoryStore.js +166 -0
package/dist/session/SessionManager.d.ts +45 -4
package/dist/session/SessionManager.js +151 -8
package/dist/telemetry/Tracer.d.ts +48 -0
package/dist/telemetry/Tracer.js +102 -0
package/dist/tests/ContextStore.test.d.ts +2 -0
package/dist/tests/ContextStore.test.js +74 -0
package/dist/tests/ModelRegistry.test.d.ts +2 -0
package/dist/tests/ModelRegistry.test.js +69 -0
package/dist/tests/SessionManager.test.d.ts +2 -0
package/dist/tests/SessionManager.test.js +108 -0
package/dist/tests/TechnicalAnalysis.test.d.ts +2 -0
package/dist/tests/TechnicalAnalysis.test.js +109 -0
package/dist/tools/MarketSentiment.d.ts +166 -0
package/dist/tools/MarketSentiment.js +209 -0
package/dist/tools/NewsSentiment.d.ts +67 -0
package/dist/tools/NewsSentiment.js +226 -0
package/dist/tools/PriceFeed.d.ts +105 -0
package/dist/tools/PriceFeed.js +282 -0
package/dist/tools/TechnicalAnalysis.d.ts +110 -0
package/dist/tools/TechnicalAnalysis.js +357 -0
package/dist/tools/index.d.ts +7 -0
package/dist/tools/index.js +4 -0
package/dist/tui/App.d.ts +7 -5
package/dist/tui/App.js +350 -65
package/dist/tui/REPL.d.ts +2 -1
package/dist/tui/REPL.js +11 -6
package/dist/tui/StatusBar.js +1 -1
package/package.json +9 -4
package/dist/api/ExtensionAPI.d.ts.map +0 -1
package/dist/api/ExtensionAPI.js.map +0 -1
package/dist/api/Registry.d.ts.map +0 -1
package/dist/api/Registry.js.map +0 -1
package/dist/cli.d.ts.map +0 -1
package/dist/cli.js.map +0 -1
package/dist/index.d.ts.map +0 -1
package/dist/index.js.map +0 -1
package/dist/loader.d.ts.map +0 -1
package/dist/loader.js.map +0 -1
package/dist/runner/AgentRunner.d.ts.map +0 -1
package/dist/runner/AgentRunner.js.map +0 -1
package/dist/runner/ModelClient.d.ts.map +0 -1
package/dist/runner/ModelClient.js.map +0 -1
package/dist/runner/SwarmRouter.d.ts.map +0 -1
package/dist/runner/SwarmRouter.js.map +0 -1
package/dist/runner/ToolDispatcher.d.ts.map +0 -1
package/dist/runner/ToolDispatcher.js.map +0 -1
package/dist/session/SessionManager.d.ts.map +0 -1
package/dist/session/SessionManager.js.map +0 -1
package/dist/tui/App.d.ts.map +0 -1
package/dist/tui/App.js.map +0 -1
package/dist/tui/REPL.d.ts.map +0 -1
package/dist/tui/REPL.js.map +0 -1
package/dist/tui/StatusBar.d.ts.map +0 -1
package/dist/tui/StatusBar.js.map +0 -1
package/dist/tui/theme.d.ts.map +0 -1
package/dist/tui/theme.js.map +0 -1

package/dist/runner/ModelClient.js CHANGED Viewed

@@ -5,7 +5,11 @@
  *   OpenRouter > Anthropic compat > OpenAI > local (ollama/lm-studio)
  *
  * Model rotation: resolveModelChain() returns up to 5 configs — the AgentRunner
- * walks the chain on 429 (rate limit) or 5xx errors, providing seamless fallback.
+ * walks the chain on 429 (rate limit) or 5xx errors, with exponential backoff
+ * (up to 2 retries per model) before falling through.
+ *
+ * When a ModelRegistry is available, chains are dynamically built from the
+ * tiered pool, with per-model performance tracking and cost estimation.
  *
  * All outbound, all local — no inbound ports, no server.
  */
@@ -13,6 +17,9 @@
 /**
  * Build the ordered model fallback chain.
  *
+ * If a ModelRegistry is provided, builds from the tiered pool dynamically.
+ * Falls back to static env-var parsing otherwise.
+ *
  * User-configurable pool: JELLY_MODEL_1 … JELLY_MODEL_5
  * If any JELLY_MODEL_N vars are set they take priority; up to 5 are used in
  * order. Unset slots are filled with provider-appropriate defaults.
@@ -22,7 +29,7 @@
  *   JELLY_MODEL_2=openai/gpt-4o
  *   JELLY_MODEL_3=google/gemini-2.5-pro
  */
-export function resolveModelChain() {
+export function resolveModelChain(modelReg) {
     const env = process.env;
     const tokens = parseInt(env.MAX_TOKENS ?? "8192");
     const temp = parseFloat(env.TEMPERATURE ?? "0.7");
@@ -33,6 +40,11 @@ export function resolveModelChain() {
         if (m?.trim())
             userModels.push(m.trim());
     }
+    // ── Use ModelRegistry dynamic pool if available ──────────────────────────
+    if (modelReg) {
+        return modelReg.buildModelChain(userModels);
+    }
+    // ── Static fallback (used when ModelRegistry cannot be initialised) ───────
     // ── OpenRouter — supports all providers via a single key ─────────────────
     if (env.OPENROUTER_API_KEY) {
         const base = "https://openrouter.ai/api/v1";
@@ -94,24 +106,30 @@ export function resolveModelChain() {
                 temperature: temp,
             }];
     }
-    throw new Error("No API key found. Set OPENROUTER_API_KEY, ANTHROPIC_API_KEY, or OPENAI_API_KEY in ~/.jellyos/.env");
+    throw new Error("No API key found. Set OPENROUTER_API_KEY, ANTHROPIC_API_KEY, or OPENAI_API_KEY in ~/.jelly/.env");
 }
 /** Convenience: returns just the primary (first) model config */
-export function resolveModelConfig() {
-    return resolveModelChain()[0];
+export function resolveModelConfig(modelReg) {
+    return resolveModelChain(modelReg)[0];
 }
 // ── ModelClient ───────────────────────────────────────────────────────────────
 export class ModelClient {
     cfg;
-    constructor(cfg) {
+    modelRegistry;
+    constructor(cfg, modelReg) {
         this.cfg = cfg;
+        this.modelRegistry = modelReg;
     }
     /**
      * Stream a chat completion. Yields ChatChunk objects.
-     * On HTTP error the generator yields a single { type: "error", status, error }
+     * Retries up to 2 times on 429 / 5xx with exponential backoff (1s, 2s).
+     * On persistent HTTP error the generator yields a single { type: "error", status, error }
      * chunk and returns — the caller (AgentRunner) decides whether to rotate.
+     * Also reports success/failure to the ModelRegistry for tiering and cooldown.
      */
-    async *stream(messages, tools) {
+    async *stream(messages, tools, abortSignal) {
+        const t0 = Date.now();
+        let hadError = false;
         const headers = {
             "Content-Type": "application/json",
             "Authorization": `Bearer ${this.cfg.apiKey}`,
@@ -121,34 +139,120 @@ export class ModelClient {
             headers["HTTP-Referer"] = this.cfg.siteUrl;
         if (this.cfg.siteName)
             headers["X-Title"] = this.cfg.siteName;
+        // #13: Detect thinking-capable models
+        const THINKING_MODELS = new Set([
+            "anthropic/claude-opus-4.7", "anthropic/claude-opus-4.7-fast",
+            "anthropic/claude-opus-4.6", "anthropic/claude-opus-4.6-fast",
+            "anthropic/claude-opus-4.5", "anthropic/claude-opus-4",
+            "openai/o3", "openai/o3-pro", "openai/o3-mini",
+            "openai/o4", "openai/o4-mini",
+        ]);
+        const isThinkingModel = THINKING_MODELS.has(this.cfg.model) || /thinking/i.test(this.cfg.model);
+        const useThinking = this.cfg.thinkingEnabled && isThinkingModel;
+        const isOSeries = /openai\/o[34]/i.test(this.cfg.model);
+        const isAnthropicModel = this.cfg.model.startsWith("anthropic/") ||
+            this.cfg.baseUrl.includes("anthropic.com");
+        // Build request body
         const body = {
             model: this.cfg.model,
-            messages,
             max_tokens: this.cfg.maxTokens,
-            temperature: this.cfg.temperature,
             stream: true,
         };
+        // #13: Temperature handling — o-series does not support temperature
+        if (!isOSeries) {
+            body.temperature = useThinking ? 1.0 : this.cfg.temperature; // thinking requires 1.0
+        }
+        // #15: Prompt caching for Anthropic — extract system message, add cache_control
+        if (isAnthropicModel) {
+            const sysMsg = messages.find(m => m.role === "system");
+            const rest = messages.filter(m => m.role !== "system");
+            if (sysMsg && typeof sysMsg.content === "string" && sysMsg.content.length > 512) {
+                // Cache the system prompt (saves up to 90% on repeated calls)
+                body.system = [{
+                        type: "text",
+                        text: sysMsg.content,
+                        cache_control: { type: "ephemeral" },
+                    }];
+                body.messages = rest;
+            }
+            else {
+                body.messages = messages;
+            }
+            // #13: Extended thinking for Claude Opus 4.x
+            if (useThinking) {
+                body.thinking = { type: "enabled", budget_tokens: this.cfg.thinkingBudget ?? 8000 };
+                headers["anthropic-beta"] = "thinking-v1";
+            }
+        }
+        else {
+            body.messages = messages;
+        }
+        // #13: o-series reasoning effort
+        if (isOSeries && useThinking) {
+            body.reasoning_effort = "high";
+        }
         if (tools && tools.length > 0) {
-            body.tools = tools;
+            // strict: true enforces valid JSON on GPT-4o+ and GPT-5.x
+            // Skip strict mode for o-series (not supported) and thinking models
+            body.tools = tools.map(t => ({
+                ...t,
+                function: isOSeries ? t.function : { ...t.function, strict: true },
+            }));
             body.tool_choice = "auto";
+            // Disable parallel tool calls — prevents race conditions in tool_call_id map
+            body.parallel_tool_calls = false;
         }
+        const MAX_RETRIES = 2;
+        const RETRY_STATUSES = new Set([429, 500, 502, 503, 504]);
         let res;
-        try {
-            res = await fetch(`${this.cfg.baseUrl}/chat/completions`, {
-                method: "POST",
-                headers,
-                body: JSON.stringify(body),
-                signal: AbortSignal.timeout(120_000),
-            });
-        }
-        catch (e) {
-            yield { type: "error", error: `Network error: ${e.message}`, status: 0 };
-            return;
-        }
-        if (!res.ok) {
-            const err = await res.text().catch(() => res.statusText);
-            yield { type: "error", error: `Model API ${res.status}: ${err}`, status: res.status };
-            return;
+        let lastError = "";
+        for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+            try {
+                // #25: Combine user abort signal with 120s timeout
+                const timeoutSignal = AbortSignal.timeout(120_000);
+                const combinedSignal = abortSignal
+                    ? AbortSignal.any([abortSignal, timeoutSignal])
+                    : timeoutSignal;
+                res = await fetch(`${this.cfg.baseUrl}/chat/completions`, {
+                    method: "POST",
+                    headers,
+                    body: JSON.stringify(body),
+                    signal: combinedSignal,
+                });
+            }
+            catch (e) {
+                if (e?.name === "AbortError") {
+                    yield { type: "done", finish_reason: "aborted" };
+                    return;
+                }
+                hadError = true;
+                lastError = `Network error: ${e.message}`;
+                if (attempt < MAX_RETRIES) {
+                    await new Promise(r => setTimeout(r, 1000 * (attempt + 1)));
+                    continue;
+                }
+                this.modelRegistry?.recordFailure(this.cfg.model);
+                yield { type: "error", error: lastError, status: 0 };
+                return;
+            }
+            if (!res.ok && RETRY_STATUSES.has(res.status) && attempt < MAX_RETRIES) {
+                hadError = true;
+                lastError = await res.text().catch(() => res.statusText);
+                await new Promise(r => setTimeout(r, 1000 * (attempt + 1)));
+                continue;
+            }
+            if (!res.ok) {
+                hadError = true;
+                const err = await res.text().catch(() => res.statusText);
+                // 404 → model removed, mark permanently deprecated
+                if (res.status === 404)
+                    this.modelRegistry?.markDeprecated(this.cfg.model);
+                else
+                    this.modelRegistry?.recordFailure(this.cfg.model);
+                yield { type: "error", error: `Model API ${res.status}: ${err}`, status: res.status };
+                return;
+            }
+            break; // success — got an ok response
         }
         // Accumulate tool calls across chunks (they arrive fragmented)
         const toolCallMap = new Map();
@@ -195,6 +299,17 @@ export class ModelClient {
                     }
                 }
                 const finish = chunk.choices?.[0]?.finish_reason;
+                // Capture usage from final chunk (OpenAI/OpenRouter send this on finish)
+                if (chunk.usage) {
+                    yield {
+                        type: "done",
+                        finish_reason: finish ?? "usage",
+                        usage: {
+                            prompt_tokens: chunk.usage.prompt_tokens ?? 0,
+                            completion_tokens: chunk.usage.completion_tokens ?? 0,
+                        },
+                    };
+                }
                 if (finish === "tool_calls" || finish === "stop") {
                     if (toolCallMap.size > 0) {
                         const tool_calls = [...toolCallMap.values()].map(tc => ({
@@ -205,7 +320,8 @@ export class ModelClient {
                         yield { type: "tool_call", tool_calls };
                         toolCallMap.clear();
                     }
-                    yield { type: "done", finish_reason: finish };
+                    if (!chunk.usage)
+                        yield { type: "done", finish_reason: finish };
                 }
             }
         }
@@ -219,6 +335,9 @@ export class ModelClient {
             yield { type: "tool_call", tool_calls };
         }
         yield { type: "done", finish_reason: "end" };
+        // Report success to model registry
+        if (!hadError)
+            this.modelRegistry?.recordSuccess(this.cfg.model, Date.now() - t0);
     }
 }
 //# sourceMappingURL=ModelClient.js.map

package/dist/runner/SwarmRouter.d.ts CHANGED Viewed

@@ -8,6 +8,8 @@
  * Sub-task execution is sequential inside each worker slot to avoid hammering
  * the provider; concurrency is capped at Math.min(maxAgents, os.cpus().length).
  */
+import type { ModelRegistry } from "../models/ModelRegistry.js";
+import type { ContextStore } from "../session/ContextStore.js";
 export interface SwarmConfig {
     /** Maximum parallel workers (hard cap: 5). Default: min(cpuCount, 3). */
     maxAgents?: number;
@@ -19,21 +21,22 @@ export interface SubTaskResult {
     result: string;
     model: string;
     ms: number;
+    error?: string;
 }
 /**
  * Returns a score 0–100 reflecting prompt complexity.
  * Tuned so "check ETH price" ≈ 10, "analyze ETH and BTC then predict" ≈ 55.
  */
 export declare function scoreComplexity(prompt: string): number;
-/**
- * Splits a complex prompt into 2–5 focused sub-task strings.
- * Uses simple heuristics so no extra model call is needed.
- */
-export declare function decompose(prompt: string, maxTasks: number): string[];
+/** Original heuristic decomposer — used as fallback when LLM planner fails */
+export declare function decomposeHeuristic(prompt: string, maxTasks: number): string[];
+/** Exported for tests — heuristic only, no model call */
+export declare const decompose: typeof decomposeHeuristic;
 export declare class SwarmRouter {
     private maxAgents;
     private complexityThreshold;
-    constructor(cfg?: SwarmConfig);
+    private modelRegistry?;
+    constructor(cfg?: SwarmConfig, modelReg?: ModelRegistry);
     /** True when the prompt is complex enough to warrant swarm execution. */
     shouldSwarm(prompt: string): boolean;
     /**
@@ -50,7 +53,7 @@ export declare class SwarmRouter {
      * @param systemPrompt  - Current system prompt (passed to each sub-agent + reviewer)
      * @param onProgress    - Called as each sub-task completes
      */
-    run(prompt: string, systemPrompt: string, onProgress: (result: SubTaskResult, remaining: number) => void): Promise<{
+    run(prompt: string, systemPrompt: string, onProgress: (result: SubTaskResult, remaining: number) => void, contextStore?: ContextStore): Promise<{
         synthesis: string;
         subResults: SubTaskResult[];
     }>;

package/dist/runner/SwarmRouter.js CHANGED Viewed

@@ -31,44 +31,83 @@ export function scoreComplexity(prompt) {
         questions * 5 +
         Math.floor(wordCount / 8));
 }
-// ── Task decomposition ───────────────────────────────────────────────────────
+// ── Task decomposition (# 29: LLM planner with heuristic fallback) ───────────
 /**
- * Splits a complex prompt into 2–5 focused sub-task strings.
- * Uses simple heuristics so no extra model call is needed.
+ * LLM-based task planner. Uses a cheap worker model to decompose the prompt
+ * into focused sub-tasks as a JSON array. Falls back to heuristics on failure.
  */
-export function decompose(prompt, maxTasks) {
+async function planSubtasks(prompt, maxTasks, modelReg) {
+    const cap = Math.max(2, Math.min(maxTasks, 5));
+    // Attempt LLM decomposition with a cheap/fast model
+    try {
+        const chain = resolveModelChain(modelReg);
+        // Prefer a worker-tier model for planning (fast + cheap)
+        const plannerCfg = chain.find(c => modelReg?.getTier(c.model) === "worker") ?? chain[chain.length - 1] ?? chain[0];
+        const client = new ModelClient({ ...plannerCfg, temperature: 0.2 }, modelReg);
+        const plannerPrompt = `Split the following request into exactly ${cap} focused, non-overlapping sub-tasks.\n` +
+            `Each sub-task must be independently answerable using data tools.\n` +
+            `Output ONLY a valid JSON array of strings. No explanation, no markdown.\n\n` +
+            `Request: ${prompt}`;
+        let output = "";
+        for await (const chunk of client.stream([
+            { role: "system", content: "You output only valid JSON arrays of strings. No markdown, no explanation." },
+            { role: "user", content: plannerPrompt },
+        ], [])) {
+            if (chunk.type === "delta" && chunk.text)
+                output += chunk.text;
+            if (chunk.type === "error")
+                throw new Error(chunk.error);
+        }
+        // Extract JSON array from output (model might wrap in markdown)
+        const jsonMatch = output.match(/\[\s*"[\s\S]*?"\s*(?:,\s*"[\s\S]*?"\s*)*\]/);
+        if (jsonMatch) {
+            const tasks = JSON.parse(jsonMatch[0]);
+            if (Array.isArray(tasks) && tasks.every((t) => typeof t === "string") && tasks.length >= 2) {
+                return tasks.slice(0, cap);
+            }
+        }
+    }
+    catch {
+        // Fall through to heuristic decomposition
+    }
+    return decomposeHeuristic(prompt, cap);
+}
+/** Original heuristic decomposer — used as fallback when LLM planner fails */
+export function decomposeHeuristic(prompt, maxTasks) {
     const cap = Math.max(2, Math.min(maxTasks, 5));
-    // Split on explicit conjunctions / punctuation
     const parts = prompt
         .split(/,\s*| and | also | then | additionally | plus /i)
         .map(s => s.trim())
         .filter(s => s.length > 4);
-    if (parts.length >= 2) {
+    if (parts.length >= 2)
         return parts.slice(0, cap);
-    }
-    // Fallback: split action verbs into separate sub-questions
     const verbMatches = [...prompt.matchAll(/\b(analyze|compare|predict|scan|check|estimate|evaluate)\b[^,.?]*/gi)];
-    if (verbMatches.length >= 2) {
+    if (verbMatches.length >= 2)
         return verbMatches.slice(0, cap).map(m => m[0].trim());
-    }
-    // Cannot decompose meaningfully → return as-is (single task)
     return [prompt];
 }
-// ── Reviewer synthesis ───────────────────────────────────────────────────────
-async function reviewerSynthesize(originalPrompt, results, systemPrompt) {
-    const chain = resolveModelChain();
+/** Exported for tests — heuristic only, no model call */
+export const decompose = decomposeHeuristic;
+// ── Reviewer synthesis (#39: compact refs via ContextStore) ─────────────────
+async function reviewerSynthesize(originalPrompt, allResults, systemPrompt, modelReg, contextRef) {
+    const chain = resolveModelChain(modelReg);
     const cfg = chain[0];
-    const client = new ModelClient(cfg);
-    const context = results
-        .map((r, i) => `### Sub-task ${i + 1}: ${r.task}\n${r.result}`)
-        .join("\n\n");
+    const client = new ModelClient(cfg, modelReg);
+    const results = allResults.filter(r => !r.error);
+    // #39: If ContextStore holds the full results, send compact summaries + reference
+    const context = contextRef
+        ? results.map((r, i) => `Sub-task ${i + 1} (${r.task.slice(0, 50)}): ${r.result.slice(0, 300)}...`).join("\n") + `\n\n${contextRef}`
+        : results
+            .map((r, i) => `### Sub-task ${i + 1}: ${r.task}\n${r.result}`)
+            .join("\n\n");
     const messages = [
         { role: "system", content: systemPrompt },
         {
             role: "user",
-            content: `You are a synthesis reviewer. The following sub-tasks were run in response to the user's original request.\n\n` +
-                `**Original request:** ${originalPrompt}\n\n${context}\n\n` +
-                `Write a concise, unified answer that directly addresses the original request using all the above findings.`,
+            content: `You are a synthesis reviewer. Sub-tasks were executed for the following request.\n\n` +
+                `**Original request:** ${originalPrompt}\n\n` +
+                `**Sub-task results:**\n${context}\n\n` +
+                `Write a concise, unified answer that directly addresses the original request.`,
         },
     ];
     let out = "";
@@ -82,10 +121,12 @@ async function reviewerSynthesize(originalPrompt, results, systemPrompt) {
 export class SwarmRouter {
     maxAgents;
     complexityThreshold;
-    constructor(cfg = {}) {
+    modelRegistry;
+    constructor(cfg = {}, modelReg) {
         const cpus = os.cpus().length;
         this.maxAgents = Math.min(cfg.maxAgents ?? Math.min(cpus, 3), 5);
         this.complexityThreshold = cfg.complexityThreshold ?? 40;
+        this.modelRegistry = modelReg;
     }
     /** True when the prompt is complex enough to warrant swarm execution. */
     shouldSwarm(prompt) {
@@ -105,10 +146,13 @@ export class SwarmRouter {
      * @param systemPrompt  - Current system prompt (passed to each sub-agent + reviewer)
      * @param onProgress    - Called as each sub-task completes
      */
-    async run(prompt, systemPrompt, onProgress) {
-        const tasks = decompose(prompt, this.maxAgents);
-        const chain = resolveModelChain();
+    async run(prompt, systemPrompt, onProgress, contextStore) {
+        // #29: Use LLM planner for task decomposition (falls back to heuristic)
+        const tasks = await planSubtasks(prompt, this.maxAgents, this.modelRegistry);
+        const chain = resolveModelChain(this.modelRegistry);
         const subResults = [];
+        // #39: Open a task context folder to offload sub-results (saves context window)
+        const taskCtx = contextStore?.openTask(`Swarm: ${prompt.slice(0, 60)}`);
         // Split tasks into groups of 3 (the required "groups-of-3" planner)
         const GROUP_SIZE = 3;
         const batches = [];
@@ -118,24 +162,32 @@ export class SwarmRouter {
         let modelIdx = 1; // reserve chain[0] for reviewer
         const runOne = async (task, mIdx, remaining) => {
             const cfg = chain[mIdx % chain.length] ?? chain[0];
-            const client = new ModelClient(cfg);
+            const client = new ModelClient(cfg, this.modelRegistry);
             const msgs = [
                 { role: "system", content: systemPrompt },
                 { role: "user", content: task },
             ];
             const t0 = Date.now();
             let out = "";
+            let error;
             for await (const chunk of client.stream(msgs, [])) {
                 if (chunk.type === "delta" && chunk.text)
                     out += chunk.text;
+                if (chunk.type === "error")
+                    error = chunk.error ?? "Sub-task model error";
             }
             const r = {
                 task,
-                result: out || "(no output)",
+                result: out || (error ? `(error: ${error})` : "(no output)"),
                 model: cfg.model,
                 ms: Date.now() - t0,
+                error,
             };
             subResults.push(r);
+            // #39: Write sub-result to context file instead of keeping raw in memory
+            if (taskCtx && contextStore) {
+                contextStore.appendFinding(taskCtx.taskId, `Sub-task: ${task.slice(0, 50)}`, r.result);
+            }
             onProgress(r, remaining);
         };
         // Execute batches sequentially; within each batch run up to 3 in parallel
@@ -146,7 +198,12 @@ export class SwarmRouter {
                 return runOne(task, modelIdx++, remaining);
             }));
         }
-        const synthesis = await reviewerSynthesize(prompt, subResults, systemPrompt);
+        // #39: Pass context reference to reviewer (compact path vs raw dump)
+        const contextRef = taskCtx ? contextStore?.getReference(taskCtx.taskId) : undefined;
+        const synthesis = await reviewerSynthesize(prompt, subResults, systemPrompt, this.modelRegistry, contextRef);
+        // Close the context folder (auto-deletes in 5s)
+        if (taskCtx)
+            contextStore?.closeTask(taskCtx.taskId);
         return { synthesis, subResults };
     }
 }

package/dist/runner/ToolDispatcher.d.ts CHANGED Viewed

@@ -10,10 +10,20 @@ export interface ToolResult {
     content: string;
     isError: boolean;
 }
+/** #40: Estimate chars that will be added to context by dispatching these calls */
+export declare function forecastContextGrowth(calls: {
+    function: {
+        name: string;
+    };
+}[]): number;
 export declare class ToolDispatcher {
     private registry;
+    private failureCounts;
+    private openCircuits;
     constructor(registry: Registry);
     dispatch(calls: ToolCall[]): Promise<ToolResult[]>;
     private execute;
+    private executeWithTimeout;
+    private executeInner;
 }
 //# sourceMappingURL=ToolDispatcher.d.ts.map

package/dist/runner/ToolDispatcher.js CHANGED Viewed

@@ -3,8 +3,65 @@
  * Looks up tool by name in the Registry, validates params, runs execute().
  */
 import { Value } from "@sinclair/typebox/value";
+/**
+ * Attempt to repair common JSON errors from model output.
+ * Handles trailing commas, single quotes, unquoted keys.
+ * Returns original string if repair doesn't help.
+ */
+function repairJson(raw) {
+    try {
+        JSON.parse(raw);
+        return raw;
+    }
+    catch { /* fall through to repair */ }
+    const repaired = raw
+        .replace(/,\s*}/g, "}")
+        .replace(/,\s*]/g, "]")
+        .replace(/([{,]\s*)(\w+)(\s*:)/g, '$1"$2"$3') // unquoted keys
+        .replace(/:\s*'([^']*)'/g, ': "$1"'); // single-quoted values
+    try {
+        JSON.parse(repaired);
+        return repaired;
+    }
+    catch {
+        return raw;
+    }
+}
+const TOOL_TIMEOUT_MS = 30_000;
+const CIRCUIT_OPEN_MS = 300_000;
+const CIRCUIT_THRESHOLD = 3;
+// #40: Estimated output sizes per tool (chars) for pre-dispatch budget forecasting
+const TOOL_OUTPUT_ESTIMATES = {
+    get_candles: 8_000, // 100 OHLCV + TA = ~8KB
+    analyze_ta: 2_000,
+    get_prices: 500,
+    get_top_movers: 800,
+    get_market_overview: 1_000,
+    get_news: 4_000,
+    get_fear_greed: 400,
+    get_funding_rates: 600,
+    get_btc_mempool: 400,
+    get_defi_tvl: 2_000,
+    get_solana_stats: 300,
+    list_models: 3_000,
+    list_tasks: 500,
+    read_task_context: 6_000,
+    cost_report: 400,
+    list_goals: 600,
+    model_summary: 400,
+    _default: 2_000,
+};
+/** #40: Estimate chars that will be added to context by dispatching these calls */
+export function forecastContextGrowth(calls) {
+    return calls.reduce((sum, tc) => {
+        const est = TOOL_OUTPUT_ESTIMATES[tc.function.name] ?? TOOL_OUTPUT_ESTIMATES["_default"];
+        return sum + est;
+    }, 0);
+}
 export class ToolDispatcher {
     registry;
+    failureCounts = new Map();
+    openCircuits = new Map(); // toolName → openUntil timestamp
     constructor(registry) {
         this.registry = registry;
     }
@@ -12,6 +69,52 @@ export class ToolDispatcher {
         return Promise.all(calls.map(tc => this.execute(tc)));
     }
     async execute(tc) {
+        const toolName = tc.function.name;
+        // #6: Circuit breaker — fast-fail if tool has been consistently broken
+        const openUntil = this.openCircuits.get(toolName) ?? 0;
+        if (Date.now() < openUntil) {
+            const remainMs = Math.ceil((openUntil - Date.now()) / 1000);
+            return {
+                tool_call_id: tc.id,
+                name: toolName,
+                content: `Tool "${toolName}" is temporarily unavailable (circuit open for ${remainMs}s after repeated failures). Use a different approach or try again later.`,
+                isError: true,
+            };
+        }
+        try {
+            const result = await this.executeWithTimeout(tc);
+            // Reset failure count on success
+            this.failureCounts.delete(toolName);
+            return result;
+        }
+        catch (e) {
+            const errMsg = e instanceof Error ? e.message : String(e);
+            const failures = (this.failureCounts.get(toolName) ?? 0) + 1;
+            this.failureCounts.set(toolName, failures);
+            if (failures >= CIRCUIT_THRESHOLD) {
+                this.openCircuits.set(toolName, Date.now() + CIRCUIT_OPEN_MS);
+                this.failureCounts.delete(toolName);
+                return {
+                    tool_call_id: tc.id,
+                    name: toolName,
+                    content: `Tool "${toolName}" failed ${CIRCUIT_THRESHOLD} times in a row. Circuit opened for 5 minutes. Error: ${errMsg}`,
+                    isError: true,
+                };
+            }
+            return {
+                tool_call_id: tc.id,
+                name: toolName,
+                content: `Tool error (failure ${failures}/${CIRCUIT_THRESHOLD}): ${errMsg}`,
+                isError: true,
+            };
+        }
+    }
+    async executeWithTimeout(tc) {
+        // Race tool execution against a hard timeout
+        const timeoutPromise = new Promise((_, reject) => setTimeout(() => reject(new Error(`Tool "${tc.function.name}" timed out after ${TOOL_TIMEOUT_MS / 1000}s`)), TOOL_TIMEOUT_MS));
+        return Promise.race([this.executeInner(tc), timeoutPromise]);
+    }
+    async executeInner(tc) {
         const tool = this.registry.getTool(tc.function.name);
         if (!tool) {
             return {
@@ -23,13 +126,14 @@ export class ToolDispatcher {
         }
         let params;
         try {
-            params = JSON.parse(tc.function.arguments || "{}");
+            // #8: attempt JSON repair before hard-failing on malformed model output
+            params = JSON.parse(repairJson(tc.function.arguments || "{}"));
         }
         catch {
             return {
                 tool_call_id: tc.id,
                 name: tc.function.name,
-                content: `Invalid JSON arguments: ${tc.function.arguments}`,
+                content: `Invalid JSON arguments (repair failed): ${tc.function.arguments}`,
                 isError: true,
             };
         }