@blockrun/franklin 3.15.10 → 3.15.12

package/README.md CHANGED
@@ -71,6 +71,8 @@ That's it. Zero signup, zero credit card, zero phone verification. Send **$5 of
 
 ### Prefer a GUI? Try Franklin for VS Code
 
+[![Franklin for VS Code — Beta is here](assets/franklin-vscode-banner.png)](https://marketplace.visualstudio.com/items?itemName=blockrun.franklin-vscode)
+
 The same agent ships as a [VS Code extension](https://marketplace.visualstudio.com/items?itemName=blockrun.franklin-vscode) — chat panel, model picker, wallet balance, image / video generation, inline diff cards — all driven by the wallet you already funded for the CLI.
 
 ```
@@ -20,10 +20,11 @@ import { createActivateToolCapability } from '../tools/activate.js';
 import { recordUsage } from '../stats/tracker.js';
 import { recordSessionUsage } from '../stats/session-tracker.js';
 import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
+import { logger, setDebugMode } from '../logger.js';
 import { estimateCost, OPUS_PRICING } from '../pricing.js';
 import { maybeMidSessionExtract } from '../learnings/extractor.js';
 import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
-import { routeRequestAsync, resolveTierToModel, parseRoutingProfile, getFallbackChain } from '../router/index.js';
+import { routeRequestAsync, resolveTierToModel, parseRoutingProfile, getFallbackChain, pickFreeFallback } from '../router/index.js';
 import { recordOutcome } from '../router/local-elo.js';
 import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
 import { shouldVerify, runVerification } from './verification.js';
@@ -325,6 +326,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
     // fool Edit/Write into skipping the read-before-edit check or serve cached
     // webfetch content fetched under the previous session's intent.
     resetToolSessionState();
+    // Wire stderr-mirroring of log lines to the same flag the agent already
+    // uses to gate verbose console output. File writes happen regardless.
+    setDebugMode(!!config.debug);
     const client = new ModelClient({
         apiUrl: config.apiUrl,
         chain: config.chain,
@@ -725,16 +729,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 kind: 'text_delta',
                 text: `\n*🗜 Auto-compacted: ~${(beforeTokens / 1000).toFixed(0)}K → ~${(afterTokens / 1000).toFixed(0)}K tokens (saved ${pct}%)*\n\n`,
             });
-            if (config.debug) {
-                console.error(`[franklin] History compacted: ~${afterTokens} tokens`);
-            }
+            logger.info(`[franklin] History compacted: ~${afterTokens} tokens`);
         }
     }
     catch (compactErr) {
         compactFailures++;
-        if (config.debug) {
-            console.error(`[franklin] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
-        }
+        logger.warn(`[franklin] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
     }
 }
 // Inject ultrathink instruction when mode is active
@@ -939,9 +939,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
     const oldModel = config.model;
     config.model = nextModel;
     config.onModelChange?.(nextModel, 'system');
-    if (config.debug) {
-        console.error(`[franklin] ${oldModel} returned empty — switching to ${nextModel}`);
-    }
+    logger.warn(`[franklin] ${oldModel} returned empty — switching to ${nextModel}`);
     onEvent({ kind: 'text_delta', text: `\n*${oldModel} returned empty — switching to ${nextModel}*\n` });
     continue;
 }
@@ -973,9 +971,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 // ── Media size error recovery (strip images/PDFs + retry) ──
 if (isMediaSizeError(errMsg) && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
     recoveryAttempts++;
-    if (config.debug) {
-        console.error(`[franklin] Media too large — stripping and retrying (attempt ${recoveryAttempts})`);
-    }
+    logger.warn(`[franklin] Media too large — stripping and retrying (attempt ${recoveryAttempts})`);
     const { history: stripped, stripped: didStrip } = stripMediaFromHistory(history);
     if (didStrip) {
         replaceHistory(history, stripped);
@@ -989,9 +985,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 // the prompt is too long, so we must compact regardless of our threshold estimate.
 if (classified.category === 'context_limit' && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
     recoveryAttempts++;
-    if (config.debug) {
-        console.error(`[franklin] Prompt too long — force compacting (attempt ${recoveryAttempts})`);
-    }
+    logger.warn(`[franklin] Prompt too long — force compacting (attempt ${recoveryAttempts})`);
     onEvent({ kind: 'text_delta', text: '\n*Context limit hit — compacting conversation...*\n' });
     const { history: compactedAgain } = await forceCompact(history, config.model, client, config.debug);
     replaceHistory(history, compactedAgain);
@@ -1017,9 +1011,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 const continuationPrompt = buildContinuationPrompt();
 history.push(continuationPrompt);
 persistSessionMessage(continuationPrompt);
-if (config.debug) {
-    console.error(`[franklin] Stream timeout on ${resolvedModel} — auto-continuing with chunked-task prompt`);
-}
+logger.warn(`[franklin] Stream timeout on ${resolvedModel} — auto-continuing with chunked-task prompt`);
 onEvent({
     kind: 'text_delta',
     text: '\n*Task too big for one streaming turn — auto-continuing with a smaller chunk...*\n',
@@ -1031,10 +1023,8 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 const costText = retryDecision.estimatedReplayCostUsd > 0
     ? ` and at least $${retryDecision.estimatedReplayCostUsd.toFixed(4)} in input charges`
     : '';
-if (config.debug) {
-    console.error(`[franklin] Timeout retry skipped for ${resolvedModel}: ` +
-        `~${tokenText} input tokens, replayCost=$${retryDecision.estimatedReplayCostUsd.toFixed(4)}`);
-}
+logger.warn(`[franklin] Timeout retry skipped for ${resolvedModel}: ` +
+    `~${tokenText} input tokens, replayCost=$${retryDecision.estimatedReplayCostUsd.toFixed(4)}`);
 onEvent({
     kind: 'turn_done',
     reason: 'error',
@@ -1079,9 +1069,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 }
 recoveryAttempts++;
 const backoffMs = getBackoffDelay(recoveryAttempts);
-if (config.debug) {
-    console.error(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
-}
+logger.warn(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
 // Surface the actual error + model so the user can see which model
 // is failing and what the upstream said. Old "Retrying after Server
 // error" was uninformative — users couldn't tell whether to wait,
@@ -1110,8 +1098,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 if (lastRoutedCategory) {
     recordOutcome(lastRoutedCategory, config.model, 'payment');
 }
-const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/glm-4.7'];
-const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
+const nextFree = pickFreeFallback(lastRoutedCategory, turnFailedModels);
 if (nextFree) {
     const oldModel = config.model;
     config.model = nextFree;
@@ -1132,8 +1119,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 if (lastRoutedCategory) {
     recordOutcome(lastRoutedCategory, config.model, 'rate_limit');
 }
-const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/glm-4.7'];
-const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
+const nextFree = pickFreeFallback(lastRoutedCategory, turnFailedModels);
 if (nextFree) {
     const oldModel = config.model;
     config.model = nextFree;
@@ -1249,9 +1235,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 if (maxTokensOverride === undefined) {
     // First hit: escalate to 64K
     maxTokensOverride = ESCALATED_MAX_TOKENS;
-    if (config.debug) {
-        console.error(`[franklin] Max tokens hit — escalating to ${maxTokensOverride}`);
-    }
+    logger.warn(`[franklin] Max tokens hit — escalating to ${maxTokensOverride}`);
 }
 // Append what we got + a continuation prompt with last-line anchor
 const partialAssistant = { role: 'assistant', content: responseParts };
@@ -1293,9 +1277,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 // the existing recovery flow handle it.
 const gatewayErr = looksLikeGatewayErrorAsText(responseParts);
 if (gatewayErr.match) {
-    if (config.debug) {
-        console.error(`[franklin] Gateway returned an error text in lieu of an answer (${resolvedModel}): ${gatewayErr.message}`);
-    }
+    logger.error(`[franklin] Gateway returned an error text in lieu of an answer (${resolvedModel}): ${gatewayErr.message}`);
     throw new Error(gatewayErr.message);
 }
 // Reset recovery counter on successful completion
@@ -1572,9 +1554,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 }
 // Hard stop: if cap exceeded, force end this agent loop iteration
 if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
-    if (config.debug) {
-        console.error(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
-    }
+    logger.warn(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
     // Don't break — let the model respond one more time to summarize,
     // but inject the stop signal above so it knows to finish up.
 }
@@ -188,9 +188,23 @@ export class SessionToolGuard {
     }
 }
 async beforeExecute(invocation, scope) {
-    // Hard-block tools that have failed too many times this session
+    // Hard-block tools that have failed too many times this session.
+    // Modal lifecycle tools are exempt: orphan sandboxes keep billing
+    // GPU time, and ModalTerminate is the only way to recover from the
+    // agent side. Auto-disabling it after 3 transient errors would
+    // strand a $0.40/hr H100 until the session ends. Same logic for
+    // media-gen tools: failures are usually transient (gateway hiccup,
+    // prompt rejection) and the user often wants to retry.
+    const FAILURE_EXEMPT = new Set([
+        'ImageGen',
+        'VideoGen',
+        'ModalCreate',
+        'ModalExec',
+        'ModalStatus',
+        'ModalTerminate',
+    ]);
     const errorCount = this.toolErrorCounts.get(invocation.name) ?? 0;
-    if (errorCount >= 3) {
+    if (errorCount >= 3 && !FAILURE_EXEMPT.has(invocation.name)) {
         return {
             output: `${invocation.name} has failed ${errorCount} times this session and is now disabled. ` +
                 'Tell the user what went wrong and suggest alternatives.',
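
A minimal sketch of the gate this hunk introduces; the error-count map below is a simplified stand-in, not the real `SessionToolGuard` internals:

```js
// Stand-in for SessionToolGuard's failure gate (tool names from the diff).
const FAILURE_EXEMPT = new Set([
    'ImageGen', 'VideoGen',
    'ModalCreate', 'ModalExec', 'ModalStatus', 'ModalTerminate',
]);
const toolErrorCounts = new Map([['WebFetch', 3], ['ModalTerminate', 5]]);

function isBlocked(name) {
    const errorCount = toolErrorCounts.get(name) ?? 0;
    return errorCount >= 3 && !FAILURE_EXEMPT.has(name);
}

console.log(isBlocked('WebFetch'));       // true (ordinary tool, three strikes)
console.log(isBlocked('ModalTerminate')); // false (exempt, stays usable to reclaim sandboxes)
```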
@@ -0,0 +1,10 @@
+export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
+export declare function setDebugMode(enabled: boolean): void;
+export declare function isDebugMode(): boolean;
+export declare function getLogFilePath(): string;
+export declare const logger: {
+    debug(msg: string): void;
+    info(msg: string): void;
+    warn(msg: string): void;
+    error(msg: string): void;
+};
package/dist/logger.js ADDED
@@ -0,0 +1,74 @@
+/**
+ * Unified logger — always persists to ~/.blockrun/franklin-debug.log,
+ * optionally mirrors to stderr when debug mode is on.
+ *
+ * Why this exists: before this module, agent diagnostics were emitted with
+ * `if (config.debug) console.error(...)`. That meant `franklin logs` showed
+ * nothing in normal use because the events never hit the file. Now every
+ * level writes to disk; stderr mirroring is the opt-in part.
+ *
+ * Errors during a log write are swallowed — the agent loop must never die
+ * because the disk is full or the home dir is read-only.
+ */
+import fs from 'node:fs';
+import path from 'node:path';
+import { BLOCKRUN_DIR } from './config.js';
+const LOG_FILE = path.join(BLOCKRUN_DIR, 'franklin-debug.log');
+// Strip ANSI escapes + carriage returns so the log stays grep-able.
+const ANSI_RE = /\x1b\[[0-9;]*m|\x1b\][^\x07]*\x07|\r/g;
+let debugMode = false;
+let dirEnsured = false;
+export function setDebugMode(enabled) {
+    debugMode = enabled;
+}
+export function isDebugMode() {
+    return debugMode;
+}
+export function getLogFilePath() {
+    return LOG_FILE;
+}
+function ensureDir() {
+    if (dirEnsured)
+        return;
+    try {
+        fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
+        dirEnsured = true;
+    }
+    catch { /* readonly mount / disk full — keep trying so a remount recovers */ }
+}
+function writeFile(level, msg) {
+    ensureDir();
+    try {
+        const clean = msg.replace(ANSI_RE, '');
+        fs.appendFileSync(LOG_FILE, `[${new Date().toISOString()}] [${level.toUpperCase()}] ${clean}\n`);
+    }
+    catch { /* best-effort — never break the agent on log failure */ }
+}
+function writeStderr(msg) {
+    try {
+        process.stderr.write(msg + '\n');
+    }
+    catch { /* swallow */ }
+}
+export const logger = {
+    debug(msg) {
+        writeFile('debug', msg);
+        if (debugMode)
+            writeStderr(msg);
+    },
+    info(msg) {
+        writeFile('info', msg);
+        if (debugMode)
+            writeStderr(msg);
+    },
+    warn(msg) {
+        writeFile('warn', msg);
+        if (debugMode)
+            writeStderr(msg);
+    },
+    error(msg) {
+        writeFile('error', msg);
+        if (debugMode)
+            writeStderr(msg);
+    },
+};
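
As a usage sketch of the new module (the relative import path is taken from the session-file hunk above; the `--debug` flag handling is illustrative):

```js
import { logger, setDebugMode, getLogFilePath } from '../logger.js';

setDebugMode(process.argv.includes('--debug')); // opt in to stderr mirroring
logger.info('[franklin] session started');      // always appended to the log file
logger.warn('[franklin] compaction failed');    // same file, tagged [WARN]
console.error(`log file: ${getLogFilePath()}`); // ~/.blockrun/franklin-debug.log
```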
@@ -49,6 +49,12 @@ export declare function routeRequest(prompt: string, profile?: RoutingProfile):
  * Get fallback models for a tier
  */
 export declare function getFallbackChain(tier: Tier, profile?: RoutingProfile): string[];
+/**
+ * Pick the next free model to try given the question category and which
+ * free models have already failed this turn. Returns undefined when every
+ * candidate has been exhausted (caller should surface an error to user).
+ */
+export declare function pickFreeFallback(category: string, alreadyFailed: Set<string>): string | undefined;
 /**
  * Parse routing profile from model string
  */
@@ -483,10 +483,45 @@ function computeSavings(model) {
  */
 export function getFallbackChain(tier, profile = 'auto') {
     if (profile === 'free')
-        return ['nvidia/qwen3-coder-480b'];
+        return FREE_MODELS_BY_CATEGORY.chat;
     const config = AUTO_TIERS[tier];
     return [config.primary, ...config.fallback];
 }
+// ─── Free-tier fallback (used when paid models 402 / rate-limit) ───
+// Free fallback chains by question category. Used when a paid model fails
+// mid-turn (402 payment, rate-limit) and we need a zero-cost replacement
+// to keep the user moving without waiting for funding.
+//
+// The lists are ordered: best-fit free model first, then degraded fallbacks.
+// Coding goes to qwen3-coder; everything else (chat / trading / research /
+// reasoning / creative) prefers general-purpose free models that aren't
+// coder-tuned. Without this split, a BTC question that exhausted paid
+// models was being handed to qwen3-coder-480b — a coder model trying to
+// do technical analysis. Reported 2026-05-03 with a markets question
+// routed to a coder model on Sonnet failure.
+const FREE_MODELS_BY_CATEGORY = {
+    coding: ['nvidia/qwen3-coder-480b', 'nvidia/glm-4.7', 'nvidia/llama-4-maverick'],
+    trading: ['nvidia/glm-4.7', 'nvidia/llama-4-maverick', 'nvidia/qwen3-coder-480b'],
+    research: ['nvidia/glm-4.7', 'nvidia/llama-4-maverick', 'nvidia/qwen3-coder-480b'],
+    reasoning: ['nvidia/glm-4.7', 'nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick'],
+    chat: ['nvidia/llama-4-maverick', 'nvidia/glm-4.7', 'nvidia/qwen3-coder-480b'],
+    creative: ['nvidia/llama-4-maverick', 'nvidia/glm-4.7', 'nvidia/qwen3-coder-480b'],
+};
+const DEFAULT_FREE_CHAIN = [
+    'nvidia/glm-4.7',
+    'nvidia/llama-4-maverick',
+    'nvidia/qwen3-coder-480b',
+];
+/**
+ * Pick the next free model to try given the question category and which
+ * free models have already failed this turn. Returns undefined when every
+ * candidate has been exhausted (caller should surface an error to user).
+ */
+export function pickFreeFallback(category, alreadyFailed) {
+    const chain = FREE_MODELS_BY_CATEGORY[category]
+        ?? DEFAULT_FREE_CHAIN;
+    return chain.find(m => !alreadyFailed.has(m));
+}
 /**
  * Parse routing profile from model string
  */
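
A quick sketch of how the session loop consumes this, matching the payment / rate-limit hunks above (`turnFailedModels` is the caller's own per-turn bookkeeping; the import path is illustrative):

```js
import { pickFreeFallback } from './index.js';

const turnFailedModels = new Set(['nvidia/glm-4.7']); // already failed this turn
const next = pickFreeFallback('trading', turnFailedModels);
// → 'nvidia/llama-4-maverick': glm-4.7 is skipped, and the coder-tuned
//   qwen3-coder-480b stays last for a non-coding category.
if (next === undefined) {
    // every free candidate is exhausted, so surface the error to the user
}
```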
@@ -24,6 +24,12 @@ export interface AuditEntry {
 routingTier?: string;
 }
 export declare function appendAudit(entry: AuditEntry): void;
+/**
+ * Trim the audit log to the last MAX_AUDIT_ENTRIES lines if it has grown
+ * past the cap. Exported so admin/debug tooling (and tests) can force a
+ * compaction without waiting for the next interval probe.
+ */
+export declare function enforceRetention(): void;
 export declare function getAuditFilePath(): string;
 export declare function readAudit(): AuditEntry[];
 /** Pull the last user message from a Dialogue history, flatten, and strip newlines. */
@@ -13,6 +13,18 @@ import path from 'node:path';
 import { BLOCKRUN_DIR } from '../config.js';
 const AUDIT_FILE = path.join(BLOCKRUN_DIR, 'franklin-audit.jsonl');
 const PROMPT_PREVIEW_CHARS = 240;
+// Cap the audit log at the most recent N entries. Without this the file
+// grew unbounded — verified ~3.6k lines on a single dev machine after a
+// few weeks of light use, so a heavily used, months-old install would
+// reach tens of megabytes and slow `franklin insights` to a crawl.
+const MAX_AUDIT_ENTRIES = 10_000;
+// Each entry is roughly 300–800 bytes. We only re-read the file when it
+// looks plausibly over the cap, so we don't pay an O(n) scan on every
+// append. 200 bytes/entry is a conservative lower bound.
+const TRIM_PROBE_BYTES = MAX_AUDIT_ENTRIES * 200;
+// Probe size every N appends — amortizes the stat() call.
+const TRIM_CHECK_INTERVAL = 200;
+let appendsSinceCheck = 0;
 export function appendAudit(entry) {
     try {
         fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
@@ -21,11 +33,39 @@ export function appendAudit(entry) {
             prompt: entry.prompt ? truncate(entry.prompt, PROMPT_PREVIEW_CHARS) : undefined,
         };
         fs.appendFileSync(AUDIT_FILE, JSON.stringify(safe) + '\n');
+        appendsSinceCheck++;
+        if (appendsSinceCheck >= TRIM_CHECK_INTERVAL) {
+            appendsSinceCheck = 0;
+            enforceRetention();
+        }
     }
     catch {
         /* best-effort — never break the agent loop on audit-write failure */
     }
 }
+/**
+ * Trim the audit log to the last MAX_AUDIT_ENTRIES lines if it has grown
+ * past the cap. Exported so admin/debug tooling (and tests) can force a
+ * compaction without waiting for the next interval probe.
+ */
+export function enforceRetention() {
+    try {
+        if (!fs.existsSync(AUDIT_FILE))
+            return;
+        const stat = fs.statSync(AUDIT_FILE);
+        if (stat.size < TRIM_PROBE_BYTES)
+            return;
+        const content = fs.readFileSync(AUDIT_FILE, 'utf-8');
+        const lines = content.split('\n').filter(Boolean);
+        if (lines.length <= MAX_AUDIT_ENTRIES)
+            return;
+        const kept = lines.slice(lines.length - MAX_AUDIT_ENTRIES);
+        fs.writeFileSync(AUDIT_FILE, kept.join('\n') + '\n');
+    }
+    catch {
+        /* best-effort */
+    }
+}
 export function getAuditFilePath() {
     return AUDIT_FILE;
 }
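
The probe arithmetic, spelled out (constants from this hunk):

```js
const MAX_AUDIT_ENTRIES = 10_000;
const TRIM_PROBE_BYTES = MAX_AUDIT_ENTRIES * 200; // 2,000,000 bytes ≈ 1.9 MiB
// Entries are never smaller than ~200 bytes, so any file under 2 MB cannot
// hold more than 10,000 lines; a stat() alone proves there is nothing to trim.
// At the observed 300–800 bytes/entry the probe can fire early (~2.5k–6.6k
// entries), and the exact line-count check then bails out, so the full read
// happens at most once per 200 appends and only when the file is large.
```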
@@ -49,6 +49,25 @@ export interface InsightsReport {
 avgRequestCostUsd: number;
 /** Efficiency: cost per 1K tokens */
 costPer1KTokens: number;
+/**
+ * Cost breakdown by capability category. Lets the UI show a clean
+ * "where did your USDC go" split alongside the per-model bar list.
+ * - chat: LLM token-billed calls (anything with non-zero tokens)
+ * - media: ImageGen / VideoGen / MusicGen (per_image / per_second / per_track)
+ * - sandbox: Modal GPU sandbox lifecycle (create / exec / status / terminate)
+ *
+ * Categorization rules (checked in order against each usage row):
+ * - `modal/*` model-name prefix → sandbox
+ * - rows with 0 input + 0 output tokens → media (image/video/music are
+ *   stored with 0 tokens by recordUsage; modal/* matches first)
+ * - everything else → chat
+ */
+byCategory: {
+    chatCostUsd: number;
+    mediaCostUsd: number;
+    sandboxCostUsd: number;
+    sandboxRequests: number;
+};
 }
 export declare function generateInsights(days?: number): InsightsReport;
 export declare function formatInsights(report: InsightsReport, days: number): string;
@@ -23,11 +23,28 @@ export function generateInsights(days = 30) {
     let totalCost = 0;
     let totalInput = 0;
     let totalOutput = 0;
+    // Category totals — see InsightsReport.byCategory doc.
+    let chatCost = 0;
+    let mediaCost = 0;
+    let sandboxCost = 0;
+    let sandboxRequests = 0;
     const modelAgg = new Map();
     for (const r of windowHistory) {
         totalCost += r.costUsd;
         totalInput += r.inputTokens;
         totalOutput += r.outputTokens;
+        // Categorize: modal/* always goes to sandbox; zero-token entries are
+        // media (image/video/music recordUsage stores 0/0 tokens); rest = chat.
+        if (r.model.startsWith('modal/')) {
+            sandboxCost += r.costUsd;
+            sandboxRequests++;
+        }
+        else if ((r.inputTokens + r.outputTokens) === 0) {
+            mediaCost += r.costUsd;
+        }
+        else {
+            chatCost += r.costUsd;
+        }
         const existing = modelAgg.get(r.model) ?? {
             requests: 0,
             costUsd: 0,
@@ -101,6 +118,12 @@ export function generateInsights(days = 30) {
         projections,
         avgRequestCostUsd,
         costPer1KTokens,
+        byCategory: {
+            chatCostUsd: chatCost,
+            mediaCostUsd: mediaCost,
+            sandboxCostUsd: sandboxCost,
+            sandboxRequests,
+        },
     };
 }
 // ─── Format for Display ───────────────────────────────────────────────────
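
For reference, the split applied to a few hypothetical usage rows (model names invented for illustration; the row shape is inferred from the loop above):

```js
const rows = [
    { model: 'anthropic/claude-sonnet', inputTokens: 1200, outputTokens: 400, costUsd: 0.012 }, // chat: non-zero tokens
    { model: 'bfl/flux-image',          inputTokens: 0,    outputTokens: 0,   costUsd: 0.04  }, // media: 0/0 tokens
    { model: 'modal/h100',              inputTokens: 0,    outputTokens: 0,   costUsd: 0.40  }, // sandbox: prefix wins
];
```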
@@ -29,6 +29,7 @@ import { jupiterQuoteCapability, jupiterSwapCapability } from './jupiter.js';
 import { base0xQuoteCapability, base0xSwapCapability } from './zerox-base.js';
 import { base0xGaslessSwapCapability } from './zerox-gasless.js';
 import { defiLlamaProtocolsCapability, defiLlamaProtocolCapability, defiLlamaChainsCapability, defiLlamaYieldsCapability, defiLlamaPriceCapability, } from './defillama.js';
+import { modalCapabilities } from './modal.js';
 import { createTradingCapabilities } from './trading-execute.js';
 import { Portfolio } from '../trading/portfolio.js';
 import { RiskEngine } from '../trading/risk.js';
@@ -158,6 +159,11 @@ export const allCapabilities = [
     defiLlamaChainsCapability,
     defiLlamaYieldsCapability,
     defiLlamaPriceCapability,
+    // Modal GPU sandbox tools — registered but hidden by default (not in
+    // CORE_TOOL_NAMES). Agent must `ActivateTool({names:["ModalCreate",...]})`
+    // before they appear in its tool inventory. High-cost ($0.40/H100 create)
+    // operations should not be in the default surface.
+    ...modalCapabilities, // ModalCreate, ModalExec, ModalStatus, ModalTerminate
 ];
 export { readCapability, writeCapability, editCapability, bashCapability, globCapability, grepCapability, webFetchCapability, webSearchCapability, taskCapability, detachCapability, };
 export { createSubAgentCapability } from './subagent.js';
@@ -0,0 +1,66 @@
+/**
+ * Modal Sandbox capabilities — spin up GPU/CPU compute on Modal Labs via the
+ * BlockRun gateway's x402-paid passthrough at /v1/modal/sandbox/{create, exec,
+ * status, terminate}. See https://modal.com/docs/guide/sandboxes for the
+ * underlying primitives.
+ *
+ * Pricing (per-call, USDC):
+ *   create:    $0.01 (CPU) / $0.05 (T4) / $0.08 (L4) / $0.10 (A10G) / $0.20 (A100) / $0.40 (H100)
+ *   exec:      $0.001
+ *   status:    $0.001
+ *   terminate: $0.001
+ *
+ * Gateway constraints (probed 2026-05-02):
+ *   - image is fixed at python:3.11 — no custom containers yet.
+ *   - command is execve-style (string[]), not a shell string. We accept a
+ *     plain string from the LLM and auto-wrap to ["sh","-c", string].
+ *   - No stdin / env / workdir / streaming on exec — keep commands self-
+ *     contained and idempotent.
+ *   - No upload/download endpoints — files in/out via exec heredoc / curl.
+ *
+ * Lifecycle:
+ *   ModalCreate → returns sandbox_id, charged at GPU tier
+ *   ModalExec → sync, returns { stdout, stderr, exit_code }
+ *   ModalStatus → check running/terminated
+ *   ModalTerminate → release; called automatically at session end via
+ *   the SessionSandboxTracker registry.
+ */
+import type { CapabilityHandler } from '../agent/types.js';
+export interface SandboxRecord {
+    id: string;
+    gpu: string;
+    createdAt: number;
+    timeoutSeconds?: number;
+}
+declare class SessionSandboxTracker {
+    private sandboxes;
+    add(rec: SandboxRecord): void;
+    remove(id: string): void;
+    list(): SandboxRecord[];
+    /** Snapshot then clear — used by the session cleanup hook. */
+    drainIds(): string[];
+}
+export declare const sessionSandboxTracker: SessionSandboxTracker;
+export declare const modalCreateCapability: CapabilityHandler;
+export declare const modalExecCapability: CapabilityHandler;
+export declare const modalStatusCapability: CapabilityHandler;
+export declare const modalTerminateCapability: CapabilityHandler;
+/**
+ * Terminate every sandbox the current session has created. Called from
+ * vscode-session.ts at session end (and the SessionToolGuard cleanup path)
+ * so a missed agent ModalTerminate doesn't leave Modal billing the user
+ * up to the per-sandbox timeout. Best-effort: failures are logged but
+ * don't block session shutdown.
+ */
+export declare function terminateAllSessionSandboxes(opts?: {
+    abortSignal?: AbortSignal;
+}): Promise<{
+    attempted: number;
+    succeeded: number;
+    failed: Array<{
+        id: string;
+        error: string;
+    }>;
+}>;
+export declare const modalCapabilities: CapabilityHandler[];
+export {};
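
Putting the declared surface together, a hedged cleanup sketch using only the exports above (the session-end wiring in vscode-session.ts is described by the comments but not shown in this diff):

```js
import { terminateAllSessionSandboxes } from './modal.js';

// Best-effort session-end cleanup: release every sandbox this session
// created so Modal stops billing, without blocking shutdown on failures.
const { attempted, succeeded, failed } = await terminateAllSessionSandboxes();
if (failed.length > 0) {
    console.error(`released ${succeeded}/${attempted} sandboxes; ` +
        failed.map(f => `${f.id}: ${f.error}`).join('; '));
}
```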