@blockrun/franklin 3.15.10 → 3.15.12

package/README.md CHANGED
@@ -71,6 +71,8 @@ That's it. Zero signup, zero credit card, zero phone verification. Send **$5 of
 
 ### Prefer a GUI? Try Franklin for VS Code
 
+[![Franklin for VS Code — Beta is here](assets/franklin-vscode-banner.png)](https://marketplace.visualstudio.com/items?itemName=blockrun.franklin-vscode)
+
 The same agent ships as a [VS Code extension](https://marketplace.visualstudio.com/items?itemName=blockrun.franklin-vscode) — chat panel, model picker, wallet balance, image / video generation, inline diff cards — all driven by the wallet you already funded for the CLI.
 
 ```
@@ -20,10 +20,11 @@ import { createActivateToolCapability } from '../tools/activate.js';
 import { recordUsage } from '../stats/tracker.js';
 import { recordSessionUsage } from '../stats/session-tracker.js';
 import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
+import { logger, setDebugMode } from '../logger.js';
 import { estimateCost, OPUS_PRICING } from '../pricing.js';
 import { maybeMidSessionExtract } from '../learnings/extractor.js';
 import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
-import { routeRequestAsync, resolveTierToModel, parseRoutingProfile, getFallbackChain } from '../router/index.js';
+import { routeRequestAsync, resolveTierToModel, parseRoutingProfile, getFallbackChain, pickFreeFallback } from '../router/index.js';
 import { recordOutcome } from '../router/local-elo.js';
 import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
 import { shouldVerify, runVerification } from './verification.js';
@@ -325,6 +326,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
     // fool Edit/Write into skipping the read-before-edit check or serve cached
     // webfetch content fetched under the previous session's intent.
     resetToolSessionState();
+    // Wire stderr-mirroring of log lines to the same flag the agent already
+    // uses to gate verbose console output. File writes happen regardless.
+    setDebugMode(!!config.debug);
     const client = new ModelClient({
         apiUrl: config.apiUrl,
         chain: config.chain,
@@ -725,16 +729,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 kind: 'text_delta',
                 text: `\n*🗜 Auto-compacted: ~${(beforeTokens / 1000).toFixed(0)}K → ~${(afterTokens / 1000).toFixed(0)}K tokens (saved ${pct}%)*\n\n`,
             });
-            if (config.debug) {
-                console.error(`[franklin] History compacted: ~${afterTokens} tokens`);
-            }
+            logger.info(`[franklin] History compacted: ~${afterTokens} tokens`);
         }
     }
     catch (compactErr) {
         compactFailures++;
-        if (config.debug) {
-            console.error(`[franklin] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
-        }
+        logger.warn(`[franklin] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
     }
 }
 // Inject ultrathink instruction when mode is active
@@ -939,9 +939,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
     const oldModel = config.model;
     config.model = nextModel;
     config.onModelChange?.(nextModel, 'system');
-    if (config.debug) {
-        console.error(`[franklin] ${oldModel} returned empty — switching to ${nextModel}`);
-    }
+    logger.warn(`[franklin] ${oldModel} returned empty — switching to ${nextModel}`);
     onEvent({ kind: 'text_delta', text: `\n*${oldModel} returned empty — switching to ${nextModel}*\n` });
     continue;
 }
@@ -973,9 +971,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 // ── Media size error recovery (strip images/PDFs + retry) ──
 if (isMediaSizeError(errMsg) && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
     recoveryAttempts++;
-    if (config.debug) {
-        console.error(`[franklin] Media too large — stripping and retrying (attempt ${recoveryAttempts})`);
-    }
+    logger.warn(`[franklin] Media too large — stripping and retrying (attempt ${recoveryAttempts})`);
     const { history: stripped, stripped: didStrip } = stripMediaFromHistory(history);
     if (didStrip) {
         replaceHistory(history, stripped);
@@ -989,9 +985,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 // the prompt is too long, so we must compact regardless of our threshold estimate.
 if (classified.category === 'context_limit' && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
     recoveryAttempts++;
-    if (config.debug) {
-        console.error(`[franklin] Prompt too long — force compacting (attempt ${recoveryAttempts})`);
-    }
+    logger.warn(`[franklin] Prompt too long — force compacting (attempt ${recoveryAttempts})`);
     onEvent({ kind: 'text_delta', text: '\n*Context limit hit — compacting conversation...*\n' });
     const { history: compactedAgain } = await forceCompact(history, config.model, client, config.debug);
     replaceHistory(history, compactedAgain);
@@ -1017,9 +1011,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 const continuationPrompt = buildContinuationPrompt();
 history.push(continuationPrompt);
 persistSessionMessage(continuationPrompt);
-if (config.debug) {
-    console.error(`[franklin] Stream timeout on ${resolvedModel} — auto-continuing with chunked-task prompt`);
-}
+logger.warn(`[franklin] Stream timeout on ${resolvedModel} — auto-continuing with chunked-task prompt`);
 onEvent({
     kind: 'text_delta',
     text: '\n*Task too big for one streaming turn — auto-continuing with a smaller chunk...*\n',
@@ -1031,10 +1023,8 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 const costText = retryDecision.estimatedReplayCostUsd > 0
     ? ` and at least $${retryDecision.estimatedReplayCostUsd.toFixed(4)} in input charges`
     : '';
-if (config.debug) {
-    console.error(`[franklin] Timeout retry skipped for ${resolvedModel}: ` +
-        `~${tokenText} input tokens, replayCost=$${retryDecision.estimatedReplayCostUsd.toFixed(4)}`);
-}
+logger.warn(`[franklin] Timeout retry skipped for ${resolvedModel}: ` +
+    `~${tokenText} input tokens, replayCost=$${retryDecision.estimatedReplayCostUsd.toFixed(4)}`);
 onEvent({
     kind: 'turn_done',
     reason: 'error',
@@ -1079,9 +1069,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 }
 recoveryAttempts++;
 const backoffMs = getBackoffDelay(recoveryAttempts);
-if (config.debug) {
-    console.error(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
-}
+logger.warn(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
 // Surface the actual error + model so the user can see which model
 // is failing and what the upstream said. Old "Retrying after Server
 // error" was uninformative — users couldn't tell whether to wait,
@@ -1110,8 +1098,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 if (lastRoutedCategory) {
     recordOutcome(lastRoutedCategory, config.model, 'payment');
 }
-const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/glm-4.7'];
-const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
+const nextFree = pickFreeFallback(lastRoutedCategory, turnFailedModels);
 if (nextFree) {
     const oldModel = config.model;
     config.model = nextFree;
@@ -1132,8 +1119,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 if (lastRoutedCategory) {
     recordOutcome(lastRoutedCategory, config.model, 'rate_limit');
 }
-const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/glm-4.7'];
-const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
+const nextFree = pickFreeFallback(lastRoutedCategory, turnFailedModels);
 if (nextFree) {
     const oldModel = config.model;
     config.model = nextFree;
@@ -1249,9 +1235,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 if (maxTokensOverride === undefined) {
     // First hit: escalate to 64K
     maxTokensOverride = ESCALATED_MAX_TOKENS;
-    if (config.debug) {
-        console.error(`[franklin] Max tokens hit — escalating to ${maxTokensOverride}`);
-    }
+    logger.warn(`[franklin] Max tokens hit — escalating to ${maxTokensOverride}`);
 }
 // Append what we got + a continuation prompt with last-line anchor
 const partialAssistant = { role: 'assistant', content: responseParts };
@@ -1293,9 +1277,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 // the existing recovery flow handle it.
 const gatewayErr = looksLikeGatewayErrorAsText(responseParts);
 if (gatewayErr.match) {
-    if (config.debug) {
-        console.error(`[franklin] Gateway returned an error text in lieu of an answer (${resolvedModel}): ${gatewayErr.message}`);
-    }
+    logger.error(`[franklin] Gateway returned an error text in lieu of an answer (${resolvedModel}): ${gatewayErr.message}`);
     throw new Error(gatewayErr.message);
 }
 // Reset recovery counter on successful completion
@@ -1572,9 +1554,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
 }
 // Hard stop: if cap exceeded, force end this agent loop iteration
 if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
-    if (config.debug) {
-        console.error(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
-    }
+    logger.warn(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
     // Don't break — let the model respond one more time to summarize,
     // but inject the stop signal above so it knows to finish up.
 }
@@ -188,9 +188,23 @@ export class SessionToolGuard {
     }
 }
 async beforeExecute(invocation, scope) {
-    // Hard-block tools that have failed too many times this session
+    // Hard-block tools that have failed too many times this session.
+    // Modal lifecycle tools are exempt: orphan sandboxes keep billing
+    // GPU time, and ModalTerminate is the only way to recover from the
+    // agent side. Auto-disabling it after 3 transient errors would
+    // strand a $0.40/hr H100 until the session ends. Same logic for
+    // media-gen tools: failures are usually transient (gateway hiccup,
+    // prompt rejection) and the user often wants to retry.
+    const FAILURE_EXEMPT = new Set([
+        'ImageGen',
+        'VideoGen',
+        'ModalCreate',
+        'ModalExec',
+        'ModalStatus',
+        'ModalTerminate',
+    ]);
     const errorCount = this.toolErrorCounts.get(invocation.name) ?? 0;
-    if (errorCount >= 3) {
+    if (errorCount >= 3 && !FAILURE_EXEMPT.has(invocation.name)) {
         return {
             output: `${invocation.name} has failed ${errorCount} times this session and is now disabled. ` +
                 'Tell the user what went wrong and suggest alternatives.',
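
A minimal sketch of the gate this hunk introduces; the error-count map below is a simplified stand-in, not the real `SessionToolGuard` internals:

```js
// Stand-in for SessionToolGuard's failure gate (tool names from the diff).
const FAILURE_EXEMPT = new Set([
    'ImageGen', 'VideoGen',
    'ModalCreate', 'ModalExec', 'ModalStatus', 'ModalTerminate',
]);
const toolErrorCounts = new Map([['WebFetch', 3], ['ModalTerminate', 5]]);

function isBlocked(name) {
    const errorCount = toolErrorCounts.get(name) ?? 0;
    return errorCount >= 3 && !FAILURE_EXEMPT.has(name);
}

console.log(isBlocked('WebFetch'));       // true (ordinary tool, three strikes)
console.log(isBlocked('ModalTerminate')); // false (exempt, stays usable to reclaim sandboxes)
```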
@@ -0,0 +1,10 @@
+export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
+export declare function setDebugMode(enabled: boolean): void;
+export declare function isDebugMode(): boolean;
+export declare function getLogFilePath(): string;
+export declare const logger: {
+    debug(msg: string): void;
+    info(msg: string): void;
+    warn(msg: string): void;
+    error(msg: string): void;
+};
package/dist/logger.js ADDED
@@ -0,0 +1,74 @@
+/**
+ * Unified logger — always persists to ~/.blockrun/franklin-debug.log,
+ * optionally mirrors to stderr when debug mode is on.
+ *
+ * Why this exists: before this module, agent diagnostics were emitted with
+ * `if (config.debug) console.error(...)`. That meant `franklin logs` showed
+ * nothing in normal use because the events never hit the file. Now every
+ * level writes to disk; stderr mirroring is the opt-in part.
+ *
+ * Errors during a log write are swallowed — the agent loop must never die
+ * because the disk is full or the home dir is read-only.
+ */
+import fs from 'node:fs';
+import path from 'node:path';
+import { BLOCKRUN_DIR } from './config.js';
+const LOG_FILE = path.join(BLOCKRUN_DIR, 'franklin-debug.log');
+// Strip ANSI escapes + carriage returns so the log stays grep-able.
+const ANSI_RE = /\x1b\[[0-9;]*m|\x1b\][^\x07]*\x07|\r/g;
+let debugMode = false;
+let dirEnsured = false;
+export function setDebugMode(enabled) {
+    debugMode = enabled;
+}
+export function isDebugMode() {
+    return debugMode;
+}
+export function getLogFilePath() {
+    return LOG_FILE;
+}
+function ensureDir() {
+    if (dirEnsured)
+        return;
+    try {
+        fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
+        dirEnsured = true;
+    }
+    catch { /* readonly mount / disk full — keep trying so a remount recovers */ }
+}
+function writeFile(level, msg) {
+    ensureDir();
+    try {
+        const clean = msg.replace(ANSI_RE, '');
+        fs.appendFileSync(LOG_FILE, `[${new Date().toISOString()}] [${level.toUpperCase()}] ${clean}\n`);
+    }
+    catch { /* best-effort — never break the agent on log failure */ }
+}
+function writeStderr(msg) {
+    try {
+        process.stderr.write(msg + '\n');
+    }
+    catch { /* swallow */ }
+}
+export const logger = {
+    debug(msg) {
+        writeFile('debug', msg);
+        if (debugMode)
+            writeStderr(msg);
+    },
+    info(msg) {
+        writeFile('info', msg);
+        if (debugMode)
+            writeStderr(msg);
+    },
+    warn(msg) {
+        writeFile('warn', msg);
+        if (debugMode)
+            writeStderr(msg);
+    },
+    error(msg) {
+        writeFile('error', msg);
+        if (debugMode)
+            writeStderr(msg);
+    },
+};
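
As a usage sketch of the new module (the relative import path is taken from the session-file hunk above; the `--debug` flag handling is illustrative):

```js
import { logger, setDebugMode, getLogFilePath } from '../logger.js';

setDebugMode(process.argv.includes('--debug')); // opt in to stderr mirroring
logger.info('[franklin] session started');      // always appended to the log file
logger.warn('[franklin] compaction failed');    // same file, tagged [WARN]
console.error(`log file: ${getLogFilePath()}`); // ~/.blockrun/franklin-debug.log
```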
@@ -49,6 +49,12 @@ export declare function routeRequest(prompt: string, profile?: RoutingProfile):
  * Get fallback models for a tier
  */
 export declare function getFallbackChain(tier: Tier, profile?: RoutingProfile): string[];
+/**
+ * Pick the next free model to try given the question category and which
+ * free models have already failed this turn. Returns undefined when every
+ * candidate has been exhausted (caller should surface an error to user).
+ */
+export declare function pickFreeFallback(category: string, alreadyFailed: Set<string>): string | undefined;
 /**
  * Parse routing profile from model string
  */
@@ -483,10 +483,45 @@ function computeSavings(model) {
  */
 export function getFallbackChain(tier, profile = 'auto') {
     if (profile === 'free')
-        return ['nvidia/qwen3-coder-480b'];
+        return FREE_MODELS_BY_CATEGORY.chat;
     const config = AUTO_TIERS[tier];
     return [config.primary, ...config.fallback];
 }
+// ─── Free-tier fallback (used when paid models 402 / rate-limit) ───
+// Free fallback chains by question category. Used when a paid model fails
+// mid-turn (402 payment, rate-limit) and we need a zero-cost replacement
+// to keep the user moving without waiting for funding.
+//
+// The lists are ordered: best-fit free model first, then degraded fallbacks.
+// Coding goes to qwen3-coder; everything else (chat / trading / research /
+// reasoning / creative) prefers general-purpose free models that aren't
+// coder-tuned. Without this split, a BTC question that exhausted paid
+// models was being handed to qwen3-coder-480b — a coder model trying to
+// do technical analysis. Reported 2026-05-03 with a markets question
+// routed to a coder model on Sonnet failure.
+const FREE_MODELS_BY_CATEGORY = {
+    coding: ['nvidia/qwen3-coder-480b', 'nvidia/glm-4.7', 'nvidia/llama-4-maverick'],
+    trading: ['nvidia/glm-4.7', 'nvidia/llama-4-maverick', 'nvidia/qwen3-coder-480b'],
+    research: ['nvidia/glm-4.7', 'nvidia/llama-4-maverick', 'nvidia/qwen3-coder-480b'],
+    reasoning: ['nvidia/glm-4.7', 'nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick'],
+    chat: ['nvidia/llama-4-maverick', 'nvidia/glm-4.7', 'nvidia/qwen3-coder-480b'],
+    creative: ['nvidia/llama-4-maverick', 'nvidia/glm-4.7', 'nvidia/qwen3-coder-480b'],
+};
+const DEFAULT_FREE_CHAIN = [
+    'nvidia/glm-4.7',
+    'nvidia/llama-4-maverick',
+    'nvidia/qwen3-coder-480b',
+];
+/**
+ * Pick the next free model to try given the question category and which
+ * free models have already failed this turn. Returns undefined when every
+ * candidate has been exhausted (caller should surface an error to user).
+ */
+export function pickFreeFallback(category, alreadyFailed) {
+    const chain = FREE_MODELS_BY_CATEGORY[category]
+        ?? DEFAULT_FREE_CHAIN;
+    return chain.find(m => !alreadyFailed.has(m));
+}
 /**
  * Parse routing profile from model string
  */
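
A quick sketch of how the session loop consumes this, matching the payment / rate-limit hunks above (`turnFailedModels` is the caller's own per-turn bookkeeping; the import path is illustrative):

```js
import { pickFreeFallback } from './index.js';

const turnFailedModels = new Set(['nvidia/glm-4.7']); // already failed this turn
const next = pickFreeFallback('trading', turnFailedModels);
// → 'nvidia/llama-4-maverick': glm-4.7 is skipped, and the coder-tuned
//   qwen3-coder-480b stays last for a non-coding category.
if (next === undefined) {
    // every free candidate is exhausted, so surface the error to the user
}
```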
@@ -24,6 +24,12 @@ export interface AuditEntry {
 routingTier?: string;
 }
 export declare function appendAudit(entry: AuditEntry): void;
+/**
+ * Trim the audit log to the last MAX_AUDIT_ENTRIES lines if it has grown
+ * past the cap. Exported so admin/debug tooling (and tests) can force a
+ * compaction without waiting for the next interval probe.
+ */
+export declare function enforceRetention(): void;
 export declare function getAuditFilePath(): string;
 export declare function readAudit(): AuditEntry[];
 /** Pull the last user message from a Dialogue history, flatten, and strip newlines. */
@@ -13,6 +13,18 @@ import path from 'node:path';
 import { BLOCKRUN_DIR } from '../config.js';
 const AUDIT_FILE = path.join(BLOCKRUN_DIR, 'franklin-audit.jsonl');
 const PROMPT_PREVIEW_CHARS = 240;
+// Cap the audit log at the most recent N entries. Without this the file
+// grew unbounded — verified ~3.6k lines on a single dev machine after a
+// few weeks of light use, so a heavily used, months-old install would
+// reach tens of megabytes and slow `franklin insights` to a crawl.
+const MAX_AUDIT_ENTRIES = 10_000;
+// Each entry is roughly 300–800 bytes. We only re-read the file when it
+// looks plausibly over the cap, so we don't pay an O(n) scan on every
+// append. 200 bytes/entry is a conservative lower bound.
+const TRIM_PROBE_BYTES = MAX_AUDIT_ENTRIES * 200;
+// Probe size every N appends — amortizes the stat() call.
+const TRIM_CHECK_INTERVAL = 200;
+let appendsSinceCheck = 0;
 export function appendAudit(entry) {
     try {
         fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
@@ -21,11 +33,39 @@ export function appendAudit(entry) {
             prompt: entry.prompt ? truncate(entry.prompt, PROMPT_PREVIEW_CHARS) : undefined,
         };
         fs.appendFileSync(AUDIT_FILE, JSON.stringify(safe) + '\n');
+        appendsSinceCheck++;
+        if (appendsSinceCheck >= TRIM_CHECK_INTERVAL) {
+            appendsSinceCheck = 0;
+            enforceRetention();
+        }
     }
     catch {
         /* best-effort — never break the agent loop on audit-write failure */
     }
 }
+/**
+ * Trim the audit log to the last MAX_AUDIT_ENTRIES lines if it has grown
+ * past the cap. Exported so admin/debug tooling (and tests) can force a
+ * compaction without waiting for the next interval probe.
+ */
+export function enforceRetention() {
+    try {
+        if (!fs.existsSync(AUDIT_FILE))
+            return;
+        const stat = fs.statSync(AUDIT_FILE);
+        if (stat.size < TRIM_PROBE_BYTES)
+            return;
+        const content = fs.readFileSync(AUDIT_FILE, 'utf-8');
+        const lines = content.split('\n').filter(Boolean);
+        if (lines.length <= MAX_AUDIT_ENTRIES)
+            return;
+        const kept = lines.slice(lines.length - MAX_AUDIT_ENTRIES);
+        fs.writeFileSync(AUDIT_FILE, kept.join('\n') + '\n');
+    }
+    catch {
+        /* best-effort */
+    }
+}
 export function getAuditFilePath() {
     return AUDIT_FILE;
 }
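
The probe arithmetic, spelled out (constants from this hunk):

```js
const MAX_AUDIT_ENTRIES = 10_000;
const TRIM_PROBE_BYTES = MAX_AUDIT_ENTRIES * 200; // 2,000,000 bytes ≈ 1.9 MiB
// Entries are never smaller than ~200 bytes, so any file under 2 MB cannot
// hold more than 10,000 lines; a stat() alone proves there is nothing to trim.
// At the observed 300–800 bytes/entry the probe can fire early (~2.5k–6.6k
// entries), and the exact line-count check then bails out, so the full read
// happens at most once per 200 appends and only when the file is large.
```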
@@ -49,6 +49,25 @@ export interface InsightsReport {
 avgRequestCostUsd: number;
 /** Efficiency: cost per 1K tokens */
 costPer1KTokens: number;
+/**
+ * Cost breakdown by capability category. Lets the UI show a clean
+ * "where did your USDC go" split alongside the per-model bar list.
+ * - chat: LLM token-billed calls (anything with non-zero tokens)
+ * - media: ImageGen / VideoGen / MusicGen (per_image / per_second / per_track)
+ * - sandbox: Modal GPU sandbox lifecycle (create / exec / status / terminate)
+ *
+ * Categorization rules (checked in order against each usage row):
+ * - `modal/*` model-name prefix → sandbox
+ * - rows with 0 input + 0 output tokens → media (image/video/music are
+ *   stored with 0 tokens by recordUsage; modal/* matches first)
+ * - everything else → chat
+ */
+byCategory: {
+    chatCostUsd: number;
+    mediaCostUsd: number;
+    sandboxCostUsd: number;
+    sandboxRequests: number;
+};
 }
 export declare function generateInsights(days?: number): InsightsReport;
 export declare function formatInsights(report: InsightsReport, days: number): string;
@@ -23,11 +23,28 @@ export function generateInsights(days = 30) {
     let totalCost = 0;
     let totalInput = 0;
     let totalOutput = 0;
+    // Category totals — see InsightsReport.byCategory doc.
+    let chatCost = 0;
+    let mediaCost = 0;
+    let sandboxCost = 0;
+    let sandboxRequests = 0;
     const modelAgg = new Map();
     for (const r of windowHistory) {
         totalCost += r.costUsd;
         totalInput += r.inputTokens;
         totalOutput += r.outputTokens;
+        // Categorize: modal/* always goes to sandbox; zero-token entries are
+        // media (image/video/music recordUsage stores 0/0 tokens); rest = chat.
+        if (r.model.startsWith('modal/')) {
+            sandboxCost += r.costUsd;
+            sandboxRequests++;
+        }
+        else if ((r.inputTokens + r.outputTokens) === 0) {
+            mediaCost += r.costUsd;
+        }
+        else {
+            chatCost += r.costUsd;
+        }
         const existing = modelAgg.get(r.model) ?? {
             requests: 0,
             costUsd: 0,
@@ -101,6 +118,12 @@ export function generateInsights(days = 30) {
         projections,
         avgRequestCostUsd,
         costPer1KTokens,
+        byCategory: {
+            chatCostUsd: chatCost,
+            mediaCostUsd: mediaCost,
+            sandboxCostUsd: sandboxCost,
+            sandboxRequests,
+        },
     };
 }
 // ─── Format for Display ───────────────────────────────────────────────────
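
For reference, the split applied to a few hypothetical usage rows (model names invented for illustration; the row shape is inferred from the loop above):

```js
const rows = [
    { model: 'anthropic/claude-sonnet', inputTokens: 1200, outputTokens: 400, costUsd: 0.012 }, // chat: non-zero tokens
    { model: 'bfl/flux-image',          inputTokens: 0,    outputTokens: 0,   costUsd: 0.04  }, // media: 0/0 tokens
    { model: 'modal/h100',              inputTokens: 0,    outputTokens: 0,   costUsd: 0.40  }, // sandbox: prefix wins
];
```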
@@ -29,6 +29,7 @@ import { jupiterQuoteCapability, jupiterSwapCapability } from './jupiter.js';
 import { base0xQuoteCapability, base0xSwapCapability } from './zerox-base.js';
 import { base0xGaslessSwapCapability } from './zerox-gasless.js';
 import { defiLlamaProtocolsCapability, defiLlamaProtocolCapability, defiLlamaChainsCapability, defiLlamaYieldsCapability, defiLlamaPriceCapability, } from './defillama.js';
+import { modalCapabilities } from './modal.js';
 import { createTradingCapabilities } from './trading-execute.js';
 import { Portfolio } from '../trading/portfolio.js';
 import { RiskEngine } from '../trading/risk.js';
@@ -158,6 +159,11 @@ export const allCapabilities = [
     defiLlamaChainsCapability,
     defiLlamaYieldsCapability,
     defiLlamaPriceCapability,
+    // Modal GPU sandbox tools — registered but hidden by default (not in
+    // CORE_TOOL_NAMES). Agent must `ActivateTool({names:["ModalCreate",...]})`
+    // before they appear in its tool inventory. High-cost ($0.40/H100 create)
+    // operations should not be in the default surface.
+    ...modalCapabilities, // ModalCreate, ModalExec, ModalStatus, ModalTerminate
 ];
 export { readCapability, writeCapability, editCapability, bashCapability, globCapability, grepCapability, webFetchCapability, webSearchCapability, taskCapability, detachCapability, };
 export { createSubAgentCapability } from './subagent.js';
@@ -0,0 +1,66 @@
+/**
+ * Modal Sandbox capabilities — spin up GPU/CPU compute on Modal Labs via the
+ * BlockRun gateway's x402-paid passthrough at /v1/modal/sandbox/{create, exec,
+ * status, terminate}. See https://modal.com/docs/guide/sandboxes for the
+ * underlying primitives.
+ *
+ * Pricing (per-call, USDC):
+ *   create:    $0.01 (CPU) / $0.05 (T4) / $0.08 (L4) / $0.10 (A10G) / $0.20 (A100) / $0.40 (H100)
+ *   exec:      $0.001
+ *   status:    $0.001
+ *   terminate: $0.001
+ *
+ * Gateway constraints (probed 2026-05-02):
+ *   - image is fixed at python:3.11 — no custom containers yet.
+ *   - command is execve-style (string[]), not a shell string. We accept a
+ *     plain string from the LLM and auto-wrap to ["sh","-c", string].
+ *   - No stdin / env / workdir / streaming on exec — keep commands self-
+ *     contained and idempotent.
+ *   - No upload/download endpoints — files in/out via exec heredoc / curl.
+ *
+ * Lifecycle:
+ *   ModalCreate → returns sandbox_id, charged at GPU tier
+ *   ModalExec → sync, returns { stdout, stderr, exit_code }
+ *   ModalStatus → check running/terminated
+ *   ModalTerminate → release; called automatically at session end via
+ *   the SessionSandboxTracker registry.
+ */
+import type { CapabilityHandler } from '../agent/types.js';
+export interface SandboxRecord {
+    id: string;
+    gpu: string;
+    createdAt: number;
+    timeoutSeconds?: number;
+}
+declare class SessionSandboxTracker {
+    private sandboxes;
+    add(rec: SandboxRecord): void;
+    remove(id: string): void;
+    list(): SandboxRecord[];
+    /** Snapshot then clear — used by the session cleanup hook. */
+    drainIds(): string[];
+}
+export declare const sessionSandboxTracker: SessionSandboxTracker;
+export declare const modalCreateCapability: CapabilityHandler;
+export declare const modalExecCapability: CapabilityHandler;
+export declare const modalStatusCapability: CapabilityHandler;
+export declare const modalTerminateCapability: CapabilityHandler;
+/**
+ * Terminate every sandbox the current session has created. Called from
+ * vscode-session.ts at session end (and the SessionToolGuard cleanup path)
+ * so a missed agent ModalTerminate doesn't leave Modal billing the user
+ * up to the per-sandbox timeout. Best-effort: failures are logged but
+ * don't block session shutdown.
+ */
+export declare function terminateAllSessionSandboxes(opts?: {
+    abortSignal?: AbortSignal;
+}): Promise<{
+    attempted: number;
+    succeeded: number;
+    failed: Array<{
+        id: string;
+        error: string;
+    }>;
+}>;
+export declare const modalCapabilities: CapabilityHandler[];
+export {};
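
Putting the declared surface together, a hedged cleanup sketch using only the exports above (the session-end wiring in vscode-session.ts is described by the comments but not shown in this diff):

```js
import { terminateAllSessionSandboxes } from './modal.js';

// Best-effort session-end cleanup: release every sandbox this session
// created so Modal stops billing, without blocking shutdown on failures.
const { attempted, succeeded, failed } = await terminateAllSessionSandboxes();
if (failed.length > 0) {
    console.error(`released ${succeeded}/${attempted} sandboxes; ` +
        failed.map(f => `${f.id}: ${f.error}`).join('; '));
}
```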