@blockrun/franklin 3.15.10 → 3.15.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,639 @@
1
+ /**
2
+ * Modal Sandbox capabilities — spin up GPU/CPU compute on Modal Labs via the
3
+ * BlockRun gateway's x402-paid passthrough at /v1/modal/sandbox/{create, exec,
4
+ * status, terminate}. See https://modal.com/docs/guide/sandboxes for the
5
+ * underlying primitives.
6
+ *
7
+ * Pricing (per-call, USDC):
8
+ * create: $0.01 (CPU) / $0.05 (T4) / $0.08 (L4) / $0.10 (A10G) / $0.20 (A100) / $0.40 (H100)
9
+ * exec: $0.001
10
+ * status: $0.001
11
+ * terminate: $0.001
12
+ *
13
+ * Gateway constraints (probed 2026-05-02):
14
+ * - image is fixed at python:3.11 — no custom containers yet.
15
+ * - command is execve-style (string[]), not a shell string. We accept a
16
+ * plain string from the LLM and auto-wrap to ["sh","-c", string].
17
+ * - No stdin / env / workdir / streaming on exec — keep commands self-
18
+ * contained and idempotent.
19
+ * - No upload/download endpoints — files in/out via exec heredoc / curl.
20
+ *
21
+ * Lifecycle:
22
+ * ModalCreate → returns sandbox_id, charged at GPU tier
23
+ * ModalExec → sync, returns { stdout, stderr, exit_code }
24
+ * ModalStatus → check running/terminated
25
+ * ModalTerminate → release; called automatically at session end via
26
+ * the SessionSandboxTracker registry.
27
+ */
28
+ import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
29
+ import { loadChain, API_URLS, VERSION } from '../config.js';
30
+ import { walletReservation } from '../wallet/reservation.js';
31
+ import { recordUsage } from '../stats/tracker.js';
32
+ // ─── Pricing table (probed from /.well-known/x402 + 402 responses) ─────────
33
/** One-time create price in USD, keyed by tier ("cpu" or a GPU model name). */
const CREATE_PRICE_USD = {
    cpu: 0.01,
    T4: 0.05,
    L4: 0.08,
    A10G: 0.10,
    A100: 0.20,
    H100: 0.40,
};
/** Flat per-call prices in USD for the remaining three endpoints. */
const EXEC_PRICE_USD = 0.001;
const STATUS_PRICE_USD = 0.001;
const TERMINATE_PRICE_USD = 0.001;
/** GPU tier names: every key of the create price table except "cpu". */
const VALID_GPUS = new Set();
for (const tierName of Object.keys(CREATE_PRICE_USD)) {
    if (tierName !== 'cpu') {
        VALID_GPUS.add(tierName);
    }
}
45
/**
 * In-memory registry of the sandboxes created so far, keyed by sandbox id.
 * Records are whatever object the caller passes to `add`; only `id` and
 * `createdAt` are read here.
 */
class SessionSandboxTracker {
    sandboxes = new Map();
    /** Store `rec` under `rec.id`, replacing any record with the same id. */
    add(rec) {
        const { id } = rec;
        this.sandboxes.set(id, rec);
    }
    /** Forget the record for `id` (no-op when it is not tracked). */
    remove(id) {
        this.sandboxes.delete(id);
    }
    /** All tracked records, newest `createdAt` first. */
    list() {
        const records = Array.from(this.sandboxes.values());
        records.sort((left, right) => right.createdAt - left.createdAt);
        return records;
    }
    /** Snapshot then clear — used by the session cleanup hook. */
    drainIds() {
        const snapshot = Array.from(this.sandboxes.keys());
        this.sandboxes.clear();
        return snapshot;
    }
}
/** Module-wide tracker instance shared by the Modal capabilities. */
export const sessionSandboxTracker = new SessionSandboxTracker();
64
+ // ─── x402 payment signing — same shape as imagegen's helper ───────────────
65
/**
 * Build the payment header for a 402 response.
 *
 * Reads the payment requirements from `response` (via extractPaymentReq),
 * signs them with the Solana wallet when `chain === 'solana'` and with the
 * default wallet otherwise, and returns `{ 'PAYMENT-SIGNATURE': payload }`.
 *
 * @param response            The 402 response from the gateway.
 * @param chain               Active chain name; only 'solana' is special-cased.
 * @param endpoint            Fallback resource URL when the requirements carry none.
 * @param resourceDescription Fallback description when the requirements carry none.
 * @returns The header object, or null when no requirements were found or
 *          anything in the signing path threw (the failure is logged).
 */
async function signPayment(response, chain, endpoint, resourceDescription) {
    try {
        const paymentHeader = await extractPaymentReq(response);
        if (!paymentHeader)
            return null;
        // Both chains pass the same trailing options object to the signer.
        const signingOptions = (details) => ({
            resourceUrl: details.resource?.url || endpoint,
            resourceDescription: details.resource?.description || resourceDescription,
            maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
            extra: details.extra,
        });
        if (chain === 'solana') {
            const wallet = await getOrCreateSolanaWallet();
            const paymentRequired = parsePaymentRequired(paymentHeader);
            const details = extractPaymentDetails(paymentRequired, SOLANA_NETWORK);
            const secretBytes = await solanaKeyToBytes(wallet.privateKey);
            // Use the fee payer named in the requirements, else the recipient.
            const feePayer = details.extra?.feePayer || details.recipient;
            const payload = await createSolanaPaymentPayload(secretBytes, wallet.address, details.recipient, details.amount, feePayer, signingOptions(details));
            return { 'PAYMENT-SIGNATURE': payload };
        }
        const wallet = getOrCreateWallet();
        const paymentRequired = parsePaymentRequired(paymentHeader);
        const details = extractPaymentDetails(paymentRequired);
        const payload = await createPaymentPayload(wallet.privateKey, wallet.address, details.recipient, details.amount, details.network || 'eip155:8453', signingOptions(details));
        return { 'PAYMENT-SIGNATURE': payload };
    }
    catch (err) {
        // `err` may be any thrown value (including null); reading `.message`
        // unguarded would throw again from inside this handler.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`[franklin] Modal payment error: ${reason}`);
        return null;
    }
}
102
/**
 * Pull the x402 payment requirements out of a 402 response.
 *
 * Prefers the `payment-required` response header. When that header is
 * absent or empty, falls back to the JSON body: if it carries an `x402` or
 * `accepts` field, the whole body is re-serialized and base64-encoded so it
 * has the same shape as the header value.
 *
 * @param response Response-like object exposing `headers.get()` and `json()`.
 * @returns The header string, or the (falsy) header value when neither
 *          source yields requirements.
 */
async function extractPaymentReq(response) {
    const header = response.headers.get('payment-required');
    if (header)
        return header;
    try {
        const body = await response.json();
        if (body && (body.x402 || body.accepts)) {
            // btoa() throws on any character outside Latin-1 (e.g. an em dash in
            // a resource description), which previously made payment silently
            // fail. Encoding the UTF-8 bytes yields the same output as btoa for
            // ASCII-only bodies and never throws.
            // NOTE(review): assumes the downstream parser tolerates UTF-8 in the
            // decoded JSON — confirm against parsePaymentRequired.
            return Buffer.from(JSON.stringify(body), 'utf8').toString('base64');
        }
    }
    catch { /* body missing or not JSON — nothing to extract */ }
    return header;
}
115
/**
 * Generic POST-with-x402-retry helper used by all four Modal endpoints. The
 * first POST may come back 402 with payment requirements; in that case we
 * sign them and retry exactly once with the PAYMENT-SIGNATURE header added.
 *
 * @param endpoint            Full URL to POST to.
 * @param body                JSON-serializable request body.
 * @param resourceDescription Description passed to the payment signer.
 * @param abortSignal         Optional caller signal; aborting it aborts the
 *                            in-flight request. An already-aborted signal
 *                            aborts immediately.
 * @param timeoutMs           Overall deadline covering both attempts.
 * @returns `{ ok, status, body, raw }` where `body` is the parsed JSON
 *          object (`{}` when the response is empty or not JSON) and `raw` is
 *          the response text. When signing fails the result is
 *          `{ ok: false, status: 402, body: { error: 'payment signing failed' }, raw: '' }`.
 *          Rejects when fetch itself rejects (network error, abort, timeout).
 */
async function postWithPayment(endpoint, body, resourceDescription, abortSignal, timeoutMs) {
    const chain = loadChain();
    const headers = {
        'Content-Type': 'application/json',
        'User-Agent': `franklin/${VERSION}`,
    };
    const ctrl = new AbortController();
    const onParentAbort = () => ctrl.abort();
    if (abortSignal?.aborted) {
        // The 'abort' event has already fired and will not fire again, so a
        // listener alone would let this request run despite the cancellation.
        ctrl.abort();
    }
    else {
        abortSignal?.addEventListener('abort', onParentAbort, { once: true });
    }
    const timer = setTimeout(() => ctrl.abort(), timeoutMs);
    try {
        const payload = JSON.stringify(body);
        let response = await fetch(endpoint, { method: 'POST', signal: ctrl.signal, headers, body: payload });
        if (response.status === 402) {
            const paymentHeaders = await signPayment(response, chain, endpoint, resourceDescription);
            if (!paymentHeaders) {
                return { ok: false, status: 402, body: { error: 'payment signing failed' }, raw: '' };
            }
            response = await fetch(endpoint, {
                method: 'POST',
                signal: ctrl.signal,
                headers: { ...headers, ...paymentHeaders },
                body: payload,
            });
        }
        const raw = await response.text().catch(() => '');
        let parsed = {};
        try {
            parsed = raw ? JSON.parse(raw) : {};
        }
        catch { /* non-JSON body: callers fall back to `raw` */ }
        return { ok: response.ok, status: response.status, body: parsed, raw };
    }
    finally {
        clearTimeout(timer);
        abortSignal?.removeEventListener('abort', onParentAbort);
    }
}
159
+ // ─── Helpers ─────────────────────────────────────────────────────────────
160
/**
 * Build the full URL of a Modal sandbox endpoint on the active chain's
 * API base, e.g. `modalEndpoint('create')`.
 */
function modalEndpoint(path) {
    const baseUrl = API_URLS[loadChain()];
    return `${baseUrl}/v1/modal/sandbox/${path}`;
}
164
/**
 * Normalize the agent's `command` input into the execve-style array Modal
 * expects. LLMs frequently pass a shell string ("pip install torch && python
 * train.py"); auto-wrap that into ["sh","-c", string] so the agent doesn't
 * have to know the difference. Valid arrays are returned as-is (same
 * reference, not copied).
 *
 * @param input Shell string or array of argv strings.
 * @returns The argv array, or null when the input is unusable: not a
 *          string/array, an empty or whitespace-only string, an empty array,
 *          an array with non-string members, or an array whose program name
 *          (first element) is empty or whitespace-only.
 */
function normalizeCommand(input) {
    if (typeof input === 'string') {
        return input.trim().length > 0 ? ['sh', '-c', input] : null;
    }
    if (!Array.isArray(input) || input.length === 0) {
        return null;
    }
    if (!input.every((arg) => typeof arg === 'string')) {
        return null;
    }
    // argv[0] is the program to run; a blank name can never be exec'd, so
    // reject it here instead of spending a paid call on a guaranteed failure.
    // Later arguments may legitimately be empty strings.
    if (input[0].trim().length === 0) {
        return null;
    }
    return input;
}
182
/**
 * Format a USD amount for display: four decimals for amounts below one
 * cent (so $0.001 does not render as $0.00), two decimals otherwise.
 */
function fmtUsd(n) {
    const decimals = n < 0.01 ? 4 : 2;
    return `$${n.toFixed(decimals)}`;
}
187
/**
 * ModalCreate capability: validates and coerces the agent's input, optionally
 * asks the user to approve the cost, holds the create price against the
 * wallet, POSTs to the `create` endpoint and registers the new sandbox in
 * `sessionSandboxTracker`.
 *
 * `execute(input, ctx)` resolves to `{ output, isError? }`. It reads
 * `ctx.onAskUser` (optional approval prompt) and `ctx.abortSignal`.
 */
export const modalCreateCapability = {
    spec: {
        name: 'ModalCreate',
        description: 'Create a Modal Python 3.11 sandbox (CPU or GPU) via the BlockRun gateway. ' +
            'Returns a sandbox_id you pass to ModalExec. Charged once per create at the ' +
            'GPU tier price: CPU $0.01, T4 $0.05, L4 $0.08, A10G $0.10, A100 $0.20, H100 $0.40. ' +
            'IMPORTANT — current limitations (BlockRun gateway is in early-access for sandboxes):\n' +
            ' - sandbox lifetime: 5 minutes MAX (gateway hard-cap, regardless of GPU tier)\n' +
            ' - per ModalExec call: 60 seconds MAX wall-clock\n' +
            ' - Python 3.11 only, no custom images yet\n' +
            ' - 1 vCPU, 1 GiB RAM defaults\n' +
            ' - GPU access is preview-tier (officially "coming later" in docs)\n' +
            ' - No setup-time provisioning — every sandbox starts empty\n' +
            'These limits make this tool suitable for: GPU benchmarks (nvidia-smi, matmul), ' +
            'small model inference (≤3B params if weights pre-cached), CUDA kernel validation, ' +
            'short ad-hoc Python tasks. NOT suitable for: full LoRA / fine-tuning runs, ' +
            'pip install + model download + training (pip alone burns 1-2 min of the 5-min budget). ' +
            'Custom images + longer lifetime + GPU production tier are documented as "coming later" ' +
            'by BlockRun — for serious ML workloads tell the user to use Modal directly until then. ' +
            'Always call ModalTerminate when done. ' +
            'Long-running command pattern: each ModalExec call is itself capped at 60s wall-clock. ' +
            'For work that takes >60s (pip install, model download, training), use the ' +
            'fire-and-poll pattern: ModalExec(["sh","-c","nohup <cmd> > /workspace/log 2>&1 &"]) ' +
            'returns in <1s, then poll with subsequent ModalExec(["cat","/workspace/log"]) calls.',
        input_schema: {
            type: 'object',
            properties: {
                gpu: { type: 'string', description: 'GPU tier. One of T4, L4, A10G, A100, H100. Omit for CPU-only ($0.01).' },
                timeout: { type: 'number', description: 'Lifetime cap in seconds. Default + Max = 300 (5 min). Gateway rejects values > 300 with HTTP 400.' },
                cpu: { type: 'number', description: 'Number of CPU cores. Default 0.125, max 8.' },
                memory: { type: 'number', description: 'Memory MB. Default 128, max 32768.' },
            },
        },
    },
    concurrent: false,
    async execute(input, ctx) {
        const raw = input ?? {};
        // ── Client-side coercion ────────────────────────────────────────────
        // LLMs routinely pass numeric fields as strings ("timeout":"300"), the
        // GPU tier in lowercase ("t4"), or a placeholder for "no GPU" ("" or
        // "cpu"). The gateway's schema is strict and 400s on these, leaving
        // the agent with no actionable hint. Fix the obvious mistakes before
        // they leave the client.
        let gpu = raw.gpu;
        if (typeof gpu === 'string') {
            const wanted = gpu.trim().toLowerCase();
            if (wanted === '' || wanted === 'cpu') {
                // Same meaning as omitting the field: CPU-only sandbox.
                gpu = undefined;
            }
            else {
                const matched = [...VALID_GPUS].find(g => g.toLowerCase() === wanted);
                if (matched)
                    gpu = matched;
            }
        }
        // Explicit null/undefined check: a truthiness test would let 0 / false
        // through as the "tier", which has no price entry.
        if (gpu !== undefined && gpu !== null && !VALID_GPUS.has(gpu)) {
            return {
                output: `Error: invalid gpu "${gpu}". Allowed: ${[...VALID_GPUS].join(', ')} (or omit for CPU).`,
                isError: true,
            };
        }
        const tier = gpu ?? 'cpu';
        const price = CREATE_PRICE_USD[tier];
        // Coerce numeric fields. Returns undefined for "not provided", a number
        // on success, or `{ error }` so garbage never reaches the gateway.
        const coerceNum = (v, name) => {
            if (v === undefined || v === null)
                return undefined;
            if (typeof v === 'string' && v.trim() === '')
                return undefined; // Number('  ') would silently become 0
            const n = typeof v === 'string' ? Number(v) : v;
            if (typeof n !== 'number' || !Number.isFinite(n)) {
                return { error: `${name} must be a number, got ${typeof v}: ${JSON.stringify(v)}` };
            }
            return n;
        };
        const timeoutCoerced = coerceNum(raw.timeout, 'timeout');
        const cpuCoerced = coerceNum(raw.cpu, 'cpu');
        const memoryCoerced = coerceNum(raw.memory, 'memory');
        for (const c of [timeoutCoerced, cpuCoerced, memoryCoerced]) {
            if (c && typeof c === 'object' && 'error' in c) {
                return { output: `Error: ${c.error}`, isError: true };
            }
        }
        // Gateway hard-caps sandbox lifetime at 300s. Check client-side so we
        // surface a clear error instead of a 400 on the wire.
        const CREATE_TIMEOUT_MAX = 300;
        if (typeof timeoutCoerced === 'number' && timeoutCoerced > CREATE_TIMEOUT_MAX) {
            return {
                output: `Error: timeout ${timeoutCoerced}s exceeds gateway max of ${CREATE_TIMEOUT_MAX}s. ` +
                    `BlockRun caps Modal sandbox lifetime at 5 minutes regardless of GPU tier. ` +
                    `For longer workloads, the work must be split across multiple sandboxes ` +
                    `(checkpoint + reload) or you need to ask BlockRun to lift this cap.`,
                isError: true,
            };
        }
        if (typeof timeoutCoerced === 'number' && timeoutCoerced <= 0) {
            return {
                output: `Error: timeout must be a positive number of seconds (max ${CREATE_TIMEOUT_MAX}), got ${timeoutCoerced}.`,
                isError: true,
            };
        }
        // Everything shown to the user or recorded below uses the coerced
        // values, i.e. exactly what is sent to the gateway.
        const effectiveTimeout = typeof timeoutCoerced === 'number' ? timeoutCoerced : CREATE_TIMEOUT_MAX;
        // ── AskUser cost preview (skipped if env auto-approve or non-UI mode) ──
        const autoApprove = process.env.FRANKLIN_MEDIA_AUTO_APPROVE_ALL === '1';
        if (ctx.onAskUser && !autoApprove) {
            const lines = [
                `Create Modal sandbox?`,
                ``,
                ` Tier: ${tier === 'cpu' ? 'CPU only' : `GPU ${tier}`}`,
                ` Image: python:3.11`,
                ` Timeout: ${effectiveTimeout}s (${(effectiveTimeout / 60).toFixed(1)} min)`,
                ...(typeof cpuCoerced === 'number' ? [` CPU cores: ${cpuCoerced}`] : []),
                ...(typeof memoryCoerced === 'number' ? [` Memory: ${memoryCoerced} MB`] : []),
                ``,
                `Create cost: ${fmtUsd(price)} (one-time)`,
                `Each exec: ${fmtUsd(EXEC_PRICE_USD)}`,
                `Terminate: ${fmtUsd(TERMINATE_PRICE_USD)}`,
            ];
            try {
                const answer = await ctx.onAskUser(lines.join('\n'), ['Approve', 'Cancel']);
                if (answer !== 'Approve') {
                    return { output: '## Sandbox creation cancelled\n\nNo USDC was spent.' };
                }
            }
            catch {
                // askUser failed (UI gone) — fall through and create. Better than
                // silently aborting in headless contexts.
            }
        }
        // Wallet reservation — block over-spend if other in-flight calls hold balance.
        let reservation = null;
        try {
            reservation = await walletReservation.hold(price);
            if (!reservation) {
                return {
                    output: `Insufficient USDC for ModalCreate (${tier}, ~${fmtUsd(price)}). ` +
                        `Other in-flight paid calls may be holding your balance — wait or fund the wallet.`,
                    isError: true,
                };
            }
        }
        catch { /* fall through, x402 will surface real error */ }
        try {
            // Only send fields the caller actually provided; the gateway applies
            // its own defaults for the rest.
            const body = {};
            if (gpu)
                body.gpu = gpu;
            if (typeof timeoutCoerced === 'number')
                body.timeout = timeoutCoerced;
            if (typeof cpuCoerced === 'number')
                body.cpu = cpuCoerced;
            if (typeof memoryCoerced === 'number')
                body.memory = memoryCoerced;
            const res = await postWithPayment(modalEndpoint('create'), body, 'Franklin Modal sandbox create', ctx.abortSignal, 90_000);
            if (!res.ok) {
                const err = res.body.error ? String(res.body.error) : res.raw.slice(0, 300);
                // Surface the per-field validation issues — usually the
                // actionable bit ("expected number, received string at path
                // ['timeout']").
                const details = Array.isArray(res.body.details)
                    ? '\nDetails: ' + res.body.details.map((d) => `${d.path?.join('.') ?? '?'}: ${d.message ?? JSON.stringify(d)}`).join('; ')
                    : '';
                return {
                    output: `ModalCreate failed (${res.status}): ${err}${details}`,
                    isError: true,
                };
            }
            // Accept either `sandbox_id` or `id` from the gateway response.
            const sandboxId = (typeof res.body.sandbox_id === 'string' && res.body.sandbox_id) ||
                (typeof res.body.id === 'string' && res.body.id) ||
                '';
            if (!sandboxId) {
                return {
                    output: `ModalCreate returned no sandbox_id. Raw: ${res.raw.slice(0, 300)}`,
                    isError: true,
                };
            }
            sessionSandboxTracker.add({
                id: sandboxId,
                gpu: tier,
                createdAt: Date.now(),
                timeoutSeconds: effectiveTimeout,
            });
            // Stats — surface Modal usage in `franklin insights` like other paid tools.
            try {
                recordUsage(`modal/${tier}`, 0, 0, price, 0);
            }
            catch { /* stats are best-effort; never fail the create over them */ }
            return {
                output: `Sandbox created\n` +
                    `- id: \`${sandboxId}\`\n` +
                    `- tier: ${tier === 'cpu' ? 'CPU only' : `GPU ${tier}`}\n` +
                    `- timeout: ${effectiveTimeout}s\n` +
                    `- charged: ${fmtUsd(price)}\n\n` +
                    `Next: ModalExec({ sandbox_id: "${sandboxId}", command: ["python","-c","print(1)"] })`,
            };
        }
        finally {
            walletReservation.release(reservation);
        }
    },
};
375
/**
 * ModalExec capability: normalizes `command`, clamps the timeout to the
 * gateway's 60s ceiling, POSTs to the `exec` endpoint and renders
 * stdout / stderr / exit code for the agent.
 *
 * `execute(input, ctx)` resolves to `{ output, isError }`. It reads
 * `ctx.abortSignal`.
 */
export const modalExecCapability = {
    spec: {
        name: 'ModalExec',
        description: 'Run a command inside a Modal sandbox (must already exist via ModalCreate). ' +
            '`command` accepts either an execve-style array (e.g. ["python","-c","print(1)"]) ' +
            'or a shell string (e.g. "pip install torch && python train.py") which is auto-wrapped ' +
            'as ["sh","-c", <string>]. Returns stdout, stderr, exit_code synchronously. ' +
            'Each call charges $0.001. The sandbox keeps state across exec calls (filesystem, ' +
            'installed pip packages, etc) until ModalTerminate. ' +
            'CRITICAL: timeout is HARD-CAPPED at 60 seconds by the gateway — anything longer ' +
            'returns HTTP 400. For long-running commands (pip install large packages, model ' +
            'downloads, training loops), use the fire-and-poll pattern: ' +
            ' exec1: ["sh","-c","nohup <slow-cmd> > /workspace/log 2>&1 & echo $! > /workspace/pid"] (<1s) ' +
            ' exec2: ["sh","-c","tail -50 /workspace/log"] (poll progress, <1s) ' +
            ' exec3: ["sh","-c","kill -0 $(cat /workspace/pid) 2>/dev/null && echo RUN || echo DONE"] (check live) ' +
            'This decouples actual work duration from the per-exec 60s ceiling, but the sandbox ' +
            'itself still dies at 300s wall-clock — total useful work fits in ~5 minutes.',
        input_schema: {
            type: 'object',
            properties: {
                sandbox_id: { type: 'string', description: 'Sandbox id from ModalCreate.' },
                command: {
                    description: 'Execve-style array OR shell string. Strings are wrapped as ["sh","-c", string].',
                },
                timeout: { type: 'number', description: 'Per-exec timeout in seconds. Default 60, MAX 60 (gateway hard cap). Use fire-and-poll for longer work.' },
            },
            required: ['sandbox_id', 'command'],
        },
    },
    concurrent: false,
    async execute(input, ctx) {
        const raw = input;
        if (!raw.sandbox_id)
            return { output: 'Error: sandbox_id is required', isError: true };
        const command = normalizeCommand(raw.command);
        if (!command) {
            // JSON.stringify returns undefined for undefined / functions /
            // symbols — guard the slice call.
            const got = raw.command === undefined
                ? 'undefined (missing)'
                : JSON.stringify(raw.command);
            return {
                output: `Error: invalid command. Expected a non-empty string or string[] of length >= 1. ` +
                    `Got: ${(got ?? 'undefined').slice(0, 100)}`,
                isError: true,
            };
        }
        let reservation = null;
        try {
            reservation = await walletReservation.hold(EXEC_PRICE_USD);
            // For micro-cost calls don't hard-block on insufficient — just proceed.
        }
        catch { /* reservation is advisory here; x402 surfaces real payment errors */ }
        try {
            // Gateway hard-caps exec timeout at 60s. Clamp client-side so we
            // never burn an x402 round-trip on a 400. Anything unusable —
            // missing, non-numeric, non-finite, zero or negative — falls back
            // to the 60s ceiling, since "I want it to actually run" is the
            // sensible default. Numeric strings ("timeout":"30") are accepted.
            const EXEC_TIMEOUT_MAX = 60;
            let execTimeout = EXEC_TIMEOUT_MAX;
            if (raw.timeout !== undefined && raw.timeout !== null && raw.timeout !== '') {
                const n = typeof raw.timeout === 'string' ? Number(raw.timeout) : raw.timeout;
                if (typeof n === 'number' && Number.isFinite(n) && n > 0) {
                    execTimeout = Math.min(n, EXEC_TIMEOUT_MAX);
                }
            }
            const body = {
                sandbox_id: raw.sandbox_id,
                command,
                timeout: execTimeout,
            };
            // Give the HTTP call 30s of headroom over the exec timeout, and
            // never less than 30s overall.
            const httpTimeoutMs = Math.max(30_000, (execTimeout + 30) * 1000);
            const res = await postWithPayment(modalEndpoint('exec'), body, 'Franklin Modal sandbox exec', ctx.abortSignal, httpTimeoutMs);
            if (!res.ok) {
                // 400 here usually means the agent built the wrong shape (bad
                // sandbox_id, malformed command). Dump the raw body (truncated)
                // so the agent can see exactly what the gateway complained about
                // and self-correct on the next turn instead of looping blind.
                const err = res.body.error ? String(res.body.error) : '(no error field)';
                const details = res.body.details ? `\nDetails: ${JSON.stringify(res.body.details)}` : '';
                const rawResponse = res.raw.length > 500 ? res.raw.slice(0, 500) + '…' : res.raw;
                return {
                    output: `ModalExec failed (${res.status}): ${err}${details}\n` +
                        `Raw response: ${rawResponse}\n` +
                        `Sent: command=${JSON.stringify(command).slice(0, 200)}`,
                    isError: true,
                };
            }
            const stdout = typeof res.body.stdout === 'string' ? res.body.stdout : '';
            const stderr = typeof res.body.stderr === 'string' ? res.body.stderr : '';
            // Gateway field shape isn't 100% pinned — accept exit_code, exitCode,
            // returncode, code (in priority order). If NONE of them are present
            // but stdout/stderr came back, treat as success (exit 0) rather than
            // poisoning the failure counter on a healthy run with an unfamiliar
            // response shape.
            const exitField = ['exit_code', 'exitCode', 'returncode', 'code']
                .find((key) => typeof res.body[key] === 'number');
            const rawExit = exitField !== undefined ? res.body[exitField] : null;
            const hasAnyOutput = stdout.length > 0 || stderr.length > 0;
            const exitCode = rawExit !== null ? rawExit : (hasAnyOutput ? 0 : -1);
            try {
                recordUsage('modal/exec', 0, 0, EXEC_PRICE_USD, 0);
            }
            catch { /* stats are best-effort */ }
            const summary = `exit ${exitCode}` + (rawExit === null ? ' (inferred — no exit_code field in response)' : '');
            const sections = [
                `\`${command.join(' ')}\` → ${summary}`,
            ];
            if (stdout)
                sections.push(`--- stdout ---\n${stdout}`);
            if (stderr)
                sections.push(`--- stderr ---\n${stderr}`);
            // Only mark as error when we have a real non-zero exit code OR
            // we have nothing at all (no stdout / stderr / exit_code) which
            // suggests an actual problem rather than a parsing edge case.
            const isError = rawExit !== null ? rawExit !== 0 : !hasAnyOutput;
            return { output: sections.join('\n\n'), isError };
        }
        finally {
            walletReservation.release(reservation);
        }
    },
};
503
+ // ─── ModalStatus ─────────────────────────────────────────────────────────
504
/**
 * ModalStatus capability: POSTs the sandbox id to the `status` endpoint and
 * reports the returned `status` field together with the full response body.
 *
 * `execute(input, ctx)` resolves to `{ output, isError? }`.
 */
export const modalStatusCapability = {
    spec: {
        name: 'ModalStatus',
        description: 'Check the status of a Modal sandbox (running / terminated). Charges $0.001. ' +
            'Useful when you suspect a sandbox died or you want to confirm a previous ' +
            'ModalTerminate succeeded.',
        input_schema: {
            type: 'object',
            properties: {
                sandbox_id: { type: 'string' },
            },
            required: ['sandbox_id'],
        },
    },
    concurrent: false,
    async execute(input, ctx) {
        const { sandbox_id } = input;
        if (!sandbox_id) {
            return { output: 'Error: sandbox_id is required', isError: true };
        }
        // Hold the call price while the request is in flight; a failing hold
        // is ignored and the call proceeds anyway.
        let hold = null;
        try {
            hold = await walletReservation.hold(STATUS_PRICE_USD);
        }
        catch { /* ignore */ }
        try {
            const reply = await postWithPayment(modalEndpoint('status'), { sandbox_id }, 'Franklin Modal sandbox status', ctx.abortSignal, 30_000);
            if (reply.ok) {
                // Usage stats are best-effort and must not fail the call.
                try {
                    recordUsage('modal/status', 0, 0, STATUS_PRICE_USD, 0);
                }
                catch { /* ignore */ }
                const state = reply.body.status || 'unknown';
                const dump = JSON.stringify(reply.body, null, 2);
                return { output: `Sandbox \`${sandbox_id}\` status: **${state}**\n\n${dump}` };
            }
            // Prefer the gateway's error field; otherwise show the start of the raw body.
            const reason = reply.body.error ? String(reply.body.error) : reply.raw.slice(0, 300);
            return { output: `ModalStatus failed (${reply.status}): ${reason}`, isError: true };
        }
        finally {
            walletReservation.release(hold);
        }
    },
};
547
+ // ─── ModalTerminate ──────────────────────────────────────────────────────
548
/**
 * ModalTerminate capability: POSTs the sandbox id to the `terminate`
 * endpoint and drops the sandbox from `sessionSandboxTracker` once the
 * request has returned any response.
 *
 * `execute(input, ctx)` resolves to `{ output, isError? }`. If the request
 * itself rejects (network error, abort, timeout) the rejection propagates
 * and the sandbox stays in the tracker.
 */
export const modalTerminateCapability = {
    spec: {
        name: 'ModalTerminate',
        description: 'Terminate a Modal sandbox and release its resources. Charges $0.001. ' +
            'Strongly recommended after every successful ModalExec sequence — ' +
            'Modal bills wall-clock GPU time until the sandbox terminates or hits ' +
            'its `timeout`. Session-end auto-cleanup also calls this for any sandboxes ' +
            'the agent forgot, but explicit is better.',
        input_schema: {
            type: 'object',
            properties: {
                sandbox_id: { type: 'string' },
            },
            required: ['sandbox_id'],
        },
    },
    concurrent: false,
    async execute(input, ctx) {
        const sandbox_id = input.sandbox_id;
        if (!sandbox_id)
            return { output: 'Error: sandbox_id is required', isError: true };
        let reservation = null;
        try {
            reservation = await walletReservation.hold(TERMINATE_PRICE_USD);
        }
        catch { /* reservation is advisory for micro-cost calls */ }
        try {
            const res = await postWithPayment(modalEndpoint('terminate'), { sandbox_id }, 'Franklin Modal sandbox terminate', ctx.abortSignal, 30_000);
            // Always remove from tracker — even on failure, retrying is wasteful.
            sessionSandboxTracker.remove(sandbox_id);
            if (!res.ok) {
                const err = res.body.error ? String(res.body.error) : res.raw.slice(0, 300);
                // Most 4xx responses (e.g. already-terminated) are benign. 402 is
                // the exception: it means payment was not completed (including a
                // local signing failure), so the terminate never ran and the
                // agent needs to see that as an error.
                const isError = res.status >= 500 || res.status === 402;
                return {
                    output: `ModalTerminate returned ${res.status}: ${err}\n\n` +
                        `(Removed from local tracker regardless. Modal-side cleanup will happen at the timeout.)`,
                    isError,
                };
            }
            try {
                recordUsage('modal/terminate', 0, 0, TERMINATE_PRICE_USD, 0);
            }
            catch { /* stats are best-effort */ }
            return { output: `Sandbox \`${sandbox_id}\` terminated.` };
        }
        finally {
            walletReservation.release(reservation);
        }
    },
};
597
+ // ─── Bulk session cleanup ────────────────────────────────────────────────
598
/**
 * Terminate every sandbox the current session has created. Called from
 * vscode-session.ts at session end (and the SessionToolGuard cleanup path)
 * so a missed agent ModalTerminate doesn't leave Modal billing the user
 * up to the per-sandbox timeout. Best-effort: failures are collected in the
 * result instead of being thrown, so they don't block session shutdown.
 *
 * The tracker is drained up front, so ids that fail here are not retried.
 *
 * @param opts.abortSignal Optional signal. Once it is aborted, no further
 *        terminate requests are issued; the remaining ids are reported as
 *        failed.
 * @returns `{ attempted, succeeded, failed }` where `attempted` is the number
 *          of ids drained and `failed` is a list of `{ id, error }`.
 */
export async function terminateAllSessionSandboxes(opts = {}) {
    const ids = sessionSandboxTracker.drainIds();
    const failed = [];
    let succeeded = 0;
    const ctrl = new AbortController();
    const parentSignal = opts.abortSignal;
    const onParentAbort = () => ctrl.abort();
    if (parentSignal?.aborted) {
        ctrl.abort();
    }
    else {
        parentSignal?.addEventListener('abort', onParentAbort, { once: true });
    }
    try {
        // Sequential — terminating a few sandboxes in parallel offers no real
        // win over serial, and serial keeps the wallet-reservation accounting
        // simple.
        for (const id of ids) {
            if (ctrl.signal.aborted) {
                // Shutdown was cancelled: don't start new paid requests.
                failed.push({ id, error: 'aborted before terminate was attempted' });
                continue;
            }
            try {
                const res = await postWithPayment(modalEndpoint('terminate'), { sandbox_id: id }, 'Franklin Modal sandbox cleanup', ctrl.signal, 20_000);
                if (res.ok)
                    succeeded++;
                else
                    failed.push({ id, error: String(res.body.error ?? res.raw.slice(0, 200)) });
            }
            catch (err) {
                // `err` may be any thrown value, not necessarily an Error.
                failed.push({ id, error: err instanceof Error ? err.message : String(err) });
            }
        }
    }
    finally {
        // Don't leave a listener on a caller-owned, possibly long-lived signal.
        parentSignal?.removeEventListener('abort', onParentAbort);
    }
    return { attempted: ids.length, succeeded, failed };
}
633
+ // ─── All-in-one export for index.ts registration ─────────────────────────
634
/**
 * The four Modal capabilities in lifecycle order (create, exec, status,
 * terminate), bundled so they can be registered together.
 */
export const modalCapabilities = [modalCreateCapability, modalExecCapability, modalStatusCapability, modalTerminateCapability];