@blockrun/franklin 3.15.10 → 3.15.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/assets/franklin-vscode-banner.png +0 -0
- package/dist/agent/loop.js +16 -34
- package/dist/agent/tool-guard.js +16 -2
- package/dist/logger.d.ts +10 -0
- package/dist/logger.js +74 -0
- package/dist/stats/audit.d.ts +6 -0
- package/dist/stats/audit.js +40 -0
- package/dist/stats/insights.d.ts +19 -0
- package/dist/stats/insights.js +23 -0
- package/dist/tools/index.js +6 -0
- package/dist/tools/modal.d.ts +66 -0
- package/dist/tools/modal.js +639 -0
- package/dist/wallet/reservation.d.ts +51 -0
- package/dist/wallet/reservation.js +105 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -71,6 +71,8 @@ That's it. Zero signup, zero credit card, zero phone verification. Send **$5 of
|
|
|
71
71
|
|
|
72
72
|
### Prefer a GUI? Try Franklin for VS Code
|
|
73
73
|
|
|
74
|
+
[![Franklin for VS Code](assets/franklin-vscode-banner.png)](https://marketplace.visualstudio.com/items?itemName=blockrun.franklin-vscode)
|
|
75
|
+
|
|
74
76
|
The same agent ships as a [VS Code extension](https://marketplace.visualstudio.com/items?itemName=blockrun.franklin-vscode) — chat panel, model picker, wallet balance, image / video generation, inline diff cards — all driven by the wallet you already funded for the CLI.
|
|
75
77
|
|
|
76
78
|
```
|
|
Binary file
|
package/dist/agent/loop.js
CHANGED
|
@@ -20,6 +20,7 @@ import { createActivateToolCapability } from '../tools/activate.js';
|
|
|
20
20
|
import { recordUsage } from '../stats/tracker.js';
|
|
21
21
|
import { recordSessionUsage } from '../stats/session-tracker.js';
|
|
22
22
|
import { appendAudit, extractLastUserPrompt } from '../stats/audit.js';
|
|
23
|
+
import { logger, setDebugMode } from '../logger.js';
|
|
23
24
|
import { estimateCost, OPUS_PRICING } from '../pricing.js';
|
|
24
25
|
import { maybeMidSessionExtract } from '../learnings/extractor.js';
|
|
25
26
|
import { extractMentions, buildEntityContext, loadEntities } from '../brain/store.js';
|
|
@@ -325,6 +326,9 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
325
326
|
// fool Edit/Write into skipping the read-before-edit check or serve cached
|
|
326
327
|
// webfetch content fetched under the previous session's intent.
|
|
327
328
|
resetToolSessionState();
|
|
329
|
+
// Wire stderr-mirroring of log lines to the same flag the agent already
|
|
330
|
+
// uses to gate verbose console output. File writes happen regardless.
|
|
331
|
+
setDebugMode(!!config.debug);
|
|
328
332
|
const client = new ModelClient({
|
|
329
333
|
apiUrl: config.apiUrl,
|
|
330
334
|
chain: config.chain,
|
|
@@ -725,16 +729,12 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
725
729
|
kind: 'text_delta',
|
|
726
730
|
text: `\n*🗜 Auto-compacted: ~${(beforeTokens / 1000).toFixed(0)}K → ~${(afterTokens / 1000).toFixed(0)}K tokens (saved ${pct}%)*\n\n`,
|
|
727
731
|
});
|
|
728
|
-
|
|
729
|
-
console.error(`[franklin] History compacted: ~${afterTokens} tokens`);
|
|
730
|
-
}
|
|
732
|
+
logger.info(`[franklin] History compacted: ~${afterTokens} tokens`);
|
|
731
733
|
}
|
|
732
734
|
}
|
|
733
735
|
catch (compactErr) {
|
|
734
736
|
compactFailures++;
|
|
735
|
-
|
|
736
|
-
console.error(`[franklin] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
|
|
737
|
-
}
|
|
737
|
+
logger.warn(`[franklin] Compaction failed (${compactFailures}/3): ${compactErr.message}`);
|
|
738
738
|
}
|
|
739
739
|
}
|
|
740
740
|
// Inject ultrathink instruction when mode is active
|
|
@@ -939,9 +939,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
939
939
|
const oldModel = config.model;
|
|
940
940
|
config.model = nextModel;
|
|
941
941
|
config.onModelChange?.(nextModel, 'system');
|
|
942
|
-
|
|
943
|
-
console.error(`[franklin] ${oldModel} returned empty — switching to ${nextModel}`);
|
|
944
|
-
}
|
|
942
|
+
logger.warn(`[franklin] ${oldModel} returned empty — switching to ${nextModel}`);
|
|
945
943
|
onEvent({ kind: 'text_delta', text: `\n*${oldModel} returned empty — switching to ${nextModel}*\n` });
|
|
946
944
|
continue;
|
|
947
945
|
}
|
|
@@ -973,9 +971,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
973
971
|
// ── Media size error recovery (strip images/PDFs + retry) ──
|
|
974
972
|
if (isMediaSizeError(errMsg) && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
|
|
975
973
|
recoveryAttempts++;
|
|
976
|
-
|
|
977
|
-
console.error(`[franklin] Media too large — stripping and retrying (attempt ${recoveryAttempts})`);
|
|
978
|
-
}
|
|
974
|
+
logger.warn(`[franklin] Media too large — stripping and retrying (attempt ${recoveryAttempts})`);
|
|
979
975
|
const { history: stripped, stripped: didStrip } = stripMediaFromHistory(history);
|
|
980
976
|
if (didStrip) {
|
|
981
977
|
replaceHistory(history, stripped);
|
|
@@ -989,9 +985,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
989
985
|
// the prompt is too long, so we must compact regardless of our threshold estimate.
|
|
990
986
|
if (classified.category === 'context_limit' && recoveryAttempts < MAX_RECOVERY_ATTEMPTS) {
|
|
991
987
|
recoveryAttempts++;
|
|
992
|
-
|
|
993
|
-
console.error(`[franklin] Prompt too long — force compacting (attempt ${recoveryAttempts})`);
|
|
994
|
-
}
|
|
988
|
+
logger.warn(`[franklin] Prompt too long — force compacting (attempt ${recoveryAttempts})`);
|
|
995
989
|
onEvent({ kind: 'text_delta', text: '\n*Context limit hit — compacting conversation...*\n' });
|
|
996
990
|
const { history: compactedAgain } = await forceCompact(history, config.model, client, config.debug);
|
|
997
991
|
replaceHistory(history, compactedAgain);
|
|
@@ -1017,9 +1011,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1017
1011
|
const continuationPrompt = buildContinuationPrompt();
|
|
1018
1012
|
history.push(continuationPrompt);
|
|
1019
1013
|
persistSessionMessage(continuationPrompt);
|
|
1020
|
-
|
|
1021
|
-
console.error(`[franklin] Stream timeout on ${resolvedModel} — auto-continuing with chunked-task prompt`);
|
|
1022
|
-
}
|
|
1014
|
+
logger.warn(`[franklin] Stream timeout on ${resolvedModel} — auto-continuing with chunked-task prompt`);
|
|
1023
1015
|
onEvent({
|
|
1024
1016
|
kind: 'text_delta',
|
|
1025
1017
|
text: '\n*Task too big for one streaming turn — auto-continuing with a smaller chunk...*\n',
|
|
@@ -1031,10 +1023,8 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1031
1023
|
const costText = retryDecision.estimatedReplayCostUsd > 0
|
|
1032
1024
|
? ` and at least $${retryDecision.estimatedReplayCostUsd.toFixed(4)} in input charges`
|
|
1033
1025
|
: '';
|
|
1034
|
-
|
|
1035
|
-
|
|
1036
|
-
`~${tokenText} input tokens, replayCost=$${retryDecision.estimatedReplayCostUsd.toFixed(4)}`);
|
|
1037
|
-
}
|
|
1026
|
+
logger.warn(`[franklin] Timeout retry skipped for ${resolvedModel}: ` +
|
|
1027
|
+
`~${tokenText} input tokens, replayCost=$${retryDecision.estimatedReplayCostUsd.toFixed(4)}`);
|
|
1038
1028
|
onEvent({
|
|
1039
1029
|
kind: 'turn_done',
|
|
1040
1030
|
reason: 'error',
|
|
@@ -1079,9 +1069,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1079
1069
|
}
|
|
1080
1070
|
recoveryAttempts++;
|
|
1081
1071
|
const backoffMs = getBackoffDelay(recoveryAttempts);
|
|
1082
|
-
|
|
1083
|
-
console.error(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
|
|
1084
|
-
}
|
|
1072
|
+
logger.warn(`[franklin] ${classified.label} error — retrying in ${(backoffMs / 1000).toFixed(1)}s (attempt ${recoveryAttempts}/${effectiveMaxRetries}): ${errMsg.slice(0, 100)}`);
|
|
1085
1073
|
// Surface the actual error + model so the user can see which model
|
|
1086
1074
|
// is failing and what the upstream said. Old "Retrying after Server
|
|
1087
1075
|
// error" was uninformative — users couldn't tell whether to wait,
|
|
@@ -1249,9 +1237,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1249
1237
|
if (maxTokensOverride === undefined) {
|
|
1250
1238
|
// First hit: escalate to 64K
|
|
1251
1239
|
maxTokensOverride = ESCALATED_MAX_TOKENS;
|
|
1252
|
-
|
|
1253
|
-
console.error(`[franklin] Max tokens hit — escalating to ${maxTokensOverride}`);
|
|
1254
|
-
}
|
|
1240
|
+
logger.warn(`[franklin] Max tokens hit — escalating to ${maxTokensOverride}`);
|
|
1255
1241
|
}
|
|
1256
1242
|
// Append what we got + a continuation prompt with last-line anchor
|
|
1257
1243
|
const partialAssistant = { role: 'assistant', content: responseParts };
|
|
@@ -1293,9 +1279,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1293
1279
|
// the existing recovery flow handle it.
|
|
1294
1280
|
const gatewayErr = looksLikeGatewayErrorAsText(responseParts);
|
|
1295
1281
|
if (gatewayErr.match) {
|
|
1296
|
-
|
|
1297
|
-
console.error(`[franklin] Gateway returned an error text in lieu of an answer (${resolvedModel}): ${gatewayErr.message}`);
|
|
1298
|
-
}
|
|
1282
|
+
logger.error(`[franklin] Gateway returned an error text in lieu of an answer (${resolvedModel}): ${gatewayErr.message}`);
|
|
1299
1283
|
throw new Error(gatewayErr.message);
|
|
1300
1284
|
}
|
|
1301
1285
|
// Reset recovery counter on successful completion
|
|
@@ -1572,9 +1556,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
|
|
|
1572
1556
|
}
|
|
1573
1557
|
// Hard stop: if cap exceeded, force end this agent loop iteration
|
|
1574
1558
|
if (turnToolCalls >= MAX_TOOL_CALLS_PER_TURN) {
|
|
1575
|
-
|
|
1576
|
-
console.error(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
|
|
1577
|
-
}
|
|
1559
|
+
logger.warn(`[franklin] Tool call cap hit: ${turnToolCalls} calls this turn`);
|
|
1578
1560
|
// Don't break — let the model respond one more time to summarize,
|
|
1579
1561
|
// but inject the stop signal above so it knows to finish up.
|
|
1580
1562
|
}
|
package/dist/agent/tool-guard.js
CHANGED
|
@@ -188,9 +188,23 @@ export class SessionToolGuard {
|
|
|
188
188
|
}
|
|
189
189
|
}
|
|
190
190
|
async beforeExecute(invocation, scope) {
|
|
191
|
-
// Hard-block tools that have failed too many times this session
|
|
191
|
+
// Hard-block tools that have failed too many times this session.
|
|
192
|
+
// Modal lifecycle tools are exempt: orphan sandboxes keep billing
|
|
193
|
+
// GPU time, and ModalTerminate is the only way to recover from
|
|
194
|
+
// the agent side. Auto-disabling it after 3 transient errors would
|
|
195
|
+
// strand a $0.40/hr H100 until the session ends. Same logic for
|
|
196
|
+
// media-gen tools: failures are usually transient (gateway hiccup,
|
|
197
|
+
// prompt rejection) and the user often wants to retry.
|
|
198
|
+
const FAILURE_EXEMPT = new Set([
|
|
199
|
+
'ImageGen',
|
|
200
|
+
'VideoGen',
|
|
201
|
+
'ModalCreate',
|
|
202
|
+
'ModalExec',
|
|
203
|
+
'ModalStatus',
|
|
204
|
+
'ModalTerminate',
|
|
205
|
+
]);
|
|
192
206
|
const errorCount = this.toolErrorCounts.get(invocation.name) ?? 0;
|
|
193
|
-
if (errorCount >= 3) {
|
|
207
|
+
if (errorCount >= 3 && !FAILURE_EXEMPT.has(invocation.name)) {
|
|
194
208
|
return {
|
|
195
209
|
output: `${invocation.name} has failed ${errorCount} times this session and is now disabled. ` +
|
|
196
210
|
'Tell the user what went wrong and suggest alternatives.',
|
package/dist/logger.d.ts
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
|
|
2
|
+
export declare function setDebugMode(enabled: boolean): void;
|
|
3
|
+
export declare function isDebugMode(): boolean;
|
|
4
|
+
export declare function getLogFilePath(): string;
|
|
5
|
+
export declare const logger: {
|
|
6
|
+
debug(msg: string): void;
|
|
7
|
+
info(msg: string): void;
|
|
8
|
+
warn(msg: string): void;
|
|
9
|
+
error(msg: string): void;
|
|
10
|
+
};
|
package/dist/logger.js
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Unified logger — always persists to ~/.blockrun/franklin-debug.log,
|
|
3
|
+
* optionally mirrors to stderr when debug mode is on.
|
|
4
|
+
*
|
|
5
|
+
* Why this exists: before this module, agent diagnostics were emitted with
|
|
6
|
+
* `if (config.debug) console.error(...)`. That meant `franklin logs` showed
|
|
7
|
+
* nothing in normal use because the events never hit the file. Now every
|
|
8
|
+
* level writes to disk; stderr mirroring is the opt-in part.
|
|
9
|
+
*
|
|
10
|
+
* Errors during a log write are swallowed — the agent loop must never die
|
|
11
|
+
* because the disk is full or the home dir is read-only.
|
|
12
|
+
*/
|
|
13
|
+
import fs from 'node:fs';
|
|
14
|
+
import path from 'node:path';
|
|
15
|
+
import { BLOCKRUN_DIR } from './config.js';
|
|
16
|
+
const LOG_FILE = path.join(BLOCKRUN_DIR, 'franklin-debug.log');
|
|
17
|
+
// Strip ANSI escapes + carriage returns so the log stays grep-able.
|
|
18
|
+
const ANSI_RE = /\x1b\[[0-9;]*m|\x1b\][^\x07]*\x07|\r/g;
|
|
19
|
+
let debugMode = false;
|
|
20
|
+
let dirEnsured = false;
|
|
21
|
+
export function setDebugMode(enabled) {
|
|
22
|
+
debugMode = enabled;
|
|
23
|
+
}
|
|
24
|
+
export function isDebugMode() {
|
|
25
|
+
return debugMode;
|
|
26
|
+
}
|
|
27
|
+
export function getLogFilePath() {
|
|
28
|
+
return LOG_FILE;
|
|
29
|
+
}
|
|
30
|
+
function ensureDir() {
|
|
31
|
+
if (dirEnsured)
|
|
32
|
+
return;
|
|
33
|
+
try {
|
|
34
|
+
fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
|
|
35
|
+
dirEnsured = true;
|
|
36
|
+
}
|
|
37
|
+
catch { /* readonly mount / disk full — keep trying so a remount recovers */ }
|
|
38
|
+
}
|
|
39
|
+
function writeFile(level, msg) {
|
|
40
|
+
ensureDir();
|
|
41
|
+
try {
|
|
42
|
+
const clean = msg.replace(ANSI_RE, '');
|
|
43
|
+
fs.appendFileSync(LOG_FILE, `[${new Date().toISOString()}] [${level.toUpperCase()}] ${clean}\n`);
|
|
44
|
+
}
|
|
45
|
+
catch { /* best-effort — never break the agent on log failure */ }
|
|
46
|
+
}
|
|
47
|
+
function writeStderr(msg) {
|
|
48
|
+
try {
|
|
49
|
+
process.stderr.write(msg + '\n');
|
|
50
|
+
}
|
|
51
|
+
catch { /* swallow */ }
|
|
52
|
+
}
|
|
53
|
+
export const logger = {
|
|
54
|
+
debug(msg) {
|
|
55
|
+
writeFile('debug', msg);
|
|
56
|
+
if (debugMode)
|
|
57
|
+
writeStderr(msg);
|
|
58
|
+
},
|
|
59
|
+
info(msg) {
|
|
60
|
+
writeFile('info', msg);
|
|
61
|
+
if (debugMode)
|
|
62
|
+
writeStderr(msg);
|
|
63
|
+
},
|
|
64
|
+
warn(msg) {
|
|
65
|
+
writeFile('warn', msg);
|
|
66
|
+
if (debugMode)
|
|
67
|
+
writeStderr(msg);
|
|
68
|
+
},
|
|
69
|
+
error(msg) {
|
|
70
|
+
writeFile('error', msg);
|
|
71
|
+
if (debugMode)
|
|
72
|
+
writeStderr(msg);
|
|
73
|
+
},
|
|
74
|
+
};
|
package/dist/stats/audit.d.ts
CHANGED
|
@@ -24,6 +24,12 @@ export interface AuditEntry {
|
|
|
24
24
|
routingTier?: string;
|
|
25
25
|
}
|
|
26
26
|
export declare function appendAudit(entry: AuditEntry): void;
|
|
27
|
+
/**
|
|
28
|
+
* Trim the audit log to the last MAX_AUDIT_ENTRIES lines if it has grown
|
|
29
|
+
* past the cap. Exported so admin/debug tooling (and tests) can force a
|
|
30
|
+
* compaction without waiting for the next interval probe.
|
|
31
|
+
*/
|
|
32
|
+
export declare function enforceRetention(): void;
|
|
27
33
|
export declare function getAuditFilePath(): string;
|
|
28
34
|
export declare function readAudit(): AuditEntry[];
|
|
29
35
|
/** Pull the last user message from a Dialogue history, flatten, and strip newlines. */
|
package/dist/stats/audit.js
CHANGED
|
@@ -13,6 +13,18 @@ import path from 'node:path';
|
|
|
13
13
|
import { BLOCKRUN_DIR } from '../config.js';
|
|
14
14
|
const AUDIT_FILE = path.join(BLOCKRUN_DIR, 'franklin-audit.jsonl');
|
|
15
15
|
const PROMPT_PREVIEW_CHARS = 240;
|
|
16
|
+
// Cap the audit log at the most recent N entries. Without this the file
|
|
17
|
+
// grew unbounded — verified ~3.6k lines on a single dev machine after a
|
|
18
|
+
// few weeks of light use, so a months-old install would be in the GB
|
|
19
|
+
// range and slow `franklin insights` to a crawl.
|
|
20
|
+
const MAX_AUDIT_ENTRIES = 10_000;
|
|
21
|
+
// Each entry is roughly 300–800 bytes. We only re-read the file when it
|
|
22
|
+
// looks plausibly over the cap, so we don't pay an O(n) scan on every
|
|
23
|
+
// append. 200 bytes/entry is a conservative lower bound.
|
|
24
|
+
const TRIM_PROBE_BYTES = MAX_AUDIT_ENTRIES * 200;
|
|
25
|
+
// Probe size every N appends — amortizes the stat() call.
|
|
26
|
+
const TRIM_CHECK_INTERVAL = 200;
|
|
27
|
+
let appendsSinceCheck = 0;
|
|
16
28
|
export function appendAudit(entry) {
|
|
17
29
|
try {
|
|
18
30
|
fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
|
|
@@ -21,11 +33,39 @@ export function appendAudit(entry) {
|
|
|
21
33
|
prompt: entry.prompt ? truncate(entry.prompt, PROMPT_PREVIEW_CHARS) : undefined,
|
|
22
34
|
};
|
|
23
35
|
fs.appendFileSync(AUDIT_FILE, JSON.stringify(safe) + '\n');
|
|
36
|
+
appendsSinceCheck++;
|
|
37
|
+
if (appendsSinceCheck >= TRIM_CHECK_INTERVAL) {
|
|
38
|
+
appendsSinceCheck = 0;
|
|
39
|
+
enforceRetention();
|
|
40
|
+
}
|
|
24
41
|
}
|
|
25
42
|
catch {
|
|
26
43
|
/* best-effort — never break the agent loop on audit-write failure */
|
|
27
44
|
}
|
|
28
45
|
}
|
|
46
|
+
/**
|
|
47
|
+
* Trim the audit log to the last MAX_AUDIT_ENTRIES lines if it has grown
|
|
48
|
+
* past the cap. Exported so admin/debug tooling (and tests) can force a
|
|
49
|
+
* compaction without waiting for the next interval probe.
|
|
50
|
+
*/
|
|
51
|
+
export function enforceRetention() {
|
|
52
|
+
try {
|
|
53
|
+
if (!fs.existsSync(AUDIT_FILE))
|
|
54
|
+
return;
|
|
55
|
+
const stat = fs.statSync(AUDIT_FILE);
|
|
56
|
+
if (stat.size < TRIM_PROBE_BYTES)
|
|
57
|
+
return;
|
|
58
|
+
const content = fs.readFileSync(AUDIT_FILE, 'utf-8');
|
|
59
|
+
const lines = content.split('\n').filter(Boolean);
|
|
60
|
+
if (lines.length <= MAX_AUDIT_ENTRIES)
|
|
61
|
+
return;
|
|
62
|
+
const kept = lines.slice(lines.length - MAX_AUDIT_ENTRIES);
|
|
63
|
+
fs.writeFileSync(AUDIT_FILE, kept.join('\n') + '\n');
|
|
64
|
+
}
|
|
65
|
+
catch {
|
|
66
|
+
/* best-effort */
|
|
67
|
+
}
|
|
68
|
+
}
|
|
29
69
|
export function getAuditFilePath() {
|
|
30
70
|
return AUDIT_FILE;
|
|
31
71
|
}
|
package/dist/stats/insights.d.ts
CHANGED
|
@@ -49,6 +49,25 @@ export interface InsightsReport {
|
|
|
49
49
|
avgRequestCostUsd: number;
|
|
50
50
|
/** Efficiency: cost per 1K tokens */
|
|
51
51
|
costPer1KTokens: number;
|
|
52
|
+
/**
|
|
53
|
+
* Cost breakdown by capability category. Lets the UI show a clean
|
|
54
|
+
* "where did your USDC go" split alongside the per-model bar list.
|
|
55
|
+
* - chat: LLM token-billed calls (anything with non-zero tokens)
|
|
56
|
+
* - media: ImageGen / VideoGen / MusicGen (per_image / per_second / per_track)
|
|
57
|
+
* - sandbox: Modal GPU sandbox lifecycle (create / exec / status / terminate)
|
|
58
|
+
*
|
|
59
|
+
* Categorization is by `model` name prefix first, then by token count:
|
|
60
|
+
* - `modal/*` → sandbox
|
|
61
|
+
* - rows with 0 input + 0 output tokens → media (image/video/music are
|
|
62
|
+
* stored with 0 tokens by recordUsage; modal/* matches first)
|
|
63
|
+
* - everything else → chat
|
|
64
|
+
*/
|
|
65
|
+
byCategory: {
|
|
66
|
+
chatCostUsd: number;
|
|
67
|
+
mediaCostUsd: number;
|
|
68
|
+
sandboxCostUsd: number;
|
|
69
|
+
sandboxRequests: number;
|
|
70
|
+
};
|
|
52
71
|
}
|
|
53
72
|
export declare function generateInsights(days?: number): InsightsReport;
|
|
54
73
|
export declare function formatInsights(report: InsightsReport, days: number): string;
|
package/dist/stats/insights.js
CHANGED
|
@@ -23,11 +23,28 @@ export function generateInsights(days = 30) {
|
|
|
23
23
|
let totalCost = 0;
|
|
24
24
|
let totalInput = 0;
|
|
25
25
|
let totalOutput = 0;
|
|
26
|
+
// Category totals — see InsightsReport.byCategory doc.
|
|
27
|
+
let chatCost = 0;
|
|
28
|
+
let mediaCost = 0;
|
|
29
|
+
let sandboxCost = 0;
|
|
30
|
+
let sandboxRequests = 0;
|
|
26
31
|
const modelAgg = new Map();
|
|
27
32
|
for (const r of windowHistory) {
|
|
28
33
|
totalCost += r.costUsd;
|
|
29
34
|
totalInput += r.inputTokens;
|
|
30
35
|
totalOutput += r.outputTokens;
|
|
36
|
+
// Categorize: modal/* always goes to sandbox; zero-token entries are
|
|
37
|
+
// media (image/video/music recordUsage stores 0/0 tokens); rest = chat.
|
|
38
|
+
if (r.model.startsWith('modal/')) {
|
|
39
|
+
sandboxCost += r.costUsd;
|
|
40
|
+
sandboxRequests++;
|
|
41
|
+
}
|
|
42
|
+
else if ((r.inputTokens + r.outputTokens) === 0) {
|
|
43
|
+
mediaCost += r.costUsd;
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
chatCost += r.costUsd;
|
|
47
|
+
}
|
|
31
48
|
const existing = modelAgg.get(r.model) ?? {
|
|
32
49
|
requests: 0,
|
|
33
50
|
costUsd: 0,
|
|
@@ -101,6 +118,12 @@ export function generateInsights(days = 30) {
|
|
|
101
118
|
projections,
|
|
102
119
|
avgRequestCostUsd,
|
|
103
120
|
costPer1KTokens,
|
|
121
|
+
byCategory: {
|
|
122
|
+
chatCostUsd: chatCost,
|
|
123
|
+
mediaCostUsd: mediaCost,
|
|
124
|
+
sandboxCostUsd: sandboxCost,
|
|
125
|
+
sandboxRequests,
|
|
126
|
+
},
|
|
104
127
|
};
|
|
105
128
|
}
|
|
106
129
|
// ─── Format for Display ───────────────────────────────────────────────────
|
package/dist/tools/index.js
CHANGED
|
@@ -29,6 +29,7 @@ import { jupiterQuoteCapability, jupiterSwapCapability } from './jupiter.js';
|
|
|
29
29
|
import { base0xQuoteCapability, base0xSwapCapability } from './zerox-base.js';
|
|
30
30
|
import { base0xGaslessSwapCapability } from './zerox-gasless.js';
|
|
31
31
|
import { defiLlamaProtocolsCapability, defiLlamaProtocolCapability, defiLlamaChainsCapability, defiLlamaYieldsCapability, defiLlamaPriceCapability, } from './defillama.js';
|
|
32
|
+
import { modalCapabilities } from './modal.js';
|
|
32
33
|
import { createTradingCapabilities } from './trading-execute.js';
|
|
33
34
|
import { Portfolio } from '../trading/portfolio.js';
|
|
34
35
|
import { RiskEngine } from '../trading/risk.js';
|
|
@@ -158,6 +159,11 @@ export const allCapabilities = [
|
|
|
158
159
|
defiLlamaChainsCapability,
|
|
159
160
|
defiLlamaYieldsCapability,
|
|
160
161
|
defiLlamaPriceCapability,
|
|
162
|
+
// Modal GPU sandbox tools — registered but hidden by default (not in
|
|
163
|
+
// CORE_TOOL_NAMES). Agent must `ActivateTool({names:["ModalCreate",...]})`
|
|
164
|
+
// before they appear in its tool inventory. High-cost ($0.40/H100 create)
|
|
165
|
+
// operations should not be in the default surface.
|
|
166
|
+
...modalCapabilities, // ModalCreate, ModalExec, ModalStatus, ModalTerminate
|
|
161
167
|
];
|
|
162
168
|
export { readCapability, writeCapability, editCapability, bashCapability, globCapability, grepCapability, webFetchCapability, webSearchCapability, taskCapability, detachCapability, };
|
|
163
169
|
export { createSubAgentCapability } from './subagent.js';
|
package/dist/tools/modal.d.ts
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Modal Sandbox capabilities — spin up GPU/CPU compute on Modal Labs via the
|
|
3
|
+
* BlockRun gateway's x402-paid passthrough at /v1/modal/sandbox/{create, exec,
|
|
4
|
+
* status, terminate}. See https://modal.com/docs/guide/sandboxes for the
|
|
5
|
+
* underlying primitives.
|
|
6
|
+
*
|
|
7
|
+
* Pricing (per-call, USDC):
|
|
8
|
+
* create: $0.01 (CPU) / $0.05 (T4) / $0.08 (L4) / $0.10 (A10G) / $0.20 (A100) / $0.40 (H100)
|
|
9
|
+
* exec: $0.001
|
|
10
|
+
* status: $0.001
|
|
11
|
+
* terminate: $0.001
|
|
12
|
+
*
|
|
13
|
+
* Gateway constraints (probed 2026-05-02):
|
|
14
|
+
* - image is fixed at python:3.11 — no custom containers yet.
|
|
15
|
+
* - command is execve-style (string[]), not a shell string. We accept a
|
|
16
|
+
* plain string from the LLM and auto-wrap to ["sh","-c", string].
|
|
17
|
+
* - No stdin / env / workdir / streaming on exec — keep commands self-
|
|
18
|
+
* contained and idempotent.
|
|
19
|
+
* - No upload/download endpoints — files in/out via exec heredoc / curl.
|
|
20
|
+
*
|
|
21
|
+
* Lifecycle:
|
|
22
|
+
* ModalCreate → returns sandbox_id, charged at GPU tier
|
|
23
|
+
* ModalExec → sync, returns { stdout, stderr, exit_code }
|
|
24
|
+
* ModalStatus → check running/terminated
|
|
25
|
+
* ModalTerminate → release; called automatically at session end via
|
|
26
|
+
* the SessionSandboxTracker registry.
|
|
27
|
+
*/
|
|
28
|
+
import type { CapabilityHandler } from '../agent/types.js';
|
|
29
|
+
export interface SandboxRecord {
|
|
30
|
+
id: string;
|
|
31
|
+
gpu: string;
|
|
32
|
+
createdAt: number;
|
|
33
|
+
timeoutSeconds?: number;
|
|
34
|
+
}
|
|
35
|
+
declare class SessionSandboxTracker {
|
|
36
|
+
private sandboxes;
|
|
37
|
+
add(rec: SandboxRecord): void;
|
|
38
|
+
remove(id: string): void;
|
|
39
|
+
list(): SandboxRecord[];
|
|
40
|
+
/** Snapshot then clear — used by the session cleanup hook. */
|
|
41
|
+
drainIds(): string[];
|
|
42
|
+
}
|
|
43
|
+
export declare const sessionSandboxTracker: SessionSandboxTracker;
|
|
44
|
+
export declare const modalCreateCapability: CapabilityHandler;
|
|
45
|
+
export declare const modalExecCapability: CapabilityHandler;
|
|
46
|
+
export declare const modalStatusCapability: CapabilityHandler;
|
|
47
|
+
export declare const modalTerminateCapability: CapabilityHandler;
|
|
48
|
+
/**
|
|
49
|
+
* Terminate every sandbox the current session has created. Called from
|
|
50
|
+
* vscode-session.ts at session end (and the SessionToolGuard cleanup path)
|
|
51
|
+
* so a missed agent ModalTerminate doesn't leave Modal billing the user
|
|
52
|
+
* up to the per-sandbox timeout. Best-effort: failures are logged but
|
|
53
|
+
* don't block session shutdown.
|
|
54
|
+
*/
|
|
55
|
+
export declare function terminateAllSessionSandboxes(opts?: {
|
|
56
|
+
abortSignal?: AbortSignal;
|
|
57
|
+
}): Promise<{
|
|
58
|
+
attempted: number;
|
|
59
|
+
succeeded: number;
|
|
60
|
+
failed: Array<{
|
|
61
|
+
id: string;
|
|
62
|
+
error: string;
|
|
63
|
+
}>;
|
|
64
|
+
}>;
|
|
65
|
+
export declare const modalCapabilities: CapabilityHandler[];
|
|
66
|
+
export {};
|
package/dist/tools/modal.js
ADDED
|
@@ -0,0 +1,639 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Modal Sandbox capabilities — spin up GPU/CPU compute on Modal Labs via the
|
|
3
|
+
* BlockRun gateway's x402-paid passthrough at /v1/modal/sandbox/{create, exec,
|
|
4
|
+
* status, terminate}. See https://modal.com/docs/guide/sandboxes for the
|
|
5
|
+
* underlying primitives.
|
|
6
|
+
*
|
|
7
|
+
* Pricing (per-call, USDC):
|
|
8
|
+
* create: $0.01 (CPU) / $0.05 (T4) / $0.08 (L4) / $0.10 (A10G) / $0.20 (A100) / $0.40 (H100)
|
|
9
|
+
* exec: $0.001
|
|
10
|
+
* status: $0.001
|
|
11
|
+
* terminate: $0.001
|
|
12
|
+
*
|
|
13
|
+
* Gateway constraints (probed 2026-05-02):
|
|
14
|
+
* - image is fixed at python:3.11 — no custom containers yet.
|
|
15
|
+
* - command is execve-style (string[]), not a shell string. We accept a
|
|
16
|
+
* plain string from the LLM and auto-wrap to ["sh","-c", string].
|
|
17
|
+
* - No stdin / env / workdir / streaming on exec — keep commands self-
|
|
18
|
+
* contained and idempotent.
|
|
19
|
+
* - No upload/download endpoints — files in/out via exec heredoc / curl.
|
|
20
|
+
*
|
|
21
|
+
* Lifecycle:
|
|
22
|
+
* ModalCreate → returns sandbox_id, charged at GPU tier
|
|
23
|
+
* ModalExec → sync, returns { stdout, stderr, exit_code }
|
|
24
|
+
* ModalStatus → check running/terminated
|
|
25
|
+
* ModalTerminate → release; called automatically at session end via
|
|
26
|
+
* the SessionSandboxTracker registry.
|
|
27
|
+
*/
|
|
28
|
+
import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
|
|
29
|
+
import { loadChain, API_URLS, VERSION } from '../config.js';
|
|
30
|
+
import { walletReservation } from '../wallet/reservation.js';
|
|
31
|
+
import { recordUsage } from '../stats/tracker.js';
|
|
32
|
+
// ─── Pricing table (probed from /.well-known/x402 + 402 responses) ─────────
|
|
33
|
+
const CREATE_PRICE_USD = {
|
|
34
|
+
cpu: 0.01,
|
|
35
|
+
T4: 0.05,
|
|
36
|
+
L4: 0.08,
|
|
37
|
+
A10G: 0.10,
|
|
38
|
+
A100: 0.20,
|
|
39
|
+
H100: 0.40,
|
|
40
|
+
};
|
|
41
|
+
const EXEC_PRICE_USD = 0.001;
|
|
42
|
+
const STATUS_PRICE_USD = 0.001;
|
|
43
|
+
const TERMINATE_PRICE_USD = 0.001;
|
|
44
|
+
const VALID_GPUS = new Set(Object.keys(CREATE_PRICE_USD).filter(g => g !== 'cpu'));
|
|
45
|
+
class SessionSandboxTracker {
|
|
46
|
+
sandboxes = new Map();
|
|
47
|
+
add(rec) {
|
|
48
|
+
this.sandboxes.set(rec.id, rec);
|
|
49
|
+
}
|
|
50
|
+
remove(id) {
|
|
51
|
+
this.sandboxes.delete(id);
|
|
52
|
+
}
|
|
53
|
+
list() {
|
|
54
|
+
return [...this.sandboxes.values()].sort((a, b) => b.createdAt - a.createdAt);
|
|
55
|
+
}
|
|
56
|
+
/** Snapshot then clear — used by the session cleanup hook. */
|
|
57
|
+
drainIds() {
|
|
58
|
+
const ids = [...this.sandboxes.keys()];
|
|
59
|
+
this.sandboxes.clear();
|
|
60
|
+
return ids;
|
|
61
|
+
}
|
|
62
|
+
}
|
|
63
|
+
export const sessionSandboxTracker = new SessionSandboxTracker();
|
|
64
|
+
// ─── x402 payment signing — same shape as imagegen's helper ───────────────
|
|
65
|
+
async function signPayment(response, chain, endpoint, resourceDescription) {
|
|
66
|
+
try {
|
|
67
|
+
const paymentHeader = await extractPaymentReq(response);
|
|
68
|
+
if (!paymentHeader)
|
|
69
|
+
return null;
|
|
70
|
+
if (chain === 'solana') {
|
|
71
|
+
const wallet = await getOrCreateSolanaWallet();
|
|
72
|
+
const paymentRequired = parsePaymentRequired(paymentHeader);
|
|
73
|
+
const details = extractPaymentDetails(paymentRequired, SOLANA_NETWORK);
|
|
74
|
+
const secretBytes = await solanaKeyToBytes(wallet.privateKey);
|
|
75
|
+
const feePayer = details.extra?.feePayer || details.recipient;
|
|
76
|
+
const payload = await createSolanaPaymentPayload(secretBytes, wallet.address, details.recipient, details.amount, feePayer, {
|
|
77
|
+
resourceUrl: details.resource?.url || endpoint,
|
|
78
|
+
resourceDescription: details.resource?.description || resourceDescription,
|
|
79
|
+
maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
|
|
80
|
+
extra: details.extra,
|
|
81
|
+
});
|
|
82
|
+
return { 'PAYMENT-SIGNATURE': payload };
|
|
83
|
+
}
|
|
84
|
+
else {
|
|
85
|
+
const wallet = getOrCreateWallet();
|
|
86
|
+
const paymentRequired = parsePaymentRequired(paymentHeader);
|
|
87
|
+
const details = extractPaymentDetails(paymentRequired);
|
|
88
|
+
const payload = await createPaymentPayload(wallet.privateKey, wallet.address, details.recipient, details.amount, details.network || 'eip155:8453', {
|
|
89
|
+
resourceUrl: details.resource?.url || endpoint,
|
|
90
|
+
resourceDescription: details.resource?.description || resourceDescription,
|
|
91
|
+
maxTimeoutSeconds: details.maxTimeoutSeconds || 300,
|
|
92
|
+
extra: details.extra,
|
|
93
|
+
});
|
|
94
|
+
return { 'PAYMENT-SIGNATURE': payload };
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
catch (err) {
|
|
98
|
+
console.error(`[franklin] Modal payment error: ${err.message}`);
|
|
99
|
+
return null;
|
|
100
|
+
}
|
|
101
|
+
}
|
|
102
|
+
/**
 * Pull the x402 payment requirements out of a 402 response.
 *
 * The `payment-required` header wins when present. Otherwise the JSON body is
 * inspected: if it carries an `x402` or `accepts` field, the whole body is
 * re-encoded as base64 JSON so it has the same shape as the header form.
 * Any failure while reading or encoding the body is ignored and the
 * (empty) header value is returned unchanged.
 *
 * @param {Response} response - the 402 response from the gateway
 * @returns {Promise<string|null>} base64 payment requirements, or the empty header value
 */
async function extractPaymentReq(response) {
    const fromHeader = response.headers.get('payment-required');
    if (fromHeader) {
        return fromHeader;
    }
    try {
        const payload = await response.json();
        const looksLikeX402 = Boolean(payload.x402 || payload.accepts);
        return looksLikeX402 ? btoa(JSON.stringify(payload)) : fromHeader;
    }
    catch {
        // Unparseable / non-encodable body: behave as if nothing was offered.
        return fromHeader;
    }
}
|
|
115
|
+
/**
 * Generic POST-with-x402-retry helper used by all four Modal endpoints.
 *
 * The first POST is sent unpaid. If the gateway answers 402, the payment
 * requirements are signed via `signPayment` and the same payload is POSTed
 * once more with the signed payment header(s) merged in.
 *
 * Both requests share one AbortController that fires when either the
 * caller's `abortSignal` aborts or `timeoutMs` elapses.
 *
 * @param {string} endpoint - absolute gateway URL
 * @param {object} body - JSON-serialisable request body
 * @param {string} resourceDescription - fallback description used when signing
 * @param {AbortSignal} [abortSignal] - caller's cancellation signal (optional)
 * @param {number} timeoutMs - overall deadline for the whole exchange
 * @returns {Promise<{ok: boolean, status: number, body: object, raw: string}>}
 *   `body` is the parsed JSON response (`{}` when empty or not JSON) and
 *   `raw` is the response text. A failed signing step yields
 *   `{ ok: false, status: 402, body: { error: 'payment signing failed' }, raw: '' }`.
 * @throws whatever `fetch` rejects with (network failure, abort, timeout)
 */
async function postWithPayment(endpoint, body, resourceDescription, abortSignal, timeoutMs) {
    const chain = loadChain();
    const headers = {
        'Content-Type': 'application/json',
        'User-Agent': `franklin/${VERSION}`,
    };
    const ctrl = new AbortController();
    const onParentAbort = () => ctrl.abort();
    if (abortSignal?.aborted) {
        // An already-aborted signal never re-fires 'abort', so a listener
        // alone would let the request run until the timeout.
        ctrl.abort();
    }
    else {
        abortSignal?.addEventListener('abort', onParentAbort, { once: true });
    }
    const timer = setTimeout(() => ctrl.abort(), timeoutMs);
    try {
        const payload = JSON.stringify(body);
        let response = await fetch(endpoint, { method: 'POST', signal: ctrl.signal, headers, body: payload });
        if (response.status === 402) {
            const paymentHeaders = await signPayment(response, chain, endpoint, resourceDescription);
            if (!paymentHeaders) {
                return { ok: false, status: 402, body: { error: 'payment signing failed' }, raw: '' };
            }
            response = await fetch(endpoint, {
                method: 'POST',
                signal: ctrl.signal,
                headers: { ...headers, ...paymentHeaders },
                body: payload,
            });
        }
        const raw = await response.text().catch(() => '');
        let parsed = {};
        try {
            parsed = raw ? JSON.parse(raw) : {};
        }
        catch { /* non-JSON body: callers fall back to `raw` */ }
        return { ok: response.ok, status: response.status, body: parsed, raw };
    }
    finally {
        clearTimeout(timer);
        abortSignal?.removeEventListener('abort', onParentAbort);
    }
}
|
|
159
|
+
// ─── Helpers ─────────────────────────────────────────────────────────────
|
|
160
|
+
/**
 * Build the gateway URL for a Modal sandbox operation.
 *
 * @param {string} path - last URL segment, e.g. `create`, `exec`, `status`, `terminate`
 * @returns {string} `<API base for the configured chain>/v1/modal/sandbox/<path>`
 */
function modalEndpoint(path) {
    const gatewayBase = API_URLS[loadChain()];
    return `${gatewayBase}/v1/modal/sandbox/${path}`;
}
|
|
164
|
+
/**
 * Turn the agent-supplied `command` into the execve-style argv array the
 * Modal gateway expects.
 *
 * - A non-blank string is treated as a shell line and wrapped as
 *   `["sh", "-c", <string>]` (the string itself is passed through untrimmed).
 * - A non-empty array made only of strings is returned as-is (same reference).
 * - Anything else yields `null` so the caller can report a usage error.
 *
 * @param {unknown} input
 * @returns {string[] | null}
 */
function normalizeCommand(input) {
    if (typeof input === 'string') {
        return input.trim().length > 0 ? ['sh', '-c', input] : null;
    }
    if (!Array.isArray(input) || input.length === 0) {
        return null;
    }
    const allStrings = input.every((part) => typeof part === 'string');
    return allStrings ? input : null;
}
|
|
182
|
+
/**
 * Format a USD amount for display: four decimals for sub-cent values
 * (anything below $0.01), two decimals otherwise.
 *
 * @param {number} n - amount in USD
 * @returns {string} e.g. `$0.0010` or `$0.40`
 */
function fmtUsd(n) {
    const decimals = n < 0.01 ? 4 : 2;
    return `$${n.toFixed(decimals)}`;
}
|
|
187
|
+
/**
 * ModalCreate tool: creates a Modal sandbox through the gateway and registers
 * it with the session sandbox tracker so it can be cleaned up later.
 *
 * Flow of `execute`:
 *   1. Normalise `gpu` (case-insensitive match) and coerce numeric fields.
 *   2. Reject timeouts above the 300s cap before any money moves.
 *   3. Show a cost preview via `ctx.onAskUser` unless auto-approve is set.
 *   4. Hold the create price in the wallet reservation layer.
 *   5. POST to the gateway (x402 retry handled by `postWithPayment`).
 *   6. Track the sandbox, record usage, and return a summary.
 *
 * Returns `{ output, isError? }`; network/abort errors from the POST propagate.
 */
export const modalCreateCapability = {
    spec: {
        name: 'ModalCreate',
        // NOTE(review): the "1 vCPU, 1 GiB RAM defaults" line below disagrees with
        // the cpu/memory schema descriptions (0.125 cores / 128 MB) — confirm
        // which one matches the gateway.
        description: 'Create a Modal Python 3.11 sandbox (CPU or GPU) via the BlockRun gateway. ' +
            'Returns a sandbox_id you pass to ModalExec. Charged once per create at the ' +
            'GPU tier price: CPU $0.01, T4 $0.05, L4 $0.08, A10G $0.10, A100 $0.20, H100 $0.40. ' +
            'IMPORTANT — current limitations (BlockRun gateway is in early-access for sandboxes):\n' +
            ' - sandbox lifetime: 5 minutes MAX (gateway hard-cap, regardless of GPU tier)\n' +
            ' - per ModalExec call: 60 seconds MAX wall-clock\n' +
            ' - Python 3.11 only, no custom images yet\n' +
            ' - 1 vCPU, 1 GiB RAM defaults\n' +
            ' - GPU access is preview-tier (officially "coming later" in docs)\n' +
            ' - No setup-time provisioning — every sandbox starts empty\n' +
            'These limits make this tool suitable for: GPU benchmarks (nvidia-smi, matmul), ' +
            'small model inference (≤3B params if weights pre-cached), CUDA kernel validation, ' +
            'short ad-hoc Python tasks. NOT suitable for: full LoRA / fine-tuning runs, ' +
            'pip install + model download + training (pip alone burns 1-2 min of the 5-min budget). ' +
            'Custom images + longer lifetime + GPU production tier are documented as "coming later" ' +
            'by BlockRun — for serious ML workloads tell the user to use Modal directly until then. ' +
            'Always call ModalTerminate when done. ' +
            'Long-running command pattern: each ModalExec call is itself capped at 60s wall-clock. ' +
            'For work that takes >60s (pip install, model download, training), use the ' +
            'fire-and-poll pattern: ModalExec(["sh","-c","nohup <cmd> > /workspace/log 2>&1 &"]) ' +
            'returns in <1s, then poll with subsequent ModalExec(["cat","/workspace/log"]) calls.',
        input_schema: {
            type: 'object',
            properties: {
                gpu: { type: 'string', description: 'GPU tier. One of T4, L4, A10G, A100, H100. Omit for CPU-only ($0.01).' },
                timeout: { type: 'number', description: 'Lifetime cap in seconds. Default + Max = 300 (5 min). Gateway rejects values > 300 with HTTP 400.' },
                cpu: { type: 'number', description: 'Number of CPU cores. Default 0.125, max 8.' },
                memory: { type: 'number', description: 'Memory MB. Default 128, max 32768.' },
            },
        },
    },
    concurrent: false,
    async execute(input, ctx) {
        const raw = input;
        // ── Client-side coercion ────────────────────────────────────────────
        // LLMs routinely pass numeric fields as strings ("timeout":"300") and
        // GPU tier in lowercase ("t4"). The gateway's schema is strict and
        // 400s on either, so fix the obvious mistakes before they leave the
        // client.
        let gpu = raw.gpu;
        if (typeof gpu === 'string') {
            const matched = [...VALID_GPUS].find(g => g.toLowerCase() === gpu.toLowerCase());
            if (matched)
                gpu = matched;
        }
        if (gpu && !VALID_GPUS.has(gpu)) {
            return {
                output: `Error: invalid gpu "${gpu}". Allowed: ${[...VALID_GPUS].join(', ')} (or omit for CPU).`,
                isError: true,
            };
        }
        // `||` rather than `??`: an empty-string gpu passes the check above and
        // must mean "CPU only", otherwise the tier has no price entry.
        const tier = gpu || 'cpu';
        const price = CREATE_PRICE_USD[tier];
        // Coerce numeric fields. Reject NaN explicitly so we don't ship
        // garbage to the gateway. Returns undefined (absent), a finite number,
        // or an `{ error }` object.
        const coerceNum = (v, name) => {
            if (v === undefined || v === null || v === '')
                return undefined;
            const n = typeof v === 'string' ? Number(v) : v;
            if (typeof n !== 'number' || !Number.isFinite(n)) {
                return { error: `${name} must be a number, got ${typeof v}: ${JSON.stringify(v)}` };
            }
            return n;
        };
        const timeoutCoerced = coerceNum(raw.timeout, 'timeout');
        const cpuCoerced = coerceNum(raw.cpu, 'cpu');
        const memoryCoerced = coerceNum(raw.memory, 'memory');
        for (const c of [timeoutCoerced, cpuCoerced, memoryCoerced]) {
            if (c && typeof c === 'object' && 'error' in c) {
                return { output: `Error: ${c.error}`, isError: true };
            }
        }
        // Gateway hard-caps sandbox lifetime at 300s. Cap client-side so we
        // surface a clear error instead of paying for a create that 400s.
        const CREATE_TIMEOUT_MAX = 300;
        if (typeof timeoutCoerced === 'number' && timeoutCoerced > CREATE_TIMEOUT_MAX) {
            return {
                output: `Error: timeout ${timeoutCoerced}s exceeds gateway max of ${CREATE_TIMEOUT_MAX}s. ` +
                    `BlockRun caps Modal sandbox lifetime at 5 minutes regardless of GPU tier. ` +
                    `For longer workloads, the work must be split across multiple sandboxes ` +
                    `(checkpoint + reload) or you need to ask BlockRun to lift this cap.`,
                isError: true,
            };
        }
        // From here on only the coerced values are used, so the preview, the
        // tracker entry and the result text agree with what is actually sent.
        const timeoutSec = timeoutCoerced ?? 300;
        // ── AskUser cost preview (skipped if env auto-approve or non-UI mode) ──
        const autoApprove = process.env.FRANKLIN_MEDIA_AUTO_APPROVE_ALL === '1';
        if (ctx.onAskUser && !autoApprove) {
            const lines = [
                `Create Modal sandbox?`,
                ``,
                ` Tier: ${tier === 'cpu' ? 'CPU only' : `GPU ${tier}`}`,
                ` Image: python:3.11`,
                ` Timeout: ${timeoutSec}s (${(timeoutSec / 60).toFixed(1)} min)`,
                ...(cpuCoerced ? [` CPU cores: ${cpuCoerced}`] : []),
                ...(memoryCoerced ? [` Memory: ${memoryCoerced} MB`] : []),
                ``,
                `Create cost: ${fmtUsd(price)} (one-time)`,
                `Each exec: ${fmtUsd(EXEC_PRICE_USD)}`,
                `Terminate: ${fmtUsd(TERMINATE_PRICE_USD)}`,
            ];
            try {
                const answer = await ctx.onAskUser(lines.join('\n'), ['Approve', 'Cancel']);
                if (answer !== 'Approve') {
                    return { output: '## Sandbox creation cancelled\n\nNo USDC was spent.' };
                }
            }
            catch {
                // askUser failed (UI gone) — fall through and create. Better than
                // silently aborting in headless contexts.
            }
        }
        // Wallet reservation — block over-spend if other in-flight calls hold balance.
        let reservation = null;
        try {
            reservation = await walletReservation.hold(price);
            if (!reservation) {
                return {
                    output: `Insufficient USDC for ModalCreate (${tier}, ~${fmtUsd(price)}). ` +
                        `Other in-flight paid calls may be holding your balance — wait or fund the wallet.`,
                    isError: true,
                };
            }
        }
        catch { /* fall through, x402 will surface real error */ }
        try {
            const body = {};
            if (gpu)
                body.gpu = gpu;
            if (typeof timeoutCoerced === 'number')
                body.timeout = timeoutCoerced;
            if (typeof cpuCoerced === 'number')
                body.cpu = cpuCoerced;
            if (typeof memoryCoerced === 'number')
                body.memory = memoryCoerced;
            const res = await postWithPayment(modalEndpoint('create'), body, 'Franklin Modal sandbox create', ctx.abortSignal, 90_000);
            if (!res.ok) {
                const err = res.body.error ? String(res.body.error) : res.raw.slice(0, 300);
                // Surface the per-field validation issues — usually the
                // actionable bit ("expected number, received string at path
                // ['timeout']").
                const details = Array.isArray(res.body.details)
                    ? '\nDetails: ' + res.body.details.map((d) => `${d.path?.join('.') ?? '?'}: ${d.message ?? JSON.stringify(d)}`).join('; ')
                    : '';
                return {
                    output: `ModalCreate failed (${res.status}): ${err}${details}`,
                    isError: true,
                };
            }
            // Accept either `sandbox_id` or `id` from the gateway.
            const sandboxId = (typeof res.body.sandbox_id === 'string' && res.body.sandbox_id) ||
                (typeof res.body.id === 'string' && res.body.id) ||
                '';
            if (!sandboxId) {
                return {
                    output: `ModalCreate returned no sandbox_id. Raw: ${res.raw.slice(0, 300)}`,
                    isError: true,
                };
            }
            sessionSandboxTracker.add({
                id: sandboxId,
                gpu: tier,
                createdAt: Date.now(),
                timeoutSeconds: timeoutSec,
            });
            // Stats — surface Modal usage in `franklin insights` like other paid tools.
            try {
                recordUsage(`modal/${tier}`, 0, 0, price, 0);
            }
            catch { /* stats are best-effort */ }
            return {
                output: `Sandbox created\n` +
                    `- id: \`${sandboxId}\`\n` +
                    `- tier: ${tier === 'cpu' ? 'CPU only' : `GPU ${tier}`}\n` +
                    `- timeout: ${timeoutSec}s\n` +
                    `- charged: ${fmtUsd(price)}\n\n` +
                    `Next: ModalExec({ sandbox_id: "${sandboxId}", command: ["python","-c","print(1)"] })`,
            };
        }
        finally {
            walletReservation.release(reservation);
        }
    },
};
|
|
375
|
+
/**
 * ModalExec tool: runs one command inside an existing sandbox and reports
 * stdout, stderr and the exit code.
 *
 * `execute` validates `sandbox_id` and `command`, best-effort reserves the
 * exec price, clamps the per-exec timeout to the 60s gateway cap, POSTs to
 * the gateway and formats the result. The reservation is always released.
 */
export const modalExecCapability = {
    spec: {
        name: 'ModalExec',
        description: 'Run a command inside a Modal sandbox (must already exist via ModalCreate). ' +
            '`command` accepts either an execve-style array (e.g. ["python","-c","print(1)"]) ' +
            'or a shell string (e.g. "pip install torch && python train.py") which is auto-wrapped ' +
            'as ["sh","-c", <string>]. Returns stdout, stderr, exit_code synchronously. ' +
            'Each call charges $0.001. The sandbox keeps state across exec calls (filesystem, ' +
            'installed pip packages, etc) until ModalTerminate. ' +
            'CRITICAL: timeout is HARD-CAPPED at 60 seconds by the gateway — anything longer ' +
            'returns HTTP 400. For long-running commands (pip install large packages, model ' +
            'downloads, training loops), use the fire-and-poll pattern: ' +
            ' exec1: ["sh","-c","nohup <slow-cmd> > /workspace/log 2>&1 & echo $! > /workspace/pid"] (<1s) ' +
            ' exec2: ["sh","-c","tail -50 /workspace/log"] (poll progress, <1s) ' +
            ' exec3: ["sh","-c","kill -0 $(cat /workspace/pid) 2>/dev/null && echo RUN || echo DONE"] (check live) ' +
            'This decouples actual work duration from the per-exec 60s ceiling, but the sandbox ' +
            'itself still dies at 300s wall-clock — total useful work fits in ~5 minutes.',
        input_schema: {
            type: 'object',
            properties: {
                sandbox_id: { type: 'string', description: 'Sandbox id from ModalCreate.' },
                command: {
                    description: 'Execve-style array OR shell string. Strings are wrapped as ["sh","-c", string].',
                },
                timeout: { type: 'number', description: 'Per-exec timeout in seconds. Default 60, MAX 60 (gateway hard cap). Use fire-and-poll for longer work.' },
            },
            required: ['sandbox_id', 'command'],
        },
    },
    concurrent: false,
    async execute(input, ctx) {
        const args = input;
        if (!args.sandbox_id) {
            return { output: 'Error: sandbox_id is required', isError: true };
        }
        const command = normalizeCommand(args.command);
        if (!command) {
            // JSON.stringify can itself yield undefined, hence the `??` below.
            const shown = args.command === undefined ? 'undefined (missing)' : JSON.stringify(args.command);
            return {
                output: `Error: invalid command. Expected a non-empty string or string[] of length >= 1. ` +
                    `Got: ${(shown ?? 'undefined').slice(0, 100)}`,
                isError: true,
            };
        }
        // Micro-cost call: a failed or refused hold never blocks the exec.
        let reservation = null;
        try {
            reservation = await walletReservation.hold(EXEC_PRICE_USD);
        }
        catch { /* ignore */ }
        try {
            // Numeric strings are accepted ("timeout":"30"). Anything missing,
            // non-numeric or above the gateway cap collapses to the cap.
            const EXEC_TIMEOUT_MAX = 60;
            const requested = typeof args.timeout === 'string' && args.timeout !== ''
                ? Number(args.timeout)
                : args.timeout;
            const usable = typeof requested === 'number' && Number.isFinite(requested) && requested <= EXEC_TIMEOUT_MAX;
            const execTimeout = usable ? requested : EXEC_TIMEOUT_MAX;
            const body = { sandbox_id: args.sandbox_id, command, timeout: execTimeout };
            // HTTP deadline = exec timeout + 30s headroom, never below 30s.
            const httpDeadlineMs = Math.max(30_000, (execTimeout + 30) * 1000);
            const res = await postWithPayment(modalEndpoint('exec'), body, 'Franklin Modal sandbox exec', ctx.abortSignal, httpDeadlineMs);
            if (!res.ok) {
                // Echo as much of the gateway's complaint as possible so the
                // agent can correct the request shape on its next turn.
                const reason = res.body.error ? String(res.body.error) : '(no error field)';
                const detailText = res.body.details ? `\nDetails: ${JSON.stringify(res.body.details)}` : '';
                const rawSnippet = res.raw.length > 500 ? res.raw.slice(0, 500) + '…' : res.raw;
                return {
                    output: `ModalExec failed (${res.status}): ${reason}${detailText}\n` +
                        `Raw response: ${rawSnippet}\n` +
                        `Sent: command=${JSON.stringify(command).slice(0, 200)}`,
                    isError: true,
                };
            }
            const stdout = typeof res.body.stdout === 'string' ? res.body.stdout : '';
            const stderr = typeof res.body.stderr === 'string' ? res.body.stderr : '';
            // The exit-code field name isn't pinned; take the first numeric one
            // in priority order. With none present, output implies success and
            // total silence implies failure.
            const exitKey = ['exit_code', 'exitCode', 'returncode', 'code']
                .find((key) => typeof res.body[key] === 'number');
            const rawExit = exitKey === undefined ? null : res.body[exitKey];
            const hasAnyOutput = stdout.length > 0 || stderr.length > 0;
            let exitCode;
            let isError;
            if (rawExit !== null) {
                exitCode = rawExit;
                isError = rawExit !== 0;
            }
            else {
                exitCode = hasAnyOutput ? 0 : -1;
                isError = !hasAnyOutput;
            }
            try {
                recordUsage('modal/exec', 0, 0, EXEC_PRICE_USD, 0);
            }
            catch { /* ignore */ }
            const inferredNote = rawExit === null ? ' (inferred — no exit_code field in response)' : '';
            const sections = [`\`${command.join(' ')}\` → exit ${exitCode}${inferredNote}`];
            if (stdout) {
                sections.push(`--- stdout ---\n${stdout}`);
            }
            if (stderr) {
                sections.push(`--- stderr ---\n${stderr}`);
            }
            return { output: sections.join('\n\n'), isError };
        }
        finally {
            walletReservation.release(reservation);
        }
    },
};
|
|
503
|
+
// ─── ModalStatus ─────────────────────────────────────────────────────────
|
|
504
|
+
/**
 * ModalStatus tool: asks the gateway for a sandbox's status and returns the
 * status word plus the full JSON response body.
 *
 * The status price is reserved best-effort (a failed hold never blocks the
 * call) and the reservation is always released afterwards.
 */
export const modalStatusCapability = {
    spec: {
        name: 'ModalStatus',
        description: 'Check the status of a Modal sandbox (running / terminated). Charges $0.001. ' +
            'Useful when you suspect a sandbox died or you want to confirm a previous ' +
            'ModalTerminate succeeded.',
        input_schema: {
            type: 'object',
            properties: {
                sandbox_id: { type: 'string' },
            },
            required: ['sandbox_id'],
        },
    },
    concurrent: false,
    async execute(input, ctx) {
        const sandboxId = input.sandbox_id;
        if (!sandboxId) {
            return { output: 'Error: sandbox_id is required', isError: true };
        }
        let heldFunds = null;
        try {
            heldFunds = await walletReservation.hold(STATUS_PRICE_USD);
        }
        catch { /* ignore */ }
        try {
            const reply = await postWithPayment(modalEndpoint('status'), { sandbox_id: sandboxId }, 'Franklin Modal sandbox status', ctx.abortSignal, 30_000);
            if (reply.ok) {
                try {
                    recordUsage('modal/status', 0, 0, STATUS_PRICE_USD, 0);
                }
                catch { /* ignore */ }
                const state = reply.body.status || 'unknown';
                const dump = JSON.stringify(reply.body, null, 2);
                return { output: `Sandbox \`${sandboxId}\` status: **${state}**\n\n${dump}` };
            }
            // Prefer the gateway's error field; fall back to the raw text.
            const reason = reply.body.error ? String(reply.body.error) : reply.raw.slice(0, 300);
            return { output: `ModalStatus failed (${reply.status}): ${reason}`, isError: true };
        }
        finally {
            walletReservation.release(heldFunds);
        }
    },
};
|
|
547
|
+
// ─── ModalTerminate ──────────────────────────────────────────────────────
|
|
548
|
+
/**
 * ModalTerminate tool: asks the gateway to terminate a sandbox and drops it
 * from the session sandbox tracker.
 *
 * The sandbox is removed from the tracker whenever the gateway answered at
 * all, success or not. A non-OK answer is flagged as an error when it is a
 * 5xx or a 402: a 402 means the payment was not made, so the terminate was
 * never executed. Other 4xx answers (e.g. already terminated) are benign.
 */
export const modalTerminateCapability = {
    spec: {
        name: 'ModalTerminate',
        description: 'Terminate a Modal sandbox and release its resources. Charges $0.001. ' +
            'Strongly recommended after every successful ModalExec sequence — ' +
            'Modal bills wall-clock GPU time until the sandbox terminates or hits ' +
            'its `timeout`. Session-end auto-cleanup also calls this for any sandboxes ' +
            'the agent forgot, but explicit is better.',
        input_schema: {
            type: 'object',
            properties: {
                sandbox_id: { type: 'string' },
            },
            required: ['sandbox_id'],
        },
    },
    concurrent: false,
    async execute(input, ctx) {
        const sandbox_id = input.sandbox_id;
        if (!sandbox_id)
            return { output: 'Error: sandbox_id is required', isError: true };
        // Micro-cost call: a failed or refused hold never blocks the terminate.
        let reservation = null;
        try {
            reservation = await walletReservation.hold(TERMINATE_PRICE_USD);
        }
        catch { /* ignore */ }
        try {
            const res = await postWithPayment(modalEndpoint('terminate'), { sandbox_id }, 'Franklin Modal sandbox terminate', ctx.abortSignal, 30_000);
            // Always remove from tracker — even on failure, retrying is wasteful.
            sessionSandboxTracker.remove(sandbox_id);
            if (!res.ok) {
                const err = res.body.error ? String(res.body.error) : res.raw.slice(0, 300);
                return {
                    output: `ModalTerminate returned ${res.status}: ${err}\n\n` +
                        `(Removed from local tracker regardless. Modal-side cleanup will happen at the timeout.)`,
                    // 402 = unpaid, so nothing was terminated; 5xx = gateway fault.
                    // Remaining 4xx (e.g. already-terminated) is benign.
                    isError: res.status >= 500 || res.status === 402,
                };
            }
            try {
                recordUsage('modal/terminate', 0, 0, TERMINATE_PRICE_USD, 0);
            }
            catch { /* stats are best-effort */ }
            return { output: `Sandbox \`${sandbox_id}\` terminated.` };
        }
        finally {
            walletReservation.release(reservation);
        }
    },
};
|
|
597
|
+
// ─── Bulk session cleanup ────────────────────────────────────────────────
|
|
598
|
+
/**
 * Terminate every sandbox the current session has created. Called at
 * session end so a missed agent ModalTerminate doesn't leave Modal billing
 * the user up to the per-sandbox timeout. Best-effort: failures are
 * collected in the result and never thrown.
 *
 * The tracker is drained up front, so ids that fail here are not retried.
 *
 * @param {{ abortSignal?: AbortSignal }} [opts] - optional cancellation signal
 * @returns {Promise<{attempted: number, succeeded: number, failed: Array<{id: string, error: string}>}>}
 */
export async function terminateAllSessionSandboxes(opts = {}) {
    const ids = sessionSandboxTracker.drainIds();
    const failed = [];
    let succeeded = 0;
    const ctrl = new AbortController();
    const parent = opts.abortSignal;
    const onParentAbort = () => ctrl.abort();
    if (parent) {
        if (parent.aborted)
            ctrl.abort();
        else
            parent.addEventListener('abort', onParentAbort, { once: true });
    }
    try {
        // Sequential — terminating a few sandboxes in parallel offers no real
        // win over serial, and serial keeps the wallet-reservation accounting
        // simple.
        for (const id of ids) {
            try {
                const res = await postWithPayment(modalEndpoint('terminate'), { sandbox_id: id }, 'Franklin Modal sandbox cleanup', ctrl.signal, 20_000);
                if (res.ok) {
                    succeeded++;
                    // Cleanup terminations are paid calls too; record them the
                    // same way ModalTerminate does so usage stats stay complete.
                    try {
                        recordUsage('modal/terminate', 0, 0, TERMINATE_PRICE_USD, 0);
                    }
                    catch { /* stats are best-effort */ }
                }
                else {
                    failed.push({ id, error: String(res.body.error ?? res.raw.slice(0, 200)) });
                }
            }
            catch (err) {
                // Non-Error throwables have no `.message`; stringify them instead.
                failed.push({ id, error: err instanceof Error ? err.message : String(err) });
            }
        }
    }
    finally {
        // Don't leave our listener attached to a long-lived caller signal.
        parent?.removeEventListener('abort', onParentAbort);
    }
    return { attempted: ids.length, succeeded, failed };
}
|
|
633
|
+
// ─── All-in-one export for index.ts registration ─────────────────────────
|
|
634
|
+
/**
 * The four Modal sandbox tool capabilities defined above (create, exec,
 * status, terminate), bundled as one array so they can be registered
 * together.
 */
export const modalCapabilities = [
    modalCreateCapability,
    modalExecCapability,
    modalStatusCapability,
    modalTerminateCapability,
];
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WalletReservation — local accounting layer for concurrent paid tool calls.
|
|
3
|
+
*
|
|
4
|
+
* Problem this solves: when N batch tools (ImageGen / VideoGen) run in
|
|
5
|
+
* parallel, each independently checks balance and dispatches its x402
|
|
6
|
+
* payment. With balance $0.20 and 6 calls × $0.04 each, all 6 see "$0.20
|
|
7
|
+
* available, $0.04 fits" and start; only 5 can actually settle on-chain,
|
|
8
|
+
* the rest fail mid-flight with insufficient-funds and the user sees
|
|
9
|
+
* partial completion with no preflight warning.
|
|
10
|
+
*
|
|
11
|
+
* The fix is *not* on-chain — x402 is fire-and-forget per-request, there's
|
|
12
|
+
* no real "hold" capability. Instead this is a per-process bookkeeping
|
|
13
|
+
* layer:
|
|
14
|
+
* 1. Tool calls hold(amount) before paying.
|
|
15
|
+
* 2. hold() refuses if (balance - sum(active reservations)) < amount.
|
|
16
|
+
* 3. After payment succeeds OR fails, tool calls release(token).
|
|
17
|
+
*
|
|
18
|
+
* Single-process JS guarantees the check-and-set is atomic (no real race),
|
|
19
|
+
* and balance is cached briefly so we don't hit the RPC for every hold.
|
|
20
|
+
*/
|
|
21
|
+
/**
 * Handle returned by a successful `hold()`. Pass it (or just its `id`)
 * back to `release()` once the payment attempt has finished.
 */
export interface ReservationToken {
    /** Identifier of this reservation; `release()` also accepts it as a bare string. */
    id: string;
    /** Amount in USD covered by this reservation. */
    amountUsd: number;
}
|
|
25
|
+
/**
 * Per-process bookkeeping of funds earmarked by in-flight paid tool calls.
 * Only the shared `walletReservation` instance below is exported; the class
 * itself is not.
 */
declare class WalletReservationManager {
    /** Active reservations, keyed by reservation id. */
    private reserved;
    /** Last fetched wallet balance together with the time it was fetched. */
    private cachedBalance;
    /** Pending balance fetch, shared by callers that arrive while it is running. */
    private balanceFetchInflight;
    /** Returns the wallet balance, served from the short-lived cache when fresh. */
    private fetchBalance;
    /** Sum of all active reservation amounts. */
    private totalReserved;
    /**
     * Try to reserve `amountUsd`. Returns a token on success, or null if
     * insufficient (balance - already-reserved < amountUsd). Caller MUST
     * release the token after the actual payment resolves, success or fail.
     */
    hold(amountUsd: number): Promise<ReservationToken | null>;
    /**
     * Release a hold. Idempotent — releasing the same token twice is a no-op.
     * Invalidate the balance cache so the next hold sees up-to-date state.
     */
    release(token: ReservationToken | string | null | undefined): void;
    /** Force the next hold() to refetch balance from chain. */
    invalidateBalance(): void;
    /** Snapshot of current reservation state — diagnostic / testing only. */
    snapshot(): {
        count: number;
        totalUsd: number;
    };
}
/** Shared reservation manager instance used by the paid tools. */
export declare const walletReservation: WalletReservationManager;
export {};
|
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* WalletReservation — local accounting layer for concurrent paid tool calls.
|
|
3
|
+
*
|
|
4
|
+
* Problem this solves: when N batch tools (ImageGen / VideoGen) run in
|
|
5
|
+
* parallel, each independently checks balance and dispatches its x402
|
|
6
|
+
* payment. With balance $0.20 and 6 calls × $0.04 each, all 6 see "$0.20
|
|
7
|
+
* available, $0.04 fits" and start; only 5 can actually settle on-chain,
|
|
8
|
+
* the rest fail mid-flight with insufficient-funds and the user sees
|
|
9
|
+
* partial completion with no preflight warning.
|
|
10
|
+
*
|
|
11
|
+
* The fix is *not* on-chain — x402 is fire-and-forget per-request, there's
|
|
12
|
+
* no real "hold" capability. Instead this is a per-process bookkeeping
|
|
13
|
+
* layer:
|
|
14
|
+
* 1. Tool calls hold(amount) before paying.
|
|
15
|
+
* 2. hold() refuses if (balance - sum(active reservations)) < amount.
|
|
16
|
+
* 3. After payment succeeds OR fails, tool calls release(token).
|
|
17
|
+
*
|
|
18
|
+
* Single-process JS guarantees the check-and-set is atomic (no real race),
|
|
19
|
+
* and balance is cached briefly so we don't hit the RPC for every hold.
|
|
20
|
+
*/
|
|
21
|
+
import { setupAgentWallet, setupAgentSolanaWallet } from '@blockrun/llm';
|
|
22
|
+
import { loadChain } from '../config.js';
|
|
23
|
+
const BALANCE_CACHE_MS = 5_000;
|
|
24
|
+
/**
 * In-memory bookkeeping of spending holds against the agent wallet balance.
 *
 * Callers `hold()` an amount before paying and `release()` the returned token
 * once the payment has resolved. A hold is refused when the fetched balance
 * minus the sum of active holds is smaller than the requested amount.
 */
class WalletReservationManager {
    /** Active holds: reservation id -> held amount. */
    reserved = new Map();
    /** Last fetched balance as `{ value, fetchedAt }`, or null when invalidated. */
    cachedBalance = null;
    /** Promise of the balance fetch currently in flight, or null when idle. */
    balanceFetchInflight = null;
    /**
     * Return the wallet balance, reusing a cached value younger than
     * BALANCE_CACHE_MS and sharing a single in-flight fetch between callers.
     *
     * @returns {Promise<number>} the balance reported by the wallet client, or
     *   `Infinity` when it could not be determined.
     */
    async fetchBalance() {
        const cached = this.cachedBalance;
        if (cached && Date.now() - cached.fetchedAt < BALANCE_CACHE_MS) {
            return cached.value;
        }
        if (this.balanceFetchInflight) {
            return this.balanceFetchInflight;
        }
        const pending = (async () => {
            try {
                // Resolved inside the try so a config failure degrades to
                // "unknown balance" instead of rejecting every hold().
                const chain = loadChain();
                if (chain === 'solana') {
                    const client = await setupAgentSolanaWallet({ silent: true });
                    return await client.getBalance();
                }
                const client = setupAgentWallet({ silent: true });
                return await client.getBalance();
            }
            catch {
                // If balance fetch fails, return Infinity so reservations don't
                // block — the actual payment will surface the real error. We'd
                // rather under-protect than block all paid tools on RPC flakiness.
                return Number.POSITIVE_INFINITY;
            }
        })().then((value) => {
            // Only the fetch that is still current may populate the cache. If
            // the balance was invalidated while this request was in flight, the
            // value may predate a settled payment and must not be cached.
            if (this.balanceFetchInflight === pending) {
                this.cachedBalance = { value, fetchedAt: Date.now() };
                this.balanceFetchInflight = null;
            }
            return value;
        });
        this.balanceFetchInflight = pending;
        return pending;
    }
    /**
     * Sum of all amounts currently held.
     *
     * @returns {number}
     */
    totalReserved() {
        let total = 0;
        for (const amount of this.reserved.values()) {
            total += amount;
        }
        return total;
    }
    /**
     * Try to reserve `amountUsd`. Returns a token on success, or null if
     * insufficient (balance - already-reserved < amountUsd). Caller MUST
     * release the token after the actual payment resolves, success or fail.
     *
     * @param {number} amountUsd amount to reserve.
     * @returns {Promise<{ id: string, amountUsd: number } | null>} a token, or
     *   null when the hold is refused (insufficient funds or an amount that is
     *   not a finite number).
     */
    async hold(amountUsd) {
        if (amountUsd <= 0) {
            // Free / zero-cost calls don't need accounting.
            return { id: `free-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`, amountUsd: 0 };
        }
        if (!Number.isFinite(amountUsd)) {
            // NaN / undefined / Infinity would be stored and turn the reserved
            // total into NaN, after which no comparison below could ever refuse
            // a hold. Refuse instead of poisoning the accounting.
            return null;
        }
        const balance = await this.fetchBalance();
        // No await between this check and the set below, so concurrent holds
        // cannot interleave here.
        const available = balance - this.totalReserved();
        if (available < amountUsd) {
            return null;
        }
        const id = `res-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
        this.reserved.set(id, amountUsd);
        return { id, amountUsd };
    }
    /**
     * Release a hold. Idempotent — releasing the same token twice is a no-op.
     * Invalidate the balance cache so the next hold sees up-to-date state.
     *
     * @param {{ id: string } | string | null | undefined} token the token
     *   returned by hold(), or just its id; falsy values are ignored.
     */
    release(token) {
        if (!token) {
            return;
        }
        const id = typeof token === 'string' ? token : token.id;
        if (this.reserved.delete(id)) {
            // A real payment may have just settled on-chain; force re-fetch
            // next time so subsequent holds see the post-payment balance.
            this.invalidateBalance();
        }
    }
    /** Force the next hold() to refetch balance from chain. */
    invalidateBalance() {
        this.cachedBalance = null;
        // Also abandon any fetch already in flight: it may have been started
        // before the change that triggered this invalidation, so its result
        // must neither be cached nor handed to later callers.
        this.balanceFetchInflight = null;
    }
    /** Snapshot of current reservation state — diagnostic / testing only. */
    snapshot() {
        return { count: this.reserved.size, totalUsd: this.totalReserved() };
    }
}
|
|
105
|
+
/** Module-level WalletReservationManager instance exported for importers of this module to share. */
export const walletReservation = new WalletReservationManager();
|
package/package.json
CHANGED