@blockrun/franklin 3.15.9 → 3.15.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,6 +13,18 @@ import path from 'node:path';
13
13
  import { BLOCKRUN_DIR } from '../config.js';
14
14
  const AUDIT_FILE = path.join(BLOCKRUN_DIR, 'franklin-audit.jsonl');
15
15
  const PROMPT_PREVIEW_CHARS = 240;
16
+ // Cap the audit log at the most recent N entries. Without this the file
17
+ // grew unbounded — verified ~3.6k lines on a single dev machine after a
18
+ // few weeks of light use (a few MB), so a long-lived or heavily used
19
+ // install keeps growing without limit and slows `franklin insights` to a crawl.
20
+ const MAX_AUDIT_ENTRIES = 10_000;
21
+ // Each entry is roughly 300–800 bytes. We only re-read the file when it
22
+ // looks plausibly over the cap, so we don't pay an O(n) scan on every
23
+ // append. 200 bytes/entry is a conservative lower bound.
24
+ const TRIM_PROBE_BYTES = MAX_AUDIT_ENTRIES * 200;
25
+ // Probe the file size once every TRIM_CHECK_INTERVAL appends (counted in memory since module load) — amortizes the stat() call.
26
+ const TRIM_CHECK_INTERVAL = 200;
27
+ let appendsSinceCheck = 0;
16
28
  export function appendAudit(entry) {
17
29
  try {
18
30
  fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
@@ -21,11 +33,39 @@ export function appendAudit(entry) {
21
33
  prompt: entry.prompt ? truncate(entry.prompt, PROMPT_PREVIEW_CHARS) : undefined,
22
34
  };
23
35
  fs.appendFileSync(AUDIT_FILE, JSON.stringify(safe) + '\n');
36
+ appendsSinceCheck++;
37
+ if (appendsSinceCheck >= TRIM_CHECK_INTERVAL) {
38
+ appendsSinceCheck = 0;
39
+ enforceRetention();
40
+ }
24
41
  }
25
42
  catch {
26
43
  /* best-effort — never break the agent loop on audit-write failure */
27
44
  }
28
45
  }
46
+ /**
47
+ * Trim the audit log to the last MAX_AUDIT_ENTRIES lines if it has grown
48
+ * past the cap. Exported so admin/debug tooling (and tests) can force a
49
+ * compaction without waiting for the next interval probe. It is a no-op while the file is smaller than TRIM_PROBE_BYTES.
50
+ */
51
+ export function enforceRetention() {
52
+ try {
53
+ if (!fs.existsSync(AUDIT_FILE))
54
+ return;
55
+ const stat = fs.statSync(AUDIT_FILE);
56
+ if (stat.size < TRIM_PROBE_BYTES)
57
+ return;
58
+ const content = fs.readFileSync(AUDIT_FILE, 'utf-8');
59
+ const lines = content.split('\n').filter(Boolean);
60
+ if (lines.length <= MAX_AUDIT_ENTRIES)
61
+ return;
62
+ const kept = lines.slice(lines.length - MAX_AUDIT_ENTRIES);
63
+ fs.writeFileSync(AUDIT_FILE, kept.join('\n') + '\n');
64
+ }
65
+ catch {
66
+ /* best-effort */
67
+ }
68
+ }
29
69
  export function getAuditFilePath() {
30
70
  return AUDIT_FILE;
31
71
  }
@@ -49,6 +49,25 @@ export interface InsightsReport {
49
49
  avgRequestCostUsd: number;
50
50
  /** Efficiency: cost per 1K tokens */
51
51
  costPer1KTokens: number;
52
+ /**
53
+ * Cost breakdown by capability category. Lets the UI show a clean
54
+ * "where did your USDC go" split alongside the per-model bar list.
55
+ * - chat: LLM token-billed calls (any non-`modal/*` row with non-zero tokens)
56
+ * - media: ImageGen / VideoGen / MusicGen (per_image / per_second / per_track)
57
+ * - sandbox: Modal GPU sandbox lifecycle (create / exec / status / terminate)
58
+ *
59
+ * Categorization rules, checked in this order:
60
+ * - `modal/*` → sandbox
61
+ * - rows with 0 input + 0 output tokens → media (image/video/music are
62
+ * stored with 0 tokens by recordUsage; modal/* matches first)
63
+ * - everything else → chat
64
+ */
65
+ byCategory: {
66
+ chatCostUsd: number;
67
+ mediaCostUsd: number;
68
+ sandboxCostUsd: number;
69
+ sandboxRequests: number;
70
+ };
52
71
  }
53
72
  export declare function generateInsights(days?: number): InsightsReport;
54
73
  export declare function formatInsights(report: InsightsReport, days: number): string;
@@ -23,11 +23,28 @@ export function generateInsights(days = 30) {
23
23
  let totalCost = 0;
24
24
  let totalInput = 0;
25
25
  let totalOutput = 0;
26
+ // Category totals — see InsightsReport.byCategory doc.
27
+ let chatCost = 0;
28
+ let mediaCost = 0;
29
+ let sandboxCost = 0;
30
+ let sandboxRequests = 0;
26
31
  const modelAgg = new Map();
27
32
  for (const r of windowHistory) {
28
33
  totalCost += r.costUsd;
29
34
  totalInput += r.inputTokens;
30
35
  totalOutput += r.outputTokens;
36
+ // Categorize: modal/* always goes to sandbox; zero-token entries are
37
+ // media (image/video/music recordUsage stores 0/0 tokens); rest = chat.
38
+ if (r.model.startsWith('modal/')) {
39
+ sandboxCost += r.costUsd;
40
+ sandboxRequests++;
41
+ }
42
+ else if ((r.inputTokens + r.outputTokens) === 0) {
43
+ mediaCost += r.costUsd;
44
+ }
45
+ else {
46
+ chatCost += r.costUsd;
47
+ }
31
48
  const existing = modelAgg.get(r.model) ?? {
32
49
  requests: 0,
33
50
  costUsd: 0,
@@ -101,6 +118,12 @@ export function generateInsights(days = 30) {
101
118
  projections,
102
119
  avgRequestCostUsd,
103
120
  costPer1KTokens,
121
+ byCategory: {
122
+ chatCostUsd: chatCost,
123
+ mediaCostUsd: mediaCost,
124
+ sandboxCostUsd: sandboxCost,
125
+ sandboxRequests,
126
+ },
104
127
  };
105
128
  }
106
129
  // ─── Format for Display ───────────────────────────────────────────────────
@@ -29,6 +29,7 @@ import { jupiterQuoteCapability, jupiterSwapCapability } from './jupiter.js';
29
29
  import { base0xQuoteCapability, base0xSwapCapability } from './zerox-base.js';
30
30
  import { base0xGaslessSwapCapability } from './zerox-gasless.js';
31
31
  import { defiLlamaProtocolsCapability, defiLlamaProtocolCapability, defiLlamaChainsCapability, defiLlamaYieldsCapability, defiLlamaPriceCapability, } from './defillama.js';
32
+ import { modalCapabilities } from './modal.js';
32
33
  import { createTradingCapabilities } from './trading-execute.js';
33
34
  import { Portfolio } from '../trading/portfolio.js';
34
35
  import { RiskEngine } from '../trading/risk.js';
@@ -158,6 +159,11 @@ export const allCapabilities = [
158
159
  defiLlamaChainsCapability,
159
160
  defiLlamaYieldsCapability,
160
161
  defiLlamaPriceCapability,
162
+ // Modal GPU sandbox tools — registered but hidden by default (not in
163
+ // CORE_TOOL_NAMES). Agent must `ActivateTool({names:["ModalCreate",...]})`
164
+ // before they appear in its tool inventory. High-cost ($0.40/H100 create)
165
+ // operations should not be in the default surface.
166
+ ...modalCapabilities, // ModalCreate, ModalExec, ModalStatus, ModalTerminate
161
167
  ];
162
168
  export { readCapability, writeCapability, editCapability, bashCapability, globCapability, grepCapability, webFetchCapability, webSearchCapability, taskCapability, detachCapability, };
163
169
  export { createSubAgentCapability } from './subagent.js';
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Modal Sandbox capabilities — spin up GPU/CPU compute on Modal Labs via the
3
+ * BlockRun gateway's x402-paid passthrough at /v1/modal/sandbox/{create, exec,
4
+ * status, terminate}. See https://modal.com/docs/guide/sandboxes for the
5
+ * underlying primitives.
6
+ *
7
+ * Pricing (per-call, USDC):
8
+ * create: $0.01 (CPU) / $0.05 (T4) / $0.08 (L4) / $0.10 (A10G) / $0.20 (A100) / $0.40 (H100)
9
+ * exec: $0.001
10
+ * status: $0.001
11
+ * terminate: $0.001
12
+ *
13
+ * Gateway constraints (probed 2026-05-02):
14
+ * - image is fixed at python:3.11 — no custom containers yet.
15
+ * - command is execve-style (string[]), not a shell string. We accept a
16
+ * plain string from the LLM and auto-wrap to ["sh","-c", string].
17
+ * - No stdin / env / workdir / streaming on exec — keep commands self-
18
+ * contained and idempotent.
19
+ * - No upload/download endpoints — files in/out via exec heredoc / curl.
20
+ *
21
+ * Lifecycle:
22
+ * ModalCreate → returns sandbox_id, charged at GPU tier
23
+ * ModalExec → sync, returns { stdout, stderr, exit_code }
24
+ * ModalStatus → check running/terminated
25
+ * ModalTerminate → release; called automatically at session end via
26
+ * the SessionSandboxTracker registry.
27
+ */
28
+ import type { CapabilityHandler } from '../agent/types.js';
29
+ export interface SandboxRecord {
30
+ id: string;
31
+ gpu: string;
32
+ createdAt: number;
33
+ timeoutSeconds?: number;
34
+ }
35
+ declare class SessionSandboxTracker {
36
+ private sandboxes;
37
+ add(rec: SandboxRecord): void;
38
+ remove(id: string): void;
39
+ list(): SandboxRecord[];
40
+ /** Snapshot then clear — used by the session cleanup hook. */
41
+ drainIds(): string[];
42
+ }
43
+ export declare const sessionSandboxTracker: SessionSandboxTracker;
44
+ export declare const modalCreateCapability: CapabilityHandler;
45
+ export declare const modalExecCapability: CapabilityHandler;
46
+ export declare const modalStatusCapability: CapabilityHandler;
47
+ export declare const modalTerminateCapability: CapabilityHandler;
48
+ /**
49
+ * Terminate every sandbox the current session has created. Called from
50
+ * vscode-session.ts at session end (and the SessionToolGuard cleanup path)
51
+ * so a sandbox the agent forgot to ModalTerminate doesn't keep billing the
52
+ * user until its per-sandbox timeout expires. Best-effort: failures are logged but
53
+ * don't block session shutdown.
54
+ */
55
+ export declare function terminateAllSessionSandboxes(opts?: {
56
+ abortSignal?: AbortSignal;
57
+ }): Promise<{
58
+ attempted: number;
59
+ succeeded: number;
60
+ failed: Array<{
61
+ id: string;
62
+ error: string;
63
+ }>;
64
+ }>;
65
+ export declare const modalCapabilities: CapabilityHandler[];
66
+ export {};