@blockrun/franklin 3.15.87 → 3.15.89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@
5
5
  import fs from 'node:fs';
6
6
  import path from 'node:path';
7
7
  import { execSync } from 'node:child_process';
8
+ import { BLOCKRUN_DIR } from '../config.js';
8
9
  import { getWalletAddress as getBaseWalletAddress } from '@blockrun/llm';
9
10
  import { Keypair } from '@solana/web3.js';
10
11
  import bs58 from 'bs58';
@@ -18,7 +19,7 @@ You are an interactive agent — not a chatbot. Use the tools available to you t
18
19
 
19
20
  # Franklin has hands
20
21
  You run with live tools by default:
21
- - **Wallet** — read your own chain, address, and USDC balance. Use this for any "what's my balance / how much money / 钱包余额 / wallet status" question instead of running \`franklin balance\` via Bash. Free, one call, never costs USDC.
22
+ - **Wallet** — read your own chain, address, and USDC balance. Use this for any "what's my balance / how much money / wallet status" question instead of running \`franklin balance\` via Bash. Free, one call, never costs USDC.
22
23
  - **TradingMarket** — current stock / FX / crypto / commodity prices (BlockRun Gateway / Pyth; wallet pays automatically, $0.001/stock call, free for everything else).
23
24
  - **ExaAnswer / ExaSearch / ExaReadUrls** — cited current-events answers, semantic web search, clean URL content.
24
25
  - **WebSearch / WebFetch** — live web.
@@ -88,11 +89,11 @@ function getOutputEfficiencySection() {
88
89
  return `# Output Efficiency
89
90
  Go straight to the point. Lead with the action, not the reasoning. Do not restate what the user said.
90
91
 
91
- **No pre-tool narration.** Do NOT write things like "让我先 X...", "Let me read the file...", "I'll now search for...", "好的,让我研究一下...", "现在我来 X", "OK now I have everything I need", "完美!", "好,现在我完全明白了". These phrases are internal monologue — the user can see your tool calls directly and does not need step-by-step play-by-play. Just call the tool.
92
+ **No pre-tool narration.** Do NOT write things like "Let me read the file...", "I'll now search for...", "Let me investigate...", "Now I'm going to X", "OK now I have everything I need", "Perfect!", "Got it, now I fully understand". These phrases are internal monologue — the user can see your tool calls directly and does not need step-by-step play-by-play. Just call the tool. The same rule applies in any language — no equivalent narration in non-English replies either.
92
93
 
93
94
  The exception: a single short sentence between tool calls is fine when it tells the user something they would otherwise miss — a finding ("Build passes — moving on to tests."), a course correction ("That approach won't work — switching to X."), or a one-line status before a long-running operation. One sentence per update is enough.
94
95
 
95
- **No internal-language leakage.** Always write your visible response in the same language the user is using. If your private reasoning happens in a different language (English while the user writes Chinese, Korean while the user writes Chinese, etc.), do NOT let phrases from that language appear in the user-facing text. The user should never see a stray "좋아", "OK now", or "Alright" in the middle of a Chinese reply.
96
+ **No internal-language leakage.** Always write your visible response in the same language the user is using. If your private reasoning happens in a different language than the user's message, do NOT let phrases from that language appear in the user-facing text. The user should never see a stray "d'accord", "OK now", or "Alright" in the middle of a reply written in another language.
96
97
 
97
98
  Focus text output on:
98
99
  - Decisions that need the user's input
@@ -159,6 +160,19 @@ After delivering results, if a better data source exists, add one line at the en
159
160
  Do NOT check access before acting. Do NOT explain what you tried. Just deliver, then tip.`;
160
161
  }
161
162
  function getWalletKnowledgeSection() {
163
+ // Read the panel URL persisted by startPanelBackground (start.ts) so we
164
+ // surface the actual bound port — the panel auto-increments past 3100
165
+ // when the default is taken (e.g. a second franklin running). Falls back
166
+ // to the canonical default when the file is missing (panel disabled or
167
+ // never started this session).
168
+ let panelUrl = 'http://localhost:3100';
169
+ try {
170
+ const persisted = fs.readFileSync(path.join(BLOCKRUN_DIR, 'panel-url'), 'utf8').trim();
171
+ if (persisted.startsWith('http://') || persisted.startsWith('https://')) {
172
+ panelUrl = persisted;
173
+ }
174
+ }
175
+ catch { /* fall through to default */ }
162
176
  return `# Wallet Storage (answer "where is my wallet" directly — no searching)
163
177
  Franklin stores wallet keys in ~/.blockrun/. When the user asks about wallet location, answer from this map — do not grep or scan.
164
178
 
@@ -178,7 +192,32 @@ Franklin stores wallet keys in ~/.blockrun/. When the user asks about wallet loc
178
192
  - Use \`franklin stats\` / \`franklin content list\` instead of parsing files when the user asks "how much did I spend".
179
193
  - Programmatic access: import { getWalletAddress, getOrCreateWallet, getOrCreateSolanaWallet } from '@blockrun/llm'
180
194
 
181
- When the user asks about "my wallet" without qualifier, default to Base (it's the primary chain shown at launch). Only mention Solana if the chain file says solana or the user explicitly asks.`;
195
+ When the user asks about "my wallet" without qualifier, default to Base (it's the primary chain shown at launch). Only mention Solana if the chain file says solana or the user explicitly asks.
196
+
197
+ ## Funding the wallet ("how do I deposit / recharge / fund / top up", in any language)
198
+
199
+ When the user asks about depositing or funding USDC — in any language — do not describe the steps in chat. **Open the panel wallet page directly in their browser** using Bash, then confirm in chat what you opened and which chain is active.
200
+
201
+ The exact wallet URL for this session:
202
+
203
+ ${panelUrl}/#wallet
204
+
205
+ Bash command to open it (macOS \`open\`, Linux \`xdg-open\`, Windows \`start\`):
206
+
207
+ open ${panelUrl}/#wallet
208
+
209
+ That page is where the deposit address, QR code, live balance, chain switcher, and back-up controls all live. The user lands on it instead of you reciting steps.
210
+
211
+ After running \`open\`:
212
+ - Tell the user one line: "Opened the wallet page — \`${panelUrl}/#wallet\`. Active chain: <base|solana>."
213
+ - Read the active chain from ~/.blockrun/payment-chain so they know which network to send USDC on.
214
+ - Mention USDC is the only accepted token; ETH/SOL on their own won't settle x402 calls.
215
+
216
+ Hard rules:
217
+ - Do NOT print the private key in chat. The panel reveals it behind a click.
218
+ - Do NOT invent a \`franklin deposit\` CLI flow — there isn't one; the panel IS the funding surface.
219
+ - Do NOT hand-craft a different localhost port; the URL above tracks the actual bound port (3100 might have been taken; the panel could be on 3101+).
220
+ - If \`open\` fails (e.g. no GUI on a remote box), fall back to giving them the URL as plain text and tell them to paste it into a browser.`;
182
221
  }
183
222
  function getBlockRunApiSection() {
184
223
  return `# BlockRun Gateway API (the network you live on)
@@ -342,7 +381,7 @@ If you find yourself about to emit one of these, stop and call the tool instead.
342
381
  - "what are the odds on Polymarket / Kalshi specifically" → \`searchPolymarket\` (\$0.001) and \`searchKalshi\` (\$0.001) **in parallel**; comparing implied probability across the two venues is the high-value answer.
343
382
  - "where do Polymarket and Kalshi disagree / arbitrage" → \`crossPlatform\` (\$0.005) returns pre-matched pairs.
344
383
  - "who's profitable / top traders / who should I follow on Polymarket" → \`leaderboard\` (\$0.001) — global top wallets by P&L.
345
- - "analyze this wallet / can I copy this trader / 复制交易 / show me their P&L AND positions" → run \`walletProfile\` + \`walletPnl\` + \`walletPositions\` IN PARALLEL with the same address. Three \$0.005 calls = full picture for \$0.015. Do NOT \`Bash\`-curl \`data-api.polymarket.com\` directly — those are paid Predexon endpoints and going around them defeats the wallet-attached architecture. If just the profile is needed: \`walletProfile\` alone (single address → /wallet/{addr}, comma-list → batch).
384
+ - "analyze this wallet / can I copy this trader / show me their P&L AND positions" → run \`walletProfile\` + \`walletPnl\` + \`walletPositions\` IN PARALLEL with the same address. Three \$0.005 calls = full picture for \$0.015. Do NOT \`Bash\`-curl \`data-api.polymarket.com\` directly — those are paid Predexon endpoints and going around them defeats the wallet-attached architecture. If just the profile is needed: \`walletProfile\` alone (single address → /wallet/{addr}, comma-list → batch).
346
385
  - "what are smart traders betting on right now / smart money flow across markets" → \`smartActivity\` (\$0.005) — markets where high-P&L wallets are positioning.
347
386
  - "show smart money on this specific Polymarket market / this condition_id" → \`smartMoney\` (\$0.005) with \`conditionId="<condition_id>"\`.
348
387
 
@@ -352,7 +391,7 @@ NEVER answer "what are the odds of X" from training-data memory — these are li
352
391
  - Run **TradingSignal** with default lookback (90d). Lower values leave MACD undefined.
353
392
  - The tool returns a **Verdict** section with \`Direction\`, \`Bull signals\`, \`Bear signals\`. Echo it directly. Do not soften "bullish" to "leaning slightly positive" — say what the data says.
354
393
  - If \`Data Notes\` lists an indicator as "insufficient data", state that explicitly to the user and suggest re-running with more days. Do NOT pretend that indicator is "neutral".
355
- - **Forbidden default**: "持有观望", "wait and see", "hold for clearer signals" — these are bugs when ≥2 indicators voted in a clear direction. Bail out to those phrases ONLY when (a) the Verdict says \`neutral\` AND (b) the bull/bear signal lists are both genuinely empty or one of each. Otherwise commit to a direction with the reasoning the tool already gave you.
394
+ - **Forbidden default**: "wait and see" / "hold for clearer signals" / equivalent hedging in any language — these are bugs when ≥2 indicators voted in a clear direction. Bail out to that posture ONLY when (a) the Verdict says \`neutral\` AND (b) the bull/bear signal lists are both genuinely empty or one of each. Otherwise commit to a direction with the reasoning the tool already gave you.
356
395
 
357
396
  **Media generation (ImageGen / VideoGen).** Pass just the user's descriptive prompt and the output path — do NOT pass \`model\`. The harness picks the right model for the requested style + budget, refines loose prompts using a 5-slot template (scene / subject / details / use case / constraints), and surfaces both the refinement and a cost proposal through AskUser before spending. If the user wants their prompt left exactly as written, prefix it with \`///\` to skip refinement. Only pass \`model\` explicitly if the user named one specifically.`;
358
397
  }
@@ -48,7 +48,7 @@ Flag as ungrounded:
48
48
  - Invented specifics — names, numbers, dates the model produced without a tool call supporting them
49
49
 
50
50
  ### B. Tool-use refusal (NEW)
51
- If the user clearly asked for live-world data — a current price, today's news, the latest state of X — and the assistant's answer contains a refusal or deflection (e.g. "I can't provide real-time prices", "我无法提供实时数据", "check Yahoo Finance yourself", "as an AI I don't have access to live data"), that is also UNGROUNDED. Franklin HAS tools for this (TradingMarket for prices, ExaAnswer for current events, WebSearch for general web, etc.). Refusing to reach for them is the failure this check was built for.
51
+ If the user clearly asked for live-world data — a current price, today's news, the latest state of X — and the assistant's answer contains a refusal or deflection (e.g. "I can't provide real-time prices", "I don't have access to live data", "check Yahoo Finance yourself", "as an AI I cannot fetch this"), that is also UNGROUNDED. The same rule applies in any language. Franklin HAS tools for this (TradingMarket for prices, ExaAnswer for current events, WebSearch for general web, etc.). Refusing to reach for them is the failure this check was built for.
52
52
 
53
53
  Flag as tool-use refusal:
54
54
  - "I can't check real-time prices"
@@ -116,6 +116,15 @@ export declare class ModelClient {
116
116
  private cachedBaseWallet;
117
117
  private cachedSolanaWallet;
118
118
  private walletCacheTime;
119
+ /**
120
+ * USDC actually charged on the most recent x402 settlement, parsed
121
+ * from `details.amount` (micro-USDC → USD). Reset to 0 at the start
122
+ * of every `streamCompletion`, written by `signBasePayment` /
123
+ * `signSolanaPayment`. Callers read it via `getLastPaidUsd()` after
124
+ * the stream completes so franklin-stats.json records the real wallet
125
+ * charge instead of a token-catalog estimate.
126
+ */
127
+ private lastPaidUsd;
119
128
  private static WALLET_CACHE_TTL;
120
129
  constructor(opts: LLMClientOptions);
121
130
  /**
@@ -132,6 +141,13 @@ export declare class ModelClient {
132
141
  * default model.
133
142
  */
134
143
  private resolveVirtualModel;
144
+ /**
145
+ * USDC actually charged for the most recent stream. 0 if no payment
146
+ * was made (free model / cached / pre-stream error). Callers should
147
+ * read this after the stream finishes — before that it carries the
148
+ * value from a previous call.
149
+ */
150
+ getLastPaidUsd(): number;
135
151
  streamCompletion(request: ModelRequest, signal?: AbortSignal): AsyncGenerator<StreamChunk>;
136
152
  private parseNonStreamingMessage;
137
153
  /**
package/dist/agent/llm.js CHANGED
@@ -5,6 +5,7 @@
5
5
  */
6
6
  import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
7
7
  import { USER_AGENT } from '../config.js';
8
+ import { appendSettlementRow } from '../stats/cost-log.js';
8
9
  import { routeRequest, parseRoutingProfile } from '../router/index.js';
9
10
  import { ThinkTagStripper } from './think-tag-stripper.js';
10
11
  import { isNemotronProseModel, stripNemotronProse } from './nemotron-prose-stripper.js';
@@ -28,6 +29,19 @@ function parseTimeoutEnv(name) {
28
29
  const parsed = raw ? Number.parseInt(raw, 10) : NaN;
29
30
  return Number.isFinite(parsed) && parsed >= 0 ? parsed : null;
30
31
  }
32
/**
 * Convert an x402 `details.amount` field (USDC in micro-units, 6 decimals)
 * to a USD float.
 *
 * Per the original note, this follows the same math as the SDK's
 * `appendCostLog` (not visible from here) so the agent loop, the proxy,
 * and `cost_log.jsonl` agree on the recorded figure.
 *
 * @param {string | number | bigint | null | undefined} amount
 *   Raw amount in micro-USDC. Strings are parsed with `parseFloat`;
 *   bigints are converted with `Number()` instead of being dropped.
 * @returns {number} The amount in USD, or 0 when the input is missing or
 *   does not resolve to a finite number.
 */
function paymentAmountToUsd(amount) {
    const MICRO_USDC_PER_USD = 1e6;
    if (amount === undefined || amount === null)
        return 0;
    let micro;
    if (typeof amount === 'string') {
        micro = Number.parseFloat(amount);
    }
    else if (typeof amount === 'bigint') {
        // Without this branch a bigint fails Number.isFinite below and the
        // payment would be recorded as $0.
        micro = Number(amount);
    }
    else {
        micro = amount;
    }
    // Number.isFinite does not coerce, so NaN, ±Infinity and any non-number
    // value (objects, booleans, ...) all resolve to 0 here.
    if (!Number.isFinite(micro))
        return 0;
    return micro / MICRO_USDC_PER_USD;
}
31
45
  /**
32
46
  * Replace Unicode box-drawing characters with their ASCII equivalents.
33
47
  *
@@ -284,6 +298,15 @@ export class ModelClient {
284
298
  cachedBaseWallet = null;
285
299
  cachedSolanaWallet = null;
286
300
  walletCacheTime = 0;
301
+ /**
302
+ * USDC actually charged on the most recent x402 settlement, parsed
303
+ * from `details.amount` (micro-USDC → USD). Reset to 0 at the start
304
+ * of every `streamCompletion`, written by `signBasePayment` /
305
+ * `signSolanaPayment`. Callers read it via `getLastPaidUsd()` after
306
+ * the stream completes so franklin-stats.json records the real wallet
307
+ * charge instead of a token-catalog estimate.
308
+ */
309
+ lastPaidUsd = 0;
287
310
  static WALLET_CACHE_TTL = 30 * 60 * 1000; // 30 min TTL
288
311
  constructor(opts) {
289
312
  this.apiUrl = opts.apiUrl;
@@ -329,7 +352,19 @@ export class ModelClient {
329
352
  };
330
353
  return FALLBACKS[model] || 'nvidia/qwen3-coder-480b';
331
354
  }
355
+ /**
356
+ * USD amount taken from the x402 payment requirements the last time a
357
+ * payment was signed for a stream. 0 if no payment was signed (free
358
+ * model / cached / pre-stream error). Read it after the stream finishes:
359
+ * `streamCompletion` zeroes it on start, so earlier reads may be stale or 0.
360
+ */
361
+ getLastPaidUsd() {
362
+ return this.lastPaidUsd;
363
+ }
332
364
  async *streamCompletion(request, signal) {
365
+ // Reset the per-call charge tracker. signBasePayment / signSolanaPayment
366
+ // will set it when the gateway demands a 402 settlement.
367
+ this.lastPaidUsd = 0;
333
368
  // Resolve virtual models before any API call
334
369
  const resolvedModel = this.resolveVirtualModel(request.model);
335
370
  if (resolvedModel !== request.model) {
@@ -463,7 +498,7 @@ export class ModelClient {
463
498
  if (response.status === 402) {
464
499
  if (this.debug)
465
500
  console.error('[franklin] Payment required — signing...');
466
- const paymentHeader = await this.signPayment(response);
501
+ const paymentHeader = await this.signPayment(response, request.model);
467
502
  if (!paymentHeader) {
468
503
  yield { kind: 'error', payload: { message: 'Payment signing failed' } };
469
504
  return;
@@ -525,7 +560,7 @@ export class ModelClient {
525
560
  signal: requestController.signal,
526
561
  }), requestController, createModelTimeoutError('request', request.model, requestTimeoutMs), requestTimeoutMs);
527
562
  if (response.status === 402) {
528
- const paymentHeader = await this.signPayment(response);
563
+ const paymentHeader = await this.signPayment(response, request.model);
529
564
  if (!paymentHeader) {
530
565
  yield { kind: 'error', payload: { message: 'Payment signing failed' } };
531
566
  return;
@@ -918,17 +953,17 @@ export class ModelClient {
918
953
  return { content: collected, usage, stopReason };
919
954
  }
920
955
  // ─── Payment ───────────────────────────────────────────────────────────
921
- async signPayment(response) {
956
+ async signPayment(response, model) {
922
957
  try {
923
958
  if (this.chain === 'solana') {
924
- return await this.signSolanaPayment(response);
959
+ return await this.signSolanaPayment(response, model);
925
960
  }
926
- return await this.signBasePayment(response);
961
+ return await this.signBasePayment(response, model);
927
962
  }
928
963
  catch (err) {
929
964
  const msg = err.message || '';
930
965
  if (msg.includes('insufficient') || msg.includes('balance')) {
931
- console.error(`[franklin] Insufficient USDC balance. Run 'franklin balance' to check.`);
966
+ console.error(`[franklin] Insufficient USDC balance. Open http://localhost:3100/#wallet to deposit (or run 'franklin balance').`);
932
967
  }
933
968
  else if (this.debug) {
934
969
  console.error('[franklin] Payment error:', msg);
@@ -939,7 +974,7 @@ export class ModelClient {
939
974
  return null;
940
975
  }
941
976
  }
942
- async signBasePayment(response) {
977
+ async signBasePayment(response, model) {
943
978
  // Refresh wallet cache after TTL to pick up balance/key changes
944
979
  if (!this.cachedBaseWallet || (Date.now() - this.walletCacheTime > ModelClient.WALLET_CACHE_TTL)) {
945
980
  const w = getOrCreateWallet();
@@ -954,6 +989,18 @@ export class ModelClient {
954
989
  throw new Error('No payment requirements in 402 response');
955
990
  const paymentRequired = parsePaymentRequired(paymentHeader);
956
991
  const details = extractPaymentDetails(paymentRequired);
992
+ this.lastPaidUsd = paymentAmountToUsd(details.amount);
993
+ // Mirror the SDK's appendCostLog write so cost_log.jsonl becomes a
994
+ // true wallet-truth ledger covering both SDK helper traffic AND the
995
+ // agent's main LLM stream (which uses this signer, not the SDK).
996
+ // Match SDK schema (model/wallet/network/client_kind) so every row
997
+ // is independently queryable.
998
+ appendSettlementRow('/v1/messages', this.lastPaidUsd, {
999
+ model,
1000
+ wallet: wallet.address,
1001
+ network: details.network || 'base-mainnet',
1002
+ client_kind: 'AgentClient',
1003
+ });
957
1004
  const payload = await createPaymentPayload(wallet.privateKey, wallet.address, details.recipient, details.amount, details.network || 'eip155:8453', {
958
1005
  resourceUrl: details.resource?.url || this.apiUrl,
959
1006
  resourceDescription: details.resource?.description || 'BlockRun AI API call',
@@ -962,7 +1009,7 @@ export class ModelClient {
962
1009
  });
963
1010
  return { 'PAYMENT-SIGNATURE': payload };
964
1011
  }
965
- async signSolanaPayment(response) {
1012
+ async signSolanaPayment(response, model) {
966
1013
  if (!this.cachedSolanaWallet || (Date.now() - this.walletCacheTime > ModelClient.WALLET_CACHE_TTL)) {
967
1014
  const w = await getOrCreateSolanaWallet();
968
1015
  this.walletCacheTime = Date.now();
@@ -975,6 +1022,13 @@ export class ModelClient {
975
1022
  throw new Error('No payment requirements in 402 response');
976
1023
  const paymentRequired = parsePaymentRequired(paymentHeader);
977
1024
  const details = extractPaymentDetails(paymentRequired, SOLANA_NETWORK);
1025
+ this.lastPaidUsd = paymentAmountToUsd(details.amount);
1026
+ appendSettlementRow('/v1/messages', this.lastPaidUsd, {
1027
+ model,
1028
+ wallet: wallet.address,
1029
+ network: details.network || 'solana-mainnet',
1030
+ client_kind: 'AgentClient',
1031
+ });
978
1032
  const secretBytes = await solanaKeyToBytes(wallet.privateKey);
979
1033
  const feePayer = details.extra?.feePayer || details.recipient;
980
1034
  const payload = await createSolanaPaymentPayload(secretBytes, wallet.address, details.recipient, details.amount, feePayer, {
@@ -291,10 +291,10 @@ export function looksLikeGatewayErrorAsText(parts) {
291
291
  * pinned by tool_choice when the user prompt actually references that
292
292
  * tool's domain — otherwise we let the smart generator pick from any tool.
293
293
  *
294
- * The motivating bug: a real-estate question ("可以还价 20% ") had its
295
- * answer flagged as ungrounded for citing $/sqft figures. The cheap
296
- * evaluator model picked TradingMarket as the missing tool because it
297
- * was the first example in the evaluator prompt. Forcing TradingMarket
294
+ * The motivating bug: a real-estate question ("can I negotiate 20% off")
295
+ * had its answer flagged as ungrounded for citing $/sqft figures. The
296
+ * cheap evaluator model picked TradingMarket as the missing tool because
297
+ * it was the first example in the evaluator prompt. Forcing TradingMarket
298
298
  * (a crypto-only tool) on a housing question made the retry useless.
299
299
  *
300
300
  * This function returns false for specialized tools when the prompt has
@@ -304,16 +304,18 @@ export function looksLikeGatewayErrorAsText(parts) {
304
304
  */
305
305
  function isToolRelevantToPrompt(toolName, promptLower) {
306
306
  // Crypto trading tools — need a ticker, "crypto", "coin", "swap", etc.
307
+ // English-only fast path; the LLM-level classifier handles other languages
308
+ // before this domain-relevance check runs.
307
309
  if (/^(Trading|DefiLlama|Jupiter|Base0x|Base0xGasless)/i.test(toolName)) {
308
- return /\b(btc|eth|sol|xrp|doge|usdc|usdt|crypto|coin|token|defi|tvl|yield|swap|jupiter|uniswap|pump\.fun|solana|base chain|polygon|ethereum|币|代币|链上|做空|做多)\b/i.test(promptLower);
310
+ return /\b(btc|eth|sol|xrp|doge|usdc|usdt|crypto|coin|token|defi|tvl|yield|swap|jupiter|uniswap|pump\.fun|solana|base chain|polygon|ethereum)\b/i.test(promptLower);
309
311
  }
310
312
  // X.com search — need an @handle, "twitter", "tweet", "X.com"
311
313
  if (/^SearchX$/i.test(toolName) || /^PostToX$/i.test(toolName)) {
312
- return /(@\w+|twitter|x\.com|tweet|推特)/i.test(promptLower);
314
+ return /(@\w+|twitter|x\.com|tweet)/i.test(promptLower);
313
315
  }
314
316
  // Image / video / music gen — need a creative-content request
315
317
  if (/^(ImageGen|VideoGen|MusicGen)$/i.test(toolName)) {
316
- return /\b(image|picture|photo|video|clip|music|song|generate|create|render|draw|画|图|视频|音乐|歌)\b/i.test(promptLower);
318
+ return /\b(image|picture|photo|video|clip|music|song|generate|create|render|draw)\b/i.test(promptLower);
317
319
  }
318
320
  // General-purpose / file / shell tools — always relevant.
319
321
  return true;
@@ -860,7 +862,8 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
860
862
  try {
861
863
  // Anchor 1: the user's current message (already in lastUserInput).
862
864
  // Anchor 2: first chunk of the previous assistant reply — gives the
863
- // analyzer enough context to resolve deictic follow-ups like "那 AAPL 呢".
865
+ // analyzer enough context to resolve deictic follow-ups like
866
+ // "and that one?" / "what about AAPL".
864
867
  const lastAssistantText = (() => {
865
868
  const prior = [...history.slice(0, -1)].reverse()
866
869
  .find((m) => m.role === 'assistant');
@@ -1540,16 +1543,25 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1540
1543
  contextPct: Math.round(contextUsagePct),
1541
1544
  });
1542
1545
  // Record usage for stats tracking (franklin stats command).
1546
+ // Prefer the real x402 charge from the gateway over a token-catalog
1547
+ // estimate. The estimate is wrong any time the gateway applies
1548
+ // promo pricing, prompt-cache discounts, or per-call flat fees
1549
+ // (verified 2026-05-09 against cost_log.jsonl: token-based
1550
+ // estimate said $34.79 across the same calls the wallet only
1551
+ // paid $2.24 for — a 15× drift). estimateCost only fills in
1552
+ // when no payment was made (free model / cached / pre-stream
1553
+ // failure), where the gateway charge is genuinely 0.
1554
+ //
1543
1555
  // Pass the fallback flag so franklin-stats.json's totalFallbacks +
1544
1556
  // per-model fallbackCount stay in sync with the audit log a few
1545
1557
  // lines below — same `turnFailedModels.size > 0` predicate, same
1546
- // turn. Without this, stats showed 0 fallbacks across 5150 real
1547
- // requests on a machine that visibly hit fallback paths in
1548
- // franklin-debug.log; `franklin insights` was therefore useless
1549
- // for spotting a hot routing chain.
1550
- const costEstimate = estimateCost(resolvedModel, inputTokens, usage.outputTokens, 1);
1558
+ // turn.
1559
+ const paidUsd = client.getLastPaidUsd();
1560
+ const callCost = paidUsd > 0
1561
+ ? paidUsd
1562
+ : estimateCost(resolvedModel, inputTokens, usage.outputTokens, 1);
1551
1563
  const llmLatencyMs = Date.now() - llmCallStartedAt;
1552
- recordUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, llmLatencyMs, turnFailedModels.size > 0);
1564
+ recordUsage(resolvedModel, inputTokens, usage.outputTokens, callCost, llmLatencyMs, turnFailedModels.size > 0);
1553
1565
  // ── Circuit breakers: prevent infinite-loop wallet drain ──
1554
1566
  // Per-turn $-cap was removed in v3.11.0 — runaway loops are caught by
1555
1567
  // MAX_TOOL_CALLS_PER_TURN (25) and MAX_TINY_RESPONSES (2) above; the
@@ -1576,7 +1588,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1576
1588
  else {
1577
1589
  consecutiveTinyResponses = 0;
1578
1590
  }
1579
- recordSessionUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, routingTier);
1591
+ recordSessionUsage(resolvedModel, inputTokens, usage.outputTokens, callCost, routingTier);
1580
1592
  // Capture tool names invoked in this assistant turn. The AuditEntry
1581
1593
  // interface has had a `toolCalls?: string[]` slot since 3.15.11, but
1582
1594
  // nothing populated it — verified 2026-05-04 in a real Opus session
@@ -1599,7 +1611,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1599
1611
  model: resolvedModel,
1600
1612
  inputTokens,
1601
1613
  outputTokens: usage.outputTokens,
1602
- costUsd: costEstimate,
1614
+ costUsd: callCost,
1603
1615
  // Any failed model this turn means the model that finally
1604
1616
  // succeeded was a fallback. Without this, audit log read 0%
1605
1617
  // fallbacks across 4k entries — useless for diagnosing whether
@@ -1614,11 +1626,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1614
1626
  // Accumulate session-level totals for session meta
1615
1627
  sessionInputTokens += inputTokens;
1616
1628
  sessionOutputTokens += usage.outputTokens;
1617
- sessionCostUsd += costEstimate;
1618
- turnCostUsd += costEstimate;
1629
+ sessionCostUsd += callCost;
1630
+ turnCostUsd += callCost;
1619
1631
  const opusCost = (inputTokens / 1_000_000) * OPUS_PRICING.input
1620
1632
  + (usage.outputTokens / 1_000_000) * OPUS_PRICING.output;
1621
- sessionSavedVsOpus += Math.max(0, opusCost - costEstimate);
1633
+ sessionSavedVsOpus += Math.max(0, opusCost - callCost);
1622
1634
  // ── Max-spend guard ──
1623
1635
  // Session-level cost ceiling. Batch/scripted callers pass this to bound a
1624
1636
  // single run ("spend at most $0.50 for today's digest"); interactive
@@ -1843,7 +1855,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1843
1855
  recordOutcome(lastRoutedCategory, lastRoutedModel, 'continued', turnToolCalls);
1844
1856
  }
1845
1857
  // End-of-turn marker for question-shaped responses. Real-world UX
1846
- // problem 2026-05-06: agent finishes a turn with "要我查一下 X 吗?"
1858
+ // problem 2026-05-06: agent finishes a turn with "Should I look up X?"
1847
1859
  // and stops; the user reads the silence as "Franklin died" twice in
1848
1860
  // one hour. The Ink input box is already on screen but it's easy to
1849
1861
  // miss after a long output scroll. A single trailing italic line
@@ -64,15 +64,15 @@ Anti-slop rules:
64
64
  - Wrap literal text that must appear in the image in double quotes. Spell difficult words letter-by-letter.
65
65
  - One revision per turn — do not combine conflicting asks.
66
66
  - Natural language, not keyword-tag format.
67
- - refined_prompt stays in the same language as the user input. Chinese in → Chinese out.
67
+ - refined_prompt stays in the same language as the user input.
68
68
 
69
69
  Examples:
70
70
 
71
71
  Input: "a photo of a cat on Mars, photoreal"
72
72
  Output: {"style":"photoreal","priority":"balanced","refined_prompt":"Eye-level photograph of a cat standing on the rust-colored Martian surface, late-afternoon low sun casting long shadows, distant canyon rim in the background, 50mm feel, shallow depth of field, editorial photo use, no watermark.","refinement_summary":"Added scene, lighting, lens, use case, constraint.","recommended":{"model":"google/nano-banana-pro","rationale":"Photoreal scenes — Nano Banana Pro has strong realism at moderate cost."},"cheaper":{"model":"google/nano-banana","rationale":"Same family, lower cost, slightly less detail."},"premium":{"model":"openai/gpt-image-2","rationale":"Best photoreal fidelity when budget allows."}}
73
73
 
74
- Input: "赛博朋克风格的动漫角色"
75
- Output: {"style":"anime","priority":"balanced","refined_prompt":"赛博朋克风格的动漫角色,站在霓虹灯映照的雨夜街道上,身穿合成纤维夹克与金属反光饰件,头顶全息广告牌漂浮,低角度视角,强烈青粉对比,海报用,居中构图。","refinement_summary":"补全了场景、光线、材质、用途、构图。","recommended":{"model":"zai/cogview-4","rationale":"CogView-4 specializes in stylized/anime imagery."},"cheaper":{"model":"google/nano-banana","rationale":"Cheaper but less stylized."},"premium":{"model":"xai/grok-imagine-image-pro","rationale":"Premium detail for complex scenes."}}
74
+ Input: "cyberpunk-style anime character"
75
+ Output: {"style":"anime","priority":"balanced","refined_prompt":"Cyberpunk-style anime character standing on a neon-lit rainy street at night, wearing a synthetic-fiber jacket with metallic reflective accents, holographic billboards floating overhead, low-angle view, strong cyan-and-pink contrast, poster use, centered composition.","refinement_summary":"Added scene, lighting, materials, use case, composition.","recommended":{"model":"zai/cogview-4","rationale":"CogView-4 specializes in stylized/anime imagery."},"cheaper":{"model":"google/nano-banana","rationale":"Cheaper but less stylized."},"premium":{"model":"xai/grok-imagine-image-pro","rationale":"Premium detail for complex scenes."}}
76
76
 
77
77
  Input: "a 10-second cinematic drone shot over Tokyo at night"
78
78
  Output: {"style":"concept","priority":"quality","refined_prompt":null,"refinement_summary":"Already well-specified.","recommended":{"model":"bytedance/seedance-2.0","rationale":"Seedance 2.0 delivers the best cinematic quality."},"cheaper":{"model":"bytedance/seedance-2.0-fast","rationale":"Faster + cheaper, minor quality trade-off."},"premium":{"model":null,"rationale":"2.0 is already the top tier."}}
@@ -77,33 +77,68 @@ export function budgetToolResults(history) {
77
77
  budgeted.push(part);
78
78
  continue;
79
79
  }
80
- const content = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
81
- const size = content.length;
82
- // Per-tool cap
80
+ // Decompose tool_result content. Two shapes are valid per
81
+ // CapabilityOutcome (types.ts:38): a bare string OR an array of
82
+ // text + image segments. Pre-fix, we collapsed array content to
83
+ // JSON.stringify(content), which made base64 image bytes count
84
+ // toward the char budget — a 275KB image would tip past the 32K
85
+ // cap, the whole content array (including the image block) got
86
+ // replaced with a truncated text preview, and the image was
87
+ // destroyed before reaching the wire. Verified 2026-05-10 from a
88
+ // gateway log (sonnet-4.6, ~21K input tokens — would have been
89
+ // ~150K with the image present): the tool_result body was a
90
+ // 2KB self-referential string starting with "[Output truncated:
91
+ // 275,952 chars → 2000 preview]\n\n[{\"type\":\"text\"…". Vision
92
+ // hallucinated everything in that session.
93
+ //
94
+ // Fix: only the TEXT segments count toward MAX_TOOL_RESULT_CHARS.
95
+ // Image segments pass through untouched. If text is over budget,
96
+ // truncate ONLY the text — keep the image array alongside.
97
+ const isArrayContent = Array.isArray(part.content);
98
+ const textBlocks = isArrayContent
99
+ ? part.content.filter((b) => b.type === 'text')
100
+ : [];
101
+ const imageBlocks = isArrayContent
102
+ ? part.content.filter((b) => b.type === 'image')
103
+ : [];
104
+ const textOnly = isArrayContent
105
+ ? textBlocks.map(b => b.text).join('\n')
106
+ : part.content;
107
+ const size = textOnly.length;
108
+ // Per-tool cap (text-only — images stay)
83
109
  if (size > MAX_TOOL_RESULT_CHARS) {
84
110
  modified = true;
85
111
  // Truncate at line boundary for cleaner output
86
- let preview = content.slice(0, PREVIEW_CHARS);
112
+ let preview = textOnly.slice(0, PREVIEW_CHARS);
87
113
  const lastNewline = preview.lastIndexOf('\n');
88
114
  if (lastNewline > PREVIEW_CHARS * 0.5) {
89
115
  preview = preview.slice(0, lastNewline);
90
116
  }
117
+ const truncatedText = `[Output truncated: ${size.toLocaleString()} chars → ${PREVIEW_CHARS} preview]\n\n${preview}\n\n... (${size - PREVIEW_CHARS} chars omitted)`;
91
118
  budgeted.push({
92
119
  type: 'tool_result',
93
120
  tool_use_id: part.tool_use_id,
94
- content: `[Output truncated: ${size.toLocaleString()} chars → ${PREVIEW_CHARS} preview]\n\n${preview}\n\n... (${size - PREVIEW_CHARS} chars omitted)`,
121
+ content: imageBlocks.length > 0
122
+ ? [{ type: 'text', text: truncatedText }, ...imageBlocks]
123
+ : truncatedText,
95
124
  is_error: part.is_error,
96
125
  });
97
126
  messageTotal += PREVIEW_CHARS + 200;
98
127
  continue;
99
128
  }
100
- // Per-message aggregate cap — once exceeded, truncate remaining results
129
+ // Per-message aggregate cap — once exceeded, truncate remaining results.
130
+ // Same rule: drop only the text payload; images survive so multi-image
131
+ // tool flows aren't silently broken when a single chatty text result
132
+ // pushes the message over the cap.
101
133
  if (messageTotal + size > MAX_TOOL_RESULTS_PER_MESSAGE_CHARS) {
102
134
  modified = true;
135
+ const placeholder = `[Output omitted: message budget exceeded (${MAX_TOOL_RESULTS_PER_MESSAGE_CHARS / 1000}K chars/msg)]`;
103
136
  budgeted.push({
104
137
  type: 'tool_result',
105
138
  tool_use_id: part.tool_use_id,
106
- content: `[Output omitted: message budget exceeded (${MAX_TOOL_RESULTS_PER_MESSAGE_CHARS / 1000}K chars/msg)]`,
139
+ content: imageBlocks.length > 0
140
+ ? [{ type: 'text', text: placeholder }, ...imageBlocks]
141
+ : placeholder,
107
142
  is_error: part.is_error,
108
143
  });
109
144
  messageTotal = MAX_TOOL_RESULTS_PER_MESSAGE_CHARS;
@@ -84,10 +84,10 @@ asksForLiveData: true | false
84
84
  ## Context anchors in input
85
85
 
86
86
  [CURRENT] user's message this turn (primary signal)
87
- [PREV_REPLY] last assistant reply, first ~300 chars (for follow-up references: "那 AAPL 呢", "and that one?", "the other ticker")
87
+ [PREV_REPLY] last assistant reply, first ~300 chars (for follow-up references: "and that one?", "the other ticker", "what about AAPL")
88
88
  [GOAL] original session prompt, first ~200 chars
89
89
 
90
- If [CURRENT] uses a deictic ("it", "that", "", "这个"), resolve intent/tier from [PREV_REPLY] or [GOAL].
90
+ If [CURRENT] uses a deictic ("it", "that", "the other one", or any equivalent in the user's language), resolve intent/tier from [PREV_REPLY] or [GOAL].
91
91
 
92
92
  ## Examples
93
93
 
@@ -100,17 +100,17 @@ Input:
100
100
  Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"CRCL","assetClass":"stock","market":"us","wantNews":true},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
101
101
 
102
102
  Input:
103
- [CURRENT] AAPL
104
- [PREV_REPLY] CRCL 当前价格 $96.18,最近因 Drift 诉讼下跌...
103
+ [CURRENT] what about AAPL
104
+ [PREV_REPLY] CRCL price $96.18, recently down on Drift lawsuit news...
105
105
  Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"AAPL","assetClass":"stock","market":"us","wantNews":false},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
106
106
 
107
107
  Input:
108
- [CURRENT] BTC 为什么跌了
108
+ [CURRENT] why did BTC drop
109
109
  Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"BTC","assetClass":"crypto","wantNews":true},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
110
110
 
111
111
  Input:
112
- [CURRENT] 不对,你应该看 NVDA 不是 AAPL
113
- [PREV_REPLY] AAPL 当前价格 $186.42
112
+ [CURRENT] no, you should be looking at NVDA, not AAPL
113
+ [PREV_REPLY] AAPL price $186.42
114
114
  Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"NVDA","assetClass":"stock","market":"us","wantNews":false},"needsPlanning":false,"isPushback":true,"asksForLiveData":true}
115
115
 
116
116
  Input:
@@ -5,9 +5,9 @@
5
5
  * Tools (ContentCreate / ContentAddAsset) write the library during agent
6
6
  * sessions; before this command, there was no way to see the resulting
7
7
  * spend without scripting against the JSON file. Verified 2026-05-04 in
8
- * a live session: user asked "我花了多少钱做这个", agent ran
9
- * `franklin content list` and got "no content subcommand", fell back to
10
- * estimating from memory.
8
+ * a live session: user asked "how much did I spend making this", agent
9
+ * ran `franklin content list` and got "no content subcommand", fell
10
+ * back to estimating from memory.
11
11
  *
12
12
  * Subcommands:
13
13
  * - list : table of id, type, title, status, spent/budget, assets
@@ -5,9 +5,9 @@
5
5
  * Tools (ContentCreate / ContentAddAsset) write the library during agent
6
6
  * sessions; before this command, there was no way to see the resulting
7
7
  * spend without scripting against the JSON file. Verified 2026-05-04 in
8
- * a live session: user asked "我花了多少钱做这个", agent ran
9
- * `franklin content list` and got "no content subcommand", fell back to
10
- * estimating from memory.
8
+ * a live session: user asked "how much did I spend making this", agent
9
+ * ran `franklin content list` and got "no content subcommand", fell
10
+ * back to estimating from memory.
11
11
  *
12
12
  * Subcommands:
13
13
  * - list : table of id, type, title, status, spent/budget, assets
@@ -2,7 +2,10 @@
2
2
  * franklin panel — launch the local web dashboard.
3
3
  */
4
4
  import chalk from 'chalk';
5
+ import fs from 'node:fs';
6
+ import path from 'node:path';
5
7
  import { createPanelServer } from '../panel/server.js';
8
+ import { BLOCKRUN_DIR } from '../config.js';
6
9
  export async function panelCommand(options) {
7
10
  const requestedPort = parseInt(options.port || '3100', 10);
8
11
  // Handle port-in-use by trying up to 20 subsequent ports silently.
@@ -25,9 +28,20 @@ export async function panelCommand(options) {
25
28
  // Bind to loopback only — the panel exposes wallet secrets on /api/wallet/secret
26
29
  // and a write-capable /api/wallet/import. Never expose these on a LAN.
27
30
  server.listen(port, '127.0.0.1', () => {
31
+ const url = `http://localhost:${port}`;
32
+ // Mirror what start.ts does for the auto-panel — persist the bound
33
+ // URL so any concurrent `franklin start` agent can read /#wallet
34
+ // off the same file. Without this, a user who disables panel
35
+ // autostart and runs `franklin panel` separately would still get
36
+ // the hardcoded 3100 default in the agent prompt.
37
+ try {
38
+ fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
39
+ fs.writeFileSync(path.join(BLOCKRUN_DIR, 'panel-url'), url, 'utf8');
40
+ }
41
+ catch { /* best-effort */ }
28
42
  console.log('');
29
43
  console.log(chalk.bold(' Franklin Panel'));
30
- console.log(chalk.dim(` http://localhost:${port}`) +
44
+ console.log(chalk.dim(` ${url}`) +
31
45
  (port !== requestedPort ? chalk.yellow(` (fell back from ${requestedPort})`) : ''));
32
46
  console.log('');
33
47
  console.log(chalk.dim(' Press Ctrl+C to stop.'));
@@ -35,7 +49,7 @@ export async function panelCommand(options) {
35
49
  // Try to open browser
36
50
  const open = process.platform === 'darwin' ? 'open' : process.platform === 'win32' ? 'start' : 'xdg-open';
37
51
  import('node:child_process').then(({ exec }) => {
38
- exec(`${open} http://localhost:${port}`);
52
+ exec(`${open} ${url}`);
39
53
  }).catch(() => { });
40
54
  });
41
55
  // Graceful shutdown