npm - @blockrun/franklin - Versions diffs - 3.15.87 → 3.15.89 - Mend

@blockrun/franklin 3.15.87 → 3.15.89

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

package/dist/agent/context.js +45 -6
package/dist/agent/evaluator.js +1 -1
package/dist/agent/llm.d.ts +16 -0
package/dist/agent/llm.js +62 -8
package/dist/agent/loop.js +32 -20
package/dist/agent/media-router.js +3 -3
package/dist/agent/optimize.js +42 -7
package/dist/agent/turn-analyzer.js +7 -7
package/dist/commands/content.d.ts +3 -3
package/dist/commands/content.js +3 -3
package/dist/commands/panel.js +16 -2
package/dist/commands/start.js +15 -2
package/dist/learnings/extractor.js +1 -1
package/dist/proxy/server.js +77 -13
package/dist/router/categories.js +4 -6
package/dist/router/index.js +10 -8
package/dist/social/a11y.d.ts +1 -1
package/dist/social/a11y.js +5 -4
package/dist/social/browser.js +63 -4
package/dist/stats/cost-log.d.ts +52 -17
package/dist/stats/cost-log.js +67 -17
package/dist/tools/prediction.debug.js +1 -1
package/dist/tools/prediction.js +1 -1
package/dist/tools/searchx.js +3 -3
package/dist/tools/wallet.js +1 -1
package/dist/ui/app.js +1 -1
package/package.json +1 -1

package/dist/agent/context.js CHANGED Viewed

@@ -5,6 +5,7 @@
 import fs from 'node:fs';
 import path from 'node:path';
 import { execSync } from 'node:child_process';
+import { BLOCKRUN_DIR } from '../config.js';
 import { getWalletAddress as getBaseWalletAddress } from '@blockrun/llm';
 import { Keypair } from '@solana/web3.js';
 import bs58 from 'bs58';
@@ -18,7 +19,7 @@ You are an interactive agent — not a chatbot. Use the tools available to you t
 # Franklin has hands
 You run with live tools by default:
-- **Wallet** — read your own chain, address, and USDC balance. Use this for any "what's my balance / how much money / 钱包余额 / wallet status" question instead of running \`franklin balance\` via Bash. Free, one call, never costs USDC.
+- **Wallet** — read your own chain, address, and USDC balance. Use this for any "what's my balance / how much money / wallet status" question instead of running \`franklin balance\` via Bash. Free, one call, never costs USDC.
 - **TradingMarket** — current stock / FX / crypto / commodity prices (BlockRun Gateway / Pyth; wallet pays automatically, $0.001/stock call, free for everything else).
 - **ExaAnswer / ExaSearch / ExaReadUrls** — cited current-events answers, semantic web search, clean URL content.
 - **WebSearch / WebFetch** — live web.
@@ -88,11 +89,11 @@ function getOutputEfficiencySection() {
     return `# Output Efficiency
 Go straight to the point. Lead with the action, not the reasoning. Do not restate what the user said.
-**No pre-tool narration.** Do NOT write things like "让我先 X...", "Let me read the file...", "I'll now search for...", "好的，让我研究一下...", "现在我来 X", "OK now I have everything I need", "完美！", "好，现在我完全明白了". These phrases are internal monologue — the user can see your tool calls directly and does not need step-by-step play-by-play. Just call the tool.
+**No pre-tool narration.** Do NOT write things like "Let me read the file...", "I'll now search for...", "Let me investigate...", "Now I'm going to X", "OK now I have everything I need", "Perfect!", "Got it, now I fully understand". These phrases are internal monologue — the user can see your tool calls directly and does not need step-by-step play-by-play. Just call the tool. The same rule applies in any language — no equivalent narration in non-English replies either.
 The exception: a single short sentence between tool calls is fine when it tells the user something they would otherwise miss — a finding ("Build passes — moving on to tests."), a course correction ("That approach won't work — switching to X."), or a one-line status before a long-running operation. One sentence per update is enough.
-**No internal-language leakage.** Always write your visible response in the same language the user is using. If your private reasoning happens in a different language (English while the user writes Chinese, Korean while the user writes Chinese, etc.), do NOT let phrases from that language appear in the user-facing text. The user should never see a stray "좋아", "OK now", or "Alright" in the middle of a Chinese reply.
+**No internal-language leakage.** Always write your visible response in the same language the user is using. If your private reasoning happens in a different language than the user's message, do NOT let phrases from that language appear in the user-facing text. The user should never see a stray "d'accord", "OK now", or "Alright" in the middle of a reply written in another language.
 Focus text output on:
 - Decisions that need the user's input
@@ -159,6 +160,19 @@ After delivering results, if a better data source exists, add one line at the en
 Do NOT check access before acting. Do NOT explain what you tried. Just deliver, then tip.`;
 }
 function getWalletKnowledgeSection() {
+    // Read the panel URL persisted by startPanelBackground (start.ts) so we
+    // surface the actual bound port — the panel auto-increments past 3100
+    // when the default is taken (e.g. a second franklin running). Falls back
+    // to the canonical default when the file is missing (panel disabled or
+    // never started this session).
+    let panelUrl = 'http://localhost:3100';
+    try {
+        const persisted = fs.readFileSync(path.join(BLOCKRUN_DIR, 'panel-url'), 'utf8').trim();
+        if (persisted.startsWith('http://') || persisted.startsWith('https://')) {
+            panelUrl = persisted;
+        }
+    }
+    catch { /* fall through to default */ }
     return `# Wallet Storage (answer "where is my wallet" directly — no searching)
 Franklin stores wallet keys in ~/.blockrun/. When the user asks about wallet location, answer from this map — do not grep or scan.
@@ -178,7 +192,32 @@ Franklin stores wallet keys in ~/.blockrun/. When the user asks about wallet loc
   - Use \`franklin stats\` / \`franklin content list\` instead of parsing files when the user asks "how much did I spend".
 - Programmatic access: import { getWalletAddress, getOrCreateWallet, getOrCreateSolanaWallet } from '@blockrun/llm'
-When the user asks about "my wallet" without qualifier, default to Base (it's the primary chain shown at launch). Only mention Solana if the chain file says solana or the user explicitly asks.`;
+When the user asks about "my wallet" without qualifier, default to Base (it's the primary chain shown at launch). Only mention Solana if the chain file says solana or the user explicitly asks.
+## Funding the wallet ("how do I deposit / recharge / fund / top up", in any language)
+When the user asks about depositing or funding USDC — in any language — do not describe the steps in chat. **Open the panel wallet page directly in their browser** using Bash, then confirm in chat what you opened and which chain is active.
+The exact wallet URL for this session:
+  ${panelUrl}/#wallet
+Bash command to open it (macOS \`open\`, Linux \`xdg-open\`, Windows \`start\`):
+  open ${panelUrl}/#wallet
+That page is where the deposit address, QR code, live balance, chain switcher, and back-up controls all live. The user lands on it instead of you reciting steps.
+After running \`open\`:
+- Tell the user one line: "Opened the wallet page — \`${panelUrl}/#wallet\`. Active chain: <base|solana>."
+- Read the active chain from ~/.blockrun/payment-chain so they know which network to send USDC on.
+- Mention USDC is the only accepted token; ETH/SOL on their own won't settle x402 calls.
+Hard rules:
+- Do NOT print the private key in chat. The panel reveals it behind a click.
+- Do NOT invent a \`franklin deposit\` CLI flow — there isn't one; the panel IS the funding surface.
+- Do NOT hand-craft a different localhost port; the URL above tracks the actual bound port (3100 might have been taken; the panel could be on 3101+).
+- If \`open\` fails (e.g. no GUI on a remote box), fall back to giving them the URL as plain text and tell them to paste it into a browser.`;
 }
 function getBlockRunApiSection() {
     return `# BlockRun Gateway API (the network you live on)
@@ -342,7 +381,7 @@ If you find yourself about to emit one of these, stop and call the tool instead.
 - "what are the odds on Polymarket / Kalshi specifically" → \`searchPolymarket\` (\$0.001) and \`searchKalshi\` (\$0.001) **in parallel**; comparing implied probability across the two venues is the high-value answer.
 - "where do Polymarket and Kalshi disagree / arbitrage" → \`crossPlatform\` (\$0.005) returns pre-matched pairs.
 - "who's profitable / top traders / who should I follow on Polymarket" → \`leaderboard\` (\$0.001) — global top wallets by P&L.
-- "analyze this wallet / can I copy this trader / 复制交易 / show me their P&L AND positions" → run \`walletProfile\` + \`walletPnl\` + \`walletPositions\` IN PARALLEL with the same address. Three \$0.005 calls = full picture for \$0.015. Do NOT \`Bash\`-curl \`data-api.polymarket.com\` directly — those are paid Predexon endpoints and going around them defeats the wallet-attached architecture. If just the profile is needed: \`walletProfile\` alone (single address → /wallet/{addr}, comma-list → batch).
+- "analyze this wallet / can I copy this trader / show me their P&L AND positions" → run \`walletProfile\` + \`walletPnl\` + \`walletPositions\` IN PARALLEL with the same address. Three \$0.005 calls = full picture for \$0.015. Do NOT \`Bash\`-curl \`data-api.polymarket.com\` directly — those are paid Predexon endpoints and going around them defeats the wallet-attached architecture. If just the profile is needed: \`walletProfile\` alone (single address → /wallet/{addr}, comma-list → batch).
 - "what are smart traders betting on right now / smart money flow across markets" → \`smartActivity\` (\$0.005) — markets where high-P&L wallets are positioning.
 - "show smart money on this specific Polymarket market / this condition_id" → \`smartMoney\` (\$0.005) with \`conditionId="<condition_id>"\`.
@@ -352,7 +391,7 @@ NEVER answer "what are the odds of X" from training-data memory — these are li
 - Run **TradingSignal** with default lookback (90d). Lower values leave MACD undefined.
 - The tool returns a **Verdict** section with \`Direction\`, \`Bull signals\`, \`Bear signals\`. Echo it directly. Do not soften "bullish" to "leaning slightly positive" — say what the data says.
 - If \`Data Notes\` lists an indicator as "insufficient data", state that explicitly to the user and suggest re-running with more days. Do NOT pretend that indicator is "neutral".
-- **Forbidden default**: "持有观望", "wait and see", "hold for clearer signals" — these are bugs when ≥2 indicators voted in a clear direction. Bail out to those phrases ONLY when (a) the Verdict says \`neutral\` AND (b) the bull/bear signal lists are both genuinely empty or one of each. Otherwise commit to a direction with the reasoning the tool already gave you.
+- **Forbidden default**: "wait and see" / "hold for clearer signals" / equivalent hedging in any language — these are bugs when ≥2 indicators voted in a clear direction. Bail out to that posture ONLY when (a) the Verdict says \`neutral\` AND (b) the bull/bear signal lists are both genuinely empty or one of each. Otherwise commit to a direction with the reasoning the tool already gave you.
 **Media generation (ImageGen / VideoGen).** Pass just the user's descriptive prompt and the output path — do NOT pass \`model\`. The harness picks the right model for the requested style + budget, refines loose prompts using a 5-slot template (scene / subject / details / use case / constraints), and surfaces both the refinement and a cost proposal through AskUser before spending. If the user wants their prompt left exactly as written, prefix it with \`///\` to skip refinement. Only pass \`model\` explicitly if the user named one specifically.`;
 }

package/dist/agent/evaluator.js CHANGED Viewed

@@ -48,7 +48,7 @@ Flag as ungrounded:
 - Invented specifics — names, numbers, dates the model produced without a tool call supporting them
 ### B. Tool-use refusal (NEW)
-If the user clearly asked for live-world data — a current price, today's news, the latest state of X — and the assistant's answer contains a refusal or deflection (e.g. "I can't provide real-time prices", "我无法提供实时数据", "check Yahoo Finance yourself", "as an AI I don't have access to live data"), that is also UNGROUNDED. Franklin HAS tools for this (TradingMarket for prices, ExaAnswer for current events, WebSearch for general web, etc.). Refusing to reach for them is the failure this check was built for.
+If the user clearly asked for live-world data — a current price, today's news, the latest state of X — and the assistant's answer contains a refusal or deflection (e.g. "I can't provide real-time prices", "I don't have access to live data", "check Yahoo Finance yourself", "as an AI I cannot fetch this"), that is also UNGROUNDED. The same rule applies in any language. Franklin HAS tools for this (TradingMarket for prices, ExaAnswer for current events, WebSearch for general web, etc.). Refusing to reach for them is the failure this check was built for.
 Flag as tool-use refusal:
 - "I can't check real-time prices"

package/dist/agent/llm.d.ts CHANGED Viewed

@@ -116,6 +116,15 @@ export declare class ModelClient {
     private cachedBaseWallet;
     private cachedSolanaWallet;
     private walletCacheTime;
+    /**
+     * USDC actually charged on the most recent x402 settlement, parsed
+     * from `details.amount` (micro-USDC → USD). Reset to 0 at the start
+     * of every `streamCompletion`, written by `signBasePayment` /
+     * `signSolanaPayment`. Callers read it via `getLastPaidUsd()` after
+     * the stream completes so franklin-stats.json records the real wallet
+     * charge instead of a token-catalog estimate.
+     */
+    private lastPaidUsd;
     private static WALLET_CACHE_TTL;
     constructor(opts: LLMClientOptions);
     /**
@@ -132,6 +141,13 @@ export declare class ModelClient {
      * default model.
      */
     private resolveVirtualModel;
+    /**
+     * USDC actually charged for the most recent stream. 0 if no payment
+     * was made (free model / cached / pre-stream error). Callers should
+     * read this after the stream finishes — before that it carries the
+     * value from a previous call.
+     */
+    getLastPaidUsd(): number;
     streamCompletion(request: ModelRequest, signal?: AbortSignal): AsyncGenerator<StreamChunk>;
     private parseNonStreamingMessage;
     /**

package/dist/agent/llm.js CHANGED Viewed

@@ -5,6 +5,7 @@
  */
 import { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK, } from '@blockrun/llm';
 import { USER_AGENT } from '../config.js';
+import { appendSettlementRow } from '../stats/cost-log.js';
 import { routeRequest, parseRoutingProfile } from '../router/index.js';
 import { ThinkTagStripper } from './think-tag-stripper.js';
 import { isNemotronProseModel, stripNemotronProse } from './nemotron-prose-stripper.js';
@@ -28,6 +29,19 @@ function parseTimeoutEnv(name) {
     const parsed = raw ? Number.parseInt(raw, 10) : NaN;
     return Number.isFinite(parsed) && parsed >= 0 ? parsed : null;
 }
+/**
+ * Convert an x402 `details.amount` field (USDC in micro-units, 6 decimals)
+ * to a USD float. Mirrors the SDK's `appendCostLog` math so the agent
+ * loop, the proxy, and `cost_log.jsonl` all agree to the cent.
+ */
+function paymentAmountToUsd(amount) {
+    if (amount === undefined || amount === null)
+        return 0;
+    const n = typeof amount === 'string' ? parseFloat(amount) : amount;
+    if (!Number.isFinite(n))
+        return 0;
+    return n / 1e6;
+}
 /**
  * Replace Unicode box-drawing characters with their ASCII equivalents.
  *
@@ -284,6 +298,15 @@ export class ModelClient {
     cachedBaseWallet = null;
     cachedSolanaWallet = null;
     walletCacheTime = 0;
+    /**
+     * USDC actually charged on the most recent x402 settlement, parsed
+     * from `details.amount` (micro-USDC → USD). Reset to 0 at the start
+     * of every `streamCompletion`, written by `signBasePayment` /
+     * `signSolanaPayment`. Callers read it via `getLastPaidUsd()` after
+     * the stream completes so franklin-stats.json records the real wallet
+     * charge instead of a token-catalog estimate.
+     */
+    lastPaidUsd = 0;
     static WALLET_CACHE_TTL = 30 * 60 * 1000; // 30 min TTL
     constructor(opts) {
         this.apiUrl = opts.apiUrl;
@@ -329,7 +352,19 @@ export class ModelClient {
         };
         return FALLBACKS[model] || 'nvidia/qwen3-coder-480b';
     }
+    /**
+     * USDC actually charged for the most recent stream. 0 if no payment
+     * was made (free model / cached / pre-stream error). Callers should
+     * read this after the stream finishes — before that it carries the
+     * value from a previous call.
+     */
+    getLastPaidUsd() {
+        return this.lastPaidUsd;
+    }
     async *streamCompletion(request, signal) {
+        // Reset the per-call charge tracker. signBasePayment / signSolanaPayment
+        // will set it when the gateway demands a 402 settlement.
+        this.lastPaidUsd = 0;
         // Resolve virtual models before any API call
         const resolvedModel = this.resolveVirtualModel(request.model);
         if (resolvedModel !== request.model) {
@@ -463,7 +498,7 @@ export class ModelClient {
             if (response.status === 402) {
                 if (this.debug)
                     console.error('[franklin] Payment required — signing...');
-                const paymentHeader = await this.signPayment(response);
+                const paymentHeader = await this.signPayment(response, request.model);
                 if (!paymentHeader) {
                     yield { kind: 'error', payload: { message: 'Payment signing failed' } };
                     return;
@@ -525,7 +560,7 @@ export class ModelClient {
                         signal: requestController.signal,
                     }), requestController, createModelTimeoutError('request', request.model, requestTimeoutMs), requestTimeoutMs);
                     if (response.status === 402) {
-                        const paymentHeader = await this.signPayment(response);
+                        const paymentHeader = await this.signPayment(response, request.model);
                         if (!paymentHeader) {
                             yield { kind: 'error', payload: { message: 'Payment signing failed' } };
                             return;
@@ -918,17 +953,17 @@ export class ModelClient {
         return { content: collected, usage, stopReason };
     }
     // ─── Payment ───────────────────────────────────────────────────────────
-    async signPayment(response) {
+    async signPayment(response, model) {
         try {
             if (this.chain === 'solana') {
-                return await this.signSolanaPayment(response);
+                return await this.signSolanaPayment(response, model);
             }
-            return await this.signBasePayment(response);
+            return await this.signBasePayment(response, model);
         }
         catch (err) {
             const msg = err.message || '';
             if (msg.includes('insufficient') || msg.includes('balance')) {
-                console.error(`[franklin] Insufficient USDC balance. Run 'franklin balance' to check.`);
+                console.error(`[franklin] Insufficient USDC balance. Open http://localhost:3100/#wallet to deposit (or run 'franklin balance').`);
             }
             else if (this.debug) {
                 console.error('[franklin] Payment error:', msg);
@@ -939,7 +974,7 @@ export class ModelClient {
             return null;
         }
     }
-    async signBasePayment(response) {
+    async signBasePayment(response, model) {
         // Refresh wallet cache after TTL to pick up balance/key changes
         if (!this.cachedBaseWallet || (Date.now() - this.walletCacheTime > ModelClient.WALLET_CACHE_TTL)) {
             const w = getOrCreateWallet();
@@ -954,6 +989,18 @@ export class ModelClient {
             throw new Error('No payment requirements in 402 response');
         const paymentRequired = parsePaymentRequired(paymentHeader);
         const details = extractPaymentDetails(paymentRequired);
+        this.lastPaidUsd = paymentAmountToUsd(details.amount);
+        // Mirror the SDK's appendCostLog write so cost_log.jsonl becomes a
+        // true wallet-truth ledger covering both SDK helper traffic AND the
+        // agent's main LLM stream (which uses this signer, not the SDK).
+        // Match SDK schema (model/wallet/network/client_kind) so every row
+        // is independently queryable.
+        appendSettlementRow('/v1/messages', this.lastPaidUsd, {
+            model,
+            wallet: wallet.address,
+            network: details.network || 'base-mainnet',
+            client_kind: 'AgentClient',
+        });
         const payload = await createPaymentPayload(wallet.privateKey, wallet.address, details.recipient, details.amount, details.network || 'eip155:8453', {
             resourceUrl: details.resource?.url || this.apiUrl,
             resourceDescription: details.resource?.description || 'BlockRun AI API call',
@@ -962,7 +1009,7 @@ export class ModelClient {
         });
         return { 'PAYMENT-SIGNATURE': payload };
     }
-    async signSolanaPayment(response) {
+    async signSolanaPayment(response, model) {
         if (!this.cachedSolanaWallet || (Date.now() - this.walletCacheTime > ModelClient.WALLET_CACHE_TTL)) {
             const w = await getOrCreateSolanaWallet();
             this.walletCacheTime = Date.now();
@@ -975,6 +1022,13 @@ export class ModelClient {
             throw new Error('No payment requirements in 402 response');
         const paymentRequired = parsePaymentRequired(paymentHeader);
         const details = extractPaymentDetails(paymentRequired, SOLANA_NETWORK);
+        this.lastPaidUsd = paymentAmountToUsd(details.amount);
+        appendSettlementRow('/v1/messages', this.lastPaidUsd, {
+            model,
+            wallet: wallet.address,
+            network: details.network || 'solana-mainnet',
+            client_kind: 'AgentClient',
+        });
         const secretBytes = await solanaKeyToBytes(wallet.privateKey);
         const feePayer = details.extra?.feePayer || details.recipient;
         const payload = await createSolanaPaymentPayload(secretBytes, wallet.address, details.recipient, details.amount, feePayer, {

package/dist/agent/loop.js CHANGED Viewed

@@ -291,10 +291,10 @@ export function looksLikeGatewayErrorAsText(parts) {
  * pinned by tool_choice when the user prompt actually references that
  * tool's domain — otherwise we let the smart generator pick from any tool.
  *
- * The motivating bug: a real-estate question ("可以还价 20% 吗") had its
- * answer flagged as ungrounded for citing $/sqft figures. The cheap
- * evaluator model picked TradingMarket as the missing tool because it
- * was the first example in the evaluator prompt. Forcing TradingMarket
+ * The motivating bug: a real-estate question ("can I negotiate 20% off")
+ * had its answer flagged as ungrounded for citing $/sqft figures. The
+ * cheap evaluator model picked TradingMarket as the missing tool because
+ * it was the first example in the evaluator prompt. Forcing TradingMarket
  * (a crypto-only tool) on a housing question made the retry useless.
  *
  * This function returns false for specialized tools when the prompt has
@@ -304,16 +304,18 @@ export function looksLikeGatewayErrorAsText(parts) {
  */
 function isToolRelevantToPrompt(toolName, promptLower) {
     // Crypto trading tools — need a ticker, "crypto", "coin", "swap", etc.
+    // English-only fast path; the LLM-level classifier handles other languages
+    // before this domain-relevance check runs.
     if (/^(Trading|DefiLlama|Jupiter|Base0x|Base0xGasless)/i.test(toolName)) {
-        return /\b(btc|eth|sol|xrp|doge|usdc|usdt|crypto|coin|token|defi|tvl|yield|swap|jupiter|uniswap|pump\.fun|solana|base chain|polygon|ethereum|币|代币|链上|做空|做多)\b/i.test(promptLower);
+        return /\b(btc|eth|sol|xrp|doge|usdc|usdt|crypto|coin|token|defi|tvl|yield|swap|jupiter|uniswap|pump\.fun|solana|base chain|polygon|ethereum)\b/i.test(promptLower);
     }
     // X.com search — need an @handle, "twitter", "tweet", "X.com"
     if (/^SearchX$/i.test(toolName) || /^PostToX$/i.test(toolName)) {
-        return /(@\w+|twitter|x\.com|tweet|推特)/i.test(promptLower);
+        return /(@\w+|twitter|x\.com|tweet)/i.test(promptLower);
     }
     // Image / video / music gen — need a creative-content request
     if (/^(ImageGen|VideoGen|MusicGen)$/i.test(toolName)) {
-        return /\b(image|picture|photo|video|clip|music|song|generate|create|render|draw|画|图|视频|音乐|歌)\b/i.test(promptLower);
+        return /\b(image|picture|photo|video|clip|music|song|generate|create|render|draw)\b/i.test(promptLower);
     }
     // General-purpose / file / shell tools — always relevant.
     return true;
@@ -860,7 +862,8 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         try {
             // Anchor 1: the user's current message (already in lastUserInput).
             // Anchor 2: first chunk of the previous assistant reply — gives the
-            // analyzer enough context to resolve deictic follow-ups like "那 AAPL 呢".
+            // analyzer enough context to resolve deictic follow-ups like
+            // "and that one?" / "what about AAPL".
             const lastAssistantText = (() => {
                 const prior = [...history.slice(0, -1)].reverse()
                     .find((m) => m.role === 'assistant');
@@ -1540,16 +1543,25 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 contextPct: Math.round(contextUsagePct),
             });
             // Record usage for stats tracking (franklin stats command).
+            // Prefer the real x402 charge from the gateway over a token-catalog
+            // estimate. The estimate is wrong any time the gateway applies
+            // promo pricing, prompt-cache discounts, or per-call flat fees
+            // (verified 2026-05-09 against cost_log.jsonl: token-based
+            // estimate said $34.79 across the same calls the wallet only
+            // paid $2.24 for — a 15× drift). estimateCost only fills in
+            // when no payment was made (free model / cached / pre-stream
+            // failure), where the gateway charge is genuinely 0.
+            //
             // Pass the fallback flag so franklin-stats.json's totalFallbacks +
             // per-model fallbackCount stay in sync with the audit log a few
             // lines below — same `turnFailedModels.size > 0` predicate, same
-            // turn. Without this, stats showed 0 fallbacks across 5150 real
-            // requests on a machine that visibly hit fallback paths in
-            // franklin-debug.log; `franklin insights` was therefore useless
-            // for spotting a hot routing chain.
-            const costEstimate = estimateCost(resolvedModel, inputTokens, usage.outputTokens, 1);
+            // turn.
+            const paidUsd = client.getLastPaidUsd();
+            const callCost = paidUsd > 0
+                ? paidUsd
+                : estimateCost(resolvedModel, inputTokens, usage.outputTokens, 1);
             const llmLatencyMs = Date.now() - llmCallStartedAt;
-            recordUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, llmLatencyMs, turnFailedModels.size > 0);
+            recordUsage(resolvedModel, inputTokens, usage.outputTokens, callCost, llmLatencyMs, turnFailedModels.size > 0);
             // ── Circuit breakers: prevent infinite-loop wallet drain ──
             // Per-turn $-cap was removed in v3.11.0 — runaway loops are caught by
             // MAX_TOOL_CALLS_PER_TURN (25) and MAX_TINY_RESPONSES (2) above; the
@@ -1576,7 +1588,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             else {
                 consecutiveTinyResponses = 0;
             }
-            recordSessionUsage(resolvedModel, inputTokens, usage.outputTokens, costEstimate, routingTier);
+            recordSessionUsage(resolvedModel, inputTokens, usage.outputTokens, callCost, routingTier);
             // Capture tool names invoked in this assistant turn. The AuditEntry
             // interface has had a `toolCalls?: string[]` slot since 3.15.11, but
             // nothing populated it — verified 2026-05-04 in a real Opus session
@@ -1599,7 +1611,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 model: resolvedModel,
                 inputTokens,
                 outputTokens: usage.outputTokens,
-                costUsd: costEstimate,
+                costUsd: callCost,
                 // Any failed model this turn means the model that finally
                 // succeeded was a fallback. Without this, audit log read 0%
                 // fallbacks across 4k entries — useless for diagnosing whether
@@ -1614,11 +1626,11 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             // Accumulate session-level totals for session meta
             sessionInputTokens += inputTokens;
             sessionOutputTokens += usage.outputTokens;
-            sessionCostUsd += costEstimate;
-            turnCostUsd += costEstimate;
+            sessionCostUsd += callCost;
+            turnCostUsd += callCost;
             const opusCost = (inputTokens / 1_000_000) * OPUS_PRICING.input
                 + (usage.outputTokens / 1_000_000) * OPUS_PRICING.output;
-            sessionSavedVsOpus += Math.max(0, opusCost - costEstimate);
+            sessionSavedVsOpus += Math.max(0, opusCost - callCost);
             // ── Max-spend guard ──
             // Session-level cost ceiling. Batch/scripted callers pass this to bound a
             // single run ("spend at most $0.50 for today's digest"); interactive
@@ -1843,7 +1855,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                     recordOutcome(lastRoutedCategory, lastRoutedModel, 'continued', turnToolCalls);
                 }
                 // End-of-turn marker for question-shaped responses. Real-world UX
-                // problem 2026-05-06: agent finishes a turn with "要我查一下 X 吗?"
+                // problem 2026-05-06: agent finishes a turn with "Should I look up X?"
                 // and stops; the user reads the silence as "Franklin died" twice in
                 // one hour. The Ink input box is already on screen but it's easy to
                 // miss after a long output scroll. A single trailing italic line

package/dist/agent/media-router.js CHANGED Viewed

@@ -64,15 +64,15 @@ Anti-slop rules:
 - Wrap literal text that must appear in the image in double quotes. Spell difficult words letter-by-letter.
 - One revision per turn — do not combine conflicting asks.
 - Natural language, not keyword-tag format.
-- refined_prompt stays in the same language as the user input. Chinese in → Chinese out.
+- refined_prompt stays in the same language as the user input.
 Examples:
 Input: "a photo of a cat on Mars, photoreal"
 Output: {"style":"photoreal","priority":"balanced","refined_prompt":"Eye-level photograph of a cat standing on the rust-colored Martian surface, late-afternoon low sun casting long shadows, distant canyon rim in the background, 50mm feel, shallow depth of field, editorial photo use, no watermark.","refinement_summary":"Added scene, lighting, lens, use case, constraint.","recommended":{"model":"google/nano-banana-pro","rationale":"Photoreal scenes — Nano Banana Pro has strong realism at moderate cost."},"cheaper":{"model":"google/nano-banana","rationale":"Same family, lower cost, slightly less detail."},"premium":{"model":"openai/gpt-image-2","rationale":"Best photoreal fidelity when budget allows."}}
-Input: "赛博朋克风格的动漫角色"
-Output: {"style":"anime","priority":"balanced","refined_prompt":"赛博朋克风格的动漫角色，站在霓虹灯映照的雨夜街道上，身穿合成纤维夹克与金属反光饰件，头顶全息广告牌漂浮，低角度视角，强烈青粉对比，海报用，居中构图。","refinement_summary":"补全了场景、光线、材质、用途、构图。","recommended":{"model":"zai/cogview-4","rationale":"CogView-4 specializes in stylized/anime imagery."},"cheaper":{"model":"google/nano-banana","rationale":"Cheaper but less stylized."},"premium":{"model":"xai/grok-imagine-image-pro","rationale":"Premium detail for complex scenes."}}
+Input: "cyberpunk-style anime character"
+Output: {"style":"anime","priority":"balanced","refined_prompt":"Cyberpunk-style anime character standing on a neon-lit rainy street at night, wearing a synthetic-fiber jacket with metallic reflective accents, holographic billboards floating overhead, low-angle view, strong cyan-and-pink contrast, poster use, centered composition.","refinement_summary":"Added scene, lighting, materials, use case, composition.","recommended":{"model":"zai/cogview-4","rationale":"CogView-4 specializes in stylized/anime imagery."},"cheaper":{"model":"google/nano-banana","rationale":"Cheaper but less stylized."},"premium":{"model":"xai/grok-imagine-image-pro","rationale":"Premium detail for complex scenes."}}
 Input: "a 10-second cinematic drone shot over Tokyo at night"
 Output: {"style":"concept","priority":"quality","refined_prompt":null,"refinement_summary":"Already well-specified.","recommended":{"model":"bytedance/seedance-2.0","rationale":"Seedance 2.0 delivers the best cinematic quality."},"cheaper":{"model":"bytedance/seedance-2.0-fast","rationale":"Faster + cheaper, minor quality trade-off."},"premium":{"model":null,"rationale":"2.0 is already the top tier."}}

package/dist/agent/optimize.js CHANGED Viewed

@@ -77,33 +77,68 @@ export function budgetToolResults(history) {
                 budgeted.push(part);
                 continue;
             }
-            const content = typeof part.content === 'string' ? part.content : JSON.stringify(part.content);
-            const size = content.length;
-            // Per-tool cap
+            // Decompose tool_result content. Two shapes are valid per
+            // CapabilityOutcome (types.ts:38): a bare string OR an array of
+            // text + image segments. Pre-fix, we collapsed array content to
+            // JSON.stringify(content), which made base64 image bytes count
+            // toward the char budget — a 275KB image would tip past the 32K
+            // cap, the whole content array (including the image block) got
+            // replaced with a truncated text preview, and the image was
+            // destroyed before reaching the wire. Verified 2026-05-10 from a
+            // gateway log (sonnet-4.6, ~21K input tokens — would have been
+            // ~150K with the image present): the tool_result body was a
+            // 2KB self-referential string starting with "[Output truncated:
+            // 275,952 chars → 2000 preview]\n\n[{\"type\":\"text\"…". Vision
+            // hallucinated everything in that session.
+            //
+            // Fix: only the TEXT segments count toward MAX_TOOL_RESULT_CHARS.
+            // Image segments pass through untouched. If text is over budget,
+            // truncate ONLY the text — keep the image array alongside.
+            const isArrayContent = Array.isArray(part.content);
+            const textBlocks = isArrayContent
+                ? part.content.filter((b) => b.type === 'text')
+                : [];
+            const imageBlocks = isArrayContent
+                ? part.content.filter((b) => b.type === 'image')
+                : [];
+            const textOnly = isArrayContent
+                ? textBlocks.map(b => b.text).join('\n')
+                : part.content;
+            const size = textOnly.length;
+            // Per-tool cap (text-only — images stay)
             if (size > MAX_TOOL_RESULT_CHARS) {
                 modified = true;
                 // Truncate at line boundary for cleaner output
-                let preview = content.slice(0, PREVIEW_CHARS);
+                let preview = textOnly.slice(0, PREVIEW_CHARS);
                 const lastNewline = preview.lastIndexOf('\n');
                 if (lastNewline > PREVIEW_CHARS * 0.5) {
                     preview = preview.slice(0, lastNewline);
                 }
+                const truncatedText = `[Output truncated: ${size.toLocaleString()} chars → ${PREVIEW_CHARS} preview]\n\n${preview}\n\n... (${size - PREVIEW_CHARS} chars omitted)`;
                 budgeted.push({
                     type: 'tool_result',
                     tool_use_id: part.tool_use_id,
-                    content: `[Output truncated: ${size.toLocaleString()} chars → ${PREVIEW_CHARS} preview]\n\n${preview}\n\n... (${size - PREVIEW_CHARS} chars omitted)`,
+                    content: imageBlocks.length > 0
+                        ? [{ type: 'text', text: truncatedText }, ...imageBlocks]
+                        : truncatedText,
                     is_error: part.is_error,
                 });
                 messageTotal += PREVIEW_CHARS + 200;
                 continue;
             }
-            // Per-message aggregate cap — once exceeded, truncate remaining results
+            // Per-message aggregate cap — once exceeded, truncate remaining results.
+            // Same rule: drop only the text payload; images survive so multi-image
+            // tool flows aren't silently broken when a single chatty text result
+            // pushes the message over the cap.
             if (messageTotal + size > MAX_TOOL_RESULTS_PER_MESSAGE_CHARS) {
                 modified = true;
+                const placeholder = `[Output omitted: message budget exceeded (${MAX_TOOL_RESULTS_PER_MESSAGE_CHARS / 1000}K chars/msg)]`;
                 budgeted.push({
                     type: 'tool_result',
                     tool_use_id: part.tool_use_id,
-                    content: `[Output omitted: message budget exceeded (${MAX_TOOL_RESULTS_PER_MESSAGE_CHARS / 1000}K chars/msg)]`,
+                    content: imageBlocks.length > 0
+                        ? [{ type: 'text', text: placeholder }, ...imageBlocks]
+                        : placeholder,
                     is_error: part.is_error,
                 });
                 messageTotal = MAX_TOOL_RESULTS_PER_MESSAGE_CHARS;

package/dist/agent/turn-analyzer.js CHANGED Viewed

@@ -84,10 +84,10 @@ asksForLiveData: true | false
 ## Context anchors in input
 [CURRENT]    user's message this turn (primary signal)
-[PREV_REPLY] last assistant reply, first ~300 chars (for follow-up references: "那 AAPL 呢", "and that one?", "the other ticker")
+[PREV_REPLY] last assistant reply, first ~300 chars (for follow-up references: "and that one?", "the other ticker", "what about AAPL")
 [GOAL]       original session prompt, first ~200 chars
-If [CURRENT] uses a deictic ("it", "that", "那", "这个"), resolve intent/tier from [PREV_REPLY] or [GOAL].
+If [CURRENT] uses a deictic ("it", "that", "the other one", or any equivalent in the user's language), resolve intent/tier from [PREV_REPLY] or [GOAL].
 ## Examples
@@ -100,17 +100,17 @@ Input:
 Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"CRCL","assetClass":"stock","market":"us","wantNews":true},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
 Input:
-[CURRENT] 那 AAPL 呢
-[PREV_REPLY] CRCL 当前价格 $96.18，最近因 Drift 诉讼下跌...
+[CURRENT] what about AAPL
+[PREV_REPLY] CRCL price $96.18, recently down on Drift lawsuit news...
 Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"AAPL","assetClass":"stock","market":"us","wantNews":false},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
 Input:
-[CURRENT] BTC 为什么跌了
+[CURRENT] why did BTC drop
 Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"BTC","assetClass":"crypto","wantNews":true},"needsPlanning":false,"isPushback":false,"asksForLiveData":true}
 Input:
-[CURRENT] 不对，你应该看 NVDA 不是 AAPL
-[PREV_REPLY] AAPL 当前价格 $186.42
+[CURRENT] no, you should be looking at NVDA, not AAPL
+[PREV_REPLY] AAPL price $186.42
 Output: {"tier":"COMPLEX","intent":{"kind":"ticker","symbol":"NVDA","assetClass":"stock","market":"us","wantNews":false},"needsPlanning":false,"isPushback":true,"asksForLiveData":true}
 Input:

package/dist/commands/content.d.ts CHANGED Viewed

@@ -5,9 +5,9 @@
  * Tools (ContentCreate / ContentAddAsset) write the library during agent
  * sessions; before this command, there was no way to see the resulting
  * spend without scripting against the JSON file. Verified 2026-05-04 in
- * a live session: user asked "我花了多少钱做这个", agent ran
- * `franklin content list` and got "no content subcommand", fell back to
- * estimating from memory.
+ * a live session: user asked "how much did I spend making this", agent
+ * ran `franklin content list` and got "no content subcommand", fell
+ * back to estimating from memory.
  *
  * Subcommands:
  *   - list             : table of id, type, title, status, spent/budget, assets

package/dist/commands/content.js CHANGED Viewed

@@ -5,9 +5,9 @@
  * Tools (ContentCreate / ContentAddAsset) write the library during agent
  * sessions; before this command, there was no way to see the resulting
  * spend without scripting against the JSON file. Verified 2026-05-04 in
- * a live session: user asked "我花了多少钱做这个", agent ran
- * `franklin content list` and got "no content subcommand", fell back to
- * estimating from memory.
+ * a live session: user asked "how much did I spend making this", agent
+ * ran `franklin content list` and got "no content subcommand", fell
+ * back to estimating from memory.
  *
  * Subcommands:
  *   - list             : table of id, type, title, status, spent/budget, assets

package/dist/commands/panel.js CHANGED Viewed

@@ -2,7 +2,10 @@
  * franklin panel — launch the local web dashboard.
  */
 import chalk from 'chalk';
+import fs from 'node:fs';
+import path from 'node:path';
 import { createPanelServer } from '../panel/server.js';
+import { BLOCKRUN_DIR } from '../config.js';
 export async function panelCommand(options) {
     const requestedPort = parseInt(options.port || '3100', 10);
     // Handle port-in-use by trying up to 20 subsequent ports silently.
@@ -25,9 +28,20 @@ export async function panelCommand(options) {
         // Bind to loopback only — the panel exposes wallet secrets on /api/wallet/secret
         // and a write-capable /api/wallet/import. Never expose these on a LAN.
         server.listen(port, '127.0.0.1', () => {
+            const url = `http://localhost:${port}`;
+            // Mirror what start.ts does for the auto-panel — persist the bound
+            // URL so any concurrent `franklin start` agent can read /#wallet
+            // off the same file. Without this, a user who disables panel
+            // autostart and runs `franklin panel` separately would still get
+            // the hardcoded 3100 default in the agent prompt.
+            try {
+                fs.mkdirSync(BLOCKRUN_DIR, { recursive: true });
+                fs.writeFileSync(path.join(BLOCKRUN_DIR, 'panel-url'), url, 'utf8');
+            }
+            catch { /* best-effort */ }
             console.log('');
             console.log(chalk.bold('  Franklin Panel'));
-            console.log(chalk.dim(`  http://localhost:${port}`) +
+            console.log(chalk.dim(`  ${url}`) +
                 (port !== requestedPort ? chalk.yellow(`  (fell back from ${requestedPort})`) : ''));
             console.log('');
             console.log(chalk.dim('  Press Ctrl+C to stop.'));
@@ -35,7 +49,7 @@ export async function panelCommand(options) {
             // Try to open browser
             const open = process.platform === 'darwin' ? 'open' : process.platform === 'win32' ? 'start' : 'xdg-open';
             import('node:child_process').then(({ exec }) => {
-                exec(`${open} http://localhost:${port}`);
+                exec(`${open} ${url}`);
             }).catch(() => { });
         });
         // Graceful shutdown