npm - @blockrun/franklin - Versions diffs - 3.8.19 → 3.8.21 - Mend

@blockrun/franklin 3.8.19 → 3.8.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/agent/context.js +7 -5
package/dist/agent/intent-prefetch.d.ts +64 -0
package/dist/agent/intent-prefetch.js +291 -0
package/dist/agent/loop.js +31 -0
package/dist/agent/types.d.ts +2 -0
package/dist/commands/start.js +2 -0
package/package.json +1 -1

package/dist/agent/context.js CHANGED Viewed

@@ -14,11 +14,13 @@ function getCoreInstructions() {
 You are an interactive agent — not a chatbot. Use the tools available to you to accomplish tasks. Your job is to be a highly capable collaborator who takes initiative, makes progress, and delivers results.
-# You are Franklin, not Claude.ai
-The model weights you're running on were trained by other teams, but your instructions and tools are Franklin's. Specifically:
-- **You have live tools.** TradingMarket returns current stock / FX / crypto / commodity prices. ExaAnswer returns cited current-events answers. WebSearch + WebFetch read the live web. The wallet pays for them automatically.
-- **Refusal is a bug, not a safety feature.** If the user asks for a current price, today's news, or live market state, saying "I can't provide real-time data" or "check Yahoo Finance" is a Claude.ai reflex — it's correct for a chatbot with no tools, wrong for Franklin. Call the tool.
-- **Franklin's brand is spending for real work.** $0.001 for a stock quote is exactly what the wallet is for. Don't hesitate on cents.
+# Franklin has hands
+You run with live tools by default:
+- **TradingMarket** — current stock / FX / crypto / commodity prices (BlockRun Gateway / Pyth; wallet pays automatically, $0.001/stock call, free for everything else).
+- **ExaAnswer / ExaSearch / ExaReadUrls** — cited current-events answers, semantic web search, clean URL content.
+- **WebSearch / WebFetch** — live web.
+When a user asks for a current price, today's news, or any live-world state, **call the tool**. Refusal phrases like "I can't provide real-time data" or "check Yahoo Finance" are a bug — they belong to systems without tools. Your brand is spending USDC to get real answers; $0.001 for a stock quote is exactly what the wallet is for. Don't hesitate on cents.
 # System
 - All text you output outside of tool use is displayed to the user. Use markdown for formatting.

package/dist/agent/intent-prefetch.d.ts ADDED Viewed

@@ -0,0 +1,64 @@
+/**
+ * Proactive prefetch for live-world questions.
+ *
+ * Why this exists:
+ * When a user asks "what is CRCL trading at?", the agent has TradingMarket
+ * in CORE and the system prompt demands it be used. The evaluator catches
+ * refusals. The auto-retry loop feeds findings back. All four layers run
+ * every turn. It still isn't enough — Sonnet 4.6 (the strongest model we
+ * route to) confidently answers "Circle is a private company" from 2022
+ * training data, refusing the tool across retries.
+ *
+ * The lesson: every mechanism above depends on the model *agreeing* to call
+ * a tool. When the model is confident-but-wrong about current-world state,
+ * it doesn't reach for the tool at all. No prompt tweak will fix this —
+ * fine-tuning priors beat prompt priors.
+ *
+ * Harness-level fix: prefetch the data *before* the model decides. When
+ * the user's message contains a ticker or a current-events ask, Franklin's
+ * harness spends the $0.001 unprompted, injects the result into context,
+ * and then the model answers a question it already has evidence for —
+ * not a question its training data has a prior about.
+ *
+ * This is the pattern Anthropic's harness-design writeup calls out:
+ * "Remove components that encode a stale assumption (the model will
+ * reach for tools on its own), replace with components that handle the
+ * coordination gap (harness fetches, model synthesizes)."
+ */
+import type { ModelClient } from './llm.js';
+import type { Dialogue } from './types.js';
+import type { MarketCode } from '../trading/providers/standard-models.js';
+/** Classifier verdict: the user is asking about one tradable instrument. */
+export interface TickerIntent {
+    kind: 'ticker';
+    /** Symbol token taken from the classifier reply, upper-cased by the parser (e.g. `CRCL`, `7203`, `BTC`). */
+    symbol: string;
+    /** Exchange market. The parser sets it for stocks and leaves it out for crypto; prefetch falls back to `us` when it is absent on a stock. */
+    market?: MarketCode;
+    /** Asset class — stock prefers paid Gateway path; crypto stays free on CoinGecko. */
+    assetClass: 'stock' | 'crypto';
+    /** Does the user also want the news / "why did it move"? When true, prefetch also requests an ExaAnswer summary. */
+    wantNews: boolean;
+}
+/** `null` means no prefetch intent was detected for the message. */
+export type Intent = TickerIntent | null;
+/** Outcome of one prefetch run: the text to inject plus bookkeeping for the caller. */
+export interface PrefetchResult {
+    /** Markdown snippet that gets prepended to the user's message for the LLM. It contains one bullet per lookup, including bullets for lookups that failed. */
+    contextBlock: string;
+    /** User-visible status line ("*Prefetched ... · cost ...*"). */
+    statusLine: string;
+    /** Spend attributed to prefetch: the sum of the per-lookup cost figures. For telemetry + Markets panel display. */
+    costUsd: number;
+    /** Did any prefetch call actually succeed? If all failed, the caller may
+     *  decide to skip injection entirely and let the model try its own way. */
+    anyOk: boolean;
+}
+/**
+ * Parse the classifier's reply into an Intent.
+ *
+ * Only the first line is examined. It must start with a well-formed
+ * `STOCK <TICKER> <MARKET> <YES|NO>` or `CRYPTO <SYMBOL> <YES|NO>` directive
+ * (case-insensitive); `NONE`, an empty reply, an unknown market code or any
+ * other shape yields `null`.
+ */
+export declare function parseIntentReply(reply: string): Intent;
+/**
+ * Ask the classifier model whether the user's message warrants a prefetch.
+ *
+ * Resolves to `null` when prefetch is disabled (`FRANKLIN_NO_PREFETCH=1`),
+ * when the input is too short to be worth classifying, or when the classifier
+ * call fails or times out. It does not reject.
+ */
+export declare function classifyIntent(userInput: string, client: ModelClient): Promise<Intent>;
+/** Run the prefetch for an intent. Concurrent fan-out for price + news. Resolves to `null` for a `null` intent. */
+export declare function prefetchForIntent(intent: Intent, client: ModelClient): Promise<PrefetchResult | null>;
+/**
+ * Augment a user message with the prefetch context block prepended. The
+ * final model sees the data as part of the "incoming" user turn — no
+ * synthetic tool_use fabrication needed, history stays clean.
+ */
+export declare function augmentUserMessage(originalInput: string, prefetch: PrefetchResult): Dialogue;

package/dist/agent/intent-prefetch.js ADDED Viewed

@@ -0,0 +1,291 @@
+/**
+ * Proactive prefetch for live-world questions.
+ *
+ * Why this exists:
+ * When a user asks "what is CRCL trading at?", the agent has TradingMarket
+ * in CORE and the system prompt demands it be used. The evaluator catches
+ * refusals. The auto-retry loop feeds findings back. All four layers run
+ * every turn. It still isn't enough — Sonnet 4.6 (the strongest model we
+ * route to) confidently answers "Circle is a private company" from 2022
+ * training data, refusing the tool across retries.
+ *
+ * The lesson: every mechanism above depends on the model *agreeing* to call
+ * a tool. When the model is confident-but-wrong about current-world state,
+ * it doesn't reach for the tool at all. No prompt tweak will fix this —
+ * fine-tuning priors beat prompt priors.
+ *
+ * Harness-level fix: prefetch the data *before* the model decides. When
+ * the user's message contains a ticker or a current-events ask, Franklin's
+ * harness spends the $0.001 unprompted, injects the result into context,
+ * and then the model answers a question it already has evidence for —
+ * not a question its training data has a prior about.
+ *
+ * This is the pattern Anthropic's harness-design writeup calls out:
+ * "Remove components that encode a stale assumption (the model will
+ * reach for tools on its own), replace with components that handle the
+ * coordination gap (harness fetches, model synthesizes)."
+ */
+import { getStockPrice, getPrice } from '../trading/data.js';
+// ─── Classifier ──────────────────────────────────────────────────────────
+// Model id used for the intent classifier. The FRANKLIN_PREFETCH_MODEL
+// environment variable overrides the default.
+const CLASSIFIER_MODEL = process.env.FRANKLIN_PREFETCH_MODEL || 'nvidia/nemotron-ultra-253b';
+// Milliseconds after which classifyIntent aborts the classifier request.
+const CLASSIFIER_TIMEOUT_MS = 2_500;
+// System prompt for the classifier. The output grammar described here
+// (STOCK / CRYPTO / NONE directives and the market-code list) has to stay in
+// sync with the regexes and the market whitelist in parseIntentReply.
+const CLASSIFIER_PROMPT = `You extract PREFETCH INTENT from a user message for a CLI agent that has live market-data tools.
+Your job: decide whether Franklin should fetch live data BEFORE the main model answers, so the answer is grounded in real data instead of model memory.
+Output one of:
+1. STOCK <TICKER> <MARKET> <NEWS>
+   When the user asks about a specific publicly-traded equity — by ticker (CRCL, AAPL, NVDA, 7203, 0005) or by company name that maps to one (Circle → CRCL, Apple → AAPL, Toyota → 7203, HSBC → 0005).
+   MARKET: us | hk | jp | kr | gb | de | fr | nl | ie | lu | cn | ca
+   NEWS: yes if the user also asks "why / what happened / analysis"; no otherwise.
+   Default market: us.
+2. CRYPTO <SYMBOL> <NEWS>
+   When the user asks about a cryptocurrency by symbol or name (BTC, ETH, Bitcoin, Ethereum, SOL, Solana).
+   NEWS: yes if asks why / recent news.
+3. NONE
+   Any other message: greetings, coding questions, general chat, questions about non-traded entities.
+Rules:
+- If the company could be either public or private and you're unsure, assume PUBLIC and emit STOCK with your best ticker guess. The tool will 404 gracefully if wrong.
+- One output line only. No explanation. No punctuation beyond what's shown.
+- Ticker in UPPERCASE.
+Examples:
+User: 帮我看看 CRCL 股票                → STOCK CRCL us no
+User: should I sell Circle stock?      → STOCK CRCL us no
+User: why did CRCL drop this week      → STOCK CRCL us yes
+User: BTC 现在价格                       → CRYPTO BTC no
+User: 为什么以太坊跌了                   → CRYPTO ETH yes
+User: Toyota 股价                        → STOCK 7203 jp no
+User: hi how are you                   → NONE
+User: fix the bug in foo.ts            → NONE
+Answer with just the one-line directive.`;
+/** Parse the classifier's one-line reply. Very strict — any junk → null. */
+export function parseIntentReply(reply) {
+    const line = reply.trim().split('\n')[0].trim().toUpperCase();
+    if (!line || line.startsWith('NONE'))
+        return null;
+    const stockMatch = line.match(/^STOCK\s+([A-Z0-9.\-]+)\s+([A-Z]{2})\s+(YES|NO)\b/);
+    if (stockMatch) {
+        const market = stockMatch[2].toLowerCase();
+        const validMarkets = ['us', 'hk', 'jp', 'kr', 'gb', 'de', 'fr', 'nl', 'ie', 'lu', 'cn', 'ca'];
+        if (!validMarkets.includes(market))
+            return null;
+        return {
+            kind: 'ticker',
+            symbol: stockMatch[1],
+            market: market,
+            assetClass: 'stock',
+            wantNews: stockMatch[3] === 'YES',
+        };
+    }
+    const cryptoMatch = line.match(/^CRYPTO\s+([A-Z0-9.\-]+)\s+(YES|NO)\b/);
+    if (cryptoMatch) {
+        return {
+            kind: 'ticker',
+            symbol: cryptoMatch[1],
+            assetClass: 'crypto',
+            wantNews: cryptoMatch[2] === 'YES',
+        };
+    }
+    return null;
+}
+export async function classifyIntent(userInput, client) {
+    if (process.env.FRANKLIN_NO_PREFETCH === '1')
+        return null;
+    const trimmed = userInput.trim();
+    // Short inputs (<12 chars) are rarely asking for market data — skip the call entirely.
+    if (trimmed.length < 12)
+        return null;
+    const ctrl = new AbortController();
+    const timer = setTimeout(() => ctrl.abort(), CLASSIFIER_TIMEOUT_MS);
+    try {
+        const result = await client.complete({
+            model: CLASSIFIER_MODEL,
+            system: CLASSIFIER_PROMPT,
+            messages: [{ role: 'user', content: trimmed.slice(0, 800) }],
+            tools: [],
+            max_tokens: 24,
+        }, ctrl.signal);
+        let raw = '';
+        for (const part of result.content) {
+            if (typeof part === 'object' && part.type === 'text' && part.text)
+                raw += part.text;
+        }
+        return parseIntentReply(raw);
+    }
+    catch {
+        return null;
+    }
+    finally {
+        clearTimeout(timer);
+    }
+}
+// ─── Prefetch dispatcher ─────────────────────────────────────────────────
+function formatUsd(n) {
+    if (!Number.isFinite(n))
+        return '—';
+    if (n >= 100)
+        return `$${n.toFixed(2)}`;
+    return `$${n.toFixed(4).replace(/0+$/, '').replace(/\.$/, '')}`;
+}
+/** Run the prefetch for an intent. Concurrent fan-out for price + news. */
+export async function prefetchForIntent(intent, client) {
+    if (!intent)
+        return null;
+    const tasks = [];
+    let cost = 0;
+    // 1. Price
+    if (intent.kind === 'ticker') {
+        if (intent.assetClass === 'stock') {
+            const market = intent.market || 'us';
+            tasks.push(getStockPrice(intent.symbol, market).then((r) => {
+                if (typeof r === 'string') {
+                    return { ok: false, line: `- ${intent.symbol} (${market}): lookup failed — ${r.slice(0, 80)}`, cost: 0 };
+                }
+                return {
+                    ok: true,
+                    line: `- ${intent.symbol} (${market}) live price: ${formatUsd(r.price)} (BlockRun Gateway / Pyth)`,
+                    cost: 0.001,
+                };
+            }));
+        }
+        else {
+            // crypto
+            tasks.push(getPrice(intent.symbol, 'crypto').then((r) => {
+                if (typeof r === 'string') {
+                    return { ok: false, line: `- ${intent.symbol}: lookup failed — ${r.slice(0, 80)}`, cost: 0 };
+                }
+                const delta = Number.isFinite(r.change24h) ? ` (${r.change24h > 0 ? '+' : ''}${r.change24h.toFixed(2)}% 24h)` : '';
+                return {
+                    ok: true,
+                    line: `- ${intent.symbol} live price: ${formatUsd(r.price)}${delta} (CoinGecko)`,
+                    cost: 0,
+                };
+            }));
+        }
+    }
+    // 2. News, if asked
+    if (intent.kind === 'ticker' && intent.wantNews) {
+        const query = intent.assetClass === 'stock'
+            ? `Why did ${intent.symbol} stock move over the past week? Recent news and catalysts for ${intent.symbol} as of today.`
+            : `What are the most important recent news events affecting ${intent.symbol} cryptocurrency in the past week?`;
+        tasks.push(exaAnswerTry(query, client).then(snippet => {
+            if (!snippet) {
+                return { ok: false, line: `- Recent ${intent.symbol} news: ExaAnswer lookup failed`, cost: 0 };
+            }
+            return {
+                ok: true,
+                line: `- Recent ${intent.symbol} news (ExaAnswer synthesized):\n  ${snippet.replace(/\n/g, '\n  ')}`,
+                cost: 0.01,
+            };
+        }));
+    }
+    const results = await Promise.all(tasks);
+    const anyOk = results.some(r => r.ok);
+    cost = results.reduce((s, r) => s + r.cost, 0);
+    const lines = results.map(r => r.line).filter(Boolean);
+    if (lines.length === 0)
+        return null;
+    const contextBlock = [
+        '[FRANKLIN HARNESS PREFETCH]',
+        `The harness automatically fetched live data before your turn. Use these facts as ground truth — do NOT override them with training-data assumptions.`,
+        '',
+        ...lines,
+        '',
+    ].join('\n');
+    const statusLine = `*Prefetched ${lines.length} source${lines.length === 1 ? '' : 's'} · cost ${formatUsd(cost)}*`;
+    return { contextBlock, statusLine, costUsd: cost, anyOk };
+}
+/** Thin wrapper: call ExaAnswer via the gateway, return first-paragraph text or null. */
+async function exaAnswerTry(query, client) {
+    try {
+        // Reuse the BlockRun gateway chat endpoint the ExaAnswer tool already uses.
+        // We inline the request rather than invoke the capability through the full
+        // tool framework because prefetch runs outside the agent loop — no
+        // permission prompt, no streaming.
+        const { loadChain, API_URLS } = await import('../config.js');
+        const chain = loadChain();
+        const apiUrl = API_URLS[chain];
+        void client; // (future: unify the paid-endpoint client so we reuse wallet caching)
+        const res = await fetch(`${apiUrl}/v1/exa/answer`, {
+            method: 'POST',
+            headers: { 'Content-Type': 'application/json' },
+            body: JSON.stringify({ query }),
+        });
+        if (res.status === 402) {
+            const payHdr = await extractPaymentReq(res);
+            if (!payHdr)
+                return null;
+            const { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK } = await import('@blockrun/llm');
+            const paymentRequired = parsePaymentRequired(payHdr);
+            let headers = { 'Content-Type': 'application/json' };
+            if (chain === 'solana') {
+                const wallet = await getOrCreateSolanaWallet();
+                const details = extractPaymentDetails(paymentRequired, SOLANA_NETWORK);
+                const secretBytes = await solanaKeyToBytes(wallet.privateKey);
+                const feePayer = details.extra?.feePayer || details.recipient;
+                const payload = await createSolanaPaymentPayload(secretBytes, wallet.address, details.recipient, details.amount, feePayer, {
+                    resourceUrl: details.resource?.url || `${apiUrl}/v1/exa/answer`,
+                    resourceDescription: 'Franklin prefetch ExaAnswer',
+                    maxTimeoutSeconds: details.maxTimeoutSeconds || 60,
+                    extra: details.extra,
+                });
+                headers = { ...headers, 'PAYMENT-SIGNATURE': payload };
+            }
+            else {
+                const wallet = getOrCreateWallet();
+                const details = extractPaymentDetails(paymentRequired);
+                const payload = await createPaymentPayload(wallet.privateKey, wallet.address, details.recipient, details.amount, details.network || 'eip155:8453', {
+                    resourceUrl: details.resource?.url || `${apiUrl}/v1/exa/answer`,
+                    resourceDescription: 'Franklin prefetch ExaAnswer',
+                    maxTimeoutSeconds: details.maxTimeoutSeconds || 60,
+                    extra: details.extra,
+                });
+                headers = { ...headers, 'PAYMENT-SIGNATURE': payload };
+            }
+            const res2 = await fetch(`${apiUrl}/v1/exa/answer`, {
+                method: 'POST', headers, body: JSON.stringify({ query }),
+            });
+            if (!res2.ok)
+                return null;
+            const body = await res2.json();
+            return (body.data?.answer || '').slice(0, 600).trim() || null;
+        }
+        if (!res.ok)
+            return null;
+        const body = await res.json();
+        return (body.data?.answer || '').slice(0, 600).trim() || null;
+    }
+    catch {
+        return null;
+    }
+}
+async function extractPaymentReq(response) {
+    let header = response.headers.get('payment-required');
+    if (!header) {
+        try {
+            const body = (await response.json());
+            if (body.x402 || body.accepts)
+                header = btoa(JSON.stringify(body));
+        }
+        catch { /* ignore */ }
+    }
+    return header;
+}
+// ─── Injection helper ────────────────────────────────────────────────────
+/**
+ * Augment a user message with the prefetch context block prepended. The
+ * final model sees the data as part of the "incoming" user turn — no
+ * synthetic tool_use fabrication needed, history stays clean.
+ */
+export function augmentUserMessage(originalInput, prefetch) {
+    return {
+        role: 'user',
+        content: `${prefetch.contextBlock}\n\nOriginal user message:\n${originalInput}`,
+    };
+}

package/dist/agent/loop.js CHANGED Viewed

@@ -26,6 +26,7 @@ import { recordOutcome } from '../router/local-elo.js';
 import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
 import { shouldVerify, runVerification } from './verification.js';
 import { shouldCheckGrounding, checkGrounding, renderGroundingFollowup, buildGroundingRetryInstruction, } from './evaluator.js';
+import { augmentUserMessage, classifyIntent, prefetchForIntent } from './intent-prefetch.js';
 import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, loadSessionHistory, loadSessionMeta, } from '../session/storage.js';
 /**
  * Atomically replace all elements in a history array.
@@ -551,6 +552,36 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
         const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
         let turnSpend = 0; // Cost spent this user turn (USD)
         const MAX_TURN_SPEND_USD = 0.25; // Hard circuit breaker per user message (lowered — user wallets are real money)
+        // ── Proactive prefetch ────────────────────────────────────────────
+        // Before the main model gets a chance to answer a live-world question
+        // from stale training data, the harness detects ticker / price / news
+        // intent and fetches the data itself. Result is prepended to the user's
+        // message so the model sees it as ground truth for this turn. This
+        // makes the answer tool-grounded regardless of the model's willingness
+        // to call tools on its own — important for models with strong
+        // refusal priors on financial data.
+        try {
+            const intent = await classifyIntent(input, client);
+            if (intent) {
+                const prefetch = await prefetchForIntent(intent, client);
+                if (prefetch && prefetch.anyOk) {
+                    if (config.showPrefetchStatus !== false) {
+                        onEvent({ kind: 'text_delta', text: `\n${prefetch.statusLine}\n\n` });
+                    }
+                    // Augment the last user message in history (NOT lastUserInput,
+                    // which /retry restores — that should remain the user's original).
+                    const lastIdx = history.length - 1;
+                    const last = history[lastIdx];
+                    if (last && last.role === 'user' && typeof last.content === 'string') {
+                        history[lastIdx] = augmentUserMessage(last.content, prefetch);
+                    }
+                }
+            }
+        }
+        catch {
+            // Prefetch is best-effort — if the classifier or any fetch trips,
+            // fall through and let the main loop do its own thing.
+        }
         // Agent loop for this user message
         while (loopCount < maxTurns) {
             loopCount++;

package/dist/agent/types.d.ts CHANGED Viewed

@@ -162,4 +162,6 @@ export interface AgentConfig {
      * bound a single run to keep autonomous execution inside a known envelope.
      */
     maxSpendUsd?: number;
+    /** Show user-visible harness prefetch status lines (interactive UX only). */
+    showPrefetchStatus?: boolean;
 }

package/dist/commands/start.js CHANGED Viewed

@@ -111,6 +111,7 @@ export async function startCommand(options) {
             workingDir: workDir,
             permissionMode: 'trust',
             debug: options.debug,
+            showPrefetchStatus: false,
             resumeSessionId: (typeof options.resume === 'string' && options.resume !== 'picker')
                 ? options.resume
                 : continueResolvedId,
@@ -291,6 +292,7 @@ export async function startCommand(options) {
         // --prompt is also scripted; batch callers never see a TTY.
         permissionMode: (options.trust || options.prompt || !process.stdin.isTTY) ? 'trust' : 'default',
         debug: options.debug,
+        showPrefetchStatus: process.stdin.isTTY,
         resumeSessionId,
         ...(options.maxSpend != null
             ? { maxSpendUsd: Number(options.maxSpend) }

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.8.19",
+  "version": "3.8.21",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {