@blockrun/franklin 3.8.19 → 3.8.21

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -14,11 +14,13 @@ function getCoreInstructions() {
14
14
 
15
15
  You are an interactive agent — not a chatbot. Use the tools available to you to accomplish tasks. Your job is to be a highly capable collaborator who takes initiative, makes progress, and delivers results.
16
16
 
17
- # You are Franklin, not Claude.ai
18
- The model weights you're running on were trained by other teams, but your instructions and tools are Franklin's. Specifically:
19
- - **You have live tools.** TradingMarket returns current stock / FX / crypto / commodity prices. ExaAnswer returns cited current-events answers. WebSearch + WebFetch read the live web. The wallet pays for them automatically.
20
- - **Refusal is a bug, not a safety feature.** If the user asks for a current price, today's news, or live market state, saying "I can't provide real-time data" or "check Yahoo Finance" is a Claude.ai reflex — it's correct for a chatbot with no tools, wrong for Franklin. Call the tool.
21
- - **Franklin's brand is spending for real work.** $0.001 for a stock quote is exactly what the wallet is for. Don't hesitate on cents.
17
+ # Franklin has hands
18
+ You run with live tools by default:
19
+ - **TradingMarket** current stock / FX / crypto / commodity prices (BlockRun Gateway / Pyth; wallet pays automatically, $0.001/stock call, free for everything else).
20
+ - **ExaAnswer / ExaSearch / ExaReadUrls** cited current-events answers, semantic web search, clean URL content.
21
+ - **WebSearch / WebFetch** live web.
22
+
23
+ When a user asks for a current price, today's news, or any live-world state, **call the tool**. Refusal phrases like "I can't provide real-time data" or "check Yahoo Finance" are a bug — they belong to systems without tools. Your brand is spending USDC to get real answers; $0.001 for a stock quote is exactly what the wallet is for. Don't hesitate on cents.
22
24
 
23
25
  # System
24
26
  - All text you output outside of tool use is displayed to the user. Use markdown for formatting.
@@ -0,0 +1,64 @@
1
+ /**
2
+ * Proactive prefetch for live-world questions.
3
+ *
4
+ * Why this exists:
5
+ * When a user asks "what is CRCL trading at?", the agent has TradingMarket
6
+ * in CORE and the system prompt demands it be used. The evaluator catches
7
+ * refusals. The auto-retry loop feeds findings back. All four layers run
8
+ * every turn. It still isn't enough — Sonnet 4.6 (the strongest model we
9
+ * route to) confidently answers "Circle is a private company" from 2022
10
+ * training data, refusing the tool across retries.
11
+ *
12
+ * The lesson: every mechanism above depends on the model *agreeing* to call
13
+ * a tool. When the model is confident-but-wrong about current-world state,
14
+ * it doesn't reach for the tool at all. No prompt tweak will fix this —
15
+ * fine-tuning priors beat prompt priors.
16
+ *
17
+ * Harness-level fix: prefetch the data *before* the model decides. When
18
+ * the user's message contains a ticker or a current-events ask, Franklin's
19
+ * harness spends the $0.001 unprompted, injects the result into context,
20
+ * and then the model answers a question it already has evidence for —
21
+ * not a question its training data has a prior about.
22
+ *
23
+ * This is the pattern Anthropic's harness-design writeup calls out:
24
+ * "Remove components that encode a stale assumption (the model will
25
+ * reach for tools on its own), replace with components that handle the
26
+ * coordination gap (harness fetches, model synthesizes)."
27
+ */
28
+ import type { ModelClient } from './llm.js';
29
+ import type { Dialogue } from './types.js';
30
+ import type { MarketCode } from '../trading/providers/standard-models.js';
31
+ export interface TickerIntent {
32
+ kind: 'ticker';
33
+ /** Symbol as emitted by the classifier and upper-cased by parseIntentReply — normally a ticker, since the classifier is prompted to map company names to tickers. */
34
+ symbol: string;
35
+ /** Resolved market if the classifier was confident; `us` default when `assetClass === 'stock'`. */
36
+ market?: MarketCode;
37
+ /** Asset class — stock prefers paid Gateway path; crypto stays free on CoinGecko. */
38
+ assetClass: 'stock' | 'crypto';
39
+ /** Does the user also want the news / "why did it move"? */
40
+ wantNews: boolean;
41
+ }
42
+ export type Intent = TickerIntent | null;
43
+ export interface PrefetchResult {
44
+ /** Markdown snippet that gets prepended to the user's message for the LLM. */
45
+ contextBlock: string;
46
+ /** User-visible status line, e.g. "*Prefetched 1 source · cost $0.001*". */
47
+ statusLine: string;
48
+ /** Spend incurred by prefetch. For telemetry + Markets panel display. */
49
+ costUsd: number;
50
+ /** Did any prefetch call actually succeed? If all failed, the caller may
51
+ * decide to skip injection entirely and let the model try its own way. */
52
+ anyOk: boolean;
53
+ }
54
+ /** Parse the first line of the classifier's reply (case-insensitive). Anything that does not start with a well-formed STOCK / CRYPTO directive → null. */
55
+ export declare function parseIntentReply(reply: string): Intent;
56
+ export declare function classifyIntent(userInput: string, client: ModelClient): Promise<Intent>;
57
+ /** Run the prefetch for an intent. Concurrent fan-out for price + news. */
58
+ export declare function prefetchForIntent(intent: Intent, client: ModelClient): Promise<PrefetchResult | null>;
59
+ /**
60
+ * Augment a user message with the prefetch context block prepended. The
61
+ * final model sees the data as part of the "incoming" user turn — no
62
+ * synthetic tool_use fabrication needed, history stays clean.
63
+ */
64
+ export declare function augmentUserMessage(originalInput: string, prefetch: PrefetchResult): Dialogue;
@@ -0,0 +1,291 @@
1
+ /**
2
+ * Proactive prefetch for live-world questions.
3
+ *
4
+ * Why this exists:
5
+ * When a user asks "what is CRCL trading at?", the agent has TradingMarket
6
+ * in CORE and the system prompt demands it be used. The evaluator catches
7
+ * refusals. The auto-retry loop feeds findings back. All four layers run
8
+ * every turn. It still isn't enough — Sonnet 4.6 (the strongest model we
9
+ * route to) confidently answers "Circle is a private company" from 2022
10
+ * training data, refusing the tool across retries.
11
+ *
12
+ * The lesson: every mechanism above depends on the model *agreeing* to call
13
+ * a tool. When the model is confident-but-wrong about current-world state,
14
+ * it doesn't reach for the tool at all. No prompt tweak will fix this —
15
+ * fine-tuning priors beat prompt priors.
16
+ *
17
+ * Harness-level fix: prefetch the data *before* the model decides. When
18
+ * the user's message contains a ticker or a current-events ask, Franklin's
19
+ * harness spends the $0.001 unprompted, injects the result into context,
20
+ * and then the model answers a question it already has evidence for —
21
+ * not a question its training data has a prior about.
22
+ *
23
+ * This is the pattern Anthropic's harness-design writeup calls out:
24
+ * "Remove components that encode a stale assumption (the model will
25
+ * reach for tools on its own), replace with components that handle the
26
+ * coordination gap (harness fetches, model synthesizes)."
27
+ */
28
+ import { getStockPrice, getPrice } from '../trading/data.js';
29
+ // ─── Classifier ──────────────────────────────────────────────────────────
30
+ const CLASSIFIER_MODEL = process.env.FRANKLIN_PREFETCH_MODEL || 'nvidia/nemotron-ultra-253b';
31
+ const CLASSIFIER_TIMEOUT_MS = 2_500;
32
+ const CLASSIFIER_PROMPT = `You extract PREFETCH INTENT from a user message for a CLI agent that has live market-data tools.
33
+
34
+ Your job: decide whether Franklin should fetch live data BEFORE the main model answers, so the answer is grounded in real data instead of model memory.
35
+
36
+ Output one of:
37
+
38
+ 1. STOCK <TICKER> <MARKET> <NEWS>
39
+ When the user asks about a specific publicly-traded equity — by ticker (CRCL, AAPL, NVDA, 7203, 0005) or by company name that maps to one (Circle → CRCL, Apple → AAPL, Toyota → 7203, HSBC → 0005).
40
+ MARKET: us | hk | jp | kr | gb | de | fr | nl | ie | lu | cn | ca
41
+ NEWS: yes if the user also asks "why / what happened / analysis"; no otherwise.
42
+ Default market: us.
43
+
44
+ 2. CRYPTO <SYMBOL> <NEWS>
45
+ When the user asks about a cryptocurrency by symbol or name (BTC, ETH, Bitcoin, Ethereum, SOL, Solana).
46
+ NEWS: yes if asks why / recent news.
47
+
48
+ 3. NONE
49
+ Any other message: greetings, coding questions, general chat, questions about non-traded entities.
50
+
51
+ Rules:
52
+ - If the company could be either public or private and you're unsure, assume PUBLIC and emit STOCK with your best ticker guess. The tool will 404 gracefully if wrong.
53
+ - One output line only. No explanation. No punctuation beyond what's shown.
54
+ - Ticker in UPPERCASE.
55
+
56
+ Examples:
57
+ User: 帮我看看 CRCL 股票 → STOCK CRCL us no
58
+ User: should I sell Circle stock? → STOCK CRCL us no
59
+ User: why did CRCL drop this week → STOCK CRCL us yes
60
+ User: BTC 现在价格 → CRYPTO BTC no
61
+ User: 为什么以太坊跌了 → CRYPTO ETH yes
62
+ User: Toyota 股价 → STOCK 7203 jp no
63
+ User: hi how are you → NONE
64
+ User: fix the bug in foo.ts → NONE
65
+
66
+ Answer with just the one-line directive.`;
67
+ /** Parse the classifier's one-line reply. Very strict — any junk → null. */
68
+ export function parseIntentReply(reply) {
69
+ const line = reply.trim().split('\n')[0].trim().toUpperCase();
70
+ if (!line || line.startsWith('NONE'))
71
+ return null;
72
+ const stockMatch = line.match(/^STOCK\s+([A-Z0-9.\-]+)\s+([A-Z]{2})\s+(YES|NO)\b/);
73
+ if (stockMatch) {
74
+ const market = stockMatch[2].toLowerCase();
75
+ const validMarkets = ['us', 'hk', 'jp', 'kr', 'gb', 'de', 'fr', 'nl', 'ie', 'lu', 'cn', 'ca'];
76
+ if (!validMarkets.includes(market))
77
+ return null;
78
+ return {
79
+ kind: 'ticker',
80
+ symbol: stockMatch[1],
81
+ market: market,
82
+ assetClass: 'stock',
83
+ wantNews: stockMatch[3] === 'YES',
84
+ };
85
+ }
86
+ const cryptoMatch = line.match(/^CRYPTO\s+([A-Z0-9.\-]+)\s+(YES|NO)\b/);
87
+ if (cryptoMatch) {
88
+ return {
89
+ kind: 'ticker',
90
+ symbol: cryptoMatch[1],
91
+ assetClass: 'crypto',
92
+ wantNews: cryptoMatch[2] === 'YES',
93
+ };
94
+ }
95
+ return null;
96
+ }
97
/**
 * Cheap pre-filter for inputs shorter than the usual length gate: does the
 * text still look like a terse market question?
 *
 * True when the text contains an all-caps 2-5 letter token (BTC, AAPL), a
 * 4-6 digit code (7203, 0005), or any Han / Hiragana / Katakana / Hangul
 * character — CJK questions pack a full request into far fewer characters
 * than the Latin-script equivalent.
 */
function looksLikeTerseMarketAsk(text) {
    return /\b[A-Z]{2,5}\b/.test(text)
        || /\b\d{4,6}\b/.test(text)
        || /[\p{Script=Han}\p{Script=Hiragana}\p{Script=Katakana}\p{Script=Hangul}]/u.test(text);
}
/**
 * Ask the classifier model whether the user's message warrants a prefetch.
 *
 * Returns null (no prefetch) when:
 *  - FRANKLIN_NO_PREFETCH=1 is set,
 *  - the input is empty or not a string,
 *  - the input is shorter than 12 characters and does not look like a terse
 *    market ask (see looksLikeTerseMarketAsk),
 *  - the classifier call fails, is aborted after CLASSIFIER_TIMEOUT_MS, or
 *    replies with something parseIntentReply rejects.
 *
 * This function never throws: classification is best-effort and every
 * failure degrades to "no prefetch".
 *
 * @param userInput The user's raw message.
 * @param client Model client; `complete(request, signal)` is invoked on it.
 * @returns The parsed intent, or null.
 */
export async function classifyIntent(userInput, client) {
    if (process.env.FRANKLIN_NO_PREFETCH === '1')
        return null;
    if (typeof userInput !== 'string')
        return null;
    const trimmed = userInput.trim();
    if (!trimmed)
        return null;
    // Short inputs are mostly greetings and acknowledgements, so the classifier
    // call is skipped for them. The gate must not swallow short but genuine
    // asks such as "BTC price" or "BTC 现在价格", so those bypass it.
    const MIN_CHARS_WITHOUT_HINT = 12;
    if (trimmed.length < MIN_CHARS_WITHOUT_HINT && !looksLikeTerseMarketAsk(trimmed))
        return null;
    const ctrl = new AbortController();
    const timer = setTimeout(() => ctrl.abort(), CLASSIFIER_TIMEOUT_MS);
    try {
        const result = await client.complete({
            model: CLASSIFIER_MODEL,
            system: CLASSIFIER_PROMPT,
            // Cap what is sent to the classifier; the intent is expected up front.
            messages: [{ role: 'user', content: trimmed.slice(0, 800) }],
            tools: [],
            max_tokens: 24,
        }, ctrl.signal);
        // Concatenate every text part of the reply; other part types are ignored.
        let raw = '';
        for (const part of result.content) {
            if (typeof part === 'object' && part.type === 'text' && part.text)
                raw += part.text;
        }
        return parseIntentReply(raw);
    }
    catch {
        // Deliberate best-effort: timeout, transport error or malformed result
        // all mean "no prefetch this turn".
        return null;
    }
    finally {
        clearTimeout(timer);
    }
}
128
+ // ─── Prefetch dispatcher ─────────────────────────────────────────────────
129
/**
 * Format a number as a compact dollar string.
 *
 *  - non-finite (NaN, ±Infinity, non-numbers) → "—"
 *  - n >= 100                                 → two decimals ("$123.46")
 *  - 0.01 <= |n| < 100, and 0                 → up to four decimals, trailing
 *                                               zeros stripped ("$1.5", "$0")
 *  - 0 < |n| < 0.01                           → four significant digits,
 *                                               trailing zeros stripped
 *                                               ("$0.001", "$0.00001234")
 *
 * The last rule exists because a fixed four decimals rounds micro-priced
 * assets to "$0", which would then be presented to the model as a live price.
 *
 * @param n Amount to format.
 * @returns The formatted string.
 */
function formatUsd(n) {
    if (!Number.isFinite(n))
        return '—';
    if (n >= 100)
        return `$${n.toFixed(2)}`;
    const magnitude = Math.abs(n);
    let decimals = 4;
    if (magnitude > 0 && magnitude < 0.01) {
        // Enough decimals for four significant digits; toFixed accepts 0..100.
        decimals = Math.min(100, 3 - Math.floor(Math.log10(magnitude)));
    }
    // decimals >= 4, so the string always has a '.', and stripping trailing
    // zeros can never eat into the integer part.
    return `$${n.toFixed(decimals).replace(/0+$/, '').replace(/\.$/, '')}`;
}
136
/** Short, single-line description of a thrown value, capped at 80 characters. */
function prefetchFailureReason(err) {
    const message = err instanceof Error ? err.message : String(err);
    return message.slice(0, 80);
}
/** Fetch the stock price for an intent; always resolves to { ok, line, cost }. */
async function stockPriceTask(intent) {
    const market = intent.market || 'us';
    const failed = (reason) => ({
        ok: false,
        line: `- ${intent.symbol} (${market}): lookup failed — ${reason}`,
        cost: 0,
    });
    try {
        const r = await getStockPrice(intent.symbol, market);
        // getStockPrice signals failure by returning a message string.
        if (typeof r === 'string')
            return failed(r.slice(0, 80));
        if (!r || !Number.isFinite(r.price))
            return failed('no price returned');
        return {
            ok: true,
            // NOTE(review): the price is rendered with a "$" for every market;
            // confirm getStockPrice returns USD for non-US listings.
            line: `- ${intent.symbol} (${market}) live price: ${formatUsd(r.price)} (BlockRun Gateway / Pyth)`,
            cost: 0.001,
        };
    }
    catch (err) {
        // NOTE(review): cost is recorded as 0 on a thrown error; confirm no
        // payment can have settled before the failure.
        return failed(prefetchFailureReason(err));
    }
}
/** Fetch the crypto price for an intent; always resolves to { ok, line, cost }. */
async function cryptoPriceTask(intent) {
    const failed = (reason) => ({
        ok: false,
        line: `- ${intent.symbol}: lookup failed — ${reason}`,
        cost: 0,
    });
    try {
        const r = await getPrice(intent.symbol, 'crypto');
        // getPrice signals failure by returning a message string.
        if (typeof r === 'string')
            return failed(r.slice(0, 80));
        if (!r || !Number.isFinite(r.price))
            return failed('no price returned');
        const delta = Number.isFinite(r.change24h)
            ? ` (${r.change24h > 0 ? '+' : ''}${r.change24h.toFixed(2)}% 24h)`
            : '';
        return {
            ok: true,
            line: `- ${intent.symbol} live price: ${formatUsd(r.price)}${delta} (CoinGecko)`,
            cost: 0,
        };
    }
    catch (err) {
        return failed(prefetchFailureReason(err));
    }
}
/** Fetch a news summary for an intent; always resolves to { ok, line, cost }. */
async function newsTask(intent, client) {
    const query = intent.assetClass === 'stock'
        ? `Why did ${intent.symbol} stock move over the past week? Recent news and catalysts for ${intent.symbol} as of today.`
        : `What are the most important recent news events affecting ${intent.symbol} cryptocurrency in the past week?`;
    const failed = { ok: false, line: `- Recent ${intent.symbol} news: ExaAnswer lookup failed`, cost: 0 };
    try {
        const snippet = await exaAnswerTry(query, client);
        if (!snippet)
            return failed;
        return {
            ok: true,
            // Continuation lines are indented so the snippet stays inside its bullet.
            line: `- Recent ${intent.symbol} news (ExaAnswer synthesized):\n ${snippet.replace(/\n/g, '\n ')}`,
            cost: 0.01,
        };
    }
    catch {
        return failed;
    }
}
/**
 * Run the prefetch for an intent. The price lookup and, when
 * `intent.wantNews` is set, the news lookup are started together and awaited
 * concurrently.
 *
 * Each lookup reports its own failure as a bullet line instead of rejecting,
 * so one failing source never discards what the other returned.
 *
 * @param intent Intent from classifyIntent / parseIntentReply, or null.
 * @param client Model client, forwarded to the news lookup.
 * @returns null when there is no intent or nothing was attempted; otherwise
 *   `{ contextBlock, statusLine, costUsd, anyOk }`. `contextBlock` lists
 *   every attempted lookup (including failures), `statusLine` counts only
 *   the lookups that succeeded, `costUsd` is the sum of the per-lookup costs
 *   and `anyOk` tells whether at least one lookup succeeded.
 */
export async function prefetchForIntent(intent, client) {
    if (!intent)
        return null;
    const tasks = [];
    if (intent.kind === 'ticker') {
        // 1. Price
        tasks.push(intent.assetClass === 'stock' ? stockPriceTask(intent) : cryptoPriceTask(intent));
        // 2. News, if asked
        if (intent.wantNews)
            tasks.push(newsTask(intent, client));
    }
    const results = await Promise.all(tasks);
    const lines = results.map(r => r.line).filter(Boolean);
    if (lines.length === 0)
        return null;
    const okCount = results.filter(r => r.ok).length;
    const cost = results.reduce((sum, r) => sum + r.cost, 0);
    const contextBlock = [
        '[FRANKLIN HARNESS PREFETCH]',
        `The harness automatically fetched live data before your turn. Use these facts as ground truth — do NOT override them with training-data assumptions.`,
        '',
        ...lines,
        '',
    ].join('\n');
    // Report only sources that actually returned data; failed lookups still
    // appear in the context block but are not "prefetched".
    const statusLine = `*Prefetched ${okCount} source${okCount === 1 ? '' : 's'} · cost ${formatUsd(cost)}*`;
    return { contextBlock, statusLine, costUsd: cost, anyOk: okCount > 0 };
}
204
/**
 * Read an ExaAnswer response: null unless the response is OK and carries a
 * non-empty `data.answer`; otherwise the first 600 characters, trimmed.
 */
async function readExaAnswer(res) {
    if (!res.ok)
        return null;
    const body = await res.json();
    return (body.data?.answer || '').slice(0, 600).trim() || null;
}
/**
 * Thin wrapper: POST the query to the gateway's `/v1/exa/answer` endpoint and
 * return up to the first 600 characters of the answer, or null.
 *
 * If the gateway replies 402, the payment requirement is read from the
 * response, a payment payload is signed with the local wallet (Solana or EVM,
 * depending on the configured chain) and the request is retried once with a
 * PAYMENT-SIGNATURE header.
 *
 * The whole exchange is bounded by a timeout because prefetch runs before the
 * model's turn: a stalled gateway must not stall the user. Every failure —
 * timeout, network error, non-OK status, missing payment requirement,
 * malformed body — yields null; this function never throws.
 *
 * NOTE(review): the amount demanded by the 402 response is signed without a
 * local cap; confirm the payment helpers or the gateway bound it.
 *
 * @param query Natural-language question for ExaAnswer.
 * @param client Currently unused; kept so a shared paid-endpoint client can
 *   be threaded through later.
 * @returns Answer text (at most 600 characters) or null.
 */
async function exaAnswerTry(query, client) {
    const TIMEOUT_MS = 10_000;
    const ctrl = new AbortController();
    const timer = setTimeout(() => ctrl.abort(), TIMEOUT_MS);
    try {
        // Reuse the BlockRun gateway chat endpoint the ExaAnswer tool already uses.
        // We inline the request rather than invoke the capability through the full
        // tool framework because prefetch runs outside the agent loop — no
        // permission prompt, no streaming.
        const { loadChain, API_URLS } = await import('../config.js');
        const chain = loadChain();
        const apiUrl = API_URLS[chain];
        void client; // (future: unify the paid-endpoint client so we reuse wallet caching)
        const endpoint = `${apiUrl}/v1/exa/answer`;
        const requestBody = JSON.stringify({ query });
        const res = await fetch(endpoint, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json' },
            body: requestBody,
            signal: ctrl.signal,
        });
        // `return await` keeps rejections inside this try/catch.
        if (res.status !== 402)
            return await readExaAnswer(res);
        const payHdr = await extractPaymentReq(res);
        if (!payHdr)
            return null;
        const { getOrCreateWallet, getOrCreateSolanaWallet, createPaymentPayload, createSolanaPaymentPayload, parsePaymentRequired, extractPaymentDetails, solanaKeyToBytes, SOLANA_NETWORK } = await import('@blockrun/llm');
        const paymentRequired = parsePaymentRequired(payHdr);
        // Options shared by both chains' payload builders.
        const paymentOptions = (details) => ({
            resourceUrl: details.resource?.url || endpoint,
            resourceDescription: 'Franklin prefetch ExaAnswer',
            maxTimeoutSeconds: details.maxTimeoutSeconds || 60,
            extra: details.extra,
        });
        let payload;
        if (chain === 'solana') {
            const wallet = await getOrCreateSolanaWallet();
            const details = extractPaymentDetails(paymentRequired, SOLANA_NETWORK);
            const secretBytes = await solanaKeyToBytes(wallet.privateKey);
            const feePayer = details.extra?.feePayer || details.recipient;
            payload = await createSolanaPaymentPayload(secretBytes, wallet.address, details.recipient, details.amount, feePayer, paymentOptions(details));
        }
        else {
            const wallet = getOrCreateWallet();
            const details = extractPaymentDetails(paymentRequired);
            payload = await createPaymentPayload(wallet.privateKey, wallet.address, details.recipient, details.amount, details.network || 'eip155:8453', paymentOptions(details));
        }
        const res2 = await fetch(endpoint, {
            method: 'POST',
            headers: { 'Content-Type': 'application/json', 'PAYMENT-SIGNATURE': payload },
            body: requestBody,
            signal: ctrl.signal,
        });
        return await readExaAnswer(res2);
    }
    catch {
        // Best-effort: the caller reports "ExaAnswer lookup failed".
        return null;
    }
    finally {
        clearTimeout(timer);
    }
}
268
+ async function extractPaymentReq(response) {
269
+ let header = response.headers.get('payment-required');
270
+ if (!header) {
271
+ try {
272
+ const body = (await response.json());
273
+ if (body.x402 || body.accepts)
274
+ header = btoa(JSON.stringify(body));
275
+ }
276
+ catch { /* ignore */ }
277
+ }
278
+ return header;
279
+ }
280
+ // ─── Injection helper ────────────────────────────────────────────────────
281
+ /**
282
+ * Augment a user message with the prefetch context block prepended. The
283
+ * final model sees the data as part of the "incoming" user turn — no
284
+ * synthetic tool_use fabrication needed, history stays clean.
285
+ */
286
+ export function augmentUserMessage(originalInput, prefetch) {
287
+ return {
288
+ role: 'user',
289
+ content: `${prefetch.contextBlock}\n\nOriginal user message:\n${originalInput}`,
290
+ };
291
+ }
@@ -26,6 +26,7 @@ import { recordOutcome } from '../router/local-elo.js';
26
26
  import { shouldPlan, getPlanningPrompt, getExecutorModel, isExecutorStuck, toolCallSignature } from './planner.js';
27
27
  import { shouldVerify, runVerification } from './verification.js';
28
28
  import { shouldCheckGrounding, checkGrounding, renderGroundingFollowup, buildGroundingRetryInstruction, } from './evaluator.js';
29
+ import { augmentUserMessage, classifyIntent, prefetchForIntent } from './intent-prefetch.js';
29
30
  import { createSessionId, appendToSession, updateSessionMeta, pruneOldSessions, loadSessionHistory, loadSessionMeta, } from '../session/storage.js';
30
31
  /**
31
32
  * Atomically replace all elements in a history array.
@@ -551,6 +552,36 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
551
552
  const MAX_TINY_RESPONSES = 2; // Break after N tiny responses — if 2 calls return near-empty, something is wrong
552
553
  let turnSpend = 0; // Cost spent this user turn (USD)
553
554
  const MAX_TURN_SPEND_USD = 0.25; // Hard circuit breaker per user message (lowered — user wallets are real money)
555
+ // ── Proactive prefetch ────────────────────────────────────────────
556
+ // Before the main model gets a chance to answer a live-world question
557
+ // from stale training data, the harness detects ticker / price / news
558
+ // intent and fetches the data itself. Result is prepended to the user's
559
+ // message so the model sees it as ground truth for this turn. This
560
+ // makes the answer tool-grounded regardless of the model's willingness
561
+ // to call tools on its own — important for models with strong
562
+ // refusal priors on financial data.
563
+ try {
564
+ const intent = await classifyIntent(input, client);
565
+ if (intent) {
566
+ const prefetch = await prefetchForIntent(intent, client);
567
+ if (prefetch && prefetch.anyOk) {
568
+ if (config.showPrefetchStatus !== false) {
569
+ onEvent({ kind: 'text_delta', text: `\n${prefetch.statusLine}\n\n` });
570
+ }
571
+ // Augment the last user message in history (NOT lastUserInput,
572
+ // which /retry restores — that should remain the user's original).
573
+ const lastIdx = history.length - 1;
574
+ const last = history[lastIdx];
575
+ if (last && last.role === 'user' && typeof last.content === 'string') {
576
+ history[lastIdx] = augmentUserMessage(last.content, prefetch);
577
+ }
578
+ }
579
+ }
580
+ }
581
+ catch {
582
+ // Prefetch is best-effort — if the classifier or any fetch trips,
583
+ // fall through and let the main loop do its own thing.
584
+ }
554
585
  // Agent loop for this user message
555
586
  while (loopCount < maxTurns) {
556
587
  loopCount++;
@@ -162,4 +162,6 @@ export interface AgentConfig {
162
162
  * bound a single run to keep autonomous execution inside a known envelope.
163
163
  */
164
164
  maxSpendUsd?: number;
165
+ /** Show user-visible harness prefetch status lines (interactive UX only). */
166
+ showPrefetchStatus?: boolean;
165
167
  }
@@ -111,6 +111,7 @@ export async function startCommand(options) {
111
111
  workingDir: workDir,
112
112
  permissionMode: 'trust',
113
113
  debug: options.debug,
114
+ showPrefetchStatus: false,
114
115
  resumeSessionId: (typeof options.resume === 'string' && options.resume !== 'picker')
115
116
  ? options.resume
116
117
  : continueResolvedId,
@@ -291,6 +292,7 @@ export async function startCommand(options) {
291
292
  // --prompt is also scripted; batch callers never see a TTY.
292
293
  permissionMode: (options.trust || options.prompt || !process.stdin.isTTY) ? 'trust' : 'default',
293
294
  debug: options.debug,
295
+ showPrefetchStatus: process.stdin.isTTY,
294
296
  resumeSessionId,
295
297
  ...(options.maxSpend != null
296
298
  ? { maxSpendUsd: Number(options.maxSpend) }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.8.19",
3
+ "version": "3.8.21",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {