@blockrun/franklin 3.8.11 → 3.8.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -203,7 +203,7 @@ const DIRECT_COMMANDS = {
203
203
  ` **Analysis:** /security /lint /optimize /todo /deps /clean /migrate /doc\n` +
204
204
  ` **Session:** /plan /ultraplan /execute /compact /retry /sessions /resume /session-search /context /tasks\n` +
205
205
  ` **Power:** /ultrathink [query] /ultraplan /noplan /moa [query] /dump\n` +
206
- ` **Info:** /model /wallet /cost /tokens /learnings /brain /mcp /doctor /version /bug /help\n` +
206
+ ` **Info:** /model /auto /wallet /cost /tokens /learnings /brain /mcp /doctor /version /bug /help\n` +
207
207
  ` **UI:** /clear /exit\n` +
208
208
  (ultrathinkOn ? `\n Ultrathink: ON\n` : '')
209
209
  });
@@ -616,6 +616,17 @@ export async function handleSlashCommand(input, ctx) {
616
616
  emitDone(ctx);
617
617
  return { handled: true };
618
618
  }
619
+ // /auto — hard-reset to smart routing regardless of current state.
620
+ // Shortcut for `/model auto`. Fixes the common "I got stuck on a model
621
+ // and want Franklin to pick again" scenario without typing the long form.
622
+ if (input === '/auto') {
623
+ ctx.config.model = 'blockrun/auto';
624
+ ctx.config.baseModel = 'blockrun/auto';
625
+ ctx.config.onModelChange?.('blockrun/auto', 'user');
626
+ ctx.onEvent({ kind: 'text_delta', text: `Model → **Auto** (smart routing re-enabled)\n` });
627
+ emitDone(ctx);
628
+ return { handled: true };
629
+ }
619
630
  // /model — show current model or switch with /model <name>
620
631
  if (input === '/model' || input.startsWith('/model ')) {
621
632
  if (input === '/model') {
@@ -920,7 +931,7 @@ export async function handleSlashCommand(input, ctx) {
920
931
  ...Object.keys(DIRECT_COMMANDS),
921
932
  ...Object.keys(REWRITE_COMMANDS),
922
933
  ...ARG_COMMANDS.map(c => c.prefix.trim()),
923
- '/branch', '/resume', '/model', '/wallet', '/cost', '/help', '/clear', '/retry', '/exit', '/session-search', '/ssearch', '/failures',
934
+ '/branch', '/resume', '/model', '/auto', '/wallet', '/cost', '/help', '/clear', '/retry', '/exit', '/session-search', '/ssearch', '/failures',
924
935
  ];
925
936
  const cmd = input.split(/\s/)[0];
926
937
  const close = allCommands.filter(c => {
@@ -160,7 +160,14 @@ function getToolPatternsSection() {
160
160
  - **Running commands**: Use Bash for shell operations that have no dedicated tool. Chain commands with && when sequential. Use separate Bash calls when you need to inspect intermediate output.
161
161
  - **Research**: WebSearch for discovery → WebFetch for specific URLs from search results. Don't WebFetch URLs you invented.
162
162
  - **Complex tasks**: Use Agent to spawn sub-agents for 2+ independent research or implementation tasks. Don't do sequentially what can be done in parallel.
163
- - **Multiple independent lookups**: Call all tools in a single response. NEVER make sequential calls when parallel calls would work.`;
163
+ - **Multiple independent lookups**: Call all tools in a single response. NEVER make sequential calls when parallel calls would work.
164
+
165
+ # Grounding Before Answering
166
+ Your training data is frozen in the past. Live-world questions MUST be answered from tool results, not memory.
167
+ - Any question about a current price, quote, market state, or "should I buy/sell/hold X" → use **TradingMarket** (crypto/FX/commodity are free; stocks cost \$0.001 via the wallet).
168
+ - Any "what happened / why did it change / latest news on X" → use **ExaAnswer** for a cited synthesized answer, or **ExaSearch** + **ExaReadUrls** when you need more depth.
169
+ - If the user names a thing you don't recognize (a company, ticker, project), don't demand clarification — call the research tools and figure it out. You have a wallet to spend on exactly this.
170
+ - If a tool returns an error (rate-limit, 404, insufficient funds), say so plainly and suggest the next action. Don't silently fall back to memory.`;
164
171
  }
165
172
  function getTokenEfficiencySection() {
166
173
  return `# Token Efficiency
@@ -694,6 +694,16 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
694
694
  routingSavings = routing.savings;
695
695
  lastRoutedModel = routing.model;
696
696
  lastRoutedCategory = routing.signals[0] || '';
697
+ // Surface the routing decision so users know which concrete model
698
+ // just got picked. Without this the status bar reads "auto" and
699
+ // users have no idea what's actually running — or worse, they
700
+ // believe they're stuck on the last-seen concrete name.
701
+ if (loopCount === 1) {
702
+ onEvent({
703
+ kind: 'text_delta',
704
+ text: `*Auto → ${routing.model}*\n\n`,
705
+ });
706
+ }
697
707
  }
698
708
  // Update token estimation model for more accurate byte-per-token ratio
699
709
  setEstimationModel(resolvedModel);
@@ -964,7 +974,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
964
974
  + (usage.outputTokens / 1_000_000) * OPUS_PRICING.output;
965
975
  sessionSavedVsOpus += Math.max(0, opusCost - costEstimate);
966
976
  // ── Max-spend guard ──
967
- // Session-level cost ceiling. Cron/daily drivers pass this to bound a
977
+ // Session-level cost ceiling. Batch/scripted callers pass this to bound a
968
978
  // single run ("spend at most $0.50 for today's digest"); interactive
969
979
  // users can pass it to feel safe walking away. Hits as soon as accumulated
970
980
  // cost crosses the cap — the last call that tipped us over still runs,
@@ -158,7 +158,7 @@ export interface AgentConfig {
158
158
  /**
159
159
  * Hard cap on total USD spend for this session. When accumulated API cost
160
160
  * crosses the cap, the loop stops with `reason: 'budget'`. Zero/negative
161
- * values disable the cap. Primary use case: cron/daily drivers that must
161
+ * values disable the cap. Primary use case: batch/scripted callers that must
162
162
  * bound a single run to keep autonomous execution inside a known envelope.
163
163
  */
164
164
  maxSpendUsd?: number;
@@ -1,3 +1,4 @@
1
+ import type { StreamTurnDone } from '../agent/types.js';
1
2
  interface StartOptions {
2
3
  model?: string;
3
4
  debug?: boolean;
@@ -9,8 +10,9 @@ interface StartOptions {
9
10
  continue?: boolean;
10
11
  /** Hard USD cap on total session spend. Stops the loop when exceeded. */
11
12
  maxSpend?: string | number;
12
- /** Run a single prompt non-interactively, then exit. For cron/scripted use. */
13
+ /** Run a single prompt non-interactively, then exit. For batch/scripted use. */
13
14
  prompt?: string;
14
15
  }
15
16
  export declare function startCommand(options: StartOptions): Promise<void>;
17
+ export declare function oneShotExitCodeForTurnReason(reason: StreamTurnDone['reason']): number;
16
18
  export {};
@@ -76,7 +76,7 @@ export async function startCommand(options) {
76
76
  }
77
77
  const workDir = process.cwd();
78
78
  // --prompt batch mode: skip all interactive startup UI/side effects so
79
- // stdout stays clean for cron/scripts. Keep the capability surface to the
79
+ // stdout stays clean for scripts and one-shot callers. Keep the capability surface to the
80
80
  // built-ins only — no panel, no MCP autoconnect, no wallet/banner chatter.
81
81
  if (options.prompt) {
82
82
  if (options.resume === true || options.resume === 'picker') {
@@ -282,7 +282,7 @@ export async function startCommand(options) {
282
282
  workingDir: workDir,
283
283
  // Non-TTY (piped) input = scripted mode → trust all tools automatically.
284
284
  // Interactive TTY = default mode (prompts for Bash/Write/Edit).
285
- // --prompt is also scripted; the cron driver never sees a TTY.
285
+ // --prompt is also scripted; batch callers never see a TTY.
286
286
  permissionMode: (options.trust || options.prompt || !process.stdin.isTTY) ? 'trust' : 'default',
287
287
  debug: options.debug,
288
288
  resumeSessionId,
@@ -309,8 +309,12 @@ export async function startCommand(options) {
309
309
  }
310
310
  }
311
311
  // ─── One-shot mode (franklin --prompt "...") ──────────────────────────────
312
- // Used by cron drivers. Non-interactive, prints text deltas to stdout as
313
- // they stream, honors --max-spend, exits 0 on completion / 1 on error.
312
+ // Used by batch/scripted callers. Non-interactive, prints text deltas to
313
+ // stdout as they stream, honors --max-spend, exits non-zero for any
314
+ // non-completed terminal state.
315
+ export function oneShotExitCodeForTurnReason(reason) {
316
+ return reason === 'completed' ? 0 : 1;
317
+ }
314
318
  async function runOneShot(agentConfig, prompt) {
315
319
  let delivered = false;
316
320
  let exitCode = 0;
@@ -325,8 +329,7 @@ async function runOneShot(agentConfig, prompt) {
325
329
  process.stdout.write(event.text);
326
330
  }
327
331
  else if (event.kind === 'turn_done') {
328
- if (event.reason === 'error')
329
- exitCode = 1;
332
+ exitCode = oneShotExitCodeForTurnReason(event.reason);
330
333
  process.stdout.write('\n');
331
334
  }
332
335
  });
package/dist/index.js CHANGED
@@ -44,7 +44,7 @@ program
44
44
  .option('-r, --resume [sessionId]', 'Resume a session by ID (or show picker if omitted)')
45
45
  .option('-c, --continue', 'Continue the most recent session in this directory')
46
46
  .option('--max-spend <usd>', 'Hard USD cap on total session API spend — session stops when exceeded')
47
- .option('-p, --prompt <text>', 'Run a single prompt non-interactively (for cron/scripted use)')
47
+ .option('-p, --prompt <text>', 'Run a single prompt non-interactively (for batch/scripted use)')
48
48
  .action((options) => startCommand({ ...options, version }));
49
49
  program
50
50
  .command('resume [sessionId]')
@@ -1,15 +1,20 @@
1
1
  /**
2
2
  * Tool visibility categories.
3
3
  *
4
- * Franklin ships with ~27 capabilities. Exposing all of them to the model on
5
- * every turn makes the tool inventory large enough that weak models start
6
- * hallucinating tool names or emitting role-play "[TOOLCALL]" fragments.
7
- * The fix: keep a minimal always-on core (file ops, shell, ask) and gate the
8
- * rest behind an `ActivateTool` meta-tool that the agent pulls on demand —
9
- * the same per-session visibility pattern that OpenBB's MCP server uses.
4
+ * Franklin ships with ~27 capabilities. Exposing all of them on every turn
5
+ * makes the tool inventory large enough that weak models start hallucinating
6
+ * tool names or emitting role-play "[TOOLCALL]" fragments. The compromise:
7
+ * keep the hero surface always-on (file/shell/search PLUS the trading and
8
+ * research tools that define Franklin's category), and gate the long tail
9
+ * (webhook, imagegen, videogen, musicgen, memory, etc.) behind an
10
+ * `ActivateTool` meta-tool the agent pulls on demand.
10
11
  *
11
- * `CORE_TOOL_NAMES` is the per-session initial active set. Everything else
12
- * becomes visible only after the agent calls ActivateTool with its name.
12
+ * History: earlier releases kept only file/shell/search in core, which made
13
+ * mid-tier models answer stock / market questions from 2022 training data
14
+ * instead of calling TradingMarket. That's anti-positioning for an agent
15
+ * whose whole brand is "spends USDC for real market data." Hero tools now
16
+ * live in the always-on set so the default experience shows the wallet
17
+ * actually at work.
13
18
  */
14
19
  export declare const CORE_TOOL_NAMES: ReadonlySet<string>;
15
20
  /** True if this tool is always available without activation. */
@@ -1,15 +1,20 @@
1
1
  /**
2
2
  * Tool visibility categories.
3
3
  *
4
- * Franklin ships with ~27 capabilities. Exposing all of them to the model on
5
- * every turn makes the tool inventory large enough that weak models start
6
- * hallucinating tool names or emitting role-play "[TOOLCALL]" fragments.
7
- * The fix: keep a minimal always-on core (file ops, shell, ask) and gate the
8
- * rest behind an `ActivateTool` meta-tool that the agent pulls on demand —
9
- * the same per-session visibility pattern that OpenBB's MCP server uses.
4
+ * Franklin ships with ~27 capabilities. Exposing all of them on every turn
5
+ * makes the tool inventory large enough that weak models start hallucinating
6
+ * tool names or emitting role-play "[TOOLCALL]" fragments. The compromise:
7
+ * keep the hero surface always-on (file/shell/search PLUS the trading and
8
+ * research tools that define Franklin's category), and gate the long tail
9
+ * (webhook, imagegen, videogen, musicgen, memory, etc.) behind an
10
+ * `ActivateTool` meta-tool the agent pulls on demand.
10
11
  *
11
- * `CORE_TOOL_NAMES` is the per-session initial active set. Everything else
12
- * becomes visible only after the agent calls ActivateTool with its name.
12
+ * History: earlier releases kept only file/shell/search in core, which made
13
+ * mid-tier models answer stock / market questions from 2022 training data
14
+ * instead of calling TradingMarket. That's anti-positioning for an agent
15
+ * whose whole brand is "spends USDC for real market data." Hero tools now
16
+ * live in the always-on set so the default experience shows the wallet
17
+ * actually at work.
13
18
  */
14
19
  export const CORE_TOOL_NAMES = new Set([
15
20
  // File operations — nothing else works without these.
@@ -27,8 +32,24 @@ export const CORE_TOOL_NAMES = new Set([
27
32
  // so keeping this in the core doesn't leak the full inventory.
28
33
  'Task',
29
34
  // The meta-tool itself — must always be callable so the agent can
30
- // discover and activate the rest.
35
+ // discover and activate anything not in this core set.
31
36
  'ActivateTool',
37
+ // ── Hero surface: Franklin's reason to exist ────────────────────────
38
+ // Trading market data — crypto, FX, commodity, stocks (via x402).
39
+ // "Is NVDA up?" / "Should I sell CRCL?" must never fall back to
40
+ // training-data guessing.
41
+ 'TradingMarket',
42
+ 'TradingSignal',
43
+ // Research — synthesized answers with real citations, semantic web
44
+ // search, and clean URL fetching. Any factual current-events question
45
+ // ("why did X drop?") should route here rather than the model's prior.
46
+ 'ExaAnswer',
47
+ 'ExaSearch',
48
+ 'ExaReadUrls',
49
+ // Plain web access — WebFetch turns a specific URL into readable text,
50
+ // WebSearch handles discovery. Cheap and obvious enough that every model tends to pick them correctly.
51
+ 'WebFetch',
52
+ 'WebSearch',
32
53
  ]);
33
54
  /** True if this tool is always available without activation. */
34
55
  export function isCoreTool(name) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.8.11",
3
+ "version": "3.8.13",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {