npm - @blockrun/franklin - Versions diffs - 3.8.11 → 3.8.13 - Mend

@blockrun/franklin 3.8.11 → 3.8.13

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (10)

package/dist/agent/commands.js +13 -2
package/dist/agent/context.js +8 -1
package/dist/agent/loop.js +11 -1
package/dist/agent/types.d.ts +1 -1
package/dist/commands/start.d.ts +3 -1
package/dist/commands/start.js +9 -6
package/dist/index.js +1 -1
package/dist/tools/tool-categories.d.ts +13 -8
package/dist/tools/tool-categories.js +30 -9
package/package.json +1 -1

package/dist/agent/commands.js CHANGED Viewed

@@ -203,7 +203,7 @@ const DIRECT_COMMANDS = {
                 `  **Analysis:** /security /lint /optimize /todo /deps /clean /migrate /doc\n` +
                 `  **Session:** /plan /ultraplan /execute /compact /retry /sessions /resume /session-search /context /tasks\n` +
                 `  **Power:** /ultrathink [query] /ultraplan /noplan /moa [query] /dump\n` +
-                `  **Info:** /model /wallet /cost /tokens /learnings /brain /mcp /doctor /version /bug /help\n` +
+                `  **Info:** /model /auto /wallet /cost /tokens /learnings /brain /mcp /doctor /version /bug /help\n` +
                 `  **UI:** /clear /exit\n` +
                 (ultrathinkOn ? `\n  Ultrathink: ON\n` : '')
         });
@@ -616,6 +616,17 @@ export async function handleSlashCommand(input, ctx) {
         emitDone(ctx);
         return { handled: true };
     }
+    // /auto — hard-reset to smart routing regardless of current state.
+    // Shortcut for `/model auto`. Fixes the common "I got stuck on a model
+    // and want Franklin to pick again" scenario without typing the long form.
+    if (input === '/auto') {
+        ctx.config.model = 'blockrun/auto';
+        ctx.config.baseModel = 'blockrun/auto';
+        ctx.config.onModelChange?.('blockrun/auto', 'user');
+        ctx.onEvent({ kind: 'text_delta', text: `Model → **Auto** (smart routing re-enabled)\n` });
+        emitDone(ctx);
+        return { handled: true };
+    }
     // /model — show current model or switch with /model <name>
     if (input === '/model' || input.startsWith('/model ')) {
         if (input === '/model') {
@@ -920,7 +931,7 @@ export async function handleSlashCommand(input, ctx) {
         ...Object.keys(DIRECT_COMMANDS),
         ...Object.keys(REWRITE_COMMANDS),
         ...ARG_COMMANDS.map(c => c.prefix.trim()),
-        '/branch', '/resume', '/model', '/wallet', '/cost', '/help', '/clear', '/retry', '/exit', '/session-search', '/ssearch', '/failures',
+        '/branch', '/resume', '/model', '/auto', '/wallet', '/cost', '/help', '/clear', '/retry', '/exit', '/session-search', '/ssearch', '/failures',
     ];
     const cmd = input.split(/\s/)[0];
     const close = allCommands.filter(c => {

package/dist/agent/context.js CHANGED Viewed

@@ -160,7 +160,14 @@ function getToolPatternsSection() {
 - **Running commands**: Use Bash for shell operations that have no dedicated tool. Chain commands with && when sequential. Use separate Bash calls when you need to inspect intermediate output.
 - **Research**: WebSearch for discovery → WebFetch for specific URLs from search results. Don't WebFetch URLs you invented.
 - **Complex tasks**: Use Agent to spawn sub-agents for 2+ independent research or implementation tasks. Don't do sequentially what can be done in parallel.
-- **Multiple independent lookups**: Call all tools in a single response. NEVER make sequential calls when parallel calls would work.`;
+- **Multiple independent lookups**: Call all tools in a single response. NEVER make sequential calls when parallel calls would work.
+# Grounding Before Answering
+Your training data is frozen in the past. Live-world questions MUST be answered from tool results, not memory.
+- Any question about a current price, quote, market state, or "should I buy/sell/hold X" → use **TradingMarket** (crypto/FX/commodity are free; stocks cost \$0.001 via the wallet).
+- Any "what happened / why did it change / latest news on X" → use **ExaAnswer** for a cited synthesized answer, or **ExaSearch** + **ExaReadUrls** when you need more depth.
+- If the user names a thing you don't recognize (a company, ticker, project), don't demand clarification — call the research tools and figure it out. You have a wallet to spend on exactly this.
+- If a tool returns an error (rate-limit, 404, insufficient funds), say so plainly and suggest the next action. Don't silently fall back to memory.`;
 }
 function getTokenEfficiencySection() {
     return `# Token Efficiency

package/dist/agent/loop.js CHANGED Viewed

@@ -694,6 +694,16 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 routingSavings = routing.savings;
                 lastRoutedModel = routing.model;
                 lastRoutedCategory = routing.signals[0] || '';
+                // Surface the routing decision so users know which concrete model
+                // just got picked. Without this the status bar reads "auto" and
+                // users have no idea what's actually running — or worse, they
+                // believe they're stuck on the last-seen concrete name.
+                if (loopCount === 1) {
+                    onEvent({
+                        kind: 'text_delta',
+                        text: `*Auto → ${routing.model}*\n\n`,
+                    });
+                }
             }
             // Update token estimation model for more accurate byte-per-token ratio
             setEstimationModel(resolvedModel);
@@ -964,7 +974,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 + (usage.outputTokens / 1_000_000) * OPUS_PRICING.output;
             sessionSavedVsOpus += Math.max(0, opusCost - costEstimate);
             // ── Max-spend guard ──
-            // Session-level cost ceiling. Cron/daily drivers pass this to bound a
+            // Session-level cost ceiling. Batch/scripted callers pass this to bound a
             // single run ("spend at most $0.50 for today's digest"); interactive
             // users can pass it to feel safe walking away. Hits as soon as accumulated
             // cost crosses the cap — the last call that tipped us over still runs,

package/dist/agent/types.d.ts CHANGED Viewed

@@ -158,7 +158,7 @@ export interface AgentConfig {
     /**
      * Hard cap on total USD spend for this session. When accumulated API cost
      * crosses the cap, the loop stops with `reason: 'budget'`. Zero/negative
-     * values disable the cap. Primary use case: cron/daily drivers that must
+     * values disable the cap. Primary use case: batch/scripted callers that must
      * bound a single run to keep autonomous execution inside a known envelope.
      */
     maxSpendUsd?: number;

package/dist/commands/start.d.ts CHANGED Viewed

@@ -1,3 +1,4 @@
+import type { StreamTurnDone } from '../agent/types.js';
 interface StartOptions {
     model?: string;
     debug?: boolean;
@@ -9,8 +10,9 @@ interface StartOptions {
     continue?: boolean;
     /** Hard USD cap on total session spend. Stops the loop when exceeded. */
     maxSpend?: string | number;
-    /** Run a single prompt non-interactively, then exit. For cron/scripted use. */
+    /** Run a single prompt non-interactively, then exit. For batch/scripted use. */
     prompt?: string;
 }
 export declare function startCommand(options: StartOptions): Promise<void>;
+export declare function oneShotExitCodeForTurnReason(reason: StreamTurnDone['reason']): number;
 export {};

package/dist/commands/start.js CHANGED Viewed

@@ -76,7 +76,7 @@ export async function startCommand(options) {
     }
     const workDir = process.cwd();
     // --prompt batch mode: skip all interactive startup UI/side effects so
-    // stdout stays clean for cron/scripts. Keep the capability surface to the
+    // stdout stays clean for scripts and one-shot callers. Keep the capability surface to the
     // built-ins only — no panel, no MCP autoconnect, no wallet/banner chatter.
     if (options.prompt) {
         if (options.resume === true || options.resume === 'picker') {
@@ -282,7 +282,7 @@ export async function startCommand(options) {
         workingDir: workDir,
         // Non-TTY (piped) input = scripted mode → trust all tools automatically.
         // Interactive TTY = default mode (prompts for Bash/Write/Edit).
-        // --prompt is also scripted; the cron driver never sees a TTY.
+        // --prompt is also scripted; batch callers never see a TTY.
         permissionMode: (options.trust || options.prompt || !process.stdin.isTTY) ? 'trust' : 'default',
         debug: options.debug,
         resumeSessionId,
@@ -309,8 +309,12 @@ export async function startCommand(options) {
     }
 }
 // ─── One-shot mode (franklin --prompt "...") ──────────────────────────────
-// Used by cron drivers. Non-interactive, prints text deltas to stdout as
-// they stream, honors --max-spend, exits 0 on completion / 1 on error.
+// Used by batch/scripted callers. Non-interactive, prints text deltas to
+// stdout as they stream, honors --max-spend, exits non-zero for any
+// non-completed terminal state.
+export function oneShotExitCodeForTurnReason(reason) {
+    return reason === 'completed' ? 0 : 1;
+}
 async function runOneShot(agentConfig, prompt) {
     let delivered = false;
     let exitCode = 0;
@@ -325,8 +329,7 @@ async function runOneShot(agentConfig, prompt) {
             process.stdout.write(event.text);
         }
         else if (event.kind === 'turn_done') {
-            if (event.reason === 'error')
-                exitCode = 1;
+            exitCode = oneShotExitCodeForTurnReason(event.reason);
             process.stdout.write('\n');
         }
     });

package/dist/index.js CHANGED Viewed

@@ -44,7 +44,7 @@ program
     .option('-r, --resume [sessionId]', 'Resume a session by ID (or show picker if omitted)')
     .option('-c, --continue', 'Continue the most recent session in this directory')
     .option('--max-spend <usd>', 'Hard USD cap on total session API spend — session stops when exceeded')
-    .option('-p, --prompt <text>', 'Run a single prompt non-interactively (for cron/scripted use)')
+    .option('-p, --prompt <text>', 'Run a single prompt non-interactively (for batch/scripted use)')
     .action((options) => startCommand({ ...options, version }));
 program
     .command('resume [sessionId]')

package/dist/tools/tool-categories.d.ts CHANGED Viewed

@@ -1,15 +1,20 @@
 /**
  * Tool visibility categories.
  *
- * Franklin ships with ~27 capabilities. Exposing all of them to the model on
- * every turn makes the tool inventory large enough that weak models start
- * hallucinating tool names or emitting role-play "[TOOLCALL]" fragments.
- * The fix: keep a minimal always-on core (file ops, shell, ask) and gate the
- * rest behind an `ActivateTool` meta-tool that the agent pulls on demand —
- * the same per-session visibility pattern that OpenBB's MCP server uses.
+ * Franklin ships with ~27 capabilities. Exposing all of them on every turn
+ * makes the tool inventory large enough that weak models start hallucinating
+ * tool names or emitting role-play "[TOOLCALL]" fragments. The compromise:
+ * keep the hero surface always-on (file/shell/search PLUS the trading and
+ * research tools that define Franklin's category), and gate the long tail
+ * (webhook, imagegen, videogen, musicgen, memory, etc.) behind an
+ * `ActivateTool` meta-tool the agent pulls on demand.
  *
- * `CORE_TOOL_NAMES` is the per-session initial active set. Everything else
- * becomes visible only after the agent calls ActivateTool with its name.
+ * History: earlier releases kept only file/shell/search in core, which made
+ * mid-tier models answer stock / market questions from 2022 training data
+ * instead of calling TradingMarket. That's anti-positioning for an agent
+ * whose whole brand is "spends USDC for real market data." Hero tools now
+ * live in the always-on set so the default experience shows the wallet
+ * actually at work.
  */
 export declare const CORE_TOOL_NAMES: ReadonlySet<string>;
 /** True if this tool is always available without activation. */

package/dist/tools/tool-categories.js CHANGED Viewed

@@ -1,15 +1,20 @@
 /**
  * Tool visibility categories.
  *
- * Franklin ships with ~27 capabilities. Exposing all of them to the model on
- * every turn makes the tool inventory large enough that weak models start
- * hallucinating tool names or emitting role-play "[TOOLCALL]" fragments.
- * The fix: keep a minimal always-on core (file ops, shell, ask) and gate the
- * rest behind an `ActivateTool` meta-tool that the agent pulls on demand —
- * the same per-session visibility pattern that OpenBB's MCP server uses.
+ * Franklin ships with ~27 capabilities. Exposing all of them on every turn
+ * makes the tool inventory large enough that weak models start hallucinating
+ * tool names or emitting role-play "[TOOLCALL]" fragments. The compromise:
+ * keep the hero surface always-on (file/shell/search PLUS the trading and
+ * research tools that define Franklin's category), and gate the long tail
+ * (webhook, imagegen, videogen, musicgen, memory, etc.) behind an
+ * `ActivateTool` meta-tool the agent pulls on demand.
  *
- * `CORE_TOOL_NAMES` is the per-session initial active set. Everything else
- * becomes visible only after the agent calls ActivateTool with its name.
+ * History: earlier releases kept only file/shell/search in core, which made
+ * mid-tier models answer stock / market questions from 2022 training data
+ * instead of calling TradingMarket. That's anti-positioning for an agent
+ * whose whole brand is "spends USDC for real market data." Hero tools now
+ * live in the always-on set so the default experience shows the wallet
+ * actually at work.
  */
 export const CORE_TOOL_NAMES = new Set([
     // File operations — nothing else works without these.
@@ -27,8 +32,24 @@ export const CORE_TOOL_NAMES = new Set([
     // so keeping this in the core doesn't leak the full inventory.
     'Task',
     // The meta-tool itself — must always be callable so the agent can
-    // discover and activate the rest.
+    // discover and activate anything not in this core set.
     'ActivateTool',
+    // ── Hero surface: Franklin's reason to exist ────────────────────────
+    // Trading market data — crypto, FX, commodity, stocks (via x402).
+    // "Is NVDA up?" / "Should I sell CRCL?" must never fall back to
+    // training-data guessing.
+    'TradingMarket',
+    'TradingSignal',
+    // Research — synthesized answers with real citations, semantic web
+    // search, and clean URL fetching. Any factual current-events question
+    // ("why did X drop?") should route here rather than the model's prior.
+    'ExaAnswer',
+    'ExaSearch',
+    'ExaReadUrls',
+    // Plain web fetch — specific URL → readable text. Cheap and obvious
+    // enough that every model tends to pick it correctly.
+    'WebFetch',
+    'WebSearch',
 ]);
 /** True if this tool is always available without activation. */
 export function isCoreTool(name) {

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.8.11",
+  "version": "3.8.13",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {