npm - @blockrun/franklin - Versions diffs - 3.8.21 → 3.8.23 - Mend

@blockrun/franklin 3.8.21 → 3.8.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (22)

package/assets/franklin-bill.jpg +0 -0
package/assets/franklin-portrait.jpg +0 -0
package/assets/terminal-banner.png +0 -0
package/dist/agent/compact.js +1 -1
package/dist/agent/evaluator.js +1 -1
package/dist/agent/intent-prefetch.js +9 -3
package/dist/agent/llm.js +4 -4
package/dist/agent/loop.js +2 -2
package/dist/agent/verification.js +1 -1
package/dist/commands/social.js +1 -1
package/dist/commands/start.js +9 -3
package/dist/commands/telegram.js +1 -1
package/dist/learnings/extractor.js +2 -2
package/dist/plugin-sdk/workflow.js +2 -2
package/dist/pricing.js +9 -6
package/dist/proxy/fallback.js +1 -1
package/dist/proxy/server.js +10 -4
package/dist/router/index.js +9 -6
package/dist/tools/moa.js +3 -2
package/dist/tools/subagent.js +1 -1
package/dist/ui/model-picker.js +17 -7
package/package.json +2 -1

package/assets/franklin-bill.jpg ADDED Viewed

Binary file

package/assets/franklin-portrait.jpg ADDED Viewed

Binary file

package/assets/terminal-banner.png ADDED Viewed

Binary file

package/dist/agent/compact.js CHANGED Viewed

@@ -427,7 +427,7 @@ function formatCompactSummary(raw) {
 function pickCompactionModel(primaryModel) {
     // Free parent → free compaction (no silent charge)
     if (primaryModel.startsWith('nvidia/') || primaryModel === 'blockrun/free') {
-        return 'nvidia/nemotron-ultra-253b';
+        return 'nvidia/glm-4.7';
     }
     // Use cheapest capable model for summarization to save cost
     // Tier down: opus/pro → sonnet, sonnet → haiku, everything else → flash (cheapest capable)

package/dist/agent/evaluator.js CHANGED Viewed

@@ -168,7 +168,7 @@ export function parseGroundingResponse(raw) {
  *  choice so both quality gates have the same cost profile. Override via
  *  `FRANKLIN_EVALUATOR_MODEL` to experiment with accuracy/cost trade-offs. */
 export function evaluatorModel() {
-    return process.env.FRANKLIN_EVALUATOR_MODEL || 'nvidia/nemotron-ultra-253b';
+    return process.env.FRANKLIN_EVALUATOR_MODEL || 'nvidia/llama-4-maverick';
 }
 // ─── Run grounding check ─────────────────────────────────────────────────
 const MAX_EVAL_TOKENS = 512;

package/dist/agent/intent-prefetch.js CHANGED Viewed

@@ -27,7 +27,9 @@
  */
 import { getStockPrice, getPrice } from '../trading/data.js';
 // ─── Classifier ──────────────────────────────────────────────────────────
-const CLASSIFIER_MODEL = process.env.FRANKLIN_PREFETCH_MODEL || 'nvidia/nemotron-ultra-253b';
+// llama-4-maverick: same rationale as the router classifier — emits plain
+// text under tight max_tokens rather than routing through thinking blocks.
+const CLASSIFIER_MODEL = process.env.FRANKLIN_PREFETCH_MODEL || 'nvidia/llama-4-maverick';
 const CLASSIFIER_TIMEOUT_MS = 2_500;
 const CLASSIFIER_PROMPT = `You extract PREFETCH INTENT from a user message for a CLI agent that has live market-data tools.
@@ -98,8 +100,12 @@ export async function classifyIntent(userInput, client) {
     if (process.env.FRANKLIN_NO_PREFETCH === '1')
         return null;
     const trimmed = userInput.trim();
-    // Short inputs (<12 chars) are rarely asking for market data — skip the call entirely.
-    if (trimmed.length < 12)
+    // Only the cheapest gate — skip very short inputs that can't be a real
+    // market question ("hi", "ok", "thanks"). 6 chars covers those while
+    // still letting short-form Chinese / ticker prompts through, e.g.
+    // "BTC 价格" (6), "CRCL 多少" (7). Longer prompts all route to the LLM
+    // classifier, which decides NONE cheaply when not market-related.
+    if (trimmed.length < 6)
         return null;
     const ctrl = new AbortController();
     const timer = setTimeout(() => ctrl.abort(), CLASSIFIER_TIMEOUT_MS);

package/dist/agent/llm.js CHANGED Viewed

@@ -205,12 +205,12 @@ export class ModelClient {
         // Static fallback if router is unavailable. Default to FREE model so
         // users aren't silently charged when their intended model can't resolve.
         const FALLBACKS = {
-            'blockrun/auto': 'nvidia/nemotron-ultra-253b',
-            'blockrun/eco': 'nvidia/nemotron-ultra-253b',
+            'blockrun/auto': 'nvidia/glm-4.7',
+            'blockrun/eco': 'nvidia/glm-4.7',
             'blockrun/premium': 'anthropic/claude-sonnet-4.6',
-            'blockrun/free': 'nvidia/nemotron-ultra-253b',
+            'blockrun/free': 'nvidia/glm-4.7',
         };
-        return FALLBACKS[model] || 'nvidia/nemotron-ultra-253b';
+        return FALLBACKS[model] || 'nvidia/glm-4.7';
     }
     async *streamCompletion(request, signal) {
         // Resolve virtual models before any API call

package/dist/agent/loop.js CHANGED Viewed

@@ -823,7 +823,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                 const hasTools = responseParts.some(p => p.type === 'tool_use');
                 const hasThinking = responseParts.some(p => p.type === 'thinking');
                 if (!hasText && !hasTools && !hasThinking) {
-                    const EMPTY_FALLBACK_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/nemotron-ultra-253b', 'zai/glm-5.1'];
+                    const EMPTY_FALLBACK_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/glm-4.7', 'zai/glm-5.1'];
                     const nextModel = EMPTY_FALLBACK_MODELS.find(m => m !== config.model && !turnFailedModels.has(m));
                     if (nextModel && recoveryAttempts < 2) {
                         recoveryAttempts++;
@@ -922,7 +922,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                     if (lastRoutedCategory) {
                         recordOutcome(lastRoutedCategory, config.model, 'payment');
                     }
-                    const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/nemotron-ultra-253b', 'nvidia/devstral-2-123b'];
+                    const FREE_MODELS = ['nvidia/glm-4.7', 'nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/qwen3-next-80b-a3b-thinking'];
                     const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
                     if (nextFree) {
                         const oldModel = config.model;

package/dist/agent/verification.js CHANGED Viewed

@@ -113,7 +113,7 @@ export async function runVerification(history, handlers, client, config) {
     ];
     config.onEvent?.({ kind: 'text_delta', text: '\n*Verifying...*\n' });
     // Use cheap model for verification
-    const verificationModel = 'nvidia/nemotron-ultra-253b'; // Free model to keep cost zero
+    const verificationModel = 'nvidia/glm-4.7'; // Free model to keep cost zero
     try {
         // Simple single-turn verification call
         const response = await client.complete({

package/dist/commands/social.js CHANGED Viewed

@@ -159,7 +159,7 @@ async function runCommand(options) {
     const chain = loadChain();
     const apiUrl = API_URLS[chain];
     const appConfig = loadAppConfig();
-    const model = options.model || appConfig['default-model'] || 'nvidia/nemotron-ultra-253b';
+    const model = options.model || appConfig['default-model'] || 'nvidia/glm-4.7';
     console.log(chalk.dim(`  Model: ${model}`));
     console.log('');
     let result;

package/dist/commands/start.js CHANGED Viewed

@@ -124,11 +124,17 @@ export async function startCommand(options) {
         process.exitCode = exitCode;
         return;
     }
-    // Warn when a paid model is active so users know they'll be charged
+    // Warn when a paid model is active so users know they'll be charged.
+    // Set members = BlockRun gateway's current free tier (refreshed 2026-04).
     const FREE_MODELS = new Set([
-        'nvidia/nemotron-ultra-253b',
+        'nvidia/glm-4.7',
+        'nvidia/qwen3-next-80b-a3b-thinking',
         'nvidia/qwen3-coder-480b',
-        'nvidia/devstral-2-123b',
+        'nvidia/mistral-small-4-119b',
+        'nvidia/llama-4-maverick',
+        'nvidia/deepseek-v3.2',
+        'nvidia/gpt-oss-120b',
+        'nvidia/gpt-oss-20b',
         'blockrun/free',
     ]);
     if (!FREE_MODELS.has(model)) {

package/dist/commands/telegram.js CHANGED Viewed

@@ -36,7 +36,7 @@ export async function telegramCommand(opts) {
     // Model: --model flag > config default > free default.
     const model = opts.model ||
         config['default-model'] ||
-        'nvidia/nemotron-ultra-253b';
+        'nvidia/glm-4.7';
     const workingDir = process.cwd();
     const systemInstructions = assembleInstructions(workingDir, model);
     // Resume the most recent session tagged for THIS owner so a process

package/dist/learnings/extractor.js CHANGED Viewed

@@ -9,9 +9,9 @@ import { loadLearnings, mergeLearning, saveLearnings, loadSkills, saveSkill } fr
 // Free models for learning extraction — JSON extraction is simple enough.
 // Ordered by reliability: try the best free model first, fall back to others.
 const EXTRACTION_MODELS = [
-    'nvidia/nemotron-ultra-253b', // Best free model for structured output
+    'nvidia/glm-4.7', // Best free model for structured output
     'nvidia/qwen3-coder-480b', // Strong at JSON tasks
-    'nvidia/devstral-2-123b', // Fallback
+    'nvidia/llama-4-maverick', // Fallback
 ];
 const VALID_CATEGORIES = new Set([
     'language', 'model_preference', 'tool_pattern', 'coding_style',

package/dist/plugin-sdk/workflow.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * Plugins implement Workflow; core orchestrates execution and provides infrastructure.
  */
 export const DEFAULT_MODEL_TIERS = {
-    free: 'nvidia/nemotron-ultra-253b',
-    cheap: 'nvidia/nemotron-ultra-253b', // Was glm-5.1 ($0.001/call). Free by default; opt-in to paid.
+    free: 'nvidia/glm-4.7',
+    cheap: 'nvidia/glm-4.7', // Free by default; opt-in to paid flat-rate via 'zai/glm-5.1'.
     premium: 'anthropic/claude-sonnet-4.6',
 };

package/dist/pricing.js CHANGED Viewed

@@ -8,18 +8,21 @@ export const MODEL_PRICING = {
     'blockrun/eco': { input: 0.2, output: 1.0 },
     'blockrun/premium': { input: 3.0, output: 15.0 },
     'blockrun/free': { input: 0, output: 0 },
-    // FREE - NVIDIA models
+    // FREE — BlockRun gateway free tier (refreshed 2026-04)
+    'nvidia/glm-4.7': { input: 0, output: 0 },
+    'nvidia/qwen3-next-80b-a3b-thinking': { input: 0, output: 0 },
+    'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
+    'nvidia/mistral-small-4-119b': { input: 0, output: 0 },
+    'nvidia/llama-4-maverick': { input: 0, output: 0 },
+    'nvidia/deepseek-v3.2': { input: 0, output: 0 },
     'nvidia/gpt-oss-120b': { input: 0, output: 0 },
     'nvidia/gpt-oss-20b': { input: 0, output: 0 },
+    // Retired (kept at 0 for legacy session-cost records; gateway no longer serves these).
     'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
+    'nvidia/devstral-2-123b': { input: 0, output: 0 },
     'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
     'nvidia/nemotron-super-49b': { input: 0, output: 0 },
-    'nvidia/deepseek-v3.2': { input: 0, output: 0 },
     'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
-    'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
-    'nvidia/devstral-2-123b': { input: 0, output: 0 },
-    'nvidia/glm-4.7': { input: 0, output: 0 },
-    'nvidia/llama-4-maverick': { input: 0, output: 0 },
     // Anthropic
     'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
     'anthropic/claude-opus-4.7': { input: 5.0, output: 25.0 },

package/dist/proxy/fallback.js CHANGED Viewed

@@ -19,7 +19,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
     chain: [
         'deepseek/deepseek-chat', // Direct fallback — cheap & reliable
         'google/gemini-2.5-flash', // Fast & capable
-        'nvidia/nemotron-ultra-253b', // Free model as ultimate fallback
+        'nvidia/glm-4.7', // Free model as ultimate fallback
     ],
     retryOn: [429, 500, 502, 503, 504, 529],
     maxRetries: 5,

package/dist/proxy/server.js CHANGED Viewed

@@ -94,13 +94,19 @@ const MODEL_SHORTCUTS = {
     // DeepSeek
     deepseek: 'deepseek/deepseek-chat',
     r1: 'deepseek/deepseek-reasoner',
-    // Free models
-    free: 'nvidia/nemotron-ultra-253b',
-    nemotron: 'nvidia/nemotron-ultra-253b',
+    // Free models (gateway free tier — refreshed 2026-04)
+    free: 'nvidia/glm-4.7',
+    glm4: 'nvidia/glm-4.7',
     'deepseek-free': 'nvidia/deepseek-v3.2',
-    devstral: 'nvidia/devstral-2-123b',
     'qwen-coder': 'nvidia/qwen3-coder-480b',
+    'qwen-think': 'nvidia/qwen3-next-80b-a3b-thinking',
     maverick: 'nvidia/llama-4-maverick',
+    'gpt-oss': 'nvidia/gpt-oss-120b',
+    'gpt-oss-small': 'nvidia/gpt-oss-20b',
+    'mistral-small': 'nvidia/mistral-small-4-119b',
+    // Retired-gateway-model aliases (map to closest current).
+    nemotron: 'nvidia/glm-4.7',
+    devstral: 'nvidia/qwen3-coder-480b',
     // Minimax
     minimax: 'minimax/minimax-m2.7',
     // Others

package/dist/router/index.js CHANGED Viewed

@@ -66,12 +66,12 @@ const AUTO_TIERS = {
 };
 const ECO_TIERS = {
     SIMPLE: {
-        primary: 'nvidia/nemotron-ultra-253b',
+        primary: 'nvidia/glm-4.7',
         fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
     },
     MEDIUM: {
         primary: 'google/gemini-2.5-flash-lite',
-        fallback: ['nvidia/nemotron-ultra-253b', 'nvidia/qwen3-coder-480b'],
+        fallback: ['nvidia/glm-4.7', 'nvidia/qwen3-coder-480b'],
     },
     COMPLEX: {
         primary: 'google/gemini-2.5-flash-lite',
@@ -79,7 +79,7 @@ const ECO_TIERS = {
     },
     REASONING: {
         primary: 'xai/grok-4-1-fast-reasoning',
-        fallback: ['deepseek/deepseek-reasoner', 'nvidia/nemotron-ultra-253b'],
+        fallback: ['deepseek/deepseek-reasoner', 'nvidia/qwen3-next-80b-a3b-thinking'],
     },
 };
 const PREMIUM_TIERS = {
@@ -283,7 +283,10 @@ function classicRouteRequest(prompt, profile) {
 //   - Exposed via async `routeRequestAsync(prompt, profile, classify?)`. Callers
 //     that can't be async (proxy, LLM-client bootstrap) keep using the sync
 //     `routeRequest`, which silently does keyword-only routing.
-const CLASSIFIER_MODEL = process.env.FRANKLIN_ROUTER_MODEL || 'nvidia/nemotron-ultra-253b';
+// llama-4-maverick: clean one-word classification output. glm-4.7 + qwen-
+// thinking emit reasoning into thinking blocks and leave text empty under
+// tight max_tokens — fine for chat, wrong shape for single-word dispatch.
+const CLASSIFIER_MODEL = process.env.FRANKLIN_ROUTER_MODEL || 'nvidia/llama-4-maverick';
 const CLASSIFIER_TIMEOUT_MS = 2_500;
 const CLASSIFIER_SYSTEM = `You classify a user's message into ONE routing tier for a CLI agent. Reply with EXACTLY ONE WORD from the allowed set. No explanation, no punctuation, no quotes.
@@ -395,7 +398,7 @@ export function routeRequest(prompt, profile = 'auto') {
     // Free profile — always use free model
     if (profile === 'free') {
         return {
-            model: 'nvidia/nemotron-ultra-253b',
+            model: 'nvidia/glm-4.7',
             tier: 'SIMPLE',
             confidence: 1.0,
             signals: ['free-profile'],
@@ -467,7 +470,7 @@ export function getFallbackChain(tier, profile = 'auto') {
             tierConfigs = PREMIUM_TIERS;
             break;
         case 'free':
-            return ['nvidia/nemotron-ultra-253b'];
+            return ['nvidia/glm-4.7'];
         default:
             tierConfigs = AUTO_TIERS;
     }

package/dist/tools/moa.js CHANGED Viewed

@@ -14,13 +14,14 @@ import { ModelClient } from '../agent/llm.js';
 // ─── Configuration ────────────────────────────────────────────────────────
 /** Reference models — diverse, cheap/free models for parallel queries. */
 const REFERENCE_MODELS = [
-    'nvidia/nemotron-ultra-253b', // Free, strong reasoning
+    'nvidia/glm-4.7', // Free, strong reasoning + coding
+    'nvidia/qwen3-next-80b-a3b-thinking', // Free, explicit reasoning model
     'nvidia/qwen3-coder-480b', // Free, strong coding
     'google/gemini-2.5-flash', // Fast, cheap
     'deepseek/deepseek-chat', // Cheap, good reasoning
 ];
 /** Aggregator model — free by default. Users explicitly pass `aggregator` to upgrade. */
-const AGGREGATOR_MODEL = 'nvidia/nemotron-ultra-253b';
+const AGGREGATOR_MODEL = 'nvidia/glm-4.7';
 /** Max tokens per reference response. */
 const REFERENCE_MAX_TOKENS = 4096;
 /** Max tokens for aggregator. */

package/dist/tools/subagent.js CHANGED Viewed

@@ -18,7 +18,7 @@ async function execute(input, ctx) {
         return { output: 'Error: prompt is required', isError: true };
     }
     // Resolve which model the sub-agent will actually run on
-    const subModel = model || registeredParentModel || 'nvidia/nemotron-ultra-253b';
+    const subModel = model || registeredParentModel || 'nvidia/glm-4.7';
     // Cost gate: if parent is free but sub-agent wants paid, ask user first.
     // Prevents silent charges when the agent decides to spawn a more capable sub-agent.
     if (isFreeModel(registeredParentModel) && !isFreeModel(subModel)) {

package/dist/ui/model-picker.js CHANGED Viewed

@@ -46,13 +46,21 @@ export const MODEL_SHORTCUTS = {
     // DeepSeek
     deepseek: 'deepseek/deepseek-chat',
     r1: 'deepseek/deepseek-reasoner',
-    // Free
-    free: 'nvidia/nemotron-ultra-253b',
-    nemotron: 'nvidia/nemotron-ultra-253b',
+    // Free (BlockRun gateway free tier — refreshed 2026-04)
+    free: 'nvidia/glm-4.7',
+    glm4: 'nvidia/glm-4.7',
     'deepseek-free': 'nvidia/deepseek-v3.2',
-    devstral: 'nvidia/devstral-2-123b',
     'qwen-coder': 'nvidia/qwen3-coder-480b',
+    'qwen-think': 'nvidia/qwen3-next-80b-a3b-thinking',
     maverick: 'nvidia/llama-4-maverick',
+    'gpt-oss': 'nvidia/gpt-oss-120b',
+    'gpt-oss-small': 'nvidia/gpt-oss-20b',
+    'mistral-small': 'nvidia/mistral-small-4-119b',
+    // Backward-compatibility aliases for models the gateway retired.
+    // Map to the closest current free model so old session records + user
+    // muscle memory keep working.
+    nemotron: 'nvidia/glm-4.7',
+    devstral: 'nvidia/qwen3-coder-480b',
     // Others
     minimax: 'minimax/minimax-m2.7',
     glm: 'zai/glm-5.1',
@@ -134,12 +142,14 @@ export const PICKER_CATEGORIES = [
     {
         category: '🆓 Free (no USDC needed)',
         models: [
-            { id: 'nvidia/nemotron-ultra-253b', shortcut: 'free', label: 'Nemotron Ultra 253B', price: 'FREE' },
+            { id: 'nvidia/glm-4.7', shortcut: 'free', label: 'GLM-4.7', price: 'FREE' },
+            { id: 'nvidia/qwen3-next-80b-a3b-thinking', shortcut: 'qwen-think', label: 'Qwen3-Next 80B Thinking', price: 'FREE' },
             { id: 'nvidia/qwen3-coder-480b', shortcut: 'qwen-coder', label: 'Qwen3 Coder 480B', price: 'FREE' },
-            { id: 'nvidia/devstral-2-123b', shortcut: 'devstral', label: 'Devstral 2 123B', price: 'FREE' },
             { id: 'nvidia/llama-4-maverick', shortcut: 'maverick', label: 'Llama 4 Maverick', price: 'FREE' },
             { id: 'nvidia/deepseek-v3.2', shortcut: 'deepseek-free', label: 'DeepSeek V3.2', price: 'FREE' },
-            { id: 'nvidia/gpt-oss-120b', shortcut: 'gpt-oss', label: 'GPT OSS 120B', price: 'FREE' },
+            { id: 'nvidia/gpt-oss-120b', shortcut: 'gpt-oss', label: 'GPT-OSS 120B', price: 'FREE' },
+            { id: 'nvidia/gpt-oss-20b', shortcut: 'gpt-oss-small', label: 'GPT-OSS 20B', price: 'FREE' },
+            { id: 'nvidia/mistral-small-4-119b', shortcut: 'mistral-small', label: 'Mistral Small 4 119B', price: 'FREE' },
         ],
     },
 ];

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@blockrun/franklin",
-  "version": "3.8.21",
+  "version": "3.8.23",
   "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
   "type": "module",
   "exports": {
@@ -20,6 +20,7 @@
   },
   "files": [
     "dist",
+    "assets",
     "README.md",
     "LICENSE"
   ],