@blockrun/franklin 3.8.21 → 3.8.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Binary file
Binary file
Binary file
@@ -427,7 +427,7 @@ function formatCompactSummary(raw) {
427
427
  function pickCompactionModel(primaryModel) {
428
428
  // Free parent → free compaction (no silent charge)
429
429
  if (primaryModel.startsWith('nvidia/') || primaryModel === 'blockrun/free') {
430
- return 'nvidia/nemotron-ultra-253b';
430
+ return 'nvidia/glm-4.7';
431
431
  }
432
432
  // Use cheapest capable model for summarization to save cost
433
433
  // Tier down: opus/pro → sonnet, sonnet → haiku, everything else → flash (cheapest capable)
@@ -168,7 +168,7 @@ export function parseGroundingResponse(raw) {
168
168
  * choice so both quality gates have the same cost profile. Override via
169
169
  * `FRANKLIN_EVALUATOR_MODEL` to experiment with accuracy/cost trade-offs. */
170
170
  export function evaluatorModel() {
171
- return process.env.FRANKLIN_EVALUATOR_MODEL || 'nvidia/nemotron-ultra-253b';
171
+ return process.env.FRANKLIN_EVALUATOR_MODEL || 'nvidia/llama-4-maverick';
172
172
  }
173
173
  // ─── Run grounding check ─────────────────────────────────────────────────
174
174
  const MAX_EVAL_TOKENS = 512;
@@ -27,7 +27,9 @@
27
27
  */
28
28
  import { getStockPrice, getPrice } from '../trading/data.js';
29
29
  // ─── Classifier ──────────────────────────────────────────────────────────
30
- const CLASSIFIER_MODEL = process.env.FRANKLIN_PREFETCH_MODEL || 'nvidia/nemotron-ultra-253b';
30
+ // llama-4-maverick: same rationale as the router classifier — emits plain
31
+ // text under tight max_tokens rather than routing through thinking blocks.
32
+ const CLASSIFIER_MODEL = process.env.FRANKLIN_PREFETCH_MODEL || 'nvidia/llama-4-maverick';
31
33
  const CLASSIFIER_TIMEOUT_MS = 2_500;
32
34
  const CLASSIFIER_PROMPT = `You extract PREFETCH INTENT from a user message for a CLI agent that has live market-data tools.
33
35
 
@@ -98,8 +100,12 @@ export async function classifyIntent(userInput, client) {
98
100
  if (process.env.FRANKLIN_NO_PREFETCH === '1')
99
101
  return null;
100
102
  const trimmed = userInput.trim();
101
- // Short inputs (<12 chars) are rarely asking for market data — skip the call entirely.
102
- if (trimmed.length < 12)
103
+ // Only the cheapest gate — skip very short inputs that can't be a real
104
+ // market question ("hi", "ok", "hello"). A 6-char minimum drops those while
105
+ // still letting short-form Chinese / ticker prompts through, e.g.
106
+ // "BTC 价格" (6), "CRCL 多少" (7). Longer prompts all route to the LLM
107
+ // classifier, which decides NONE cheaply when not market-related.
108
+ if (trimmed.length < 6)
103
109
  return null;
104
110
  const ctrl = new AbortController();
105
111
  const timer = setTimeout(() => ctrl.abort(), CLASSIFIER_TIMEOUT_MS);
package/dist/agent/llm.js CHANGED
@@ -205,12 +205,12 @@ export class ModelClient {
205
205
  // Static fallback if router is unavailable. Default to FREE model so
206
206
  // users aren't silently charged when their intended model can't resolve.
207
207
  const FALLBACKS = {
208
- 'blockrun/auto': 'nvidia/nemotron-ultra-253b',
209
- 'blockrun/eco': 'nvidia/nemotron-ultra-253b',
208
+ 'blockrun/auto': 'nvidia/glm-4.7',
209
+ 'blockrun/eco': 'nvidia/glm-4.7',
210
210
  'blockrun/premium': 'anthropic/claude-sonnet-4.6',
211
- 'blockrun/free': 'nvidia/nemotron-ultra-253b',
211
+ 'blockrun/free': 'nvidia/glm-4.7',
212
212
  };
213
- return FALLBACKS[model] || 'nvidia/nemotron-ultra-253b';
213
+ return FALLBACKS[model] || 'nvidia/glm-4.7';
214
214
  }
215
215
  async *streamCompletion(request, signal) {
216
216
  // Resolve virtual models before any API call
@@ -823,7 +823,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
823
823
  const hasTools = responseParts.some(p => p.type === 'tool_use');
824
824
  const hasThinking = responseParts.some(p => p.type === 'thinking');
825
825
  if (!hasText && !hasTools && !hasThinking) {
826
- const EMPTY_FALLBACK_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/nemotron-ultra-253b', 'zai/glm-5.1'];
826
+ const EMPTY_FALLBACK_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/glm-4.7', 'zai/glm-5.1'];
827
827
  const nextModel = EMPTY_FALLBACK_MODELS.find(m => m !== config.model && !turnFailedModels.has(m));
828
828
  if (nextModel && recoveryAttempts < 2) {
829
829
  recoveryAttempts++;
@@ -922,7 +922,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
922
922
  if (lastRoutedCategory) {
923
923
  recordOutcome(lastRoutedCategory, config.model, 'payment');
924
924
  }
925
- const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/nemotron-ultra-253b', 'nvidia/devstral-2-123b'];
925
+ const FREE_MODELS = ['nvidia/glm-4.7', 'nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/qwen3-next-80b-a3b-thinking'];
926
926
  const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
927
927
  if (nextFree) {
928
928
  const oldModel = config.model;
@@ -113,7 +113,7 @@ export async function runVerification(history, handlers, client, config) {
113
113
  ];
114
114
  config.onEvent?.({ kind: 'text_delta', text: '\n*Verifying...*\n' });
115
115
  // Use cheap model for verification
116
- const verificationModel = 'nvidia/nemotron-ultra-253b'; // Free model to keep cost zero
116
+ const verificationModel = 'nvidia/glm-4.7'; // Free model to keep cost zero
117
117
  try {
118
118
  // Simple single-turn verification call
119
119
  const response = await client.complete({
@@ -159,7 +159,7 @@ async function runCommand(options) {
159
159
  const chain = loadChain();
160
160
  const apiUrl = API_URLS[chain];
161
161
  const appConfig = loadAppConfig();
162
- const model = options.model || appConfig['default-model'] || 'nvidia/nemotron-ultra-253b';
162
+ const model = options.model || appConfig['default-model'] || 'nvidia/glm-4.7';
163
163
  console.log(chalk.dim(` Model: ${model}`));
164
164
  console.log('');
165
165
  let result;
@@ -124,11 +124,17 @@ export async function startCommand(options) {
124
124
  process.exitCode = exitCode;
125
125
  return;
126
126
  }
127
- // Warn when a paid model is active so users know they'll be charged
127
+ // Warn when a paid model is active so users know they'll be charged.
128
+ // Set members = BlockRun gateway's current free tier (refreshed 2026-04).
128
129
  const FREE_MODELS = new Set([
129
- 'nvidia/nemotron-ultra-253b',
130
+ 'nvidia/glm-4.7',
131
+ 'nvidia/qwen3-next-80b-a3b-thinking',
130
132
  'nvidia/qwen3-coder-480b',
131
- 'nvidia/devstral-2-123b',
133
+ 'nvidia/mistral-small-4-119b',
134
+ 'nvidia/llama-4-maverick',
135
+ 'nvidia/deepseek-v3.2',
136
+ 'nvidia/gpt-oss-120b',
137
+ 'nvidia/gpt-oss-20b',
132
138
  'blockrun/free',
133
139
  ]);
134
140
  if (!FREE_MODELS.has(model)) {
@@ -36,7 +36,7 @@ export async function telegramCommand(opts) {
36
36
  // Model: --model flag > config default > free default.
37
37
  const model = opts.model ||
38
38
  config['default-model'] ||
39
- 'nvidia/nemotron-ultra-253b';
39
+ 'nvidia/glm-4.7';
40
40
  const workingDir = process.cwd();
41
41
  const systemInstructions = assembleInstructions(workingDir, model);
42
42
  // Resume the most recent session tagged for THIS owner so a process
@@ -9,9 +9,9 @@ import { loadLearnings, mergeLearning, saveLearnings, loadSkills, saveSkill } fr
9
9
  // Free models for learning extraction — JSON extraction is simple enough.
10
10
  // Ordered by reliability: try the best free model first, fall back to others.
11
11
  const EXTRACTION_MODELS = [
12
- 'nvidia/nemotron-ultra-253b', // Best free model for structured output
12
+ 'nvidia/glm-4.7', // Best free model for structured output
13
13
  'nvidia/qwen3-coder-480b', // Strong at JSON tasks
14
- 'nvidia/devstral-2-123b', // Fallback
14
+ 'nvidia/llama-4-maverick', // Fallback
15
15
  ];
16
16
  const VALID_CATEGORIES = new Set([
17
17
  'language', 'model_preference', 'tool_pattern', 'coding_style',
@@ -5,7 +5,7 @@
5
5
  * Plugins implement Workflow; core orchestrates execution and provides infrastructure.
6
6
  */
7
7
  export const DEFAULT_MODEL_TIERS = {
8
- free: 'nvidia/nemotron-ultra-253b',
9
- cheap: 'nvidia/nemotron-ultra-253b', // Was glm-5.1 ($0.001/call). Free by default; opt-in to paid.
8
+ free: 'nvidia/glm-4.7',
9
+ cheap: 'nvidia/glm-4.7', // Free by default; opt-in to paid flat-rate via 'zai/glm-5.1'.
10
10
  premium: 'anthropic/claude-sonnet-4.6',
11
11
  };
package/dist/pricing.js CHANGED
@@ -8,18 +8,21 @@ export const MODEL_PRICING = {
8
8
  'blockrun/eco': { input: 0.2, output: 1.0 },
9
9
  'blockrun/premium': { input: 3.0, output: 15.0 },
10
10
  'blockrun/free': { input: 0, output: 0 },
11
- // FREE - NVIDIA models
11
+ // FREE — BlockRun gateway free tier (refreshed 2026-04)
12
+ 'nvidia/glm-4.7': { input: 0, output: 0 },
13
+ 'nvidia/qwen3-next-80b-a3b-thinking': { input: 0, output: 0 },
14
+ 'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
15
+ 'nvidia/mistral-small-4-119b': { input: 0, output: 0 },
16
+ 'nvidia/llama-4-maverick': { input: 0, output: 0 },
17
+ 'nvidia/deepseek-v3.2': { input: 0, output: 0 },
12
18
  'nvidia/gpt-oss-120b': { input: 0, output: 0 },
13
19
  'nvidia/gpt-oss-20b': { input: 0, output: 0 },
20
+ // Retired (kept at 0 for legacy session-cost records; gateway no longer serves these).
14
21
  'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
22
+ 'nvidia/devstral-2-123b': { input: 0, output: 0 },
15
23
  'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
16
24
  'nvidia/nemotron-super-49b': { input: 0, output: 0 },
17
- 'nvidia/deepseek-v3.2': { input: 0, output: 0 },
18
25
  'nvidia/mistral-large-3-675b': { input: 0, output: 0 },
19
- 'nvidia/qwen3-coder-480b': { input: 0, output: 0 },
20
- 'nvidia/devstral-2-123b': { input: 0, output: 0 },
21
- 'nvidia/glm-4.7': { input: 0, output: 0 },
22
- 'nvidia/llama-4-maverick': { input: 0, output: 0 },
23
26
  // Anthropic
24
27
  'anthropic/claude-sonnet-4.6': { input: 3.0, output: 15.0 },
25
28
  'anthropic/claude-opus-4.7': { input: 5.0, output: 25.0 },
@@ -19,7 +19,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
19
19
  chain: [
20
20
  'deepseek/deepseek-chat', // Direct fallback — cheap & reliable
21
21
  'google/gemini-2.5-flash', // Fast & capable
22
- 'nvidia/nemotron-ultra-253b', // Free model as ultimate fallback
22
+ 'nvidia/glm-4.7', // Free model as ultimate fallback
23
23
  ],
24
24
  retryOn: [429, 500, 502, 503, 504, 529],
25
25
  maxRetries: 5,
@@ -94,13 +94,19 @@ const MODEL_SHORTCUTS = {
94
94
  // DeepSeek
95
95
  deepseek: 'deepseek/deepseek-chat',
96
96
  r1: 'deepseek/deepseek-reasoner',
97
- // Free models
98
- free: 'nvidia/nemotron-ultra-253b',
99
- nemotron: 'nvidia/nemotron-ultra-253b',
97
+ // Free models (gateway free tier — refreshed 2026-04)
98
+ free: 'nvidia/glm-4.7',
99
+ glm4: 'nvidia/glm-4.7',
100
100
  'deepseek-free': 'nvidia/deepseek-v3.2',
101
- devstral: 'nvidia/devstral-2-123b',
102
101
  'qwen-coder': 'nvidia/qwen3-coder-480b',
102
+ 'qwen-think': 'nvidia/qwen3-next-80b-a3b-thinking',
103
103
  maverick: 'nvidia/llama-4-maverick',
104
+ 'gpt-oss': 'nvidia/gpt-oss-120b',
105
+ 'gpt-oss-small': 'nvidia/gpt-oss-20b',
106
+ 'mistral-small': 'nvidia/mistral-small-4-119b',
107
+ // Retired-gateway-model aliases (map to closest current).
108
+ nemotron: 'nvidia/glm-4.7',
109
+ devstral: 'nvidia/qwen3-coder-480b',
104
110
  // Minimax
105
111
  minimax: 'minimax/minimax-m2.7',
106
112
  // Others
@@ -66,12 +66,12 @@ const AUTO_TIERS = {
66
66
  };
67
67
  const ECO_TIERS = {
68
68
  SIMPLE: {
69
- primary: 'nvidia/nemotron-ultra-253b',
69
+ primary: 'nvidia/glm-4.7',
70
70
  fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
71
71
  },
72
72
  MEDIUM: {
73
73
  primary: 'google/gemini-2.5-flash-lite',
74
- fallback: ['nvidia/nemotron-ultra-253b', 'nvidia/qwen3-coder-480b'],
74
+ fallback: ['nvidia/glm-4.7', 'nvidia/qwen3-coder-480b'],
75
75
  },
76
76
  COMPLEX: {
77
77
  primary: 'google/gemini-2.5-flash-lite',
@@ -79,7 +79,7 @@ const ECO_TIERS = {
79
79
  },
80
80
  REASONING: {
81
81
  primary: 'xai/grok-4-1-fast-reasoning',
82
- fallback: ['deepseek/deepseek-reasoner', 'nvidia/nemotron-ultra-253b'],
82
+ fallback: ['deepseek/deepseek-reasoner', 'nvidia/qwen3-next-80b-a3b-thinking'],
83
83
  },
84
84
  };
85
85
  const PREMIUM_TIERS = {
@@ -283,7 +283,10 @@ function classicRouteRequest(prompt, profile) {
283
283
  // - Exposed via async `routeRequestAsync(prompt, profile, classify?)`. Callers
284
284
  // that can't be async (proxy, LLM-client bootstrap) keep using the sync
285
285
  // `routeRequest`, which silently does keyword-only routing.
286
- const CLASSIFIER_MODEL = process.env.FRANKLIN_ROUTER_MODEL || 'nvidia/nemotron-ultra-253b';
286
+ // llama-4-maverick: clean one-word classification output. glm-4.7 + qwen-
287
+ // thinking emit reasoning into thinking blocks and leave text empty under
288
+ // tight max_tokens — fine for chat, wrong shape for single-word dispatch.
289
+ const CLASSIFIER_MODEL = process.env.FRANKLIN_ROUTER_MODEL || 'nvidia/llama-4-maverick';
287
290
  const CLASSIFIER_TIMEOUT_MS = 2_500;
288
291
  const CLASSIFIER_SYSTEM = `You classify a user's message into ONE routing tier for a CLI agent. Reply with EXACTLY ONE WORD from the allowed set. No explanation, no punctuation, no quotes.
289
292
 
@@ -395,7 +398,7 @@ export function routeRequest(prompt, profile = 'auto') {
395
398
  // Free profile — always use free model
396
399
  if (profile === 'free') {
397
400
  return {
398
- model: 'nvidia/nemotron-ultra-253b',
401
+ model: 'nvidia/glm-4.7',
399
402
  tier: 'SIMPLE',
400
403
  confidence: 1.0,
401
404
  signals: ['free-profile'],
@@ -467,7 +470,7 @@ export function getFallbackChain(tier, profile = 'auto') {
467
470
  tierConfigs = PREMIUM_TIERS;
468
471
  break;
469
472
  case 'free':
470
- return ['nvidia/nemotron-ultra-253b'];
473
+ return ['nvidia/glm-4.7'];
471
474
  default:
472
475
  tierConfigs = AUTO_TIERS;
473
476
  }
package/dist/tools/moa.js CHANGED
@@ -14,13 +14,14 @@ import { ModelClient } from '../agent/llm.js';
14
14
  // ─── Configuration ────────────────────────────────────────────────────────
15
15
  /** Reference models — diverse, cheap/free models for parallel queries. */
16
16
  const REFERENCE_MODELS = [
17
- 'nvidia/nemotron-ultra-253b', // Free, strong reasoning
17
+ 'nvidia/glm-4.7', // Free, strong reasoning + coding
18
+ 'nvidia/qwen3-next-80b-a3b-thinking', // Free, explicit reasoning model
18
19
  'nvidia/qwen3-coder-480b', // Free, strong coding
19
20
  'google/gemini-2.5-flash', // Fast, cheap
20
21
  'deepseek/deepseek-chat', // Cheap, good reasoning
21
22
  ];
22
23
  /** Aggregator model — free by default. Users explicitly pass `aggregator` to upgrade. */
23
- const AGGREGATOR_MODEL = 'nvidia/nemotron-ultra-253b';
24
+ const AGGREGATOR_MODEL = 'nvidia/glm-4.7';
24
25
  /** Max tokens per reference response. */
25
26
  const REFERENCE_MAX_TOKENS = 4096;
26
27
  /** Max tokens for aggregator. */
@@ -18,7 +18,7 @@ async function execute(input, ctx) {
18
18
  return { output: 'Error: prompt is required', isError: true };
19
19
  }
20
20
  // Resolve which model the sub-agent will actually run on
21
- const subModel = model || registeredParentModel || 'nvidia/nemotron-ultra-253b';
21
+ const subModel = model || registeredParentModel || 'nvidia/glm-4.7';
22
22
  // Cost gate: if parent is free but sub-agent wants paid, ask user first.
23
23
  // Prevents silent charges when the agent decides to spawn a more capable sub-agent.
24
24
  if (isFreeModel(registeredParentModel) && !isFreeModel(subModel)) {
@@ -46,13 +46,21 @@ export const MODEL_SHORTCUTS = {
46
46
  // DeepSeek
47
47
  deepseek: 'deepseek/deepseek-chat',
48
48
  r1: 'deepseek/deepseek-reasoner',
49
- // Free
50
- free: 'nvidia/nemotron-ultra-253b',
51
- nemotron: 'nvidia/nemotron-ultra-253b',
49
+ // Free (BlockRun gateway free tier — refreshed 2026-04)
50
+ free: 'nvidia/glm-4.7',
51
+ glm4: 'nvidia/glm-4.7',
52
52
  'deepseek-free': 'nvidia/deepseek-v3.2',
53
- devstral: 'nvidia/devstral-2-123b',
54
53
  'qwen-coder': 'nvidia/qwen3-coder-480b',
54
+ 'qwen-think': 'nvidia/qwen3-next-80b-a3b-thinking',
55
55
  maverick: 'nvidia/llama-4-maverick',
56
+ 'gpt-oss': 'nvidia/gpt-oss-120b',
57
+ 'gpt-oss-small': 'nvidia/gpt-oss-20b',
58
+ 'mistral-small': 'nvidia/mistral-small-4-119b',
59
+ // Backward-compatibility aliases for models the gateway retired.
60
+ // Map to the closest current free model so old session records + user
61
+ // muscle memory keep working.
62
+ nemotron: 'nvidia/glm-4.7',
63
+ devstral: 'nvidia/qwen3-coder-480b',
56
64
  // Others
57
65
  minimax: 'minimax/minimax-m2.7',
58
66
  glm: 'zai/glm-5.1',
@@ -134,12 +142,14 @@ export const PICKER_CATEGORIES = [
134
142
  {
135
143
  category: '🆓 Free (no USDC needed)',
136
144
  models: [
137
- { id: 'nvidia/nemotron-ultra-253b', shortcut: 'free', label: 'Nemotron Ultra 253B', price: 'FREE' },
145
+ { id: 'nvidia/glm-4.7', shortcut: 'free', label: 'GLM-4.7', price: 'FREE' },
146
+ { id: 'nvidia/qwen3-next-80b-a3b-thinking', shortcut: 'qwen-think', label: 'Qwen3-Next 80B Thinking', price: 'FREE' },
138
147
  { id: 'nvidia/qwen3-coder-480b', shortcut: 'qwen-coder', label: 'Qwen3 Coder 480B', price: 'FREE' },
139
- { id: 'nvidia/devstral-2-123b', shortcut: 'devstral', label: 'Devstral 2 123B', price: 'FREE' },
140
148
  { id: 'nvidia/llama-4-maverick', shortcut: 'maverick', label: 'Llama 4 Maverick', price: 'FREE' },
141
149
  { id: 'nvidia/deepseek-v3.2', shortcut: 'deepseek-free', label: 'DeepSeek V3.2', price: 'FREE' },
142
- { id: 'nvidia/gpt-oss-120b', shortcut: 'gpt-oss', label: 'GPT OSS 120B', price: 'FREE' },
150
+ { id: 'nvidia/gpt-oss-120b', shortcut: 'gpt-oss', label: 'GPT-OSS 120B', price: 'FREE' },
151
+ { id: 'nvidia/gpt-oss-20b', shortcut: 'gpt-oss-small', label: 'GPT-OSS 20B', price: 'FREE' },
152
+ { id: 'nvidia/mistral-small-4-119b', shortcut: 'mistral-small', label: 'Mistral Small 4 119B', price: 'FREE' },
143
153
  ],
144
154
  },
145
155
  ];
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/franklin",
3
- "version": "3.8.21",
3
+ "version": "3.8.23",
4
4
  "description": "Franklin — The AI agent with a wallet. Spends USDC autonomously to get real work done. Pay per action, no subscriptions.",
5
5
  "type": "module",
6
6
  "exports": {
@@ -20,6 +20,7 @@
20
20
  },
21
21
  "files": [
22
22
  "dist",
23
+ "assets",
23
24
  "README.md",
24
25
  "LICENSE"
25
26
  ],