@blockrun/franklin 3.8.44 → 3.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/README.md +3 -2
  2. package/dist/agent/commands.d.ts +5 -0
  3. package/dist/agent/commands.js +28 -0
  4. package/dist/agent/compact.js +1 -1
  5. package/dist/agent/context.js +1 -0
  6. package/dist/agent/llm.js +4 -4
  7. package/dist/agent/loop.js +40 -13
  8. package/dist/agent/verification.js +2 -2
  9. package/dist/commands/balance-retry.d.ts +15 -0
  10. package/dist/commands/balance-retry.js +20 -0
  11. package/dist/commands/skills.d.ts +8 -0
  12. package/dist/commands/skills.js +93 -0
  13. package/dist/commands/social.js +1 -1
  14. package/dist/commands/start.js +17 -13
  15. package/dist/commands/telegram.js +1 -1
  16. package/dist/index.js +9 -0
  17. package/dist/learnings/extractor.js +3 -3
  18. package/dist/plugin-sdk/workflow.js +2 -2
  19. package/dist/pricing.js +1 -1
  20. package/dist/proxy/fallback.js +1 -1
  21. package/dist/proxy/server.js +10 -10
  22. package/dist/router/index.js +8 -8
  23. package/dist/skills/bootstrap.d.ts +27 -0
  24. package/dist/skills/bootstrap.js +40 -0
  25. package/dist/skills/invoke.d.ts +23 -0
  26. package/dist/skills/invoke.js +38 -0
  27. package/dist/skills/loader.d.ts +21 -0
  28. package/dist/skills/loader.js +149 -0
  29. package/dist/skills/registry.d.ts +26 -0
  30. package/dist/skills/registry.js +54 -0
  31. package/dist/skills/types.d.ts +47 -0
  32. package/dist/skills/types.js +8 -0
  33. package/dist/skills-bundled/budget-grill/SKILL.md +24 -0
  34. package/dist/tools/index.js +2 -0
  35. package/dist/tools/moa.js +4 -4
  36. package/dist/tools/subagent.js +3 -3
  37. package/dist/tools/tool-categories.js +3 -0
  38. package/dist/tools/wallet.d.ts +23 -0
  39. package/dist/tools/wallet.js +63 -0
  40. package/dist/ui/app.js +3 -3
  41. package/dist/ui/model-picker.js +13 -17
  42. package/package.json +4 -3
package/README.md CHANGED
@@ -105,7 +105,7 @@ Concretely — $1 in USDC gets you roughly:
105
105
  - ~13M Gemini Flash tokens
106
106
  - ~20 DALL-E 3 images
107
107
  - ~40 Exa neural web searches
108
- - Unlimited NVIDIA GPT-OSS (free tier, no wallet needed)
108
+ - Unlimited agent-tested NVIDIA free tier (Qwen3 Coder + Llama 4 Maverick, no wallet needed)
109
109
 
110
110
  ---
111
111
 
@@ -440,7 +440,7 @@ src/
440
440
  Start with **zero dollars**. Franklin defaults to free NVIDIA models that need no wallet funding.
441
441
 
442
442
  ```bash
443
- franklin --model nvidia/qwen3-next-80b-a3b-thinking
443
+ franklin --model free
444
444
  ```
445
445
 
446
446
  When you fund the wallet, Franklin gets more purchasing power: Sonnet, Opus, GPT, Gemini, Grok, and paid tools like Exa, DALL-E, and CoinGecko Pro.
@@ -493,6 +493,7 @@ npm install
493
493
  npm run build
494
494
  npm test # deterministic local tests — no API calls
495
495
  npm run test:e2e # live e2e tests — free smoke works unfunded; paid tools need network + funded wallet
496
+ npm run test:free-models # live matrix across current free NVIDIA models
496
497
  node dist/index.js --help
497
498
  ```
498
499
 
@@ -8,6 +8,7 @@
8
8
  */
9
9
  import type { ModelClient } from './llm.js';
10
10
  import type { AgentConfig, Dialogue, StreamEvent } from './types.js';
11
+ import type { Registry } from '../skills/registry.js';
11
12
  type EventEmitter = (event: StreamEvent) => void;
12
13
  interface CommandContext {
13
14
  history: Dialogue[];
@@ -15,6 +16,10 @@ interface CommandContext {
15
16
  client: ModelClient;
16
17
  sessionId: string;
17
18
  onEvent: EventEmitter;
19
+ /** Skills loaded for this session — see src/skills/. */
20
+ skillRegistry?: Registry;
21
+ /** Runtime variables substituted into skill bodies before $ARGUMENTS. */
22
+ skillVars?: Record<string, string>;
18
23
  }
19
24
  interface CommandResult {
20
25
  handled: boolean;
@@ -15,6 +15,7 @@ import { forceCompact } from './compact.js';
15
15
  import { getStatsSummary } from '../stats/tracker.js';
16
16
  import { resolveModel } from '../ui/model-picker.js';
17
17
  import { listSessions, loadSessionHistory, } from '../session/storage.js';
18
+ import { matchSkill } from '../skills/invoke.js';
18
19
  // ─── Git helpers ──────────────────────────────────────────────────────────
19
20
  function gitExec(cmd, cwd, timeout = 5000, maxBuffer) {
20
21
  return execSync(cmd, {
@@ -197,6 +198,20 @@ const DIRECT_COMMANDS = {
197
198
  },
198
199
  '/help': (ctx) => {
199
200
  const ultrathinkOn = ctx.config.ultrathink;
201
+ let skillsBlock = '';
202
+ if (ctx.skillRegistry) {
203
+ const visible = ctx.skillRegistry
204
+ .list()
205
+ .filter((l) => !l.skill.disableModelInvocation);
206
+ if (visible.length > 0) {
207
+ skillsBlock =
208
+ `\n **Skills:**\n` +
209
+ visible
210
+ .map((l) => ` /${l.skill.name.padEnd(22)} ${l.skill.description}`)
211
+ .join('\n') +
212
+ `\n`;
213
+ }
214
+ }
200
215
  ctx.onEvent({ kind: 'text_delta', text: `**RunCode Commands**\n\n` +
201
216
  ` **Coding:** /commit /review /test /fix /debug /explain /search /find /refactor /scaffold\n` +
202
217
  ` **Git:** /push /pr /undo /status /diff /log /branch /stash /unstash\n` +
@@ -205,6 +220,7 @@ const DIRECT_COMMANDS = {
205
220
  ` **Power:** /ultrathink [query] /ultraplan /noplan /moa [query] /dump\n` +
206
221
  ` **Info:** /model /auto /wallet /cost /tokens /learnings /brain /mcp /doctor /version /bug /help\n` +
207
222
  ` **UI:** /clear /exit\n` +
223
+ skillsBlock +
208
224
  (ultrathinkOn ? `\n Ultrathink: ON\n` : '')
209
225
  });
210
226
  emitDone(ctx);
@@ -926,11 +942,23 @@ export async function handleSlashCommand(input, ctx) {
926
942
  return { handled: false, rewritten: rewrite(arg) };
927
943
  }
928
944
  }
945
+ // File-loaded skills — registered after built-ins so `/security` etc.
946
+ // are never shadowed by a user-installed skill of the same name.
947
+ if (ctx.skillRegistry) {
948
+ const skillResult = matchSkill(input, ctx.skillRegistry, ctx.skillVars ?? {});
949
+ if (skillResult) {
950
+ return { handled: false, rewritten: skillResult.rewritten };
951
+ }
952
+ }
929
953
  // Not a recognized command — suggest closest match
954
+ const skillNames = ctx.skillRegistry
955
+ ? ctx.skillRegistry.list().map((s) => `/${s.skill.name}`)
956
+ : [];
930
957
  const allCommands = [
931
958
  ...Object.keys(DIRECT_COMMANDS),
932
959
  ...Object.keys(REWRITE_COMMANDS),
933
960
  ...ARG_COMMANDS.map(c => c.prefix.trim()),
961
+ ...skillNames,
934
962
  '/branch', '/resume', '/model', '/auto', '/wallet', '/cost', '/help', '/clear', '/retry', '/exit', '/session-search', '/ssearch', '/failures',
935
963
  ];
936
964
  const cmd = input.split(/\s/)[0];
@@ -427,7 +427,7 @@ function formatCompactSummary(raw) {
427
427
  function pickCompactionModel(primaryModel) {
428
428
  // Free parent → free compaction (no silent charge)
429
429
  if (primaryModel.startsWith('nvidia/') || primaryModel === 'blockrun/free') {
430
- return 'nvidia/glm-4.7';
430
+ return 'nvidia/qwen3-coder-480b';
431
431
  }
432
432
  // Use cheapest capable model for summarization to save cost
433
433
  // Tier down: opus/pro → sonnet, sonnet → haiku, everything else → flash (cheapest capable)
@@ -16,6 +16,7 @@ You are an interactive agent — not a chatbot. Use the tools available to you t
16
16
 
17
17
  # Franklin has hands
18
18
  You run with live tools by default:
19
+ - **Wallet** — read your own chain, address, and USDC balance. Use this for any "what's my balance / how much money / 钱包余额 / wallet status" question instead of running \`franklin balance\` via Bash. Free, one call, never costs USDC.
19
20
  - **TradingMarket** — current stock / FX / crypto / commodity prices (BlockRun Gateway / Pyth; wallet pays automatically, $0.001/stock call, free for everything else).
20
21
  - **ExaAnswer / ExaSearch / ExaReadUrls** — cited current-events answers, semantic web search, clean URL content.
21
22
  - **WebSearch / WebFetch** — live web.
package/dist/agent/llm.js CHANGED
@@ -256,12 +256,12 @@ export class ModelClient {
256
256
  // Static fallback if router is unavailable. Default to FREE model so
257
257
  // users aren't silently charged when their intended model can't resolve.
258
258
  const FALLBACKS = {
259
- 'blockrun/auto': 'nvidia/glm-4.7',
260
- 'blockrun/eco': 'nvidia/glm-4.7',
259
+ 'blockrun/auto': 'nvidia/qwen3-coder-480b',
260
+ 'blockrun/eco': 'nvidia/qwen3-coder-480b',
261
261
  'blockrun/premium': 'anthropic/claude-sonnet-4.6',
262
- 'blockrun/free': 'nvidia/glm-4.7',
262
+ 'blockrun/free': 'nvidia/qwen3-coder-480b',
263
263
  };
264
- return FALLBACKS[model] || 'nvidia/glm-4.7';
264
+ return FALLBACKS[model] || 'nvidia/qwen3-coder-480b';
265
265
  }
266
266
  async *streamCompletion(request, signal) {
267
267
  // Resolve virtual models before any API call
@@ -6,6 +6,7 @@ import { ModelClient } from './llm.js';
6
6
  import { autoCompactIfNeeded, forceCompact, microCompact } from './compact.js';
7
7
  import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow, setEstimationModel } from './tokens.js';
8
8
  import { handleSlashCommand } from './commands.js';
9
+ import { loadBundledSkills, getSkillVars } from '../skills/bootstrap.js';
9
10
  import { reduceTokens } from './reduce.js';
10
11
  import { PermissionManager } from './permissions.js';
11
12
  import { StreamingExecutor } from './streaming-executor.js';
@@ -225,8 +226,8 @@ function getBackoffDelay(attempt, maxDelayMs = 32_000) {
225
226
  export function isWeakModel(model) {
226
227
  const m = model.toLowerCase();
227
228
  // NVIDIA-hosted open models have been observed confabulating tool calls.
228
- // `blockrun/free` and `blockrun/eco` resolve to nvidia/nemotron-ultra in
229
- // llm.ts, so catching the `nvidia/` prefix also catches those paths.
229
+ // `blockrun/free` resolves to an NVIDIA model before the API call, so
230
+ // catching the `nvidia/` prefix also catches the free-profile path.
230
231
  if (m.startsWith('nvidia/'))
231
232
  return true;
232
233
  if (m.includes('nemotron-ultra'))
@@ -297,6 +298,17 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
297
298
  let lastUserInput = ''; // For /retry
298
299
  config.baseModel = config.model; // User's intended model — /model command updates this
299
300
  let turnFailedModels = new Set(); // Models that failed this turn (cleared each new turn)
301
+ // ── Skills (file-loaded SKILL.md prompt-rewrite slash commands) ──
302
+ // Bundled-only in Phase 1 of the skills MVP. User-global and project-local
303
+ // discovery + the budget-cap-usd / cost-receipt enforcement contract land
304
+ // in Phase 2 — see docs/plans/2026-04-29-franklin-skills-mvp-design.md.
305
+ const skillBoot = loadBundledSkills();
306
+ if (skillBoot.errors.length > 0 && config.debug) {
307
+ for (const err of skillBoot.errors) {
308
+ onEvent({ kind: 'text_delta', text: `[skills] ${err.path}: ${err.error}\n` });
309
+ }
310
+ }
311
+ const skillRegistry = skillBoot.registry;
300
312
  // Track models that failed with 402 (payment required) across turns.
301
313
  // These persist until the session ends — unlike transient errors, payment failures
302
314
  // will keep failing until the user adds funds. Map stores failure timestamp for future TTL.
@@ -387,6 +399,19 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
387
399
  else {
388
400
  const cmdResult = await handleSlashCommand(input, {
389
401
  history, config, client, sessionId, onEvent,
402
+ skillRegistry,
403
+ skillVars: getSkillVars({
404
+ chain: config.chain,
405
+ perTurnCapUsd: (() => {
406
+ const raw = loadConfig()['max-turn-spend-usd'];
407
+ if (raw == null)
408
+ return 1.0;
409
+ const n = Number(raw);
410
+ if (!Number.isFinite(n))
411
+ return 1.0;
412
+ return n <= 0 ? Infinity : n;
413
+ })(),
414
+ }),
390
415
  });
391
416
  if (cmdResult.handled)
392
417
  continue;
@@ -498,15 +523,17 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
498
523
  // is treated as a typo and falls back to the safe default rather than
499
524
  // silently removing the wallet guard.
500
525
  //
501
- // Default raised from $0.25 → $1.00 in v3.8.42 — the original ceiling
502
- // dated from when Franklin was mostly chat. Real workloads (multi-stage
503
- // dashboard scaffolds on sonnet, image-to-image edits, research-heavy
504
- // turns) routinely land in the $0.20–$0.80 range on a single legit
505
- // prompt. $1.00 is still meaningful as a runaway-protection guardrail
506
- // (catches the kind of failure v3.8.41's retry-policy was built for)
507
- // but doesn't impose a friction tax on every multi-stage task. Users
508
- // who liked the old ceiling can opt back in via the config.
509
- const TURN_SPEND_DEFAULT_USD = 1.0;
526
+ // Default lineage: $0.25 (≤ v3.8.41) → $1.00 (v3.8.42) → $2.00 (v3.9.1).
527
+ // v3.8.42's $1.00 was tuned for "multi-stage dashboard scaffold" workloads
528
+ // landing in the $0.20–$0.80 range on a single prompt. Real coding turns
529
+ // since — full BTC-style dashboards, multi-file refactors that pull in
530
+ // sonnet/opus on a COMPLEX-tier route — routinely cross $1.00 in their
531
+ // first planning pass alone, leaving no headroom for the execution call
532
+ // and tripping the cap mid-task. $2.00 keeps the runaway-protection
533
+ // promise (catches the buggy-loop drain v3.8.41's retry-policy targets)
534
+ // while letting a legitimate complex coding task finish in one turn.
535
+ // Users who liked the old ceiling can pin it via the config.
536
+ const TURN_SPEND_DEFAULT_USD = 2.0;
510
537
  const turnSpendCap = (() => {
511
538
  const raw = loadConfig()['max-turn-spend-usd'];
512
539
  if (raw == null)
@@ -985,7 +1012,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
985
1012
  if (lastRoutedCategory) {
986
1013
  recordOutcome(lastRoutedCategory, config.model, 'payment');
987
1014
  }
988
- const FREE_MODELS = ['nvidia/glm-4.7', 'nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/qwen3-next-80b-a3b-thinking'];
1015
+ const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/glm-4.7'];
989
1016
  const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
990
1017
  if (nextFree) {
991
1018
  const oldModel = config.model;
@@ -1033,7 +1060,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
1033
1060
  onEvent({
1034
1061
  kind: 'text_delta',
1035
1062
  text: `\n\n⚠️ Turn spend limit reached ($${turnSpend.toFixed(3)} > $${MAX_TURN_SPEND_USD}). Stopping to protect your wallet.\n` +
1036
- `Raise the cap with \`franklin config set max-turn-spend-usd 2.0\` (or \`0\` to disable), then \`/retry\`.\n`,
1063
+ `Raise the cap with \`franklin config set max-turn-spend-usd 4.0\` (or \`0\` to disable), then \`/retry\`.\n`,
1037
1064
  });
1038
1065
  onEvent({ kind: 'turn_done', reason: 'budget' });
1039
1066
  break;
@@ -112,8 +112,8 @@ export async function runVerification(history, handlers, client, config) {
112
112
  },
113
113
  ];
114
114
  config.onEvent?.({ kind: 'text_delta', text: '\n*Verifying...*\n' });
115
- // Use cheap model for verification
116
- const verificationModel = 'nvidia/glm-4.7'; // Free model to keep cost zero
115
+ // Use agent-tested free model for verification.
116
+ const verificationModel = 'nvidia/qwen3-coder-480b';
117
117
  try {
118
118
  // Simple single-turn verification call
119
119
  const response = await client.complete({
@@ -0,0 +1,15 @@
1
+ /**
2
+ * Wallet-balance retry helper.
3
+ *
4
+ * Used by the agent UI's startup balance fetch and the post-turn refresh.
5
+ * Some wallet client paths return `0` transiently — for example, when the
6
+ * SDK is queried before the chain provider has finished initializing — even
7
+ * when the on-chain balance is non-zero. A single defensive retry catches
8
+ * that case at a small, bounded cost for a genuinely empty wallet: it still
9
+ * resolves to `0` after roughly two RPC round-trips plus one retry delay (750 ms by default).
10
+ */
11
+ export interface RetryOptions {
12
+ /** Delay between the first and second attempt, in milliseconds. */
13
+ delayMs?: number;
14
+ }
15
+ export declare function retryFetchBalance(fetchOnce: () => Promise<number>, opts?: RetryOptions): Promise<number>;
@@ -0,0 +1,20 @@
1
+ /**
2
+ * Wallet-balance retry helper.
3
+ *
4
+ * Used by the agent UI's startup balance fetch and the post-turn refresh.
5
+ * Some wallet client paths return `0` transiently — for example, when the
6
+ * SDK is queried before the chain provider has finished initializing — even
7
+ * when the on-chain balance is non-zero. A single defensive retry catches
8
+ * that case at a small, bounded cost for a genuinely empty wallet: it still
9
+ * resolves to `0` after roughly two RPC round-trips plus one retry delay (750 ms by default).
10
+ */
11
+ export async function retryFetchBalance(fetchOnce, opts = {}) {
12
+ const first = await fetchOnce();
13
+ if (first !== 0)
14
+ return first;
15
+ await sleep(opts.delayMs ?? 750);
16
+ return fetchOnce();
17
+ }
18
+ function sleep(ms) {
19
+ return new Promise((resolve) => setTimeout(resolve, ms));
20
+ }
@@ -0,0 +1,8 @@
1
+ /**
2
+ * `franklin skills` — list and inspect SKILL.md files Franklin would load
3
+ * during a session. Phase 1 of the skills MVP — bundled only.
4
+ */
5
+ export interface SkillsCommandOptions {
6
+ json?: boolean;
7
+ }
8
+ export declare function skillsCommand(action: string | undefined, arg: string | undefined, opts?: SkillsCommandOptions): Promise<void>;
@@ -0,0 +1,93 @@
1
+ /**
2
+ * `franklin skills` — list and inspect SKILL.md files Franklin would load
3
+ * during a session. Phase 1 of the skills MVP — bundled only.
4
+ */
5
+ import chalk from 'chalk';
6
+ import { loadBundledSkills } from '../skills/bootstrap.js';
7
+ export async function skillsCommand(action, arg, opts = {}) {
8
+ const sub = action ?? 'list';
9
+ if (sub === 'list') {
10
+ runList(opts);
11
+ return;
12
+ }
13
+ if (sub === 'which') {
14
+ runWhich(arg);
15
+ return;
16
+ }
17
+ console.log(chalk.red(`Unknown skills subcommand: ${sub}`));
18
+ console.log('Usage: franklin skills [list|which <name>]');
19
+ process.exit(1);
20
+ }
21
+ function runList(opts) {
22
+ const { registry, errors } = loadBundledSkills();
23
+ const skills = registry.list();
24
+ if (opts.json) {
25
+ process.stdout.write(JSON.stringify({
26
+ skills: skills.map((l) => ({
27
+ name: l.skill.name,
28
+ description: l.skill.description,
29
+ source: l.source,
30
+ path: l.path,
31
+ warnings: l.warnings,
32
+ costReceipt: l.skill.costReceipt ?? false,
33
+ budgetCapUsd: l.skill.budgetCapUsd ?? null,
34
+ disableModelInvocation: l.skill.disableModelInvocation ?? false,
35
+ })),
36
+ errors,
37
+ shadowed: registry.shadowed().map((s) => ({
38
+ winner: { name: s.winner.skill.name, source: s.winner.source, path: s.winner.path },
39
+ loser: { name: s.loser.skill.name, source: s.loser.source, path: s.loser.path },
40
+ })),
41
+ }, null, 2) + '\n');
42
+ return;
43
+ }
44
+ if (skills.length === 0) {
45
+ console.log(chalk.dim('No skills loaded.'));
46
+ }
47
+ else {
48
+ console.log(chalk.bold(`Skills (${skills.length})`));
49
+ console.log('');
50
+ const nameWidth = Math.max(...skills.map((l) => l.skill.name.length), 4);
51
+ for (const l of skills) {
52
+ const flags = [];
53
+ if (l.skill.costReceipt)
54
+ flags.push('receipt');
55
+ if (typeof l.skill.budgetCapUsd === 'number')
56
+ flags.push(`cap $${l.skill.budgetCapUsd.toFixed(2)}`);
57
+ if (l.skill.disableModelInvocation)
58
+ flags.push('manual-only');
59
+ const flagStr = flags.length > 0 ? chalk.dim(` [${flags.join(', ')}]`) : '';
60
+ const sourceTag = chalk.dim(`(${l.source})`);
61
+ console.log(` ${chalk.cyan('/' + l.skill.name.padEnd(nameWidth))} ${l.skill.description}${flagStr} ${sourceTag}`);
62
+ }
63
+ }
64
+ const shadowed = registry.shadowed();
65
+ if (shadowed.length > 0) {
66
+ console.log('');
67
+ console.log(chalk.yellow('Shadowed (lost to a higher-precedence source):'));
68
+ for (const s of shadowed) {
69
+ console.log(` /${s.loser.skill.name} from ${s.loser.source} ` +
70
+ chalk.dim(`(winner: ${s.winner.source} at ${s.winner.path})`));
71
+ }
72
+ }
73
+ if (errors.length > 0) {
74
+ console.log('');
75
+ console.log(chalk.red(`Failed to load (${errors.length}):`));
76
+ for (const e of errors) {
77
+ console.log(` ${e.path}: ${e.error}`);
78
+ }
79
+ }
80
+ }
81
+ function runWhich(name) {
82
+ if (!name) {
83
+ console.log(chalk.red('Usage: franklin skills which <name>'));
84
+ process.exit(1);
85
+ }
86
+ const { registry } = loadBundledSkills();
87
+ const skill = registry.lookup(name);
88
+ if (!skill) {
89
+ console.log(chalk.red(`Skill not found: ${name}`));
90
+ process.exit(1);
91
+ }
92
+ console.log(skill.path);
93
+ }
@@ -159,7 +159,7 @@ async function runCommand(options) {
159
159
  const chain = loadChain();
160
160
  const apiUrl = API_URLS[chain];
161
161
  const appConfig = loadAppConfig();
162
- const model = options.model || appConfig['default-model'] || 'nvidia/glm-4.7';
162
+ const model = options.model || appConfig['default-model'] || 'nvidia/qwen3-coder-480b';
163
163
  console.log(chalk.dim(` Model: ${model}`));
164
164
  console.log('');
165
165
  let result;
@@ -1,6 +1,7 @@
1
1
  import chalk from 'chalk';
2
2
  import { getOrCreateWallet, getOrCreateSolanaWallet } from '@blockrun/llm';
3
3
  import { loadChain, API_URLS } from '../config.js';
4
+ import { retryFetchBalance } from './balance-retry.js';
4
5
  import { flushStats, loadStats } from '../stats/tracker.js';
5
6
  import { OPUS_PRICING } from '../pricing.js';
6
7
  import { loadConfig } from './config.js';
@@ -125,7 +126,7 @@ export async function startCommand(options) {
125
126
  return;
126
127
  }
127
128
  // Warn when a paid model is active so users know they'll be charged.
128
- // Set members = BlockRun gateway's current free tier (refreshed 2026-04).
129
+ // Set members = BlockRun gateway's current live free tier (refreshed 2026-04).
129
130
  const FREE_MODELS = new Set([
130
131
  'nvidia/glm-4.7',
131
132
  'nvidia/qwen3-next-80b-a3b-thinking',
@@ -133,8 +134,6 @@ export async function startCommand(options) {
133
134
  'nvidia/mistral-small-4-119b',
134
135
  'nvidia/llama-4-maverick',
135
136
  'nvidia/deepseek-v3.2',
136
- 'nvidia/gpt-oss-120b',
137
- 'nvidia/gpt-oss-20b',
138
137
  'blockrun/free',
139
138
  ]);
140
139
  if (!FREE_MODELS.has(model)) {
@@ -183,20 +182,25 @@ export async function startCommand(options) {
183
182
  console.log(chalk.dim(' Dashboard: ') + (panelUrl ? chalk.cyan(panelUrl) : chalk.cyan('franklin panel') + chalk.dim(' → http://localhost:3100')));
184
183
  console.log(chalk.dim(' Help: ') + chalk.cyan('/help'));
185
184
  console.log('');
186
- // Balance fetcher — used at startup and after each turn
185
+ // Balance fetcher — used at startup and after each turn.
186
+ //
187
+ // Some wallet client paths return 0 transiently (chain provider not yet
188
+ // initialized, RPC dust race). Without a defensive retry the UI's status
189
+ // bar locks at $0.00 USDC for the rest of the session even after the wallet
190
+ // is provably non-empty. retryFetchBalance does one extra round-trip on a
191
+ // zero result; genuinely empty wallets still resolve to $0.00 quickly.
187
192
  const fetchBalance = async () => {
188
193
  try {
189
- let bal;
190
- if (chain === 'solana') {
191
- const { setupAgentSolanaWallet } = await import('@blockrun/llm');
192
- const client = await setupAgentSolanaWallet({ silent: true });
193
- bal = await client.getBalance();
194
- }
195
- else {
194
+ const bal = await retryFetchBalance(async () => {
195
+ if (chain === 'solana') {
196
+ const { setupAgentSolanaWallet } = await import('@blockrun/llm');
197
+ const client = await setupAgentSolanaWallet({ silent: true });
198
+ return client.getBalance();
199
+ }
196
200
  const { setupAgentWallet } = await import('@blockrun/llm');
197
201
  const client = setupAgentWallet({ silent: true });
198
- bal = await client.getBalance();
199
- }
202
+ return client.getBalance();
203
+ });
200
204
  return `$${bal.toFixed(2)} USDC`;
201
205
  }
202
206
  catch {
@@ -36,7 +36,7 @@ export async function telegramCommand(opts) {
36
36
  // Model: --model flag > config default > free default.
37
37
  const model = opts.model ||
38
38
  config['default-model'] ||
39
- 'nvidia/glm-4.7';
39
+ 'nvidia/qwen3-coder-480b';
40
40
  const workingDir = process.cwd();
41
41
  const systemInstructions = assembleInstructions(workingDir, model);
42
42
  // Resume the most recent session tagged for THIS owner so a process
package/dist/index.js CHANGED
@@ -130,6 +130,15 @@ program
130
130
  const matches = searchSessions(query, { limit, model: opts.model });
131
131
  process.stdout.write(formatSearchResults(matches, query));
132
132
  });
133
+ // ─── franklin skills (file-loaded SKILL.md slash commands) ───────────────
134
+ program
135
+ .command('skills [action] [arg]')
136
+ .description('Manage Franklin skills — list | which <name>')
137
+ .option('--json', 'Output the skill list as JSON')
138
+ .action(async (action, arg, opts) => {
139
+ const { skillsCommand } = await import('./commands/skills.js');
140
+ await skillsCommand(action, arg, opts);
141
+ });
133
142
  // ─── franklin social (native X bot) ───────────────────────────────────────
134
143
  // First-class subcommand. Handles setup / login / run / stats / config
135
144
  // subactions. No plugin SDK, no MCP — everything lives in src/social/.
@@ -9,9 +9,9 @@ import { loadLearnings, mergeLearning, saveLearnings, loadSkills, saveSkill } fr
9
9
  // Free models for learning extraction — JSON extraction is simple enough.
10
10
  // Ordered by reliability: try the best free model first, fall back to others.
11
11
  const EXTRACTION_MODELS = [
12
- 'nvidia/glm-4.7', // Best free model for structured output
13
- 'nvidia/qwen3-coder-480b', // Strong at JSON tasks
14
- 'nvidia/llama-4-maverick', // Fallback
12
+ 'nvidia/qwen3-coder-480b', // Agent-tested free model; strong at JSON tasks
13
+ 'nvidia/llama-4-maverick', // Agent-tested fallback
14
+ 'nvidia/glm-4.7', // Chat fallback; not default for tool-heavy paths
15
15
  ];
16
16
  const VALID_CATEGORIES = new Set([
17
17
  'language', 'model_preference', 'tool_pattern', 'coding_style',
@@ -5,7 +5,7 @@
5
5
  * Plugins implement Workflow; core orchestrates execution and provides infrastructure.
6
6
  */
7
7
  export const DEFAULT_MODEL_TIERS = {
8
- free: 'nvidia/glm-4.7',
9
- cheap: 'nvidia/glm-4.7', // Free by default; opt-in to paid flat-rate via 'zai/glm-5.1'.
8
+ free: 'nvidia/qwen3-coder-480b',
9
+ cheap: 'nvidia/qwen3-coder-480b', // Free by default; opt-in to paid flat-rate via 'zai/glm-5.1'.
10
10
  premium: 'anthropic/claude-sonnet-4.6',
11
11
  };
package/dist/pricing.js CHANGED
@@ -15,9 +15,9 @@ export const MODEL_PRICING = {
15
15
  'nvidia/mistral-small-4-119b': { input: 0, output: 0 },
16
16
  'nvidia/llama-4-maverick': { input: 0, output: 0 },
17
17
  'nvidia/deepseek-v3.2': { input: 0, output: 0 },
18
+ // Retired (kept at 0 for legacy session-cost records; gateway no longer serves these).
18
19
  'nvidia/gpt-oss-120b': { input: 0, output: 0 },
19
20
  'nvidia/gpt-oss-20b': { input: 0, output: 0 },
20
- // Retired (kept at 0 for legacy session-cost records; gateway no longer serves these).
21
21
  'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
22
22
  'nvidia/devstral-2-123b': { input: 0, output: 0 },
23
23
  'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },
@@ -19,7 +19,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
19
19
  chain: [
20
20
  'deepseek/deepseek-chat', // Direct fallback — cheap & reliable
21
21
  'google/gemini-2.5-flash', // Fast & capable
22
- 'nvidia/glm-4.7', // Free model as ultimate fallback
22
+ 'nvidia/qwen3-coder-480b', // Free model as ultimate fallback
23
23
  ],
24
24
  retryOn: [429, 500, 502, 503, 504, 529],
25
25
  maxRetries: 5,
@@ -153,18 +153,18 @@ const MODEL_SHORTCUTS = {
153
153
  // DeepSeek
154
154
  deepseek: 'deepseek/deepseek-chat',
155
155
  r1: 'deepseek/deepseek-reasoner',
156
- // Free models (gateway free tier — refreshed 2026-04)
157
- free: 'nvidia/glm-4.7',
158
- glm4: 'nvidia/glm-4.7',
159
- 'deepseek-free': 'nvidia/deepseek-v3.2',
156
+ // Free models (agent-tested gateway free tier — refreshed 2026-04)
157
+ free: 'nvidia/qwen3-coder-480b',
158
+ glm4: 'nvidia/qwen3-coder-480b',
159
+ 'deepseek-free': 'nvidia/qwen3-coder-480b',
160
160
  'qwen-coder': 'nvidia/qwen3-coder-480b',
161
- 'qwen-think': 'nvidia/qwen3-next-80b-a3b-thinking',
161
+ 'qwen-think': 'nvidia/qwen3-coder-480b',
162
162
  maverick: 'nvidia/llama-4-maverick',
163
- 'gpt-oss': 'nvidia/gpt-oss-120b',
164
- 'gpt-oss-small': 'nvidia/gpt-oss-20b',
165
- 'mistral-small': 'nvidia/mistral-small-4-119b',
166
- // Retired-gateway-model aliases (map to closest current).
167
- nemotron: 'nvidia/glm-4.7',
163
+ 'gpt-oss': 'nvidia/qwen3-coder-480b',
164
+ 'gpt-oss-small': 'nvidia/qwen3-coder-480b',
165
+ 'mistral-small': 'nvidia/llama-4-maverick',
166
+ // Retired/unreliable gateway-model aliases (map to closest agent-tested current).
167
+ nemotron: 'nvidia/qwen3-coder-480b',
168
168
  devstral: 'nvidia/qwen3-coder-480b',
169
169
  // Minimax
170
170
  minimax: 'minimax/minimax-m2.7',
@@ -66,20 +66,20 @@ const AUTO_TIERS = {
66
66
  };
67
67
  const ECO_TIERS = {
68
68
  SIMPLE: {
69
- primary: 'nvidia/glm-4.7',
70
- fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
69
+ primary: 'nvidia/qwen3-coder-480b',
70
+ fallback: ['nvidia/llama-4-maverick'],
71
71
  },
72
72
  MEDIUM: {
73
73
  primary: 'google/gemini-2.5-flash-lite',
74
- fallback: ['nvidia/glm-4.7', 'nvidia/qwen3-coder-480b'],
74
+ fallback: ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick'],
75
75
  },
76
76
  COMPLEX: {
77
77
  primary: 'google/gemini-2.5-flash-lite',
78
- fallback: ['deepseek/deepseek-chat', 'nvidia/glm-4.7'],
78
+ fallback: ['deepseek/deepseek-chat', 'nvidia/qwen3-coder-480b'],
79
79
  },
80
80
  REASONING: {
81
81
  primary: 'xai/grok-4-1-fast-reasoning',
82
- fallback: ['deepseek/deepseek-reasoner', 'nvidia/qwen3-next-80b-a3b-thinking'],
82
+ fallback: ['deepseek/deepseek-reasoner', 'nvidia/qwen3-coder-480b'],
83
83
  },
84
84
  };
85
85
  const PREMIUM_TIERS = {
@@ -409,7 +409,7 @@ export function resolveTierToModel(tier, profile = 'auto') {
409
409
  // Free profile short-circuits — everything routes to a single free model.
410
410
  if (profile === 'free') {
411
411
  return {
412
- model: 'nvidia/glm-4.7',
412
+ model: 'nvidia/qwen3-coder-480b',
413
413
  tier: 'SIMPLE',
414
414
  confidence: 1.0,
415
415
  signals: ['free-profile'],
@@ -440,7 +440,7 @@ export function routeRequest(prompt, profile = 'auto') {
440
440
  // Free profile — always use free model
441
441
  if (profile === 'free') {
442
442
  return {
443
- model: 'nvidia/glm-4.7',
443
+ model: 'nvidia/qwen3-coder-480b',
444
444
  tier: 'SIMPLE',
445
445
  confidence: 1.0,
446
446
  signals: ['free-profile'],
@@ -513,7 +513,7 @@ export function getFallbackChain(tier, profile = 'auto') {
513
513
  tierConfigs = PREMIUM_TIERS;
514
514
  break;
515
515
  case 'free':
516
- return ['nvidia/glm-4.7'];
516
+ return ['nvidia/qwen3-coder-480b'];
517
517
  default:
518
518
  tierConfigs = AUTO_TIERS;
519
519
  }