npm - @blockrun/franklin - Versions diffs - 3.8.44 → 3.9.0 - Mend

@blockrun/franklin 3.8.44 → 3.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (41)

package/README.md +3 -2
package/dist/agent/commands.d.ts +5 -0
package/dist/agent/commands.js +28 -0
package/dist/agent/compact.js +1 -1
package/dist/agent/context.js +1 -0
package/dist/agent/llm.js +4 -4
package/dist/agent/loop.js +28 -3
package/dist/agent/verification.js +2 -2
package/dist/commands/balance-retry.d.ts +15 -0
package/dist/commands/balance-retry.js +20 -0
package/dist/commands/skills.d.ts +8 -0
package/dist/commands/skills.js +93 -0
package/dist/commands/social.js +1 -1
package/dist/commands/start.js +17 -13
package/dist/commands/telegram.js +1 -1
package/dist/index.js +9 -0
package/dist/learnings/extractor.js +3 -3
package/dist/plugin-sdk/workflow.js +2 -2
package/dist/pricing.js +1 -1
package/dist/proxy/fallback.js +1 -1
package/dist/proxy/server.js +10 -10
package/dist/router/index.js +8 -8
package/dist/skills/bootstrap.d.ts +27 -0
package/dist/skills/bootstrap.js +40 -0
package/dist/skills/invoke.d.ts +23 -0
package/dist/skills/invoke.js +38 -0
package/dist/skills/loader.d.ts +21 -0
package/dist/skills/loader.js +149 -0
package/dist/skills/registry.d.ts +26 -0
package/dist/skills/registry.js +54 -0
package/dist/skills/types.d.ts +47 -0
package/dist/skills/types.js +8 -0
package/dist/skills-bundled/budget-grill/SKILL.md +24 -0
package/dist/tools/index.js +2 -0
package/dist/tools/moa.js +4 -4
package/dist/tools/subagent.js +3 -3
package/dist/tools/tool-categories.js +3 -0
package/dist/tools/wallet.d.ts +23 -0
package/dist/tools/wallet.js +63 -0
package/dist/ui/model-picker.js +13 -17
package/package.json +4 -3

package/README.md CHANGED Viewed

@@ -105,7 +105,7 @@ Concretely — $1 in USDC gets you roughly:
 - ~13M Gemini Flash tokens
 - ~20 DALL-E 3 images
 - ~40 Exa neural web searches
-- Unlimited NVIDIA GPT-OSS (free tier, no wallet needed)
+- Unlimited agent-tested NVIDIA free tier (Qwen3 Coder + Llama 4 Maverick, no wallet needed)
 ---
@@ -440,7 +440,7 @@ src/
 Start with **zero dollars**. Franklin defaults to free NVIDIA models that need no wallet funding.
 ```bash
-franklin --model nvidia/qwen3-next-80b-a3b-thinking
+franklin --model free
 ```
 When you fund the wallet, Franklin gets more purchasing power: Sonnet, Opus, GPT, Gemini, Grok, and paid tools like Exa, DALL-E, and CoinGecko Pro.
@@ -493,6 +493,7 @@ npm install
 npm run build
 npm test              # deterministic local tests — no API calls
 npm run test:e2e      # live e2e tests — free smoke works unfunded; paid tools need network + funded wallet
+npm run test:free-models # live matrix across current free NVIDIA models
 node dist/index.js --help
 ```

package/dist/agent/commands.d.ts CHANGED Viewed

@@ -8,6 +8,7 @@
  */
 import type { ModelClient } from './llm.js';
 import type { AgentConfig, Dialogue, StreamEvent } from './types.js';
+import type { Registry } from '../skills/registry.js';
 type EventEmitter = (event: StreamEvent) => void;
 interface CommandContext {
     history: Dialogue[];
@@ -15,6 +16,10 @@ interface CommandContext {
     client: ModelClient;
     sessionId: string;
     onEvent: EventEmitter;
+    /** Skills loaded for this session — see src/skills/. */
+    skillRegistry?: Registry;
+    /** Runtime variables substituted into skill bodies before $ARGUMENTS. */
+    skillVars?: Record<string, string>;
 }
 interface CommandResult {
     handled: boolean;

package/dist/agent/commands.js CHANGED Viewed

@@ -15,6 +15,7 @@ import { forceCompact } from './compact.js';
 import { getStatsSummary } from '../stats/tracker.js';
 import { resolveModel } from '../ui/model-picker.js';
 import { listSessions, loadSessionHistory, } from '../session/storage.js';
+import { matchSkill } from '../skills/invoke.js';
 // ─── Git helpers ──────────────────────────────────────────────────────────
 function gitExec(cmd, cwd, timeout = 5000, maxBuffer) {
     return execSync(cmd, {
@@ -197,6 +198,20 @@ const DIRECT_COMMANDS = {
     },
     '/help': (ctx) => {
         const ultrathinkOn = ctx.config.ultrathink;
+        let skillsBlock = '';
+        if (ctx.skillRegistry) {
+            const visible = ctx.skillRegistry
+                .list()
+                .filter((l) => !l.skill.disableModelInvocation);
+            if (visible.length > 0) {
+                skillsBlock =
+                    `\n  **Skills:**\n` +
+                        visible
+                            .map((l) => `    /${l.skill.name.padEnd(22)} ${l.skill.description}`)
+                            .join('\n') +
+                        `\n`;
+            }
+        }
         ctx.onEvent({ kind: 'text_delta', text: `**RunCode Commands**\n\n` +
                 `  **Coding:** /commit /review /test /fix /debug /explain /search /find /refactor /scaffold\n` +
                 `  **Git:** /push /pr /undo /status /diff /log /branch /stash /unstash\n` +
@@ -205,6 +220,7 @@ const DIRECT_COMMANDS = {
                 `  **Power:** /ultrathink [query] /ultraplan /noplan /moa [query] /dump\n` +
                 `  **Info:** /model /auto /wallet /cost /tokens /learnings /brain /mcp /doctor /version /bug /help\n` +
                 `  **UI:** /clear /exit\n` +
+                skillsBlock +
                 (ultrathinkOn ? `\n  Ultrathink: ON\n` : '')
         });
         emitDone(ctx);
@@ -926,11 +942,23 @@ export async function handleSlashCommand(input, ctx) {
             return { handled: false, rewritten: rewrite(arg) };
         }
     }
+    // File-loaded skills — registered after built-ins so `/security` etc.
+    // are never shadowed by a user-installed skill of the same name.
+    if (ctx.skillRegistry) {
+        const skillResult = matchSkill(input, ctx.skillRegistry, ctx.skillVars ?? {});
+        if (skillResult) {
+            return { handled: false, rewritten: skillResult.rewritten };
+        }
+    }
     // Not a recognized command — suggest closest match
+    const skillNames = ctx.skillRegistry
+        ? ctx.skillRegistry.list().map((s) => `/${s.skill.name}`)
+        : [];
     const allCommands = [
         ...Object.keys(DIRECT_COMMANDS),
         ...Object.keys(REWRITE_COMMANDS),
         ...ARG_COMMANDS.map(c => c.prefix.trim()),
+        ...skillNames,
         '/branch', '/resume', '/model', '/auto', '/wallet', '/cost', '/help', '/clear', '/retry', '/exit', '/session-search', '/ssearch', '/failures',
     ];
     const cmd = input.split(/\s/)[0];

package/dist/agent/compact.js CHANGED Viewed

@@ -427,7 +427,7 @@ function formatCompactSummary(raw) {
 function pickCompactionModel(primaryModel) {
     // Free parent → free compaction (no silent charge)
     if (primaryModel.startsWith('nvidia/') || primaryModel === 'blockrun/free') {
-        return 'nvidia/glm-4.7';
+        return 'nvidia/qwen3-coder-480b';
     }
     // Use cheapest capable model for summarization to save cost
     // Tier down: opus/pro → sonnet, sonnet → haiku, everything else → flash (cheapest capable)

package/dist/agent/context.js CHANGED Viewed

@@ -16,6 +16,7 @@ You are an interactive agent — not a chatbot. Use the tools available to you t
 # Franklin has hands
 You run with live tools by default:
+- **Wallet** — read your own chain, address, and USDC balance. Use this for any "what's my balance / how much money / 钱包余额 / wallet status" question instead of running \`franklin balance\` via Bash. Free, one call, never costs USDC.
 - **TradingMarket** — current stock / FX / crypto / commodity prices (BlockRun Gateway / Pyth; wallet pays automatically, $0.001/stock call, free for everything else).
 - **ExaAnswer / ExaSearch / ExaReadUrls** — cited current-events answers, semantic web search, clean URL content.
 - **WebSearch / WebFetch** — live web.

package/dist/agent/llm.js CHANGED Viewed

@@ -256,12 +256,12 @@ export class ModelClient {
         // Static fallback if router is unavailable. Default to FREE model so
         // users aren't silently charged when their intended model can't resolve.
         const FALLBACKS = {
-            'blockrun/auto': 'nvidia/glm-4.7',
-            'blockrun/eco': 'nvidia/glm-4.7',
+            'blockrun/auto': 'nvidia/qwen3-coder-480b',
+            'blockrun/eco': 'nvidia/qwen3-coder-480b',
             'blockrun/premium': 'anthropic/claude-sonnet-4.6',
-            'blockrun/free': 'nvidia/glm-4.7',
+            'blockrun/free': 'nvidia/qwen3-coder-480b',
         };
-        return FALLBACKS[model] || 'nvidia/glm-4.7';
+        return FALLBACKS[model] || 'nvidia/qwen3-coder-480b';
     }
     async *streamCompletion(request, signal) {
         // Resolve virtual models before any API call

package/dist/agent/loop.js CHANGED Viewed

@@ -6,6 +6,7 @@ import { ModelClient } from './llm.js';
 import { autoCompactIfNeeded, forceCompact, microCompact } from './compact.js';
 import { estimateHistoryTokens, updateActualTokens, resetTokenAnchor, getAnchoredTokenCount, getContextWindow, setEstimationModel } from './tokens.js';
 import { handleSlashCommand } from './commands.js';
+import { loadBundledSkills, getSkillVars } from '../skills/bootstrap.js';
 import { reduceTokens } from './reduce.js';
 import { PermissionManager } from './permissions.js';
 import { StreamingExecutor } from './streaming-executor.js';
@@ -225,8 +226,8 @@ function getBackoffDelay(attempt, maxDelayMs = 32_000) {
 export function isWeakModel(model) {
     const m = model.toLowerCase();
     // NVIDIA-hosted open models have been observed confabulating tool calls.
-    // `blockrun/free` and `blockrun/eco` resolve to nvidia/nemotron-ultra in
-    // llm.ts, so catching the `nvidia/` prefix also catches those paths.
+    // `blockrun/free` resolves to an NVIDIA model before the API call, so
+    // catching the `nvidia/` prefix also catches the free-profile path.
     if (m.startsWith('nvidia/'))
         return true;
     if (m.includes('nemotron-ultra'))
@@ -297,6 +298,17 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
     let lastUserInput = ''; // For /retry
     config.baseModel = config.model; // User's intended model — /model command updates this
     let turnFailedModels = new Set(); // Models that failed this turn (cleared each new turn)
+    // ── Skills (file-loaded SKILL.md prompt-rewrite slash commands) ──
+    // Bundled-only in Phase 1 of the skills MVP. User-global and project-local
+    // discovery + the budget-cap-usd / cost-receipt enforcement contract land
+    // in Phase 2 — see docs/plans/2026-04-29-franklin-skills-mvp-design.md.
+    const skillBoot = loadBundledSkills();
+    if (skillBoot.errors.length > 0 && config.debug) {
+        for (const err of skillBoot.errors) {
+            onEvent({ kind: 'text_delta', text: `[skills] ${err.path}: ${err.error}\n` });
+        }
+    }
+    const skillRegistry = skillBoot.registry;
     // Track models that failed with 402 (payment required) across turns.
     // These persist until the session ends — unlike transient errors, payment failures
     // will keep failing until the user adds funds. Map stores failure timestamp for future TTL.
@@ -387,6 +399,19 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
             else {
                 const cmdResult = await handleSlashCommand(input, {
                     history, config, client, sessionId, onEvent,
+                    skillRegistry,
+                    skillVars: getSkillVars({
+                        chain: config.chain,
+                        perTurnCapUsd: (() => {
+                            const raw = loadConfig()['max-turn-spend-usd'];
+                            if (raw == null)
+                                return 1.0;
+                            const n = Number(raw);
+                            if (!Number.isFinite(n))
+                                return 1.0;
+                            return n <= 0 ? Infinity : n;
+                        })(),
+                    }),
                 });
                 if (cmdResult.handled)
                     continue;
@@ -985,7 +1010,7 @@ export async function interactiveSession(config, getUserInput, onEvent, onAbortR
                     if (lastRoutedCategory) {
                         recordOutcome(lastRoutedCategory, config.model, 'payment');
                     }
-                    const FREE_MODELS = ['nvidia/glm-4.7', 'nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/qwen3-next-80b-a3b-thinking'];
+                    const FREE_MODELS = ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick', 'nvidia/glm-4.7'];
                     const nextFree = FREE_MODELS.find(m => !turnFailedModels.has(m));
                     if (nextFree) {
                         const oldModel = config.model;

package/dist/agent/verification.js CHANGED Viewed

@@ -112,8 +112,8 @@ export async function runVerification(history, handlers, client, config) {
         },
     ];
     config.onEvent?.({ kind: 'text_delta', text: '\n*Verifying...*\n' });
-    // Use cheap model for verification
-    const verificationModel = 'nvidia/glm-4.7'; // Free model to keep cost zero
+    // Use agent-tested free model for verification.
+    const verificationModel = 'nvidia/qwen3-coder-480b';
     try {
         // Simple single-turn verification call
         const response = await client.complete({

package/dist/commands/balance-retry.d.ts ADDED Viewed

@@ -0,0 +1,15 @@
+/**
+ * Wallet-balance retry helper.
+ *
+ * Used by the agent UI's startup balance fetch and the post-turn refresh.
+ * Some wallet client paths return `0` transiently — for example, when the
+ * SDK is queried before the chain provider has finished initializing — even
+ * when the on-chain balance is non-zero. A single defensive retry catches
+ * that case without lengthening the path for a genuinely empty wallet:
+ * empty wallets still resolve to `0` in roughly two RPC round-trips.
+ */
+export interface RetryOptions {
+    /** Delay between the first and second attempt, in milliseconds. */
+    delayMs?: number;
+}
+export declare function retryFetchBalance(fetchOnce: () => Promise<number>, opts?: RetryOptions): Promise<number>;

package/dist/commands/balance-retry.js ADDED Viewed

@@ -0,0 +1,20 @@
+/**
+ * Wallet-balance retry helper.
+ *
+ * Used by the agent UI's startup balance fetch and the post-turn refresh.
+ * Some wallet client paths return `0` transiently — for example, when the
+ * SDK is queried before the chain provider has finished initializing — even
+ * when the on-chain balance is non-zero. A single defensive retry catches
+ * that case without lengthening the path for a genuinely empty wallet:
+ * empty wallets still resolve to `0` in roughly two RPC round-trips.
+ */
+export async function retryFetchBalance(fetchOnce, opts = {}) {
+    const first = await fetchOnce();
+    if (first !== 0)
+        return first;
+    await sleep(opts.delayMs ?? 750);
+    return fetchOnce();
+}
+function sleep(ms) {
+    return new Promise((resolve) => setTimeout(resolve, ms));
+}

package/dist/commands/skills.d.ts ADDED Viewed

@@ -0,0 +1,8 @@
+/**
+ * `franklin skills` — list and inspect SKILL.md files Franklin would load
+ * during a session. Phase 1 of the skills MVP — bundled only.
+ */
+export interface SkillsCommandOptions {
+    json?: boolean;
+}
+export declare function skillsCommand(action: string | undefined, arg: string | undefined, opts?: SkillsCommandOptions): Promise<void>;

package/dist/commands/skills.js ADDED Viewed

@@ -0,0 +1,93 @@
+/**
+ * `franklin skills` — list and inspect SKILL.md files Franklin would load
+ * during a session. Phase 1 of the skills MVP — bundled only.
+ */
+import chalk from 'chalk';
+import { loadBundledSkills } from '../skills/bootstrap.js';
+export async function skillsCommand(action, arg, opts = {}) {
+    const sub = action ?? 'list';
+    if (sub === 'list') {
+        runList(opts);
+        return;
+    }
+    if (sub === 'which') {
+        runWhich(arg);
+        return;
+    }
+    console.log(chalk.red(`Unknown skills subcommand: ${sub}`));
+    console.log('Usage: franklin skills [list|which <name>]');
+    process.exit(1);
+}
+function runList(opts) {
+    const { registry, errors } = loadBundledSkills();
+    const skills = registry.list();
+    if (opts.json) {
+        process.stdout.write(JSON.stringify({
+            skills: skills.map((l) => ({
+                name: l.skill.name,
+                description: l.skill.description,
+                source: l.source,
+                path: l.path,
+                warnings: l.warnings,
+                costReceipt: l.skill.costReceipt ?? false,
+                budgetCapUsd: l.skill.budgetCapUsd ?? null,
+                disableModelInvocation: l.skill.disableModelInvocation ?? false,
+            })),
+            errors,
+            shadowed: registry.shadowed().map((s) => ({
+                winner: { name: s.winner.skill.name, source: s.winner.source, path: s.winner.path },
+                loser: { name: s.loser.skill.name, source: s.loser.source, path: s.loser.path },
+            })),
+        }, null, 2) + '\n');
+        return;
+    }
+    if (skills.length === 0) {
+        console.log(chalk.dim('No skills loaded.'));
+    }
+    else {
+        console.log(chalk.bold(`Skills (${skills.length})`));
+        console.log('');
+        const nameWidth = Math.max(...skills.map((l) => l.skill.name.length), 4);
+        for (const l of skills) {
+            const flags = [];
+            if (l.skill.costReceipt)
+                flags.push('receipt');
+            if (typeof l.skill.budgetCapUsd === 'number')
+                flags.push(`cap $${l.skill.budgetCapUsd.toFixed(2)}`);
+            if (l.skill.disableModelInvocation)
+                flags.push('manual-only');
+            const flagStr = flags.length > 0 ? chalk.dim(` [${flags.join(', ')}]`) : '';
+            const sourceTag = chalk.dim(`(${l.source})`);
+            console.log(`  ${chalk.cyan('/' + l.skill.name.padEnd(nameWidth))}  ${l.skill.description}${flagStr} ${sourceTag}`);
+        }
+    }
+    const shadowed = registry.shadowed();
+    if (shadowed.length > 0) {
+        console.log('');
+        console.log(chalk.yellow('Shadowed (lost to a higher-precedence source):'));
+        for (const s of shadowed) {
+            console.log(`  /${s.loser.skill.name} from ${s.loser.source} ` +
+                chalk.dim(`(winner: ${s.winner.source} at ${s.winner.path})`));
+        }
+    }
+    if (errors.length > 0) {
+        console.log('');
+        console.log(chalk.red(`Failed to load (${errors.length}):`));
+        for (const e of errors) {
+            console.log(`  ${e.path}: ${e.error}`);
+        }
+    }
+}
+function runWhich(name) {
+    if (!name) {
+        console.log(chalk.red('Usage: franklin skills which <name>'));
+        process.exit(1);
+    }
+    const { registry } = loadBundledSkills();
+    const skill = registry.lookup(name);
+    if (!skill) {
+        console.log(chalk.red(`Skill not found: ${name}`));
+        process.exit(1);
+    }
+    console.log(skill.path);
+}

package/dist/commands/social.js CHANGED Viewed

@@ -159,7 +159,7 @@ async function runCommand(options) {
     const chain = loadChain();
     const apiUrl = API_URLS[chain];
     const appConfig = loadAppConfig();
-    const model = options.model || appConfig['default-model'] || 'nvidia/glm-4.7';
+    const model = options.model || appConfig['default-model'] || 'nvidia/qwen3-coder-480b';
     console.log(chalk.dim(`  Model: ${model}`));
     console.log('');
     let result;

package/dist/commands/start.js CHANGED Viewed

@@ -1,6 +1,7 @@
 import chalk from 'chalk';
 import { getOrCreateWallet, getOrCreateSolanaWallet } from '@blockrun/llm';
 import { loadChain, API_URLS } from '../config.js';
+import { retryFetchBalance } from './balance-retry.js';
 import { flushStats, loadStats } from '../stats/tracker.js';
 import { OPUS_PRICING } from '../pricing.js';
 import { loadConfig } from './config.js';
@@ -125,7 +126,7 @@ export async function startCommand(options) {
         return;
     }
     // Warn when a paid model is active so users know they'll be charged.
-    // Set members = BlockRun gateway's current free tier (refreshed 2026-04).
+    // Set members = BlockRun gateway's current live free tier (refreshed 2026-04).
     const FREE_MODELS = new Set([
         'nvidia/glm-4.7',
         'nvidia/qwen3-next-80b-a3b-thinking',
@@ -133,8 +134,6 @@ export async function startCommand(options) {
         'nvidia/mistral-small-4-119b',
         'nvidia/llama-4-maverick',
         'nvidia/deepseek-v3.2',
-        'nvidia/gpt-oss-120b',
-        'nvidia/gpt-oss-20b',
         'blockrun/free',
     ]);
     if (!FREE_MODELS.has(model)) {
@@ -183,20 +182,25 @@ export async function startCommand(options) {
     console.log(chalk.dim('  Dashboard: ') + (panelUrl ? chalk.cyan(panelUrl) : chalk.cyan('franklin panel') + chalk.dim(' → http://localhost:3100')));
     console.log(chalk.dim('  Help:      ') + chalk.cyan('/help'));
     console.log('');
-    // Balance fetcher — used at startup and after each turn
+    // Balance fetcher — used at startup and after each turn.
+    //
+    // Some wallet client paths return 0 transiently (chain provider not yet
+    // initialized, RPC dust race). Without a defensive retry the UI's status
+    // bar locks at $0.00 USDC for the rest of the session even after the wallet
+    // is provably non-empty. retryFetchBalance does one extra round-trip on a
+    // zero result; genuinely empty wallets still resolve to $0.00 quickly.
     const fetchBalance = async () => {
         try {
-            let bal;
-            if (chain === 'solana') {
-                const { setupAgentSolanaWallet } = await import('@blockrun/llm');
-                const client = await setupAgentSolanaWallet({ silent: true });
-                bal = await client.getBalance();
-            }
-            else {
+            const bal = await retryFetchBalance(async () => {
+                if (chain === 'solana') {
+                    const { setupAgentSolanaWallet } = await import('@blockrun/llm');
+                    const client = await setupAgentSolanaWallet({ silent: true });
+                    return client.getBalance();
+                }
                 const { setupAgentWallet } = await import('@blockrun/llm');
                 const client = setupAgentWallet({ silent: true });
-                bal = await client.getBalance();
-            }
+                return client.getBalance();
+            });
             return `$${bal.toFixed(2)} USDC`;
         }
         catch {

package/dist/commands/telegram.js CHANGED Viewed

@@ -36,7 +36,7 @@ export async function telegramCommand(opts) {
     // Model: --model flag > config default > free default.
     const model = opts.model ||
         config['default-model'] ||
-        'nvidia/glm-4.7';
+        'nvidia/qwen3-coder-480b';
     const workingDir = process.cwd();
     const systemInstructions = assembleInstructions(workingDir, model);
     // Resume the most recent session tagged for THIS owner so a process

package/dist/index.js CHANGED Viewed

@@ -130,6 +130,15 @@ program
     const matches = searchSessions(query, { limit, model: opts.model });
     process.stdout.write(formatSearchResults(matches, query));
 });
+// ─── franklin skills (file-loaded SKILL.md slash commands) ───────────────
+program
+    .command('skills [action] [arg]')
+    .description('Manage Franklin skills — list | which <name>')
+    .option('--json', 'Output the skill list as JSON')
+    .action(async (action, arg, opts) => {
+    const { skillsCommand } = await import('./commands/skills.js');
+    await skillsCommand(action, arg, opts);
+});
 // ─── franklin social (native X bot) ───────────────────────────────────────
 // First-class subcommand. Handles setup / login / run / stats / config
 // subactions. No plugin SDK, no MCP — everything lives in src/social/.

package/dist/learnings/extractor.js CHANGED Viewed

@@ -9,9 +9,9 @@ import { loadLearnings, mergeLearning, saveLearnings, loadSkills, saveSkill } fr
 // Free models for learning extraction — JSON extraction is simple enough.
 // Ordered by reliability: try the best free model first, fall back to others.
 const EXTRACTION_MODELS = [
-    'nvidia/glm-4.7', // Best free model for structured output
-    'nvidia/qwen3-coder-480b', // Strong at JSON tasks
-    'nvidia/llama-4-maverick', // Fallback
+    'nvidia/qwen3-coder-480b', // Agent-tested free model; strong at JSON tasks
+    'nvidia/llama-4-maverick', // Agent-tested fallback
+    'nvidia/glm-4.7', // Chat fallback; not default for tool-heavy paths
 ];
 const VALID_CATEGORIES = new Set([
     'language', 'model_preference', 'tool_pattern', 'coding_style',

package/dist/plugin-sdk/workflow.js CHANGED Viewed

@@ -5,7 +5,7 @@
  * Plugins implement Workflow; core orchestrates execution and provides infrastructure.
  */
 export const DEFAULT_MODEL_TIERS = {
-    free: 'nvidia/glm-4.7',
-    cheap: 'nvidia/glm-4.7', // Free by default; opt-in to paid flat-rate via 'zai/glm-5.1'.
+    free: 'nvidia/qwen3-coder-480b',
+    cheap: 'nvidia/qwen3-coder-480b', // Free by default; opt-in to paid flat-rate via 'zai/glm-5.1'.
     premium: 'anthropic/claude-sonnet-4.6',
 };

package/dist/pricing.js CHANGED Viewed

@@ -15,9 +15,9 @@ export const MODEL_PRICING = {
     'nvidia/mistral-small-4-119b': { input: 0, output: 0 },
     'nvidia/llama-4-maverick': { input: 0, output: 0 },
     'nvidia/deepseek-v3.2': { input: 0, output: 0 },
+    // Retired (kept at 0 for legacy session-cost records; gateway no longer serves these).
     'nvidia/gpt-oss-120b': { input: 0, output: 0 },
     'nvidia/gpt-oss-20b': { input: 0, output: 0 },
-    // Retired (kept at 0 for legacy session-cost records; gateway no longer serves these).
     'nvidia/nemotron-ultra-253b': { input: 0, output: 0 },
     'nvidia/devstral-2-123b': { input: 0, output: 0 },
     'nvidia/nemotron-3-super-120b': { input: 0, output: 0 },

package/dist/proxy/fallback.js CHANGED Viewed

@@ -19,7 +19,7 @@ export const DEFAULT_FALLBACK_CONFIG = {
     chain: [
         'deepseek/deepseek-chat', // Direct fallback — cheap & reliable
         'google/gemini-2.5-flash', // Fast & capable
-        'nvidia/glm-4.7', // Free model as ultimate fallback
+        'nvidia/qwen3-coder-480b', // Free model as ultimate fallback
     ],
     retryOn: [429, 500, 502, 503, 504, 529],
     maxRetries: 5,

package/dist/proxy/server.js CHANGED Viewed

@@ -153,18 +153,18 @@ const MODEL_SHORTCUTS = {
     // DeepSeek
     deepseek: 'deepseek/deepseek-chat',
     r1: 'deepseek/deepseek-reasoner',
-    // Free models (gateway free tier — refreshed 2026-04)
-    free: 'nvidia/glm-4.7',
-    glm4: 'nvidia/glm-4.7',
-    'deepseek-free': 'nvidia/deepseek-v3.2',
+    // Free models (agent-tested gateway free tier — refreshed 2026-04)
+    free: 'nvidia/qwen3-coder-480b',
+    glm4: 'nvidia/qwen3-coder-480b',
+    'deepseek-free': 'nvidia/qwen3-coder-480b',
     'qwen-coder': 'nvidia/qwen3-coder-480b',
-    'qwen-think': 'nvidia/qwen3-next-80b-a3b-thinking',
+    'qwen-think': 'nvidia/qwen3-coder-480b',
     maverick: 'nvidia/llama-4-maverick',
-    'gpt-oss': 'nvidia/gpt-oss-120b',
-    'gpt-oss-small': 'nvidia/gpt-oss-20b',
-    'mistral-small': 'nvidia/mistral-small-4-119b',
-    // Retired-gateway-model aliases (map to closest current).
-    nemotron: 'nvidia/glm-4.7',
+    'gpt-oss': 'nvidia/qwen3-coder-480b',
+    'gpt-oss-small': 'nvidia/qwen3-coder-480b',
+    'mistral-small': 'nvidia/llama-4-maverick',
+    // Retired/unreliable gateway-model aliases (map to closest agent-tested current).
+    nemotron: 'nvidia/qwen3-coder-480b',
     devstral: 'nvidia/qwen3-coder-480b',
     // Minimax
     minimax: 'minimax/minimax-m2.7',

package/dist/router/index.js CHANGED Viewed

@@ -66,20 +66,20 @@ const AUTO_TIERS = {
 };
 const ECO_TIERS = {
     SIMPLE: {
-        primary: 'nvidia/glm-4.7',
-        fallback: ['nvidia/gpt-oss-120b', 'nvidia/deepseek-v3.2'],
+        primary: 'nvidia/qwen3-coder-480b',
+        fallback: ['nvidia/llama-4-maverick'],
     },
     MEDIUM: {
         primary: 'google/gemini-2.5-flash-lite',
-        fallback: ['nvidia/glm-4.7', 'nvidia/qwen3-coder-480b'],
+        fallback: ['nvidia/qwen3-coder-480b', 'nvidia/llama-4-maverick'],
     },
     COMPLEX: {
         primary: 'google/gemini-2.5-flash-lite',
-        fallback: ['deepseek/deepseek-chat', 'nvidia/glm-4.7'],
+        fallback: ['deepseek/deepseek-chat', 'nvidia/qwen3-coder-480b'],
     },
     REASONING: {
         primary: 'xai/grok-4-1-fast-reasoning',
-        fallback: ['deepseek/deepseek-reasoner', 'nvidia/qwen3-next-80b-a3b-thinking'],
+        fallback: ['deepseek/deepseek-reasoner', 'nvidia/qwen3-coder-480b'],
     },
 };
 const PREMIUM_TIERS = {
@@ -409,7 +409,7 @@ export function resolveTierToModel(tier, profile = 'auto') {
     // Free profile short-circuits — everything routes to a single free model.
     if (profile === 'free') {
         return {
-            model: 'nvidia/glm-4.7',
+            model: 'nvidia/qwen3-coder-480b',
             tier: 'SIMPLE',
             confidence: 1.0,
             signals: ['free-profile'],
@@ -440,7 +440,7 @@ export function routeRequest(prompt, profile = 'auto') {
     // Free profile — always use free model
     if (profile === 'free') {
         return {
-            model: 'nvidia/glm-4.7',
+            model: 'nvidia/qwen3-coder-480b',
             tier: 'SIMPLE',
             confidence: 1.0,
             signals: ['free-profile'],
@@ -513,7 +513,7 @@ export function getFallbackChain(tier, profile = 'auto') {
             tierConfigs = PREMIUM_TIERS;
             break;
         case 'free':
-            return ['nvidia/glm-4.7'];
+            return ['nvidia/qwen3-coder-480b'];
         default:
             tierConfigs = AUTO_TIERS;
     }

package/dist/skills/bootstrap.d.ts ADDED Viewed

@@ -0,0 +1,27 @@
+/**
+ * Boot-time helpers that wire the skills library into the running process:
+ *
+ * - `loadBundledSkills()` discovers `dist/skills-bundled/<name>/SKILL.md`
+ *   relative to this module's location and returns a populated Registry.
+ *   User-global and project-local discovery are deferred to Phase 2 of the
+ *   skills MVP plan; today we only ship the bundled set.
+ *
+ * - `getSkillVars()` returns the synchronously-known runtime variables
+ *   that `substituteVariables` injects into a skill body before
+ *   `$ARGUMENTS` expansion. Async values (wallet balance, on-chain reads)
+ *   are deferred to a later phase: those vars stay literal in the rendered
+ *   prompt and `substituteVariables` leaves unknown vars intact.
+ */
+import { Registry } from './registry.js';
+import type { LoadError } from './types.js';
+export interface BundledLoad {
+    registry: Registry;
+    errors: LoadError[];
+}
+export declare function loadBundledSkills(): BundledLoad;
+export interface SkillVarSource {
+    chain?: 'base' | 'solana';
+    /** Per-turn spend cap in USD; mirrors the `max-turn-spend-usd` config key. */
+    perTurnCapUsd?: number;
+}
+export declare function getSkillVars(src: SkillVarSource): Record<string, string>;