npm - winter-super-cli - Versions diffs - 2026.5.28 → 2026.5.30 - Mend

winter-super-cli 2026.5.28 → 2026.5.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/bin/winter.js +2 -1
package/package.json +1 -1
package/src/ai/benchmark.js +352 -0
package/src/ai/prompts/system-prompt.js +70 -81
package/src/ai/providers.js +12 -9
package/src/ai/reasoning.js +5 -81
package/src/cli/commands.js +62 -0
package/src/cli/context-loader.js +64 -1
package/src/cli/conversation-format.js +90 -12
package/src/cli/prompt-builder.js +43 -17
package/src/cli/repl-commands.js +14 -3
package/src/cli/repl.js +333 -214
package/src/context/router.js +26 -22
package/src/tools/executor.js +78 -9

package/bin/winter.js CHANGED Viewed

@@ -17,7 +17,7 @@ const pkg = JSON.parse(readFileSync(new URL('../package.json', import.meta.url),
 const version = pkg.version;
 const COMMANDS = new Set([
-  'chat', 'call', 'session', 'skill', 'plugin', 'design', 'config', 'init',
+  'chat', 'call', 'benchmark', 'session', 'skill', 'plugin', 'design', 'config', 'init',
   'help', 'project', 'code', 'review', 'mcp', 'permissions',
   'provider', 'providers', 'model', 'models',
 ]);
@@ -53,6 +53,7 @@ Usage:
 Commands:
   winter call <prompt>        Call all configured providers
+  winter benchmark [providers] Benchmark model intelligence
   winter session <action>     Session management
   winter skill <action>       Skill management
   winter plugin <action>      Plugin management

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "winter-super-cli",
-  "version": "2026.5.28",
+  "version": "2026.5.30",
   "description": "❄️ AI-Powered Development CLI with Interactive REPL",
   "type": "module",
   "main": "bin/winter.js",

package/src/ai/benchmark.js ADDED Viewed

@@ -0,0 +1,352 @@
+/**
+ * Benchmark Engine — Đo độ thông minh của models trong Winter CLI
+ *
+ * Cố định câu hỏi test (logic, coding, math, reasoning, language)
+ * + Coding task thật → chạy qua providers → chấm điểm → so sánh
+ */
+import { colors } from '../cli/snowflake-logo.js';
+// ── Question Bank ────────────────────────────────────────────────────────────
+/**
+ * Fixed question bank used by BenchmarkRunner.run. Each entry carries:
+ *   - id / category: copied into the per-item result record
+ *   - question:      sent verbatim as the user message
+ *   - keywords:      scoreAnswer counts how many of these occur in the reply
+ *                    (case-insensitive substring match) and divides by the
+ *                    length of the list
+ *   - weight:        multiplier applied to the 0..1 score when totals are summed
+ *
+ * NOTE(review): matching is by substring, so short keywords such as 'no', '3'
+ * or '15' also hit inside longer words or numbers, and inside a reply that
+ * merely restates the question.
+ */
+const BENCHMARK_QUESTIONS = [
+  {
+    id: 'q01',
+    category: 'logic',
+    question: `If all cats are mammals and some mammals are dogs, are all cats dogs? Explain your reasoning step by step.`,
+    keywords: ['not', 'no', 'incorrect', 'cannot conclude', 'not necessarily', 'invalid'],
+    weight: 1,
+  },
+  {
+    id: 'q02',
+    category: 'coding',
+    question: `Write a JavaScript function called isPalindrome that checks if a string is a palindrome (reads the same forwards and backwards). Include example usage.`,
+    keywords: ['function', 'palindrome', 'reverse', 'split', 'return'],
+    weight: 1.5,
+  },
+  {
+    id: 'q03',
+    category: 'math',
+    question: `What is 15% of 200? Show your calculation.`,
+    keywords: ['30', '15', '200', '0.15'],
+    weight: 0.5,
+  },
+  {
+    id: 'q04',
+    category: 'reasoning',
+    question: `A bat and a ball cost $1.10 in total. The bat costs $1.00 more than the ball. How much does the ball cost? Think carefully.`,
+    keywords: ['0.05', '5 cents', '5 cent', '0.05$', '5¢', 'five cents'],
+    weight: 1.5,
+  },
+  {
+    id: 'q05',
+    category: 'coding',
+    question: `What's wrong with this code and how would you fix it?\n\nfunction add(a, b) {\n  return a + b;\n}\nconsole.log(add(5, '3'));`,
+    keywords: ['string', 'type', 'concatenation', 'number', 'parse', 'typeof', 'coercion'],
+    weight: 1,
+  },
+  {
+    id: 'q06',
+    category: 'language',
+    question: `Translate this sentence to Vietnamese: "Good morning, how are you today?"`,
+    keywords: ['chào', 'sáng', 'khỏe', 'hôm nay', 'bạn'],
+    weight: 0.5,
+  },
+  {
+    id: 'q07',
+    category: 'logic',
+    question: `You have a 3-gallon jug and a 5-gallon jug. How can you measure exactly 4 gallons of water? Explain step by step.`,
+    keywords: ['fill', 'pour', '3', '5', '4', 'empty'],
+    weight: 1.5,
+  },
+  {
+    id: 'q08',
+    category: 'coding',
+    question: `Write a recursive function to calculate the nth Fibonacci number. Explain how memoization can optimize it.`,
+    keywords: ['function', 'fibonacci', 'recursive', 'memoization', 'cache'],
+    weight: 1.5,
+  },
+];
+/**
+ * Coding-task bank used by BenchmarkRunner.run. `description` is the prompt
+ * sent to the provider, `title` is the label shown in the per-item report, and
+ * `evaluationCriteria` is what scoreTask searches for in the reply
+ * (case-insensitive substring match, hits divided by the list length).
+ * `weight` multiplies the 0..1 score when totals are summed.
+ *
+ * NOTE(review): criteria are lower-cased before matching, so in t03 'Name' and
+ * 'name' are one and the same check counted twice, and short criteria such as
+ * 'on' / 'off' in t02 also match inside longer words.
+ */
+const BENCHMARK_TASKS = [
+  {
+    id: 't01',
+    category: 'coding-task',
+    title: 'API Fetch with Error Handling',
+    description: 'Write a JavaScript function that fetches JSON data from a URL, handles network errors, HTTP errors, and invalid JSON responses gracefully.',
+    evaluationCriteria: ['error handling', 'try/catch', 'async/await', 'fetch', 'response.ok'],
+    weight: 2,
+  },
+  {
+    id: 't02',
+    category: 'coding-task',
+    title: 'Event Emitter Class',
+    description: 'Create a simple EventEmitter class in JavaScript with on(), off(), and emit() methods. It should support multiple listeners for the same event and removing listeners.',
+    evaluationCriteria: ['class', 'on', 'off', 'emit', 'listeners', 'events'],
+    weight: 2,
+  },
+  {
+    id: 't03',
+    category: 'coding-task',
+    title: 'Fix This Bug',
+    description: `What's wrong with this code? Identify ALL bugs and provide a fixed version:\n\nconst users = [\n  { name: 'Alice', age: 30 },\n  { name: 'Bob', age: 25 },\n  { name: 'Charlie', age: 35 },\n];\n\nconst adultUsers = users.filter(u => u.age >= 18);\nadultUsers.forEach(u => {\n  console.log(u.Name);\n});\n\nadultUsers.sort((a, b) => a.age - b.age);\nconst totalAge = adultUsers.reduce((acc, u) => acc + u.age);\nconsole.log('Average age:', totalAge / adultUsers.length);`,
+    evaluationCriteria: ['Name', 'name', 'undefined', 'reduce', 'initial', 'initialize', 'capital N'],
+    weight: 2.5,
+  },
+];
+// ── Scoring ──────────────────────────────────────────────────────────────────
+/**
+ * Shared scorer behind scoreAnswer and scoreTask.
+ *
+ * The base score is the fraction of `terms` that occur in `answer`
+ * (case-insensitive substring match). A small bonus is added for longer
+ * replies, and the sum is capped at 1.
+ *
+ * @param {string[]} terms - Keywords / criteria to look for.
+ * @param {*} answer - Provider reply; anything but a non-empty string scores 0.
+ * @param {number} longAt - Word count above which the bonus is 0.1.
+ * @param {number} mediumAt - Word count above which the bonus is 0.05.
+ * @returns {number} Score in the range 0..1.
+ */
+function scoreByTerms(terms, answer, longAt, mediumAt) {
+  if (!answer || typeof answer !== 'string') return 0;
+  const wanted = Array.isArray(terms) ? terms : [];
+  if (wanted.length === 0) return 0;
+  const haystack = answer.toLowerCase();
+  const hits = wanted.filter(term => haystack.includes(String(term).toLowerCase())).length;
+  // The length bonus rewards thoroughness of a relevant reply. Without a
+  // single hit it would hand points to any long, off-topic answer.
+  if (hits === 0) return 0;
+  // Trim first: split(/\s+/) yields empty tokens for leading/trailing
+  // whitespace, which would otherwise inflate the word count.
+  const words = answer.trim().split(/\s+/).length;
+  const lengthBonus = words > longAt ? 0.1 : words > mediumAt ? 0.05 : 0;
+  return Math.min(1, hits / wanted.length + lengthBonus);
+}
+/**
+ * Score a reply to a fixed question.
+ * Bonus thresholds: more than 50 words -> 0.1, more than 20 words -> 0.05.
+ *
+ * @param {{keywords: string[]}} question - Entry from the question bank.
+ * @param {*} answer - Provider reply.
+ * @returns {number} Score in the range 0..1.
+ */
+function scoreAnswer(question, answer) {
+  return scoreByTerms(question.keywords, answer, 50, 20);
+}
+/**
+ * Score a reply to a coding task.
+ * Bonus thresholds: more than 100 words -> 0.1, more than 50 words -> 0.05.
+ *
+ * @param {{evaluationCriteria: string[]}} task - Entry from the task bank.
+ * @param {*} answer - Provider reply.
+ * @returns {number} Score in the range 0..1.
+ */
+function scoreTask(task, answer) {
+  return scoreByTerms(task.evaluationCriteria, answer, 100, 50);
+}
+// ── Benchmark Runner ─────────────────────────────────────────────────────────
+/**
+ * Runs the benchmark banks against AI providers, scores every reply and
+ * renders a ranked report.
+ */
+export class BenchmarkRunner {
+  /**
+   * @param {object} aiManager - Object exposing `init()`, a `providers` map
+   *   keyed by name, and `sendRequestToProvider(provider, messages, options)`.
+   */
+  constructor(aiManager) {
+    this.ai = aiManager;
+  }
+  /**
+   * Run all benchmark questions across specified providers.
+   * Providers are processed one after another. Names that are unknown or whose
+   * provider is not `ready` are skipped, and duplicate names are run once.
+   * @param {string[]} providerNames - List of provider names (e.g., ['claude', 'openai', 'ollama'])
+   * @param {object} options
+   * @param {boolean} options.tasks - Whether to include coding tasks (default: true)
+   * @param {boolean} options.questions - Whether to include fixed questions (default: true)
+   * @returns {Promise<object>} `{ error }` when no provider is ready, otherwise
+   *   `{ timestamp, totalElapsed, providers, ranking }`.
+   */
+  async run(providerNames, options = {}) {
+    const { questions = true, tasks = true } = options;
+    await this.ai.init();
+    // Tolerate a non-array argument, and drop duplicates so a provider is not
+    // benchmarked twice only to overwrite its own entry in `results`.
+    const uniqueNames = Array.isArray(providerNames) ? [...new Set(providerNames)] : [];
+    // Filter to only ready providers
+    const providers = uniqueNames
+      .map(name => ({ name, provider: this.ai.providers[name] }))
+      .filter(({ provider }) => provider && provider.ready);
+    if (providers.length === 0) {
+      return { error: 'No ready providers found. Configure providers in winter.json first.' };
+    }
+    const results = {};
+    const startTime = Date.now();
+    for (const { name, provider } of providers) {
+      // Timed per provider: measuring from `startTime` would charge each
+      // provider with the time spent on the ones benchmarked before it.
+      const providerStart = Date.now();
+      console.log(`${colors.dim}Benchmarking ${colors.bright}${name}${colors.reset}${colors.dim}...${colors.reset}`);
+      const providerResults = [];
+      // Fixed questions
+      if (questions) {
+        for (const q of BENCHMARK_QUESTIONS) {
+          const identity = { type: 'question', id: q.id, category: q.category };
+          providerResults.push(
+            await this._runItem(provider, identity, q.question, q.weight, answer => scoreAnswer(q, answer)),
+          );
+        }
+      }
+      // Coding tasks
+      if (tasks) {
+        for (const t of BENCHMARK_TASKS) {
+          const identity = { type: 'task', id: t.id, category: t.category, title: t.title };
+          providerResults.push(
+            await this._runItem(provider, identity, t.description, t.weight, answer => scoreTask(t, answer)),
+          );
+        }
+      }
+      const totalScore = providerResults.reduce((sum, r) => sum + r.weightedScore, 0);
+      const maxScore = providerResults.reduce((sum, r) => sum + r.maxWeightedScore, 0);
+      const overall = maxScore > 0 ? Math.round((totalScore / maxScore) * 100) : 0;
+      results[name] = {
+        provider: name,
+        model: provider.model,
+        results: providerResults,
+        totalScore,
+        maxScore,
+        overall,
+        elapsed: Date.now() - providerStart,
+      };
+    }
+    return {
+      timestamp: new Date().toISOString(),
+      totalElapsed: Date.now() - startTime,
+      providers: results,
+      // Sort providers by overall score descending
+      ranking: Object.values(results)
+        .sort((a, b) => b.overall - a.overall)
+        .map(r => ({ name: r.provider, model: r.model, score: r.overall, elapsed: r.elapsed })),
+    };
+  }
+  /**
+   * Send one prompt to a provider, time it, and build the result record.
+   * @param {object} provider - Provider entry taken from `this.ai.providers`.
+   * @param {object} identity - Leading fields of the record (type, id, category, optional title).
+   * @param {string} prompt - Text sent as the user message.
+   * @param {number} weight - Weight of this item in the provider total.
+   * @param {(answer: string) => number} score - Maps the reply to a 0..1 score.
+   * @returns {Promise<object>} Result record stored in `results[name].results`.
+   */
+  async _runItem(provider, identity, prompt, weight, score) {
+    const started = Date.now();
+    const answer = await this.askProvider(provider, prompt);
+    const elapsed = Date.now() - started;
+    // A failed request must not earn points just because the error text
+    // happens to contain a keyword (e.g. "not found" matching 'no').
+    const value = this._isErrorAnswer(answer) ? 0 : score(answer);
+    return {
+      ...identity,
+      question: prompt,
+      answer: answer.slice(0, 500), // truncate for display
+      score: value,
+      weightedScore: value * weight,
+      maxWeightedScore: weight,
+      elapsed,
+    };
+  }
+  /**
+   * @param {string} answer - Value returned by askProvider.
+   * @returns {boolean} True when the value is the `[ERROR: ...]` placeholder.
+   */
+  _isErrorAnswer(answer) {
+    return answer.startsWith('[ERROR:');
+  }
+  /**
+   * Ask a single provider one prompt with tools disabled.
+   * @param {object} provider - Provider entry; its `model` is passed through.
+   * @param {string} prompt - User message.
+   * @returns {Promise<string>} The reply text, '' when the response carries no
+   *   text content, or `[ERROR: <message>]` when the request throws.
+   */
+  async askProvider(provider, prompt) {
+    try {
+      const messages = [
+        { role: 'system', content: 'You are a helpful AI assistant. Answer concisely and accurately.' },
+        { role: 'user', content: prompt },
+      ];
+      const data = await this.ai.sendRequestToProvider(provider, messages, {
+        enableTools: false,
+        model: provider.model,
+      });
+      const content = data.choices?.[0]?.message?.content;
+      // Callers slice and regex-replace the reply, so always hand back a string.
+      return typeof content === 'string' ? content : '';
+    } catch (err) {
+      return `[ERROR: ${err?.message ?? err}]`;
+    }
+  }
+  // ── Format Results ────────────────────────────────────────────────────────
+  /**
+   * Render the object returned by run() as a colored, multi-line report.
+   * @param {object} benchmarkResult - Return value of run().
+   * @returns {string} Printable report, or the error line when `error` is set.
+   */
+  formatResults(benchmarkResult) {
+    if (benchmarkResult.error) {
+      return `\n${colors.red}${benchmarkResult.error}${colors.reset}\n`;
+    }
+    const lines = [];
+    lines.push(`\n${colors.cyan}${'═'.repeat(60)}${colors.reset}`);
+    lines.push(`${colors.bright}${colors.cyan}   🧠 WINTER MODEL BENCHMARK${colors.reset}`);
+    lines.push(`${colors.cyan}${'═'.repeat(60)}${colors.reset}`);
+    lines.push(`  ${colors.dim}${benchmarkResult.timestamp}${colors.reset}`);
+    lines.push(`  ${colors.dim}Total time: ${(benchmarkResult.totalElapsed / 1000).toFixed(1)}s${colors.reset}`);
+    lines.push('');
+    // Ranking
+    lines.push(`${colors.bright}🏆 RANKING${colors.reset}`);
+    lines.push(`${'─'.repeat(40)}`);
+    benchmarkResult.ranking.forEach((r, i) => {
+      const medal = i === 0 ? '🥇' : i === 1 ? '🥈' : i === 2 ? '🥉' : ` ${i + 1}.`;
+      const bar = this._scoreBar(r.score, 20);
+      lines.push(`  ${medal} ${colors.bright}${r.name}${colors.reset} ${bar} ${r.score}%`);
+      lines.push(`     ${colors.dim}Model: ${r.model} | Time: ${(r.elapsed / 1000).toFixed(1)}s${colors.reset}`);
+    });
+    lines.push('');
+    // Detail per provider
+    for (const [name, data] of Object.entries(benchmarkResult.providers)) {
+      lines.push(`${colors.bright}${'─'.repeat(50)}${colors.reset}`);
+      lines.push(`${colors.bright}📊 ${name}${colors.reset} ${colors.dim}(${data.model})${colors.reset}`);
+      lines.push(`${'─'.repeat(50)}`);
+      // Category percentages are unweighted averages of the 0..1 item scores.
+      const categories = {};
+      for (const r of data.results) {
+        const cat = r.category || 'other';
+        if (!categories[cat]) categories[cat] = { count: 0, totalScore: 0, maxScore: 0 };
+        categories[cat].count++;
+        categories[cat].totalScore += r.score;
+        categories[cat].maxScore += 1;
+      }
+      for (const [cat, stats] of Object.entries(categories)) {
+        const catPct = Math.round((stats.totalScore / stats.maxScore) * 100);
+        const bar = this._scoreBar(catPct, 10);
+        lines.push(`  ${bar} ${colors.dim}${cat}:${colors.reset} ${catPct}% (${stats.count} items)`);
+      }
+      lines.push('');
+      // Per-item breakdown
+      for (const r of data.results) {
+        const icon = r.score >= 0.8 ? '✅' : r.score >= 0.5 ? '🟡' : r.score >= 0.2 ? '🟠' : '❌';
+        const label = r.type === 'question' ? r.id : r.title;
+        lines.push(`  ${icon} ${colors.dim}${label}:${colors.reset} ${Math.round(r.score * 100)}% (${(r.elapsed / 1000).toFixed(1)}s)`);
+        // Show preview of answer
+        const preview = r.answer.replace(/\n/g, ' ').slice(0, 120);
+        lines.push(`    ${colors.dim}${preview}${r.answer.length > 120 ? '...' : ''}${colors.reset}`);
+      }
+      lines.push('');
+    }
+    lines.push(`${colors.cyan}${'═'.repeat(60)}${colors.reset}\n`);
+    return lines.join('\n');
+  }
+  /**
+   * Draw a fixed-width bar for a percentage.
+   * @param {number} score - Percentage; values outside 0..100 are clamped.
+   * @param {number} [width=20] - Total number of cells.
+   * @returns {string} `width` cells, filled ones first, wrapped in color codes.
+   */
+  _scoreBar(score, width = 20) {
+    // Clamp so String.prototype.repeat never receives a negative count.
+    const ratio = Number.isFinite(score) ? Math.min(1, Math.max(0, score / 100)) : 0;
+    const filled = Math.round(ratio * width);
+    const empty = width - filled;
+    const filledChar = '█';
+    const emptyChar = '░';
+    return colors.green + filledChar.repeat(filled) + colors.dim + emptyChar.repeat(empty) + colors.reset;
+  }
+  // ── History ───────────────────────────────────────────────────────────────
+  /**
+   * One plain-text line per ranked provider, in ranking order.
+   * @param {object} benchmarkResult - Return value of run().
+   * @returns {string} Lines joined with '\n'.
+   */
+  formatHistorySummary(benchmarkResult) {
+    return benchmarkResult.ranking
+      .map(r => `[${r.name}] Score: ${r.score}% | Model: ${r.model} | Time: ${(r.elapsed / 1000).toFixed(1)}s`)
+      .join('\n');
+  }
+}
+// Export question/task banks for testing
+export { BENCHMARK_QUESTIONS, BENCHMARK_TASKS, scoreAnswer, scoreTask };

package/src/ai/prompts/system-prompt.js CHANGED Viewed

@@ -1,7 +1,7 @@
 /**
  * Dynamic System Prompt Builder
  * Builds context-aware system prompts based on task, role, and session state.
- * Small models get aggressive structural guidance to compensate for limited capability.
+ * Small models get compact structural guidance so the task stays in focus.
  */
 import { isSmallModel, getModelCapabilityLabel } from '../model-capabilities.js';
@@ -40,98 +40,93 @@ function buildEnvironmentSummary() {
   ].join('\n');
 }
-/**
- * Build a "boosted" system prompt for small/tiny models.
- * Small models need: more explicit structure, strict formats, explicit step-by-step forcing.
- */
-function buildSmallModelSystemPrompt({
-  role = 'coding',
-  context,
-  tools = [],
-  session,
-  environment,
-  design,
-  resourceContext,
-  modelTier,
-} = {}) {
-  const parts = [
-    `You are Winter, an expert AI coding assistant. You are running on a ${getModelCapabilityLabel(modelTier)}.`,
-    '',
-    '## CRITICAL: YOU MUST THINK STEP BY STEP',
-    '',
-    'Because you are a smaller model, you MUST use structured thinking to produce quality results.',
-    'Before any response, use <thinking> tags to reason through the problem.',
-    '',
-    'Your thinking must cover:',
-    '1. What does the user want? (restate briefly)',
-    '2. What files/tools do I need to use?',
-    '3. What is the best approach?',
-    '4. What could go wrong? Edge cases?',
-    '5. Is my solution complete and correct?',
-    '',
-    'After thinking, THEN act. Never skip the thinking step.',
-    '',
-    '## Core Principles',
-    ...BASE_PRINCIPLES.map((p, i) => `${i + 1}. ${p}`),
-    '',
-    '## Runtime Environment',
-    environment || buildEnvironmentSummary(),
-    '',
-  ];
+/**
+ * Render tool names as a comma-separated list, keeping at most the first ten.
+ * @param {string[]} [tools] - Tool names.
+ * @returns {string} The joined names, or '' when there are none.
+ */
+function formatToolList(tools = []) {
+  if (!(tools.length > 0)) {
+    return '';
+  }
+  const shown = tools.slice(0, 10);
+  return shown.join(', ');
+}
-  if (tools.length > 0) {
-    parts.push('## Available Tools', tools.join(', '), '');
-  }
+function appendSharedContext(parts, { environment, session, design, resourceContext, context, includeResources = false } = {}) {
+  parts.push('## Runtime Environment', environment || buildEnvironmentSummary(), '');
   if (session?.memory?.length) {
     parts.push('## Session Memory');
-    session.memory.forEach(m => parts.push(`  - ${m.substring(0, 120)}`));
+    session.memory.slice(-5).forEach(m => parts.push('- ' + String(m).slice(0, 100)));
     parts.push('');
   }
   if (session?.plans?.length) {
     parts.push('## Active Plans');
-    session.plans.forEach(p => parts.push(`  - ${p.title || p.substring(0, 80)}`));
+    session.plans.slice(-3).forEach(p => parts.push('- ' + (p.title || String(p).slice(0, 80))));
     parts.push('');
   }
   if (design) {
-    parts.push('## Design Guidelines');
+    parts.push('## Design Context');
     if (design.brand) {
-      parts.push(`Brand: ${design.brand}`);
-      parts.push('');
-      const lines = design.content.split('\n').filter(Boolean);
-      const preview = lines.slice(0, 40).join('\n');
-      parts.push(preview);
-      if (lines.length > 40) parts.push('... (design file truncated)');
+      parts.push('Brand: ' + design.brand);
+      parts.push(design.content.split('\n').filter(Boolean).slice(0, 18).join('\n'));
     } else if (design.type === 'design_hint') {
-      parts.push('Design-related task detected. Consider applying one of the available design systems.');
-      parts.push(`Available: ${design.brands.join(', ')}`);
+      parts.push('Available design systems: ' + design.brands.slice(0, 5).join(', '));
     }
     parts.push('');
   }
-  if (resourceContext) {
-    parts.push(resourceContext);
+  if (includeResources && resourceContext) {
+    parts.push(resourceContext.trim().slice(0, 1200), '');
   }
-  parts.push(
-    '## Execution Rules (STRICT)',
-    '- EXECUTE FIRST. Read files, then edit. Do NOT describe what you will do — just do it.',
-    '- Keep explanations under 2 sentences. Say what you changed, not what you could do.',
-    '- After using tools, give only a one-line summary of what was done.',
-    '- Answer questions directly — no disclaimers or warnings.',
-    '- If a request is unsafe, refuse briefly and stop.',
+  if (context && typeof context === 'object') {
+    parts.push('Task: ' + (context.category || 'coding') + ' / ' + (context.type || 'simple'), '');
+  }
+}
+function buildCompactSmallModelPrompt(options = {}) {
+  const { tools = [], modelTier } = options;
+  const parts = [
+    'You are Winter, an AI coding assistant running on a ' + getModelCapabilityLabel(modelTier) + '.',
     '',
-    '## Thinking Format (MANDATORY)',
-    '<thinking>',
-    'Step-by-step reasoning here...',
-    '</thinking>',
-    '[Your action/answer here]',
+    '## Operating Rules',
+    '1. Understand the user request first. If project state matters, inspect files before answering.',
+    '2. Keep context tight. Use only relevant tools and avoid long explanations.',
+    '3. For coding: Read/Grep/Glob -> Edit/Write -> Bash/test. Do not guess file paths.',
+    '4. Final answer in Vietnamese. Mention changed files and verification only.',
+    '',
+  ];
+  const toolList = formatToolList(tools);
+  if (toolList) parts.push('## Tools', toolList, '');
+  appendSharedContext(parts, { ...options, includeResources: false });
+  parts.push(
+    '## Response Shape',
+    '- If action is needed, use tools instead of describing the action.',
+    '- Keep final output short and concrete.',
   );
-  return parts.join('\n');
+  return parts.filter(Boolean).join('\n');
 }
+/**
+ * Assemble the system prompt used when the model is not classified as small.
+ *
+ * Sections, in order: identity line, numbered core principles, tool-usage
+ * rules, optional tool list, whatever appendSharedContext adds, and the
+ * closing language instruction. Resource context is requested from
+ * appendSharedContext only when one was supplied and the role is 'design'
+ * or 'ui'. Empty entries are removed before joining, so the '' separators
+ * pushed below do not produce blank lines in the output.
+ *
+ * @param {object} [options] - Same option bag buildSystemPrompt receives.
+ * @returns {string} Prompt text joined with '\n'.
+ */
+function buildStandardSystemPrompt(options = {}) {
+  const { role = 'coding', tools = [], resourceContext } = options;
+  const numberedPrinciples = BASE_PRINCIPLES.map((principle, index) => `${index + 1}. ${principle}`);
+  const parts = [
+    'You are Winter, an expert AI coding assistant.',
+    '',
+    '## Core Principles',
+    ...numberedPrinciples,
+    '',
+    '## Tool Usage',
+    'Use tools when they materially improve correctness. Inspect before editing. Verify after changes.',
+    'Never invent file paths, APIs, command output, or test results.',
+    '',
+  ];
+  const toolList = formatToolList(tools);
+  if (toolList) {
+    parts.push('## Tools', toolList, '');
+  }
+  const isDesignRole = role === 'design' || role === 'ui';
+  const includeResources = Boolean(resourceContext) && isDesignRole;
+  appendSharedContext(parts, { ...options, includeResources });
+  parts.push('Always respond in Vietnamese.');
+  return parts.filter(Boolean).join('\n');
+}
 export function buildSystemPrompt({
   role = 'coding',
   context,
@@ -142,17 +137,10 @@ export function buildSystemPrompt({
   resourceContext,
   modelTier,
 } = {}) {
-  // ALL models get the deep-thinking system prompt for maximum code quality
-  return buildSmallModelSystemPrompt({
-    role,
-    context,
-    tools,
-    session,
-    environment,
-    design,
-    resourceContext,
-    modelTier,
-  });
+  const options = { role, context, tools, session, environment, design, resourceContext, modelTier };
+  return isSmallModel(modelTier)
+    ? buildCompactSmallModelPrompt(options)
+    : buildStandardSystemPrompt(options);
 }
 export function buildFastSystemPrompt({
@@ -164,7 +152,7 @@ export function buildFastSystemPrompt({
     return [
       'Winter (fast mode - small model). Be concise. Use tools when needed.',
       tools.length > 0 ? `Tools: ${tools.join(', ')}` : '',
-      'THINK inside <thinking> before acting. Keep responses to 1 sentence.',
+      'Use a brief private plan, then answer in 1 sentence.',
     ].filter(Boolean).join('\n');
   }
@@ -182,11 +170,12 @@ export function buildAgentSystemPrompt(role, { tools = [], modelTier } = {}) {
     debug: 'You are a debug specialist. Use systematic elimination to find root causes.',
     research: 'You search codebases and documentation to answer questions comprehensively.',
     browser: 'You interact with web pages via browser automation. Report findings clearly.',
+    coding: 'You solve coding tasks directly. Inspect files, edit surgically, and verify.',
   };
   const base = roleConfigs[role] || roleConfigs.coding;
   const smallNote = modelTier && isSmallModel(modelTier)
-    ? '\n\nYou are running on a small model. Use <thinking> tags and reason step by step before each action.'
+    ? '\n\nYou are running on a small model. Keep context tight, use tools early, and keep final output short.'
     : '';
   return [

package/src/ai/providers.js CHANGED Viewed

@@ -103,16 +103,17 @@ export class AIProviderManager {
       if (available) this.activeProvider = available;
     }
-    // Auto-detect model capability tier
-    const providerConfig = this.providers[this.activeProvider] || {};
-    this._modelTier = classifyModelTier(providerConfig.model, this.activeProvider);
-    // Eager-load local resources (design systems, agent instructions) for contextual injection
-    this._loadResourceContext(); // fire-and-forget
+    this.updateActiveModelTier();
     this.initialized = true;
   }
+  /**
+   * Re-classify the capability tier for the currently active provider and
+   * store it on `this._modelTier`. Falls back to an empty config when the
+   * active provider has no entry in `this.providers`.
+   * @returns {*} The tier value returned by classifyModelTier.
+   */
+  updateActiveModelTier() {
+    const { model } = this.providers[this.activeProvider] || {};
+    this._modelTier = classifyModelTier(model, this.activeProvider);
+    return this._modelTier;
+  }
   async loadAuthToken() {
     // 1) Honor explicit environment variables (highest priority)
     const envToken = process.env.CLAUDE_AUTH_TOKEN || process.env.ANTHROPIC_API_KEY || process.env.CLAUDE_TOKEN || null;
@@ -172,6 +173,7 @@ export class AIProviderManager {
     const providerName = this.normalizeProviderName(name);
     if (this.providers[providerName]) {
       this.activeProvider = providerName;
+      this.updateActiveModelTier();
       return true;
     }
     return false;
@@ -594,7 +596,9 @@ export class AIProviderManager {
   getSystemPrompt(options = {}) {
     const taskInfo = options.task ? classifyTask(options.task) : null;
-    const tools = this.tools ? Object.keys(this.tools) : [];
+    const tools = Array.isArray(this.tools)
+      ? this.tools.map(tool => tool?.function?.name || tool?.name).filter(Boolean)
+      : [];
     const sessionInfo = {
       memory: options.memory || [],
       plans: options.plans || [],
@@ -633,8 +637,7 @@ export class AIProviderManager {
       ? '\n\n' + SuccessCriteria.fromRequest(options.task).buildPrompt()
       : '';
-    // Use cached resource context (eager-loaded in init())
-    const resourceContext = this._cachedResourceContext || '';
+    const resourceContext = options.includeResources ? (this._cachedResourceContext || '') : '';
     // Auto-detect relevant design guide for UI/design tasks
     let designGuide = null;