winter-super-cli 2026.5.28 → 2026.5.30

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/winter.js CHANGED
@@ -17,7 +17,7 @@ const pkg = JSON.parse(readFileSync(new URL('../package.json', import.meta.url),
17
17
  const version = pkg.version;
18
18
 
19
19
  const COMMANDS = new Set([
20
- 'chat', 'call', 'session', 'skill', 'plugin', 'design', 'config', 'init',
20
+ 'chat', 'call', 'benchmark', 'session', 'skill', 'plugin', 'design', 'config', 'init',
21
21
  'help', 'project', 'code', 'review', 'mcp', 'permissions',
22
22
  'provider', 'providers', 'model', 'models',
23
23
  ]);
@@ -53,6 +53,7 @@ Usage:
53
53
  Commands:
54
54
 
55
55
  winter call <prompt> Call all configured providers
56
+ winter benchmark [providers] Benchmark model intelligence
56
57
  winter session <action> Session management
57
58
  winter skill <action> Skill management
58
59
  winter plugin <action> Plugin management
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "winter-super-cli",
3
- "version": "2026.5.28",
3
+ "version": "2026.5.30",
4
4
  "description": "❄️ AI-Powered Development CLI with Interactive REPL",
5
5
  "type": "module",
6
6
  "main": "bin/winter.js",
@@ -0,0 +1,352 @@
1
+ /**
2
+ * Benchmark Engine — Đo độ thông minh của models trong Winter CLI
3
+ *
4
+ * Cố định câu hỏi test (logic, coding, math, reasoning, language)
5
+ * + Coding task thật → chạy qua providers → chấm điểm → so sánh
6
+ */
7
+
8
+ import { colors } from '../cli/snowflake-logo.js';
9
+
10
+ // ── Question Bank ────────────────────────────────────────────────────────────
11
+
12
/**
 * Fixed question bank used by the benchmark.
 *
 * Each entry has:
 *   - id:       unique identifier shown in the per-item breakdown
 *   - category: grouping key used for the per-category summary
 *   - question: prompt sent verbatim to the provider
 *   - keywords: substrings looked for (case-insensitively) in the answer
 *   - weight:   multiplier applied to the item's score in the overall total
 */
const BENCHMARK_QUESTIONS = [
  // Syllogism with an invalid conclusion.
  {
    id: 'q01',
    category: 'logic',
    question: `If all cats are mammals and some mammals are dogs, are all cats dogs? Explain your reasoning step by step.`,
    keywords: ['not', 'no', 'incorrect', 'cannot conclude', 'not necessarily', 'invalid'],
    weight: 1,
  },
  // Small function-writing exercise.
  {
    id: 'q02',
    category: 'coding',
    question: `Write a JavaScript function called isPalindrome that checks if a string is a palindrome (reads the same forwards and backwards). Include example usage.`,
    keywords: ['function', 'palindrome', 'reverse', 'split', 'return'],
    weight: 1.5,
  },
  // Basic percentage arithmetic.
  {
    id: 'q03',
    category: 'math',
    question: `What is 15% of 200? Show your calculation.`,
    keywords: ['30', '15', '200', '0.15'],
    weight: 0.5,
  },
  // Bat-and-ball word problem.
  {
    id: 'q04',
    category: 'reasoning',
    question: `A bat and a ball cost $1.10 in total. The bat costs $1.00 more than the ball. How much does the ball cost? Think carefully.`,
    keywords: ['0.05', '5 cents', '5 cent', '0.05$', '5¢', 'five cents'],
    weight: 1.5,
  },
  // Spot the number/string mix-up in add(5, '3').
  {
    id: 'q05',
    category: 'coding',
    question: `What's wrong with this code and how would you fix it?\n\nfunction add(a, b) {\n return a + b;\n}\nconsole.log(add(5, '3'));`,
    keywords: ['string', 'type', 'concatenation', 'number', 'parse', 'typeof', 'coercion'],
    weight: 1,
  },
  // English -> Vietnamese translation.
  {
    id: 'q06',
    category: 'language',
    question: `Translate this sentence to Vietnamese: "Good morning, how are you today?"`,
    keywords: ['chào', 'sáng', 'khỏe', 'hôm nay', 'bạn'],
    weight: 0.5,
  },
  // Water-jug measuring puzzle.
  {
    id: 'q07',
    category: 'logic',
    question: `You have a 3-gallon jug and a 5-gallon jug. How can you measure exactly 4 gallons of water? Explain step by step.`,
    keywords: ['fill', 'pour', '3', '5', '4', 'empty'],
    weight: 1.5,
  },
  // Recursion plus memoization explanation.
  {
    id: 'q08',
    category: 'coding',
    question: `Write a recursive function to calculate the nth Fibonacci number. Explain how memoization can optimize it.`,
    keywords: ['function', 'fibonacci', 'recursive', 'memoization', 'cache'],
    weight: 1.5,
  },
];
70
+
71
/**
 * Fixed coding-task bank used by the benchmark.
 *
 * Each entry has:
 *   - id / category / title: identification and display fields
 *   - description:           prompt sent verbatim to the provider
 *   - evaluationCriteria:    substrings looked for (case-insensitively) in the answer
 *   - weight:                multiplier applied to the item's score in the overall total
 */
const BENCHMARK_TASKS = [
  // Fetch wrapper covering network, HTTP and JSON-parsing failures.
  {
    id: 't01',
    category: 'coding-task',
    title: 'API Fetch with Error Handling',
    description: 'Write a JavaScript function that fetches JSON data from a URL, handles network errors, HTTP errors, and invalid JSON responses gracefully.',
    evaluationCriteria: ['error handling', 'try/catch', 'async/await', 'fetch', 'response.ok'],
    weight: 2,
  },
  // Minimal publish/subscribe class.
  {
    id: 't02',
    category: 'coding-task',
    title: 'Event Emitter Class',
    description: 'Create a simple EventEmitter class in JavaScript with on(), off(), and emit() methods. It should support multiple listeners for the same event and removing listeners.',
    evaluationCriteria: ['class', 'on', 'off', 'emit', 'listeners', 'events'],
    weight: 2,
  },
  // Bug hunt: wrong property casing and reduce() without an initial value.
  {
    id: 't03',
    category: 'coding-task',
    title: 'Fix This Bug',
    description: `What's wrong with this code? Identify ALL bugs and provide a fixed version:\n\nconst users = [\n { name: 'Alice', age: 30 },\n { name: 'Bob', age: 25 },\n { name: 'Charlie', age: 35 },\n];\n\nconst adultUsers = users.filter(u => u.age >= 18);\nadultUsers.forEach(u => {\n console.log(u.Name);\n});\n\nadultUsers.sort((a, b) => a.age - b.age);\nconst totalAge = adultUsers.reduce((acc, u) => acc + u.age);\nconsole.log('Average age:', totalAge / adultUsers.length);`,
    evaluationCriteria: ['Name', 'name', 'undefined', 'reduce', 'initial', 'initialize', 'capital N'],
    weight: 2.5,
  },
];
97
+
98
+ // ── Scoring ──────────────────────────────────────────────────────────────────
99
+
100
/**
 * Score a free-text answer against a question's expected keywords.
 *
 * The score is the fraction of distinct keywords found in the answer
 * (case-insensitive substring match) plus a small bonus for longer answers,
 * capped at 1. Note that the length bonus is independent of keyword matches.
 *
 * @param {{ keywords?: string[] }} question - Question definition. A missing
 *   or non-array `keywords` is treated as an empty list.
 * @param {string} answer - Raw answer text.
 * @returns {number} Score in [0, 1]; 0 for empty, blank or non-string answers.
 */
function scoreAnswer(question, answer) {
  if (typeof answer !== 'string') return 0;

  const text = answer.trim();
  if (text === '') return 0;

  // Matching is case-insensitive, so keywords that differ only by case are the
  // same check; collapse them so one hit is not credited twice.
  const rawKeywords = Array.isArray(question?.keywords) ? question.keywords : [];
  const keywords = [...new Set(rawKeywords.map((kw) => String(kw).toLowerCase()))];

  const lower = text.toLowerCase();
  const matches = keywords.filter((kw) => lower.includes(kw)).length;
  const ratio = keywords.length > 0 ? matches / keywords.length : 0;

  // Count words on the trimmed text: splitting untrimmed input produces empty
  // tokens for leading/trailing whitespace and inflates the count.
  const words = text.split(/\s+/).length;
  const lengthBonus = words > 50 ? 0.1 : words > 20 ? 0.05 : 0;

  return Math.min(1, ratio + lengthBonus);
}
120
+
121
/**
 * Score a coding-task answer against the task's evaluation criteria.
 *
 * The score is the fraction of distinct criteria found in the answer
 * (case-insensitive substring match) plus a small bonus for longer answers,
 * capped at 1. Note that the length bonus is independent of criteria matches.
 *
 * @param {{ evaluationCriteria?: string[] }} task - Task definition. A missing
 *   or non-array `evaluationCriteria` is treated as an empty list.
 * @param {string} answer - Raw answer text.
 * @returns {number} Score in [0, 1]; 0 for empty, blank or non-string answers.
 */
function scoreTask(task, answer) {
  if (typeof answer !== 'string') return 0;

  const text = answer.trim();
  if (text === '') return 0;

  // Matching is case-insensitive, so criteria such as 'Name' and 'name' are the
  // same check; collapse them so one hit is not credited twice.
  const rawCriteria = Array.isArray(task?.evaluationCriteria) ? task.evaluationCriteria : [];
  const criteria = [...new Set(rawCriteria.map((criterion) => String(criterion).toLowerCase()))];

  const lower = text.toLowerCase();
  const matches = criteria.filter((criterion) => lower.includes(criterion)).length;
  const ratio = criteria.length > 0 ? matches / criteria.length : 0;

  // Count words on the trimmed text: splitting untrimmed input produces empty
  // tokens for leading/trailing whitespace and inflates the count.
  const words = text.split(/\s+/).length;
  const lengthBonus = words > 100 ? 0.1 : words > 50 ? 0.05 : 0;

  return Math.min(1, ratio + lengthBonus);
}
139
+
140
+ // ── Benchmark Runner ─────────────────────────────────────────────────────────
141
+
142
/**
 * Runs the fixed question and coding-task banks against AI providers, scores
 * each answer, and renders the outcome as text.
 */
export class BenchmarkRunner {
  /**
   * @param {object} aiManager - Provider manager. This class calls its
   *   `init()` and `sendRequestToProvider()` methods and reads `providers[name]`.
   */
  constructor(aiManager) {
    this.ai = aiManager;
  }

  /**
   * Run all benchmark questions across specified providers.
   *
   * Providers are benchmarked one after another; only providers that exist on
   * the manager and have a truthy `ready` flag are used.
   *
   * @param {string[]|string} providerNames - Provider name(s), e.g. ['claude', 'openai', 'ollama'].
   *   Duplicate names are benchmarked once.
   * @param {object} options
   * @param {boolean} options.tasks - Whether to include coding tasks (default: true)
   * @param {boolean} options.questions - Whether to include fixed questions (default: true)
   * @returns {Promise<object>} Either `{ error }` when no provider is ready, or
   *   `{ timestamp, totalElapsed, providers, ranking }`.
   */
  async run(providerNames, options = {}) {
    const { questions = true, tasks = true } = options;

    await this.ai.init();

    // Normalise the input: accept a single name, ignore non-string entries and
    // drop duplicates so a provider is not benchmarked (and billed) twice.
    const requested = Array.isArray(providerNames) ? providerNames : [providerNames];
    const uniqueNames = [...new Set(requested)].filter((name) => typeof name === 'string' && name !== '');

    // Filter to only ready providers
    const providers = uniqueNames
      .map((name) => ({ name, provider: this.ai.providers?.[name] }))
      .filter(({ provider }) => provider && provider.ready);

    if (providers.length === 0) {
      return { error: 'No ready providers found. Configure providers in winter.json first.' };
    }

    // Build one flat work list so questions and tasks share the same run path.
    const items = [];
    if (questions) {
      for (const q of BENCHMARK_QUESTIONS) {
        items.push({ type: 'question', definition: q, prompt: q.question, scorer: scoreAnswer });
      }
    }
    if (tasks) {
      for (const t of BENCHMARK_TASKS) {
        items.push({ type: 'task', definition: t, prompt: t.description, scorer: scoreTask });
      }
    }

    const results = {};
    const startTime = Date.now();

    for (const { name, provider } of providers) {
      console.log(`${colors.dim}Benchmarking ${colors.bright}${name}${colors.reset}${colors.dim}...${colors.reset}`);

      // Timed per provider; measuring from the global start would charge later
      // providers with the time spent on earlier ones.
      const providerStart = Date.now();
      const providerResults = [];
      let totalScore = 0;
      let maxScore = 0;

      for (const item of items) {
        const result = await this._runItem(provider, item);
        providerResults.push(result);
        totalScore += result.weightedScore;
        maxScore += result.maxWeightedScore;
      }

      const overall = maxScore > 0 ? Math.round((totalScore / maxScore) * 100) : 0;

      results[name] = {
        provider: name,
        model: provider.model,
        results: providerResults,
        totalScore,
        maxScore,
        overall,
        elapsed: Date.now() - providerStart,
      };
    }

    return {
      timestamp: new Date().toISOString(),
      totalElapsed: Date.now() - startTime,
      providers: results,
      // Sort providers by overall score descending
      ranking: Object.values(results)
        .sort((a, b) => b.overall - a.overall)
        .map((r) => ({ name: r.provider, model: r.model, score: r.overall, elapsed: r.elapsed })),
    };
  }

  /**
   * Send one prompt to a provider and return the answer text.
   *
   * @param {object} provider - Provider entry; its `model` is forwarded in the request options.
   * @param {string} prompt - User prompt.
   * @returns {Promise<string>} The answer, '' when the response has no text
   *   content, or `[ERROR: <message>]` when the request throws.
   */
  async askProvider(provider, prompt) {
    const { answer } = await this._request(provider, prompt);
    return answer;
  }

  /**
   * Send one prompt and report whether the request failed.
   *
   * @param {object} provider
   * @param {string} prompt
   * @returns {Promise<{ answer: string, failed: boolean }>}
   */
  async _request(provider, prompt) {
    try {
      const messages = [
        { role: 'system', content: 'You are a helpful AI assistant. Answer concisely and accurately.' },
        { role: 'user', content: prompt },
      ];
      const data = await this.ai.sendRequestToProvider(provider, messages, {
        enableTools: false,
        model: provider.model,
      });
      const content = data.choices?.[0]?.message?.content;
      // Only plain text can be scored and previewed; anything else counts as no answer.
      return { answer: typeof content === 'string' ? content : '', failed: false };
    } catch (err) {
      return { answer: `[ERROR: ${err?.message ?? String(err)}]`, failed: true };
    }
  }

  /**
   * Run and score a single question or task for one provider.
   *
   * @param {object} provider
   * @param {{ type: string, definition: object, prompt: string, scorer: Function }} item
   * @returns {Promise<object>} Result record for the item.
   */
  async _runItem(provider, { type, definition, prompt, scorer }) {
    const started = Date.now();
    const { answer, failed } = await this._request(provider, prompt);
    const elapsed = Date.now() - started;

    // A failed request is not an answer: the error text must never earn
    // keyword or length credit.
    const score = failed ? 0 : scorer(definition, answer);

    return {
      type,
      id: definition.id,
      category: definition.category,
      ...(type === 'task' ? { title: definition.title } : {}),
      question: prompt,
      answer: answer.slice(0, 500), // truncate for display
      score,
      failed,
      weightedScore: score * definition.weight,
      maxWeightedScore: definition.weight,
      elapsed,
    };
  }

  // ── Format Results ────────────────────────────────────────────────────────

  /**
   * Render a benchmark result (as returned by `run`) as a coloured text report.
   *
   * @param {object} benchmarkResult
   * @returns {string}
   */
  formatResults(benchmarkResult) {
    if (benchmarkResult.error) {
      return `\n${colors.red}${benchmarkResult.error}${colors.reset}\n`;
    }

    const lines = [];
    lines.push(`\n${colors.cyan}${'═'.repeat(60)}${colors.reset}`);
    lines.push(`${colors.bright}${colors.cyan} 🧠 WINTER MODEL BENCHMARK${colors.reset}`);
    lines.push(`${colors.cyan}${'═'.repeat(60)}${colors.reset}`);
    lines.push(` ${colors.dim}${benchmarkResult.timestamp}${colors.reset}`);
    lines.push(` ${colors.dim}Total time: ${(benchmarkResult.totalElapsed / 1000).toFixed(1)}s${colors.reset}`);
    lines.push('');

    // Ranking
    lines.push(`${colors.bright}🏆 RANKING${colors.reset}`);
    lines.push(`${'─'.repeat(40)}`);
    benchmarkResult.ranking.forEach((r, i) => {
      const medal = i === 0 ? '🥇' : i === 1 ? '🥈' : i === 2 ? '🥉' : ` ${i + 1}.`;
      const bar = this._scoreBar(r.score, 20);
      lines.push(` ${medal} ${colors.bright}${r.name}${colors.reset} ${bar} ${r.score}%`);
      lines.push(` ${colors.dim}Model: ${r.model} | Time: ${(r.elapsed / 1000).toFixed(1)}s${colors.reset}`);
    });
    lines.push('');

    // Detail per provider
    for (const [name, data] of Object.entries(benchmarkResult.providers)) {
      lines.push(`${colors.bright}${'─'.repeat(50)}${colors.reset}`);
      lines.push(`${colors.bright}📊 ${name}${colors.reset} ${colors.dim}(${data.model})${colors.reset}`);
      lines.push(`${'─'.repeat(50)}`);

      // Unweighted per-category averages (each item counts as 1).
      const categories = {};
      for (const r of data.results) {
        const cat = r.category || 'other';
        if (!categories[cat]) categories[cat] = { count: 0, totalScore: 0, maxScore: 0 };
        categories[cat].count++;
        categories[cat].totalScore += r.score;
        categories[cat].maxScore += 1;
      }

      for (const [cat, stats] of Object.entries(categories)) {
        const catPct = Math.round((stats.totalScore / stats.maxScore) * 100);
        const bar = this._scoreBar(catPct, 10);
        lines.push(` ${bar} ${colors.dim}${cat}:${colors.reset} ${catPct}% (${stats.count} items)`);
      }
      lines.push('');

      // Per-item breakdown
      for (const r of data.results) {
        const icon = r.score >= 0.8 ? '✅' : r.score >= 0.5 ? '🟡' : r.score >= 0.2 ? '🟠' : '❌';
        const label = r.type === 'question' ? r.id : r.title;
        lines.push(` ${icon} ${colors.dim}${label}:${colors.reset} ${Math.round(r.score * 100)}% (${(r.elapsed / 1000).toFixed(1)}s)`);
        // Show preview of answer; tolerate records without a text answer.
        const answerText = String(r.answer ?? '');
        const preview = answerText.replace(/\n/g, ' ').slice(0, 120);
        lines.push(` ${colors.dim}${preview}${answerText.length > 120 ? '...' : ''}${colors.reset}`);
      }
      lines.push('');
    }

    lines.push(`${colors.cyan}${'═'.repeat(60)}${colors.reset}\n`);

    return lines.join('\n');
  }

  /**
   * Build a fixed-width bar for a percentage.
   *
   * @param {number} score - Percentage; values outside 0–100 (or non-finite)
   *   are clamped so `repeat()` never receives a negative count.
   * @param {number} [width=20] - Total bar width in characters.
   * @returns {string}
   */
  _scoreBar(score, width = 20) {
    const pct = Number.isFinite(score) ? Math.min(100, Math.max(0, score)) : 0;
    const filled = Math.round((pct / 100) * width);
    const empty = width - filled;
    const filledChar = '█';
    const emptyChar = '░';
    return colors.green + filledChar.repeat(filled) + colors.dim + emptyChar.repeat(empty) + colors.reset;
  }

  // ── History ───────────────────────────────────────────────────────────────

  /**
   * One plain-text line per ranked provider, joined with newlines.
   *
   * @param {object} benchmarkResult - Result containing a `ranking` array.
   * @returns {string}
   */
  formatHistorySummary(benchmarkResult) {
    return benchmarkResult.ranking
      .map((r) => `[${r.name}] Score: ${r.score}% | Model: ${r.model} | Time: ${(r.elapsed / 1000).toFixed(1)}s`)
      .join('\n');
  }
}
350
+
351
+ // Export question/task banks for testing
352
+ export { BENCHMARK_QUESTIONS, BENCHMARK_TASKS, scoreAnswer, scoreTask };
@@ -1,7 +1,7 @@
1
1
  /**
2
2
  * Dynamic System Prompt Builder
3
3
  * Builds context-aware system prompts based on task, role, and session state.
4
- * Small models get aggressive structural guidance to compensate for limited capability.
4
+ * Small models get compact structural guidance so the task stays in focus.
5
5
  */
6
6
 
7
7
  import { isSmallModel, getModelCapabilityLabel } from '../model-capabilities.js';
@@ -40,98 +40,93 @@ function buildEnvironmentSummary() {
40
40
  ].join('\n');
41
41
  }
42
42
 
43
- /**
44
- * Build a "boosted" system prompt for small/tiny models.
45
- * Small models need: more explicit structure, strict formats, explicit step-by-step forcing.
46
- */
47
- function buildSmallModelSystemPrompt({
48
- role = 'coding',
49
- context,
50
- tools = [],
51
- session,
52
- environment,
53
- design,
54
- resourceContext,
55
- modelTier,
56
- } = {}) {
57
- const parts = [
58
- `You are Winter, an expert AI coding assistant. You are running on a ${getModelCapabilityLabel(modelTier)}.`,
59
- '',
60
- '## CRITICAL: YOU MUST THINK STEP BY STEP',
61
- '',
62
- 'Because you are a smaller model, you MUST use structured thinking to produce quality results.',
63
- 'Before any response, use <thinking> tags to reason through the problem.',
64
- '',
65
- 'Your thinking must cover:',
66
- '1. What does the user want? (restate briefly)',
67
- '2. What files/tools do I need to use?',
68
- '3. What is the best approach?',
69
- '4. What could go wrong? Edge cases?',
70
- '5. Is my solution complete and correct?',
71
- '',
72
- 'After thinking, THEN act. Never skip the thinking step.',
73
- '',
74
- '## Core Principles',
75
- ...BASE_PRINCIPLES.map((p, i) => `${i + 1}. ${p}`),
76
- '',
77
- '## Runtime Environment',
78
- environment || buildEnvironmentSummary(),
79
- '',
80
- ];
43
/**
 * Format tool names as a comma-separated list, capped at the first 10 entries.
 *
 * @param {string[]} [tools=[]] - Tool names. Anything that is not an array
 *   (including `null`, which a default parameter does not cover) yields ''.
 * @returns {string} The joined names, or '' when there are none.
 */
function formatToolList(tools = []) {
  if (!Array.isArray(tools)) return '';
  return tools.slice(0, 10).join(', ');
}
81
46
 
82
- if (tools.length > 0) {
83
- parts.push('## Available Tools', tools.join(', '), '');
84
- }
47
/**
 * Append the context sections shared by the prompt builders to `parts`,
 * mutating the array in place.
 *
 * Sections, in order: runtime environment, the last 5 session memory entries,
 * the last 3 active plans, design context, resource context (only when
 * `includeResources` is true) and a one-line task classification.
 *
 * @param {string[]} parts - Prompt lines collected so far; extended in place.
 * @param {object} [options]
 * @param {string} [options.environment] - Environment summary; when falsy,
 *   `buildEnvironmentSummary()` is used instead.
 * @param {{ memory?: Array, plans?: Array }} [options.session]
 * @param {object} [options.design] - Either `{ brand, content }` or
 *   `{ type: 'design_hint', brands }`.
 * @param {string} [options.resourceContext] - Extra context, truncated to 1200 characters.
 * @param {object} [options.context] - Task info; `category` and `type` are read.
 * @param {boolean} [options.includeResources=false]
 * @returns {void}
 */
function appendSharedContext(parts, { environment, session, design, resourceContext, context, includeResources = false } = {}) {
  parts.push('## Runtime Environment', environment || buildEnvironmentSummary(), '');

  if (session?.memory?.length) {
    parts.push('## Session Memory');
    session.memory.slice(-5).forEach((m) => parts.push('- ' + String(m).slice(0, 100)));
    parts.push('');
  }

  if (session?.plans?.length) {
    parts.push('## Active Plans');
    session.plans.slice(-3).forEach((p) => {
      // Tolerate nullish entries instead of throwing on `p.title`.
      const label = p?.title || String(p ?? '').slice(0, 80);
      if (label) parts.push('- ' + label);
    });
    parts.push('');
  }

  if (design) {
    parts.push('## Design Context');
    if (design.brand) {
      parts.push('Brand: ' + design.brand);
      // A brand without loaded content must not crash prompt building.
      const content = typeof design.content === 'string' ? design.content : '';
      parts.push(content.split('\n').filter(Boolean).slice(0, 18).join('\n'));
    } else if (design.type === 'design_hint') {
      const brands = Array.isArray(design.brands) ? design.brands : [];
      if (brands.length > 0) {
        parts.push('Available design systems: ' + brands.slice(0, 5).join(', '));
      }
    }
    parts.push('');
  }

  if (includeResources && resourceContext) {
    parts.push(String(resourceContext).trim().slice(0, 1200), '');
  }

  if (context && typeof context === 'object') {
    parts.push('Task: ' + (context.category || 'coding') + ' / ' + (context.type || 'simple'), '');
  }
}
81
+
82
/**
 * Build the compact system prompt used for small models.
 *
 * The prompt consists of a short identity line, four operating rules, the
 * tool list (when non-empty), the shared context sections with resources
 * disabled, and a two-line response-shape footer. Empty entries are removed
 * before joining, so the result contains no blank lines between sections.
 *
 * @param {object} [options] - Same option bag as `buildSystemPrompt`; `tools`
 *   and `modelTier` are read here, the rest is forwarded to `appendSharedContext`.
 * @returns {string} The prompt text, lines joined with '\n'.
 */
function buildCompactSmallModelPrompt(options = {}) {
  const { tools = [], modelTier } = options;

  const identityLine =
    'You are Winter, an AI coding assistant running on a ' + getModelCapabilityLabel(modelTier) + '.';

  const operatingRules = [
    '## Operating Rules',
    '1. Understand the user request first. If project state matters, inspect files before answering.',
    '2. Keep context tight. Use only relevant tools and avoid long explanations.',
    '3. For coding: Read/Grep/Glob -> Edit/Write -> Bash/test. Do not guess file paths.',
    '4. Final answer in Vietnamese. Mention changed files and verification only.',
  ];

  const responseShape = [
    '## Response Shape',
    '- If action is needed, use tools instead of describing the action.',
    '- Keep final output short and concrete.',
  ];

  const parts = [identityLine, '', ...operatingRules, ''];

  const toolList = formatToolList(tools);
  if (toolList) {
    parts.push('## Tools', toolList, '');
  }

  // Small models never receive the resource context.
  appendSharedContext(parts, { ...options, includeResources: false });

  parts.push(...responseShape);

  return parts.filter(Boolean).join('\n');
}
107
+
108
/**
 * Build the standard system prompt used for models that are not small.
 *
 * The prompt consists of an identity line, the numbered base principles,
 * tool-usage guidance, the tool list (when non-empty), the shared context
 * sections, and a closing language instruction. Resource context is included
 * only when one was supplied and the role is 'design' or 'ui'. Empty entries
 * are removed before joining.
 *
 * @param {object} [options] - Same option bag as `buildSystemPrompt`; `role`,
 *   `tools` and `resourceContext` are read here, the rest is forwarded to
 *   `appendSharedContext`.
 * @returns {string} The prompt text, lines joined with '\n'.
 */
function buildStandardSystemPrompt(options = {}) {
  const { role = 'coding', tools = [], resourceContext } = options;

  const numberedPrinciples = BASE_PRINCIPLES.map((principle, index) => (index + 1) + '. ' + principle);

  const parts = [
    'You are Winter, an expert AI coding assistant.',
    '',
    '## Core Principles',
    ...numberedPrinciples,
    '',
    '## Tool Usage',
    'Use tools when they materially improve correctness. Inspect before editing. Verify after changes.',
    'Never invent file paths, APIs, command output, or test results.',
    '',
  ];

  const toolList = formatToolList(tools);
  if (toolList) {
    parts.push('## Tools', toolList, '');
  }

  const isDesignRole = role === 'design' || role === 'ui';
  const includeResources = Boolean(resourceContext) && isDesignRole;
  appendSharedContext(parts, { ...options, includeResources });

  parts.push('Always respond in Vietnamese.');

  return parts.filter(Boolean).join('\n');
}
129
+
135
130
  export function buildSystemPrompt({
136
131
  role = 'coding',
137
132
  context,
@@ -142,17 +137,10 @@ export function buildSystemPrompt({
142
137
  resourceContext,
143
138
  modelTier,
144
139
  } = {}) {
145
- // ALL models get the deep-thinking system prompt for maximum code quality
146
- return buildSmallModelSystemPrompt({
147
- role,
148
- context,
149
- tools,
150
- session,
151
- environment,
152
- design,
153
- resourceContext,
154
- modelTier,
155
- });
140
+ const options = { role, context, tools, session, environment, design, resourceContext, modelTier };
141
+ return isSmallModel(modelTier)
142
+ ? buildCompactSmallModelPrompt(options)
143
+ : buildStandardSystemPrompt(options);
156
144
  }
157
145
 
158
146
  export function buildFastSystemPrompt({
@@ -164,7 +152,7 @@ export function buildFastSystemPrompt({
164
152
  return [
165
153
  'Winter (fast mode - small model). Be concise. Use tools when needed.',
166
154
  tools.length > 0 ? `Tools: ${tools.join(', ')}` : '',
167
- 'THINK inside <thinking> before acting. Keep responses to 1 sentence.',
155
+ 'Use a brief private plan, then answer in 1 sentence.',
168
156
  ].filter(Boolean).join('\n');
169
157
  }
170
158
 
@@ -182,11 +170,12 @@ export function buildAgentSystemPrompt(role, { tools = [], modelTier } = {}) {
182
170
  debug: 'You are a debug specialist. Use systematic elimination to find root causes.',
183
171
  research: 'You search codebases and documentation to answer questions comprehensively.',
184
172
  browser: 'You interact with web pages via browser automation. Report findings clearly.',
173
+ coding: 'You solve coding tasks directly. Inspect files, edit surgically, and verify.',
185
174
  };
186
175
 
187
176
  const base = roleConfigs[role] || roleConfigs.coding;
188
177
  const smallNote = modelTier && isSmallModel(modelTier)
189
- ? '\n\nYou are running on a small model. Use <thinking> tags and reason step by step before each action.'
178
+ ? '\n\nYou are running on a small model. Keep context tight, use tools early, and keep final output short.'
190
179
  : '';
191
180
 
192
181
  return [
@@ -103,16 +103,17 @@ export class AIProviderManager {
103
103
  if (available) this.activeProvider = available;
104
104
  }
105
105
 
106
- // Auto-detect model capability tier
107
- const providerConfig = this.providers[this.activeProvider] || {};
108
- this._modelTier = classifyModelTier(providerConfig.model, this.activeProvider);
109
-
110
- // Eager-load local resources (design systems, agent instructions) for contextual injection
111
- this._loadResourceContext(); // fire-and-forget
106
+ this.updateActiveModelTier();
112
107
 
113
108
  this.initialized = true;
114
109
  }
115
110
 
111
+ updateActiveModelTier() {
112
+ const providerConfig = this.providers[this.activeProvider] || {};
113
+ this._modelTier = classifyModelTier(providerConfig.model, this.activeProvider);
114
+ return this._modelTier;
115
+ }
116
+
116
117
  async loadAuthToken() {
117
118
  // 1) Honor explicit environment variables (highest priority)
118
119
  const envToken = process.env.CLAUDE_AUTH_TOKEN || process.env.ANTHROPIC_API_KEY || process.env.CLAUDE_TOKEN || null;
@@ -172,6 +173,7 @@ export class AIProviderManager {
172
173
  const providerName = this.normalizeProviderName(name);
173
174
  if (this.providers[providerName]) {
174
175
  this.activeProvider = providerName;
176
+ this.updateActiveModelTier();
175
177
  return true;
176
178
  }
177
179
  return false;
@@ -594,7 +596,9 @@ export class AIProviderManager {
594
596
 
595
597
  getSystemPrompt(options = {}) {
596
598
  const taskInfo = options.task ? classifyTask(options.task) : null;
597
- const tools = this.tools ? Object.keys(this.tools) : [];
599
+ const tools = Array.isArray(this.tools)
600
+ ? this.tools.map(tool => tool?.function?.name || tool?.name).filter(Boolean)
601
+ : [];
598
602
  const sessionInfo = {
599
603
  memory: options.memory || [],
600
604
  plans: options.plans || [],
@@ -633,8 +637,7 @@ export class AIProviderManager {
633
637
  ? '\n\n' + SuccessCriteria.fromRequest(options.task).buildPrompt()
634
638
  : '';
635
639
 
636
- // Use cached resource context (eager-loaded in init())
637
- const resourceContext = this._cachedResourceContext || '';
640
+ const resourceContext = options.includeResources ? (this._cachedResourceContext || '') : '';
638
641
 
639
642
  // Auto-detect relevant design guide for UI/design tasks
640
643
  let designGuide = null;