winter-super-cli 2026.5.27 → 2026.5.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/winter.js CHANGED
@@ -17,7 +17,7 @@ const pkg = JSON.parse(readFileSync(new URL('../package.json', import.meta.url),
17
17
  const version = pkg.version;
18
18
 
19
19
  const COMMANDS = new Set([
20
- 'chat', 'call', 'session', 'skill', 'plugin', 'design', 'config', 'init',
20
+ 'chat', 'call', 'benchmark', 'session', 'skill', 'plugin', 'design', 'config', 'init',
21
21
  'help', 'project', 'code', 'review', 'mcp', 'permissions',
22
22
  'provider', 'providers', 'model', 'models',
23
23
  ]);
@@ -53,6 +53,7 @@ Usage:
53
53
  Commands:
54
54
 
55
55
  winter call <prompt> Call all configured providers
56
+ winter benchmark [providers] Benchmark model intelligence
56
57
  winter session <action> Session management
57
58
  winter skill <action> Skill management
58
59
  winter plugin <action> Plugin management
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "winter-super-cli",
3
- "version": "2026.5.27",
3
+ "version": "2026.5.29",
4
4
  "description": "❄️ AI-Powered Development CLI with Interactive REPL",
5
5
  "type": "module",
6
6
  "main": "bin/winter.js",
@@ -0,0 +1,352 @@
1
+ /**
2
+ * Benchmark Engine — measures how capable the models used in Winter CLI are
3
+ *
4
+ * A fixed set of test questions (logic, coding, math, reasoning, language)
5
+ * + real coding tasks → run through the providers → score → compare
6
+ */
7
+
8
+ import { colors } from '../cli/snowflake-logo.js';
9
+
10
+ // ── Question Bank ────────────────────────────────────────────────────────────
11
+
12
/**
 * Fixed question bank.
 *
 * Each entry has:
 *   id       - short identifier used as the label in the per-item report
 *   category - grouping key for the per-category summary
 *   question - prompt sent to the provider verbatim
 *   keywords - terms looked for in the answer (case-insensitive substring match)
 *   weight   - multiplier applied to the 0..1 score of this question
 */
const BENCHMARK_QUESTIONS = [
  // Syllogism with an invalid conclusion.
  {
    id: 'q01',
    category: 'logic',
    question: 'If all cats are mammals and some mammals are dogs, are all cats dogs? Explain your reasoning step by step.',
    keywords: ['not', 'no', 'incorrect', 'cannot conclude', 'not necessarily', 'invalid'],
    weight: 1,
  },
  // Small code-writing exercise.
  {
    id: 'q02',
    category: 'coding',
    question: 'Write a JavaScript function called isPalindrome that checks if a string is a palindrome (reads the same forwards and backwards). Include example usage.',
    keywords: ['function', 'palindrome', 'reverse', 'split', 'return'],
    weight: 1.5,
  },
  // Basic percentage arithmetic.
  {
    id: 'q03',
    category: 'math',
    question: 'What is 15% of 200? Show your calculation.',
    keywords: ['30', '15', '200', '0.15'],
    weight: 0.5,
  },
  // Bat-and-ball problem; the keywords are spellings of "5 cents".
  {
    id: 'q04',
    category: 'reasoning',
    question: 'A bat and a ball cost $1.10 in total. The bat costs $1.00 more than the ball. How much does the ball cost? Think carefully.',
    keywords: ['0.05', '5 cents', '5 cent', '0.05$', '5¢', 'five cents'],
    weight: 1.5,
  },
  // Number + string concatenation caused by type coercion.
  {
    id: 'q05',
    category: 'coding',
    question: "What's wrong with this code and how would you fix it?\n\nfunction add(a, b) {\n return a + b;\n}\nconsole.log(add(5, '3'));",
    keywords: ['string', 'type', 'concatenation', 'number', 'parse', 'typeof', 'coercion'],
    weight: 1,
  },
  // English → Vietnamese translation.
  {
    id: 'q06',
    category: 'language',
    question: 'Translate this sentence to Vietnamese: "Good morning, how are you today?"',
    keywords: ['chào', 'sáng', 'khỏe', 'hôm nay', 'bạn'],
    weight: 0.5,
  },
  // Water-jug puzzle.
  {
    id: 'q07',
    category: 'logic',
    question: 'You have a 3-gallon jug and a 5-gallon jug. How can you measure exactly 4 gallons of water? Explain step by step.',
    keywords: ['fill', 'pour', '3', '5', '4', 'empty'],
    weight: 1.5,
  },
  // Recursion plus memoization.
  {
    id: 'q08',
    category: 'coding',
    question: 'Write a recursive function to calculate the nth Fibonacci number. Explain how memoization can optimize it.',
    keywords: ['function', 'fibonacci', 'recursive', 'memoization', 'cache'],
    weight: 1.5,
  },
];
70
+
71
/**
 * Coding-task bank.
 *
 * Each entry has:
 *   id                 - short identifier
 *   category           - grouping key for the per-category summary
 *   title              - label shown in the per-item report
 *   description        - prompt sent to the provider verbatim
 *   evaluationCriteria - terms looked for in the answer (case-insensitive substring match)
 *   weight             - multiplier applied to the 0..1 score of this task
 */
const BENCHMARK_TASKS = [
  {
    id: 't01',
    category: 'coding-task',
    title: 'API Fetch with Error Handling',
    description: 'Write a JavaScript function that fetches JSON data from a URL, handles network errors, HTTP errors, and invalid JSON responses gracefully.',
    evaluationCriteria: ['error handling', 'try/catch', 'async/await', 'fetch', 'response.ok'],
    weight: 2,
  },
  {
    id: 't02',
    category: 'coding-task',
    title: 'Event Emitter Class',
    description: 'Create a simple EventEmitter class in JavaScript with on(), off(), and emit() methods. It should support multiple listeners for the same event and removing listeners.',
    evaluationCriteria: ['class', 'on', 'off', 'emit', 'listeners', 'events'],
    weight: 2,
  },
  {
    id: 't03',
    category: 'coding-task',
    title: 'Fix This Bug',
    // The snippet reads `u.Name` (capital N) and calls reduce() without an initial value.
    description: "What's wrong with this code? Identify ALL bugs and provide a fixed version:\n\nconst users = [\n { name: 'Alice', age: 30 },\n { name: 'Bob', age: 25 },\n { name: 'Charlie', age: 35 },\n];\n\nconst adultUsers = users.filter(u => u.age >= 18);\nadultUsers.forEach(u => {\n console.log(u.Name);\n});\n\nadultUsers.sort((a, b) => a.age - b.age);\nconst totalAge = adultUsers.reduce((acc, u) => acc + u.age);\nconsole.log('Average age:', totalAge / adultUsers.length);",
    // NOTE(review): scoreTask lower-cases both sides, so 'Name' and 'name' act as the
    // same term and are counted twice — confirm whether that extra weight is intended.
    evaluationCriteria: ['Name', 'name', 'undefined', 'reduce', 'initial', 'initialize', 'capital N'],
    weight: 2.5,
  },
];
97
+
98
+ // ── Scoring ──────────────────────────────────────────────────────────────────
99
+
100
/**
 * Count the whitespace-separated words in `text`.
 * Leading and trailing whitespace is ignored so that padding (for example a
 * trailing newline) does not add phantom words.
 * @param {string} text
 * @returns {number}
 */
function countWords(text) {
  const trimmed = text.trim();
  return trimmed === '' ? 0 : trimmed.split(/\s+/).length;
}

/**
 * Shared scorer behind scoreAnswer() and scoreTask().
 *
 * The score is the fraction of `terms` found in `answer`, plus a small bonus
 * for longer answers, capped at 1.
 *
 * NOTE: matching is a case-insensitive substring test, so short terms such as
 * 'no' or '3' also match inside longer words and numbers ("know", "30").
 *
 * @param {string[]|undefined} terms - Expected terms; anything that is not an array counts as empty.
 * @param {*} answer - Provider answer; non-strings and empty strings score 0.
 * @param {number} longThreshold - Word count above which the bonus is 0.1.
 * @param {number} shortThreshold - Word count above which the bonus is 0.05.
 * @returns {number} Score between 0 and 1.
 */
function scoreByTerms(terms, answer, longThreshold, shortThreshold) {
  if (!answer || typeof answer !== 'string') return 0;

  const expected = Array.isArray(terms) ? terms.filter((term) => typeof term === 'string') : [];
  const haystack = answer.toLowerCase();
  const matches = expected.filter((term) => haystack.includes(term.toLowerCase())).length;
  const ratio = expected.length > 0 ? matches / expected.length : 0;

  // Bonus: longer, well-structured answers tend to be better.
  const words = countWords(answer);
  const lengthBonus = words > longThreshold ? 0.1 : words > shortThreshold ? 0.05 : 0;

  return Math.min(1, ratio + lengthBonus);
}

/**
 * Score an answer to one of the fixed questions.
 * Bonus thresholds: more than 50 words → +0.1, more than 20 words → +0.05.
 * @param {{ keywords?: string[] }} question
 * @param {*} answer
 * @returns {number} Score between 0 and 1.
 */
function scoreAnswer(question, answer) {
  return scoreByTerms(question?.keywords, answer, 50, 20);
}

/**
 * Score an answer to one of the coding tasks.
 * Bonus thresholds: more than 100 words → +0.1, more than 50 words → +0.05.
 * @param {{ evaluationCriteria?: string[] }} task
 * @param {*} answer
 * @returns {number} Score between 0 and 1.
 */
function scoreTask(task, answer) {
  return scoreByTerms(task?.evaluationCriteria, answer, 100, 50);
}
139
+
140
+ // ── Benchmark Runner ─────────────────────────────────────────────────────────
141
+
142
/**
 * Runs the fixed question bank and the coding tasks against AI providers,
 * scores every answer and renders the outcome for the terminal.
 */
export class BenchmarkRunner {
  /**
   * @param {object} aiManager - Provider manager. This class uses its `init()`
   *   method, its `providers` map and its `sendRequestToProvider()` method.
   */
  constructor(aiManager) {
    this.ai = aiManager;
  }

  /**
   * Run all benchmark questions across specified providers.
   * Providers are benchmarked one after another, and items within a provider
   * are sent sequentially, so every timing covers a single request.
   *
   * @param {string[]} providerNames - List of provider names (e.g., ['claude', 'openai', 'ollama'])
   * @param {object} options
   * @param {boolean} options.tasks - Whether to include coding tasks (default: true)
   * @param {boolean} options.questions - Whether to include fixed questions (default: true)
   * @returns {Promise<object>} `{ error }` when no requested provider is ready, otherwise
   *   `{ timestamp, totalElapsed, providers, ranking }`.
   */
  async run(providerNames, options = {}) {
    const { questions = true, tasks = true } = options;

    await this.ai.init();

    // Anything that is not an array is treated as "no providers"; duplicate
    // names are dropped so a provider is never benchmarked twice.
    const requested = Array.isArray(providerNames) ? [...new Set(providerNames)] : [];

    // Filter to only ready providers
    const providers = requested
      .map((name) => ({ name, provider: this.ai.providers?.[name] }))
      .filter(({ provider }) => provider && provider.ready);

    if (providers.length === 0) {
      return { error: 'No ready providers found. Configure providers in winter.json first.' };
    }

    const results = {};
    const startTime = Date.now();

    for (const { name, provider } of providers) {
      console.log(`${colors.dim}Benchmarking ${colors.bright}${name}${colors.reset}${colors.dim}...${colors.reset}`);

      // Timed per provider so a later provider is not charged for earlier ones.
      const providerStart = Date.now();
      const providerResults = [];

      // Fixed questions
      if (questions) {
        for (const q of BENCHMARK_QUESTIONS) {
          const { answer, score, elapsed } = await this._evaluate(
            provider,
            q.question,
            (text) => scoreAnswer(q, text),
          );

          providerResults.push({
            type: 'question',
            id: q.id,
            category: q.category,
            question: q.question,
            answer,
            score,
            weightedScore: score * q.weight,
            maxWeightedScore: q.weight,
            elapsed,
          });
        }
      }

      // Coding tasks
      if (tasks) {
        for (const t of BENCHMARK_TASKS) {
          const { answer, score, elapsed } = await this._evaluate(
            provider,
            t.description,
            (text) => scoreTask(t, text),
          );

          providerResults.push({
            type: 'task',
            id: t.id,
            category: t.category,
            title: t.title,
            question: t.description,
            answer,
            score,
            weightedScore: score * t.weight,
            maxWeightedScore: t.weight,
            elapsed,
          });
        }
      }

      const totalScore = providerResults.reduce((sum, r) => sum + r.weightedScore, 0);
      const maxScore = providerResults.reduce((sum, r) => sum + r.maxWeightedScore, 0);
      const overall = maxScore > 0 ? Math.round((totalScore / maxScore) * 100) : 0;

      results[name] = {
        provider: name,
        model: provider.model,
        results: providerResults,
        totalScore,
        maxScore,
        overall,
        elapsed: Date.now() - providerStart,
      };
    }

    return {
      timestamp: new Date().toISOString(),
      totalElapsed: Date.now() - startTime,
      providers: results,
      // Sort providers by overall score descending
      ranking: Object.values(results)
        .sort((a, b) => b.overall - a.overall)
        .map((r) => ({ name: r.provider, model: r.model, score: r.overall, elapsed: r.elapsed })),
    };
  }

  /**
   * Send one prompt to a provider, time it and score the reply.
   * A reply that is an askProvider() error placeholder always scores 0, so the
   * wording of an error message can never match scoring keywords.
   *
   * @param {object} provider - Provider entry taken from `this.ai.providers`.
   * @param {string} prompt - Text sent as the user message.
   * @param {(text: string) => number} scoreFn - Scorer applied to a successful reply.
   * @returns {Promise<{answer: string, score: number, elapsed: number}>}
   *   `answer` is truncated to 500 characters for display.
   */
  async _evaluate(provider, prompt, scoreFn) {
    const started = Date.now();
    const reply = await this.askProvider(provider, prompt);
    const elapsed = Date.now() - started;

    const text = typeof reply === 'string' ? reply : '';
    const score = this._isErrorAnswer(text) ? 0 : scoreFn(text);

    return { answer: text.slice(0, 500), score, elapsed };
  }

  /**
   * Whether `answer` is the `[ERROR: …]` placeholder produced by askProvider().
   * @param {string} answer
   * @returns {boolean}
   */
  _isErrorAnswer(answer) {
    return answer.startsWith('[ERROR: ') && answer.endsWith(']');
  }

  /**
   * Ask a single provider one prompt.
   * Never rejects: a failed request is reported as the string `[ERROR: <message>]`.
   *
   * @param {object} provider - Provider entry; its `model` is forwarded to the request.
   * @param {string} prompt - Text sent as the user message.
   * @returns {Promise<string>} The reply text, '' when the reply carries no text.
   */
  async askProvider(provider, prompt) {
    try {
      const messages = [
        { role: 'system', content: 'You are a helpful AI assistant. Answer concisely and accurately.' },
        { role: 'user', content: prompt },
      ];
      const data = await this.ai.sendRequestToProvider(provider, messages, {
        enableTools: false,
        model: provider.model,
      });
      return this._contentToText(data.choices?.[0]?.message?.content);
    } catch (err) {
      // `err` may be any thrown value, not only an Error instance.
      return `[ERROR: ${err?.message ?? String(err)}]`;
    }
  }

  /**
   * Normalise a message `content` value to plain text.
   * Strings pass through. An array is joined from its string items and from the
   * `text` property of object items (presumably how some providers return
   * multi-part content — verify against the provider adapters). Anything else
   * becomes ''.
   * @param {*} content
   * @returns {string}
   */
  _contentToText(content) {
    if (typeof content === 'string') return content;
    if (!Array.isArray(content)) return '';

    return content
      .map((part) => {
        if (typeof part === 'string') return part;
        return typeof part?.text === 'string' ? part.text : '';
      })
      .join('');
  }

  // ── Format Results ────────────────────────────────────────────────────────

  /**
   * Render a run() result as coloured, multi-line terminal text: header,
   * ranking, then a per-category and per-item breakdown for every provider.
   * @param {object} benchmarkResult - Value returned by run().
   * @returns {string}
   */
  formatResults(benchmarkResult) {
    if (benchmarkResult.error) {
      return `\n${colors.red}${benchmarkResult.error}${colors.reset}\n`;
    }

    const lines = [];
    lines.push(`\n${colors.cyan}${'═'.repeat(60)}${colors.reset}`);
    lines.push(`${colors.bright}${colors.cyan} 🧠 WINTER MODEL BENCHMARK${colors.reset}`);
    lines.push(`${colors.cyan}${'═'.repeat(60)}${colors.reset}`);
    lines.push(` ${colors.dim}${benchmarkResult.timestamp}${colors.reset}`);
    lines.push(` ${colors.dim}Total time: ${(benchmarkResult.totalElapsed / 1000).toFixed(1)}s${colors.reset}`);
    lines.push('');

    // Ranking
    lines.push(`${colors.bright}🏆 RANKING${colors.reset}`);
    lines.push(`${'─'.repeat(40)}`);
    benchmarkResult.ranking.forEach((r, i) => {
      const medal = i === 0 ? '🥇' : i === 1 ? '🥈' : i === 2 ? '🥉' : ` ${i + 1}.`;
      const bar = this._scoreBar(r.score, 20);
      lines.push(` ${medal} ${colors.bright}${r.name}${colors.reset} ${bar} ${r.score}%`);
      lines.push(` ${colors.dim}Model: ${r.model} | Time: ${(r.elapsed / 1000).toFixed(1)}s${colors.reset}`);
    });
    lines.push('');

    // Detail per provider
    for (const [name, data] of Object.entries(benchmarkResult.providers)) {
      lines.push(`${colors.bright}${'─'.repeat(50)}${colors.reset}`);
      lines.push(`${colors.bright}📊 ${name}${colors.reset} ${colors.dim}(${data.model})${colors.reset}`);
      lines.push(`${'─'.repeat(50)}`);

      // Category averages are unweighted: every item counts as 1.
      const categories = {};
      for (const r of data.results) {
        const cat = r.category || 'other';
        if (!categories[cat]) categories[cat] = { count: 0, totalScore: 0, maxScore: 0 };
        categories[cat].count++;
        categories[cat].totalScore += r.score;
        categories[cat].maxScore += 1;
      }

      for (const [cat, stats] of Object.entries(categories)) {
        const catPct = Math.round((stats.totalScore / stats.maxScore) * 100);
        const bar = this._scoreBar(catPct, 10);
        lines.push(` ${bar} ${colors.dim}${cat}:${colors.reset} ${catPct}% (${stats.count} items)`);
      }
      lines.push('');

      // Per-item breakdown
      for (const r of data.results) {
        const icon = r.score >= 0.8 ? '✅' : r.score >= 0.5 ? '🟡' : r.score >= 0.2 ? '🟠' : '❌';
        const label = r.type === 'question' ? r.id : r.title;
        lines.push(` ${icon} ${colors.dim}${label}:${colors.reset} ${Math.round(r.score * 100)}% (${(r.elapsed / 1000).toFixed(1)}s)`);
        // Show preview of answer; tolerate results that carry no answer text.
        const answerText = typeof r.answer === 'string' ? r.answer : '';
        const preview = answerText.replace(/\n/g, ' ').slice(0, 120);
        lines.push(` ${colors.dim}${preview}${answerText.length > 120 ? '...' : ''}${colors.reset}`);
      }
      lines.push('');
    }

    lines.push(`${colors.cyan}${'═'.repeat(60)}${colors.reset}\n`);

    return lines.join('\n');
  }

  /**
   * Build a fixed-width bar for a percentage.
   * The score is clamped to 0..100 (non-finite values count as 0) so that
   * String.prototype.repeat() never receives a negative count.
   * @param {number} score - Percentage.
   * @param {number} [width=20] - Bar width in characters.
   * @returns {string}
   */
  _scoreBar(score, width = 20) {
    const pct = Number.isFinite(score) ? Math.min(100, Math.max(0, score)) : 0;
    const filled = Math.round((pct / 100) * width);
    const empty = width - filled;
    const filledChar = '█';
    const emptyChar = '░';
    return colors.green + filledChar.repeat(filled) + colors.dim + emptyChar.repeat(empty) + colors.reset;
  }

  // ── History ───────────────────────────────────────────────────────────────

  /**
   * One plain-text line per ranked provider.
   * @param {object} benchmarkResult - Value returned by run().
   * @returns {string} '' when the result has no ranking (e.g. an `{ error }` result).
   */
  formatHistorySummary(benchmarkResult) {
    const ranking = benchmarkResult?.ranking ?? [];
    return ranking
      .map((r) => `[${r.name}] Score: ${r.score}% | Model: ${r.model} | Time: ${(r.elapsed / 1000).toFixed(1)}s`)
      .join('\n');
  }
}
350
+
351
+ // Export question/task banks for testing
352
+ export { BENCHMARK_QUESTIONS, BENCHMARK_TASKS, scoreAnswer, scoreTask };
@@ -0,0 +1,185 @@
1
+ /**
2
+ * ❄️ MODEL CAPABILITIES ❄️
3
+ * Detect AI model capability tier from model name.
4
+ * Small models need aggressive prompting to compete with large ones.
5
+ */
6
+
7
/**
 * Capability tiers a model can be classified into.
 * Frozen: the object is exported and shared, so callers must not mutate it.
 */
export const MODEL_TIERS = Object.freeze({
  TINY: 'tiny', // <3B params — barely functional for code
  SMALL: 'small', // 3B-15B params — basic code ability
  MEDIUM: 'medium', // 15B-40B params — decent code ability
  LARGE: 'large', // 40B-120B params — strong code ability, could be flagship
  FLAGSHIP: 'flagship', // 120B+ or proprietary frontier models
});

/**
 * Ordered tiers from weakest to strongest (for comparison).
 */
const TIER_ORDER = Object.freeze([
  MODEL_TIERS.TINY,
  MODEL_TIERS.SMALL,
  MODEL_TIERS.MEDIUM,
  MODEL_TIERS.LARGE,
  MODEL_TIERS.FLAGSHIP,
]);
19
+
20
/**
 * Classify a model name into a capability tier.
 *
 * Checks run in this order and the first hit wins:
 *   1. provider shortcuts for openai / anthropic / claude and for groq
 *   2. flagship name patterns
 *   3. large name patterns
 *   4. size-specific small patterns (e.g. "qwen2.5-3b", "phi-3-mini")
 *   5. medium name patterns
 *   6. small model families
 *   7. "tiny" / "mini" / "small" / "nano" in the name
 *   8. provider fallback (ollama / local → small), otherwise medium
 *
 * The name is lower-cased and trimmed, and `:` is treated like `-`, so tagged
 * names such as "qwen2.5:7b" match the same patterns as "qwen2.5-7b".
 *
 * @param {string} modelName - e.g. "llama3", "gpt-4", "qwen2.5:7b"
 * @param {string} [provider] - e.g. "ollama", "openai" (optional, helps disambiguate)
 * @returns {string} One of MODEL_TIERS
 */
export function classifyModelTier(modelName, provider = '') {
  const name = (modelName || '').toLowerCase().trim().replace(/:/g, '-');
  const prov = (provider || '').toLowerCase().trim();
  const matchesAny = (patterns) => patterns.some((pattern) => pattern.test(name));

  // If using a cloud provider like OpenAI/Anthropic, their default models are typically large+
  if (prov === 'openai' || prov === 'anthropic' || prov === 'claude') {
    if (name.includes('gpt-3.5') || name.includes('gpt-3')) return MODEL_TIERS.MEDIUM;
    if (name.includes('claude-3-haiku') || name.includes('claude-3-5-haiku')) return MODEL_TIERS.MEDIUM;
    return MODEL_TIERS.LARGE; // Default for OpenAI/Anthropic is >= gpt-4 level
  }

  if (prov === 'groq') {
    // Groq runs open models, most are large but some are not
    if (/llama.*8b|llama3.*8b|llama3\.2.*3b/i.test(name)) return MODEL_TIERS.SMALL;
    if (/gemma2.*9b/i.test(name)) return MODEL_TIERS.SMALL;
    if (/mixtral-8x7|llama.*70b|llama3.*70b|llama3\.1.*70b|qwen/i.test(name)) return MODEL_TIERS.LARGE;
    return MODEL_TIERS.MEDIUM; // Default for Groq
  }

  // ===== FLAGSHIP (frontier models) =====
  const flagshipPatterns = [
    /claude-3-5-sonnet/i, /claude-opus/i, /claude-4/i, /claude-sonnet-4/i,
    /gpt-4o/i, /gpt-4-turbo/i,
    // "o1" / "o3" must be a whole token, so names that merely contain those
    // two characters (e.g. "demo1") are not treated as frontier models.
    /(?<![a-z0-9])o1(?![a-z0-9])/i, /(?<![a-z0-9])o3(?![a-z0-9])/i,
    /gemini-2\.5-pro/i, /gemini-2\.0-ultra/i,
    /deepseek-v3/i, /deepseek-r1/i,
    /llama-4/i, /llama-3-70b/i, /llama3-70b/i, /llama3\.1-70b/i, /llama3\.2-90b/i, /llama3\.3/i,
    /qwen2\.5-?72b/i, /qwen2\.5-?70b/i, /qwen-?2\.5-?72b/i,
    /mistral-large/i, /mixtral-8x22b/i,
    /command-r-plus/i, /command-a/i,
    /yi-?34b/i,
    /dbrx-instruct/i,
  ];
  if (matchesAny(flagshipPatterns)) return MODEL_TIERS.FLAGSHIP;

  // ===== LARGE MODELS =====
  const largePatterns = [
    /claude-sonnet/i, /claude-3/i, /claude-2/i,
    /gpt-4/i, /gpt-4-32k/i,
    /llama-3\.1-?70b/i, /llama-3\.2-?70b/i, /llama3-?70b/i,
    /llama-2-?70b/i,
    /qwen-?2\.5-?32b/i, /qwen-?2-?72b/i,
    /codellama-?70b/i,
    /mixtral/i,
    /deepseek-?v2/i,
    /gemini-1\.5-pro/i, /gemini-2\.0-flash/i,
    /command-r/i,
    /yi-?34b/i,
    /mistral-medium/i,
  ];
  if (matchesAny(largePatterns)) return MODEL_TIERS.LARGE;

  // ===== SMALL MODELS (size-specific) =====
  // Checked before the medium list: its generic family patterns (/qwen-?2/,
  // /phi-3/) would otherwise claim these names first.
  const sizedSmallPatterns = [
    /llama-3\.2-?3b/i, /llama-3\.2-?1b/i,
    /qwen-?2\.5-?3b/i, /qwen-?2\.5-?1\.5b/i, /qwen-?2\.5-?0\.5b/i,
    /phi-?3-?mini/i,
    /gemma-?2-?2b/i,
  ];
  if (matchesAny(sizedSmallPatterns)) return MODEL_TIERS.SMALL;

  // ===== MEDIUM MODELS =====
  const mediumPatterns = [
    /qwen-?2\.5-?14b/i, /qwen-?2\.5-?7b/i, /qwen-?2/i,
    /llama-3-?8b/i, /llama-3\.1-?8b/i, /llama-3\.2-?11b/i,
    /llama-2-?13b/i, /llama-2-?7b/i,
    /deepseek-coder-?6\.7b/i, /deepseek-coder-?33b/i,
    /codellama-?34b/i, /codellama-?13b/i, /codellama-?7b/i,
    /mistral/i, /mistral-7b/i,
    /gemma-2-?9b/i, /gemma-?7b/i,
    /phi-3/i, /phi-3-medium/i,
    /nemotron/i,
    /solar/i,
    /dbrx/i,
    /starcoder2/i,
    /deepseek-llm/i,
    /yi-?6b/i, /yi-?9b/i,
  ];
  if (matchesAny(mediumPatterns)) return MODEL_TIERS.MEDIUM;

  // ===== SMALL MODELS (families) =====
  const smallFamilyPatterns = [
    /tinyllama/i,
    /phi-?2/i, /phi-?1/i,
    /stablelm/i,
    /orca/i,
    /falcon/i,
    /red-pajama/i,
    /pythia/i,
    /opt/i,
    /bloom/i,
    /mpnet/i,
  ];
  if (matchesAny(smallFamilyPatterns)) return MODEL_TIERS.SMALL;

  if (/tiny/i.test(name) || /mini/i.test(name) || /small/i.test(name) || /nano/i.test(name)) {
    return MODEL_TIERS.TINY;
  }

  // Fallback: if Ollama, likely small
  if (prov === 'ollama' || prov === 'local') return MODEL_TIERS.SMALL;

  // Default: assume medium
  return MODEL_TIERS.MEDIUM;
}
136
+
137
/**
 * Tell whether a tier is one of the "small" tiers (tiny or small), i.e. one
 * that needs aggressive prompting.
 * @param {string} tier - One of MODEL_TIERS.
 * @returns {boolean}
 */
export function isSmallModel(tier) {
  const smallTiers = [MODEL_TIERS.TINY, MODEL_TIERS.SMALL];
  return smallTiers.includes(tier);
}
143
+
144
/**
 * Position of a tier within TIER_ORDER (0 = weakest).
 * A value that is not in TIER_ORDER is treated as the medium tier (index 2).
 * @private
 * @param {string} tier
 * @returns {number}
 */
function tierIndex(tier) {
  const position = TIER_ORDER.indexOf(tier);
  if (position === -1) {
    return 2; // Default to medium index
  }
  return position;
}
152
+
153
/**
 * Order two tiers by strength.
 * @private
 * @param {string} a
 * @param {string} b
 * @returns {number} Negative when a is weaker than b, positive when stronger, 0 when equal.
 */
function compareTiers(a, b) {
  const left = tierIndex(a);
  const right = tierIndex(b);
  return left - right;
}
160
+
161
/**
 * Number of reasoning levels to add for a tier.
 * Tiny models are bumped by 2, small models by 1, every other tier by 0,
 * because smaller models need more aggressive reasoning prompting.
 * @param {string} tier - One of MODEL_TIERS.
 * @returns {number}
 */
export function getReasoningBump(tier) {
  if (tier === MODEL_TIERS.TINY) {
    return 2; // bump 2 levels
  }
  if (tier === MODEL_TIERS.SMALL) {
    return 1; // bump 1 level
  }
  return 0;
}
172
+
173
/**
 * Short human-readable description of a tier, meant for system prompt injection.
 * @param {string} tier - One of MODEL_TIERS.
 * @returns {string} The description, or '' for a value that is not a known tier.
 */
export function getModelCapabilityLabel(tier) {
  const labels = new Map([
    [MODEL_TIERS.TINY, 'tiny local model — needs maximum guidance'],
    [MODEL_TIERS.SMALL, 'small local model — needs extra structure'],
    [MODEL_TIERS.MEDIUM, 'medium-capability model'],
    [MODEL_TIERS.LARGE, 'high-capability model'],
    [MODEL_TIERS.FLAGSHIP, 'frontier model — full capability expected'],
  ]);
  return labels.has(tier) ? labels.get(tier) : '';
}