winter-super-cli 2026.5.28 → 2026.5.30
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/winter.js +2 -1
- package/package.json +1 -1
- package/src/ai/benchmark.js +352 -0
- package/src/ai/prompts/system-prompt.js +70 -81
- package/src/ai/providers.js +12 -9
- package/src/ai/reasoning.js +5 -81
- package/src/cli/commands.js +62 -0
- package/src/cli/context-loader.js +64 -1
- package/src/cli/conversation-format.js +90 -12
- package/src/cli/prompt-builder.js +43 -17
- package/src/cli/repl-commands.js +14 -3
- package/src/cli/repl.js +333 -214
- package/src/context/router.js +26 -22
- package/src/tools/executor.js +78 -9
package/bin/winter.js
CHANGED
|
@@ -17,7 +17,7 @@ const pkg = JSON.parse(readFileSync(new URL('../package.json', import.meta.url),
|
|
|
17
17
|
const version = pkg.version;
|
|
18
18
|
|
|
19
19
|
const COMMANDS = new Set([
|
|
20
|
-
'chat', 'call', 'session', 'skill', 'plugin', 'design', 'config', 'init',
|
|
20
|
+
'chat', 'call', 'benchmark', 'session', 'skill', 'plugin', 'design', 'config', 'init',
|
|
21
21
|
'help', 'project', 'code', 'review', 'mcp', 'permissions',
|
|
22
22
|
'provider', 'providers', 'model', 'models',
|
|
23
23
|
]);
|
|
@@ -53,6 +53,7 @@ Usage:
|
|
|
53
53
|
Commands:
|
|
54
54
|
|
|
55
55
|
winter call <prompt> Call all configured providers
|
|
56
|
+
winter benchmark [providers] Benchmark model intelligence
|
|
56
57
|
winter session <action> Session management
|
|
57
58
|
winter skill <action> Skill management
|
|
58
59
|
winter plugin <action> Plugin management
|
package/package.json
CHANGED
|
@@ -0,0 +1,352 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Benchmark Engine — Đo độ thông minh của models trong Winter CLI
|
|
3
|
+
*
|
|
4
|
+
* Cố định câu hỏi test (logic, coding, math, reasoning, language)
|
|
5
|
+
* + Coding task thật → chạy qua providers → chấm điểm → so sánh
|
|
6
|
+
*/
|
|
7
|
+
|
|
8
|
+
import { colors } from '../cli/snowflake-logo.js';
|
|
9
|
+
|
|
10
|
+
// ── Question Bank ────────────────────────────────────────────────────────────
|
|
11
|
+
|
|
12
|
+
// Fixed question bank. Each entry has:
//   id       - short identifier for the question
//   category - grouping label ('logic', 'coding', 'math', 'reasoning', 'language')
//   question - prompt text
//   keywords - strings looked for in the answer by scoreAnswer()
//              (case-insensitive substring match)
//   weight   - multiplier applied to the 0..1 score by BenchmarkRunner.run()
// NOTE(review): scoreAnswer() divides matches by the number of keywords, so lists of
// alternative spellings (q01, q04) cap the score of a correct answer that uses only
// one phrasing — confirm this is intended.
const BENCHMARK_QUESTIONS = [
  {
    id: 'q01',
    category: 'logic',
    question: `If all cats are mammals and some mammals are dogs, are all cats dogs? Explain your reasoning step by step.`,
    // NOTE(review): 'no' is a substring of 'not', 'know', 'cannot', ... — verify matching is strict enough
    keywords: ['not', 'no', 'incorrect', 'cannot conclude', 'not necessarily', 'invalid'],
    weight: 1,
  },
  {
    id: 'q02',
    category: 'coding',
    question: `Write a JavaScript function called isPalindrome that checks if a string is a palindrome (reads the same forwards and backwards). Include example usage.`,
    keywords: ['function', 'palindrome', 'reverse', 'split', 'return'],
    weight: 1.5,
  },
  {
    id: 'q03',
    category: 'math',
    question: `What is 15% of 200? Show your calculation.`,
    keywords: ['30', '15', '200', '0.15'],
    weight: 0.5,
  },
  {
    id: 'q04',
    category: 'reasoning',
    question: `A bat and a ball cost $1.10 in total. The bat costs $1.00 more than the ball. How much does the ball cost? Think carefully.`,
    // Alternative spellings of the same answer; '0.05$' contains '0.05' and '5 cents' contains '5 cent'
    keywords: ['0.05', '5 cents', '5 cent', '0.05$', '5¢', 'five cents'],
    weight: 1.5,
  },
  {
    id: 'q05',
    category: 'coding',
    question: `What's wrong with this code and how would you fix it?\n\nfunction add(a, b) {\n return a + b;\n}\nconsole.log(add(5, '3'));`,
    keywords: ['string', 'type', 'concatenation', 'number', 'parse', 'typeof', 'coercion'],
    weight: 1,
  },
  {
    id: 'q06',
    category: 'language',
    question: `Translate this sentence to Vietnamese: "Good morning, how are you today?"`,
    keywords: ['chào', 'sáng', 'khỏe', 'hôm nay', 'bạn'],
    weight: 0.5,
  },
  {
    id: 'q07',
    category: 'logic',
    question: `You have a 3-gallon jug and a 5-gallon jug. How can you measure exactly 4 gallons of water? Explain step by step.`,
    keywords: ['fill', 'pour', '3', '5', '4', 'empty'],
    weight: 1.5,
  },
  {
    id: 'q08',
    category: 'coding',
    question: `Write a recursive function to calculate the nth Fibonacci number. Explain how memoization can optimize it.`,
    keywords: ['function', 'fibonacci', 'recursive', 'memoization', 'cache'],
    weight: 1.5,
  },
];
|
|
70
|
+
|
|
71
|
+
// Coding-task bank. Each entry has:
//   id                 - short identifier for the task
//   category           - grouping label (always 'coding-task' here)
//   title              - label shown in the per-item breakdown
//   description        - prompt text
//   evaluationCriteria - strings looked for in the answer by scoreTask()
//                        (case-insensitive substring match, so entries must
//                        stay distinct after lowercasing)
//   weight             - multiplier applied to the 0..1 score by BenchmarkRunner.run()
const BENCHMARK_TASKS = [
  {
    id: 't01',
    category: 'coding-task',
    title: 'API Fetch with Error Handling',
    description: 'Write a JavaScript function that fetches JSON data from a URL, handles network errors, HTTP errors, and invalid JSON responses gracefully.',
    evaluationCriteria: ['error handling', 'try/catch', 'async/await', 'fetch', 'response.ok'],
    weight: 2,
  },
  {
    id: 't02',
    category: 'coding-task',
    title: 'Event Emitter Class',
    description: 'Create a simple EventEmitter class in JavaScript with on(), off(), and emit() methods. It should support multiple listeners for the same event and removing listeners.',
    // NOTE(review): 'on' is a substring of many words ('function', 'constructor'); verify it discriminates
    evaluationCriteria: ['class', 'on', 'off', 'emit', 'listeners', 'events'],
    weight: 2,
  },
  {
    id: 't03',
    category: 'coding-task',
    title: 'Fix This Bug',
    description: `What's wrong with this code? Identify ALL bugs and provide a fixed version:\n\nconst users = [\n { name: 'Alice', age: 30 },\n { name: 'Bob', age: 25 },\n { name: 'Charlie', age: 35 },\n];\n\nconst adultUsers = users.filter(u => u.age >= 18);\nadultUsers.forEach(u => {\n console.log(u.Name);\n});\n\nadultUsers.sort((a, b) => a.age - b.age);\nconst totalAge = adultUsers.reduce((acc, u) => acc + u.age);\nconsole.log('Average age:', totalAge / adultUsers.length);`,
    // 'Name' was dropped: scoring lowercases both sides, so it duplicated 'name'
    // and counted the same check twice.
    evaluationCriteria: ['name', 'undefined', 'reduce', 'initial', 'initialize', 'capital N'],
    weight: 2.5,
  },
];
|
|
97
|
+
|
|
98
|
+
// ── Scoring ──────────────────────────────────────────────────────────────────
|
|
99
|
+
|
|
100
|
+
/**
 * Score a free-form answer to a benchmark question by keyword coverage.
 *
 * The score is the fraction of distinct keywords (compared case-insensitively)
 * that occur as substrings of the answer, plus a small bonus for longer
 * answers, capped at 1.
 *
 * @param {{ keywords?: string[] }} question - Question entry; only `keywords` is read.
 * @param {unknown} answer - Model output. Anything but a non-blank string scores 0.
 * @returns {number} Score in the range 0..1.
 */
function scoreAnswer(question, answer) {
  if (typeof answer !== 'string') return 0;

  const trimmed = answer.trim();
  // BenchmarkRunner.askProvider() reports failed requests as "[ERROR: …]" text.
  // Such placeholders must not earn keyword hits (e.g. "fetch failed").
  if (trimmed === '' || trimmed.startsWith('[ERROR:')) return 0;

  const lower = answer.toLowerCase();
  const rawKeywords = Array.isArray(question?.keywords) ? question.keywords : [];
  // Lowercase first, then dedupe, so 'Name' and 'name' count as one keyword
  const keywords = [...new Set(rawKeywords.map((kw) => String(kw).toLowerCase()))];

  const matches = keywords.filter((kw) => lower.includes(kw)).length;
  const ratio = keywords.length > 0 ? matches / keywords.length : 0;

  // Bonus: longer, well-structured answers tend to be better.
  // Counted on the trimmed text so padding whitespace does not add words.
  const words = trimmed.split(/\s+/).length;
  const lengthBonus = words > 50 ? 0.1 : words > 20 ? 0.05 : 0;

  return Math.min(1, ratio + lengthBonus);
}
|
|
120
|
+
|
|
121
|
+
/**
 * Score a coding-task answer by evaluation-criteria coverage.
 *
 * The score is the fraction of distinct criteria (compared case-insensitively)
 * that occur as substrings of the answer, plus a small bonus for longer
 * answers, capped at 1.
 *
 * @param {{ evaluationCriteria?: string[] }} task - Task entry; only `evaluationCriteria` is read.
 * @param {unknown} answer - Model output. Anything but a non-blank string scores 0.
 * @returns {number} Score in the range 0..1.
 */
function scoreTask(task, answer) {
  if (typeof answer !== 'string') return 0;

  const trimmed = answer.trim();
  // BenchmarkRunner.askProvider() reports failed requests as "[ERROR: …]" text.
  // Such placeholders must not earn criteria hits (e.g. "fetch failed").
  if (trimmed === '' || trimmed.startsWith('[ERROR:')) return 0;

  const lower = answer.toLowerCase();
  const rawCriteria = Array.isArray(task?.evaluationCriteria) ? task.evaluationCriteria : [];
  // Lowercase first, then dedupe, so 'Name' and 'name' count as one criterion
  const criteria = [...new Set(rawCriteria.map((c) => String(c).toLowerCase()))];

  const matches = criteria.filter((c) => lower.includes(c)).length;
  const ratio = criteria.length > 0 ? matches / criteria.length : 0;

  // Counted on the trimmed text so padding whitespace does not add words
  const words = trimmed.split(/\s+/).length;
  const lengthBonus = words > 100 ? 0.1 : words > 50 ? 0.05 : 0;

  return Math.min(1, ratio + lengthBonus);
}
|
|
139
|
+
|
|
140
|
+
// ── Benchmark Runner ─────────────────────────────────────────────────────────
|
|
141
|
+
|
|
142
|
+
/**
 * Runs the fixed question bank and coding tasks against AI providers, scores
 * the answers, and renders a ranking report.
 */
export class BenchmarkRunner {
  /**
   * @param {object} aiManager - Provider manager. This class uses its `init()`,
   *   `providers` map and `sendRequestToProvider()` members.
   */
  constructor(aiManager) {
    this.ai = aiManager;
  }

  /**
   * Run all benchmark questions across specified providers.
   * @param {string[]} providerNames - List of provider names (e.g., ['claude', 'openai', 'ollama'])
   * @param {object} options
   * @param {boolean} options.tasks - Whether to include coding tasks (default: true)
   * @param {boolean} options.questions - Whether to include fixed questions (default: true)
   * @returns {Promise<object>} `{ error }` when no requested provider is ready,
   *   otherwise `{ timestamp, totalElapsed, providers, ranking }`.
   */
  async run(providerNames, options = {}) {
    const { questions = true, tasks = true } = options;

    await this.ai.init();

    // Tolerate a missing list and benchmark each provider once even if it is repeated
    const requested = Array.isArray(providerNames) ? [...new Set(providerNames)] : [];
    const registry = this.ai.providers ?? {};

    // Filter to only ready providers
    const providers = requested
      .map(name => ({ name, provider: registry[name] }))
      .filter(({ provider }) => provider && provider.ready);

    if (providers.length === 0) {
      return { error: 'No ready providers found. Configure providers in winter.json first.' };
    }

    const results = {};
    const startTime = Date.now();

    for (const { name, provider } of providers) {
      console.log(`${colors.dim}Benchmarking ${colors.bright}${name}${colors.reset}${colors.dim}...${colors.reset}`);

      // Timed per provider, so a provider is not charged for the ones that ran before it
      const providerStart = Date.now();
      const providerResults = [];

      // Fixed questions
      if (questions) {
        for (const q of BENCHMARK_QUESTIONS) {
          providerResults.push(await this._runItem(provider, q, 'question'));
        }
      }

      // Coding tasks
      if (tasks) {
        for (const t of BENCHMARK_TASKS) {
          providerResults.push(await this._runItem(provider, t, 'task'));
        }
      }

      const totalScore = providerResults.reduce((sum, r) => sum + r.weightedScore, 0);
      const maxScore = providerResults.reduce((sum, r) => sum + r.maxWeightedScore, 0);
      const overall = maxScore > 0 ? Math.round((totalScore / maxScore) * 100) : 0;

      results[name] = {
        provider: name,
        model: provider.model,
        results: providerResults,
        totalScore,
        maxScore,
        overall,
        elapsed: Date.now() - providerStart,
      };
    }

    return {
      timestamp: new Date().toISOString(),
      totalElapsed: Date.now() - startTime,
      providers: results,
      // Sort providers by overall score descending
      ranking: Object.values(results)
        .sort((a, b) => b.overall - a.overall)
        .map(r => ({ name: r.provider, model: r.model, score: r.overall, elapsed: r.elapsed })),
    };
  }

  /**
   * Ask one question or task, time it, and build its result record.
   * @param {object} provider - Provider entry taken from the manager's `providers` map.
   * @param {object} item - Entry from BENCHMARK_QUESTIONS or BENCHMARK_TASKS.
   * @param {'question'|'task'} type - Which bank the item came from.
   * @returns {Promise<object>} Result record consumed by formatResults().
   */
  async _runItem(provider, item, type) {
    const isTask = type === 'task';
    const prompt = isTask ? item.description : item.question;

    const startedAt = Date.now();
    const answer = await this.askProvider(provider, prompt);
    const elapsed = Date.now() - startedAt;

    // askProvider() turns failures into "[ERROR: …]" text; a failed request scores 0
    // instead of being keyword-matched against its own error message.
    const failed = answer.startsWith('[ERROR:');
    const score = failed ? 0 : (isTask ? scoreTask(item, answer) : scoreAnswer(item, answer));

    return {
      type,
      id: item.id,
      category: item.category,
      ...(isTask ? { title: item.title } : {}),
      question: prompt,
      answer: answer.slice(0, 500), // truncate for display
      score,
      weightedScore: score * item.weight,
      maxWeightedScore: item.weight,
      elapsed,
    };
  }

  /**
   * Send a single prompt to a provider with tools disabled.
   * @param {object} provider - Provider entry; its `model` is forwarded in the request options.
   * @param {string} prompt - User prompt text.
   * @returns {Promise<string>} The first choice's message text, '' when there is none,
   *   or "[ERROR: <message>]" when the request throws. Always a string.
   */
  async askProvider(provider, prompt) {
    try {
      const messages = [
        { role: 'system', content: 'You are a helpful AI assistant. Answer concisely and accurately.' },
        { role: 'user', content: prompt },
      ];
      const data = await this.ai.sendRequestToProvider(provider, messages, {
        enableTools: false,
        model: provider.model,
      });
      const content = data?.choices?.[0]?.message?.content;
      if (typeof content === 'string') return content;
      // Some APIs return content as a list of parts; keep only the textual ones so
      // callers can rely on string methods (slice/replace).
      // NOTE(review): the `{ text }` part shape is assumed — confirm against the providers in use.
      if (Array.isArray(content)) {
        return content
          .map(part => (typeof part === 'string' ? part : typeof part?.text === 'string' ? part.text : ''))
          .join('');
      }
      return '';
    } catch (err) {
      return `[ERROR: ${err?.message ?? String(err)}]`;
    }
  }

  // ── Format Results ────────────────────────────────────────────────────────

  /**
   * Render a benchmark result as colored terminal text.
   * @param {object} benchmarkResult - Value returned by run().
   * @returns {string} Multi-line report, or a one-line error message when
   *   `benchmarkResult.error` is set.
   */
  formatResults(benchmarkResult) {
    if (benchmarkResult.error) {
      return `\n${colors.red}${benchmarkResult.error}${colors.reset}\n`;
    }

    const lines = [];
    lines.push(`\n${colors.cyan}${'═'.repeat(60)}${colors.reset}`);
    lines.push(`${colors.bright}${colors.cyan} 🧠 WINTER MODEL BENCHMARK${colors.reset}`);
    lines.push(`${colors.cyan}${'═'.repeat(60)}${colors.reset}`);
    lines.push(` ${colors.dim}${benchmarkResult.timestamp}${colors.reset}`);
    lines.push(` ${colors.dim}Total time: ${(benchmarkResult.totalElapsed / 1000).toFixed(1)}s${colors.reset}`);
    lines.push('');

    // Ranking
    lines.push(`${colors.bright}🏆 RANKING${colors.reset}`);
    lines.push(`${'─'.repeat(40)}`);
    benchmarkResult.ranking.forEach((r, i) => {
      const medal = i === 0 ? '🥇' : i === 1 ? '🥈' : i === 2 ? '🥉' : ` ${i + 1}.`;
      const bar = this._scoreBar(r.score, 20);
      lines.push(` ${medal} ${colors.bright}${r.name}${colors.reset} ${bar} ${r.score}%`);
      lines.push(` ${colors.dim}Model: ${r.model} | Time: ${(r.elapsed / 1000).toFixed(1)}s${colors.reset}`);
    });
    lines.push('');

    // Detail per provider
    for (const [name, data] of Object.entries(benchmarkResult.providers)) {
      lines.push(`${colors.bright}${'─'.repeat(50)}${colors.reset}`);
      lines.push(`${colors.bright}📊 ${name}${colors.reset} ${colors.dim}(${data.model})${colors.reset}`);
      lines.push(`${'─'.repeat(50)}`);

      // Category percentages use the unweighted 0..1 item scores
      const categories = {};
      for (const r of data.results) {
        const cat = r.category || 'other';
        if (!categories[cat]) categories[cat] = { count: 0, totalScore: 0, maxScore: 0 };
        categories[cat].count++;
        categories[cat].totalScore += r.score;
        categories[cat].maxScore += 1;
      }

      for (const [cat, stats] of Object.entries(categories)) {
        const catPct = Math.round((stats.totalScore / stats.maxScore) * 100);
        const bar = this._scoreBar(catPct, 10);
        lines.push(` ${bar} ${colors.dim}${cat}:${colors.reset} ${catPct}% (${stats.count} items)`);
      }
      lines.push('');

      // Per-item breakdown
      for (const r of data.results) {
        const icon = r.score >= 0.8 ? '✅' : r.score >= 0.5 ? '🟡' : r.score >= 0.2 ? '🟠' : '❌';
        const label = r.type === 'question' ? r.id : r.title;
        lines.push(` ${icon} ${colors.dim}${label}:${colors.reset} ${Math.round(r.score * 100)}% (${(r.elapsed / 1000).toFixed(1)}s)`);
        // Show preview of answer
        const preview = r.answer.replace(/\n/g, ' ').slice(0, 120);
        lines.push(` ${colors.dim}${preview}${r.answer.length > 120 ? '...' : ''}${colors.reset}`);
      }
      lines.push('');
    }

    lines.push(`${colors.cyan}${'═'.repeat(60)}${colors.reset}\n`);

    return lines.join('\n');
  }

  /**
   * Build a fixed-width progress bar for a percentage.
   * @param {number} score - Percentage; values outside 0..100 are clamped.
   * @param {number} [width=20] - Total bar width in characters.
   * @returns {string} Colored bar made of filled and empty cells.
   */
  _scoreBar(score, width = 20) {
    // Clamp so an out-of-range score cannot produce a negative repeat() count
    const pct = Math.min(100, Math.max(0, Number(score) || 0));
    const filled = Math.round((pct / 100) * width);
    const empty = width - filled;
    const filledChar = '█';
    const emptyChar = '░';
    return colors.green + filledChar.repeat(filled) + colors.dim + emptyChar.repeat(empty) + colors.reset;
  }

  // ── History ───────────────────────────────────────────────────────────────

  /**
   * Summarize a benchmark result as one plain-text line per ranked provider.
   * @param {object} benchmarkResult - Successful value returned by run().
   * @returns {string} Newline-separated summary lines.
   */
  formatHistorySummary(benchmarkResult) {
    return benchmarkResult.ranking
      .map(r => `[${r.name}] Score: ${r.score}% | Model: ${r.model} | Time: ${(r.elapsed / 1000).toFixed(1)}s`)
      .join('\n');
  }
}
|
|
350
|
+
|
|
351
|
+
// Export question/task banks for testing
|
|
352
|
+
export { BENCHMARK_QUESTIONS, BENCHMARK_TASKS, scoreAnswer, scoreTask };
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* Dynamic System Prompt Builder
|
|
3
3
|
* Builds context-aware system prompts based on task, role, and session state.
|
|
4
|
-
* Small models get
|
|
4
|
+
* Small models get compact structural guidance so the task stays in focus.
|
|
5
5
|
*/
|
|
6
6
|
|
|
7
7
|
import { isSmallModel, getModelCapabilityLabel } from '../model-capabilities.js';
|
|
@@ -40,98 +40,93 @@ function buildEnvironmentSummary() {
|
|
|
40
40
|
].join('\n');
|
|
41
41
|
}
|
|
42
42
|
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
*/
|
|
47
|
-
function buildSmallModelSystemPrompt({
|
|
48
|
-
role = 'coding',
|
|
49
|
-
context,
|
|
50
|
-
tools = [],
|
|
51
|
-
session,
|
|
52
|
-
environment,
|
|
53
|
-
design,
|
|
54
|
-
resourceContext,
|
|
55
|
-
modelTier,
|
|
56
|
-
} = {}) {
|
|
57
|
-
const parts = [
|
|
58
|
-
`You are Winter, an expert AI coding assistant. You are running on a ${getModelCapabilityLabel(modelTier)}.`,
|
|
59
|
-
'',
|
|
60
|
-
'## CRITICAL: YOU MUST THINK STEP BY STEP',
|
|
61
|
-
'',
|
|
62
|
-
'Because you are a smaller model, you MUST use structured thinking to produce quality results.',
|
|
63
|
-
'Before any response, use <thinking> tags to reason through the problem.',
|
|
64
|
-
'',
|
|
65
|
-
'Your thinking must cover:',
|
|
66
|
-
'1. What does the user want? (restate briefly)',
|
|
67
|
-
'2. What files/tools do I need to use?',
|
|
68
|
-
'3. What is the best approach?',
|
|
69
|
-
'4. What could go wrong? Edge cases?',
|
|
70
|
-
'5. Is my solution complete and correct?',
|
|
71
|
-
'',
|
|
72
|
-
'After thinking, THEN act. Never skip the thinking step.',
|
|
73
|
-
'',
|
|
74
|
-
'## Core Principles',
|
|
75
|
-
...BASE_PRINCIPLES.map((p, i) => `${i + 1}. ${p}`),
|
|
76
|
-
'',
|
|
77
|
-
'## Runtime Environment',
|
|
78
|
-
environment || buildEnvironmentSummary(),
|
|
79
|
-
'',
|
|
80
|
-
];
|
|
43
|
+
/**
 * Render up to ten tool names as a comma-separated list.
 * @param {string[]} [tools=[]] - Tool names; any non-array value is treated as "no tools".
 * @returns {string} Joined names, or '' when there is nothing to list.
 */
function formatToolList(tools = []) {
  // The default only covers `undefined`; an explicit null must not throw
  if (!Array.isArray(tools)) return '';
  // Skip empty entries so the list never renders as "a, , b"
  return tools.filter(Boolean).slice(0, 10).join(', ');
}
|
|
81
46
|
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
}
|
|
47
|
+
function appendSharedContext(parts, { environment, session, design, resourceContext, context, includeResources = false } = {}) {
|
|
48
|
+
parts.push('## Runtime Environment', environment || buildEnvironmentSummary(), '');
|
|
85
49
|
|
|
86
50
|
if (session?.memory?.length) {
|
|
87
51
|
parts.push('## Session Memory');
|
|
88
|
-
session.memory.forEach(m => parts.push(
|
|
52
|
+
session.memory.slice(-5).forEach(m => parts.push('- ' + String(m).slice(0, 100)));
|
|
89
53
|
parts.push('');
|
|
90
54
|
}
|
|
91
55
|
|
|
92
56
|
if (session?.plans?.length) {
|
|
93
57
|
parts.push('## Active Plans');
|
|
94
|
-
session.plans.forEach(p => parts.push(
|
|
58
|
+
session.plans.slice(-3).forEach(p => parts.push('- ' + (p.title || String(p).slice(0, 80))));
|
|
95
59
|
parts.push('');
|
|
96
60
|
}
|
|
97
61
|
|
|
98
62
|
if (design) {
|
|
99
|
-
parts.push('## Design
|
|
63
|
+
parts.push('## Design Context');
|
|
100
64
|
if (design.brand) {
|
|
101
|
-
parts.push(
|
|
102
|
-
parts.push('');
|
|
103
|
-
const lines = design.content.split('\n').filter(Boolean);
|
|
104
|
-
const preview = lines.slice(0, 40).join('\n');
|
|
105
|
-
parts.push(preview);
|
|
106
|
-
if (lines.length > 40) parts.push('... (design file truncated)');
|
|
65
|
+
parts.push('Brand: ' + design.brand);
|
|
66
|
+
parts.push(design.content.split('\n').filter(Boolean).slice(0, 18).join('\n'));
|
|
107
67
|
} else if (design.type === 'design_hint') {
|
|
108
|
-
parts.push('
|
|
109
|
-
parts.push(`Available: ${design.brands.join(', ')}`);
|
|
68
|
+
parts.push('Available design systems: ' + design.brands.slice(0, 5).join(', '));
|
|
110
69
|
}
|
|
111
70
|
parts.push('');
|
|
112
71
|
}
|
|
113
72
|
|
|
114
|
-
if (resourceContext) {
|
|
115
|
-
parts.push(resourceContext);
|
|
73
|
+
if (includeResources && resourceContext) {
|
|
74
|
+
parts.push(resourceContext.trim().slice(0, 1200), '');
|
|
116
75
|
}
|
|
117
76
|
|
|
118
|
-
|
|
119
|
-
'
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
77
|
+
if (context && typeof context === 'object') {
|
|
78
|
+
parts.push('Task: ' + (context.category || 'coding') + ' / ' + (context.type || 'simple'), '');
|
|
79
|
+
}
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
function buildCompactSmallModelPrompt(options = {}) {
|
|
83
|
+
const { tools = [], modelTier } = options;
|
|
84
|
+
const parts = [
|
|
85
|
+
'You are Winter, an AI coding assistant running on a ' + getModelCapabilityLabel(modelTier) + '.',
|
|
125
86
|
'',
|
|
126
|
-
'##
|
|
127
|
-
'
|
|
128
|
-
'
|
|
129
|
-
'
|
|
130
|
-
'
|
|
87
|
+
'## Operating Rules',
|
|
88
|
+
'1. Understand the user request first. If project state matters, inspect files before answering.',
|
|
89
|
+
'2. Keep context tight. Use only relevant tools and avoid long explanations.',
|
|
90
|
+
'3. For coding: Read/Grep/Glob -> Edit/Write -> Bash/test. Do not guess file paths.',
|
|
91
|
+
'4. Final answer in Vietnamese. Mention changed files and verification only.',
|
|
92
|
+
'',
|
|
93
|
+
];
|
|
94
|
+
|
|
95
|
+
const toolList = formatToolList(tools);
|
|
96
|
+
if (toolList) parts.push('## Tools', toolList, '');
|
|
97
|
+
appendSharedContext(parts, { ...options, includeResources: false });
|
|
98
|
+
|
|
99
|
+
parts.push(
|
|
100
|
+
'## Response Shape',
|
|
101
|
+
'- If action is needed, use tools instead of describing the action.',
|
|
102
|
+
'- Keep final output short and concrete.',
|
|
131
103
|
);
|
|
132
104
|
|
|
133
|
-
return parts.join('\n');
|
|
105
|
+
return parts.filter(Boolean).join('\n');
|
|
134
106
|
}
|
|
107
|
+
|
|
108
|
+
function buildStandardSystemPrompt(options = {}) {
|
|
109
|
+
const { role = 'coding', tools = [], resourceContext } = options;
|
|
110
|
+
const parts = [
|
|
111
|
+
'You are Winter, an expert AI coding assistant.',
|
|
112
|
+
'',
|
|
113
|
+
'## Core Principles',
|
|
114
|
+
...BASE_PRINCIPLES.map((p, i) => (i + 1) + '. ' + p),
|
|
115
|
+
'',
|
|
116
|
+
'## Tool Usage',
|
|
117
|
+
'Use tools when they materially improve correctness. Inspect before editing. Verify after changes.',
|
|
118
|
+
'Never invent file paths, APIs, command output, or test results.',
|
|
119
|
+
'',
|
|
120
|
+
];
|
|
121
|
+
|
|
122
|
+
const toolList = formatToolList(tools);
|
|
123
|
+
if (toolList) parts.push('## Tools', toolList, '');
|
|
124
|
+
appendSharedContext(parts, { ...options, includeResources: Boolean(resourceContext) && (role === 'design' || role === 'ui') });
|
|
125
|
+
|
|
126
|
+
parts.push('Always respond in Vietnamese.');
|
|
127
|
+
return parts.filter(Boolean).join('\n');
|
|
128
|
+
}
|
|
129
|
+
|
|
135
130
|
export function buildSystemPrompt({
|
|
136
131
|
role = 'coding',
|
|
137
132
|
context,
|
|
@@ -142,17 +137,10 @@ export function buildSystemPrompt({
|
|
|
142
137
|
resourceContext,
|
|
143
138
|
modelTier,
|
|
144
139
|
} = {}) {
|
|
145
|
-
|
|
146
|
-
return
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
tools,
|
|
150
|
-
session,
|
|
151
|
-
environment,
|
|
152
|
-
design,
|
|
153
|
-
resourceContext,
|
|
154
|
-
modelTier,
|
|
155
|
-
});
|
|
140
|
+
const options = { role, context, tools, session, environment, design, resourceContext, modelTier };
|
|
141
|
+
return isSmallModel(modelTier)
|
|
142
|
+
? buildCompactSmallModelPrompt(options)
|
|
143
|
+
: buildStandardSystemPrompt(options);
|
|
156
144
|
}
|
|
157
145
|
|
|
158
146
|
export function buildFastSystemPrompt({
|
|
@@ -164,7 +152,7 @@ export function buildFastSystemPrompt({
|
|
|
164
152
|
return [
|
|
165
153
|
'Winter (fast mode - small model). Be concise. Use tools when needed.',
|
|
166
154
|
tools.length > 0 ? `Tools: ${tools.join(', ')}` : '',
|
|
167
|
-
'
|
|
155
|
+
'Use a brief private plan, then answer in 1 sentence.',
|
|
168
156
|
].filter(Boolean).join('\n');
|
|
169
157
|
}
|
|
170
158
|
|
|
@@ -182,11 +170,12 @@ export function buildAgentSystemPrompt(role, { tools = [], modelTier } = {}) {
|
|
|
182
170
|
debug: 'You are a debug specialist. Use systematic elimination to find root causes.',
|
|
183
171
|
research: 'You search codebases and documentation to answer questions comprehensively.',
|
|
184
172
|
browser: 'You interact with web pages via browser automation. Report findings clearly.',
|
|
173
|
+
coding: 'You solve coding tasks directly. Inspect files, edit surgically, and verify.',
|
|
185
174
|
};
|
|
186
175
|
|
|
187
176
|
const base = roleConfigs[role] || roleConfigs.coding;
|
|
188
177
|
const smallNote = modelTier && isSmallModel(modelTier)
|
|
189
|
-
? '\n\nYou are running on a small model.
|
|
178
|
+
? '\n\nYou are running on a small model. Keep context tight, use tools early, and keep final output short.'
|
|
190
179
|
: '';
|
|
191
180
|
|
|
192
181
|
return [
|
package/src/ai/providers.js
CHANGED
|
@@ -103,16 +103,17 @@ export class AIProviderManager {
|
|
|
103
103
|
if (available) this.activeProvider = available;
|
|
104
104
|
}
|
|
105
105
|
|
|
106
|
-
|
|
107
|
-
const providerConfig = this.providers[this.activeProvider] || {};
|
|
108
|
-
this._modelTier = classifyModelTier(providerConfig.model, this.activeProvider);
|
|
109
|
-
|
|
110
|
-
// Eager-load local resources (design systems, agent instructions) for contextual injection
|
|
111
|
-
this._loadResourceContext(); // fire-and-forget
|
|
106
|
+
this.updateActiveModelTier();
|
|
112
107
|
|
|
113
108
|
this.initialized = true;
|
|
114
109
|
}
|
|
115
110
|
|
|
111
|
+
/**
 * Recompute and store the capability tier for the active provider's model.
 *
 * Reads the active provider's config (an empty object when the provider is not
 * registered), passes its `model` and the provider name to classifyModelTier(),
 * and stores the result in `this._modelTier`.
 *
 * @returns {*} The tier value returned by classifyModelTier().
 */
updateActiveModelTier() {
  // Fall back to {} so an unknown active provider yields an undefined model instead of throwing
  const providerConfig = this.providers[this.activeProvider] || {};
  this._modelTier = classifyModelTier(providerConfig.model, this.activeProvider);
  return this._modelTier;
}
|
|
116
|
+
|
|
116
117
|
async loadAuthToken() {
|
|
117
118
|
// 1) Honor explicit environment variables (highest priority)
|
|
118
119
|
const envToken = process.env.CLAUDE_AUTH_TOKEN || process.env.ANTHROPIC_API_KEY || process.env.CLAUDE_TOKEN || null;
|
|
@@ -172,6 +173,7 @@ export class AIProviderManager {
|
|
|
172
173
|
const providerName = this.normalizeProviderName(name);
|
|
173
174
|
if (this.providers[providerName]) {
|
|
174
175
|
this.activeProvider = providerName;
|
|
176
|
+
this.updateActiveModelTier();
|
|
175
177
|
return true;
|
|
176
178
|
}
|
|
177
179
|
return false;
|
|
@@ -594,7 +596,9 @@ export class AIProviderManager {
|
|
|
594
596
|
|
|
595
597
|
getSystemPrompt(options = {}) {
|
|
596
598
|
const taskInfo = options.task ? classifyTask(options.task) : null;
|
|
597
|
-
const tools =
|
|
599
|
+
const tools = Array.isArray(this.tools)
|
|
600
|
+
? this.tools.map(tool => tool?.function?.name || tool?.name).filter(Boolean)
|
|
601
|
+
: [];
|
|
598
602
|
const sessionInfo = {
|
|
599
603
|
memory: options.memory || [],
|
|
600
604
|
plans: options.plans || [],
|
|
@@ -633,8 +637,7 @@ export class AIProviderManager {
|
|
|
633
637
|
? '\n\n' + SuccessCriteria.fromRequest(options.task).buildPrompt()
|
|
634
638
|
: '';
|
|
635
639
|
|
|
636
|
-
|
|
637
|
-
const resourceContext = this._cachedResourceContext || '';
|
|
640
|
+
const resourceContext = options.includeResources ? (this._cachedResourceContext || '') : '';
|
|
638
641
|
|
|
639
642
|
// Auto-detect relevant design guide for UI/design tasks
|
|
640
643
|
let designGuide = null;
|