winter-super-cli 2026.6.5 → 2026.6.7

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/bin/winter.js CHANGED
@@ -23,6 +23,7 @@ const COMMANDS = new Set([
23
23
  'autopilot', 'plan',
24
24
  'provider', 'providers', 'model', 'models', 'ecc', 'page-agent', 'pageagent',
25
25
  'resources', 'htmlfx', 'memory-vault', 'doctor', 'context', 'scorecard',
26
+ 'tui',
26
27
  ]);
27
28
 
28
29
  function isInteractiveRequest(args) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "winter-super-cli",
3
- "version": "2026.6.5",
3
+ "version": "2026.6.7",
4
4
  "description": "❄️ AI-Powered Development CLI with Interactive REPL",
5
5
  "type": "module",
6
6
  "main": "bin/winter.js",
@@ -57,7 +57,7 @@
57
57
  "diff": "^9.0.0"
58
58
  },
59
59
  "optionalDependencies": {
60
+ "@colbymchenry/codegraph": "^0.7.12",
60
61
  "puppeteer": "^24.43.1"
61
- },
62
- "devDependencies": {}
62
+ }
63
63
  }
@@ -1,6 +1,6 @@
1
1
  import { Spinner } from '../cli/spinner.js';
2
2
  import { colors } from '../cli/snowflake-logo.js';
3
- import { renderBox, terminalWidth, wrapText } from '../cli/terminal-ui.js';
3
+ import { renderToolPanel } from '../cli/tui.js';
4
4
  import { getMutatingToolNames, recordToolCallAdapterStats } from '../cli/tool-runtime.js';
5
5
  import { buildSmallModelAmplification } from '../ai/small-model-amplifier.js';
6
6
 
@@ -38,6 +38,10 @@ export class AgentRuntime {
38
38
  depth,
39
39
  });
40
40
  const maxToolTurns = amplifier.maxToolTurns || 8;
41
+ // Keep self-critique as prompt discipline only. A second runtime model turn
42
+ // duplicates the final answer because the first answer is already rendered.
43
+ amplifier.enforceSelfCritique = false;
44
+ let forceTextToolFallback = false;
41
45
 
42
46
  try {
43
47
  for (let i = 0; i < maxToolTurns; i++) {
@@ -46,6 +50,7 @@ export class AgentRuntime {
46
50
  provider: executionProfile.provider,
47
51
  model: executionProfile.model,
48
52
  enableTools: true,
53
+ toolPromptOnly: forceTextToolFallback,
49
54
  requireToolEvidence: requireToolEvidence && !usedTools,
50
55
  }, startedAt, totalUsage);
51
56
 
@@ -73,6 +78,7 @@ export class AgentRuntime {
73
78
  role: 'user',
74
79
  content: repl.buildToolEvidenceCorrection(messages),
75
80
  });
81
+ forceTextToolFallback = true;
76
82
  finalContent = '';
77
83
  continue;
78
84
  }
@@ -113,7 +119,6 @@ export class AgentRuntime {
113
119
  }
114
120
  }
115
121
 
116
- const BOX_WIDTH = terminalWidth(76, 116, 92);
117
122
  messages.push({
118
123
  role: 'assistant',
119
124
  content: assistantMsg.content || '',
@@ -186,20 +191,12 @@ export class AgentRuntime {
186
191
  const summary = repl.formatToolResultForConsole(canonicalToolName, result);
187
192
  if (summary) {
188
193
  toolSummaries.push(`${canonicalToolName}: ${summary}`);
189
- const statusIcon = result.success === false
190
- ? `${colors.red}${repl.useUnicodeUi ? '✖' : 'x'}${colors.reset}`
191
- : `${colors.green}${repl.useUnicodeUi ? '✓' : 'ok'}${colors.reset}`;
192
- const toolLine = `${icon} ${colors.cyan}${colors.bright}${toolName}${colors.reset}`;
193
- const summaryLines = summary.split('\n').flatMap(line => wrapText(line, BOX_WIDTH - 8));
194
- console.log(renderBox({
195
- title: 'AGENT TOOLS EXECUTION',
196
- width: BOX_WIDTH,
197
- borderColor: colors.magenta,
198
- titleColor: colors.bright,
199
- body: [
200
- toolLine,
201
- ...summaryLines.map((line, index) => index === 0 ? `${statusIcon} ${colors.dim}${line}${colors.reset}` : `${colors.dim}${line}${colors.reset}`),
202
- ],
194
+ console.log(renderToolPanel({
195
+ toolName: `${icon} ${toolName}`,
196
+ summary,
197
+ success: result.success !== false,
198
+ colors,
199
+ title: 'Agent Tools',
203
200
  }));
204
201
  }
205
202
  }
@@ -32,6 +32,7 @@ export function classifyModelTier(modelName, provider = '') {
32
32
  /claude-3-5-sonnet/i, /claude-opus/i, /claude-4/i, /claude-sonnet-4/i,
33
33
  /gpt-4o/i, /gpt-4-turbo/i, /o1/i, /o3/i,
34
34
  /gemini-2\.5-pro/i, /gemini-2\.0-ultra/i,
35
+ /minimax-?m2\.5/i, /minimax.*m2\.5/i, /minimax/i,
35
36
  /deepseek-v3/i, /deepseek-r1/i,
36
37
  /llama-4/i, /llama-3-70b/i, /llama3-70b/i, /llama3\.1-70b/i, /llama3\.2-90b/i, /llama3\.3/i,
37
38
  /qwen2\.5-?72b/i, /qwen2\.5-?70b/i, /qwen-?2\.5-?72b/i,
@@ -123,7 +124,7 @@ export function classifyModelTier(modelName, provider = '') {
123
124
  if (pattern.test(name)) return MODEL_TIERS.SMALL;
124
125
  }
125
126
 
126
- if (/tiny/i.test(name) || /mini/i.test(name) || /small/i.test(name) || /nano/i.test(name)) {
127
+ if (/\btiny\b/i.test(name) || /(?:^|[-_:/])mini(?:$|[-_:/])/i.test(name) || /\bsmall\b/i.test(name) || /\bnano\b/i.test(name)) {
127
128
  return MODEL_TIERS.TINY;
128
129
  }
129
130
 
@@ -170,6 +171,21 @@ export function getReasoningBump(tier) {
170
171
  }
171
172
  }
172
173
 
174
+ /**
175
+ * Get a budget multiplier for prompt/context sizing.
176
+ * Bigger models can safely absorb more context and larger tool outputs.
177
+ */
178
+ export function getModelBudgetMultiplier(tier) {
179
+ switch (tier) {
180
+ case MODEL_TIERS.TINY: return 0.5;
181
+ case MODEL_TIERS.SMALL: return 0.75;
182
+ case MODEL_TIERS.MEDIUM: return 1;
183
+ case MODEL_TIERS.LARGE: return 2;
184
+ case MODEL_TIERS.FLAGSHIP: return 4;
185
+ default: return 1;
186
+ }
187
+ }
188
+
173
189
  /**
174
190
  * Build a short string describing model capability for system prompt injection.
175
191
  */
@@ -1,11 +1,11 @@
1
1
  /**
2
2
  * Dynamic System Prompt Builder
3
3
  * Builds context-aware system prompts based on task, role, and session state.
4
- * Small models get compact structural guidance so the task stays in focus.
4
+ * Winter always gives every model the strongest available agent instructions.
5
5
  */
6
6
 
7
- import { isSmallModel, getModelCapabilityLabel } from '../model-capabilities.js';
8
7
  import { formatRuntimeEnvironmentSummary, getRuntimeEnvironment } from '../../cli/runtime-env.js';
8
+ import { getModelBudgetMultiplier } from '../model-capabilities.js';
9
9
 
10
10
  const BASE_PRINCIPLES = [
11
11
  'Execute, don\'t describe - Do the work, don\'t write plans about doing the work',
@@ -35,11 +35,22 @@ function buildEnvironmentSummary() {
35
35
  ].join('\n');
36
36
  }
37
37
 
38
+ function getPromptBudgets(modelTier = '') {
39
+ const scale = getModelBudgetMultiplier(modelTier);
40
+ const compactSystemPrompt = scale <= 0.75;
41
+
42
+ return {
43
+ compactSystemPrompt,
44
+ projectContextBudget: Math.round(3200 * scale),
45
+ resourceContextBudget: Math.round(1200 * scale),
46
+ };
47
+ }
48
+
38
49
  function formatToolList(tools = []) {
39
50
  return tools.length > 0 ? tools.slice(0, 10).join(', ') : '';
40
51
  }
41
52
 
42
- function appendSharedContext(parts, { environment, session, design, resourceContext, context, includeResources = false } = {}) {
53
+ function appendSharedContext(parts, { environment, session, design, resourceContext, context, includeResources = false, resourceContextBudget = 1200 } = {}) {
43
54
  parts.push('## Runtime Environment', environment || buildEnvironmentSummary(), '');
44
55
 
45
56
  if (session?.memory?.length) {
@@ -66,7 +77,7 @@ function appendSharedContext(parts, { environment, session, design, resourceCont
66
77
  }
67
78
 
68
79
  if (includeResources && resourceContext) {
69
- parts.push(resourceContext.trim().slice(0, 1200), '');
80
+ parts.push(resourceContext.trim().slice(0, resourceContextBudget), '');
70
81
  }
71
82
 
72
83
  if (context && typeof context === 'object') {
@@ -74,37 +85,11 @@ function appendSharedContext(parts, { environment, session, design, resourceCont
74
85
  }
75
86
  }
76
87
 
77
- function buildCompactSmallModelPrompt(options = {}) {
78
- const { tools = [], modelTier } = options;
79
- const parts = [
80
- 'You are Winter, an AI coding assistant running on a ' + getModelCapabilityLabel(modelTier) + '.',
81
- '',
82
- '## Operating Rules',
83
- '1. Understand the user request first. If project state matters, inspect files before answering.',
84
- '2. Operate as an agent: inspect -> hypothesize -> act -> verify -> final.',
85
- '3. Keep context tight. Use only relevant tools and avoid long explanations.',
86
- '4. For coding/debug: Read/Grep/Glob/logs -> Edit/Write -> Bash/test/browser smoke. Do not guess file paths.',
87
- '5. For UI/design: inspect existing components/styles/resources before changing visuals.',
88
- '6. Final answer in Vietnamese. Mention changed files and verification only.',
89
- '',
90
- ];
91
-
92
- const toolList = formatToolList(tools);
93
- if (toolList) parts.push('## Tools', toolList, '');
94
- appendSharedContext(parts, { ...options, includeResources: false });
95
-
96
- parts.push(
97
- '## Response Shape',
98
- '- If action is needed, use tools instead of describing the action.',
99
- '- If an image is provided, analyze the image directly and tie findings to project files when relevant.',
100
- '- Keep final output short and concrete.',
101
- );
102
-
103
- return parts.filter(Boolean).join('\n');
104
- }
105
-
106
88
  function buildStandardSystemPrompt(options = {}) {
107
- const { role = 'coding', tools = [], resourceContext } = options;
89
+ const { role = 'coding', tools = [], resourceContext, modelTier = '' } = options;
90
+ const budgets = getPromptBudgets(modelTier);
91
+ const projectContextBudget = options.projectContextBudget ?? budgets.projectContextBudget;
92
+ const compactSystemPrompt = options.compactSystemPrompt ?? budgets.compactSystemPrompt;
108
93
  const parts = [
109
94
  'You are Winter, an expert AI coding assistant.',
110
95
  '',
@@ -113,6 +98,7 @@ function buildStandardSystemPrompt(options = {}) {
113
98
  '',
114
99
  '## Tool Usage',
115
100
  'Use tools when they materially improve correctness. Inspect before editing. Verify after changes.',
101
+ 'Use maximum reasoning discipline for every model tier, including tiny, local, free, and routed models.',
116
102
  'Never invent file paths, APIs, command output, or test results.',
117
103
  'For debug work, locate the first hard failure, patch the root cause, and verify with the closest test/build/browser smoke.',
118
104
  'For design/UI work, inspect the existing interface and design resources first; avoid generic placeholder layouts.',
@@ -122,7 +108,11 @@ function buildStandardSystemPrompt(options = {}) {
122
108
 
123
109
  const toolList = formatToolList(tools);
124
110
  if (toolList) parts.push('## Tools', toolList, '');
125
- appendSharedContext(parts, { ...options, includeResources: Boolean(resourceContext) && (role === 'design' || role === 'ui') });
111
+ appendSharedContext(parts, {
112
+ ...options,
113
+ includeResources: Boolean(resourceContext) && (role === 'design' || role === 'ui'),
114
+ resourceContextBudget: budgets.resourceContextBudget,
115
+ });
126
116
 
127
117
  parts.push('Always respond in Vietnamese.');
128
118
  return parts.filter(Boolean).join('\n');
@@ -138,10 +128,11 @@ export function buildSystemPrompt({
138
128
  resourceContext,
139
129
  modelTier,
140
130
  } = {}) {
131
+ const budgets = getPromptBudgets(modelTier);
141
132
  const options = { role, context, tools, session, environment, design, resourceContext, modelTier };
142
- return isSmallModel(modelTier)
143
- ? buildCompactSmallModelPrompt(options)
144
- : buildStandardSystemPrompt(options);
133
+ options.projectContextBudget = options.projectContextBudget ?? budgets.projectContextBudget;
134
+ options.compactSystemPrompt = options.compactSystemPrompt ?? budgets.compactSystemPrompt;
135
+ return buildStandardSystemPrompt(options);
145
136
  }
146
137
 
147
138
  export function buildFastSystemPrompt({
@@ -149,18 +140,10 @@ export function buildFastSystemPrompt({
149
140
  tools = [],
150
141
  modelTier,
151
142
  } = {}) {
152
- if (modelTier && isSmallModel(modelTier)) {
153
- return [
154
- 'Winter (fast mode - small model). Be concise. Use tools when needed.',
155
- tools.length > 0 ? `Tools: ${tools.join(', ')}` : '',
156
- 'Use a brief private plan, then answer in 1 sentence.',
157
- ].filter(Boolean).join('\n');
158
- }
159
-
160
143
  return [
161
- 'You are Winter (fast mode). Be concise. Use tools when needed.',
144
+ 'You are Winter (fast mode with maximum correctness). Be concise, but inspect and use tools when needed.',
162
145
  tools.length > 0 ? `Tools: ${tools.join(', ')}` : '',
163
- 'Keep responses brief and focused on the immediate task.',
146
+ 'Use a brief private plan, then execute or answer with concrete evidence.',
164
147
  ].filter(Boolean).join('\n');
165
148
  }
166
149
 
@@ -176,15 +159,13 @@ export function buildAgentSystemPrompt(role, { tools = [], modelTier } = {}) {
176
159
  };
177
160
 
178
161
  const base = roleConfigs[role] || roleConfigs.coding;
179
- const smallNote = modelTier && isSmallModel(modelTier)
180
- ? '\n\nYou are running on a small model. Keep context tight, use tools early, and keep final output short.'
181
- : '';
162
+ const strengthNote = '\n\nWinter Strength Mode: use the full agent loop, inspect real code, reason carefully, verify results, and avoid unsupported claims regardless of base model size.';
182
163
 
183
164
  return [
184
165
  `You are Winter (${role} agent).`,
185
166
  base,
186
167
  tools.length > 0 ? `\nTools: ${tools.join(', ')}` : '',
187
- smallNote,
168
+ strengthNote,
188
169
  '\nCRITICAL: Output only the requested format. No extra commentary.',
189
170
  ].filter(Boolean).join('\n');
190
171
  }
@@ -25,11 +25,64 @@ const RESERVED_CONFIG_SECTIONS = new Set([
25
25
  'ui',
26
26
  ]);
27
27
 
28
+ const DEFAULT_REQUEST_TIMEOUT_MS = 120000;
29
+
28
30
  function isAuthError(error) {
29
31
  const msg = String(error?.message || error || '');
30
32
  return /\b(401|403)\b/.test(msg) || /authentication_error|invalid_api_key|unauthorized|auth\s*failed/i.test(msg);
31
33
  }
32
34
 
35
+ function isRateLimitError(error) {
36
+ const msg = String(error?.message || error || '');
37
+ return error?.status === 429 || /\b429\b|rate[_ -]?limit|tokens per minute|\bTPM\b/i.test(msg);
38
+ }
39
+
40
+ function getRequestTimeoutMs(options = {}) {
41
+ const raw = options.timeoutMs ?? process.env.WINTER_REQUEST_TIMEOUT_MS;
42
+ const value = Number(raw);
43
+ if (Number.isFinite(value) && value > 0) return value;
44
+ return DEFAULT_REQUEST_TIMEOUT_MS;
45
+ }
46
+
47
+ function createTimeoutSignal(timeoutMs, externalSignal = null) {
48
+ const controller = new AbortController();
49
+ let timedOut = false;
50
+ const onAbort = () => {
51
+ controller.abort(externalSignal?.reason || new DOMException('The operation was aborted.', 'AbortError'));
52
+ };
53
+ if (externalSignal?.aborted) {
54
+ onAbort();
55
+ } else if (externalSignal) {
56
+ externalSignal.addEventListener('abort', onAbort, { once: true });
57
+ }
58
+ const timer = setTimeout(() => {
59
+ timedOut = true;
60
+ controller.abort(new Error(`Winter request timed out after ${timeoutMs}ms`));
61
+ }, timeoutMs);
62
+ if (typeof timer.unref === 'function') timer.unref();
63
+ return {
64
+ signal: controller.signal,
65
+ timedOut: () => timedOut,
66
+ cleanup: () => {
67
+ clearTimeout(timer);
68
+ if (externalSignal) externalSignal.removeEventListener('abort', onAbort);
69
+ },
70
+ };
71
+ }
72
+
73
+ function normalizeFetchError(error, provider, timeoutMs, stream = false, timedOut = false) {
74
+ if (timedOut || /timed out/i.test(String(error?.message || ''))) {
75
+ const label = stream ? 'stream' : 'request';
76
+ return new Error(`${provider?.name || 'Provider'} ${label} timed out after ${Math.ceil(timeoutMs / 1000)}s`);
77
+ }
78
+ if (error?.name === 'AbortError' || /abort/i.test(String(error?.message || ''))) {
79
+ const abortError = new Error('AbortError');
80
+ abortError.name = 'AbortError';
81
+ return abortError;
82
+ }
83
+ return error;
84
+ }
85
+
33
86
  export class AIProviderManager {
34
87
  constructor(config) {
35
88
  this.config = config;
@@ -293,6 +346,44 @@ export class AIProviderManager {
293
346
  this.tools = tools;
294
347
  }
295
348
 
349
+ normalizeToolDefinitionsForApi(tools = []) {
350
+ if (!Array.isArray(tools)) return [];
351
+
352
+ return tools
353
+ .map(tool => {
354
+ if (!tool || typeof tool !== 'object') return null;
355
+
356
+ if (tool.type === 'function' && tool.function && typeof tool.function === 'object') {
357
+ return tool;
358
+ }
359
+
360
+ if (tool.name && tool.parameters) {
361
+ return {
362
+ type: 'function',
363
+ function: {
364
+ name: tool.name,
365
+ description: tool.description || '',
366
+ parameters: tool.parameters,
367
+ },
368
+ };
369
+ }
370
+
371
+ if (tool.function?.name) {
372
+ return {
373
+ type: 'function',
374
+ function: {
375
+ name: tool.function.name,
376
+ description: tool.function.description || tool.description || '',
377
+ parameters: tool.function.parameters || tool.parameters || { type: 'object', properties: {} },
378
+ },
379
+ };
380
+ }
381
+
382
+ return null;
383
+ })
384
+ .filter(Boolean);
385
+ }
386
+
296
387
  async chat(message, options = {}) {
297
388
  await this.init();
298
389
  const messages = [
@@ -327,7 +418,7 @@ export class AIProviderManager {
327
418
  model: routingModel,
328
419
  reasoning: routingReasoning,
329
420
  reasoningLevel: options.reasoningLevel || executionProfile.reasoningLevel,
330
- }), { maxAttempts: 3, baseDelayMs: 150 });
421
+ }), { maxAttempts: 3, baseDelayMs: 150, retryable: error => !isRateLimitError(error) && !/\b(400|404)\b/.test(String(error?.message || error || '')) });
331
422
  } catch (error) {
332
423
  if (isAuthError(error) && routedProvider !== defaultProvider && defaultProvider) {
333
424
  if (!this._fallbackWarned) {
@@ -339,7 +430,7 @@ export class AIProviderManager {
339
430
  model: options.model || defaultProvider.model,
340
431
  reasoning: routingReasoning,
341
432
  reasoningLevel: options.reasoningLevel || executionProfile.reasoningLevel,
342
- }), { maxAttempts: 1, baseDelayMs: 0 });
433
+ }), { maxAttempts: 1, baseDelayMs: 0, retryable: error => !isRateLimitError(error) && !/\b(400|404)\b/.test(String(error?.message || error || '')) });
343
434
  }
344
435
  throw error;
345
436
  }
@@ -388,6 +479,7 @@ export class AIProviderManager {
388
479
  if (!provider) {
389
480
  throw new Error('No active provider is configured');
390
481
  }
482
+ const timeoutMs = getRequestTimeoutMs(options);
391
483
 
392
484
  const body = {
393
485
  model: options.model || provider.model,
@@ -405,8 +497,9 @@ export class AIProviderManager {
405
497
  }
406
498
  }
407
499
 
408
- if (this.tools.length > 0 && options.enableTools) {
409
- body.tools = this.tools;
500
+ if (this.tools.length > 0 && options.enableTools && !options.toolPromptOnly) {
501
+ const tools = this.normalizeToolDefinitionsForApi(this.tools);
502
+ if (tools.length > 0) body.tools = tools;
410
503
  }
411
504
 
412
505
  const headers = {
@@ -420,15 +513,26 @@ export class AIProviderManager {
420
513
  headers['Authorization'] = `Bearer ${provider.apiKey}`;
421
514
  }
422
515
 
423
- const response = await fetch(`${provider.baseURL}/chat/completions`, {
424
- method: 'POST',
425
- headers,
426
- body: JSON.stringify(body),
427
- });
516
+ const timeout = createTimeoutSignal(timeoutMs, options.signal || options.abortSignal);
517
+ let response;
518
+ try {
519
+ response = await fetch(`${provider.baseURL}/chat/completions`, {
520
+ method: 'POST',
521
+ headers,
522
+ body: JSON.stringify(body),
523
+ signal: timeout.signal,
524
+ });
525
+ } catch (error) {
526
+ throw normalizeFetchError(error, provider, timeoutMs, false, timeout.timedOut());
527
+ } finally {
528
+ timeout.cleanup();
529
+ }
428
530
 
429
531
  if (!response.ok) {
430
532
  const error = await response.text();
431
- throw new Error(`${provider.name} error (${response.status}): ${error}`);
533
+ const requestError = new Error(`${provider.name} error (${response.status}): ${error}`);
534
+ requestError.status = response.status;
535
+ throw requestError;
432
536
  }
433
537
 
434
538
  return await response.json();
@@ -438,6 +542,7 @@ export class AIProviderManager {
438
542
  if (!provider) {
439
543
  throw new Error('No active provider is configured');
440
544
  }
545
+ const timeoutMs = getRequestTimeoutMs(options);
441
546
 
442
547
  const body = {
443
548
  model: options.model || provider.model,
@@ -460,8 +565,9 @@ export class AIProviderManager {
460
565
  }
461
566
  }
462
567
 
463
- if (this.tools.length > 0 && options.enableTools) {
464
- body.tools = this.tools;
568
+ if (this.tools.length > 0 && options.enableTools && !options.toolPromptOnly) {
569
+ const tools = this.normalizeToolDefinitionsForApi(this.tools);
570
+ if (tools.length > 0) body.tools = tools;
465
571
  }
466
572
 
467
573
  const headers = {
@@ -475,67 +581,78 @@ export class AIProviderManager {
475
581
  headers['Authorization'] = `Bearer ${provider.apiKey}`;
476
582
  }
477
583
 
478
- const response = await fetch(`${provider.baseURL}/chat/completions`, {
479
- method: 'POST',
480
- headers,
481
- body: JSON.stringify(body),
482
- });
483
-
484
- if (!response.ok) {
485
- const error = await response.text();
486
- throw new Error(`${provider.name} stream error (${response.status}): ${error}`);
487
- }
584
+ const timeout = createTimeoutSignal(timeoutMs, options.signal || options.abortSignal);
585
+ let response;
586
+ try {
587
+ response = await fetch(`${provider.baseURL}/chat/completions`, {
588
+ method: 'POST',
589
+ headers,
590
+ body: JSON.stringify(body),
591
+ signal: timeout.signal,
592
+ });
488
593
 
489
- if (!response.body) {
490
- throw new Error(`${provider.name} did not return a stream body`);
491
- }
594
+ if (!response.ok) {
595
+ const error = await response.text();
596
+ const streamError = new Error(`${provider.name} stream error (${response.status}): ${error}`);
597
+ streamError.status = response.status;
598
+ throw streamError;
599
+ }
492
600
 
493
- const decoder = new TextDecoder();
494
- let buffer = '';
601
+ if (!response.body) {
602
+ throw new Error(`${provider.name} did not return a stream body`);
603
+ }
495
604
 
496
- for await (const chunk of response.body) {
497
- buffer += decoder.decode(chunk, { stream: true });
498
- const lines = buffer.split(/\r?\n/);
499
- buffer = lines.pop() || '';
605
+ const decoder = new TextDecoder();
606
+ let buffer = '';
500
607
 
501
- for (const line of lines) {
502
- const trimmed = line.trim();
503
- if (!trimmed || !trimmed.startsWith('data:')) continue;
608
+ for await (const chunk of response.body) {
609
+ buffer += decoder.decode(chunk, { stream: true });
610
+ const lines = buffer.split(/\r?\n/);
611
+ buffer = lines.pop() || '';
504
612
 
505
- const payload = trimmed.slice(5).trim();
506
- if (!payload || payload === '[DONE]') continue;
613
+ for (const line of lines) {
614
+ const trimmed = line.trim();
615
+ if (!trimmed || !trimmed.startsWith('data:')) continue;
507
616
 
508
- let data;
509
- try {
510
- data = JSON.parse(payload);
511
- } catch {
512
- continue;
513
- }
617
+ const payload = trimmed.slice(5).trim();
618
+ if (!payload || payload === '[DONE]') continue;
514
619
 
515
- const choice = data.choices?.[0] || {};
516
- const content = choice.delta?.content ?? choice.message?.content ?? choice.text ?? '';
517
- yield {
518
- content,
519
- usage: data.usage,
520
- raw: data,
521
- };
522
- }
523
- }
620
+ let data;
621
+ try {
622
+ data = JSON.parse(payload);
623
+ } catch {
624
+ continue;
625
+ }
524
626
 
525
- const tail = buffer.trim();
526
- if (tail.startsWith('data:')) {
527
- const payload = tail.slice(5).trim();
528
- if (payload && payload !== '[DONE]') {
529
- try {
530
- const data = JSON.parse(payload);
531
627
  const choice = data.choices?.[0] || {};
628
+ const content = choice.delta?.content ?? choice.message?.content ?? choice.text ?? '';
532
629
  yield {
533
- content: choice.delta?.content ?? choice.message?.content ?? choice.text ?? '',
630
+ content,
534
631
  usage: data.usage,
535
632
  raw: data,
536
633
  };
537
- } catch {}
634
+ }
538
635
  }
636
+
637
+ const tail = buffer.trim();
638
+ if (tail.startsWith('data:')) {
639
+ const payload = tail.slice(5).trim();
640
+ if (payload && payload !== '[DONE]') {
641
+ try {
642
+ const data = JSON.parse(payload);
643
+ const choice = data.choices?.[0] || {};
644
+ yield {
645
+ content: choice.delta?.content ?? choice.message?.content ?? choice.text ?? '',
646
+ usage: data.usage,
647
+ raw: data,
648
+ };
649
+ } catch {}
650
+ }
651
+ }
652
+ } catch (error) {
653
+ throw normalizeFetchError(error, provider, timeoutMs, true, timeout.timedOut());
654
+ } finally {
655
+ timeout.cleanup();
539
656
  }
540
657
  }
541
658
 
@@ -586,7 +703,7 @@ export class AIProviderManager {
586
703
  const body = {
587
704
  model: options.model || provider.model,
588
705
  messages: currentMessages,
589
- tools: this.tools.length > 0 ? this.tools : undefined,
706
+ tools: this.tools.length > 0 ? this.normalizeToolDefinitionsForApi(this.tools) : undefined,
590
707
  };
591
708
 
592
709
  const headers = {
@@ -681,13 +798,13 @@ export class AIProviderManager {
681
798
  let reasoningPrompt = '';
682
799
  if (options.reasoningLevel || options.reasoningPrompt) {
683
800
  reasoningPrompt = options.reasoningPrompt || new ReasoningConfig({
684
- level: options.reasoningLevel || REASONING_LEVELS.MEDIUM,
801
+ level: options.reasoningLevel || REASONING_LEVELS.MAX,
685
802
  provider: this.activeProvider,
686
803
  modelTier: this._modelTier,
687
804
  }).getPromptInstructions();
688
805
  } else if (taskInfo) {
689
806
  // Auto-inject based on task complexity for providers without API reasoning
690
- const level = complexityToReasoningLevel(taskInfo.type);
807
+ const level = REASONING_LEVELS.MAX;
691
808
  const config = new ReasoningConfig({
692
809
  level,
693
810
  provider: this.activeProvider,