@denizokcu/haze 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. package/CHANGELOG.md +19 -0
  2. package/README.md +100 -34
  3. package/dist/cli/commands/chat.d.ts +3 -1
  4. package/dist/cli/commands/chat.js +500 -56
  5. package/dist/cli/commands/commands.d.ts +5 -0
  6. package/dist/cli/commands/commands.js +114 -29
  7. package/dist/cli/commands/formatters.js +32 -2
  8. package/dist/cli/commands/streaming.d.ts +6 -1
  9. package/dist/cli/commands/streaming.js +316 -98
  10. package/dist/cli/index.js +5 -2
  11. package/dist/config/inputHistory.js +8 -0
  12. package/dist/config/providers.d.ts +26 -0
  13. package/dist/config/providers.js +88 -0
  14. package/dist/config/settings.d.ts +9 -2
  15. package/dist/core/agent/compaction.d.ts +13 -0
  16. package/dist/core/agent/compaction.js +34 -0
  17. package/dist/core/agent/errors.d.ts +3 -0
  18. package/dist/core/agent/errors.js +13 -0
  19. package/dist/core/agent/events.d.ts +58 -0
  20. package/dist/core/agent/events.js +3 -0
  21. package/dist/core/goal/completionPolicy.d.ts +28 -0
  22. package/dist/core/goal/completionPolicy.js +84 -0
  23. package/dist/core/goal/requestClassifier.d.ts +6 -0
  24. package/dist/core/goal/requestClassifier.js +31 -0
  25. package/dist/core/goal/sessionGoal.d.ts +30 -0
  26. package/dist/core/goal/sessionGoal.js +88 -0
  27. package/dist/core/session/sessionStore.d.ts +37 -0
  28. package/dist/core/session/sessionStore.js +59 -0
  29. package/dist/core/subagent/subagentRunner.d.ts +33 -0
  30. package/dist/core/subagent/subagentRunner.js +140 -0
  31. package/dist/llm/client.d.ts +1 -1
  32. package/dist/llm/client.js +6 -6
  33. package/dist/llm/hazeTools.d.ts +86 -0
  34. package/dist/llm/hazeTools.js +313 -93
  35. package/dist/llm/initPrompt.js +6 -4
  36. package/dist/llm/systemPrompt.js +11 -7
  37. package/dist/skills/builder/SkillBuilder.d.ts +6 -0
  38. package/dist/skills/builder/SkillBuilder.js +146 -24
  39. package/dist/ui/components/ErrorView.d.ts +2 -1
  40. package/dist/ui/components/Header.d.ts +2 -1
  41. package/dist/ui/components/Header.js +1 -11
  42. package/dist/ui/components/MarkdownText.d.ts +2 -1
  43. package/dist/ui/components/TextInput.d.ts +7 -3
  44. package/dist/ui/components/TextInput.js +112 -27
  45. package/dist/ui/theme.d.ts +3 -0
  46. package/dist/ui/theme.js +4 -1
  47. package/package.json +8 -8
@@ -5,6 +5,12 @@ import { buildSystemPrompt } from '../../llm/systemPrompt.js';
5
5
  import { loadSkillRegistry } from '../../skills/SkillRegistry.js';
6
6
  import { buildSkillTools } from '../../skills/skillTools.js';
7
7
  import { compact, toolCallSummary, toolResultSummary, formatSeconds } from './formatters.js';
8
+ import { isActionRequest, isPlanImplementationRequest, isPlanOnlyRequest, isValidationRequest } from '../../core/goal/requestClassifier.js';
9
+ import { completionDecision, looksIncomplete, noTextAfterToolPrompt, postContinuationPrompt, toolLoopBudgetPrompt } from '../../core/goal/completionPolicy.js';
10
+ import { createSessionGoal, formatGoalStatus, observeGoalToolEvent } from '../../core/goal/sessionGoal.js';
11
+ import { agentEvent } from '../../core/agent/events.js';
12
+ import { isContextOverflowError, isRetryableModelError } from '../../core/agent/errors.js';
13
+ import { createSubagentTool } from '../../core/subagent/subagentRunner.js';
8
14
  function stableToolKey(toolCall) {
9
15
  return `${toolCall.toolName}:${JSON.stringify(toolCall.input)}`;
10
16
  }
@@ -28,31 +34,30 @@ function toolOnlyStepCount(steps) {
28
34
  }
29
35
  return count;
30
36
  }
31
- function isPlanOnlyRequest(value) {
32
- return /\b(create|make|write|draft|outline)\s+(?:a\s+)?plan\b|\bplan\s+(?:for|to)\b/i.test(value) && !/\bimplement|execute|do\b/i.test(value);
33
- }
34
- function isLikelyActionRequest(value) {
35
- if (isPlanOnlyRequest(value))
36
- return false;
37
- return /\b(add|create|write|implement|update|fix|change|support|wire|test|tests|document|docs|documentation|run|verify)\b/i.test(value);
38
- }
39
- function isValidationRequest(value) {
40
- if (isPlanOnlyRequest(value))
41
- return false;
42
- return /\b(run|verify|test|tests|check|validate)\b/i.test(value);
43
- }
44
- function isPlanImplementationRequest(value) {
45
- return /\b(implement|execute|do)\b.*\bplan\b|\bplan\.md\b|\btest_plan\.md\b/i.test(value);
46
- }
47
- function looksIncomplete(text) {
48
- return /\b(incomplete|what remains|remains:|next:|not implemented|not created|no tests exist|created no docs|has not been|have not been|not yet|never executed|not executed|not run|cannot retry|cannot write|cannot validate|tool budget reached)\b/i.test(text);
49
- }
50
37
  function sanitizeAssistantText(text) {
51
38
  return [...text].filter(char => {
52
39
  const code = char.charCodeAt(0);
53
40
  return !(code <= 8 || code === 11 || code === 12 || (code >= 14 && code <= 31) || code === 127 || code === 155);
54
41
  }).join('');
55
42
  }
43
+ function hideSyntheticToolCallMarkup(text) {
44
+ return text
45
+ .replace(/(^|\n)\s*(?:```(?:xml)?\s*)?(?:xml\s*)?<tool_call>[\s\S]*?<\/tool_call>\s*(?:```)?/gi, '$1')
46
+ .replace(/(^|\n)\s*(?:```(?:xml)?\s*)?(?:xml\s*)?<tool_call>[\s\S]*$/i, '$1');
47
+ }
48
+ function isNonSubstantiveAssistantText(text) {
49
+ return /^[`\s]*$/.test(text);
50
+ }
51
+ function assistantDisplayText(text) {
52
+ return hideSyntheticToolCallMarkup(text).trim();
53
+ }
54
+ function normalizeAssistantText(text) {
55
+ return assistantDisplayText(text)
56
+ .replace(/[`*_~#>\-–—:;,.!?()[\]{}"']/g, '')
57
+ .replace(/\s+/g, ' ')
58
+ .trim()
59
+ .toLowerCase();
60
+ }
56
61
  function toolInputPath(input) {
57
62
  return typeof input === 'object' && input != null && 'path' in input && typeof input.path === 'string'
58
63
  ? input.path
@@ -61,40 +66,77 @@ function toolInputPath(input) {
61
66
  function isDuplicateSkippedOutput(output) {
62
67
  return typeof output === 'object' && output != null && 'duplicateSkipped' in output && output.duplicateSkipped === true;
63
68
  }
64
- export async function runAgentTurn(value, displayValue, contextFiles, callbacks) {
69
+ function retryDelayMs(attempt) {
70
+ return Math.min(4000, 1000 * 2 ** attempt);
71
+ }
72
+ async function abortableDelay(milliseconds, signal) {
73
+ if (signal.aborted)
74
+ return;
75
+ await new Promise(resolve => {
76
+ const timer = setTimeout(resolve, milliseconds);
77
+ signal.addEventListener('abort', () => {
78
+ clearTimeout(timer);
79
+ resolve();
80
+ }, { once: true });
81
+ });
82
+ }
83
+ const DEFAULT_MAX_OUTPUT_TOKENS = 16384;
84
+ const IDLE_TIMEOUT_MS = 5 * 60_000;
85
+ const MAIN_STEP_LIMIT = 40;
86
+ const MAIN_TOOL_CALL_LIMIT = 40;
87
+ const MAIN_TOOL_ONLY_STEP_LIMIT = 12;
88
+ const FOLLOW_UP_STEP_LIMIT = 30;
89
+ const FOLLOW_UP_TOOL_CALL_LIMIT = 30;
90
+ const FOLLOW_UP_TOOL_ONLY_STEP_LIMIT = 10;
91
+ const COMPLETION_CONTINUATION_LIMIT = 30;
92
+ function toolOutputOk(output, success) {
93
+ if (!success)
94
+ return false;
95
+ return !(typeof output === 'object' && output != null && 'ok' in output && output.ok === false);
96
+ }
97
+ export async function runAgentTurn(value, displayValue, contextFiles, callbacks, retryAttempt = 0, retryingExistingRequest = false, contextOverflowRecovered = false) {
65
98
  const displayVal = displayValue ?? value;
66
99
  const userMessage = { role: 'user', text: displayVal };
100
+ callbacks.onEvent?.(agentEvent({ type: 'turn_start', request: value }));
67
101
  callbacks.setBusy(true);
68
- callbacks.addMessage(userMessage);
102
+ if (!retryingExistingRequest)
103
+ callbacks.addMessage(userMessage);
69
104
  const abortController = new AbortController();
70
105
  callbacks.setAbortController?.(abortController);
106
+ let turnStatus = 'failed';
71
107
  let idleTimer;
72
108
  const resetIdleTimer = () => {
73
109
  if (idleTimer)
74
110
  clearTimeout(idleTimer);
75
- idleTimer = setTimeout(() => abortController.abort('Haze turn timed out after no model/tool activity.'), 90_000);
111
+ idleTimer = setTimeout(() => abortController.abort('Haze turn timed out after no model/tool activity.'), IDLE_TIMEOUT_MS);
76
112
  };
77
113
  try {
78
114
  const m = await model();
79
115
  if (!m) {
80
- callbacks.addMessage({ role: 'assistant', text: 'No API key configured. Run /login, then /model x-ai/grok-build-0.1. Haze cannot hallucinate without credentials. Progress.' });
116
+ callbacks.addMessage({ role: 'assistant', text: 'No model provider configured. Run /provider to choose or add a provider. Haze cannot hallucinate without a model. Progress.' });
81
117
  return;
82
118
  }
83
119
  const activeModel = m;
84
120
  const skillRegistry = await loadSkillRegistry();
85
- const availableTools = { ...hazeTools, ...buildSkillTools(skillRegistry) };
121
+ const subagentTool = createSubagentTool({ model: activeModel, contextFiles });
122
+ const availableTools = { ...hazeTools, subagent: subagentTool, ...buildSkillTools(skillRegistry) };
123
+ const goal = createSessionGoal(value);
124
+ callbacks.setGoalStatus?.(formatGoalStatus(goal));
86
125
  const likelyPlanOnlyRequest = isPlanOnlyRequest(value);
87
126
  const likelyPlanImplementationRequest = isPlanImplementationRequest(value);
88
- const likelyActionRequest = isLikelyActionRequest(value);
127
+ const likelyActionRequest = isActionRequest(value);
89
128
  const likelyValidationRequest = isValidationRequest(value);
90
129
  const planImplementationGuidance = 'When implementing a plan file, first identify the concrete required checklist items and compare them with the current files. Do not edit source or tests when the required behavior is already present. Implement the smallest clearly required phase or required items, skip optional/design-question items unless explicitly requested, add tests rather than exploratory one-off scripts where possible, use file tools (not bash) for any file changes, run validation once after code/test edits, then update plan status with file tools if requested. Do not call unresolved optional scope a blocker.';
91
- const requestMessages = likelyPlanImplementationRequest
92
- ? [...callbacks.getConversation(), { role: 'user', content: value }, { role: 'user', content: planImplementationGuidance }]
93
- : [...callbacks.getConversation(), { role: 'user', content: value }];
130
+ const requestMessages = retryingExistingRequest
131
+ ? callbacks.getConversation()
132
+ : likelyPlanImplementationRequest
133
+ ? [...callbacks.getConversation(), { role: 'user', content: value }, { role: 'user', content: planImplementationGuidance }]
134
+ : [...callbacks.getConversation(), { role: 'user', content: value }];
94
135
  callbacks.setConversation(requestMessages);
95
136
  resetIdleTimer();
96
137
  let currentAssistantId = `assistant-${Date.now()}`;
97
138
  let assistantStarted = false;
139
+ let currentAssistantStarted = false;
98
140
  let currentAssistantText = '';
99
141
  let assistantText = '';
100
142
  let toolEpoch = 0;
@@ -102,22 +144,50 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
102
144
  let editFileFailed = false;
103
145
  let mutatingToolSucceeded = false;
104
146
  let validationToolSucceeded = false;
147
+ let validationToolFailed = false;
105
148
  let sawReadOnlyTool = false;
106
149
  let sawToolCall = false;
107
150
  let textAfterTool = false;
108
- let forcedContinuationUsed = false;
109
- let secondContinuationUsed = false;
151
+ let completionContinuationCount = 0;
152
+ const maxCompletionContinuations = COMPLETION_CONTINUATION_LIMIT;
110
153
  let editRecoveryPath;
111
154
  let editRecoveryReadSatisfied = false;
112
155
  const toolSummaries = [];
156
+ const visibleAssistantTexts = new Set();
157
+ const previousAssistantText = normalizeAssistantText(callbacks.getLastAssistantText());
158
+ if (previousAssistantText)
159
+ visibleAssistantTexts.add(previousAssistantText);
160
+ const rememberVisibleAssistantText = (text) => {
161
+ const normalized = normalizeAssistantText(text);
162
+ if (!normalized)
163
+ return;
164
+ visibleAssistantTexts.add(normalized);
165
+ callbacks.setLastAssistantText(text);
166
+ };
167
+ const isDuplicateVisibleAssistantText = (text) => {
168
+ const normalized = normalizeAssistantText(text);
169
+ return normalized.length > 0 && visibleAssistantTexts.has(normalized);
170
+ };
171
+ const isPrefixOfVisibleAssistantText = (text) => {
172
+ const normalized = normalizeAssistantText(text);
173
+ return normalized.length > 0 && [...visibleAssistantTexts].some(previous => previous.startsWith(normalized) && previous !== normalized);
174
+ };
113
175
  const toolExecutionContext = { inFlightToolCalls: new Map() };
114
- const toolGroupId = `tools-${Date.now()}-${Math.random().toString(36).slice(2)}`;
176
+ let toolGroupId = `tools-${Date.now()}-${Math.random().toString(36).slice(2)}`;
177
+ const INLINE_DIFF_LINE_LIMIT = 20;
115
178
  const toolDisplayItems = [];
116
179
  let toolGroupStarted = false;
180
+ let toolGroupFinalized = false;
117
181
  function renderToolGroup(streaming) {
118
182
  const visibleItems = toolDisplayItems.filter(item => !item.hidden);
183
+ const running = visibleItems.some(item => item.status === 'running');
184
+ const failures = visibleItems.filter(item => item.status === 'error');
185
+ const changes = visibleItems.filter(item => /^(editFile|replaceLines|writeFile)\b/.test(item.summary));
186
+ const compactItems = !running && visibleItems.length > 12
187
+ ? [...new Map([...failures, ...changes].map(item => [item.id, item])).values()]
188
+ : visibleItems;
119
189
  const grouped = new Map();
120
- for (const item of visibleItems) {
190
+ for (const item of compactItems) {
121
191
  const key = `${item.status}:${item.summary}:${item.result ?? ''}`;
122
192
  const current = grouped.get(key);
123
193
  if (current)
@@ -126,14 +196,33 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
126
196
  grouped.set(key, { item, count: 1 });
127
197
  }
128
198
  const rows = [...grouped.values()];
129
- const running = visibleItems.some(item => item.status === 'running');
130
- const header = running || streaming ? 'Running tools' : `Tools: ${visibleItems.length} call${visibleItems.length === 1 ? '' : 's'}`;
131
- const lines = rows.map(({ item, count }) => {
199
+ const compactSuffix = !running && visibleItems.length > 12 ? ` · showing ${compactItems.length} important` : '';
200
+ const header = running || streaming
201
+ ? 'Running tools'
202
+ : `${visibleItems.length} call${visibleItems.length === 1 ? '' : 's'} · ${changes.length} change${changes.length === 1 ? '' : 's'} · ${failures.length} failed${compactSuffix}`;
203
+ const lines = [];
204
+ for (const { item, count } of rows) {
132
205
  const icon = item.status === 'running' ? '…' : item.status === 'success' ? '✓' : '✗';
133
206
  const countText = count > 1 ? ` ×${count}` : '';
134
207
  const result = item.status === 'running' ? '' : ` — ${item.result ?? item.status}${item.durationMs == null ? '' : ` in ${formatSeconds(item.durationMs)}`}`;
135
- return ` ${icon} ${item.summary}${countText}${result}`;
136
- });
208
+ lines.push(` ${icon} ${item.summary}${countText}${result}`);
209
+ if (item.diff && item.diff.length > 0 && (item.diffLineCount ?? item.diff.length) <= INLINE_DIFF_LINE_LIMIT) {
210
+ for (const diffLine of item.diff) {
211
+ const lineNumber = diffLine.type === 'add' ? diffLine.newLine : diffLine.oldLine;
212
+ const marker = diffLine.type === 'add' ? '+' : diffLine.type === 'remove' ? '-' : ' ';
213
+ lines.push(` ${String(lineNumber ?? '').padStart(5)} ${marker} ${diffLine.text}`);
214
+ }
215
+ }
216
+ else if ((item.diffLineCount ?? 0) > INLINE_DIFF_LINE_LIMIT) {
217
+ lines.push(` diff hidden (${item.diffLineCount} changed lines; run git diff to inspect)`);
218
+ }
219
+ if (item.subItems && item.subItems.length > 0) {
220
+ for (const sub of item.subItems) {
221
+ const subDuration = sub.durationMs > 1000 ? ` (${formatSeconds(sub.durationMs)})` : '';
222
+ lines.push(` · ${sub.name} — ${sub.summary}${subDuration}`);
223
+ }
224
+ }
225
+ }
137
226
  return [header, ...lines].join('\n');
138
227
  }
139
228
  function updateToolGroup(streaming = true) {
@@ -145,36 +234,74 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
145
234
  else {
146
235
  callbacks.updateMessage(toolGroupId, { text, streaming });
147
236
  }
237
+ if (!streaming)
238
+ toolGroupFinalized = true;
148
239
  }
149
240
  function recordToolStart(toolCall) {
241
+ if (toolGroupFinalized) {
242
+ toolDisplayItems.length = 0;
243
+ toolGroupId = `tools-${Date.now()}-${Math.random().toString(36).slice(2)}`;
244
+ toolGroupFinalized = false;
245
+ toolGroupStarted = false;
246
+ }
247
+ callbacks.onEvent?.(agentEvent({ type: 'tool_start', id: toolCall.toolCallId, name: toolCall.toolName, input: toolCall.input }));
150
248
  toolDisplayItems.push({ id: toolCall.toolCallId, summary: toolCallSummary(toolCall.toolName, toolCall.input), status: 'running' });
151
249
  updateToolGroup(true);
250
+ const runningSubagents = toolDisplayItems.filter(item => item.status === 'running' && item.summary.startsWith('subagent')).length;
251
+ if (runningSubagents > 0)
252
+ callbacks.setBusyLabel?.(`Running ${runningSubagents} subagent${runningSubagents === 1 ? '' : 's'}`);
152
253
  }
153
254
  function recordToolDisplayFinish(event) {
255
+ callbacks.onEvent?.(agentEvent({ type: 'tool_end', id: event.toolCall.toolCallId, name: event.toolCall.toolName, success: event.success, output: event.output, error: event.error, durationMs: event.durationMs }));
154
256
  const item = toolDisplayItems.find(candidate => candidate.id === event.toolCall.toolCallId);
155
257
  if (!item)
156
258
  return;
157
- item.status = event.success ? 'success' : 'error';
259
+ item.status = toolOutputOk(event.output, event.success) ? 'success' : 'error';
158
260
  item.result = toolResultSummary(event);
159
261
  item.durationMs = event.durationMs;
160
262
  item.hidden = isDuplicateSkippedOutput(event.output);
263
+ if (typeof event.output === 'object' && event.output != null) {
264
+ const output = event.output;
265
+ if (typeof output.diffLineCount === 'number')
266
+ item.diffLineCount = output.diffLineCount;
267
+ if (Array.isArray(output.diff))
268
+ item.diff = output.diff;
269
+ }
270
+ if (event.toolCall.toolName === 'subagent' && typeof event.output === 'object' && event.output != null) {
271
+ const out = event.output;
272
+ if (Array.isArray(out.toolCalls)) {
273
+ item.subItems = out.toolCalls.map(tc => ({
274
+ name: tc.name,
275
+ summary: tc.summary,
276
+ durationMs: tc.durationMs,
277
+ }));
278
+ }
279
+ }
161
280
  updateToolGroup(toolDisplayItems.some(candidate => candidate.status === 'running'));
281
+ const runningSubagents = toolDisplayItems.filter(i => i.status === 'running' && i.summary.startsWith('subagent')).length;
282
+ if (runningSubagents === 0)
283
+ callbacks.setBusyLabel?.('Haze is thinking');
284
+ else
285
+ callbacks.setBusyLabel?.(`Running ${runningSubagents} subagent${runningSubagents === 1 ? '' : 's'}`);
162
286
  }
163
- callbacks.debugLog(`request started with ${requestMessages.length} conversation messages; action=${likelyActionRequest}`);
287
+ callbacks.debugLog(`request started with ${requestMessages.length} conversation messages; intent=${goal.normalizedIntent}; action=${likelyActionRequest}`);
164
288
  function recordToolFinish(event) {
165
289
  const path = toolInputPath(event.toolCall.input);
166
290
  const duplicateSkipped = isDuplicateSkippedOutput(event.output);
167
- if (!event.success && ['editFile', 'replaceLines', 'writeFile'].includes(event.toolCall.toolName)) {
291
+ const ok = toolOutputOk(event.output, event.success);
292
+ observeGoalToolEvent(goal, { ...event.toolCall, success: ok, output: event.output, duplicateSkipped });
293
+ callbacks.setGoalStatus?.(formatGoalStatus(goal));
294
+ if (!ok && ['editFile', 'replaceLines', 'writeFile'].includes(event.toolCall.toolName)) {
168
295
  editFileFailed = true;
169
296
  editRecoveryPath = path;
170
297
  editRecoveryReadSatisfied = false;
171
298
  }
172
- if (event.success && ['listFiles', 'readFile'].includes(event.toolCall.toolName))
299
+ if (ok && ['listFiles', 'readFile'].includes(event.toolCall.toolName))
173
300
  sawReadOnlyTool = true;
174
- if (event.success && event.toolCall.toolName === 'readFile' && path && path === editRecoveryPath && !duplicateSkipped) {
301
+ if (ok && event.toolCall.toolName === 'readFile' && path && path === editRecoveryPath && !duplicateSkipped) {
175
302
  editRecoveryReadSatisfied = true;
176
303
  }
177
- if (event.success && ['editFile', 'replaceLines', 'writeFile'].includes(event.toolCall.toolName)) {
304
+ if (ok && !duplicateSkipped && ['editFile', 'replaceLines', 'writeFile'].includes(event.toolCall.toolName)) {
178
305
  mutatingToolSucceeded = true;
179
306
  if (!path || path === editRecoveryPath) {
180
307
  editRecoveryPath = undefined;
@@ -183,9 +310,10 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
183
310
  }
184
311
  }
185
312
  if (event.success && event.toolCall.toolName === 'bash') {
186
- const ok = typeof event.output === 'object' && event.output != null && 'ok' in event.output ? Boolean(event.output.ok) : true;
187
313
  if (ok)
188
314
  validationToolSucceeded = true;
315
+ else
316
+ validationToolFailed = true;
189
317
  }
190
318
  }
191
319
  async function streamAssistantResponse(messages, reason, prompt, allowTools = false) {
@@ -194,6 +322,7 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
194
322
  let responseStarted = false;
195
323
  let responseText = '';
196
324
  let continuationToolCalls = 0;
325
+ let followUpStreamError;
197
326
  const continuationMessages = [
198
327
  ...messages,
199
328
  { role: 'user', content: prompt },
@@ -201,21 +330,22 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
201
330
  const followUp = streamText({
202
331
  model: activeModel,
203
332
  temperature: 0,
333
+ maxOutputTokens: DEFAULT_MAX_OUTPUT_TOKENS,
204
334
  system: buildSystemPrompt(contextFiles),
205
335
  messages: continuationMessages,
206
336
  tools: availableTools,
207
337
  toolChoice: allowTools ? 'auto' : 'none',
208
- stopWhen: stepCountIs(10),
338
+ stopWhen: stepCountIs(FOLLOW_UP_STEP_LIMIT),
209
339
  abortSignal: abortController.signal,
210
340
  experimental_context: toolExecutionContext,
211
341
  prepareStep({ steps, messages }) {
212
342
  continuationToolCalls = steps.flatMap(step => step.toolCalls).length;
213
- if (continuationToolCalls >= 10 || toolOnlyStepCount(steps) >= 5) {
343
+ if (continuationToolCalls >= FOLLOW_UP_TOOL_CALL_LIMIT || toolOnlyStepCount(steps) >= FOLLOW_UP_TOOL_ONLY_STEP_LIMIT) {
214
344
  return {
215
345
  toolChoice: 'none',
216
346
  messages: [
217
347
  ...messages,
218
- { role: 'user', content: 'Tool budget reached. If the current request is complete, summarize only current-turn changes and validation. If incomplete, state the concrete blocker briefly; do not claim tools are unavailable and do not recap unrelated earlier tasks.' },
348
+ { role: 'user', content: toolLoopBudgetPrompt() },
219
349
  ],
220
350
  };
221
351
  }
@@ -242,6 +372,7 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
242
372
  return undefined;
243
373
  },
244
374
  onError({ error }) {
375
+ followUpStreamError = error;
245
376
  callbacks.debugLog(`stream error: ${error instanceof Error ? error.message : String(error)}`);
246
377
  },
247
378
  onFinish(event) {
@@ -271,30 +402,50 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
271
402
  resetIdleTimer();
272
403
  const delta = sanitizeAssistantText(rawDelta);
273
404
  responseText += delta;
405
+ const displayText = assistantDisplayText(responseText);
406
+ if ((!displayText || isNonSubstantiveAssistantText(displayText) || isPrefixOfVisibleAssistantText(displayText)) && !responseStarted)
407
+ continue;
274
408
  if (!responseStarted) {
275
409
  responseStarted = true;
276
- callbacks.addMessage({ id: responseId, role: 'assistant', text: delta, streaming: true });
410
+ callbacks.onEvent?.(agentEvent({ type: 'message_start', id: responseId, role: 'assistant' }));
411
+ callbacks.addMessage({ id: responseId, role: 'assistant', text: displayText, streaming: true });
277
412
  }
278
413
  else {
279
- callbacks.updateMessage(responseId, { text: responseText });
414
+ callbacks.onEvent?.(agentEvent({ type: 'message_update', id: responseId, text: displayText }));
415
+ callbacks.updateMessage(responseId, { text: displayText });
280
416
  }
281
417
  }
418
+ try {
419
+ await followUp.response;
420
+ }
421
+ catch (error) {
422
+ throw followUpStreamError ?? error;
423
+ }
424
+ const finalText = assistantDisplayText(responseText);
425
+ const visibleFinalText = finalText;
426
+ const hidden = visibleFinalText.length === 0 || isNonSubstantiveAssistantText(visibleFinalText) || isDuplicateVisibleAssistantText(visibleFinalText);
282
427
  if (responseStarted) {
283
- callbacks.setLastAssistantText(responseText.trim());
284
- callbacks.updateMessage(responseId, { streaming: false });
428
+ if (!hidden)
429
+ rememberVisibleAssistantText(visibleFinalText);
430
+ callbacks.onEvent?.(agentEvent({ type: 'message_end', id: responseId, text: visibleFinalText, hidden }));
431
+ callbacks.updateMessage(responseId, { text: visibleFinalText, streaming: false, hidden });
285
432
  }
286
- return responseText.trim();
433
+ return { text: finalText, id: responseId, started: responseStarted };
287
434
  }
435
+ let streamError;
436
+ let lastFinishReason;
288
437
  const result = streamText({
289
438
  model: activeModel,
290
439
  temperature: 0,
440
+ maxOutputTokens: DEFAULT_MAX_OUTPUT_TOKENS,
291
441
  system: buildSystemPrompt(contextFiles),
292
442
  messages: requestMessages,
293
443
  tools: availableTools,
294
- stopWhen: stepCountIs(12),
444
+ stopWhen: stepCountIs(MAIN_STEP_LIMIT),
295
445
  abortSignal: abortController.signal,
296
446
  experimental_context: toolExecutionContext,
297
447
  onError({ error }) {
448
+ streamError = error;
298
449
  callbacks.debugLog(`stream error: ${error instanceof Error ? error.message : String(error)}`);
299
450
  },
300
451
  prepareStep({ steps, messages }) {
@@ -331,7 +482,7 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
331
482
  ],
332
483
  };
333
484
  }
334
- if (likelyActionRequest && !mutatingToolSucceeded && consecutiveToolOnlySteps >= 3 && toolCalls.length < 10) {
485
+ if (likelyActionRequest && !mutatingToolSucceeded && consecutiveToolOnlySteps >= 3 && toolCalls.length < MAIN_TOOL_CALL_LIMIT) {
335
486
  callbacks.debugLog('nudging action request toward mutation after read-only steps');
336
487
  return {
337
488
  messages: [
@@ -340,13 +491,13 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
340
491
  ],
341
492
  };
342
493
  }
343
- if (toolCalls.length >= 12 || consecutiveToolOnlySteps >= 5) {
494
+ if (toolCalls.length >= MAIN_TOOL_CALL_LIMIT || consecutiveToolOnlySteps >= MAIN_TOOL_ONLY_STEP_LIMIT) {
344
495
  callbacks.debugLog('forcing text response to avoid tool loop');
345
496
  return {
346
497
  toolChoice: 'none',
347
498
  messages: [
348
499
  ...messages,
349
- { role: 'user', content: 'Tool budget reached. If the current request is complete, summarize only current-turn changes and validation. If the requested change is incomplete, state the concrete blocker briefly. Do not claim tools are unavailable, recap unrelated earlier tasks, or provide a generic remains list.' },
500
+ { role: 'user', content: toolLoopBudgetPrompt() },
350
501
  ],
351
502
  };
352
503
  }
@@ -355,6 +506,7 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
355
506
  return undefined;
356
507
  },
357
508
  onStepFinish({ stepNumber, text, toolCalls, toolResults, finishReason }) {
509
+ lastFinishReason = finishReason;
358
510
  callbacks.debugLog(`step ${stepNumber} finished: ${finishReason}; text=${text.length}; toolCalls=${toolCalls.length}; toolResults=${toolResults.length}`);
359
511
  },
360
512
  onFinish(event) {
@@ -386,20 +538,32 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
386
538
  const delta = sanitizeAssistantText(rawDelta);
387
539
  if (sawToolCall)
388
540
  textAfterTool = true;
389
- if (currentAssistantText.length > 0 && toolEpoch > currentAssistantToolEpoch) {
390
- callbacks.updateMessage(currentAssistantId, { streaming: false });
541
+ if (currentAssistantStarted && currentAssistantText.length > 0 && toolEpoch > currentAssistantToolEpoch) {
542
+ const intermediateText = assistantDisplayText(currentAssistantText);
543
+ const hidden = intermediateText.length === 0 || isNonSubstantiveAssistantText(intermediateText) || isDuplicateVisibleAssistantText(intermediateText);
544
+ if (!hidden)
545
+ rememberVisibleAssistantText(intermediateText);
546
+ callbacks.onEvent?.(agentEvent({ type: 'message_end', id: currentAssistantId, text: intermediateText, hidden }));
547
+ callbacks.updateMessage(currentAssistantId, { text: intermediateText, streaming: false, hidden });
391
548
  currentAssistantId = `assistant-${Date.now()}-${Math.random().toString(36).slice(2)}`;
549
+ currentAssistantStarted = false;
392
550
  currentAssistantText = '';
393
551
  currentAssistantToolEpoch = toolEpoch;
394
552
  }
395
553
  assistantText += delta;
396
554
  currentAssistantText += delta;
397
- if (currentAssistantText === delta) {
555
+ const displayText = assistantDisplayText(currentAssistantText);
556
+ if ((!displayText || isNonSubstantiveAssistantText(displayText) || isPrefixOfVisibleAssistantText(displayText)) && !currentAssistantStarted)
557
+ continue;
558
+ if (!currentAssistantStarted) {
398
559
  assistantStarted = true;
399
- callbacks.addMessage({ id: currentAssistantId, role: 'assistant', text: currentAssistantText, streaming: true });
560
+ currentAssistantStarted = true;
561
+ callbacks.onEvent?.(agentEvent({ type: 'message_start', id: currentAssistantId, role: 'assistant' }));
562
+ callbacks.addMessage({ id: currentAssistantId, role: 'assistant', text: displayText, streaming: true });
400
563
  }
401
564
  else {
402
- callbacks.updateMessage(currentAssistantId, { text: currentAssistantText });
565
+ callbacks.onEvent?.(agentEvent({ type: 'message_update', id: currentAssistantId, text: displayText }));
566
+ callbacks.updateMessage(currentAssistantId, { text: displayText });
403
567
  }
404
568
  }
405
569
  let completedConversation = callbacks.getConversation();
@@ -408,54 +572,81 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
408
572
  completedConversation = [...requestMessages, ...response.messages];
409
573
  callbacks.setConversation(completedConversation);
410
574
  }
411
- catch {
412
- // Keep the conversation from onFinish if the response promise is unavailable.
575
+ catch (error) {
576
+ throw streamError ?? error;
413
577
  }
414
578
  callbacks.debugLog(`response stream finished; session has ${completedConversation.length} model messages`);
415
- const finalAssistantText = assistantText.trim();
416
- const assistantAdmitsIncomplete = looksIncomplete(finalAssistantText);
417
- const requestCompletedByTools = mutatingToolSucceeded && validationToolSucceeded && !editRecoveryPath;
418
- const needsActionContinuation = likelyActionRequest
419
- && !requestCompletedByTools
420
- && ((sawReadOnlyTool && !mutatingToolSucceeded) || editFileFailed || assistantAdmitsIncomplete);
421
- const needsValidationContinuation = likelyValidationRequest && !requestCompletedByTools && !validationToolSucceeded && (sawReadOnlyTool || mutatingToolSucceeded || assistantAdmitsIncomplete);
579
+ if (lastFinishReason === 'length' && !sawToolCall && completionContinuationCount < maxCompletionContinuations) {
580
+ completionContinuationCount += 1;
581
+ callbacks.debugLog('output token limit reached, auto-continuing');
582
+ const continuation = await streamAssistantResponse(completedConversation, 'output token limit reached', 'Your response was cut off because you hit the output token limit. Continue from where you left off — do not repeat what you already said, just pick up exactly where you stopped.', true);
583
+ completedConversation = callbacks.getConversation();
584
+ if (continuation.text) {
585
+ assistantText += '\n' + continuation.text;
586
+ }
587
+ }
588
+ const combinedAssistantText = assistantDisplayText(assistantText);
589
// decideCompletion(text): calls completionDecision with the original request
// (`value`), the current `goal`, the supplied assistant text, and the
// tool-outcome flags from the enclosing scope. The flags are closed over and
// read when the helper is invoked, not when it is defined.
+ const decideCompletion = (text) => completionDecision({
590
+ request: value,
591
+ goal,
592
+ assistantText: text,
593
+ sawReadOnlyTool,
594
+ sawToolCall,
595
+ mutatingToolSucceeded,
596
+ validationToolSucceeded,
597
+ validationToolFailed,
598
+ editFileFailed,
599
+ editRecoveryPath,
600
+ });
601
+ let decision = decideCompletion(combinedAssistantText);
602
// runCompletionLoop(seedConversation, seedText): keeps re-prompting the model
// while the shared `decision` asks for an action or validation continuation
// and the shared continuation counter is below maxCompletionContinuations.
//   seedConversation - conversation passed to the first continuation request.
//   seedText         - assistant text used to choose the first fallback prompt.
// Returns nothing. It reassigns the outer `decision`, increments the outer
// `completionContinuationCount`, and may add messages through `callbacks`.
+ async function runCompletionLoop(seedConversation, seedText) {
603
+ let loopConversation = seedConversation;
604
+ let latestText = seedText;
605
+ while ((decision.needsActionContinuation || decision.needsValidationContinuation) && completionContinuationCount < maxCompletionContinuations) {
606
+ completionContinuationCount += 1;
607
// Prefer the prompt supplied by the decision; otherwise choose a fallback
// based on whether the latest text looks incomplete.
+ const prompt = decision.continuationPrompt
608
+ ?? (looksIncomplete(latestText) ? postContinuationPrompt() : 'Continue the same user goal until it is complete, blocked by a concrete issue, or needs a user decision. Focus on the concrete blocker, not a generic plan.');
609
+ const continuation = await streamAssistantResponse(loopConversation, `completion gate ${completionContinuationCount}`, prompt, true);
610
// Re-read the conversation from the callbacks for the next iteration.
+ loopConversation = callbacks.getConversation();
611
// An empty continuation keeps the previous text, so the decision is
// re-evaluated against the last non-empty assistant text.
+ if (continuation.text)
612
+ latestText = continuation.text;
613
+ decision = decideCompletion(latestText);
614
+ }
615
// Still wanting a continuation here means the loop stopped on the limit.
+ if ((decision.needsActionContinuation || decision.needsValidationContinuation) && completionContinuationCount >= maxCompletionContinuations) {
616
+ callbacks.addMessage({ role: 'assistant', text: 'Stopped after the autonomous safety limit. The current goal may still need work; ask me to continue and I will resume from the latest tool results.' });
617
+ }
618
// No assistant text but tools did run: request one final response.
// NOTE(review): the trailing `false` looks like the allow-tools flag, since
// `allowTools` is passed in that position elsewhere in this function. Confirm.
+ if (!latestText && toolSummaries.length > 0) {
619
+ const followUp = await streamAssistantResponse(loopConversation, 'completion loop ended without text', noTextAfterToolPrompt(false), false);
620
+ if (!followUp.text)
621
+ callbacks.addMessage({ role: 'assistant', text: `Finished tool work but the model did not produce a final response. Last tool result: ${toolSummaries.at(-1)}.` });
622
+ }
623
+ }
422
624
  if (assistantStarted) {
423
- callbacks.setLastAssistantText(finalAssistantText);
424
- callbacks.updateMessage(currentAssistantId, { streaming: false });
425
- if ((needsActionContinuation || needsValidationContinuation) && !forcedContinuationUsed) {
426
- forcedContinuationUsed = true;
427
- callbacks.updateMessage(currentAssistantId, { text: 'Continuing to complete the requested change...', streaming: false });
428
- const prompt = editFileFailed
429
- ? 'Your editFile attempt failed. Use the latest readFile line-numbered output and replaceLines to complete the requested change. Continue with any remaining tests or validation if relevant. Do not stop with a summary.'
430
- : needsValidationContinuation
431
- ? 'You have not run the requested validation yet. Continue now by running the appropriate test/check command. Summarize only after the command finishes.'
432
- : mutatingToolSucceeded
433
- ? 'Your previous response says the current request is incomplete. Continue now with the remaining edits and validation for this same request. Do not summarize a plan unless blocked.'
434
- : 'You inspected files but have not made the requested change yet. Continue now by editing or writing the necessary files. Do not summarize a plan unless blocked.';
435
- const continuationText = await streamAssistantResponse(completedConversation, 'current-turn completion gate', prompt, true);
436
- if (!secondContinuationUsed && looksIncomplete(continuationText) && (likelyActionRequest || likelyValidationRequest)) {
437
- secondContinuationUsed = true;
438
- await streamAssistantResponse(callbacks.getConversation(), 'post-continuation completion gate', 'Your previous response still described unfinished work, missing validation, or a tool-budget issue. If any tools are still available, complete the remaining edit or run the final validation now. Only call something a blocker if a concrete tool failure prevents progress.', true);
439
- }
625
+ const hidePreToolFragment = sawToolCall && !textAfterTool;
626
+ const visibleFinalAssistantText = assistantDisplayText(currentAssistantText);
627
+ const hidden = visibleFinalAssistantText.length === 0 || isNonSubstantiveAssistantText(visibleFinalAssistantText) || isDuplicateVisibleAssistantText(visibleFinalAssistantText) || hidePreToolFragment;
628
+ if (!hidden)
629
+ rememberVisibleAssistantText(visibleFinalAssistantText);
630
+ callbacks.onEvent?.(agentEvent({ type: 'message_end', id: currentAssistantId, text: visibleFinalAssistantText, hidden }));
631
+ callbacks.updateMessage(currentAssistantId, { text: visibleFinalAssistantText, streaming: false, hidden });
632
+ if (decision.needsActionContinuation || decision.needsValidationContinuation) {
633
+ await runCompletionLoop(completedConversation, combinedAssistantText);
440
634
  }
441
635
  else if (sawToolCall && !textAfterTool) {
442
- const followUpText = await streamAssistantResponse(completedConversation, 'tool use completed without follow-up text', 'Continue from the tool result and answer my original request. Do not call tools. Summarize only current-turn changes and validation; do not recap unrelated earlier tasks.', false);
443
- if (!followUpText) {
636
+ const followUp = await streamAssistantResponse(completedConversation, 'tool use completed without follow-up text', noTextAfterToolPrompt(false), false);
637
+ if (!followUp.text) {
444
638
  callbacks.addMessage({ role: 'assistant', text: 'Stopped after tool use without a follow-up response. You can ask me to continue if the task is not complete.' });
445
639
  }
446
640
  }
447
641
  }
448
642
  else if (sawToolCall) {
449
643
  const allowTools = (likelyActionRequest && (!mutatingToolSucceeded || editFileFailed)) || (likelyValidationRequest && !validationToolSucceeded);
450
- const prompt = allowTools
451
- ? 'Continue the original request now. If it asks for a change, edit or write the necessary files. If it asks to run or verify tests, run the command. Do not provide only a retrospective summary unless blocked.'
452
- : 'Continue from the tool result and answer my original request. Do not call tools. Summarize only current-turn changes and validation; do not recap unrelated earlier tasks.';
453
- const followUpText = await streamAssistantResponse(completedConversation, 'tool-only turn completed without text', prompt, allowTools);
454
- if (!secondContinuationUsed && allowTools && looksIncomplete(followUpText)) {
455
- secondContinuationUsed = true;
456
- await streamAssistantResponse(callbacks.getConversation(), 'post-follow-up completion gate', 'Your previous response still described unfinished work, missing validation, or a tool-budget issue. If any tools are still available, complete the remaining edit or run the final validation now. Only call something a blocker if a concrete tool failure prevents progress.', true);
457
- }
458
- if (!followUpText) {
644
+ const prompt = noTextAfterToolPrompt(allowTools);
645
+ const followUp = await streamAssistantResponse(completedConversation, 'tool-only turn completed without text', prompt, allowTools);
646
+ decision = decideCompletion(followUp.text);
647
+ if (allowTools)
648
+ await runCompletionLoop(callbacks.getConversation(), followUp.text);
649
+ if (!followUp.text && completionContinuationCount === 0) {
459
650
  const fallback = toolSummaries.length > 0
460
651
  ? `Finished tool work but the model did not produce a final response. Last tool result: ${toolSummaries.at(-1)}.`
461
652
  : 'Finished without a text response.';
@@ -465,21 +656,48 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
465
656
  else {
466
657
  callbacks.addMessage({ id: currentAssistantId, role: 'assistant', text: 'Finished without a text response.', streaming: false });
467
658
  }
659
+ goal.phase = 'done';
660
+ goal.status = 'complete';
661
+ turnStatus = 'complete';
662
+ callbacks.setGoalStatus?.(undefined);
468
663
  }
469
664
  catch (error) {
470
665
  if (abortController.signal.aborted) {
666
+ turnStatus = 'aborted';
471
667
  callbacks.debugLog('request aborted');
472
668
  callbacks.addMessage({ role: 'system', text: 'Thinking aborted. You can type again.' });
473
669
  }
474
670
  else {
475
671
  const text = error instanceof Error ? error.message : String(error);
476
672
  callbacks.debugLog(`error: ${text}`);
673
+ if (!contextOverflowRecovered && isContextOverflowError(error)) {
674
+ const compacted = callbacks.compactConversation?.('Automatic recovery after provider context overflow. Preserve the active user request and concrete next steps.') ?? false;
675
+ callbacks.onEvent?.(agentEvent({ type: 'context_overflow', recovered: compacted, error: text }));
676
+ if (compacted) {
677
+ callbacks.addMessage({ role: 'system', text: 'Context overflow detected; compacted older context and retrying the same request once.' });
678
+ await runAgentTurn(value, displayValue, contextFiles, callbacks, retryAttempt, true, true);
679
+ return;
680
+ }
681
+ callbacks.addMessage({ role: 'system', text: 'Context overflow detected, but there was not enough conversation history to compact automatically.' });
682
+ }
683
+ const maxRetries = 2;
684
+ if (retryAttempt < maxRetries && isRetryableModelError(error)) {
685
+ const delay = retryDelayMs(retryAttempt);
686
+ callbacks.onEvent?.(agentEvent({ type: 'retry', attempt: retryAttempt + 1, maxAttempts: maxRetries, delayMs: delay, error: text }));
687
+ callbacks.addMessage({ role: 'system', text: `Transient model error; retrying attempt ${retryAttempt + 1}/${maxRetries} in ${formatSeconds(delay)}: ${text}` });
688
+ await abortableDelay(delay, abortController.signal);
689
+ if (abortController.signal.aborted)
690
+ return;
691
+ await runAgentTurn(value, displayValue, contextFiles, callbacks, retryAttempt + 1, true, contextOverflowRecovered);
692
+ return;
693
+ }
477
694
  callbacks.addMessage({ role: 'assistant', text: `Model call failed: ${text}` });
478
695
  }
479
696
  }
480
697
  finally {
481
698
  if (idleTimer)
482
699
  clearTimeout(idleTimer);
700
+ callbacks.onEvent?.(agentEvent({ type: 'turn_end', request: value, status: turnStatus }));
483
701
  callbacks.setAbortController?.(null);
484
702
  callbacks.setBusy(false);
485
703
  }