npm - @denizokcu/haze - Versions diffs - 0.0.2 → 0.1.0 - Mend

@denizokcu/haze 0.0.2 → 0.1.0

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (47)

package/CHANGELOG.md +19 -0
package/README.md +100 -34
package/dist/cli/commands/chat.d.ts +3 -1
package/dist/cli/commands/chat.js +500 -56
package/dist/cli/commands/commands.d.ts +5 -0
package/dist/cli/commands/commands.js +114 -29
package/dist/cli/commands/formatters.js +32 -2
package/dist/cli/commands/streaming.d.ts +6 -1
package/dist/cli/commands/streaming.js +316 -98
package/dist/cli/index.js +5 -2
package/dist/config/inputHistory.js +8 -0
package/dist/config/providers.d.ts +26 -0
package/dist/config/providers.js +88 -0
package/dist/config/settings.d.ts +9 -2
package/dist/core/agent/compaction.d.ts +13 -0
package/dist/core/agent/compaction.js +34 -0
package/dist/core/agent/errors.d.ts +3 -0
package/dist/core/agent/errors.js +13 -0
package/dist/core/agent/events.d.ts +58 -0
package/dist/core/agent/events.js +3 -0
package/dist/core/goal/completionPolicy.d.ts +28 -0
package/dist/core/goal/completionPolicy.js +84 -0
package/dist/core/goal/requestClassifier.d.ts +6 -0
package/dist/core/goal/requestClassifier.js +31 -0
package/dist/core/goal/sessionGoal.d.ts +30 -0
package/dist/core/goal/sessionGoal.js +88 -0
package/dist/core/session/sessionStore.d.ts +37 -0
package/dist/core/session/sessionStore.js +59 -0
package/dist/core/subagent/subagentRunner.d.ts +33 -0
package/dist/core/subagent/subagentRunner.js +140 -0
package/dist/llm/client.d.ts +1 -1
package/dist/llm/client.js +6 -6
package/dist/llm/hazeTools.d.ts +86 -0
package/dist/llm/hazeTools.js +313 -93
package/dist/llm/initPrompt.js +6 -4
package/dist/llm/systemPrompt.js +11 -7
package/dist/skills/builder/SkillBuilder.d.ts +6 -0
package/dist/skills/builder/SkillBuilder.js +146 -24
package/dist/ui/components/ErrorView.d.ts +2 -1
package/dist/ui/components/Header.d.ts +2 -1
package/dist/ui/components/Header.js +1 -11
package/dist/ui/components/MarkdownText.d.ts +2 -1
package/dist/ui/components/TextInput.d.ts +7 -3
package/dist/ui/components/TextInput.js +112 -27
package/dist/ui/theme.d.ts +3 -0
package/dist/ui/theme.js +4 -1
package/package.json +8 -8

package/dist/cli/commands/streaming.js (changed)

@@ -5,6 +5,12 @@ import { buildSystemPrompt } from '../../llm/systemPrompt.js';
 import { loadSkillRegistry } from '../../skills/SkillRegistry.js';
 import { buildSkillTools } from '../../skills/skillTools.js';
 import { compact, toolCallSummary, toolResultSummary, formatSeconds } from './formatters.js';
+import { isActionRequest, isPlanImplementationRequest, isPlanOnlyRequest, isValidationRequest } from '../../core/goal/requestClassifier.js';
+import { completionDecision, looksIncomplete, noTextAfterToolPrompt, postContinuationPrompt, toolLoopBudgetPrompt } from '../../core/goal/completionPolicy.js';
+import { createSessionGoal, formatGoalStatus, observeGoalToolEvent } from '../../core/goal/sessionGoal.js';
+import { agentEvent } from '../../core/agent/events.js';
+import { isContextOverflowError, isRetryableModelError } from '../../core/agent/errors.js';
+import { createSubagentTool } from '../../core/subagent/subagentRunner.js';
 function stableToolKey(toolCall) {
     return `${toolCall.toolName}:${JSON.stringify(toolCall.input)}`;
 }
@@ -28,31 +34,30 @@ function toolOnlyStepCount(steps) {
     }
     return count;
 }
-function isPlanOnlyRequest(value) {
-    return /\b(create|make|write|draft|outline)\s+(?:a\s+)?plan\b|\bplan\s+(?:for|to)\b/i.test(value) && !/\bimplement|execute|do\b/i.test(value);
-}
-function isLikelyActionRequest(value) {
-    if (isPlanOnlyRequest(value))
-        return false;
-    return /\b(add|create|write|implement|update|fix|change|support|wire|test|tests|document|docs|documentation|run|verify)\b/i.test(value);
-}
-function isValidationRequest(value) {
-    if (isPlanOnlyRequest(value))
-        return false;
-    return /\b(run|verify|test|tests|check|validate)\b/i.test(value);
-}
-function isPlanImplementationRequest(value) {
-    return /\b(implement|execute|do)\b.*\bplan\b|\bplan\.md\b|\btest_plan\.md\b/i.test(value);
-}
-function looksIncomplete(text) {
-    return /\b(incomplete|what remains|remains:|next:|not implemented|not created|no tests exist|created no docs|has not been|have not been|not yet|never executed|not executed|not run|cannot retry|cannot write|cannot validate|tool budget reached)\b/i.test(text);
-}
 function sanitizeAssistantText(text) {
     return [...text].filter(char => {
         const code = char.charCodeAt(0);
         return !(code <= 8 || code === 11 || code === 12 || (code >= 14 && code <= 31) || code === 127 || code === 155);
     }).join('');
 }
+function hideSyntheticToolCallMarkup(text) {
+    return text
+        .replace(/(^|\n)\s*(?:```(?:xml)?\s*)?(?:xml\s*)?<tool_call>[\s\S]*?<\/tool_call>\s*(?:```)?/gi, '$1')
+        .replace(/(^|\n)\s*(?:```(?:xml)?\s*)?(?:xml\s*)?<tool_call>[\s\S]*$/i, '$1');
+}
+function isNonSubstantiveAssistantText(text) {
+    return /^[`\s]*$/.test(text);
+}
+function assistantDisplayText(text) {
+    return hideSyntheticToolCallMarkup(text).trim();
+}
+function normalizeAssistantText(text) {
+    return assistantDisplayText(text)
+        .replace(/[`*_~#>\-–—:;,.!?()[\]{}"']/g, '')
+        .replace(/\s+/g, ' ')
+        .trim()
+        .toLowerCase();
+}
 function toolInputPath(input) {
     return typeof input === 'object' && input != null && 'path' in input && typeof input.path === 'string'
         ? input.path
@@ -61,40 +66,77 @@ function toolInputPath(input) {
 function isDuplicateSkippedOutput(output) {
     return typeof output === 'object' && output != null && 'duplicateSkipped' in output && output.duplicateSkipped === true;
 }
-export async function runAgentTurn(value, displayValue, contextFiles, callbacks) {
+function retryDelayMs(attempt) {
+    return Math.min(4000, 1000 * 2 ** attempt);
+}
+async function abortableDelay(milliseconds, signal) {
+    if (signal.aborted)
+        return;
+    await new Promise(resolve => {
+        const timer = setTimeout(resolve, milliseconds);
+        signal.addEventListener('abort', () => {
+            clearTimeout(timer);
+            resolve();
+        }, { once: true });
+    });
+}
+const DEFAULT_MAX_OUTPUT_TOKENS = 16384;
+const IDLE_TIMEOUT_MS = 5 * 60_000;
+const MAIN_STEP_LIMIT = 40;
+const MAIN_TOOL_CALL_LIMIT = 40;
+const MAIN_TOOL_ONLY_STEP_LIMIT = 12;
+const FOLLOW_UP_STEP_LIMIT = 30;
+const FOLLOW_UP_TOOL_CALL_LIMIT = 30;
+const FOLLOW_UP_TOOL_ONLY_STEP_LIMIT = 10;
+const COMPLETION_CONTINUATION_LIMIT = 30;
+function toolOutputOk(output, success) {
+    if (!success)
+        return false;
+    return !(typeof output === 'object' && output != null && 'ok' in output && output.ok === false);
+}
+export async function runAgentTurn(value, displayValue, contextFiles, callbacks, retryAttempt = 0, retryingExistingRequest = false, contextOverflowRecovered = false) {
     const displayVal = displayValue ?? value;
     const userMessage = { role: 'user', text: displayVal };
+    callbacks.onEvent?.(agentEvent({ type: 'turn_start', request: value }));
     callbacks.setBusy(true);
-    callbacks.addMessage(userMessage);
+    if (!retryingExistingRequest)
+        callbacks.addMessage(userMessage);
     const abortController = new AbortController();
     callbacks.setAbortController?.(abortController);
+    let turnStatus = 'failed';
     let idleTimer;
     const resetIdleTimer = () => {
         if (idleTimer)
             clearTimeout(idleTimer);
-        idleTimer = setTimeout(() => abortController.abort('Haze turn timed out after no model/tool activity.'), 90_000);
+        idleTimer = setTimeout(() => abortController.abort('Haze turn timed out after no model/tool activity.'), IDLE_TIMEOUT_MS);
     };
     try {
         const m = await model();
         if (!m) {
-            callbacks.addMessage({ role: 'assistant', text: 'No API key configured. Run /login, then /model x-ai/grok-build-0.1. Haze cannot hallucinate without credentials. Progress.' });
+            callbacks.addMessage({ role: 'assistant', text: 'No model provider configured. Run /provider to choose or add a provider. Haze cannot hallucinate without a model. Progress.' });
             return;
         }
         const activeModel = m;
         const skillRegistry = await loadSkillRegistry();
-        const availableTools = { ...hazeTools, ...buildSkillTools(skillRegistry) };
+        const subagentTool = createSubagentTool({ model: activeModel, contextFiles });
+        const availableTools = { ...hazeTools, subagent: subagentTool, ...buildSkillTools(skillRegistry) };
+        const goal = createSessionGoal(value);
+        callbacks.setGoalStatus?.(formatGoalStatus(goal));
         const likelyPlanOnlyRequest = isPlanOnlyRequest(value);
         const likelyPlanImplementationRequest = isPlanImplementationRequest(value);
-        const likelyActionRequest = isLikelyActionRequest(value);
+        const likelyActionRequest = isActionRequest(value);
         const likelyValidationRequest = isValidationRequest(value);
         const planImplementationGuidance = 'When implementing a plan file, first identify the concrete required checklist items and compare them with the current files. Do not edit source or tests when the required behavior is already present. Implement the smallest clearly required phase or required items, skip optional/design-question items unless explicitly requested, add tests rather than exploratory one-off scripts where possible, use file tools (not bash) for any file changes, run validation once after code/test edits, then update plan status with file tools if requested. Do not call unresolved optional scope a blocker.';
-        const requestMessages = likelyPlanImplementationRequest
-            ? [...callbacks.getConversation(), { role: 'user', content: value }, { role: 'user', content: planImplementationGuidance }]
-            : [...callbacks.getConversation(), { role: 'user', content: value }];
+        const requestMessages = retryingExistingRequest
+            ? callbacks.getConversation()
+            : likelyPlanImplementationRequest
+                ? [...callbacks.getConversation(), { role: 'user', content: value }, { role: 'user', content: planImplementationGuidance }]
+                : [...callbacks.getConversation(), { role: 'user', content: value }];
         callbacks.setConversation(requestMessages);
         resetIdleTimer();
         let currentAssistantId = `assistant-${Date.now()}`;
         let assistantStarted = false;
+        let currentAssistantStarted = false;
         let currentAssistantText = '';
         let assistantText = '';
         let toolEpoch = 0;
@@ -102,22 +144,50 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
         let editFileFailed = false;
         let mutatingToolSucceeded = false;
         let validationToolSucceeded = false;
+        let validationToolFailed = false;
         let sawReadOnlyTool = false;
         let sawToolCall = false;
         let textAfterTool = false;
-        let forcedContinuationUsed = false;
-        let secondContinuationUsed = false;
+        let completionContinuationCount = 0;
+        const maxCompletionContinuations = COMPLETION_CONTINUATION_LIMIT;
         let editRecoveryPath;
         let editRecoveryReadSatisfied = false;
         const toolSummaries = [];
+        const visibleAssistantTexts = new Set();
+        const previousAssistantText = normalizeAssistantText(callbacks.getLastAssistantText());
+        if (previousAssistantText)
+            visibleAssistantTexts.add(previousAssistantText);
+        const rememberVisibleAssistantText = (text) => {
+            const normalized = normalizeAssistantText(text);
+            if (!normalized)
+                return;
+            visibleAssistantTexts.add(normalized);
+            callbacks.setLastAssistantText(text);
+        };
+        const isDuplicateVisibleAssistantText = (text) => {
+            const normalized = normalizeAssistantText(text);
+            return normalized.length > 0 && visibleAssistantTexts.has(normalized);
+        };
+        const isPrefixOfVisibleAssistantText = (text) => {
+            const normalized = normalizeAssistantText(text);
+            return normalized.length > 0 && [...visibleAssistantTexts].some(previous => previous.startsWith(normalized) && previous !== normalized);
+        };
         const toolExecutionContext = { inFlightToolCalls: new Map() };
-        const toolGroupId = `tools-${Date.now()}-${Math.random().toString(36).slice(2)}`;
+        let toolGroupId = `tools-${Date.now()}-${Math.random().toString(36).slice(2)}`;
+        const INLINE_DIFF_LINE_LIMIT = 20;
         const toolDisplayItems = [];
         let toolGroupStarted = false;
+        let toolGroupFinalized = false;
         function renderToolGroup(streaming) {
             const visibleItems = toolDisplayItems.filter(item => !item.hidden);
+            const running = visibleItems.some(item => item.status === 'running');
+            const failures = visibleItems.filter(item => item.status === 'error');
+            const changes = visibleItems.filter(item => /^(editFile|replaceLines|writeFile)\b/.test(item.summary));
+            const compactItems = !running && visibleItems.length > 12
+                ? [...new Map([...failures, ...changes].map(item => [item.id, item])).values()]
+                : visibleItems;
             const grouped = new Map();
-            for (const item of visibleItems) {
+            for (const item of compactItems) {
                 const key = `${item.status}:${item.summary}:${item.result ?? ''}`;
                 const current = grouped.get(key);
                 if (current)
@@ -126,14 +196,33 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
                     grouped.set(key, { item, count: 1 });
             }
             const rows = [...grouped.values()];
-            const running = visibleItems.some(item => item.status === 'running');
-            const header = running || streaming ? 'Running tools' : `Tools: ${visibleItems.length} call${visibleItems.length === 1 ? '' : 's'}`;
-            const lines = rows.map(({ item, count }) => {
+            const compactSuffix = !running && visibleItems.length > 12 ? ` · showing ${compactItems.length} important` : '';
+            const header = running || streaming
+                ? 'Running tools'
+                : `${visibleItems.length} call${visibleItems.length === 1 ? '' : 's'} · ${changes.length} change${changes.length === 1 ? '' : 's'} · ${failures.length} failed${compactSuffix}`;
+            const lines = [];
+            for (const { item, count } of rows) {
                 const icon = item.status === 'running' ? '…' : item.status === 'success' ? '✓' : '✗';
                 const countText = count > 1 ? ` ×${count}` : '';
                 const result = item.status === 'running' ? '' : ` — ${item.result ?? item.status}${item.durationMs == null ? '' : ` in ${formatSeconds(item.durationMs)}`}`;
-                return `  ${icon} ${item.summary}${countText}${result}`;
-            });
+                lines.push(`  ${icon} ${item.summary}${countText}${result}`);
+                if (item.diff && item.diff.length > 0 && (item.diffLineCount ?? item.diff.length) <= INLINE_DIFF_LINE_LIMIT) {
+                    for (const diffLine of item.diff) {
+                        const lineNumber = diffLine.type === 'add' ? diffLine.newLine : diffLine.oldLine;
+                        const marker = diffLine.type === 'add' ? '+' : diffLine.type === 'remove' ? '-' : ' ';
+                        lines.push(`    ${String(lineNumber ?? '').padStart(5)} ${marker} ${diffLine.text}`);
+                    }
+                }
+                else if ((item.diffLineCount ?? 0) > INLINE_DIFF_LINE_LIMIT) {
+                    lines.push(`          diff hidden (${item.diffLineCount} changed lines; run git diff to inspect)`);
+                }
+                if (item.subItems && item.subItems.length > 0) {
+                    for (const sub of item.subItems) {
+                        const subDuration = sub.durationMs > 1000 ? ` (${formatSeconds(sub.durationMs)})` : '';
+                        lines.push(`    · ${sub.name} — ${sub.summary}${subDuration}`);
+                    }
+                }
+            }
             return [header, ...lines].join('\n');
         }
         function updateToolGroup(streaming = true) {
@@ -145,36 +234,74 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
             else {
                 callbacks.updateMessage(toolGroupId, { text, streaming });
             }
+            if (!streaming)
+                toolGroupFinalized = true;
         }
         function recordToolStart(toolCall) {
+            if (toolGroupFinalized) {
+                toolDisplayItems.length = 0;
+                toolGroupId = `tools-${Date.now()}-${Math.random().toString(36).slice(2)}`;
+                toolGroupFinalized = false;
+                toolGroupStarted = false;
+            }
+            callbacks.onEvent?.(agentEvent({ type: 'tool_start', id: toolCall.toolCallId, name: toolCall.toolName, input: toolCall.input }));
             toolDisplayItems.push({ id: toolCall.toolCallId, summary: toolCallSummary(toolCall.toolName, toolCall.input), status: 'running' });
             updateToolGroup(true);
+            const runningSubagents = toolDisplayItems.filter(item => item.status === 'running' && item.summary.startsWith('subagent')).length;
+            if (runningSubagents > 0)
+                callbacks.setBusyLabel?.(`Running ${runningSubagents} subagent${runningSubagents === 1 ? '' : 's'}`);
         }
         function recordToolDisplayFinish(event) {
+            callbacks.onEvent?.(agentEvent({ type: 'tool_end', id: event.toolCall.toolCallId, name: event.toolCall.toolName, success: event.success, output: event.output, error: event.error, durationMs: event.durationMs }));
             const item = toolDisplayItems.find(candidate => candidate.id === event.toolCall.toolCallId);
             if (!item)
                 return;
-            item.status = event.success ? 'success' : 'error';
+            item.status = toolOutputOk(event.output, event.success) ? 'success' : 'error';
             item.result = toolResultSummary(event);
             item.durationMs = event.durationMs;
             item.hidden = isDuplicateSkippedOutput(event.output);
+            if (typeof event.output === 'object' && event.output != null) {
+                const output = event.output;
+                if (typeof output.diffLineCount === 'number')
+                    item.diffLineCount = output.diffLineCount;
+                if (Array.isArray(output.diff))
+                    item.diff = output.diff;
+            }
+            if (event.toolCall.toolName === 'subagent' && typeof event.output === 'object' && event.output != null) {
+                const out = event.output;
+                if (Array.isArray(out.toolCalls)) {
+                    item.subItems = out.toolCalls.map(tc => ({
+                        name: tc.name,
+                        summary: tc.summary,
+                        durationMs: tc.durationMs,
+                    }));
+                }
+            }
             updateToolGroup(toolDisplayItems.some(candidate => candidate.status === 'running'));
+            const runningSubagents = toolDisplayItems.filter(i => i.status === 'running' && i.summary.startsWith('subagent')).length;
+            if (runningSubagents === 0)
+                callbacks.setBusyLabel?.('Haze is thinking');
+            else
+                callbacks.setBusyLabel?.(`Running ${runningSubagents} subagent${runningSubagents === 1 ? '' : 's'}`);
         }
-        callbacks.debugLog(`request started with ${requestMessages.length} conversation messages; action=${likelyActionRequest}`);
+        callbacks.debugLog(`request started with ${requestMessages.length} conversation messages; intent=${goal.normalizedIntent}; action=${likelyActionRequest}`);
         function recordToolFinish(event) {
             const path = toolInputPath(event.toolCall.input);
             const duplicateSkipped = isDuplicateSkippedOutput(event.output);
-            if (!event.success && ['editFile', 'replaceLines', 'writeFile'].includes(event.toolCall.toolName)) {
+            const ok = toolOutputOk(event.output, event.success);
+            observeGoalToolEvent(goal, { ...event.toolCall, success: ok, output: event.output, duplicateSkipped });
+            callbacks.setGoalStatus?.(formatGoalStatus(goal));
+            if (!ok && ['editFile', 'replaceLines', 'writeFile'].includes(event.toolCall.toolName)) {
                 editFileFailed = true;
                 editRecoveryPath = path;
                 editRecoveryReadSatisfied = false;
             }
-            if (event.success && ['listFiles', 'readFile'].includes(event.toolCall.toolName))
+            if (ok && ['listFiles', 'readFile'].includes(event.toolCall.toolName))
                 sawReadOnlyTool = true;
-            if (event.success && event.toolCall.toolName === 'readFile' && path && path === editRecoveryPath && !duplicateSkipped) {
+            if (ok && event.toolCall.toolName === 'readFile' && path && path === editRecoveryPath && !duplicateSkipped) {
                 editRecoveryReadSatisfied = true;
             }
-            if (event.success && ['editFile', 'replaceLines', 'writeFile'].includes(event.toolCall.toolName)) {
+            if (ok && !duplicateSkipped && ['editFile', 'replaceLines', 'writeFile'].includes(event.toolCall.toolName)) {
                 mutatingToolSucceeded = true;
                 if (!path || path === editRecoveryPath) {
                     editRecoveryPath = undefined;
@@ -183,9 +310,10 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
                 }
             }
             if (event.success && event.toolCall.toolName === 'bash') {
-                const ok = typeof event.output === 'object' && event.output != null && 'ok' in event.output ? Boolean(event.output.ok) : true;
                 if (ok)
                     validationToolSucceeded = true;
+                else
+                    validationToolFailed = true;
             }
         }
         async function streamAssistantResponse(messages, reason, prompt, allowTools = false) {
@@ -194,6 +322,7 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
             let responseStarted = false;
             let responseText = '';
             let continuationToolCalls = 0;
+            let followUpStreamError;
             const continuationMessages = [
                 ...messages,
                 { role: 'user', content: prompt },
@@ -201,21 +330,22 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
             const followUp = streamText({
                 model: activeModel,
                 temperature: 0,
+                maxOutputTokens: DEFAULT_MAX_OUTPUT_TOKENS,
                 system: buildSystemPrompt(contextFiles),
                 messages: continuationMessages,
                 tools: availableTools,
                 toolChoice: allowTools ? 'auto' : 'none',
-                stopWhen: stepCountIs(10),
+                stopWhen: stepCountIs(FOLLOW_UP_STEP_LIMIT),
                 abortSignal: abortController.signal,
                 experimental_context: toolExecutionContext,
                 prepareStep({ steps, messages }) {
                     continuationToolCalls = steps.flatMap(step => step.toolCalls).length;
-                    if (continuationToolCalls >= 10 || toolOnlyStepCount(steps) >= 5) {
+                    if (continuationToolCalls >= FOLLOW_UP_TOOL_CALL_LIMIT || toolOnlyStepCount(steps) >= FOLLOW_UP_TOOL_ONLY_STEP_LIMIT) {
                         return {
                             toolChoice: 'none',
                             messages: [
                                 ...messages,
-                                { role: 'user', content: 'Tool budget reached. If the current request is complete, summarize only current-turn changes and validation. If incomplete, state the concrete blocker briefly; do not claim tools are unavailable and do not recap unrelated earlier tasks.' },
+                                { role: 'user', content: toolLoopBudgetPrompt() },
                             ],
                         };
                     }
@@ -242,6 +372,7 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
                     return undefined;
                 },
                 onError({ error }) {
+                    followUpStreamError = error;
                     callbacks.debugLog(`stream error: ${error instanceof Error ? error.message : String(error)}`);
                 },
                 onFinish(event) {
@@ -271,30 +402,50 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
                 resetIdleTimer();
                 const delta = sanitizeAssistantText(rawDelta);
                 responseText += delta;
+                const displayText = assistantDisplayText(responseText);
+                if ((!displayText || isNonSubstantiveAssistantText(displayText) || isPrefixOfVisibleAssistantText(displayText)) && !responseStarted)
+                    continue;
                 if (!responseStarted) {
                     responseStarted = true;
-                    callbacks.addMessage({ id: responseId, role: 'assistant', text: delta, streaming: true });
+                    callbacks.onEvent?.(agentEvent({ type: 'message_start', id: responseId, role: 'assistant' }));
+                    callbacks.addMessage({ id: responseId, role: 'assistant', text: displayText, streaming: true });
                 }
                 else {
-                    callbacks.updateMessage(responseId, { text: responseText });
+                    callbacks.onEvent?.(agentEvent({ type: 'message_update', id: responseId, text: displayText }));
+                    callbacks.updateMessage(responseId, { text: displayText });
                 }
             }
+            try {
+                await followUp.response;
+            }
+            catch (error) {
+                throw followUpStreamError ?? error;
+            }
+            const finalText = assistantDisplayText(responseText);
+            const visibleFinalText = finalText;
+            const hidden = visibleFinalText.length === 0 || isNonSubstantiveAssistantText(visibleFinalText) || isDuplicateVisibleAssistantText(visibleFinalText);
             if (responseStarted) {
-                callbacks.setLastAssistantText(responseText.trim());
-                callbacks.updateMessage(responseId, { streaming: false });
+                if (!hidden)
+                    rememberVisibleAssistantText(visibleFinalText);
+                callbacks.onEvent?.(agentEvent({ type: 'message_end', id: responseId, text: visibleFinalText, hidden }));
+                callbacks.updateMessage(responseId, { text: visibleFinalText, streaming: false, hidden });
             }
-            return responseText.trim();
+            return { text: finalText, id: responseId, started: responseStarted };
         }
+        let streamError;
+        let lastFinishReason;
         const result = streamText({
             model: activeModel,
             temperature: 0,
+            maxOutputTokens: DEFAULT_MAX_OUTPUT_TOKENS,
             system: buildSystemPrompt(contextFiles),
             messages: requestMessages,
             tools: availableTools,
-            stopWhen: stepCountIs(12),
+            stopWhen: stepCountIs(MAIN_STEP_LIMIT),
             abortSignal: abortController.signal,
             experimental_context: toolExecutionContext,
             onError({ error }) {
+                streamError = error;
                 callbacks.debugLog(`stream error: ${error instanceof Error ? error.message : String(error)}`);
             },
             prepareStep({ steps, messages }) {
@@ -331,7 +482,7 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
                         ],
                     };
                 }
-                if (likelyActionRequest && !mutatingToolSucceeded && consecutiveToolOnlySteps >= 3 && toolCalls.length < 10) {
+                if (likelyActionRequest && !mutatingToolSucceeded && consecutiveToolOnlySteps >= 3 && toolCalls.length < MAIN_TOOL_CALL_LIMIT) {
                     callbacks.debugLog('nudging action request toward mutation after read-only steps');
                     return {
                         messages: [
@@ -340,13 +491,13 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
                         ],
                     };
                 }
-                if (toolCalls.length >= 12 || consecutiveToolOnlySteps >= 5) {
+                if (toolCalls.length >= MAIN_TOOL_CALL_LIMIT || consecutiveToolOnlySteps >= MAIN_TOOL_ONLY_STEP_LIMIT) {
                     callbacks.debugLog('forcing text response to avoid tool loop');
                     return {
                         toolChoice: 'none',
                         messages: [
                             ...messages,
-                            { role: 'user', content: 'Tool budget reached. If the current request is complete, summarize only current-turn changes and validation. If the requested change is incomplete, state the concrete blocker briefly. Do not claim tools are unavailable, recap unrelated earlier tasks, or provide a generic remains list.' },
+                            { role: 'user', content: toolLoopBudgetPrompt() },
                         ],
                     };
                 }
@@ -355,6 +506,7 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
                 return undefined;
             },
             onStepFinish({ stepNumber, text, toolCalls, toolResults, finishReason }) {
+                lastFinishReason = finishReason;
                 callbacks.debugLog(`step ${stepNumber} finished: ${finishReason}; text=${text.length}; toolCalls=${toolCalls.length}; toolResults=${toolResults.length}`);
             },
             onFinish(event) {
@@ -386,20 +538,32 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
             const delta = sanitizeAssistantText(rawDelta);
             if (sawToolCall)
                 textAfterTool = true;
-            if (currentAssistantText.length > 0 && toolEpoch > currentAssistantToolEpoch) {
-                callbacks.updateMessage(currentAssistantId, { streaming: false });
+            if (currentAssistantStarted && currentAssistantText.length > 0 && toolEpoch > currentAssistantToolEpoch) {
+                const intermediateText = assistantDisplayText(currentAssistantText);
+                const hidden = intermediateText.length === 0 || isNonSubstantiveAssistantText(intermediateText) || isDuplicateVisibleAssistantText(intermediateText);
+                if (!hidden)
+                    rememberVisibleAssistantText(intermediateText);
+                callbacks.onEvent?.(agentEvent({ type: 'message_end', id: currentAssistantId, text: intermediateText, hidden }));
+                callbacks.updateMessage(currentAssistantId, { text: intermediateText, streaming: false, hidden });
                 currentAssistantId = `assistant-${Date.now()}-${Math.random().toString(36).slice(2)}`;
+                currentAssistantStarted = false;
                 currentAssistantText = '';
                 currentAssistantToolEpoch = toolEpoch;
             }
             assistantText += delta;
             currentAssistantText += delta;
-            if (currentAssistantText === delta) {
+            const displayText = assistantDisplayText(currentAssistantText);
+            if ((!displayText || isNonSubstantiveAssistantText(displayText) || isPrefixOfVisibleAssistantText(displayText)) && !currentAssistantStarted)
+                continue;
+            if (!currentAssistantStarted) {
                 assistantStarted = true;
-                callbacks.addMessage({ id: currentAssistantId, role: 'assistant', text: currentAssistantText, streaming: true });
+                currentAssistantStarted = true;
+                callbacks.onEvent?.(agentEvent({ type: 'message_start', id: currentAssistantId, role: 'assistant' }));
+                callbacks.addMessage({ id: currentAssistantId, role: 'assistant', text: displayText, streaming: true });
             }
             else {
-                callbacks.updateMessage(currentAssistantId, { text: currentAssistantText });
+                callbacks.onEvent?.(agentEvent({ type: 'message_update', id: currentAssistantId, text: displayText }));
+                callbacks.updateMessage(currentAssistantId, { text: displayText });
             }
         }
         let completedConversation = callbacks.getConversation();
@@ -408,54 +572,81 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
             completedConversation = [...requestMessages, ...response.messages];
             callbacks.setConversation(completedConversation);
         }
-        catch {
-            // Keep the conversation from onFinish if the response promise is unavailable.
+        catch (error) {
+            throw streamError ?? error;
         }
         callbacks.debugLog(`response stream finished; session has ${completedConversation.length} model messages`);
-        const finalAssistantText = assistantText.trim();
-        const assistantAdmitsIncomplete = looksIncomplete(finalAssistantText);
-        const requestCompletedByTools = mutatingToolSucceeded && validationToolSucceeded && !editRecoveryPath;
-        const needsActionContinuation = likelyActionRequest
-            && !requestCompletedByTools
-            && ((sawReadOnlyTool && !mutatingToolSucceeded) || editFileFailed || assistantAdmitsIncomplete);
-        const needsValidationContinuation = likelyValidationRequest && !requestCompletedByTools && !validationToolSucceeded && (sawReadOnlyTool || mutatingToolSucceeded || assistantAdmitsIncomplete);
+        if (lastFinishReason === 'length' && !sawToolCall && completionContinuationCount < maxCompletionContinuations) {
+            completionContinuationCount += 1;
+            callbacks.debugLog('output token limit reached, auto-continuing');
+            const continuation = await streamAssistantResponse(completedConversation, 'output token limit reached', 'Your response was cut off because you hit the output token limit. Continue from where you left off — do not repeat what you already said, just pick up exactly where you stopped.', true);
+            completedConversation = callbacks.getConversation();
+            if (continuation.text) {
+                assistantText += '\n' + continuation.text;
+            }
+        }
+        const combinedAssistantText = assistantDisplayText(assistantText);
+        const decideCompletion = (text) => completionDecision({
+            request: value,
+            goal,
+            assistantText: text,
+            sawReadOnlyTool,
+            sawToolCall,
+            mutatingToolSucceeded,
+            validationToolSucceeded,
+            validationToolFailed,
+            editFileFailed,
+            editRecoveryPath,
+        });
+        let decision = decideCompletion(combinedAssistantText);
+        async function runCompletionLoop(seedConversation, seedText) {
+            let loopConversation = seedConversation;
+            let latestText = seedText;
+            while ((decision.needsActionContinuation || decision.needsValidationContinuation) && completionContinuationCount < maxCompletionContinuations) {
+                completionContinuationCount += 1;
+                const prompt = decision.continuationPrompt
+                    ?? (looksIncomplete(latestText) ? postContinuationPrompt() : 'Continue the same user goal until it is complete, blocked by a concrete issue, or needs a user decision. Focus on the concrete blocker, not a generic plan.');
+                const continuation = await streamAssistantResponse(loopConversation, `completion gate ${completionContinuationCount}`, prompt, true);
+                loopConversation = callbacks.getConversation();
+                if (continuation.text)
+                    latestText = continuation.text;
+                decision = decideCompletion(latestText);
+            }
+            if ((decision.needsActionContinuation || decision.needsValidationContinuation) && completionContinuationCount >= maxCompletionContinuations) {
+                callbacks.addMessage({ role: 'assistant', text: 'Stopped after the autonomous safety limit. The current goal may still need work; ask me to continue and I will resume from the latest tool results.' });
+            }
+            if (!latestText && toolSummaries.length > 0) {
+                const followUp = await streamAssistantResponse(loopConversation, 'completion loop ended without text', noTextAfterToolPrompt(false), false);
+                if (!followUp.text)
+                    callbacks.addMessage({ role: 'assistant', text: `Finished tool work but the model did not produce a final response. Last tool result: ${toolSummaries.at(-1)}.` });
+            }
+        }
         if (assistantStarted) {
-            callbacks.setLastAssistantText(finalAssistantText);
-            callbacks.updateMessage(currentAssistantId, { streaming: false });
-            if ((needsActionContinuation || needsValidationContinuation) && !forcedContinuationUsed) {
-                forcedContinuationUsed = true;
-                callbacks.updateMessage(currentAssistantId, { text: 'Continuing to complete the requested change...', streaming: false });
-                const prompt = editFileFailed
-                    ? 'Your editFile attempt failed. Use the latest readFile line-numbered output and replaceLines to complete the requested change. Continue with any remaining tests or validation if relevant. Do not stop with a summary.'
-                    : needsValidationContinuation
-                        ? 'You have not run the requested validation yet. Continue now by running the appropriate test/check command. Summarize only after the command finishes.'
-                        : mutatingToolSucceeded
-                            ? 'Your previous response says the current request is incomplete. Continue now with the remaining edits and validation for this same request. Do not summarize a plan unless blocked.'
-                            : 'You inspected files but have not made the requested change yet. Continue now by editing or writing the necessary files. Do not summarize a plan unless blocked.';
-                const continuationText = await streamAssistantResponse(completedConversation, 'current-turn completion gate', prompt, true);
-                if (!secondContinuationUsed && looksIncomplete(continuationText) && (likelyActionRequest || likelyValidationRequest)) {
-                    secondContinuationUsed = true;
-                    await streamAssistantResponse(callbacks.getConversation(), 'post-continuation completion gate', 'Your previous response still described unfinished work, missing validation, or a tool-budget issue. If any tools are still available, complete the remaining edit or run the final validation now. Only call something a blocker if a concrete tool failure prevents progress.', true);
-                }
+            const hidePreToolFragment = sawToolCall && !textAfterTool;
+            const visibleFinalAssistantText = assistantDisplayText(currentAssistantText);
+            const hidden = visibleFinalAssistantText.length === 0 || isNonSubstantiveAssistantText(visibleFinalAssistantText) || isDuplicateVisibleAssistantText(visibleFinalAssistantText) || hidePreToolFragment;
+            if (!hidden)
+                rememberVisibleAssistantText(visibleFinalAssistantText);
+            callbacks.onEvent?.(agentEvent({ type: 'message_end', id: currentAssistantId, text: visibleFinalAssistantText, hidden }));
+            callbacks.updateMessage(currentAssistantId, { text: visibleFinalAssistantText, streaming: false, hidden });
+            if (decision.needsActionContinuation || decision.needsValidationContinuation) {
+                await runCompletionLoop(completedConversation, combinedAssistantText);
             }
             else if (sawToolCall && !textAfterTool) {
-                const followUpText = await streamAssistantResponse(completedConversation, 'tool use completed without follow-up text', 'Continue from the tool result and answer my original request. Do not call tools. Summarize only current-turn changes and validation; do not recap unrelated earlier tasks.', false);
-                if (!followUpText) {
+                const followUp = await streamAssistantResponse(completedConversation, 'tool use completed without follow-up text', noTextAfterToolPrompt(false), false);
+                if (!followUp.text) {
                     callbacks.addMessage({ role: 'assistant', text: 'Stopped after tool use without a follow-up response. You can ask me to continue if the task is not complete.' });
                 }
             }
         }
         else if (sawToolCall) {
             const allowTools = (likelyActionRequest && (!mutatingToolSucceeded || editFileFailed)) || (likelyValidationRequest && !validationToolSucceeded);
-            const prompt = allowTools
-                ? 'Continue the original request now. If it asks for a change, edit or write the necessary files. If it asks to run or verify tests, run the command. Do not provide only a retrospective summary unless blocked.'
-                : 'Continue from the tool result and answer my original request. Do not call tools. Summarize only current-turn changes and validation; do not recap unrelated earlier tasks.';
-            const followUpText = await streamAssistantResponse(completedConversation, 'tool-only turn completed without text', prompt, allowTools);
-            if (!secondContinuationUsed && allowTools && looksIncomplete(followUpText)) {
-                secondContinuationUsed = true;
-                await streamAssistantResponse(callbacks.getConversation(), 'post-follow-up completion gate', 'Your previous response still described unfinished work, missing validation, or a tool-budget issue. If any tools are still available, complete the remaining edit or run the final validation now. Only call something a blocker if a concrete tool failure prevents progress.', true);
-            }
-            if (!followUpText) {
+            const prompt = noTextAfterToolPrompt(allowTools);
+            const followUp = await streamAssistantResponse(completedConversation, 'tool-only turn completed without text', prompt, allowTools);
+            decision = decideCompletion(followUp.text);
+            if (allowTools)
+                await runCompletionLoop(callbacks.getConversation(), followUp.text);
+            if (!followUp.text && completionContinuationCount === 0) {
                 const fallback = toolSummaries.length > 0
                     ? `Finished tool work but the model did not produce a final response. Last tool result: ${toolSummaries.at(-1)}.`
                     : 'Finished without a text response.';
@@ -465,21 +656,48 @@ export async function runAgentTurn(value, displayValue, contextFiles, callbacks)
         else {
             callbacks.addMessage({ id: currentAssistantId, role: 'assistant', text: 'Finished without a text response.', streaming: false });
         }
+        goal.phase = 'done';
+        goal.status = 'complete';
+        turnStatus = 'complete';
+        callbacks.setGoalStatus?.(undefined);
     }
     catch (error) {
         if (abortController.signal.aborted) {
+            turnStatus = 'aborted';
             callbacks.debugLog('request aborted');
             callbacks.addMessage({ role: 'system', text: 'Thinking aborted. You can type again.' });
         }
         else {
             const text = error instanceof Error ? error.message : String(error);
             callbacks.debugLog(`error: ${text}`);
+            if (!contextOverflowRecovered && isContextOverflowError(error)) {
+                const compacted = callbacks.compactConversation?.('Automatic recovery after provider context overflow. Preserve the active user request and concrete next steps.') ?? false;
+                callbacks.onEvent?.(agentEvent({ type: 'context_overflow', recovered: compacted, error: text }));
+                if (compacted) {
+                    callbacks.addMessage({ role: 'system', text: 'Context overflow detected; compacted older context and retrying the same request once.' });
+                    await runAgentTurn(value, displayValue, contextFiles, callbacks, retryAttempt, true, true);
+                    return;
+                }
+                callbacks.addMessage({ role: 'system', text: 'Context overflow detected, but there was not enough conversation history to compact automatically.' });
+            }
+            const maxRetries = 2;
+            if (retryAttempt < maxRetries && isRetryableModelError(error)) {
+                const delay = retryDelayMs(retryAttempt);
+                callbacks.onEvent?.(agentEvent({ type: 'retry', attempt: retryAttempt + 1, maxAttempts: maxRetries, delayMs: delay, error: text }));
+                callbacks.addMessage({ role: 'system', text: `Transient model error; retrying attempt ${retryAttempt + 1}/${maxRetries} in ${formatSeconds(delay)}: ${text}` });
+                await abortableDelay(delay, abortController.signal);
+                if (abortController.signal.aborted)
+                    return;
+                await runAgentTurn(value, displayValue, contextFiles, callbacks, retryAttempt + 1, true, contextOverflowRecovered);
+                return;
+            }
             callbacks.addMessage({ role: 'assistant', text: `Model call failed: ${text}` });
         }
     }
     finally {
         if (idleTimer)
             clearTimeout(idleTimer);
+        callbacks.onEvent?.(agentEvent({ type: 'turn_end', request: value, status: turnStatus }));
         callbacks.setAbortController?.(null);
         callbacks.setBusy(false);
     }