centaurus-cli 2.9.1 → 2.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (156)
  1. package/dist/cli-adapter.d.ts +76 -3
  2. package/dist/cli-adapter.d.ts.map +1 -1
  3. package/dist/cli-adapter.js +593 -230
  4. package/dist/cli-adapter.js.map +1 -1
  5. package/dist/config/mcp-config-manager.d.ts +21 -0
  6. package/dist/config/mcp-config-manager.d.ts.map +1 -1
  7. package/dist/config/mcp-config-manager.js +184 -1
  8. package/dist/config/mcp-config-manager.js.map +1 -1
  9. package/dist/config/models.d.ts +1 -0
  10. package/dist/config/models.d.ts.map +1 -1
  11. package/dist/config/models.js +9 -2
  12. package/dist/config/models.js.map +1 -1
  13. package/dist/config/slash-commands.d.ts +3 -0
  14. package/dist/config/slash-commands.d.ts.map +1 -1
  15. package/dist/config/slash-commands.js +39 -4
  16. package/dist/config/slash-commands.js.map +1 -1
  17. package/dist/config/types.d.ts +2 -0
  18. package/dist/config/types.d.ts.map +1 -1
  19. package/dist/config/types.js +1 -0
  20. package/dist/config/types.js.map +1 -1
  21. package/dist/index.js +60 -11
  22. package/dist/index.js.map +1 -1
  23. package/dist/mcp/mcp-command-handler.d.ts +34 -3
  24. package/dist/mcp/mcp-command-handler.d.ts.map +1 -1
  25. package/dist/mcp/mcp-command-handler.js +171 -83
  26. package/dist/mcp/mcp-command-handler.js.map +1 -1
  27. package/dist/mcp/mcp-server-manager.d.ts.map +1 -1
  28. package/dist/mcp/mcp-server-manager.js +9 -23
  29. package/dist/mcp/mcp-server-manager.js.map +1 -1
  30. package/dist/mcp/mcp-tool-wrapper.d.ts.map +1 -1
  31. package/dist/mcp/mcp-tool-wrapper.js +42 -5
  32. package/dist/mcp/mcp-tool-wrapper.js.map +1 -1
  33. package/dist/services/ai-autocomplete-agent.d.ts +39 -0
  34. package/dist/services/ai-autocomplete-agent.d.ts.map +1 -0
  35. package/dist/services/ai-autocomplete-agent.js +189 -0
  36. package/dist/services/ai-autocomplete-agent.js.map +1 -0
  37. package/dist/services/ai-service-client.d.ts +25 -0
  38. package/dist/services/ai-service-client.d.ts.map +1 -1
  39. package/dist/services/ai-service-client.js +162 -1
  40. package/dist/services/ai-service-client.js.map +1 -1
  41. package/dist/services/api-client.d.ts +9 -0
  42. package/dist/services/api-client.d.ts.map +1 -1
  43. package/dist/services/api-client.js +25 -0
  44. package/dist/services/api-client.js.map +1 -1
  45. package/dist/services/auth-handler.js +1 -1
  46. package/dist/services/auth-handler.js.map +1 -1
  47. package/dist/services/input-detection-agent.d.ts +40 -0
  48. package/dist/services/input-detection-agent.d.ts.map +1 -0
  49. package/dist/services/input-detection-agent.js +213 -0
  50. package/dist/services/input-detection-agent.js.map +1 -0
  51. package/dist/services/input-requirement-detector.d.ts +28 -0
  52. package/dist/services/input-requirement-detector.d.ts.map +1 -0
  53. package/dist/services/input-requirement-detector.js +203 -0
  54. package/dist/services/input-requirement-detector.js.map +1 -0
  55. package/dist/services/local-chat-storage.d.ts +21 -0
  56. package/dist/services/local-chat-storage.d.ts.map +1 -1
  57. package/dist/services/local-chat-storage.js +138 -43
  58. package/dist/services/local-chat-storage.js.map +1 -1
  59. package/dist/services/monitored-shell-manager.d.ts +120 -0
  60. package/dist/services/monitored-shell-manager.d.ts.map +1 -0
  61. package/dist/services/monitored-shell-manager.js +239 -0
  62. package/dist/services/monitored-shell-manager.js.map +1 -0
  63. package/dist/services/ollama-service.d.ts +197 -0
  64. package/dist/services/ollama-service.d.ts.map +1 -0
  65. package/dist/services/ollama-service.js +324 -0
  66. package/dist/services/ollama-service.js.map +1 -0
  67. package/dist/services/shell-input-agent.d.ts +89 -0
  68. package/dist/services/shell-input-agent.d.ts.map +1 -0
  69. package/dist/services/shell-input-agent.js +361 -0
  70. package/dist/services/shell-input-agent.js.map +1 -0
  71. package/dist/services/sub-agent-manager.d.ts +139 -0
  72. package/dist/services/sub-agent-manager.d.ts.map +1 -0
  73. package/dist/services/sub-agent-manager.js +517 -0
  74. package/dist/services/sub-agent-manager.js.map +1 -0
  75. package/dist/tools/background-command.d.ts.map +1 -1
  76. package/dist/tools/background-command.js +33 -13
  77. package/dist/tools/background-command.js.map +1 -1
  78. package/dist/tools/command.d.ts.map +1 -1
  79. package/dist/tools/command.js +64 -1
  80. package/dist/tools/command.js.map +1 -1
  81. package/dist/tools/file-ops.d.ts.map +1 -1
  82. package/dist/tools/file-ops.js +33 -19
  83. package/dist/tools/file-ops.js.map +1 -1
  84. package/dist/tools/get-diff.js +1 -1
  85. package/dist/tools/get-diff.js.map +1 -1
  86. package/dist/tools/grep-search.d.ts.map +1 -1
  87. package/dist/tools/grep-search.js +41 -15
  88. package/dist/tools/grep-search.js.map +1 -1
  89. package/dist/tools/plan-mode.js +3 -3
  90. package/dist/tools/plan-mode.js.map +1 -1
  91. package/dist/tools/registry.js +1 -1
  92. package/dist/tools/registry.js.map +1 -1
  93. package/dist/tools/sub-agent.d.ts +9 -0
  94. package/dist/tools/sub-agent.d.ts.map +1 -0
  95. package/dist/tools/sub-agent.js +232 -0
  96. package/dist/tools/sub-agent.js.map +1 -0
  97. package/dist/tools/task-complete.d.ts.map +1 -1
  98. package/dist/tools/task-complete.js +14 -26
  99. package/dist/tools/task-complete.js.map +1 -1
  100. package/dist/ui/components/App.d.ts +45 -2
  101. package/dist/ui/components/App.d.ts.map +1 -1
  102. package/dist/ui/components/App.js +605 -96
  103. package/dist/ui/components/App.js.map +1 -1
  104. package/dist/ui/components/CircularSelectInput.d.ts +24 -0
  105. package/dist/ui/components/CircularSelectInput.d.ts.map +1 -0
  106. package/dist/ui/components/CircularSelectInput.js +71 -0
  107. package/dist/ui/components/CircularSelectInput.js.map +1 -0
  108. package/dist/ui/components/ErrorBoundary.d.ts +3 -2
  109. package/dist/ui/components/ErrorBoundary.d.ts.map +1 -1
  110. package/dist/ui/components/ErrorBoundary.js +29 -1
  111. package/dist/ui/components/ErrorBoundary.js.map +1 -1
  112. package/dist/ui/components/InputBox.d.ts +4 -0
  113. package/dist/ui/components/InputBox.d.ts.map +1 -1
  114. package/dist/ui/components/InputBox.js +343 -21
  115. package/dist/ui/components/InputBox.js.map +1 -1
  116. package/dist/ui/components/InteractiveShell.d.ts +6 -0
  117. package/dist/ui/components/InteractiveShell.d.ts.map +1 -1
  118. package/dist/ui/components/InteractiveShell.js +57 -6
  119. package/dist/ui/components/InteractiveShell.js.map +1 -1
  120. package/dist/ui/components/MCPAddScreen.d.ts +13 -0
  121. package/dist/ui/components/MCPAddScreen.d.ts.map +1 -0
  122. package/dist/ui/components/MCPAddScreen.js +54 -0
  123. package/dist/ui/components/MCPAddScreen.js.map +1 -0
  124. package/dist/ui/components/MCPListScreen.d.ts +17 -0
  125. package/dist/ui/components/MCPListScreen.d.ts.map +1 -0
  126. package/dist/ui/components/MCPListScreen.js +50 -0
  127. package/dist/ui/components/MCPListScreen.js.map +1 -0
  128. package/dist/ui/components/MCPServerListScreen.d.ts +16 -0
  129. package/dist/ui/components/MCPServerListScreen.d.ts.map +1 -0
  130. package/dist/ui/components/MCPServerListScreen.js +59 -0
  131. package/dist/ui/components/MCPServerListScreen.js.map +1 -0
  132. package/dist/ui/components/MonitorModeAIPanel.d.ts +23 -0
  133. package/dist/ui/components/MonitorModeAIPanel.d.ts.map +1 -0
  134. package/dist/ui/components/MonitorModeAIPanel.js +69 -0
  135. package/dist/ui/components/MonitorModeAIPanel.js.map +1 -0
  136. package/dist/ui/components/MultiLineInput.d.ts +13 -0
  137. package/dist/ui/components/MultiLineInput.d.ts.map +1 -0
  138. package/dist/ui/components/MultiLineInput.js +289 -0
  139. package/dist/ui/components/MultiLineInput.js.map +1 -0
  140. package/dist/ui/components/StatusBar.d.ts +2 -0
  141. package/dist/ui/components/StatusBar.d.ts.map +1 -1
  142. package/dist/ui/components/StatusBar.js +33 -2
  143. package/dist/ui/components/StatusBar.js.map +1 -1
  144. package/dist/ui/components/ToolExecutionMessage.d.ts.map +1 -1
  145. package/dist/ui/components/ToolExecutionMessage.js +231 -13
  146. package/dist/ui/components/ToolExecutionMessage.js.map +1 -1
  147. package/dist/ui/components/VersionUpdatePrompt.d.ts.map +1 -1
  148. package/dist/ui/components/VersionUpdatePrompt.js +3 -2
  149. package/dist/ui/components/VersionUpdatePrompt.js.map +1 -1
  150. package/dist/utils/command-history.d.ts +12 -2
  151. package/dist/utils/command-history.d.ts.map +1 -1
  152. package/dist/utils/command-history.js +57 -13
  153. package/dist/utils/command-history.js.map +1 -1
  154. package/dist/utils/input-classifier.js +1 -1
  155. package/dist/utils/input-classifier.js.map +1 -1
  156. package/package.json +2 -1
@@ -19,6 +19,9 @@ import { taskCompleteTool } from './tools/task-complete.js';
  import { readBinaryFileTool } from './tools/read-binary-file.js';
  import { createImageTool } from './tools/create-image.js';
  import { backgroundCommandTool } from './tools/background-command.js';
+ import { subAgentTool } from './tools/sub-agent.js';
+ import { SubAgentManager } from './services/sub-agent-manager.js';
+ import { ShellInputAgent } from './services/shell-input-agent.js';
  import { apiClient } from './services/api-client.js';
  import { conversationManager } from './services/conversation-manager.js';
  import { aiServiceClient } from './services/ai-service-client.js';
@@ -39,6 +42,7 @@ import { localChatStorage } from './services/local-chat-storage.js';
  import { logWarning } from './utils/logger.js';
  import { BackgroundTaskManager } from './services/background-task-manager.js';
  import { sessionQuotaManager } from './services/session-quota-manager.js';
+ import { ollamaService, OllamaService } from './services/ollama-service.js';
  export class CentaurusCLI {
  configManager;
  toolRegistry;
@@ -48,6 +52,7 @@ export class CentaurusCLI {
  pendingPlanRequest = null; // Stores original user request during planning phase
  commandMode = false;
  backgroundMode = false; // Background shell mode for running commands in background
+ shellIdCounter = 1;
  previousMode = 'execution';
  onResponseCallback;
  onDirectMessageCallback; // For slash commands - adds directly to history
@@ -94,7 +99,18 @@ export class CentaurusCLI {
  onShowBackgroundTaskCancelPickerCallback;
  onBackgroundTaskViewCallback;
  onTokenCountUpdate; // Report actual AI context token count to UI
+ currentTokenCount = 0; // Track current token count for context limit checking
+ contextLimitReached = false; // Track if context limit has been reached
+ onContextLimitReached; // Notify UI about context limit state
  onSessionQuotaUpdate;
+ // MCP screen callbacks
+ onShowMCPAddScreen;
+ onShowMCPRemoveScreen;
+ onShowMCPEnableScreen;
+ onShowMCPDisableScreen;
+ onShowMCPListScreen;
+ onSubAgentCountChange; // Callback for sub-agent count changes
+ onPromptAnswered; // Callback when AI answers a shell prompt
  constructor() {
  this.configManager = new ConfigManager();
  this.toolRegistry = new ToolRegistry();
@@ -115,6 +131,11 @@ export class CentaurusCLI {
  });
  // Initialize MCP
  this.initializeMCP();
+ // Initialize ShellInputAgent with tool registry and wire shell input callback
+ ShellInputAgent.initialize(this.toolRegistry);
+ ShellInputAgent.setOnShellInput((shellId, input) => {
+ this.writeToShellStdin(input);
+ });
  }
  setOnResponseCallback(callback) {
  this.onResponseCallback = callback;
@@ -186,9 +207,131 @@ export class CentaurusCLI {
  setOnTokenCountUpdate(callback) {
  this.onTokenCountUpdate = callback;
  }
+ setOnContextLimitReached(callback) {
+ this.onContextLimitReached = callback;
+ }
+ setOnSubAgentCountChange(callback) {
+ this.onSubAgentCountChange = callback;
+ }
+ setOnPromptAnswered(callback) {
+ this.onPromptAnswered = callback;
+ // Wire this callback to ShellInputAgent
+ ShellInputAgent.setOnPromptAnswered(callback);
+ }
+ /**
+ * Calculate and update token count based on current conversation history
+ * This ensures UI is always in sync with the actual AI context
+ * Uses backend's accurate token counting API (Vertex AI countTokens)
+ */
+ async updateTokenCount() {
+ if (!this.onTokenCountUpdate)
+ return;
+ try {
+ // Get current model
+ const currentModel = this.configManager.get('modelName') || 'gemini-2.5-flash';
+ // Prepare messages for token counting
+ // Backend will automatically include system prompt when counting
+ // We just send the conversation history
+ const messagesForCounting = [...this.conversationHistory];
+ // Call backend API for accurate token counting
+ const tokenCount = await apiClient.countTokens(currentModel, messagesForCounting);
+ // Store locally for context limit checking
+ this.currentTokenCount = tokenCount;
+ // Update UI with accurate count
+ this.onTokenCountUpdate(tokenCount);
+ quickLog(`[${new Date().toISOString()}] [updateTokenCount] Accurate count: ${tokenCount} tokens for ${messagesForCounting.length} messages\n`);
+ }
+ catch (error) {
+ // Fallback to character-based estimation if API fails
+ const SYSTEM_PROMPT_ESTIMATE = 14000; // Backend injects ~14K char system prompt
+ // Calculate total characters from conversation history
+ let totalCharacters = 0;
+ for (const msg of this.conversationHistory) {
+ // Content
+ if (typeof msg.content === 'string') {
+ totalCharacters += msg.content.length;
+ }
+ // Thinking content
+ if (msg.thinking) {
+ totalCharacters += msg.thinking.length;
+ }
+ // Tool calls
+ if (msg.tool_calls) {
+ for (const tc of msg.tool_calls) {
+ totalCharacters += tc.name.length;
+ if (tc.arguments) {
+ totalCharacters += JSON.stringify(tc.arguments).length;
+ }
+ }
+ }
+ // Tool call ID
+ if (msg.role === 'tool' && msg.tool_call_id) {
+ totalCharacters += msg.tool_call_id.length;
+ }
+ }
+ // Estimate tokens (1 token ≈ 4 chars)
+ // Only include system prompt estimate if there's conversation history
+ const systemPromptChars = this.conversationHistory.length > 0 ? SYSTEM_PROMPT_ESTIMATE : 0;
+ const estimatedTokens = Math.ceil((totalCharacters + systemPromptChars) / 4);
+ // Store locally for context limit checking
+ this.currentTokenCount = estimatedTokens;
+ this.onTokenCountUpdate(estimatedTokens);
+ quickLog(`[${new Date().toISOString()}] [updateTokenCount] Fallback estimate: ${estimatedTokens} tokens (API error: ${error})\n`);
+ }
+ }
+ /**
+ * Get current token count for context limit checking
+ */
+ getCurrentTokenCount() {
+ return this.currentTokenCount;
+ }
  setOnSessionQuotaUpdate(callback) {
  this.onSessionQuotaUpdate = callback;
  }
+ // MCP screen callback setters
+ setOnMCPAddScreenSetup(callback) {
+ this.onShowMCPAddScreen = callback;
+ }
+ setOnMCPRemoveScreenSetup(callback) {
+ this.onShowMCPRemoveScreen = callback;
+ }
+ setOnMCPEnableScreenSetup(callback) {
+ this.onShowMCPEnableScreen = callback;
+ }
+ setOnMCPDisableScreenSetup(callback) {
+ this.onShowMCPDisableScreen = callback;
+ }
+ setOnMCPListScreenSetup(callback) {
+ this.onShowMCPListScreen = callback;
+ }
+ // MCP server operation methods (called from UI)
+ mcpAddServer(config) {
+ if (this.mcpCommandHandler) {
+ return this.mcpCommandHandler.addServer(config);
+ }
+ return { success: false, error: 'MCP not initialized' };
+ }
+ mcpRemoveServer(name) {
+ if (this.mcpCommandHandler) {
+ this.mcpCommandHandler.removeServer(name);
+ }
+ }
+ mcpEnableServer(name) {
+ if (this.mcpCommandHandler) {
+ this.mcpCommandHandler.enableServer(name);
+ }
+ }
+ mcpDisableServer(name) {
+ if (this.mcpCommandHandler) {
+ this.mcpCommandHandler.disableServer(name);
+ }
+ }
+ mcpValidateConfig(jsonString) {
+ if (this.mcpCommandHandler) {
+ return this.mcpCommandHandler.validateServerConfig(jsonString);
+ }
+ return { valid: false, error: 'MCP not initialized' };
+ }
  /**
  * Notify UI about session quota status
  */
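
Note: the fallback branch of updateTokenCount above estimates tokens from raw character counts when the backend countTokens call fails. A minimal standalone sketch of that estimator (the free-standing form and names are illustrative, not part of the package):

    // Character-based fallback: ~4 chars per token, plus a flat allowance
    // for the ~14K-character system prompt the backend injects.
    const SYSTEM_PROMPT_ESTIMATE = 14000;
    function estimateTokens(history) {
        let chars = 0;
        for (const msg of history) {
            if (typeof msg.content === 'string') chars += msg.content.length;
            if (msg.thinking) chars += msg.thinking.length;
            for (const tc of msg.tool_calls ?? []) {
                chars += tc.name.length;
                if (tc.arguments) chars += JSON.stringify(tc.arguments).length;
            }
            if (msg.role === 'tool' && msg.tool_call_id) chars += msg.tool_call_id.length;
        }
        const systemChars = history.length > 0 ? SYSTEM_PROMPT_ESTIMATE : 0;
        return Math.ceil((chars + systemChars) / 4);
    }
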
@@ -205,6 +348,32 @@ export class CentaurusCLI {
  const mcpConfigManager = new MCPConfigManager();
  const mcpServerManager = new MCPServerManager();
  this.mcpCommandHandler = new MCPCommandHandler(mcpConfigManager, mcpServerManager, this.toolRegistry);
+ // Wire MCP screen callbacks
+ this.mcpCommandHandler.setOnShowMCPAddScreen(() => {
+ if (this.onShowMCPAddScreen) {
+ this.onShowMCPAddScreen();
+ }
+ });
+ this.mcpCommandHandler.setOnShowMCPRemoveScreen((servers) => {
+ if (this.onShowMCPRemoveScreen) {
+ this.onShowMCPRemoveScreen(servers);
+ }
+ });
+ this.mcpCommandHandler.setOnShowMCPEnableScreen((servers) => {
+ if (this.onShowMCPEnableScreen) {
+ this.onShowMCPEnableScreen(servers);
+ }
+ });
+ this.mcpCommandHandler.setOnShowMCPDisableScreen((servers) => {
+ if (this.onShowMCPDisableScreen) {
+ this.onShowMCPDisableScreen(servers);
+ }
+ });
+ this.mcpCommandHandler.setOnShowMCPListScreen((servers) => {
+ if (this.onShowMCPListScreen) {
+ this.onShowMCPListScreen(servers);
+ }
+ });
  // Initialize MCP servers and tools
  await this.mcpCommandHandler.initializeMCP();
  }
@@ -249,12 +418,22 @@ export class CentaurusCLI {
  getCommandMode() {
  return this.commandMode;
  }
+ /**
+ * Get current conversation history for shell input agent context
+ * Returns a copy to prevent modification
+ */
+ getConversationHistory() {
+ return [...this.conversationHistory];
+ }
  getCurrentWorkingDirectory() {
  return this.cwd;
  }
  getCurrentSubshellContext() {
  return this.contextManager.getCurrentContext();
  }
+ getCurrentInteractiveProcess() {
+ return this.currentInteractiveProcess;
+ }
  /**
  * Get the current conversation ID for file uploads
  */
@@ -263,25 +442,48 @@ export class CentaurusCLI {
  }
  async handlePickerSelection(selection, pickerType) {
  try {
- // Selection is the index of the model in models array from backend
- const modelsConfig = await fetchModelsConfig();
- const modelIndex = parseInt(selection, 10);
- if (isNaN(modelIndex) || modelIndex < 0 || modelIndex >= modelsConfig.models.length) {
- throw new Error('Invalid model selection');
- }
- const selectedModel = modelsConfig.models[modelIndex];
- // Store only the model ID and name (not the full config with thinkingConfig)
- // This prevents caching issues when we update model configs
- this.configManager.set('model', selectedModel.id);
- this.configManager.set('modelName', selectedModel.name);
- // Notify UI of model name change and contextWindow
- if (this.onModelChange) {
- this.onModelChange(selectedModel.name, selectedModel.contextWindow);
- }
- const responseMessage = `Model changed to: ${selectedModel.name}`;
- // Send response back to UI
- if (this.onResponseCallback) {
- this.onResponseCallback(responseMessage);
+ if (pickerType === 'local-model') {
+ // Local Ollama model selection
+ // Selection is the model name (e.g., "llama3:latest")
+ const modelName = selection;
+ // Store the local model configuration
+ this.configManager.set('model', modelName);
+ this.configManager.set('modelName', modelName);
+ this.configManager.set('isLocalModel', true);
+ // Notify UI of model name change
+ // Local models don't have a fixed context window, use a reasonable default
+ if (this.onModelChange) {
+ this.onModelChange(modelName, 128000); // Most local models have 128k context
+ }
+ const responseMessage = `✅ Switched to local Ollama model: ${modelName}`;
+ // Send response back to UI
+ if (this.onResponseCallback) {
+ this.onResponseCallback(responseMessage);
+ }
+ }
+ else {
+ // Cloud model selection (existing behavior)
+ // Selection is the index of the model in models array from backend
+ const modelsConfig = await fetchModelsConfig();
+ const modelIndex = parseInt(selection, 10);
+ if (isNaN(modelIndex) || modelIndex < 0 || modelIndex >= modelsConfig.models.length) {
+ throw new Error('Invalid model selection');
+ }
+ const selectedModel = modelsConfig.models[modelIndex];
+ // Store only the model ID and name (not the full config with thinkingConfig)
+ // This prevents caching issues when we update model configs
+ this.configManager.set('model', selectedModel.id);
+ this.configManager.set('modelName', selectedModel.name);
+ this.configManager.set('isLocalModel', false);
+ // Notify UI of model name change and contextWindow
+ if (this.onModelChange) {
+ this.onModelChange(selectedModel.name, selectedModel.contextWindow);
+ }
+ const responseMessage = `✅ Switched to cloud model: ${selectedModel.name}`;
+ // Send response back to UI
+ if (this.onResponseCallback) {
+ this.onResponseCallback(responseMessage);
+ }
  }
  }
  catch (error) {
@@ -371,6 +573,14 @@ export class CentaurusCLI {
  this.toolRegistry.register(readBinaryFileTool);
  this.toolRegistry.register(createImageTool);
  this.toolRegistry.register(backgroundCommandTool);
+ this.toolRegistry.register(subAgentTool);
+ // Initialize SubAgentManager with tool registry
+ SubAgentManager.initialize(this.toolRegistry);
+ SubAgentManager.setOnSubAgentCountChange((count) => {
+ if (this.onSubAgentCountChange) {
+ this.onSubAgentCountChange(count);
+ }
+ });
  // Load configuration
  const config = this.configManager.load();
  // Enable backend sync if authenticated
@@ -473,41 +683,69 @@ Press Enter to continue...
  }
  /**
  * Clean up orphaned tool_calls from conversation history.
- * This is called when a request is aborted to ensure the history remains consistent.
- * Vertex AI requires that every assistant message with tool_calls has matching tool result messages.
+ * This validates the ENTIRE history and removes any assistant messages
+ * where tool_calls don't have matching tool result messages.
+ *
+ * Vertex AI / Claude APIs require that every assistant message with tool_calls
+ * has matching tool result messages immediately following it.
  */
  cleanupOrphanedToolCalls() {
  if (this.conversationHistory.length === 0)
  return;
- // Find the last assistant message with tool_calls
- let lastAssistantWithToolCallsIndex = -1;
- for (let i = this.conversationHistory.length - 1; i >= 0; i--) {
- const msg = this.conversationHistory[i];
- if (msg.role === 'assistant' && msg.tool_calls && msg.tool_calls.length > 0) {
- lastAssistantWithToolCallsIndex = i;
- break;
+ let cleanedAny = false;
+ let iterations = 0;
+ const maxIterations = 20; // Safety limit to prevent infinite loops
+ // Keep cleaning until no more orphans are found
+ // (removing one orphan may expose another)
+ while (iterations < maxIterations) {
+ iterations++;
+ let foundOrphan = false;
+ // Scan through history to find ALL assistant messages with tool_calls
+ for (let i = 0; i < this.conversationHistory.length; i++) {
+ const msg = this.conversationHistory[i];
+ if (msg.role !== 'assistant' || !msg.tool_calls || msg.tool_calls.length === 0) {
+ continue;
+ }
+ // Collect all tool_call IDs from this assistant message
+ const expectedToolCallIds = new Set(msg.tool_calls.map((tc) => tc.id));
+ // Check if ALL tool_calls have matching tool result messages after this message
+ // Tool results must come AFTER the assistant message, before the next user/assistant message
+ let j = i + 1;
+ while (j < this.conversationHistory.length) {
+ const nextMsg = this.conversationHistory[j];
+ // If we hit a user or assistant message, stop looking for tool results
+ if (nextMsg.role === 'user' || nextMsg.role === 'assistant') {
+ break;
+ }
+ // If it's a tool result, check if it matches one of our expected IDs
+ if (nextMsg.role === 'tool' && nextMsg.tool_call_id) {
+ expectedToolCallIds.delete(nextMsg.tool_call_id);
+ }
+ j++;
+ }
+ // If there are still unmatched tool_calls, this is an orphan
+ if (expectedToolCallIds.size > 0) {
+ try {
+ quickLog(`[${new Date().toISOString()}] [CLI] Found orphaned tool_calls at index ${i}: ${Array.from(expectedToolCallIds).join(', ')}\n`);
+ }
+ catch (e) { }
+ // Remove this assistant message and all tool results up to (but not including) the next user/assistant message
+ const removeCount = j - i;
+ this.conversationHistory.splice(i, removeCount);
+ foundOrphan = true;
+ cleanedAny = true;
+ break; // Restart scan from beginning since indices changed
+ }
  }
- }
- if (lastAssistantWithToolCallsIndex === -1)
- return; // No assistant messages with tool_calls
- const assistantMsg = this.conversationHistory[lastAssistantWithToolCallsIndex];
- const toolCallIds = new Set(assistantMsg.tool_calls.map((tc) => tc.id));
- // Check if all tool_calls have matching tool result messages after this assistant message
- for (let i = lastAssistantWithToolCallsIndex + 1; i < this.conversationHistory.length; i++) {
- const msg = this.conversationHistory[i];
- if (msg.role === 'tool' && msg.tool_call_id) {
- toolCallIds.delete(msg.tool_call_id);
+ if (!foundOrphan) {
+ break; // No more orphans found, we're done
  }
  }
- // If there are orphaned tool_calls (no matching tool results), remove from history
- if (toolCallIds.size > 0) {
- // Log the cleanup for debugging
+ if (cleanedAny) {
  try {
- quickLog(`[${new Date().toISOString()}] [CLI] Cleaning up orphaned tool_calls: ${Array.from(toolCallIds).join(', ')}\n`);
+ quickLog(`[${new Date().toISOString()}] [CLI] Completed history cleanup after ${iterations} iteration(s), ${this.conversationHistory.length} messages remaining\n`);
  }
  catch (e) { }
- // Remove the orphaned assistant message and any partial tool results after it
- this.conversationHistory.splice(lastAssistantWithToolCallsIndex);
  }
  }
@@ -562,11 +800,59 @@ Press Enter to continue...
  this.notifySessionQuotaStatus();
  return;
  }
+ // Check context window limit before accepting new messages
+ // Get current model's context window
+ const currentModel = this.configManager.get('modelName') || 'gemini-2.5-flash';
+ const { getModelContextWindowSync } = await import('./config/models.js');
+ const maxTokens = getModelContextWindowSync(currentModel);
+ // Calculate current token usage percentage
+ // We need to estimate tokens for the new message too
+ const newMessageChars = message.length;
+ const estimatedNewMessageTokens = Math.ceil(newMessageChars / 4);
+ // Get current token count from state (updated by updateTokenCount)
+ const currentTokens = this.getCurrentTokenCount();
+ const projectedTokens = currentTokens + estimatedNewMessageTokens;
+ const usagePercent = (projectedTokens / maxTokens) * 100;
+ // Block new messages if context is ≥80% full
+ if (usagePercent >= 80) {
+ // Set context limit reached state
+ if (!this.contextLimitReached) {
+ this.contextLimitReached = true;
+ if (this.onContextLimitReached) {
+ this.onContextLimitReached(true);
+ }
+ }
+ const contextLimitMessage = `\n⚠️ Context limit reached (${usagePercent.toFixed(1)}% of ${maxTokens.toLocaleString()} tokens used).\n\nYour conversation has grown too large for the AI to process effectively.\n\nPlease start a new chat to continue:\n • Use /new to start a fresh conversation\n • Or use /chat to switch to a different chat\n\nYour current conversation has been saved and you can return to it later.\n\nYou can still use:\n • Slash commands (e.g., /help, /new, /chat)\n • Terminal commands (in Command mode)`;
+ if (this.onDirectMessageCallback) {
+ this.onDirectMessageCallback(contextLimitMessage);
+ }
+ quickLog(`[${new Date().toISOString()}] [handleMessage] Context limit reached: ${usagePercent.toFixed(1)}% (${projectedTokens}/${maxTokens} tokens)\n`);
+ return;
+ }
  // Cancel any active request when a new message comes in
  // This enables "interrupt and replace" - new message takes priority
  if (this.currentAbortController) {
- this.currentAbortController.abort();
- this.currentAbortController = undefined;
+ // Mark as intentionally aborted so error handling knows not to throw or show message
+ this.requestIntentionallyAborted = true;
+ const oldController = this.currentAbortController;
+ // Create new controller BEFORE aborting old one to avoid race condition
+ // where new request tries to access undefined controller
+ this.currentAbortController = new AbortController();
+ oldController.abort();
+ // Clean up orphaned tool calls from the interrupted turn
+ this.cleanupOrphanedToolCalls();
+ // Remove the last user message from history (it's being replaced by the new message)
+ // Walk backwards and remove messages until we find and remove a user message
+ while (this.conversationHistory.length > 0) {
+ const lastMsg = this.conversationHistory[this.conversationHistory.length - 1];
+ this.conversationHistory.pop();
+ if (lastMsg.role === 'user') {
+ // Found and removed the interrupted user message, stop here
+ break;
+ }
+ // Continue removing assistant/tool messages that were part of the interrupted turn
+ }
+ quickLog(`[${new Date().toISOString()}] [handleMessage] Interrupted active request - cleaned up history for replacement\n`);
  }
  // Store original request if in planning mode (for execution phase after approval)
  if (this.planMode && !this.pendingPlanRequest) {
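
Note: the new gate in handleMessage blocks input once the projected context crosses 80% of the model's window, using the same chars/4 heuristic for the incoming message. The arithmetic, isolated (function name is illustrative):

    // Projected usage = stored count + estimate for the new message.
    function contextLimitHit(currentTokens, newMessage, maxTokens) {
        const projected = currentTokens + Math.ceil(newMessage.length / 4);
        return (projected / maxTokens) * 100 >= 80;
    }
    // e.g. 790,000 tokens used of a 1,000,000-token window plus a
    // 60,000-character paste: 790,000 + 15,000 = 805,000 → 80.5% → blocked.
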
@@ -613,6 +899,12 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  contextManager: this.contextManager,
  cliAdapter: this, // Pass CLI adapter reference for interactive process management
  requireApproval: async (message, risky, preview, operationType, operationDetails) => {
+ // Special bypass for shell input to running processes:
+ // If the AI is sending input to an existing shell (via shell_input), we bypass the separate approval step.
+ // The user already implicitly approved the interaction by running the command in agent control mode.
+ if (operationType === 'execute_command' && operationDetails?.shell_input) {
+ return true;
+ }
  if (this.onToolApprovalRequest) {
  return await this.onToolApprovalRequest({ message, risky, preview, operationType, operationDetails });
  }
@@ -636,6 +928,9 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  // Build messages array WITHOUT system prompt - backend will inject it
  // The backend uses cli-system-prompt.md for CLI clients
  // We pass environmentContext and mode separately so backend can inject them
+ // SAFETY: Clean up any orphaned tool calls before making AI request
+ // This prevents "improperly formed request" errors from corrupted history
+ this.cleanupOrphanedToolCalls();
  let messages = [...this.conversationHistory];
  // Inject subshell context if in a subshell environment
  const currentContext = this.contextManager.getCurrentContext();
@@ -646,11 +941,7 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  const mode = this.getMode();
  let finalAssistantMessage = '';
  const MAX_TURNS = 500; // Allow up to 500 turns for complex tasks
- const MAX_TOOL_CALLS_PER_TURN = 5; // Limit tool calls per turn to prevent overthinking
- const MAX_NARRATION_ATTEMPTS = 3; // Maximum times we'll prompt AI to stop narrating
  let turnCount = 0;
- let narrationAttempts = 0; // Track how many times AI narrated without executing
- let completionAttempts = 0; // Track how many times AI provided text summary without task_complete
  let thoughtStartTime = null; // Track when thinking started
  let thoughtContent = ''; // Accumulate thought content during streaming
  let currentTurnThinking = ''; // Persist thinking for the current turn to attach to assistant message
@@ -662,9 +953,12 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  // ANTI-LOOP: Track ALL duplicate tool calls (not just file ops)
  const toolCallTracker = new Map(); // Hash -> count
  const MAX_IDENTICAL_TOOL_CALLS = 3; // Max times exact same tool call allowed
- // Create AbortController for this request
- this.currentAbortController = new AbortController();
- this.requestIntentionallyAborted = false; // Reset abort flag for new request
+ // Create AbortController for this request (if not already created during interruption handling)
+ if (!this.currentAbortController) {
+ this.currentAbortController = new AbortController();
+ }
+ // Note: Don't reset requestIntentionallyAborted here - let the error handler reset it
+ // to avoid race condition where old request's error handler sees false
  // Clean up any orphaned tool_calls from a previous aborted request
  // This prevents 400 Bad Request errors when sending to the backend
  this.cleanupOrphanedToolCalls();
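
Note: toolCallTracker above is described only as a hash → count map capped at MAX_IDENTICAL_TOOL_CALLS; the actual hash function is not shown in this diff. One plausible keying scheme:

    // Key each call by name + serialized arguments; reject the 4th identical repeat.
    function isDuplicateLoop(tracker, toolCall, maxIdentical = 3) {
        const hash = `${toolCall.name}:${JSON.stringify(toolCall.arguments ?? {})}`;
        const count = (tracker.get(hash) ?? 0) + 1;
        tracker.set(hash, count);
        return count > maxIdentical;
    }
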
@@ -736,16 +1030,14 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  quickLog(`[${new Date().toISOString()}] [CLI] Assistant messages with tool_calls: ${messageStats.assistantWithToolCalls}\n`);
  }
  catch (e) { }
- // Report actual token count to UI (estimate: 1 token ≈ 4 chars)
- // Include system prompt estimate (~14000 chars based on backend logs)
- const SYSTEM_PROMPT_ESTIMATE = 14000; // Backend injects ~14K char system prompt
- const estimatedTokens = Math.ceil((messageStats.totalCharacters + SYSTEM_PROMPT_ESTIMATE) / 4);
- if (this.onTokenCountUpdate) {
- this.onTokenCountUpdate(estimatedTokens);
- }
+ // Update token count using accurate API
+ // This will use backend's Vertex AI countTokens for precision
+ this.updateTokenCount().catch(err => {
+ quickLog(`[${new Date().toISOString()}] [CLI] Failed to update token count: ${err}\n`);
+ });
  // Stream AI response from backend
  // Backend will inject system prompt automatically with environment context
- for await (const chunk of aiServiceClient.streamChat(selectedModel, messages, tools, environmentContext, mode, selectedModelThinkingConfig, this.currentAbortController.signal)) {
+ for await (const chunk of aiServiceClient.streamChat(selectedModel, messages, tools, environmentContext, mode, selectedModelThinkingConfig, this.currentAbortController?.signal)) {
  // Handle error chunks
  if (chunk.type === 'error') {
  // Check if this is an abort situation (user cancelled or sent new message)
@@ -817,6 +1109,16 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  // Handle tool call chunks
  if (chunk.type === 'tool_call') {
  const toolCall = chunk.toolCall;
+ // Kiro/Claude compatibility: Parse string arguments early so they are objects throughout the pipeline
+ // This ensures logging, UI updates, and tool execution all see the parsed object
+ if (toolCall.arguments && typeof toolCall.arguments === 'string') {
+ try {
+ toolCall.arguments = JSON.parse(toolCall.arguments);
+ }
+ catch (e) {
+ // Ignore parsing error, will be handled by downstream logic
+ }
+ }
  // Debug: Log every tool_call chunk received
  try {
  quickLog(`[${new Date().toISOString()}] [CLI] *** TOOL_CALL CHUNK RECEIVED (REAL-TIME): ${toolCall?.name || 'unknown'}\n`);
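
Note: the early-parse block above guards against providers that stream tool-call arguments as a JSON string instead of an object. As a standalone sketch (function name is illustrative):

    // Normalize once so logging, UI updates, and execution all see an object.
    function normalizeToolCallArguments(toolCall) {
        if (toolCall.arguments && typeof toolCall.arguments === 'string') {
            try {
                toolCall.arguments = JSON.parse(toolCall.arguments);
            } catch {
                // leave the string; downstream validation reports the bad payload
            }
        }
        return toolCall;
    }
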
@@ -856,9 +1158,11 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  // REAL-TIME EXECUTION: Execute tool immediately during streaming
  // This reduces latency by not waiting for the entire stream to finish
  try {
- // Extract and display reason_text if present (skip for task_complete)
+ // Extract and display reason_text if present (skip for task_complete and shell_input)
  const reasonText = toolCall.arguments.reason_text;
- if (reasonText && this.onResponseStreamCallback) {
+ // Don't show reason text for shell inputs (hidden from history per user request)
+ const isShellInput = toolCall.name === 'execute_command' && toolCall.arguments.shell_input;
+ if (reasonText && !isShellInput && this.onResponseStreamCallback) {
  this.onResponseStreamCallback(reasonText + '\n\n');
  }
  // Show 'executing' status immediately
@@ -866,6 +1170,14 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  // Log tool execution start
  conversationLogger.logToolExecutionStart(toolCall.name, toolCall.id);
  // Execute the tool (it will request approval if needed via requireApproval callback)
+ // SPECIAL: Intercept sub_agent spawn to enforce approval
+ if (toolCall.name === 'sub_agent' && toolCall.arguments?.action === 'spawn') {
+ const approved = await context.requireApproval(`Spawn Sub-Agent`, true, // risky
+ undefined, 'execute_command', { command: `spawn sub-agent` });
+ if (!approved) {
+ throw new Error('User rejected sub-agent spawn request');
+ }
+ }
  const result = await this.toolRegistry.execute(toolCall.name, toolCall.arguments, context);
  if (result.success) {
  conversationLogger.logToolResult(toolCall.name, toolCall.id, result.result, true);
@@ -947,8 +1259,6 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  assistantMessageLength: assistantMessage.length,
  hasToolCalls: toolCalls.length > 0,
  willContinue: toolCalls.length > 0,
- narrationAttempts,
- completionAttempts,
  });
  // If there are tool calls, execute them
  if (toolCalls.length > 0) {
@@ -961,11 +1271,7 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  // Suppress text output - AI should only use reason_text
  assistantMessage = ''; // Clear ALL text output - AI should only use reason_text
  }
- // Limit tool calls per turn to prevent overthinking
- if (toolCalls.length > MAX_TOOL_CALLS_PER_TURN) {
- // Silently limit tool calls
- toolCalls = toolCalls.slice(0, MAX_TOOL_CALLS_PER_TURN);
- }
+ // Tool call limit removed - let AI use as many tools as needed per turn
  const toolResults = [...inStreamToolResults]; // Start with in-stream results
  const handledToolCallIds = new Set(); // Only for special tools (create_plan, mark_task_complete)
  let userCancelledOperation = false;
@@ -989,25 +1295,24 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  try {
  // Check if this is task_complete FIRST (before displaying anything)
  if (toolCall.name === 'task_complete') {
- taskCompleted = true;
- taskCompleteSummary = toolCall.arguments.summary || '';
- conversationLogger.logTaskComplete(taskCompleteSummary);
- // If task_complete has a summary, use that as the final message
- // If no summary provided, PRESERVE the already-streamed assistantMessage text
- if (taskCompleteSummary) {
- // AI provided a summary - suppress any other text and use the summary
- assistantMessage = '';
- // IMPORTANT: Clear the already-streamed text from UI before showing summary
- // This prevents duplicate content (streamed text + summary)
- if (this.onClearStreamedResponse) {
- this.onClearStreamedResponse();
- }
- // Stream the summary to UI so it's visible
- if (this.onResponseStreamCallback) {
- this.onResponseStreamCallback(taskCompleteSummary);
- }
+ // SUBAGENT BLOCKING: Check if any sub-agents are still running
+ const runningSubAgents = SubAgentManager.getRunningSubAgents();
+ if (runningSubAgents.length > 0) {
+ // Block task_complete and provide feedback
+ const agentIds = runningSubAgents.map(a => a.id).join(', ');
+ toolResults.push({
+ tool_call_id: toolCall.id,
+ name: toolCall.name,
+ result: `Cannot complete task: ${runningSubAgents.length} sub-agent(s) still running. IDs: ${agentIds}. Check their status periodically with sub_agent(action="status", agent_id="...") and wait for completion before calling task_complete.`,
+ });
+ handledToolCallIds.add(toolCall.id);
+ continue; // Skip task_complete execution, keep loop running
  }
- // If no summary, keep assistantMessage as-is (it was already streamed to UI)
+ taskCompleted = true;
+ conversationLogger.logTaskComplete('');
+ // task_complete no longer has a summary parameter
+ // The AI streams all response text BEFORE calling task_complete()
+ // So we just preserve whatever assistantMessage was already streamed
  // Execute the tool for proper result handling
  await this.toolRegistry.execute(toolCall.name, toolCall.arguments, context);
  // Clear the plan when task is complete
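
Note: the sub-agent gate above turns a premature task_complete into a tool result instead of executing it, so the loop keeps running until the spawned agents finish. Reduced to its essentials (hypothetical free-standing form):

    // Returns a blocking message while sub-agents run, or null when clear.
    function gateTaskComplete(runningSubAgents) {
        if (runningSubAgents.length === 0) return null;
        const ids = runningSubAgents.map((a) => a.id).join(', ');
        return `Cannot complete task: ${runningSubAgents.length} sub-agent(s) ` +
            `still running. IDs: ${ids}.`;
    }
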
@@ -1240,7 +1545,7 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  tool_call_id: toolCall.id,
  name: toolCall.name,
  result: completion.allComplete
- ? 'All tasks completed! Call task_complete with a summary of what was accomplished.'
+ ? 'All tasks completed! Output your summary of what was accomplished, then call task_complete().'
  : completion.nextSubtask
  ? `Subtask ${completion.taskNumber} completed. Next subtask: ${completion.nextSubtask}`
  : completion.nextTask
@@ -1250,7 +1555,7 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  // If all tasks are complete, prompt AI to call task_complete
  if (completion.allComplete) {
  toolResults[toolResults.length - 1].result =
- 'All tasks in the plan are now completed! Call task_complete with a summary of what was accomplished.';
+ 'All tasks in the plan are now completed! Output your summary of what was accomplished, then call task_complete().';
  }
  }
  catch (parseError) {
@@ -1314,12 +1619,14 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  remoteContext = `docker:${metadata.containerId?.substring(0, 12) || 'container'}`;
  }
  }
- // Notify UI: tool is executing (send before execution starts)
+ // Notify UI: tool executing
  if (this.onToolExecutionUpdate) {
- // Add cwd to arguments for execute_command tool, and remoteContext for all tools
- const toolArgs = toolCall.name === 'execute_command'
- ? { ...toolCall.arguments, cwd: effectiveCwd, remoteContext }
- : { ...toolCall.arguments, remoteContext };
+ let toolArgs = { ...toolCall.arguments, remoteContext };
+ // Special handling for execute_command
+ if (toolCall.name === 'execute_command') {
+ // Add effective CWD
+ toolArgs.cwd = effectiveCwd;
+ }
  this.onToolExecutionUpdate({
  toolName: toolCall.name,
  status: 'executing',
@@ -1329,6 +1636,31 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  // Log tool execution start
  conversationLogger.logToolExecutionStart(toolCall.name, toolCall.id);
  // Execute the tool (it will request approval if needed)
+ // SPECIAL: Intercept sub_agent spawn to enforce approval
+ if (toolCall.name === 'sub_agent' && toolCall.arguments?.action === 'spawn') {
+ const approved = await context.requireApproval(`Spawn Sub-Agent`, true, // risky
+ undefined, 'execute_command', { command: `spawn sub-agent` });
+ if (!approved) {
+ // User rejected - log result as error and skip execution
+ conversationLogger.logToolResult(toolCall.name, toolCall.id, null, false, 'User rejected');
+ // Notify UI: tool failed
+ if (this.onToolExecutionUpdate) {
+ this.onToolExecutionUpdate({
+ toolName: toolCall.name,
+ status: 'error',
+ error: 'User rejected',
+ arguments: toolCall.arguments
+ });
+ }
+ toolResults.push({
+ tool_call_id: toolCall.id,
+ name: toolCall.name,
+ result: 'User rejected sub-agent spawn request',
+ error: 'User rejected'
+ });
+ continue;
+ }
+ }
  const result = await this.toolRegistry.execute(toolCall.name, toolCall.arguments, context);
  if (result.success) {
  // Log successful tool result
@@ -1442,6 +1774,16 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  }
  }
  }
+ // STOP AGENT LOOP if shell_input was provided
+ // Interactive shell input implies handing control back to the shell/user
+ const hasShellInput = toolCalls.some(tc => tc.name === 'execute_command' && tc.arguments && tc.arguments.shell_input);
+ if (hasShellInput) {
+ try {
+ quickLog(`[${new Date().toISOString()}] [CLI] Input sent to shell. Stopping agent loop to await output.\n`);
+ }
+ catch (e) { }
+ taskCompleted = true;
+ }
  // If task_complete was called, stop the agentic loop immediately
  if (taskCompleted) {
  // Set the final message: use summary if provided, otherwise use the streamed assistantMessage
@@ -1556,8 +1898,8 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  const silentStopPrompt = '⚠️ **SILENT STOP DETECTED**: You ended your turn without any output or tool calls.\n\n' +
  '**This is not allowed.** You must either:\n' +
  '1. Execute a tool call if more work is needed, OR\n' +
- '2. Call task_complete() with a summary of what you accomplished\n\n' +
- '**If you have completed the task**, call task_complete() NOW with a comprehensive summary.\n' +
+ '2. Output your response text, then call task_complete()\n\n' +
+ '**If you have completed the task**, output your summary now, then call task_complete().\n' +
  '**If more work is needed**, execute the next tool call immediately.';
  conversationLogger.logSystemPrompt('silent_stop_prompt', silentStopPrompt);
  this.conversationHistory.push({
@@ -1565,113 +1907,17 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  role: 'user',
  content: silentStopPrompt,
  });
  }
- // Case 2: Text-only response (Narration or Summary)
+ // Case 2: Text-only response - accept it immediately as final
  else {
- const isNarration = /\b(I will|I'll|Let me|Let's|I need to|I'm going to|I should|I can)\b/i.test(assistantMessage);
- if (isNarration) {
- narrationAttempts++;
- conversationLogger.logNarrationDetection('narration', {
- turn: turnCount,
- narrationAttempts,
- maxAttempts: MAX_NARRATION_ATTEMPTS,
- messagePreview: assistantMessage.substring(0, 200),
- });
- // If AI keeps narrating without executing, force completion immediately
- if (narrationAttempts >= MAX_NARRATION_ATTEMPTS) {
- // Force task completion with error message
- conversationLogger.logNarrationDetection('narration', {
- action: 'forced_completion',
- reason: 'max_narration_attempts_reached',
- });
- finalAssistantMessage = '⚠️ **Task Incomplete**: The AI repeatedly described actions without executing them.\n\n' +
- '**What happened**: The AI entered a narration loop, describing what it wanted to do instead of using tool calls.\n\n' +
- '**Suggestions**:\n' +
- '1. Try rephrasing your request more specifically\n' +
- '2. Break the task into smaller, concrete steps\n' +
- '3. Provide explicit file paths if known\n' +
- '4. Check if the model supports tool calling properly\n\n' +
- '**Last message**: ' + assistantMessage;
- break;
- }
- // First narration attempt - give a strong warning with specific guidance
- if (narrationAttempts === 1) {
- const completionPrompt = '🛑 **CRITICAL ERROR**: You output text without using tools.\n\n' +
- '**COMMUNICATION RULE VIOLATION**: You can ONLY communicate through:\n' +
- '1. `reason_text` parameter in tool calls\n' +
- '2. `summary` parameter in task_complete tool\n\n' +
- '**Your text output was HIDDEN from the user.**\n\n' +
- '**MANDATORY CORRECTION**:\n' +
- '- If you need to DO something: Call the tool with `reason_text`\n' +
- '- If you are DONE: Call `task_complete(summary="your message")`\n' +
- '- NEVER output plain text - it will be hidden\n\n' +
- '**Example for greeting**:\n' +
- '```\n' +
- '<thought>User said hello, I should greet back</thought>\n' +
- '(Call task_complete with summary="Hello! How can I help you today?")\n' +
- '```\n\n' +
- '**Your NEXT response MUST use tools.**';
- this.conversationHistory.push({
- role: 'user',
- content: completionPrompt,
- });
- }
- else {
- // Second narration attempt - final warning before forced completion
- const completionPrompt = '🚨 **FINAL WARNING** (Attempt ' + narrationAttempts + '/' + MAX_NARRATION_ATTEMPTS + '): You are STILL narrating instead of executing.\n\n' +
- '**This is your LAST chance**:\n' +
- '1. Execute a tool call NOW, or\n' +
- '2. Call task_complete() to end\n\n' +
- 'If you output narration text again, the task will be forcibly terminated.';
- this.conversationHistory.push({
- role: 'user',
- content: completionPrompt,
- });
- }
- }
- else {
- // AI output a response without narration - it should finish
- // Reset narration counter since this is a valid response
- narrationAttempts = 0;
- // Check if the message looks like a final answer/summary
- // If it has substantial length, assume it's a summary attempt
- const isFinalAnswer = assistantMessage.length > 20;
- if (isFinalAnswer) {
- completionAttempts++;
- conversationLogger.logNarrationDetection('final_answer', {
- turn: turnCount,
- completionAttempts,
- messagePreview: assistantMessage.substring(0, 200),
- });
- // If AI keeps providing text summaries without calling task_complete, accept the text and finish
- // This prevents the infinite loop where the AI keeps summarizing in response to our prompt
- if (completionAttempts > 1) {
- conversationLogger.logNarrationDetection('final_answer', {
- action: 'accepting_text_as_final',
- reason: 'multiple_completion_attempts',
- });
- finalAssistantMessage = assistantMessage;
- break;
- }
- // This looks like a final answer - prompt to call task_complete
- const completionPrompt = '✅ **Possible Completion Detected**: You provided a text response but did not call `task_complete`.\n\n' +
- '**To finish the conversation, you MUST call the `task_complete` tool.**\n\n' +
- 'Please call `task_complete` now with your summary as the argument.';
- this.conversationHistory.push({
- role: 'user',
- content: completionPrompt,
- });
- }
- else {
- // Short message without clear intent - ask for clarification or completion
- const completionPrompt = 'Your response is unclear. Either:\n' +
- '1. Execute the next tool call if more work is needed, or\n' +
- '2. Call task_complete() if the task is done';
- this.conversationHistory.push({
- role: 'user',
- content: completionPrompt,
- });
- }
- }
+ // Log that we're accepting this as a final answer
+ conversationLogger.logNarrationDetection('final_answer', {
+ turn: turnCount,
+ messagePreview: assistantMessage.substring(0, 200),
+ action: 'accepting_immediately',
+ });
+ // Accept the text as the final message and break
+ finalAssistantMessage = assistantMessage;
+ break;
  }
  // Rebuild messages array with updated history
  // Backend will inject system prompt
@@ -1687,8 +1933,8 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  const silentStopPrompt = '⚠️ **SILENT STOP DETECTED**: You ended your turn without any output or tool calls.\n\n' +
  '**This is not allowed.** You must either:\n' +
  '1. Execute a tool call if more work is needed, OR\n' +
- '2. Call task_complete() with a summary of what you accomplished\n\n' +
- '**If you have completed the task**, call task_complete() NOW with a comprehensive summary.\n' +
+ '2. Output your response text, then call task_complete()\n\n' +
+ '**If you have completed the task**, output your summary now, then call task_complete().\n' +
  '**If more work is needed**, execute the next tool call immediately.';
  this.conversationHistory.push({
  role: 'user',
@@ -1741,8 +1987,14 @@ DO NOT use write_to_file, edit_file, or execute_command until the plan is approv
  catch (error) {
  // Log the error
  conversationLogger.logError('handleMessage', error);
- // Check if this was an abort/cancellation
- if (error.name === 'AbortError' || error.message?.includes('aborted')) {
+ // Check if this was an abort/cancellation (including timeout errors from aborted requests)
+ if (error.name === 'AbortError' || error.message?.includes('aborted') || error.message?.includes('timed out') || this.requestIntentionallyAborted) {
+ // If intentionally aborted for replacement by new message, return silently
+ // The new message will take over - no need to show cancellation message
+ if (this.requestIntentionallyAborted) {
+ this.requestIntentionallyAborted = false;
+ return;
+ }
  conversationLogger.logError('handleMessage', new Error('Request cancelled by user'));
  if (this.onResponseCallback) {
  this.onResponseCallback('⚠️ Request cancelled by user.');
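
Note: this catch block pairs with the interrupt-and-replace path in handleMessage: requestIntentionallyAborted distinguishes a silent hand-off to the replacing message from a user-visible cancellation. A sketch of the handshake (runTurn and notify are hypothetical stand-ins, not package APIs):

    // state.requestIntentionallyAborted is set by the interrupting caller.
    async function runWithAbortHandling(runTurn, state, notify) {
        try {
            await runTurn();
        } catch (error) {
            const abortLike = error.name === 'AbortError' ||
                error.message?.includes('aborted') ||
                error.message?.includes('timed out');
            if (abortLike || state.requestIntentionallyAborted) {
                if (state.requestIntentionallyAborted) {
                    state.requestIntentionallyAborted = false; // reset for the replacing request
                    return; // hand off silently; the new message's turn takes over
                }
                notify('⚠️ Request cancelled by user.');
                return;
            }
            throw error; // genuine failure: surface it
        }
    }
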
@@ -2092,7 +2344,7 @@ Start by listing the directory structure to understand what you're working with.
  '• Work silently without narrating actions\n' +
  '• Use Touch-First safety (never guess file paths)\n' +
  '• Apply surgical precision to file edits\n' +
- '• Call task_complete when done with comprehensive summary\n' +
+ '• Output summary text, then call task_complete() when done\n' +
  '• Inject intelligent error recovery hints\n\n' +
  'This is the industry-standard autonomous agent mode.'
  : '⚠️ Autonomous Mode disabled\n\n' +
@@ -2154,43 +2406,126 @@ Start by listing the directory structure to understand what you're working with.
                 `Model: ${config.model || 'gemini-2.5-flash (default)'}\n` +
                 `Enhanced Quality: ${config.enhancedQuality !== false ? '✅ Enabled' : '❌ Disabled'}\n` +
                 `Autonomous Mode: ${config.autonomousMode === true ? '✅ Enabled' : '❌ Disabled'}\n` +
+                `AI Auto-Suggest: ${config.aiAutoSuggest === true ? '✅ Enabled' : '❌ Disabled'}\n` +
                 `Authentication: ${apiClient.isAuthenticated() ? '✅ Signed in' : '❌ Not signed in'}`;
         }
         break;
+    case 'settings':
+        if (args.length >= 2 && args[0].toLowerCase() === 'auto-suggest') {
+            // Handle /settings auto-suggest <on/off>
+            const value = args[1].toLowerCase();
+            if (value === 'on') {
+                this.configManager.set('aiAutoSuggest', true);
+                responseMessage = '✅ **AI Auto-Suggestions Enabled**\n\n' +
+                    'From now on, I will suggest commands after 5 seconds of inactivity.\n' +
+                    'Suggestions will appear in grey text. Use the **Right Arrow** key to accept them.';
+            }
+            else if (value === 'off') {
+                this.configManager.set('aiAutoSuggest', false);
+                responseMessage = '✅ **AI Auto-Suggestions Disabled**\n\n' +
+                    'I will no longer provide AI-powered command suggestions.';
+            }
+            else {
+                responseMessage = '❌ Invalid option. Usage: `/settings auto-suggest on` or `/settings auto-suggest off`';
+            }
+        }
+        else {
+            responseMessage = '❌ Invalid command format.\n\nUsage:\n- `/settings auto-suggest on`\n- `/settings auto-suggest off`';
+        }
+        break;
     case 'model':
-        if (args.length === 0) {
-            // Show picker for model selection
+    case 'models':
+        // Handle subcommands: local, cloud
+        const modelSubCommand = args[0]?.toLowerCase();
+        if (modelSubCommand === 'local') {
+            // Local Ollama models
+            try {
+                // Check if Ollama is running
+                const status = await ollamaService.isOllamaRunning();
+                if (!status.available) {
+                    responseMessage = `❌ Cannot connect to Ollama
+
+${status.error || 'Ollama is not running.'}
+
+To use local models:
+1. Install Ollama from: https://ollama.ai
+2. Start Ollama by running: ollama serve
+3. Pull a model: ollama pull llama3
+
+Then try /models local again.`;
+                    break;
+                }
+                // Get available local models
+                const localModels = await ollamaService.getLocalModels();
+                if (localModels.length === 0) {
+                    responseMessage = `📭 No local models found
+
+Ollama is running (v${status.version}) but no models are downloaded.
+
+To download models, run:
+ollama pull llama3
+ollama pull codellama
+ollama pull mistral
+
+Then try /models local again.`;
+                    break;
+                }
+                // Show picker for local model selection
+                if (this.onShowPickerCallback) {
+                    const config = this.configManager.load();
+                    const currentModelName = config.modelName || '';
+                    const isCurrentLocal = config.isLocalModel === true;
+                    this.onShowPickerCallback({
+                        message: 'Select Local Model (Ollama)',
+                        type: 'local-model', // Cast to bypass type check, will be handled in handlePickerSelection
+                        choices: localModels.map((model) => {
+                            const size = OllamaService.formatModelSize(model.size);
+                            const isCurrent = isCurrentLocal && currentModelName === model.name;
+                            const supportsTools = OllamaService.modelSupportsTools(model.name);
+                            const toolsBadge = supportsTools ? ' [Tools]' : '';
+                            return {
+                                label: `${model.name} (${size})${toolsBadge}${isCurrent ? ' [CURRENT]' : ''}`,
+                                value: model.name
+                            };
+                        })
+                    });
+                    return; // Don't send a text response, picker will handle it
+                }
+            }
+            catch (error) {
+                responseMessage = OllamaService.getHelpfulErrorMessage(error);
+            }
+            break;
+        }
+        if (modelSubCommand === 'cloud' || args.length === 0) {
+            // Cloud models (default behavior when no subcommand or 'cloud' specified)
             if (this.onShowPickerCallback) {
                 const config = this.configManager.load();
                 const currentModelName = config.modelName || '';
+                const isCurrentCloud = config.isLocalModel !== true;
                 // Fetch models from backend
                 const modelsConfig = await fetchModelsConfig();
                 this.onShowPickerCallback({
-                    message: 'Select Model',
+                    message: '☁️ Select Cloud Model',
                     type: 'model',
-                    choices: modelsConfig.models.map((modelConfig, index) => ({
-                        label: `${modelConfig.name} - ${modelConfig.description}${currentModelName === modelConfig.name ? ' [CURRENT]' : ''}`,
-                        value: `${index}` // Use index as unique identifier
-                    }))
+                    choices: modelsConfig.models.map((modelConfig, index) => {
+                        const isCurrent = isCurrentCloud && currentModelName === modelConfig.name;
+                        return {
+                            label: `${modelConfig.name} - ${modelConfig.description}${isCurrent ? ' [CURRENT]' : ''}`,
+                            value: `${index}` // Use index as unique identifier
+                        };
+                    })
                 });
                 return; // Don't send a text response, picker will handle it
             }
         }
         else {
-            // Direct set via command argument
-            const newModel = args.join(' ');
-            // Validate model (async)
-            if (!(await isValidModel(newModel))) {
-                responseMessage = `❌ ${await getInvalidModelError(newModel)}`;
-                break;
-            }
-            try {
-                this.configManager.set('model', newModel);
-                responseMessage = `✅ Model changed to: ${newModel}`;
-            }
-            catch (error) {
-                responseMessage = `❌ Failed to set model: ${error.message}`;
-            }
+            // Unrecognized subcommand - show help
+            responseMessage = `Usage: /models [local|cloud]
+
+/models local - Select from locally installed Ollama models
+/models cloud - Select from cloud models (Centaurus backend)
+/models - Default: show cloud models`;
         }
         break;
     case 'mcp':
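Condensed, the new `/models local` path runs three checks before showing the picker: Ollama reachable, at least one model pulled, picker callback available. A sketch under the return shapes the diff implies — `{available, error, version}` from `isOllamaRunning()` and `{name, size}` entries from `getLocalModels()`; treat those shapes as inferred, not documented:

    // Flow sketch only; services are passed in to keep it self-contained.
    async function pickLocalModel(ollamaService, OllamaService, showPicker) {
        const status = await ollamaService.isOllamaRunning();
        if (!status.available) {
            return `Cannot connect to Ollama: ${status.error || 'not running'}`;
        }
        const localModels = await ollamaService.getLocalModels();
        if (localModels.length === 0) {
            return `Ollama v${status.version} is running, but no models are downloaded.`;
        }
        showPicker({
            message: 'Select Local Model (Ollama)',
            type: 'local-model',
            choices: localModels.map((model) => ({
                label: `${model.name} (${OllamaService.formatModelSize(model.size)})`,
                value: model.name,
            })),
        });
        return null; // picker takes over; no text response needed
    }

Note that `/models` with no argument falls through to the cloud picker, so the pre-2.9.3 behavior of the bare command is preserved.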
@@ -2203,7 +2538,7 @@ Start by listing the directory structure to understand what you're working with.
     break;
 case 'docs':
     // Open documentation URL in default browser
-    const docsUrl = 'https://www.centauruslabs.in/docs';
+    const docsUrl = 'https://centauruslabs.in/docs';
     const { exec } = await import('child_process');
     const platform = process.platform;
     if (platform === 'win32') {
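The win32 branch is truncated at the end of this hunk, but the surrounding code is the usual per-platform opener dispatch. A hedged sketch of how such a handler conventionally looks — the exact commands the package uses are not visible here, so these are the standard defaults (`start` on Windows, `open` on macOS, `xdg-open` elsewhere), not a quotation of the shipped code:

    // Conventional cross-platform "open URL in browser" sketch (ESM, Node).
    const { exec } = await import('child_process');
    const docsUrl = 'https://centauruslabs.in/docs';
    const opener = process.platform === 'win32' ? `start "" "${docsUrl}"`
        : process.platform === 'darwin' ? `open "${docsUrl}"`
        : `xdg-open "${docsUrl}"`;
    exec(opener, (err) => {
        if (err) console.error(`Failed to open browser: ${err.message}`);
    });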
@@ -3034,6 +3369,23 @@ Start by listing the directory structure to understand what you're working with.
         this.onCwdChange(chat.cwd);
     }
 }
+// Reset context limit state when loading a chat
+// We'll recalculate it based on the loaded conversation
+if (this.contextLimitReached) {
+    this.contextLimitReached = false;
+    if (this.onContextLimitReached) {
+        this.onContextLimitReached(false);
+    }
+}
+// Update token count to reflect loaded conversation
+// This will also check if the loaded chat is near the limit
+this.updateTokenCount().catch(err => {
+    quickLog(`[${new Date().toISOString()}] [loadChatFromPicker] Failed to update token count: ${err}\n`);
+});
+// Clean up any orphaned tool calls from previous interrupted sessions
+// This prevents "improperly formed request" errors when continuing conversations
+this.cleanupOrphanedToolCalls();
+quickLog(`[${new Date().toISOString()}] [loadChat] Cleaned up conversation history after load\n`);
 return true;
 }
 /**
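The chat-load hunk above and the new-chat hunk below share the same reset choreography: clear the context-limit flag, notify the UI, then recount tokens without letting a failure throw. A minimal sketch of that shared pattern as a standalone helper — `quickLog` and the property names come from the diff; the extraction into a `resetContextState` function is purely illustrative, the package inlines this logic in both methods:

    // Illustrative helper only; `adapter` stands in for the CLI adapter instance.
    function resetContextState(adapter) {
        if (adapter.contextLimitReached) {
            adapter.contextLimitReached = false;
            // Notify the UI so any "context limit reached" banner is cleared
            adapter.onContextLimitReached?.(false);
        }
        // Recount tokens asynchronously; failures are logged, never thrown
        adapter.updateTokenCount().catch(err => {
            quickLog(`[${new Date().toISOString()}] Failed to update token count: ${err}\n`);
        });
    }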
@@ -3202,6 +3554,17 @@ Start by listing the directory structure to understand what you're working with.
     this.uiMessageHistory = [];
     this.localCwdBeforeRemote = null;
     this.lastConnectionCommand = null;
+    // Reset context limit state
+    if (this.contextLimitReached) {
+        this.contextLimitReached = false;
+        if (this.onContextLimitReached) {
+            this.onContextLimitReached(false);
+        }
+    }
+    // Update token count to reflect empty conversation
+    this.updateTokenCount().catch(err => {
+        quickLog(`[${new Date().toISOString()}] [startNewChat] Failed to update token count: ${err}\n`);
+    });
 }
 /**
  * Update UI message history (called from App.tsx via callback)