PyPI - code-puppy - Versions diffs - 0.0.214__py3-none-any.whl → 0.0.366__py3-none-any.whl - Mend

code-puppy 0.0.214__py3-none-any.whl → 0.0.366__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (231)

code_puppy/__init__.py +7 -1
code_puppy/agents/__init__.py +2 -0
code_puppy/agents/agent_c_reviewer.py +59 -6
code_puppy/agents/agent_code_puppy.py +7 -1
code_puppy/agents/agent_code_reviewer.py +12 -2
code_puppy/agents/agent_cpp_reviewer.py +73 -6
code_puppy/agents/agent_creator_agent.py +45 -4
code_puppy/agents/agent_golang_reviewer.py +92 -3
code_puppy/agents/agent_javascript_reviewer.py +101 -8
code_puppy/agents/agent_manager.py +81 -4
code_puppy/agents/agent_pack_leader.py +383 -0
code_puppy/agents/agent_planning.py +163 -0
code_puppy/agents/agent_python_programmer.py +165 -0
code_puppy/agents/agent_python_reviewer.py +28 -6
code_puppy/agents/agent_qa_expert.py +98 -6
code_puppy/agents/agent_qa_kitten.py +12 -7
code_puppy/agents/agent_security_auditor.py +113 -3
code_puppy/agents/agent_terminal_qa.py +323 -0
code_puppy/agents/agent_typescript_reviewer.py +106 -7
code_puppy/agents/base_agent.py +802 -176
code_puppy/agents/event_stream_handler.py +350 -0
code_puppy/agents/pack/__init__.py +34 -0
code_puppy/agents/pack/bloodhound.py +304 -0
code_puppy/agents/pack/husky.py +321 -0
code_puppy/agents/pack/retriever.py +393 -0
code_puppy/agents/pack/shepherd.py +348 -0
code_puppy/agents/pack/terrier.py +287 -0
code_puppy/agents/pack/watchdog.py +367 -0
code_puppy/agents/prompt_reviewer.py +145 -0
code_puppy/agents/subagent_stream_handler.py +276 -0
code_puppy/api/__init__.py +13 -0
code_puppy/api/app.py +169 -0
code_puppy/api/main.py +21 -0
code_puppy/api/pty_manager.py +446 -0
code_puppy/api/routers/__init__.py +12 -0
code_puppy/api/routers/agents.py +36 -0
code_puppy/api/routers/commands.py +217 -0
code_puppy/api/routers/config.py +74 -0
code_puppy/api/routers/sessions.py +232 -0
code_puppy/api/templates/terminal.html +361 -0
code_puppy/api/websocket.py +154 -0
code_puppy/callbacks.py +142 -4
code_puppy/chatgpt_codex_client.py +283 -0
code_puppy/claude_cache_client.py +586 -0
code_puppy/cli_runner.py +916 -0
code_puppy/command_line/add_model_menu.py +1079 -0
code_puppy/command_line/agent_menu.py +395 -0
code_puppy/command_line/attachments.py +10 -5
code_puppy/command_line/autosave_menu.py +605 -0
code_puppy/command_line/clipboard.py +527 -0
code_puppy/command_line/colors_menu.py +520 -0
code_puppy/command_line/command_handler.py +176 -738
code_puppy/command_line/command_registry.py +150 -0
code_puppy/command_line/config_commands.py +715 -0
code_puppy/command_line/core_commands.py +792 -0
code_puppy/command_line/diff_menu.py +863 -0
code_puppy/command_line/load_context_completion.py +15 -22
code_puppy/command_line/mcp/base.py +0 -3
code_puppy/command_line/mcp/catalog_server_installer.py +175 -0
code_puppy/command_line/mcp/custom_server_form.py +688 -0
code_puppy/command_line/mcp/custom_server_installer.py +195 -0
code_puppy/command_line/mcp/edit_command.py +148 -0
code_puppy/command_line/mcp/handler.py +9 -4
code_puppy/command_line/mcp/help_command.py +6 -5
code_puppy/command_line/mcp/install_command.py +15 -26
code_puppy/command_line/mcp/install_menu.py +685 -0
code_puppy/command_line/mcp/list_command.py +2 -2
code_puppy/command_line/mcp/logs_command.py +174 -65
code_puppy/command_line/mcp/remove_command.py +2 -2
code_puppy/command_line/mcp/restart_command.py +12 -4
code_puppy/command_line/mcp/search_command.py +16 -10
code_puppy/command_line/mcp/start_all_command.py +18 -6
code_puppy/command_line/mcp/start_command.py +47 -25
code_puppy/command_line/mcp/status_command.py +4 -5
code_puppy/command_line/mcp/stop_all_command.py +7 -1
code_puppy/command_line/mcp/stop_command.py +8 -4
code_puppy/command_line/mcp/test_command.py +2 -2
code_puppy/command_line/mcp/wizard_utils.py +20 -16
code_puppy/command_line/mcp_completion.py +174 -0
code_puppy/command_line/model_picker_completion.py +75 -25
code_puppy/command_line/model_settings_menu.py +884 -0
code_puppy/command_line/motd.py +14 -8
code_puppy/command_line/onboarding_slides.py +179 -0
code_puppy/command_line/onboarding_wizard.py +340 -0
code_puppy/command_line/pin_command_completion.py +329 -0
code_puppy/command_line/prompt_toolkit_completion.py +463 -63
code_puppy/command_line/session_commands.py +296 -0
code_puppy/command_line/utils.py +54 -0
code_puppy/config.py +898 -112
code_puppy/error_logging.py +118 -0
code_puppy/gemini_code_assist.py +385 -0
code_puppy/gemini_model.py +602 -0
code_puppy/http_utils.py +210 -148
code_puppy/keymap.py +128 -0
code_puppy/main.py +5 -698
code_puppy/mcp_/__init__.py +17 -0
code_puppy/mcp_/async_lifecycle.py +35 -4
code_puppy/mcp_/blocking_startup.py +70 -43
code_puppy/mcp_/captured_stdio_server.py +2 -2
code_puppy/mcp_/config_wizard.py +4 -4
code_puppy/mcp_/dashboard.py +15 -6
code_puppy/mcp_/managed_server.py +65 -38
code_puppy/mcp_/manager.py +146 -52
code_puppy/mcp_/mcp_logs.py +224 -0
code_puppy/mcp_/registry.py +6 -6
code_puppy/mcp_/server_registry_catalog.py +24 -5
code_puppy/messaging/__init__.py +199 -2
code_puppy/messaging/bus.py +610 -0
code_puppy/messaging/commands.py +167 -0
code_puppy/messaging/markdown_patches.py +57 -0
code_puppy/messaging/message_queue.py +17 -48
code_puppy/messaging/messages.py +500 -0
code_puppy/messaging/queue_console.py +1 -24
code_puppy/messaging/renderers.py +43 -146
code_puppy/messaging/rich_renderer.py +1027 -0
code_puppy/messaging/spinner/__init__.py +21 -5
code_puppy/messaging/spinner/console_spinner.py +86 -51
code_puppy/messaging/subagent_console.py +461 -0
code_puppy/model_factory.py +634 -83
code_puppy/model_utils.py +167 -0
code_puppy/models.json +66 -68
code_puppy/models_dev_api.json +1 -0
code_puppy/models_dev_parser.py +592 -0
code_puppy/plugins/__init__.py +164 -10
code_puppy/plugins/antigravity_oauth/__init__.py +10 -0
code_puppy/plugins/antigravity_oauth/accounts.py +406 -0
code_puppy/plugins/antigravity_oauth/antigravity_model.py +704 -0
code_puppy/plugins/antigravity_oauth/config.py +42 -0
code_puppy/plugins/antigravity_oauth/constants.py +136 -0
code_puppy/plugins/antigravity_oauth/oauth.py +478 -0
code_puppy/plugins/antigravity_oauth/register_callbacks.py +406 -0
code_puppy/plugins/antigravity_oauth/storage.py +271 -0
code_puppy/plugins/antigravity_oauth/test_plugin.py +319 -0
code_puppy/plugins/antigravity_oauth/token.py +167 -0
code_puppy/plugins/antigravity_oauth/transport.py +767 -0
code_puppy/plugins/antigravity_oauth/utils.py +169 -0
code_puppy/plugins/chatgpt_oauth/__init__.py +8 -0
code_puppy/plugins/chatgpt_oauth/config.py +52 -0
code_puppy/plugins/chatgpt_oauth/oauth_flow.py +328 -0
code_puppy/plugins/chatgpt_oauth/register_callbacks.py +94 -0
code_puppy/plugins/chatgpt_oauth/test_plugin.py +293 -0
code_puppy/plugins/chatgpt_oauth/utils.py +489 -0
code_puppy/plugins/claude_code_oauth/README.md +167 -0
code_puppy/plugins/claude_code_oauth/SETUP.md +93 -0
code_puppy/plugins/claude_code_oauth/__init__.py +6 -0
code_puppy/plugins/claude_code_oauth/config.py +50 -0
code_puppy/plugins/claude_code_oauth/register_callbacks.py +308 -0
code_puppy/plugins/claude_code_oauth/test_plugin.py +283 -0
code_puppy/plugins/claude_code_oauth/utils.py +518 -0
code_puppy/plugins/customizable_commands/__init__.py +0 -0
code_puppy/plugins/customizable_commands/register_callbacks.py +169 -0
code_puppy/plugins/example_custom_command/README.md +280 -0
code_puppy/plugins/example_custom_command/register_callbacks.py +2 -2
code_puppy/plugins/file_permission_handler/__init__.py +4 -0
code_puppy/plugins/file_permission_handler/register_callbacks.py +523 -0
code_puppy/plugins/frontend_emitter/__init__.py +25 -0
code_puppy/plugins/frontend_emitter/emitter.py +121 -0
code_puppy/plugins/frontend_emitter/register_callbacks.py +261 -0
code_puppy/plugins/oauth_puppy_html.py +228 -0
code_puppy/plugins/shell_safety/__init__.py +6 -0
code_puppy/plugins/shell_safety/agent_shell_safety.py +69 -0
code_puppy/plugins/shell_safety/command_cache.py +156 -0
code_puppy/plugins/shell_safety/register_callbacks.py +202 -0
code_puppy/prompts/antigravity_system_prompt.md +1 -0
code_puppy/prompts/codex_system_prompt.md +310 -0
code_puppy/pydantic_patches.py +131 -0
code_puppy/reopenable_async_client.py +8 -8
code_puppy/round_robin_model.py +9 -12
code_puppy/session_storage.py +2 -1
code_puppy/status_display.py +21 -4
code_puppy/summarization_agent.py +41 -13
code_puppy/terminal_utils.py +418 -0
code_puppy/tools/__init__.py +37 -1
code_puppy/tools/agent_tools.py +536 -52
code_puppy/tools/browser/__init__.py +37 -0
code_puppy/tools/browser/browser_control.py +19 -23
code_puppy/tools/browser/browser_interactions.py +41 -48
code_puppy/tools/browser/browser_locators.py +36 -38
code_puppy/tools/browser/browser_manager.py +316 -0
code_puppy/tools/browser/browser_navigation.py +16 -16
code_puppy/tools/browser/browser_screenshot.py +79 -143
code_puppy/tools/browser/browser_scripts.py +32 -42
code_puppy/tools/browser/browser_workflows.py +44 -27
code_puppy/tools/browser/chromium_terminal_manager.py +259 -0
code_puppy/tools/browser/terminal_command_tools.py +521 -0
code_puppy/tools/browser/terminal_screenshot_tools.py +556 -0
code_puppy/tools/browser/terminal_tools.py +525 -0
code_puppy/tools/command_runner.py +930 -147
code_puppy/tools/common.py +1113 -5
code_puppy/tools/display.py +84 -0
code_puppy/tools/file_modifications.py +288 -89
code_puppy/tools/file_operations.py +226 -154
code_puppy/tools/subagent_context.py +158 -0
code_puppy/uvx_detection.py +242 -0
code_puppy/version_checker.py +30 -11
code_puppy-0.0.366.data/data/code_puppy/models.json +110 -0
code_puppy-0.0.366.data/data/code_puppy/models_dev_api.json +1 -0
{code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/METADATA +149 -75
code_puppy-0.0.366.dist-info/RECORD +217 -0
{code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/WHEEL +1 -1
code_puppy/command_line/mcp/add_command.py +0 -183
code_puppy/messaging/spinner/textual_spinner.py +0 -106
code_puppy/tools/browser/camoufox_manager.py +0 -216
code_puppy/tools/browser/vqa_agent.py +0 -70
code_puppy/tui/__init__.py +0 -10
code_puppy/tui/app.py +0 -1105
code_puppy/tui/components/__init__.py +0 -21
code_puppy/tui/components/chat_view.py +0 -551
code_puppy/tui/components/command_history_modal.py +0 -218
code_puppy/tui/components/copy_button.py +0 -139
code_puppy/tui/components/custom_widgets.py +0 -63
code_puppy/tui/components/human_input_modal.py +0 -175
code_puppy/tui/components/input_area.py +0 -167
code_puppy/tui/components/sidebar.py +0 -309
code_puppy/tui/components/status_bar.py +0 -185
code_puppy/tui/messages.py +0 -27
code_puppy/tui/models/__init__.py +0 -8
code_puppy/tui/models/chat_message.py +0 -25
code_puppy/tui/models/command_history.py +0 -89
code_puppy/tui/models/enums.py +0 -24
code_puppy/tui/screens/__init__.py +0 -17
code_puppy/tui/screens/autosave_picker.py +0 -175
code_puppy/tui/screens/help.py +0 -130
code_puppy/tui/screens/mcp_install_wizard.py +0 -803
code_puppy/tui/screens/settings.py +0 -306
code_puppy/tui/screens/tools.py +0 -74
code_puppy/tui_state.py +0 -55
code_puppy-0.0.214.data/data/code_puppy/models.json +0 -112
code_puppy-0.0.214.dist-info/RECORD +0 -131
{code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/entry_points.txt +0 -0
{code_puppy-0.0.214.dist-info → code_puppy-0.0.366.dist-info}/licenses/LICENSE +0 -0

code_puppy/agents/base_agent.py CHANGED Viewed

@@ -4,9 +4,21 @@ import asyncio
 import json
 import math
 import signal
+import threading
 import uuid
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Sequence, Set, Tuple, Union
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Type,
+    Union,
+)
 import mcp
 import pydantic
@@ -26,13 +38,15 @@ from pydantic_ai.messages import (
     ModelMessage,
     ModelRequest,
     TextPart,
+    ThinkingPart,
     ToolCallPart,
     ToolCallPartDelta,
     ToolReturn,
     ToolReturnPart,
 )
-from pydantic_ai.models.openai import OpenAIChatModelSettings
-from pydantic_ai.settings import ModelSettings
+from rich.text import Text
+from code_puppy.agents.event_stream_handler import event_stream_handler
 # Consolidated relative imports
 from code_puppy.config import (
@@ -41,26 +55,31 @@ from code_puppy.config import (
     get_compaction_threshold,
     get_global_model_name,
     get_message_limit,
-    get_openai_reasoning_effort,
     get_protected_token_count,
     get_use_dbos,
     get_value,
-    load_mcp_server_configs,
 )
-from code_puppy.mcp_ import ServerConfig, get_mcp_manager
+from code_puppy.error_logging import log_error
+from code_puppy.keymap import cancel_agent_uses_signal, get_cancel_agent_char_code
+from code_puppy.mcp_ import get_mcp_manager
 from code_puppy.messaging import (
     emit_error,
     emit_info,
-    emit_system_message,
     emit_warning,
 )
 from code_puppy.messaging.spinner import (
     SpinnerBase,
     update_spinner_context,
 )
-from code_puppy.model_factory import ModelFactory
+from code_puppy.model_factory import ModelFactory, make_model_settings
 from code_puppy.summarization_agent import run_summarization_sync
-from code_puppy.tools.common import console
+from code_puppy.tools.agent_tools import _active_subagent_tasks
+from code_puppy.tools.command_runner import (
+    is_awaiting_user_input,
+)
+# Global flag to track delayed compaction requests
+_delayed_compaction_requested = False
 _reload_count = 0
@@ -78,6 +97,9 @@ class BaseAgent(ABC):
         # Puppy rules loaded lazily
         self._puppy_rules: Optional[str] = None
         self.cur_model: pydantic_ai.models.Model
+        # Cache for MCP tool definitions (for token estimation)
+        # This is populated after the first successful run when MCP tools are retrieved
+        self._mcp_tool_definitions_cache: List[Dict[str, Any]] = []
     @property
     @abstractmethod
@@ -334,6 +356,173 @@ class BaseAgent(ABC):
         return max(1, total_tokens)
+    def estimate_context_overhead_tokens(self) -> int:
+        """
+        Estimate the token overhead from system prompt and tool definitions.
+        This accounts for tokens that are always present in the context:
+        - System prompt (for non-Claude-Code models)
+        - Tool definitions (name, description, parameter schema)
+        - MCP tool definitions
+        Note: For Claude Code models, the system prompt is prepended to the first
+        user message, so it's already counted in the message history tokens.
+        We only count the short fixed instructions for Claude Code models.
+        """
+        total_tokens = 0
+        # 1. Estimate tokens for system prompt / instructions
+        # For Claude Code models, the full system prompt is prepended to the first
+        # user message (already in message history), so we only count the short
+        # fixed instructions. For other models, count the full system prompt.
+        try:
+            from code_puppy.model_utils import (
+                get_antigravity_instructions,
+                get_chatgpt_codex_instructions,
+                get_claude_code_instructions,
+                is_antigravity_model,
+                is_chatgpt_codex_model,
+                is_claude_code_model,
+            )
+            model_name = (
+                self.get_model_name() if hasattr(self, "get_model_name") else ""
+            )
+            if is_claude_code_model(model_name):
+                # For Claude Code models, only count the short fixed instructions
+                # The full system prompt is already in the message history
+                instructions = get_claude_code_instructions()
+                total_tokens += self.estimate_token_count(instructions)
+            elif is_chatgpt_codex_model(model_name):
+                # For ChatGPT Codex models, only count the short fixed instructions
+                # The full system prompt is already in the message history
+                instructions = get_chatgpt_codex_instructions()
+                total_tokens += self.estimate_token_count(instructions)
+            elif is_antigravity_model(model_name):
+                # For Antigravity models, only count the short fixed instructions
+                # The full system prompt is already in the message history
+                instructions = get_antigravity_instructions()
+                total_tokens += self.estimate_token_count(instructions)
+            else:
+                # For other models, count the full system prompt
+                system_prompt = self.get_system_prompt()
+                if system_prompt:
+                    total_tokens += self.estimate_token_count(system_prompt)
+        except Exception:
+            pass  # If we can't get system prompt, skip it
+        # 2. Estimate tokens for pydantic_agent tool definitions
+        pydantic_agent = getattr(self, "pydantic_agent", None)
+        if pydantic_agent:
+            tools = getattr(pydantic_agent, "_tools", None)
+            if tools and isinstance(tools, dict):
+                for tool_name, tool_func in tools.items():
+                    try:
+                        # Estimate tokens from tool name
+                        total_tokens += self.estimate_token_count(tool_name)
+                        # Estimate tokens from tool description
+                        description = getattr(tool_func, "__doc__", None) or ""
+                        if description:
+                            total_tokens += self.estimate_token_count(description)
+                        # Estimate tokens from parameter schema
+                        # Tools may have a schema attribute or we can try to get it from annotations
+                        schema = getattr(tool_func, "schema", None)
+                        if schema:
+                            schema_str = (
+                                json.dumps(schema)
+                                if isinstance(schema, dict)
+                                else str(schema)
+                            )
+                            total_tokens += self.estimate_token_count(schema_str)
+                        else:
+                            # Try to get schema from function annotations
+                            annotations = getattr(tool_func, "__annotations__", None)
+                            if annotations:
+                                total_tokens += self.estimate_token_count(
+                                    str(annotations)
+                                )
+                    except Exception:
+                        continue  # Skip tools we can't process
+        # 3. Estimate tokens for MCP tool definitions from cache
+        # MCP tools are fetched asynchronously, so we use a cache that's populated
+        # after the first successful run. See _update_mcp_tool_cache() method.
+        mcp_tool_cache = getattr(self, "_mcp_tool_definitions_cache", [])
+        if mcp_tool_cache:
+            for tool_def in mcp_tool_cache:
+                try:
+                    # Estimate tokens from tool name
+                    tool_name = tool_def.get("name", "")
+                    if tool_name:
+                        total_tokens += self.estimate_token_count(tool_name)
+                    # Estimate tokens from tool description
+                    description = tool_def.get("description", "")
+                    if description:
+                        total_tokens += self.estimate_token_count(description)
+                    # Estimate tokens from parameter schema (inputSchema)
+                    input_schema = tool_def.get("inputSchema")
+                    if input_schema:
+                        schema_str = (
+                            json.dumps(input_schema)
+                            if isinstance(input_schema, dict)
+                            else str(input_schema)
+                        )
+                        total_tokens += self.estimate_token_count(schema_str)
+                except Exception:
+                    continue  # Skip tools we can't process
+        return total_tokens
+    async def _update_mcp_tool_cache(self) -> None:
+        """
+        Update the MCP tool definitions cache by fetching tools from running MCP servers.
+        This should be called after a successful run to populate the cache for
+        accurate token estimation in subsequent runs.
+        """
+        mcp_servers = getattr(self, "_mcp_servers", None)
+        if not mcp_servers:
+            return
+        tool_definitions = []
+        for mcp_server in mcp_servers:
+            try:
+                # Check if the server has list_tools method (pydantic-ai MCP servers)
+                if hasattr(mcp_server, "list_tools"):
+                    # list_tools() returns list[mcp_types.Tool]
+                    tools = await mcp_server.list_tools()
+                    for tool in tools:
+                        tool_def = {
+                            "name": getattr(tool, "name", ""),
+                            "description": getattr(tool, "description", ""),
+                            "inputSchema": getattr(tool, "inputSchema", {}),
+                        }
+                        tool_definitions.append(tool_def)
+            except Exception:
+                # Server might not be running or accessible, skip it
+                continue
+        self._mcp_tool_definitions_cache = tool_definitions
+    def update_mcp_tool_cache_sync(self) -> None:
+        """
+        Synchronously clear the MCP tool cache.
+        This clears the cache so that token counts will be recalculated on the next
+        agent run. Call this after starting/stopping MCP servers.
+        Note: We don't try to fetch tools synchronously because MCP servers require
+        async context management that doesn't work well from sync code. The cache
+        will be repopulated on the next successful agent run.
+        """
+        # Simply clear the cache - it will be repopulated on the next agent run
+        # This is safer than trying to call async methods from sync context
+        self._mcp_tool_definitions_cache = []
     def _is_tool_call_part(self, part: Any) -> bool:
         if isinstance(part, (ToolCallPart, ToolCallPartDelta)):
             return True
@@ -516,6 +705,98 @@ class BaseAgent(ABC):
             # Be safe; don't blow up status/compaction if model lookup fails
             return 128000
+    def has_pending_tool_calls(self, messages: List[ModelMessage]) -> bool:
+        """
+        Check if there are any pending tool calls in the message history.
+        A pending tool call is one that has a ToolCallPart without a corresponding
+        ToolReturnPart. This indicates the model is still waiting for tool execution.
+        Returns:
+            True if there are pending tool calls, False otherwise
+        """
+        if not messages:
+            return False
+        tool_call_ids: Set[str] = set()
+        tool_return_ids: Set[str] = set()
+        # Collect all tool call and return IDs
+        for msg in messages:
+            for part in getattr(msg, "parts", []) or []:
+                tool_call_id = getattr(part, "tool_call_id", None)
+                if not tool_call_id:
+                    continue
+                if part.part_kind == "tool-call":
+                    tool_call_ids.add(tool_call_id)
+                elif part.part_kind == "tool-return":
+                    tool_return_ids.add(tool_call_id)
+        # Pending tool calls are those without corresponding returns
+        pending_calls = tool_call_ids - tool_return_ids
+        return len(pending_calls) > 0
+    def request_delayed_compaction(self) -> None:
+        """
+        Request that compaction be attempted after the current tool calls complete.
+        This sets a global flag that will be checked during the next message
+        processing cycle to trigger compaction when it's safe to do so.
+        """
+        global _delayed_compaction_requested
+        _delayed_compaction_requested = True
+        emit_info(
+            "🔄 Delayed compaction requested - will attempt after tool calls complete",
+            message_group="token_context_status",
+        )
+    def should_attempt_delayed_compaction(self) -> bool:
+        """
+        Check if delayed compaction was requested and it's now safe to proceed.
+        Returns:
+            True if delayed compaction was requested and no tool calls are pending
+        """
+        global _delayed_compaction_requested
+        if not _delayed_compaction_requested:
+            return False
+        # Check if it's now safe to compact
+        messages = self.get_message_history()
+        if not self.has_pending_tool_calls(messages):
+            _delayed_compaction_requested = False  # Reset the flag
+            return True
+        return False
+    def get_pending_tool_call_count(self, messages: List[ModelMessage]) -> int:
+        """
+        Get the count of pending tool calls for debugging purposes.
+        Returns:
+            Number of tool calls waiting for execution
+        """
+        if not messages:
+            return 0
+        tool_call_ids: Set[str] = set()
+        tool_return_ids: Set[str] = set()
+        for msg in messages:
+            for part in getattr(msg, "parts", []) or []:
+                tool_call_id = getattr(part, "tool_call_id", None)
+                if not tool_call_id:
+                    continue
+                if part.part_kind == "tool-call":
+                    tool_call_ids.add(tool_call_id)
+                elif part.part_kind == "tool-return":
+                    tool_return_ids.add(tool_call_id)
+        pending_calls = tool_call_ids - tool_return_ids
+        return len(pending_calls)
     def prune_interrupted_tool_calls(
         self, messages: List[ModelMessage]
     ) -> List[ModelMessage]:
@@ -570,35 +851,16 @@ class BaseAgent(ABC):
         # First, prune any interrupted/mismatched tool-call conversations
         model_max = self.get_model_context_length()
-        total_current_tokens = sum(
-            self.estimate_tokens_for_message(msg) for msg in messages
-        )
+        message_tokens = sum(self.estimate_tokens_for_message(msg) for msg in messages)
+        context_overhead = self.estimate_context_overhead_tokens()
+        total_current_tokens = message_tokens + context_overhead
         proportion_used = total_current_tokens / model_max
-        # Check if we're in TUI mode and can update the status bar
-        from code_puppy.tui_state import get_tui_app_instance, is_tui_mode
         context_summary = SpinnerBase.format_context_info(
             total_current_tokens, model_max, proportion_used
         )
         update_spinner_context(context_summary)
-        if is_tui_mode():
-            tui_app = get_tui_app_instance()
-            if tui_app:
-                try:
-                    # Update the status bar instead of emitting a chat message
-                    status_bar = tui_app.query_one("StatusBar")
-                    status_bar.update_token_info(
-                        total_current_tokens, model_max, proportion_used
-                    )
-                except Exception as e:
-                    emit_error(e)
-            else:
-                emit_info(
-                    f"Final token count after processing: {total_current_tokens}",
-                    message_group="token_context_status",
-                )
         # Get the configured compaction threshold
         compaction_threshold = get_compaction_threshold()
@@ -606,6 +868,21 @@ class BaseAgent(ABC):
         compaction_strategy = get_compaction_strategy()
         if proportion_used > compaction_threshold:
+            # RACE CONDITION PROTECTION: Check for pending tool calls before summarization
+            if compaction_strategy == "summarization" and self.has_pending_tool_calls(
+                messages
+            ):
+                pending_count = self.get_pending_tool_call_count(messages)
+                emit_warning(
+                    f"⚠️  Summarization deferred: {pending_count} pending tool call(s) detected. "
+                    "Waiting for tool execution to complete before compaction.",
+                    message_group="token_context_status",
+                )
+                # Request delayed compaction for when tool calls complete
+                self.request_delayed_compaction()
+                # Return original messages without compaction
+                return messages, []
             if compaction_strategy == "truncation":
                 # Use truncation instead of summarization
                 protected_tokens = get_protected_token_count()
@@ -614,7 +891,7 @@ class BaseAgent(ABC):
                 )
                 summarized_messages = []  # No summarization in truncation mode
             else:
-                # Default to summarization
+                # Default to summarization (safe to proceed - no pending tool calls)
                 result_messages, summarized_messages = self.summarize_messages(
                     self.filter_huge_messages(messages)
                 )
@@ -622,31 +899,12 @@ class BaseAgent(ABC):
             final_token_count = sum(
                 self.estimate_tokens_for_message(msg) for msg in result_messages
             )
-            # Update status bar with final token count if in TUI mode
+            # Update spinner with final token count
             final_summary = SpinnerBase.format_context_info(
                 final_token_count, model_max, final_token_count / model_max
             )
             update_spinner_context(final_summary)
-            if is_tui_mode():
-                tui_app = get_tui_app_instance()
-                if tui_app:
-                    try:
-                        status_bar = tui_app.query_one("StatusBar")
-                        status_bar.update_token_info(
-                            final_token_count, model_max, final_token_count / model_max
-                        )
-                    except Exception:
-                        emit_info(
-                            f"Final token count after processing: {final_token_count}",
-                            message_group="token_context_status",
-                        )
-                else:
-                    emit_info(
-                        f"Final token count after processing: {final_token_count}",
-                        message_group="token_context_status",
-                    )
             self.set_message_history(result_messages)
             for m in summarized_messages:
                 self.add_compacted_message_hash(self.hash_message(m))
@@ -659,6 +917,11 @@ class BaseAgent(ABC):
         """
         Truncate message history to manage token usage.
+        Protects:
+        - The first message (system prompt) - always kept
+        - The second message if it contains a ThinkingPart (extended thinking context)
+        - The most recent messages up to protected_tokens
         Args:
             messages: List of messages to truncate
             protected_tokens: Number of tokens to protect
@@ -670,12 +933,30 @@ class BaseAgent(ABC):
         emit_info("Truncating message history to manage token usage")
         result = [messages[0]]  # Always keep the first message (system prompt)
+        # Check if second message exists and contains a ThinkingPart
+        # If so, protect it (extended thinking context shouldn't be lost)
+        skip_second = False
+        if len(messages) > 1:
+            second_msg = messages[1]
+            has_thinking = any(
+                isinstance(part, ThinkingPart) for part in second_msg.parts
+            )
+            if has_thinking:
+                result.append(second_msg)
+                skip_second = True
         num_tokens = 0
         stack = queue.LifoQueue()
+        # Determine which messages to consider for the recent-tokens window
+        # Skip first message (already added), and skip second if it has thinking
+        start_idx = 2 if skip_second else 1
+        messages_to_scan = messages[start_idx:]
         # Put messages in reverse order (most recent first) into the stack
         # but break when we exceed protected_tokens
-        for idx, msg in enumerate(reversed(messages[1:])):  # Skip the first message
+        for msg in reversed(messages_to_scan):
             num_tokens += self.estimate_tokens_for_message(msg)
             if num_tokens > protected_tokens:
                 break
@@ -708,71 +989,71 @@ class BaseAgent(ABC):
     # ===== Agent wiring formerly in code_puppy/agent.py =====
     def load_puppy_rules(self) -> Optional[str]:
-        """Load AGENT(S).md if present and cache the contents."""
+        """Load AGENT(S).md from both global config and project directory.
+        Checks for AGENTS.md/AGENT.md/agents.md/agent.md in this order:
+        1. Global config directory (~/.code_puppy/ or XDG config)
+        2. Current working directory (project-specific)
+        If both exist, they are combined with global rules first, then project rules.
+        This allows project-specific rules to override or extend global rules.
+        """
         if self._puppy_rules is not None:
             return self._puppy_rules
         from pathlib import Path
         possible_paths = ["AGENTS.md", "AGENT.md", "agents.md", "agent.md"]
+        # Load global rules from CONFIG_DIR
+        global_rules = None
+        from code_puppy.config import CONFIG_DIR
         for path_str in possible_paths:
-            puppy_rules_path = Path(path_str)
-            if puppy_rules_path.exists():
-                with open(puppy_rules_path, "r") as f:
-                    self._puppy_rules = f.read()
-                    break
+            global_path = Path(CONFIG_DIR) / path_str
+            if global_path.exists():
+                global_rules = global_path.read_text(encoding="utf-8-sig")
+                break
+        # Load project-local rules from current working directory
+        project_rules = None
+        for path_str in possible_paths:
+            project_path = Path(path_str)
+            if project_path.exists():
+                project_rules = project_path.read_text(encoding="utf-8-sig")
+                break
+        # Combine global and project rules
+        # Global rules come first, project rules second (allowing project to override)
+        rules = [r for r in [global_rules, project_rules] if r]
+        self._puppy_rules = "\n\n".join(rules) if rules else None
         return self._puppy_rules
     def load_mcp_servers(self, extra_headers: Optional[Dict[str, str]] = None):
-        """Load MCP servers through the manager and return pydantic-ai compatible servers."""
+        """Load MCP servers through the manager and return pydantic-ai compatible servers.
+        Note: The manager automatically syncs from mcp_servers.json during initialization,
+        so we don't need to sync here. Use reload_mcp_servers() to force a re-sync.
+        """
         mcp_disabled = get_value("disable_mcp_servers")
         if mcp_disabled and str(mcp_disabled).lower() in ("1", "true", "yes", "on"):
-            emit_system_message("[dim]MCP servers disabled via config[/dim]")
             return []
         manager = get_mcp_manager()
-        configs = load_mcp_server_configs()
-        if not configs:
-            existing_servers = manager.list_servers()
-            if not existing_servers:
-                emit_system_message("[dim]No MCP servers configured[/dim]")
-                return []
-        else:
-            for name, conf in configs.items():
-                try:
-                    server_config = ServerConfig(
-                        id=conf.get("id", f"{name}_{hash(name)}"),
-                        name=name,
-                        type=conf.get("type", "sse"),
-                        enabled=conf.get("enabled", True),
-                        config=conf,
-                    )
-                    existing = manager.get_server_by_name(name)
-                    if not existing:
-                        manager.register_server(server_config)
-                        emit_system_message(f"[dim]Registered MCP server: {name}[/dim]")
-                    else:
-                        if existing.config != server_config.config:
-                            manager.update_server(existing.id, server_config)
-                            emit_system_message(
-                                f"[dim]Updated MCP server: {name}[/dim]"
-                            )
-                except Exception as e:
-                    emit_error(f"Failed to register MCP server '{name}': {str(e)}")
-                    continue
-        servers = manager.get_servers_for_agent()
-        if servers:
-            emit_system_message(
-                f"[green]Successfully loaded {len(servers)} MCP server(s)[/green]"
-            )
-        # Stay silent when there are no servers configured/available
-        return servers
+        return manager.get_servers_for_agent()
     def reload_mcp_servers(self):
-        """Reload MCP servers and return updated servers."""
-        self.load_mcp_servers()
+        """Reload MCP servers and return updated servers.
+        Forces a re-sync from mcp_servers.json to pick up any configuration changes.
+        """
+        # Clear the MCP tool cache when servers are reloaded
+        self._mcp_tool_definitions_cache = []
+        # Force re-sync from mcp_servers.json
         manager = get_mcp_manager()
+        manager.sync_from_config()
         return manager.get_servers_for_agent()
     def _load_model_with_fallback(
@@ -794,8 +1075,8 @@ class BaseAgent(ABC):
             )
             emit_warning(
                 (
-                    f"[yellow]Model '{requested_model_name}' not found. "
-                    f"Available models: {available_str}[/yellow]"
+                    f"Model '{requested_model_name}' not found. "
+                    f"Available models: {available_str}"
                 ),
                 message_group=message_group,
             )
@@ -815,7 +1096,7 @@ class BaseAgent(ABC):
                 try:
                     model = ModelFactory.get_model(candidate, models_config)
                     emit_info(
-                        f"[bold cyan]Using fallback model: {candidate}[/bold cyan]",
+                        f"Using fallback model: {candidate}",
                         message_group=message_group,
                     )
                     return model, candidate
@@ -827,7 +1108,7 @@ class BaseAgent(ABC):
                 "a valid model with `config set`."
             )
             emit_error(
-                f"[bold red]{friendly_message}[/bold red]",
+                friendly_message,
                 message_group=message_group,
             )
             raise ValueError(friendly_message) from exc
@@ -841,10 +1122,6 @@ class BaseAgent(ABC):
         model_name = self.get_model_name()
-        emit_info(
-            f"[bold cyan]Loading Model: {model_name}[/bold cyan]",
-            message_group=message_group,
-        )
         models_config = ModelFactory.load_config()
         model, resolved_model_name = self._load_model_with_fallback(
             model_name,
@@ -852,34 +1129,22 @@ class BaseAgent(ABC):
             message_group,
         )
-        emit_info(
-            f"[bold magenta]Loading Agent: {self.name}[/bold magenta]",
-            message_group=message_group,
-        )
         instructions = self.get_system_prompt()
         puppy_rules = self.load_puppy_rules()
         if puppy_rules:
             instructions += f"\n{puppy_rules}"
         mcp_servers = self.load_mcp_servers()
-        emit_info(f"[dim]DEBUG: Loaded {len(mcp_servers)} MCP servers during reload[/dim]")
-        model_settings_dict: Dict[str, Any] = {"seed": 42}
-        output_tokens = max(
-            2048,
-            min(int(0.05 * self.get_model_context_length()) - 1024, 16384),
-        )
-        console.print(f"Max output tokens per message: {output_tokens}")
-        model_settings_dict["max_tokens"] = output_tokens
+        model_settings = make_model_settings(resolved_model_name)
-        model_settings: ModelSettings = ModelSettings(**model_settings_dict)
-        if "gpt-5" in model_name:
-            model_settings_dict["openai_reasoning_effort"] = (
-                get_openai_reasoning_effort()
-            )
-            model_settings_dict["extra_body"] = {"verbosity": "low"}
-            model_settings = OpenAIChatModelSettings(**model_settings_dict)
+        # Handle claude-code models: swap instructions (prompt prepending happens in run_with_mcp)
+        from code_puppy.model_utils import prepare_prompt_for_model
+        prepared = prepare_prompt_for_model(
+            model_name, instructions, "", prepend_system_to_user=False
+        )
+        instructions = prepared.instructions
         self.cur_model = model
         p_agent = PydanticAgent(
@@ -894,36 +1159,37 @@ class BaseAgent(ABC):
         agent_tools = self.get_available_tools()
         register_tools_for_agent(p_agent, agent_tools)
         # Get existing tool names to filter out conflicts with MCP tools
         existing_tool_names = set()
         try:
             # Get tools from the agent to find existing tool names
-            tools = getattr(p_agent, '_tools', None)
+            tools = getattr(p_agent, "_tools", None)
             if tools:
                 existing_tool_names = set(tools.keys())
         except Exception:
             # If we can't get tool names, proceed without filtering
             pass
         # Filter MCP server toolsets to remove conflicting tools
         filtered_mcp_servers = []
         if mcp_servers and existing_tool_names:
             for mcp_server in mcp_servers:
                 try:
                     # Get tools from this MCP server
-                    server_tools = getattr(mcp_server, 'tools', None)
+                    server_tools = getattr(mcp_server, "tools", None)
                     if server_tools:
                         # Filter out conflicting tools
                         filtered_tools = {}
                         for tool_name, tool_func in server_tools.items():
                             if tool_name not in existing_tool_names:
                                 filtered_tools[tool_name] = tool_func
                         # Create a filtered version of the MCP server if we have tools
                         if filtered_tools:
                             # Create a new toolset with filtered tools
                             from pydantic_ai.tools import ToolSet
                             filtered_toolset = ToolSet()
                             for tool_name, tool_func in filtered_tools.items():
                                 filtered_toolset._tools[tool_name] = tool_func
@@ -934,15 +1200,19 @@ class BaseAgent(ABC):
                     else:
                         # Can't get tools from this server, include as-is
                         filtered_mcp_servers.append(mcp_server)
-                except Exception as e:
+                except Exception:
                     # Error processing this server, include as-is to be safe
                     filtered_mcp_servers.append(mcp_server)
         else:
             # No filtering needed or possible
             filtered_mcp_servers = mcp_servers if mcp_servers else []
         if len(filtered_mcp_servers) != len(mcp_servers):
-            emit_info(f"[dim]Filtered {len(mcp_servers) - len(filtered_mcp_servers)} conflicting MCP tools[/dim]")
+            emit_info(
+                Text.from_markup(
+                    f"[dim]Filtered {len(mcp_servers) - len(filtered_mcp_servers)} conflicting MCP tools[/dim]"
+                )
+            )
         self._last_model_name = resolved_model_name
         # expose for run_with_mcp
@@ -962,16 +1232,21 @@ class BaseAgent(ABC):
                 history_processors=[self.message_history_accumulator],
                 model_settings=model_settings,
             )
             # Register regular tools (non-MCP) on the new agent
             agent_tools = self.get_available_tools()
             register_tools_for_agent(agent_without_mcp, agent_tools)
-            # Wrap with DBOS
-            dbos_agent = DBOSAgent(agent_without_mcp, name=f"{self.name}-{_reload_count}")
+            # Wrap with DBOS - pass event_stream_handler at construction time
+            # so DBOSModel gets the handler for streaming output
+            dbos_agent = DBOSAgent(
+                agent_without_mcp,
+                name=f"{self.name}-{_reload_count}",
+                event_stream_handler=event_stream_handler,
+            )
             self.pydantic_agent = dbos_agent
             self._code_generation_agent = dbos_agent
             # Store filtered MCP servers separately for runtime use
             self._mcp_servers = filtered_mcp_servers
         else:
@@ -989,13 +1264,84 @@ class BaseAgent(ABC):
             # Register regular tools on the agent
             agent_tools = self.get_available_tools()
             register_tools_for_agent(p_agent, agent_tools)
             self.pydantic_agent = p_agent
             self._code_generation_agent = p_agent
             self._mcp_servers = filtered_mcp_servers
             self._mcp_servers = mcp_servers
         return self._code_generation_agent
+    def _create_agent_with_output_type(self, output_type: Type[Any]) -> PydanticAgent:
+        """Create a temporary agent configured with a custom output_type.
+        This is used when structured output is requested via run_with_mcp.
+        The agent is created fresh with the same configuration as the main agent
+        but with the specified output_type instead of str.
+        Args:
+            output_type: The Pydantic model or type for structured output.
+        Returns:
+            A configured PydanticAgent (or DBOSAgent wrapper) with the custom output_type.
+        """
+        from code_puppy.model_utils import prepare_prompt_for_model
+        from code_puppy.tools import register_tools_for_agent
+        model_name = self.get_model_name()
+        models_config = ModelFactory.load_config()
+        model, resolved_model_name = self._load_model_with_fallback(
+            model_name, models_config, str(uuid.uuid4())
+        )
+        instructions = self.get_system_prompt()
+        puppy_rules = self.load_puppy_rules()
+        if puppy_rules:
+            instructions += f"\n{puppy_rules}"
+        mcp_servers = getattr(self, "_mcp_servers", []) or []
+        model_settings = make_model_settings(resolved_model_name)
+        prepared = prepare_prompt_for_model(
+            model_name, instructions, "", prepend_system_to_user=False
+        )
+        instructions = prepared.instructions
+        global _reload_count
+        _reload_count += 1
+        if get_use_dbos():
+            temp_agent = PydanticAgent(
+                model=model,
+                instructions=instructions,
+                output_type=output_type,
+                retries=3,
+                toolsets=[],
+                history_processors=[self.message_history_accumulator],
+                model_settings=model_settings,
+            )
+            agent_tools = self.get_available_tools()
+            register_tools_for_agent(temp_agent, agent_tools)
+            # Pass event_stream_handler at construction time for streaming output
+            dbos_agent = DBOSAgent(
+                temp_agent,
+                name=f"{self.name}-structured-{_reload_count}",
+                event_stream_handler=event_stream_handler,
+            )
+            return dbos_agent
+        else:
+            temp_agent = PydanticAgent(
+                model=model,
+                instructions=instructions,
+                output_type=output_type,
+                retries=3,
+                toolsets=mcp_servers,
+                history_processors=[self.message_history_accumulator],
+                model_settings=model_settings,
+            )
+            agent_tools = self.get_available_tools()
+            register_tools_for_agent(temp_agent, agent_tools)
+            return temp_agent
     # It's okay to decorate it with DBOS.step even if not using DBOS; the decorator is a no-op in that case.
     @DBOS.step()
     def message_history_accumulator(self, ctx: RunContext, messages: List[Any]):
@@ -1011,14 +1357,171 @@ class BaseAgent(ABC):
         # Apply message history trimming using the main processor
         # This ensures we maintain global state while still managing context limits
         self.message_history_processor(ctx, _message_history)
+        result_messages_filtered_empty_thinking = []
+        for msg in self.get_message_history():
+            if len(msg.parts) == 1:
+                if isinstance(msg.parts[0], ThinkingPart):
+                    if msg.parts[0].content == "":
+                        continue
+            result_messages_filtered_empty_thinking.append(msg)
+            self.set_message_history(result_messages_filtered_empty_thinking)
         return self.get_message_history()
+    def _spawn_ctrl_x_key_listener(
+        self,
+        stop_event: threading.Event,
+        on_escape: Callable[[], None],
+        on_cancel_agent: Optional[Callable[[], None]] = None,
+    ) -> Optional[threading.Thread]:
+        """Start a keyboard listener thread for CLI sessions.
+        Listens for Ctrl+X (shell command cancel) and optionally the configured
+        cancel_agent_key (when not using SIGINT/Ctrl+C).
+        Args:
+            stop_event: Event to signal the listener to stop.
+            on_escape: Callback for Ctrl+X (shell command cancel).
+            on_cancel_agent: Optional callback for cancel_agent_key (only used
+                when cancel_agent_uses_signal() returns False).
+        """
+        try:
+            import sys
+        except ImportError:
+            return None
+        stdin = getattr(sys, "stdin", None)
+        if stdin is None or not hasattr(stdin, "isatty"):
+            return None
+        try:
+            if not stdin.isatty():
+                return None
+        except Exception:
+            return None
+        def listener() -> None:
+            try:
+                if sys.platform.startswith("win"):
+                    self._listen_for_ctrl_x_windows(
+                        stop_event, on_escape, on_cancel_agent
+                    )
+                else:
+                    self._listen_for_ctrl_x_posix(
+                        stop_event, on_escape, on_cancel_agent
+                    )
+            except Exception:
+                emit_warning(
+                    "Key listener stopped unexpectedly; press Ctrl+C to cancel."
+                )
+        thread = threading.Thread(
+            target=listener, name="code-puppy-key-listener", daemon=True
+        )
+        thread.start()
+        return thread
+    def _listen_for_ctrl_x_windows(
+        self,
+        stop_event: threading.Event,
+        on_escape: Callable[[], None],
+        on_cancel_agent: Optional[Callable[[], None]] = None,
+    ) -> None:
+        import msvcrt
+        import time
+        # Get the cancel agent char code if we're using keyboard-based cancel
+        cancel_agent_char: Optional[str] = None
+        if on_cancel_agent is not None and not cancel_agent_uses_signal():
+            cancel_agent_char = get_cancel_agent_char_code()
+        while not stop_event.is_set():
+            try:
+                if msvcrt.kbhit():
+                    key = msvcrt.getwch()
+                    if key == "\x18":  # Ctrl+X
+                        try:
+                            on_escape()
+                        except Exception:
+                            emit_warning(
+                                "Ctrl+X handler raised unexpectedly; Ctrl+C still works."
+                            )
+                    elif (
+                        cancel_agent_char
+                        and on_cancel_agent
+                        and key == cancel_agent_char
+                    ):
+                        try:
+                            on_cancel_agent()
+                        except Exception:
+                            emit_warning("Cancel agent handler raised unexpectedly.")
+            except Exception:
+                emit_warning(
+                    "Windows key listener error; Ctrl+C is still available for cancel."
+                )
+                return
+            time.sleep(0.05)
+    def _listen_for_ctrl_x_posix(
+        self,
+        stop_event: threading.Event,
+        on_escape: Callable[[], None],
+        on_cancel_agent: Optional[Callable[[], None]] = None,
+    ) -> None:
+        import select
+        import sys
+        import termios
+        import tty
+        # Get the cancel agent char code if we're using keyboard-based cancel
+        cancel_agent_char: Optional[str] = None
+        if on_cancel_agent is not None and not cancel_agent_uses_signal():
+            cancel_agent_char = get_cancel_agent_char_code()
+        stdin = sys.stdin
+        try:
+            fd = stdin.fileno()
+        except (AttributeError, ValueError, OSError):
+            return
+        try:
+            original_attrs = termios.tcgetattr(fd)
+        except Exception:
+            return
+        try:
+            tty.setcbreak(fd)
+            while not stop_event.is_set():
+                try:
+                    read_ready, _, _ = select.select([stdin], [], [], 0.05)
+                except Exception:
+                    break
+                if not read_ready:
+                    continue
+                data = stdin.read(1)
+                if not data:
+                    break
+                if data == "\x18":  # Ctrl+X
+                    try:
+                        on_escape()
+                    except Exception:
+                        emit_warning(
+                            "Ctrl+X handler raised unexpectedly; Ctrl+C still works."
+                        )
+                elif (
+                    cancel_agent_char and on_cancel_agent and data == cancel_agent_char
+                ):
+                    try:
+                        on_cancel_agent()
+                    except Exception:
+                        emit_warning("Cancel agent handler raised unexpectedly.")
+        finally:
+            termios.tcsetattr(fd, termios.TCSADRAIN, original_attrs)
     async def run_with_mcp(
         self,
         prompt: str,
         *,
         attachments: Optional[Sequence[BinaryContent]] = None,
         link_attachments: Optional[Sequence[Union[ImageUrl, DocumentUrl]]] = None,
+        output_type: Optional[Type[Any]] = None,
         **kwargs,
     ) -> Any:
         """Run the agent with MCP servers, attachments, and full cancellation support.
@@ -1027,20 +1530,60 @@ class BaseAgent(ABC):
             prompt: Primary user prompt text (may be empty when attachments present).
             attachments: Local binary payloads (e.g., dragged images) to include.
             link_attachments: Remote assets (image/document URLs) to include.
+            output_type: Optional Pydantic model or type for structured output.
+                When provided, creates a temporary agent configured to return
+                this type instead of the default string output.
             **kwargs: Additional arguments forwarded to `pydantic_ai.Agent.run`.
         Returns:
-            The agent's response.
+            The agent's response (typed according to output_type if specified).
         Raises:
             asyncio.CancelledError: When execution is cancelled by user.
         """
+        # Sanitize prompt to remove invalid Unicode surrogates that can cause
+        # encoding errors (especially common on Windows with copy-paste)
+        if prompt:
+            try:
+                prompt = prompt.encode("utf-8", errors="surrogatepass").decode(
+                    "utf-8", errors="replace"
+                )
+            except (UnicodeEncodeError, UnicodeDecodeError):
+                # Fallback: filter out surrogate characters directly
+                prompt = "".join(
+                    char if ord(char) < 0xD800 or ord(char) > 0xDFFF else "\ufffd"
+                    for char in prompt
+                )
         group_id = str(uuid.uuid4())
         # Avoid double-loading: reuse existing agent if already built
         pydantic_agent = (
             self._code_generation_agent or self.reload_code_generation_agent()
         )
+        # If a custom output_type is specified, create a temporary agent with that type
+        if output_type is not None:
+            pydantic_agent = self._create_agent_with_output_type(output_type)
+        # Handle claude-code, chatgpt-codex, and antigravity models: prepend system prompt to first user message
+        from code_puppy.model_utils import (
+            is_antigravity_model,
+            is_chatgpt_codex_model,
+            is_claude_code_model,
+        )
+        if (
+            is_claude_code_model(self.get_model_name())
+            or is_chatgpt_codex_model(self.get_model_name())
+            or is_antigravity_model(self.get_model_name())
+        ):
+            if len(self.get_message_history()) == 0:
+                system_prompt = self.get_system_prompt()
+                puppy_rules = self.load_puppy_rules()
+                if puppy_rules:
+                    system_prompt += f"\n{puppy_rules}"
+                prompt = system_prompt + "\n\n" + prompt
         # Build combined prompt payload when attachments are provided.
         attachment_parts: List[Any] = []
         if attachments:
@@ -1061,15 +1604,35 @@ class BaseAgent(ABC):
                 self.set_message_history(
                     self.prune_interrupted_tool_calls(self.get_message_history())
                 )
+                # DELAYED COMPACTION: Check if we should attempt delayed compaction
+                if self.should_attempt_delayed_compaction():
+                    emit_info(
+                        "🔄 Attempting delayed compaction (tool calls completed)",
+                        message_group="token_context_status",
+                    )
+                    current_messages = self.get_message_history()
+                    compacted_messages, _ = self.compact_messages(current_messages)
+                    if compacted_messages != current_messages:
+                        self.set_message_history(compacted_messages)
+                        emit_info(
+                            "✅ Delayed compaction completed successfully",
+                            message_group="token_context_status",
+                        )
                 usage_limits = UsageLimits(request_limit=get_message_limit())
                 # Handle MCP servers - add them temporarily when using DBOS
-                if get_use_dbos() and hasattr(self, '_mcp_servers') and self._mcp_servers:
+                if (
+                    get_use_dbos()
+                    and hasattr(self, "_mcp_servers")
+                    and self._mcp_servers
+                ):
                     # Temporarily add MCP servers to the DBOS agent using internal _toolsets
                     original_toolsets = pydantic_agent._toolsets
                     pydantic_agent._toolsets = original_toolsets + self._mcp_servers
                     pydantic_agent._toolsets = original_toolsets + self._mcp_servers
                     try:
                         # Set the workflow ID for DBOS context so DBOS and Code Puppy ID match
                         with SetWorkflowID(group_id):
@@ -1077,29 +1640,33 @@ class BaseAgent(ABC):
                                 prompt_payload,
                                 message_history=self.get_message_history(),
                                 usage_limits=usage_limits,
+                                event_stream_handler=event_stream_handler,
                                 **kwargs,
                             )
+                            return result_
                     finally:
                         # Always restore original toolsets
                         pydantic_agent._toolsets = original_toolsets
                 elif get_use_dbos():
-                    # DBOS without MCP servers
                     with SetWorkflowID(group_id):
                         result_ = await pydantic_agent.run(
                             prompt_payload,
                             message_history=self.get_message_history(),
                             usage_limits=usage_limits,
+                            event_stream_handler=event_stream_handler,
                             **kwargs,
                         )
+                        return result_
                 else:
                     # Non-DBOS path (MCP servers are already included)
                     result_ = await pydantic_agent.run(
                         prompt_payload,
                         message_history=self.get_message_history(),
                         usage_limits=usage_limits,
+                        event_stream_handler=event_stream_handler,
                         **kwargs,
                     )
-                return result_
+                    return result_
             except* UsageLimitExceeded as ule:
                 emit_info(f"Usage limit exceeded: {str(ule)}", group_id=group_id)
                 emit_info(
@@ -1134,6 +1701,12 @@ class BaseAgent(ABC):
                         remaining_exceptions.append(exc)
                         emit_info(f"Unexpected error: {str(exc)}", group_id=group_id)
                         emit_info(f"{str(exc.args)}", group_id=group_id)
+                        # Log to file for debugging
+                        log_error(
+                            exc,
+                            context=f"Agent run (group_id={group_id})",
+                            include_traceback=True,
+                        )
                 collect_non_cancelled_exceptions(other_error)
@@ -1156,35 +1729,87 @@ class BaseAgent(ABC):
         # Create the task FIRST
         agent_task = asyncio.create_task(run_agent_task())
-        # Import shell process killer
-        from code_puppy.tools.command_runner import kill_all_running_shell_processes
+        # Import shell process status helper
-        # Ensure the interrupt handler only acts once per task
-        def keyboard_interrupt_handler(sig, frame):
-            """Signal handler for Ctrl+C - replicating exact original logic"""
+        loop = asyncio.get_running_loop()
-            # First, nuke any running shell processes triggered by tools
-            try:
-                killed = kill_all_running_shell_processes()
-                if killed:
-                    emit_info(f"Cancelled {killed} running shell process(es).")
-                else:
-                    # Only cancel the agent task if no shell processes were killed
-                    if not agent_task.done():
-                        agent_task.cancel()
-            except Exception as e:
-                emit_info(f"Shell kill error: {e}")
-                if not agent_task.done():
-                    agent_task.cancel()
-            # Don't call the original handler
-            # This prevents the application from exiting
+        def schedule_agent_cancel() -> None:
+            from code_puppy.tools.command_runner import _RUNNING_PROCESSES
+            if len(_RUNNING_PROCESSES):
+                emit_warning(
+                    "Refusing to cancel Agent while a shell command is currently running - press Ctrl+X to cancel the shell command."
+                )
+                return
+            if agent_task.done():
+                return
+            # Cancel all active subagent tasks
+            if _active_subagent_tasks:
+                emit_warning(
+                    f"Cancelling {len(_active_subagent_tasks)} active subagent task(s)..."
+                )
+                for task in list(
+                    _active_subagent_tasks
+                ):  # Create a copy since we'll be modifying the set
+                    if not task.done():
+                        loop.call_soon_threadsafe(task.cancel)
+            loop.call_soon_threadsafe(agent_task.cancel)
+        def keyboard_interrupt_handler(_sig, _frame):
+            # If we're awaiting user input (e.g., file permission prompt),
+            # don't cancel the agent - let the input() call handle the interrupt naturally
+            if is_awaiting_user_input():
+                # Don't do anything here - let the input() call raise KeyboardInterrupt naturally
+                return
+            schedule_agent_cancel()
+        def graceful_sigint_handler(_sig, _frame):
+            # When using keyboard-based cancel, SIGINT should be a no-op
+            # (just show a hint to user about the configured cancel key)
+            # Also reset terminal to prevent bricking on Windows+uvx
+            from code_puppy.keymap import get_cancel_agent_display_name
+            from code_puppy.terminal_utils import reset_windows_terminal_full
+            # Reset terminal state first to prevent bricking
+            reset_windows_terminal_full()
+            cancel_key = get_cancel_agent_display_name()
+            emit_info(f"Use {cancel_key} to cancel the agent task.")
+        original_handler = None
+        key_listener_stop_event = None
+        _key_listener_thread = None
         try:
-            # Save original handler and set our custom one AFTER task is created
-            original_handler = signal.signal(signal.SIGINT, keyboard_interrupt_handler)
+            if cancel_agent_uses_signal():
+                # Use SIGINT-based cancellation (default Ctrl+C behavior)
+                original_handler = signal.signal(
+                    signal.SIGINT, keyboard_interrupt_handler
+                )
+            else:
+                # Use keyboard listener for agent cancellation
+                # Set a graceful SIGINT handler that shows a hint
+                original_handler = signal.signal(signal.SIGINT, graceful_sigint_handler)
+                # Spawn keyboard listener with the cancel agent callback
+                key_listener_stop_event = threading.Event()
+                _key_listener_thread = self._spawn_ctrl_x_key_listener(
+                    key_listener_stop_event,
+                    on_escape=lambda: None,  # Ctrl+X handled by command_runner
+                    on_cancel_agent=schedule_agent_cancel,
+                )
             # Wait for the task to complete or be cancelled
             result = await agent_task
+            # Update MCP tool cache after successful run for accurate token estimation
+            if hasattr(self, "_mcp_servers") and self._mcp_servers:
+                try:
+                    await self._update_mcp_tool_cache()
+                except Exception:
+                    pass  # Don't fail the run if cache update fails
             return result
         except asyncio.CancelledError:
             agent_task.cancel()
@@ -1192,11 +1817,12 @@ class BaseAgent(ABC):
             # Handle direct keyboard interrupt during await
             if not agent_task.done():
                 agent_task.cancel()
-            try:
-                await agent_task
-            except asyncio.CancelledError:
-                pass
         finally:
+            # Stop keyboard listener if it was started
+            if key_listener_stop_event is not None:
+                key_listener_stop_event.set()
             # Restore original signal handler
-            if original_handler:
-                signal.signal(signal.SIGINT, original_handler)
+            if (
+                original_handler is not None
+            ):  # Explicit None check - SIG_DFL can be 0/falsy!
+                signal.signal(signal.SIGINT, original_handler)

code-puppy 0.0.214__py3-none-any.whl → 0.0.366__py3-none-any.whl

code-puppy 0.0.214py3-none-any.whl → 0.0.366py3-none-any.whl