PyPI - code-puppy - Versions diffs - 0.0.316__py3-none-any.whl → 0.0.325__py3-none-any.whl - Mend

code-puppy 0.0.316__py3-none-any.whl → 0.0.325__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

code_puppy/agents/base_agent.py +241 -91
code_puppy/cli_runner.py +8 -1
code_puppy/command_line/add_model_menu.py +11 -0
code_puppy/command_line/mcp/logs_command.py +173 -64
code_puppy/command_line/model_settings_menu.py +6 -0
code_puppy/keymap.py +8 -2
code_puppy/mcp_/__init__.py +17 -0
code_puppy/mcp_/blocking_startup.py +50 -29
code_puppy/mcp_/managed_server.py +1 -1
code_puppy/mcp_/mcp_logs.py +224 -0
code_puppy/messaging/__init__.py +9 -0
code_puppy/messaging/markdown_patches.py +57 -0
code_puppy/model_factory.py +54 -0
code_puppy/models.json +3 -3
code_puppy/plugins/__init__.py +12 -0
code_puppy/plugins/claude_code_oauth/utils.py +1 -0
code_puppy/plugins/shell_safety/agent_shell_safety.py +1 -118
code_puppy/plugins/shell_safety/register_callbacks.py +44 -3
code_puppy/tools/command_runner.py +48 -21
{code_puppy-0.0.316.data → code_puppy-0.0.325.data}/data/code_puppy/models.json +3 -3
{code_puppy-0.0.316.dist-info → code_puppy-0.0.325.dist-info}/METADATA +1 -1
{code_puppy-0.0.316.dist-info → code_puppy-0.0.325.dist-info}/RECORD +26 -24
{code_puppy-0.0.316.data → code_puppy-0.0.325.data}/data/code_puppy/models_dev_api.json +0 -0
{code_puppy-0.0.316.dist-info → code_puppy-0.0.325.dist-info}/WHEEL +0 -0
{code_puppy-0.0.316.dist-info → code_puppy-0.0.325.dist-info}/entry_points.txt +0 -0
{code_puppy-0.0.316.dist-info → code_puppy-0.0.325.dist-info}/licenses/LICENSE +0 -0

code_puppy/agents/base_agent.py CHANGED Viewed

@@ -4,11 +4,23 @@ import asyncio
 import json
 import math
 import signal
+import sys
 import threading
 import uuid
 from abc import ABC, abstractmethod
 from collections.abc import AsyncIterable
-from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Type,
+    Union,
+)
 import mcp
 import pydantic
@@ -89,6 +101,9 @@ class BaseAgent(ABC):
         # Cache for MCP tool definitions (for token estimation)
         # This is populated after the first successful run when MCP tools are retrieved
         self._mcp_tool_definitions_cache: List[Dict[str, Any]] = []
+        # Shared console for streaming output - should be set by cli_runner
+        # to avoid conflicts between spinner's Live display and response streaming
+        self._console: Optional[Any] = None
     @property
     @abstractmethod
@@ -1227,6 +1242,74 @@ class BaseAgent(ABC):
             self._mcp_servers = mcp_servers
         return self._code_generation_agent
+    def _create_agent_with_output_type(self, output_type: Type[Any]) -> PydanticAgent:
+        """Create a temporary agent configured with a custom output_type.
+        This is used when structured output is requested via run_with_mcp.
+        The agent is created fresh with the same configuration as the main agent
+        but with the specified output_type instead of str.
+        Args:
+            output_type: The Pydantic model or type for structured output.
+        Returns:
+            A configured PydanticAgent (or DBOSAgent wrapper) with the custom output_type.
+        """
+        from code_puppy.model_utils import prepare_prompt_for_model
+        from code_puppy.tools import register_tools_for_agent
+        model_name = self.get_model_name()
+        models_config = ModelFactory.load_config()
+        model, resolved_model_name = self._load_model_with_fallback(
+            model_name, models_config, str(uuid.uuid4())
+        )
+        instructions = self.get_system_prompt()
+        puppy_rules = self.load_puppy_rules()
+        if puppy_rules:
+            instructions += f"\n{puppy_rules}"
+        mcp_servers = getattr(self, "_mcp_servers", []) or []
+        model_settings = make_model_settings(resolved_model_name)
+        prepared = prepare_prompt_for_model(
+            model_name, instructions, "", prepend_system_to_user=False
+        )
+        instructions = prepared.instructions
+        global _reload_count
+        _reload_count += 1
+        if get_use_dbos():
+            temp_agent = PydanticAgent(
+                model=model,
+                instructions=instructions,
+                output_type=output_type,
+                retries=3,
+                toolsets=[],
+                history_processors=[self.message_history_accumulator],
+                model_settings=model_settings,
+            )
+            agent_tools = self.get_available_tools()
+            register_tools_for_agent(temp_agent, agent_tools)
+            dbos_agent = DBOSAgent(
+                temp_agent, name=f"{self.name}-structured-{_reload_count}"
+            )
+            return dbos_agent
+        else:
+            temp_agent = PydanticAgent(
+                model=model,
+                instructions=instructions,
+                output_type=output_type,
+                retries=3,
+                toolsets=mcp_servers,
+                history_processors=[self.message_history_accumulator],
+                model_settings=model_settings,
+            )
+            agent_tools = self.get_available_tools()
+            register_tools_for_agent(temp_agent, agent_tools)
+            return temp_agent
     # It's okay to decorate it with DBOS.step even if not using DBOS; the decorator is a no-op in that case.
     @DBOS.step()
     def message_history_accumulator(self, ctx: RunContext, messages: List[Any]):
@@ -1257,47 +1340,45 @@ class BaseAgent(ABC):
     ) -> None:
         """Handle streaming events from the agent run.
-        This method processes streaming events and emits TextPart and ThinkingPart
-        content with styled banners as they stream in.
+        This method processes streaming events and emits TextPart, ThinkingPart,
+        and ToolCallPart content with styled banners/tokens as they stream in.
         Args:
             ctx: The run context.
             events: Async iterable of streaming events (PartStartEvent, PartDeltaEvent, etc.).
         """
-        import os
-        import time as time_module
         from pydantic_ai import PartDeltaEvent, PartStartEvent
-        from pydantic_ai.messages import TextPartDelta, ThinkingPartDelta
+        from pydantic_ai.messages import (
+            TextPartDelta,
+            ThinkingPartDelta,
+            ToolCallPartDelta,
+        )
         from rich.console import Console
-        from rich.live import Live
         from rich.markdown import Markdown
         from rich.markup import escape
         from code_puppy.messaging.spinner import pause_all_spinners
-        console = Console()
-        # Disable Live display in test mode or non-interactive environments
-        # This fixes issues with pexpect PTY where Live() hangs
-        use_live_display = (
-            console.is_terminal
-            and os.environ.get("CODE_PUPPY_TEST_FAST", "").lower() not in ("1", "true")
-            and os.environ.get("CI", "").lower() not in ("1", "true")
-        )
+        # IMPORTANT: Use the shared console (set by cli_runner) to avoid conflicts
+        # with the spinner's Live display. Multiple Console instances with separate
+        # Live displays cause cursor positioning chaos and line duplication.
+        if self._console is not None:
+            console = self._console
+        else:
+            # Fallback if console not set (shouldn't happen in normal use)
+            console = Console()
-        # Track which part indices we're currently streaming (for Text/Thinking parts)
+        # Track which part indices we're currently streaming (for Text/Thinking/Tool parts)
         streaming_parts: set[int] = set()
         thinking_parts: set[int] = (
             set()
         )  # Track which parts are thinking (for dim style)
         text_parts: set[int] = set()  # Track which parts are text
+        tool_parts: set[int] = set()  # Track which parts are tool calls
         banner_printed: set[int] = set()  # Track if banner was already printed
-        text_buffer: dict[int, list[str]] = {}  # Buffer text for markdown
-        live_displays: dict[int, Live] = {}  # Live displays for streaming markdown
+        text_buffer: dict[int, list[str]] = {}  # Buffer text for final markdown render
+        token_count: dict[int, int] = {}  # Track token count per text/tool part
         did_stream_anything = False  # Track if we streamed any content
-        last_render_time: dict[int, float] = {}  # Track last render time per part
-        render_interval = 0.1  # Only re-render markdown every 100ms (throttle)
         def _print_thinking_banner() -> None:
             """Print the THINKING banner with spinner pause and line clear."""
@@ -1362,9 +1443,20 @@ class BaseAgent(ABC):
                     streaming_parts.add(event.index)
                     text_parts.add(event.index)
                     text_buffer[event.index] = []  # Initialize buffer
+                    token_count[event.index] = 0  # Initialize token counter
                     # Buffer initial content if present
                     if part.content and part.content.strip():
                         text_buffer[event.index].append(part.content)
+                        # Use len(content) / 3 for token estimation (more accurate than chunk counting)
+                        token_count[event.index] += len(part.content) // 3
+                elif isinstance(part, ToolCallPart):
+                    streaming_parts.add(event.index)
+                    tool_parts.add(event.index)
+                    token_count[event.index] = 0  # Initialize token counter
+                    # Track tool name for display
+                    banner_printed.add(
+                        event.index
+                    )  # Use banner_printed to track if we've shown tool info
             # PartDeltaEvent - stream the content as it arrives
             elif isinstance(event, PartDeltaEvent):
@@ -1372,43 +1464,26 @@ class BaseAgent(ABC):
                     delta = event.delta
                     if isinstance(delta, (TextPartDelta, ThinkingPartDelta)):
                         if delta.content_delta:
-                            # For text parts, stream markdown with Live display
+                            # For text parts, show token counter then render at end
                             if event.index in text_parts:
-                                # Print banner and start Live on first content
+                                import sys
+                                # Print banner on first content
                                 if event.index not in banner_printed:
                                     _print_response_banner()
                                     banner_printed.add(event.index)
-                                    # Only use Live display if enabled (disabled in test/CI)
-                                    if use_live_display:
-                                        live = Live(
-                                            Markdown(""),
-                                            console=console,
-                                            refresh_per_second=10,
-                                            vertical_overflow="visible",  # Allow scrolling for long content
-                                        )
-                                        live.start()
-                                        live_displays[event.index] = live
-                                # Accumulate text and throttle markdown rendering
-                                # (Markdown parsing is O(n), doing it on every token = O(n²) death)
+                                # Accumulate text for final markdown render
                                 text_buffer[event.index].append(delta.content_delta)
-                                now = time_module.monotonic()
-                                last_render = last_render_time.get(event.index, 0)
-                                # Only re-render if enough time has passed (throttle)
-                                # Skip Live updates when not using live display
-                                if (
-                                    use_live_display
-                                    and now - last_render >= render_interval
-                                ):
-                                    content = "".join(text_buffer[event.index])
-                                    if event.index in live_displays:
-                                        try:
-                                            live_displays[event.index].update(
-                                                Markdown(content)
-                                            )
-                                            last_render_time[event.index] = now
-                                        except Exception:
-                                            pass
+                                # Use len(content) / 3 for token estimation
+                                token_count[event.index] += (
+                                    len(delta.content_delta) // 3
+                                )
+                                # Update token counter in place (single line)
+                                count = token_count[event.index]
+                                sys.stdout.write(
+                                    f"\r\x1b[K  ⏳ Receiving... {count} tokens"
+                                )
+                                sys.stdout.flush()
                             else:
                                 # For thinking parts, stream immediately (dim)
                                 if event.index not in banner_printed:
@@ -1416,56 +1491,69 @@ class BaseAgent(ABC):
                                     banner_printed.add(event.index)
                                 escaped = escape(delta.content_delta)
                                 console.print(f"[dim]{escaped}[/dim]", end="")
+                    elif isinstance(delta, ToolCallPartDelta):
+                        import sys
+                        # For tool calls, show token counter (use string repr for estimation)
+                        token_count[event.index] += len(str(delta)) // 3
+                        # Get tool name if available
+                        tool_name = getattr(delta, "tool_name_delta", "")
+                        count = token_count[event.index]
+                        # Display with tool wrench icon and tool name
+                        if tool_name:
+                            sys.stdout.write(
+                                f"\r\x1b[K  🔧 Calling {tool_name}... {count} tokens"
+                            )
+                        else:
+                            sys.stdout.write(
+                                f"\r\x1b[K  🔧 Calling tool... {count} tokens"
+                            )
+                        sys.stdout.flush()
             # PartEndEvent - finish the streaming with a newline
             elif isinstance(event, PartEndEvent):
                 if event.index in streaming_parts:
-                    # For text parts, do final render then stop the Live display
+                    import sys
+                    # For text parts, clear counter line and render markdown
                     if event.index in text_parts:
-                        # Final render to ensure we show complete content
-                        # (throttling may have skipped the last few tokens)
-                        if event.index in live_displays and event.index in text_buffer:
-                            try:
-                                final_content = "".join(text_buffer[event.index])
-                                live_displays[event.index].update(
-                                    Markdown(final_content)
-                                )
-                            except Exception:
-                                pass
-                        if event.index in live_displays:
-                            try:
-                                live_displays[event.index].stop()
-                            except Exception:
-                                pass
-                            del live_displays[event.index]
-                        # When not using Live display, print the final content as markdown
-                        elif event.index in text_buffer:
+                        # Clear the token counter line
+                        sys.stdout.write("\r\x1b[K")
+                        sys.stdout.flush()
+                        # Render the final markdown nicely
+                        if event.index in text_buffer:
                             try:
                                 final_content = "".join(text_buffer[event.index])
                                 if final_content.strip():
                                     console.print(Markdown(final_content))
                             except Exception:
                                 pass
-                        if event.index in text_buffer:
                             del text_buffer[event.index]
-                        # Clean up render time tracking
-                        last_render_time.pop(event.index, None)
+                    # For tool parts, clear the token counter line
+                    elif event.index in tool_parts:
+                        # Clear the token counter line
+                        sys.stdout.write("\r\x1b[K")
+                        sys.stdout.flush()
                     # For thinking parts, just print newline
                     elif event.index in banner_printed:
                         console.print()  # Final newline after streaming
+                    # Clean up token count
+                    token_count.pop(event.index, None)
                     # Clean up all tracking sets
                     streaming_parts.discard(event.index)
                     thinking_parts.discard(event.index)
                     text_parts.discard(event.index)
+                    tool_parts.discard(event.index)
                     banner_printed.discard(event.index)
-                    # Resume spinner if next part is NOT text/thinking (avoid race condition)
-                    # If next part is a tool call or None, it's safe to resume
+                    # Resume spinner if next part is NOT text/thinking/tool (avoid race condition)
+                    # If next part is None or handled differently, it's safe to resume
                     # Note: spinner itself handles blank line before appearing
                     from code_puppy.messaging.spinner import resume_all_spinners
                     next_kind = getattr(event, "next_part_kind", None)
-                    if next_kind not in ("text", "thinking"):
+                    if next_kind not in ("text", "thinking", "tool-call"):
                         resume_all_spinners()
         # Spinner is resumed in PartEndEvent when appropriate (based on next_part_kind)
@@ -1624,6 +1712,7 @@ class BaseAgent(ABC):
         *,
         attachments: Optional[Sequence[BinaryContent]] = None,
         link_attachments: Optional[Sequence[Union[ImageUrl, DocumentUrl]]] = None,
+        output_type: Optional[Type[Any]] = None,
         **kwargs,
     ) -> Any:
         """Run the agent with MCP servers, attachments, and full cancellation support.
@@ -1632,10 +1721,13 @@ class BaseAgent(ABC):
             prompt: Primary user prompt text (may be empty when attachments present).
             attachments: Local binary payloads (e.g., dragged images) to include.
             link_attachments: Remote assets (image/document URLs) to include.
+            output_type: Optional Pydantic model or type for structured output.
+                When provided, creates a temporary agent configured to return
+                this type instead of the default string output.
             **kwargs: Additional arguments forwarded to `pydantic_ai.Agent.run`.
         Returns:
-            The agent's response.
+            The agent's response (typed according to output_type if specified).
         Raises:
             asyncio.CancelledError: When execution is cancelled by user.
@@ -1659,6 +1751,11 @@ class BaseAgent(ABC):
         pydantic_agent = (
             self._code_generation_agent or self.reload_code_generation_agent()
         )
+        # If a custom output_type is specified, create a temporary agent with that type
+        if output_type is not None:
+            pydantic_agent = self._create_agent_with_output_type(output_type)
         # Handle claude-code and chatgpt-codex models: prepend system prompt to first user message
         from code_puppy.model_utils import is_chatgpt_codex_model, is_claude_code_model
@@ -1855,30 +1952,74 @@ class BaseAgent(ABC):
         def graceful_sigint_handler(_sig, _frame):
             # When using keyboard-based cancel, SIGINT should be a no-op
             # (just show a hint to user about the configured cancel key)
+            import sys
             from code_puppy.keymap import get_cancel_agent_display_name
             cancel_key = get_cancel_agent_display_name()
-            emit_info(f"Use {cancel_key} to cancel the agent task.")
+            if sys.platform == "win32":
+                # On Windows, we use keyboard listener, so SIGINT might still fire
+                # but we handle cancellation via the key listener
+                pass  # Silent on Windows - the key listener handles it
+            else:
+                emit_info(f"Use {cancel_key} to cancel the agent task.")
         original_handler = None
         key_listener_stop_event = None
         _key_listener_thread = None
+        _windows_ctrl_handler = None  # Store reference to prevent garbage collection
         try:
-            if cancel_agent_uses_signal():
-                # Use SIGINT-based cancellation (default Ctrl+C behavior)
+            if sys.platform == "win32":
+                # Windows: Use SetConsoleCtrlHandler for reliable Ctrl+C handling
+                import ctypes
+                # Define the handler function type
+                HANDLER_ROUTINE = ctypes.WINFUNCTYPE(ctypes.c_bool, ctypes.c_ulong)
+                def windows_ctrl_handler(ctrl_type):
+                    """Handle Windows console control events."""
+                    CTRL_C_EVENT = 0
+                    CTRL_BREAK_EVENT = 1
+                    if ctrl_type in (CTRL_C_EVENT, CTRL_BREAK_EVENT):
+                        # Check if we're awaiting user input
+                        if is_awaiting_user_input():
+                            return False  # Let default handler run
+                        # Schedule agent cancellation
+                        schedule_agent_cancel()
+                        return True  # We handled it, don't terminate
+                    return False  # Let other handlers process it
+                # Create the callback - must keep reference alive!
+                _windows_ctrl_handler = HANDLER_ROUTINE(windows_ctrl_handler)
+                # Register the handler
+                kernel32 = ctypes.windll.kernel32
+                if not kernel32.SetConsoleCtrlHandler(_windows_ctrl_handler, True):
+                    emit_warning("Failed to set Windows Ctrl+C handler")
+                # Also spawn keyboard listener for Ctrl+X (shell cancel) and other keys
+                key_listener_stop_event = threading.Event()
+                _key_listener_thread = self._spawn_ctrl_x_key_listener(
+                    key_listener_stop_event,
+                    on_escape=lambda: None,  # Ctrl+X handled by command_runner
+                    on_cancel_agent=None,  # Ctrl+C handled by SetConsoleCtrlHandler above
+                )
+            elif cancel_agent_uses_signal():
+                # Unix with Ctrl+C: Use SIGINT-based cancellation
                 original_handler = signal.signal(
                     signal.SIGINT, keyboard_interrupt_handler
                 )
             else:
-                # Use keyboard listener for agent cancellation
-                # Set a graceful SIGINT handler that shows a hint
+                # Unix with different cancel key: Use keyboard listener
                 original_handler = signal.signal(signal.SIGINT, graceful_sigint_handler)
-                # Spawn keyboard listener with the cancel agent callback
                 key_listener_stop_event = threading.Event()
                 _key_listener_thread = self._spawn_ctrl_x_key_listener(
                     key_listener_stop_event,
-                    on_escape=lambda: None,  # Ctrl+X handled by command_runner
+                    on_escape=lambda: None,
                     on_cancel_agent=schedule_agent_cancel,
                 )
@@ -1903,8 +2044,17 @@ class BaseAgent(ABC):
             # Stop keyboard listener if it was started
             if key_listener_stop_event is not None:
                 key_listener_stop_event.set()
-            # Restore original signal handler
-            if (
-                original_handler is not None
-            ):  # Explicit None check - SIG_DFL can be 0/falsy!
+            # Unregister Windows Ctrl handler
+            if sys.platform == "win32" and _windows_ctrl_handler is not None:
+                try:
+                    import ctypes
+                    kernel32 = ctypes.windll.kernel32
+                    kernel32.SetConsoleCtrlHandler(_windows_ctrl_handler, False)
+                except Exception:
+                    pass  # Best effort cleanup
+            # Restore original signal handler (Unix)
+            if original_handler is not None:
                 signal.signal(signal.SIGINT, original_handler)

code_puppy/cli_runner.py CHANGED Viewed

@@ -706,6 +706,12 @@ async def run_prompt_with_attachments(
     attachments = [attachment.content for attachment in processed_prompt.attachments]
     link_attachments = [link.url_part for link in processed_prompt.link_attachments]
+    # IMPORTANT: Set the shared console on the agent so that streaming output
+    # uses the same console as the spinner. This prevents Live display conflicts
+    # that cause line duplication during markdown streaming.
+    if spinner_console is not None:
+        agent._console = spinner_console
     # Create the agent task first so we can track and cancel it
     agent_task = asyncio.create_task(
         agent.run_with_mcp(
@@ -784,5 +790,6 @@ def main_entry():
             DBOS.destroy()
         return 0
     finally:
-        # Reset terminal on Unix-like systems (not Windows)
+        # Reset terminal on all platforms for clean state
+        reset_windows_terminal_full()  # Safe no-op on non-Windows
         reset_unix_terminal()

code_puppy/command_line/add_model_menu.py CHANGED Viewed

@@ -571,6 +571,7 @@ class AddModelMenu:
             "cerebras": "cerebras",
             "cohere": "custom_openai",
             "perplexity": "custom_openai",
+            "minimax": "custom_anthropic",
         }
         # Determine the model type
@@ -600,6 +601,16 @@ class AddModelMenu:
                 api_key_env = f"${provider.env[0]}" if provider.env else "$API_KEY"
                 config["custom_endpoint"] = {"url": api_url, "api_key": api_key_env}
+        # Special handling for minimax: uses custom_anthropic but needs custom_endpoint
+        # and the URL needs /v1 stripped (comes as https://api.minimax.io/anthropic/v1)
+        if provider.id == "minimax" and provider.api:
+            api_url = provider.api
+            # Strip /v1 suffix if present
+            if api_url.endswith("/v1"):
+                api_url = api_url[:-3]
+            api_key_env = f"${provider.env[0]}" if provider.env else "$API_KEY"
+            config["custom_endpoint"] = {"url": api_url, "api_key": api_key_env}
         # Add context length if available
         if model.context_length and model.context_length > 0:
             config["context_length"] = model.context_length

code-puppy 0.0.316__py3-none-any.whl → 0.0.325__py3-none-any.whl

code-puppy 0.0.316__py3-none-any.whl → 0.0.325__py3-none-any.whl