PyPI - code-puppy - Versions diffs - 0.0.320__tar.gz → 0.0.323__tar.gz - Mend

code-puppy 0.0.320.tar.gz → 0.0.323.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (167)

{code_puppy-0.0.320 → code_puppy-0.0.323}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: code-puppy
-Version: 0.0.320
+Version: 0.0.323
 Summary: Code generation agent
 Project-URL: repository, https://github.com/mpfaffenberger/code_puppy
 Project-URL: HomePage, https://github.com/mpfaffenberger/code_puppy

{code_puppy-0.0.320 → code_puppy-0.0.323}/code_puppy/agents/base_agent.py RENAMED Viewed

@@ -4,11 +4,23 @@ import asyncio
 import json
 import math
 import signal
+import sys
 import threading
 import uuid
 from abc import ABC, abstractmethod
 from collections.abc import AsyncIterable
-from typing import Any, Callable, Dict, List, Optional, Sequence, Set, Tuple, Union
+from typing import (
+    Any,
+    Callable,
+    Dict,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+    Type,
+    Union,
+)
 import mcp
 import pydantic
@@ -1230,6 +1242,74 @@ class BaseAgent(ABC):
             self._mcp_servers = mcp_servers
         return self._code_generation_agent
+    def _create_agent_with_output_type(self, output_type: Type[Any]) -> PydanticAgent:
+        """Create a temporary agent configured with a custom output_type.
+        This is used when structured output is requested via run_with_mcp.
+        The agent is created fresh with the same configuration as the main agent
+        but with the specified output_type instead of str.
+        Args:
+            output_type: The Pydantic model or type for structured output.
+        Returns:
+            A configured PydanticAgent (or DBOSAgent wrapper) with the custom output_type.
+        """
+        from code_puppy.model_utils import prepare_prompt_for_model
+        from code_puppy.tools import register_tools_for_agent
+        model_name = self.get_model_name()
+        models_config = ModelFactory.load_config()
+        model, resolved_model_name = self._load_model_with_fallback(
+            model_name, models_config, str(uuid.uuid4())
+        )
+        instructions = self.get_system_prompt()
+        puppy_rules = self.load_puppy_rules()
+        if puppy_rules:
+            instructions += f"\n{puppy_rules}"
+        mcp_servers = getattr(self, "_mcp_servers", []) or []
+        model_settings = make_model_settings(resolved_model_name)
+        prepared = prepare_prompt_for_model(
+            model_name, instructions, "", prepend_system_to_user=False
+        )
+        instructions = prepared.instructions
+        global _reload_count
+        _reload_count += 1
+        if get_use_dbos():
+            temp_agent = PydanticAgent(
+                model=model,
+                instructions=instructions,
+                output_type=output_type,
+                retries=3,
+                toolsets=[],
+                history_processors=[self.message_history_accumulator],
+                model_settings=model_settings,
+            )
+            agent_tools = self.get_available_tools()
+            register_tools_for_agent(temp_agent, agent_tools)
+            dbos_agent = DBOSAgent(
+                temp_agent, name=f"{self.name}-structured-{_reload_count}"
+            )
+            return dbos_agent
+        else:
+            temp_agent = PydanticAgent(
+                model=model,
+                instructions=instructions,
+                output_type=output_type,
+                retries=3,
+                toolsets=mcp_servers,
+                history_processors=[self.message_history_accumulator],
+                model_settings=model_settings,
+            )
+            agent_tools = self.get_available_tools()
+            register_tools_for_agent(temp_agent, agent_tools)
+            return temp_agent
     # It's okay to decorate it with DBOS.step even if not using DBOS; the decorator is a no-op in that case.
     @DBOS.step()
     def message_history_accumulator(self, ctx: RunContext, messages: List[Any]):
@@ -1590,6 +1670,7 @@ class BaseAgent(ABC):
         *,
         attachments: Optional[Sequence[BinaryContent]] = None,
         link_attachments: Optional[Sequence[Union[ImageUrl, DocumentUrl]]] = None,
+        output_type: Optional[Type[Any]] = None,
         **kwargs,
     ) -> Any:
         """Run the agent with MCP servers, attachments, and full cancellation support.
@@ -1598,10 +1679,13 @@ class BaseAgent(ABC):
             prompt: Primary user prompt text (may be empty when attachments present).
             attachments: Local binary payloads (e.g., dragged images) to include.
             link_attachments: Remote assets (image/document URLs) to include.
+            output_type: Optional Pydantic model or type for structured output.
+                When provided, creates a temporary agent configured to return
+                this type instead of the default string output.
             **kwargs: Additional arguments forwarded to `pydantic_ai.Agent.run`.
         Returns:
-            The agent's response.
+            The agent's response (typed according to output_type if specified).
         Raises:
             asyncio.CancelledError: When execution is cancelled by user.
@@ -1625,6 +1709,11 @@ class BaseAgent(ABC):
         pydantic_agent = (
             self._code_generation_agent or self.reload_code_generation_agent()
         )
+        # If a custom output_type is specified, create a temporary agent with that type
+        if output_type is not None:
+            pydantic_agent = self._create_agent_with_output_type(output_type)
         # Handle claude-code and chatgpt-codex models: prepend system prompt to first user message
         from code_puppy.model_utils import is_chatgpt_codex_model, is_claude_code_model
@@ -1822,29 +1911,72 @@ class BaseAgent(ABC):
             # When using keyboard-based cancel, SIGINT should be a no-op
             # (just show a hint to user about the configured cancel key)
             from code_puppy.keymap import get_cancel_agent_display_name
+            import sys
             cancel_key = get_cancel_agent_display_name()
-            emit_info(f"Use {cancel_key} to cancel the agent task.")
+            if sys.platform == "win32":
+                # On Windows, we use keyboard listener, so SIGINT might still fire
+                # but we handle cancellation via the key listener
+                pass  # Silent on Windows - the key listener handles it
+            else:
+                emit_info(f"Use {cancel_key} to cancel the agent task.")
         original_handler = None
         key_listener_stop_event = None
         _key_listener_thread = None
+        _windows_ctrl_handler = None  # Store reference to prevent garbage collection
         try:
-            if cancel_agent_uses_signal():
-                # Use SIGINT-based cancellation (default Ctrl+C behavior)
+            if sys.platform == "win32":
+                # Windows: Use SetConsoleCtrlHandler for reliable Ctrl+C handling
+                import ctypes
+                # Define the handler function type
+                HANDLER_ROUTINE = ctypes.WINFUNCTYPE(ctypes.c_bool, ctypes.c_ulong)
+                def windows_ctrl_handler(ctrl_type):
+                    """Handle Windows console control events."""
+                    CTRL_C_EVENT = 0
+                    CTRL_BREAK_EVENT = 1
+                    if ctrl_type in (CTRL_C_EVENT, CTRL_BREAK_EVENT):
+                        # Check if we're awaiting user input
+                        if is_awaiting_user_input():
+                            return False  # Let default handler run
+                        # Schedule agent cancellation
+                        schedule_agent_cancel()
+                        return True  # We handled it, don't terminate
+                    return False  # Let other handlers process it
+                # Create the callback - must keep reference alive!
+                _windows_ctrl_handler = HANDLER_ROUTINE(windows_ctrl_handler)
+                # Register the handler
+                kernel32 = ctypes.windll.kernel32
+                if not kernel32.SetConsoleCtrlHandler(_windows_ctrl_handler, True):
+                    emit_warning("Failed to set Windows Ctrl+C handler")
+                # Also spawn keyboard listener for Ctrl+X (shell cancel) and other keys
+                key_listener_stop_event = threading.Event()
+                _key_listener_thread = self._spawn_ctrl_x_key_listener(
+                    key_listener_stop_event,
+                    on_escape=lambda: None,  # Ctrl+X handled by command_runner
+                    on_cancel_agent=None,  # Ctrl+C handled by SetConsoleCtrlHandler above
+                )
+            elif cancel_agent_uses_signal():
+                # Unix with Ctrl+C: Use SIGINT-based cancellation
                 original_handler = signal.signal(
                     signal.SIGINT, keyboard_interrupt_handler
                 )
             else:
-                # Use keyboard listener for agent cancellation
-                # Set a graceful SIGINT handler that shows a hint
+                # Unix with different cancel key: Use keyboard listener
                 original_handler = signal.signal(signal.SIGINT, graceful_sigint_handler)
-                # Spawn keyboard listener with the cancel agent callback
                 key_listener_stop_event = threading.Event()
                 _key_listener_thread = self._spawn_ctrl_x_key_listener(
                     key_listener_stop_event,
-                    on_escape=lambda: None,  # Ctrl+X handled by command_runner
+                    on_escape=lambda: None,
                     on_cancel_agent=schedule_agent_cancel,
                 )
@@ -1869,8 +2001,17 @@ class BaseAgent(ABC):
             # Stop keyboard listener if it was started
             if key_listener_stop_event is not None:
                 key_listener_stop_event.set()
-            # Restore original signal handler
-            if (
-                original_handler is not None
-            ):  # Explicit None check - SIG_DFL can be 0/falsy!
+            # Unregister Windows Ctrl handler
+            if sys.platform == "win32" and _windows_ctrl_handler is not None:
+                try:
+                    import ctypes
+                    kernel32 = ctypes.windll.kernel32
+                    kernel32.SetConsoleCtrlHandler(_windows_ctrl_handler, False)
+                except Exception:
+                    pass  # Best effort cleanup
+            # Restore original signal handler (Unix)
+            if original_handler is not None:
                 signal.signal(signal.SIGINT, original_handler)

{code_puppy-0.0.320 → code_puppy-0.0.323}/code_puppy/cli_runner.py RENAMED Viewed

@@ -790,5 +790,6 @@ def main_entry():
             DBOS.destroy()
         return 0
     finally:
-        # Reset terminal on Unix-like systems (not Windows)
+        # Reset terminal on all platforms for clean state
+        reset_windows_terminal_full()  # Safe no-op on non-Windows
         reset_unix_terminal()

{code_puppy-0.0.320 → code_puppy-0.0.323}/code_puppy/keymap.py RENAMED Viewed

@@ -86,9 +86,15 @@ def cancel_agent_uses_signal() -> bool:
     """Check if the cancel agent key uses SIGINT (Ctrl+C).
     Returns:
-        True if the cancel key is ctrl+c (uses SIGINT handler),
-        False if it uses keyboard listener approach.
+        True if the cancel key is ctrl+c AND we're not on Windows
+        (uses SIGINT handler), False if it uses keyboard listener approach.
     """
+    import sys
+    # On Windows, always use keyboard listener - SIGINT is unreliable
+    if sys.platform == "win32":
+        return False
     return get_cancel_agent_key() == "ctrl+c"

code_puppy-0.0.323/code_puppy/plugins/shell_safety/agent_shell_safety.py ADDED Viewed

@@ -0,0 +1,69 @@
+"""Shell command safety assessment agent.
+This agent provides rapid risk assessment of shell commands before execution.
+It's designed to be ultra-lightweight with a concise prompt (<200 tokens) and
+uses structured output for reliable parsing.
+"""
+from typing import TYPE_CHECKING, List
+from code_puppy.agents.base_agent import BaseAgent
+if TYPE_CHECKING:
+    pass
+class ShellSafetyAgent(BaseAgent):
+    """Lightweight agent for assessing shell command safety risks.
+    This agent evaluates shell commands for potential risks including:
+    - File system destruction (rm -rf, dd, format, mkfs)
+    - Database operations (DROP, TRUNCATE, unfiltered UPDATE/DELETE)
+    - Privilege escalation (sudo, su, chmod 777)
+    - Network operations (wget/curl to unknown hosts)
+    - Data exfiltration patterns
+    The agent returns structured output with a risk level and brief reasoning.
+    """
+    @property
+    def name(self) -> str:
+        """Agent name for internal use."""
+        return "shell_safety_checker"
+    @property
+    def display_name(self) -> str:
+        """User-facing display name."""
+        return "Shell Safety Checker 🛡️"
+    @property
+    def description(self) -> str:
+        """Agent description."""
+        return "Lightweight agent that assesses shell command safety risks"
+    def get_system_prompt(self) -> str:
+        """Get the ultra-concise system prompt for shell safety assessment.
+        This prompt is kept under 200 tokens for fast inference and low cost.
+        """
+        return """You are a shell command safety analyzer. Assess risk levels concisely.
+**Risk Levels:**
+- none: Completely safe (ls, pwd, echo, cat readonly files)
+- low: Minimal risk (mkdir, touch, git status, read-only queries)
+- medium: Moderate risk (file edits, package installs, service restarts)
+- high: Significant risk (rm files, UPDATE/DELETE without WHERE, TRUNCATE, chmod dangerous permissions)
+- critical: Severe/destructive (rm -rf, DROP TABLE/DATABASE, dd, format, mkfs, bq delete dataset, unfiltered mass deletes)
+**Evaluate:**
+- Scope (single file vs. entire system)
+- Reversibility (can it be undone?)
+- Data loss potential
+- Privilege requirements
+- Database destruction patterns
+**Output:** Risk level + reasoning (max 1 sentence)."""
+    def get_available_tools(self) -> List[str]:
+        """This agent uses no tools - pure reasoning only."""
+        return []

{code_puppy-0.0.320 → code_puppy-0.0.323}/code_puppy/plugins/shell_safety/register_callbacks.py RENAMED Viewed

@@ -7,12 +7,42 @@ and assesses their safety risk before execution.
 from typing import Any, Dict, Optional
 from code_puppy.callbacks import register_callback
-from code_puppy.config import get_safety_permission_level, get_yolo_mode
+from code_puppy.config import (
+    get_global_model_name,
+    get_safety_permission_level,
+    get_yolo_mode,
+)
 from code_puppy.messaging import emit_info
 from code_puppy.plugins.shell_safety.command_cache import (
     cache_assessment,
     get_cached_assessment,
 )
+from code_puppy.tools.command_runner import ShellSafetyAssessment
+# OAuth model prefixes - these models have their own safety mechanisms
+OAUTH_MODEL_PREFIXES = (
+    "claude-code-",  # Anthropic OAuth
+    "chatgpt-",  # OpenAI OAuth
+    "gemini-oauth",  # Google OAuth
+)
+def is_oauth_model(model_name: str | None) -> bool:
+    """Check if the model is an OAuth model that should skip safety checks.
+    OAuth models have their own built-in safety mechanisms, so we skip
+    the shell safety callback to avoid redundant checks and potential bugs.
+    Args:
+        model_name: The name of the current model
+    Returns:
+        True if the model is an OAuth model, False otherwise
+    """
+    if not model_name:
+        return False
+    return model_name.startswith(OAUTH_MODEL_PREFIXES)
 # Risk level hierarchy for numeric comparison
 # Lower numbers = safer commands, higher numbers = more dangerous
@@ -68,6 +98,11 @@ async def shell_safety_callback(
         None if command is safe to proceed
         Dict with rejection info if command should be blocked
     """
+    # Skip safety checks for OAuth models - they have their own safety mechanisms
+    current_model = get_global_model_name()
+    if is_oauth_model(current_model):
+        return None
     # Only check safety in yolo_mode - otherwise user is reviewing manually
     yolo_mode = get_yolo_mode()
     if not yolo_mode:
@@ -108,8 +143,14 @@ async def shell_safety_callback(
         # Create agent and assess command
         agent = ShellSafetyAgent()
-        # Run async assessment (we're in an async callback now!)
-        assessment = await agent.assess_command(command, cwd)
+        # Build the assessment prompt with optional cwd context
+        prompt = f"Assess this shell command:\n\nCommand: {command}"
+        if cwd:
+            prompt += f"\nWorking directory: {cwd}"
+        # Run async assessment with structured output type
+        result = await agent.run_with_mcp(prompt, output_type=ShellSafetyAssessment)
+        assessment = result.output
         # Cache the result for future use, but only if it's not a fallback assessment
         if not getattr(assessment, "is_fallback", False):

{code_puppy-0.0.320 → code_puppy-0.0.323}/code_puppy/tools/command_runner.py RENAMED Viewed

@@ -192,6 +192,11 @@ def kill_all_running_shell_processes() -> int:
     """Kill all currently tracked running shell processes and stop reader threads.
     Returns the number of processes signaled.
+    Implementation notes:
+    - Atomically snapshot and clear the registry to prevent race conditions
+    - Deduplicate by PID to ensure each process is killed at most once
+    - Let exceptions from _kill_process_group propagate (tests expect this)
     """
     global _READER_STOP_EVENT
@@ -199,30 +204,52 @@ def kill_all_running_shell_processes() -> int:
     if _READER_STOP_EVENT:
         _READER_STOP_EVENT.set()
-    procs: list[subprocess.Popen]
+    # Atomically take snapshot and clear registry
+    # This prevents other threads from seeing/processing the same processes
     with _RUNNING_PROCESSES_LOCK:
-        procs = list(_RUNNING_PROCESSES)
-    count = 0
-    for p in procs:
+        procs_snapshot = list(_RUNNING_PROCESSES)
+        _RUNNING_PROCESSES.clear()
+    # Deduplicate by pid to ensure at-most-one kill per process
+    seen_pids: set = set()
+    killed_count = 0
+    for proc in procs_snapshot:
+        if proc is None:
+            continue
+        pid = getattr(proc, "pid", None)
+        key = pid if pid is not None else id(proc)
+        if key in seen_pids:
+            continue
+        seen_pids.add(key)
+        # Close pipes first to unblock readline()
         try:
-            # Close pipes first to unblock readline()
-            try:
-                if p.stdout and not p.stdout.closed:
-                    p.stdout.close()
-                if p.stderr and not p.stderr.closed:
-                    p.stderr.close()
-                if p.stdin and not p.stdin.closed:
-                    p.stdin.close()
-            except (OSError, ValueError):
-                pass
+            if proc.stdout and not proc.stdout.closed:
+                proc.stdout.close()
+            if proc.stderr and not proc.stderr.closed:
+                proc.stderr.close()
+            if proc.stdin and not proc.stdin.closed:
+                proc.stdin.close()
+        except (OSError, ValueError):
+            pass
+        # Only attempt to kill processes that are still running
+        if proc.poll() is None:
+            # Let exceptions bubble up (tests expect this behavior)
+            _kill_process_group(proc)
+            killed_count += 1
+            # Track user-killed PIDs
+            if pid is not None:
+                try:
+                    _USER_KILLED_PROCESSES.add(pid)
+                except Exception:
+                    pass  # Non-fatal bookkeeping
-            if p.poll() is None:
-                _kill_process_group(p)
-                count += 1
-                _USER_KILLED_PROCESSES.add(p.pid)
-        finally:
-            _unregister_process(p)
-    return count
+    return killed_count
 def get_running_shell_process_count() -> int:

{code_puppy-0.0.320 → code_puppy-0.0.323}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "code-puppy"
-version = "0.0.320"
+version = "0.0.323"
 description = "Code generation agent"
 readme = "README.md"
 requires-python = ">=3.11,<3.14"

code-puppy 0.0.320__tar.gz → 0.0.323__tar.gz

code-puppy 0.0.320.tar.gz → 0.0.323.tar.gz