patchpal 0.4.5-py3-none-any.whl → 0.7.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
patchpal/__init__.py CHANGED
@@ -1,6 +1,6 @@
  """PatchPal - An open-source Claude Code clone implemented purely in Python."""

- __version__ = "0.4.5"
+ __version__ = "0.7.1"

  from patchpal.agent import create_agent
  from patchpal.tools import (
patchpal/agent.py CHANGED
@@ -811,12 +811,17 @@ def _apply_prompt_caching(messages: List[Dict[str, Any]], model_id: str) -> List
  class PatchPalAgent:
  """Simple agent that uses LiteLLM for tool calling."""

- def __init__(self, model_id: str = "anthropic/claude-sonnet-4-5"):
+ def __init__(self, model_id: str = "anthropic/claude-sonnet-4-5", custom_tools=None):
  """Initialize the agent.

  Args:
  model_id: LiteLLM model identifier
+ custom_tools: Optional list of Python functions to add as tools
  """
+ # Store custom tools
+ self.custom_tools = custom_tools or []
+ self.custom_tool_funcs = {func.__name__: func for func in self.custom_tools}
+
  # Convert ollama/ to ollama_chat/ for LiteLLM compatibility
  if model_id.startswith("ollama/"):
  model_id = model_id.replace("ollama/", "ollama_chat/", 1)
@@ -862,6 +867,10 @@ class PatchPalAgent:
  self.cumulative_input_tokens = 0
  self.cumulative_output_tokens = 0

+ # Track cache-related tokens (for Anthropic/Bedrock models with prompt caching)
+ self.cumulative_cache_creation_tokens = 0
+ self.cumulative_cache_read_tokens = 0
+
  # LiteLLM settings for models that need parameter dropping
  self.litellm_kwargs = {}
  if self.model_id.startswith("bedrock/"):
@@ -873,6 +882,26 @@ class PatchPalAgent:
  # Custom OpenAI-compatible servers (vLLM, etc.) often don't support all parameters
  self.litellm_kwargs["drop_params"] = True

+ def _prune_tool_outputs_inline(self, max_chars: int, truncation_message: str) -> int:
+ """Unified pruning function for tool outputs.
+
+ Args:
+ max_chars: Maximum characters to keep per tool output
+ truncation_message: Message to append after truncation
+
+ Returns:
+ Number of characters pruned
+ """
+ pruned_chars = 0
+ for msg in self.messages:
+ if msg.get("role") == "tool" and msg.get("content"):
+ content_size = len(str(msg["content"]))
+ if content_size > max_chars:
+ original_size = content_size
+ msg["content"] = str(msg["content"])[:max_chars] + truncation_message
+ pruned_chars += original_size - len(msg["content"])
+ return pruned_chars
+
  def _perform_auto_compaction(self):
  """Perform automatic context window compaction.

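Note: the new _prune_tool_outputs_inline helper truncates oversized tool messages in place and returns the number of characters removed. A minimal sketch of its effect, where agent is a PatchPalAgent instance and the message content and sizes are illustrative, not taken from this diff:

    # Suppose one tool message carries a 40,000-character command output.
    agent.messages = [
        {"role": "tool", "tool_call_id": "call_1", "name": "run_command", "content": "x" * 40_000},
    ]
    pruned = agent._prune_tool_outputs_inline(
        max_chars=5_000,
        truncation_message="\n\n[... truncated ...]",
    )
    # The message now holds its first 5,000 characters plus the notice,
    # and pruned reports roughly 35,000 characters removed.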
@@ -881,10 +910,32 @@ class PatchPalAgent:
  """
  # Don't compact if we have very few messages - compaction summary
  # could be longer than the messages being removed
- if len(self.messages) < 5:
+ # Instead, use aggressive pruning since high capacity with few messages
+ # indicates large tool outputs rather than conversation depth
+ if len(self.messages) < 10:
  print(
- f"\033[2m Skipping compaction - only {len(self.messages)} messages (need at least 5 for effective compaction)\033[0m"
+ f"\033[2m Only {len(self.messages)} messages - using aggressive pruning instead of summarization\033[0m"
  )
+
+ # Aggressively truncate all large tool outputs (5K chars)
+ pruned_chars = self._prune_tool_outputs_inline(
+ max_chars=5_000,
+ truncation_message="\n\n[... content truncated during compaction. Use read_lines or grep_code for targeted access ...]",
+ )
+
+ stats_after = self.context_manager.get_usage_stats(self.messages)
+ if pruned_chars > 0:
+ print(
+ f"\033[1;32m✓ Context reduced to {stats_after['usage_percent']}% through aggressive pruning (removed ~{pruned_chars:,} chars)\033[0m\n"
+ )
+ else:
+ print(
+ f"\033[1;33m⚠️ No large tool outputs to prune. Context at {stats_after['usage_percent']}%.\033[0m"
+ )
+ print("\033[1;33m Consider using '/clear' to start fresh.\033[0m\n")
+
+ # Update tracker to prevent immediate re-compaction
+ self._last_compaction_message_count = len(self.messages)
  return

  # Prevent compaction loops - don't compact again if we just did
@@ -931,6 +982,43 @@ class PatchPalAgent:
  return

  # Phase 2: Full compaction needed
+ # EMERGENCY: If context is at or over capacity (≥100%), do aggressive pruning first
+ # Otherwise the summarization request itself will exceed context limits
+ stats_after_prune = self.context_manager.get_usage_stats(self.messages)
+ if stats_after_prune["usage_ratio"] >= 1.0:
+ print(
+ f"\033[1;31m ⚠️ Context at or over capacity ({stats_after_prune['usage_percent']}%)!\033[0m"
+ )
+ print(
+ "\033[2m Emergency: Aggressively pruning recent large tool outputs...\033[0m",
+ flush=True,
+ )
+
+ # Truncate large tool outputs (10K chars - less aggressive than 5K for few-messages case)
+ emergency_pruned = self._prune_tool_outputs_inline(
+ max_chars=10_000,
+ truncation_message="\n\n[... content truncated due to context window limits ...]",
+ )
+
+ if emergency_pruned > 0:
+ print(
+ f"\033[2m Emergency pruned ~{emergency_pruned:,} chars from large tool outputs\033[0m",
+ flush=True,
+ )
+ stats_after_emergency = self.context_manager.get_usage_stats(self.messages)
+ print(
+ f"\033[2m Context now at {stats_after_emergency['usage_percent']}% capacity\033[0m",
+ flush=True,
+ )
+
+ # If still over 150%, give up and recommend /clear
+ if stats_after_emergency["usage_ratio"] > 1.5:
+ print(
+ f"\033[1;31m✗ Context still too large for compaction ({stats_after_emergency['usage_percent']}%)\033[0m"
+ )
+ print("\033[1;33m Please use '/clear' to start a fresh session.\033[0m\n")
+ return
+
  print("\033[2m Generating conversation summary...\033[0m", flush=True)

  try:
@@ -953,6 +1041,19 @@ class PatchPalAgent:
  self.cumulative_input_tokens += response.usage.prompt_tokens
  if hasattr(response.usage, "completion_tokens"):
  self.cumulative_output_tokens += response.usage.completion_tokens
+ # Track cache statistics (Anthropic/Bedrock prompt caching)
+ if (
+ hasattr(response.usage, "cache_creation_input_tokens")
+ and response.usage.cache_creation_input_tokens
+ ):
+ self.cumulative_cache_creation_tokens += (
+ response.usage.cache_creation_input_tokens
+ )
+ if (
+ hasattr(response.usage, "cache_read_input_tokens")
+ and response.usage.cache_read_input_tokens
+ ):
+ self.cumulative_cache_read_tokens += response.usage.cache_read_input_tokens

  return response

@@ -1029,6 +1130,67 @@ class PatchPalAgent:
  if self.enable_auto_compact and self.context_manager.needs_compaction(self.messages):
  self._perform_auto_compaction()

+ # Agent loop with interrupt handling
+ try:
+ return self._run_agent_loop(max_iterations)
+ except KeyboardInterrupt:
+ # Clean up conversation state if interrupted mid-execution
+ self._cleanup_interrupted_state()
+ raise # Re-raise so CLI can handle it
+
+ def _cleanup_interrupted_state(self):
+ """Clean up conversation state after KeyboardInterrupt.
+
+ If the last message is an assistant message with tool_calls but no
+ corresponding tool responses, we need to either remove the message
+ or add error responses to maintain valid conversation structure.
+ """
+ if not self.messages:
+ return
+
+ last_msg = self.messages[-1]
+
+ # Check if last message is assistant with tool_calls
+ if last_msg.get("role") == "assistant" and last_msg.get("tool_calls"):
+ tool_calls = last_msg["tool_calls"]
+
+ # Check if we have tool responses for all tool_calls
+ tool_call_ids = {tc.id for tc in tool_calls}
+
+ # Look for tool responses after this assistant message
+ # (should be immediately following, but scan to be safe)
+ response_ids = set()
+ for msg in self.messages[self.messages.index(last_msg) + 1 :]:
+ if msg.get("role") == "tool":
+ response_ids.add(msg.get("tool_call_id"))
+
+ # If we're missing responses, add error responses for all tool calls
+ if tool_call_ids != response_ids:
+ missing_ids = tool_call_ids - response_ids
+
+ # Add error tool responses for the missing tool calls
+ for tool_call in tool_calls:
+ if tool_call.id in missing_ids:
+ self.messages.append(
+ {
+ "role": "tool",
+ "tool_call_id": tool_call.id,
+ "name": tool_call.function.name,
+ "content": "Error: Operation interrupted by user (Ctrl-C)",
+ }
+ )
+
+ def _run_agent_loop(self, max_iterations: int) -> str:
+ """Internal method that runs the agent loop.
+
+ Separated from run() to enable proper interrupt handling.
+
+ Args:
+ max_iterations: Maximum number of agent iterations
+
+ Returns:
+ The agent's final response
+ """
  # Agent loop
  for iteration in range(max_iterations):
  # Show thinking message
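Note: chat-completion APIs generally reject a request whose history contains an assistant message announcing tool_calls that have no matching tool responses, which is why _cleanup_interrupted_state appends placeholder results after a Ctrl-C. A sketch of the repaired tail of the conversation (the id and tool name are illustrative):

    # Last message before cleanup: a tool call that never got to run.
    {"role": "assistant", "content": None, "tool_calls": [...]}  # includes id "call_42", name "run_command"
    # Appended by _cleanup_interrupted_state so the history stays valid:
    {"role": "tool", "tool_call_id": "call_42", "name": "run_command",
     "content": "Error: Operation interrupted by user (Ctrl-C)"}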
@@ -1042,10 +1204,18 @@

  # Use LiteLLM for all providers
  try:
+ # Build tool list (built-in + custom)
+ tools = list(TOOLS)
+ if self.custom_tools:
+ from patchpal.tool_schema import function_to_tool_schema
+
+ for func in self.custom_tools:
+ tools.append(function_to_tool_schema(func))
+
  response = litellm.completion(
  model=self.model_id,
  messages=messages,
- tools=TOOLS,
+ tools=tools,
  tool_choice="auto",
  **self.litellm_kwargs,
  )
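Note: the exact schema emitted by patchpal.tool_schema.function_to_tool_schema is not shown in this diff, but for litellm.completion(tools=...) to accept the merged list, each custom entry has to follow the same OpenAI-style function-tool shape as the built-in TOOLS. A hedged sketch of what the calculator example from the create_agent docstring further down would roughly map to:

    {
        "type": "function",
        "function": {
            "name": "calculator",
            "description": "Add two numbers.",
            "parameters": {
                "type": "object",
                "properties": {
                    "x": {"type": "integer", "description": "First number"},
                    "y": {"type": "integer", "description": "Second number"},
                },
                "required": ["x", "y"],
            },
        },
    }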
@@ -1057,6 +1227,19 @@ class PatchPalAgent:
  self.cumulative_input_tokens += response.usage.prompt_tokens
  if hasattr(response.usage, "completion_tokens"):
  self.cumulative_output_tokens += response.usage.completion_tokens
+ # Track cache statistics (Anthropic/Bedrock prompt caching)
+ if (
+ hasattr(response.usage, "cache_creation_input_tokens")
+ and response.usage.cache_creation_input_tokens
+ ):
+ self.cumulative_cache_creation_tokens += (
+ response.usage.cache_creation_input_tokens
+ )
+ if (
+ hasattr(response.usage, "cache_read_input_tokens")
+ and response.usage.cache_read_input_tokens
+ ):
+ self.cumulative_cache_read_tokens += response.usage.cache_read_input_tokens

  except Exception as e:
  return f"Error calling model: {e}"
@@ -1099,15 +1282,25 @@ class PatchPalAgent:
  tool_result = f"Error: Invalid JSON arguments for {tool_name}"
  print(f"\033[1;31m✗ {tool_name}: Invalid arguments\033[0m")
  else:
- # Get the tool function
- tool_func = TOOL_FUNCTIONS.get(tool_name)
+ # Get the tool function (check custom tools first, then built-in)
+ tool_func = self.custom_tool_funcs.get(tool_name) or TOOL_FUNCTIONS.get(
+ tool_name
+ )
  if tool_func is None:
  tool_result = f"Error: Unknown tool {tool_name}"
  print(f"\033[1;31m✗ Unknown tool: {tool_name}\033[0m")
  else:
  # Show tool call message
- tool_display = tool_name.replace("_", " ").title()
- if tool_name == "read_file":
+ if tool_name in self.custom_tool_funcs:
+ # Custom tool - show generic message with args
+ args_preview = str(tool_args)[:60]
+ if len(str(tool_args)) > 60:
+ args_preview += "..."
+ print(
+ f"\033[2m🔧 {tool_name}({args_preview})\033[0m",
+ flush=True,
+ )
+ elif tool_name == "read_file":
  print(
  f"\033[2m📖 Reading: {tool_args.get('path', '')}\033[0m",
  flush=True,
@@ -1250,15 +1443,43 @@ class PatchPalAgent:
  tool_result = tool_func(**filtered_args)
  except Exception as e:
  tool_result = f"Error executing {tool_name}: {e}"
- print(f"\033[1;31m✗ {tool_display}: {e}\033[0m")
+ print(f"\033[1;31m✗ {tool_name}: {e}\033[0m")

  # Add tool result to messages
+ # Check if result is extremely large and might blow context
+ result_str = str(tool_result)
+ result_size = len(result_str)
+
+ # Warn if result is > 100K chars (~33K tokens)
+ if result_size > 100_000:
+ print(
+ f"\033[1;33m⚠️ Large tool output: {result_size:,} chars (~{result_size // 3:,} tokens)\033[0m"
+ )
+
+ # If result would push us WAY over capacity, truncate it
+ current_stats = self.context_manager.get_usage_stats(self.messages)
+ # Estimate tokens in this result
+ result_tokens = self.context_manager.estimator.estimate_tokens(result_str)
+ projected_ratio = (
+ current_stats["total_tokens"] + result_tokens
+ ) / current_stats["context_limit"]
+
+ if projected_ratio > 1.5: # Would exceed 150% capacity
+ print(
+ "\033[1;31m⚠️ Tool output would exceed context capacity! Truncating...\033[0m"
+ )
+ # Keep first 50K chars
+ result_str = (
+ result_str[:50_000]
+ + "\n\n[... Output truncated to prevent context window overflow. Use read_lines or grep_code for targeted access ...]"
+ )
+
  self.messages.append(
  {
  "role": "tool",
  "tool_call_id": tool_call.id,
  "name": tool_name,
- "content": str(tool_result),
+ "content": result_str,
  }
  )

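Worked example of the overflow guard above, with illustrative numbers (not from this diff): if the model's context limit is 200,000 tokens, the conversation already holds 150,000 tokens, and a tool returns output estimated at 180,000 tokens, then projected_ratio = (150,000 + 180,000) / 200,000 = 1.65. That exceeds the 1.5 threshold, so only the first 50,000 characters (roughly 16,000-17,000 tokens at the ~3 characters-per-token estimate used in the warning) are kept and the truncation notice is appended.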
@@ -1299,18 +1520,33 @@ class PatchPalAgent:
  )


- def create_agent(model_id: str = "anthropic/claude-sonnet-4-5") -> PatchPalAgent:
+ def create_agent(model_id: str = "anthropic/claude-sonnet-4-5", custom_tools=None) -> PatchPalAgent:
  """Create and return a PatchPal agent.

  Args:
  model_id: LiteLLM model identifier (default: anthropic/claude-sonnet-4-5)
+ custom_tools: Optional list of Python functions to use as custom tools.
+ Each function should have type hints and a docstring.

  Returns:
  A configured PatchPalAgent instance
+
+ Example:
+ def calculator(x: int, y: int) -> str:
+ '''Add two numbers.
+
+ Args:
+ x: First number
+ y: Second number
+ '''
+ return str(x + y)
+
+ agent = create_agent(custom_tools=[calculator])
+ response = agent.run("What's 5 + 3?")
  """
  # Reset session todos for new session
  from patchpal.tools import reset_session_todos

  reset_session_todos()

- return PatchPalAgent(model_id=model_id)
+ return PatchPalAgent(model_id=model_id, custom_tools=custom_tools)
patchpal/cli.py CHANGED
@@ -211,9 +211,26 @@ Supported models: Any LiteLLM-supported model
  # Determine model to use (priority: CLI arg > env var > default)
  model_id = args.model or os.getenv("PATCHPAL_MODEL") or "anthropic/claude-sonnet-4-5"

- # Create the agent with the specified model
+ # Discover custom tools from ~/.patchpal/tools/
+ from patchpal.tool_schema import discover_tools, list_custom_tools
+
+ custom_tools = discover_tools()
+
+ # Show custom tools info if any were loaded
+ custom_tool_info = list_custom_tools()
+ if custom_tool_info:
+ tool_names = [name for name, _, _ in custom_tool_info]
+ tools_str = ", ".join(tool_names)
+ # Store for later display (after model info)
+ custom_tools_message = (
+ f"\033[1;36m🔧 Loaded {len(custom_tool_info)} custom tool(s): {tools_str}\033[0m"
+ )
+ else:
+ custom_tools_message = None
+
+ # Create the agent with the specified model and custom tools
  # LiteLLM will handle API key validation and provide appropriate error messages
- agent = create_agent(model_id=model_id)
+ agent = create_agent(model_id=model_id, custom_tools=custom_tools)

  # Get max iterations from environment variable or use default
  max_iterations = int(os.getenv("PATCHPAL_MAX_ITERATIONS", "100"))
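Note: beyond the ~/.patchpal/tools/ directory name, the discovery convention is not shown in this diff. Based on the requirements stated in the create_agent docstring (type hints plus a docstring per function), a custom tool module dropped into that directory would plausibly look like the sketch below; the file name and the assumption that discover_tools picks up module-level functions are illustrative, not confirmed by the diff:

    # ~/.patchpal/tools/word_count.py  (hypothetical example)
    def word_count(text: str) -> str:
        """Count the words in a piece of text.

        Args:
            text: The text to count words in.
        """
        return str(len(text.split()))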
@@ -238,6 +255,10 @@ Supported models: Any LiteLLM-supported model
  print("=" * 80)
  print(f"\nUsing model: {model_id}")

+ # Show custom tools info if any were loaded
+ if custom_tools_message:
+ print(custom_tools_message)
+
  # Show require-permission-for-all indicator if active
  if args.require_permission_for_all:
  print("\033[1;33m🔒 Permission required for ALL operations (including reads)\033[0m")
@@ -380,6 +401,55 @@ Supported models: Any LiteLLM-supported model
  total_tokens = agent.cumulative_input_tokens + agent.cumulative_output_tokens
  print(f" Total tokens: {total_tokens:,}")

+ # Show cache statistics if available (Anthropic/Bedrock prompt caching)
+ has_cache_stats = (
+ agent.cumulative_cache_creation_tokens > 0
+ or agent.cumulative_cache_read_tokens > 0
+ )
+ if has_cache_stats:
+ print("\n \033[1;36mPrompt Caching Statistics\033[0m")
+ print(f" Cache write tokens: {agent.cumulative_cache_creation_tokens:,}")
+ print(f" Cache read tokens: {agent.cumulative_cache_read_tokens:,}")
+
+ # Calculate cache hit rate
+ if agent.cumulative_input_tokens > 0:
+ cache_hit_rate = (
+ agent.cumulative_cache_read_tokens / agent.cumulative_input_tokens
+ ) * 100
+ print(f" Cache hit rate: {cache_hit_rate:.1f}%")
+
+ # Show cost-adjusted input tokens (cache reads cost less)
+ # Note: This is an approximation - actual pricing varies by model
+ # For Anthropic: cache writes = 1.25x, cache reads = 0.1x, regular = 1x
+ if "anthropic" in model_id.lower() or "claude" in model_id.lower():
+ # Break down: cumulative_input = non_cached + cache_read + cache_write
+ non_cached_tokens = (
+ agent.cumulative_input_tokens
+ - agent.cumulative_cache_read_tokens
+ - agent.cumulative_cache_creation_tokens
+ )
+ # Approximate cost-equivalent tokens (cache reads cost 10%, cache writes cost 125%)
+ cost_adjusted = (
+ non_cached_tokens
+ + (agent.cumulative_cache_read_tokens * 0.1)
+ + (agent.cumulative_cache_creation_tokens * 1.25)
+ )
+ savings_pct = (
+ (
+ (agent.cumulative_input_tokens - cost_adjusted)
+ / agent.cumulative_input_tokens
+ * 100
+ )
+ if agent.cumulative_input_tokens > 0
+ else 0
+ )
+ print(
+ f" Cost-adjusted input tokens: {cost_adjusted:,.0f} (~{savings_pct:.0f}% savings)"
+ )
+ print(
+ " \033[2m(Cache reads cost 10% of base price, writes cost 125% of base price)\033[0m"
+ )
+
  print("=" * 70 + "\n")
  continue
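Worked example of the cost-adjusted figure above, with illustrative token counts: if cumulative input is 100,000 tokens, of which 80,000 were cache reads and 10,000 were cache writes, the non-cached share is 10,000 and cost_adjusted = 10,000 + (80,000 × 0.1) + (10,000 × 1.25) = 30,500, i.e. roughly 70% savings versus paying full price for all 100,000 input tokens. The cache hit rate reported above would be 80%.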