patchpal 0.6.0-py3-none-any.whl → 0.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchpal/__init__.py +1 -1
- patchpal/agent.py +216 -3
- patchpal/cli.py +167 -0
- patchpal/tools.py +4 -1
- {patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/METADATA +37 -20
- patchpal-0.8.0.dist-info/RECORD +15 -0
- patchpal-0.6.0.dist-info/RECORD +0 -15
- {patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/WHEEL +0 -0
- {patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/entry_points.txt +0 -0
- {patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/licenses/LICENSE +0 -0
- {patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/top_level.txt +0 -0
patchpal/__init__.py
CHANGED
patchpal/agent.py
CHANGED

@@ -867,6 +867,14 @@ class PatchPalAgent:
         self.cumulative_input_tokens = 0
         self.cumulative_output_tokens = 0
 
+        # Track cache-related tokens (for Anthropic/Bedrock models with prompt caching)
+        self.cumulative_cache_creation_tokens = 0
+        self.cumulative_cache_read_tokens = 0
+
+        # Track cumulative costs across all LLM calls
+        self.cumulative_cost = 0.0
+        self.last_message_cost = 0.0
+
         # LiteLLM settings for models that need parameter dropping
         self.litellm_kwargs = {}
         if self.model_id.startswith("bedrock/"):

@@ -878,6 +886,26 @@
         # Custom OpenAI-compatible servers (vLLM, etc.) often don't support all parameters
         self.litellm_kwargs["drop_params"] = True
 
+    def _prune_tool_outputs_inline(self, max_chars: int, truncation_message: str) -> int:
+        """Unified pruning function for tool outputs.
+
+        Args:
+            max_chars: Maximum characters to keep per tool output
+            truncation_message: Message to append after truncation
+
+        Returns:
+            Number of characters pruned
+        """
+        pruned_chars = 0
+        for msg in self.messages:
+            if msg.get("role") == "tool" and msg.get("content"):
+                content_size = len(str(msg["content"]))
+                if content_size > max_chars:
+                    original_size = content_size
+                    msg["content"] = str(msg["content"])[:max_chars] + truncation_message
+                    pruned_chars += original_size - len(msg["content"])
+        return pruned_chars
+
     def _perform_auto_compaction(self):
         """Perform automatic context window compaction.
 
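The `_prune_tool_outputs_inline` helper above truncates oversized tool messages in place and reports how many characters it removed. A minimal standalone replay of that loop on fabricated data (the free function and the sample `messages` list below are illustrative stand-ins, not the package's API):

```python
# Sketch of the pruning loop from the hunk above, run outside the class.
def prune_tool_outputs_inline(messages, max_chars, truncation_message):
    pruned_chars = 0
    for msg in messages:
        if msg.get("role") == "tool" and msg.get("content"):
            content_size = len(str(msg["content"]))
            if content_size > max_chars:
                original_size = content_size
                msg["content"] = str(msg["content"])[:max_chars] + truncation_message
                pruned_chars += original_size - len(msg["content"])
    return pruned_chars

messages = [
    {"role": "user", "content": "inspect the repo"},
    {"role": "tool", "content": "x" * 12_000},  # oversized tool output
]
marker = "\n\n[... truncated ...]"
print(prune_tool_outputs_inline(messages, 5_000, marker))
# prints 6979: the 12_000 original chars minus the 5_000 kept and the 21-char marker
```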

@@ -886,10 +914,32 @@
         """
         # Don't compact if we have very few messages - compaction summary
         # could be longer than the messages being removed
-
+        # Instead, use aggressive pruning since high capacity with few messages
+        # indicates large tool outputs rather than conversation depth
+        if len(self.messages) < 10:
             print(
-                f"\033[2m
+                f"\033[2m  Only {len(self.messages)} messages - using aggressive pruning instead of summarization\033[0m"
             )
+
+            # Aggressively truncate all large tool outputs (5K chars)
+            pruned_chars = self._prune_tool_outputs_inline(
+                max_chars=5_000,
+                truncation_message="\n\n[... content truncated during compaction. Use read_lines or grep_code for targeted access ...]",
+            )
+
+            stats_after = self.context_manager.get_usage_stats(self.messages)
+            if pruned_chars > 0:
+                print(
+                    f"\033[1;32m✓ Context reduced to {stats_after['usage_percent']}% through aggressive pruning (removed ~{pruned_chars:,} chars)\033[0m\n"
+                )
+            else:
+                print(
+                    f"\033[1;33m⚠️  No large tool outputs to prune. Context at {stats_after['usage_percent']}%.\033[0m"
+                )
+                print("\033[1;33m  Consider using '/clear' to start fresh.\033[0m\n")
+
+            # Update tracker to prevent immediate re-compaction
+            self._last_compaction_message_count = len(self.messages)
             return
 
         # Prevent compaction loops - don't compact again if we just did

@@ -936,6 +986,43 @@
             return
 
         # Phase 2: Full compaction needed
+        # EMERGENCY: If context is at or over capacity (≥100%), do aggressive pruning first
+        # Otherwise the summarization request itself will exceed context limits
+        stats_after_prune = self.context_manager.get_usage_stats(self.messages)
+        if stats_after_prune["usage_ratio"] >= 1.0:
+            print(
+                f"\033[1;31m  ⚠️ Context at or over capacity ({stats_after_prune['usage_percent']}%)!\033[0m"
+            )
+            print(
+                "\033[2m  Emergency: Aggressively pruning recent large tool outputs...\033[0m",
+                flush=True,
+            )
+
+            # Truncate large tool outputs (10K chars - less aggressive than 5K for few-messages case)
+            emergency_pruned = self._prune_tool_outputs_inline(
+                max_chars=10_000,
+                truncation_message="\n\n[... content truncated due to context window limits ...]",
+            )
+
+            if emergency_pruned > 0:
+                print(
+                    f"\033[2m  Emergency pruned ~{emergency_pruned:,} chars from large tool outputs\033[0m",
+                    flush=True,
+                )
+            stats_after_emergency = self.context_manager.get_usage_stats(self.messages)
+            print(
+                f"\033[2m  Context now at {stats_after_emergency['usage_percent']}% capacity\033[0m",
+                flush=True,
+            )
+
+            # If still over 150%, give up and recommend /clear
+            if stats_after_emergency["usage_ratio"] > 1.5:
+                print(
+                    f"\033[1;31m✗ Context still too large for compaction ({stats_after_emergency['usage_percent']}%)\033[0m"
+                )
+                print("\033[1;33m  Please use '/clear' to start a fresh session.\033[0m\n")
+                return
+
         print("\033[2m  Generating conversation summary...\033[0m", flush=True)
 
         try:

@@ -958,6 +1045,22 @@
            self.cumulative_input_tokens += response.usage.prompt_tokens
            if hasattr(response.usage, "completion_tokens"):
                self.cumulative_output_tokens += response.usage.completion_tokens
+           # Track cache statistics (Anthropic/Bedrock prompt caching)
+           if (
+               hasattr(response.usage, "cache_creation_input_tokens")
+               and response.usage.cache_creation_input_tokens
+           ):
+               self.cumulative_cache_creation_tokens += (
+                   response.usage.cache_creation_input_tokens
+               )
+           if (
+               hasattr(response.usage, "cache_read_input_tokens")
+               and response.usage.cache_read_input_tokens
+           ):
+               self.cumulative_cache_read_tokens += response.usage.cache_read_input_tokens
+
+           # Track cost from compaction call
+           self._calculate_cost(response)
 
         return response
 

@@ -1016,6 +1119,72 @@
                 "\033[1;33m  Continuing without compaction. Consider starting a new session.\033[0m\n"
             )
 
+    def _compute_cost_from_tokens(self, usage):
+        """Manually calculate cost from token usage using model pricing.
+
+        Args:
+            usage: The usage object from the LLM response
+
+        Returns:
+            float: The calculated cost in dollars
+        """
+        try:
+            model_info = litellm.get_model_info(self.model_id)
+            input_cost_per_token = model_info.get("input_cost_per_token", 0)
+            output_cost_per_token = model_info.get("output_cost_per_token", 0)
+
+            cost = 0.0
+
+            # Handle cache pricing for models that support it (e.g., Anthropic)
+            # Cache writes cost 1.25x, cache reads cost 0.1x of base price
+            cache_creation_tokens = 0
+            cache_read_tokens = 0
+
+            if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens:
+                cache_creation_tokens = usage.cache_creation_input_tokens
+                cost += cache_creation_tokens * input_cost_per_token * 1.25
+
+            if hasattr(usage, "cache_read_input_tokens") and usage.cache_read_input_tokens:
+                cache_read_tokens = usage.cache_read_input_tokens
+                cost += cache_read_tokens * input_cost_per_token * 0.1
+
+            # Regular input tokens (excluding cache tokens)
+            regular_input = usage.prompt_tokens - cache_creation_tokens - cache_read_tokens
+            cost += regular_input * input_cost_per_token
+
+            # Output tokens
+            cost += usage.completion_tokens * output_cost_per_token
+
+            return cost
+        except Exception:
+            # If pricing data is unavailable, return 0
+            return 0.0
+
+    def _calculate_cost(self, response):
+        """Calculate cost from LLM response and update cumulative tracking.
+
+        Args:
+            response: The LLM response object
+
+        Returns:
+            float: The calculated cost in dollars
+        """
+        try:
+            # Try litellm's built-in cost calculator first
+            cost = litellm.completion_cost(completion_response=response)
+        except Exception:
+            cost = 0.0
+
+        if not cost and hasattr(response, "usage") and response.usage:
+            # Fallback: manual calculation using model pricing
+            cost = self._compute_cost_from_tokens(response.usage)
+
+        if isinstance(cost, (int, float)) and cost > 0:
+            self.cumulative_cost += cost
+            self.last_message_cost = cost
+
+        return cost
+
     def run(self, user_message: str, max_iterations: int = 100) -> str:
         """Run the agent on a user message.
 
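Since `_compute_cost_from_tokens` prices cache writes at 1.25x and cache reads at 0.1x of the base input rate, a quick arithmetic check of the fallback path (the $3-per-million-token input price below is an invented illustration, not any model's real rate):

```python
# Worked example of the cache-aware fallback above with invented numbers.
input_cost_per_token = 3.00 / 1_000_000  # assumed $3 / 1M input tokens
prompt_tokens = 50_000
cache_creation_tokens = 20_000  # billed at 1.25x the base rate
cache_read_tokens = 25_000      # billed at 0.1x the base rate
regular_input = prompt_tokens - cache_creation_tokens - cache_read_tokens  # 5,000

cost = (
    cache_creation_tokens * input_cost_per_token * 1.25  # $0.0750
    + cache_read_tokens * input_cost_per_token * 0.1     # $0.0075
    + regular_input * input_cost_per_token               # $0.0150
)
print(f"${cost:.4f}")  # $0.0975, versus $0.1500 for the same prompt with no caching
```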

@@ -1131,6 +1300,22 @@
                self.cumulative_input_tokens += response.usage.prompt_tokens
                if hasattr(response.usage, "completion_tokens"):
                    self.cumulative_output_tokens += response.usage.completion_tokens
+               # Track cache statistics (Anthropic/Bedrock prompt caching)
+               if (
+                   hasattr(response.usage, "cache_creation_input_tokens")
+                   and response.usage.cache_creation_input_tokens
+               ):
+                   self.cumulative_cache_creation_tokens += (
+                       response.usage.cache_creation_input_tokens
+                   )
+               if (
+                   hasattr(response.usage, "cache_read_input_tokens")
+                   and response.usage.cache_read_input_tokens
+               ):
+                   self.cumulative_cache_read_tokens += response.usage.cache_read_input_tokens
+
+               # Track cost from this LLM call
+               self._calculate_cost(response)
 
            except Exception as e:
                return f"Error calling model: {e}"

@@ -1337,12 +1522,40 @@
            print(f"\033[1;31m✗ {tool_name}: {e}\033[0m")
 
        # Add tool result to messages
+       # Check if result is extremely large and might blow context
+       result_str = str(tool_result)
+       result_size = len(result_str)
+
+       # Warn if result is > 100K chars (~33K tokens)
+       if result_size > 100_000:
+           print(
+               f"\033[1;33m⚠️  Large tool output: {result_size:,} chars (~{result_size // 3:,} tokens)\033[0m"
+           )
+
+           # If result would push us WAY over capacity, truncate it
+           current_stats = self.context_manager.get_usage_stats(self.messages)
+           # Estimate tokens in this result
+           result_tokens = self.context_manager.estimator.estimate_tokens(result_str)
+           projected_ratio = (
+               current_stats["total_tokens"] + result_tokens
+           ) / current_stats["context_limit"]
+
+           if projected_ratio > 1.5:  # Would exceed 150% capacity
+               print(
+                   "\033[1;31m⚠️  Tool output would exceed context capacity! Truncating...\033[0m"
+               )
+               # Keep first 50K chars
+               result_str = (
+                   result_str[:50_000]
+                   + "\n\n[... Output truncated to prevent context window overflow. Use read_lines or grep_code for targeted access ...]"
+               )
+
        self.messages.append(
            {
                "role": "tool",
                "tool_call_id": tool_call.id,
                "name": tool_name,
-               "content":
+               "content": result_str,
            }
        )
 
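The new guard in the tool-result path projects whether appending an output would push the conversation past 150% of the context limit before it is ever added. A self-contained sketch of that projection with literal stand-ins for the `context_manager` calls (the limit, sizes, and the ~3-chars-per-token heuristic mirror the diff; the concrete numbers are invented):

```python
# Sketch of the capacity projection above; constants replace context_manager.
context_limit = 200_000      # assumed model context window, in tokens
total_tokens = 180_000       # tokens already in the conversation
result_str = "y" * 400_000   # hypothetical oversized tool output

result_tokens = len(result_str) // 3  # same ~3 chars/token estimate as the diff
projected_ratio = (total_tokens + result_tokens) / context_limit
print(f"{projected_ratio:.2f}")  # 1.57

if projected_ratio > 1.5:  # would exceed 150% capacity, so truncate
    result_str = result_str[:50_000] + "\n\n[... truncated ...]"
print(len(result_str))  # 50021: first 50K chars plus the marker
```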
patchpal/cli.py
CHANGED

@@ -16,6 +16,116 @@ from patchpal.agent import create_agent
 from patchpal.tools import audit_logger
 
 
+def _format_cost(value: float) -> str:
+    """Format cost with smart precision.
+
+    Args:
+        value: Cost in dollars
+
+    Returns:
+        Formatted cost string (e.g., "0.0234" or "0.00145")
+    """
+    if value == 0:
+        return "0.00"
+    magnitude = abs(value)
+    if magnitude >= 0.01:
+        return f"{value:.2f}"
+    else:
+        # For very small costs, show more decimal places
+        import math
+
+        return f"{value:.{max(2, 2 - int(math.log10(magnitude)))}f}"
+
+
+def _print_cost_statistics(
+    agent, total_tokens: int, show_header: bool = False, show_disclaimer: bool = False
+):
+    """Print cost statistics section.
+
+    Args:
+        agent: PatchPalAgent instance
+        total_tokens: Total token count for calculating averages
+        show_header: If True, print a section header
+        show_disclaimer: If True, show disclaimer about checking provider bills
+    """
+    if agent.cumulative_cost > 0:
+        if show_header:
+            print("\n  \033[1;36mCost Statistics\033[0m")
+        print(f"  Session cost: ${_format_cost(agent.cumulative_cost)} (estimated)")
+
+        if show_disclaimer:
+            print(
+                "  \033[2m(Calculated from token counts - check provider bill for exact cost)\033[0m"
+            )
+
+        # Show cost breakdown if we have token data
+        if total_tokens > 0:
+            cost_per_1k = (agent.cumulative_cost / total_tokens) * 1000
+            print(f"  Average: ${_format_cost(cost_per_1k)} per 1K tokens")
+    elif agent.total_llm_calls > 0:
+        # Model might not have pricing data (e.g., local Ollama)
+        if show_header:
+            print()
+        print("  \033[2mCost tracking unavailable (no pricing data for this model)\033[0m")
+
+
+def _print_session_summary(agent, show_detailed: bool = False):
+    """Print session statistics summary.
+
+    Args:
+        agent: PatchPalAgent instance
+        show_detailed: If True, show detailed breakdown; if False, show compact summary
+    """
+    # Guard against missing attributes (e.g., in tests with mock agents)
+    if (
+        not hasattr(agent, "total_llm_calls")
+        or not isinstance(agent.total_llm_calls, int)
+        or agent.total_llm_calls == 0
+    ):
+        return
+
+    print("\n" + "=" * 70)
+    print("\033[1;36mSession Summary\033[0m")
+    print("=" * 70)
+    print(f"  LLM calls: {agent.total_llm_calls}")
+
+    # Show token usage if available
+    has_usage_info = (
+        hasattr(agent, "cumulative_input_tokens")
+        and hasattr(agent, "cumulative_output_tokens")
+        and (agent.cumulative_input_tokens > 0 or agent.cumulative_output_tokens > 0)
+    )
+    if has_usage_info:
+        total_tokens = agent.cumulative_input_tokens + agent.cumulative_output_tokens
+        print(f"  Total tokens: {total_tokens:,}")
+
+        # Show cache hit rate if caching was used
+        if (
+            hasattr(agent, "cumulative_cache_read_tokens")
+            and hasattr(agent, "cumulative_input_tokens")
+            and agent.cumulative_cache_read_tokens > 0
+        ):
+            cache_hit_rate = (
+                agent.cumulative_cache_read_tokens / agent.cumulative_input_tokens
+            ) * 100
+            print(f"  Cache hit rate: {cache_hit_rate:.1f}%")
+
+    # Show cost statistics
+    if has_usage_info:
+        total_tokens = agent.cumulative_input_tokens + agent.cumulative_output_tokens
+    else:
+        total_tokens = 0
+
+    if show_detailed:
+        _print_cost_statistics(agent, total_tokens, show_header=False, show_disclaimer=False)
+    else:
+        # Show cost if available (compact version)
+        if hasattr(agent, "cumulative_cost") and agent.cumulative_cost > 0:
+            print(f"  Session cost: ${_format_cost(agent.cumulative_cost)} (estimated)")
+
+    print("=" * 70)
+
+
 class SkillCompleter(Completer):
     """Completer for skill names when input starts with /"""
 
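`_format_cost` keeps two decimals for amounts of a cent or more and widens the precision as values shrink. A quick replay of its branches on sample values (the function body is copied from the hunk above for a standalone check; it is not imported from the installed package):

```python
import math

# Copied from the diff above to trace the precision rules in isolation.
def _format_cost(value: float) -> str:
    if value == 0:
        return "0.00"
    magnitude = abs(value)
    if magnitude >= 0.01:
        return f"{value:.2f}"
    # For very small costs, show more decimal places
    return f"{value:.{max(2, 2 - int(math.log10(magnitude)))}f}"

for v in (0, 0.5, 0.0234, 0.00145, 0.000089):
    print(_format_cost(v))  # 0.00, 0.50, 0.02, 0.0015, 0.000089
```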

@@ -304,6 +414,9 @@ Supported models: Any LiteLLM-supported model
 
        # Check for exit commands
        if user_input.lower() in ["exit", "quit", "q"]:
+           # Show session statistics before exiting
+           _print_session_summary(agent, show_detailed=False)
+
            print("\nGoodbye!")
            break
 

@@ -401,6 +514,60 @@ Supported models: Any LiteLLM-supported model
            total_tokens = agent.cumulative_input_tokens + agent.cumulative_output_tokens
            print(f"  Total tokens: {total_tokens:,}")
 
+           # Show cache statistics if available (Anthropic/Bedrock prompt caching)
+           has_cache_stats = (
+               agent.cumulative_cache_creation_tokens > 0
+               or agent.cumulative_cache_read_tokens > 0
+           )
+           if has_cache_stats:
+               print("\n  \033[1;36mPrompt Caching Statistics\033[0m")
+               print(f"  Cache write tokens: {agent.cumulative_cache_creation_tokens:,}")
+               print(f"  Cache read tokens: {agent.cumulative_cache_read_tokens:,}")
+
+               # Calculate cache hit rate
+               if agent.cumulative_input_tokens > 0:
+                   cache_hit_rate = (
+                       agent.cumulative_cache_read_tokens / agent.cumulative_input_tokens
+                   ) * 100
+                   print(f"  Cache hit rate: {cache_hit_rate:.1f}%")
+
+               # Show cost-adjusted input tokens (cache reads cost less)
+               # Note: This is an approximation - actual pricing varies by model
+               # For Anthropic: cache writes = 1.25x, cache reads = 0.1x, regular = 1x
+               if "anthropic" in model_id.lower() or "claude" in model_id.lower():
+                   # Break down: cumulative_input = non_cached + cache_read + cache_write
+                   non_cached_tokens = (
+                       agent.cumulative_input_tokens
+                       - agent.cumulative_cache_read_tokens
+                       - agent.cumulative_cache_creation_tokens
+                   )
+                   # Approximate cost-equivalent tokens (cache reads cost 10%, cache writes cost 125%)
+                   cost_adjusted = (
+                       non_cached_tokens
+                       + (agent.cumulative_cache_read_tokens * 0.1)
+                       + (agent.cumulative_cache_creation_tokens * 1.25)
+                   )
+                   savings_pct = (
+                       (
+                           (agent.cumulative_input_tokens - cost_adjusted)
+                           / agent.cumulative_input_tokens
+                           * 100
+                       )
+                       if agent.cumulative_input_tokens > 0
+                       else 0
+                   )
+                   print(
+                       f"  Cost-adjusted input tokens: {cost_adjusted:,.0f} (~{savings_pct:.0f}% savings)"
+                   )
+                   print(
+                       "  \033[2m(Cache reads cost 10% of base price, writes cost 125% of base price)\033[0m"
+                   )
+
+           # Show cost statistics if available
+           _print_cost_statistics(
+               agent, total_tokens, show_header=True, show_disclaimer=True
+           )
+
            print("=" * 70 + "\n")
            continue
 
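The cost-adjusted figure above re-weights the session's input tokens by the Anthropic cache multipliers (reads at 10%, writes at 125% of the base price). A worked example with invented session totals, just to trace the arithmetic:

```python
# Invented session totals to trace the savings calculation above.
cumulative_input_tokens = 100_000
cache_read_tokens = 70_000    # billed at 10% of the base input price
cache_write_tokens = 10_000   # billed at 125% of the base input price
non_cached_tokens = cumulative_input_tokens - cache_read_tokens - cache_write_tokens

cost_adjusted = (
    non_cached_tokens              # 20,000 cost-equivalent tokens at 1x
    + cache_read_tokens * 0.1      # 7,000 cost-equivalent tokens
    + cache_write_tokens * 1.25    # 12,500 cost-equivalent tokens
)
savings_pct = (cumulative_input_tokens - cost_adjusted) / cumulative_input_tokens * 100
print(f"{cost_adjusted:,.0f} (~{savings_pct:.0f}% savings)")  # 39,500 (~60% savings)
```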
patchpal/tools.py
CHANGED

@@ -80,7 +80,10 @@ CRITICAL_FILES = {
 }
 
 # Configuration
-
+# Reduced from 10MB to 500KB to prevent context window explosions
+# A 3.46MB file = ~1.15M tokens which exceeds most model context limits (128K-200K)
+# 500KB ≈ 166K tokens which is safe for most models
+MAX_FILE_SIZE = int(os.getenv("PATCHPAL_MAX_FILE_SIZE", 500 * 1024))  # 500KB default
 READ_ONLY_MODE = os.getenv("PATCHPAL_READ_ONLY", "false").lower() == "true"
 ALLOW_SENSITIVE = os.getenv("PATCHPAL_ALLOW_SENSITIVE", "false").lower() == "true"
 ENABLE_AUDIT_LOG = os.getenv("PATCHPAL_AUDIT_LOG", "true").lower() == "true"
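The new 500KB cap is read from the environment at import time, so it can still be raised per session. A small sketch of the override and of the rough token budget behind the chosen default (the 1MB value is just an example; the ~3-chars-per-token estimate follows the diff's own comments):

```python
import os

# Example override before the module is imported; 1MB is an arbitrary choice.
os.environ["PATCHPAL_MAX_FILE_SIZE"] = str(1024 * 1024)
MAX_FILE_SIZE = int(os.getenv("PATCHPAL_MAX_FILE_SIZE", 500 * 1024))
print(MAX_FILE_SIZE)    # 1048576

# Rough token budget behind the 500KB default at ~3 chars per token.
print(500 * 1024 // 3)  # ≈ 170,000 tokens
```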

{patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: patchpal
-Version: 0.6.0
+Version: 0.8.0
 Summary: A lean Claude Code clone in pure Python
 Author: PatchPal Contributors
 License-Expression: Apache-2.0

@@ -129,7 +129,7 @@ export HOSTED_VLLM_API_KEY=token-abc123 # optional depending on your v
 patchpal
 
 # Use a specific model via command-line argument
-patchpal --model openai/gpt-
+patchpal --model openai/gpt-5.2  # or openai/gpt-5-mini, anthropic/claude-opus-4-5 etc.
 
 # Use vLLM (local)
 # Note: vLLM server must be started with --tool-call-parser and --enable-auto-tool-choice

@@ -143,7 +143,7 @@ export OLLAMA_CONTEXT_LENGTH=32768
 patchpal --model ollama_chat/qwen3:32b
 
 # Or set the model via environment variable
-export PATCHPAL_MODEL=openai/gpt-5
+export PATCHPAL_MODEL=openai/gpt-5.2
 patchpal
 ```
 

@@ -155,6 +155,8 @@ The agent has the following tools:
 
 ### File Operations
 - **read_file**: Read contents of files in the repository
+  - Limited to 500KB by default (configurable with `PATCHPAL_MAX_FILE_SIZE`)
+  - For larger files, use `read_lines` or `grep_code` for targeted access
 - **read_lines**: Read specific line ranges from a file without loading the entire file
   - Example: `read_lines("app.py", 100, 150)` - read lines 100-150
   - More efficient than read_file when you only need a few lines

@@ -281,7 +283,7 @@ cd patchpal
 # Copy examples to your personal skills directory
 cp -r examples/skills/commit ~/.patchpal/skills/
 cp -r examples/skills/review ~/.patchpal/skills/
-cp -r examples/skills/
+cp -r examples/skills/skill-creator ~/.patchpal/skills/
 ```
 
 **View examples online:**

@@ -368,18 +370,28 @@ Custom tools are Python functions with specific requirements:
 ```python
 # ~/.patchpal/tools/my_tools.py
 
-def
-"""
+def calculator(x: int, y: int, operation: str = "add") -> str:
+    """Perform basic arithmetic operations.
 
     Args:
         x: First number
         y: Second number
+        operation: Operation to perform (add, subtract, multiply, divide)
 
     Returns:
-
+        Result as a string
     """
-
-
+    if operation == "add":
+        return f"{x} + {y} = {x + y}"
+    elif operation == "subtract":
+        return f"{x} - {y} = {x - y}"
+    elif operation == "multiply":
+        return f"{x} * {y} = {x * y}"
+    elif operation == "divide":
+        if y == 0:
+            return "Error: Cannot divide by zero"
+        return f"{x} / {y} = {x / y}"
+    return "Unknown operation"
 
 
 def convert_currency(amount: float, from_currency: str, to_currency: str) -> str:

@@ -407,11 +419,15 @@ Once loaded, the agent calls your custom tools automatically:
 
 ```bash
 You: What's 15 + 27?
-Agent: [Calls
+Agent: [Calls calculator(15, 27, "add")]
 15 + 27 = 42
 
+You: What's 100 divided by 4?
+Agent: [Calls calculator(100, 4, "divide")]
+100 / 4 = 25
+
 You: Convert 100 USD to EUR
-Agent: [Calls convert_currency
+Agent: [Calls convert_currency(100, "USD", "EUR")]
 100 USD = 85.00 EUR
 ```
 

@@ -514,14 +530,14 @@ PatchPal supports any LiteLLM-compatible model. You can configure the model in t
 
 ### 1. Command-line Argument
 ```bash
-patchpal --model openai/gpt-5
+patchpal --model openai/gpt-5.2
 patchpal --model anthropic/claude-sonnet-4-5
 patchpal --model hosted_vllm/openai/gpt-oss-20b  # local model - no API charges
 ```
 
 ### 2. Environment Variable
 ```bash
-export PATCHPAL_MODEL=openai/gpt-5
+export PATCHPAL_MODEL=openai/gpt-5.2
 patchpal
 ```
 

@@ -533,7 +549,7 @@ If no model is specified, PatchPal uses `anthropic/claude-sonnet-4-5` (Claude So
 PatchPal works with any model supported by LiteLLM, including:
 
 - **Anthropic** (Recommended): `anthropic/claude-sonnet-4-5`, `anthropic/claude-opus-4-5`, `anthropic/claude-3-7-sonnet-latest`
-- **OpenAI**: `openai/gpt-5`, `openai/gpt-
+- **OpenAI**: `openai/gpt-5.2`, `openai/gpt-5-mini`
 - **AWS Bedrock**: `bedrock/anthropic.claude-sonnet-4-5-v1:0`
 - **vLLM (Local)** (Recommended for local): See vLLM section below for setup
 - **Ollama (Local)**: See Ollama section below for setup

@@ -1033,7 +1049,7 @@ PatchPal can be configured through `PATCHPAL_*` environment variables to customi
 ### Model Selection
 
 ```bash
-export PATCHPAL_MODEL=openai/gpt-
+export PATCHPAL_MODEL=openai/gpt-5.2  # Override default model
 # Priority: CLI arg > PATCHPAL_MODEL env var > default (anthropic/claude-sonnet-4-5)
 ```
 

@@ -1045,11 +1061,12 @@ export PATCHPAL_REQUIRE_PERMISSION=true # Prompt before executing commands/
 # ⚠️ WARNING: Setting to false disables prompts - only use in trusted environments
 
 # File Safety
-export PATCHPAL_MAX_FILE_SIZE=
+export PATCHPAL_MAX_FILE_SIZE=512000  # Maximum file size in bytes for read/write (default: 500KB)
+                                      # Reduced from 10MB to prevent context window explosions
 export PATCHPAL_READ_ONLY=true        # Prevent ALL file modifications (default: false)
-
+                                      # Useful for: code review, exploration, security audits
 export PATCHPAL_ALLOW_SENSITIVE=true  # Allow access to .env, credentials (default: false - blocked)
-
+                                      # Only enable with test/dummy credentials
 
 # Command Safety
 export PATCHPAL_ALLOW_SUDO=true       # Allow sudo/privilege escalation (default: false - blocked)

@@ -1191,7 +1208,7 @@ PatchPal includes comprehensive security protections enabled by default:
 **Critical Security:**
 - **Permission prompts**: Agent asks for permission before executing commands or modifying files (like Claude Code)
 - **Sensitive file protection**: Blocks access to `.env`, credentials, API keys
-- **File size limits**: Prevents OOM with configurable size limits (
+- **File size limits**: Prevents OOM and context explosions with configurable size limits (500KB default)
 - **Binary file detection**: Blocks reading non-text files
 - **Critical file warnings**: Warns when modifying infrastructure files (package.json, Dockerfile, etc.)
 - **Read-only mode**: Optional mode that prevents all modifications

@@ -1453,7 +1470,7 @@ When using cloud LLM providers (Anthropic, OpenAI, etc.), token usage directly i
 - Use less expensive models for routine tasks:
   ```bash
   patchpal --model anthropic/claude-3-7-sonnet-latest  # Cheaper than claude-sonnet-4-5
-  patchpal --model openai/gpt-
+  patchpal --model openai/gpt-5-mini  # Cheaper than gpt-5.2
   ```
 - Reserve premium models for complex reasoning tasks
 
patchpal-0.8.0.dist-info/RECORD
ADDED

@@ -0,0 +1,15 @@
+patchpal/__init__.py,sha256=lQlcUScZhf19wSBOBVKGHQzilSNoZC4JuaP5bKuE8Yw,606
+patchpal/agent.py,sha256=u5x4wOv4ComTWWKT9TeoHtXh6OkOcZyP89CGwdUAlPE,69601
+patchpal/cli.py,sha256=a-X57sSKLqkA5CB919-cL__KE6dHl1Q340BMOSTjxQg,31155
+patchpal/context.py,sha256=hdTUvyAXXUP47JY1Q3YJDU7noGAcHuBGlNuU272Fjp4,14831
+patchpal/permissions.py,sha256=pVlzit2KFmCpfcbHrHhjPA0LPka04wOtaQdZCf3CCa0,10781
+patchpal/skills.py,sha256=ESLPHkDI8DH4mnAbN8mIcbZ6Bis4vCcqS_NjlYPNCOs,3926
+patchpal/system_prompt.md,sha256=LQzcILr41s65hk7JjaX_WzjUHBHCazVSrx_F_ErqTmA,10850
+patchpal/tool_schema.py,sha256=dGEGYV160G9c7EnSMtnbQ_mYuoR1n6PHHE8T20BriYE,8357
+patchpal/tools.py,sha256=eZ5eh8DKYyqO95Vdu-tn1_6-W6OsBbY4JL5APGyp-tc,94018
+patchpal-0.8.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+patchpal-0.8.0.dist-info/METADATA,sha256=t8DaEZQTeEXAp4Ndk7vHwqqJswvo2LnSXcMAvM2LQ6s,58247
+patchpal-0.8.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+patchpal-0.8.0.dist-info/entry_points.txt,sha256=XcuQikKu5i8Sd8AfHLuKxSE2RWByInTcQgWpP61sr48,47
+patchpal-0.8.0.dist-info/top_level.txt,sha256=YWgv2F-_PIHCu-sF3AF8N1ut5_FbOT-VV6HB70pGSQ8,9
+patchpal-0.8.0.dist-info/RECORD,,
patchpal-0.6.0.dist-info/RECORD
DELETED

@@ -1,15 +0,0 @@
-patchpal/__init__.py,sha256=S3dYO3L8dSQG2Eaosbu4Pbdq5eTxXLmmvxSzh-TIPiI,606
-patchpal/agent.py,sha256=ayMkZUoohUsf5Tz4esBjOPZUvBT5n-ijOzoOp3c9LAA,59719
-patchpal/cli.py,sha256=6Imrd4hGupIrTi9jnnfwvraNZ_Pq0VJxfo6aSjLRoCY,24131
-patchpal/context.py,sha256=hdTUvyAXXUP47JY1Q3YJDU7noGAcHuBGlNuU272Fjp4,14831
-patchpal/permissions.py,sha256=pVlzit2KFmCpfcbHrHhjPA0LPka04wOtaQdZCf3CCa0,10781
-patchpal/skills.py,sha256=ESLPHkDI8DH4mnAbN8mIcbZ6Bis4vCcqS_NjlYPNCOs,3926
-patchpal/system_prompt.md,sha256=LQzcILr41s65hk7JjaX_WzjUHBHCazVSrx_F_ErqTmA,10850
-patchpal/tool_schema.py,sha256=dGEGYV160G9c7EnSMtnbQ_mYuoR1n6PHHE8T20BriYE,8357
-patchpal/tools.py,sha256=YAUX2-8BBqjZEadIWlUdO-KV2-WHGazgKdMHkYRAExI,93819
-patchpal-0.6.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-patchpal-0.6.0.dist-info/METADATA,sha256=hjleiaXTNaavuW0OygY1XPdbuflYxMQb0hAWw9pGWPw,57384
-patchpal-0.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-patchpal-0.6.0.dist-info/entry_points.txt,sha256=XcuQikKu5i8Sd8AfHLuKxSE2RWByInTcQgWpP61sr48,47
-patchpal-0.6.0.dist-info/top_level.txt,sha256=YWgv2F-_PIHCu-sF3AF8N1ut5_FbOT-VV6HB70pGSQ8,9
-patchpal-0.6.0.dist-info/RECORD,,

{patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/WHEEL
File without changes

{patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/entry_points.txt
File without changes

{patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/licenses/LICENSE
File without changes

{patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/top_level.txt
File without changes