PyPI - patchpal - Versions diffs - 0.22.3__tar.gz → 0.22.5__tar.gz - Mend

patchpal 0.22.3.tar.gz → 0.22.5.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

{patchpal-0.22.3/patchpal.egg-info → patchpal-0.22.5}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: patchpal
-Version: 0.22.3
+Version: 0.22.5
 Summary: An agentic coding and automation assistant, supporting both local and cloud LLMs
 Author: PatchPal Contributors
 License-Expression: Apache-2.0

{patchpal-0.22.3 → patchpal-0.22.5}/patchpal/__init__.py RENAMED Viewed

@@ -1,6 +1,6 @@
 """PatchPal - An open-source Claude Code clone implemented purely in Python."""
-__version__ = "0.22.3"
+__version__ = "0.22.5"
 from patchpal.agent import create_agent, create_react_agent
 from patchpal.cli.autopilot import autopilot_loop

{patchpal-0.22.3 → patchpal-0.22.5}/patchpal/agent/function_calling.py RENAMED Viewed

@@ -741,6 +741,7 @@ It's currently empty (just the template). The file is automatically loaded at se
                 response = litellm.completion(
                     model=self.model_id,
                     messages=messages,
+                    max_tokens=32000,  # Explicit output token limit for predictable context usage
                     timeout=LLM_TIMEOUT,
                     **self.litellm_kwargs,
                 )
@@ -957,10 +958,8 @@ It's currently empty (just the template). The file is automatically loaded at se
         # Check for compaction BEFORE starting work
         # This ensures we never compact mid-execution and lose tool results
-        # Use last_prompt_tokens from previous API call for accurate check (includes cache operations)
-        if self.enable_auto_compact and self.context_manager.needs_compaction(
-            self.messages, actual_prompt_tokens=self.last_prompt_tokens
-        ):
+        # Always estimates current messages to avoid staleness issues (no actual_prompt_tokens)
+        if self.enable_auto_compact and self.context_manager.needs_compaction(self.messages):
             self._perform_auto_compaction()
         # Agent loop with interrupt handling
@@ -1080,6 +1079,7 @@ It's currently empty (just the template). The file is automatically loaded at se
                         "messages": messages,
                         "tools": tools,
                         "tool_choice": "auto",
+                        "max_tokens": 32000,  # Explicit output token limit for predictable context usage
                         "timeout": LLM_TIMEOUT,
                         "stream": stream,
                         **self.litellm_kwargs,

{patchpal-0.22.3 → patchpal-0.22.5}/patchpal/context.py RENAMED Viewed

@@ -257,7 +257,10 @@ Be comprehensive but concise. The goal is to continue work seamlessly without lo
         self.system_prompt = system_prompt
         self.estimator = TokenEstimator(model_id)
         self.context_limit = self._get_context_limit()
-        self.output_reserve = 4_096  # Reserve tokens for model output
+        # Reserve 16% of context for output, clamped to [4_000, 32_000] tokens.
+        # Small-context models like GPT-4 (8K) hit the 4_000-token floor (16% would
+        # be only ~1.3K), while models with 200K+ context get the full 32_000 reserve.
+        self.output_reserve = min(32_000, max(4_000, int(self.context_limit * 0.16)))
     def _get_context_limit(self) -> int:
         """Get context limit for model.
@@ -321,30 +324,31 @@ Be comprehensive but concise. The goal is to continue work seamlessly without lo
     ) -> bool:
         """Check if context window needs compaction.
-        Supports both reactive (preferred) and proactive (fallback) approaches:
-        - Reactive: Use actual_prompt_tokens from latest API response
-        - Proactive: Estimate tokens if actual_prompt_tokens not available
+        ALWAYS estimates current messages to avoid staleness issues when predicting
+        whether the NEXT API call will overflow. Using actual_prompt_tokens from a
+        previous call can cause false negatives when large messages are added between
+        the last API call and the compaction check.
-        The reactive approach is preferred as it uses actual token counts from the LLM,
-        avoiding the need for tiktoken or other estimation methods.
+        Example of staleness bug (fixed):
+        - Previous API call: 120K tokens (60% usage)
+        - User pastes huge changelog: +90K tokens
+        - Total: 210K tokens (exceeds 200K limit)
+        - Bug: If we used actual_prompt_tokens=120K, we'd think we're at 60%
+        - Fix: Always re-estimate to see the 210K total
+        The actual_prompt_tokens parameter is kept for API compatibility but ignored
+        for compaction decisions. Use get_usage_stats() for display purposes where
+        actual tokens are appropriate (staleness OK for showing recent stats).
         Args:
             messages: Current message history
-            actual_prompt_tokens: Optional actual prompt token count from latest API response
+            actual_prompt_tokens: IGNORED - kept for API compatibility only
         Returns:
             True if compaction is needed
         """
-        # Reactive approach (preferred): use actual token counts from API response
-        if actual_prompt_tokens is not None:
-            # Add output reserve to account for response tokens
-            total_tokens = actual_prompt_tokens + self.output_reserve
-            usage_ratio = total_tokens / self.context_limit
-            return usage_ratio >= self.COMPACT_THRESHOLD
-        # Proactive approach (fallback): estimate tokens when API data not available
-        # This uses character-based estimation (3 chars per token) which works
-        # reliably without requiring tiktoken or network access
+        # ALWAYS estimate current messages - never use stale actual_prompt_tokens
+        # This ensures we detect large message additions that happen between API calls
         # Note: Dynamic date/time message adds ~30 tokens on each LLM call
         system_tokens = self.estimator.estimate_tokens(self.system_prompt)
         datetime_tokens = 30  # Approximate size of dynamic date/time message

{patchpal-0.22.3 → patchpal-0.22.5/patchpal.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: patchpal
-Version: 0.22.3
+Version: 0.22.5
 Summary: An agentic coding and automation assistant, supporting both local and cloud LLMs
 Author: PatchPal Contributors
 License-Expression: Apache-2.0