patchpal 0.4.5-py3-none-any.whl → 0.7.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
patchpal/__init__.py CHANGED
@@ -1,6 +1,6 @@
  """PatchPal - An open-source Claude Code clone implemented purely in Python."""

- __version__ = "0.4.5"
+ __version__ = "0.7.1"

  from patchpal.agent import create_agent
  from patchpal.tools import (
patchpal/agent.py CHANGED
@@ -811,12 +811,17 @@ def _apply_prompt_caching(messages: List[Dict[str, Any]], model_id: str) -> List
  class PatchPalAgent:
  """Simple agent that uses LiteLLM for tool calling."""

- def __init__(self, model_id: str = "anthropic/claude-sonnet-4-5"):
+ def __init__(self, model_id: str = "anthropic/claude-sonnet-4-5", custom_tools=None):
  """Initialize the agent.

  Args:
  model_id: LiteLLM model identifier
+ custom_tools: Optional list of Python functions to add as tools
  """
+ # Store custom tools
+ self.custom_tools = custom_tools or []
+ self.custom_tool_funcs = {func.__name__: func for func in self.custom_tools}
+
  # Convert ollama/ to ollama_chat/ for LiteLLM compatibility
  if model_id.startswith("ollama/"):
  model_id = model_id.replace("ollama/", "ollama_chat/", 1)
@@ -862,6 +867,10 @@ class PatchPalAgent:
  self.cumulative_input_tokens = 0
  self.cumulative_output_tokens = 0

+ # Track cache-related tokens (for Anthropic/Bedrock models with prompt caching)
+ self.cumulative_cache_creation_tokens = 0
+ self.cumulative_cache_read_tokens = 0
+
  # LiteLLM settings for models that need parameter dropping
  self.litellm_kwargs = {}
  if self.model_id.startswith("bedrock/"):
@@ -873,6 +882,26 @@ class PatchPalAgent:
  # Custom OpenAI-compatible servers (vLLM, etc.) often don't support all parameters
  self.litellm_kwargs["drop_params"] = True

+ def _prune_tool_outputs_inline(self, max_chars: int, truncation_message: str) -> int:
+ """Unified pruning function for tool outputs.
+
+ Args:
+ max_chars: Maximum characters to keep per tool output
+ truncation_message: Message to append after truncation
+
+ Returns:
+ Number of characters pruned
+ """
+ pruned_chars = 0
+ for msg in self.messages:
+ if msg.get("role") == "tool" and msg.get("content"):
+ content_size = len(str(msg["content"]))
+ if content_size > max_chars:
+ original_size = content_size
+ msg["content"] = str(msg["content"])[:max_chars] + truncation_message
+ pruned_chars += original_size - len(msg["content"])
+ return pruned_chars
+
  def _perform_auto_compaction(self):
  """Perform automatic context window compaction.

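Note: the new _prune_tool_outputs_inline helper truncates oversized tool messages in place and returns the number of characters removed. A minimal sketch of its effect, where agent is a PatchPalAgent instance and the message content and sizes are illustrative, not taken from this diff:

    # Suppose one tool message carries a 40,000-character command output.
    agent.messages = [
        {"role": "tool", "tool_call_id": "call_1", "name": "run_command", "content": "x" * 40_000},
    ]
    pruned = agent._prune_tool_outputs_inline(
        max_chars=5_000,
        truncation_message="\n\n[... truncated ...]",
    )
    # The message now holds its first 5,000 characters plus the notice,
    # and pruned reports roughly 35,000 characters removed.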
@@ -881,10 +910,32 @@ class PatchPalAgent:
  """
  # Don't compact if we have very few messages - compaction summary
  # could be longer than the messages being removed
- if len(self.messages) < 5:
+ # Instead, use aggressive pruning since high capacity with few messages
+ # indicates large tool outputs rather than conversation depth
+ if len(self.messages) < 10:
  print(
- f"\033[2m Skipping compaction - only {len(self.messages)} messages (need at least 5 for effective compaction)\033[0m"
+ f"\033[2m Only {len(self.messages)} messages - using aggressive pruning instead of summarization\033[0m"
  )
+
+ # Aggressively truncate all large tool outputs (5K chars)
+ pruned_chars = self._prune_tool_outputs_inline(
+ max_chars=5_000,
+ truncation_message="\n\n[... content truncated during compaction. Use read_lines or grep_code for targeted access ...]",
+ )
+
+ stats_after = self.context_manager.get_usage_stats(self.messages)
+ if pruned_chars > 0:
+ print(
+ f"\033[1;32m✓ Context reduced to {stats_after['usage_percent']}% through aggressive pruning (removed ~{pruned_chars:,} chars)\033[0m\n"
+ )
+ else:
+ print(
+ f"\033[1;33m⚠️ No large tool outputs to prune. Context at {stats_after['usage_percent']}%.\033[0m"
+ )
+ print("\033[1;33m Consider using '/clear' to start fresh.\033[0m\n")
+
+ # Update tracker to prevent immediate re-compaction
+ self._last_compaction_message_count = len(self.messages)
  return

  # Prevent compaction loops - don't compact again if we just did
@@ -931,6 +982,43 @@ class PatchPalAgent:
  return

  # Phase 2: Full compaction needed
+ # EMERGENCY: If context is at or over capacity (≥100%), do aggressive pruning first
+ # Otherwise the summarization request itself will exceed context limits
+ stats_after_prune = self.context_manager.get_usage_stats(self.messages)
+ if stats_after_prune["usage_ratio"] >= 1.0:
+ print(
+ f"\033[1;31m ⚠️ Context at or over capacity ({stats_after_prune['usage_percent']}%)!\033[0m"
+ )
+ print(
+ "\033[2m Emergency: Aggressively pruning recent large tool outputs...\033[0m",
+ flush=True,
+ )
+
+ # Truncate large tool outputs (10K chars - less aggressive than 5K for few-messages case)
+ emergency_pruned = self._prune_tool_outputs_inline(
+ max_chars=10_000,
+ truncation_message="\n\n[... content truncated due to context window limits ...]",
+ )
+
+ if emergency_pruned > 0:
+ print(
+ f"\033[2m Emergency pruned ~{emergency_pruned:,} chars from large tool outputs\033[0m",
+ flush=True,
+ )
+ stats_after_emergency = self.context_manager.get_usage_stats(self.messages)
+ print(
+ f"\033[2m Context now at {stats_after_emergency['usage_percent']}% capacity\033[0m",
+ flush=True,
+ )
+
+ # If still over 150%, give up and recommend /clear
+ if stats_after_emergency["usage_ratio"] > 1.5:
+ print(
+ f"\033[1;31m✗ Context still too large for compaction ({stats_after_emergency['usage_percent']}%)\033[0m"
+ )
+ print("\033[1;33m Please use '/clear' to start a fresh session.\033[0m\n")
+ return
+
  print("\033[2m Generating conversation summary...\033[0m", flush=True)

  try:
@@ -953,6 +1041,19 @@ class PatchPalAgent:
  self.cumulative_input_tokens += response.usage.prompt_tokens
  if hasattr(response.usage, "completion_tokens"):
  self.cumulative_output_tokens += response.usage.completion_tokens
+ # Track cache statistics (Anthropic/Bedrock prompt caching)
+ if (
+ hasattr(response.usage, "cache_creation_input_tokens")
+ and response.usage.cache_creation_input_tokens
+ ):
+ self.cumulative_cache_creation_tokens += (
+ response.usage.cache_creation_input_tokens
+ )
+ if (
+ hasattr(response.usage, "cache_read_input_tokens")
+ and response.usage.cache_read_input_tokens
+ ):
+ self.cumulative_cache_read_tokens += response.usage.cache_read_input_tokens

  return response

@@ -1029,6 +1130,67 @@ class PatchPalAgent:
  if self.enable_auto_compact and self.context_manager.needs_compaction(self.messages):
  self._perform_auto_compaction()

+ # Agent loop with interrupt handling
+ try:
+ return self._run_agent_loop(max_iterations)
+ except KeyboardInterrupt:
+ # Clean up conversation state if interrupted mid-execution
+ self._cleanup_interrupted_state()
+ raise # Re-raise so CLI can handle it
+
+ def _cleanup_interrupted_state(self):
+ """Clean up conversation state after KeyboardInterrupt.
+
+ If the last message is an assistant message with tool_calls but no
+ corresponding tool responses, we need to either remove the message
+ or add error responses to maintain valid conversation structure.
+ """
+ if not self.messages:
+ return
+
+ last_msg = self.messages[-1]
+
+ # Check if last message is assistant with tool_calls
+ if last_msg.get("role") == "assistant" and last_msg.get("tool_calls"):
+ tool_calls = last_msg["tool_calls"]
+
+ # Check if we have tool responses for all tool_calls
+ tool_call_ids = {tc.id for tc in tool_calls}
+
+ # Look for tool responses after this assistant message
+ # (should be immediately following, but scan to be safe)
+ response_ids = set()
+ for msg in self.messages[self.messages.index(last_msg) + 1 :]:
+ if msg.get("role") == "tool":
+ response_ids.add(msg.get("tool_call_id"))
+
+ # If we're missing responses, add error responses for all tool calls
+ if tool_call_ids != response_ids:
+ missing_ids = tool_call_ids - response_ids
+
+ # Add error tool responses for the missing tool calls
+ for tool_call in tool_calls:
+ if tool_call.id in missing_ids:
+ self.messages.append(
+ {
+ "role": "tool",
+ "tool_call_id": tool_call.id,
+ "name": tool_call.function.name,
+ "content": "Error: Operation interrupted by user (Ctrl-C)",
+ }
+ )
+
+ def _run_agent_loop(self, max_iterations: int) -> str:
+ """Internal method that runs the agent loop.
+
+ Separated from run() to enable proper interrupt handling.
+
+ Args:
+ max_iterations: Maximum number of agent iterations
+
+ Returns:
+ The agent's final response
+ """
  # Agent loop
  for iteration in range(max_iterations):
  # Show thinking message
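Note: chat-completion APIs generally reject a request whose history contains an assistant message announcing tool_calls that have no matching tool responses, which is why _cleanup_interrupted_state appends placeholder results after a Ctrl-C. A sketch of the repaired tail of the conversation (the id and tool name are illustrative):

    # Last message before cleanup: a tool call that never got to run.
    {"role": "assistant", "content": None, "tool_calls": [...]}  # includes id "call_42", name "run_command"
    # Appended by _cleanup_interrupted_state so the history stays valid:
    {"role": "tool", "tool_call_id": "call_42", "name": "run_command",
     "content": "Error: Operation interrupted by user (Ctrl-C)"}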
@@ -1042,10 +1204,18 @@

  # Use LiteLLM for all providers
  try:
+ # Build tool list (built-in + custom)
+ tools = list(TOOLS)
+ if self.custom_tools:
+ from patchpal.tool_schema import function_to_tool_schema
+
+ for func in self.custom_tools:
+ tools.append(function_to_tool_schema(func))
+
  response = litellm.completion(
  model=self.model_id,
  messages=messages,
- tools=TOOLS,
+ tools=tools,
  tool_choice="auto",
  **self.litellm_kwargs,
  )
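Note: the exact schema emitted by patchpal.tool_schema.function_to_tool_schema is not shown in this diff, but for litellm.completion(tools=...) to accept the merged list, each custom entry has to follow the same OpenAI-style function-tool shape as the built-in TOOLS. A hedged sketch of what the calculator example from the create_agent docstring further down would roughly map to:

    {
        "type": "function",
        "function": {
            "name": "calculator",
            "description": "Add two numbers.",
            "parameters": {
                "type": "object",
                "properties": {
                    "x": {"type": "integer", "description": "First number"},
                    "y": {"type": "integer", "description": "Second number"},
                },
                "required": ["x", "y"],
            },
        },
    }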
@@ -1057,6 +1227,19 @@ class PatchPalAgent:
  self.cumulative_input_tokens += response.usage.prompt_tokens
  if hasattr(response.usage, "completion_tokens"):
  self.cumulative_output_tokens += response.usage.completion_tokens
+ # Track cache statistics (Anthropic/Bedrock prompt caching)
+ if (
+ hasattr(response.usage, "cache_creation_input_tokens")
+ and response.usage.cache_creation_input_tokens
+ ):
+ self.cumulative_cache_creation_tokens += (
+ response.usage.cache_creation_input_tokens
+ )
+ if (
+ hasattr(response.usage, "cache_read_input_tokens")
+ and response.usage.cache_read_input_tokens
+ ):
+ self.cumulative_cache_read_tokens += response.usage.cache_read_input_tokens

  except Exception as e:
  return f"Error calling model: {e}"
@@ -1099,15 +1282,25 @@ class PatchPalAgent:
  tool_result = f"Error: Invalid JSON arguments for {tool_name}"
  print(f"\033[1;31m✗ {tool_name}: Invalid arguments\033[0m")
  else:
- # Get the tool function
- tool_func = TOOL_FUNCTIONS.get(tool_name)
+ # Get the tool function (check custom tools first, then built-in)
+ tool_func = self.custom_tool_funcs.get(tool_name) or TOOL_FUNCTIONS.get(
+ tool_name
+ )
  if tool_func is None:
  tool_result = f"Error: Unknown tool {tool_name}"
  print(f"\033[1;31m✗ Unknown tool: {tool_name}\033[0m")
  else:
  # Show tool call message
- tool_display = tool_name.replace("_", " ").title()
- if tool_name == "read_file":
+ if tool_name in self.custom_tool_funcs:
+ # Custom tool - show generic message with args
+ args_preview = str(tool_args)[:60]
+ if len(str(tool_args)) > 60:
+ args_preview += "..."
+ print(
+ f"\033[2m🔧 {tool_name}({args_preview})\033[0m",
+ flush=True,
+ )
+ elif tool_name == "read_file":
  print(
  f"\033[2m📖 Reading: {tool_args.get('path', '')}\033[0m",
  flush=True,
@@ -1250,15 +1443,43 @@ class PatchPalAgent:
  tool_result = tool_func(**filtered_args)
  except Exception as e:
  tool_result = f"Error executing {tool_name}: {e}"
- print(f"\033[1;31m✗ {tool_display}: {e}\033[0m")
+ print(f"\033[1;31m✗ {tool_name}: {e}\033[0m")

  # Add tool result to messages
+ # Check if result is extremely large and might blow context
+ result_str = str(tool_result)
+ result_size = len(result_str)
+
+ # Warn if result is > 100K chars (~33K tokens)
+ if result_size > 100_000:
+ print(
+ f"\033[1;33m⚠️ Large tool output: {result_size:,} chars (~{result_size // 3:,} tokens)\033[0m"
+ )
+
+ # If result would push us WAY over capacity, truncate it
+ current_stats = self.context_manager.get_usage_stats(self.messages)
+ # Estimate tokens in this result
+ result_tokens = self.context_manager.estimator.estimate_tokens(result_str)
+ projected_ratio = (
+ current_stats["total_tokens"] + result_tokens
+ ) / current_stats["context_limit"]
+
+ if projected_ratio > 1.5: # Would exceed 150% capacity
+ print(
+ "\033[1;31m⚠️ Tool output would exceed context capacity! Truncating...\033[0m"
+ )
+ # Keep first 50K chars
+ result_str = (
+ result_str[:50_000]
+ + "\n\n[... Output truncated to prevent context window overflow. Use read_lines or grep_code for targeted access ...]"
+ )
+
  self.messages.append(
  {
  "role": "tool",
  "tool_call_id": tool_call.id,
  "name": tool_name,
- "content": str(tool_result),
+ "content": result_str,
  }
  )

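Worked example of the overflow guard above, with illustrative numbers (not from this diff): if the model's context limit is 200,000 tokens, the conversation already holds 150,000 tokens, and a tool returns output estimated at 180,000 tokens, then projected_ratio = (150,000 + 180,000) / 200,000 = 1.65. That exceeds the 1.5 threshold, so only the first 50,000 characters (roughly 16,000-17,000 tokens at the ~3 characters-per-token estimate used in the warning) are kept and the truncation notice is appended.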
@@ -1299,18 +1520,33 @@ class PatchPalAgent:
  )


- def create_agent(model_id: str = "anthropic/claude-sonnet-4-5") -> PatchPalAgent:
+ def create_agent(model_id: str = "anthropic/claude-sonnet-4-5", custom_tools=None) -> PatchPalAgent:
  """Create and return a PatchPal agent.

  Args:
  model_id: LiteLLM model identifier (default: anthropic/claude-sonnet-4-5)
+ custom_tools: Optional list of Python functions to use as custom tools.
+ Each function should have type hints and a docstring.

  Returns:
  A configured PatchPalAgent instance
+
+ Example:
+ def calculator(x: int, y: int) -> str:
+ '''Add two numbers.
+
+ Args:
+ x: First number
+ y: Second number
+ '''
+ return str(x + y)
+
+ agent = create_agent(custom_tools=[calculator])
+ response = agent.run("What's 5 + 3?")
  """
  # Reset session todos for new session
  from patchpal.tools import reset_session_todos

  reset_session_todos()

- return PatchPalAgent(model_id=model_id)
+ return PatchPalAgent(model_id=model_id, custom_tools=custom_tools)
patchpal/cli.py CHANGED
@@ -211,9 +211,26 @@ Supported models: Any LiteLLM-supported model
  # Determine model to use (priority: CLI arg > env var > default)
  model_id = args.model or os.getenv("PATCHPAL_MODEL") or "anthropic/claude-sonnet-4-5"

- # Create the agent with the specified model
+ # Discover custom tools from ~/.patchpal/tools/
+ from patchpal.tool_schema import discover_tools, list_custom_tools
+
+ custom_tools = discover_tools()
+
+ # Show custom tools info if any were loaded
+ custom_tool_info = list_custom_tools()
+ if custom_tool_info:
+ tool_names = [name for name, _, _ in custom_tool_info]
+ tools_str = ", ".join(tool_names)
+ # Store for later display (after model info)
+ custom_tools_message = (
+ f"\033[1;36m🔧 Loaded {len(custom_tool_info)} custom tool(s): {tools_str}\033[0m"
+ )
+ else:
+ custom_tools_message = None
+
+ # Create the agent with the specified model and custom tools
  # LiteLLM will handle API key validation and provide appropriate error messages
- agent = create_agent(model_id=model_id)
+ agent = create_agent(model_id=model_id, custom_tools=custom_tools)

  # Get max iterations from environment variable or use default
  max_iterations = int(os.getenv("PATCHPAL_MAX_ITERATIONS", "100"))
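Note: beyond the ~/.patchpal/tools/ directory name, the discovery convention is not shown in this diff. Based on the requirements stated in the create_agent docstring (type hints plus a docstring per function), a custom tool module dropped into that directory would plausibly look like the sketch below; the file name and the assumption that discover_tools picks up module-level functions are illustrative, not confirmed by the diff:

    # ~/.patchpal/tools/word_count.py  (hypothetical example)
    def word_count(text: str) -> str:
        """Count the words in a piece of text.

        Args:
            text: The text to count words in.
        """
        return str(len(text.split()))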
@@ -238,6 +255,10 @@ Supported models: Any LiteLLM-supported model
  print("=" * 80)
  print(f"\nUsing model: {model_id}")

+ # Show custom tools info if any were loaded
+ if custom_tools_message:
+ print(custom_tools_message)
+
  # Show require-permission-for-all indicator if active
  if args.require_permission_for_all:
  print("\033[1;33m🔒 Permission required for ALL operations (including reads)\033[0m")
@@ -380,6 +401,55 @@ Supported models: Any LiteLLM-supported model
  total_tokens = agent.cumulative_input_tokens + agent.cumulative_output_tokens
  print(f" Total tokens: {total_tokens:,}")

+ # Show cache statistics if available (Anthropic/Bedrock prompt caching)
+ has_cache_stats = (
+ agent.cumulative_cache_creation_tokens > 0
+ or agent.cumulative_cache_read_tokens > 0
+ )
+ if has_cache_stats:
+ print("\n \033[1;36mPrompt Caching Statistics\033[0m")
+ print(f" Cache write tokens: {agent.cumulative_cache_creation_tokens:,}")
+ print(f" Cache read tokens: {agent.cumulative_cache_read_tokens:,}")
+
+ # Calculate cache hit rate
+ if agent.cumulative_input_tokens > 0:
+ cache_hit_rate = (
+ agent.cumulative_cache_read_tokens / agent.cumulative_input_tokens
+ ) * 100
+ print(f" Cache hit rate: {cache_hit_rate:.1f}%")
+
+ # Show cost-adjusted input tokens (cache reads cost less)
+ # Note: This is an approximation - actual pricing varies by model
+ # For Anthropic: cache writes = 1.25x, cache reads = 0.1x, regular = 1x
+ if "anthropic" in model_id.lower() or "claude" in model_id.lower():
+ # Break down: cumulative_input = non_cached + cache_read + cache_write
+ non_cached_tokens = (
+ agent.cumulative_input_tokens
+ - agent.cumulative_cache_read_tokens
+ - agent.cumulative_cache_creation_tokens
+ )
+ # Approximate cost-equivalent tokens (cache reads cost 10%, cache writes cost 125%)
+ cost_adjusted = (
+ non_cached_tokens
+ + (agent.cumulative_cache_read_tokens * 0.1)
+ + (agent.cumulative_cache_creation_tokens * 1.25)
+ )
+ savings_pct = (
+ (
+ (agent.cumulative_input_tokens - cost_adjusted)
+ / agent.cumulative_input_tokens
+ * 100
+ )
+ if agent.cumulative_input_tokens > 0
+ else 0
+ )
+ print(
+ f" Cost-adjusted input tokens: {cost_adjusted:,.0f} (~{savings_pct:.0f}% savings)"
+ )
+ print(
+ " \033[2m(Cache reads cost 10% of base price, writes cost 125% of base price)\033[0m"
+ )
+
  print("=" * 70 + "\n")
  continue
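Worked example of the cost-adjusted figure above, with illustrative token counts: if cumulative input is 100,000 tokens, of which 80,000 were cache reads and 10,000 were cache writes, the non-cached share is 10,000 and cost_adjusted = 10,000 + (80,000 × 0.1) + (10,000 × 1.25) = 30,500, i.e. roughly 70% savings versus paying full price for all 100,000 input tokens. The cache hit rate reported above would be 80%.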