patchpal 0.6.0-py3-none-any.whl → 0.8.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchpal/__init__.py +1 -1
- patchpal/agent.py +216 -3
- patchpal/cli.py +167 -0
- patchpal/tools.py +4 -1
- {patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/METADATA +37 -20
- patchpal-0.8.0.dist-info/RECORD +15 -0
- patchpal-0.6.0.dist-info/RECORD +0 -15
- {patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/WHEEL +0 -0
- {patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/entry_points.txt +0 -0
- {patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/licenses/LICENSE +0 -0
- {patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/top_level.txt +0 -0
patchpal/__init__.py
CHANGED
patchpal/agent.py
CHANGED

@@ -867,6 +867,14 @@ class PatchPalAgent:
         self.cumulative_input_tokens = 0
         self.cumulative_output_tokens = 0
 
+        # Track cache-related tokens (for Anthropic/Bedrock models with prompt caching)
+        self.cumulative_cache_creation_tokens = 0
+        self.cumulative_cache_read_tokens = 0
+
+        # Track cumulative costs across all LLM calls
+        self.cumulative_cost = 0.0
+        self.last_message_cost = 0.0
+
         # LiteLLM settings for models that need parameter dropping
         self.litellm_kwargs = {}
         if self.model_id.startswith("bedrock/"):

@@ -878,6 +886,26 @@
         # Custom OpenAI-compatible servers (vLLM, etc.) often don't support all parameters
         self.litellm_kwargs["drop_params"] = True
 
+    def _prune_tool_outputs_inline(self, max_chars: int, truncation_message: str) -> int:
+        """Unified pruning function for tool outputs.
+
+        Args:
+            max_chars: Maximum characters to keep per tool output
+            truncation_message: Message to append after truncation
+
+        Returns:
+            Number of characters pruned
+        """
+        pruned_chars = 0
+        for msg in self.messages:
+            if msg.get("role") == "tool" and msg.get("content"):
+                content_size = len(str(msg["content"]))
+                if content_size > max_chars:
+                    original_size = content_size
+                    msg["content"] = str(msg["content"])[:max_chars] + truncation_message
+                    pruned_chars += original_size - len(msg["content"])
+        return pruned_chars
+
     def _perform_auto_compaction(self):
         """Perform automatic context window compaction.
 
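The `_prune_tool_outputs_inline` helper above truncates oversized tool messages in place and reports how many characters it removed. A minimal standalone replay of that loop on fabricated data (the free function and the sample `messages` list below are illustrative stand-ins, not the package's API):

```python
# Sketch of the pruning loop from the hunk above, run outside the class.
def prune_tool_outputs_inline(messages, max_chars, truncation_message):
    pruned_chars = 0
    for msg in messages:
        if msg.get("role") == "tool" and msg.get("content"):
            content_size = len(str(msg["content"]))
            if content_size > max_chars:
                original_size = content_size
                msg["content"] = str(msg["content"])[:max_chars] + truncation_message
                pruned_chars += original_size - len(msg["content"])
    return pruned_chars

messages = [
    {"role": "user", "content": "inspect the repo"},
    {"role": "tool", "content": "x" * 12_000},  # oversized tool output
]
marker = "\n\n[... truncated ...]"
print(prune_tool_outputs_inline(messages, 5_000, marker))
# prints 6979: the 12_000 original chars minus the 5_000 kept and the 21-char marker
```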

@@ -886,10 +914,32 @@
         """
         # Don't compact if we have very few messages - compaction summary
         # could be longer than the messages being removed
-
+        # Instead, use aggressive pruning since high capacity with few messages
+        # indicates large tool outputs rather than conversation depth
+        if len(self.messages) < 10:
             print(
-                f"\033[2m
+                f"\033[2m  Only {len(self.messages)} messages - using aggressive pruning instead of summarization\033[0m"
             )
+
+            # Aggressively truncate all large tool outputs (5K chars)
+            pruned_chars = self._prune_tool_outputs_inline(
+                max_chars=5_000,
+                truncation_message="\n\n[... content truncated during compaction. Use read_lines or grep_code for targeted access ...]",
+            )
+
+            stats_after = self.context_manager.get_usage_stats(self.messages)
+            if pruned_chars > 0:
+                print(
+                    f"\033[1;32m✓ Context reduced to {stats_after['usage_percent']}% through aggressive pruning (removed ~{pruned_chars:,} chars)\033[0m\n"
+                )
+            else:
+                print(
+                    f"\033[1;33m⚠️  No large tool outputs to prune. Context at {stats_after['usage_percent']}%.\033[0m"
+                )
+                print("\033[1;33m  Consider using '/clear' to start fresh.\033[0m\n")
+
+            # Update tracker to prevent immediate re-compaction
+            self._last_compaction_message_count = len(self.messages)
             return
 
         # Prevent compaction loops - don't compact again if we just did

@@ -936,6 +986,43 @@
             return
 
         # Phase 2: Full compaction needed
+        # EMERGENCY: If context is at or over capacity (≥100%), do aggressive pruning first
+        # Otherwise the summarization request itself will exceed context limits
+        stats_after_prune = self.context_manager.get_usage_stats(self.messages)
+        if stats_after_prune["usage_ratio"] >= 1.0:
+            print(
+                f"\033[1;31m  ⚠️ Context at or over capacity ({stats_after_prune['usage_percent']}%)!\033[0m"
+            )
+            print(
+                "\033[2m  Emergency: Aggressively pruning recent large tool outputs...\033[0m",
+                flush=True,
+            )
+
+            # Truncate large tool outputs (10K chars - less aggressive than 5K for few-messages case)
+            emergency_pruned = self._prune_tool_outputs_inline(
+                max_chars=10_000,
+                truncation_message="\n\n[... content truncated due to context window limits ...]",
+            )
+
+            if emergency_pruned > 0:
+                print(
+                    f"\033[2m  Emergency pruned ~{emergency_pruned:,} chars from large tool outputs\033[0m",
+                    flush=True,
+                )
+            stats_after_emergency = self.context_manager.get_usage_stats(self.messages)
+            print(
+                f"\033[2m  Context now at {stats_after_emergency['usage_percent']}% capacity\033[0m",
+                flush=True,
+            )
+
+            # If still over 150%, give up and recommend /clear
+            if stats_after_emergency["usage_ratio"] > 1.5:
+                print(
+                    f"\033[1;31m✗ Context still too large for compaction ({stats_after_emergency['usage_percent']}%)\033[0m"
+                )
+                print("\033[1;33m  Please use '/clear' to start a fresh session.\033[0m\n")
+                return
+
         print("\033[2m  Generating conversation summary...\033[0m", flush=True)
 
         try:

@@ -958,6 +1045,22 @@
            self.cumulative_input_tokens += response.usage.prompt_tokens
            if hasattr(response.usage, "completion_tokens"):
                self.cumulative_output_tokens += response.usage.completion_tokens
+           # Track cache statistics (Anthropic/Bedrock prompt caching)
+           if (
+               hasattr(response.usage, "cache_creation_input_tokens")
+               and response.usage.cache_creation_input_tokens
+           ):
+               self.cumulative_cache_creation_tokens += (
+                   response.usage.cache_creation_input_tokens
+               )
+           if (
+               hasattr(response.usage, "cache_read_input_tokens")
+               and response.usage.cache_read_input_tokens
+           ):
+               self.cumulative_cache_read_tokens += response.usage.cache_read_input_tokens
+
+           # Track cost from compaction call
+           self._calculate_cost(response)
 
         return response
 

@@ -1016,6 +1119,72 @@
                 "\033[1;33m  Continuing without compaction. Consider starting a new session.\033[0m\n"
             )
 
+    def _compute_cost_from_tokens(self, usage):
+        """Manually calculate cost from token usage using model pricing.
+
+        Args:
+            usage: The usage object from the LLM response
+
+        Returns:
+            float: The calculated cost in dollars
+        """
+        try:
+            model_info = litellm.get_model_info(self.model_id)
+            input_cost_per_token = model_info.get("input_cost_per_token", 0)
+            output_cost_per_token = model_info.get("output_cost_per_token", 0)
+
+            cost = 0.0
+
+            # Handle cache pricing for models that support it (e.g., Anthropic)
+            # Cache writes cost 1.25x, cache reads cost 0.1x of base price
+            cache_creation_tokens = 0
+            cache_read_tokens = 0
+
+            if hasattr(usage, "cache_creation_input_tokens") and usage.cache_creation_input_tokens:
+                cache_creation_tokens = usage.cache_creation_input_tokens
+                cost += cache_creation_tokens * input_cost_per_token * 1.25
+
+            if hasattr(usage, "cache_read_input_tokens") and usage.cache_read_input_tokens:
+                cache_read_tokens = usage.cache_read_input_tokens
+                cost += cache_read_tokens * input_cost_per_token * 0.1
+
+            # Regular input tokens (excluding cache tokens)
+            regular_input = usage.prompt_tokens - cache_creation_tokens - cache_read_tokens
+            cost += regular_input * input_cost_per_token
+
+            # Output tokens
+            cost += usage.completion_tokens * output_cost_per_token
+
+            return cost
+        except Exception:
+            # If pricing data is unavailable, return 0
+            return 0.0
+
+    def _calculate_cost(self, response):
+        """Calculate cost from LLM response and update cumulative tracking.
+
+        Args:
+            response: The LLM response object
+
+        Returns:
+            float: The calculated cost in dollars
+        """
+        try:
+            # Try litellm's built-in cost calculator first
+            cost = litellm.completion_cost(completion_response=response)
+        except Exception:
+            cost = 0.0
+
+        if not cost and hasattr(response, "usage") and response.usage:
+            # Fallback: manual calculation using model pricing
+            cost = self._compute_cost_from_tokens(response.usage)
+
+        if isinstance(cost, (int, float)) and cost > 0:
+            self.cumulative_cost += cost
+            self.last_message_cost = cost
+
+        return cost
+
     def run(self, user_message: str, max_iterations: int = 100) -> str:
         """Run the agent on a user message.
 
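Since `_compute_cost_from_tokens` prices cache writes at 1.25x and cache reads at 0.1x of the base input rate, a quick arithmetic check of the fallback path (the $3-per-million-token input price below is an invented illustration, not any model's real rate):

```python
# Worked example of the cache-aware fallback above with invented numbers.
input_cost_per_token = 3.00 / 1_000_000  # assumed $3 / 1M input tokens
prompt_tokens = 50_000
cache_creation_tokens = 20_000  # billed at 1.25x the base rate
cache_read_tokens = 25_000      # billed at 0.1x the base rate
regular_input = prompt_tokens - cache_creation_tokens - cache_read_tokens  # 5,000

cost = (
    cache_creation_tokens * input_cost_per_token * 1.25  # $0.0750
    + cache_read_tokens * input_cost_per_token * 0.1     # $0.0075
    + regular_input * input_cost_per_token               # $0.0150
)
print(f"${cost:.4f}")  # $0.0975, versus $0.1500 for the same prompt with no caching
```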

@@ -1131,6 +1300,22 @@
                self.cumulative_input_tokens += response.usage.prompt_tokens
                if hasattr(response.usage, "completion_tokens"):
                    self.cumulative_output_tokens += response.usage.completion_tokens
+               # Track cache statistics (Anthropic/Bedrock prompt caching)
+               if (
+                   hasattr(response.usage, "cache_creation_input_tokens")
+                   and response.usage.cache_creation_input_tokens
+               ):
+                   self.cumulative_cache_creation_tokens += (
+                       response.usage.cache_creation_input_tokens
+                   )
+               if (
+                   hasattr(response.usage, "cache_read_input_tokens")
+                   and response.usage.cache_read_input_tokens
+               ):
+                   self.cumulative_cache_read_tokens += response.usage.cache_read_input_tokens
+
+               # Track cost from this LLM call
+               self._calculate_cost(response)
 
            except Exception as e:
                return f"Error calling model: {e}"

@@ -1337,12 +1522,40 @@
            print(f"\033[1;31m✗ {tool_name}: {e}\033[0m")
 
        # Add tool result to messages
+       # Check if result is extremely large and might blow context
+       result_str = str(tool_result)
+       result_size = len(result_str)
+
+       # Warn if result is > 100K chars (~33K tokens)
+       if result_size > 100_000:
+           print(
+               f"\033[1;33m⚠️  Large tool output: {result_size:,} chars (~{result_size // 3:,} tokens)\033[0m"
+           )
+
+           # If result would push us WAY over capacity, truncate it
+           current_stats = self.context_manager.get_usage_stats(self.messages)
+           # Estimate tokens in this result
+           result_tokens = self.context_manager.estimator.estimate_tokens(result_str)
+           projected_ratio = (
+               current_stats["total_tokens"] + result_tokens
+           ) / current_stats["context_limit"]
+
+           if projected_ratio > 1.5:  # Would exceed 150% capacity
+               print(
+                   "\033[1;31m⚠️  Tool output would exceed context capacity! Truncating...\033[0m"
+               )
+               # Keep first 50K chars
+               result_str = (
+                   result_str[:50_000]
+                   + "\n\n[... Output truncated to prevent context window overflow. Use read_lines or grep_code for targeted access ...]"
+               )
+
        self.messages.append(
            {
                "role": "tool",
                "tool_call_id": tool_call.id,
                "name": tool_name,
-               "content":
+               "content": result_str,
            }
        )
 
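The new guard in the tool-result path projects whether appending an output would push the conversation past 150% of the context limit before it is ever added. A self-contained sketch of that projection with literal stand-ins for the `context_manager` calls (the limit, sizes, and the ~3-chars-per-token heuristic mirror the diff; the concrete numbers are invented):

```python
# Sketch of the capacity projection above; constants replace context_manager.
context_limit = 200_000      # assumed model context window, in tokens
total_tokens = 180_000       # tokens already in the conversation
result_str = "y" * 400_000   # hypothetical oversized tool output

result_tokens = len(result_str) // 3  # same ~3 chars/token estimate as the diff
projected_ratio = (total_tokens + result_tokens) / context_limit
print(f"{projected_ratio:.2f}")  # 1.57

if projected_ratio > 1.5:  # would exceed 150% capacity, so truncate
    result_str = result_str[:50_000] + "\n\n[... truncated ...]"
print(len(result_str))  # 50021: first 50K chars plus the marker
```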
patchpal/cli.py
CHANGED

@@ -16,6 +16,116 @@ from patchpal.agent import create_agent
 from patchpal.tools import audit_logger
 
 
+def _format_cost(value: float) -> str:
+    """Format cost with smart precision.
+
+    Args:
+        value: Cost in dollars
+
+    Returns:
+        Formatted cost string (e.g., "0.0234" or "0.00145")
+    """
+    if value == 0:
+        return "0.00"
+    magnitude = abs(value)
+    if magnitude >= 0.01:
+        return f"{value:.2f}"
+    else:
+        # For very small costs, show more decimal places
+        import math
+
+        return f"{value:.{max(2, 2 - int(math.log10(magnitude)))}f}"
+
+
+def _print_cost_statistics(
+    agent, total_tokens: int, show_header: bool = False, show_disclaimer: bool = False
+):
+    """Print cost statistics section.
+
+    Args:
+        agent: PatchPalAgent instance
+        total_tokens: Total token count for calculating averages
+        show_header: If True, print a section header
+        show_disclaimer: If True, show disclaimer about checking provider bills
+    """
+    if agent.cumulative_cost > 0:
+        if show_header:
+            print("\n  \033[1;36mCost Statistics\033[0m")
+        print(f"  Session cost: ${_format_cost(agent.cumulative_cost)} (estimated)")
+
+        if show_disclaimer:
+            print(
+                "  \033[2m(Calculated from token counts - check provider bill for exact cost)\033[0m"
+            )
+
+        # Show cost breakdown if we have token data
+        if total_tokens > 0:
+            cost_per_1k = (agent.cumulative_cost / total_tokens) * 1000
+            print(f"  Average: ${_format_cost(cost_per_1k)} per 1K tokens")
+    elif agent.total_llm_calls > 0:
+        # Model might not have pricing data (e.g., local Ollama)
+        if show_header:
+            print()
+        print("  \033[2mCost tracking unavailable (no pricing data for this model)\033[0m")
+
+
+def _print_session_summary(agent, show_detailed: bool = False):
+    """Print session statistics summary.
+
+    Args:
+        agent: PatchPalAgent instance
+        show_detailed: If True, show detailed breakdown; if False, show compact summary
+    """
+    # Guard against missing attributes (e.g., in tests with mock agents)
+    if (
+        not hasattr(agent, "total_llm_calls")
+        or not isinstance(agent.total_llm_calls, int)
+        or agent.total_llm_calls == 0
+    ):
+        return
+
+    print("\n" + "=" * 70)
+    print("\033[1;36mSession Summary\033[0m")
+    print("=" * 70)
+    print(f"  LLM calls: {agent.total_llm_calls}")
+
+    # Show token usage if available
+    has_usage_info = (
+        hasattr(agent, "cumulative_input_tokens")
+        and hasattr(agent, "cumulative_output_tokens")
+        and (agent.cumulative_input_tokens > 0 or agent.cumulative_output_tokens > 0)
+    )
+    if has_usage_info:
+        total_tokens = agent.cumulative_input_tokens + agent.cumulative_output_tokens
+        print(f"  Total tokens: {total_tokens:,}")
+
+        # Show cache hit rate if caching was used
+        if (
+            hasattr(agent, "cumulative_cache_read_tokens")
+            and hasattr(agent, "cumulative_input_tokens")
+            and agent.cumulative_cache_read_tokens > 0
+        ):
+            cache_hit_rate = (
+                agent.cumulative_cache_read_tokens / agent.cumulative_input_tokens
+            ) * 100
+            print(f"  Cache hit rate: {cache_hit_rate:.1f}%")
+
+    # Show cost statistics
+    if has_usage_info:
+        total_tokens = agent.cumulative_input_tokens + agent.cumulative_output_tokens
+    else:
+        total_tokens = 0
+
+    if show_detailed:
+        _print_cost_statistics(agent, total_tokens, show_header=False, show_disclaimer=False)
+    else:
+        # Show cost if available (compact version)
+        if hasattr(agent, "cumulative_cost") and agent.cumulative_cost > 0:
+            print(f"  Session cost: ${_format_cost(agent.cumulative_cost)} (estimated)")
+
+    print("=" * 70)
+
+
 class SkillCompleter(Completer):
     """Completer for skill names when input starts with /"""
 
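`_format_cost` keeps two decimals for amounts of a cent or more and widens the precision as values shrink. A quick replay of its branches on sample values (the function body is copied from the hunk above for a standalone check; it is not imported from the installed package):

```python
import math

# Copied from the diff above to trace the precision rules in isolation.
def _format_cost(value: float) -> str:
    if value == 0:
        return "0.00"
    magnitude = abs(value)
    if magnitude >= 0.01:
        return f"{value:.2f}"
    # For very small costs, show more decimal places
    return f"{value:.{max(2, 2 - int(math.log10(magnitude)))}f}"

for v in (0, 0.5, 0.0234, 0.00145, 0.000089):
    print(_format_cost(v))  # 0.00, 0.50, 0.02, 0.0015, 0.000089
```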

@@ -304,6 +414,9 @@ Supported models: Any LiteLLM-supported model
 
        # Check for exit commands
        if user_input.lower() in ["exit", "quit", "q"]:
+           # Show session statistics before exiting
+           _print_session_summary(agent, show_detailed=False)
+
            print("\nGoodbye!")
            break
 

@@ -401,6 +514,60 @@ Supported models: Any LiteLLM-supported model
            total_tokens = agent.cumulative_input_tokens + agent.cumulative_output_tokens
            print(f"  Total tokens: {total_tokens:,}")
 
+           # Show cache statistics if available (Anthropic/Bedrock prompt caching)
+           has_cache_stats = (
+               agent.cumulative_cache_creation_tokens > 0
+               or agent.cumulative_cache_read_tokens > 0
+           )
+           if has_cache_stats:
+               print("\n  \033[1;36mPrompt Caching Statistics\033[0m")
+               print(f"  Cache write tokens: {agent.cumulative_cache_creation_tokens:,}")
+               print(f"  Cache read tokens: {agent.cumulative_cache_read_tokens:,}")
+
+               # Calculate cache hit rate
+               if agent.cumulative_input_tokens > 0:
+                   cache_hit_rate = (
+                       agent.cumulative_cache_read_tokens / agent.cumulative_input_tokens
+                   ) * 100
+                   print(f"  Cache hit rate: {cache_hit_rate:.1f}%")
+
+               # Show cost-adjusted input tokens (cache reads cost less)
+               # Note: This is an approximation - actual pricing varies by model
+               # For Anthropic: cache writes = 1.25x, cache reads = 0.1x, regular = 1x
+               if "anthropic" in model_id.lower() or "claude" in model_id.lower():
+                   # Break down: cumulative_input = non_cached + cache_read + cache_write
+                   non_cached_tokens = (
+                       agent.cumulative_input_tokens
+                       - agent.cumulative_cache_read_tokens
+                       - agent.cumulative_cache_creation_tokens
+                   )
+                   # Approximate cost-equivalent tokens (cache reads cost 10%, cache writes cost 125%)
+                   cost_adjusted = (
+                       non_cached_tokens
+                       + (agent.cumulative_cache_read_tokens * 0.1)
+                       + (agent.cumulative_cache_creation_tokens * 1.25)
+                   )
+                   savings_pct = (
+                       (
+                           (agent.cumulative_input_tokens - cost_adjusted)
+                           / agent.cumulative_input_tokens
+                           * 100
+                       )
+                       if agent.cumulative_input_tokens > 0
+                       else 0
+                   )
+                   print(
+                       f"  Cost-adjusted input tokens: {cost_adjusted:,.0f} (~{savings_pct:.0f}% savings)"
+                   )
+                   print(
+                       "  \033[2m(Cache reads cost 10% of base price, writes cost 125% of base price)\033[0m"
+                   )
+
+           # Show cost statistics if available
+           _print_cost_statistics(
+               agent, total_tokens, show_header=True, show_disclaimer=True
+           )
+
            print("=" * 70 + "\n")
            continue
 
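The cost-adjusted figure above re-weights the session's input tokens by the Anthropic cache multipliers (reads at 10%, writes at 125% of the base price). A worked example with invented session totals, just to trace the arithmetic:

```python
# Invented session totals to trace the savings calculation above.
cumulative_input_tokens = 100_000
cache_read_tokens = 70_000    # billed at 10% of the base input price
cache_write_tokens = 10_000   # billed at 125% of the base input price
non_cached_tokens = cumulative_input_tokens - cache_read_tokens - cache_write_tokens

cost_adjusted = (
    non_cached_tokens              # 20,000 cost-equivalent tokens at 1x
    + cache_read_tokens * 0.1      # 7,000 cost-equivalent tokens
    + cache_write_tokens * 1.25    # 12,500 cost-equivalent tokens
)
savings_pct = (cumulative_input_tokens - cost_adjusted) / cumulative_input_tokens * 100
print(f"{cost_adjusted:,.0f} (~{savings_pct:.0f}% savings)")  # 39,500 (~60% savings)
```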
patchpal/tools.py
CHANGED

@@ -80,7 +80,10 @@ CRITICAL_FILES = {
 }
 
 # Configuration
-
+# Reduced from 10MB to 500KB to prevent context window explosions
+# A 3.46MB file = ~1.15M tokens which exceeds most model context limits (128K-200K)
+# 500KB ≈ 166K tokens which is safe for most models
+MAX_FILE_SIZE = int(os.getenv("PATCHPAL_MAX_FILE_SIZE", 500 * 1024))  # 500KB default
 READ_ONLY_MODE = os.getenv("PATCHPAL_READ_ONLY", "false").lower() == "true"
 ALLOW_SENSITIVE = os.getenv("PATCHPAL_ALLOW_SENSITIVE", "false").lower() == "true"
 ENABLE_AUDIT_LOG = os.getenv("PATCHPAL_AUDIT_LOG", "true").lower() == "true"
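The new 500KB cap is read from the environment at import time, so it can still be raised per session. A small sketch of the override and of the rough token budget behind the chosen default (the 1MB value is just an example; the ~3-chars-per-token estimate follows the diff's own comments):

```python
import os

# Example override before the module is imported; 1MB is an arbitrary choice.
os.environ["PATCHPAL_MAX_FILE_SIZE"] = str(1024 * 1024)
MAX_FILE_SIZE = int(os.getenv("PATCHPAL_MAX_FILE_SIZE", 500 * 1024))
print(MAX_FILE_SIZE)    # 1048576

# Rough token budget behind the 500KB default at ~3 chars per token.
print(500 * 1024 // 3)  # ≈ 170,000 tokens
```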

{patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: patchpal
-Version: 0.6.0
+Version: 0.8.0
 Summary: A lean Claude Code clone in pure Python
 Author: PatchPal Contributors
 License-Expression: Apache-2.0

@@ -129,7 +129,7 @@ export HOSTED_VLLM_API_KEY=token-abc123 # optional depending on your v
 patchpal
 
 # Use a specific model via command-line argument
-patchpal --model openai/gpt-
+patchpal --model openai/gpt-5.2  # or openai/gpt-5-mini, anthropic/claude-opus-4-5 etc.
 
 # Use vLLM (local)
 # Note: vLLM server must be started with --tool-call-parser and --enable-auto-tool-choice

@@ -143,7 +143,7 @@ export OLLAMA_CONTEXT_LENGTH=32768
 patchpal --model ollama_chat/qwen3:32b
 
 # Or set the model via environment variable
-export PATCHPAL_MODEL=openai/gpt-5
+export PATCHPAL_MODEL=openai/gpt-5.2
 patchpal
 ```
 

@@ -155,6 +155,8 @@ The agent has the following tools:
 
 ### File Operations
 - **read_file**: Read contents of files in the repository
+  - Limited to 500KB by default (configurable with `PATCHPAL_MAX_FILE_SIZE`)
+  - For larger files, use `read_lines` or `grep_code` for targeted access
 - **read_lines**: Read specific line ranges from a file without loading the entire file
   - Example: `read_lines("app.py", 100, 150)` - read lines 100-150
   - More efficient than read_file when you only need a few lines

@@ -281,7 +283,7 @@ cd patchpal
 # Copy examples to your personal skills directory
 cp -r examples/skills/commit ~/.patchpal/skills/
 cp -r examples/skills/review ~/.patchpal/skills/
-cp -r examples/skills/
+cp -r examples/skills/skill-creator ~/.patchpal/skills/
 ```
 
 **View examples online:**

@@ -368,18 +370,28 @@ Custom tools are Python functions with specific requirements:
 ```python
 # ~/.patchpal/tools/my_tools.py
 
-def
-"""
+def calculator(x: int, y: int, operation: str = "add") -> str:
+    """Perform basic arithmetic operations.
 
     Args:
         x: First number
         y: Second number
+        operation: Operation to perform (add, subtract, multiply, divide)
 
     Returns:
-
+        Result as a string
     """
-
-
+    if operation == "add":
+        return f"{x} + {y} = {x + y}"
+    elif operation == "subtract":
+        return f"{x} - {y} = {x - y}"
+    elif operation == "multiply":
+        return f"{x} * {y} = {x * y}"
+    elif operation == "divide":
+        if y == 0:
+            return "Error: Cannot divide by zero"
+        return f"{x} / {y} = {x / y}"
+    return "Unknown operation"
 
 
 def convert_currency(amount: float, from_currency: str, to_currency: str) -> str:

@@ -407,11 +419,15 @@ Once loaded, the agent calls your custom tools automatically:
 
 ```bash
 You: What's 15 + 27?
-Agent: [Calls
+Agent: [Calls calculator(15, 27, "add")]
 15 + 27 = 42
 
+You: What's 100 divided by 4?
+Agent: [Calls calculator(100, 4, "divide")]
+100 / 4 = 25
+
 You: Convert 100 USD to EUR
-Agent: [Calls convert_currency
+Agent: [Calls convert_currency(100, "USD", "EUR")]
 100 USD = 85.00 EUR
 ```
 

@@ -514,14 +530,14 @@ PatchPal supports any LiteLLM-compatible model. You can configure the model in t
 
 ### 1. Command-line Argument
 ```bash
-patchpal --model openai/gpt-5
+patchpal --model openai/gpt-5.2
 patchpal --model anthropic/claude-sonnet-4-5
 patchpal --model hosted_vllm/openai/gpt-oss-20b  # local model - no API charges
 ```
 
 ### 2. Environment Variable
 ```bash
-export PATCHPAL_MODEL=openai/gpt-5
+export PATCHPAL_MODEL=openai/gpt-5.2
 patchpal
 ```
 

@@ -533,7 +549,7 @@ If no model is specified, PatchPal uses `anthropic/claude-sonnet-4-5` (Claude So
 PatchPal works with any model supported by LiteLLM, including:
 
 - **Anthropic** (Recommended): `anthropic/claude-sonnet-4-5`, `anthropic/claude-opus-4-5`, `anthropic/claude-3-7-sonnet-latest`
-- **OpenAI**: `openai/gpt-5`, `openai/gpt-
+- **OpenAI**: `openai/gpt-5.2`, `openai/gpt-5-mini`
 - **AWS Bedrock**: `bedrock/anthropic.claude-sonnet-4-5-v1:0`
 - **vLLM (Local)** (Recommended for local): See vLLM section below for setup
 - **Ollama (Local)**: See Ollama section below for setup

@@ -1033,7 +1049,7 @@ PatchPal can be configured through `PATCHPAL_*` environment variables to customi
 ### Model Selection
 
 ```bash
-export PATCHPAL_MODEL=openai/gpt-
+export PATCHPAL_MODEL=openai/gpt-5.2  # Override default model
 # Priority: CLI arg > PATCHPAL_MODEL env var > default (anthropic/claude-sonnet-4-5)
 ```
 

@@ -1045,11 +1061,12 @@ export PATCHPAL_REQUIRE_PERMISSION=true # Prompt before executing commands/
 # ⚠️ WARNING: Setting to false disables prompts - only use in trusted environments
 
 # File Safety
-export PATCHPAL_MAX_FILE_SIZE=
+export PATCHPAL_MAX_FILE_SIZE=512000  # Maximum file size in bytes for read/write (default: 500KB)
+                                      # Reduced from 10MB to prevent context window explosions
 export PATCHPAL_READ_ONLY=true        # Prevent ALL file modifications (default: false)
-
+                                      # Useful for: code review, exploration, security audits
 export PATCHPAL_ALLOW_SENSITIVE=true  # Allow access to .env, credentials (default: false - blocked)
-
+                                      # Only enable with test/dummy credentials
 
 # Command Safety
 export PATCHPAL_ALLOW_SUDO=true       # Allow sudo/privilege escalation (default: false - blocked)

@@ -1191,7 +1208,7 @@ PatchPal includes comprehensive security protections enabled by default:
 **Critical Security:**
 - **Permission prompts**: Agent asks for permission before executing commands or modifying files (like Claude Code)
 - **Sensitive file protection**: Blocks access to `.env`, credentials, API keys
-- **File size limits**: Prevents OOM with configurable size limits (
+- **File size limits**: Prevents OOM and context explosions with configurable size limits (500KB default)
 - **Binary file detection**: Blocks reading non-text files
 - **Critical file warnings**: Warns when modifying infrastructure files (package.json, Dockerfile, etc.)
 - **Read-only mode**: Optional mode that prevents all modifications

@@ -1453,7 +1470,7 @@ When using cloud LLM providers (Anthropic, OpenAI, etc.), token usage directly i
 - Use less expensive models for routine tasks:
   ```bash
   patchpal --model anthropic/claude-3-7-sonnet-latest  # Cheaper than claude-sonnet-4-5
-  patchpal --model openai/gpt-
+  patchpal --model openai/gpt-5-mini  # Cheaper than gpt-5.2
   ```
 - Reserve premium models for complex reasoning tasks
 
patchpal-0.8.0.dist-info/RECORD
ADDED

@@ -0,0 +1,15 @@
+patchpal/__init__.py,sha256=lQlcUScZhf19wSBOBVKGHQzilSNoZC4JuaP5bKuE8Yw,606
+patchpal/agent.py,sha256=u5x4wOv4ComTWWKT9TeoHtXh6OkOcZyP89CGwdUAlPE,69601
+patchpal/cli.py,sha256=a-X57sSKLqkA5CB919-cL__KE6dHl1Q340BMOSTjxQg,31155
+patchpal/context.py,sha256=hdTUvyAXXUP47JY1Q3YJDU7noGAcHuBGlNuU272Fjp4,14831
+patchpal/permissions.py,sha256=pVlzit2KFmCpfcbHrHhjPA0LPka04wOtaQdZCf3CCa0,10781
+patchpal/skills.py,sha256=ESLPHkDI8DH4mnAbN8mIcbZ6Bis4vCcqS_NjlYPNCOs,3926
+patchpal/system_prompt.md,sha256=LQzcILr41s65hk7JjaX_WzjUHBHCazVSrx_F_ErqTmA,10850
+patchpal/tool_schema.py,sha256=dGEGYV160G9c7EnSMtnbQ_mYuoR1n6PHHE8T20BriYE,8357
+patchpal/tools.py,sha256=eZ5eh8DKYyqO95Vdu-tn1_6-W6OsBbY4JL5APGyp-tc,94018
+patchpal-0.8.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+patchpal-0.8.0.dist-info/METADATA,sha256=t8DaEZQTeEXAp4Ndk7vHwqqJswvo2LnSXcMAvM2LQ6s,58247
+patchpal-0.8.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+patchpal-0.8.0.dist-info/entry_points.txt,sha256=XcuQikKu5i8Sd8AfHLuKxSE2RWByInTcQgWpP61sr48,47
+patchpal-0.8.0.dist-info/top_level.txt,sha256=YWgv2F-_PIHCu-sF3AF8N1ut5_FbOT-VV6HB70pGSQ8,9
+patchpal-0.8.0.dist-info/RECORD,,
patchpal-0.6.0.dist-info/RECORD
DELETED

@@ -1,15 +0,0 @@
-patchpal/__init__.py,sha256=S3dYO3L8dSQG2Eaosbu4Pbdq5eTxXLmmvxSzh-TIPiI,606
-patchpal/agent.py,sha256=ayMkZUoohUsf5Tz4esBjOPZUvBT5n-ijOzoOp3c9LAA,59719
-patchpal/cli.py,sha256=6Imrd4hGupIrTi9jnnfwvraNZ_Pq0VJxfo6aSjLRoCY,24131
-patchpal/context.py,sha256=hdTUvyAXXUP47JY1Q3YJDU7noGAcHuBGlNuU272Fjp4,14831
-patchpal/permissions.py,sha256=pVlzit2KFmCpfcbHrHhjPA0LPka04wOtaQdZCf3CCa0,10781
-patchpal/skills.py,sha256=ESLPHkDI8DH4mnAbN8mIcbZ6Bis4vCcqS_NjlYPNCOs,3926
-patchpal/system_prompt.md,sha256=LQzcILr41s65hk7JjaX_WzjUHBHCazVSrx_F_ErqTmA,10850
-patchpal/tool_schema.py,sha256=dGEGYV160G9c7EnSMtnbQ_mYuoR1n6PHHE8T20BriYE,8357
-patchpal/tools.py,sha256=YAUX2-8BBqjZEadIWlUdO-KV2-WHGazgKdMHkYRAExI,93819
-patchpal-0.6.0.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-patchpal-0.6.0.dist-info/METADATA,sha256=hjleiaXTNaavuW0OygY1XPdbuflYxMQb0hAWw9pGWPw,57384
-patchpal-0.6.0.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
-patchpal-0.6.0.dist-info/entry_points.txt,sha256=XcuQikKu5i8Sd8AfHLuKxSE2RWByInTcQgWpP61sr48,47
-patchpal-0.6.0.dist-info/top_level.txt,sha256=YWgv2F-_PIHCu-sF3AF8N1ut5_FbOT-VV6HB70pGSQ8,9
-patchpal-0.6.0.dist-info/RECORD,,

{patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/WHEEL
File without changes

{patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/entry_points.txt
File without changes

{patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/licenses/LICENSE
File without changes

{patchpal-0.6.0.dist-info → patchpal-0.8.0.dist-info}/top_level.txt
File without changes