patchpal-0.4.5-py3-none-any.whl → patchpal-0.7.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- patchpal/__init__.py +1 -1
- patchpal/agent.py +248 -12
- patchpal/cli.py +72 -2
- patchpal/tool_schema.py +288 -0
- patchpal/tools.py +21 -2
- {patchpal-0.4.5.dist-info → patchpal-0.7.1.dist-info}/METADATA +402 -17
- patchpal-0.7.1.dist-info/RECORD +15 -0
- patchpal-0.4.5.dist-info/RECORD +0 -14
- {patchpal-0.4.5.dist-info → patchpal-0.7.1.dist-info}/WHEEL +0 -0
- {patchpal-0.4.5.dist-info → patchpal-0.7.1.dist-info}/entry_points.txt +0 -0
- {patchpal-0.4.5.dist-info → patchpal-0.7.1.dist-info}/licenses/LICENSE +0 -0
- {patchpal-0.4.5.dist-info → patchpal-0.7.1.dist-info}/top_level.txt +0 -0
patchpal/__init__.py
CHANGED
patchpal/agent.py
CHANGED
@@ -811,12 +811,17 @@ def _apply_prompt_caching(messages: List[Dict[str, Any]], model_id: str) -> List
 class PatchPalAgent:
     """Simple agent that uses LiteLLM for tool calling."""
 
-    def __init__(self, model_id: str = "anthropic/claude-sonnet-4-5"):
+    def __init__(self, model_id: str = "anthropic/claude-sonnet-4-5", custom_tools=None):
         """Initialize the agent.
 
         Args:
             model_id: LiteLLM model identifier
+            custom_tools: Optional list of Python functions to add as tools
         """
+        # Store custom tools
+        self.custom_tools = custom_tools or []
+        self.custom_tool_funcs = {func.__name__: func for func in self.custom_tools}
+
         # Convert ollama/ to ollama_chat/ for LiteLLM compatibility
         if model_id.startswith("ollama/"):
             model_id = model_id.replace("ollama/", "ollama_chat/", 1)
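Note that the new `custom_tool_funcs` dict keys tools by `__name__`, so each custom tool must be a named function. A minimal sketch of that mapping, using a hypothetical `calculator` tool that is not part of the package:

    # Hypothetical custom tool: any named function with type hints and a docstring.
    def calculator(x: int, y: int) -> str:
        """Add two numbers."""
        return str(x + y)

    # The registry built in __init__: tool name -> callable.
    custom_tools = [calculator]
    custom_tool_funcs = {func.__name__: func for func in custom_tools}
    assert custom_tool_funcs["calculator"](2, 3) == "5"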
@@ -862,6 +867,10 @@ class PatchPalAgent:
         self.cumulative_input_tokens = 0
         self.cumulative_output_tokens = 0
 
+        # Track cache-related tokens (for Anthropic/Bedrock models with prompt caching)
+        self.cumulative_cache_creation_tokens = 0
+        self.cumulative_cache_read_tokens = 0
+
         # LiteLLM settings for models that need parameter dropping
         self.litellm_kwargs = {}
         if self.model_id.startswith("bedrock/"):
@@ -873,6 +882,26 @@ class PatchPalAgent:
             # Custom OpenAI-compatible servers (vLLM, etc.) often don't support all parameters
             self.litellm_kwargs["drop_params"] = True
 
+    def _prune_tool_outputs_inline(self, max_chars: int, truncation_message: str) -> int:
+        """Unified pruning function for tool outputs.
+
+        Args:
+            max_chars: Maximum characters to keep per tool output
+            truncation_message: Message to append after truncation
+
+        Returns:
+            Number of characters pruned
+        """
+        pruned_chars = 0
+        for msg in self.messages:
+            if msg.get("role") == "tool" and msg.get("content"):
+                content_size = len(str(msg["content"]))
+                if content_size > max_chars:
+                    original_size = content_size
+                    msg["content"] = str(msg["content"])[:max_chars] + truncation_message
+                    pruned_chars += original_size - len(msg["content"])
+        return pruned_chars
+
     def _perform_auto_compaction(self):
         """Perform automatic context window compaction.
 
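The helper above mutates `self.messages` in place and returns the number of characters it removed. A standalone sketch of the same pass over a toy message list (the `prune_tool_outputs` name and message shapes are illustrative assumptions):

    # Standalone version of the truncation pass over chat messages.
    def prune_tool_outputs(messages, max_chars, truncation_message):
        pruned_chars = 0
        for msg in messages:
            if msg.get("role") == "tool" and msg.get("content"):
                content = str(msg["content"])
                if len(content) > max_chars:
                    msg["content"] = content[:max_chars] + truncation_message
                    pruned_chars += len(content) - len(msg["content"])
        return pruned_chars

    messages = [
        {"role": "user", "content": "run the tests"},
        {"role": "tool", "content": "x" * 20_000},  # oversized tool output
    ]
    removed = prune_tool_outputs(messages, 5_000, "\n\n[... truncated ...]")
    print(f"pruned ~{removed:,} chars")  # 20,000 cut to 5,000 + marker: ~15,000 pruned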
@@ -881,10 +910,32 @@ class PatchPalAgent:
         """
         # Don't compact if we have very few messages - compaction summary
         # could be longer than the messages being removed
-
+        # Instead, use aggressive pruning since high capacity with few messages
+        # indicates large tool outputs rather than conversation depth
+        if len(self.messages) < 10:
             print(
-                f"\033[2m
+                f"\033[2m  Only {len(self.messages)} messages - using aggressive pruning instead of summarization\033[0m"
             )
+
+            # Aggressively truncate all large tool outputs (5K chars)
+            pruned_chars = self._prune_tool_outputs_inline(
+                max_chars=5_000,
+                truncation_message="\n\n[... content truncated during compaction. Use read_lines or grep_code for targeted access ...]",
+            )
+
+            stats_after = self.context_manager.get_usage_stats(self.messages)
+            if pruned_chars > 0:
+                print(
+                    f"\033[1;32m✓ Context reduced to {stats_after['usage_percent']}% through aggressive pruning (removed ~{pruned_chars:,} chars)\033[0m\n"
+                )
+            else:
+                print(
+                    f"\033[1;33m⚠️  No large tool outputs to prune. Context at {stats_after['usage_percent']}%.\033[0m"
+                )
+                print("\033[1;33m  Consider using '/clear' to start fresh.\033[0m\n")
+
+            # Update tracker to prevent immediate re-compaction
+            self._last_compaction_message_count = len(self.messages)
             return
 
         # Prevent compaction loops - don't compact again if we just did
@@ -931,6 +982,43 @@ class PatchPalAgent:
             return
 
         # Phase 2: Full compaction needed
+        # EMERGENCY: If context is at or over capacity (≥100%), do aggressive pruning first
+        # Otherwise the summarization request itself will exceed context limits
+        stats_after_prune = self.context_manager.get_usage_stats(self.messages)
+        if stats_after_prune["usage_ratio"] >= 1.0:
+            print(
+                f"\033[1;31m  ⚠️  Context at or over capacity ({stats_after_prune['usage_percent']}%)!\033[0m"
+            )
+            print(
+                "\033[2m  Emergency: Aggressively pruning recent large tool outputs...\033[0m",
+                flush=True,
+            )
+
+            # Truncate large tool outputs (10K chars - less aggressive than 5K for few-messages case)
+            emergency_pruned = self._prune_tool_outputs_inline(
+                max_chars=10_000,
+                truncation_message="\n\n[... content truncated due to context window limits ...]",
+            )
+
+            if emergency_pruned > 0:
+                print(
+                    f"\033[2m  Emergency pruned ~{emergency_pruned:,} chars from large tool outputs\033[0m",
+                    flush=True,
+                )
+            stats_after_emergency = self.context_manager.get_usage_stats(self.messages)
+            print(
+                f"\033[2m  Context now at {stats_after_emergency['usage_percent']}% capacity\033[0m",
+                flush=True,
+            )
+
+            # If still over 150%, give up and recommend /clear
+            if stats_after_emergency["usage_ratio"] > 1.5:
+                print(
+                    f"\033[1;31m✗ Context still too large for compaction ({stats_after_emergency['usage_percent']}%)\033[0m"
+                )
+                print("\033[1;33m  Please use '/clear' to start a fresh session.\033[0m\n")
+                return
+
         print("\033[2m  Generating conversation summary...\033[0m", flush=True)
 
         try:
@@ -953,6 +1041,19 @@ class PatchPalAgent:
             self.cumulative_input_tokens += response.usage.prompt_tokens
         if hasattr(response.usage, "completion_tokens"):
             self.cumulative_output_tokens += response.usage.completion_tokens
+        # Track cache statistics (Anthropic/Bedrock prompt caching)
+        if (
+            hasattr(response.usage, "cache_creation_input_tokens")
+            and response.usage.cache_creation_input_tokens
+        ):
+            self.cumulative_cache_creation_tokens += (
+                response.usage.cache_creation_input_tokens
+            )
+        if (
+            hasattr(response.usage, "cache_read_input_tokens")
+            and response.usage.cache_read_input_tokens
+        ):
+            self.cumulative_cache_read_tokens += response.usage.cache_read_input_tokens
 
         return response
 
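Both completion call sites guard the cache fields with `hasattr` plus a truthiness check, since providers without prompt caching omit them or return None. A sketch of an equivalent defensive read using `getattr` (the `accumulate_cache_stats` helper is illustrative, not the package's code):

    from types import SimpleNamespace

    # Equivalent defensive accumulation: missing, None, or zero fields add nothing.
    def accumulate_cache_stats(usage, totals):
        totals["cache_creation"] += getattr(usage, "cache_creation_input_tokens", 0) or 0
        totals["cache_read"] += getattr(usage, "cache_read_input_tokens", 0) or 0

    totals = {"cache_creation": 0, "cache_read": 0}
    usage = SimpleNamespace(cache_creation_input_tokens=1_200, cache_read_input_tokens=9_800)
    accumulate_cache_stats(usage, totals)
    assert totals == {"cache_creation": 1_200, "cache_read": 9_800}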
@@ -1029,6 +1130,67 @@ class PatchPalAgent:
         if self.enable_auto_compact and self.context_manager.needs_compaction(self.messages):
             self._perform_auto_compaction()
 
+        # Agent loop with interrupt handling
+        try:
+            return self._run_agent_loop(max_iterations)
+        except KeyboardInterrupt:
+            # Clean up conversation state if interrupted mid-execution
+            self._cleanup_interrupted_state()
+            raise  # Re-raise so CLI can handle it
+
+    def _cleanup_interrupted_state(self):
+        """Clean up conversation state after KeyboardInterrupt.
+
+        If the last message is an assistant message with tool_calls but no
+        corresponding tool responses, we need to either remove the message
+        or add error responses to maintain valid conversation structure.
+        """
+        if not self.messages:
+            return
+
+        last_msg = self.messages[-1]
+
+        # Check if last message is assistant with tool_calls
+        if last_msg.get("role") == "assistant" and last_msg.get("tool_calls"):
+            tool_calls = last_msg["tool_calls"]
+
+            # Check if we have tool responses for all tool_calls
+            tool_call_ids = {tc.id for tc in tool_calls}
+
+            # Look for tool responses after this assistant message
+            # (should be immediately following, but scan to be safe)
+            response_ids = set()
+            for msg in self.messages[self.messages.index(last_msg) + 1 :]:
+                if msg.get("role") == "tool":
+                    response_ids.add(msg.get("tool_call_id"))
+
+            # If we're missing responses, add error responses for all tool calls
+            if tool_call_ids != response_ids:
+                missing_ids = tool_call_ids - response_ids
+
+                # Add error tool responses for the missing tool calls
+                for tool_call in tool_calls:
+                    if tool_call.id in missing_ids:
+                        self.messages.append(
+                            {
+                                "role": "tool",
+                                "tool_call_id": tool_call.id,
+                                "name": tool_call.function.name,
+                                "content": "Error: Operation interrupted by user (Ctrl-C)",
+                            }
+                        )
+
+    def _run_agent_loop(self, max_iterations: int) -> str:
+        """Internal method that runs the agent loop.
+
+        Separated from run() to enable proper interrupt handling.
+
+        Args:
+            max_iterations: Maximum number of agent iterations
+
+        Returns:
+            The agent's final response
+        """
         # Agent loop
         for iteration in range(max_iterations):
             # Show thinking message
@@ -1042,10 +1204,18 @@ class PatchPalAgent:
 
             # Use LiteLLM for all providers
             try:
+                # Build tool list (built-in + custom)
+                tools = list(TOOLS)
+                if self.custom_tools:
+                    from patchpal.tool_schema import function_to_tool_schema
+
+                    for func in self.custom_tools:
+                        tools.append(function_to_tool_schema(func))
+
                 response = litellm.completion(
                     model=self.model_id,
                     messages=messages,
-                    tools=
+                    tools=tools,
                     tool_choice="auto",
                     **self.litellm_kwargs,
                 )
@@ -1057,6 +1227,19 @@ class PatchPalAgent:
                     self.cumulative_input_tokens += response.usage.prompt_tokens
                 if hasattr(response.usage, "completion_tokens"):
                     self.cumulative_output_tokens += response.usage.completion_tokens
+                # Track cache statistics (Anthropic/Bedrock prompt caching)
+                if (
+                    hasattr(response.usage, "cache_creation_input_tokens")
+                    and response.usage.cache_creation_input_tokens
+                ):
+                    self.cumulative_cache_creation_tokens += (
+                        response.usage.cache_creation_input_tokens
+                    )
+                if (
+                    hasattr(response.usage, "cache_read_input_tokens")
+                    and response.usage.cache_read_input_tokens
+                ):
+                    self.cumulative_cache_read_tokens += response.usage.cache_read_input_tokens
 
             except Exception as e:
                 return f"Error calling model: {e}"
@@ -1099,15 +1282,25 @@ class PatchPalAgent:
                     tool_result = f"Error: Invalid JSON arguments for {tool_name}"
                     print(f"\033[1;31m✗ {tool_name}: Invalid arguments\033[0m")
                 else:
-                    # Get the tool function
-                    tool_func =
+                    # Get the tool function (check custom tools first, then built-in)
+                    tool_func = self.custom_tool_funcs.get(tool_name) or TOOL_FUNCTIONS.get(
+                        tool_name
+                    )
                     if tool_func is None:
                         tool_result = f"Error: Unknown tool {tool_name}"
                         print(f"\033[1;31m✗ Unknown tool: {tool_name}\033[0m")
                     else:
                         # Show tool call message
-
-
+                        if tool_name in self.custom_tool_funcs:
+                            # Custom tool - show generic message with args
+                            args_preview = str(tool_args)[:60]
+                            if len(str(tool_args)) > 60:
+                                args_preview += "..."
+                            print(
+                                f"\033[2m🔧 {tool_name}({args_preview})\033[0m",
+                                flush=True,
+                            )
+                        elif tool_name == "read_file":
                             print(
                                 f"\033[2m📖 Reading: {tool_args.get('path', '')}\033[0m",
                                 flush=True,
@@ -1250,15 +1443,43 @@ class PatchPalAgent:
                        tool_result = tool_func(**filtered_args)
                    except Exception as e:
                        tool_result = f"Error executing {tool_name}: {e}"
-                        print(f"\033[1;31m✗ {
+                        print(f"\033[1;31m✗ {tool_name}: {e}\033[0m")
 
                    # Add tool result to messages
+                    # Check if result is extremely large and might blow context
+                    result_str = str(tool_result)
+                    result_size = len(result_str)
+
+                    # Warn if result is > 100K chars (~33K tokens)
+                    if result_size > 100_000:
+                        print(
+                            f"\033[1;33m⚠️  Large tool output: {result_size:,} chars (~{result_size // 3:,} tokens)\033[0m"
+                        )
+
+                        # If result would push us WAY over capacity, truncate it
+                        current_stats = self.context_manager.get_usage_stats(self.messages)
+                        # Estimate tokens in this result
+                        result_tokens = self.context_manager.estimator.estimate_tokens(result_str)
+                        projected_ratio = (
+                            current_stats["total_tokens"] + result_tokens
+                        ) / current_stats["context_limit"]
+
+                        if projected_ratio > 1.5:  # Would exceed 150% capacity
+                            print(
+                                "\033[1;31m⚠️  Tool output would exceed context capacity! Truncating...\033[0m"
+                            )
+                            # Keep first 50K chars
+                            result_str = (
+                                result_str[:50_000]
+                                + "\n\n[... Output truncated to prevent context window overflow. Use read_lines or grep_code for targeted access ...]"
+                            )
+
                    self.messages.append(
                        {
                            "role": "tool",
                            "tool_call_id": tool_call.id,
                            "name": tool_name,
-                            "content":
+                            "content": result_str,
                        }
                    )
 
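The guard above assumes roughly 3 characters per token (hence `result_size // 3`). A worked example of the capacity projection, with made-up numbers:

    # Hypothetical numbers to illustrate the projection.
    context_limit = 200_000           # model context window, in tokens
    current_total_tokens = 150_000    # tokens already in the conversation
    result_size = 600_000             # chars in the new tool output
    result_tokens = result_size // 3  # ~3 chars/token heuristic -> 200,000 tokens

    projected_ratio = (current_total_tokens + result_tokens) / context_limit
    print(projected_ratio)  # 1.75 > 1.5, so the output is cut to its first 50K chars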
@@ -1299,18 +1520,33 @@ class PatchPalAgent:
         )
 
 
-def create_agent(model_id: str = "anthropic/claude-sonnet-4-5") -> PatchPalAgent:
+def create_agent(model_id: str = "anthropic/claude-sonnet-4-5", custom_tools=None) -> PatchPalAgent:
     """Create and return a PatchPal agent.
 
     Args:
         model_id: LiteLLM model identifier (default: anthropic/claude-sonnet-4-5)
+        custom_tools: Optional list of Python functions to use as custom tools.
+            Each function should have type hints and a docstring.
 
     Returns:
         A configured PatchPalAgent instance
+
+    Example:
+        def calculator(x: int, y: int) -> str:
+            '''Add two numbers.
+
+            Args:
+                x: First number
+                y: Second number
+            '''
+            return str(x + y)
+
+        agent = create_agent(custom_tools=[calculator])
+        response = agent.run("What's 5 + 3?")
    """
    # Reset session todos for new session
    from patchpal.tools import reset_session_todos
 
    reset_session_todos()
 
-    return PatchPalAgent(model_id=model_id)
+    return PatchPalAgent(model_id=model_id, custom_tools=custom_tools)
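`function_to_tool_schema` lives in the new `tool_schema.py`, which this diff lists but does not show line by line. Since its output is passed straight to `litellm.completion(tools=..., tool_choice="auto")`, it presumably emits the standard OpenAI function-calling schema; a sketch of the assumed shape for the docstring's `calculator` example (the exact fields are a guess, not confirmed by this diff):

    # Assumed schema shape (OpenAI function-calling format); tool_schema.py may differ.
    assumed_schema = {
        "type": "function",
        "function": {
            "name": "calculator",
            "description": "Add two numbers.",
            "parameters": {
                "type": "object",
                "properties": {
                    "x": {"type": "integer", "description": "First number"},
                    "y": {"type": "integer", "description": "Second number"},
                },
                "required": ["x", "y"],
            },
        },
    }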
patchpal/cli.py
CHANGED
@@ -211,9 +211,26 @@ Supported models: Any LiteLLM-supported model
     # Determine model to use (priority: CLI arg > env var > default)
     model_id = args.model or os.getenv("PATCHPAL_MODEL") or "anthropic/claude-sonnet-4-5"
 
-    #
+    # Discover custom tools from ~/.patchpal/tools/
+    from patchpal.tool_schema import discover_tools, list_custom_tools
+
+    custom_tools = discover_tools()
+
+    # Show custom tools info if any were loaded
+    custom_tool_info = list_custom_tools()
+    if custom_tool_info:
+        tool_names = [name for name, _, _ in custom_tool_info]
+        tools_str = ", ".join(tool_names)
+        # Store for later display (after model info)
+        custom_tools_message = (
+            f"\033[1;36m🔧 Loaded {len(custom_tool_info)} custom tool(s): {tools_str}\033[0m"
+        )
+    else:
+        custom_tools_message = None
+
+    # Create the agent with the specified model and custom tools
     # LiteLLM will handle API key validation and provide appropriate error messages
-    agent = create_agent(model_id=model_id)
+    agent = create_agent(model_id=model_id, custom_tools=custom_tools)
 
     # Get max iterations from environment variable or use default
     max_iterations = int(os.getenv("PATCHPAL_MAX_ITERATIONS", "100"))
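Custom tools are discovered from `~/.patchpal/tools/`. Assuming `discover_tools` picks up top-level functions with type hints and docstrings (the exact discovery rules live in `tool_schema.py`, not shown here), a plausible tool file would look like:

    # ~/.patchpal/tools/word_count.py -- hypothetical example file
    def word_count(text: str) -> str:
        """Count the words in a piece of text.

        Args:
            text: The text to count words in.
        """
        return str(len(text.split()))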
@@ -238,6 +255,10 @@ Supported models: Any LiteLLM-supported model
     print("=" * 80)
     print(f"\nUsing model: {model_id}")
 
+    # Show custom tools info if any were loaded
+    if custom_tools_message:
+        print(custom_tools_message)
+
     # Show require-permission-for-all indicator if active
     if args.require_permission_for_all:
         print("\033[1;33m🔒 Permission required for ALL operations (including reads)\033[0m")
@@ -380,6 +401,55 @@ Supported models: Any LiteLLM-supported model
                 total_tokens = agent.cumulative_input_tokens + agent.cumulative_output_tokens
                 print(f"  Total tokens: {total_tokens:,}")
 
+                # Show cache statistics if available (Anthropic/Bedrock prompt caching)
+                has_cache_stats = (
+                    agent.cumulative_cache_creation_tokens > 0
+                    or agent.cumulative_cache_read_tokens > 0
+                )
+                if has_cache_stats:
+                    print("\n  \033[1;36mPrompt Caching Statistics\033[0m")
+                    print(f"  Cache write tokens: {agent.cumulative_cache_creation_tokens:,}")
+                    print(f"  Cache read tokens: {agent.cumulative_cache_read_tokens:,}")
+
+                    # Calculate cache hit rate
+                    if agent.cumulative_input_tokens > 0:
+                        cache_hit_rate = (
+                            agent.cumulative_cache_read_tokens / agent.cumulative_input_tokens
+                        ) * 100
+                        print(f"  Cache hit rate: {cache_hit_rate:.1f}%")
+
+                    # Show cost-adjusted input tokens (cache reads cost less)
+                    # Note: This is an approximation - actual pricing varies by model
+                    # For Anthropic: cache writes = 1.25x, cache reads = 0.1x, regular = 1x
+                    if "anthropic" in model_id.lower() or "claude" in model_id.lower():
+                        # Break down: cumulative_input = non_cached + cache_read + cache_write
+                        non_cached_tokens = (
+                            agent.cumulative_input_tokens
+                            - agent.cumulative_cache_read_tokens
+                            - agent.cumulative_cache_creation_tokens
+                        )
+                        # Approximate cost-equivalent tokens (cache reads cost 10%, cache writes cost 125%)
+                        cost_adjusted = (
+                            non_cached_tokens
+                            + (agent.cumulative_cache_read_tokens * 0.1)
+                            + (agent.cumulative_cache_creation_tokens * 1.25)
+                        )
+                        savings_pct = (
+                            (
+                                (agent.cumulative_input_tokens - cost_adjusted)
+                                / agent.cumulative_input_tokens
+                                * 100
+                            )
+                            if agent.cumulative_input_tokens > 0
+                            else 0
+                        )
+                        print(
+                            f"  Cost-adjusted input tokens: {cost_adjusted:,.0f} (~{savings_pct:.0f}% savings)"
+                        )
+                        print(
+                            "  \033[2m(Cache reads cost 10% of base price, writes cost 125% of base price)\033[0m"
+                        )
+
                 print("=" * 70 + "\n")
                 continue
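A worked example of the cost-adjusted arithmetic above, with made-up session totals:

    # Hypothetical session totals to illustrate the savings estimate.
    cumulative_input = 100_000
    cache_read = 80_000      # billed at ~10% of the base input price
    cache_creation = 10_000  # billed at ~125% of the base input price
    non_cached = cumulative_input - cache_read - cache_creation  # 10,000

    cost_adjusted = non_cached + cache_read * 0.1 + cache_creation * 1.25  # 30,500
    savings_pct = (cumulative_input - cost_adjusted) / cumulative_input * 100
    print(f"{cost_adjusted:,.0f} tokens (~{savings_pct:.0f}% savings)")  # 30,500 (~70% savings)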