holmesgpt 0.14.2__py3-none-any.whl → 0.14.3a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +6 -0
- holmes/config.py +3 -6
- holmes/core/conversations.py +12 -2
- holmes/core/feedback.py +191 -0
- holmes/core/llm.py +16 -12
- holmes/core/models.py +101 -1
- holmes/core/supabase_dal.py +23 -9
- holmes/core/tool_calling_llm.py +197 -15
- holmes/core/tools.py +20 -7
- holmes/core/tools_utils/token_counting.py +13 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +45 -23
- holmes/core/tools_utils/tool_executor.py +11 -6
- holmes/core/toolset_manager.py +5 -1
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/interactive.py +146 -14
- holmes/plugins/prompts/_fetch_logs.jinja2 +3 -0
- holmes/plugins/runbooks/__init__.py +6 -1
- holmes/plugins/toolsets/__init__.py +11 -4
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +9 -20
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -4
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -3
- holmes/plugins/toolsets/bash/bash_toolset.py +4 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +5 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +1 -1
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +6 -13
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +3 -6
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +4 -9
- holmes/plugins/toolsets/git.py +14 -12
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +23 -42
- holmes/plugins/toolsets/grafana/toolset_grafana.py +2 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +18 -36
- holmes/plugins/toolsets/internet/internet.py +2 -3
- holmes/plugins/toolsets/internet/notion.py +2 -3
- holmes/plugins/toolsets/investigator/core_investigation.py +7 -9
- holmes/plugins/toolsets/kafka.py +7 -18
- holmes/plugins/toolsets/logging_utils/logging_api.py +79 -3
- holmes/plugins/toolsets/mcp/toolset_mcp.py +2 -3
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +5 -12
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +3 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +131 -97
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +3 -6
- holmes/plugins/toolsets/robusta/robusta.py +4 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +93 -13
- holmes/plugins/toolsets/servicenow/servicenow.py +5 -10
- holmes/utils/sentry_helper.py +1 -1
- holmes/utils/stream.py +22 -7
- holmes/version.py +34 -14
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.3a0.dist-info}/METADATA +6 -8
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.3a0.dist-info}/RECORD +66 -60
- holmes/core/tools_utils/data_types.py +0 -81
- holmes/plugins/toolsets/newrelic.py +0 -231
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.3a0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.3a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.2.dist-info → holmesgpt-0.14.3a0.dist-info}/entry_points.txt +0 -0
holmes/core/tool_calling_llm.py
CHANGED
|
@@ -4,6 +4,13 @@ import logging
|
|
|
4
4
|
import textwrap
|
|
5
5
|
from typing import Dict, List, Optional, Type, Union, Callable, Any
|
|
6
6
|
|
|
7
|
+
from holmes.core.models import (
|
|
8
|
+
ToolApprovalDecision,
|
|
9
|
+
ToolCallResult,
|
|
10
|
+
TruncationResult,
|
|
11
|
+
TruncationMetadata,
|
|
12
|
+
PendingToolApproval,
|
|
13
|
+
)
|
|
7
14
|
|
|
8
15
|
import sentry_sdk
|
|
9
16
|
from openai import BadRequestError
|
|
@@ -32,8 +39,13 @@ from holmes.core.performance_timing import PerformanceTiming
|
|
|
32
39
|
from holmes.core.resource_instruction import ResourceInstructions
|
|
33
40
|
from holmes.core.runbooks import RunbookManager
|
|
34
41
|
from holmes.core.safeguards import prevent_overly_repeated_tool_call
|
|
35
|
-
from holmes.core.tools import
|
|
42
|
+
from holmes.core.tools import (
|
|
43
|
+
StructuredToolResult,
|
|
44
|
+
StructuredToolResultStatus,
|
|
45
|
+
ToolInvokeContext,
|
|
46
|
+
)
|
|
36
47
|
from holmes.core.tools_utils.tool_context_window_limiter import (
|
|
48
|
+
get_max_token_count_for_single_tool,
|
|
37
49
|
prevent_overly_big_tool_response,
|
|
38
50
|
)
|
|
39
51
|
from holmes.plugins.prompts import load_and_render_prompt
|
|
@@ -44,11 +56,6 @@ from holmes.utils.global_instructions import (
|
|
|
44
56
|
)
|
|
45
57
|
from holmes.utils.tags import format_tags_in_string, parse_messages_tags
|
|
46
58
|
from holmes.core.tools_utils.tool_executor import ToolExecutor
|
|
47
|
-
from holmes.core.tools_utils.data_types import (
|
|
48
|
-
TruncationResult,
|
|
49
|
-
ToolCallResult,
|
|
50
|
-
TruncationMetadata,
|
|
51
|
-
)
|
|
52
59
|
from holmes.core.tracing import DummySpan
|
|
53
60
|
from holmes.utils.colors import AI_COLOR
|
|
54
61
|
from holmes.utils.stream import StreamEvents, StreamMessage
|
|
@@ -264,6 +271,99 @@ class ToolCallingLLM:
|
|
|
264
271
|
Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
|
|
265
272
|
] = None
|
|
266
273
|
|
|
274
|
+
def process_tool_decisions(
    self, messages: List[Dict[str, Any]], tool_decisions: List[ToolApprovalDecision]
) -> List[Dict[str, Any]]:
    """
    Process tool approval decisions and execute approved tools.

    Args:
        messages: Current conversation messages
        tool_decisions: List of ToolApprovalDecision objects

    Returns:
        Updated messages list with tool execution results
    """
    # NOTE(review): stale comment — no local import follows; safe to delete.
    # Import here to avoid circular imports

    # Find the last message with pending approvals.  Scan from the end so the
    # most recent assistant turn flagged "pending_approval" wins.
    pending_message_idx = None
    pending_tool_calls = None

    for i in reversed(range(len(messages))):
        msg = messages[i]
        if msg.get("role") == "assistant" and msg.get("pending_approval"):
            pending_message_idx = i
            pending_tool_calls = msg.get("tool_calls", [])
            break

    if pending_message_idx is None or not pending_tool_calls:
        # No pending approvals found: nothing to execute, return unchanged.
        if tool_decisions:
            logging.warning(
                f"Received {len(tool_decisions)} tool decisions but no pending approvals found"
            )
        return messages

    # Create decision lookup keyed by tool_call_id for O(1) matching below.
    decisions_by_id = {
        decision.tool_call_id: decision for decision in tool_decisions
    }

    # Validate that all decisions have corresponding pending tool calls.
    # Extra decisions are only warned about, never executed.
    pending_tool_ids = {tool_call["id"] for tool_call in pending_tool_calls}
    invalid_decisions = [
        decision.tool_call_id
        for decision in tool_decisions
        if decision.tool_call_id not in pending_tool_ids
    ]

    if invalid_decisions:
        logging.warning(
            f"Received decisions for non-pending tool calls: {invalid_decisions}"
        )

    # Process each pending tool call: approved -> execute, otherwise a
    # rejection "tool" message is appended so the conversation stays valid
    # (every tool_call must get a tool response).
    for tool_call in pending_tool_calls:
        tool_call_id = tool_call["id"]
        decision = decisions_by_id.get(tool_call_id)

        if decision and decision.approved:
            try:
                tool_call_obj = ChatCompletionMessageToolCall(**tool_call)
                llm_tool_result = self._invoke_llm_tool_call(
                    tool_to_call=tool_call_obj,
                    previous_tool_calls=[],
                    trace_span=DummySpan(),
                    tool_number=None,
                )
                messages.append(llm_tool_result.as_tool_call_message())

            except Exception as e:
                # Execution failure is reported back as the tool's content
                # rather than raised, so the LLM can react to it.
                logging.error(
                    f"Failed to execute approved tool {tool_call_id}: {e}"
                )
                messages.append(
                    {
                        "tool_call_id": tool_call_id,
                        "role": "tool",
                        "name": tool_call["function"]["name"],
                        "content": f"Tool execution failed: {str(e)}",
                    }
                )
        else:
            # Tool was rejected or no decision found, add rejection message
            messages.append(
                {
                    "tool_call_id": tool_call_id,
                    "role": "tool",
                    "name": tool_call["function"]["name"],
                    "content": "Tool execution was denied by the user.",
                }
            )

    return messages
|
|
366
|
+
|
|
267
367
|
def prompt_call(
|
|
268
368
|
self,
|
|
269
369
|
system_prompt: str,
|
|
@@ -527,9 +627,13 @@ class ToolCallingLLM:
|
|
|
527
627
|
)
|
|
528
628
|
|
|
529
629
|
try:
|
|
530
|
-
|
|
531
|
-
|
|
630
|
+
invoke_context = ToolInvokeContext(
|
|
631
|
+
tool_number=tool_number,
|
|
632
|
+
user_approved=user_approved,
|
|
633
|
+
llm=self.llm,
|
|
634
|
+
max_token_count=get_max_token_count_for_single_tool(self.llm),
|
|
532
635
|
)
|
|
636
|
+
tool_response = tool.invoke(tool_params, context=invoke_context)
|
|
533
637
|
except Exception as e:
|
|
534
638
|
logging.error(
|
|
535
639
|
f"Tool call to {tool_name} failed with an Exception", exc_info=True
|
|
@@ -587,7 +691,9 @@ class ToolCallingLLM:
|
|
|
587
691
|
return ToolCallResult(
|
|
588
692
|
tool_call_id=tool_call_id,
|
|
589
693
|
tool_name=tool_name,
|
|
590
|
-
description=tool.get_parameterized_one_liner(tool_params)
|
|
694
|
+
description=str(tool.get_parameterized_one_liner(tool_params))
|
|
695
|
+
if tool
|
|
696
|
+
else "",
|
|
591
697
|
result=tool_response,
|
|
592
698
|
)
|
|
593
699
|
|
|
@@ -765,12 +871,13 @@ class ToolCallingLLM:
|
|
|
765
871
|
response_format: Optional[Union[dict, Type[BaseModel]]] = None,
|
|
766
872
|
sections: Optional[InputSectionsDataType] = None,
|
|
767
873
|
msgs: Optional[list[dict]] = None,
|
|
874
|
+
enable_tool_approval: bool = False,
|
|
768
875
|
):
|
|
769
876
|
"""
|
|
770
877
|
This function DOES NOT call llm.completion(stream=true).
|
|
771
878
|
This function streams holmes one iteration at a time instead of waiting for all iterations to complete.
|
|
772
879
|
"""
|
|
773
|
-
messages = []
|
|
880
|
+
messages: list[dict] = []
|
|
774
881
|
if system_prompt:
|
|
775
882
|
messages.append({"role": "system", "content": system_prompt})
|
|
776
883
|
if user_prompt:
|
|
@@ -890,6 +997,11 @@ class ToolCallingLLM:
|
|
|
890
997
|
)
|
|
891
998
|
|
|
892
999
|
perf_timing.measure("pre-tool-calls")
|
|
1000
|
+
|
|
1001
|
+
# Check if any tools require approval first
|
|
1002
|
+
pending_approvals = []
|
|
1003
|
+
approval_required_tools = []
|
|
1004
|
+
|
|
893
1005
|
with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
|
|
894
1006
|
futures = []
|
|
895
1007
|
for tool_index, t in enumerate(tools_to_call, 1): # type: ignore
|
|
@@ -909,15 +1021,85 @@ class ToolCallingLLM:
|
|
|
909
1021
|
|
|
910
1022
|
for future in concurrent.futures.as_completed(futures):
|
|
911
1023
|
tool_call_result: ToolCallResult = future.result()
|
|
912
|
-
tool_calls.append(tool_call_result.as_tool_result_response())
|
|
913
|
-
messages.append(tool_call_result.as_tool_call_message())
|
|
914
1024
|
|
|
915
|
-
|
|
1025
|
+
if (
|
|
1026
|
+
tool_call_result.result.status
|
|
1027
|
+
== StructuredToolResultStatus.APPROVAL_REQUIRED
|
|
1028
|
+
):
|
|
1029
|
+
if enable_tool_approval:
|
|
1030
|
+
pending_approvals.append(
|
|
1031
|
+
PendingToolApproval(
|
|
1032
|
+
tool_call_id=tool_call_result.tool_call_id,
|
|
1033
|
+
tool_name=tool_call_result.tool_name,
|
|
1034
|
+
description=tool_call_result.description,
|
|
1035
|
+
params=tool_call_result.result.params or {},
|
|
1036
|
+
)
|
|
1037
|
+
)
|
|
1038
|
+
approval_required_tools.append(tool_call_result)
|
|
1039
|
+
|
|
1040
|
+
yield StreamMessage(
|
|
1041
|
+
event=StreamEvents.TOOL_RESULT,
|
|
1042
|
+
data=tool_call_result.as_streaming_tool_result_response(),
|
|
1043
|
+
)
|
|
1044
|
+
else:
|
|
1045
|
+
tool_call_result.result.status = (
|
|
1046
|
+
StructuredToolResultStatus.ERROR
|
|
1047
|
+
)
|
|
1048
|
+
tool_call_result.result.error = f"Tool call rejected for security reasons: {tool_call_result.result.error}"
|
|
1049
|
+
|
|
1050
|
+
tool_calls.append(
|
|
1051
|
+
tool_call_result.as_tool_result_response()
|
|
1052
|
+
)
|
|
1053
|
+
messages.append(tool_call_result.as_tool_call_message())
|
|
916
1054
|
|
|
1055
|
+
yield StreamMessage(
|
|
1056
|
+
event=StreamEvents.TOOL_RESULT,
|
|
1057
|
+
data=tool_call_result.as_streaming_tool_result_response(),
|
|
1058
|
+
)
|
|
1059
|
+
|
|
1060
|
+
else:
|
|
1061
|
+
tool_calls.append(tool_call_result.as_tool_result_response())
|
|
1062
|
+
messages.append(tool_call_result.as_tool_call_message())
|
|
1063
|
+
|
|
1064
|
+
yield StreamMessage(
|
|
1065
|
+
event=StreamEvents.TOOL_RESULT,
|
|
1066
|
+
data=tool_call_result.as_streaming_tool_result_response(),
|
|
1067
|
+
)
|
|
1068
|
+
|
|
1069
|
+
# If we have approval required tools, end the stream with pending approvals
|
|
1070
|
+
if pending_approvals:
|
|
1071
|
+
# Add assistant message with pending tool calls
|
|
1072
|
+
assistant_msg = {
|
|
1073
|
+
"role": "assistant",
|
|
1074
|
+
"content": response_message.content,
|
|
1075
|
+
"tool_calls": [
|
|
1076
|
+
{
|
|
1077
|
+
"id": result.tool_call_id,
|
|
1078
|
+
"type": "function",
|
|
1079
|
+
"function": {
|
|
1080
|
+
"name": result.tool_name,
|
|
1081
|
+
"arguments": json.dumps(result.result.params or {}),
|
|
1082
|
+
},
|
|
1083
|
+
}
|
|
1084
|
+
for result in approval_required_tools
|
|
1085
|
+
],
|
|
1086
|
+
"pending_approval": True,
|
|
1087
|
+
}
|
|
1088
|
+
messages.append(assistant_msg)
|
|
1089
|
+
|
|
1090
|
+
# End stream with approvals required
|
|
917
1091
|
yield StreamMessage(
|
|
918
|
-
event=StreamEvents.
|
|
919
|
-
data=
|
|
1092
|
+
event=StreamEvents.APPROVAL_REQUIRED,
|
|
1093
|
+
data={
|
|
1094
|
+
"content": None,
|
|
1095
|
+
"messages": messages,
|
|
1096
|
+
"pending_approvals": [
|
|
1097
|
+
approval.model_dump() for approval in pending_approvals
|
|
1098
|
+
],
|
|
1099
|
+
"requires_approval": True,
|
|
1100
|
+
},
|
|
920
1101
|
)
|
|
1102
|
+
return
|
|
921
1103
|
|
|
922
1104
|
# Update the tool number offset for the next iteration
|
|
923
1105
|
tool_number_offset += len(tools_to_call)
|
holmes/core/tools.py
CHANGED
|
@@ -31,6 +31,7 @@ from pydantic import (
|
|
|
31
31
|
)
|
|
32
32
|
from rich.console import Console
|
|
33
33
|
|
|
34
|
+
from holmes.core.llm import LLM
|
|
34
35
|
from holmes.core.openai_formatting import format_tool_to_open_ai_standard
|
|
35
36
|
from holmes.plugins.prompts import load_and_render_prompt
|
|
36
37
|
from holmes.core.transformers import (
|
|
@@ -159,6 +160,15 @@ class ToolParameter(BaseModel):
|
|
|
159
160
|
items: Optional["ToolParameter"] = None # For array item schemas
|
|
160
161
|
|
|
161
162
|
|
|
163
|
+
class ToolInvokeContext(BaseModel):
    """Per-invocation context handed to Tool._invoke()."""

    # `llm` is not a pydantic model, so arbitrary types must be allowed.
    model_config = ConfigDict(arbitrary_types_allowed=True)

    # Sequence number of this tool call, used for log prefixes ("#3") and the
    # "/show 3" hint in Tool.invoke(); None when the call is unnumbered.
    tool_number: Optional[int] = None
    # NOTE(review): presumably True once a human approved this call in the
    # tool-approval flow — confirm against callers.
    user_approved: bool = False
    # The active LLM; tools can use it e.g. for token counting.
    llm: LLM
    # Token budget for a single tool response (see
    # get_max_token_count_for_single_tool).
    max_token_count: int
|
|
170
|
+
|
|
171
|
+
|
|
162
172
|
class Tool(ABC, BaseModel):
|
|
163
173
|
name: str
|
|
164
174
|
description: str
|
|
@@ -225,15 +235,14 @@ class Tool(ABC, BaseModel):
|
|
|
225
235
|
def invoke(
|
|
226
236
|
self,
|
|
227
237
|
params: Dict,
|
|
228
|
-
|
|
229
|
-
user_approved: bool = False,
|
|
238
|
+
context: ToolInvokeContext,
|
|
230
239
|
) -> StructuredToolResult:
|
|
231
|
-
tool_number_str = f"#{tool_number} " if tool_number else ""
|
|
240
|
+
tool_number_str = f"#{context.tool_number} " if context.tool_number else ""
|
|
232
241
|
logger.info(
|
|
233
242
|
f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
|
|
234
243
|
)
|
|
235
244
|
start_time = time.time()
|
|
236
|
-
result = self._invoke(params=params,
|
|
245
|
+
result = self._invoke(params=params, context=context)
|
|
237
246
|
result.icon_url = self.icon_url
|
|
238
247
|
|
|
239
248
|
# Apply transformers to the result
|
|
@@ -244,7 +253,7 @@ class Tool(ABC, BaseModel):
|
|
|
244
253
|
if hasattr(transformed_result, "get_stringified_data")
|
|
245
254
|
else str(transformed_result)
|
|
246
255
|
)
|
|
247
|
-
show_hint = f"/show {tool_number}" if tool_number else "/show"
|
|
256
|
+
show_hint = f"/show {context.tool_number}" if context.tool_number else "/show"
|
|
248
257
|
line_count = output_str.count("\n") + 1 if output_str else 0
|
|
249
258
|
logger.info(
|
|
250
259
|
f" [dim]Finished {tool_number_str}in {elapsed:.2f}s, output length: {len(output_str):,} characters ({line_count:,} lines) - {show_hint} to view contents[/dim]"
|
|
@@ -340,7 +349,9 @@ class Tool(ABC, BaseModel):
|
|
|
340
349
|
|
|
341
350
|
@abstractmethod
|
|
342
351
|
def _invoke(
|
|
343
|
-
self,
|
|
352
|
+
self,
|
|
353
|
+
params: dict,
|
|
354
|
+
context: ToolInvokeContext,
|
|
344
355
|
) -> StructuredToolResult:
|
|
345
356
|
"""
|
|
346
357
|
params: the tool params
|
|
@@ -400,7 +411,9 @@ class YAMLTool(Tool, BaseModel):
|
|
|
400
411
|
return StructuredToolResultStatus.SUCCESS
|
|
401
412
|
|
|
402
413
|
def _invoke(
|
|
403
|
-
self,
|
|
414
|
+
self,
|
|
415
|
+
params: dict,
|
|
416
|
+
context: ToolInvokeContext,
|
|
404
417
|
) -> StructuredToolResult:
|
|
405
418
|
if self.command is not None:
|
|
406
419
|
raw_output, return_code, invocation = self.__invoke_command(params)
|
|
@@ -0,0 +1,13 @@
|
|
|
1
|
+
from holmes.core.llm import LLM
|
|
2
|
+
from holmes.core.models import format_tool_result_data
|
|
3
|
+
from holmes.core.tools import StructuredToolResult
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
def count_tool_response_tokens(
    llm: LLM, structured_tool_result: StructuredToolResult
) -> int:
    """Count the tokens the given tool result consumes when rendered as a
    "tool"-role chat message for *llm*."""
    rendered_content = format_tool_result_data(structured_tool_result)
    return llm.count_tokens_for_message([{"role": "tool", "content": rendered_content}])
|
|
@@ -1,33 +1,55 @@
|
|
|
1
|
+
from typing import Optional
|
|
1
2
|
from holmes.common.env_vars import TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
|
|
2
3
|
from holmes.core.llm import LLM
|
|
3
4
|
from holmes.core.tools import StructuredToolResultStatus
|
|
4
|
-
from holmes.core.
|
|
5
|
+
from holmes.core.models import ToolCallResult
|
|
5
6
|
from holmes.utils import sentry_helper
|
|
6
7
|
|
|
7
8
|
|
|
9
|
+
def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int:
    """Return the token count corresponding to a percentage of the LLM's
    context window.

    Args:
        percent_of_total_context_window: Percentage in (0, 100]. Values
            outside that range fall back to the full context window.
        llm: LLM whose context window size is queried.

    Returns:
        Integer token budget.
    """
    context_window_size = llm.get_context_window_size()

    # Idiomatic chained comparison (was `0 < x and x <= 100`); guard clause
    # keeps the fallback path explicit.
    if not (0 < percent_of_total_context_window <= 100):
        # Out-of-range percentages (0, negative, >100) mean "no limit".
        return context_window_size
    return int(context_window_size * percent_of_total_context_window // 100)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def get_max_token_count_for_single_tool(llm: LLM) -> int:
    """Token budget one tool response may occupy, derived from the
    TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT setting."""
    return get_pct_token_count(
        llm=llm,
        percent_of_total_context_window=TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT,
    )
|
|
22
|
+
|
|
23
|
+
|
|
8
24
|
def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
    """Mutate tool_call_result in place if its rendered message exceeds the
    per-tool token budget: the payload is dropped and replaced with an ERROR
    whose message instructs the LLM to retry with a narrower query.

    Args:
        tool_call_result: Result to check; mutated on overflow.
        llm: Used for the token budget and for counting tokens.
    """
    max_tokens_allowed = get_max_token_count_for_single_tool(llm)

    message = tool_call_result.as_tool_call_message()
    messages_token = llm.count_tokens_for_message(messages=[message])

    if messages_token > max_tokens_allowed:
        # Percentage of the oversized response that would need to be cut.
        relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100

        error_message: Optional[str] = (
            f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
        )

        if tool_call_result.result.status == StructuredToolResultStatus.NO_DATA:
            # NO_DATA: no retry hint; the payload drop below is the fix.
            error_message = None
            # tool_call_result.result.data is set to None below which is expected to fix the issue
        elif tool_call_result.result.status == StructuredToolResultStatus.ERROR:
            # ERROR: keep a short prefix of the original error for context.
            original_error = (
                tool_call_result.result.error
                or tool_call_result.result.data
                or "Unknown error"
            )
            truncated_error = str(original_error)[:100]
            error_message = f"The tool call returned an error it is too large to return\nThe following original error is truncated:\n{truncated_error}"

        # Drop the oversized payload regardless of the original status.
        tool_call_result.result.status = StructuredToolResultStatus.ERROR
        tool_call_result.result.data = None
        tool_call_result.result.error = error_message

        sentry_helper.capture_toolcall_contains_too_many_tokens(
            tool_call_result, messages_token, max_tokens_allowed
        )
|
|
@@ -9,6 +9,7 @@ from holmes.core.tools import (
|
|
|
9
9
|
StructuredToolResultStatus,
|
|
10
10
|
Toolset,
|
|
11
11
|
ToolsetStatusEnum,
|
|
12
|
+
ToolInvokeContext,
|
|
12
13
|
)
|
|
13
14
|
from holmes.core.tools_utils.toolset_utils import filter_out_default_logging_toolset
|
|
14
15
|
|
|
@@ -46,16 +47,20 @@ class ToolExecutor:
|
|
|
46
47
|
)
|
|
47
48
|
self.tools_by_name[tool.name] = tool
|
|
48
49
|
|
|
49
|
-
def invoke(
|
|
50
|
+
def invoke(
    self, tool_name: str, params: dict, context: ToolInvokeContext
) -> StructuredToolResult:
    """TODO: remove this function as it seems unused.
    Callers use tool_executor.get_tool_by_name() followed by tool.invoke() directly
    instead of going through this invoke function.
    """
    resolved_tool = self.get_tool_by_name(tool_name)
    if resolved_tool is not None:
        return resolved_tool.invoke(params, context)
    # Unknown tool name: report an error result instead of raising.
    return StructuredToolResult(
        status=StructuredToolResultStatus.ERROR,
        error=f"Could not find tool named {tool_name}",
    )
|
|
59
64
|
|
|
60
65
|
def get_tool_by_name(self, name: str) -> Optional[Tool]:
|
|
61
66
|
if name in self.tools_by_name:
|
holmes/core/toolset_manager.py
CHANGED
|
@@ -275,7 +275,11 @@ class ToolsetManager:
|
|
|
275
275
|
toolset.path = cached_status.get("path", None)
|
|
276
276
|
# check prerequisites for only enabled toolset when the toolset is loaded from cache. When the toolset is
|
|
277
277
|
# not loaded from cache, the prerequisites are checked in the refresh_toolset_status method.
|
|
278
|
-
if toolset.enabled and
|
|
278
|
+
if toolset.enabled and (
|
|
279
|
+
toolset.status == ToolsetStatusEnum.ENABLED
|
|
280
|
+
or toolset.type == ToolsetType.MCP
|
|
281
|
+
):
|
|
282
|
+
# MCP servers need to reload their tools even if previously failed, so rerun prerequisites
|
|
279
283
|
enabled_toolsets_from_cache.append(toolset)
|
|
280
284
|
self.check_toolset_prerequisites(enabled_toolsets_from_cache)
|
|
281
285
|
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
from holmes.common.env_vars import MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def truncate_string(data_str: str) -> str:
    """Cap *data_str* at MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
    characters, appending a truncation marker when anything was cut."""
    limit = MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
    if not data_str or len(data_str) <= limit:
        return data_str
    return data_str[:limit] + "-- DATA TRUNCATED TO AVOID HITTING CONTEXT WINDOW LIMITS"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def truncate_evidences_entities_if_necessary(evidence_list: list[dict]):
    """Truncate the "data" field of each evidence dict in place when a
    positive truncation limit is configured; otherwise do nothing."""
    limit = MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION
    if not limit or limit <= 0:
        return

    for evidence in evidence_list:
        raw_data = evidence.get("data")
        if not raw_data:
            continue
        evidence["data"] = truncate_string(str(raw_data))
|