holmesgpt 0.14.4a0__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +12 -10
- holmes/common/env_vars.py +14 -0
- holmes/config.py +51 -4
- holmes/core/conversations.py +3 -2
- holmes/core/llm.py +198 -72
- holmes/core/openai_formatting.py +13 -0
- holmes/core/tool_calling_llm.py +129 -95
- holmes/core/tools.py +21 -1
- holmes/core/tools_utils/token_counting.py +2 -1
- holmes/core/tools_utils/tool_context_window_limiter.py +13 -4
- holmes/interactive.py +17 -7
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/toolsets/__init__.py +4 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +0 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/grafana/grafana_api.py +1 -1
- holmes/plugins/toolsets/investigator/core_investigation.py +14 -13
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1 -1
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -1
- holmes/utils/stream.py +30 -1
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.15.0.dist-info}/METADATA +3 -1
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.15.0.dist-info}/RECORD +30 -27
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.15.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.15.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.15.0.dist-info}/entry_points.txt +0 -0
holmes/core/tool_calling_llm.py
CHANGED
@@ -34,7 +34,7 @@ from holmes.core.investigation_structured_output import (
     is_response_an_incorrect_tool_call,
 )
 from holmes.core.issue import Issue
-from holmes.core.llm import LLM
+from holmes.core.llm import LLM
 from holmes.core.performance_timing import PerformanceTiming
 from holmes.core.resource_instruction import ResourceInstructions
 from holmes.core.runbooks import RunbookManager
@@ -58,7 +58,12 @@ from holmes.utils.tags import format_tags_in_string, parse_messages_tags
 from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
-from holmes.utils.stream import
+from holmes.utils.stream import (
+    StreamEvents,
+    StreamMessage,
+    add_token_count_to_metadata,
+    build_stream_event_token_count,
+)
 
 # Create a named logger for cost tracking
 cost_logger = logging.getLogger("holmes.costs")
@@ -164,7 +169,8 @@ def truncate_messages_to_fit_context(
     messages_except_tools = [
         message for message in messages if message["role"] != "tool"
     ]
-
+    tokens = count_tokens_fn(messages_except_tools)
+    message_size_without_tools = tokens.total_tokens
 
     tool_call_messages = [message for message in messages if message["role"] == "tool"]
 
@@ -185,7 +191,9 @@
     )
     remaining_space = available_space
     tool_call_messages.sort(
-        key=lambda x: count_tokens_fn(
+        key=lambda x: count_tokens_fn(
+            [{"role": "tool", "content": x["content"]}]
+        ).total_tokens
     )
 
     truncations = []
@@ -196,7 +204,9 @@
     for i, msg in enumerate(tool_call_messages):
         remaining_tools = len(tool_call_messages) - i
         max_allocation = remaining_space // remaining_tools
-        needed_space = count_tokens_fn(
+        needed_space = count_tokens_fn(
+            [{"role": "tool", "content": msg["content"]}]
+        ).total_tokens
         allocated_space = min(needed_space, max_allocation)
 
         if needed_space > allocated_space:
@@ -257,6 +267,12 @@ class LLMResult(LLMCosts):
     )
 
 
+class ToolCallWithDecision(BaseModel):
+    message_index: int
+    tool_call: ChatCompletionMessageToolCall
+    decision: Optional[ToolApprovalDecision]
+
+
 class ToolCallingLLM:
     llm: LLM
 
@@ -284,83 +300,79 @@ class ToolCallingLLM:
         Returns:
             Updated messages list with tool execution results
         """
-
-
-        # Find the last message with pending approvals
-        pending_message_idx = None
-        pending_tool_calls = None
-
-        for i in reversed(range(len(messages))):
-            msg = messages[i]
-            if msg.get("role") == "assistant" and msg.get("pending_approval"):
-                pending_message_idx = i
-                pending_tool_calls = msg.get("tool_calls", [])
-                break
-
-        if pending_message_idx is None or not pending_tool_calls:
-            # No pending approvals found
-            if tool_decisions:
-                logging.warning(
-                    f"Received {len(tool_decisions)} tool decisions but no pending approvals found"
-                )
+        if not tool_decisions:
             return messages
 
         # Create decision lookup
-
+        decisions_by_tool_call_id = {
             decision.tool_call_id: decision for decision in tool_decisions
         }
 
-
-        pending_tool_ids = {tool_call["id"] for tool_call in pending_tool_calls}
-        invalid_decisions = [
-            decision.tool_call_id
-            for decision in tool_decisions
-            if decision.tool_call_id not in pending_tool_ids
-        ]
+        pending_tool_calls: list[ToolCallWithDecision] = []
 
-
-
-
-
+        for i in reversed(range(len(messages))):
+            msg = messages[i]
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                message_tool_calls = msg.get("tool_calls", [])
+                for tool_call in message_tool_calls:
+                    decision = decisions_by_tool_call_id.get(tool_call.get("id"), None)
+                    if tool_call.get("pending_approval"):
+                        del tool_call[
+                            "pending_approval"
+                        ]  # Cleanup so that a pending approval is not tagged on message in a future response
+                        pending_tool_calls.append(
+                            ToolCallWithDecision(
+                                tool_call=ChatCompletionMessageToolCall(**tool_call),
+                                decision=decision,
+                                message_index=i,
+                            )
+                        )
 
-
-
-
-
+        if not pending_tool_calls:
+            error_message = f"Received {len(tool_decisions)} tool decisions but no pending approvals found"
+            logging.error(error_message)
+            raise Exception(error_message)
 
+        for tool_call_with_decision in pending_tool_calls:
+            tool_call_message: dict
+            tool_call = tool_call_with_decision.tool_call
+            decision = tool_call_with_decision.decision
             if decision and decision.approved:
                 try:
-                    tool_call_obj = ChatCompletionMessageToolCall(**tool_call)
                     llm_tool_result = self._invoke_llm_tool_call(
-                        tool_to_call=
+                        tool_to_call=tool_call,
                         previous_tool_calls=[],
-                        trace_span=DummySpan(),
+                        trace_span=DummySpan(),  # TODO: replace with proper span
                         tool_number=None,
+                        user_approved=True,
                     )
-
+                    tool_call_message = llm_tool_result.as_tool_call_message()
 
                 except Exception as e:
                     logging.error(
-                        f"Failed to execute approved tool {
+                        f"Failed to execute approved tool {tool_call.id}: {e}"
                     )
-
-
-                            "tool_call_id": tool_call_id,
-                            "role": "tool",
-                            "name": tool_call["function"]["name"],
-                            "content": f"Tool execution failed: {str(e)}",
-                        }
-                    )
-            else:
-                # Tool was rejected or no decision found, add rejection message
-                messages.append(
-                    {
-                        "tool_call_id": tool_call_id,
+                    tool_call_message = {
+                        "tool_call_id": tool_call.id,
                         "role": "tool",
-                        "name": tool_call
-                        "content": "Tool execution
+                        "name": tool_call.function.name,
+                        "content": f"Tool execution failed: {str(e)}",
                     }
-
+            else:
+                # Tool was rejected or no decision found, add rejection message
+                tool_call_message = {
+                    "tool_call_id": tool_call.id,
+                    "role": "tool",
+                    "name": tool_call.function.name,
+                    "content": "Tool execution was denied by the user.",
+                }
+
+            # It is expected that the tool call result directly follows the tool call request from the LLM
+            # The API call may contain a user ask which is appended to the messages so we can't just append
+            # tool call results; they need to be inserted right after the llm's message requesting tool calls
+            messages.insert(
+                tool_call_with_decision.message_index + 1, tool_call_message
+            )
 
         return messages
 
@@ -427,12 +439,12 @@
             tools = None if i == max_steps else tools
             tool_choice = "auto" if tools else None
 
-
+            tokens = self.llm.count_tokens(messages=messages, tools=tools)
             max_context_size = self.llm.get_context_window_size()
             maximum_output_token = self.llm.get_maximum_output_token()
             perf_timing.measure("count tokens")
 
-            if (total_tokens + maximum_output_token) > max_context_size:
+            if (tokens.total_tokens + maximum_output_token) > max_context_size:
                 logging.warning("Token limit exceeded. Truncating tool responses.")
                 truncated_res = self.truncate_messages_to_fit_context(
                     messages, max_context_size, maximum_output_token
@@ -483,7 +495,7 @@
 
             if incorrect_tool_call:
                 logging.warning(
-                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-
+                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4.1' or other structured output compatible models. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
                 sentry_helper.capture_structured_output_incorrect_tool_call()
@@ -522,11 +534,17 @@
             )
             costs.total_cost += post_processing_cost
 
-        self.llm.
+        tokens = self.llm.count_tokens(messages=messages, tools=tools)
+
+        add_token_count_to_metadata(
+            tokens=tokens,
+            full_llm_response=full_response,
+            max_context_size=max_context_size,
+            maximum_output_token=maximum_output_token,
+            metadata=metadata,
+        )
         perf_timing.end(f"- completed in {i} iterations -")
-
-        metadata["max_tokens"] = max_context_size
-        metadata["max_output_tokens"] = maximum_output_token
+
         return LLMResult(
             result=post_processed_response,
             unprocessed_result=raw_response,
@@ -650,6 +668,7 @@
         tool_call_id: str,
         tool_name: str,
         tool_arguments: str,
+        user_approved: bool,
         previous_tool_calls: list[dict],
         tool_number: Optional[int] = None,
     ) -> ToolCallResult:
@@ -671,7 +690,7 @@
         tool_response = self._directly_invoke_tool_call(
             tool_name=tool_name,
             tool_params=tool_params,
-            user_approved=
+            user_approved=user_approved,
             tool_number=tool_number,
         )
 
@@ -716,6 +735,7 @@
         previous_tool_calls: list[dict],
         trace_span=None,
         tool_number=None,
+        user_approved: bool = False,
     ) -> ToolCallResult:
         if trace_span is None:
             trace_span = DummySpan()
@@ -748,6 +768,7 @@
             tool_arguments,
             previous_tool_calls=previous_tool_calls,
             tool_number=tool_number,
+            user_approved=user_approved,
         )
 
         prevent_overly_big_tool_response(
@@ -858,7 +879,7 @@
             messages,
             max_context_size,
             maximum_output_token,
-            self.llm.
+            self.llm.count_tokens,
         )
         if truncated_res.truncations:
             sentry_helper.capture_tool_truncations(truncated_res.truncations)
@@ -903,12 +924,12 @@
             tools = None if i == max_steps else tools
             tool_choice = "auto" if tools else None
 
-
+            tokens = self.llm.count_tokens(messages=messages, tools=tools)  # type: ignore
             max_context_size = self.llm.get_context_window_size()
             maximum_output_token = self.llm.get_maximum_output_token()
             perf_timing.measure("count tokens")
 
-            if (total_tokens + maximum_output_token) > max_context_size:
+            if (tokens.total_tokens + maximum_output_token) > max_context_size:
                 logging.warning("Token limit exceeded. Truncating tool responses.")
                 truncated_res = self.truncate_messages_to_fit_context(
                     messages, max_context_size, maximum_output_token
@@ -958,7 +979,7 @@
 
             if incorrect_tool_call:
                 logging.warning(
-                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-
+                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4.1' or other structured output compatible models. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
                 sentry_helper.capture_structured_output_incorrect_tool_call()
@@ -972,12 +993,18 @@
                 )
             )
 
+            tokens = self.llm.count_tokens(messages=messages, tools=tools)
+            add_token_count_to_metadata(
+                tokens=tokens,
+                full_llm_response=full_response,
+                max_context_size=max_context_size,
+                maximum_output_token=maximum_output_token,
+                metadata=metadata,
+            )
+            yield build_stream_event_token_count(metadata=metadata)
+
             tools_to_call = getattr(response_message, "tool_calls", None)
             if not tools_to_call:
-                self.llm.count_tokens_for_message(messages)
-                metadata["usage"] = get_llm_usage(full_response)
-                metadata["max_tokens"] = max_context_size
-                metadata["max_output_tokens"] = maximum_output_token
                 yield StreamMessage(
                     event=StreamEvents.ANSWER_END,
                     data={
@@ -993,7 +1020,11 @@
             if reasoning or message:
                 yield StreamMessage(
                     event=StreamEvents.AI_MESSAGE,
-                    data={
+                    data={
+                        "content": message,
+                        "reasoning": reasoning,
+                        "metadata": metadata,
+                    },
                 )
 
             perf_timing.measure("pre-tool-calls")
@@ -1069,23 +1100,11 @@
             # If we have approval required tools, end the stream with pending approvals
             if pending_approvals:
                 # Add assistant message with pending tool calls
-
-
-
-
-
-                            "id": result.tool_call_id,
-                            "type": "function",
-                            "function": {
-                                "name": result.tool_name,
-                                "arguments": json.dumps(result.result.params or {}),
-                            },
-                        }
-                        for result in approval_required_tools
-                    ],
-                    "pending_approval": True,
-                }
-                messages.append(assistant_msg)
+                for result in approval_required_tools:
+                    tool_call = self.find_assistant_tool_call_request(
+                        tool_call_id=result.tool_call_id, messages=messages
+                    )
+                    tool_call["pending_approval"] = True
 
                 # End stream with approvals required
                 yield StreamMessage(
@@ -1108,6 +1127,21 @@
                 f"Too many LLM calls - exceeded max_steps: {i}/{self.max_steps}"
             )
 
+    def find_assistant_tool_call_request(
+        self, tool_call_id: str, messages: list[dict[str, Any]]
+    ) -> dict[str, Any]:
+        for message in messages:
+            if message.get("role") == "assistant":
+                for tool_call in message.get("tool_calls", []):
+                    if tool_call.get("id") == tool_call_id:
+                        return tool_call
+
+        # Should not happen unless there is a bug.
+        # If we are here
+        raise Exception(
+            f"Failed to find assistant request for a tool_call in conversation history. tool_call_id={tool_call_id}"
+        )
+
 
 # TODO: consider getting rid of this entirely and moving templating into the cmds in holmes_cli.py
 class IssueInvestigator(ToolCallingLLM):
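Note on the pattern above: throughout tool_calling_llm.py the release replaces ad-hoc token bookkeeping with a single pre-call budget check - count tokens for the pending messages (and tool schemas), compare against the context window minus the reserved output budget, and truncate tool responses when the budget is exceeded. A minimal self-contained sketch of that check follows; TokenCount, count_tokens and truncate_tool_messages are illustrative stand-ins, not the package's API.

from dataclasses import dataclass
from typing import Callable

@dataclass
class TokenCount:
    total_tokens: int  # mirrors the .total_tokens field read throughout the diff

def ensure_messages_fit(
    messages: list,
    count_tokens: Callable,            # stand-in for llm.count_tokens(messages=..., tools=...)
    max_context_size: int,             # stand-in for llm.get_context_window_size()
    maximum_output_token: int,         # stand-in for llm.get_maximum_output_token()
    truncate_tool_messages: Callable,  # stand-in for truncate_messages_to_fit_context
) -> list:
    tokens: TokenCount = count_tokens(messages)
    # Same guard as the diff: input tokens plus reserved output tokens must fit the window.
    if (tokens.total_tokens + maximum_output_token) > max_context_size:
        return truncate_tool_messages(messages, max_context_size, maximum_output_token)
    return messages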
holmes/core/tools.py
CHANGED
@@ -158,6 +158,7 @@ class ToolParameter(BaseModel):
     required: bool = True
     properties: Optional[Dict[str, "ToolParameter"]] = None  # For object types
     items: Optional["ToolParameter"] = None  # For array item schemas
+    enum: Optional[List[str]] = None  # For restricting to specific values
 
 
 class ToolInvokeContext(BaseModel):
@@ -682,7 +683,26 @@ class Toolset(BaseModel):
     def check_prerequisites(self):
         self.status = ToolsetStatusEnum.ENABLED
 
-
+        # Sort prerequisites by type to fail fast on missing env vars before
+        # running slow commands (e.g., ArgoCD checks that timeout):
+        # 1. Static checks (instant)
+        # 2. Environment variable checks (instant, often required by commands)
+        # 3. Callable checks (variable speed)
+        # 4. Command checks (slowest - may timeout or hang)
+        def prereq_priority(prereq):
+            if isinstance(prereq, StaticPrerequisite):
+                return 0
+            elif isinstance(prereq, ToolsetEnvironmentPrerequisite):
+                return 1
+            elif isinstance(prereq, CallablePrerequisite):
+                return 2
+            elif isinstance(prereq, ToolsetCommandPrerequisite):
+                return 3
+            return 4  # Unknown types go last
+
+        sorted_prereqs = sorted(self.prerequisites, key=prereq_priority)
+
+        for prereq in sorted_prereqs:
             if isinstance(prereq, ToolsetCommandPrerequisite):
                 try:
                     command = self.interpolate_command(prereq.command)
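The new prereq_priority ordering in check_prerequisites means cheap checks run before expensive ones regardless of declaration order. A small standalone illustration with stand-in prerequisite classes (not the real holmes classes):

class StaticPrerequisite: ...
class ToolsetEnvironmentPrerequisite: ...
class CallablePrerequisite: ...
class ToolsetCommandPrerequisite: ...

def prereq_priority(prereq):
    # Same ranking as the diff: static, env var, callable, command, unknown last.
    order = [
        StaticPrerequisite,
        ToolsetEnvironmentPrerequisite,
        CallablePrerequisite,
        ToolsetCommandPrerequisite,
    ]
    for rank, cls in enumerate(order):
        if isinstance(prereq, cls):
            return rank
    return len(order)

prereqs = [ToolsetCommandPrerequisite(), StaticPrerequisite(), CallablePrerequisite()]
print([type(p).__name__ for p in sorted(prereqs, key=prereq_priority)])
# ['StaticPrerequisite', 'CallablePrerequisite', 'ToolsetCommandPrerequisite']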
holmes/core/tools_utils/tool_context_window_limiter.py
CHANGED
@@ -1,5 +1,8 @@
 from typing import Optional
-from holmes.common.env_vars import
+from holmes.common.env_vars import (
+    TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT,
+    TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS,
+)
 from holmes.core.llm import LLM
 from holmes.core.tools import StructuredToolResultStatus
 from holmes.core.models import ToolCallResult
@@ -16,8 +19,12 @@ def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int
 
 
 def get_max_token_count_for_single_tool(llm: LLM) -> int:
-    return
-
+    return min(
+        TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS,
+        get_pct_token_count(
+            percent_of_total_context_window=TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT,
+            llm=llm,
+        ),
     )
 
 
@@ -25,7 +32,9 @@ def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM)
     max_tokens_allowed = get_max_token_count_for_single_tool(llm)
 
     message = tool_call_result.as_tool_call_message()
-
+
+    tokens = llm.count_tokens(messages=[message])
+    messages_token = tokens.total_tokens
 
     if messages_token > max_tokens_allowed:
         relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
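get_max_token_count_for_single_tool now takes the smaller of an absolute token cap and a percentage of the model's context window. A quick worked example, assuming get_pct_token_count is a plain percentage of the window and using made-up values for the two environment variables (the real defaults live in holmes/common/env_vars.py and are not part of this diff):

# Illustrative values only, not the package defaults.
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS = 64_000
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = 20.0  # percent of the context window

context_window = 200_000  # e.g. a value llm.get_context_window_size() could return
pct_budget = int(context_window * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT / 100)  # 40_000
max_single_tool = min(TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS, pct_budget)
print(max_single_tool)  # 40000 - here the percentage cap is the binding limit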
holmes/interactive.py
CHANGED
@@ -480,10 +480,14 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No
         return
 
     # Calculate context statistics
-
+    tokens_metadata = ai.llm.count_tokens(
+        messages
+    )  # TODO: pass tools to also count tokens used by input tools
     max_context_size = ai.llm.get_context_window_size()
     max_output_tokens = ai.llm.get_maximum_output_token()
-    available_tokens =
+    available_tokens = (
+        max_context_size - tokens_metadata.total_tokens - max_output_tokens
+    )
 
     # Analyze token distribution by role and tool calls
     role_token_usage: DefaultDict[str, int] = defaultdict(int)
@@ -492,19 +496,21 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No
 
     for msg in messages:
         role = msg.get("role", "unknown")
-
-
+        message_tokens = ai.llm.count_tokens(
+            [msg]
+        )  # TODO: pass tools to also count tokens used by input tools
+        role_token_usage[role] += message_tokens.total_tokens
 
         # Track individual tool usage
         if role == "tool":
             tool_name = msg.get("name", "unknown_tool")
-            tool_token_usage[tool_name] +=
+            tool_token_usage[tool_name] += message_tokens.total_tokens
             tool_call_counts[tool_name] += 1
 
     # Display context information
     console.print(f"[bold {STATUS_COLOR}]Conversation Context:[/bold {STATUS_COLOR}]")
     console.print(
-        f" Context used: {total_tokens:,} / {max_context_size:,} tokens ({(total_tokens / max_context_size) * 100:.1f}%)"
+        f" Context used: {tokens_metadata.total_tokens:,} / {max_context_size:,} tokens ({(tokens_metadata.total_tokens / max_context_size) * 100:.1f}%)"
     )
     console.print(
         f" Space remaining: {available_tokens:,} for input ({(available_tokens / max_context_size) * 100:.1f}%) + {max_output_tokens:,} reserved for output ({(max_output_tokens / max_context_size) * 100:.1f}%)"
@@ -515,7 +521,11 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No
     for role in ["system", "user", "assistant", "tool"]:
         if role in role_token_usage:
             tokens = role_token_usage[role]
-            percentage = (
+            percentage = (
+                (tokens / tokens_metadata.total_tokens) * 100
+                if tokens_metadata.total_tokens > 0
+                else 0
+            )
             role_name = {
                 "system": "system prompt",
                 "user": "user messages",
holmes/plugins/prompts/_general_instructions.jinja2
CHANGED
@@ -12,8 +12,7 @@
 * do not stop investigating until you are at the final root cause you are able to find.
 * use the "five whys" methodology to find the root cause.
 * for example, if you found a problem in microservice A that is due to an error in microservice B, look at microservice B too and find the error in that.
-* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and
-* in this case, try to find substrings or search for the correct spellings
+* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and in this case, try to find substrings or search for the correct spellings
 * always provide detailed information like exact resource names, versions, labels, etc
 * even if you found the root cause, keep investigating to find other possible root causes and to gather data for the answer like exact names
 * if a runbook url is present you MUST fetch the runbook before beginning your investigation
holmes/plugins/toolsets/__init__.py
CHANGED
@@ -44,6 +44,9 @@ from holmes.plugins.toolsets.mcp.toolset_mcp import RemoteMCPToolset
 from holmes.plugins.toolsets.newrelic.newrelic import NewRelicToolset
 from holmes.plugins.toolsets.opensearch.opensearch import OpenSearchToolset
 from holmes.plugins.toolsets.opensearch.opensearch_logs import OpenSearchLogsToolset
+from holmes.plugins.toolsets.opensearch.opensearch_query_assist import (
+    OpenSearchQueryAssistToolset,
+)
 from holmes.plugins.toolsets.opensearch.opensearch_traces import OpenSearchTracesToolset
 from holmes.plugins.toolsets.rabbitmq.toolset_rabbitmq import RabbitMQToolset
 from holmes.plugins.toolsets.robusta.robusta import RobustaToolset
@@ -93,6 +96,7 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
         DatadogRDSToolset(),
         OpenSearchLogsToolset(),
         OpenSearchTracesToolset(),
+        OpenSearchQueryAssistToolset(),
         CoralogixLogsToolset(),
         RabbitMQToolset(),
         GitToolset(),
holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py
CHANGED
@@ -42,7 +42,6 @@ class MongoDBAtlasToolset(Toolset):
     def __init__(self):
         super().__init__(
             prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
-            experimental=True,
             tools=[
                 ReturnProjectAlerts(toolset=self),
                 ReturnProjectProcesses(toolset=self),
holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py
CHANGED
@@ -60,7 +60,6 @@ class AzureSQLToolset(BaseAzureSQLToolset):
             docs_url="https://kagi.com/proxy/png-clipart-microsoft-sql-server-microsoft-azure-sql-database-microsoft-text-logo-thumbnail.png?c=4Sg1bvcUGOrhnDzXgoBBa0G0j27ykgskX4a8cLrZp_quzqlpVGVG02OqQtezTxy7lB6ydmTKgbVAn_F7BxofxK6LKKUZSpjJ1huIAsXPVaXyakO4sWXFiX0Wz_8WjkA0AIlO_oFfW31AKaj5RcvGcr3siy0n5kW-GcqdpeBWsmm_huxUT6RycULFCDFBwuUzHvVl5TW3cYqlMxT8ecPZfg%3D%3D",
             icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/f/f7/Azure_SQL_Database_logo.svg/1200px-Azure_SQL_Database_logo.svg.png",
             tags=[ToolsetTag.CORE],
-            experimental=True,
             tools=[
                 AnalyzeDatabaseHealthStatus(self),
                 AnalyzeDatabasePerformance(self),
holmes/plugins/toolsets/grafana/grafana_api.py
CHANGED
@@ -27,7 +27,7 @@ def grafana_health_check(config: GrafanaConfig) -> Tuple[bool, str]:
         response.raise_for_status()
         return True, ""
     except Exception as e:
-        logging.
+        logging.debug(f"Failed to fetch grafana health status at {url}", exc_info=True)
         error_msg = f"Failed to fetch grafana health status at {url}. {str(e)}"
 
         # Add helpful hint if this looks like a common misconfiguration
holmes/plugins/toolsets/investigator/core_investigation.py
CHANGED
@@ -29,7 +29,11 @@ class TodoWriteTool(Tool):
                 properties={
                     "id": ToolParameter(type="string", required=True),
                     "content": ToolParameter(type="string", required=True),
-                    "status": ToolParameter(
+                    "status": ToolParameter(
+                        type="string",
+                        required=True,
+                        enum=["pending", "in_progress", "completed"],
+                    ),
                 },
             ),
         ),
@@ -58,22 +62,20 @@
         content_width = max(max_content_width, len("Content"))
         status_width = max(max_status_display_width, len("Status"))
 
-        # Build table
         separator = f"+{'-' * (id_width + 2)}+{'-' * (content_width + 2)}+{'-' * (status_width + 2)}+"
         header = f"| {'ID':<{id_width}} | {'Content':<{content_width}} | {'Status':<{status_width}} |"
-
-        # Log the table
-        logging.info("Updated Investigation Tasks:")
-        logging.info(separator)
-        logging.info(header)
-        logging.info(separator)
+        tasks_to_display = []
 
         for task in tasks:
             status_display = f"{status_icons[task.status.value]} {task.status.value}"
             row = f"| {task.id:<{id_width}} | {task.content:<{content_width}} | {status_display:<{status_width}} |"
-
+            tasks_to_display.append(row)
 
-        logging.info(
+        logging.info(
+            f"Task List:\n{separator}\n{header}\n{separator}\n"
+            + "\n".join(tasks_to_display)
+            + f"\n{separator}"
+        )
 
     def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
@@ -90,7 +92,7 @@
             )
             tasks.append(task)
 
-            logging.
+            logging.debug(f"Tasks: {len(tasks)}")
 
             self.print_tasks_table(tasks)
             formatted_tasks = format_tasks(tasks)
@@ -116,8 +118,7 @@
         )
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-
-        return f"Write {todos} investigation tasks"
+        return "Update investigation tasks"
 
 
 class CoreInvestigationToolset(Toolset):
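The enum values added to the status ToolParameter above rely on the new enum field introduced in holmes/core/tools.py. The exact serialization is handled in holmes/core/openai_formatting.py (changed in this release but not shown here); a plausible sketch of the mapping, assuming the field becomes the standard JSON-schema enum keyword in the tool definition sent to the model:

from typing import List, Optional

def to_json_schema_property(param_type: str, enum: Optional[List[str]] = None) -> dict:
    # Hypothetical helper - the real conversion lives in holmes/core/openai_formatting.py.
    prop: dict = {"type": param_type}
    if enum:
        prop["enum"] = enum  # restricts the values the model may pass for this argument
    return prop

print(to_json_schema_property("string", enum=["pending", "in_progress", "completed"]))
# {'type': 'string', 'enum': ['pending', 'in_progress', 'completed']}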
|