holmesgpt 0.12.6__py3-none-any.whl → 0.13.1__py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their respective public registries, and is provided for informational purposes only.
Potentially problematic release: this version of holmesgpt might be problematic.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +19 -1
- holmes/common/env_vars.py +17 -0
- holmes/config.py +69 -9
- holmes/core/conversations.py +11 -0
- holmes/core/investigation.py +16 -3
- holmes/core/investigation_structured_output.py +12 -0
- holmes/core/llm.py +13 -1
- holmes/core/models.py +9 -1
- holmes/core/openai_formatting.py +72 -12
- holmes/core/prompt.py +13 -0
- holmes/core/supabase_dal.py +3 -0
- holmes/core/todo_manager.py +88 -0
- holmes/core/tool_calling_llm.py +230 -157
- holmes/core/tools.py +10 -1
- holmes/core/tools_utils/tool_executor.py +7 -2
- holmes/core/tools_utils/toolset_utils.py +7 -2
- holmes/core/toolset_manager.py +1 -5
- holmes/core/tracing.py +4 -3
- holmes/interactive.py +1 -0
- holmes/main.py +9 -2
- holmes/plugins/prompts/__init__.py +7 -1
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
- holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +14 -0
- holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
- holmes/plugins/prompts/generic_ask.jinja2 +4 -3
- holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -0
- holmes/plugins/runbooks/CLAUDE.md +85 -0
- holmes/plugins/runbooks/README.md +24 -0
- holmes/plugins/toolsets/__init__.py +19 -6
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
- holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
- holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
- holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
- holmes/plugins/toolsets/bash/aws/constants.py +529 -0
- holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
- holmes/plugins/toolsets/bash/azure/constants.py +339 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
- holmes/plugins/toolsets/bash/bash_toolset.py +47 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
- holmes/plugins/toolsets/bash/common/stringify.py +14 -1
- holmes/plugins/toolsets/bash/common/validators.py +91 -0
- holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
- holmes/plugins/toolsets/bash/docker/constants.py +255 -0
- holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
- holmes/plugins/toolsets/bash/helm/constants.py +92 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
- holmes/plugins/toolsets/bash/parse_command.py +106 -32
- holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
- holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
- holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
- holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
- holmes/plugins/toolsets/bash/utilities/head.py +12 -0
- holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
- holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
- holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
- holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
- holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
- holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
- holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
- holmes/plugins/toolsets/coralogix/api.py +6 -6
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
- holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
- holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
- holmes/plugins/toolsets/git.py +15 -15
- holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
- holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
- holmes/plugins/toolsets/internet/internet.py +2 -1
- holmes/plugins/toolsets/internet/notion.py +2 -1
- holmes/plugins/toolsets/investigator/__init__.py +0 -0
- holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
- holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
- holmes/plugins/toolsets/investigator/model.py +15 -0
- holmes/plugins/toolsets/kafka.py +14 -7
- holmes/plugins/toolsets/kubernetes_logs.py +454 -25
- holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
- holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
- holmes/plugins/toolsets/newrelic.py +8 -3
- holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
- holmes/plugins/toolsets/prometheus/prometheus.py +179 -44
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
- holmes/plugins/toolsets/robusta/robusta.py +4 -4
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
- holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
- holmes/plugins/toolsets/utils.py +8 -1
- holmes/utils/console/logging.py +6 -1
- holmes/utils/llms.py +20 -0
- holmes/utils/stream.py +90 -0
- {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/METADATA +47 -34
- {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/RECORD +123 -91
- holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
- holmes/utils/robusta.py +0 -9
- {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/WHEEL +0 -0
- {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/entry_points.txt +0 -0
holmes/core/tool_calling_llm.py
CHANGED
@@ -2,32 +2,25 @@ import concurrent.futures
 import json
 import logging
 import textwrap
+import uuid
 from typing import Dict, List, Optional, Type, Union
 
-import requests  # type: ignore
 import sentry_sdk
-from litellm.types.utils import Message
 from openai import BadRequestError
 from openai.types.chat.chat_completion_message_tool_call import (
     ChatCompletionMessageToolCall,
 )
-from pydantic import BaseModel
-from pydantic_core import from_json
+from pydantic import BaseModel, Field
 from rich.console import Console
 
-from holmes.common.env_vars import
-
-    STREAM_CHUNKS_PER_PARSE,
-    TEMPERATURE,
-)
+from holmes.common.env_vars import TEMPERATURE, MAX_OUTPUT_TOKEN_RESERVATION
+
 from holmes.core.investigation_structured_output import (
     DEFAULT_SECTIONS,
     REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
     InputSectionsDataType,
     get_output_format_for_investigation,
     is_response_an_incorrect_tool_call,
-    parse_markdown_into_sections_from_hash_sign,
-    process_response_into_sections,
 )
 from holmes.core.issue import Issue
 from holmes.core.llm import LLM
@@ -45,6 +38,82 @@ from holmes.utils.tags import format_tags_in_string, parse_messages_tags
 from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
+from holmes.utils.stream import StreamEvents, StreamMessage
+from holmes.core.todo_manager import (
+    get_todo_manager,
+)
+
+# Create a named logger for cost tracking
+cost_logger = logging.getLogger("holmes.costs")
+
+
+class LLMCosts(BaseModel):
+    """Tracks cost and token usage for LLM calls."""
+
+    total_cost: float = 0.0
+    total_tokens: int = 0
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+
+
+def _extract_cost_from_response(full_response) -> float:
+    """Extract cost value from LLM response.
+
+    Args:
+        full_response: The raw LLM response object
+
+    Returns:
+        The cost as a float, or 0.0 if not available
+    """
+    try:
+        cost_value = (
+            full_response._hidden_params.get("response_cost", 0)
+            if hasattr(full_response, "_hidden_params")
+            else 0
+        )
+        # Ensure cost is a float
+        return float(cost_value) if cost_value is not None else 0.0
+    except Exception:
+        return 0.0
+
+
+def _process_cost_info(
+    full_response, costs: Optional[LLMCosts] = None, log_prefix: str = "LLM call"
+) -> None:
+    """Process cost and token information from LLM response.
+
+    Logs the cost information and optionally accumulates it into a costs object.
+
+    Args:
+        full_response: The raw LLM response object
+        costs: Optional LLMCosts object to accumulate costs into
+        log_prefix: Prefix for logging messages (e.g., "LLM call", "Post-processing")
+    """
+    try:
+        cost = _extract_cost_from_response(full_response)
+        usage = getattr(full_response, "usage", {})
+
+        if usage:
+            prompt_toks = usage.get("prompt_tokens", 0)
+            completion_toks = usage.get("completion_tokens", 0)
+            total_toks = usage.get("total_tokens", 0)
+            cost_logger.debug(
+                f"{log_prefix} cost: ${cost:.6f} | Tokens: {prompt_toks} prompt + {completion_toks} completion = {total_toks} total"
+            )
+            # Accumulate costs and tokens if costs object provided
+            if costs:
+                costs.total_cost += cost
+                costs.prompt_tokens += prompt_toks
+                costs.completion_tokens += completion_toks
+                costs.total_tokens += total_toks
+        elif cost > 0:
+            cost_logger.debug(
+                f"{log_prefix} cost: ${cost:.6f} | Token usage not available"
+            )
+            if costs:
+                costs.total_cost += cost
+    except Exception as e:
+        logging.debug(f"Could not extract cost information: {e}")
 
 
 def format_tool_result_data(tool_result: StructuredToolResult) -> str:
@@ -94,12 +163,13 @@ def truncate_messages_to_fit_context(
 
     tool_call_messages = [message for message in messages if message["role"] == "tool"]
 
-
+    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+    if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
         logging.error(
             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
         )
         raise Exception(
-            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size -
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
         )
 
     if len(tool_call_messages) == 0:
@@ -188,11 +258,11 @@ class ToolCallResult(BaseModel):
         }
 
 
-class LLMResult(
+class LLMResult(LLMCosts):
     tool_calls: Optional[List[ToolCallResult]] = None
     result: Optional[str] = None
     unprocessed_result: Optional[str] = None
-    instructions: List[str] =
+    instructions: List[str] = Field(default_factory=list)
     # TODO: clean up these two
     prompt: Optional[str] = None
     messages: Optional[List[dict]] = None
@@ -213,6 +283,7 @@ class ToolCallingLLM:
         self.max_steps = max_steps
         self.tracer = tracer
         self.llm = llm
+        self.investigation_id = str(uuid.uuid4())
 
     def prompt_call(
         self,
@@ -221,6 +292,7 @@ class ToolCallingLLM:
         post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         sections: Optional[InputSectionsDataType] = None,
+        trace_span=DummySpan(),
     ) -> LLMResult:
         messages = [
             {"role": "system", "content": system_prompt},
@@ -232,6 +304,7 @@ class ToolCallingLLM:
             response_format,
             user_prompt=user_prompt,
             sections=sections,
+            trace_span=trace_span,
         )
 
     def messages_call(
@@ -258,7 +331,11 @@ class ToolCallingLLM:
     ) -> LLMResult:
         perf_timing = PerformanceTiming("tool_calling_llm.call")
         tool_calls = []  # type: ignore
-
+        costs = LLMCosts()
+
+        tools = self.tool_executor.get_all_tools_openai_format(
+            target_model=self.llm.model
+        )
         perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
         i = 0
@@ -296,6 +373,9 @@ class ToolCallingLLM:
                 )
                 logging.debug(f"got response {full_response.to_json()}")  # type: ignore
 
+                # Extract and accumulate cost information
+                _process_cost_info(full_response, costs, "LLM call")
+
                 perf_timing.measure("llm.completion")
             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
             except BadRequestError as e:
@@ -349,11 +429,14 @@ class ToolCallingLLM:
                 if post_process_prompt and user_prompt:
                     logging.info("Running post processing on investigation.")
                     raw_response = text_response
-                    post_processed_response =
-
-
-
+                    post_processed_response, post_processing_cost = (
+                        self._post_processing_call(
+                            prompt=user_prompt,
+                            investigation=raw_response,
+                            user_prompt=post_process_prompt,
+                        )
                     )
+                    costs.total_cost += post_processing_cost
 
                     perf_timing.end(f"- completed in {i} iterations -")
                     return LLMResult(
@@ -362,6 +445,7 @@ class ToolCallingLLM:
                         tool_calls=tool_calls,
                         prompt=json.dumps(messages, indent=2),
                         messages=messages,
+                        **costs.model_dump(),  # Include all cost fields
                     )
 
                 perf_timing.end(f"- completed in {i} iterations -")
@@ -370,6 +454,7 @@ class ToolCallingLLM:
                     tool_calls=tool_calls,
                     prompt=json.dumps(messages, indent=2),
                     messages=messages,
+                    **costs.model_dump(),  # Include all cost fields
                 )
 
             if text_response and text_response.strip():
@@ -400,6 +485,9 @@ class ToolCallingLLM:
 
                     perf_timing.measure(f"tool completed {tool_call_result.tool_name}")
 
+            # Update the tool number offset for the next iteration
+            tool_number_offset += len(tools_to_call)
+
             # Add a blank line after all tools in this batch complete
             if tools_to_call:
                 logging.info("")
@@ -413,20 +501,41 @@ class ToolCallingLLM:
         trace_span=DummySpan(),
         tool_number=None,
     ) -> ToolCallResult:
-
+        # Handle the union type - ChatCompletionMessageToolCall can be either
+        # ChatCompletionMessageFunctionToolCall (with 'function' field and type='function')
+        # or ChatCompletionMessageCustomToolCall (with 'custom' field and type='custom').
+        # We use hasattr to check for the 'function' attribute as it's more flexible
+        # and doesn't require importing the specific type.
+        if hasattr(tool_to_call, "function"):
+            tool_name = tool_to_call.function.name
+            tool_arguments = tool_to_call.function.arguments
+        else:
+            # This is a custom tool call - we don't support these currently
+            logging.error(f"Unsupported custom tool call: {tool_to_call}")
+            return ToolCallResult(
+                tool_call_id=tool_to_call.id,
+                tool_name="unknown",
+                description="NA",
+                result=StructuredToolResult(
+                    status=ToolResultStatus.ERROR,
+                    error="Custom tool calls are not supported",
+                    params=None,
+                ),
+            )
+
         tool_params = None
         try:
-            tool_params = json.loads(
+            tool_params = json.loads(tool_arguments)
         except Exception:
            logging.warning(
-                f"Failed to parse arguments for tool: {tool_name}. args: {
+                f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
            )
         tool_call_id = tool_to_call.id
         tool = self.tool_executor.get_tool_by_name(tool_name)
 
         if (not tool) or (tool_params is None):
             logging.warning(
-                f"Skipping tool execution for {tool_name}: args: {
+                f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
             )
             return ToolCallResult(
                 tool_call_id=tool_call_id,
@@ -516,7 +625,7 @@ class ToolCallingLLM:
         investigation,
         user_prompt: Optional[str] = None,
         system_prompt: str = "You are an AI assistant summarizing Kubernetes issues.",
-    ) -> Optional[str]:
+    ) -> tuple[Optional[str], float]:
         try:
             user_prompt = ToolCallingLLM.__load_post_processing_user_prompt(
                 prompt, investigation, user_prompt
@@ -535,10 +644,18 @@ class ToolCallingLLM:
             ]
             full_response = self.llm.completion(messages=messages, temperature=0)
             logging.debug(f"Post processing response {full_response}")
-
+
+            # Extract and log cost information for post-processing
+            post_processing_cost = _extract_cost_from_response(full_response)
+            if post_processing_cost > 0:
+                cost_logger.debug(
+                    f"Post-processing LLM cost: ${post_processing_cost:.6f}"
+                )
+
+            return full_response.choices[0].message.content, post_processing_cost  # type: ignore
         except Exception:
             logging.exception("Failed to run post processing", exc_info=True)
-            return investigation
+            return investigation, 0.0
 
     @sentry_sdk.trace
     def truncate_messages_to_fit_context(
@@ -553,61 +670,40 @@ class ToolCallingLLM:
 
     def call_stream(
         self,
-        system_prompt: str,
+        system_prompt: str = "",
         user_prompt: Optional[str] = None,
-        stream: bool = False,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         sections: Optional[InputSectionsDataType] = None,
-
+        msgs: Optional[list[dict]] = None,
     ):
-
-
-
-
-
-
-
-
-
-
-
-            "ai_answer",
-            {
-                "sections": parse_markdown_into_sections_from_hash_sign(
-                    buffer
-                )
-                or {},
-                "analysis": buffer,
-                "instructions": runbooks or [],
-            },
-        )
-
-        yield create_sse_message(
-            "ai_answer_end",
-            {
-                "sections": parse_markdown_into_sections_from_hash_sign(buffer)
-                or {},
-                "analysis": buffer,
-                "instructions": runbooks or [],
-            },
-        )
-
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ]
+        """
+        This function DOES NOT call llm.completion(stream=true).
+        This function streams holmes one iteration at a time instead of waiting for all iterations to complete.
+        """
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        if user_prompt:
+            messages.append({"role": "user", "content": user_prompt})
+        if msgs:
+            messages.extend(msgs)
         perf_timing = PerformanceTiming("tool_calling_llm.call")
-
+        tool_calls: list[dict] = []
+        tools = self.tool_executor.get_all_tools_openai_format(
+            target_model=self.llm.model
+        )
         perf_timing.measure("get_all_tools_openai_format")
+        max_steps = self.max_steps
         i = 0
-
-
+        tool_number_offset = 0
+
+        while i < max_steps:
             i += 1
             perf_timing.measure(f"start iteration {i}")
             logging.debug(f"running iteration {i}")
 
-            tools =
-            tool_choice =
+            tools = None if i == max_steps else tools
+            tool_choice = "auto" if tools else None
 
             total_tokens = self.llm.count_tokens_for_message(messages)  # type: ignore
             max_context_size = self.llm.get_context_window_size()
@@ -623,90 +719,47 @@ class ToolCallingLLM:
 
            logging.debug(f"sending messages={messages}\n\ntools={tools}")
            try:
-
-
-
-
-
-
-
-
-
-                        "stream": True,
-                        "drop_param": True,
-                    },
-                    headers={"Authorization": f"Bearer {self.llm.api_key}"},  # type: ignore
-                    stream=True,
-                )
-                response.raise_for_status()
-                it = response.iter_content(chunk_size=None, decode_unicode=True)
-                peek_chunk = from_json(next(it))
-                tools = peek_chunk.get("tool_calls")
-
-                if not tools:
-                    yield from stream_analysis(it, peek_chunk)
-                    perf_timing.measure("llm.completion")
-                    return
-
-                response_message = Message(**peek_chunk)
-                tools_to_call = response_message.tool_calls
-            else:
-                full_response = self.llm.completion(
-                    messages=parse_messages_tags(messages),  # type: ignore
-                    tools=tools,
-                    tool_choice=tool_choice,
-                    temperature=TEMPERATURE,
-                    response_format=response_format,
-                    stream=False,
-                    drop_params=True,
-                )
-                perf_timing.measure("llm.completion")
-
-                response_message = full_response.choices[0].message  # type: ignore
-                if response_message and response_format:
-                    # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
-                    dict_response = json.loads(full_response.to_json())  # type: ignore
-                    incorrect_tool_call = is_response_an_incorrect_tool_call(
-                        sections, dict_response.get("choices", [{}])[0]
-                    )
+                full_response = self.llm.completion(
+                    messages=parse_messages_tags(messages),  # type: ignore
+                    tools=tools,
+                    tool_choice=tool_choice,
+                    response_format=response_format,
+                    temperature=TEMPERATURE,
+                    stream=False,
+                    drop_params=True,
+                )
 
-
-
-                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
-                )
-                # disable structured output going forward and and retry
-                response_format = None
-                i -= 1
-                continue
-
-            tools_to_call = getattr(response_message, "tool_calls", None)
-            if not tools_to_call:
-                (text_response, sections) = process_response_into_sections(  # type: ignore
-                    response_message.content
-                )
+                # Log cost information for this iteration (no accumulation in streaming)
+                _process_cost_info(full_response, log_prefix="LLM iteration")
 
-
-                "ai_answer_end",
-                {
-                    "sections": sections or {},
-                    "analysis": text_response,
-                    "instructions": runbooks or [],
-                },
-            )
-            return
+                perf_timing.measure("llm.completion")
            # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
            except BadRequestError as e:
-                logging.exception("Bad completion request")
                if "Unrecognized request arguments supplied: tool_choice, tools" in str(
                    e
                ):
                    raise Exception(
                        "The Azure model you chose is not supported. Model version 1106 and higher required."
+                    ) from e
+                else:
+                    raise
+
+            response_message = full_response.choices[0].message  # type: ignore
+            if response_message and response_format:
+                # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
+                dict_response = json.loads(full_response.to_json())  # type: ignore
+                incorrect_tool_call = is_response_an_incorrect_tool_call(
+                    sections, dict_response.get("choices", [{}])[0]
+                )
+
+                if incorrect_tool_call:
+                    logging.warning(
+                        "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                    )
-
-
-
-
+                    # disable structured output going forward and and retry
+                    response_format = None
+                    max_steps = max_steps + 1
+                    continue
 
            messages.append(
                response_message.model_dump(
@@ -714,6 +767,22 @@ class ToolCallingLLM:
                )
            )
 
+            tools_to_call = getattr(response_message, "tool_calls", None)
+            if not tools_to_call:
+                yield StreamMessage(
+                    event=StreamEvents.ANSWER_END,
+                    data={"content": response_message.content, "messages": messages},
+                )
+                return
+
+            reasoning = getattr(response_message, "reasoning_content", None)
+            message = response_message.content
+            if reasoning or message:
+                yield StreamMessage(
+                    event=StreamEvents.AI_MESSAGE,
+                    data={"content": message, "reasoning": reasoning},
+                )
+
            perf_timing.measure("pre-tool-calls")
            with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                futures = []
@@ -724,11 +793,12 @@ class ToolCallingLLM:
                            tool_to_call=t,  # type: ignore
                            previous_tool_calls=tool_calls,
                            trace_span=DummySpan(),  # Streaming mode doesn't support tracing yet
-                            tool_number=tool_index,
+                            tool_number=tool_number_offset + tool_index,
                        )
                    )
-                    yield
-
+                    yield StreamMessage(
+                        event=StreamEvents.START_TOOL,
+                        data={"tool_name": t.function.name, "id": t.id},
                    )
 
                for future in concurrent.futures.as_completed(futures):
@@ -739,13 +809,13 @@ class ToolCallingLLM:
 
                    perf_timing.measure(f"tool completed {tool_call_result.tool_name}")
 
-
-
+                    yield StreamMessage(
+                        event=StreamEvents.TOOL_RESULT,
+                        data=tool_call_result.as_streaming_tool_result_response(),
                    )
 
-
-
-                )
+            # Update the tool number offset for the next iteration
+            tool_number_offset += len(tools_to_call)
 
        raise Exception(
            f"Too many LLM calls - exceeded max_steps: {i}/{self.max_steps}"
@@ -782,6 +852,7 @@ class IssueInvestigator(ToolCallingLLM):
        global_instructions: Optional[Instructions] = None,
        post_processing_prompt: Optional[str] = None,
        sections: Optional[InputSectionsDataType] = None,
+        trace_span=DummySpan(),
    ) -> LLMResult:
        runbooks = self.runbook_manager.get_instructions_for_issue(issue)
 
@@ -823,6 +894,9 @@ class IssueInvestigator(ToolCallingLLM):
                "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
            )
 
+        todo_manager = get_todo_manager()
+        todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
+
        system_prompt = load_and_render_prompt(
            prompt,
            {
@@ -831,6 +905,8 @@ class IssueInvestigator(ToolCallingLLM):
                "structured_output": request_structured_output_from_llm,
                "toolsets": self.tool_executor.toolsets,
                "cluster_name": self.cluster_name,
+                "todo_list": todo_context,
+                "investigation_id": self.investigation_id,
            },
        )
 
@@ -865,10 +941,7 @@ class IssueInvestigator(ToolCallingLLM):
            post_processing_prompt,
            response_format=response_format,
            sections=sections,
+            trace_span=trace_span,
        )
        res.instructions = runbooks
        return res
-
-
-def create_sse_message(event_type: str, data: dict = {}):
-    return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
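The release threads cost accounting through every completion call: each response's litellm cost and token usage are folded into an LLMCosts object, and LLMResult now inherits from it. Below is a minimal sketch of how these new helpers behave; the FakeResponse class and its numbers are illustrative assumptions, not part of the package, and only mimic the attributes the helpers read (_hidden_params and usage).

from holmes.core.tool_calling_llm import LLMCosts, _process_cost_info

class FakeResponse:
    """Stand-in for a litellm response; only the fields the cost helpers read."""
    def __init__(self, cost: float, prompt_tokens: int, completion_tokens: int):
        # litellm exposes the computed price under _hidden_params["response_cost"]
        self._hidden_params = {"response_cost": cost}
        self.usage = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }

costs = LLMCosts()
# Two simulated iterations of the investigation loop accumulate into one object
_process_cost_info(FakeResponse(0.0021, 1200, 350), costs, "LLM call")
_process_cost_info(FakeResponse(0.0009, 600, 120), costs, "LLM call")
print(f"${costs.total_cost:.4f} across {costs.total_tokens} tokens")  # $0.0030 across 2270 tokens

Because LLMResult subclasses LLMCosts, these totals surface directly on the result returned by prompt_call and messages_call via **costs.model_dump().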
holmes/core/tools.py
CHANGED
@@ -51,6 +51,7 @@ class StructuredToolResult(BaseModel):
     url: Optional[str] = None
     invocation: Optional[str] = None
     params: Optional[Dict] = None
+    icon_url: Optional[str] = None
 
     def get_stringified_data(self) -> str:
         if self.data is None:
@@ -121,6 +122,8 @@ class ToolParameter(BaseModel):
     description: Optional[str] = None
     type: str = "string"
     required: bool = True
+    properties: Optional[Dict[str, "ToolParameter"]] = None  # For object types
+    items: Optional["ToolParameter"] = None  # For array item schemas
 
 
 class Tool(ABC, BaseModel):
@@ -131,12 +134,17 @@ class Tool(ABC, BaseModel):
         None  # templated string to show to the user describing this tool invocation (not seen by llm)
     )
     additional_instructions: Optional[str] = None
+    icon_url: Optional[str] = Field(
+        default=None,
+        description="The URL of the icon for the tool, if None will get toolset icon",
+    )
 
-    def get_openai_format(self):
+    def get_openai_format(self, target_model: str):
         return format_tool_to_open_ai_standard(
             tool_name=self.name,
             tool_description=self.description,
             tool_parameters=self.parameters,
+            target_model=target_model,
         )
 
     def invoke(
@@ -148,6 +156,7 @@ class Tool(ABC, BaseModel):
         )
         start_time = time.time()
         result = self._invoke(params)
+        result.icon_url = self.icon_url
         elapsed = time.time() - start_time
         output_str = (
             result.get_stringified_data()

holmes/core/tools_utils/tool_executor.py
CHANGED

@@ -38,6 +38,8 @@ class ToolExecutor:
         self.tools_by_name: dict[str, Tool] = {}
         for ts in toolsets_by_name.values():
             for tool in ts.tools:
+                if tool.icon_url is None and ts.icon_url is not None:
+                    tool.icon_url = ts.icon_url
                 if tool.name in self.tools_by_name:
                     logging.warning(
                         f"Overriding existing tool '{tool.name} with new tool from {ts.name} at {ts.path}'!"
@@ -62,5 +64,8 @@ class ToolExecutor:
         return None
 
     @sentry_sdk.trace
-    def get_all_tools_openai_format(self):
-        return [
+    def get_all_tools_openai_format(self, target_model: str):
+        return [
+            tool.get_openai_format(target_model=target_model)
+            for tool in self.tools_by_name.values()
+        ]