holmesgpt 0.13.0__py3-none-any.whl → 0.13.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +11 -0
- holmes/config.py +3 -1
- holmes/core/conversations.py +0 -11
- holmes/core/investigation.py +0 -6
- holmes/core/llm.py +63 -2
- holmes/core/prompt.py +0 -2
- holmes/core/supabase_dal.py +2 -2
- holmes/core/todo_tasks_formatter.py +51 -0
- holmes/core/tool_calling_llm.py +277 -101
- holmes/core/tools.py +20 -4
- holmes/core/toolset_manager.py +1 -5
- holmes/core/tracing.py +1 -1
- holmes/interactive.py +63 -2
- holmes/main.py +7 -2
- holmes/plugins/prompts/_fetch_logs.jinja2 +4 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +3 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +3 -13
- holmes/plugins/runbooks/CLAUDE.md +85 -0
- holmes/plugins/runbooks/README.md +24 -0
- holmes/plugins/toolsets/__init__.py +5 -1
- holmes/plugins/toolsets/argocd.yaml +1 -1
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +18 -6
- holmes/plugins/toolsets/aws.yaml +9 -5
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -1
- holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
- holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
- holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
- holmes/plugins/toolsets/bash/aws/constants.py +529 -0
- holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
- holmes/plugins/toolsets/bash/azure/constants.py +339 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
- holmes/plugins/toolsets/bash/bash_toolset.py +62 -17
- holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
- holmes/plugins/toolsets/bash/common/stringify.py +14 -1
- holmes/plugins/toolsets/bash/common/validators.py +91 -0
- holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
- holmes/plugins/toolsets/bash/docker/constants.py +255 -0
- holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
- holmes/plugins/toolsets/bash/helm/constants.py +92 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
- holmes/plugins/toolsets/bash/parse_command.py +106 -32
- holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
- holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
- holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
- holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
- holmes/plugins/toolsets/bash/utilities/head.py +12 -0
- holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
- holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
- holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
- holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
- holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
- holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
- holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
- holmes/plugins/toolsets/confluence.yaml +1 -1
- holmes/plugins/toolsets/coralogix/api.py +3 -1
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +4 -4
- holmes/plugins/toolsets/coralogix/utils.py +41 -14
- holmes/plugins/toolsets/datadog/datadog_api.py +45 -2
- holmes/plugins/toolsets/datadog/datadog_general_instructions.jinja2 +208 -0
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +43 -0
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +12 -9
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +722 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +17 -6
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +15 -7
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +6 -2
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +9 -3
- holmes/plugins/toolsets/docker.yaml +1 -1
- holmes/plugins/toolsets/git.py +15 -5
- holmes/plugins/toolsets/grafana/toolset_grafana.py +25 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +5 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -32
- holmes/plugins/toolsets/helm.yaml +1 -1
- holmes/plugins/toolsets/internet/internet.py +4 -2
- holmes/plugins/toolsets/internet/notion.py +4 -2
- holmes/plugins/toolsets/investigator/core_investigation.py +5 -17
- holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +1 -5
- holmes/plugins/toolsets/kafka.py +19 -7
- holmes/plugins/toolsets/kubernetes.yaml +5 -5
- holmes/plugins/toolsets/kubernetes_logs.py +4 -4
- holmes/plugins/toolsets/kubernetes_logs.yaml +1 -1
- holmes/plugins/toolsets/logging_utils/logging_api.py +15 -2
- holmes/plugins/toolsets/mcp/toolset_mcp.py +3 -1
- holmes/plugins/toolsets/newrelic.py +8 -4
- holmes/plugins/toolsets/opensearch/opensearch.py +13 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +4 -4
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +9 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +198 -57
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +7 -3
- holmes/plugins/toolsets/robusta/robusta.py +10 -4
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -2
- holmes/plugins/toolsets/servicenow/servicenow.py +9 -3
- holmes/plugins/toolsets/slab.yaml +1 -1
- holmes/utils/console/logging.py +6 -1
- {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/METADATA +3 -2
- {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/RECORD +116 -90
- holmes/core/todo_manager.py +0 -88
- holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
- {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/entry_points.txt +0 -0
holmes/core/tool_calling_llm.py
CHANGED
@@ -2,18 +2,22 @@ import concurrent.futures
 import json
 import logging
 import textwrap
-import
-
+from typing import Dict, List, Optional, Type, Union, Callable
+
 
 import sentry_sdk
 from openai import BadRequestError
 from openai.types.chat.chat_completion_message_tool_call import (
     ChatCompletionMessageToolCall,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from rich.console import Console
 
-from holmes.common.env_vars import
+from holmes.common.env_vars import (
+    TEMPERATURE,
+    MAX_OUTPUT_TOKEN_RESERVATION,
+    LOG_LLM_USAGE_RESPONSE,
+)
 
 from holmes.core.investigation_structured_output import (
     DEFAULT_SECTIONS,
@@ -39,9 +43,80 @@ from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
 from holmes.utils.stream import StreamEvents, StreamMessage
-
-
-)
+
+# Create a named logger for cost tracking
+cost_logger = logging.getLogger("holmes.costs")
+
+
+class LLMCosts(BaseModel):
+    """Tracks cost and token usage for LLM calls."""
+
+    total_cost: float = 0.0
+    total_tokens: int = 0
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+
+
+def _extract_cost_from_response(full_response) -> float:
+    """Extract cost value from LLM response.
+
+    Args:
+        full_response: The raw LLM response object
+
+    Returns:
+        The cost as a float, or 0.0 if not available
+    """
+    try:
+        cost_value = (
+            full_response._hidden_params.get("response_cost", 0)
+            if hasattr(full_response, "_hidden_params")
+            else 0
+        )
+        # Ensure cost is a float
+        return float(cost_value) if cost_value is not None else 0.0
+    except Exception:
+        return 0.0
+
+
+def _process_cost_info(
+    full_response, costs: Optional[LLMCosts] = None, log_prefix: str = "LLM call"
+) -> None:
+    """Process cost and token information from LLM response.
+
+    Logs the cost information and optionally accumulates it into a costs object.
+
+    Args:
+        full_response: The raw LLM response object
+        costs: Optional LLMCosts object to accumulate costs into
+        log_prefix: Prefix for logging messages (e.g., "LLM call", "Post-processing")
+    """
+    try:
+        cost = _extract_cost_from_response(full_response)
+        usage = getattr(full_response, "usage", {})
+
+        if usage:
+            if LOG_LLM_USAGE_RESPONSE:  # shows stats on token cache usage
+                logging.info(f"LLM usage response:\n{usage}\n")
+            prompt_toks = usage.get("prompt_tokens", 0)
+            completion_toks = usage.get("completion_tokens", 0)
+            total_toks = usage.get("total_tokens", 0)
+            cost_logger.debug(
+                f"{log_prefix} cost: ${cost:.6f} | Tokens: {prompt_toks} prompt + {completion_toks} completion = {total_toks} total"
+            )
+            # Accumulate costs and tokens if costs object provided
+            if costs:
+                costs.total_cost += cost
+                costs.prompt_tokens += prompt_toks
+                costs.completion_tokens += completion_toks
+                costs.total_tokens += total_toks
+        elif cost > 0:
+            cost_logger.debug(
+                f"{log_prefix} cost: ${cost:.6f} | Token usage not available"
+            )
+            if costs:
+                costs.total_cost += cost
+    except Exception as e:
+        logging.debug(f"Could not extract cost information: {e}")
 
 
 def format_tool_result_data(tool_result: StructuredToolResult) -> str:
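The new helpers follow a simple pattern: pull `response_cost` out of litellm's `_hidden_params`, read token counts from `usage`, and fold both into a running `LLMCosts` accumulator, one call per agent iteration. Below is a standalone sketch of that accumulation pattern (not HolmesGPT code; `FakeResponse` is an illustrative stand-in for a litellm response object, which exposes the same two attributes):

class FakeResponse:
    """Illustrative stand-in for a litellm response object."""

    def __init__(self, cost, prompt_tokens, completion_tokens):
        self._hidden_params = {"response_cost": cost}
        self.usage = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }


class RunningCosts:
    """Mirrors LLMCosts: totals accumulated across agent iterations."""

    def __init__(self):
        self.total_cost = 0.0
        self.prompt_tokens = 0
        self.completion_tokens = 0
        self.total_tokens = 0

    def accumulate(self, response):
        # Same extraction logic as _extract_cost_from_response / _process_cost_info.
        cost = getattr(response, "_hidden_params", {}).get("response_cost", 0) or 0
        usage = getattr(response, "usage", {}) or {}
        self.total_cost += float(cost)
        self.prompt_tokens += usage.get("prompt_tokens", 0)
        self.completion_tokens += usage.get("completion_tokens", 0)
        self.total_tokens += usage.get("total_tokens", 0)


costs = RunningCosts()
for response in [FakeResponse(0.0031, 1200, 150), FakeResponse(0.0044, 1800, 220)]:
    costs.accumulate(response)  # one completion per agent iteration
print(f"${costs.total_cost:.4f} across {costs.total_tokens} tokens")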
@@ -186,11 +261,11 @@ class ToolCallResult(BaseModel):
         }
 
 
-class LLMResult(
+class LLMResult(LLMCosts):
     tool_calls: Optional[List[ToolCallResult]] = None
     result: Optional[str] = None
     unprocessed_result: Optional[str] = None
-    instructions: List[str] =
+    instructions: List[str] = Field(default_factory=list)
     # TODO: clean up these two
     prompt: Optional[str] = None
     messages: Optional[List[dict]] = None
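Two details are worth noting here: `LLMResult` now inherits the cost fields instead of duplicating them, which is what later lets the caller splat accumulated totals in with `**costs.model_dump()`, and the bare default on `instructions` is replaced with `Field(default_factory=list)`, the standard pydantic idiom for a fresh mutable default per instance. A trimmed sketch of both (field names follow the diff; unrelated fields omitted):

from typing import List, Optional
from pydantic import BaseModel, Field


class LLMCosts(BaseModel):
    total_cost: float = 0.0
    total_tokens: int = 0
    prompt_tokens: int = 0
    completion_tokens: int = 0


class LLMResult(LLMCosts):
    result: Optional[str] = None
    instructions: List[str] = Field(default_factory=list)  # fresh list per instance


costs = LLMCosts(total_cost=0.0075, total_tokens=3370)
res = LLMResult(result="investigation summary", **costs.model_dump())
print(res.total_cost, res.instructions)  # 0.0075 []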
@@ -211,7 +286,9 @@ class ToolCallingLLM:
         self.max_steps = max_steps
         self.tracer = tracer
         self.llm = llm
-        self.
+        self.approval_callback: Optional[
+            Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
+        ] = None
 
     def prompt_call(
         self,
@@ -259,6 +336,8 @@ class ToolCallingLLM:
     ) -> LLMResult:
         perf_timing = PerformanceTiming("tool_calling_llm.call")
         tool_calls = []  # type: ignore
+        costs = LLMCosts()
+
         tools = self.tool_executor.get_all_tools_openai_format(
             target_model=self.llm.model
         )
@@ -299,6 +378,9 @@ class ToolCallingLLM:
                 )
                 logging.debug(f"got response {full_response.to_json()}")  # type: ignore
 
+                # Extract and accumulate cost information
+                _process_cost_info(full_response, costs, "LLM call")
+
                 perf_timing.measure("llm.completion")
             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
             except BadRequestError as e:
@@ -352,11 +434,14 @@ class ToolCallingLLM:
                 if post_process_prompt and user_prompt:
                     logging.info("Running post processing on investigation.")
                     raw_response = text_response
-                    post_processed_response =
-
-
-
+                    post_processed_response, post_processing_cost = (
+                        self._post_processing_call(
+                            prompt=user_prompt,
+                            investigation=raw_response,
+                            user_prompt=post_process_prompt,
+                        )
                     )
+                    costs.total_cost += post_processing_cost
 
                     perf_timing.end(f"- completed in {i} iterations -")
                     return LLMResult(
@@ -365,6 +450,7 @@ class ToolCallingLLM:
                         tool_calls=tool_calls,
                         prompt=json.dumps(messages, indent=2),
                         messages=messages,
+                        **costs.model_dump(),  # Include all cost fields
                     )
 
                 perf_timing.end(f"- completed in {i} iterations -")
@@ -373,6 +459,7 @@ class ToolCallingLLM:
                     tool_calls=tool_calls,
                     prompt=json.dumps(messages, indent=2),
                     messages=messages,
+                    **costs.model_dump(),  # Include all cost fields
                 )
 
             if text_response and text_response.strip():
@@ -383,33 +470,106 @@ class ToolCallingLLM:
             perf_timing.measure("pre-tool-calls")
             with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                 futures = []
+                futures_tool_numbers: dict[
+                    concurrent.futures.Future, Optional[int]
+                ] = {}
+                tool_number: Optional[int]
                 for tool_index, t in enumerate(tools_to_call, 1):
                     logging.debug(f"Tool to call: {t}")
-
-
-
-
-
-
-
-                    )
+                    tool_number = tool_number_offset + tool_index
+                    future = executor.submit(
+                        self._invoke_llm_tool_call,
+                        tool_to_call=t,
+                        previous_tool_calls=tool_calls,
+                        trace_span=trace_span,
+                        tool_number=tool_number,
                     )
+                    futures_tool_numbers[future] = tool_number
+                    futures.append(future)
 
                 for future in concurrent.futures.as_completed(futures):
                     tool_call_result: ToolCallResult = future.result()
 
+                    tool_number = (
+                        futures_tool_numbers[future]
+                        if future in futures_tool_numbers
+                        else None
+                    )
+                    tool_call_result = self.handle_tool_call_approval(
+                        tool_call_result=tool_call_result, tool_number=tool_number
+                    )
+
                     tool_calls.append(tool_call_result.as_tool_result_response())
                     messages.append(tool_call_result.as_tool_call_message())
 
                     perf_timing.measure(f"tool completed {tool_call_result.tool_name}")
 
+            # Update the tool number offset for the next iteration
+            tool_number_offset += len(tools_to_call)
+
             # Add a blank line after all tools in this batch complete
             if tools_to_call:
                 logging.info("")
 
         raise Exception(f"Too many LLM calls - exceeded max_steps: {i}/{max_steps}")
 
-    def
+    def _directly_invoke_tool(
+        self,
+        tool_name: str,
+        tool_params: dict,
+        user_approved: bool,
+        trace_span=DummySpan(),
+        tool_number: Optional[int] = None,
+    ) -> StructuredToolResult:
+        tool_span = trace_span.start_span(name=tool_name, type="tool")
+        tool = self.tool_executor.get_tool_by_name(tool_name)
+        tool_response = None
+        try:
+            if (not tool) or (tool_params is None):
+                logging.warning(
+                    f"Skipping tool execution for {tool_name}: args: {tool_params}"
+                )
+                tool_response = StructuredToolResult(
+                    status=ToolResultStatus.ERROR,
+                    error=f"Failed to find tool {tool_name}",
+                    params=tool_params,
+                )
+            else:
+                tool_response = tool.invoke(
+                    tool_params, tool_number=tool_number, user_approved=user_approved
+                )
+        except Exception as e:
+            logging.error(
+                f"Tool call to {tool_name} failed with an Exception", exc_info=True
+            )
+            tool_response = StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error=f"Tool call failed: {e}",
+                params=tool_params,
+            )
+
+            # Log error to trace span
+            tool_span.log(
+                input=tool_params, output=str(e), metadata={"status": "ERROR"}
+            )
+
+        tool_span.log(
+            input=tool_params,
+            output=tool_response.data,
+            metadata={
+                "status": tool_response.status.value,
+                "error": tool_response.error,
+                "description": tool.get_parameterized_one_liner(tool_params)
+                if tool
+                else "",
+                "structured_tool_result": tool_response,
+            },
+        )
+        tool_span.end()
+
+        return tool_response
+
+    def _invoke_llm_tool_call(
         self,
         tool_to_call: ChatCompletionMessageToolCall,
         previous_tool_calls: list[dict],
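The executor block also changes how tools are numbered: `futures_tool_numbers` remembers which number was assigned to each future (results arrive out of order via `as_completed`), and `tool_number_offset` carries the count across iterations so numbering stays unique for the whole session. The same scheme in miniature (`fake_tool` and the batch contents are illustrative stand-ins):

import concurrent.futures


def fake_tool(number, name):
    return f"#{number} {name} done"


tool_number_offset = 0
for batch in [["kubectl_get", "kubectl_logs"], ["kubectl_describe"]]:
    with concurrent.futures.ThreadPoolExecutor(max_workers=4) as executor:
        futures_tool_numbers = {}
        for tool_index, name in enumerate(batch, 1):
            tool_number = tool_number_offset + tool_index
            future = executor.submit(fake_tool, tool_number, name)
            futures_tool_numbers[future] = tool_number
        for future in concurrent.futures.as_completed(futures_tool_numbers):
            print(future.result())  # completion order may differ from submit order
    tool_number_offset += len(batch)  # next batch continues at #3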
@@ -438,92 +598,97 @@ class ToolCallingLLM:
             ),
         )
 
-        tool_params =
+        tool_params = {}
         try:
             tool_params = json.loads(tool_arguments)
         except Exception:
             logging.warning(
                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
             )
-        tool_call_id = tool_to_call.id
-        tool = self.tool_executor.get_tool_by_name(tool_name)
-
-        if (not tool) or (tool_params is None):
-            logging.warning(
-                f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
-            )
-            return ToolCallResult(
-                tool_call_id=tool_call_id,
-                tool_name=tool_name,
-                description="NA",
-                result=StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
-                    error=f"Failed to find tool {tool_name}",
-                    params=tool_params,
-                ),
-            )
 
-
+        tool_call_id = tool_to_call.id
 
-
-
+        tool_response = prevent_overly_repeated_tool_call(
+            tool_name=tool_name,
+            tool_params=tool_params,
+            tool_calls=previous_tool_calls,
+        )
 
-
-        tool_response =
-            tool_name=
+        if not tool_response:
+            tool_response = self._directly_invoke_tool(
+                tool_name=tool_name,
                 tool_params=tool_params,
-
-
-
-            tool_response = tool.invoke(tool_params, tool_number=tool_number)
-
-            if not isinstance(tool_response, StructuredToolResult):
-                # Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
-                logging.error(
-                    f"Tool {tool.name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
-                )
-                tool_response = StructuredToolResult(
-                    status=ToolResultStatus.SUCCESS,
-                    data=tool_response,
-                    params=tool_params,
-                )
-
-            # Log tool execution to trace span
-            tool_span.log(
-                input=tool_params,
-                output=tool_response.data,
-                metadata={
-                    "status": tool_response.status.value,
-                    "error": tool_response.error,
-                    "description": tool.get_parameterized_one_liner(tool_params),
-                    "structured_tool_result": tool_response,
-                },
+                user_approved=False,
+                trace_span=trace_span,
+                tool_number=tool_number,
             )
 
-
+        if not isinstance(tool_response, StructuredToolResult):
+            # Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
             logging.error(
-                f"Tool
+                f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
             )
             tool_response = StructuredToolResult(
-                status=ToolResultStatus.
-
+                status=ToolResultStatus.SUCCESS,
+                data=tool_response,
                 params=tool_params,
             )
 
-
-            tool_span.log(
-                input=tool_params, output=str(e), metadata={"status": "ERROR"}
-            )
-        finally:
-            # End tool span
-            tool_span.end()
+        tool = self.tool_executor.get_tool_by_name(tool_name)
         return ToolCallResult(
             tool_call_id=tool_call_id,
             tool_name=tool_name,
-            description=tool.get_parameterized_one_liner(tool_params),
+            description=tool.get_parameterized_one_liner(tool_params) if tool else "",
             result=tool_response,
         )
 
+    def handle_tool_call_approval(
+        self, tool_call_result: ToolCallResult, tool_number: Optional[int]
+    ) -> ToolCallResult:
+        """
+        Handle approval for a single tool call if required.
+
+        Args:
+            tool_call_result: A single tool call result that may require approval
+
+        Returns:
+            Updated tool call result with approved/denied status
+        """
+
+        if tool_call_result.result.status != ToolResultStatus.APPROVAL_REQUIRED:
+            return tool_call_result
+
+        # If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
+        if not self.approval_callback:
+            tool_call_result.result.status = ToolResultStatus.ERROR
+            return tool_call_result
+
+        # Get approval from user
+        approved, feedback = self.approval_callback(tool_call_result.result)
+
+        if approved:
+            logging.debug(
+                f"User approved command: {tool_call_result.result.invocation}"
+            )
+
+            new_response = self._directly_invoke_tool(
+                tool_name=tool_call_result.tool_name,
+                tool_params=tool_call_result.result.params or {},
+                user_approved=True,
+                trace_span=DummySpan(),
+                tool_number=tool_number,
+            )
+            tool_call_result.result = new_response
+        else:
+            # User denied - update to error
+            feedback_text = f" User feedback: {feedback}" if feedback else ""
+            tool_call_result.result.status = ToolResultStatus.ERROR
+            tool_call_result.result.error = (
+                f"User denied command execution.{feedback_text}"
+            )
+
+        return tool_call_result
+
     @staticmethod
     def __load_post_processing_user_prompt(
         input_prompt, investigation, user_prompt: Optional[str] = None
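`handle_tool_call_approval` closes the loop on the new APPROVAL_REQUIRED status: with no callback registered the call degrades to an error, otherwise the callback decides, and an approved call is re-executed with `user_approved=True`. A minimal sketch of a callback a client might register (the prompt wording and function body are illustrative; only the `(approved, feedback)` contract and the `approval_callback` attribute come from the diff):

from typing import Optional, Tuple


def cli_approval_callback(result) -> Tuple[bool, Optional[str]]:
    """Ask the operator before running a command that requires approval.

    `result` is the StructuredToolResult carrying the proposed invocation.
    """
    answer = input(f"Run `{result.invocation}`? [y/N] ").strip().lower()
    if answer == "y":
        return True, None
    return False, "denied at interactive prompt"


# Registration on a ToolCallingLLM instance (attribute name from the diff):
# llm.approval_callback = cli_approval_callback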
@@ -540,7 +705,7 @@ class ToolCallingLLM:
         investigation,
         user_prompt: Optional[str] = None,
         system_prompt: str = "You are an AI assistant summarizing Kubernetes issues.",
-    ) -> Optional[str]:
+    ) -> tuple[Optional[str], float]:
         try:
             user_prompt = ToolCallingLLM.__load_post_processing_user_prompt(
                 prompt, investigation, user_prompt
@@ -559,10 +724,18 @@ class ToolCallingLLM:
             ]
             full_response = self.llm.completion(messages=messages, temperature=0)
             logging.debug(f"Post processing response {full_response}")
-
+
+            # Extract and log cost information for post-processing
+            post_processing_cost = _extract_cost_from_response(full_response)
+            if post_processing_cost > 0:
+                cost_logger.debug(
+                    f"Post-processing LLM cost: ${post_processing_cost:.6f}"
+                )
+
+            return full_response.choices[0].message.content, post_processing_cost  # type: ignore
         except Exception:
             logging.exception("Failed to run post processing", exc_info=True)
-            return investigation
+            return investigation, 0.0
 
     @sentry_sdk.trace
     def truncate_messages_to_fit_context(
@@ -602,6 +775,7 @@ class ToolCallingLLM:
         perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
         i = 0
+        tool_number_offset = 0
 
         while i < max_steps:
             i += 1
@@ -634,6 +808,10 @@ class ToolCallingLLM:
                     stream=False,
                     drop_params=True,
                 )
+
+                # Log cost information for this iteration (no accumulation in streaming)
+                _process_cost_info(full_response, log_prefix="LLM iteration")
+
                 perf_timing.measure("llm.completion")
             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
             except BadRequestError as e:
@@ -689,15 +867,15 @@ class ToolCallingLLM:
             with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                 futures = []
                 for tool_index, t in enumerate(tools_to_call, 1):  # type: ignore
-
-
-
-
-
-
-
-                    )
+                    tool_number = tool_number_offset + tool_index
+                    future = executor.submit(
+                        self._invoke_llm_tool_call,
+                        tool_to_call=t,  # type: ignore
+                        previous_tool_calls=tool_calls,
+                        trace_span=DummySpan(),  # Streaming mode doesn't support tracing yet
+                        tool_number=tool_number,
                     )
+                    futures.append(future)
                     yield StreamMessage(
                         event=StreamEvents.START_TOOL,
                         data={"tool_name": t.function.name, "id": t.id},
@@ -716,6 +894,9 @@ class ToolCallingLLM:
                         data=tool_call_result.as_streaming_tool_result_response(),
                     )
 
+            # Update the tool number offset for the next iteration
+            tool_number_offset += len(tools_to_call)
+
             raise Exception(
                 f"Too many LLM calls - exceeded max_steps: {i}/{self.max_steps}"
             )
@@ -793,9 +974,6 @@ class IssueInvestigator(ToolCallingLLM):
             "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
         )
 
-        todo_manager = get_todo_manager()
-        todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
-
         system_prompt = load_and_render_prompt(
             prompt,
             {
@@ -804,8 +982,6 @@ class IssueInvestigator(ToolCallingLLM):
                 "structured_output": request_structured_output_from_llm,
                 "toolsets": self.tool_executor.toolsets,
                 "cluster_name": self.cluster_name,
-                "todo_list": todo_context,
-                "investigation_id": self.investigation_id,
             },
         )
 
holmes/core/tools.py
CHANGED
@@ -24,12 +24,15 @@ class ToolResultStatus(str, Enum):
     SUCCESS = "success"
     ERROR = "error"
     NO_DATA = "no_data"
+    APPROVAL_REQUIRED = "approval_required"
 
     def to_color(self) -> str:
         if self == ToolResultStatus.SUCCESS:
             return "green"
         elif self == ToolResultStatus.ERROR:
             return "red"
+        elif self == ToolResultStatus.APPROVAL_REQUIRED:
+            return "yellow"
         else:
             return "white"
 
@@ -38,6 +41,8 @@ class ToolResultStatus(str, Enum):
             return "✔"
         elif self == ToolResultStatus.ERROR:
             return "❌"
+        elif self == ToolResultStatus.APPROVAL_REQUIRED:
+            return "⚠️"
         else:
             return "⚪️"
 
@@ -148,14 +153,17 @@ class Tool(ABC, BaseModel):
         )
 
     def invoke(
-        self,
+        self,
+        params: Dict,
+        tool_number: Optional[int] = None,
+        user_approved: bool = False,
     ) -> StructuredToolResult:
         tool_number_str = f"#{tool_number} " if tool_number else ""
         logging.info(
             f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
         )
         start_time = time.time()
-        result = self._invoke(params)
+        result = self._invoke(params=params, user_approved=user_approved)
         result.icon_url = self.icon_url
         elapsed = time.time() - start_time
         output_str = (
@@ -171,7 +179,13 @@ class Tool(ABC, BaseModel):
         return result
 
     @abstractmethod
-    def _invoke(
+    def _invoke(
+        self, params: dict, user_approved: bool = False
+    ) -> StructuredToolResult:
+        """
+        params: the tool params
+        user_approved: whether the tool call is approved by the user. Can be used to confidently execute unsafe actions.
+        """
         pass
 
     @abstractmethod
@@ -223,7 +237,9 @@ class YAMLTool(Tool, BaseModel):
             return ToolResultStatus.NO_DATA
         return ToolResultStatus.SUCCESS
 
-    def _invoke(
+    def _invoke(
+        self, params: dict, user_approved: bool = False
+    ) -> StructuredToolResult:
         if self.command is not None:
             raw_output, return_code, invocation = self.__invoke_command(params)
         else:
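Together with the APPROVAL_REQUIRED status above, the new `user_approved` parameter gives a tool a standard way to defer unsafe work: return APPROVAL_REQUIRED on the first pass, then run for real once re-invoked with `user_approved=True`. A sketch of a tool using the hook (`DeletePodTool` and the two stub classes are illustrative stand-ins, not HolmesGPT classes; the signature and status value come from the diff):

from dataclasses import dataclass, field
from enum import Enum
from typing import Optional


class ToolResultStatus(str, Enum):
    SUCCESS = "success"
    ERROR = "error"
    NO_DATA = "no_data"
    APPROVAL_REQUIRED = "approval_required"


@dataclass
class StructuredToolResult:
    status: ToolResultStatus
    params: dict = field(default_factory=dict)
    invocation: Optional[str] = None
    data: Optional[str] = None


class DeletePodTool:
    def _invoke(self, params: dict, user_approved: bool = False) -> StructuredToolResult:
        command = f"kubectl delete pod {params.get('name', '')}"
        if not user_approved:
            # First pass: surface the exact command for the approval prompt.
            return StructuredToolResult(
                status=ToolResultStatus.APPROVAL_REQUIRED,
                invocation=command,
                params=params,
            )
        # Second pass (after approval): actually run the destructive action.
        return StructuredToolResult(
            status=ToolResultStatus.SUCCESS, data=f"executed: {command}", params=params
        )


print(DeletePodTool()._invoke({"name": "web-1"}).status.value)  # approval_required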
holmes/core/toolset_manager.py
CHANGED
@@ -266,11 +266,7 @@ class ToolsetManager:
             toolset.path = cached_status.get("path", None)
             # check prerequisites for only enabled toolset when the toolset is loaded from cache. When the toolset is
             # not loaded from cache, the prerequisites are checked in the refresh_toolset_status method.
-            if (
-                toolset.enabled
-                and toolset.status == ToolsetStatusEnum.ENABLED
-                and using_cached
-            ):
+            if toolset.enabled and toolset.status == ToolsetStatusEnum.ENABLED:
                 enabled_toolsets_from_cache.append(toolset)
         self.check_toolset_prerequisites(enabled_toolsets_from_cache)
 
holmes/core/tracing.py
CHANGED
@@ -120,7 +120,7 @@ class DummySpan:
 class DummyTracer:
     """A no-op tracer implementation for when tracing is disabled."""
 
-    def start_experiment(self, experiment_name=None,
+    def start_experiment(self, experiment_name=None, additional_metadata=None):
        """No-op experiment creation."""
         return None
 