holmesgpt 0.12.4__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of holmesgpt might be problematic.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +19 -1
- holmes/common/env_vars.py +13 -0
- holmes/config.py +69 -9
- holmes/core/conversations.py +11 -0
- holmes/core/investigation.py +16 -3
- holmes/core/investigation_structured_output.py +12 -0
- holmes/core/llm.py +10 -0
- holmes/core/models.py +9 -1
- holmes/core/openai_formatting.py +72 -12
- holmes/core/prompt.py +13 -0
- holmes/core/supabase_dal.py +3 -0
- holmes/core/todo_manager.py +88 -0
- holmes/core/tool_calling_llm.py +121 -149
- holmes/core/tools.py +10 -1
- holmes/core/tools_utils/tool_executor.py +7 -2
- holmes/core/tools_utils/toolset_utils.py +7 -2
- holmes/core/tracing.py +8 -7
- holmes/interactive.py +1 -0
- holmes/main.py +2 -1
- holmes/plugins/prompts/__init__.py +7 -1
- holmes/plugins/prompts/_ai_safety.jinja2 +43 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
- holmes/plugins/prompts/_fetch_logs.jinja2 +6 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +16 -0
- holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
- holmes/plugins/prompts/generic_ask.jinja2 +4 -3
- holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +4 -0
- holmes/plugins/toolsets/__init__.py +19 -6
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
- holmes/plugins/toolsets/coralogix/api.py +6 -6
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
- holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
- holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
- holmes/plugins/toolsets/git.py +15 -15
- holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
- holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
- holmes/plugins/toolsets/internet/internet.py +2 -1
- holmes/plugins/toolsets/internet/notion.py +2 -1
- holmes/plugins/toolsets/investigator/__init__.py +0 -0
- holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
- holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
- holmes/plugins/toolsets/investigator/model.py +15 -0
- holmes/plugins/toolsets/kafka.py +14 -7
- holmes/plugins/toolsets/kubernetes.yaml +7 -7
- holmes/plugins/toolsets/kubernetes_logs.py +454 -25
- holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
- holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
- holmes/plugins/toolsets/newrelic.py +8 -3
- holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
- holmes/plugins/toolsets/prometheus/prometheus.py +149 -44
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
- holmes/plugins/toolsets/robusta/robusta.py +4 -4
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
- holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
- holmes/plugins/toolsets/utils.py +8 -1
- holmes/utils/llms.py +20 -0
- holmes/utils/stream.py +90 -0
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/METADATA +48 -35
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/RECORD +85 -75
- holmes/utils/robusta.py +0 -9
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/entry_points.txt +0 -0
holmes/core/todo_manager.py
ADDED
@@ -0,0 +1,88 @@
+from typing import Dict, List
+from threading import Lock
+
+from holmes.plugins.toolsets.investigator.model import Task, TaskStatus
+
+
+class TodoListManager:
+    """
+    Session-based storage manager for investigation TodoLists.
+    Stores TodoLists per session and provides methods to get/update tasks.
+    """
+
+    def __init__(self):
+        self._sessions: Dict[str, List[Task]] = {}
+        self._lock: Lock = Lock()
+
+    def get_session_tasks(self, session_id: str) -> List[Task]:
+        with self._lock:
+            return self._sessions.get(session_id, []).copy()
+
+    def update_session_tasks(self, session_id: str, tasks: List[Task]) -> None:
+        with self._lock:
+            self._sessions[session_id] = tasks.copy()
+
+    def clear_session(self, session_id: str) -> None:
+        with self._lock:
+            if session_id in self._sessions:
+                del self._sessions[session_id]
+
+    def get_session_count(self) -> int:
+        with self._lock:
+            return len(self._sessions)
+
+    def format_tasks_for_prompt(self, session_id: str) -> str:
+        """
+        Format tasks for injection into system prompt.
+        Returns empty string if no tasks exist.
+        """
+        tasks = self.get_session_tasks(session_id)
+
+        if not tasks:
+            return ""
+
+        status_order = {
+            TaskStatus.PENDING: 0,
+            TaskStatus.IN_PROGRESS: 1,
+            TaskStatus.COMPLETED: 2,
+        }
+
+        sorted_tasks = sorted(
+            tasks,
+            key=lambda t: (status_order.get(t.status, 3),),
+        )
+
+        lines = ["# CURRENT INVESTIGATION TASKS"]
+        lines.append("")
+
+        pending_count = sum(1 for t in tasks if t.status == TaskStatus.PENDING)
+        progress_count = sum(1 for t in tasks if t.status == TaskStatus.IN_PROGRESS)
+        completed_count = sum(1 for t in tasks if t.status == TaskStatus.COMPLETED)
+
+        lines.append(
+            f"**Task Status**: {completed_count} completed, {progress_count} in progress, {pending_count} pending"
+        )
+        lines.append("")
+
+        for task in sorted_tasks:
+            status_indicator = {
+                TaskStatus.PENDING: "[ ]",
+                TaskStatus.IN_PROGRESS: "[~]",
+                TaskStatus.COMPLETED: "[✓]",
+            }.get(task.status, "[?]")
+
+            lines.append(f"{status_indicator} [{task.id}] {task.content}")
+
+        lines.append("")
+        lines.append(
+            "**Instructions**: Use TodoWrite tool to update task status as you work. Mark tasks as 'in_progress' when starting, 'completed' when finished."
+        )
+
+        return "\n".join(lines)
+
+
+_todo_manager = TodoListManager()
+
+
+def get_todo_manager() -> TodoListManager:
+    return _todo_manager
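For orientation, a minimal usage sketch of the session-scoped manager above. Task and TaskStatus come from holmes/plugins/toolsets/investigator/model.py in this release; the exact Task constructor and the task ids/contents below are assumptions for illustration only.

# Illustrative sketch only; Task field values and the session id are invented.
from holmes.core.todo_manager import get_todo_manager
from holmes.plugins.toolsets.investigator.model import Task, TaskStatus

manager = get_todo_manager()        # module-level singleton returned by get_todo_manager()
session_id = "investigation-1234"   # holmes uses ToolCallingLLM.investigation_id here

# Store the current task list for this investigation session (thread-safe via the internal lock).
manager.update_session_tasks(
    session_id,
    [
        Task(id="1", content="Check pod restart counts", status=TaskStatus.IN_PROGRESS),
        Task(id="2", content="Fetch recent error logs", status=TaskStatus.PENDING),
    ],
)

# Render the checklist block that gets injected into the system prompt.
print(manager.format_tasks_for_prompt(session_id))

# Drop the session once the investigation is finished.
manager.clear_session(session_id)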
holmes/core/tool_calling_llm.py
CHANGED
@@ -2,32 +2,25 @@ import concurrent.futures
 import json
 import logging
 import textwrap
+import uuid
 from typing import Dict, List, Optional, Type, Union
 
-import requests  # type: ignore
 import sentry_sdk
-from litellm.types.utils import Message
 from openai import BadRequestError
 from openai.types.chat.chat_completion_message_tool_call import (
     ChatCompletionMessageToolCall,
 )
 from pydantic import BaseModel
-from pydantic_core import from_json
 from rich.console import Console
 
-from holmes.common.env_vars import
-
-    STREAM_CHUNKS_PER_PARSE,
-    TEMPERATURE,
-)
+from holmes.common.env_vars import TEMPERATURE, MAX_OUTPUT_TOKEN_RESERVATION
+
 from holmes.core.investigation_structured_output import (
     DEFAULT_SECTIONS,
     REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
     InputSectionsDataType,
     get_output_format_for_investigation,
     is_response_an_incorrect_tool_call,
-    parse_markdown_into_sections_from_hash_sign,
-    process_response_into_sections,
 )
 from holmes.core.issue import Issue
 from holmes.core.llm import LLM
@@ -45,6 +38,10 @@ from holmes.utils.tags import format_tags_in_string, parse_messages_tags
 from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
+from holmes.utils.stream import StreamEvents, StreamMessage
+from holmes.core.todo_manager import (
+    get_todo_manager,
+)
 
 
 def format_tool_result_data(tool_result: StructuredToolResult) -> str:
@@ -94,12 +91,13 @@ def truncate_messages_to_fit_context(
 
     tool_call_messages = [message for message in messages if message["role"] == "tool"]
 
-
+    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+    if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
         logging.error(
            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
        )
        raise Exception(
-            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size -
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
        )
 
    if len(tool_call_messages) == 0:
@@ -213,6 +211,7 @@ class ToolCallingLLM:
         self.max_steps = max_steps
         self.tracer = tracer
         self.llm = llm
+        self.investigation_id = str(uuid.uuid4())
 
     def prompt_call(
         self,
@@ -221,6 +220,7 @@ class ToolCallingLLM:
         post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         sections: Optional[InputSectionsDataType] = None,
+        trace_span=DummySpan(),
     ) -> LLMResult:
         messages = [
             {"role": "system", "content": system_prompt},
@@ -232,6 +232,7 @@ class ToolCallingLLM:
             response_format,
             user_prompt=user_prompt,
             sections=sections,
+            trace_span=trace_span,
         )
 
     def messages_call(
@@ -258,7 +259,9 @@ class ToolCallingLLM:
     ) -> LLMResult:
         perf_timing = PerformanceTiming("tool_calling_llm.call")
         tool_calls = []  # type: ignore
-        tools = self.tool_executor.get_all_tools_openai_format(
+        tools = self.tool_executor.get_all_tools_openai_format(
+            target_model=self.llm.model
+        )
         perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
         i = 0
@@ -413,20 +416,41 @@
         trace_span=DummySpan(),
         tool_number=None,
     ) -> ToolCallResult:
-
+        # Handle the union type - ChatCompletionMessageToolCall can be either
+        # ChatCompletionMessageFunctionToolCall (with 'function' field and type='function')
+        # or ChatCompletionMessageCustomToolCall (with 'custom' field and type='custom').
+        # We use hasattr to check for the 'function' attribute as it's more flexible
+        # and doesn't require importing the specific type.
+        if hasattr(tool_to_call, "function"):
+            tool_name = tool_to_call.function.name
+            tool_arguments = tool_to_call.function.arguments
+        else:
+            # This is a custom tool call - we don't support these currently
+            logging.error(f"Unsupported custom tool call: {tool_to_call}")
+            return ToolCallResult(
+                tool_call_id=tool_to_call.id,
+                tool_name="unknown",
+                description="NA",
+                result=StructuredToolResult(
+                    status=ToolResultStatus.ERROR,
+                    error="Custom tool calls are not supported",
+                    params=None,
+                ),
+            )
+
         tool_params = None
         try:
-            tool_params = json.loads(
+            tool_params = json.loads(tool_arguments)
         except Exception:
             logging.warning(
-                f"Failed to parse arguments for tool: {tool_name}. args: {
+                f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
             )
         tool_call_id = tool_to_call.id
         tool = self.tool_executor.get_tool_by_name(tool_name)
 
         if (not tool) or (tool_params is None):
             logging.warning(
-                f"Skipping tool execution for {tool_name}: args: {
+                f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
             )
             return ToolCallResult(
                 tool_call_id=tool_call_id,
@@ -553,61 +577,39 @@
 
     def call_stream(
         self,
-        system_prompt: str,
+        system_prompt: str = "",
         user_prompt: Optional[str] = None,
-        stream: bool = False,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         sections: Optional[InputSectionsDataType] = None,
-
+        msgs: Optional[list[dict]] = None,
     ):
-
-
-
-
-
-
-
-
-
-
-
-                "ai_answer",
-                {
-                    "sections": parse_markdown_into_sections_from_hash_sign(
-                        buffer
-                    )
-                    or {},
-                    "analysis": buffer,
-                    "instructions": runbooks or [],
-                },
-            )
-
-            yield create_sse_message(
-                "ai_answer_end",
-                {
-                    "sections": parse_markdown_into_sections_from_hash_sign(buffer)
-                    or {},
-                    "analysis": buffer,
-                    "instructions": runbooks or [],
-                },
-            )
-
-        messages = [
-            {"role": "system", "content": system_prompt},
-            {"role": "user", "content": user_prompt},
-        ]
+        """
+        This function DOES NOT call llm.completion(stream=true).
+        This function streams holmes one iteration at a time instead of waiting for all iterations to complete.
+        """
+        messages = []
+        if system_prompt:
+            messages.append({"role": "system", "content": system_prompt})
+        if user_prompt:
+            messages.append({"role": "user", "content": user_prompt})
+        if msgs:
+            messages.extend(msgs)
         perf_timing = PerformanceTiming("tool_calling_llm.call")
-
+        tool_calls: list[dict] = []
+        tools = self.tool_executor.get_all_tools_openai_format(
+            target_model=self.llm.model
+        )
         perf_timing.measure("get_all_tools_openai_format")
+        max_steps = self.max_steps
        i = 0
-
-        while i <
+
+        while i < max_steps:
            i += 1
            perf_timing.measure(f"start iteration {i}")
            logging.debug(f"running iteration {i}")
 
-            tools =
-            tool_choice =
+            tools = None if i == max_steps else tools
+            tool_choice = "auto" if tools else None
 
            total_tokens = self.llm.count_tokens_for_message(messages)  # type: ignore
            max_context_size = self.llm.get_context_window_size()
@@ -623,90 +625,43 @@
 
            logging.debug(f"sending messages={messages}\n\ntools={tools}")
            try:
-
-
-
-
-
-
-
-
-
-
-                        "drop_param": True,
-                    },
-                    headers={"Authorization": f"Bearer {self.llm.api_key}"},  # type: ignore
-                    stream=True,
-                )
-                response.raise_for_status()
-                it = response.iter_content(chunk_size=None, decode_unicode=True)
-                peek_chunk = from_json(next(it))
-                tools = peek_chunk.get("tool_calls")
-
-                if not tools:
-                    yield from stream_analysis(it, peek_chunk)
-                    perf_timing.measure("llm.completion")
-                    return
-
-                response_message = Message(**peek_chunk)
-                tools_to_call = response_message.tool_calls
-            else:
-                full_response = self.llm.completion(
-                    messages=parse_messages_tags(messages),  # type: ignore
-                    tools=tools,
-                    tool_choice=tool_choice,
-                    temperature=TEMPERATURE,
-                    response_format=response_format,
-                    stream=False,
-                    drop_params=True,
-                )
-                perf_timing.measure("llm.completion")
-
-                response_message = full_response.choices[0].message  # type: ignore
-                if response_message and response_format:
-                    # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
-                    dict_response = json.loads(full_response.to_json())  # type: ignore
-                    incorrect_tool_call = is_response_an_incorrect_tool_call(
-                        sections, dict_response.get("choices", [{}])[0]
-                    )
-
-                    if incorrect_tool_call:
-                        logging.warning(
-                            "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
-                        )
-                        # disable structured output going forward and and retry
-                        response_format = None
-                        i -= 1
-                        continue
-
-                tools_to_call = getattr(response_message, "tool_calls", None)
-                if not tools_to_call:
-                    (text_response, sections) = process_response_into_sections(  # type: ignore
-                        response_message.content
-                    )
-
-                    yield create_sse_message(
-                        "ai_answer_end",
-                        {
-                            "sections": sections or {},
-                            "analysis": text_response,
-                            "instructions": runbooks or [],
-                        },
-                    )
-                    return
+                full_response = self.llm.completion(
+                    messages=parse_messages_tags(messages),  # type: ignore
+                    tools=tools,
+                    tool_choice=tool_choice,
+                    response_format=response_format,
+                    temperature=TEMPERATURE,
+                    stream=False,
+                    drop_params=True,
+                )
+                perf_timing.measure("llm.completion")
            # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
            except BadRequestError as e:
-                logging.exception("Bad completion request")
                if "Unrecognized request arguments supplied: tool_choice, tools" in str(
                    e
                ):
                    raise Exception(
                        "The Azure model you chose is not supported. Model version 1106 and higher required."
+                    ) from e
+                else:
+                    raise
+
+            response_message = full_response.choices[0].message  # type: ignore
+            if response_message and response_format:
+                # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
+                dict_response = json.loads(full_response.to_json())  # type: ignore
+                incorrect_tool_call = is_response_an_incorrect_tool_call(
+                    sections, dict_response.get("choices", [{}])[0]
+                )
+
+                if incorrect_tool_call:
+                    logging.warning(
+                        "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                    )
-
-
-
-
+                    # disable structured output going forward and and retry
+                    response_format = None
+                    max_steps = max_steps + 1
+                    continue
 
            messages.append(
                response_message.model_dump(
@@ -714,6 +669,22 @@
                )
            )
 
+            tools_to_call = getattr(response_message, "tool_calls", None)
+            if not tools_to_call:
+                yield StreamMessage(
+                    event=StreamEvents.ANSWER_END,
+                    data={"content": response_message.content, "messages": messages},
+                )
+                return
+
+            reasoning = getattr(response_message, "reasoning_content", None)
+            message = response_message.content
+            if reasoning or message:
+                yield StreamMessage(
+                    event=StreamEvents.AI_MESSAGE,
+                    data={"content": message, "reasoning": reasoning},
+                )
+
            perf_timing.measure("pre-tool-calls")
            with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                futures = []
@@ -727,8 +698,9 @@
                            tool_number=tool_index,
                        )
                    )
-                    yield
-
+                    yield StreamMessage(
+                        event=StreamEvents.START_TOOL,
+                        data={"tool_name": t.function.name, "id": t.id},
                    )
 
                for future in concurrent.futures.as_completed(futures):
@@ -739,12 +711,9 @@
 
                    perf_timing.measure(f"tool completed {tool_call_result.tool_name}")
 
-
-
-
-
-                    yield create_sse_message(
-                        "tool_calling_result", streaming_result_dict
+                    yield StreamMessage(
+                        event=StreamEvents.TOOL_RESULT,
+                        data=tool_call_result.as_streaming_tool_result_response(),
                    )
 
        raise Exception(
@@ -782,6 +751,7 @@ class IssueInvestigator(ToolCallingLLM):
        global_instructions: Optional[Instructions] = None,
        post_processing_prompt: Optional[str] = None,
        sections: Optional[InputSectionsDataType] = None,
+        trace_span=DummySpan(),
    ) -> LLMResult:
        runbooks = self.runbook_manager.get_instructions_for_issue(issue)
 
@@ -823,6 +793,9 @@
                "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
            )
 
+        todo_manager = get_todo_manager()
+        todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
+
        system_prompt = load_and_render_prompt(
            prompt,
            {
@@ -831,6 +804,8 @@
                "structured_output": request_structured_output_from_llm,
                "toolsets": self.tool_executor.toolsets,
                "cluster_name": self.cluster_name,
+                "todo_list": todo_context,
+                "investigation_id": self.investigation_id,
            },
        )
 
@@ -865,10 +840,7 @@
            post_processing_prompt,
            response_format=response_format,
            sections=sections,
+            trace_span=trace_span,
        )
        res.instructions = runbooks
        return res
-
-
-def create_sse_message(event_type: str, data: dict = {}):
-    return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
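For orientation, a minimal sketch of how a caller might consume the reworked call_stream generator. StreamMessage and StreamEvents come from the new holmes/utils/stream.py and the event names below are the ones emitted in the diff above; the ToolCallingLLM construction and the prompts are assumptions for illustration only.

# Illustrative sketch; assumes `llm_caller` is an already-constructed ToolCallingLLM instance.
from holmes.utils.stream import StreamEvents

for msg in llm_caller.call_stream(
    system_prompt="You are a Kubernetes SRE assistant.",
    user_prompt="Why is my pod crash-looping?",
):
    if msg.event == StreamEvents.START_TOOL:
        print(f"running tool {msg.data['tool_name']} (id={msg.data['id']})")
    elif msg.event == StreamEvents.TOOL_RESULT:
        print("tool finished:", msg.data)
    elif msg.event == StreamEvents.AI_MESSAGE:
        print("intermediate model output:", msg.data.get("content"))
    elif msg.event == StreamEvents.ANSWER_END:
        print("final answer:", msg.data["content"])
        break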
holmes/core/tools.py
CHANGED
@@ -51,6 +51,7 @@ class StructuredToolResult(BaseModel):
     url: Optional[str] = None
     invocation: Optional[str] = None
     params: Optional[Dict] = None
+    icon_url: Optional[str] = None
 
     def get_stringified_data(self) -> str:
         if self.data is None:
@@ -121,6 +122,8 @@ class ToolParameter(BaseModel):
     description: Optional[str] = None
     type: str = "string"
     required: bool = True
+    properties: Optional[Dict[str, "ToolParameter"]] = None  # For object types
+    items: Optional["ToolParameter"] = None  # For array item schemas
 
 
 class Tool(ABC, BaseModel):
@@ -131,12 +134,17 @@ class Tool(ABC, BaseModel):
         None  # templated string to show to the user describing this tool invocation (not seen by llm)
     )
     additional_instructions: Optional[str] = None
+    icon_url: Optional[str] = Field(
+        default=None,
+        description="The URL of the icon for the tool, if None will get toolset icon",
+    )
 
-    def get_openai_format(self):
+    def get_openai_format(self, target_model: str):
         return format_tool_to_open_ai_standard(
             tool_name=self.name,
             tool_description=self.description,
             tool_parameters=self.parameters,
+            target_model=target_model,
         )
 
     def invoke(
@@ -148,6 +156,7 @@ class Tool(ABC, BaseModel):
         )
         start_time = time.time()
         result = self._invoke(params)
+        result.icon_url = self.icon_url
         elapsed = time.time() - start_time
         output_str = (
             result.get_stringified_data()
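The new properties and items fields let a ToolParameter describe nested object and array parameters (the translation to the OpenAI function format lives in holmes/core/openai_formatting.py, also changed in this release). A minimal sketch of such a declaration, assuming ToolParameter is constructed directly as defined above; the parameter names are invented for illustration.

# Hypothetical nested parameter declaration; field names are illustrative only.
from holmes.core.tools import ToolParameter

filters_param = ToolParameter(
    type="object",
    description="Label filters to apply",
    required=False,
    properties={
        "namespace": ToolParameter(type="string", description="Kubernetes namespace"),
        "labels": ToolParameter(
            type="array",
            description="Label selectors, e.g. app=web",
            items=ToolParameter(type="string"),
        ),
    },
)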
holmes/core/tools_utils/tool_executor.py
CHANGED
@@ -38,6 +38,8 @@ class ToolExecutor:
         self.tools_by_name: dict[str, Tool] = {}
         for ts in toolsets_by_name.values():
             for tool in ts.tools:
+                if tool.icon_url is None and ts.icon_url is not None:
+                    tool.icon_url = ts.icon_url
                 if tool.name in self.tools_by_name:
                     logging.warning(
                         f"Overriding existing tool '{tool.name} with new tool from {ts.name} at {ts.path}'!"
@@ -62,5 +64,8 @@
         return None
 
     @sentry_sdk.trace
-    def get_all_tools_openai_format(self):
-        return [
+    def get_all_tools_openai_format(self, target_model: str):
+        return [
+            tool.get_openai_format(target_model=target_model)
+            for tool in self.tools_by_name.values()
+        ]
holmes/core/tools_utils/toolset_utils.py
CHANGED
@@ -16,12 +16,17 @@ def filter_out_default_logging_toolset(toolsets: list[Toolset]) -> list[Toolset]
     All other types of toolsets are included as is.
     """
 
-    logging_toolsets: list[
+    logging_toolsets: list[Toolset] = []
     final_toolsets: list[Toolset] = []
 
     for ts in toolsets:
+        toolset_type = (
+            ts.original_toolset_type
+            if hasattr(ts, "original_toolset_type")
+            else type(ts)
+        )
         if (
-
+            issubclass(toolset_type, BasePodLoggingToolset)
             and ts.status == ToolsetStatusEnum.ENABLED
         ):
             logging_toolsets.append(ts)
holmes/core/tracing.py
CHANGED
@@ -1,12 +1,12 @@
-import os
+import getpass
 import logging
+import os
 import platform
-import pwd
 import socket
 from datetime import datetime
-from typing import Optional, Any, Union, Dict
-from pathlib import Path
 from enum import Enum
+from pathlib import Path
+from typing import Any, Dict, Optional, Union
 
 BRAINTRUST_API_KEY = os.environ.get("BRAINTRUST_API_KEY")
 BRAINTRUST_ORG = os.environ.get("BRAINTRUST_ORG", "robustadev")
@@ -69,7 +69,7 @@ def get_active_branch_name():
 
 def get_machine_state_tags() -> Dict[str, str]:
     return {
-        "username":
+        "username": getpass.getuser(),
         "branch": get_active_branch_name(),
         "platform": platform.platform(),
         "hostname": socket.gethostname(),
@@ -91,10 +91,11 @@ class SpanType(Enum):
     """Standard span types for tracing categorization."""
 
     LLM = "llm"
-    TOOL = "tool"
-    TASK = "task"
     SCORE = "score"
+    FUNCTION = "function"
     EVAL = "eval"
+    TASK = "task"
+    TOOL = "tool"
 
 
 class DummySpan:
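The tracing module now resolves the username with getpass instead of the Unix-only pwd module, which keeps get_machine_state_tags usable on Windows as well: getpass.getuser() consults environment variables such as USER and USERNAME before falling back to pwd on Unix. A minimal sketch of the replacement lookup:

import getpass

# Cross-platform username lookup: the pwd module only exists on Unix,
# while getpass.getuser() also works on Windows.
username = getpass.getuser()
print(username)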
holmes/interactive.py
CHANGED
holmes/main.py
CHANGED
@@ -94,7 +94,7 @@ opt_custom_runbooks: Optional[List[Path]] = typer.Option(
     help="Path to a custom runbooks (can specify -r multiple times to add multiple runbooks)",
 )
 opt_max_steps: Optional[int] = typer.Option(
-
+    40,
     "--max-steps",
     help="Advanced. Maximum number of steps the LLM can take to investigate the issue",
 )
@@ -302,6 +302,7 @@ def ask(
         prompt,  # type: ignore
         include_file,
         ai.tool_executor,
+        ai.investigation_id,
         config.get_runbook_catalog(),
         system_prompt_additions,
     )