holmesgpt 0.13.1__py3-none-any.whl → 0.13.3a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +7 -0
- holmes/config.py +3 -1
- holmes/core/conversations.py +0 -11
- holmes/core/investigation.py +0 -6
- holmes/core/llm.py +60 -1
- holmes/core/prompt.py +0 -2
- holmes/core/supabase_dal.py +2 -2
- holmes/core/todo_tasks_formatter.py +51 -0
- holmes/core/tool_calling_llm.py +166 -91
- holmes/core/tools.py +20 -4
- holmes/interactive.py +63 -2
- holmes/main.py +0 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +3 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +3 -13
- holmes/plugins/toolsets/__init__.py +5 -1
- holmes/plugins/toolsets/argocd.yaml +1 -1
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +18 -6
- holmes/plugins/toolsets/aws.yaml +9 -5
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -1
- holmes/plugins/toolsets/bash/bash_toolset.py +31 -20
- holmes/plugins/toolsets/confluence.yaml +1 -1
- holmes/plugins/toolsets/coralogix/api.py +3 -1
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +4 -4
- holmes/plugins/toolsets/coralogix/utils.py +41 -14
- holmes/plugins/toolsets/datadog/datadog_api.py +45 -2
- holmes/plugins/toolsets/datadog/datadog_general_instructions.jinja2 +208 -0
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +43 -0
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +12 -9
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +722 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +17 -6
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +15 -7
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +6 -2
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +9 -3
- holmes/plugins/toolsets/docker.yaml +1 -1
- holmes/plugins/toolsets/git.py +15 -5
- holmes/plugins/toolsets/grafana/toolset_grafana.py +25 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +5 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -32
- holmes/plugins/toolsets/helm.yaml +1 -1
- holmes/plugins/toolsets/internet/internet.py +4 -2
- holmes/plugins/toolsets/internet/notion.py +4 -2
- holmes/plugins/toolsets/investigator/core_investigation.py +5 -17
- holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +1 -5
- holmes/plugins/toolsets/kafka.py +19 -7
- holmes/plugins/toolsets/kubernetes.yaml +5 -5
- holmes/plugins/toolsets/kubernetes_logs.py +4 -4
- holmes/plugins/toolsets/kubernetes_logs.yaml +1 -1
- holmes/plugins/toolsets/logging_utils/logging_api.py +15 -2
- holmes/plugins/toolsets/mcp/toolset_mcp.py +3 -1
- holmes/plugins/toolsets/newrelic.py +8 -4
- holmes/plugins/toolsets/opensearch/opensearch.py +13 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +4 -4
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +9 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +193 -82
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +7 -3
- holmes/plugins/toolsets/robusta/robusta.py +10 -4
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -2
- holmes/plugins/toolsets/servicenow/servicenow.py +9 -3
- holmes/plugins/toolsets/slab.yaml +1 -1
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3a0.dist-info}/METADATA +3 -2
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3a0.dist-info}/RECORD +75 -72
- holmes/core/todo_manager.py +0 -88
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3a0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3a0.dist-info}/entry_points.txt +0 -0
holmes/__init__.py
CHANGED
holmes/common/env_vars.py
CHANGED
```diff
@@ -67,3 +67,10 @@ MAX_OUTPUT_TOKEN_RESERVATION = int(
 
 # When using the bash tool, setting BASH_TOOL_UNSAFE_ALLOW_ALL will skip any command validation and run any command requested by the LLM
 BASH_TOOL_UNSAFE_ALLOW_ALL = load_bool("BASH_TOOL_UNSAFE_ALLOW_ALL", False)
+
+LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
+
+# For CLI only, enable user approval for potentially sensitive commands that would otherwise be rejected
+ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
+
+MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 300))
```
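The new settings follow the existing `load_bool` pattern for boolean environment flags. As a rough, illustrative sketch of how such a flag loader typically works (this is an assumption for illustration; the real `load_bool` in `holmes.common.env_vars` may differ):

```python
import os


def load_bool(name: str, default: bool) -> bool:
    """Sketch of a boolean env-var parser in the style used above."""
    value = os.environ.get(name)
    if value is None:
        return default
    return value.strip().lower() in ("1", "true", "yes", "on")


# Example: the flags introduced in this release would then be toggled via the
# environment, e.g. LOG_LLM_USAGE_RESPONSE=true to log per-call token usage,
# or ENABLE_CLI_TOOL_APPROVAL=false to disable interactive command approval.
```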
holmes/config.py
CHANGED
```diff
@@ -527,7 +527,9 @@ class Config(RobustaBaseConfig):
             if model_key
             else next(iter(self._model_list.values())).copy()
         )
-
+        is_robusta_model = model_params.pop("is_robusta_model", False)
+        if is_robusta_model and self.api_key:
+            # we set here the api_key since it is being refresh when exprided and not as part of the model loading.
             api_key = self.api_key.get_secret_value()
         else:
             api_key = model_params.pop("api_key", api_key)
```
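The effect of this change is that Robusta-managed models always pick up the refreshed session API key from the config instead of a stale key stored on the model entry. A simplified, standalone sketch of that selection logic (names and structure are assumed for illustration, not the actual `Config` class):

```python
from typing import Optional


def resolve_api_key(
    model_params: dict, session_api_key: Optional[str], fallback_key: Optional[str]
) -> Optional[str]:
    """Sketch: prefer the refreshed session key for Robusta-managed models,
    otherwise fall back to the key stored with the model entry."""
    is_robusta_model = model_params.pop("is_robusta_model", False)
    if is_robusta_model and session_api_key:
        return session_api_key
    return model_params.pop("api_key", fallback_key)


# Example
params = {"is_robusta_model": True, "api_key": "stale-key"}
assert resolve_api_key(params, "fresh-session-key", None) == "fresh-session-key"
```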
holmes/core/conversations.py
CHANGED
```diff
@@ -133,7 +133,6 @@ def build_issue_chat_messages(
             "issue": issue_chat_request.issue_type,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "investigation_id": ai.investigation_id,
         },
     )
     messages = [
@@ -154,7 +153,6 @@
             "issue": issue_chat_request.issue_type,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "investigation_id": ai.investigation_id,
         }
         system_prompt_without_tools = load_and_render_prompt(
             template_path, template_context_without_tools
@@ -188,7 +186,6 @@
             "issue": issue_chat_request.issue_type,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "investigation_id": ai.investigation_id,
         }
         system_prompt_with_truncated_tools = load_and_render_prompt(
             template_path, truncated_template_context
@@ -230,7 +227,6 @@
             "issue": issue_chat_request.issue_type,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "investigation_id": ai.investigation_id,
         }
         system_prompt_without_tools = load_and_render_prompt(
             template_path, template_context_without_tools
@@ -254,7 +250,6 @@
             "issue": issue_chat_request.issue_type,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "investigation_id": ai.investigation_id,
         }
         system_prompt_with_truncated_tools = load_and_render_prompt(
             template_path, template_context
@@ -279,7 +274,6 @@ def add_or_update_system_prompt(
     context = {
         "toolsets": ai.tool_executor.toolsets,
         "cluster_name": config.cluster_name,
-        "investigation_id": ai.investigation_id,
     }
 
     system_prompt = load_and_render_prompt(template_path, context)
@@ -471,7 +465,6 @@ def build_workload_health_chat_messages(
             "resource": resource,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "investigation_id": ai.investigation_id,
         },
     )
     messages = [
@@ -492,7 +485,6 @@
             "resource": resource,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "investigation_id": ai.investigation_id,
         }
         system_prompt_without_tools = load_and_render_prompt(
             template_path, template_context_without_tools
@@ -526,7 +518,6 @@
             "resource": resource,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "investigation_id": ai.investigation_id,
         }
         system_prompt_with_truncated_tools = load_and_render_prompt(
             template_path, truncated_template_context
@@ -568,7 +559,6 @@
             "resource": resource,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "investigation_id": ai.investigation_id,
         }
         system_prompt_without_tools = load_and_render_prompt(
             template_path, template_context_without_tools
@@ -592,7 +582,6 @@
             "resource": resource,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "investigation_id": ai.investigation_id,
         }
         system_prompt_with_truncated_tools = load_and_render_prompt(
             template_path, template_context
```
holmes/core/investigation.py
CHANGED
```diff
@@ -9,7 +9,6 @@ from holmes.core.models import InvestigateRequest, InvestigationResult
 from holmes.core.supabase_dal import SupabaseDal
 from holmes.core.tracing import DummySpan, SpanType
 from holmes.utils.global_instructions import add_global_instructions_to_user_prompt
-from holmes.core.todo_manager import get_todo_manager
 
 from holmes.core.investigation_structured_output import (
     DEFAULT_SECTIONS,
@@ -133,9 +132,6 @@
     else:
         logging.info("Structured output is disabled for this request")
 
-    todo_manager = get_todo_manager()
-    todo_context = todo_manager.format_tasks_for_prompt(ai.investigation_id)
-
     system_prompt = load_and_render_prompt(
         investigate_request.prompt_template,
         {
@@ -144,8 +140,6 @@
             "structured_output": request_structured_output_from_llm,
             "toolsets": ai.tool_executor.toolsets,
             "cluster_name": config.cluster_name,
-            "todo_list": todo_context,
-            "investigation_id": ai.investigation_id,
         },
     )
 
```
holmes/core/llm.py
CHANGED
```diff
@@ -229,9 +229,11 @@ class DefaultLLM(LLM):
         ]  # can be removed after next litelm version
 
         self.args.setdefault("temperature", temperature)
+
+        self._add_cache_control_to_last_message(messages)
+
         # Get the litellm module to use (wrapped or unwrapped)
         litellm_to_use = self.tracer.wrap_llm(litellm) if self.tracer else litellm
-
         result = litellm_to_use.completion(
             model=self.model,
             api_key=self.api_key,
@@ -266,3 +268,60 @@
             f"Couldn't find model's name {model_name} in litellm's model list, fallback to 4096 tokens for max_output_tokens"
         )
         return 4096
+
+    def _add_cache_control_to_last_message(
+        self, messages: List[Dict[str, Any]]
+    ) -> None:
+        """
+        Add cache_control to the last non-user message for Anthropic prompt caching.
+        Removes any existing cache_control from previous messages to avoid accumulation.
+        """
+        # First, remove any existing cache_control from all messages
+        for msg in messages:
+            content = msg.get("content")
+            if isinstance(content, list):
+                for block in content:
+                    if isinstance(block, dict) and "cache_control" in block:
+                        del block["cache_control"]
+                        logging.debug(
+                            f"Removed existing cache_control from {msg.get('role')} message"
+                        )
+
+        # Find the last non-user message to add cache_control to.
+        # Adding cache_control to user message requires changing its structure, so we avoid it
+        # This avoids breaking parse_messages_tags which only processes user messages
+        target_msg = None
+        for msg in reversed(messages):
+            if msg.get("role") != "user":
+                target_msg = msg
+                break
+
+        if not target_msg:
+            logging.debug("No non-user message found for cache_control")
+            return
+
+        content = target_msg.get("content")
+
+        if content is None:
+            return
+
+        if isinstance(content, str):
+            # Convert string to structured format with cache_control
+            target_msg["content"] = [
+                {
+                    "type": "text",
+                    "text": content,
+                    "cache_control": {"type": "ephemeral"},
+                }
+            ]
+            logging.debug(
+                f"Added cache_control to {target_msg.get('role')} message (converted from string)"
+            )
+        elif isinstance(content, list) and content:
+            # Add cache_control to the last content block
+            last_block = content[-1]
+            if isinstance(last_block, dict) and "type" in last_block:
+                last_block["cache_control"] = {"type": "ephemeral"}
+                logging.debug(
+                    f"Added cache_control to {target_msg.get('role')} message (structured content)"
+                )
```
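For context, Anthropic-style prompt caching marks a content block with `cache_control: {"type": "ephemeral"}` so the provider can reuse the cached prefix up to that block on subsequent calls. A minimal, standalone sketch of the transformation the new method applies to a plain string message (illustrative only, independent of the holmes classes):

```python
# Sketch: wrap the last non-user message's string content in a structured text
# block carrying cache_control, mirroring _add_cache_control_to_last_message.
messages = [
    {"role": "system", "content": "You are an SRE assistant..."},
    {"role": "user", "content": "Why is my pod crash-looping?"},
]

target = next(m for m in reversed(messages) if m["role"] != "user")
target["content"] = [
    {
        "type": "text",
        "text": target["content"],
        "cache_control": {"type": "ephemeral"},
    }
]

print(messages[0])
# {'role': 'system', 'content': [{'type': 'text', 'text': 'You are an SRE assistant...',
#   'cache_control': {'type': 'ephemeral'}}]}
```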
holmes/core/prompt.py
CHANGED
```diff
@@ -40,7 +40,6 @@ def build_initial_ask_messages(
     initial_user_prompt: str,
     file_paths: Optional[List[Path]],
     tool_executor: Any,  # ToolExecutor type
-    investigation_id: str,
     runbooks: Union[RunbookCatalog, Dict, None] = None,
     system_prompt_additions: Optional[str] = None,
 ) -> List[Dict]:
@@ -60,7 +59,6 @@
         "toolsets": tool_executor.toolsets,
         "runbooks": runbooks or {},
         "system_prompt_additions": system_prompt_additions or "",
-        "investigation_id": investigation_id,
     }
     system_prompt_rendered = load_and_render_prompt(
         system_prompt_template, template_context
```
holmes/core/supabase_dal.py
CHANGED
```diff
@@ -131,7 +131,7 @@ class SupabaseDal:
             raise Exception(
                 "No robusta token provided to Holmes.\n"
                 "Please set a valid Robusta UI token.\n "
-                "See https://
+                "See https://holmesgpt.dev/ai-providers/ for instructions."
             )
         env_replacement_token = get_env_replacement(token)
         if env_replacement_token:
@@ -143,7 +143,7 @@
                 "Ensure your Helm chart or environment variables are set correctly.\n "
                 "If you store the token in a secret, you must also pass "
                 "the environment variable ROBUSTA_UI_TOKEN to Holmes.\n "
-                "See https://
+                "See https://holmesgpt.dev/data-sources/builtin-toolsets/robusta/ for instructions."
             )
         try:
             decoded = base64.b64decode(token)
```
holmes/core/todo_tasks_formatter.py
ADDED
```diff
@@ -0,0 +1,51 @@
+from typing import List
+
+from holmes.plugins.toolsets.investigator.model import Task, TaskStatus
+
+
+def format_tasks(tasks: List[Task]) -> str:
+    """
+    Format tasks for tool response
+    Returns empty string if no tasks exist.
+    """
+    if not tasks:
+        return ""
+
+    status_order = {
+        TaskStatus.PENDING: 0,
+        TaskStatus.IN_PROGRESS: 1,
+        TaskStatus.COMPLETED: 2,
+    }
+
+    sorted_tasks = sorted(
+        tasks,
+        key=lambda t: (status_order.get(t.status, 3),),
+    )
+
+    lines = ["# CURRENT INVESTIGATION TASKS"]
+    lines.append("")
+
+    pending_count = sum(1 for t in tasks if t.status == TaskStatus.PENDING)
+    progress_count = sum(1 for t in tasks if t.status == TaskStatus.IN_PROGRESS)
+    completed_count = sum(1 for t in tasks if t.status == TaskStatus.COMPLETED)
+
+    lines.append(
+        f"**Task Status**: {completed_count} completed, {progress_count} in progress, {pending_count} pending"
+    )
+    lines.append("")
+
+    for task in sorted_tasks:
+        status_indicator = {
+            TaskStatus.PENDING: "[ ]",
+            TaskStatus.IN_PROGRESS: "[~]",
+            TaskStatus.COMPLETED: "[✓]",
+        }.get(task.status, "[?]")
+
+        lines.append(f"{status_indicator} [{task.id}] {task.content}")
+
+    lines.append("")
+    lines.append(
+        "**Instructions**: Use TodoWrite tool to update task status as you work. Mark tasks as 'in_progress' when starting, 'completed' when finished."
+    )
+
+    return "\n".join(lines)
```
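To see what the new formatter produces, here is a hypothetical usage sketch. The `Task` constructor arguments below are assumptions for illustration; only the `id`, `content`, and `status` fields actually read by `format_tasks` are taken from the diff.

```python
from holmes.core.todo_tasks_formatter import format_tasks
from holmes.plugins.toolsets.investigator.model import Task, TaskStatus

# Hypothetical task list; field names match what format_tasks reads.
tasks = [
    Task(id="1", content="Check pod events", status=TaskStatus.COMPLETED),
    Task(id="2", content="Inspect container logs", status=TaskStatus.IN_PROGRESS),
    Task(id="3", content="Review recent deployments", status=TaskStatus.PENDING),
]

print(format_tasks(tasks))
# # CURRENT INVESTIGATION TASKS
#
# **Task Status**: 1 completed, 1 in progress, 1 pending
#
# [ ] [3] Review recent deployments
# [~] [2] Inspect container logs
# [✓] [1] Check pod events
#
# **Instructions**: Use TodoWrite tool to update task status as you work. ...
```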
holmes/core/tool_calling_llm.py
CHANGED
```diff
@@ -2,8 +2,8 @@ import concurrent.futures
 import json
 import logging
 import textwrap
-import
-
+from typing import Dict, List, Optional, Type, Union, Callable
+
 
 import sentry_sdk
 from openai import BadRequestError
@@ -13,7 +13,11 @@ from openai.types.chat.chat_completion_message_tool_call import (
 from pydantic import BaseModel, Field
 from rich.console import Console
 
-from holmes.common.env_vars import
+from holmes.common.env_vars import (
+    TEMPERATURE,
+    MAX_OUTPUT_TOKEN_RESERVATION,
+    LOG_LLM_USAGE_RESPONSE,
+)
 
 from holmes.core.investigation_structured_output import (
     DEFAULT_SECTIONS,
@@ -39,9 +43,6 @@ from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
 from holmes.utils.stream import StreamEvents, StreamMessage
-from holmes.core.todo_manager import (
-    get_todo_manager,
-)
 
 # Create a named logger for cost tracking
 cost_logger = logging.getLogger("holmes.costs")
@@ -94,6 +95,8 @@ def _process_cost_info(
     usage = getattr(full_response, "usage", {})
 
     if usage:
+        if LOG_LLM_USAGE_RESPONSE:  # shows stats on token cache usage
+            logging.info(f"LLM usage response:\n{usage}\n")
         prompt_toks = usage.get("prompt_tokens", 0)
         completion_toks = usage.get("completion_tokens", 0)
         total_toks = usage.get("total_tokens", 0)
@@ -283,7 +286,9 @@ class ToolCallingLLM:
         self.max_steps = max_steps
         self.tracer = tracer
         self.llm = llm
-        self.
+        self.approval_callback: Optional[
+            Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
+        ] = None
 
     def prompt_call(
         self,
@@ -465,21 +470,35 @@
         perf_timing.measure("pre-tool-calls")
         with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
             futures = []
+            futures_tool_numbers: dict[
+                concurrent.futures.Future, Optional[int]
+            ] = {}
+            tool_number: Optional[int]
             for tool_index, t in enumerate(tools_to_call, 1):
                 logging.debug(f"Tool to call: {t}")
-
-
-
-
-
-
-
-                )
+                tool_number = tool_number_offset + tool_index
+                future = executor.submit(
+                    self._invoke_llm_tool_call,
+                    tool_to_call=t,
+                    previous_tool_calls=tool_calls,
+                    trace_span=trace_span,
+                    tool_number=tool_number,
                 )
+                futures_tool_numbers[future] = tool_number
+                futures.append(future)
 
             for future in concurrent.futures.as_completed(futures):
                 tool_call_result: ToolCallResult = future.result()
 
+                tool_number = (
+                    futures_tool_numbers[future]
+                    if future in futures_tool_numbers
+                    else None
+                )
+                tool_call_result = self.handle_tool_call_approval(
+                    tool_call_result=tool_call_result, tool_number=tool_number
+                )
+
                 tool_calls.append(tool_call_result.as_tool_result_response())
                 messages.append(tool_call_result.as_tool_call_message())
 
@@ -494,7 +513,63 @@
 
         raise Exception(f"Too many LLM calls - exceeded max_steps: {i}/{max_steps}")
 
-    def
+    def _directly_invoke_tool(
+        self,
+        tool_name: str,
+        tool_params: dict,
+        user_approved: bool,
+        trace_span=DummySpan(),
+        tool_number: Optional[int] = None,
+    ) -> StructuredToolResult:
+        tool_span = trace_span.start_span(name=tool_name, type="tool")
+        tool = self.tool_executor.get_tool_by_name(tool_name)
+        tool_response = None
+        try:
+            if (not tool) or (tool_params is None):
+                logging.warning(
+                    f"Skipping tool execution for {tool_name}: args: {tool_params}"
+                )
+                tool_response = StructuredToolResult(
+                    status=ToolResultStatus.ERROR,
+                    error=f"Failed to find tool {tool_name}",
+                    params=tool_params,
+                )
+            else:
+                tool_response = tool.invoke(
+                    tool_params, tool_number=tool_number, user_approved=user_approved
+                )
+        except Exception as e:
+            logging.error(
+                f"Tool call to {tool_name} failed with an Exception", exc_info=True
+            )
+            tool_response = StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error=f"Tool call failed: {e}",
+                params=tool_params,
+            )
+
+            # Log error to trace span
+            tool_span.log(
+                input=tool_params, output=str(e), metadata={"status": "ERROR"}
+            )
+
+        tool_span.log(
+            input=tool_params,
+            output=tool_response.data,
+            metadata={
+                "status": tool_response.status.value,
+                "error": tool_response.error,
+                "description": tool.get_parameterized_one_liner(tool_params)
+                if tool
+                else "",
+                "structured_tool_result": tool_response,
+            },
+        )
+        tool_span.end()
+
+        return tool_response
+
+    def _invoke_llm_tool_call(
         self,
         tool_to_call: ChatCompletionMessageToolCall,
         previous_tool_calls: list[dict],
@@ -523,92 +598,97 @@
             ),
         )
 
-        tool_params =
+        tool_params = {}
         try:
             tool_params = json.loads(tool_arguments)
         except Exception:
             logging.warning(
                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
             )
-        tool_call_id = tool_to_call.id
-        tool = self.tool_executor.get_tool_by_name(tool_name)
 
-
-            logging.warning(
-                f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
-            )
-            return ToolCallResult(
-                tool_call_id=tool_call_id,
-                tool_name=tool_name,
-                description="NA",
-                result=StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
-                    error=f"Failed to find tool {tool_name}",
-                    params=tool_params,
-                ),
-            )
-
-        tool_response = None
+        tool_call_id = tool_to_call.id
 
-
-
+        tool_response = prevent_overly_repeated_tool_call(
+            tool_name=tool_name,
+            tool_params=tool_params,
+            tool_calls=previous_tool_calls,
+        )
 
-
-            tool_response =
-                tool_name=
+        if not tool_response:
+            tool_response = self._directly_invoke_tool(
+                tool_name=tool_name,
                 tool_params=tool_params,
-
+                user_approved=False,
+                trace_span=trace_span,
+                tool_number=tool_number,
             )
-            if not tool_response:
-                tool_response = tool.invoke(tool_params, tool_number=tool_number)
 
-
-
-                logging.error(
-                    f"Tool {tool.name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
-                )
-                tool_response = StructuredToolResult(
-                    status=ToolResultStatus.SUCCESS,
-                    data=tool_response,
-                    params=tool_params,
-                )
-
-            # Log tool execution to trace span
-            tool_span.log(
-                input=tool_params,
-                output=tool_response.data,
-                metadata={
-                    "status": tool_response.status.value,
-                    "error": tool_response.error,
-                    "description": tool.get_parameterized_one_liner(tool_params),
-                    "structured_tool_result": tool_response,
-                },
-            )
-
-        except Exception as e:
+        if not isinstance(tool_response, StructuredToolResult):
+            # Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
             logging.error(
-                f"Tool
+                f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
             )
             tool_response = StructuredToolResult(
-                status=ToolResultStatus.
-
+                status=ToolResultStatus.SUCCESS,
+                data=tool_response,
                 params=tool_params,
             )
 
-
-            tool_span.log(
-                input=tool_params, output=str(e), metadata={"status": "ERROR"}
-            )
-        finally:
-            # End tool span
-            tool_span.end()
+        tool = self.tool_executor.get_tool_by_name(tool_name)
         return ToolCallResult(
             tool_call_id=tool_call_id,
             tool_name=tool_name,
-            description=tool.get_parameterized_one_liner(tool_params),
+            description=tool.get_parameterized_one_liner(tool_params) if tool else "",
             result=tool_response,
         )
 
+    def handle_tool_call_approval(
+        self, tool_call_result: ToolCallResult, tool_number: Optional[int]
+    ) -> ToolCallResult:
+        """
+        Handle approval for a single tool call if required.
+
+        Args:
+            tool_call_result: A single tool call result that may require approval
+
+        Returns:
+            Updated tool call result with approved/denied status
+        """
+
+        if tool_call_result.result.status != ToolResultStatus.APPROVAL_REQUIRED:
+            return tool_call_result
+
+        # If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
+        if not self.approval_callback:
+            tool_call_result.result.status = ToolResultStatus.ERROR
+            return tool_call_result
+
+        # Get approval from user
+        approved, feedback = self.approval_callback(tool_call_result.result)
+
+        if approved:
+            logging.debug(
+                f"User approved command: {tool_call_result.result.invocation}"
+            )
+
+            new_response = self._directly_invoke_tool(
+                tool_name=tool_call_result.tool_name,
+                tool_params=tool_call_result.result.params or {},
+                user_approved=True,
+                trace_span=DummySpan(),
+                tool_number=tool_number,
+            )
+            tool_call_result.result = new_response
+        else:
+            # User denied - update to error
+            feedback_text = f" User feedback: {feedback}" if feedback else ""
+            tool_call_result.result.status = ToolResultStatus.ERROR
+            tool_call_result.result.error = (
+                f"User denied command execution.{feedback_text}"
+            )
+
+        return tool_call_result
+
     @staticmethod
     def __load_post_processing_user_prompt(
         input_prompt, investigation, user_prompt: Optional[str] = None
@@ -787,15 +867,15 @@
         with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
             futures = []
             for tool_index, t in enumerate(tools_to_call, 1):  # type: ignore
-
-
-
-
-
-
-
-                )
+                tool_number = tool_number_offset + tool_index
+                future = executor.submit(
+                    self._invoke_llm_tool_call,
+                    tool_to_call=t,  # type: ignore
+                    previous_tool_calls=tool_calls,
+                    trace_span=DummySpan(),  # Streaming mode doesn't support tracing yet
+                    tool_number=tool_number,
                 )
+                futures.append(future)
                 yield StreamMessage(
                     event=StreamEvents.START_TOOL,
                     data={"tool_name": t.function.name, "id": t.id},
@@ -894,9 +974,6 @@ class IssueInvestigator(ToolCallingLLM):
                 "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
             )
 
-        todo_manager = get_todo_manager()
-        todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
-
         system_prompt = load_and_render_prompt(
             prompt,
             {
@@ -905,8 +982,6 @@
                 "structured_output": request_structured_output_from_llm,
                 "toolsets": self.tool_executor.toolsets,
                 "cluster_name": self.cluster_name,
-                "todo_list": todo_context,
-                "investigation_id": self.investigation_id,
             },
         )
 
```
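The new `approval_callback` hook is what lets a client such as the CLI (together with `ENABLE_CLI_TOOL_APPROVAL`) ask the user before re-running a command that a toolset returned as `APPROVAL_REQUIRED`. A rough sketch of how a client might wire such a callback; the callback shape `(StructuredToolResult) -> tuple[bool, Optional[str]]` is taken from the diff, while the prompt text and wiring below are illustrative assumptions:

```python
from typing import Optional


def cli_approval_callback(result) -> tuple[bool, Optional[str]]:
    """Ask the user whether a flagged command may run; return (approved, feedback)."""
    print(f"The model wants to run: {result.invocation}")
    answer = input("Approve? [y/N] ").strip().lower()
    if answer == "y":
        return True, None
    feedback = input("Optional feedback for the model: ").strip()
    return False, feedback or None


# Hypothetical wiring on a ToolCallingLLM instance:
#   llm.approval_callback = cli_approval_callback
# Approved calls are re-invoked with user_approved=True; denied calls are returned
# to the model as an ERROR result carrying the user's feedback.
```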