holmesgpt 0.13.1__py3-none-any.whl → 0.13.3a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of holmesgpt might be problematic.

Files changed (76)
  1. holmes/__init__.py +1 -1
  2. holmes/common/env_vars.py +7 -0
  3. holmes/config.py +3 -1
  4. holmes/core/conversations.py +0 -11
  5. holmes/core/investigation.py +0 -6
  6. holmes/core/llm.py +60 -1
  7. holmes/core/prompt.py +0 -2
  8. holmes/core/supabase_dal.py +2 -2
  9. holmes/core/todo_tasks_formatter.py +51 -0
  10. holmes/core/tool_calling_llm.py +166 -91
  11. holmes/core/tools.py +20 -4
  12. holmes/interactive.py +63 -2
  13. holmes/main.py +0 -1
  14. holmes/plugins/prompts/_general_instructions.jinja2 +3 -1
  15. holmes/plugins/prompts/investigation_procedure.jinja2 +3 -13
  16. holmes/plugins/toolsets/__init__.py +5 -1
  17. holmes/plugins/toolsets/argocd.yaml +1 -1
  18. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +18 -6
  19. holmes/plugins/toolsets/aws.yaml +9 -5
  20. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +3 -1
  21. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +3 -1
  22. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  23. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -1
  24. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +3 -1
  25. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  26. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -1
  27. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -1
  28. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -1
  29. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -1
  30. holmes/plugins/toolsets/bash/bash_toolset.py +31 -20
  31. holmes/plugins/toolsets/confluence.yaml +1 -1
  32. holmes/plugins/toolsets/coralogix/api.py +3 -1
  33. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +4 -4
  34. holmes/plugins/toolsets/coralogix/utils.py +41 -14
  35. holmes/plugins/toolsets/datadog/datadog_api.py +45 -2
  36. holmes/plugins/toolsets/datadog/datadog_general_instructions.jinja2 +208 -0
  37. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +43 -0
  38. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +12 -9
  39. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +722 -0
  40. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +17 -6
  41. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +15 -7
  42. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +6 -2
  43. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +9 -3
  44. holmes/plugins/toolsets/docker.yaml +1 -1
  45. holmes/plugins/toolsets/git.py +15 -5
  46. holmes/plugins/toolsets/grafana/toolset_grafana.py +25 -4
  47. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +4 -4
  48. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +5 -3
  49. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -32
  50. holmes/plugins/toolsets/helm.yaml +1 -1
  51. holmes/plugins/toolsets/internet/internet.py +4 -2
  52. holmes/plugins/toolsets/internet/notion.py +4 -2
  53. holmes/plugins/toolsets/investigator/core_investigation.py +5 -17
  54. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +1 -5
  55. holmes/plugins/toolsets/kafka.py +19 -7
  56. holmes/plugins/toolsets/kubernetes.yaml +5 -5
  57. holmes/plugins/toolsets/kubernetes_logs.py +4 -4
  58. holmes/plugins/toolsets/kubernetes_logs.yaml +1 -1
  59. holmes/plugins/toolsets/logging_utils/logging_api.py +15 -2
  60. holmes/plugins/toolsets/mcp/toolset_mcp.py +3 -1
  61. holmes/plugins/toolsets/newrelic.py +8 -4
  62. holmes/plugins/toolsets/opensearch/opensearch.py +13 -5
  63. holmes/plugins/toolsets/opensearch/opensearch_logs.py +4 -4
  64. holmes/plugins/toolsets/opensearch/opensearch_traces.py +9 -6
  65. holmes/plugins/toolsets/prometheus/prometheus.py +193 -82
  66. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +7 -3
  67. holmes/plugins/toolsets/robusta/robusta.py +10 -4
  68. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -2
  69. holmes/plugins/toolsets/servicenow/servicenow.py +9 -3
  70. holmes/plugins/toolsets/slab.yaml +1 -1
  71. {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3a0.dist-info}/METADATA +3 -2
  72. {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3a0.dist-info}/RECORD +75 -72
  73. holmes/core/todo_manager.py +0 -88
  74. {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3a0.dist-info}/LICENSE.txt +0 -0
  75. {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3a0.dist-info}/WHEEL +0 -0
  76. {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3a0.dist-info}/entry_points.txt +0 -0
holmes/__init__.py CHANGED
@@ -1,5 +1,5 @@
  # This is patched by github actions during release
- __version__ = "0.13.1"
+ __version__ = "0.13.3-alpha"
 
  # Re-export version functions from version module for backward compatibility
  from .version import (
holmes/common/env_vars.py CHANGED
@@ -67,3 +67,10 @@ MAX_OUTPUT_TOKEN_RESERVATION = int(
 
  # When using the bash tool, setting BASH_TOOL_UNSAFE_ALLOW_ALL will skip any command validation and run any command requested by the LLM
  BASH_TOOL_UNSAFE_ALLOW_ALL = load_bool("BASH_TOOL_UNSAFE_ALLOW_ALL", False)
+
+ LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
+
+ # For CLI only, enable user approval for potentially sensitive commands that would otherwise be rejected
+ ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
+
+ MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 300))
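As context for the three new settings above, here is a minimal sketch of how they can be read and toggled. Only the variable names and defaults come from the diff; the load_bool body below is an assumption for illustration, not the holmes implementation.

```python
import os

def load_bool(name: str, default: bool) -> bool:
    # Hypothetical stand-in for holmes.common.env_vars.load_bool:
    # treat common truthy strings as True, otherwise fall back to the default.
    raw = os.environ.get(name)
    if raw is None:
        return default
    return raw.strip().lower() in ("true", "1", "yes")

# LOG_LLM_USAGE_RESPONSE: log the raw token/cache usage block returned by the LLM
# ENABLE_CLI_TOOL_APPROVAL: let the CLI ask the user before running sensitive commands
# MAX_GRAPH_POINTS: cap on the number of data points returned for graphs
LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 300))
```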
holmes/config.py CHANGED
@@ -527,7 +527,9 @@ class Config(RobustaBaseConfig):
  if model_key
  else next(iter(self._model_list.values())).copy()
  )
- if model_params.get("is_robusta_model") and self.api_key:
+ is_robusta_model = model_params.pop("is_robusta_model", False)
+ if is_robusta_model and self.api_key:
+ # we set the api_key here since it is refreshed when expired, and not as part of the model loading
  api_key = self.api_key.get_secret_value()
  else:
  api_key = model_params.pop("api_key", api_key)
holmes/core/conversations.py CHANGED
@@ -133,7 +133,6 @@ def build_issue_chat_messages(
  "issue": issue_chat_request.issue_type,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  },
  )
  messages = [
@@ -154,7 +153,6 @@ def build_issue_chat_messages(
  "issue": issue_chat_request.issue_type,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  }
  system_prompt_without_tools = load_and_render_prompt(
  template_path, template_context_without_tools
@@ -188,7 +186,6 @@ def build_issue_chat_messages(
  "issue": issue_chat_request.issue_type,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  }
  system_prompt_with_truncated_tools = load_and_render_prompt(
  template_path, truncated_template_context
@@ -230,7 +227,6 @@ def build_issue_chat_messages(
  "issue": issue_chat_request.issue_type,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  }
  system_prompt_without_tools = load_and_render_prompt(
  template_path, template_context_without_tools
@@ -254,7 +250,6 @@ def build_issue_chat_messages(
  "issue": issue_chat_request.issue_type,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  }
  system_prompt_with_truncated_tools = load_and_render_prompt(
  template_path, template_context
@@ -279,7 +274,6 @@ def add_or_update_system_prompt(
  context = {
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  }
 
  system_prompt = load_and_render_prompt(template_path, context)
@@ -471,7 +465,6 @@ def build_workload_health_chat_messages(
  "resource": resource,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  },
  )
  messages = [
@@ -492,7 +485,6 @@ def build_workload_health_chat_messages(
  "resource": resource,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  }
  system_prompt_without_tools = load_and_render_prompt(
  template_path, template_context_without_tools
@@ -526,7 +518,6 @@ def build_workload_health_chat_messages(
  "resource": resource,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  }
  system_prompt_with_truncated_tools = load_and_render_prompt(
  template_path, truncated_template_context
@@ -568,7 +559,6 @@ def build_workload_health_chat_messages(
  "resource": resource,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  }
  system_prompt_without_tools = load_and_render_prompt(
  template_path, template_context_without_tools
@@ -592,7 +582,6 @@ def build_workload_health_chat_messages(
  "resource": resource,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "investigation_id": ai.investigation_id,
  }
  system_prompt_with_truncated_tools = load_and_render_prompt(
  template_path, template_context
holmes/core/investigation.py CHANGED
@@ -9,7 +9,6 @@ from holmes.core.models import InvestigateRequest, InvestigationResult
  from holmes.core.supabase_dal import SupabaseDal
  from holmes.core.tracing import DummySpan, SpanType
  from holmes.utils.global_instructions import add_global_instructions_to_user_prompt
- from holmes.core.todo_manager import get_todo_manager
 
  from holmes.core.investigation_structured_output import (
  DEFAULT_SECTIONS,
@@ -133,9 +132,6 @@ def get_investigation_context(
  else:
  logging.info("Structured output is disabled for this request")
 
- todo_manager = get_todo_manager()
- todo_context = todo_manager.format_tasks_for_prompt(ai.investigation_id)
-
  system_prompt = load_and_render_prompt(
  investigate_request.prompt_template,
  {
@@ -144,8 +140,6 @@ def get_investigation_context(
  "structured_output": request_structured_output_from_llm,
  "toolsets": ai.tool_executor.toolsets,
  "cluster_name": config.cluster_name,
- "todo_list": todo_context,
- "investigation_id": ai.investigation_id,
  },
  )
 
holmes/core/llm.py CHANGED
@@ -229,9 +229,11 @@ class DefaultLLM(LLM):
  ] # can be removed after next litelm version
 
  self.args.setdefault("temperature", temperature)
+
+ self._add_cache_control_to_last_message(messages)
+
  # Get the litellm module to use (wrapped or unwrapped)
  litellm_to_use = self.tracer.wrap_llm(litellm) if self.tracer else litellm
-
  result = litellm_to_use.completion(
  model=self.model,
  api_key=self.api_key,
@@ -266,3 +268,60 @@ class DefaultLLM(LLM):
  f"Couldn't find model's name {model_name} in litellm's model list, fallback to 4096 tokens for max_output_tokens"
  )
  return 4096
+
+ def _add_cache_control_to_last_message(
+ self, messages: List[Dict[str, Any]]
+ ) -> None:
+ """
+ Add cache_control to the last non-user message for Anthropic prompt caching.
+ Removes any existing cache_control from previous messages to avoid accumulation.
+ """
+ # First, remove any existing cache_control from all messages
+ for msg in messages:
+ content = msg.get("content")
+ if isinstance(content, list):
+ for block in content:
+ if isinstance(block, dict) and "cache_control" in block:
+ del block["cache_control"]
+ logging.debug(
+ f"Removed existing cache_control from {msg.get('role')} message"
+ )
+
+ # Find the last non-user message to add cache_control to.
+ # Adding cache_control to user message requires changing its structure, so we avoid it
+ # This avoids breaking parse_messages_tags which only processes user messages
+ target_msg = None
+ for msg in reversed(messages):
+ if msg.get("role") != "user":
+ target_msg = msg
+ break
+
+ if not target_msg:
+ logging.debug("No non-user message found for cache_control")
+ return
+
+ content = target_msg.get("content")
+
+ if content is None:
+ return
+
+ if isinstance(content, str):
+ # Convert string to structured format with cache_control
+ target_msg["content"] = [
+ {
+ "type": "text",
+ "text": content,
+ "cache_control": {"type": "ephemeral"},
+ }
+ ]
+ logging.debug(
+ f"Added cache_control to {target_msg.get('role')} message (converted from string)"
+ )
+ elif isinstance(content, list) and content:
+ # Add cache_control to the last content block
+ last_block = content[-1]
+ if isinstance(last_block, dict) and "type" in last_block:
+ last_block["cache_control"] = {"type": "ephemeral"}
+ logging.debug(
+ f"Added cache_control to {target_msg.get('role')} message (structured content)"
+ )
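To make the new prompt-caching hook above concrete, here is a small standalone sketch of the transformation it performs; this is illustrative toy code (the message contents and helper logic are assumptions), not the DefaultLLM implementation itself.

```python
# Toy sketch of the cache_control rewrite: the last non-user message is converted
# to structured content carrying an ephemeral cache_control marker, which is what
# Anthropic prompt caching keys on.
messages = [
    {"role": "system", "content": "You are a Kubernetes troubleshooting assistant..."},
    {"role": "user", "content": "Why is my pod crash-looping?"},
]

target = next((m for m in reversed(messages) if m.get("role") != "user"), None)
if target and isinstance(target.get("content"), str):
    target["content"] = [
        {
            "type": "text",
            "text": target["content"],
            "cache_control": {"type": "ephemeral"},
        }
    ]

print(messages[0]["content"][0]["cache_control"])  # {'type': 'ephemeral'}
```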
holmes/core/prompt.py CHANGED
@@ -40,7 +40,6 @@ def build_initial_ask_messages(
  initial_user_prompt: str,
  file_paths: Optional[List[Path]],
  tool_executor: Any, # ToolExecutor type
- investigation_id: str,
  runbooks: Union[RunbookCatalog, Dict, None] = None,
  system_prompt_additions: Optional[str] = None,
  ) -> List[Dict]:
@@ -60,7 +59,6 @@ def build_initial_ask_messages(
  "toolsets": tool_executor.toolsets,
  "runbooks": runbooks or {},
  "system_prompt_additions": system_prompt_additions or "",
- "investigation_id": investigation_id,
  }
  system_prompt_rendered = load_and_render_prompt(
  system_prompt_template, template_context
holmes/core/supabase_dal.py CHANGED
@@ -131,7 +131,7 @@ class SupabaseDal:
  raise Exception(
  "No robusta token provided to Holmes.\n"
  "Please set a valid Robusta UI token.\n "
- "See https://docs.robusta.dev/master/configuration/ai-analysis.html#choosing-and-configuring-an-ai-provider for instructions."
+ "See https://holmesgpt.dev/ai-providers/ for instructions."
  )
  env_replacement_token = get_env_replacement(token)
  if env_replacement_token:
@@ -143,7 +143,7 @@ class SupabaseDal:
  "Ensure your Helm chart or environment variables are set correctly.\n "
  "If you store the token in a secret, you must also pass "
  "the environment variable ROBUSTA_UI_TOKEN to Holmes.\n "
- "See https://docs.robusta.dev/master/configuration/ai-analysis.html#configuring-holmesgpt-access-to-saas-data for instructions."
+ "See https://holmesgpt.dev/data-sources/builtin-toolsets/robusta/ for instructions."
  )
  try:
  decoded = base64.b64decode(token)
holmes/core/todo_tasks_formatter.py ADDED
@@ -0,0 +1,51 @@
+ from typing import List
+
+ from holmes.plugins.toolsets.investigator.model import Task, TaskStatus
+
+
+ def format_tasks(tasks: List[Task]) -> str:
+     """
+     Format tasks for tool response
+     Returns empty string if no tasks exist.
+     """
+     if not tasks:
+         return ""
+
+     status_order = {
+         TaskStatus.PENDING: 0,
+         TaskStatus.IN_PROGRESS: 1,
+         TaskStatus.COMPLETED: 2,
+     }
+
+     sorted_tasks = sorted(
+         tasks,
+         key=lambda t: (status_order.get(t.status, 3),),
+     )
+
+     lines = ["# CURRENT INVESTIGATION TASKS"]
+     lines.append("")
+
+     pending_count = sum(1 for t in tasks if t.status == TaskStatus.PENDING)
+     progress_count = sum(1 for t in tasks if t.status == TaskStatus.IN_PROGRESS)
+     completed_count = sum(1 for t in tasks if t.status == TaskStatus.COMPLETED)
+
+     lines.append(
+         f"**Task Status**: {completed_count} completed, {progress_count} in progress, {pending_count} pending"
+     )
+     lines.append("")
+
+     for task in sorted_tasks:
+         status_indicator = {
+             TaskStatus.PENDING: "[ ]",
+             TaskStatus.IN_PROGRESS: "[~]",
+             TaskStatus.COMPLETED: "[✓]",
+         }.get(task.status, "[?]")
+
+         lines.append(f"{status_indicator} [{task.id}] {task.content}")
+
+     lines.append("")
+     lines.append(
+         "**Instructions**: Use TodoWrite tool to update task status as you work. Mark tasks as 'in_progress' when starting, 'completed' when finished."
+     )
+
+     return "\n".join(lines)
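A short usage sketch of the new formatter follows. The Task field names (id, content, status) are inferred from the attribute access in the module above; constructing Task with keyword arguments is an assumption about the investigator model.

```python
# Hypothetical usage of holmes.core.todo_tasks_formatter.format_tasks.
from holmes.core.todo_tasks_formatter import format_tasks
from holmes.plugins.toolsets.investigator.model import Task, TaskStatus

tasks = [
    Task(id="1", content="Check pod events", status=TaskStatus.COMPLETED),
    Task(id="2", content="Inspect container logs", status=TaskStatus.IN_PROGRESS),
    Task(id="3", content="Review recent deployments", status=TaskStatus.PENDING),
]

# Prints a markdown block: a "Task Status" summary line, then one
# "[ ] / [~] / [✓] [id] content" row per task, pending tasks listed first.
print(format_tasks(tasks))
```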
holmes/core/tool_calling_llm.py CHANGED
@@ -2,8 +2,8 @@ import concurrent.futures
  import json
  import logging
  import textwrap
- import uuid
- from typing import Dict, List, Optional, Type, Union
+ from typing import Dict, List, Optional, Type, Union, Callable
+
 
  import sentry_sdk
  from openai import BadRequestError
@@ -13,7 +13,11 @@ from openai.types.chat.chat_completion_message_tool_call import (
  from pydantic import BaseModel, Field
  from rich.console import Console
 
- from holmes.common.env_vars import TEMPERATURE, MAX_OUTPUT_TOKEN_RESERVATION
+ from holmes.common.env_vars import (
+ TEMPERATURE,
+ MAX_OUTPUT_TOKEN_RESERVATION,
+ LOG_LLM_USAGE_RESPONSE,
+ )
 
  from holmes.core.investigation_structured_output import (
  DEFAULT_SECTIONS,
@@ -39,9 +43,6 @@ from holmes.core.tools_utils.tool_executor import ToolExecutor
  from holmes.core.tracing import DummySpan
  from holmes.utils.colors import AI_COLOR
  from holmes.utils.stream import StreamEvents, StreamMessage
- from holmes.core.todo_manager import (
- get_todo_manager,
- )
 
  # Create a named logger for cost tracking
  cost_logger = logging.getLogger("holmes.costs")
@@ -94,6 +95,8 @@ def _process_cost_info(
  usage = getattr(full_response, "usage", {})
 
  if usage:
+ if LOG_LLM_USAGE_RESPONSE: # shows stats on token cache usage
+ logging.info(f"LLM usage response:\n{usage}\n")
  prompt_toks = usage.get("prompt_tokens", 0)
  completion_toks = usage.get("completion_tokens", 0)
  total_toks = usage.get("total_tokens", 0)
@@ -283,7 +286,9 @@ class ToolCallingLLM:
  self.max_steps = max_steps
  self.tracer = tracer
  self.llm = llm
- self.investigation_id = str(uuid.uuid4())
+ self.approval_callback: Optional[
+ Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
+ ] = None
 
  def prompt_call(
  self,
@@ -465,21 +470,35 @@ class ToolCallingLLM:
  perf_timing.measure("pre-tool-calls")
  with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
  futures = []
+ futures_tool_numbers: dict[
+ concurrent.futures.Future, Optional[int]
+ ] = {}
+ tool_number: Optional[int]
  for tool_index, t in enumerate(tools_to_call, 1):
  logging.debug(f"Tool to call: {t}")
- futures.append(
- executor.submit(
- self._invoke_tool,
- tool_to_call=t,
- previous_tool_calls=tool_calls,
- trace_span=trace_span,
- tool_number=tool_number_offset + tool_index,
- )
+ tool_number = tool_number_offset + tool_index
+ future = executor.submit(
+ self._invoke_llm_tool_call,
+ tool_to_call=t,
+ previous_tool_calls=tool_calls,
+ trace_span=trace_span,
+ tool_number=tool_number,
  )
+ futures_tool_numbers[future] = tool_number
+ futures.append(future)
 
  for future in concurrent.futures.as_completed(futures):
  tool_call_result: ToolCallResult = future.result()
 
+ tool_number = (
+ futures_tool_numbers[future]
+ if future in futures_tool_numbers
+ else None
+ )
+ tool_call_result = self.handle_tool_call_approval(
+ tool_call_result=tool_call_result, tool_number=tool_number
+ )
+
  tool_calls.append(tool_call_result.as_tool_result_response())
  messages.append(tool_call_result.as_tool_call_message())
 
@@ -494,7 +513,63 @@ class ToolCallingLLM:
 
  raise Exception(f"Too many LLM calls - exceeded max_steps: {i}/{max_steps}")
 
- def _invoke_tool(
+ def _directly_invoke_tool(
+ self,
+ tool_name: str,
+ tool_params: dict,
+ user_approved: bool,
+ trace_span=DummySpan(),
+ tool_number: Optional[int] = None,
+ ) -> StructuredToolResult:
+ tool_span = trace_span.start_span(name=tool_name, type="tool")
+ tool = self.tool_executor.get_tool_by_name(tool_name)
+ tool_response = None
+ try:
+ if (not tool) or (tool_params is None):
+ logging.warning(
+ f"Skipping tool execution for {tool_name}: args: {tool_params}"
+ )
+ tool_response = StructuredToolResult(
+ status=ToolResultStatus.ERROR,
+ error=f"Failed to find tool {tool_name}",
+ params=tool_params,
+ )
+ else:
+ tool_response = tool.invoke(
+ tool_params, tool_number=tool_number, user_approved=user_approved
+ )
+ except Exception as e:
+ logging.error(
+ f"Tool call to {tool_name} failed with an Exception", exc_info=True
+ )
+ tool_response = StructuredToolResult(
+ status=ToolResultStatus.ERROR,
+ error=f"Tool call failed: {e}",
+ params=tool_params,
+ )
+
+ # Log error to trace span
+ tool_span.log(
+ input=tool_params, output=str(e), metadata={"status": "ERROR"}
+ )
+
+ tool_span.log(
+ input=tool_params,
+ output=tool_response.data,
+ metadata={
+ "status": tool_response.status.value,
+ "error": tool_response.error,
+ "description": tool.get_parameterized_one_liner(tool_params)
+ if tool
+ else "",
+ "structured_tool_result": tool_response,
+ },
+ )
+ tool_span.end()
+
+ return tool_response
+
+ def _invoke_llm_tool_call(
  self,
  tool_to_call: ChatCompletionMessageToolCall,
  previous_tool_calls: list[dict],
@@ -523,92 +598,97 @@ class ToolCallingLLM:
  ),
  )
 
- tool_params = None
+ tool_params = {}
  try:
  tool_params = json.loads(tool_arguments)
  except Exception:
  logging.warning(
  f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
  )
- tool_call_id = tool_to_call.id
- tool = self.tool_executor.get_tool_by_name(tool_name)
 
- if (not tool) or (tool_params is None):
- logging.warning(
- f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
- )
- return ToolCallResult(
- tool_call_id=tool_call_id,
- tool_name=tool_name,
- description="NA",
- result=StructuredToolResult(
- status=ToolResultStatus.ERROR,
- error=f"Failed to find tool {tool_name}",
- params=tool_params,
- ),
- )
-
- tool_response = None
+ tool_call_id = tool_to_call.id
 
- # Create tool span if tracing is enabled
- tool_span = trace_span.start_span(name=tool_name, type="tool")
+ tool_response = prevent_overly_repeated_tool_call(
+ tool_name=tool_name,
+ tool_params=tool_params,
+ tool_calls=previous_tool_calls,
+ )
 
- try:
- tool_response = prevent_overly_repeated_tool_call(
- tool_name=tool.name,
+ if not tool_response:
+ tool_response = self._directly_invoke_tool(
+ tool_name=tool_name,
  tool_params=tool_params,
- tool_calls=previous_tool_calls,
+ user_approved=False,
+ trace_span=trace_span,
+ tool_number=tool_number,
  )
- if not tool_response:
- tool_response = tool.invoke(tool_params, tool_number=tool_number)
 
- if not isinstance(tool_response, StructuredToolResult):
- # Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
- logging.error(
- f"Tool {tool.name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
- )
- tool_response = StructuredToolResult(
- status=ToolResultStatus.SUCCESS,
- data=tool_response,
- params=tool_params,
- )
-
- # Log tool execution to trace span
- tool_span.log(
- input=tool_params,
- output=tool_response.data,
- metadata={
- "status": tool_response.status.value,
- "error": tool_response.error,
- "description": tool.get_parameterized_one_liner(tool_params),
- "structured_tool_result": tool_response,
- },
- )
-
- except Exception as e:
+ if not isinstance(tool_response, StructuredToolResult):
+ # Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
  logging.error(
- f"Tool call to {tool_name} failed with an Exception", exc_info=True
+ f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
  )
  tool_response = StructuredToolResult(
- status=ToolResultStatus.ERROR,
- error=f"Tool call failed: {e}",
+ status=ToolResultStatus.SUCCESS,
+ data=tool_response,
  params=tool_params,
  )
 
- # Log error to trace span
- tool_span.log(
- input=tool_params, output=str(e), metadata={"status": "ERROR"}
- )
- finally:
- # End tool span
- tool_span.end()
+ tool = self.tool_executor.get_tool_by_name(tool_name)
  return ToolCallResult(
  tool_call_id=tool_call_id,
  tool_name=tool_name,
- description=tool.get_parameterized_one_liner(tool_params),
+ description=tool.get_parameterized_one_liner(tool_params) if tool else "",
  result=tool_response,
  )
 
+ def handle_tool_call_approval(
+ self, tool_call_result: ToolCallResult, tool_number: Optional[int]
+ ) -> ToolCallResult:
+ """
+ Handle approval for a single tool call if required.
+
+ Args:
+ tool_call_result: A single tool call result that may require approval
+
+ Returns:
+ Updated tool call result with approved/denied status
+ """
+
+ if tool_call_result.result.status != ToolResultStatus.APPROVAL_REQUIRED:
+ return tool_call_result
+
+ # If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
+ if not self.approval_callback:
+ tool_call_result.result.status = ToolResultStatus.ERROR
+ return tool_call_result
+
+ # Get approval from user
+ approved, feedback = self.approval_callback(tool_call_result.result)
+
+ if approved:
+ logging.debug(
+ f"User approved command: {tool_call_result.result.invocation}"
+ )
+
+ new_response = self._directly_invoke_tool(
+ tool_name=tool_call_result.tool_name,
+ tool_params=tool_call_result.result.params or {},
+ user_approved=True,
+ trace_span=DummySpan(),
+ tool_number=tool_number,
+ )
+ tool_call_result.result = new_response
+ else:
+ # User denied - update to error
+ feedback_text = f" User feedback: {feedback}" if feedback else ""
+ tool_call_result.result.status = ToolResultStatus.ERROR
+ tool_call_result.result.error = (
+ f"User denied command execution.{feedback_text}"
+ )
+
+ return tool_call_result
+
  @staticmethod
  def __load_post_processing_user_prompt(
  input_prompt, investigation, user_prompt: Optional[str] = None
@@ -787,15 +867,15 @@ class ToolCallingLLM:
  with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
  futures = []
  for tool_index, t in enumerate(tools_to_call, 1): # type: ignore
- futures.append(
- executor.submit(
- self._invoke_tool,
- tool_to_call=t, # type: ignore
- previous_tool_calls=tool_calls,
- trace_span=DummySpan(), # Streaming mode doesn't support tracing yet
- tool_number=tool_number_offset + tool_index,
- )
+ tool_number = tool_number_offset + tool_index
+ future = executor.submit(
+ self._invoke_llm_tool_call,
+ tool_to_call=t, # type: ignore
+ previous_tool_calls=tool_calls,
+ trace_span=DummySpan(), # Streaming mode doesn't support tracing yet
+ tool_number=tool_number,
  )
+ futures.append(future)
  yield StreamMessage(
  event=StreamEvents.START_TOOL,
  data={"tool_name": t.function.name, "id": t.id},
@@ -894,9 +974,6 @@ class IssueInvestigator(ToolCallingLLM):
  "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
  )
 
- todo_manager = get_todo_manager()
- todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
-
  system_prompt = load_and_render_prompt(
  prompt,
  {
@@ -905,8 +982,6 @@ class IssueInvestigator(ToolCallingLLM):
  "structured_output": request_structured_output_from_llm,
  "toolsets": self.tool_executor.toolsets,
  "cluster_name": self.cluster_name,
- "todo_list": todo_context,
- "investigation_id": self.investigation_id,
  },
  )
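Finally, a sketch of how a caller might wire the new approval flow. Only approval_callback, its Callable[[StructuredToolResult], tuple[bool, Optional[str]]] signature, result.invocation and the APPROVAL_REQUIRED status appear in the diff; the CLI prompt below is a hypothetical illustration.

```python
from typing import Optional, Tuple


def cli_approval_callback(result) -> Tuple[bool, Optional[str]]:
    # result is the StructuredToolResult that came back with APPROVAL_REQUIRED.
    # Return (approved, optional feedback); on approval handle_tool_call_approval
    # re-invokes the tool with user_approved=True, on denial it records an error.
    print(f"Tool wants to run: {result.invocation}")
    if input("Approve? [y/N] ").strip().lower() == "y":
        return True, None
    return False, input("Optional feedback for the model: ") or None


# Hypothetical wiring on an already-constructed ToolCallingLLM instance:
# llm.approval_callback = cli_approval_callback
```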