holmesgpt 0.13.0__py3-none-any.whl → 0.13.2__py3-none-any.whl

This diff compares two publicly released versions of this package as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in those public registries.
Files changed (118)
  1. holmes/__init__.py +1 -1
  2. holmes/common/env_vars.py +11 -0
  3. holmes/config.py +3 -1
  4. holmes/core/conversations.py +0 -11
  5. holmes/core/investigation.py +0 -6
  6. holmes/core/llm.py +63 -2
  7. holmes/core/prompt.py +0 -2
  8. holmes/core/supabase_dal.py +2 -2
  9. holmes/core/todo_tasks_formatter.py +51 -0
  10. holmes/core/tool_calling_llm.py +277 -101
  11. holmes/core/tools.py +20 -4
  12. holmes/core/toolset_manager.py +1 -5
  13. holmes/core/tracing.py +1 -1
  14. holmes/interactive.py +63 -2
  15. holmes/main.py +7 -2
  16. holmes/plugins/prompts/_fetch_logs.jinja2 +4 -0
  17. holmes/plugins/prompts/_general_instructions.jinja2 +3 -1
  18. holmes/plugins/prompts/investigation_procedure.jinja2 +3 -13
  19. holmes/plugins/runbooks/CLAUDE.md +85 -0
  20. holmes/plugins/runbooks/README.md +24 -0
  21. holmes/plugins/toolsets/__init__.py +5 -1
  22. holmes/plugins/toolsets/argocd.yaml +1 -1
  23. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +18 -6
  24. holmes/plugins/toolsets/aws.yaml +9 -5
  25. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +3 -1
  26. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +3 -1
  27. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  28. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -1
  29. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +3 -1
  30. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  31. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -1
  32. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -1
  33. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -1
  34. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -1
  35. holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
  36. holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
  37. holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
  38. holmes/plugins/toolsets/bash/aws/constants.py +529 -0
  39. holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
  40. holmes/plugins/toolsets/bash/azure/constants.py +339 -0
  41. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
  42. holmes/plugins/toolsets/bash/bash_toolset.py +62 -17
  43. holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
  44. holmes/plugins/toolsets/bash/common/stringify.py +14 -1
  45. holmes/plugins/toolsets/bash/common/validators.py +91 -0
  46. holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
  47. holmes/plugins/toolsets/bash/docker/constants.py +255 -0
  48. holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
  49. holmes/plugins/toolsets/bash/helm/constants.py +92 -0
  50. holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
  51. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
  52. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
  53. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
  54. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
  55. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
  56. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
  57. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
  58. holmes/plugins/toolsets/bash/parse_command.py +106 -32
  59. holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
  60. holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
  61. holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
  62. holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
  63. holmes/plugins/toolsets/bash/utilities/head.py +12 -0
  64. holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
  65. holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
  66. holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
  67. holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
  68. holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
  69. holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
  70. holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
  71. holmes/plugins/toolsets/confluence.yaml +1 -1
  72. holmes/plugins/toolsets/coralogix/api.py +3 -1
  73. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +4 -4
  74. holmes/plugins/toolsets/coralogix/utils.py +41 -14
  75. holmes/plugins/toolsets/datadog/datadog_api.py +45 -2
  76. holmes/plugins/toolsets/datadog/datadog_general_instructions.jinja2 +208 -0
  77. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +43 -0
  78. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +12 -9
  79. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +722 -0
  80. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +17 -6
  81. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +15 -7
  82. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +6 -2
  83. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +9 -3
  84. holmes/plugins/toolsets/docker.yaml +1 -1
  85. holmes/plugins/toolsets/git.py +15 -5
  86. holmes/plugins/toolsets/grafana/toolset_grafana.py +25 -4
  87. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +4 -4
  88. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +5 -3
  89. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -32
  90. holmes/plugins/toolsets/helm.yaml +1 -1
  91. holmes/plugins/toolsets/internet/internet.py +4 -2
  92. holmes/plugins/toolsets/internet/notion.py +4 -2
  93. holmes/plugins/toolsets/investigator/core_investigation.py +5 -17
  94. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +1 -5
  95. holmes/plugins/toolsets/kafka.py +19 -7
  96. holmes/plugins/toolsets/kubernetes.yaml +5 -5
  97. holmes/plugins/toolsets/kubernetes_logs.py +4 -4
  98. holmes/plugins/toolsets/kubernetes_logs.yaml +1 -1
  99. holmes/plugins/toolsets/logging_utils/logging_api.py +15 -2
  100. holmes/plugins/toolsets/mcp/toolset_mcp.py +3 -1
  101. holmes/plugins/toolsets/newrelic.py +8 -4
  102. holmes/plugins/toolsets/opensearch/opensearch.py +13 -5
  103. holmes/plugins/toolsets/opensearch/opensearch_logs.py +4 -4
  104. holmes/plugins/toolsets/opensearch/opensearch_traces.py +9 -6
  105. holmes/plugins/toolsets/prometheus/prometheus.py +198 -57
  106. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +7 -3
  107. holmes/plugins/toolsets/robusta/robusta.py +10 -4
  108. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -2
  109. holmes/plugins/toolsets/servicenow/servicenow.py +9 -3
  110. holmes/plugins/toolsets/slab.yaml +1 -1
  111. holmes/utils/console/logging.py +6 -1
  112. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/METADATA +3 -2
  113. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/RECORD +116 -90
  114. holmes/core/todo_manager.py +0 -88
  115. holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
  116. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/LICENSE.txt +0 -0
  117. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/WHEEL +0 -0
  118. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/entry_points.txt +0 -0

holmes/core/tool_calling_llm.py CHANGED
@@ -2,18 +2,22 @@ import concurrent.futures
 import json
 import logging
 import textwrap
-import uuid
-from typing import Dict, List, Optional, Type, Union
+from typing import Dict, List, Optional, Type, Union, Callable
+
 
 import sentry_sdk
 from openai import BadRequestError
 from openai.types.chat.chat_completion_message_tool_call import (
     ChatCompletionMessageToolCall,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from rich.console import Console
 
-from holmes.common.env_vars import TEMPERATURE, MAX_OUTPUT_TOKEN_RESERVATION
+from holmes.common.env_vars import (
+    TEMPERATURE,
+    MAX_OUTPUT_TOKEN_RESERVATION,
+    LOG_LLM_USAGE_RESPONSE,
+)
 
 from holmes.core.investigation_structured_output import (
     DEFAULT_SECTIONS,
@@ -39,9 +43,80 @@ from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
 from holmes.utils.stream import StreamEvents, StreamMessage
-from holmes.core.todo_manager import (
-    get_todo_manager,
-)
+
+# Create a named logger for cost tracking
+cost_logger = logging.getLogger("holmes.costs")
+
+
+class LLMCosts(BaseModel):
+    """Tracks cost and token usage for LLM calls."""
+
+    total_cost: float = 0.0
+    total_tokens: int = 0
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+
+
+def _extract_cost_from_response(full_response) -> float:
+    """Extract cost value from LLM response.
+
+    Args:
+        full_response: The raw LLM response object
+
+    Returns:
+        The cost as a float, or 0.0 if not available
+    """
+    try:
+        cost_value = (
+            full_response._hidden_params.get("response_cost", 0)
+            if hasattr(full_response, "_hidden_params")
+            else 0
+        )
+        # Ensure cost is a float
+        return float(cost_value) if cost_value is not None else 0.0
+    except Exception:
+        return 0.0
+
+
+def _process_cost_info(
+    full_response, costs: Optional[LLMCosts] = None, log_prefix: str = "LLM call"
+) -> None:
+    """Process cost and token information from LLM response.
+
+    Logs the cost information and optionally accumulates it into a costs object.
+
+    Args:
+        full_response: The raw LLM response object
+        costs: Optional LLMCosts object to accumulate costs into
+        log_prefix: Prefix for logging messages (e.g., "LLM call", "Post-processing")
+    """
+    try:
+        cost = _extract_cost_from_response(full_response)
+        usage = getattr(full_response, "usage", {})
+
+        if usage:
+            if LOG_LLM_USAGE_RESPONSE:  # shows stats on token cache usage
+                logging.info(f"LLM usage response:\n{usage}\n")
+            prompt_toks = usage.get("prompt_tokens", 0)
+            completion_toks = usage.get("completion_tokens", 0)
+            total_toks = usage.get("total_tokens", 0)
+            cost_logger.debug(
+                f"{log_prefix} cost: ${cost:.6f} | Tokens: {prompt_toks} prompt + {completion_toks} completion = {total_toks} total"
+            )
+            # Accumulate costs and tokens if costs object provided
+            if costs:
+                costs.total_cost += cost
+                costs.prompt_tokens += prompt_toks
+                costs.completion_tokens += completion_toks
+                costs.total_tokens += total_toks
+        elif cost > 0:
+            cost_logger.debug(
+                f"{log_prefix} cost: ${cost:.6f} | Token usage not available"
+            )
+            if costs:
+                costs.total_cost += cost
+    except Exception as e:
+        logging.debug(f"Could not extract cost information: {e}")
 
 
 def format_tool_result_data(tool_result: StructuredToolResult) -> str:
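
For orientation, a minimal sketch of how these helpers compose (not from the package; `FakeResponse` is a hypothetical stand-in for the LiteLLM response object, which exposes cost via `_hidden_params["response_cost"]` and token counts via `usage`):

```python
# Sketch only: assumes the LLMCosts/_process_cost_info definitions above.
class FakeResponse:
    """Hypothetical stand-in exposing the two attributes the helpers read."""

    def __init__(self, cost: float, prompt_tokens: int, completion_tokens: int):
        self._hidden_params = {"response_cost": cost}
        self.usage = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }


costs = LLMCosts()
for response in (FakeResponse(0.0021, 1200, 150), FakeResponse(0.0008, 400, 60)):
    _process_cost_info(response, costs, log_prefix="LLM call")

print(f"${costs.total_cost:.4f} across {costs.total_tokens} tokens")
# -> $0.0029 across 1810 tokens
```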
@@ -186,11 +261,11 @@ class ToolCallResult(BaseModel):
         }
 
 
-class LLMResult(BaseModel):
+class LLMResult(LLMCosts):
     tool_calls: Optional[List[ToolCallResult]] = None
     result: Optional[str] = None
     unprocessed_result: Optional[str] = None
-    instructions: List[str] = []
+    instructions: List[str] = Field(default_factory=list)
     # TODO: clean up these two
     prompt: Optional[str] = None
     messages: Optional[List[dict]] = None
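
Because `LLMResult` now subclasses `LLMCosts`, cost accounting rides along on the result object; the `**costs.model_dump()` spread later in this diff simply copies the four cost fields into the constructor. A caller could then read them directly (illustrative call site, not from the package):

```python
# Illustrative: `ai` is a ToolCallingLLM instance; argument names are assumed.
result = ai.prompt_call(system_prompt, user_prompt)
print(
    f"cost=${result.total_cost:.6f}, "
    f"tokens={result.prompt_tokens}+{result.completion_tokens}={result.total_tokens}"
)
```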
@@ -211,7 +286,9 @@ class ToolCallingLLM:
         self.max_steps = max_steps
         self.tracer = tracer
         self.llm = llm
-        self.investigation_id = str(uuid.uuid4())
+        self.approval_callback: Optional[
+            Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
+        ] = None
 
     def prompt_call(
         self,
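
Clients opt into the new approval flow by assigning `approval_callback`, a callable that receives the pending `StructuredToolResult` and returns an `(approved, feedback)` tuple. A minimal console-based sketch (illustrative only; the package's own interactive flow may differ):

```python
from typing import Optional

from holmes.core.tools import StructuredToolResult  # assumed import path


def console_approval_callback(
    pending: StructuredToolResult,
) -> tuple[bool, Optional[str]]:
    # `invocation` carries the exact command awaiting approval.
    answer = input(f"Run `{pending.invocation}`? [y/N] ").strip().lower()
    if answer == "y":
        return True, None
    # Optional feedback is relayed back to the model on denial.
    feedback = input("Reason for denying (optional): ").strip()
    return False, feedback or None


tool_calling_llm.approval_callback = console_approval_callback  # a ToolCallingLLM
```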
@@ -259,6 +336,8 @@
     ) -> LLMResult:
         perf_timing = PerformanceTiming("tool_calling_llm.call")
         tool_calls = []  # type: ignore
+        costs = LLMCosts()
+
         tools = self.tool_executor.get_all_tools_openai_format(
             target_model=self.llm.model
         )
@@ -299,6 +378,9 @@
                 )
                 logging.debug(f"got response {full_response.to_json()}")  # type: ignore
 
+                # Extract and accumulate cost information
+                _process_cost_info(full_response, costs, "LLM call")
+
                 perf_timing.measure("llm.completion")
             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
             except BadRequestError as e:
@@ -352,11 +434,14 @@
                 if post_process_prompt and user_prompt:
                     logging.info("Running post processing on investigation.")
                     raw_response = text_response
-                    post_processed_response = self._post_processing_call(
-                        prompt=user_prompt,
-                        investigation=raw_response,
-                        user_prompt=post_process_prompt,
+                    post_processed_response, post_processing_cost = (
+                        self._post_processing_call(
+                            prompt=user_prompt,
+                            investigation=raw_response,
+                            user_prompt=post_process_prompt,
+                        )
                     )
+                    costs.total_cost += post_processing_cost
 
                     perf_timing.end(f"- completed in {i} iterations -")
                     return LLMResult(
@@ -365,6 +450,7 @@
                         tool_calls=tool_calls,
                         prompt=json.dumps(messages, indent=2),
                         messages=messages,
+                        **costs.model_dump(),  # Include all cost fields
                     )
 
                 perf_timing.end(f"- completed in {i} iterations -")
@@ -373,6 +459,7 @@
                     tool_calls=tool_calls,
                     prompt=json.dumps(messages, indent=2),
                     messages=messages,
+                    **costs.model_dump(),  # Include all cost fields
                 )
 
             if text_response and text_response.strip():
@@ -383,33 +470,106 @@
             perf_timing.measure("pre-tool-calls")
             with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                 futures = []
+                futures_tool_numbers: dict[
+                    concurrent.futures.Future, Optional[int]
+                ] = {}
+                tool_number: Optional[int]
                 for tool_index, t in enumerate(tools_to_call, 1):
                     logging.debug(f"Tool to call: {t}")
-                    futures.append(
-                        executor.submit(
-                            self._invoke_tool,
-                            tool_to_call=t,
-                            previous_tool_calls=tool_calls,
-                            trace_span=trace_span,
-                            tool_number=tool_number_offset + tool_index,
-                        )
+                    tool_number = tool_number_offset + tool_index
+                    future = executor.submit(
+                        self._invoke_llm_tool_call,
+                        tool_to_call=t,
+                        previous_tool_calls=tool_calls,
+                        trace_span=trace_span,
+                        tool_number=tool_number,
                     )
+                    futures_tool_numbers[future] = tool_number
+                    futures.append(future)
 
                 for future in concurrent.futures.as_completed(futures):
                     tool_call_result: ToolCallResult = future.result()
 
+                    tool_number = (
+                        futures_tool_numbers[future]
+                        if future in futures_tool_numbers
+                        else None
+                    )
+                    tool_call_result = self.handle_tool_call_approval(
+                        tool_call_result=tool_call_result, tool_number=tool_number
+                    )
+
                     tool_calls.append(tool_call_result.as_tool_result_response())
                     messages.append(tool_call_result.as_tool_call_message())
 
                     perf_timing.measure(f"tool completed {tool_call_result.tool_name}")
 
+            # Update the tool number offset for the next iteration
+            tool_number_offset += len(tools_to_call)
+
             # Add a blank line after all tools in this batch complete
             if tools_to_call:
                 logging.info("")
 
         raise Exception(f"Too many LLM calls - exceeded max_steps: {i}/{max_steps}")
 
-    def _invoke_tool(
+    def _directly_invoke_tool(
+        self,
+        tool_name: str,
+        tool_params: dict,
+        user_approved: bool,
+        trace_span=DummySpan(),
+        tool_number: Optional[int] = None,
+    ) -> StructuredToolResult:
+        tool_span = trace_span.start_span(name=tool_name, type="tool")
+        tool = self.tool_executor.get_tool_by_name(tool_name)
+        tool_response = None
+        try:
+            if (not tool) or (tool_params is None):
+                logging.warning(
+                    f"Skipping tool execution for {tool_name}: args: {tool_params}"
+                )
+                tool_response = StructuredToolResult(
+                    status=ToolResultStatus.ERROR,
+                    error=f"Failed to find tool {tool_name}",
+                    params=tool_params,
+                )
+            else:
+                tool_response = tool.invoke(
+                    tool_params, tool_number=tool_number, user_approved=user_approved
+                )
+        except Exception as e:
+            logging.error(
+                f"Tool call to {tool_name} failed with an Exception", exc_info=True
+            )
+            tool_response = StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error=f"Tool call failed: {e}",
+                params=tool_params,
+            )
+
+            # Log error to trace span
+            tool_span.log(
+                input=tool_params, output=str(e), metadata={"status": "ERROR"}
+            )
+
+        tool_span.log(
+            input=tool_params,
+            output=tool_response.data,
+            metadata={
+                "status": tool_response.status.value,
+                "error": tool_response.error,
+                "description": tool.get_parameterized_one_liner(tool_params)
+                if tool
+                else "",
+                "structured_tool_result": tool_response,
+            },
+        )
+        tool_span.end()
+
+        return tool_response
+
+    def _invoke_llm_tool_call(
         self,
         tool_to_call: ChatCompletionMessageToolCall,
         previous_tool_calls: list[dict],
@@ -438,92 +598,97 @@
             ),
         )
 
-        tool_params = None
+        tool_params = {}
         try:
            tool_params = json.loads(tool_arguments)
         except Exception:
             logging.warning(
                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
             )
-        tool_call_id = tool_to_call.id
-        tool = self.tool_executor.get_tool_by_name(tool_name)
-
-        if (not tool) or (tool_params is None):
-            logging.warning(
-                f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
-            )
-            return ToolCallResult(
-                tool_call_id=tool_call_id,
-                tool_name=tool_name,
-                description="NA",
-                result=StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
-                    error=f"Failed to find tool {tool_name}",
-                    params=tool_params,
-                ),
-            )
 
-        tool_response = None
+        tool_call_id = tool_to_call.id
 
-        # Create tool span if tracing is enabled
-        tool_span = trace_span.start_span(name=tool_name, type="tool")
+        tool_response = prevent_overly_repeated_tool_call(
+            tool_name=tool_name,
+            tool_params=tool_params,
+            tool_calls=previous_tool_calls,
+        )
 
-        try:
-            tool_response = prevent_overly_repeated_tool_call(
-                tool_name=tool.name,
+        if not tool_response:
+            tool_response = self._directly_invoke_tool(
+                tool_name=tool_name,
                 tool_params=tool_params,
-                tool_calls=previous_tool_calls,
-            )
-            if not tool_response:
-                tool_response = tool.invoke(tool_params, tool_number=tool_number)
-
-            if not isinstance(tool_response, StructuredToolResult):
-                # Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
-                logging.error(
-                    f"Tool {tool.name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
-                )
-                tool_response = StructuredToolResult(
-                    status=ToolResultStatus.SUCCESS,
-                    data=tool_response,
-                    params=tool_params,
-                )
-
-            # Log tool execution to trace span
-            tool_span.log(
-                input=tool_params,
-                output=tool_response.data,
-                metadata={
-                    "status": tool_response.status.value,
-                    "error": tool_response.error,
-                    "description": tool.get_parameterized_one_liner(tool_params),
-                    "structured_tool_result": tool_response,
-                },
+                user_approved=False,
+                trace_span=trace_span,
+                tool_number=tool_number,
             )
 
-        except Exception as e:
+        if not isinstance(tool_response, StructuredToolResult):
+            # Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
             logging.error(
-                f"Tool call to {tool_name} failed with an Exception", exc_info=True
+                f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
             )
             tool_response = StructuredToolResult(
-                status=ToolResultStatus.ERROR,
-                error=f"Tool call failed: {e}",
+                status=ToolResultStatus.SUCCESS,
+                data=tool_response,
                 params=tool_params,
             )
 
-            # Log error to trace span
-            tool_span.log(
-                input=tool_params, output=str(e), metadata={"status": "ERROR"}
-            )
-        finally:
-            # End tool span
-            tool_span.end()
+        tool = self.tool_executor.get_tool_by_name(tool_name)
         return ToolCallResult(
             tool_call_id=tool_call_id,
             tool_name=tool_name,
-            description=tool.get_parameterized_one_liner(tool_params),
+            description=tool.get_parameterized_one_liner(tool_params) if tool else "",
             result=tool_response,
         )
 
+    def handle_tool_call_approval(
+        self, tool_call_result: ToolCallResult, tool_number: Optional[int]
+    ) -> ToolCallResult:
+        """
+        Handle approval for a single tool call if required.
+
+        Args:
+            tool_call_result: A single tool call result that may require approval
+
+        Returns:
+            Updated tool call result with approved/denied status
+        """
+
+        if tool_call_result.result.status != ToolResultStatus.APPROVAL_REQUIRED:
+            return tool_call_result
+
+        # If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
+        if not self.approval_callback:
+            tool_call_result.result.status = ToolResultStatus.ERROR
+            return tool_call_result
+
+        # Get approval from user
+        approved, feedback = self.approval_callback(tool_call_result.result)
+
+        if approved:
+            logging.debug(
+                f"User approved command: {tool_call_result.result.invocation}"
+            )
+
+            new_response = self._directly_invoke_tool(
+                tool_name=tool_call_result.tool_name,
+                tool_params=tool_call_result.result.params or {},
+                user_approved=True,
+                trace_span=DummySpan(),
+                tool_number=tool_number,
+            )
+            tool_call_result.result = new_response
+        else:
+            # User denied - update to error
+            feedback_text = f" User feedback: {feedback}" if feedback else ""
+            tool_call_result.result.status = ToolResultStatus.ERROR
+            tool_call_result.result.error = (
+                f"User denied command execution.{feedback_text}"
+            )
+
+        return tool_call_result
+
     @staticmethod
     def __load_post_processing_user_prompt(
         input_prompt, investigation, user_prompt: Optional[str] = None
@@ -540,7 +705,7 @@ class ToolCallingLLM:
         investigation,
         user_prompt: Optional[str] = None,
         system_prompt: str = "You are an AI assistant summarizing Kubernetes issues.",
-    ) -> Optional[str]:
+    ) -> tuple[Optional[str], float]:
         try:
             user_prompt = ToolCallingLLM.__load_post_processing_user_prompt(
                 prompt, investigation, user_prompt
@@ -559,10 +724,18 @@
             ]
             full_response = self.llm.completion(messages=messages, temperature=0)
             logging.debug(f"Post processing response {full_response}")
-            return full_response.choices[0].message.content  # type: ignore
+
+            # Extract and log cost information for post-processing
+            post_processing_cost = _extract_cost_from_response(full_response)
+            if post_processing_cost > 0:
+                cost_logger.debug(
+                    f"Post-processing LLM cost: ${post_processing_cost:.6f}"
+                )
+
+            return full_response.choices[0].message.content, post_processing_cost  # type: ignore
         except Exception:
             logging.exception("Failed to run post processing", exc_info=True)
-            return investigation
+            return investigation, 0.0
 
     @sentry_sdk.trace
     def truncate_messages_to_fit_context(
@@ -602,6 +775,7 @@
         perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
         i = 0
+        tool_number_offset = 0
 
         while i < max_steps:
             i += 1
@@ -634,6 +808,10 @@
                     stream=False,
                     drop_params=True,
                 )
+
+                # Log cost information for this iteration (no accumulation in streaming)
+                _process_cost_info(full_response, log_prefix="LLM iteration")
+
                 perf_timing.measure("llm.completion")
             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
             except BadRequestError as e:
@@ -689,15 +867,15 @@
             with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                 futures = []
                 for tool_index, t in enumerate(tools_to_call, 1):  # type: ignore
-                    futures.append(
-                        executor.submit(
-                            self._invoke_tool,
-                            tool_to_call=t,  # type: ignore
-                            previous_tool_calls=tool_calls,
-                            trace_span=DummySpan(),  # Streaming mode doesn't support tracing yet
-                            tool_number=tool_index,
-                        )
+                    tool_number = tool_number_offset + tool_index
+                    future = executor.submit(
+                        self._invoke_llm_tool_call,
+                        tool_to_call=t,  # type: ignore
+                        previous_tool_calls=tool_calls,
+                        trace_span=DummySpan(),  # Streaming mode doesn't support tracing yet
+                        tool_number=tool_number,
                     )
+                    futures.append(future)
                     yield StreamMessage(
                         event=StreamEvents.START_TOOL,
                         data={"tool_name": t.function.name, "id": t.id},
@@ -716,6 +894,9 @@
                         data=tool_call_result.as_streaming_tool_result_response(),
                     )
 
+            # Update the tool number offset for the next iteration
+            tool_number_offset += len(tools_to_call)
+
         raise Exception(
             f"Too many LLM calls - exceeded max_steps: {i}/{self.max_steps}"
         )
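
The `tool_number_offset` bookkeeping makes tool numbering continuous across iterations of the agent loop instead of restarting at #1 for every batch. The arithmetic in isolation:

```python
# Two successive agent iterations, each with its own batch of tool calls.
tool_number_offset = 0
for tools_to_call in (["kubectl_get", "kubectl_logs"], ["kubectl_describe"]):
    for tool_index, name in enumerate(tools_to_call, 1):
        print(f"Running tool #{tool_number_offset + tool_index}: {name}")
    # Update the tool number offset for the next iteration
    tool_number_offset += len(tools_to_call)
# -> tool #1, tool #2, then tool #3 (previously the second batch restarted at #1)
```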
@@ -793,9 +974,6 @@ class IssueInvestigator(ToolCallingLLM):
                 "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
             )
 
-        todo_manager = get_todo_manager()
-        todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
-
         system_prompt = load_and_render_prompt(
             prompt,
             {
@@ -804,8 +982,6 @@
                 "structured_output": request_structured_output_from_llm,
                 "toolsets": self.tool_executor.toolsets,
                 "cluster_name": self.cluster_name,
-                "todo_list": todo_context,
-                "investigation_id": self.investigation_id,
             },
         )
 
holmes/core/tools.py CHANGED
@@ -24,12 +24,15 @@ class ToolResultStatus(str, Enum):
     SUCCESS = "success"
     ERROR = "error"
     NO_DATA = "no_data"
+    APPROVAL_REQUIRED = "approval_required"
 
     def to_color(self) -> str:
         if self == ToolResultStatus.SUCCESS:
             return "green"
         elif self == ToolResultStatus.ERROR:
             return "red"
+        elif self == ToolResultStatus.APPROVAL_REQUIRED:
+            return "yellow"
         else:
             return "white"
 
@@ -38,6 +41,8 @@
             return "✔"
         elif self == ToolResultStatus.ERROR:
             return "❌"
+        elif self == ToolResultStatus.APPROVAL_REQUIRED:
+            return "⚠️"
         else:
             return "⚪️"
 
@@ -148,14 +153,17 @@ class Tool(ABC, BaseModel):
         )
 
     def invoke(
-        self, params: Dict, tool_number: Optional[int] = None
+        self,
+        params: Dict,
+        tool_number: Optional[int] = None,
+        user_approved: bool = False,
     ) -> StructuredToolResult:
         tool_number_str = f"#{tool_number} " if tool_number else ""
         logging.info(
             f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
         )
         start_time = time.time()
-        result = self._invoke(params)
+        result = self._invoke(params=params, user_approved=user_approved)
         result.icon_url = self.icon_url
         elapsed = time.time() - start_time
         output_str = (
@@ -171,7 +179,13 @@
         return result
 
     @abstractmethod
-    def _invoke(self, params: Dict) -> StructuredToolResult:
+    def _invoke(
+        self, params: dict, user_approved: bool = False
+    ) -> StructuredToolResult:
+        """
+        params: the tool params
+        user_approved: whether the tool call is approved by the user. Can be used to confidently execute unsafe actions.
+        """
         pass
 
     @abstractmethod
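
Combined with the new `APPROVAL_REQUIRED` status, the `user_approved` flag lets a tool defer a destructive action until the approval flow re-invokes it. A hypothetical sketch (the class, its fields, and its params are illustrative; other required `Tool` members are elided):

```python
from holmes.core.tools import StructuredToolResult, Tool, ToolResultStatus  # assumed path


class DeletePodTool(Tool):  # hypothetical example tool
    name: str = "delete_pod"
    description: str = "Delete a Kubernetes pod (destructive; needs approval)"

    def get_parameterized_one_liner(self, params: dict) -> str:
        return f"kubectl delete pod {params.get('name')} -n {params.get('namespace')}"

    def _invoke(
        self, params: dict, user_approved: bool = False
    ) -> StructuredToolResult:
        command = self.get_parameterized_one_liner(params)
        if not user_approved:
            # First pass: report the pending command and wait for approval.
            return StructuredToolResult(
                status=ToolResultStatus.APPROVAL_REQUIRED,
                invocation=command,
                params=params,
            )
        # Second pass: handle_tool_call_approval re-invokes with user_approved=True.
        return StructuredToolResult(
            status=ToolResultStatus.SUCCESS,
            data=f"executed: {command}",
            params=params,
        )
```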
@@ -223,7 +237,9 @@ class YAMLTool(Tool, BaseModel):
             return ToolResultStatus.NO_DATA
         return ToolResultStatus.SUCCESS
 
-    def _invoke(self, params) -> StructuredToolResult:
+    def _invoke(
+        self, params: dict, user_approved: bool = False
+    ) -> StructuredToolResult:
         if self.command is not None:
             raw_output, return_code, invocation = self.__invoke_command(params)
         else:

holmes/core/toolset_manager.py CHANGED
@@ -266,11 +266,7 @@ class ToolsetManager:
             toolset.path = cached_status.get("path", None)
             # check prerequisites for only enabled toolset when the toolset is loaded from cache. When the toolset is
             # not loaded from cache, the prerequisites are checked in the refresh_toolset_status method.
-            if (
-                toolset.enabled
-                and toolset.status == ToolsetStatusEnum.ENABLED
-                and using_cached
-            ):
+            if toolset.enabled and toolset.status == ToolsetStatusEnum.ENABLED:
                 enabled_toolsets_from_cache.append(toolset)
         self.check_toolset_prerequisites(enabled_toolsets_from_cache)
 
holmes/core/tracing.py CHANGED
@@ -120,7 +120,7 @@
 class DummyTracer:
     """A no-op tracer implementation for when tracing is disabled."""
 
-    def start_experiment(self, experiment_name=None, metadata=None):
+    def start_experiment(self, experiment_name=None, additional_metadata=None):
         """No-op experiment creation."""
         return None
 