holmesgpt 0.12.5__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +19 -1
  3. holmes/common/env_vars.py +13 -0
  4. holmes/config.py +69 -9
  5. holmes/core/conversations.py +11 -0
  6. holmes/core/investigation.py +16 -3
  7. holmes/core/investigation_structured_output.py +12 -0
  8. holmes/core/llm.py +10 -0
  9. holmes/core/models.py +9 -1
  10. holmes/core/openai_formatting.py +72 -12
  11. holmes/core/prompt.py +13 -0
  12. holmes/core/supabase_dal.py +3 -0
  13. holmes/core/todo_manager.py +88 -0
  14. holmes/core/tool_calling_llm.py +121 -149
  15. holmes/core/tools.py +10 -1
  16. holmes/core/tools_utils/tool_executor.py +7 -2
  17. holmes/core/tools_utils/toolset_utils.py +7 -2
  18. holmes/core/tracing.py +3 -2
  19. holmes/interactive.py +1 -0
  20. holmes/main.py +2 -1
  21. holmes/plugins/prompts/__init__.py +7 -1
  22. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  23. holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
  24. holmes/plugins/prompts/_fetch_logs.jinja2 +6 -1
  25. holmes/plugins/prompts/_general_instructions.jinja2 +14 -0
  26. holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
  27. holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
  28. holmes/plugins/prompts/generic_ask.jinja2 +4 -3
  29. holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
  30. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -0
  31. holmes/plugins/toolsets/__init__.py +19 -6
  32. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
  33. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
  34. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
  35. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  36. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
  37. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
  38. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  39. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
  40. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
  41. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
  42. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
  43. holmes/plugins/toolsets/coralogix/api.py +6 -6
  44. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
  45. holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
  46. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
  47. holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
  48. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
  49. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
  50. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
  51. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
  52. holmes/plugins/toolsets/git.py +15 -15
  53. holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
  54. holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
  55. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
  56. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
  57. holmes/plugins/toolsets/internet/internet.py +2 -1
  58. holmes/plugins/toolsets/internet/notion.py +2 -1
  59. holmes/plugins/toolsets/investigator/__init__.py +0 -0
  60. holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
  61. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
  62. holmes/plugins/toolsets/investigator/model.py +15 -0
  63. holmes/plugins/toolsets/kafka.py +14 -7
  64. holmes/plugins/toolsets/kubernetes_logs.py +454 -25
  65. holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
  66. holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
  67. holmes/plugins/toolsets/newrelic.py +8 -3
  68. holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
  69. holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
  70. holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
  71. holmes/plugins/toolsets/prometheus/prometheus.py +149 -44
  72. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
  73. holmes/plugins/toolsets/robusta/robusta.py +4 -4
  74. holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
  75. holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
  76. holmes/plugins/toolsets/utils.py +8 -1
  77. holmes/utils/llms.py +20 -0
  78. holmes/utils/stream.py +90 -0
  79. {holmesgpt-0.12.5.dist-info → holmesgpt-0.13.0.dist-info}/METADATA +48 -35
  80. {holmesgpt-0.12.5.dist-info → holmesgpt-0.13.0.dist-info}/RECORD +83 -74
  81. holmes/utils/robusta.py +0 -9
  82. {holmesgpt-0.12.5.dist-info → holmesgpt-0.13.0.dist-info}/LICENSE.txt +0 -0
  83. {holmesgpt-0.12.5.dist-info → holmesgpt-0.13.0.dist-info}/WHEEL +0 -0
  84. {holmesgpt-0.12.5.dist-info → holmesgpt-0.13.0.dist-info}/entry_points.txt +0 -0
holmes/core/todo_manager.py ADDED
@@ -0,0 +1,88 @@
+ from typing import Dict, List
+ from threading import Lock
+
+ from holmes.plugins.toolsets.investigator.model import Task, TaskStatus
+
+
+ class TodoListManager:
+ """
+ Session-based storage manager for investigation TodoLists.
+ Stores TodoLists per session and provides methods to get/update tasks.
+ """
+
+ def __init__(self):
+ self._sessions: Dict[str, List[Task]] = {}
+ self._lock: Lock = Lock()
+
+ def get_session_tasks(self, session_id: str) -> List[Task]:
+ with self._lock:
+ return self._sessions.get(session_id, []).copy()
+
+ def update_session_tasks(self, session_id: str, tasks: List[Task]) -> None:
+ with self._lock:
+ self._sessions[session_id] = tasks.copy()
+
+ def clear_session(self, session_id: str) -> None:
+ with self._lock:
+ if session_id in self._sessions:
+ del self._sessions[session_id]
+
+ def get_session_count(self) -> int:
+ with self._lock:
+ return len(self._sessions)
+
+ def format_tasks_for_prompt(self, session_id: str) -> str:
+ """
+ Format tasks for injection into system prompt.
+ Returns empty string if no tasks exist.
+ """
+ tasks = self.get_session_tasks(session_id)
+
+ if not tasks:
+ return ""
+
+ status_order = {
+ TaskStatus.PENDING: 0,
+ TaskStatus.IN_PROGRESS: 1,
+ TaskStatus.COMPLETED: 2,
+ }
+
+ sorted_tasks = sorted(
+ tasks,
+ key=lambda t: (status_order.get(t.status, 3),),
+ )
+
+ lines = ["# CURRENT INVESTIGATION TASKS"]
+ lines.append("")
+
+ pending_count = sum(1 for t in tasks if t.status == TaskStatus.PENDING)
+ progress_count = sum(1 for t in tasks if t.status == TaskStatus.IN_PROGRESS)
+ completed_count = sum(1 for t in tasks if t.status == TaskStatus.COMPLETED)
+
+ lines.append(
+ f"**Task Status**: {completed_count} completed, {progress_count} in progress, {pending_count} pending"
+ )
+ lines.append("")
+
+ for task in sorted_tasks:
+ status_indicator = {
+ TaskStatus.PENDING: "[ ]",
+ TaskStatus.IN_PROGRESS: "[~]",
+ TaskStatus.COMPLETED: "[✓]",
+ }.get(task.status, "[?]")
+
+ lines.append(f"{status_indicator} [{task.id}] {task.content}")
+
+ lines.append("")
+ lines.append(
+ "**Instructions**: Use TodoWrite tool to update task status as you work. Mark tasks as 'in_progress' when starting, 'completed' when finished."
+ )
+
+ return "\n".join(lines)
+
+
+ _todo_manager = TodoListManager()
+
+
+ def get_todo_manager() -> TodoListManager:
+ return _todo_manager
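
A minimal sketch of how this new session-scoped manager might be driven. The Task constructor arguments below are assumptions; the code above only relies on the id, content, and status fields and on get_todo_manager() returning the shared instance.

    from holmes.core.todo_manager import get_todo_manager
    from holmes.plugins.toolsets.investigator.model import Task, TaskStatus

    manager = get_todo_manager()
    session_id = "investigation-1234"  # hypothetical session/investigation id

    # Store a small task list for the session (Task field names assumed from the usage above)
    manager.update_session_tasks(
        session_id,
        [
            Task(id="1", content="Check pod restart events", status=TaskStatus.PENDING),
            Task(id="2", content="Fetch recent error logs", status=TaskStatus.IN_PROGRESS),
        ],
    )

    # Render the checklist that gets injected into the system prompt
    print(manager.format_tasks_for_prompt(session_id))

    # Drop the session state once the investigation is finished
    manager.clear_session(session_id)
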
holmes/core/tool_calling_llm.py CHANGED
@@ -2,32 +2,25 @@ import concurrent.futures
  import json
  import logging
  import textwrap
+ import uuid
  from typing import Dict, List, Optional, Type, Union

- import requests # type: ignore
  import sentry_sdk
- from litellm.types.utils import Message
  from openai import BadRequestError
  from openai.types.chat.chat_completion_message_tool_call import (
  ChatCompletionMessageToolCall,
  )
  from pydantic import BaseModel
- from pydantic_core import from_json
  from rich.console import Console

- from holmes.common.env_vars import (
- ROBUSTA_API_ENDPOINT,
- STREAM_CHUNKS_PER_PARSE,
- TEMPERATURE,
- )
+ from holmes.common.env_vars import TEMPERATURE, MAX_OUTPUT_TOKEN_RESERVATION
+
  from holmes.core.investigation_structured_output import (
  DEFAULT_SECTIONS,
  REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
  InputSectionsDataType,
  get_output_format_for_investigation,
  is_response_an_incorrect_tool_call,
- parse_markdown_into_sections_from_hash_sign,
- process_response_into_sections,
  )
  from holmes.core.issue import Issue
  from holmes.core.llm import LLM
@@ -45,6 +38,10 @@ from holmes.utils.tags import format_tags_in_string, parse_messages_tags
  from holmes.core.tools_utils.tool_executor import ToolExecutor
  from holmes.core.tracing import DummySpan
  from holmes.utils.colors import AI_COLOR
+ from holmes.utils.stream import StreamEvents, StreamMessage
+ from holmes.core.todo_manager import (
+ get_todo_manager,
+ )


  def format_tool_result_data(tool_result: StructuredToolResult) -> str:
@@ -94,12 +91,13 @@ def truncate_messages_to_fit_context(

  tool_call_messages = [message for message in messages if message["role"] == "tool"]

- if message_size_without_tools >= (max_context_size - maximum_output_token):
+ reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+ if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
  logging.error(
  f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
  )
  raise Exception(
- f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - maximum_output_token} tokens available for input."
+ f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
  )

  if len(tool_call_messages) == 0:
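
The net effect of this hunk: the context reserved for model output is now capped by the new MAX_OUTPUT_TOKEN_RESERVATION environment variable instead of always being the model's full output allowance. A rough sketch of the arithmetic, with illustrative numbers rather than the package's actual defaults:

    # Illustrative numbers only, not holmesgpt defaults.
    max_context_size = 128_000            # model context window (input + output)
    maximum_output_token = 32_000         # model's maximum output tokens
    MAX_OUTPUT_TOKEN_RESERVATION = 8_000  # assumed value of the new env var

    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
    available_for_input = max_context_size - reserved_for_output_tokens
    print(available_for_input)  # 120000; the old code would have left only 96000
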
@@ -213,6 +211,7 @@ class ToolCallingLLM:
  self.max_steps = max_steps
  self.tracer = tracer
  self.llm = llm
+ self.investigation_id = str(uuid.uuid4())

  def prompt_call(
  self,
@@ -221,6 +220,7 @@ class ToolCallingLLM:
  post_process_prompt: Optional[str] = None,
  response_format: Optional[Union[dict, Type[BaseModel]]] = None,
  sections: Optional[InputSectionsDataType] = None,
+ trace_span=DummySpan(),
  ) -> LLMResult:
  messages = [
  {"role": "system", "content": system_prompt},
@@ -232,6 +232,7 @@ class ToolCallingLLM:
  response_format,
  user_prompt=user_prompt,
  sections=sections,
+ trace_span=trace_span,
  )

  def messages_call(
@@ -258,7 +259,9 @@ class ToolCallingLLM:
  ) -> LLMResult:
  perf_timing = PerformanceTiming("tool_calling_llm.call")
  tool_calls = [] # type: ignore
- tools = self.tool_executor.get_all_tools_openai_format()
+ tools = self.tool_executor.get_all_tools_openai_format(
+ target_model=self.llm.model
+ )
  perf_timing.measure("get_all_tools_openai_format")
  max_steps = self.max_steps
  i = 0
@@ -413,20 +416,41 @@ class ToolCallingLLM:
  trace_span=DummySpan(),
  tool_number=None,
  ) -> ToolCallResult:
- tool_name = tool_to_call.function.name
+ # Handle the union type - ChatCompletionMessageToolCall can be either
+ # ChatCompletionMessageFunctionToolCall (with 'function' field and type='function')
+ # or ChatCompletionMessageCustomToolCall (with 'custom' field and type='custom').
+ # We use hasattr to check for the 'function' attribute as it's more flexible
+ # and doesn't require importing the specific type.
+ if hasattr(tool_to_call, "function"):
+ tool_name = tool_to_call.function.name
+ tool_arguments = tool_to_call.function.arguments
+ else:
+ # This is a custom tool call - we don't support these currently
+ logging.error(f"Unsupported custom tool call: {tool_to_call}")
+ return ToolCallResult(
+ tool_call_id=tool_to_call.id,
+ tool_name="unknown",
+ description="NA",
+ result=StructuredToolResult(
+ status=ToolResultStatus.ERROR,
+ error="Custom tool calls are not supported",
+ params=None,
+ ),
+ )
+
  tool_params = None
  try:
- tool_params = json.loads(tool_to_call.function.arguments)
+ tool_params = json.loads(tool_arguments)
  except Exception:
  logging.warning(
- f"Failed to parse arguments for tool: {tool_name}. args: {tool_to_call.function.arguments}"
+ f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
  )
  tool_call_id = tool_to_call.id
  tool = self.tool_executor.get_tool_by_name(tool_name)

  if (not tool) or (tool_params is None):
  logging.warning(
- f"Skipping tool execution for {tool_name}: args: {tool_to_call.function.arguments}"
+ f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
  )
  return ToolCallResult(
  tool_call_id=tool_call_id,
@@ -553,61 +577,39 @@ class ToolCallingLLM:

  def call_stream(
  self,
- system_prompt: str,
+ system_prompt: str = "",
  user_prompt: Optional[str] = None,
- stream: bool = False,
  response_format: Optional[Union[dict, Type[BaseModel]]] = None,
  sections: Optional[InputSectionsDataType] = None,
- runbooks: Optional[List[str]] = None,
+ msgs: Optional[list[dict]] = None,
  ):
- def stream_analysis(it, peek_chunk):
- buffer = peek_chunk.get("data", "")
- yield create_sse_message(peek_chunk.get("event"), peek_chunk.get("data"))
- chunk_counter = 0
-
- for chunk in it:
- buffer += chunk
- chunk_counter += 1
- if chunk_counter == STREAM_CHUNKS_PER_PARSE:
- chunk_counter = 0
- yield create_sse_message(
- "ai_answer",
- {
- "sections": parse_markdown_into_sections_from_hash_sign(
- buffer
- )
- or {},
- "analysis": buffer,
- "instructions": runbooks or [],
- },
- )
-
- yield create_sse_message(
- "ai_answer_end",
- {
- "sections": parse_markdown_into_sections_from_hash_sign(buffer)
- or {},
- "analysis": buffer,
- "instructions": runbooks or [],
- },
- )
-
- messages = [
- {"role": "system", "content": system_prompt},
- {"role": "user", "content": user_prompt},
- ]
+ """
+ This function DOES NOT call llm.completion(stream=true).
+ This function streams holmes one iteration at a time instead of waiting for all iterations to complete.
+ """
+ messages = []
+ if system_prompt:
+ messages.append({"role": "system", "content": system_prompt})
+ if user_prompt:
+ messages.append({"role": "user", "content": user_prompt})
+ if msgs:
+ messages.extend(msgs)
  perf_timing = PerformanceTiming("tool_calling_llm.call")
- tools = self.tool_executor.get_all_tools_openai_format()
+ tool_calls: list[dict] = []
+ tools = self.tool_executor.get_all_tools_openai_format(
+ target_model=self.llm.model
+ )
  perf_timing.measure("get_all_tools_openai_format")
+ max_steps = self.max_steps
  i = 0
- tool_calls: list[dict] = []
- while i < self.max_steps:
+
+ while i < max_steps:
  i += 1
  perf_timing.measure(f"start iteration {i}")
  logging.debug(f"running iteration {i}")

- tools = [] if i == self.max_steps - 1 else tools
- tool_choice = None if tools == [] else "auto"
+ tools = None if i == max_steps else tools
+ tool_choice = "auto" if tools else None

  total_tokens = self.llm.count_tokens_for_message(messages) # type: ignore
  max_context_size = self.llm.get_context_window_size()
@@ -623,90 +625,43 @@ class ToolCallingLLM:

  logging.debug(f"sending messages={messages}\n\ntools={tools}")
  try:
- if stream:
- response = requests.post(
- f"{ROBUSTA_API_ENDPOINT}/chat/completions",
- json={
- "messages": parse_messages_tags(messages), # type: ignore
- "tools": tools,
- "tool_choice": tool_choice,
- "temperature": TEMPERATURE,
- "response_format": response_format,
- "stream": True,
- "drop_param": True,
- },
- headers={"Authorization": f"Bearer {self.llm.api_key}"}, # type: ignore
- stream=True,
- )
- response.raise_for_status()
- it = response.iter_content(chunk_size=None, decode_unicode=True)
- peek_chunk = from_json(next(it))
- tools = peek_chunk.get("tool_calls")
-
- if not tools:
- yield from stream_analysis(it, peek_chunk)
- perf_timing.measure("llm.completion")
- return
-
- response_message = Message(**peek_chunk)
- tools_to_call = response_message.tool_calls
- else:
- full_response = self.llm.completion(
- messages=parse_messages_tags(messages), # type: ignore
- tools=tools,
- tool_choice=tool_choice,
- temperature=TEMPERATURE,
- response_format=response_format,
- stream=False,
- drop_params=True,
- )
- perf_timing.measure("llm.completion")
-
- response_message = full_response.choices[0].message # type: ignore
- if response_message and response_format:
- # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
- dict_response = json.loads(full_response.to_json()) # type: ignore
- incorrect_tool_call = is_response_an_incorrect_tool_call(
- sections, dict_response.get("choices", [{}])[0]
- )
-
- if incorrect_tool_call:
- logging.warning(
- "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
- )
- # disable structured output going forward and and retry
- response_format = None
- i -= 1
- continue
-
- tools_to_call = getattr(response_message, "tool_calls", None)
- if not tools_to_call:
- (text_response, sections) = process_response_into_sections( # type: ignore
- response_message.content
- )
-
- yield create_sse_message(
- "ai_answer_end",
- {
- "sections": sections or {},
- "analysis": text_response,
- "instructions": runbooks or [],
- },
- )
- return
+ full_response = self.llm.completion(
+ messages=parse_messages_tags(messages), # type: ignore
+ tools=tools,
+ tool_choice=tool_choice,
+ response_format=response_format,
+ temperature=TEMPERATURE,
+ stream=False,
+ drop_params=True,
+ )
+ perf_timing.measure("llm.completion")
  # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
  except BadRequestError as e:
- logging.exception("Bad completion request")
  if "Unrecognized request arguments supplied: tool_choice, tools" in str(
  e
  ):
  raise Exception(
  "The Azure model you chose is not supported. Model version 1106 and higher required."
+ ) from e
+ else:
+ raise
+
+ response_message = full_response.choices[0].message # type: ignore
+ if response_message and response_format:
+ # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
+ dict_response = json.loads(full_response.to_json()) # type: ignore
+ incorrect_tool_call = is_response_an_incorrect_tool_call(
+ sections, dict_response.get("choices", [{}])[0]
+ )
+
+ if incorrect_tool_call:
+ logging.warning(
+ "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
  )
- raise e
- except Exception:
- logging.exception("Completion request exception")
- raise
+ # disable structured output going forward and and retry
+ response_format = None
+ max_steps = max_steps + 1
+ continue

  messages.append(
  response_message.model_dump(
@@ -714,6 +669,22 @@ class ToolCallingLLM:
  )
  )

+ tools_to_call = getattr(response_message, "tool_calls", None)
+ if not tools_to_call:
+ yield StreamMessage(
+ event=StreamEvents.ANSWER_END,
+ data={"content": response_message.content, "messages": messages},
+ )
+ return
+
+ reasoning = getattr(response_message, "reasoning_content", None)
+ message = response_message.content
+ if reasoning or message:
+ yield StreamMessage(
+ event=StreamEvents.AI_MESSAGE,
+ data={"content": message, "reasoning": reasoning},
+ )
+
  perf_timing.measure("pre-tool-calls")
  with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
  futures = []
@@ -727,8 +698,9 @@ class ToolCallingLLM:
  tool_number=tool_index,
  )
  )
- yield create_sse_message(
- "start_tool_calling", {"tool_name": t.function.name, "id": t.id}
+ yield StreamMessage(
+ event=StreamEvents.START_TOOL,
+ data={"tool_name": t.function.name, "id": t.id},
  )

  for future in concurrent.futures.as_completed(futures):
@@ -739,12 +711,9 @@ class ToolCallingLLM:

  perf_timing.measure(f"tool completed {tool_call_result.tool_name}")

- streaming_result_dict = (
- tool_call_result.as_streaming_tool_result_response()
- )
-
- yield create_sse_message(
- "tool_calling_result", streaming_result_dict
+ yield StreamMessage(
+ event=StreamEvents.TOOL_RESULT,
+ data=tool_call_result.as_streaming_tool_result_response(),
  )

  raise Exception(
@@ -782,6 +751,7 @@ class IssueInvestigator(ToolCallingLLM):
  global_instructions: Optional[Instructions] = None,
  post_processing_prompt: Optional[str] = None,
  sections: Optional[InputSectionsDataType] = None,
+ trace_span=DummySpan(),
  ) -> LLMResult:
  runbooks = self.runbook_manager.get_instructions_for_issue(issue)

@@ -823,6 +793,9 @@ class IssueInvestigator(ToolCallingLLM):
  "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
  )

+ todo_manager = get_todo_manager()
+ todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
+
  system_prompt = load_and_render_prompt(
  prompt,
  {
@@ -831,6 +804,8 @@ class IssueInvestigator(ToolCallingLLM):
  "structured_output": request_structured_output_from_llm,
  "toolsets": self.tool_executor.toolsets,
  "cluster_name": self.cluster_name,
+ "todo_list": todo_context,
+ "investigation_id": self.investigation_id,
  },
  )

@@ -865,10 +840,7 @@ class IssueInvestigator(ToolCallingLLM):
  post_processing_prompt,
  response_format=response_format,
  sections=sections,
+ trace_span=trace_span,
  )
  res.instructions = runbooks
  return res
-
-
- def create_sse_message(event_type: str, data: dict = {}):
- return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
holmes/core/tools.py CHANGED
@@ -51,6 +51,7 @@ class StructuredToolResult(BaseModel):
  url: Optional[str] = None
  invocation: Optional[str] = None
  params: Optional[Dict] = None
+ icon_url: Optional[str] = None

  def get_stringified_data(self) -> str:
  if self.data is None:
@@ -121,6 +122,8 @@ class ToolParameter(BaseModel):
  description: Optional[str] = None
  type: str = "string"
  required: bool = True
+ properties: Optional[Dict[str, "ToolParameter"]] = None # For object types
+ items: Optional["ToolParameter"] = None # For array item schemas


  class Tool(ABC, BaseModel):
@@ -131,12 +134,17 @@ class Tool(ABC, BaseModel):
  None # templated string to show to the user describing this tool invocation (not seen by llm)
  )
  additional_instructions: Optional[str] = None
+ icon_url: Optional[str] = Field(
+ default=None,
+ description="The URL of the icon for the tool, if None will get toolset icon",
+ )

- def get_openai_format(self):
+ def get_openai_format(self, target_model: str):
  return format_tool_to_open_ai_standard(
  tool_name=self.name,
  tool_description=self.description,
  tool_parameters=self.parameters,
+ target_model=target_model,
  )

  def invoke(
@@ -148,6 +156,7 @@ class Tool(ABC, BaseModel):
  )
  start_time = time.time()
  result = self._invoke(params)
+ result.icon_url = self.icon_url
  elapsed = time.time() - start_time
  output_str = (
  result.get_stringified_data()
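
The new properties and items fields let a ToolParameter describe nested object and array schemas rather than only scalars. A hypothetical declaration exercising them (only the field names themselves come from the diff above):

    from holmes.core.tools import ToolParameter

    # A hypothetical array-of-objects parameter for some tool definition
    filters_param = ToolParameter(
        description="List of label filters to apply",
        type="array",
        required=False,
        items=ToolParameter(
            type="object",
            properties={
                "key": ToolParameter(type="string", description="Label name"),
                "value": ToolParameter(type="string", description="Label value"),
            },
        ),
    )
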
holmes/core/tools_utils/tool_executor.py CHANGED
@@ -38,6 +38,8 @@ class ToolExecutor:
  self.tools_by_name: dict[str, Tool] = {}
  for ts in toolsets_by_name.values():
  for tool in ts.tools:
+ if tool.icon_url is None and ts.icon_url is not None:
+ tool.icon_url = ts.icon_url
  if tool.name in self.tools_by_name:
  logging.warning(
  f"Overriding existing tool '{tool.name} with new tool from {ts.name} at {ts.path}'!"
@@ -62,5 +64,8 @@
  return None

  @sentry_sdk.trace
- def get_all_tools_openai_format(self):
- return [tool.get_openai_format() for tool in self.tools_by_name.values()]
+ def get_all_tools_openai_format(self, target_model: str):
+ return [
+ tool.get_openai_format(target_model=target_model)
+ for tool in self.tools_by_name.values()
+ ]
holmes/core/tools_utils/toolset_utils.py CHANGED
@@ -16,12 +16,17 @@ def filter_out_default_logging_toolset(toolsets: list[Toolset]) -> list[Toolset]
  All other types of toolsets are included as is.
  """

- logging_toolsets: list[BasePodLoggingToolset] = []
+ logging_toolsets: list[Toolset] = []
  final_toolsets: list[Toolset] = []

  for ts in toolsets:
+ toolset_type = (
+ ts.original_toolset_type
+ if hasattr(ts, "original_toolset_type")
+ else type(ts)
+ )
  if (
- isinstance(ts, BasePodLoggingToolset)
+ issubclass(toolset_type, BasePodLoggingToolset)
  and ts.status == ToolsetStatusEnum.ENABLED
  ):
  logging_toolsets.append(ts)
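
The filter no longer relies on isinstance alone: a toolset can carry an original_toolset_type attribute (for example when it is wrapped by another class), and the issubclass check still recognises it as a pod-logging toolset. A self-contained illustration; the classes below are stand-ins for the real BasePodLoggingToolset hierarchy, not holmesgpt code:

    class BasePodLoggingToolset: ...              # stand-in for the real base class
    class PodLogsToolset(BasePodLoggingToolset): ...

    class WrapperToolset:
        # recorded by whatever created the wrapper
        original_toolset_type = PodLogsToolset

    ts = WrapperToolset()
    toolset_type = (
        ts.original_toolset_type if hasattr(ts, "original_toolset_type") else type(ts)
    )
    print(issubclass(toolset_type, BasePodLoggingToolset))  # True
    print(isinstance(ts, BasePodLoggingToolset))            # False
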
holmes/core/tracing.py CHANGED
@@ -91,10 +91,11 @@ class SpanType(Enum):
  """Standard span types for tracing categorization."""

  LLM = "llm"
- TOOL = "tool"
- TASK = "task"
  SCORE = "score"
+ FUNCTION = "function"
  EVAL = "eval"
+ TASK = "task"
+ TOOL = "tool"


  class DummySpan:
holmes/interactive.py CHANGED
@@ -1002,6 +1002,7 @@ def run_interactive_loop(
  user_input,
  include_files,
  ai.tool_executor,
+ ai.investigation_id,
  runbooks,
  system_prompt_additions,
  )
holmes/main.py CHANGED
@@ -94,7 +94,7 @@ opt_custom_runbooks: Optional[List[Path]] = typer.Option(
  help="Path to a custom runbooks (can specify -r multiple times to add multiple runbooks)",
  )
  opt_max_steps: Optional[int] = typer.Option(
- 10,
+ 40,
  "--max-steps",
  help="Advanced. Maximum number of steps the LLM can take to investigate the issue",
  )
@@ -302,6 +302,7 @@ def ask(
  prompt, # type: ignore
  include_file,
  ai.tool_executor,
+ ai.investigation_id,
  config.get_runbook_catalog(),
  system_prompt_additions,
  )
holmes/plugins/prompts/__init__.py CHANGED
@@ -43,6 +43,12 @@ def load_and_render_prompt(prompt: str, context: Optional[dict] = None) -> str:
  context = {}

  now = datetime.now(timezone.utc)
- context.update({"now": f"{now}", "now_timestamp_seconds": int(now.timestamp())})
+ context.update(
+ {
+ "now": f"{now}",
+ "now_timestamp_seconds": int(now.timestamp()),
+ "current_year": now.year,
+ }
+ )

  return template.render(**context)
holmes/plugins/prompts/_current_date_time.jinja2 CHANGED
@@ -1 +1,2 @@
  When querying tools, always query for the relevant time period. The current UTC date and time are {{ now }}. The current UTC timestamp in seconds is {{ now_timestamp_seconds }}.
+ When users mention dates without years (e.g., 'March 25th', 'last May', etc.), assume they either mean the current year ({{ current_year }}) unless context suggests otherwise.
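
load_and_render_prompt now also exposes current_year to prompt templates, which the date/time snippet above uses for year-less dates. A minimal sketch of the same context rendered through Jinja2 (the template string here is illustrative, not the packaged one):

    from datetime import datetime, timezone
    from jinja2 import Template

    now = datetime.now(timezone.utc)
    context = {
        "now": f"{now}",
        "now_timestamp_seconds": int(now.timestamp()),
        "current_year": now.year,
    }

    template = Template(
        "The current UTC date and time are {{ now }}. "
        "Dates mentioned without a year refer to {{ current_year }}."
    )
    print(template.render(**context))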