holmesgpt 0.12.5__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (84)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +19 -1
  3. holmes/common/env_vars.py +13 -0
  4. holmes/config.py +69 -9
  5. holmes/core/conversations.py +11 -0
  6. holmes/core/investigation.py +16 -3
  7. holmes/core/investigation_structured_output.py +12 -0
  8. holmes/core/llm.py +10 -0
  9. holmes/core/models.py +9 -1
  10. holmes/core/openai_formatting.py +72 -12
  11. holmes/core/prompt.py +13 -0
  12. holmes/core/supabase_dal.py +3 -0
  13. holmes/core/todo_manager.py +88 -0
  14. holmes/core/tool_calling_llm.py +121 -149
  15. holmes/core/tools.py +10 -1
  16. holmes/core/tools_utils/tool_executor.py +7 -2
  17. holmes/core/tools_utils/toolset_utils.py +7 -2
  18. holmes/core/tracing.py +3 -2
  19. holmes/interactive.py +1 -0
  20. holmes/main.py +2 -1
  21. holmes/plugins/prompts/__init__.py +7 -1
  22. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  23. holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
  24. holmes/plugins/prompts/_fetch_logs.jinja2 +6 -1
  25. holmes/plugins/prompts/_general_instructions.jinja2 +14 -0
  26. holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
  27. holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
  28. holmes/plugins/prompts/generic_ask.jinja2 +4 -3
  29. holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
  30. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -0
  31. holmes/plugins/toolsets/__init__.py +19 -6
  32. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
  33. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
  34. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
  35. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  36. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
  37. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
  38. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  39. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
  40. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
  41. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
  42. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
  43. holmes/plugins/toolsets/coralogix/api.py +6 -6
  44. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
  45. holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
  46. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
  47. holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
  48. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
  49. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
  50. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
  51. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
  52. holmes/plugins/toolsets/git.py +15 -15
  53. holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
  54. holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
  55. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
  56. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
  57. holmes/plugins/toolsets/internet/internet.py +2 -1
  58. holmes/plugins/toolsets/internet/notion.py +2 -1
  59. holmes/plugins/toolsets/investigator/__init__.py +0 -0
  60. holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
  61. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
  62. holmes/plugins/toolsets/investigator/model.py +15 -0
  63. holmes/plugins/toolsets/kafka.py +14 -7
  64. holmes/plugins/toolsets/kubernetes_logs.py +454 -25
  65. holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
  66. holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
  67. holmes/plugins/toolsets/newrelic.py +8 -3
  68. holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
  69. holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
  70. holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
  71. holmes/plugins/toolsets/prometheus/prometheus.py +149 -44
  72. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
  73. holmes/plugins/toolsets/robusta/robusta.py +4 -4
  74. holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
  75. holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
  76. holmes/plugins/toolsets/utils.py +8 -1
  77. holmes/utils/llms.py +20 -0
  78. holmes/utils/stream.py +90 -0
  79. {holmesgpt-0.12.5.dist-info → holmesgpt-0.13.0.dist-info}/METADATA +48 -35
  80. {holmesgpt-0.12.5.dist-info → holmesgpt-0.13.0.dist-info}/RECORD +83 -74
  81. holmes/utils/robusta.py +0 -9
  82. {holmesgpt-0.12.5.dist-info → holmesgpt-0.13.0.dist-info}/LICENSE.txt +0 -0
  83. {holmesgpt-0.12.5.dist-info → holmesgpt-0.13.0.dist-info}/WHEEL +0 -0
  84. {holmesgpt-0.12.5.dist-info → holmesgpt-0.13.0.dist-info}/entry_points.txt +0 -0
holmes/core/todo_manager.py ADDED
@@ -0,0 +1,88 @@
+ from typing import Dict, List
+ from threading import Lock
+
+ from holmes.plugins.toolsets.investigator.model import Task, TaskStatus
+
+
+ class TodoListManager:
+ """
+ Session-based storage manager for investigation TodoLists.
+ Stores TodoLists per session and provides methods to get/update tasks.
+ """
+
+ def __init__(self):
+ self._sessions: Dict[str, List[Task]] = {}
+ self._lock: Lock = Lock()
+
+ def get_session_tasks(self, session_id: str) -> List[Task]:
+ with self._lock:
+ return self._sessions.get(session_id, []).copy()
+
+ def update_session_tasks(self, session_id: str, tasks: List[Task]) -> None:
+ with self._lock:
+ self._sessions[session_id] = tasks.copy()
+
+ def clear_session(self, session_id: str) -> None:
+ with self._lock:
+ if session_id in self._sessions:
+ del self._sessions[session_id]
+
+ def get_session_count(self) -> int:
+ with self._lock:
+ return len(self._sessions)
+
+ def format_tasks_for_prompt(self, session_id: str) -> str:
+ """
+ Format tasks for injection into system prompt.
+ Returns empty string if no tasks exist.
+ """
+ tasks = self.get_session_tasks(session_id)
+
+ if not tasks:
+ return ""
+
+ status_order = {
+ TaskStatus.PENDING: 0,
+ TaskStatus.IN_PROGRESS: 1,
+ TaskStatus.COMPLETED: 2,
+ }
+
+ sorted_tasks = sorted(
+ tasks,
+ key=lambda t: (status_order.get(t.status, 3),),
+ )
+
+ lines = ["# CURRENT INVESTIGATION TASKS"]
+ lines.append("")
+
+ pending_count = sum(1 for t in tasks if t.status == TaskStatus.PENDING)
+ progress_count = sum(1 for t in tasks if t.status == TaskStatus.IN_PROGRESS)
+ completed_count = sum(1 for t in tasks if t.status == TaskStatus.COMPLETED)
+
+ lines.append(
+ f"**Task Status**: {completed_count} completed, {progress_count} in progress, {pending_count} pending"
+ )
+ lines.append("")
+
+ for task in sorted_tasks:
+ status_indicator = {
+ TaskStatus.PENDING: "[ ]",
+ TaskStatus.IN_PROGRESS: "[~]",
+ TaskStatus.COMPLETED: "[✓]",
+ }.get(task.status, "[?]")
+
+ lines.append(f"{status_indicator} [{task.id}] {task.content}")
+
+ lines.append("")
+ lines.append(
+ "**Instructions**: Use TodoWrite tool to update task status as you work. Mark tasks as 'in_progress' when starting, 'completed' when finished."
+ )
+
+ return "\n".join(lines)
+
+
+ _todo_manager = TodoListManager()
+
+
+ def get_todo_manager() -> TodoListManager:
+ return _todo_manager
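
A minimal sketch of how this new session-scoped manager might be driven. The Task constructor arguments below are assumptions; the code above only relies on the id, content, and status fields and on get_todo_manager() returning the shared instance.

    from holmes.core.todo_manager import get_todo_manager
    from holmes.plugins.toolsets.investigator.model import Task, TaskStatus

    manager = get_todo_manager()
    session_id = "investigation-1234"  # hypothetical session/investigation id

    # Store a small task list for the session (Task field names assumed from the usage above)
    manager.update_session_tasks(
        session_id,
        [
            Task(id="1", content="Check pod restart events", status=TaskStatus.PENDING),
            Task(id="2", content="Fetch recent error logs", status=TaskStatus.IN_PROGRESS),
        ],
    )

    # Render the checklist that gets injected into the system prompt
    print(manager.format_tasks_for_prompt(session_id))

    # Drop the session state once the investigation is finished
    manager.clear_session(session_id)
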
holmes/core/tool_calling_llm.py CHANGED
@@ -2,32 +2,25 @@ import concurrent.futures
  import json
  import logging
  import textwrap
+ import uuid
  from typing import Dict, List, Optional, Type, Union

- import requests # type: ignore
  import sentry_sdk
- from litellm.types.utils import Message
  from openai import BadRequestError
  from openai.types.chat.chat_completion_message_tool_call import (
  ChatCompletionMessageToolCall,
  )
  from pydantic import BaseModel
- from pydantic_core import from_json
  from rich.console import Console

- from holmes.common.env_vars import (
- ROBUSTA_API_ENDPOINT,
- STREAM_CHUNKS_PER_PARSE,
- TEMPERATURE,
- )
+ from holmes.common.env_vars import TEMPERATURE, MAX_OUTPUT_TOKEN_RESERVATION
+
  from holmes.core.investigation_structured_output import (
  DEFAULT_SECTIONS,
  REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
  InputSectionsDataType,
  get_output_format_for_investigation,
  is_response_an_incorrect_tool_call,
- parse_markdown_into_sections_from_hash_sign,
- process_response_into_sections,
  )
  from holmes.core.issue import Issue
  from holmes.core.llm import LLM
@@ -45,6 +38,10 @@ from holmes.utils.tags import format_tags_in_string, parse_messages_tags
  from holmes.core.tools_utils.tool_executor import ToolExecutor
  from holmes.core.tracing import DummySpan
  from holmes.utils.colors import AI_COLOR
+ from holmes.utils.stream import StreamEvents, StreamMessage
+ from holmes.core.todo_manager import (
+ get_todo_manager,
+ )


  def format_tool_result_data(tool_result: StructuredToolResult) -> str:
@@ -94,12 +91,13 @@ def truncate_messages_to_fit_context(

  tool_call_messages = [message for message in messages if message["role"] == "tool"]

- if message_size_without_tools >= (max_context_size - maximum_output_token):
+ reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+ if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
  logging.error(
  f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
  )
  raise Exception(
- f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - maximum_output_token} tokens available for input."
+ f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
  )

  if len(tool_call_messages) == 0:
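
The net effect of this hunk: the context reserved for model output is now capped by the new MAX_OUTPUT_TOKEN_RESERVATION environment variable instead of always being the model's full output allowance. A rough sketch of the arithmetic, with illustrative numbers rather than the package's actual defaults:

    # Illustrative numbers only, not holmesgpt defaults.
    max_context_size = 128_000            # model context window (input + output)
    maximum_output_token = 32_000         # model's maximum output tokens
    MAX_OUTPUT_TOKEN_RESERVATION = 8_000  # assumed value of the new env var

    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
    available_for_input = max_context_size - reserved_for_output_tokens
    print(available_for_input)  # 120000; the old code would have left only 96000
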
@@ -213,6 +211,7 @@ class ToolCallingLLM:
  self.max_steps = max_steps
  self.tracer = tracer
  self.llm = llm
+ self.investigation_id = str(uuid.uuid4())

  def prompt_call(
  self,
@@ -221,6 +220,7 @@ class ToolCallingLLM:
  post_process_prompt: Optional[str] = None,
  response_format: Optional[Union[dict, Type[BaseModel]]] = None,
  sections: Optional[InputSectionsDataType] = None,
+ trace_span=DummySpan(),
  ) -> LLMResult:
  messages = [
  {"role": "system", "content": system_prompt},
@@ -232,6 +232,7 @@ class ToolCallingLLM:
  response_format,
  user_prompt=user_prompt,
  sections=sections,
+ trace_span=trace_span,
  )

  def messages_call(
@@ -258,7 +259,9 @@ class ToolCallingLLM:
  ) -> LLMResult:
  perf_timing = PerformanceTiming("tool_calling_llm.call")
  tool_calls = [] # type: ignore
- tools = self.tool_executor.get_all_tools_openai_format()
+ tools = self.tool_executor.get_all_tools_openai_format(
+ target_model=self.llm.model
+ )
  perf_timing.measure("get_all_tools_openai_format")
  max_steps = self.max_steps
  i = 0
@@ -413,20 +416,41 @@ class ToolCallingLLM:
  trace_span=DummySpan(),
  tool_number=None,
  ) -> ToolCallResult:
- tool_name = tool_to_call.function.name
+ # Handle the union type - ChatCompletionMessageToolCall can be either
+ # ChatCompletionMessageFunctionToolCall (with 'function' field and type='function')
+ # or ChatCompletionMessageCustomToolCall (with 'custom' field and type='custom').
+ # We use hasattr to check for the 'function' attribute as it's more flexible
+ # and doesn't require importing the specific type.
+ if hasattr(tool_to_call, "function"):
+ tool_name = tool_to_call.function.name
+ tool_arguments = tool_to_call.function.arguments
+ else:
+ # This is a custom tool call - we don't support these currently
+ logging.error(f"Unsupported custom tool call: {tool_to_call}")
+ return ToolCallResult(
+ tool_call_id=tool_to_call.id,
+ tool_name="unknown",
+ description="NA",
+ result=StructuredToolResult(
+ status=ToolResultStatus.ERROR,
+ error="Custom tool calls are not supported",
+ params=None,
+ ),
+ )
+
  tool_params = None
  try:
- tool_params = json.loads(tool_to_call.function.arguments)
+ tool_params = json.loads(tool_arguments)
  except Exception:
  logging.warning(
- f"Failed to parse arguments for tool: {tool_name}. args: {tool_to_call.function.arguments}"
+ f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
  )
  tool_call_id = tool_to_call.id
  tool = self.tool_executor.get_tool_by_name(tool_name)

  if (not tool) or (tool_params is None):
  logging.warning(
- f"Skipping tool execution for {tool_name}: args: {tool_to_call.function.arguments}"
+ f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
  )
  return ToolCallResult(
  tool_call_id=tool_call_id,
@@ -553,61 +577,39 @@ class ToolCallingLLM:

  def call_stream(
  self,
- system_prompt: str,
+ system_prompt: str = "",
  user_prompt: Optional[str] = None,
- stream: bool = False,
  response_format: Optional[Union[dict, Type[BaseModel]]] = None,
  sections: Optional[InputSectionsDataType] = None,
- runbooks: Optional[List[str]] = None,
+ msgs: Optional[list[dict]] = None,
  ):
- def stream_analysis(it, peek_chunk):
- buffer = peek_chunk.get("data", "")
- yield create_sse_message(peek_chunk.get("event"), peek_chunk.get("data"))
- chunk_counter = 0
-
- for chunk in it:
- buffer += chunk
- chunk_counter += 1
- if chunk_counter == STREAM_CHUNKS_PER_PARSE:
- chunk_counter = 0
- yield create_sse_message(
- "ai_answer",
- {
- "sections": parse_markdown_into_sections_from_hash_sign(
- buffer
- )
- or {},
- "analysis": buffer,
- "instructions": runbooks or [],
- },
- )
-
- yield create_sse_message(
- "ai_answer_end",
- {
- "sections": parse_markdown_into_sections_from_hash_sign(buffer)
- or {},
- "analysis": buffer,
- "instructions": runbooks or [],
- },
- )
-
- messages = [
- {"role": "system", "content": system_prompt},
- {"role": "user", "content": user_prompt},
- ]
+ """
+ This function DOES NOT call llm.completion(stream=true).
+ This function streams holmes one iteration at a time instead of waiting for all iterations to complete.
+ """
+ messages = []
+ if system_prompt:
+ messages.append({"role": "system", "content": system_prompt})
+ if user_prompt:
+ messages.append({"role": "user", "content": user_prompt})
+ if msgs:
+ messages.extend(msgs)
  perf_timing = PerformanceTiming("tool_calling_llm.call")
- tools = self.tool_executor.get_all_tools_openai_format()
+ tool_calls: list[dict] = []
+ tools = self.tool_executor.get_all_tools_openai_format(
+ target_model=self.llm.model
+ )
  perf_timing.measure("get_all_tools_openai_format")
+ max_steps = self.max_steps
  i = 0
- tool_calls: list[dict] = []
- while i < self.max_steps:
+
+ while i < max_steps:
  i += 1
  perf_timing.measure(f"start iteration {i}")
  logging.debug(f"running iteration {i}")

- tools = [] if i == self.max_steps - 1 else tools
- tool_choice = None if tools == [] else "auto"
+ tools = None if i == max_steps else tools
+ tool_choice = "auto" if tools else None

  total_tokens = self.llm.count_tokens_for_message(messages) # type: ignore
  max_context_size = self.llm.get_context_window_size()
@@ -623,90 +625,43 @@ class ToolCallingLLM:

  logging.debug(f"sending messages={messages}\n\ntools={tools}")
  try:
- if stream:
- response = requests.post(
- f"{ROBUSTA_API_ENDPOINT}/chat/completions",
- json={
- "messages": parse_messages_tags(messages), # type: ignore
- "tools": tools,
- "tool_choice": tool_choice,
- "temperature": TEMPERATURE,
- "response_format": response_format,
- "stream": True,
- "drop_param": True,
- },
- headers={"Authorization": f"Bearer {self.llm.api_key}"}, # type: ignore
- stream=True,
- )
- response.raise_for_status()
- it = response.iter_content(chunk_size=None, decode_unicode=True)
- peek_chunk = from_json(next(it))
- tools = peek_chunk.get("tool_calls")
-
- if not tools:
- yield from stream_analysis(it, peek_chunk)
- perf_timing.measure("llm.completion")
- return
-
- response_message = Message(**peek_chunk)
- tools_to_call = response_message.tool_calls
- else:
- full_response = self.llm.completion(
- messages=parse_messages_tags(messages), # type: ignore
- tools=tools,
- tool_choice=tool_choice,
- temperature=TEMPERATURE,
- response_format=response_format,
- stream=False,
- drop_params=True,
- )
- perf_timing.measure("llm.completion")
-
- response_message = full_response.choices[0].message # type: ignore
- if response_message and response_format:
- # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
- dict_response = json.loads(full_response.to_json()) # type: ignore
- incorrect_tool_call = is_response_an_incorrect_tool_call(
- sections, dict_response.get("choices", [{}])[0]
- )
-
- if incorrect_tool_call:
- logging.warning(
- "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
- )
- # disable structured output going forward and and retry
- response_format = None
- i -= 1
- continue
-
- tools_to_call = getattr(response_message, "tool_calls", None)
- if not tools_to_call:
- (text_response, sections) = process_response_into_sections( # type: ignore
- response_message.content
- )
-
- yield create_sse_message(
- "ai_answer_end",
- {
- "sections": sections or {},
- "analysis": text_response,
- "instructions": runbooks or [],
- },
- )
- return
+ full_response = self.llm.completion(
+ messages=parse_messages_tags(messages), # type: ignore
+ tools=tools,
+ tool_choice=tool_choice,
+ response_format=response_format,
+ temperature=TEMPERATURE,
+ stream=False,
+ drop_params=True,
+ )
+ perf_timing.measure("llm.completion")
  # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
  except BadRequestError as e:
- logging.exception("Bad completion request")
  if "Unrecognized request arguments supplied: tool_choice, tools" in str(
  e
  ):
  raise Exception(
  "The Azure model you chose is not supported. Model version 1106 and higher required."
+ ) from e
+ else:
+ raise
+
+ response_message = full_response.choices[0].message # type: ignore
+ if response_message and response_format:
+ # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
+ dict_response = json.loads(full_response.to_json()) # type: ignore
+ incorrect_tool_call = is_response_an_incorrect_tool_call(
+ sections, dict_response.get("choices", [{}])[0]
+ )
+
+ if incorrect_tool_call:
+ logging.warning(
+ "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
  )
- raise e
- except Exception:
- logging.exception("Completion request exception")
- raise
+ # disable structured output going forward and and retry
+ response_format = None
+ max_steps = max_steps + 1
+ continue

  messages.append(
  response_message.model_dump(
@@ -714,6 +669,22 @@ class ToolCallingLLM:
  )
  )

+ tools_to_call = getattr(response_message, "tool_calls", None)
+ if not tools_to_call:
+ yield StreamMessage(
+ event=StreamEvents.ANSWER_END,
+ data={"content": response_message.content, "messages": messages},
+ )
+ return
+
+ reasoning = getattr(response_message, "reasoning_content", None)
+ message = response_message.content
+ if reasoning or message:
+ yield StreamMessage(
+ event=StreamEvents.AI_MESSAGE,
+ data={"content": message, "reasoning": reasoning},
+ )
+
  perf_timing.measure("pre-tool-calls")
  with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
  futures = []
@@ -727,8 +698,9 @@ class ToolCallingLLM:
  tool_number=tool_index,
  )
  )
- yield create_sse_message(
- "start_tool_calling", {"tool_name": t.function.name, "id": t.id}
+ yield StreamMessage(
+ event=StreamEvents.START_TOOL,
+ data={"tool_name": t.function.name, "id": t.id},
  )

  for future in concurrent.futures.as_completed(futures):
@@ -739,12 +711,9 @@ class ToolCallingLLM:

  perf_timing.measure(f"tool completed {tool_call_result.tool_name}")

- streaming_result_dict = (
- tool_call_result.as_streaming_tool_result_response()
- )
-
- yield create_sse_message(
- "tool_calling_result", streaming_result_dict
+ yield StreamMessage(
+ event=StreamEvents.TOOL_RESULT,
+ data=tool_call_result.as_streaming_tool_result_response(),
  )

  raise Exception(
@@ -782,6 +751,7 @@ class IssueInvestigator(ToolCallingLLM):
  global_instructions: Optional[Instructions] = None,
  post_processing_prompt: Optional[str] = None,
  sections: Optional[InputSectionsDataType] = None,
+ trace_span=DummySpan(),
  ) -> LLMResult:
  runbooks = self.runbook_manager.get_instructions_for_issue(issue)

@@ -823,6 +793,9 @@ class IssueInvestigator(ToolCallingLLM):
  "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
  )

+ todo_manager = get_todo_manager()
+ todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
+
  system_prompt = load_and_render_prompt(
  prompt,
  {
@@ -831,6 +804,8 @@ class IssueInvestigator(ToolCallingLLM):
  "structured_output": request_structured_output_from_llm,
  "toolsets": self.tool_executor.toolsets,
  "cluster_name": self.cluster_name,
+ "todo_list": todo_context,
+ "investigation_id": self.investigation_id,
  },
  )

@@ -865,10 +840,7 @@ class IssueInvestigator(ToolCallingLLM):
  post_processing_prompt,
  response_format=response_format,
  sections=sections,
+ trace_span=trace_span,
  )
  res.instructions = runbooks
  return res
-
-
- def create_sse_message(event_type: str, data: dict = {}):
- return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
holmes/core/tools.py CHANGED
@@ -51,6 +51,7 @@ class StructuredToolResult(BaseModel):
  url: Optional[str] = None
  invocation: Optional[str] = None
  params: Optional[Dict] = None
+ icon_url: Optional[str] = None

  def get_stringified_data(self) -> str:
  if self.data is None:
@@ -121,6 +122,8 @@ class ToolParameter(BaseModel):
  description: Optional[str] = None
  type: str = "string"
  required: bool = True
+ properties: Optional[Dict[str, "ToolParameter"]] = None # For object types
+ items: Optional["ToolParameter"] = None # For array item schemas


  class Tool(ABC, BaseModel):
@@ -131,12 +134,17 @@ class Tool(ABC, BaseModel):
  None # templated string to show to the user describing this tool invocation (not seen by llm)
  )
  additional_instructions: Optional[str] = None
+ icon_url: Optional[str] = Field(
+ default=None,
+ description="The URL of the icon for the tool, if None will get toolset icon",
+ )

- def get_openai_format(self):
+ def get_openai_format(self, target_model: str):
  return format_tool_to_open_ai_standard(
  tool_name=self.name,
  tool_description=self.description,
  tool_parameters=self.parameters,
+ target_model=target_model,
  )

  def invoke(
@@ -148,6 +156,7 @@ class Tool(ABC, BaseModel):
  )
  start_time = time.time()
  result = self._invoke(params)
+ result.icon_url = self.icon_url
  elapsed = time.time() - start_time
  output_str = (
  result.get_stringified_data()
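
The new properties and items fields let a ToolParameter describe nested object and array schemas rather than only scalars. A hypothetical declaration exercising them (only the field names themselves come from the diff above):

    from holmes.core.tools import ToolParameter

    # A hypothetical array-of-objects parameter for some tool definition
    filters_param = ToolParameter(
        description="List of label filters to apply",
        type="array",
        required=False,
        items=ToolParameter(
            type="object",
            properties={
                "key": ToolParameter(type="string", description="Label name"),
                "value": ToolParameter(type="string", description="Label value"),
            },
        ),
    )
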
holmes/core/tools_utils/tool_executor.py CHANGED
@@ -38,6 +38,8 @@ class ToolExecutor:
  self.tools_by_name: dict[str, Tool] = {}
  for ts in toolsets_by_name.values():
  for tool in ts.tools:
+ if tool.icon_url is None and ts.icon_url is not None:
+ tool.icon_url = ts.icon_url
  if tool.name in self.tools_by_name:
  logging.warning(
  f"Overriding existing tool '{tool.name} with new tool from {ts.name} at {ts.path}'!"
@@ -62,5 +64,8 @@
  return None

  @sentry_sdk.trace
- def get_all_tools_openai_format(self):
- return [tool.get_openai_format() for tool in self.tools_by_name.values()]
+ def get_all_tools_openai_format(self, target_model: str):
+ return [
+ tool.get_openai_format(target_model=target_model)
+ for tool in self.tools_by_name.values()
+ ]
holmes/core/tools_utils/toolset_utils.py CHANGED
@@ -16,12 +16,17 @@ def filter_out_default_logging_toolset(toolsets: list[Toolset]) -> list[Toolset]
  All other types of toolsets are included as is.
  """

- logging_toolsets: list[BasePodLoggingToolset] = []
+ logging_toolsets: list[Toolset] = []
  final_toolsets: list[Toolset] = []

  for ts in toolsets:
+ toolset_type = (
+ ts.original_toolset_type
+ if hasattr(ts, "original_toolset_type")
+ else type(ts)
+ )
  if (
- isinstance(ts, BasePodLoggingToolset)
+ issubclass(toolset_type, BasePodLoggingToolset)
  and ts.status == ToolsetStatusEnum.ENABLED
  ):
  logging_toolsets.append(ts)
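
The filter no longer relies on isinstance alone: a toolset can carry an original_toolset_type attribute (for example when it is wrapped by another class), and the issubclass check still recognises it as a pod-logging toolset. A self-contained illustration; the classes below are stand-ins for the real BasePodLoggingToolset hierarchy, not holmesgpt code:

    class BasePodLoggingToolset: ...              # stand-in for the real base class
    class PodLogsToolset(BasePodLoggingToolset): ...

    class WrapperToolset:
        # recorded by whatever created the wrapper
        original_toolset_type = PodLogsToolset

    ts = WrapperToolset()
    toolset_type = (
        ts.original_toolset_type if hasattr(ts, "original_toolset_type") else type(ts)
    )
    print(issubclass(toolset_type, BasePodLoggingToolset))  # True
    print(isinstance(ts, BasePodLoggingToolset))            # False
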
holmes/core/tracing.py CHANGED
@@ -91,10 +91,11 @@ class SpanType(Enum):
  """Standard span types for tracing categorization."""

  LLM = "llm"
- TOOL = "tool"
- TASK = "task"
  SCORE = "score"
+ FUNCTION = "function"
  EVAL = "eval"
+ TASK = "task"
+ TOOL = "tool"


  class DummySpan:
holmes/interactive.py CHANGED
@@ -1002,6 +1002,7 @@ def run_interactive_loop(
  user_input,
  include_files,
  ai.tool_executor,
+ ai.investigation_id,
  runbooks,
  system_prompt_additions,
  )
holmes/main.py CHANGED
@@ -94,7 +94,7 @@ opt_custom_runbooks: Optional[List[Path]] = typer.Option(
  help="Path to a custom runbooks (can specify -r multiple times to add multiple runbooks)",
  )
  opt_max_steps: Optional[int] = typer.Option(
- 10,
+ 40,
  "--max-steps",
  help="Advanced. Maximum number of steps the LLM can take to investigate the issue",
  )
@@ -302,6 +302,7 @@ def ask(
  prompt, # type: ignore
  include_file,
  ai.tool_executor,
+ ai.investigation_id,
  config.get_runbook_catalog(),
  system_prompt_additions,
  )
holmes/plugins/prompts/__init__.py CHANGED
@@ -43,6 +43,12 @@ def load_and_render_prompt(prompt: str, context: Optional[dict] = None) -> str:
  context = {}

  now = datetime.now(timezone.utc)
- context.update({"now": f"{now}", "now_timestamp_seconds": int(now.timestamp())})
+ context.update(
+ {
+ "now": f"{now}",
+ "now_timestamp_seconds": int(now.timestamp()),
+ "current_year": now.year,
+ }
+ )

  return template.render(**context)
holmes/plugins/prompts/_current_date_time.jinja2 CHANGED
@@ -1 +1,2 @@
  When querying tools, always query for the relevant time period. The current UTC date and time are {{ now }}. The current UTC timestamp in seconds is {{ now_timestamp_seconds }}.
+ When users mention dates without years (e.g., 'March 25th', 'last May', etc.), assume they either mean the current year ({{ current_year }}) unless context suggests otherwise.
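
load_and_render_prompt now also exposes current_year to prompt templates, which the date/time snippet above uses for year-less dates. A minimal sketch of the same context rendered through Jinja2 (the template string here is illustrative, not the packaged one):

    from datetime import datetime, timezone
    from jinja2 import Template

    now = datetime.now(timezone.utc)
    context = {
        "now": f"{now}",
        "now_timestamp_seconds": int(now.timestamp()),
        "current_year": now.year,
    }

    template = Template(
        "The current UTC date and time are {{ now }}. "
        "Dates mentioned without a year refer to {{ current_year }}."
    )
    print(template.render(**context))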