holmesgpt 0.12.6__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of holmesgpt might be problematic.

Files changed (125)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +19 -1
  3. holmes/common/env_vars.py +17 -0
  4. holmes/config.py +69 -9
  5. holmes/core/conversations.py +11 -0
  6. holmes/core/investigation.py +16 -3
  7. holmes/core/investigation_structured_output.py +12 -0
  8. holmes/core/llm.py +13 -1
  9. holmes/core/models.py +9 -1
  10. holmes/core/openai_formatting.py +72 -12
  11. holmes/core/prompt.py +13 -0
  12. holmes/core/supabase_dal.py +3 -0
  13. holmes/core/todo_manager.py +88 -0
  14. holmes/core/tool_calling_llm.py +230 -157
  15. holmes/core/tools.py +10 -1
  16. holmes/core/tools_utils/tool_executor.py +7 -2
  17. holmes/core/tools_utils/toolset_utils.py +7 -2
  18. holmes/core/toolset_manager.py +1 -5
  19. holmes/core/tracing.py +4 -3
  20. holmes/interactive.py +1 -0
  21. holmes/main.py +9 -2
  22. holmes/plugins/prompts/__init__.py +7 -1
  23. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  24. holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
  25. holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
  26. holmes/plugins/prompts/_general_instructions.jinja2 +14 -0
  27. holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
  28. holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
  29. holmes/plugins/prompts/generic_ask.jinja2 +4 -3
  30. holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
  31. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -0
  32. holmes/plugins/runbooks/CLAUDE.md +85 -0
  33. holmes/plugins/runbooks/README.md +24 -0
  34. holmes/plugins/toolsets/__init__.py +19 -6
  35. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
  36. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
  37. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
  38. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  39. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
  40. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
  41. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  42. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
  43. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
  44. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
  45. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
  46. holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
  47. holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
  48. holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
  49. holmes/plugins/toolsets/bash/aws/constants.py +529 -0
  50. holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
  51. holmes/plugins/toolsets/bash/azure/constants.py +339 -0
  52. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
  53. holmes/plugins/toolsets/bash/bash_toolset.py +47 -13
  54. holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
  55. holmes/plugins/toolsets/bash/common/stringify.py +14 -1
  56. holmes/plugins/toolsets/bash/common/validators.py +91 -0
  57. holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
  58. holmes/plugins/toolsets/bash/docker/constants.py +255 -0
  59. holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
  60. holmes/plugins/toolsets/bash/helm/constants.py +92 -0
  61. holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
  62. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
  63. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
  64. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
  65. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
  66. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
  67. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
  68. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
  69. holmes/plugins/toolsets/bash/parse_command.py +106 -32
  70. holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
  71. holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
  72. holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
  73. holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
  74. holmes/plugins/toolsets/bash/utilities/head.py +12 -0
  75. holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
  76. holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
  77. holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
  78. holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
  79. holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
  80. holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
  81. holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
  82. holmes/plugins/toolsets/coralogix/api.py +6 -6
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
  84. holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
  85. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
  86. holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
  87. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
  88. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
  89. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
  90. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
  91. holmes/plugins/toolsets/git.py +15 -15
  92. holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
  93. holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
  94. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
  95. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
  96. holmes/plugins/toolsets/internet/internet.py +2 -1
  97. holmes/plugins/toolsets/internet/notion.py +2 -1
  98. holmes/plugins/toolsets/investigator/__init__.py +0 -0
  99. holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
  100. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
  101. holmes/plugins/toolsets/investigator/model.py +15 -0
  102. holmes/plugins/toolsets/kafka.py +14 -7
  103. holmes/plugins/toolsets/kubernetes_logs.py +454 -25
  104. holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
  105. holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
  106. holmes/plugins/toolsets/newrelic.py +8 -3
  107. holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
  108. holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
  109. holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
  110. holmes/plugins/toolsets/prometheus/prometheus.py +179 -44
  111. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
  112. holmes/plugins/toolsets/robusta/robusta.py +4 -4
  113. holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
  114. holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
  115. holmes/plugins/toolsets/utils.py +8 -1
  116. holmes/utils/console/logging.py +6 -1
  117. holmes/utils/llms.py +20 -0
  118. holmes/utils/stream.py +90 -0
  119. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/METADATA +47 -34
  120. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/RECORD +123 -91
  121. holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
  122. holmes/utils/robusta.py +0 -9
  123. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/LICENSE.txt +0 -0
  124. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/WHEEL +0 -0
  125. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/entry_points.txt +0 -0
@@ -2,32 +2,25 @@ import concurrent.futures
  import json
  import logging
  import textwrap
+ import uuid
  from typing import Dict, List, Optional, Type, Union

- import requests # type: ignore
  import sentry_sdk
- from litellm.types.utils import Message
  from openai import BadRequestError
  from openai.types.chat.chat_completion_message_tool_call import (
      ChatCompletionMessageToolCall,
  )
- from pydantic import BaseModel
- from pydantic_core import from_json
+ from pydantic import BaseModel, Field
  from rich.console import Console

- from holmes.common.env_vars import (
-     ROBUSTA_API_ENDPOINT,
-     STREAM_CHUNKS_PER_PARSE,
-     TEMPERATURE,
- )
+ from holmes.common.env_vars import TEMPERATURE, MAX_OUTPUT_TOKEN_RESERVATION
+
  from holmes.core.investigation_structured_output import (
      DEFAULT_SECTIONS,
      REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
      InputSectionsDataType,
      get_output_format_for_investigation,
      is_response_an_incorrect_tool_call,
-     parse_markdown_into_sections_from_hash_sign,
-     process_response_into_sections,
  )
  from holmes.core.issue import Issue
  from holmes.core.llm import LLM
@@ -45,6 +38,82 @@ from holmes.utils.tags import format_tags_in_string, parse_messages_tags
  from holmes.core.tools_utils.tool_executor import ToolExecutor
  from holmes.core.tracing import DummySpan
  from holmes.utils.colors import AI_COLOR
+ from holmes.utils.stream import StreamEvents, StreamMessage
+ from holmes.core.todo_manager import (
+     get_todo_manager,
+ )
+
+ # Create a named logger for cost tracking
+ cost_logger = logging.getLogger("holmes.costs")
+
+
+ class LLMCosts(BaseModel):
+     """Tracks cost and token usage for LLM calls."""
+
+     total_cost: float = 0.0
+     total_tokens: int = 0
+     prompt_tokens: int = 0
+     completion_tokens: int = 0
+
+
+ def _extract_cost_from_response(full_response) -> float:
+     """Extract cost value from LLM response.
+
+     Args:
+         full_response: The raw LLM response object
+
+     Returns:
+         The cost as a float, or 0.0 if not available
+     """
+     try:
+         cost_value = (
+             full_response._hidden_params.get("response_cost", 0)
+             if hasattr(full_response, "_hidden_params")
+             else 0
+         )
+         # Ensure cost is a float
+         return float(cost_value) if cost_value is not None else 0.0
+     except Exception:
+         return 0.0
+
+
+ def _process_cost_info(
+     full_response, costs: Optional[LLMCosts] = None, log_prefix: str = "LLM call"
+ ) -> None:
+     """Process cost and token information from LLM response.
+
+     Logs the cost information and optionally accumulates it into a costs object.
+
+     Args:
+         full_response: The raw LLM response object
+         costs: Optional LLMCosts object to accumulate costs into
+         log_prefix: Prefix for logging messages (e.g., "LLM call", "Post-processing")
+     """
+     try:
+         cost = _extract_cost_from_response(full_response)
+         usage = getattr(full_response, "usage", {})
+
+         if usage:
+             prompt_toks = usage.get("prompt_tokens", 0)
+             completion_toks = usage.get("completion_tokens", 0)
+             total_toks = usage.get("total_tokens", 0)
+             cost_logger.debug(
+                 f"{log_prefix} cost: ${cost:.6f} | Tokens: {prompt_toks} prompt + {completion_toks} completion = {total_toks} total"
+             )
+             # Accumulate costs and tokens if costs object provided
+             if costs:
+                 costs.total_cost += cost
+                 costs.prompt_tokens += prompt_toks
+                 costs.completion_tokens += completion_toks
+                 costs.total_tokens += total_toks
+         elif cost > 0:
+             cost_logger.debug(
+                 f"{log_prefix} cost: ${cost:.6f} | Token usage not available"
+             )
+             if costs:
+                 costs.total_cost += cost
+     except Exception as e:
+         logging.debug(f"Could not extract cost information: {e}")


  def format_tool_result_data(tool_result: StructuredToolResult) -> str:
@@ -94,12 +163,13 @@ def truncate_messages_to_fit_context(

      tool_call_messages = [message for message in messages if message["role"] == "tool"]

-     if message_size_without_tools >= (max_context_size - maximum_output_token):
+     reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+     if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
          logging.error(
              f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
          )
          raise Exception(
-             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - maximum_output_token} tokens available for input."
+             f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
          )

      if len(tool_call_messages) == 0:
@@ -188,11 +258,11 @@ class ToolCallResult(BaseModel):
          }


- class LLMResult(BaseModel):
+ class LLMResult(LLMCosts):
      tool_calls: Optional[List[ToolCallResult]] = None
      result: Optional[str] = None
      unprocessed_result: Optional[str] = None
-     instructions: List[str] = []
+     instructions: List[str] = Field(default_factory=list)
      # TODO: clean up these two
      prompt: Optional[str] = None
      messages: Optional[List[dict]] = None
@@ -213,6 +283,7 @@ class ToolCallingLLM:
          self.max_steps = max_steps
          self.tracer = tracer
          self.llm = llm
+         self.investigation_id = str(uuid.uuid4())

      def prompt_call(
          self,
@@ -221,6 +292,7 @@ class ToolCallingLLM:
          post_process_prompt: Optional[str] = None,
          response_format: Optional[Union[dict, Type[BaseModel]]] = None,
          sections: Optional[InputSectionsDataType] = None,
+         trace_span=DummySpan(),
      ) -> LLMResult:
          messages = [
              {"role": "system", "content": system_prompt},
@@ -232,6 +304,7 @@ class ToolCallingLLM:
              response_format,
              user_prompt=user_prompt,
              sections=sections,
+             trace_span=trace_span,
          )

      def messages_call(
@@ -258,7 +331,11 @@ class ToolCallingLLM:
      ) -> LLMResult:
          perf_timing = PerformanceTiming("tool_calling_llm.call")
          tool_calls = [] # type: ignore
-         tools = self.tool_executor.get_all_tools_openai_format()
+         costs = LLMCosts()
+
+         tools = self.tool_executor.get_all_tools_openai_format(
+             target_model=self.llm.model
+         )
          perf_timing.measure("get_all_tools_openai_format")
          max_steps = self.max_steps
          i = 0
@@ -296,6 +373,9 @@ class ToolCallingLLM:
                  )
                  logging.debug(f"got response {full_response.to_json()}") # type: ignore

+                 # Extract and accumulate cost information
+                 _process_cost_info(full_response, costs, "LLM call")
+
                  perf_timing.measure("llm.completion")
              # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
              except BadRequestError as e:
@@ -349,11 +429,14 @@ class ToolCallingLLM:
                  if post_process_prompt and user_prompt:
                      logging.info("Running post processing on investigation.")
                      raw_response = text_response
-                     post_processed_response = self._post_processing_call(
-                         prompt=user_prompt,
-                         investigation=raw_response,
-                         user_prompt=post_process_prompt,
+                     post_processed_response, post_processing_cost = (
+                         self._post_processing_call(
+                             prompt=user_prompt,
+                             investigation=raw_response,
+                             user_prompt=post_process_prompt,
+                         )
                      )
+                     costs.total_cost += post_processing_cost

                      perf_timing.end(f"- completed in {i} iterations -")
                      return LLMResult(
@@ -362,6 +445,7 @@ class ToolCallingLLM:
                          tool_calls=tool_calls,
                          prompt=json.dumps(messages, indent=2),
                          messages=messages,
+                         **costs.model_dump(), # Include all cost fields
                      )

                  perf_timing.end(f"- completed in {i} iterations -")
@@ -370,6 +454,7 @@ class ToolCallingLLM:
                      tool_calls=tool_calls,
                      prompt=json.dumps(messages, indent=2),
                      messages=messages,
+                     **costs.model_dump(), # Include all cost fields
                  )

              if text_response and text_response.strip():
@@ -400,6 +485,9 @@ class ToolCallingLLM:

                      perf_timing.measure(f"tool completed {tool_call_result.tool_name}")

+             # Update the tool number offset for the next iteration
+             tool_number_offset += len(tools_to_call)
+
              # Add a blank line after all tools in this batch complete
              if tools_to_call:
                  logging.info("")
@@ -413,20 +501,41 @@ class ToolCallingLLM:
          trace_span=DummySpan(),
          tool_number=None,
      ) -> ToolCallResult:
-         tool_name = tool_to_call.function.name
+         # Handle the union type - ChatCompletionMessageToolCall can be either
+         # ChatCompletionMessageFunctionToolCall (with 'function' field and type='function')
+         # or ChatCompletionMessageCustomToolCall (with 'custom' field and type='custom').
+         # We use hasattr to check for the 'function' attribute as it's more flexible
+         # and doesn't require importing the specific type.
+         if hasattr(tool_to_call, "function"):
+             tool_name = tool_to_call.function.name
+             tool_arguments = tool_to_call.function.arguments
+         else:
+             # This is a custom tool call - we don't support these currently
+             logging.error(f"Unsupported custom tool call: {tool_to_call}")
+             return ToolCallResult(
+                 tool_call_id=tool_to_call.id,
+                 tool_name="unknown",
+                 description="NA",
+                 result=StructuredToolResult(
+                     status=ToolResultStatus.ERROR,
+                     error="Custom tool calls are not supported",
+                     params=None,
+                 ),
+             )
+
          tool_params = None
          try:
-             tool_params = json.loads(tool_to_call.function.arguments)
+             tool_params = json.loads(tool_arguments)
          except Exception:
              logging.warning(
-                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_to_call.function.arguments}"
+                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
              )
          tool_call_id = tool_to_call.id
          tool = self.tool_executor.get_tool_by_name(tool_name)

          if (not tool) or (tool_params is None):
              logging.warning(
-                 f"Skipping tool execution for {tool_name}: args: {tool_to_call.function.arguments}"
+                 f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
              )
              return ToolCallResult(
                  tool_call_id=tool_call_id,
@@ -516,7 +625,7 @@ class ToolCallingLLM:
          investigation,
          user_prompt: Optional[str] = None,
          system_prompt: str = "You are an AI assistant summarizing Kubernetes issues.",
-     ) -> Optional[str]:
+     ) -> tuple[Optional[str], float]:
          try:
              user_prompt = ToolCallingLLM.__load_post_processing_user_prompt(
                  prompt, investigation, user_prompt
@@ -535,10 +644,18 @@ class ToolCallingLLM:
              ]
              full_response = self.llm.completion(messages=messages, temperature=0)
              logging.debug(f"Post processing response {full_response}")
-             return full_response.choices[0].message.content # type: ignore
+
+             # Extract and log cost information for post-processing
+             post_processing_cost = _extract_cost_from_response(full_response)
+             if post_processing_cost > 0:
+                 cost_logger.debug(
+                     f"Post-processing LLM cost: ${post_processing_cost:.6f}"
+                 )
+
+             return full_response.choices[0].message.content, post_processing_cost # type: ignore
          except Exception:
              logging.exception("Failed to run post processing", exc_info=True)
-             return investigation
+             return investigation, 0.0

      @sentry_sdk.trace
      def truncate_messages_to_fit_context(
@@ -553,61 +670,40 @@ class ToolCallingLLM:

      def call_stream(
          self,
-         system_prompt: str,
+         system_prompt: str = "",
          user_prompt: Optional[str] = None,
-         stream: bool = False,
          response_format: Optional[Union[dict, Type[BaseModel]]] = None,
          sections: Optional[InputSectionsDataType] = None,
-         runbooks: Optional[List[str]] = None,
+         msgs: Optional[list[dict]] = None,
      ):
-         def stream_analysis(it, peek_chunk):
-             buffer = peek_chunk.get("data", "")
-             yield create_sse_message(peek_chunk.get("event"), peek_chunk.get("data"))
-             chunk_counter = 0
-
-             for chunk in it:
-                 buffer += chunk
-                 chunk_counter += 1
-                 if chunk_counter == STREAM_CHUNKS_PER_PARSE:
-                     chunk_counter = 0
-                     yield create_sse_message(
-                         "ai_answer",
-                         {
-                             "sections": parse_markdown_into_sections_from_hash_sign(
-                                 buffer
-                             )
-                             or {},
-                             "analysis": buffer,
-                             "instructions": runbooks or [],
-                         },
-                     )
-
-             yield create_sse_message(
-                 "ai_answer_end",
-                 {
-                     "sections": parse_markdown_into_sections_from_hash_sign(buffer)
-                     or {},
-                     "analysis": buffer,
-                     "instructions": runbooks or [],
-                 },
-             )
-
-         messages = [
-             {"role": "system", "content": system_prompt},
-             {"role": "user", "content": user_prompt},
-         ]
+         """
+         This function DOES NOT call llm.completion(stream=true).
+         This function streams holmes one iteration at a time instead of waiting for all iterations to complete.
+         """
+         messages = []
+         if system_prompt:
+             messages.append({"role": "system", "content": system_prompt})
+         if user_prompt:
+             messages.append({"role": "user", "content": user_prompt})
+         if msgs:
+             messages.extend(msgs)
          perf_timing = PerformanceTiming("tool_calling_llm.call")
-         tools = self.tool_executor.get_all_tools_openai_format()
+         tool_calls: list[dict] = []
+         tools = self.tool_executor.get_all_tools_openai_format(
+             target_model=self.llm.model
+         )
          perf_timing.measure("get_all_tools_openai_format")
+         max_steps = self.max_steps
          i = 0
-         tool_calls: list[dict] = []
-         while i < self.max_steps:
+         tool_number_offset = 0
+
+         while i < max_steps:
              i += 1
              perf_timing.measure(f"start iteration {i}")
              logging.debug(f"running iteration {i}")

-             tools = [] if i == self.max_steps - 1 else tools
-             tool_choice = None if tools == [] else "auto"
+             tools = None if i == max_steps else tools
+             tool_choice = "auto" if tools else None

              total_tokens = self.llm.count_tokens_for_message(messages) # type: ignore
              max_context_size = self.llm.get_context_window_size()
@@ -623,90 +719,47 @@ class ToolCallingLLM:

              logging.debug(f"sending messages={messages}\n\ntools={tools}")
              try:
-                 if stream:
-                     response = requests.post(
-                         f"{ROBUSTA_API_ENDPOINT}/chat/completions",
-                         json={
-                             "messages": parse_messages_tags(messages), # type: ignore
-                             "tools": tools,
-                             "tool_choice": tool_choice,
-                             "temperature": TEMPERATURE,
-                             "response_format": response_format,
-                             "stream": True,
-                             "drop_param": True,
-                         },
-                         headers={"Authorization": f"Bearer {self.llm.api_key}"}, # type: ignore
-                         stream=True,
-                     )
-                     response.raise_for_status()
-                     it = response.iter_content(chunk_size=None, decode_unicode=True)
-                     peek_chunk = from_json(next(it))
-                     tools = peek_chunk.get("tool_calls")
-
-                     if not tools:
-                         yield from stream_analysis(it, peek_chunk)
-                         perf_timing.measure("llm.completion")
-                         return
-
-                     response_message = Message(**peek_chunk)
-                     tools_to_call = response_message.tool_calls
-                 else:
-                     full_response = self.llm.completion(
-                         messages=parse_messages_tags(messages), # type: ignore
-                         tools=tools,
-                         tool_choice=tool_choice,
-                         temperature=TEMPERATURE,
-                         response_format=response_format,
-                         stream=False,
-                         drop_params=True,
-                     )
-                     perf_timing.measure("llm.completion")
-
-                     response_message = full_response.choices[0].message # type: ignore
-                     if response_message and response_format:
-                         # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
-                         dict_response = json.loads(full_response.to_json()) # type: ignore
-                         incorrect_tool_call = is_response_an_incorrect_tool_call(
-                             sections, dict_response.get("choices", [{}])[0]
-                         )
+                 full_response = self.llm.completion(
+                     messages=parse_messages_tags(messages), # type: ignore
+                     tools=tools,
+                     tool_choice=tool_choice,
+                     response_format=response_format,
+                     temperature=TEMPERATURE,
+                     stream=False,
+                     drop_params=True,
+                 )

-                         if incorrect_tool_call:
-                             logging.warning(
-                                 "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
-                             )
-                             # disable structured output going forward and and retry
-                             response_format = None
-                             i -= 1
-                             continue
-
-                     tools_to_call = getattr(response_message, "tool_calls", None)
-                     if not tools_to_call:
-                         (text_response, sections) = process_response_into_sections( # type: ignore
-                             response_message.content
-                         )
+                 # Log cost information for this iteration (no accumulation in streaming)
+                 _process_cost_info(full_response, log_prefix="LLM iteration")

-                         yield create_sse_message(
-                             "ai_answer_end",
-                             {
-                                 "sections": sections or {},
-                                 "analysis": text_response,
-                                 "instructions": runbooks or [],
-                             },
-                         )
-                         return
+                 perf_timing.measure("llm.completion")
              # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
              except BadRequestError as e:
-                 logging.exception("Bad completion request")
                  if "Unrecognized request arguments supplied: tool_choice, tools" in str(
                      e
                  ):
                      raise Exception(
                          "The Azure model you chose is not supported. Model version 1106 and higher required."
+                     ) from e
+                 else:
+                     raise
+
+             response_message = full_response.choices[0].message # type: ignore
+             if response_message and response_format:
+                 # Litellm API is bugged. Stringify and parsing ensures all attrs of the choice are available.
+                 dict_response = json.loads(full_response.to_json()) # type: ignore
+                 incorrect_tool_call = is_response_an_incorrect_tool_call(
+                     sections, dict_response.get("choices", [{}])[0]
+                 )
+
+                 if incorrect_tool_call:
+                     logging.warning(
+                         "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4o'. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                      )
-                 raise e
-             except Exception:
-                 logging.exception("Completion request exception")
-                 raise
+                     # disable structured output going forward and and retry
+                     response_format = None
+                     max_steps = max_steps + 1
+                     continue

              messages.append(
                  response_message.model_dump(
@@ -714,6 +767,22 @@ class ToolCallingLLM:
                  )
              )

+             tools_to_call = getattr(response_message, "tool_calls", None)
+             if not tools_to_call:
+                 yield StreamMessage(
+                     event=StreamEvents.ANSWER_END,
+                     data={"content": response_message.content, "messages": messages},
+                 )
+                 return
+
+             reasoning = getattr(response_message, "reasoning_content", None)
+             message = response_message.content
+             if reasoning or message:
+                 yield StreamMessage(
+                     event=StreamEvents.AI_MESSAGE,
+                     data={"content": message, "reasoning": reasoning},
+                 )
+
              perf_timing.measure("pre-tool-calls")
              with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                  futures = []
@@ -724,11 +793,12 @@ class ToolCallingLLM:
                              tool_to_call=t, # type: ignore
                              previous_tool_calls=tool_calls,
                              trace_span=DummySpan(), # Streaming mode doesn't support tracing yet
-                             tool_number=tool_index,
+                             tool_number=tool_number_offset + tool_index,
                          )
                      )
-                     yield create_sse_message(
-                         "start_tool_calling", {"tool_name": t.function.name, "id": t.id}
+                     yield StreamMessage(
+                         event=StreamEvents.START_TOOL,
+                         data={"tool_name": t.function.name, "id": t.id},
                      )

                  for future in concurrent.futures.as_completed(futures):
@@ -739,13 +809,13 @@ class ToolCallingLLM:

                      perf_timing.measure(f"tool completed {tool_call_result.tool_name}")

-                     streaming_result_dict = (
-                         tool_call_result.as_streaming_tool_result_response()
+                     yield StreamMessage(
+                         event=StreamEvents.TOOL_RESULT,
+                         data=tool_call_result.as_streaming_tool_result_response(),
                      )

-                     yield create_sse_message(
-                         "tool_calling_result", streaming_result_dict
-                     )
+             # Update the tool number offset for the next iteration
+             tool_number_offset += len(tools_to_call)

          raise Exception(
              f"Too many LLM calls - exceeded max_steps: {i}/{self.max_steps}"
@@ -782,6 +852,7 @@ class IssueInvestigator(ToolCallingLLM):
          global_instructions: Optional[Instructions] = None,
          post_processing_prompt: Optional[str] = None,
          sections: Optional[InputSectionsDataType] = None,
+         trace_span=DummySpan(),
      ) -> LLMResult:
          runbooks = self.runbook_manager.get_instructions_for_issue(issue)

@@ -823,6 +894,9 @@ class IssueInvestigator(ToolCallingLLM):
                  "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
              )

+         todo_manager = get_todo_manager()
+         todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
+
          system_prompt = load_and_render_prompt(
              prompt,
              {
@@ -831,6 +905,8 @@ class IssueInvestigator(ToolCallingLLM):
                  "structured_output": request_structured_output_from_llm,
                  "toolsets": self.tool_executor.toolsets,
                  "cluster_name": self.cluster_name,
+                 "todo_list": todo_context,
+                 "investigation_id": self.investigation_id,
              },
          )

@@ -865,10 +941,7 @@ class IssueInvestigator(ToolCallingLLM):
              post_processing_prompt,
              response_format=response_format,
              sections=sections,
+             trace_span=trace_span,
          )
          res.instructions = runbooks
          return res
-
-
- def create_sse_message(event_type: str, data: dict = {}):
-     return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
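The call_stream refactor above drops the SSE helper (create_sse_message) and the requests-based streaming path; it now yields StreamMessage objects from holmes/utils/stream.py and leaves serialization to the caller. A minimal consumer sketch follows, assuming StreamMessage exposes event and data attributes (as the constructor calls above suggest) and an already-configured ToolCallingLLM instance named ai; both are assumptions not confirmed by this diff:

from holmes.utils.stream import StreamEvents

def stream_to_console(ai, system_prompt: str, user_prompt: str) -> str:
    """Drain the per-iteration event stream and return the final answer text."""
    final_answer = ""
    for msg in ai.call_stream(system_prompt=system_prompt, user_prompt=user_prompt):
        if msg.event == StreamEvents.START_TOOL:
            # Emitted once per tool the LLM asked to run in this iteration
            print(f"-> calling {msg.data['tool_name']} (id={msg.data['id']})")
        elif msg.event == StreamEvents.TOOL_RESULT:
            # data comes from ToolCallResult.as_streaming_tool_result_response(); its keys are not shown in this diff
            print(f"<- tool result: {msg.data}")
        elif msg.event == StreamEvents.AI_MESSAGE:
            # Intermediate assistant text/reasoning between tool-calling iterations
            print(msg.data.get("content") or "")
        elif msg.event == StreamEvents.ANSWER_END:
            final_answer = msg.data.get("content") or ""
    return final_answer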
holmes/core/tools.py CHANGED
@@ -51,6 +51,7 @@ class StructuredToolResult(BaseModel):
      url: Optional[str] = None
      invocation: Optional[str] = None
      params: Optional[Dict] = None
+     icon_url: Optional[str] = None

      def get_stringified_data(self) -> str:
          if self.data is None:
@@ -121,6 +122,8 @@ class ToolParameter(BaseModel):
      description: Optional[str] = None
      type: str = "string"
      required: bool = True
+     properties: Optional[Dict[str, "ToolParameter"]] = None # For object types
+     items: Optional["ToolParameter"] = None # For array item schemas


  class Tool(ABC, BaseModel):
@@ -131,12 +134,17 @@ class Tool(ABC, BaseModel):
          None # templated string to show to the user describing this tool invocation (not seen by llm)
      )
      additional_instructions: Optional[str] = None
+     icon_url: Optional[str] = Field(
+         default=None,
+         description="The URL of the icon for the tool, if None will get toolset icon",
+     )

-     def get_openai_format(self):
+     def get_openai_format(self, target_model: str):
          return format_tool_to_open_ai_standard(
              tool_name=self.name,
              tool_description=self.description,
              tool_parameters=self.parameters,
+             target_model=target_model,
          )

      def invoke(
@@ -148,6 +156,7 @@ class Tool(ABC, BaseModel):
          )
          start_time = time.time()
          result = self._invoke(params)
+         result.icon_url = self.icon_url
          elapsed = time.time() - start_time
          output_str = (
              result.get_stringified_data()
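The new properties and items fields on ToolParameter allow nested object and array parameter schemas instead of only flat scalars. A hypothetical declaration under that assumption (how the schema is ultimately rendered per target_model is handled by format_tool_to_open_ai_standard, presumably in holmes/core/openai_formatting.py, and is not shown here):

from holmes.core.tools import ToolParameter

# Illustrative only: an array parameter whose items are objects with two string properties.
label_filters = ToolParameter(
    description="Label filters to apply",
    type="array",
    required=False,
    items=ToolParameter(
        type="object",
        properties={
            "key": ToolParameter(type="string", description="Label key"),
            "value": ToolParameter(type="string", description="Label value"),
        },
    ),
)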
holmes/core/tools_utils/tool_executor.py CHANGED
@@ -38,6 +38,8 @@ class ToolExecutor:
          self.tools_by_name: dict[str, Tool] = {}
          for ts in toolsets_by_name.values():
              for tool in ts.tools:
+                 if tool.icon_url is None and ts.icon_url is not None:
+                     tool.icon_url = ts.icon_url
                  if tool.name in self.tools_by_name:
                      logging.warning(
                          f"Overriding existing tool '{tool.name} with new tool from {ts.name} at {ts.path}'!"
@@ -62,5 +64,8 @@ class ToolExecutor:
          return None

      @sentry_sdk.trace
-     def get_all_tools_openai_format(self):
-         return [tool.get_openai_format() for tool in self.tools_by_name.values()]
+     def get_all_tools_openai_format(self, target_model: str):
+         return [
+             tool.get_openai_format(target_model=target_model)
+             for tool in self.tools_by_name.values()
+         ]