holmesgpt 0.13.0__py3-none-any.whl → 0.13.2__py3-none-any.whl

This diff compares two publicly released versions of this package as published to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in those public registries.
Files changed (118)
  1. holmes/__init__.py +1 -1
  2. holmes/common/env_vars.py +11 -0
  3. holmes/config.py +3 -1
  4. holmes/core/conversations.py +0 -11
  5. holmes/core/investigation.py +0 -6
  6. holmes/core/llm.py +63 -2
  7. holmes/core/prompt.py +0 -2
  8. holmes/core/supabase_dal.py +2 -2
  9. holmes/core/todo_tasks_formatter.py +51 -0
  10. holmes/core/tool_calling_llm.py +277 -101
  11. holmes/core/tools.py +20 -4
  12. holmes/core/toolset_manager.py +1 -5
  13. holmes/core/tracing.py +1 -1
  14. holmes/interactive.py +63 -2
  15. holmes/main.py +7 -2
  16. holmes/plugins/prompts/_fetch_logs.jinja2 +4 -0
  17. holmes/plugins/prompts/_general_instructions.jinja2 +3 -1
  18. holmes/plugins/prompts/investigation_procedure.jinja2 +3 -13
  19. holmes/plugins/runbooks/CLAUDE.md +85 -0
  20. holmes/plugins/runbooks/README.md +24 -0
  21. holmes/plugins/toolsets/__init__.py +5 -1
  22. holmes/plugins/toolsets/argocd.yaml +1 -1
  23. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +18 -6
  24. holmes/plugins/toolsets/aws.yaml +9 -5
  25. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +3 -1
  26. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +3 -1
  27. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  28. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -1
  29. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +3 -1
  30. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  31. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -1
  32. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -1
  33. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -1
  34. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -1
  35. holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
  36. holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
  37. holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
  38. holmes/plugins/toolsets/bash/aws/constants.py +529 -0
  39. holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
  40. holmes/plugins/toolsets/bash/azure/constants.py +339 -0
  41. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
  42. holmes/plugins/toolsets/bash/bash_toolset.py +62 -17
  43. holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
  44. holmes/plugins/toolsets/bash/common/stringify.py +14 -1
  45. holmes/plugins/toolsets/bash/common/validators.py +91 -0
  46. holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
  47. holmes/plugins/toolsets/bash/docker/constants.py +255 -0
  48. holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
  49. holmes/plugins/toolsets/bash/helm/constants.py +92 -0
  50. holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
  51. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
  52. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
  53. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
  54. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
  55. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
  56. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
  57. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
  58. holmes/plugins/toolsets/bash/parse_command.py +106 -32
  59. holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
  60. holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
  61. holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
  62. holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
  63. holmes/plugins/toolsets/bash/utilities/head.py +12 -0
  64. holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
  65. holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
  66. holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
  67. holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
  68. holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
  69. holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
  70. holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
  71. holmes/plugins/toolsets/confluence.yaml +1 -1
  72. holmes/plugins/toolsets/coralogix/api.py +3 -1
  73. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +4 -4
  74. holmes/plugins/toolsets/coralogix/utils.py +41 -14
  75. holmes/plugins/toolsets/datadog/datadog_api.py +45 -2
  76. holmes/plugins/toolsets/datadog/datadog_general_instructions.jinja2 +208 -0
  77. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +43 -0
  78. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +12 -9
  79. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +722 -0
  80. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +17 -6
  81. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +15 -7
  82. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +6 -2
  83. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +9 -3
  84. holmes/plugins/toolsets/docker.yaml +1 -1
  85. holmes/plugins/toolsets/git.py +15 -5
  86. holmes/plugins/toolsets/grafana/toolset_grafana.py +25 -4
  87. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +4 -4
  88. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +5 -3
  89. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -32
  90. holmes/plugins/toolsets/helm.yaml +1 -1
  91. holmes/plugins/toolsets/internet/internet.py +4 -2
  92. holmes/plugins/toolsets/internet/notion.py +4 -2
  93. holmes/plugins/toolsets/investigator/core_investigation.py +5 -17
  94. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +1 -5
  95. holmes/plugins/toolsets/kafka.py +19 -7
  96. holmes/plugins/toolsets/kubernetes.yaml +5 -5
  97. holmes/plugins/toolsets/kubernetes_logs.py +4 -4
  98. holmes/plugins/toolsets/kubernetes_logs.yaml +1 -1
  99. holmes/plugins/toolsets/logging_utils/logging_api.py +15 -2
  100. holmes/plugins/toolsets/mcp/toolset_mcp.py +3 -1
  101. holmes/plugins/toolsets/newrelic.py +8 -4
  102. holmes/plugins/toolsets/opensearch/opensearch.py +13 -5
  103. holmes/plugins/toolsets/opensearch/opensearch_logs.py +4 -4
  104. holmes/plugins/toolsets/opensearch/opensearch_traces.py +9 -6
  105. holmes/plugins/toolsets/prometheus/prometheus.py +198 -57
  106. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +7 -3
  107. holmes/plugins/toolsets/robusta/robusta.py +10 -4
  108. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -2
  109. holmes/plugins/toolsets/servicenow/servicenow.py +9 -3
  110. holmes/plugins/toolsets/slab.yaml +1 -1
  111. holmes/utils/console/logging.py +6 -1
  112. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/METADATA +3 -2
  113. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/RECORD +116 -90
  114. holmes/core/todo_manager.py +0 -88
  115. holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
  116. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/LICENSE.txt +0 -0
  117. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/WHEEL +0 -0
  118. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/entry_points.txt +0 -0

holmes/core/tool_calling_llm.py CHANGED
@@ -2,18 +2,22 @@ import concurrent.futures
 import json
 import logging
 import textwrap
-import uuid
-from typing import Dict, List, Optional, Type, Union
+from typing import Dict, List, Optional, Type, Union, Callable
+
 
 import sentry_sdk
 from openai import BadRequestError
 from openai.types.chat.chat_completion_message_tool_call import (
     ChatCompletionMessageToolCall,
 )
-from pydantic import BaseModel
+from pydantic import BaseModel, Field
 from rich.console import Console
 
-from holmes.common.env_vars import TEMPERATURE, MAX_OUTPUT_TOKEN_RESERVATION
+from holmes.common.env_vars import (
+    TEMPERATURE,
+    MAX_OUTPUT_TOKEN_RESERVATION,
+    LOG_LLM_USAGE_RESPONSE,
+)
 
 from holmes.core.investigation_structured_output import (
     DEFAULT_SECTIONS,
@@ -39,9 +43,80 @@ from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
 from holmes.utils.stream import StreamEvents, StreamMessage
-from holmes.core.todo_manager import (
-    get_todo_manager,
-)
+
+# Create a named logger for cost tracking
+cost_logger = logging.getLogger("holmes.costs")
+
+
+class LLMCosts(BaseModel):
+    """Tracks cost and token usage for LLM calls."""
+
+    total_cost: float = 0.0
+    total_tokens: int = 0
+    prompt_tokens: int = 0
+    completion_tokens: int = 0
+
+
+def _extract_cost_from_response(full_response) -> float:
+    """Extract cost value from LLM response.
+
+    Args:
+        full_response: The raw LLM response object
+
+    Returns:
+        The cost as a float, or 0.0 if not available
+    """
+    try:
+        cost_value = (
+            full_response._hidden_params.get("response_cost", 0)
+            if hasattr(full_response, "_hidden_params")
+            else 0
+        )
+        # Ensure cost is a float
+        return float(cost_value) if cost_value is not None else 0.0
+    except Exception:
+        return 0.0
+
+
+def _process_cost_info(
+    full_response, costs: Optional[LLMCosts] = None, log_prefix: str = "LLM call"
+) -> None:
+    """Process cost and token information from LLM response.
+
+    Logs the cost information and optionally accumulates it into a costs object.
+
+    Args:
+        full_response: The raw LLM response object
+        costs: Optional LLMCosts object to accumulate costs into
+        log_prefix: Prefix for logging messages (e.g., "LLM call", "Post-processing")
+    """
+    try:
+        cost = _extract_cost_from_response(full_response)
+        usage = getattr(full_response, "usage", {})
+
+        if usage:
+            if LOG_LLM_USAGE_RESPONSE:  # shows stats on token cache usage
+                logging.info(f"LLM usage response:\n{usage}\n")
+            prompt_toks = usage.get("prompt_tokens", 0)
+            completion_toks = usage.get("completion_tokens", 0)
+            total_toks = usage.get("total_tokens", 0)
+            cost_logger.debug(
+                f"{log_prefix} cost: ${cost:.6f} | Tokens: {prompt_toks} prompt + {completion_toks} completion = {total_toks} total"
+            )
+            # Accumulate costs and tokens if costs object provided
+            if costs:
+                costs.total_cost += cost
+                costs.prompt_tokens += prompt_toks
+                costs.completion_tokens += completion_toks
+                costs.total_tokens += total_toks
+        elif cost > 0:
+            cost_logger.debug(
+                f"{log_prefix} cost: ${cost:.6f} | Token usage not available"
+            )
+            if costs:
+                costs.total_cost += cost
+    except Exception as e:
+        logging.debug(f"Could not extract cost information: {e}")
 
 
 def format_tool_result_data(tool_result: StructuredToolResult) -> str:
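
For orientation, a minimal sketch of how these helpers compose (not from the package; `FakeResponse` is a hypothetical stand-in for the LiteLLM response object, which exposes cost via `_hidden_params["response_cost"]` and token counts via `usage`):

```python
# Sketch only: assumes the LLMCosts/_process_cost_info definitions above.
class FakeResponse:
    """Hypothetical stand-in exposing the two attributes the helpers read."""

    def __init__(self, cost: float, prompt_tokens: int, completion_tokens: int):
        self._hidden_params = {"response_cost": cost}
        self.usage = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }


costs = LLMCosts()
for response in (FakeResponse(0.0021, 1200, 150), FakeResponse(0.0008, 400, 60)):
    _process_cost_info(response, costs, log_prefix="LLM call")

print(f"${costs.total_cost:.4f} across {costs.total_tokens} tokens")
# -> $0.0029 across 1810 tokens
```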
@@ -186,11 +261,11 @@ class ToolCallResult(BaseModel):
         }
 
 
-class LLMResult(BaseModel):
+class LLMResult(LLMCosts):
     tool_calls: Optional[List[ToolCallResult]] = None
     result: Optional[str] = None
     unprocessed_result: Optional[str] = None
-    instructions: List[str] = []
+    instructions: List[str] = Field(default_factory=list)
     # TODO: clean up these two
     prompt: Optional[str] = None
     messages: Optional[List[dict]] = None
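
Because `LLMResult` now subclasses `LLMCosts`, cost accounting rides along on the result object; the `**costs.model_dump()` spread later in this diff simply copies the four cost fields into the constructor. A caller could then read them directly (illustrative call site, not from the package):

```python
# Illustrative: `ai` is a ToolCallingLLM instance; argument names are assumed.
result = ai.prompt_call(system_prompt, user_prompt)
print(
    f"cost=${result.total_cost:.6f}, "
    f"tokens={result.prompt_tokens}+{result.completion_tokens}={result.total_tokens}"
)
```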
@@ -211,7 +286,9 @@ class ToolCallingLLM:
         self.max_steps = max_steps
         self.tracer = tracer
         self.llm = llm
-        self.investigation_id = str(uuid.uuid4())
+        self.approval_callback: Optional[
+            Callable[[StructuredToolResult], tuple[bool, Optional[str]]]
+        ] = None
 
     def prompt_call(
         self,
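
Clients opt into the new approval flow by assigning `approval_callback`, a callable that receives the pending `StructuredToolResult` and returns an `(approved, feedback)` tuple. A minimal console-based sketch (illustrative only; the package's own interactive flow may differ):

```python
from typing import Optional

from holmes.core.tools import StructuredToolResult  # assumed import path


def console_approval_callback(
    pending: StructuredToolResult,
) -> tuple[bool, Optional[str]]:
    # `invocation` carries the exact command awaiting approval.
    answer = input(f"Run `{pending.invocation}`? [y/N] ").strip().lower()
    if answer == "y":
        return True, None
    # Optional feedback is relayed back to the model on denial.
    feedback = input("Reason for denying (optional): ").strip()
    return False, feedback or None


tool_calling_llm.approval_callback = console_approval_callback  # a ToolCallingLLM
```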
@@ -259,6 +336,8 @@
     ) -> LLMResult:
         perf_timing = PerformanceTiming("tool_calling_llm.call")
         tool_calls = []  # type: ignore
+        costs = LLMCosts()
+
         tools = self.tool_executor.get_all_tools_openai_format(
             target_model=self.llm.model
         )
@@ -299,6 +378,9 @@
                 )
                 logging.debug(f"got response {full_response.to_json()}")  # type: ignore
 
+                # Extract and accumulate cost information
+                _process_cost_info(full_response, costs, "LLM call")
+
                 perf_timing.measure("llm.completion")
             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
             except BadRequestError as e:
@@ -352,11 +434,14 @@
                 if post_process_prompt and user_prompt:
                     logging.info("Running post processing on investigation.")
                     raw_response = text_response
-                    post_processed_response = self._post_processing_call(
-                        prompt=user_prompt,
-                        investigation=raw_response,
-                        user_prompt=post_process_prompt,
+                    post_processed_response, post_processing_cost = (
+                        self._post_processing_call(
+                            prompt=user_prompt,
+                            investigation=raw_response,
+                            user_prompt=post_process_prompt,
+                        )
                     )
+                    costs.total_cost += post_processing_cost
 
                     perf_timing.end(f"- completed in {i} iterations -")
                     return LLMResult(
@@ -365,6 +450,7 @@
                         tool_calls=tool_calls,
                         prompt=json.dumps(messages, indent=2),
                         messages=messages,
+                        **costs.model_dump(),  # Include all cost fields
                     )
 
                 perf_timing.end(f"- completed in {i} iterations -")
@@ -373,6 +459,7 @@
                     tool_calls=tool_calls,
                     prompt=json.dumps(messages, indent=2),
                     messages=messages,
+                    **costs.model_dump(),  # Include all cost fields
                 )
 
             if text_response and text_response.strip():
@@ -383,33 +470,106 @@
             perf_timing.measure("pre-tool-calls")
             with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                 futures = []
+                futures_tool_numbers: dict[
+                    concurrent.futures.Future, Optional[int]
+                ] = {}
+                tool_number: Optional[int]
                 for tool_index, t in enumerate(tools_to_call, 1):
                     logging.debug(f"Tool to call: {t}")
-                    futures.append(
-                        executor.submit(
-                            self._invoke_tool,
-                            tool_to_call=t,
-                            previous_tool_calls=tool_calls,
-                            trace_span=trace_span,
-                            tool_number=tool_number_offset + tool_index,
-                        )
+                    tool_number = tool_number_offset + tool_index
+                    future = executor.submit(
+                        self._invoke_llm_tool_call,
+                        tool_to_call=t,
+                        previous_tool_calls=tool_calls,
+                        trace_span=trace_span,
+                        tool_number=tool_number,
                     )
+                    futures_tool_numbers[future] = tool_number
+                    futures.append(future)
 
                 for future in concurrent.futures.as_completed(futures):
                     tool_call_result: ToolCallResult = future.result()
 
+                    tool_number = (
+                        futures_tool_numbers[future]
+                        if future in futures_tool_numbers
+                        else None
+                    )
+                    tool_call_result = self.handle_tool_call_approval(
+                        tool_call_result=tool_call_result, tool_number=tool_number
+                    )
+
                     tool_calls.append(tool_call_result.as_tool_result_response())
                     messages.append(tool_call_result.as_tool_call_message())
 
                     perf_timing.measure(f"tool completed {tool_call_result.tool_name}")
 
+            # Update the tool number offset for the next iteration
+            tool_number_offset += len(tools_to_call)
+
             # Add a blank line after all tools in this batch complete
             if tools_to_call:
                 logging.info("")
 
         raise Exception(f"Too many LLM calls - exceeded max_steps: {i}/{max_steps}")
 
-    def _invoke_tool(
+    def _directly_invoke_tool(
+        self,
+        tool_name: str,
+        tool_params: dict,
+        user_approved: bool,
+        trace_span=DummySpan(),
+        tool_number: Optional[int] = None,
+    ) -> StructuredToolResult:
+        tool_span = trace_span.start_span(name=tool_name, type="tool")
+        tool = self.tool_executor.get_tool_by_name(tool_name)
+        tool_response = None
+        try:
+            if (not tool) or (tool_params is None):
+                logging.warning(
+                    f"Skipping tool execution for {tool_name}: args: {tool_params}"
+                )
+                tool_response = StructuredToolResult(
+                    status=ToolResultStatus.ERROR,
+                    error=f"Failed to find tool {tool_name}",
+                    params=tool_params,
+                )
+            else:
+                tool_response = tool.invoke(
+                    tool_params, tool_number=tool_number, user_approved=user_approved
+                )
+        except Exception as e:
+            logging.error(
+                f"Tool call to {tool_name} failed with an Exception", exc_info=True
+            )
+            tool_response = StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error=f"Tool call failed: {e}",
+                params=tool_params,
+            )
+
+            # Log error to trace span
+            tool_span.log(
+                input=tool_params, output=str(e), metadata={"status": "ERROR"}
+            )
+
+        tool_span.log(
+            input=tool_params,
+            output=tool_response.data,
+            metadata={
+                "status": tool_response.status.value,
+                "error": tool_response.error,
+                "description": tool.get_parameterized_one_liner(tool_params)
+                if tool
+                else "",
+                "structured_tool_result": tool_response,
+            },
+        )
+        tool_span.end()
+
+        return tool_response
+
+    def _invoke_llm_tool_call(
         self,
         tool_to_call: ChatCompletionMessageToolCall,
         previous_tool_calls: list[dict],
@@ -438,92 +598,97 @@
             ),
         )
 
-        tool_params = None
+        tool_params = {}
         try:
            tool_params = json.loads(tool_arguments)
         except Exception:
             logging.warning(
                 f"Failed to parse arguments for tool: {tool_name}. args: {tool_arguments}"
             )
-        tool_call_id = tool_to_call.id
-        tool = self.tool_executor.get_tool_by_name(tool_name)
-
-        if (not tool) or (tool_params is None):
-            logging.warning(
-                f"Skipping tool execution for {tool_name}: args: {tool_arguments}"
-            )
-            return ToolCallResult(
-                tool_call_id=tool_call_id,
-                tool_name=tool_name,
-                description="NA",
-                result=StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
-                    error=f"Failed to find tool {tool_name}",
-                    params=tool_params,
-                ),
-            )
 
-        tool_response = None
+        tool_call_id = tool_to_call.id
 
-        # Create tool span if tracing is enabled
-        tool_span = trace_span.start_span(name=tool_name, type="tool")
+        tool_response = prevent_overly_repeated_tool_call(
+            tool_name=tool_name,
+            tool_params=tool_params,
+            tool_calls=previous_tool_calls,
+        )
 
-        try:
-            tool_response = prevent_overly_repeated_tool_call(
-                tool_name=tool.name,
+        if not tool_response:
+            tool_response = self._directly_invoke_tool(
+                tool_name=tool_name,
                 tool_params=tool_params,
-                tool_calls=previous_tool_calls,
-            )
-            if not tool_response:
-                tool_response = tool.invoke(tool_params, tool_number=tool_number)
-
-            if not isinstance(tool_response, StructuredToolResult):
-                # Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
-                logging.error(
-                    f"Tool {tool.name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
-                )
-                tool_response = StructuredToolResult(
-                    status=ToolResultStatus.SUCCESS,
-                    data=tool_response,
-                    params=tool_params,
-                )
-
-            # Log tool execution to trace span
-            tool_span.log(
-                input=tool_params,
-                output=tool_response.data,
-                metadata={
-                    "status": tool_response.status.value,
-                    "error": tool_response.error,
-                    "description": tool.get_parameterized_one_liner(tool_params),
-                    "structured_tool_result": tool_response,
-                },
+                user_approved=False,
+                trace_span=trace_span,
+                tool_number=tool_number,
             )
 
-        except Exception as e:
+        if not isinstance(tool_response, StructuredToolResult):
+            # Should never be needed but ensure Holmes does not crash if one of the tools does not return the right type
             logging.error(
-                f"Tool call to {tool_name} failed with an Exception", exc_info=True
+                f"Tool {tool_name} return type is not StructuredToolResult. Nesting the tool result into StructuredToolResult..."
             )
             tool_response = StructuredToolResult(
-                status=ToolResultStatus.ERROR,
-                error=f"Tool call failed: {e}",
+                status=ToolResultStatus.SUCCESS,
+                data=tool_response,
                 params=tool_params,
             )
 
-            # Log error to trace span
-            tool_span.log(
-                input=tool_params, output=str(e), metadata={"status": "ERROR"}
-            )
-        finally:
-            # End tool span
-            tool_span.end()
+        tool = self.tool_executor.get_tool_by_name(tool_name)
         return ToolCallResult(
             tool_call_id=tool_call_id,
             tool_name=tool_name,
-            description=tool.get_parameterized_one_liner(tool_params),
+            description=tool.get_parameterized_one_liner(tool_params) if tool else "",
             result=tool_response,
         )
 
+    def handle_tool_call_approval(
+        self, tool_call_result: ToolCallResult, tool_number: Optional[int]
+    ) -> ToolCallResult:
+        """
+        Handle approval for a single tool call if required.
+
+        Args:
+            tool_call_result: A single tool call result that may require approval
+
+        Returns:
+            Updated tool call result with approved/denied status
+        """
+
+        if tool_call_result.result.status != ToolResultStatus.APPROVAL_REQUIRED:
+            return tool_call_result
+
+        # If no approval callback, convert to ERROR because it is assumed the client may not be able to handle approvals
+        if not self.approval_callback:
+            tool_call_result.result.status = ToolResultStatus.ERROR
+            return tool_call_result
+
+        # Get approval from user
+        approved, feedback = self.approval_callback(tool_call_result.result)
+
+        if approved:
+            logging.debug(
+                f"User approved command: {tool_call_result.result.invocation}"
+            )
+
+            new_response = self._directly_invoke_tool(
+                tool_name=tool_call_result.tool_name,
+                tool_params=tool_call_result.result.params or {},
+                user_approved=True,
+                trace_span=DummySpan(),
+                tool_number=tool_number,
+            )
+            tool_call_result.result = new_response
+        else:
+            # User denied - update to error
+            feedback_text = f" User feedback: {feedback}" if feedback else ""
+            tool_call_result.result.status = ToolResultStatus.ERROR
+            tool_call_result.result.error = (
+                f"User denied command execution.{feedback_text}"
+            )
+
+        return tool_call_result
+
     @staticmethod
     def __load_post_processing_user_prompt(
         input_prompt, investigation, user_prompt: Optional[str] = None
@@ -540,7 +705,7 @@ class ToolCallingLLM:
         investigation,
         user_prompt: Optional[str] = None,
         system_prompt: str = "You are an AI assistant summarizing Kubernetes issues.",
-    ) -> Optional[str]:
+    ) -> tuple[Optional[str], float]:
         try:
             user_prompt = ToolCallingLLM.__load_post_processing_user_prompt(
                 prompt, investigation, user_prompt
@@ -559,10 +724,18 @@
             ]
             full_response = self.llm.completion(messages=messages, temperature=0)
             logging.debug(f"Post processing response {full_response}")
-            return full_response.choices[0].message.content  # type: ignore
+
+            # Extract and log cost information for post-processing
+            post_processing_cost = _extract_cost_from_response(full_response)
+            if post_processing_cost > 0:
+                cost_logger.debug(
+                    f"Post-processing LLM cost: ${post_processing_cost:.6f}"
+                )
+
+            return full_response.choices[0].message.content, post_processing_cost  # type: ignore
         except Exception:
             logging.exception("Failed to run post processing", exc_info=True)
-            return investigation
+            return investigation, 0.0
 
     @sentry_sdk.trace
     def truncate_messages_to_fit_context(
@@ -602,6 +775,7 @@
         perf_timing.measure("get_all_tools_openai_format")
         max_steps = self.max_steps
         i = 0
+        tool_number_offset = 0
 
         while i < max_steps:
             i += 1
@@ -634,6 +808,10 @@
                     stream=False,
                     drop_params=True,
                 )
+
+                # Log cost information for this iteration (no accumulation in streaming)
+                _process_cost_info(full_response, log_prefix="LLM iteration")
+
                 perf_timing.measure("llm.completion")
             # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
             except BadRequestError as e:
@@ -689,15 +867,15 @@
             with concurrent.futures.ThreadPoolExecutor(max_workers=16) as executor:
                 futures = []
                 for tool_index, t in enumerate(tools_to_call, 1):  # type: ignore
-                    futures.append(
-                        executor.submit(
-                            self._invoke_tool,
-                            tool_to_call=t,  # type: ignore
-                            previous_tool_calls=tool_calls,
-                            trace_span=DummySpan(),  # Streaming mode doesn't support tracing yet
-                            tool_number=tool_index,
-                        )
+                    tool_number = tool_number_offset + tool_index
+                    future = executor.submit(
+                        self._invoke_llm_tool_call,
+                        tool_to_call=t,  # type: ignore
+                        previous_tool_calls=tool_calls,
+                        trace_span=DummySpan(),  # Streaming mode doesn't support tracing yet
+                        tool_number=tool_number,
                     )
+                    futures.append(future)
                     yield StreamMessage(
                         event=StreamEvents.START_TOOL,
                         data={"tool_name": t.function.name, "id": t.id},
@@ -716,6 +894,9 @@
                         data=tool_call_result.as_streaming_tool_result_response(),
                     )
 
+            # Update the tool number offset for the next iteration
+            tool_number_offset += len(tools_to_call)
+
         raise Exception(
             f"Too many LLM calls - exceeded max_steps: {i}/{self.max_steps}"
         )
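
The `tool_number_offset` bookkeeping makes tool numbering continuous across iterations of the agent loop instead of restarting at #1 for every batch. The arithmetic in isolation:

```python
# Two successive agent iterations, each with its own batch of tool calls.
tool_number_offset = 0
for tools_to_call in (["kubectl_get", "kubectl_logs"], ["kubectl_describe"]):
    for tool_index, name in enumerate(tools_to_call, 1):
        print(f"Running tool #{tool_number_offset + tool_index}: {name}")
    # Update the tool number offset for the next iteration
    tool_number_offset += len(tools_to_call)
# -> tool #1, tool #2, then tool #3 (previously the second batch restarted at #1)
```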
@@ -793,9 +974,6 @@ class IssueInvestigator(ToolCallingLLM):
                 "[bold]No runbooks found for this issue. Using default behaviour. (Add runbooks to guide the investigation.)[/bold]"
             )
 
-        todo_manager = get_todo_manager()
-        todo_context = todo_manager.format_tasks_for_prompt(self.investigation_id)
-
         system_prompt = load_and_render_prompt(
             prompt,
             {
@@ -804,8 +982,6 @@
                 "structured_output": request_structured_output_from_llm,
                 "toolsets": self.tool_executor.toolsets,
                 "cluster_name": self.cluster_name,
-                "todo_list": todo_context,
-                "investigation_id": self.investigation_id,
             },
         )
 
holmes/core/tools.py CHANGED
@@ -24,12 +24,15 @@ class ToolResultStatus(str, Enum):
     SUCCESS = "success"
     ERROR = "error"
     NO_DATA = "no_data"
+    APPROVAL_REQUIRED = "approval_required"
 
     def to_color(self) -> str:
         if self == ToolResultStatus.SUCCESS:
             return "green"
         elif self == ToolResultStatus.ERROR:
             return "red"
+        elif self == ToolResultStatus.APPROVAL_REQUIRED:
+            return "yellow"
         else:
             return "white"
 
@@ -38,6 +41,8 @@
             return "✔"
         elif self == ToolResultStatus.ERROR:
             return "❌"
+        elif self == ToolResultStatus.APPROVAL_REQUIRED:
+            return "⚠️"
         else:
             return "⚪️"
 
@@ -148,14 +153,17 @@ class Tool(ABC, BaseModel):
         )
 
     def invoke(
-        self, params: Dict, tool_number: Optional[int] = None
+        self,
+        params: Dict,
+        tool_number: Optional[int] = None,
+        user_approved: bool = False,
     ) -> StructuredToolResult:
         tool_number_str = f"#{tool_number} " if tool_number else ""
         logging.info(
             f"Running tool {tool_number_str}[bold]{self.name}[/bold]: {self.get_parameterized_one_liner(params)}"
         )
         start_time = time.time()
-        result = self._invoke(params)
+        result = self._invoke(params=params, user_approved=user_approved)
         result.icon_url = self.icon_url
         elapsed = time.time() - start_time
         output_str = (
@@ -171,7 +179,13 @@
         return result
 
     @abstractmethod
-    def _invoke(self, params: Dict) -> StructuredToolResult:
+    def _invoke(
+        self, params: dict, user_approved: bool = False
+    ) -> StructuredToolResult:
+        """
+        params: the tool params
+        user_approved: whether the tool call is approved by the user. Can be used to confidently execute unsafe actions.
+        """
         pass
 
     @abstractmethod
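
Combined with the new `APPROVAL_REQUIRED` status, the `user_approved` flag lets a tool defer a destructive action until the approval flow re-invokes it. A hypothetical sketch (the class, its fields, and its params are illustrative; other required `Tool` members are elided):

```python
from holmes.core.tools import StructuredToolResult, Tool, ToolResultStatus  # assumed path


class DeletePodTool(Tool):  # hypothetical example tool
    name: str = "delete_pod"
    description: str = "Delete a Kubernetes pod (destructive; needs approval)"

    def get_parameterized_one_liner(self, params: dict) -> str:
        return f"kubectl delete pod {params.get('name')} -n {params.get('namespace')}"

    def _invoke(
        self, params: dict, user_approved: bool = False
    ) -> StructuredToolResult:
        command = self.get_parameterized_one_liner(params)
        if not user_approved:
            # First pass: report the pending command and wait for approval.
            return StructuredToolResult(
                status=ToolResultStatus.APPROVAL_REQUIRED,
                invocation=command,
                params=params,
            )
        # Second pass: handle_tool_call_approval re-invokes with user_approved=True.
        return StructuredToolResult(
            status=ToolResultStatus.SUCCESS,
            data=f"executed: {command}",
            params=params,
        )
```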
@@ -223,7 +237,9 @@ class YAMLTool(Tool, BaseModel):
             return ToolResultStatus.NO_DATA
         return ToolResultStatus.SUCCESS
 
-    def _invoke(self, params) -> StructuredToolResult:
+    def _invoke(
+        self, params: dict, user_approved: bool = False
+    ) -> StructuredToolResult:
         if self.command is not None:
             raw_output, return_code, invocation = self.__invoke_command(params)
         else:

holmes/core/toolset_manager.py CHANGED
@@ -266,11 +266,7 @@ class ToolsetManager:
             toolset.path = cached_status.get("path", None)
             # check prerequisites for only enabled toolset when the toolset is loaded from cache. When the toolset is
             # not loaded from cache, the prerequisites are checked in the refresh_toolset_status method.
-            if (
-                toolset.enabled
-                and toolset.status == ToolsetStatusEnum.ENABLED
-                and using_cached
-            ):
+            if toolset.enabled and toolset.status == ToolsetStatusEnum.ENABLED:
                 enabled_toolsets_from_cache.append(toolset)
         self.check_toolset_prerequisites(enabled_toolsets_from_cache)
 
holmes/core/tracing.py CHANGED
@@ -120,7 +120,7 @@
 class DummyTracer:
     """A no-op tracer implementation for when tracing is disabled."""
 
-    def start_experiment(self, experiment_name=None, metadata=None):
+    def start_experiment(self, experiment_name=None, additional_metadata=None):
         """No-op experiment creation."""
         return None
 