holmesgpt 0.14.4a0__py3-none-any.whl → 0.15.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +12 -10
- holmes/common/env_vars.py +14 -0
- holmes/config.py +51 -4
- holmes/core/conversations.py +3 -2
- holmes/core/llm.py +198 -72
- holmes/core/openai_formatting.py +13 -0
- holmes/core/tool_calling_llm.py +129 -95
- holmes/core/tools.py +21 -1
- holmes/core/tools_utils/token_counting.py +2 -1
- holmes/core/tools_utils/tool_context_window_limiter.py +13 -4
- holmes/interactive.py +17 -7
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/toolsets/__init__.py +4 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +0 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/grafana/grafana_api.py +1 -1
- holmes/plugins/toolsets/investigator/core_investigation.py +14 -13
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1 -1
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -1
- holmes/utils/stream.py +30 -1
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.15.0.dist-info}/METADATA +3 -1
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.15.0.dist-info}/RECORD +30 -27
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.15.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.15.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.15.0.dist-info}/entry_points.txt +0 -0
holmes/core/tool_calling_llm.py
CHANGED
@@ -34,7 +34,7 @@ from holmes.core.investigation_structured_output import (
     is_response_an_incorrect_tool_call,
 )
 from holmes.core.issue import Issue
-from holmes.core.llm import LLM
+from holmes.core.llm import LLM
 from holmes.core.performance_timing import PerformanceTiming
 from holmes.core.resource_instruction import ResourceInstructions
 from holmes.core.runbooks import RunbookManager
@@ -58,7 +58,12 @@ from holmes.utils.tags import format_tags_in_string, parse_messages_tags
 from holmes.core.tools_utils.tool_executor import ToolExecutor
 from holmes.core.tracing import DummySpan
 from holmes.utils.colors import AI_COLOR
-from holmes.utils.stream import
+from holmes.utils.stream import (
+    StreamEvents,
+    StreamMessage,
+    add_token_count_to_metadata,
+    build_stream_event_token_count,
+)
 
 # Create a named logger for cost tracking
 cost_logger = logging.getLogger("holmes.costs")
@@ -164,7 +169,8 @@ def truncate_messages_to_fit_context(
     messages_except_tools = [
         message for message in messages if message["role"] != "tool"
     ]
-
+    tokens = count_tokens_fn(messages_except_tools)
+    message_size_without_tools = tokens.total_tokens
 
     tool_call_messages = [message for message in messages if message["role"] == "tool"]
 
@@ -185,7 +191,9 @@
     )
     remaining_space = available_space
     tool_call_messages.sort(
-        key=lambda x: count_tokens_fn(
+        key=lambda x: count_tokens_fn(
+            [{"role": "tool", "content": x["content"]}]
+        ).total_tokens
     )
 
     truncations = []
@@ -196,7 +204,9 @@
     for i, msg in enumerate(tool_call_messages):
         remaining_tools = len(tool_call_messages) - i
         max_allocation = remaining_space // remaining_tools
-        needed_space = count_tokens_fn(
+        needed_space = count_tokens_fn(
+            [{"role": "tool", "content": msg["content"]}]
+        ).total_tokens
         allocated_space = min(needed_space, max_allocation)
 
         if needed_space > allocated_space:
@@ -257,6 +267,12 @@ class LLMResult(LLMCosts):
     )
 
 
+class ToolCallWithDecision(BaseModel):
+    message_index: int
+    tool_call: ChatCompletionMessageToolCall
+    decision: Optional[ToolApprovalDecision]
+
+
 class ToolCallingLLM:
     llm: LLM
 
@@ -284,83 +300,79 @@ class ToolCallingLLM:
         Returns:
             Updated messages list with tool execution results
         """
-
-
-        # Find the last message with pending approvals
-        pending_message_idx = None
-        pending_tool_calls = None
-
-        for i in reversed(range(len(messages))):
-            msg = messages[i]
-            if msg.get("role") == "assistant" and msg.get("pending_approval"):
-                pending_message_idx = i
-                pending_tool_calls = msg.get("tool_calls", [])
-                break
-
-        if pending_message_idx is None or not pending_tool_calls:
-            # No pending approvals found
-            if tool_decisions:
-                logging.warning(
-                    f"Received {len(tool_decisions)} tool decisions but no pending approvals found"
-                )
+        if not tool_decisions:
             return messages
 
         # Create decision lookup
-
+        decisions_by_tool_call_id = {
             decision.tool_call_id: decision for decision in tool_decisions
         }
 
-
-        pending_tool_ids = {tool_call["id"] for tool_call in pending_tool_calls}
-        invalid_decisions = [
-            decision.tool_call_id
-            for decision in tool_decisions
-            if decision.tool_call_id not in pending_tool_ids
-        ]
+        pending_tool_calls: list[ToolCallWithDecision] = []
 
-
-
-
-
+        for i in reversed(range(len(messages))):
+            msg = messages[i]
+            if msg.get("role") == "assistant" and msg.get("tool_calls"):
+                message_tool_calls = msg.get("tool_calls", [])
+                for tool_call in message_tool_calls:
+                    decision = decisions_by_tool_call_id.get(tool_call.get("id"), None)
+                    if tool_call.get("pending_approval"):
+                        del tool_call[
+                            "pending_approval"
+                        ]  # Cleanup so that a pending approval is not tagged on message in a future response
+                        pending_tool_calls.append(
+                            ToolCallWithDecision(
+                                tool_call=ChatCompletionMessageToolCall(**tool_call),
+                                decision=decision,
+                                message_index=i,
+                            )
+                        )
 
-
-
-
-
+        if not pending_tool_calls:
+            error_message = f"Received {len(tool_decisions)} tool decisions but no pending approvals found"
+            logging.error(error_message)
+            raise Exception(error_message)
 
+        for tool_call_with_decision in pending_tool_calls:
+            tool_call_message: dict
+            tool_call = tool_call_with_decision.tool_call
+            decision = tool_call_with_decision.decision
             if decision and decision.approved:
                 try:
-                    tool_call_obj = ChatCompletionMessageToolCall(**tool_call)
                     llm_tool_result = self._invoke_llm_tool_call(
-                        tool_to_call=
+                        tool_to_call=tool_call,
                         previous_tool_calls=[],
-                        trace_span=DummySpan(),
+                        trace_span=DummySpan(),  # TODO: replace with proper span
                         tool_number=None,
+                        user_approved=True,
                     )
-
+                    tool_call_message = llm_tool_result.as_tool_call_message()
 
                 except Exception as e:
                     logging.error(
-                        f"Failed to execute approved tool {
+                        f"Failed to execute approved tool {tool_call.id}: {e}"
                     )
-
-
-                            "tool_call_id": tool_call_id,
-                            "role": "tool",
-                            "name": tool_call["function"]["name"],
-                            "content": f"Tool execution failed: {str(e)}",
-                        }
-                    )
-            else:
-                # Tool was rejected or no decision found, add rejection message
-                messages.append(
-                    {
-                        "tool_call_id": tool_call_id,
+                    tool_call_message = {
+                        "tool_call_id": tool_call.id,
                         "role": "tool",
-                        "name": tool_call
-                        "content": "Tool execution
+                        "name": tool_call.function.name,
+                        "content": f"Tool execution failed: {str(e)}",
                     }
-
+            else:
+                # Tool was rejected or no decision found, add rejection message
+                tool_call_message = {
+                    "tool_call_id": tool_call.id,
+                    "role": "tool",
+                    "name": tool_call.function.name,
+                    "content": "Tool execution was denied by the user.",
+                }
+
+            # It is expected that the tool call result directly follows the tool call request from the LLM
+            # The API call may contain a user ask which is appended to the messages so we can't just append
+            # tool call results; they need to be inserted right after the llm's message requesting tool calls
+            messages.insert(
+                tool_call_with_decision.message_index + 1, tool_call_message
+            )
 
         return messages
 
@@ -427,12 +439,12 @@
             tools = None if i == max_steps else tools
             tool_choice = "auto" if tools else None
 
-
+            tokens = self.llm.count_tokens(messages=messages, tools=tools)
             max_context_size = self.llm.get_context_window_size()
             maximum_output_token = self.llm.get_maximum_output_token()
             perf_timing.measure("count tokens")
 
-            if (total_tokens + maximum_output_token) > max_context_size:
+            if (tokens.total_tokens + maximum_output_token) > max_context_size:
                 logging.warning("Token limit exceeded. Truncating tool responses.")
                 truncated_res = self.truncate_messages_to_fit_context(
                     messages, max_context_size, maximum_output_token
@@ -483,7 +495,7 @@
 
             if incorrect_tool_call:
                 logging.warning(
-                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-
+                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4.1' or other structured output compatible models. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
                 sentry_helper.capture_structured_output_incorrect_tool_call()
@@ -522,11 +534,17 @@
             )
             costs.total_cost += post_processing_cost
 
-        self.llm.
+        tokens = self.llm.count_tokens(messages=messages, tools=tools)
+
+        add_token_count_to_metadata(
+            tokens=tokens,
+            full_llm_response=full_response,
+            max_context_size=max_context_size,
+            maximum_output_token=maximum_output_token,
+            metadata=metadata,
+        )
         perf_timing.end(f"- completed in {i} iterations -")
-
-        metadata["max_tokens"] = max_context_size
-        metadata["max_output_tokens"] = maximum_output_token
+
         return LLMResult(
             result=post_processed_response,
             unprocessed_result=raw_response,
@@ -650,6 +668,7 @@
         tool_call_id: str,
         tool_name: str,
         tool_arguments: str,
+        user_approved: bool,
         previous_tool_calls: list[dict],
         tool_number: Optional[int] = None,
     ) -> ToolCallResult:
@@ -671,7 +690,7 @@
         tool_response = self._directly_invoke_tool_call(
             tool_name=tool_name,
             tool_params=tool_params,
-            user_approved=
+            user_approved=user_approved,
             tool_number=tool_number,
         )
 
@@ -716,6 +735,7 @@
         previous_tool_calls: list[dict],
         trace_span=None,
         tool_number=None,
+        user_approved: bool = False,
     ) -> ToolCallResult:
         if trace_span is None:
             trace_span = DummySpan()
@@ -748,6 +768,7 @@
             tool_arguments,
             previous_tool_calls=previous_tool_calls,
             tool_number=tool_number,
+            user_approved=user_approved,
         )
 
         prevent_overly_big_tool_response(
@@ -858,7 +879,7 @@
             messages,
             max_context_size,
             maximum_output_token,
-            self.llm.
+            self.llm.count_tokens,
         )
         if truncated_res.truncations:
             sentry_helper.capture_tool_truncations(truncated_res.truncations)
@@ -903,12 +924,12 @@
             tools = None if i == max_steps else tools
             tool_choice = "auto" if tools else None
 
-
+            tokens = self.llm.count_tokens(messages=messages, tools=tools)  # type: ignore
             max_context_size = self.llm.get_context_window_size()
             maximum_output_token = self.llm.get_maximum_output_token()
             perf_timing.measure("count tokens")
 
-            if (total_tokens + maximum_output_token) > max_context_size:
+            if (tokens.total_tokens + maximum_output_token) > max_context_size:
                 logging.warning("Token limit exceeded. Truncating tool responses.")
                 truncated_res = self.truncate_messages_to_fit_context(
                     messages, max_context_size, maximum_output_token
@@ -958,7 +979,7 @@
 
             if incorrect_tool_call:
                 logging.warning(
-                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-
+                    "Detected incorrect tool call. Structured output will be disabled. This can happen on models that do not support tool calling. For Azure AI, make sure the model name contains 'gpt-4.1' or other structured output compatible models. To disable this holmes behaviour, set REQUEST_STRUCTURED_OUTPUT_FROM_LLM to `false`."
                 )
                 # disable structured output going forward and and retry
                 sentry_helper.capture_structured_output_incorrect_tool_call()
@@ -972,12 +993,18 @@
                 )
             )
 
+            tokens = self.llm.count_tokens(messages=messages, tools=tools)
+            add_token_count_to_metadata(
+                tokens=tokens,
+                full_llm_response=full_response,
+                max_context_size=max_context_size,
+                maximum_output_token=maximum_output_token,
+                metadata=metadata,
+            )
+            yield build_stream_event_token_count(metadata=metadata)
+
             tools_to_call = getattr(response_message, "tool_calls", None)
             if not tools_to_call:
-                self.llm.count_tokens_for_message(messages)
-                metadata["usage"] = get_llm_usage(full_response)
-                metadata["max_tokens"] = max_context_size
-                metadata["max_output_tokens"] = maximum_output_token
                 yield StreamMessage(
                     event=StreamEvents.ANSWER_END,
                     data={
@@ -993,7 +1020,11 @@
             if reasoning or message:
                 yield StreamMessage(
                     event=StreamEvents.AI_MESSAGE,
-                    data={
+                    data={
+                        "content": message,
+                        "reasoning": reasoning,
+                        "metadata": metadata,
+                    },
                 )
 
             perf_timing.measure("pre-tool-calls")
@@ -1069,23 +1100,11 @@
             # If we have approval required tools, end the stream with pending approvals
             if pending_approvals:
                 # Add assistant message with pending tool calls
-
-
-
-
-
-                            "id": result.tool_call_id,
-                            "type": "function",
-                            "function": {
-                                "name": result.tool_name,
-                                "arguments": json.dumps(result.result.params or {}),
-                            },
-                        }
-                        for result in approval_required_tools
-                    ],
-                    "pending_approval": True,
-                }
-                messages.append(assistant_msg)
+                for result in approval_required_tools:
+                    tool_call = self.find_assistant_tool_call_request(
+                        tool_call_id=result.tool_call_id, messages=messages
+                    )
+                    tool_call["pending_approval"] = True
 
                 # End stream with approvals required
                 yield StreamMessage(
@@ -1108,6 +1127,21 @@
                 f"Too many LLM calls - exceeded max_steps: {i}/{self.max_steps}"
             )
 
+    def find_assistant_tool_call_request(
+        self, tool_call_id: str, messages: list[dict[str, Any]]
+    ) -> dict[str, Any]:
+        for message in messages:
+            if message.get("role") == "assistant":
+                for tool_call in message.get("tool_calls", []):
+                    if tool_call.get("id") == tool_call_id:
+                        return tool_call
+
+        # Should not happen unless there is a bug.
+        # If we are here
+        raise Exception(
+            f"Failed to find assistant request for a tool_call in conversation history. tool_call_id={tool_call_id}"
+        )
+
 
 # TODO: consider getting rid of this entirely and moving templating into the cmds in holmes_cli.py
 class IssueInvestigator(ToolCallingLLM):
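Note on the pattern above: throughout tool_calling_llm.py the release replaces ad-hoc token bookkeeping with a single pre-call budget check - count tokens for the pending messages (and tool schemas), compare against the context window minus the reserved output budget, and truncate tool responses when the budget is exceeded. A minimal self-contained sketch of that check follows; TokenCount, count_tokens and truncate_tool_messages are illustrative stand-ins, not the package's API.

from dataclasses import dataclass
from typing import Callable

@dataclass
class TokenCount:
    total_tokens: int  # mirrors the .total_tokens field read throughout the diff

def ensure_messages_fit(
    messages: list,
    count_tokens: Callable,            # stand-in for llm.count_tokens(messages=..., tools=...)
    max_context_size: int,             # stand-in for llm.get_context_window_size()
    maximum_output_token: int,         # stand-in for llm.get_maximum_output_token()
    truncate_tool_messages: Callable,  # stand-in for truncate_messages_to_fit_context
) -> list:
    tokens: TokenCount = count_tokens(messages)
    # Same guard as the diff: input tokens plus reserved output tokens must fit the window.
    if (tokens.total_tokens + maximum_output_token) > max_context_size:
        return truncate_tool_messages(messages, max_context_size, maximum_output_token)
    return messages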
holmes/core/tools.py
CHANGED
@@ -158,6 +158,7 @@ class ToolParameter(BaseModel):
     required: bool = True
     properties: Optional[Dict[str, "ToolParameter"]] = None  # For object types
     items: Optional["ToolParameter"] = None  # For array item schemas
+    enum: Optional[List[str]] = None  # For restricting to specific values
 
 
 class ToolInvokeContext(BaseModel):
@@ -682,7 +683,26 @@ class Toolset(BaseModel):
     def check_prerequisites(self):
         self.status = ToolsetStatusEnum.ENABLED
 
-
+        # Sort prerequisites by type to fail fast on missing env vars before
+        # running slow commands (e.g., ArgoCD checks that timeout):
+        # 1. Static checks (instant)
+        # 2. Environment variable checks (instant, often required by commands)
+        # 3. Callable checks (variable speed)
+        # 4. Command checks (slowest - may timeout or hang)
+        def prereq_priority(prereq):
+            if isinstance(prereq, StaticPrerequisite):
+                return 0
+            elif isinstance(prereq, ToolsetEnvironmentPrerequisite):
+                return 1
+            elif isinstance(prereq, CallablePrerequisite):
+                return 2
+            elif isinstance(prereq, ToolsetCommandPrerequisite):
+                return 3
+            return 4  # Unknown types go last
+
+        sorted_prereqs = sorted(self.prerequisites, key=prereq_priority)
+
+        for prereq in sorted_prereqs:
             if isinstance(prereq, ToolsetCommandPrerequisite):
                 try:
                     command = self.interpolate_command(prereq.command)
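The new prereq_priority ordering in check_prerequisites means cheap checks run before expensive ones regardless of declaration order. A small standalone illustration with stand-in prerequisite classes (not the real holmes classes):

class StaticPrerequisite: ...
class ToolsetEnvironmentPrerequisite: ...
class CallablePrerequisite: ...
class ToolsetCommandPrerequisite: ...

def prereq_priority(prereq):
    # Same ranking as the diff: static, env var, callable, command, unknown last.
    order = [
        StaticPrerequisite,
        ToolsetEnvironmentPrerequisite,
        CallablePrerequisite,
        ToolsetCommandPrerequisite,
    ]
    for rank, cls in enumerate(order):
        if isinstance(prereq, cls):
            return rank
    return len(order)

prereqs = [ToolsetCommandPrerequisite(), StaticPrerequisite(), CallablePrerequisite()]
print([type(p).__name__ for p in sorted(prereqs, key=prereq_priority)])
# ['StaticPrerequisite', 'CallablePrerequisite', 'ToolsetCommandPrerequisite']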
holmes/core/tools_utils/tool_context_window_limiter.py
CHANGED
@@ -1,5 +1,8 @@
 from typing import Optional
-from holmes.common.env_vars import
+from holmes.common.env_vars import (
+    TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT,
+    TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS,
+)
 from holmes.core.llm import LLM
 from holmes.core.tools import StructuredToolResultStatus
 from holmes.core.models import ToolCallResult
@@ -16,8 +19,12 @@ def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int
 
 
 def get_max_token_count_for_single_tool(llm: LLM) -> int:
-    return
-
+    return min(
+        TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS,
+        get_pct_token_count(
+            percent_of_total_context_window=TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT,
+            llm=llm,
+        ),
     )
 
 
@@ -25,7 +32,9 @@ def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM)
     max_tokens_allowed = get_max_token_count_for_single_tool(llm)
 
     message = tool_call_result.as_tool_call_message()
-
+
+    tokens = llm.count_tokens(messages=[message])
+    messages_token = tokens.total_tokens
 
     if messages_token > max_tokens_allowed:
         relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
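get_max_token_count_for_single_tool now takes the smaller of an absolute token cap and a percentage of the model's context window. A quick worked example, assuming get_pct_token_count is a plain percentage of the window and using made-up values for the two environment variables (the real defaults live in holmes/common/env_vars.py and are not part of this diff):

# Illustrative values only, not the package defaults.
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS = 64_000
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = 20.0  # percent of the context window

context_window = 200_000  # e.g. a value llm.get_context_window_size() could return
pct_budget = int(context_window * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT / 100)  # 40_000
max_single_tool = min(TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS, pct_budget)
print(max_single_tool)  # 40000 - here the percentage cap is the binding limit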
holmes/interactive.py
CHANGED
@@ -480,10 +480,14 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No
         return
 
     # Calculate context statistics
-
+    tokens_metadata = ai.llm.count_tokens(
+        messages
+    )  # TODO: pass tools to also count tokens used by input tools
     max_context_size = ai.llm.get_context_window_size()
     max_output_tokens = ai.llm.get_maximum_output_token()
-    available_tokens =
+    available_tokens = (
+        max_context_size - tokens_metadata.total_tokens - max_output_tokens
+    )
 
     # Analyze token distribution by role and tool calls
     role_token_usage: DefaultDict[str, int] = defaultdict(int)
@@ -492,19 +496,21 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No
 
     for msg in messages:
         role = msg.get("role", "unknown")
-
-
+        message_tokens = ai.llm.count_tokens(
+            [msg]
+        )  # TODO: pass tools to also count tokens used by input tools
+        role_token_usage[role] += message_tokens.total_tokens
 
         # Track individual tool usage
         if role == "tool":
             tool_name = msg.get("name", "unknown_tool")
-            tool_token_usage[tool_name] +=
+            tool_token_usage[tool_name] += message_tokens.total_tokens
             tool_call_counts[tool_name] += 1
 
     # Display context information
     console.print(f"[bold {STATUS_COLOR}]Conversation Context:[/bold {STATUS_COLOR}]")
     console.print(
-        f" Context used: {total_tokens:,} / {max_context_size:,} tokens ({(total_tokens / max_context_size) * 100:.1f}%)"
+        f" Context used: {tokens_metadata.total_tokens:,} / {max_context_size:,} tokens ({(tokens_metadata.total_tokens / max_context_size) * 100:.1f}%)"
     )
     console.print(
         f" Space remaining: {available_tokens:,} for input ({(available_tokens / max_context_size) * 100:.1f}%) + {max_output_tokens:,} reserved for output ({(max_output_tokens / max_context_size) * 100:.1f}%)"
@@ -515,7 +521,11 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> No
     for role in ["system", "user", "assistant", "tool"]:
         if role in role_token_usage:
             tokens = role_token_usage[role]
-            percentage = (
+            percentage = (
+                (tokens / tokens_metadata.total_tokens) * 100
+                if tokens_metadata.total_tokens > 0
+                else 0
+            )
             role_name = {
                 "system": "system prompt",
                 "user": "user messages",
holmes/plugins/prompts/_general_instructions.jinja2
CHANGED
@@ -12,8 +12,7 @@
 * do not stop investigating until you are at the final root cause you are able to find.
 * use the "five whys" methodology to find the root cause.
 * for example, if you found a problem in microservice A that is due to an error in microservice B, look at microservice B too and find the error in that.
-* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and
-* in this case, try to find substrings or search for the correct spellings
+* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and in this case, try to find substrings or search for the correct spellings
 * always provide detailed information like exact resource names, versions, labels, etc
 * even if you found the root cause, keep investigating to find other possible root causes and to gather data for the answer like exact names
 * if a runbook url is present you MUST fetch the runbook before beginning your investigation
holmes/plugins/toolsets/__init__.py
CHANGED
@@ -44,6 +44,9 @@ from holmes.plugins.toolsets.mcp.toolset_mcp import RemoteMCPToolset
 from holmes.plugins.toolsets.newrelic.newrelic import NewRelicToolset
 from holmes.plugins.toolsets.opensearch.opensearch import OpenSearchToolset
 from holmes.plugins.toolsets.opensearch.opensearch_logs import OpenSearchLogsToolset
+from holmes.plugins.toolsets.opensearch.opensearch_query_assist import (
+    OpenSearchQueryAssistToolset,
+)
 from holmes.plugins.toolsets.opensearch.opensearch_traces import OpenSearchTracesToolset
 from holmes.plugins.toolsets.rabbitmq.toolset_rabbitmq import RabbitMQToolset
 from holmes.plugins.toolsets.robusta.robusta import RobustaToolset
@@ -93,6 +96,7 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
         DatadogRDSToolset(),
         OpenSearchLogsToolset(),
         OpenSearchTracesToolset(),
+        OpenSearchQueryAssistToolset(),
         CoralogixLogsToolset(),
         RabbitMQToolset(),
         GitToolset(),
holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py
CHANGED
@@ -42,7 +42,6 @@ class MongoDBAtlasToolset(Toolset):
     def __init__(self):
         super().__init__(
             prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
-            experimental=True,
             tools=[
                 ReturnProjectAlerts(toolset=self),
                 ReturnProjectProcesses(toolset=self),
holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py
CHANGED
@@ -60,7 +60,6 @@ class AzureSQLToolset(BaseAzureSQLToolset):
             docs_url="https://kagi.com/proxy/png-clipart-microsoft-sql-server-microsoft-azure-sql-database-microsoft-text-logo-thumbnail.png?c=4Sg1bvcUGOrhnDzXgoBBa0G0j27ykgskX4a8cLrZp_quzqlpVGVG02OqQtezTxy7lB6ydmTKgbVAn_F7BxofxK6LKKUZSpjJ1huIAsXPVaXyakO4sWXFiX0Wz_8WjkA0AIlO_oFfW31AKaj5RcvGcr3siy0n5kW-GcqdpeBWsmm_huxUT6RycULFCDFBwuUzHvVl5TW3cYqlMxT8ecPZfg%3D%3D",
             icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/f/f7/Azure_SQL_Database_logo.svg/1200px-Azure_SQL_Database_logo.svg.png",
             tags=[ToolsetTag.CORE],
-            experimental=True,
             tools=[
                 AnalyzeDatabaseHealthStatus(self),
                 AnalyzeDatabasePerformance(self),
holmes/plugins/toolsets/grafana/grafana_api.py
CHANGED
@@ -27,7 +27,7 @@ def grafana_health_check(config: GrafanaConfig) -> Tuple[bool, str]:
         response.raise_for_status()
         return True, ""
     except Exception as e:
-        logging.
+        logging.debug(f"Failed to fetch grafana health status at {url}", exc_info=True)
         error_msg = f"Failed to fetch grafana health status at {url}. {str(e)}"
 
         # Add helpful hint if this looks like a common misconfiguration
holmes/plugins/toolsets/investigator/core_investigation.py
CHANGED
@@ -29,7 +29,11 @@ class TodoWriteTool(Tool):
                 properties={
                     "id": ToolParameter(type="string", required=True),
                     "content": ToolParameter(type="string", required=True),
-                    "status": ToolParameter(
+                    "status": ToolParameter(
+                        type="string",
+                        required=True,
+                        enum=["pending", "in_progress", "completed"],
+                    ),
                 },
             ),
         ),
@@ -58,22 +62,20 @@
         content_width = max(max_content_width, len("Content"))
         status_width = max(max_status_display_width, len("Status"))
 
-        # Build table
         separator = f"+{'-' * (id_width + 2)}+{'-' * (content_width + 2)}+{'-' * (status_width + 2)}+"
         header = f"| {'ID':<{id_width}} | {'Content':<{content_width}} | {'Status':<{status_width}} |"
-
-        # Log the table
-        logging.info("Updated Investigation Tasks:")
-        logging.info(separator)
-        logging.info(header)
-        logging.info(separator)
+        tasks_to_display = []
 
         for task in tasks:
             status_display = f"{status_icons[task.status.value]} {task.status.value}"
             row = f"| {task.id:<{id_width}} | {task.content:<{content_width}} | {status_display:<{status_width}} |"
-
+            tasks_to_display.append(row)
 
-        logging.info(
+        logging.info(
+            f"Task List:\n{separator}\n{header}\n{separator}\n"
+            + "\n".join(tasks_to_display)
+            + f"\n{separator}"
+        )
 
     def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         try:
@@ -90,7 +92,7 @@
             )
             tasks.append(task)
 
-            logging.
+            logging.debug(f"Tasks: {len(tasks)}")
 
             self.print_tasks_table(tasks)
             formatted_tasks = format_tasks(tasks)
@@ -116,8 +118,7 @@
         )
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-
-        return f"Write {todos} investigation tasks"
+        return "Update investigation tasks"
 
 
 class CoreInvestigationToolset(Toolset):
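The enum values added to the status ToolParameter above rely on the new enum field introduced in holmes/core/tools.py. The exact serialization is handled in holmes/core/openai_formatting.py (changed in this release but not shown here); a plausible sketch of the mapping, assuming the field becomes the standard JSON-schema enum keyword in the tool definition sent to the model:

from typing import List, Optional

def to_json_schema_property(param_type: str, enum: Optional[List[str]] = None) -> dict:
    # Hypothetical helper - the real conversion lives in holmes/core/openai_formatting.py.
    prop: dict = {"type": param_type}
    if enum:
        prop["enum"] = enum  # restricts the values the model may pass for this argument
    return prop

print(to_json_schema_property("string", enum=["pending", "in_progress", "completed"]))
# {'type': 'string', 'enum': ['pending', 'in_progress', 'completed']}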
|