holmesgpt 0.14.4a0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +12 -10
- holmes/common/env_vars.py +22 -0
- holmes/config.py +51 -4
- holmes/core/conversations.py +3 -2
- holmes/core/llm.py +226 -72
- holmes/core/openai_formatting.py +13 -0
- holmes/core/supabase_dal.py +33 -42
- holmes/core/tool_calling_llm.py +185 -282
- holmes/core/tools.py +21 -1
- holmes/core/tools_utils/token_counting.py +2 -1
- holmes/core/tools_utils/tool_context_window_limiter.py +32 -30
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +17 -7
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/toolsets/__init__.py +4 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +0 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/grafana/grafana_api.py +1 -1
- holmes/plugins/toolsets/investigator/core_investigation.py +34 -24
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1 -1
- holmes/plugins/toolsets/robusta/robusta.py +35 -8
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +4 -3
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -1
- holmes/utils/stream.py +31 -1
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/METADATA +6 -2
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/RECORD +36 -31
- holmes/core/performance_timing.py +0 -72
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/entry_points.txt +0 -0
holmes/core/tools.py
CHANGED
@@ -158,6 +158,7 @@ class ToolParameter(BaseModel):
     required: bool = True
     properties: Optional[Dict[str, "ToolParameter"]] = None  # For object types
     items: Optional["ToolParameter"] = None  # For array item schemas
+    enum: Optional[List[str]] = None  # For restricting to specific values


 class ToolInvokeContext(BaseModel):
@@ -682,7 +683,26 @@ class Toolset(BaseModel):
     def check_prerequisites(self):
         self.status = ToolsetStatusEnum.ENABLED

-        for prereq in self.prerequisites:
+        # Sort prerequisites by type to fail fast on missing env vars before
+        # running slow commands (e.g., ArgoCD checks that timeout):
+        # 1. Static checks (instant)
+        # 2. Environment variable checks (instant, often required by commands)
+        # 3. Callable checks (variable speed)
+        # 4. Command checks (slowest - may timeout or hang)
+        def prereq_priority(prereq):
+            if isinstance(prereq, StaticPrerequisite):
+                return 0
+            elif isinstance(prereq, ToolsetEnvironmentPrerequisite):
+                return 1
+            elif isinstance(prereq, CallablePrerequisite):
+                return 2
+            elif isinstance(prereq, ToolsetCommandPrerequisite):
+                return 3
+            return 4  # Unknown types go last
+
+        sorted_prereqs = sorted(self.prerequisites, key=prereq_priority)
+
+        for prereq in sorted_prereqs:
             if isinstance(prereq, ToolsetCommandPrerequisite):
                 try:
                     command = self.interpolate_command(prereq.command)
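The net effect of the reordering is that cheap checks gate expensive ones: a toolset missing an environment variable now fails its prerequisites before any slow command probe runs. A minimal standalone sketch of the ordering logic, using empty stand-in classes rather than the real prerequisite types from holmes.core.tools:

```python
# Standalone sketch of the fail-fast ordering added to Toolset.check_prerequisites.
# The empty classes are stand-ins; the real prerequisite classes carry configuration
# (commands, env var names, callables) and live in holmes.core.tools.
class StaticPrerequisite: ...


class ToolsetEnvironmentPrerequisite: ...


class CallablePrerequisite: ...


class ToolsetCommandPrerequisite: ...


def prereq_priority(prereq) -> int:
    # Instant checks first, slow (possibly hanging) command checks last.
    if isinstance(prereq, StaticPrerequisite):
        return 0
    if isinstance(prereq, ToolsetEnvironmentPrerequisite):
        return 1
    if isinstance(prereq, CallablePrerequisite):
        return 2
    if isinstance(prereq, ToolsetCommandPrerequisite):
        return 3
    return 4  # unknown prerequisite types go last


prereqs = [ToolsetCommandPrerequisite(), CallablePrerequisite(), StaticPrerequisite()]
ordered = sorted(prereqs, key=prereq_priority)
print([type(p).__name__ for p in ordered])
# ['StaticPrerequisite', 'CallablePrerequisite', 'ToolsetCommandPrerequisite']
```

Because sorted() is stable, prerequisites of the same type keep their original relative order, so only the cross-type ordering changes.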
holmes/core/tools_utils/tool_context_window_limiter.py
CHANGED

@@ -1,11 +1,16 @@
 from typing import Optional
-from
+from pydantic import BaseModel
 from holmes.core.llm import LLM
 from holmes.core.tools import StructuredToolResultStatus
 from holmes.core.models import ToolCallResult
 from holmes.utils import sentry_helper


+class ToolCallSizeMetadata(BaseModel):
+    messages_token: int
+    max_tokens_allowed: int
+
+
 def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int:
     context_window_size = llm.get_context_window_size()

@@ -15,41 +20,38 @@ def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int
     return context_window_size


-def
-    ...
-        error_message: Optional[str] = (
-            f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
+def is_tool_call_too_big(
+    tool_call_result: ToolCallResult, llm: LLM
+) -> tuple[bool, Optional[ToolCallSizeMetadata]]:
+    if tool_call_result.result.status == StructuredToolResultStatus.SUCCESS:
+        message = tool_call_result.as_tool_call_message()
+
+        tokens = llm.count_tokens(messages=[message])
+        max_tokens_allowed = llm.get_max_token_count_for_single_tool()
+        return (
+            tokens.total_tokens > max_tokens_allowed,
+            ToolCallSizeMetadata(
+                messages_token=tokens.total_tokens,
+                max_tokens_allowed=max_tokens_allowed,
+            ),
         )
+    return False, None

-    if tool_call_result.result.status == StructuredToolResultStatus.NO_DATA:
-        error_message = None
-        # tool_call_result.result.data is set to None below which is expected to fix the issue
-    elif tool_call_result.result.status == StructuredToolResultStatus.ERROR:
-        original_error = (
-            tool_call_result.result.error
-            or tool_call_result.result.data
-            or "Unknown error"
-        )
-        truncated_error = str(original_error)[:100]
-        error_message = f"The tool call returned an error it is too large to return\nThe following original error is truncated:\n{truncated_error}"

+def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
+    tool_call_result_is_too_big, metadata = is_tool_call_too_big(
+        tool_call_result=tool_call_result, llm=llm
+    )
+    if tool_call_result_is_too_big and metadata:
+        relative_pct = (
+            (metadata.messages_token - metadata.max_tokens_allowed)
+            / metadata.messages_token
+        ) * 100
+        error_message = f"The tool call result is too large to return: {metadata.messages_token} tokens.\nThe maximum allowed tokens is {metadata.max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
         tool_call_result.result.status = StructuredToolResultStatus.ERROR
         tool_call_result.result.data = None
         tool_call_result.result.error = error_message

         sentry_helper.capture_toolcall_contains_too_many_tokens(
-            tool_call_result, messages_token, max_tokens_allowed
+            tool_call_result, metadata.messages_token, metadata.max_tokens_allowed
         )
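For a sense of the arithmetic behind the new error message, here is a small illustrative sketch; the token counts are made up, whereas in the package they come from llm.count_tokens() and llm.get_max_token_count_for_single_tool():

```python
# Illustrative numbers only: a successful tool result that exceeds the per-tool budget.
messages_token = 50_000      # tokens in the tool call result message
max_tokens_allowed = 20_000  # per-tool budget derived from the context window

if messages_token > max_tokens_allowed:
    relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
    error_message = (
        f"The tool call result is too large to return: {messages_token} tokens.\n"
        f"The maximum allowed tokens is {max_tokens_allowed} "
        f"which is {relative_pct:.1f}% smaller."
    )
    print(error_message)  # ... which is 60.0% smaller.
```

The result's data is then dropped and its status flipped to ERROR, so the model is asked to retry with a narrower query instead of receiving the oversized payload.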
holmes/core/truncation/compaction.py
ADDED

@@ -0,0 +1,59 @@
+import logging
+from typing import Optional
+from holmes.core.llm import LLM
+from holmes.plugins.prompts import load_and_render_prompt
+from litellm.types.utils import ModelResponse
+
+
+def strip_system_prompt(
+    conversation_history: list[dict],
+) -> tuple[list[dict], Optional[dict]]:
+    if not conversation_history:
+        return conversation_history, None
+    first_message = conversation_history[0]
+    if first_message and first_message.get("role") == "system":
+        return conversation_history[1:], first_message
+    return conversation_history[:], None
+
+
+def compact_conversation_history(
+    original_conversation_history: list[dict], llm: LLM
+) -> list[dict]:
+    conversation_history, system_prompt_message = strip_system_prompt(
+        original_conversation_history
+    )
+    compaction_instructions = load_and_render_prompt(
+        prompt="builtin://conversation_history_compaction.jinja2", context={}
+    )
+    conversation_history.append({"role": "user", "content": compaction_instructions})
+
+    response: ModelResponse = llm.completion(conversation_history)  # type: ignore
+    response_message = None
+    if (
+        response
+        and response.choices
+        and response.choices[0]
+        and response.choices[0].message  # type:ignore
+    ):
+        response_message = response.choices[0].message  # type:ignore
+    else:
+        logging.error(
+            "Failed to compact conversation history. Unexpected LLM's response for compaction"
+        )
+        return original_conversation_history
+
+    compacted_conversation_history: list[dict] = []
+    if system_prompt_message:
+        compacted_conversation_history.append(system_prompt_message)
+    compacted_conversation_history.append(
+        response_message.model_dump(
+            exclude_defaults=True, exclude_unset=True, exclude_none=True
+        )
+    )
+    compacted_conversation_history.append(
+        {
+            "role": "system",
+            "content": "The conversation history has been compacted to preserve available space in the context window. Continue.",
+        }
+    )
+    return compacted_conversation_history
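When compaction succeeds, the returned history collapses to three messages; an illustrative sketch of that shape (placeholder contents, not real model output):

```python
# Shape of a successfully compacted history: original system prompt (if any),
# the model's summary, then a fixed notice telling the model to continue.
compacted_history = [
    {"role": "system", "content": "<original system prompt, when one was present>"},
    {"role": "assistant", "content": "<analysis and summary returned by the model>"},
    {
        "role": "system",
        "content": "The conversation history has been compacted to preserve "
        "available space in the context window. Continue.",
    },
]
```

If the model's response is malformed, the function logs an error and returns the original history unchanged.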
holmes/core/truncation/input_context_window_limiter.py
ADDED

@@ -0,0 +1,218 @@
+import logging
+from typing import Any, Optional
+from pydantic import BaseModel
+import sentry_sdk
+from holmes.common.env_vars import (
+    ENABLE_CONVERSATION_HISTORY_COMPACTION,
+    MAX_OUTPUT_TOKEN_RESERVATION,
+)
+from holmes.core.llm import (
+    LLM,
+    TokenCountMetadata,
+    get_context_window_compaction_threshold_pct,
+)
+from holmes.core.models import TruncationMetadata, TruncationResult
+from holmes.core.truncation.compaction import compact_conversation_history
+from holmes.utils import sentry_helper
+from holmes.utils.stream import StreamEvents, StreamMessage
+
+
+TRUNCATION_NOTICE = "\n\n[TRUNCATED]"
+
+
+def _truncate_tool_message(
+    msg: dict, allocated_space: int, needed_space: int
+) -> TruncationMetadata:
+    msg_content = msg["content"]
+    tool_call_id = msg["tool_call_id"]
+    tool_name = msg["name"]
+
+    # Ensure the indicator fits in the allocated space
+    if allocated_space > len(TRUNCATION_NOTICE):
+        original = msg_content if isinstance(msg_content, str) else str(msg_content)
+        msg["content"] = (
+            original[: allocated_space - len(TRUNCATION_NOTICE)] + TRUNCATION_NOTICE
+        )
+        end_index = allocated_space - len(TRUNCATION_NOTICE)
+    else:
+        msg["content"] = TRUNCATION_NOTICE[:allocated_space]
+        end_index = allocated_space
+
+    msg.pop("token_count", None)  # Remove token_count if present
+    logging.info(
+        f"Truncating tool message '{tool_name}' from {needed_space} to {allocated_space} tokens"
+    )
+    truncation_metadata = TruncationMetadata(
+        tool_call_id=tool_call_id,
+        start_index=0,
+        end_index=end_index,
+        tool_name=tool_name,
+        original_token_count=needed_space,
+    )
+    return truncation_metadata
+
+
+# TODO: I think there's a bug here because we don't account for the 'role' or json structure like '{...}' when counting tokens
+# However, in practice it works because we reserve enough space for the output tokens that the minor inconsistency does not matter
+# We should fix this in the future
+# TODO: we truncate using character counts not token counts - this means we're overly agressive with truncation - improve it by considering
+# token truncation and not character truncation
+def truncate_messages_to_fit_context(
+    messages: list, max_context_size: int, maximum_output_token: int, count_tokens_fn
+) -> TruncationResult:
+    """
+    Helper function to truncate tool messages to fit within context limits.
+
+    Args:
+        messages: List of message dictionaries with roles and content
+        max_context_size: Maximum context window size for the model
+        maximum_output_token: Maximum tokens reserved for model output
+        count_tokens_fn: Function to count tokens for a list of messages
+
+    Returns:
+        Modified list of messages with truncated tool responses
+
+    Raises:
+        Exception: If non-tool messages exceed available context space
+    """
+    messages_except_tools = [
+        message for message in messages if message["role"] != "tool"
+    ]
+    tokens = count_tokens_fn(messages_except_tools)
+    message_size_without_tools = tokens.total_tokens
+
+    tool_call_messages = [message for message in messages if message["role"] == "tool"]
+
+    reserved_for_output_tokens = min(maximum_output_token, MAX_OUTPUT_TOKEN_RESERVATION)
+    if message_size_without_tools >= (max_context_size - reserved_for_output_tokens):
+        logging.error(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the model's context window for input."
+        )
+        raise Exception(
+            f"The combined size of system_prompt and user_prompt ({message_size_without_tools} tokens) exceeds the maximum context size of {max_context_size - reserved_for_output_tokens} tokens available for input."
+        )
+
+    if len(tool_call_messages) == 0:
+        return TruncationResult(truncated_messages=messages, truncations=[])
+
+    available_space = (
+        max_context_size - message_size_without_tools - reserved_for_output_tokens
+    )
+    remaining_space = available_space
+    tool_call_messages.sort(
+        key=lambda x: count_tokens_fn(
+            [{"role": "tool", "content": x["content"]}]
+        ).total_tokens
+    )
+
+    truncations = []
+
+    # Allocate space starting with small tools and going to larger tools, while maintaining fairness
+    # Small tools can often get exactly what they need, while larger tools may need to be truncated
+    # We ensure fairness (no tool gets more than others that need it) and also maximize utilization (we don't leave space unused)
+    for i, msg in enumerate(tool_call_messages):
+        remaining_tools = len(tool_call_messages) - i
+        max_allocation = remaining_space // remaining_tools
+        needed_space = count_tokens_fn(
+            [{"role": "tool", "content": msg["content"]}]
+        ).total_tokens
+        allocated_space = min(needed_space, max_allocation)
+
+        if needed_space > allocated_space:
+            truncation_metadata = _truncate_tool_message(
+                msg, allocated_space, needed_space
+            )
+            truncations.append(truncation_metadata)
+
+        remaining_space -= allocated_space
+
+    if truncations:
+        sentry_helper.capture_tool_truncations(truncations)
+
+    return TruncationResult(truncated_messages=messages, truncations=truncations)
+
+
+class ContextWindowLimiterOutput(BaseModel):
+    metadata: dict
+    messages: list[dict]
+    events: list[StreamMessage]
+    max_context_size: int
+    maximum_output_token: int
+    tokens: TokenCountMetadata
+    conversation_history_compacted: bool
+
+
+@sentry_sdk.trace
+def limit_input_context_window(
+    llm: LLM, messages: list[dict], tools: Optional[list[dict[str, Any]]]
+) -> ContextWindowLimiterOutput:
+    events = []
+    metadata = {}
+    initial_tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    max_context_size = llm.get_context_window_size()
+    maximum_output_token = llm.get_maximum_output_token()
+    conversation_history_compacted = False
+    if ENABLE_CONVERSATION_HISTORY_COMPACTION and (
+        initial_tokens.total_tokens + maximum_output_token
+    ) > (max_context_size * get_context_window_compaction_threshold_pct() / 100):
+        compacted_messages = compact_conversation_history(
+            original_conversation_history=messages, llm=llm
+        )
+        compacted_tokens = llm.count_tokens(compacted_messages, tools=tools)
+        compacted_total_tokens = compacted_tokens.total_tokens
+
+        if compacted_total_tokens < initial_tokens.total_tokens:
+            messages = compacted_messages
+            compaction_message = f"The conversation history has been compacted from {initial_tokens.total_tokens} to {compacted_total_tokens} tokens"
+            logging.info(compaction_message)
+            conversation_history_compacted = True
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.CONVERSATION_HISTORY_COMPACTED,
+                    data={
+                        "content": compaction_message,
+                        "messages": compacted_messages,
+                        "metadata": {
+                            "initial_tokens": initial_tokens.total_tokens,
+                            "compacted_tokens": compacted_total_tokens,
+                        },
+                    },
+                )
+            )
+            events.append(
+                StreamMessage(
+                    event=StreamEvents.AI_MESSAGE,
+                    data={"content": compaction_message},
+                )
+            )
+        else:
+            logging.debug(
+                f"Failed to reduce token count when compacting conversation history. Original tokens:{initial_tokens.total_tokens}. Compacted tokens:{compacted_total_tokens}"
+            )
+
+    tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    if (tokens.total_tokens + maximum_output_token) > max_context_size:
+        # Compaction was not sufficient. Truncating messages.
+        truncated_res = truncate_messages_to_fit_context(
+            messages=messages,
+            max_context_size=max_context_size,
+            maximum_output_token=maximum_output_token,
+            count_tokens_fn=llm.count_tokens,
+        )
+        metadata["truncations"] = [t.model_dump() for t in truncated_res.truncations]
+        messages = truncated_res.truncated_messages
+
+        # recount after truncation
+        tokens = llm.count_tokens(messages=messages, tools=tools)  # type: ignore
+    else:
+        metadata["truncations"] = []
+
+    return ContextWindowLimiterOutput(
+        events=events,
+        messages=messages,
+        metadata=metadata,
+        max_context_size=max_context_size,
+        maximum_output_token=maximum_output_token,
+        tokens=tokens,
+        conversation_history_compacted=conversation_history_compacted,
+    )
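The compaction trigger compares the projected input-plus-output size against a percentage of the context window. A sketch with illustrative numbers (the threshold shown is an assumption; the real value comes from get_context_window_compaction_threshold_pct() in holmes.core.llm):

```python
# Illustrative values; in the package these come from the LLM wrapper.
max_context_size = 128_000
maximum_output_token = 8_000
threshold_pct = 90          # assumed threshold percentage
input_tokens = 115_000      # counted over the messages plus tool definitions

needs_compaction = (input_tokens + maximum_output_token) > (
    max_context_size * threshold_pct / 100
)
print(needs_compaction)  # True: 123,000 > 115,200, so the history gets compacted
```

If the compacted history is still too large once the reserved output tokens are added, tool messages are truncated via truncate_messages_to_fit_context() and the token count is redone.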
holmes/interactive.py
CHANGED
@@ -480,10 +480,14 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> None:
         return

     # Calculate context statistics
-
+    tokens_metadata = ai.llm.count_tokens(
+        messages
+    )  # TODO: pass tools to also count tokens used by input tools
     max_context_size = ai.llm.get_context_window_size()
     max_output_tokens = ai.llm.get_maximum_output_token()
-    available_tokens =
+    available_tokens = (
+        max_context_size - tokens_metadata.total_tokens - max_output_tokens
+    )

     # Analyze token distribution by role and tool calls
     role_token_usage: DefaultDict[str, int] = defaultdict(int)
@@ -492,19 +496,21 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> None:

     for msg in messages:
         role = msg.get("role", "unknown")
-
-
+        message_tokens = ai.llm.count_tokens(
+            [msg]
+        )  # TODO: pass tools to also count tokens used by input tools
+        role_token_usage[role] += message_tokens.total_tokens

         # Track individual tool usage
         if role == "tool":
             tool_name = msg.get("name", "unknown_tool")
-            tool_token_usage[tool_name] +=
+            tool_token_usage[tool_name] += message_tokens.total_tokens
             tool_call_counts[tool_name] += 1

     # Display context information
     console.print(f"[bold {STATUS_COLOR}]Conversation Context:[/bold {STATUS_COLOR}]")
     console.print(
-        f"  Context used: {total_tokens:,} / {max_context_size:,} tokens ({(total_tokens / max_context_size) * 100:.1f}%)"
+        f"  Context used: {tokens_metadata.total_tokens:,} / {max_context_size:,} tokens ({(tokens_metadata.total_tokens / max_context_size) * 100:.1f}%)"
     )
     console.print(
         f"  Space remaining: {available_tokens:,} for input ({(available_tokens / max_context_size) * 100:.1f}%) + {max_output_tokens:,} reserved for output ({(max_output_tokens / max_context_size) * 100:.1f}%)"
@@ -515,7 +521,11 @@ def handle_context_command(messages, ai: ToolCallingLLM, console: Console) -> None:
     for role in ["system", "user", "assistant", "tool"]:
         if role in role_token_usage:
             tokens = role_token_usage[role]
-            percentage = (
+            percentage = (
+                (tokens / tokens_metadata.total_tokens) * 100
+                if tokens_metadata.total_tokens > 0
+                else 0
+            )
             role_name = {
                 "system": "system prompt",
                 "user": "user messages",
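The /context numbers reduce to simple bookkeeping over the counted tokens; a quick sketch with illustrative values:

```python
# Illustrative values; tokens_metadata.total_tokens is what /context now reports.
max_context_size = 200_000
max_output_tokens = 16_000
used_tokens = 42_500  # total tokens counted over the conversation history

available_tokens = max_context_size - used_tokens - max_output_tokens
print(
    f"Context used: {used_tokens:,} / {max_context_size:,} tokens "
    f"({used_tokens / max_context_size * 100:.1f}%)"
)
print(
    f"Space remaining: {available_tokens:,} for input "
    f"({available_tokens / max_context_size * 100:.1f}%) "
    f"+ {max_output_tokens:,} reserved for output "
    f"({max_output_tokens / max_context_size * 100:.1f}%)"
)
```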
holmes/plugins/prompts/_general_instructions.jinja2
CHANGED

@@ -12,8 +12,7 @@
 * do not stop investigating until you are at the final root cause you are able to find.
 * use the "five whys" methodology to find the root cause.
 * for example, if you found a problem in microservice A that is due to an error in microservice B, look at microservice B too and find the error in that.
-* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and
-* in this case, try to find substrings or search for the correct spellings
+* if you cannot find the resource/application that the user referred to, assume they made a typo or included/excluded characters like - and in this case, try to find substrings or search for the correct spellings
 * always provide detailed information like exact resource names, versions, labels, etc
 * even if you found the root cause, keep investigating to find other possible root causes and to gather data for the answer like exact names
 * if a runbook url is present you MUST fetch the runbook before beginning your investigation
holmes/plugins/prompts/conversation_history_compaction.jinja2
ADDED

@@ -0,0 +1,88 @@
+Your task is to create a detailed summary of the conversation so far, paying close attention to the user's explicit requests and your previous actions.
+This summary should be thorough in capturing technical details, code patterns, and architectural decisions that would be essential for continuing development work without losing context.
+
+Before providing your final summary, wrap your analysis in <analysis> tags to organize your thoughts and ensure you've covered all necessary points. In your analysis process:
+
+1. Chronologically analyze each message and section of the conversation. For each section thoroughly identify:
+   - The user's explicit requests and intents
+   - Your approach to addressing the user's requests
+   - Key decisions, technical concepts and code patterns
+   - Specific details like kubernetes resource names, namespaces, relevant logs extracts (verbatim), etc
+   - What tools were called and the outcome or analysis of the tool output
+2. Double-check for technical accuracy and completeness, addressing each required element thoroughly.
+
+Your summary should include the following sections:
+
+1. Primary Request and Intent: Capture all of the user's explicit requests and intents in detail
+2. Key Technical Concepts: List all important technical concepts, technologies, and frameworks discussed.
+3. Resources: Enumerate specific kubernetes or cloud resources and logs extract examined. Pay special attention to the most recent messages and include logs or tool outputs where applicable and include a summary of why this resource is important.
+4. Tool calls: List all tool calls that were executed and whether they failed/succeeded. Make sure to mention the full arguments used. Only summarize the arguments if they are over 200 characters long
+5. Problem Solving: Document problems solved and any ongoing troubleshooting efforts.
+6. Pending Tasks: Outline any pending tasks that you have explicitly been asked to work on.
+7. Current Work: Describe in detail precisely what was being worked on immediately before this summary request, paying special attention to the most recent messages from both user and assistant. Include resource names and their namespace and log extracts where applicable.
+8. Optional Next Step: List the next step that you will take that is related to the most recent work you were doing. IMPORTANT: ensure that this step is DIRECTLY in line with the user's explicit requests, and the task you were working on immediately before this summary request. If your last task was concluded, then only list next steps if they are explicitly in line with the users request. Do not start on tangential requests without confirming with the user first.
+If there is a next step, include direct quotes from the most recent conversation showing exactly what task you were working on and where you left off. This should be verbatim to ensure there's no drift in task interpretation.
+
+Here's an example of how your output should be structured:
+
+<example>
+<analysis>
+[Your thought process, ensuring all points are covered thoroughly and accurately]
+</analysis>
+
+<summary>
+1. Primary Request and Intent:
+   [Detailed description]
+
+2. Key Technical Concepts:
+   - [Concept 1]
+   - [Concept 2]
+   - [...]
+
+3. Infrastructure Resources:
+   - [Deployment name 1]
+     - [Summary of why this deployment is important]
+     - [Summary of the issues identified with this deployment, if any]
+     - [List of related pods/services or otyher resources and why they are relevant]
+   - [Pod name 2]
+     - [Summary of why this pod is important]
+     - [Summary of the issues identified with this pod, if any]
+     - [List of related pods/services or otyher resources and why they are relevant]
+   - [...]
+
+4. Tool Calls:
+   - [✅ function_name {args}]
+   - [✅ function_name {args}]
+   - [❌ function_name {args} - NO DATA]
+   - [❌ function_name {args} - Error message]
+   - [...]
+
+5. Problem Solving:
+   [Description of solved problems and ongoing troubleshooting]
+
+6. Pending Tasks:
+   - [Task 1]
+   - [Task 2]
+   - [...]
+
+7. Current Work:
+   [Precise description of current work]
+
+8. Optional Next Step:
+   [Optional Next step to take]
+
+</summary>
+</example>
+
+Please provide your summary based on the conversation so far, following this structure and ensuring precision and thoroughness in your response.
+
+There may be additional summarization instructions provided in the included context. If so, remember to follow these instructions when creating the above summary. Examples of instructions include:
+<example>
+## Compact Instructions
+When summarizing the conversation focus on typescript code changes and also remember the mistakes you made and how you fixed them.
+</example>
+
+<example>
+# Summary instructions
+When you are using compact - please focus on test output and code changes. Include relevant logs verbatim.
+</example>
holmes/plugins/toolsets/__init__.py
CHANGED

@@ -44,6 +44,9 @@ from holmes.plugins.toolsets.mcp.toolset_mcp import RemoteMCPToolset
 from holmes.plugins.toolsets.newrelic.newrelic import NewRelicToolset
 from holmes.plugins.toolsets.opensearch.opensearch import OpenSearchToolset
 from holmes.plugins.toolsets.opensearch.opensearch_logs import OpenSearchLogsToolset
+from holmes.plugins.toolsets.opensearch.opensearch_query_assist import (
+    OpenSearchQueryAssistToolset,
+)
 from holmes.plugins.toolsets.opensearch.opensearch_traces import OpenSearchTracesToolset
 from holmes.plugins.toolsets.rabbitmq.toolset_rabbitmq import RabbitMQToolset
 from holmes.plugins.toolsets.robusta.robusta import RobustaToolset
@@ -93,6 +96,7 @@ def load_python_toolsets(dal: Optional[SupabaseDal]) -> List[Toolset]:
         DatadogRDSToolset(),
         OpenSearchLogsToolset(),
         OpenSearchTracesToolset(),
+        OpenSearchQueryAssistToolset(),
         CoralogixLogsToolset(),
         RabbitMQToolset(),
         GitToolset(),
holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py
CHANGED

@@ -42,7 +42,6 @@ class MongoDBAtlasToolset(Toolset):
     def __init__(self):
         super().__init__(
             prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
-            experimental=True,
             tools=[
                 ReturnProjectAlerts(toolset=self),
                 ReturnProjectProcesses(toolset=self),
holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py
CHANGED

@@ -60,7 +60,6 @@ class AzureSQLToolset(BaseAzureSQLToolset):
             docs_url="https://kagi.com/proxy/png-clipart-microsoft-sql-server-microsoft-azure-sql-database-microsoft-text-logo-thumbnail.png?c=4Sg1bvcUGOrhnDzXgoBBa0G0j27ykgskX4a8cLrZp_quzqlpVGVG02OqQtezTxy7lB6ydmTKgbVAn_F7BxofxK6LKKUZSpjJ1huIAsXPVaXyakO4sWXFiX0Wz_8WjkA0AIlO_oFfW31AKaj5RcvGcr3siy0n5kW-GcqdpeBWsmm_huxUT6RycULFCDFBwuUzHvVl5TW3cYqlMxT8ecPZfg%3D%3D",
             icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/f/f7/Azure_SQL_Database_logo.svg/1200px-Azure_SQL_Database_logo.svg.png",
             tags=[ToolsetTag.CORE],
-            experimental=True,
             tools=[
                 AnalyzeDatabaseHealthStatus(self),
                 AnalyzeDatabasePerformance(self),
holmes/plugins/toolsets/grafana/grafana_api.py
CHANGED

@@ -27,7 +27,7 @@ def grafana_health_check(config: GrafanaConfig) -> Tuple[bool, str]:
         response.raise_for_status()
         return True, ""
     except Exception as e:
-        logging.
+        logging.debug(f"Failed to fetch grafana health status at {url}", exc_info=True)
         error_msg = f"Failed to fetch grafana health status at {url}. {str(e)}"

         # Add helpful hint if this looks like a common misconfiguration
|