holmesgpt 0.16.2a0__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +4 -3
  3. holmes/common/env_vars.py +18 -2
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +11 -6
  6. holmes/core/conversations.py +30 -13
  7. holmes/core/investigation.py +21 -25
  8. holmes/core/investigation_structured_output.py +3 -3
  9. holmes/core/issue.py +1 -1
  10. holmes/core/llm.py +50 -31
  11. holmes/core/models.py +19 -17
  12. holmes/core/openai_formatting.py +1 -1
  13. holmes/core/prompt.py +47 -2
  14. holmes/core/runbooks.py +1 -0
  15. holmes/core/safeguards.py +4 -2
  16. holmes/core/supabase_dal.py +4 -2
  17. holmes/core/tool_calling_llm.py +102 -141
  18. holmes/core/tools.py +19 -28
  19. holmes/core/tools_utils/token_counting.py +9 -2
  20. holmes/core/tools_utils/tool_context_window_limiter.py +13 -30
  21. holmes/core/tools_utils/tool_executor.py +0 -18
  22. holmes/core/tools_utils/toolset_utils.py +1 -0
  23. holmes/core/toolset_manager.py +37 -2
  24. holmes/core/tracing.py +13 -2
  25. holmes/core/transformers/__init__.py +1 -1
  26. holmes/core/transformers/base.py +1 -0
  27. holmes/core/transformers/llm_summarize.py +3 -2
  28. holmes/core/transformers/registry.py +2 -1
  29. holmes/core/transformers/transformer.py +1 -0
  30. holmes/core/truncation/compaction.py +37 -2
  31. holmes/core/truncation/input_context_window_limiter.py +3 -2
  32. holmes/interactive.py +52 -8
  33. holmes/main.py +17 -37
  34. holmes/plugins/interfaces.py +2 -1
  35. holmes/plugins/prompts/__init__.py +2 -1
  36. holmes/plugins/prompts/_fetch_logs.jinja2 +5 -5
  37. holmes/plugins/prompts/_runbook_instructions.jinja2 +2 -1
  38. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  39. holmes/plugins/prompts/conversation_history_compaction.jinja2 +2 -1
  40. holmes/plugins/prompts/generic_ask.jinja2 +0 -2
  41. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -2
  42. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -2
  43. holmes/plugins/prompts/generic_investigation.jinja2 +0 -2
  44. holmes/plugins/prompts/investigation_procedure.jinja2 +2 -1
  45. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -2
  46. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -2
  47. holmes/plugins/runbooks/__init__.py +32 -3
  48. holmes/plugins/sources/github/__init__.py +4 -2
  49. holmes/plugins/sources/prometheus/models.py +1 -0
  50. holmes/plugins/toolsets/__init__.py +30 -26
  51. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +13 -12
  52. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  53. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  54. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  55. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  56. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  57. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -12
  58. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +7 -7
  59. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -7
  60. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -5
  61. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  62. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -7
  63. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -8
  64. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -3
  65. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -3
  66. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -3
  67. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -3
  68. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  69. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  70. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  71. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  72. holmes/plugins/toolsets/bash/bash_toolset.py +2 -3
  73. holmes/plugins/toolsets/bash/common/bash.py +19 -9
  74. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  75. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  76. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  77. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  78. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  79. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  80. holmes/plugins/toolsets/connectivity_check.py +124 -0
  81. holmes/plugins/toolsets/coralogix/api.py +132 -119
  82. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  84. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  85. holmes/plugins/toolsets/datadog/datadog_api.py +36 -3
  86. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +34 -1
  87. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  88. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  89. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  90. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  91. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +71 -28
  92. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +224 -375
  93. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +67 -36
  94. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +360 -343
  95. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  96. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  97. holmes/plugins/toolsets/git.py +7 -8
  98. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  99. holmes/plugins/toolsets/grafana/common.py +2 -30
  100. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +2 -1
  101. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +18 -2
  102. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +92 -18
  103. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  104. holmes/plugins/toolsets/grafana/toolset_grafana.py +109 -25
  105. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +22 -0
  106. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +201 -33
  107. holmes/plugins/toolsets/grafana/trace_parser.py +3 -2
  108. holmes/plugins/toolsets/internet/internet.py +10 -10
  109. holmes/plugins/toolsets/internet/notion.py +5 -6
  110. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  111. holmes/plugins/toolsets/investigator/model.py +3 -1
  112. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  113. holmes/plugins/toolsets/kafka.py +12 -7
  114. holmes/plugins/toolsets/kubernetes.yaml +260 -30
  115. holmes/plugins/toolsets/kubernetes_logs.py +3 -3
  116. holmes/plugins/toolsets/logging_utils/logging_api.py +16 -6
  117. holmes/plugins/toolsets/mcp/toolset_mcp.py +88 -60
  118. holmes/plugins/toolsets/newrelic/new_relic_api.py +41 -1
  119. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +24 -0
  120. holmes/plugins/toolsets/newrelic/newrelic.py +212 -55
  121. holmes/plugins/toolsets/prometheus/prometheus.py +358 -102
  122. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +11 -3
  123. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  124. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +5 -5
  125. holmes/plugins/toolsets/robusta/robusta.py +5 -5
  126. holmes/plugins/toolsets/runbook/runbook_fetcher.py +25 -6
  127. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +1 -1
  128. holmes/plugins/toolsets/utils.py +1 -1
  129. holmes/utils/config_utils.py +1 -1
  130. holmes/utils/connection_utils.py +31 -0
  131. holmes/utils/console/result.py +10 -0
  132. holmes/utils/file_utils.py +2 -1
  133. holmes/utils/global_instructions.py +10 -26
  134. holmes/utils/holmes_status.py +4 -3
  135. holmes/utils/log.py +15 -0
  136. holmes/utils/markdown_utils.py +2 -3
  137. holmes/utils/memory_limit.py +58 -0
  138. holmes/utils/sentry_helper.py +23 -0
  139. holmes/utils/stream.py +12 -5
  140. holmes/utils/tags.py +4 -3
  141. holmes/version.py +3 -1
  142. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +12 -10
  143. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  144. holmes/plugins/toolsets/aws.yaml +0 -80
  145. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -114
  146. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  147. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -736
  148. holmes/plugins/toolsets/grafana/grafana_api.py +0 -64
  149. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  150. holmes/plugins/toolsets/opensearch/opensearch.py +0 -250
  151. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  152. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -215
  153. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  154. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  155. holmes/utils/keygen_utils.py +0 -6
  156. holmesgpt-0.16.2a0.dist-info/RECORD +0 -258
  157. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_ppl_query_docs.jinja2 +0 -0
  158. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist.py +2 -2
  159. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist_instructions.jinja2 +0 -0
  160. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/LICENSE +0 -0
  161. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  162. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/core/runbooks.py CHANGED
@@ -1,4 +1,5 @@
 from typing import List
+
 from holmes.core.issue import Issue
 from holmes.plugins.runbooks import Runbook
 
holmes/core/safeguards.py CHANGED
@@ -4,9 +4,11 @@ from typing import Optional
 from pydantic import ValidationError
 
 from holmes.common.env_vars import TOOL_CALL_SAFEGUARDS_ENABLED
-from holmes.plugins.toolsets.logging_utils.logging_api import POD_LOGGING_TOOL_NAME
 from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
-from holmes.plugins.toolsets.logging_utils.logging_api import FetchPodLogsParams
+from holmes.plugins.toolsets.logging_utils.logging_api import (
+    POD_LOGGING_TOOL_NAME,
+    FetchPodLogsParams,
+)
 
 
 def _is_redundant_fetch_pod_logs(
holmes/core/supabase_dal.py CHANGED
@@ -10,10 +10,12 @@ from enum import Enum
 from typing import Dict, List, Optional, Tuple
 from uuid import uuid4
 
-from postgrest.base_request_builder import QueryArgs
+import sentry_sdk
 import yaml  # type: ignore
 from cachetools import TTLCache  # type: ignore
+from postgrest._sync import request_builder as supabase_request_builder
 from postgrest._sync.request_builder import SyncQueryRequestBuilder
+from postgrest.base_request_builder import QueryArgs
 from postgrest.exceptions import APIError as PGAPIError
 from postgrest.types import ReturnMethod
 from pydantic import BaseModel
@@ -40,7 +42,6 @@ from holmes.utils.definitions import RobustaConfig
 from holmes.utils.env import get_env_replacement
 from holmes.utils.global_instructions import Instructions
 from holmes.utils.krr_utils import calculate_krr_savings
-from postgrest._sync import request_builder as supabase_request_builder
 
 SUPABASE_TIMEOUT_SECONDS = int(os.getenv("SUPABASE_TIMEOUT_SECONDS", 3600))
 
@@ -112,6 +113,7 @@ class SupabaseDal:
             f"Initializing Robusta platform connection for account {self.account_id}"
         )
         options = ClientOptions(postgrest_client_timeout=SUPABASE_TIMEOUT_SECONDS)
+        sentry_sdk.set_tag("db_url", self.url)
         self.client = create_client(self.url, self.api_key, options)  # type: ignore
         self.user_id = self.sign_in()
         ttl = int(os.environ.get("SAAS_SESSION_TOKEN_TTL_SEC", "82800"))  # 23 hours
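The only behavioral change in this file is the new sentry_sdk.set_tag call during client initialization. A minimal standalone sketch of the pattern (the init call and function name here are hypothetical, not the package's code):

    import sentry_sdk

    sentry_sdk.init(dsn="")  # empty DSN disables sending; fine for a local sketch

    def connect(url: str) -> None:
        # Tag the Sentry scope before creating the client so every error
        # reported afterwards carries the target database URL as metadata.
        sentry_sdk.set_tag("db_url", url)
        # ...client creation would follow here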
holmes/core/tool_calling_llm.py CHANGED
@@ -2,13 +2,7 @@ import concurrent.futures
 import json
 import logging
 import textwrap
-from typing import Dict, List, Optional, Type, Union, Callable, Any
-
-from holmes.core.models import (
-    ToolApprovalDecision,
-    ToolCallResult,
-    PendingToolApproval,
-)
+from typing import Any, Callable, Dict, List, Optional, Type, Union
 
 import sentry_sdk
 from openai import BadRequestError
@@ -19,11 +13,10 @@ from pydantic import BaseModel, Field
 from rich.console import Console
 
 from holmes.common.env_vars import (
+    LOG_LLM_USAGE_RESPONSE,
     RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION,
     TEMPERATURE,
-    LOG_LLM_USAGE_RESPONSE,
 )
-
 from holmes.core.investigation_structured_output import (
     DEFAULT_SECTIONS,
     REQUEST_STRUCTURED_OUTPUT_FROM_LLM,
@@ -33,7 +26,12 @@ from holmes.core.investigation_structured_output import (
 )
 from holmes.core.issue import Issue
 from holmes.core.llm import LLM
-from holmes.core.resource_instruction import ResourceInstructions
+from holmes.core.models import (
+    PendingToolApproval,
+    ToolApprovalDecision,
+    ToolCallResult,
+)
+from holmes.core.prompt import generate_user_prompt
 from holmes.core.runbooks import RunbookManager
 from holmes.core.safeguards import prevent_overly_repeated_tool_call
 from holmes.core.tools import (
@@ -44,26 +42,26 @@ from holmes.core.tools import (
 from holmes.core.tools_utils.tool_context_window_limiter import (
     prevent_overly_big_tool_response,
 )
+from holmes.core.tools_utils.tool_executor import ToolExecutor
+from holmes.core.tracing import DummySpan
 from holmes.core.truncation.input_context_window_limiter import (
     limit_input_context_window,
 )
 from holmes.plugins.prompts import load_and_render_prompt
 from holmes.plugins.runbooks import RunbookCatalog
 from holmes.utils import sentry_helper
+from holmes.utils.colors import AI_COLOR
 from holmes.utils.global_instructions import (
     Instructions,
-    add_runbooks_to_user_prompt,
+    generate_runbooks_args,
 )
-from holmes.utils.tags import format_tags_in_string, parse_messages_tags
-from holmes.core.tools_utils.tool_executor import ToolExecutor
-from holmes.core.tracing import DummySpan
-from holmes.utils.colors import AI_COLOR
 from holmes.utils.stream import (
     StreamEvents,
     StreamMessage,
     add_token_count_to_metadata,
     build_stream_event_token_count,
 )
+from holmes.utils.tags import parse_messages_tags
 
 # Create a named logger for cost tracking
 cost_logger = logging.getLogger("holmes.costs")
@@ -142,6 +140,7 @@ def _process_cost_info(
 
 class LLMResult(LLMCosts):
     tool_calls: Optional[List[ToolCallResult]] = None
+    num_llm_calls: Optional[int] = None  # Number of LLM API calls (turns)
     result: Optional[str] = None
     unprocessed_result: Optional[str] = None
     instructions: List[str] = Field(default_factory=list)
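The new num_llm_calls field is additive and optional, so existing constructors of LLMResult keep working. A self-contained sketch of the same Pydantic pattern (abbreviated model with a hypothetical name):

    from typing import Optional
    from pydantic import BaseModel

    class ResultSketch(BaseModel):
        result: Optional[str] = None
        num_llm_calls: Optional[int] = None  # number of LLM API calls (turns)

    # Old call sites omit the field and get None; new ones can report turns.
    assert ResultSketch(result="ok").num_llm_calls is None
    assert ResultSketch(result="ok", num_llm_calls=3).num_llm_calls == 3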
@@ -269,7 +268,6 @@ class ToolCallingLLM:
         self,
         system_prompt: str,
         user_prompt: str,
-        post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         sections: Optional[InputSectionsDataType] = None,
         trace_span=DummySpan(),
@@ -280,8 +278,7 @@
         ]
         return self.call(
             messages,
-            post_process_prompt,
-            response_format,
+            response_format=response_format,
             user_prompt=user_prompt,
             sections=sections,
             trace_span=trace_span,
@@ -290,19 +287,17 @@
     def messages_call(
         self,
         messages: List[Dict[str, str]],
-        post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         trace_span=DummySpan(),
     ) -> LLMResult:
         return self.call(
-            messages, post_process_prompt, response_format, trace_span=trace_span
+            messages, response_format=response_format, trace_span=trace_span
         )
 
     @sentry_sdk.trace
     def call(  # type: ignore
         self,
         messages: List[Dict[str, str]],
-        post_process_prompt: Optional[str] = None,
         response_format: Optional[Union[dict, Type[BaseModel]]] = None,
         user_prompt: Optional[str] = None,
         sections: Optional[InputSectionsDataType] = None,
@@ -403,43 +398,20 @@
                 )
 
             if not tools_to_call:
-                # For chatty models post process and summarize the result
-                # this only works for calls where user prompt is explicitly passed through
-                if post_process_prompt and user_prompt:
-                    logging.info("Running post processing on investigation.")
-                    raw_response = text_response
-                    post_processed_response, post_processing_cost = (
-                        self._post_processing_call(
-                            prompt=user_prompt,
-                            investigation=raw_response,
-                            user_prompt=post_process_prompt,
-                        )
-                    )
-                    costs.total_cost += post_processing_cost
-
-                    tokens = self.llm.count_tokens(messages=messages, tools=tools)
-
-                    add_token_count_to_metadata(
-                        tokens=tokens,
-                        full_llm_response=full_response,
-                        max_context_size=limit_result.max_context_size,
-                        maximum_output_token=limit_result.maximum_output_token,
-                        metadata=metadata,
-                    )
+                tokens = self.llm.count_tokens(messages=messages, tools=tools)
 
-                    return LLMResult(
-                        result=post_processed_response,
-                        unprocessed_result=raw_response,
-                        tool_calls=all_tool_calls,
-                        prompt=json.dumps(messages, indent=2),
-                        messages=messages,
-                        **costs.model_dump(),  # Include all cost fields
-                        metadata=metadata,
-                    )
+                add_token_count_to_metadata(
+                    tokens=tokens,
+                    full_llm_response=full_response,
+                    max_context_size=limit_result.max_context_size,
+                    maximum_output_token=limit_result.maximum_output_token,
+                    metadata=metadata,
+                )
 
                 return LLMResult(
                     result=text_response,
                     tool_calls=all_tool_calls,
+                    num_llm_calls=i,
                     prompt=json.dumps(messages, indent=2),
                     messages=messages,
                     **costs.model_dump(),  # Include all cost fields
@@ -484,14 +456,11 @@
                         tool_call_result.result.status
                         == StructuredToolResultStatus.APPROVAL_REQUIRED
                     ):
-                        with trace_span.start_span(type="tool") as tool_span:
-                            tool_call_result = self._handle_tool_call_approval(
-                                tool_call_result=tool_call_result,
-                                tool_number=tool_number,
-                            )
-                            ToolCallingLLM._log_tool_call_result(
-                                tool_span, tool_call_result
-                            )
+                        tool_call_result = self._handle_tool_call_approval(
+                            tool_call_result=tool_call_result,
+                            tool_number=tool_number,
+                            trace_span=trace_span,
+                        )
 
                         tool_result_response_dict = (
                             tool_call_result.as_tool_result_response()
@@ -515,6 +484,7 @@
         tool_name: str,
         tool_params: dict,
         user_approved: bool,
+        tool_call_id: str,
         tool_number: Optional[int] = None,
     ) -> StructuredToolResult:
         tool = self.tool_executor.get_tool_by_name(tool_name)
@@ -534,6 +504,8 @@
                 user_approved=user_approved,
                 llm=self.llm,
                 max_token_count=self.llm.get_max_token_count_for_single_tool(),
+                tool_name=tool_name,
+                tool_call_id=tool_call_id,
             )
             tool_response = tool.invoke(tool_params, context=invoke_context)
         except Exception as e:
@@ -578,6 +550,7 @@
             tool_params=tool_params,
             user_approved=user_approved,
             tool_number=tool_number,
+            tool_call_id=tool_call_id,
         )
 
         if not isinstance(tool_response, StructuredToolResult):
@@ -603,15 +576,39 @@
         )
 
     @staticmethod
-    def _log_tool_call_result(tool_span, tool_call_result: ToolCallResult):
+    def _log_tool_call_result(
+        tool_span,
+        tool_call_result: ToolCallResult,
+        approval_possible=True,
+        original_token_count=None,
+    ):
         tool_span.set_attributes(name=tool_call_result.tool_name)
+        status = tool_call_result.result.status
+
+        if (
+            status == StructuredToolResultStatus.APPROVAL_REQUIRED
+            and not approval_possible
+        ):
+            status = StructuredToolResultStatus.ERROR
+
+        if status == StructuredToolResultStatus.ERROR:
+            error = (
+                tool_call_result.result.error
+                if tool_call_result.result.error
+                else "Unspecified error"
+            )
+        else:
+            error = None
         tool_span.log(
             input=tool_call_result.result.params,
             output=tool_call_result.result.data,
-            error=tool_call_result.result.error,
+            error=error,
             metadata={
-                "status": tool_call_result.result.status,
+                "status": status,
                 "description": tool_call_result.description,
+                "return_code": tool_call_result.result.return_code,
+                "error": tool_call_result.result.error,
+                "original_token_count": original_token_count,
             },
         )
 
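In words: when no approval callback is wired up, a tool result stuck in APPROVAL_REQUIRED is recorded as an error, and a missing error string falls back to a placeholder. A condensed restatement with plain strings standing in for the enum (a sketch, not the package's types):

    def effective_status_and_error(status, error, approval_possible):
        # APPROVAL_REQUIRED with nobody able to approve is effectively an error.
        if status == "approval_required" and not approval_possible:
            status = "error"
        # Errors always log a message; non-errors log no error at all.
        if status == "error":
            return status, error or "Unspecified error"
        return status, None

    assert effective_status_and_error("approval_required", None, False) == ("error", "Unspecified error")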
@@ -657,17 +654,23 @@
                 user_approved=user_approved,
             )
 
-            prevent_overly_big_tool_response(
+            original_token_count = prevent_overly_big_tool_response(
                 tool_call_result=tool_call_result, llm=self.llm
             )
 
-            ToolCallingLLM._log_tool_call_result(tool_span, tool_call_result)
+            ToolCallingLLM._log_tool_call_result(
+                tool_span,
+                tool_call_result,
+                self.approval_callback is not None,
+                original_token_count,
+            )
             return tool_call_result
 
     def _handle_tool_call_approval(
         self,
         tool_call_result: ToolCallResult,
         tool_number: Optional[int],
+        trace_span: Any,
     ) -> ToolCallResult:
         """
         Handle approval for a single tool call if required.
@@ -686,76 +689,35 @@
            return tool_call_result
 
         # Get approval from user
-        approved, feedback = self.approval_callback(tool_call_result.result)
-
-        if approved:
-            logging.debug(
-                f"User approved command: {tool_call_result.result.invocation}"
-            )
-            new_response = self._directly_invoke_tool_call(
-                tool_name=tool_call_result.tool_name,
-                tool_params=tool_call_result.result.params or {},
-                user_approved=True,
-                tool_number=tool_number,
-            )
-            tool_call_result.result = new_response
-        else:
-            # User denied - update to error
-            feedback_text = f" User feedback: {feedback}" if feedback else ""
-            tool_call_result.result.status = StructuredToolResultStatus.ERROR
-            tool_call_result.result.error = (
-                f"User denied command execution.{feedback_text}"
-            )
-
-        return tool_call_result
-
-    @staticmethod
-    def __load_post_processing_user_prompt(
-        input_prompt, investigation, user_prompt: Optional[str] = None
-    ) -> str:
-        if not user_prompt:
-            user_prompt = "builtin://generic_post_processing.jinja2"
-        return load_and_render_prompt(
-            user_prompt, {"investigation": investigation, "prompt": input_prompt}
-        )
+        with trace_span.start_span(
+            type="task", name=f"Ask approval for {tool_call_result.tool_name}"
+        ):
+            approved, feedback = self.approval_callback(tool_call_result.result)
 
-    def _post_processing_call(
-        self,
-        prompt,
-        investigation,
-        user_prompt: Optional[str] = None,
-        system_prompt: str = "You are an AI assistant summarizing Kubernetes issues.",
-    ) -> tuple[Optional[str], float]:
-        try:
-            user_prompt = ToolCallingLLM.__load_post_processing_user_prompt(
-                prompt, investigation, user_prompt
-            )
-
-            logging.debug(f'Post processing prompt:\n"""\n{user_prompt}\n"""')
-            messages = [
-                {
-                    "role": "system",
-                    "content": system_prompt,
-                },
-                {
-                    "role": "user",
-                    "content": format_tags_in_string(user_prompt),
-                },
-            ]
-            full_response = self.llm.completion(messages=messages, temperature=0)
-            logging.debug(f"Post processing response {full_response}")
-
-            # Extract and log cost information for post-processing
-            post_processing_cost = _extract_cost_from_response(full_response)
-            if post_processing_cost > 0:
-                cost_logger.debug(
-                    f"Post-processing LLM cost: ${post_processing_cost:.6f}"
+        # Note - Tool calls are currently logged twice, once when returning APPROVAL_REQUIRED and once here
+        with trace_span.start_span(type="tool") as tool_span:
+            if approved:
+                logging.debug(
+                    f"User approved command: {tool_call_result.result.invocation}"
                 )
+                new_response = self._directly_invoke_tool_call(
+                    tool_name=tool_call_result.tool_name,
+                    tool_params=tool_call_result.result.params or {},
+                    user_approved=True,
+                    tool_number=tool_number,
+                    tool_call_id=tool_call_result.tool_call_id,
+                )
+                tool_call_result.result = new_response
+            else:
+                # User denied - update to error
+                feedback_text = f" User feedback: {feedback}" if feedback else ""
+                tool_call_result.result.status = StructuredToolResultStatus.ERROR
+                tool_call_result.result.error = (
+                    f"User denied command execution.{feedback_text}"
+                )
+            ToolCallingLLM._log_tool_call_result(tool_span, tool_call_result)
 
-            return full_response.choices[0].message.content, post_processing_cost  # type: ignore
-        except Exception:
-            logging.exception("Failed to run post processing", exc_info=True)
-            return investigation, 0.0
+        return tool_call_result
 
     def call_stream(
         self,
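The approval callback's contract, as used above: it receives the pending StructuredToolResult and returns an (approved, feedback) pair. A hypothetical interactive implementation (everything except the tuple shape and the .invocation attribute is a stand-in):

    from typing import Optional, Tuple

    def prompt_user_for_approval(pending_result) -> Tuple[bool, Optional[str]]:
        # pending_result.invocation is the command awaiting approval.
        answer = input(f"Run `{pending_result.invocation}`? [y/N] ").strip().lower()
        if answer == "y":
            return True, None
        return False, "denied at the interactive prompt"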
@@ -1038,10 +1000,8 @@ class IssueInvestigator(ToolCallingLLM):
         self,
         issue: Issue,
         prompt: str,
-        instructions: Optional[ResourceInstructions],
         console: Optional[Console] = None,
         global_instructions: Optional[Instructions] = None,
-        post_processing_prompt: Optional[str] = None,
         sections: Optional[InputSectionsDataType] = None,
         trace_span=DummySpan(),
         runbooks: Optional[RunbookCatalog] = None,
@@ -1095,16 +1055,18 @@
             },
         )
 
-        user_prompt = ""
+        base_user = ""
+        base_user = f"{base_user}\n #This is context from the issue:\n{issue.raw}"
 
-        user_prompt = add_runbooks_to_user_prompt(
-            user_prompt,
+        runbooks_ctx = generate_runbooks_args(
             runbook_catalog=runbooks,
             global_instructions=global_instructions,
             issue_instructions=issue_runbooks,
-            resource_instructions=instructions,
         )
-        user_prompt = f"{user_prompt}\n #This is context from the issue:\n{issue.raw}"
+        user_prompt = generate_user_prompt(
+            base_user,
+            runbooks_ctx,
+        )
         logging.debug(
             "Rendered system prompt:\n%s", textwrap.indent(system_prompt, " ")
         )
@@ -1113,7 +1075,6 @@
         res = self.prompt_call(
             system_prompt,
             user_prompt,
-            post_processing_prompt,
             response_format=response_format,
             sections=sections,
             trace_span=trace_span,
holmes/core/tools.py CHANGED
@@ -5,6 +5,7 @@ import re
 import shlex
 import subprocess
 import tempfile
+import time
 from abc import ABC, abstractmethod
 from datetime import datetime
 from enum import Enum
@@ -26,25 +27,25 @@ from pydantic import (
     ConfigDict,
     Field,
     FilePath,
-    model_validator,
     PrivateAttr,
+    model_validator,
 )
 from rich.console import Console
+from rich.table import Table
 
 from holmes.core.llm import LLM
 from holmes.core.openai_formatting import format_tool_to_open_ai_standard
-from holmes.plugins.prompts import load_and_render_prompt
 from holmes.core.transformers import (
-    registry,
-    TransformerError,
     Transformer,
+    TransformerError,
+    registry,
 )
+from holmes.plugins.prompts import load_and_render_prompt
+from holmes.utils.config_utils import merge_transformers
+from holmes.utils.memory_limit import check_oom_and_append_hint, get_ulimit_prefix
 
 if TYPE_CHECKING:
     from holmes.core.transformers import BaseTransformer
-from holmes.utils.config_utils import merge_transformers
-import time
-from rich.table import Table
 
 logger = logging.getLogger(__name__)
 
@@ -96,9 +97,11 @@ class StructuredToolResult(BaseModel):
         else:
             try:
                 if isinstance(self.data, BaseModel):
-                    return self.data.model_dump_json(indent=2)
+                    return self.data.model_dump_json()
                 else:
-                    return json.dumps(self.data, indent=2)
+                    return json.dumps(
+                        self.data, separators=(",", ":"), ensure_ascii=False
+                    )
             except Exception:
                 return str(self.data)
 
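The serializer now favors token economy over human readability: compact separators drop the whitespace that indent=2 inserts, and ensure_ascii=False keeps non-ASCII characters literal instead of expanding them to \uXXXX escapes. A quick standalone comparison:

    import json

    data = {"pod": "api-7f9c4", "message": "réessayer plus tard"}
    pretty = json.dumps(data, indent=2)
    compact = json.dumps(data, separators=(",", ":"), ensure_ascii=False)
    # The compact form is strictly shorter, so it costs fewer LLM tokens.
    assert len(compact) < len(pretty)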
@@ -117,23 +120,6 @@ def sanitize_params(params):
     return {k: sanitize(str(v)) for k, v in params.items()}
 
 
-def format_tool_output(tool_result: Union[str, StructuredToolResult]) -> str:
-    if isinstance(tool_result, StructuredToolResult):
-        if tool_result.data and isinstance(tool_result.data, str):
-            # Display logs and other string outputs in a way that is readable to humans.
-            # To do this, we extract them from the result and print them as-is below.
-            # The metadata is printed on a single line to
-            data = tool_result.data
-            tool_result.data = "The raw tool data is printed below this JSON"
-            result_str = tool_result.model_dump_json(indent=2, exclude_none=True)
-            result_str += f"\n{data}"
-            return result_str
-        else:
-            return tool_result.model_dump_json(indent=2)
-    else:
-        return tool_result
-
-
 class ToolsetStatusEnum(str, Enum):
     ENABLED = "enabled"
     DISABLED = "disabled"
@@ -168,6 +154,8 @@ class ToolInvokeContext(BaseModel):
     user_approved: bool = False
     llm: LLM
     max_token_count: int
+    tool_call_id: str
+    tool_name: str
 
 
 class Tool(ABC, BaseModel):
@@ -493,8 +481,9 @@ class YAMLTool(Tool, BaseModel):
     def __execute_subprocess(self, cmd) -> Tuple[str, int]:
         try:
             logger.debug(f"Running `{cmd}`")
+            protected_cmd = get_ulimit_prefix() + cmd
             result = subprocess.run(
-                cmd,
+                protected_cmd,
                 shell=True,
                 text=True,
                 check=False,  # do not throw error, we just return the error code
@@ -503,7 +492,9 @@
                 stderr=subprocess.STDOUT,
             )
 
-            return result.stdout.strip(), result.returncode
+            output = result.stdout.strip()
+            output = check_oom_and_append_hint(output, result.returncode)
+            return output, result.returncode
         except Exception as e:
             logger.error(
                 f"An unexpected error occurred while running '{cmd}': {e}",
holmes/core/tools_utils/token_counting.py CHANGED
@@ -4,11 +4,18 @@ from holmes.core.tools import StructuredToolResult
 
 
 def count_tool_response_tokens(
-    llm: LLM, structured_tool_result: StructuredToolResult
+    llm: LLM,
+    structured_tool_result: StructuredToolResult,
+    tool_call_id: str,
+    tool_name: str,
 ) -> int:
     message = {
         "role": "tool",
-        "content": format_tool_result_data(structured_tool_result),
+        "content": format_tool_result_data(
+            tool_result=structured_tool_result,
+            tool_call_id=tool_call_id,
+            tool_name=tool_name,
+        ),
     }
     tokens = llm.count_tokens([message])
     return tokens.total_tokens
holmes/core/tools_utils/tool_context_window_limiter.py CHANGED
@@ -1,8 +1,8 @@
-from typing import Optional
 from pydantic import BaseModel
+
 from holmes.core.llm import LLM
-from holmes.core.tools import StructuredToolResultStatus
 from holmes.core.models import ToolCallResult
+from holmes.core.tools import StructuredToolResultStatus
 from holmes.utils import sentry_helper
 
 
@@ -20,38 +20,21 @@ def get_pct_token_count(percent_of_total_context_window: float, llm: LLM) -> int
     return context_window_size
 
 
-def is_tool_call_too_big(
-    tool_call_result: ToolCallResult, llm: LLM
-) -> tuple[bool, Optional[ToolCallSizeMetadata]]:
-    if tool_call_result.result.status == StructuredToolResultStatus.SUCCESS:
-        message = tool_call_result.as_tool_call_message()
-
-        tokens = llm.count_tokens(messages=[message])
-        max_tokens_allowed = llm.get_max_token_count_for_single_tool()
-        return (
-            tokens.total_tokens > max_tokens_allowed,
-            ToolCallSizeMetadata(
-                messages_token=tokens.total_tokens,
-                max_tokens_allowed=max_tokens_allowed,
-            ),
-        )
-    return False, None
-
-
 def prevent_overly_big_tool_response(tool_call_result: ToolCallResult, llm: LLM):
-    tool_call_result_is_too_big, metadata = is_tool_call_too_big(
-        tool_call_result=tool_call_result, llm=llm
-    )
-    if tool_call_result_is_too_big and metadata:
-        relative_pct = (
-            (metadata.messages_token - metadata.max_tokens_allowed)
-            / metadata.messages_token
-        ) * 100
-        error_message = f"The tool call result is too large to return: {metadata.messages_token} tokens.\nThe maximum allowed tokens is {metadata.max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
+    message = tool_call_result.as_tool_call_message()
+    messages_token = llm.count_tokens(messages=[message]).total_tokens
+    max_tokens_allowed = llm.get_max_token_count_for_single_tool()
+    if (
+        tool_call_result.result.status == StructuredToolResultStatus.SUCCESS
+        and messages_token > max_tokens_allowed
+    ):
+        relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
+        error_message = f"The tool call result is too large to return: {messages_token} tokens.\nThe maximum allowed tokens is {max_tokens_allowed} which is {format(relative_pct, '.1f')}% smaller.\nInstructions for the LLM: try to repeat the query but proactively narrow down the result so that the tool answer fits within the allowed number of tokens."
         tool_call_result.result.status = StructuredToolResultStatus.ERROR
         tool_call_result.result.data = None
         tool_call_result.result.error = error_message
 
         sentry_helper.capture_toolcall_contains_too_many_tokens(
-            tool_call_result, metadata.messages_token, metadata.max_tokens_allowed
+            tool_call_result, messages_token, max_tokens_allowed
         )
+    return messages_token
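A worked instance of the oversize arithmetic above: a 12,000-token tool response against an 8,000-token budget reports that the budget is 33.3% smaller than the response.

    messages_token = 12_000
    max_tokens_allowed = 8_000
    relative_pct = ((messages_token - max_tokens_allowed) / messages_token) * 100
    assert format(relative_pct, ".1f") == "33.3"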