holmesgpt 0.12.4__py3-none-any.whl → 0.13.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of holmesgpt might be problematic.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +19 -1
- holmes/common/env_vars.py +13 -0
- holmes/config.py +69 -9
- holmes/core/conversations.py +11 -0
- holmes/core/investigation.py +16 -3
- holmes/core/investigation_structured_output.py +12 -0
- holmes/core/llm.py +10 -0
- holmes/core/models.py +9 -1
- holmes/core/openai_formatting.py +72 -12
- holmes/core/prompt.py +13 -0
- holmes/core/supabase_dal.py +3 -0
- holmes/core/todo_manager.py +88 -0
- holmes/core/tool_calling_llm.py +121 -149
- holmes/core/tools.py +10 -1
- holmes/core/tools_utils/tool_executor.py +7 -2
- holmes/core/tools_utils/toolset_utils.py +7 -2
- holmes/core/tracing.py +8 -7
- holmes/interactive.py +1 -0
- holmes/main.py +2 -1
- holmes/plugins/prompts/__init__.py +7 -1
- holmes/plugins/prompts/_ai_safety.jinja2 +43 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
- holmes/plugins/prompts/_fetch_logs.jinja2 +6 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +16 -0
- holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
- holmes/plugins/prompts/generic_ask.jinja2 +4 -3
- holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +4 -0
- holmes/plugins/toolsets/__init__.py +19 -6
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
- holmes/plugins/toolsets/coralogix/api.py +6 -6
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
- holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
- holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
- holmes/plugins/toolsets/git.py +15 -15
- holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
- holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
- holmes/plugins/toolsets/internet/internet.py +2 -1
- holmes/plugins/toolsets/internet/notion.py +2 -1
- holmes/plugins/toolsets/investigator/__init__.py +0 -0
- holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
- holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
- holmes/plugins/toolsets/investigator/model.py +15 -0
- holmes/plugins/toolsets/kafka.py +14 -7
- holmes/plugins/toolsets/kubernetes.yaml +7 -7
- holmes/plugins/toolsets/kubernetes_logs.py +454 -25
- holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
- holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
- holmes/plugins/toolsets/newrelic.py +8 -3
- holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
- holmes/plugins/toolsets/prometheus/prometheus.py +149 -44
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
- holmes/plugins/toolsets/robusta/robusta.py +4 -4
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
- holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
- holmes/plugins/toolsets/utils.py +8 -1
- holmes/utils/llms.py +20 -0
- holmes/utils/stream.py +90 -0
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/METADATA +48 -35
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/RECORD +85 -75
- holmes/utils/robusta.py +0 -9
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/entry_points.txt +0 -0

holmes/plugins/toolsets/prometheus/prometheus.py
CHANGED

@@ -3,12 +3,13 @@ import logging
 import os
 import re
 import time
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Type, Union
 from urllib.parse import urljoin
 
 import requests  # type: ignore
 from pydantic import BaseModel, field_validator, Field, model_validator
 from requests import RequestException
+from requests_aws4auth import AWS4Auth
 
 from holmes.core.tools import (
     CallablePrerequisite,
@@ -25,14 +26,17 @@ from holmes.plugins.toolsets.utils import (
     get_param_or_raise,
     process_timestamps_to_rfc3339,
     standard_start_datetime_tool_param_description,
+    toolset_name_for_one_liner,
 )
 from holmes.utils.cache import TTLCache
 from holmes.common.env_vars import IS_OPENSHIFT
 from holmes.common.openshift import load_openshift_token
+from holmes.plugins.toolsets.logging_utils.logging_api import (
+    DEFAULT_TIME_SPAN_SECONDS,
+)
 from holmes.utils.keygen_utils import generate_random_key
 
 PROMETHEUS_RULES_CACHE_KEY = "cached_prometheus_rules"
-DEFAULT_TIME_SPAN_SECONDS = 3600
 
 
 class PrometheusConfig(BaseModel):
@@ -49,6 +53,7 @@ class PrometheusConfig(BaseModel):
     headers: Dict = Field(default_factory=dict)
     rules_cache_duration_seconds: Union[int, None] = 1800  # 30 minutes
     additional_labels: Optional[Dict[str, str]] = None
+    prometheus_ssl_enabled: bool = True
 
     @field_validator("prometheus_url")
     def ensure_trailing_slash(cls, v: Optional[str]) -> Optional[str]:
@@ -73,6 +78,32 @@ class PrometheusConfig(BaseModel):
 
         return self
 
+    def is_amp(self) -> bool:
+        return False
+
+    def get_auth(self) -> Any:
+        return None
+
+
+class AMPConfig(PrometheusConfig):
+    aws_access_key: str
+    aws_secret_access_key: str
+    aws_region: str
+    aws_service_name: str = "aps"
+    healthcheck: str = "api/v1/query?query=up"  # Override for AMP
+    prometheus_ssl_enabled: bool = False
+
+    def is_amp(self) -> bool:
+        return True
+
+    def get_auth(self):
+        return AWS4Auth(
+            self.aws_access_key,  # type: ignore
+            self.aws_secret_access_key,  # type: ignore
+            self.aws_region,  # type: ignore
+            self.aws_service_name,  # type: ignore
+        )
+
 
 class BasePrometheusTool(Tool):
     toolset: "PrometheusToolset"
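
Taken together, `AMPConfig.get_auth()` is what lets every `requests` call in this file carry AWS SigV4 signing for Amazon Managed Prometheus. A minimal standalone sketch of the same signing flow, assuming `requests_aws4auth` is installed; the workspace URL and credentials are placeholders:

import requests
from requests_aws4auth import AWS4Auth

# Placeholders: a real AMP workspace query endpoint and IAM credentials.
PROMETHEUS_URL = "https://aps-workspaces.us-east-1.amazonaws.com/workspaces/ws-12345/"

auth = AWS4Auth(
    "AKIA...",     # aws_access_key
    "secret...",   # aws_secret_access_key
    "us-east-1",   # aws_region
    "aps",         # aws_service_name, the default in AMPConfig
)

# Mirrors ExecuteInstantQuery below: POST api/v1/query with the auth object attached.
response = requests.post(
    PROMETHEUS_URL + "api/v1/query",
    data={"query": "up"},
    auth=auth,
    timeout=60,
)
print(response.json())
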
@@ -99,10 +130,15 @@ def filter_metrics_by_name(metrics: Dict, pattern: str) -> Dict:
 METRICS_SUFFIXES_TO_STRIP = ["_bucket", "_count", "_sum"]
 
 
-def fetch_metadata(prometheus_url: str, headers: Optional[Dict]) -> Dict:
+def fetch_metadata(
+    prometheus_url: str,
+    headers: Optional[Dict],
+    auth=None,
+    verify_ssl: bool = True,
+) -> Dict:
     metadata_url = urljoin(prometheus_url, "api/v1/metadata")
     metadata_response = requests.get(
-        metadata_url, headers=headers, timeout=60, verify=
+        metadata_url, headers=headers, timeout=60, verify=verify_ssl, auth=auth
     )
 
     metadata_response.raise_for_status()
@@ -124,13 +160,17 @@ def fetch_metadata(prometheus_url: str, headers: Optional[Dict]) -> Dict:
 
 
 def fetch_metadata_with_series_api(
-    prometheus_url: str,
+    prometheus_url: str,
+    metric_name: str,
+    headers: Dict,
+    auth=None,
+    verify_ssl: bool = True,
 ) -> Dict:
     url = urljoin(prometheus_url, "api/v1/series")
     params: Dict = {"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"}
 
     response = requests.get(
-        url, headers=headers, timeout=60, params=params, verify=
+        url, headers=headers, timeout=60, params=params, auth=auth, verify=verify_ssl
     )
     response.raise_for_status()
     metrics = response.json()["data"]
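
Both metadata helpers now accept `auth` and `verify_ssl` and pass them straight through to `requests.get`. A small sketch of calling the updated helper directly, assuming holmesgpt 0.13.0 is installed and a Prometheus server answers at the placeholder URL:

from holmes.plugins.toolsets.prometheus.prometheus import fetch_metadata

# Placeholder URL; the trailing slash matters because the helpers use urljoin().
metadata = fetch_metadata(
    prometheus_url="http://localhost:9090/",
    headers={},
    auth=None,          # or an AWS4Auth instance for AMP
    verify_ssl=False,   # e.g. for self-signed certificates
)
# The metadata endpoint returns a dict keyed by metric name.
print(list(metadata)[:10])
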
@@ -172,6 +212,8 @@ def fetch_metrics_labels_with_series_api(
     cache: Optional[TTLCache],
     metrics_labels_time_window_hrs: Union[int, None],
     metric_name: str,
+    auth=None,
+    verify_ssl: bool = True,
 ) -> dict:
     """This is a slow query. Takes 5+ seconds to run"""
     cache_key = f"metrics_labels_series_api:{metric_name}"
@@ -188,7 +230,12 @@ def fetch_metrics_labels_with_series_api(
         params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
 
     series_response = requests.get(
-        url=series_url,
+        url=series_url,
+        headers=headers,
+        params=params,
+        auth=auth,
+        timeout=60,
+        verify=verify_ssl,
     )
     series_response.raise_for_status()
     series = series_response.json()["data"]
@@ -214,6 +261,8 @@ def fetch_metrics_labels_with_labels_api(
     metrics_labels_time_window_hrs: Union[int, None],
     metric_names: List[str],
     headers: Dict,
+    auth=None,
+    verify_ssl: bool = True,
 ) -> dict:
     metrics_labels = {}
 
@@ -233,7 +282,12 @@ def fetch_metrics_labels_with_labels_api(
         params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
 
     response = requests.get(
-        url=url,
+        url=url,
+        headers=headers,
+        params=params,
+        auth=auth,
+        timeout=60,
+        verify=verify_ssl,
     )
     response.raise_for_status()
     labels = response.json()["data"]
@@ -254,16 +308,27 @@ def fetch_metrics(
     should_fetch_labels_with_labels_api: bool,
     should_fetch_metadata_with_series_api: bool,
     headers: Dict,
+    auth=None,
+    verify_ssl: bool = True,
 ) -> dict:
     metrics = None
     should_fetch_labels = True
     if should_fetch_metadata_with_series_api:
         metrics = fetch_metadata_with_series_api(
-            prometheus_url=prometheus_url,
+            prometheus_url=prometheus_url,
+            metric_name=metric_name,
+            headers=headers,
+            auth=auth,
+            verify_ssl=verify_ssl,
         )
         should_fetch_labels = False  # series API returns the labels
     else:
-        metrics = fetch_metadata(
+        metrics = fetch_metadata(
+            prometheus_url=prometheus_url,
+            headers=headers,
+            auth=auth,
+            verify_ssl=verify_ssl,
+        )
         metrics = filter_metrics_by_name(metrics, metric_name)
 
     if should_fetch_labels:
@@ -275,6 +340,8 @@ def fetch_metrics(
             metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
             metric_names=list(metrics.keys()),
             headers=headers,
+            auth=auth,
+            verify_ssl=verify_ssl,
         )
     else:
         metrics_labels = fetch_metrics_labels_with_series_api(
@@ -283,6 +350,8 @@ def fetch_metrics(
             metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
             metric_name=metric_name,
             headers=headers,
+            auth=auth,
+            verify_ssl=verify_ssl,
         )
 
     for metric_name in metrics:
@@ -309,6 +378,12 @@ class ListPrometheusRules(BasePrometheusTool):
                 error="Prometheus is not configured. Prometheus URL is missing",
                 params=params,
             )
+        if self.toolset.config.is_amp():
+            return StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error="Tool not supported in AMP",
+                params=params,
+            )
         if not self._cache and self.toolset.config.rules_cache_duration_seconds:
             self._cache = TTLCache(self.toolset.config.rules_cache_duration_seconds)  # type: ignore
         try:
@@ -330,8 +405,9 @@ class ListPrometheusRules(BasePrometheusTool):
             rules_response = requests.get(
                 url=rules_url,
                 params=params,
+                auth=self.toolset.config.get_auth(),
                 timeout=180,
-                verify=
+                verify=self.toolset.config.prometheus_ssl_enabled,
                 headers=self.toolset.config.headers,
             )
             rules_response.raise_for_status()
@@ -367,7 +443,7 @@ class ListPrometheusRules(BasePrometheusTool):
             )
 
     def get_parameterized_one_liner(self, params) -> str:
-        return "
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Rules"
 
 
 class ListAvailableMetrics(BasePrometheusTool):
@@ -424,6 +500,8 @@ class ListAvailableMetrics(BasePrometheusTool):
                 should_fetch_labels_with_labels_api=self.toolset.config.fetch_labels_with_labels_api,
                 should_fetch_metadata_with_series_api=self.toolset.config.fetch_metadata_with_series_api,
                 headers=self.toolset.config.headers,
+                auth=self.toolset.config.get_auth(),
+                verify_ssl=self.toolset.config.prometheus_ssl_enabled,
             )
 
             if params.get("type_filter"):
@@ -470,7 +548,8 @@ class ListAvailableMetrics(BasePrometheusTool):
         )
 
     def get_parameterized_one_liner(self, params) -> str:
-
+        name_filter = params.get("name_filter", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Metrics ({name_filter})"
 
 
 class ExecuteInstantQuery(BasePrometheusTool):
@@ -509,7 +588,11 @@ class ExecuteInstantQuery(BasePrometheusTool):
             payload = {"query": query}
 
             response = requests.post(
-                url=url,
+                url=url,
+                headers=self.toolset.config.headers,
+                auth=self.toolset.config.get_auth(),
+                data=payload,
+                timeout=60,
             )
 
             if response.status_code == 200:
@@ -579,9 +662,8 @@ class ExecuteInstantQuery(BasePrometheusTool):
             )
 
     def get_parameterized_one_liner(self, params) -> str:
-        query = params.get("query")
-        description = params.get("description")
-        return f"Execute Prometheus Query (instant): promql='{query}', description='{description}'"
+        description = params.get("description", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Query ({description})"
 
 
 class ExecuteRangeQuery(BasePrometheusTool):
@@ -654,7 +736,11 @@ class ExecuteRangeQuery(BasePrometheusTool):
             }
 
             response = requests.post(
-                url=url,
+                url=url,
+                headers=self.toolset.config.headers,
+                auth=self.toolset.config.get_auth(),
+                data=payload,
+                timeout=120,
             )
 
             if response.status_code == 200:
@@ -726,15 +812,13 @@ class ExecuteRangeQuery(BasePrometheusTool):
         )
 
     def get_parameterized_one_liner(self, params) -> str:
-        query = params.get("query")
-        start = params.get("start")
-        end = params.get("end")
-        step = params.get("step")
-        description = params.get("description")
-        return f"Execute Prometheus Query (range): promql='{query}', start={start}, end={end}, step={step}, description='{description}'"
+        description = params.get("description", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Query ({description})"
 
 
 class PrometheusToolset(Toolset):
+    config: Optional[Union[PrometheusConfig, AMPConfig]] = None
+
     def __init__(self):
         super().__init__(
             name="prometheus/metrics",
@@ -760,28 +844,45 @@ class PrometheusToolset(Toolset):
         )
         self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
 
-    def
-
-
-
-
+    def determine_prometheus_class(
+        self, config: dict[str, Any]
+    ) -> Type[Union[PrometheusConfig, AMPConfig]]:
+        has_aws_credentials = (
+            "aws_access_key" in config or "aws_secret_access_key" in config
+        )
+        return AMPConfig if has_aws_credentials else PrometheusConfig
 
-
-
-
+    def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
+        try:
+            if config:
+                config_cls = self.determine_prometheus_class(config)
+                self.config = config_cls(**config)  # type: ignore
+
+                self._reload_llm_instructions()
+                return self._is_healthy()
+        except Exception:
+            logging.exception("Failed to create prometheus config")
+            return False, "Failed to create prometheus config"
+        try:
+            prometheus_url = os.environ.get("PROMETHEUS_URL")
             if not prometheus_url:
-
-
-
-
+                prometheus_url = self.auto_detect_prometheus_url()
+                if not prometheus_url:
+                    return (
+                        False,
+                        "Unable to auto-detect prometheus. Define prometheus_url in the configuration for tool prometheus/metrics",
+                    )
 
-
-
-
-
-
-
-
+            self.config = PrometheusConfig(
+                prometheus_url=prometheus_url,
+                headers=add_prometheus_auth(os.environ.get("PROMETHEUS_AUTH_HEADER")),
+            )
+            logging.info(f"Prometheus auto discovered at url {prometheus_url}")
+            self._reload_llm_instructions()
+            return self._is_healthy()
+        except Exception as e:
+            logging.exception("Failed to set up prometheus")
+            return False, str(e)
 
     def auto_detect_prometheus_url(self) -> Optional[str]:
         url: Optional[str] = PrometheusDiscovery.find_prometheus_url()
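
Config class selection hinges only on whether AWS keys are present in the toolset config. A quick sketch, assuming the 0.13.0 package is installed; the workspace URL and credentials are placeholders:

from holmes.plugins.toolsets.prometheus.prometheus import PrometheusToolset

toolset = PrometheusToolset()

# Keys named aws_access_key / aws_secret_access_key route the config to AMPConfig.
amp_config = {
    "prometheus_url": "https://aps-workspaces.us-east-1.amazonaws.com/workspaces/ws-12345/",  # placeholder
    "aws_access_key": "AKIA...",           # placeholder
    "aws_secret_access_key": "secret...",  # placeholder
    "aws_region": "us-east-1",
}
print(toolset.determine_prometheus_class(amp_config))   # <class '...AMPConfig'>

# Without AWS keys, the plain PrometheusConfig is selected.
print(toolset.determine_prometheus_class({"prometheus_url": "http://localhost:9090/"}))  # <class '...PrometheusConfig'>
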
@@ -804,7 +905,11 @@ class PrometheusToolset(Toolset):
         url = urljoin(self.config.prometheus_url, self.config.healthcheck)
         try:
             response = requests.get(
-                url=url,
+                url=url,
+                headers=self.config.headers,
+                auth=self.config.get_auth(),
+                timeout=10,
+                verify=self.config.prometheus_ssl_enabled,
             )
 
             if response.status_code == 200:

holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py
CHANGED

@@ -21,6 +21,7 @@ from holmes.plugins.toolsets.rabbitmq.api import (
     get_cluster_status,
     make_request,
 )
+from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
 
 
 class RabbitMQConfig(BaseModel):
@@ -80,7 +81,9 @@ class ListConfiguredClusters(BaseRabbitMQTool):
         )
 
     def get_parameterized_one_liner(self, params) -> str:
-        return
+        return (
+            f"{toolset_name_for_one_liner(self.toolset.name)}: List RabbitMQ Clusters"
+        )
 
 
 class GetRabbitMQClusterStatus(BaseRabbitMQTool):
@@ -116,7 +119,10 @@ class GetRabbitMQClusterStatus(BaseRabbitMQTool):
         )
 
     def get_parameterized_one_liner(self, params) -> str:
-
+        cluster_id = params.get("cluster_id", "")
+        if cluster_id:
+            return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Cluster Status ({cluster_id})"
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Cluster Status"
 
 
 class RabbitMQToolset(Toolset):

holmes/plugins/toolsets/robusta/robusta.py
CHANGED

@@ -74,7 +74,7 @@ class FetchRobustaFinding(Tool):
         )
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-        return "Fetch Alert Metadata"
+        return "Robusta: Fetch Alert Metadata"
 
 
 class FetchResourceRecommendation(Tool):
@@ -138,7 +138,7 @@ class FetchResourceRecommendation(Tool):
         )
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-        return f"Check Historical Resource Utilization: ({str(params)})"
+        return f"Robusta: Check Historical Resource Utilization: ({str(params)})"
 
 
 class FetchConfigurationChanges(Tool):
@@ -196,14 +196,14 @@ class FetchConfigurationChanges(Tool):
         )
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-        return
+        return "Robusta: Search Change History"
 
 
 class RobustaToolset(Toolset):
     def __init__(self, dal: Optional[SupabaseDal]):
         dal_prereq = StaticPrerequisite(
             enabled=True if dal else False,
-            disabled_reason="
+            disabled_reason="Integration with Robusta cloud is disabled",
         )
         if dal:
             dal_prereq = StaticPrerequisite(

holmes/plugins/toolsets/runbook/runbook_fetcher.py
CHANGED

@@ -12,6 +12,7 @@ from holmes.core.tools import (
 )
 
 from holmes.plugins.runbooks import get_runbook_by_path, DEFAULT_RUNBOOK_SEARCH_PATH
+from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
 
 
 # TODO(mainred): currently we support fetch runbooks hosted internally, in the future we may want to support fetching
@@ -82,10 +83,10 @@ class RunbookFetcher(Tool):
     4. ❌ *Could not analyze process mailbox sizes* - Observer tool not enabled in container. Enable remote shell or observer_cli for process introspection.
     5. ✅ *Check pod memory limits* - container limit 4Gi, requests 2Gi
     6. ✅ *Verify BEAM startup arguments* - `+S 4:4 +P 1048576`, no memory instrumentation flags enabled
-    7. ❌ *Could not retrieve APM traces* - Datadog traces toolset is disabled. You can enable it by following https://
-    8. ❌ *Could not query Erlang metrics* - Prometheus integration is not connected. Enable it via https://
+    7. ❌ *Could not retrieve APM traces* - Datadog traces toolset is disabled. You can enable it by following https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/
+    8. ❌ *Could not query Erlang metrics* - Prometheus integration is not connected. Enable it via https://holmesgpt.dev/data-sources/builtin-toolsets/prometheus/
     9. ✅ *Examine recent deployments* - app version 2.1.3 deployed 4 hours ago, coincides with memory spike
-    10. ❌ *Could not check Stripe API status* - No toolset for Stripe integration exists. To monitor Stripe or similar third-party APIs, add a [custom toolset](https://
+    10. ❌ *Could not check Stripe API status* - No toolset for Stripe integration exists. To monitor Stripe or similar third-party APIs, add a [custom toolset](https://holmesgpt.dev/data-sources/custom-toolsets/) or use a [remote MCP server](https://holmesgpt.dev/data-sources/remote-mcp-servers/)
 
     **Root cause:** Memory leak in `gen_server` logic introduced in v2.1.3. BEAM VM hitting memory limit, causing out-of-memory crashes.
 
@@ -107,8 +108,8 @@ class RunbookFetcher(Tool):
     )
 
     def get_parameterized_one_liner(self, params) -> str:
-        path: str = params
-        return f"
+        path: str = params.get("link", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Runbook {path}"
 
 
 class RunbookToolset(Toolset):

holmes/plugins/toolsets/servicenow/servicenow.py
CHANGED

@@ -15,9 +15,11 @@ from holmes.core.tools import StructuredToolResult, ToolResultStatus
 from holmes.plugins.toolsets.utils import (
     process_timestamps_to_rfc3339,
     standard_start_datetime_tool_param_description,
+    toolset_name_for_one_liner,
+)
+from holmes.plugins.toolsets.logging_utils.logging_api import (
+    DEFAULT_TIME_SPAN_SECONDS,
 )
-
-DEFAULT_TIME_SPAN_SECONDS = 3600
 
 
 class ServiceNowConfig(BaseModel):
@@ -92,7 +94,8 @@ class ServiceNowBaseTool(Tool):
     )
 
     def get_parameterized_one_liner(self, params) -> str:
-
+        # Default implementation - will be overridden by subclasses
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: ServiceNow {self.name} {params}"
 
 
 class ReturnChangesInTimerange(ServiceNowBaseTool):
@@ -108,6 +111,10 @@ class ReturnChangesInTimerange(ServiceNowBaseTool):
         )
     }
 
+    def get_parameterized_one_liner(self, params) -> str:
+        start = params.get("start", "last hour")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Requests ({start})"
+
     def _invoke(self, params: Any) -> StructuredToolResult:
         parsed_params = {}
         try:
@@ -147,6 +154,10 @@ class ReturnChange(ServiceNowBaseTool):
         )
     }
 
+    def get_parameterized_one_liner(self, params) -> str:
+        sys_id = params.get("sys_id", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Details ({sys_id})"
+
     def _invoke(self, params: Any) -> StructuredToolResult:
         try:
             url = "https://{instance}.service-now.com/api/now/v2/table/change_request/{sys_id}".format(
@@ -175,6 +186,10 @@ class ReturnChangesWithKeyword(ServiceNowBaseTool):
         )
     }
 
+    def get_parameterized_one_liner(self, params) -> str:
+        keyword = params.get("keyword", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Changes ({keyword})"
+
     def _invoke(self, params: Any) -> StructuredToolResult:
         parsed_params = {}
         try:
holmes/plugins/toolsets/utils.py
CHANGED

@@ -2,7 +2,7 @@ import datetime
 import time
 from typing import Dict, Optional, Tuple, Union
 
-from dateutil import parser
+from dateutil import parser
 
 
 def standard_start_datetime_tool_param_description(time_span_seconds: int):
@@ -139,3 +139,10 @@ def get_param_or_raise(dict: Dict, param: str) -> str:
     if not value:
         raise Exception(f'Missing param "{param}"')
     return value
+
+
+def toolset_name_for_one_liner(toolset_name: str) -> str:
+    name = toolset_name
+    if "/" in toolset_name:
+        name = toolset_name.split("/")[0]
+    return name.capitalize()
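
The new helper keeps only the segment before the first `/` and capitalizes it, which is where prefixes like `Prometheus:` in the one-liners above come from. A quick check (only `prometheus/metrics` is a toolset name confirmed by this diff; the other inputs are illustrative):

from holmes.plugins.toolsets.utils import toolset_name_for_one_liner

print(toolset_name_for_one_liner("prometheus/metrics"))  # -> "Prometheus"
print(toolset_name_for_one_liner("datadog/rds"))         # -> "Datadog" (illustrative name)
print(toolset_name_for_one_liner("runbook"))             # -> "Runbook" (no "/" to split on)
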
holmes/utils/llms.py
ADDED

@@ -0,0 +1,20 @@
+import fnmatch
+from typing import List
+
+
+def model_matches_list(model: str, model_list: List[str]) -> bool:
+    """
+    Check if a model matches any pattern in a list of model patterns.
+
+    Args:
+        model: The name of an LLM model (e.g., "azure/gpt", "openai/gpt-4o")
+        model_list: List of model patterns that may include wildcards
+            (e.g., ["azure/*", "*/mistral", "openai/gpt-*"])
+
+    Returns:
+        True if the model matches any pattern in the list, False otherwise
+    """
+    for pattern in model_list:
+        if fnmatch.fnmatchcase(model, pattern):
+            return True
+    return False
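
`fnmatch.fnmatchcase` gives case-sensitive shell-style globbing, so matching behaves as the docstring's examples suggest:

from holmes.utils.llms import model_matches_list

patterns = ["azure/*", "openai/gpt-*"]
print(model_matches_list("azure/gpt-4o", patterns))      # True, via "azure/*"
print(model_matches_list("openai/gpt-4o", patterns))     # True, via "openai/gpt-*"
print(model_matches_list("anthropic/claude", patterns))  # False
print(model_matches_list("AZURE/gpt", patterns))         # False: matching is case-sensitive
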
holmes/utils/stream.py
ADDED

@@ -0,0 +1,90 @@
+import json
+from enum import Enum
+from typing import Generator, Optional, List
+import litellm
+from pydantic import BaseModel, Field
+from holmes.core.investigation_structured_output import process_response_into_sections
+from functools import partial
+
+
+class StreamEvents(str, Enum):
+    ANSWER_END = "ai_answer_end"
+    START_TOOL = "start_tool_calling"
+    TOOL_RESULT = "tool_calling_result"
+    ERROR = "error"
+    AI_MESSAGE = "ai_message"
+
+
+class StreamMessage(BaseModel):
+    event: StreamEvents
+    data: dict = Field(default={})
+
+
+def create_sse_message(event_type: str, data: Optional[dict] = None):
+    if data is None:
+        data = {}
+    return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
+
+
+def create_sse_error_message(description: str, error_code: int, msg: str):
+    return create_sse_message(
+        StreamEvents.ERROR.value,
+        {
+            "description": description,
+            "error_code": error_code,
+            "msg": msg,
+            "success": False,
+        },
+    )
+
+
+create_rate_limit_error_message = partial(
+    create_sse_error_message,
+    error_code=5204,
+    msg="Rate limit exceeded",
+)
+
+
+def stream_investigate_formatter(
+    call_stream: Generator[StreamMessage, None, None], runbooks
+):
+    try:
+        for message in call_stream:
+            if message.event == StreamEvents.ANSWER_END:
+                (text_response, sections) = process_response_into_sections(  # type: ignore
+                    message.data.get("content")
+                )
+
+                yield create_sse_message(
+                    StreamEvents.ANSWER_END.value,
+                    {
+                        "sections": sections or {},
+                        "analysis": text_response,
+                        "instructions": runbooks or [],
+                    },
+                )
+            else:
+                yield create_sse_message(message.event.value, message.data)
+    except litellm.exceptions.RateLimitError as e:
+        yield create_rate_limit_error_message(str(e))
+
+
+def stream_chat_formatter(
+    call_stream: Generator[StreamMessage, None, None],
+    followups: Optional[List[dict]] = None,
+):
+    try:
+        for message in call_stream:
+            if message.event == StreamEvents.ANSWER_END:
+                yield create_sse_message(
+                    StreamEvents.ANSWER_END.value,
+                    {
+                        "analysis": message.data.get("content"),
+                        "conversation_history": message.data.get("messages"),
+                        "follow_up_actions": followups,
+                    },
+                )
+            else:
+                yield create_sse_message(message.event.value, message.data)
+    except litellm.exceptions.RateLimitError as e:
+        yield create_rate_limit_error_message(str(e))
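
For reference, `create_sse_message` produces standard Server-Sent Events framing (an `event:` line, a `data:` line with a JSON payload, then a blank line). A small sketch; the `data` fields here are illustrative and not dictated by this file:

from holmes.utils.stream import StreamEvents, StreamMessage, create_sse_message

msg = StreamMessage(
    event=StreamEvents.START_TOOL,
    data={"tool_name": "prometheus_query", "id": "call_1"},  # illustrative payload
)
print(create_sse_message(msg.event.value, msg.data), end="")
# event: start_tool_calling
# data: {"tool_name": "prometheus_query", "id": "call_1"}
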