holmesgpt 0.13.3a0__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (86)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +15 -4
  3. holmes/common/env_vars.py +8 -1
  4. holmes/config.py +66 -139
  5. holmes/core/investigation.py +1 -2
  6. holmes/core/llm.py +295 -52
  7. holmes/core/models.py +2 -0
  8. holmes/core/safeguards.py +4 -4
  9. holmes/core/supabase_dal.py +14 -8
  10. holmes/core/tool_calling_llm.py +202 -177
  11. holmes/core/tools.py +260 -25
  12. holmes/core/tools_utils/data_types.py +81 -0
  13. holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
  14. holmes/core/tools_utils/tool_executor.py +2 -2
  15. holmes/core/toolset_manager.py +150 -3
  16. holmes/core/tracing.py +6 -1
  17. holmes/core/transformers/__init__.py +23 -0
  18. holmes/core/transformers/base.py +62 -0
  19. holmes/core/transformers/llm_summarize.py +174 -0
  20. holmes/core/transformers/registry.py +122 -0
  21. holmes/core/transformers/transformer.py +31 -0
  22. holmes/main.py +5 -0
  23. holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
  24. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  25. holmes/plugins/toolsets/aks.yaml +64 -0
  26. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
  27. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
  28. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
  29. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
  30. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  31. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
  32. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
  33. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
  34. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
  35. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
  36. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
  37. holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
  38. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  39. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  40. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  41. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  42. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +345 -207
  43. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
  44. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +96 -32
  45. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +10 -10
  46. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +21 -22
  47. holmes/plugins/toolsets/git.py +22 -22
  48. holmes/plugins/toolsets/grafana/common.py +14 -2
  49. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +473 -0
  50. holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
  51. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +5 -4
  52. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  53. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +662 -290
  54. holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
  55. holmes/plugins/toolsets/internet/internet.py +3 -3
  56. holmes/plugins/toolsets/internet/notion.py +3 -3
  57. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  58. holmes/plugins/toolsets/kafka.py +18 -18
  59. holmes/plugins/toolsets/kubernetes.yaml +58 -0
  60. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  61. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  62. holmes/plugins/toolsets/logging_utils/logging_api.py +1 -1
  63. holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
  64. holmes/plugins/toolsets/newrelic.py +8 -8
  65. holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
  66. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  67. holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
  68. holmes/plugins/toolsets/prometheus/prometheus.py +841 -351
  69. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +39 -2
  70. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  71. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
  72. holmes/plugins/toolsets/robusta/robusta.py +10 -10
  73. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
  74. holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
  75. holmes/plugins/toolsets/utils.py +88 -0
  76. holmes/utils/config_utils.py +91 -0
  77. holmes/utils/env.py +7 -0
  78. holmes/utils/holmes_status.py +2 -1
  79. holmes/utils/sentry_helper.py +41 -0
  80. holmes/utils/stream.py +9 -0
  81. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/METADATA +11 -15
  82. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/RECORD +85 -75
  83. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  84. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/LICENSE.txt +0 -0
  85. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/WHEEL +0 -0
  86. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/entry_points.txt +0 -0
holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 CHANGED
@@ -1,6 +1,18 @@
 
 # Prometheus/PromQL queries
-* ALWAYS call list_prometheus_rules to get the alert definition
+
+## Efficient Metric Discovery (when needed)
+* When you need to discover metrics, use `get_metric_names` with filters - it's the fastest method
+* Combine multiple patterns with regex OR (|) to reduce API calls:
+  - `{__name__=~"node_cpu.*|node_memory.*|node_disk.*"}` - get all node resource metrics in one call
+  - `{__name__=~"container.*|pod.*|kube.*"}` - get all Kubernetes-related metrics
+  - `{namespace=~"default|kube-system|monitoring"}` - metrics from multiple namespaces
+* Use `get_metric_metadata` after discovering names to get types/descriptions if needed
+* Use `get_label_values` to discover pods, namespaces, jobs: e.g., get_label_values(label="pod")
+* Only use `get_series` when you need full label sets (slower than other methods)
+
+## Alert Investigation & Query Execution
+* When investigating a Prometheus alert, ALWAYS call list_prometheus_rules to get the alert definition
 * Use Prometheus to query metrics from the alert promql
 * Use prometheus to execute promql queries with the tools `execute_prometheus_instant_query` and `execute_prometheus_range_query`
 * To create queries, use 'start_timestamp' and 'end_timestamp' as graphs start and end times
@@ -16,9 +28,34 @@
 ** Avoid global averages like `sum(rate(<metric>_sum)) / sum(rate(<metric>_count))` because it hides data and is not generally informative
 * Timestamps MUST be in string date format. For example: '2025-03-15 10:10:08.610862+00:00'
 * Post processing will parse your response, re-run the query from the tool output and create a chart visible to the user
-* Only generate and execute a prometheus query after checking what metrics are available with the `list_available_metrics` tool
+* When unsure about available metrics, use `get_metric_names` with appropriate filters (combine multiple patterns with | for efficiency). Then use `get_metric_metadata` if you need descriptions/types
 * Check that any node, service, pod, container, app, namespace, etc. mentioned in the query exist in the kubernetes cluster before making a query. Use any appropriate kubectl tool(s) for this
 * The toolcall will return no data to you. That is expected. You MUST however ensure that the query is successful.
+
+## Handling High-Cardinality Metrics
+* CRITICAL: When querying metrics that may return many time series (>10), ALWAYS use aggregation to limit results
+* ALWAYS use `topk()` or `bottomk()` to limit the number of series returned
+* Standard pattern for high-cardinality queries:
+  - Use `topk(5, <your_query>)` to get the top 5 series
+  - Example: `topk(5, rate(container_cpu_usage_seconds_total{namespace="default"}[5m]))`
+  - This prevents context overflow and focuses on the most relevant data
+* To also capture the aggregate of remaining series as "other":
+  ```
+  topk(5, rate(container_cpu_usage_seconds_total{namespace="default"}[5m]))
+  or
+  label_replace(
+    (sum(rate(container_cpu_usage_seconds_total{namespace="default"}[5m])) - sum(topk(5, rate(container_cpu_usage_seconds_total{namespace="default"}[5m])))),
+    "pod", "other", "", ""
+  )
+  ```
+* Common high-cardinality scenarios requiring topk():
+  - Pod-level metrics in namespaces with many pods
+  - Container-level CPU/memory metrics
+  - HTTP metrics with many endpoints or status codes
+  - Any query returning more than 10 time series
+* For initial exploration, use instant queries with `count()` to check cardinality:
+  - Example: `count(count by (pod) (container_cpu_usage_seconds_total{namespace="default"}))`
+  - If count > 10, use topk() in your range query
 * When doing queries, always extend the time range, to 15 min before and after the alert start time
 * ALWAYS embed the execution results into your answer
 * ALWAYS embed a Prometheus graph in the response. The graph should visualize data related to the incident.
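
To make the cardinality check concrete, here is a minimal Python sketch (an editorial illustration, not part of the diff) that runs the recommended `count()` instant query against the standard Prometheus HTTP API (`/api/v1/query`) and switches to `topk(5, ...)` when more than 10 series come back. The server URL and the 10-series threshold are illustrative assumptions.

```python
import requests

PROMETHEUS_URL = "http://localhost:9090"  # hypothetical server address

def series_cardinality(query: str) -> int:
    """Run an instant query and return the scalar it yields (0 if no data)."""
    resp = requests.get(
        f"{PROMETHEUS_URL}/api/v1/query", params={"query": query}, timeout=30
    )
    resp.raise_for_status()
    result = resp.json()["data"]["result"]
    # count(count by (pod) (...)) yields a single sample; value is [ts, "<count>"]
    return int(float(result[0]["value"][1])) if result else 0

selector = 'container_cpu_usage_seconds_total{namespace="default"}'
cardinality = series_cardinality(f"count(count by (pod) ({selector}))")

query = f"rate({selector}[5m])"
if cardinality > 10:
    query = f"topk(5, {query})"  # cap the series count, as the guidance above advises
```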
holmes/plugins/toolsets/prometheus/utils.py CHANGED
@@ -0,0 +1,28 @@
+import re
+from typing import Optional, Union
+
+
+def parse_duration_to_seconds(v: Optional[Union[str, float, int]]) -> Optional[float]:
+    if v is None:
+        return None
+    if isinstance(v, (int, float)):
+        return float(v)
+    s = v.strip().lower()
+    if s.isdigit():
+        return float(int(s))
+
+    units = {"s": 1, "m": 60, "h": 3600, "d": 86400}
+
+    # Check for partial time formats (e.g., 1h30m, 5m12s, 1d2h30m)
+    pattern = r"(\d+(?:\.\d+)?)(d|h|m|s)"
+    matches = re.findall(pattern, s)
+
+    if matches:
+        total_seconds = 0.0
+        for value_str, unit in matches:
+            value = float(value_str)
+            total_seconds += value * units[unit]
+        return float(int(total_seconds))
+
+    # fallback: try float seconds
+    return float(s)
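
A few illustrative calls (not part of the diff) showing how the new parser behaves; note that unit-suffixed inputs are truncated to whole seconds by the `float(int(...))` on the accumulated total:

```python
from holmes.plugins.toolsets.prometheus.utils import parse_duration_to_seconds

parse_duration_to_seconds(None)       # None
parse_duration_to_seconds(90)         # 90.0 (numbers pass through)
parse_duration_to_seconds("300")      # 300.0 (bare digits are seconds)
parse_duration_to_seconds("1h30m")    # 5400.0
parse_duration_to_seconds("1d2h30m")  # 95400.0
parse_duration_to_seconds("1.5h")     # 5400.0 (fractions allowed, then truncated)
parse_duration_to_seconds("2.5")      # 2.5 (no unit suffix: fallback float parse)
```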
holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py CHANGED
@@ -8,7 +8,7 @@ from holmes.core.tools import (
     StructuredToolResult,
     Tool,
     ToolParameter,
-    ToolResultStatus,
+    StructuredToolResultStatus,
     Toolset,
     ToolsetTag,
 )
@@ -79,7 +79,7 @@ class ListConfiguredClusters(BaseRabbitMQTool):
             if c.connection_status == ClusterConnectionStatus.SUCCESS
         ]
         return StructuredToolResult(
-            status=ToolResultStatus.SUCCESS, data=available_clusters
+            status=StructuredToolResultStatus.SUCCESS, data=available_clusters
         )
 
     def get_parameterized_one_liner(self, params) -> str:
@@ -112,12 +112,14 @@ class GetRabbitMQClusterStatus(BaseRabbitMQTool):
                 cluster_id=params.get("cluster_id")
             )
             result = get_cluster_status(cluster_config)
-            return StructuredToolResult(status=ToolResultStatus.SUCCESS, data=result)
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.SUCCESS, data=result
+            )
 
         except Exception as e:
             logging.info("Failed to process RabbitMQ cluster status", exc_info=True)
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Unexpected error fetching RabbitMQ cluster status: {str(e)}",
                 data=None,
             )
holmes/plugins/toolsets/robusta/robusta.py CHANGED
@@ -11,7 +11,7 @@ from holmes.core.tools import (
     Toolset,
     ToolsetTag,
 )
-from holmes.core.tools import StructuredToolResult, ToolResultStatus
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
 
 PARAM_FINDING_ID = "id"
 START_TIME = "start_datetime"
@@ -53,13 +53,13 @@ class FetchRobustaFinding(Tool):
             finding = self._fetch_finding(finding_id)
             if finding:
                 return StructuredToolResult(
-                    status=ToolResultStatus.SUCCESS,
+                    status=StructuredToolResultStatus.SUCCESS,
                     data=finding,
                     params=params,
                 )
             else:
                 return StructuredToolResult(
-                    status=ToolResultStatus.NO_DATA,
+                    status=StructuredToolResultStatus.NO_DATA,
                     data=f"Could not find a finding with finding_id={finding_id}",
                     params=params,
                 )
@@ -70,7 +70,7 @@ class FetchRobustaFinding(Tool):
             )
 
         return StructuredToolResult(
-            status=ToolResultStatus.ERROR,
+            status=StructuredToolResultStatus.ERROR,
            data=f"There was an internal error while fetching finding {finding_id}",
             params=params,
         )
@@ -122,13 +122,13 @@ class FetchResourceRecommendation(Tool):
             recommendations = self._resource_recommendation(params)
             if recommendations:
                 return StructuredToolResult(
-                    status=ToolResultStatus.SUCCESS,
+                    status=StructuredToolResultStatus.SUCCESS,
                     data=recommendations,
                     params=params,
                 )
             else:
                 return StructuredToolResult(
-                    status=ToolResultStatus.NO_DATA,
+                    status=StructuredToolResultStatus.NO_DATA,
                     data=f"Could not find recommendations for {params}",
                     params=params,
                 )
@@ -136,7 +136,7 @@ class FetchResourceRecommendation(Tool):
             msg = f"There was an internal error while fetching recommendations for {params}. {str(e)}"
             logging.exception(msg)
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 data=msg,
                 params=params,
             )
@@ -182,13 +182,13 @@ class FetchConfigurationChanges(Tool):
             changes = self._fetch_change_history(params)
             if changes:
                 return StructuredToolResult(
-                    status=ToolResultStatus.SUCCESS,
+                    status=StructuredToolResultStatus.SUCCESS,
                     data=changes,
                     params=params,
                 )
             else:
                 return StructuredToolResult(
-                    status=ToolResultStatus.NO_DATA,
+                    status=StructuredToolResultStatus.NO_DATA,
                     data=f"Could not find changes for {params}",
                     params=params,
                 )
@@ -196,7 +196,7 @@ class FetchConfigurationChanges(Tool):
             msg = f"There was an internal error while fetching changes for {params}. {str(e)}"
             logging.exception(msg)
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 data=msg,
                 params=params,
             )
holmes/plugins/toolsets/runbook/runbook_fetcher.py CHANGED
@@ -6,7 +6,7 @@ from holmes.core.tools import (
     StructuredToolResult,
     Tool,
     ToolParameter,
-    ToolResultStatus,
+    StructuredToolResultStatus,
     Toolset,
     ToolsetTag,
 )
@@ -52,7 +52,7 @@ class RunbookFetcher(Tool):
             )
             logging.error(err_msg)
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=err_msg,
                 params=params,
             )
@@ -96,7 +96,7 @@ class RunbookFetcher(Tool):
             </example>
             """)
             return StructuredToolResult(
-                status=ToolResultStatus.SUCCESS,
+                status=StructuredToolResultStatus.SUCCESS,
                 data=wrapped_content,
                 params=params,
             )
@@ -104,7 +104,7 @@ class RunbookFetcher(Tool):
             err_msg = f"Failed to read runbook {runbook_path}: {str(e)}"
             logging.error(err_msg)
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=err_msg,
                 params=params,
             )
holmes/plugins/toolsets/servicenow/servicenow.py CHANGED
@@ -11,7 +11,7 @@ from holmes.core.tools import (
 )
 
 from pydantic import BaseModel, PrivateAttr
-from holmes.core.tools import StructuredToolResult, ToolResultStatus
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
 from holmes.plugins.toolsets.utils import (
     process_timestamps_to_rfc3339,
     standard_start_datetime_tool_param_description,
@@ -86,9 +86,9 @@ class ServiceNowBaseTool(Tool):
         response.raise_for_status()
         res = response.json()
         return StructuredToolResult(
-            status=ToolResultStatus.SUCCESS
+            status=StructuredToolResultStatus.SUCCESS
             if res.get(field, [])
-            else ToolResultStatus.NO_DATA,
+            else StructuredToolResultStatus.NO_DATA,
             data=res,
             params=params,
         )
@@ -139,7 +139,7 @@ class ReturnChangesInTimerange(ServiceNowBaseTool):
         except Exception as e:
             logging.exception(self.get_parameterized_one_liner(params))
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 data=f"Exception {self.name}: {str(e)}",
                 params=params,
             )
@@ -173,7 +173,7 @@ class ReturnChange(ServiceNowBaseTool):
         except Exception as e:
             logging.exception(self.get_parameterized_one_liner(params))
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 data=f"Exception {self.name}: {str(e)}",
                 params=params,
             )
@@ -213,7 +213,7 @@ class ReturnChangesWithKeyword(ServiceNowBaseTool):
         except Exception as e:
             logging.exception(self.get_parameterized_one_liner(params))
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 data=f"Exception {self.name}: {str(e)}",
                 params=params,
             )
holmes/plugins/toolsets/utils.py CHANGED
@@ -1,5 +1,7 @@
 import datetime
+import math
 import time
+import re
 from typing import Dict, Optional, Tuple, Union
 
 from dateutil import parser
@@ -134,6 +136,92 @@ def process_timestamps_to_int(
     return (start, end)  # type: ignore
 
 
+def seconds_to_duration_string(seconds: int) -> str:
+    """Convert seconds into a compact duration string like '2h30m15s'.
+    Values under one minute render as seconds only (e.g. '45s').
+    """
+    if seconds < 0:
+        raise ValueError("seconds must be non-negative")
+
+    parts = []
+    weeks, seconds = divmod(seconds, 7 * 24 * 3600)
+    days, seconds = divmod(seconds, 24 * 3600)
+    hours, seconds = divmod(seconds, 3600)
+    minutes, seconds = divmod(seconds, 60)
+
+    if weeks:
+        parts.append(f"{weeks}w")
+    if days:
+        parts.append(f"{days}d")
+    if hours:
+        parts.append(f"{hours}h")
+    if minutes:
+        parts.append(f"{minutes}m")
+    if seconds or not parts:
+        parts.append(f"{seconds}s")
+
+    return "".join(parts)
+
+
+def duration_string_to_seconds(duration_string: str) -> int:
+    """Convert a duration string like '2h30m15s' or '300' into total seconds.
+    A bare integer string is treated as seconds.
+    """
+    if not duration_string:
+        raise ValueError("duration_string cannot be empty")
+
+    # Pure number? Assume seconds
+    if duration_string.isdigit():
+        return int(duration_string)
+
+    pattern = re.compile(r"(?P<value>\d+)(?P<unit>[wdhms])")
+    matches = pattern.findall(duration_string)
+    if not matches:
+        raise ValueError(f"Invalid duration string: {duration_string}")
+
+    unit_multipliers = {
+        "w": 7 * 24 * 3600,
+        "d": 24 * 3600,
+        "h": 3600,
+        "m": 60,
+        "s": 1,
+    }
+
+    total_seconds = 0
+    for value, unit in matches:
+        if unit not in unit_multipliers:
+            raise ValueError(f"Unknown unit: {unit}")
+        total_seconds += int(value) * unit_multipliers[unit]
+
+    return total_seconds
+
+
+def adjust_step_for_max_points(
+    time_range_seconds: int,
+    max_points: int,
+    step: Optional[int] = None,
+) -> int:
+    """
+    Adjusts the step parameter to ensure the number of data points doesn't exceed max_points.
+
+    Args:
+        time_range_seconds: The time range in seconds
+        max_points: The requested maximum number of data points
+        step: The requested step duration in seconds
+
+    Returns:
+        Adjusted step value in seconds that ensures points <= max_points
+    """
+    smallest_allowed_step = int(
+        math.ceil(float(time_range_seconds) / float(max_points))
+    )
+
+    if not step:
+        return smallest_allowed_step
+
+    return max(smallest_allowed_step, step)
+
+
 def get_param_or_raise(dict: Dict, param: str) -> str:
     value = dict.get(param)
     if not value:
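
As a quick illustration (not part of the diff) of how the three new helpers compose; the results follow directly from the arithmetic in the code above:

```python
from holmes.plugins.toolsets.utils import (
    adjust_step_for_max_points,
    duration_string_to_seconds,
    seconds_to_duration_string,
)

seconds_to_duration_string(9015)        # '2h30m15s'
duration_string_to_seconds("2h30m15s")  # 9015 (round-trips with the line above)
duration_string_to_seconds("300")       # 300 (a bare integer string is seconds)

# A 6-hour range capped at 300 points needs a step of at least
# ceil(21600 / 300) = 72s, so a requested 60s step is widened to 72:
adjust_step_for_max_points(time_range_seconds=6 * 3600, max_points=300, step=60)
```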
holmes/utils/config_utils.py CHANGED
@@ -0,0 +1,91 @@
+"""
+Configuration utility functions for HolmesGPT.
+"""
+
+from typing import List, Optional, TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from holmes.core.transformers import Transformer
+
+
+def merge_transformers(
+    base_transformers: Optional[List["Transformer"]],
+    override_transformers: Optional[List["Transformer"]],
+    only_merge_when_override_exists: bool = False,
+) -> Optional[List["Transformer"]]:
+    """
+    Merge transformer configurations with intelligent field-level merging.
+
+    Logic:
+    - Override transformers take precedence for existing fields
+    - Base transformers provide missing fields
+    - Merge at transformer-type level (e.g., "llm_summarize")
+
+    Args:
+        base_transformers: Base transformer configurations (e.g., global transformers)
+        override_transformers: Override transformer configurations (e.g., toolset transformers)
+        only_merge_when_override_exists: If True, only merge when override_transformers exist.
+
+    Returns:
+        Merged transformer configuration list or None if both inputs are None/empty
+    """
+    if not base_transformers and not override_transformers:
+        return None
+    if not base_transformers:
+        return override_transformers
+    if not override_transformers:
+        if only_merge_when_override_exists:
+            return None  # Don't apply base transformers if override doesn't exist
+        else:
+            return base_transformers  # Original behavior: return base transformers
+
+    # Convert lists to dicts keyed by transformer name for easier merging
+    base_dict = {}
+    for transformer in base_transformers:
+        base_dict[transformer.name] = transformer
+
+    override_dict = {}
+    for transformer in override_transformers:
+        override_dict[transformer.name] = transformer
+
+    # Merge configurations at field level
+    merged_transformers = []
+
+    # Start with all base transformer types
+    for transformer_name, base_transformer in base_dict.items():
+        if transformer_name in override_dict:
+            # Merge fields: override takes precedence, base provides missing fields
+            override_transformer = override_dict[transformer_name]
+            merged_config = dict(base_transformer.config)  # Start with base
+            merged_config.update(
+                override_transformer.config
+            )  # Override with specific fields
+
+            # IMPORTANT: Preserve global_fast_model from both base and override
+            # This ensures our injected global_fast_model settings aren't lost during merging
+            if "global_fast_model" in base_transformer.config:
+                merged_config["global_fast_model"] = base_transformer.config[
+                    "global_fast_model"
+                ]
+            if "global_fast_model" in override_transformer.config:
+                merged_config["global_fast_model"] = override_transformer.config[
+                    "global_fast_model"
+                ]
+
+            # Create new transformer with merged config
+            from holmes.core.transformers import Transformer
+
+            merged_transformer = Transformer(
+                name=transformer_name, config=merged_config
+            )
+            merged_transformers.append(merged_transformer)
+        else:
+            # No override, use base transformer as-is
+            merged_transformers.append(base_transformer)
+
+    # Add any override-only transformer types
+    for transformer_name, override_transformer in override_dict.items():
+        if transformer_name not in base_dict:
+            merged_transformers.append(override_transformer)
+
+    return merged_transformers
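
A sketch of the merge semantics (not part of the diff), assuming `Transformer` exposes the `name` and `config` attributes used above; the config keys `input_threshold` and `fast_model` are hypothetical examples rather than a documented schema:

```python
from holmes.core.transformers import Transformer
from holmes.utils.config_utils import merge_transformers

# Global default: summarize large tool outputs with a fast model (keys are hypothetical).
base = [Transformer(name="llm_summarize",
                    config={"input_threshold": 5000, "fast_model": "gpt-4o-mini"})]
# Toolset-level override: a lower threshold for one toolset only.
override = [Transformer(name="llm_summarize", config={"input_threshold": 1000})]

merged = merge_transformers(base, override)
# merged[0].config == {"input_threshold": 1000, "fast_model": "gpt-4o-mini"}
# i.e. the override wins field-by-field while the base fills in missing fields.
```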
holmes/utils/env.py CHANGED
@@ -6,6 +6,13 @@ from typing import Any, Optional
 from pydantic import SecretStr
 
 
+def environ_get_safe_int(env_var: str, default: str = "0") -> int:
+    try:
+        return max(int(os.environ.get(env_var, default)), 0)
+    except ValueError:
+        return int(default)
+
+
 def get_env_replacement(value: str) -> Optional[str]:
     env_patterns = re.findall(r"{{\s*env\.([^}]*)\s*}}", value)
 
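
For illustration (the environment variable name below is hypothetical), the clamping and fallback behavior works out as follows:

```python
import os

from holmes.utils.env import environ_get_safe_int

os.environ["HOLMES_MAX_TOKENS"] = "4096"
environ_get_safe_int("HOLMES_MAX_TOKENS")         # 4096
os.environ["HOLMES_MAX_TOKENS"] = "-5"
environ_get_safe_int("HOLMES_MAX_TOKENS")         # 0 (negatives clamp to zero)
os.environ["HOLMES_MAX_TOKENS"] = "not-a-number"
environ_get_safe_int("HOLMES_MAX_TOKENS", "100")  # 100 (ValueError falls back to default)
```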
holmes/utils/holmes_status.py CHANGED
@@ -1,3 +1,4 @@
+import json
 from holmes.core.supabase_dal import SupabaseDal
 from holmes.config import Config
 from holmes import get_version  # type: ignore
@@ -16,7 +17,7 @@ def update_holmes_status_in_db(dal: SupabaseDal, config: Config):
     dal.upsert_holmes_status(
         {
             "cluster_id": config.cluster_name,
-            "model": config.get_models_list(),
+            "model": json.dumps(config.get_models_list()),
             "version": get_version(),
         }
     )
holmes/utils/sentry_helper.py CHANGED
@@ -0,0 +1,41 @@
+import sentry_sdk
+from holmes.core.tools_utils.data_types import ToolCallResult, TruncationMetadata
+
+
+def capture_tool_truncations(truncations: list[TruncationMetadata]):
+    for truncation in truncations:
+        _capture_tool_truncation(truncation)
+
+
+def _capture_tool_truncation(truncation: TruncationMetadata):
+    sentry_sdk.capture_message(
+        f"Tool {truncation.tool_name} was truncated",
+        level="warning",
+        tags={
+            "tool_name": truncation.tool_name,
+            "tool_original_token_count": truncation.original_token_count,
+            "tool_new_token_count": truncation.end_index,
+        },
+    )
+
+
+def capture_toolcall_contains_too_many_tokens(
+    tool_call_result: ToolCallResult, token_count: int, max_allowed_token_count: int
+):
+    sentry_sdk.capture_message(
+        f"Tool call {tool_call_result.tool_name} contains too many tokens",
+        level="warning",
+        tags={
+            "tool_name": tool_call_result.tool_name,
+            "tool_original_token_count": token_count,
+            "tool_max_allowed_token_count": max_allowed_token_count,
+            "tool_description": tool_call_result.description,
+        },
+    )
+
+
+def capture_structured_output_incorrect_tool_call():
+    sentry_sdk.capture_message(
+        "Structured output incorrect tool call",
+        level="warning",
+    )
holmes/utils/stream.py CHANGED
@@ -5,6 +5,7 @@ import litellm
 from pydantic import BaseModel, Field
 from holmes.core.investigation_structured_output import process_response_into_sections
 from functools import partial
+import logging
 
 
 class StreamEvents(str, Enum):
@@ -61,6 +62,7 @@ def stream_investigate_formatter(
                 "sections": sections or {},
                 "analysis": text_response,
                 "instructions": runbooks or [],
+                "metadata": message.data.get("metadata") or {},
             },
         )
     else:
@@ -82,9 +84,16 @@ def stream_chat_formatter(
                     "analysis": message.data.get("content"),
                     "conversation_history": message.data.get("messages"),
                     "follow_up_actions": followups,
+                    "metadata": message.data.get("metadata") or {},
                 },
             )
         else:
             yield create_sse_message(message.event.value, message.data)
     except litellm.exceptions.RateLimitError as e:
         yield create_rate_limit_error_message(str(e))
+    except Exception as e:
+        logging.error(e)
+        if "Model is getting throttled" in str(e):  # happens for bedrock
+            yield create_rate_limit_error_message(str(e))
+        else:
+            yield create_sse_error_message(description=str(e), error_code=1, msg=str(e))
{holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: holmesgpt
-Version: 0.13.3a0
+Version: 0.14.1
 Summary:
 Author: Natan Yellin
 Author-email: natan@robusta.dev
@@ -8,7 +8,6 @@ Requires-Python: >=3.10,<4.0
 Classifier: Programming Language :: Python :: 3
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
-Requires-Dist: aiohttp (>=3.10.2,<4.0.0)
 Requires-Dist: azure-core (>=1.34.0,<2.0.0)
 Requires-Dist: azure-identity (>=1.23.0,<2.0.0)
 Requires-Dist: azure-mgmt-alertsmanagement (>=1.0.0,<2.0.0)
@@ -24,41 +23,30 @@ Requires-Dist: certifi (>=2024.7.4,<2025.0.0)
 Requires-Dist: colorlog (>=6.8.2,<7.0.0)
 Requires-Dist: confluent-kafka (>=2.6.1,<3.0.0)
 Requires-Dist: fastapi (>=0.116,<0.117)
-Requires-Dist: google-api-python-client (>=2.156.0,<3.0.0)
 Requires-Dist: humanize (>=4.9.0,<5.0.0)
 Requires-Dist: jinja2 (>=3.1.2,<4.0.0)
 Requires-Dist: kubernetes (>=32.0.1,<33.0.0)
-Requires-Dist: litellm (>=1.75.4,<2.0.0)
+Requires-Dist: litellm (==1.77.1)
 Requires-Dist: markdown (>=3.6,<4.0)
 Requires-Dist: markdownify (>=1.1.0,<2.0.0)
 Requires-Dist: mcp (==v1.12.2)
 Requires-Dist: openai (>=1.6.1,<1.100.0)
 Requires-Dist: opensearch-py (>=2.8.0,<3.0.0)
 Requires-Dist: postgrest (==0.16.8)
-Requires-Dist: prometrix (==0.2.3)
+Requires-Dist: prometrix (==0.2.5)
 Requires-Dist: prompt-toolkit (>=3.0.51,<4.0.0)
-Requires-Dist: protobuf (>=6.31.1)
 Requires-Dist: pydantic (>=2.7,<3.0)
-Requires-Dist: pydantic-settings (>=2.1.0,<3.0.0)
-Requires-Dist: pydash (>=8.0.1,<9.0.0)
 Requires-Dist: pygments (>=2.18.0,<3.0.0)
 Requires-Dist: pyodbc (>=5.0.1,<6.0.0)
-Requires-Dist: pytest-shared-session-scope (>=0.4.0,<0.5.0)
 Requires-Dist: python-benedict (>=0.33.1,<0.34.0)
-Requires-Dist: python_multipart (>=0.0.18,<0.0.19)
-Requires-Dist: pyyaml (>=6.0.1,<7.0.0)
 Requires-Dist: requests (>=2.32.4,<3.0.0)
 Requires-Dist: requests-aws4auth (>=1.3.1,<2.0.0)
 Requires-Dist: rich (>=13.7.1,<14.0.0)
 Requires-Dist: sentry-sdk[fastapi] (>=2.20.0,<3.0.0)
-Requires-Dist: setuptools (>=80.9.0,<81.0.0)
-Requires-Dist: slack-bolt (>=1.18.1,<2.0.0)
-Requires-Dist: starlette (==0.47.2)
 Requires-Dist: strenum (>=0.4.15,<0.5.0)
 Requires-Dist: supabase (>=2.5,<3.0)
 Requires-Dist: tenacity (>=9.1.2,<10.0.0)
 Requires-Dist: typer (>=0.15.4,<0.16.0)
-Requires-Dist: urllib3 (>=1.26.19,<2.0.0)
 Requires-Dist: uvicorn (>=0.30,<0.31)
 Description-Content-Type: text/markdown
 
@@ -223,6 +211,14 @@ You can save common settings and API Keys in a config file to avoid passing them
 You can save common settings and API keys in a config file for re-use. Place the config file in <code>~/.holmes/config.yaml</code> or pass it using the <code>--config</code> flag.
 
 You can view an example config file with all available settings [here](config.example.yaml).
+
+### Tool Output Transformers
+
+HolmesGPT supports **transformers** to process large tool outputs before sending them to your primary LLM. This feature helps manage context window limits while preserving essential information.
+
+The most common transformer is `llm_summarize`, which uses a fast secondary model to summarize lengthy outputs from tools like `kubectl describe`, log queries, or metrics collection.
+
+📖 **Learn more**: [Tool Output Transformers Documentation](docs/transformers.md)
 </details>
 
 ## 🔐 Data Privacy