holmesgpt 0.14.1a0__py3-none-any.whl → 0.14.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of holmesgpt might be problematic.

holmes/__init__.py CHANGED
@@ -1,5 +1,5 @@
  # This is patched by github actions during release
- __version__ = "0.14.1-alpha"
+ __version__ = "0.14.2"

  # Re-export version functions from version module for backward compatibility
  from .version import (
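The only change in this module is the release version. A quick sanity check after upgrading (assumes the wheel is installed in the current environment):

```python
import holmes

print(holmes.__version__)  # expected: "0.14.2"
```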
@@ -1,8 +1,8 @@
  import logging
- from typing import List, Optional
+ from typing import List, Optional, Dict, Any
  import requests  # type: ignore
  from functools import cache
- from pydantic import BaseModel, ConfigDict
+ from pydantic import BaseModel, ConfigDict, Field
  from holmes.common.env_vars import ROBUSTA_API_ENDPOINT

  HOLMES_GET_INFO_URL = f"{ROBUSTA_API_ENDPOINT}/api/holmes/get_info"
@@ -17,6 +17,9 @@ class HolmesInfo(BaseModel):
  class RobustaModelsResponse(BaseModel):
      model_config = ConfigDict(extra="ignore")
      models: List[str]
+     models_args: Dict[str, Any] = Field(
+         default_factory=dict, alias="models_holmes_args"
+     )
      default_model: Optional[str] = None
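The new `models_args` field is filled from the payload key `models_holmes_args` (its pydantic alias) and falls back to an empty dict when that key is absent. A minimal sketch of the parsing behaviour; the payload and model names are illustrative, not taken from the package:

```python
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, ConfigDict, Field


class RobustaModelsResponse(BaseModel):
    model_config = ConfigDict(extra="ignore")
    models: List[str]
    models_args: Dict[str, Any] = Field(default_factory=dict, alias="models_holmes_args")
    default_model: Optional[str] = None


# Illustrative payload: per-model args arrive under the aliased key.
resp = RobustaModelsResponse.model_validate(
    {
        "models": ["model-a", "model-b"],
        "models_holmes_args": {"model-a": {"max_context_size": 200_000}},
    }
)
print(resp.models_args.get("model-a"))  # {'max_context_size': 200000}
print(resp.models_args.get("model-b"))  # None – "model-b" has no per-model args
```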
holmes/common/env_vars.py CHANGED
@@ -73,11 +73,11 @@ LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
  # For CLI only, enable user approval for potentially sensitive commands that would otherwise be rejected
  ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)

- MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 200))
+ MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 100))

  # Limit each tool response to N% of the total context window.
  # Number between 0 and 100
  # Setting to either 0 or any number above 100 disables the logic that limits tool response size
  TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
-     os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 10)
+     os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
  )
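Two defaults change here: `MAX_GRAPH_POINTS` drops from 200 to 100, and `TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT` rises from 10 to 15. A minimal sketch of pinning the previous values through the environment, assuming the variables are set before `holmes.common.env_vars` is first imported (both are read once at import time):

```python
import os

# Pin the pre-0.14.2 defaults before Holmes reads its environment.
os.environ["MAX_GRAPH_POINTS"] = "200"
os.environ["TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT"] = "10"

import holmes.common.env_vars  # noqa: E402  (import only after setting the env)
```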
holmes/config.py CHANGED
@@ -131,7 +131,7 @@ class Config(RobustaBaseConfig):
      def log_useful_info(self):
          if self.llm_model_registry and self.llm_model_registry.models:
              logging.info(
-                 f"loaded models: {list(self.llm_model_registry.models.keys())}"
+                 f"Loaded models: {list(self.llm_model_registry.models.keys())}"
              )
          else:
              logging.warning("No llm models were loaded")
holmes/core/llm.py CHANGED
@@ -3,7 +3,7 @@ import logging
  from abc import abstractmethod
  from typing import Any, Dict, List, Optional, Type, Union, TYPE_CHECKING

- from litellm.types.utils import ModelResponse
+ from litellm.types.utils import ModelResponse, TextCompletionResponse
  import sentry_sdk

  from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
@@ -90,9 +90,13 @@ class DefaultLLM(LLM):
          self.args = args or {}
          self.tracer = tracer
          self.name = name
-
+         self.update_custom_args()
          self.check_llm(self.model, self.api_key, self.api_base, self.api_version)

+     def update_custom_args(self):
+         self.max_context_size = self.args.get("custom_args", {}).get("max_context_size")
+         self.args.pop("custom_args", None)
+
      def check_llm(
          self,
          model: str,
@@ -178,6 +182,9 @@ class DefaultLLM(LLM):
          return list(dict.fromkeys(names_to_try))

      def get_context_window_size(self) -> int:
+         if self.max_context_size:
+             return self.max_context_size
+
          if OVERRIDE_MAX_CONTENT_SIZE:
              logging.debug(
                  f"Using override OVERRIDE_MAX_CONTENT_SIZE {OVERRIDE_MAX_CONTENT_SIZE}"
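Taken together, the two new blocks let a per-model `custom_args.max_context_size` short-circuit context-window detection: `update_custom_args()` pops `custom_args` out of the LiteLLM kwargs and keeps `max_context_size`, and `get_context_window_size()` returns it when set. A simplified, self-contained sketch of that flow (not the real class, which also consults `OVERRIDE_MAX_CONTENT_SIZE` and litellm's model metadata):

```python
from typing import Any, Dict, Optional


class ContextWindowSketch:
    """Simplified stand-in for DefaultLLM's custom_args handling."""

    def __init__(self, args: Optional[Dict[str, Any]] = None):
        self.args = args or {}
        # Mirrors update_custom_args(): remember max_context_size, then drop
        # custom_args so it is never forwarded to the completion call.
        self.max_context_size = self.args.get("custom_args", {}).get("max_context_size")
        self.args.pop("custom_args", None)

    def get_context_window_size(self) -> int:
        if self.max_context_size:
            return self.max_context_size
        return 128_000  # fallback; the real code looks up the model's metadata


llm = ContextWindowSketch({"custom_args": {"max_context_size": 32_000}, "temperature": 0})
assert llm.get_context_window_size() == 32_000
assert "custom_args" not in llm.args
```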
@@ -424,7 +431,8 @@

          for model in robusta_models.models:
              logging.info(f"Loading Robusta AI model: {model}")
-             self._llms[model] = self._create_robusta_model_entry(model)
+             args = robusta_models.models_args.get(model)
+             self._llms[model] = self._create_robusta_model_entry(model, args)

          if robusta_models.default_model:
              logging.info(
@@ -492,7 +500,7 @@
              )

          model_key, first_model_params = next(iter(self._llms.items()))
-         logging.info(f"Using first available model: {model_key}")
+         logging.debug(f"Using first available model: {model_key}")
          return first_model_params.copy()

      def get_llm(self, name: str) -> LLM:  # TODO: fix logic
@@ -509,12 +517,15 @@

          return models

-     def _create_robusta_model_entry(self, model_name: str) -> dict[str, Any]:
+     def _create_robusta_model_entry(
+         self, model_name: str, args: Optional[dict[str, Any]] = None
+     ) -> dict[str, Any]:
          return self._create_model_entry(
              model="gpt-4o",  # Robusta AI model is using openai like API.
              model_name=model_name,
              base_url=f"{ROBUSTA_API_ENDPOINT}/llm/{model_name}",
              is_robusta_model=True,
+             args=args or {},
          )

      def _create_model_entry(
@@ -523,10 +534,37 @@
          model_name: str,
          base_url: Optional[str] = None,
          is_robusta_model: Optional[bool] = None,
+         args: Optional[dict[str, Any]] = None,
      ) -> dict[str, Any]:
-         return {
+         entry = {
              "name": model_name,
              "base_url": base_url,
              "is_robusta_model": is_robusta_model,
              "model": model,
          }
+         if args:
+             entry["custom_args"] = args  # type: ignore[assignment]
+
+         return entry
+
+
+ def get_llm_usage(
+     llm_response: Union[ModelResponse, CustomStreamWrapper, TextCompletionResponse],
+ ) -> dict:
+     usage: dict = {}
+     if (
+         (
+             isinstance(llm_response, ModelResponse)
+             or isinstance(llm_response, TextCompletionResponse)
+         )
+         and hasattr(llm_response, "usage")
+         and llm_response.usage
+     ):  # type: ignore
+         usage["prompt_tokens"] = llm_response.usage.prompt_tokens  # type: ignore
+         usage["completion_tokens"] = llm_response.usage.completion_tokens  # type: ignore
+         usage["total_tokens"] = llm_response.usage.total_tokens  # type: ignore
+     elif isinstance(llm_response, CustomStreamWrapper):
+         complete_response = litellm.stream_chunk_builder(chunks=llm_response)  # type: ignore
+         if complete_response:
+             return get_llm_usage(complete_response)
+     return usage
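`get_llm_usage` is a new module-level helper that normalises token accounting across the response types litellm can return: `ModelResponse` and `TextCompletionResponse` objects are read directly, while streaming `CustomStreamWrapper` responses are first collapsed with `litellm.stream_chunk_builder`. A hedged usage sketch; the model name and prompt are illustrative and the call assumes litellm is configured with a valid API key:

```python
import litellm
from holmes.core.llm import get_llm_usage

# Illustrative completion call; any litellm-supported model behaves the same.
response = litellm.completion(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "ping"}],
)

usage = get_llm_usage(response)
# Expected shape: {"prompt_tokens": ..., "completion_tokens": ..., "total_tokens": ...}
print(usage)
```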
@@ -27,7 +27,7 @@ from holmes.core.investigation_structured_output import (
      is_response_an_incorrect_tool_call,
  )
  from holmes.core.issue import Issue
- from holmes.core.llm import LLM
+ from holmes.core.llm import LLM, get_llm_usage
  from holmes.core.performance_timing import PerformanceTiming
  from holmes.core.resource_instruction import ResourceInstructions
  from holmes.core.runbooks import RunbookManager
@@ -422,7 +422,11 @@ class ToolCallingLLM:
              )
              costs.total_cost += post_processing_cost

+         self.llm.count_tokens_for_message(messages)
          perf_timing.end(f"- completed in {i} iterations -")
+         metadata["usage"] = get_llm_usage(full_response)
+         metadata["max_tokens"] = max_context_size
+         metadata["max_output_tokens"] = maximum_output_token
          return LLMResult(
              result=post_processed_response,
              unprocessed_result=raw_response,
@@ -863,6 +867,10 @@ class ToolCallingLLM:

          tools_to_call = getattr(response_message, "tool_calls", None)
          if not tools_to_call:
+             self.llm.count_tokens_for_message(messages)
+             metadata["usage"] = get_llm_usage(full_response)
+             metadata["max_tokens"] = max_context_size
+             metadata["max_output_tokens"] = maximum_output_token
              yield StreamMessage(
                  event=StreamEvents.ANSWER_END,
                  data={
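Both the blocking and the streaming paths now attach the same three keys to the response metadata just before finishing: token usage from `get_llm_usage`, the model's context-window size, and the maximum output tokens. A small sketch of the resulting dict shape as a consumer might inspect it (the numbers are illustrative, not from a real run):

```python
# Shape of the metadata recorded in 0.14.2; values are illustrative.
metadata = {
    "usage": {"prompt_tokens": 1850, "completion_tokens": 240, "total_tokens": 2090},
    "max_tokens": 128_000,       # context window (max_context_size)
    "max_output_tokens": 4096,   # maximum_output_token
}

pct_used = 100 * metadata["usage"]["total_tokens"] / metadata["max_tokens"]
print(f"Context window used: {pct_used:.1f}%")
```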
@@ -464,12 +464,12 @@ class ToolsetManager:

          logger = logging.getLogger(__name__)

-         logger.info(
+         logger.debug(
              f"Starting fast_model injection. global_fast_model={self.global_fast_model}"
          )

          if not self.global_fast_model:
-             logger.info("No global_fast_model configured, skipping injection")
+             logger.debug("No global_fast_model configured, skipping injection")
              return

          injected_count = 0
@@ -11,6 +11,7 @@
  * IMPORTANT: ALWAYS inform the user about what logs you fetched. For example: "Here are pod logs for ..."
  * IMPORTANT: If logs commands have limits mention them. For example: "Showing last 100 lines of logs:"
  * IMPORTANT: If a filter was used, mention the filter. For example: "Logs filtered for 'error':"
+ * IMPORTANT: If a date range was used (even if just the default one and you didn't specify the parameter), mention the date range. For example: "Logs from last 1 hour..."

  {% if loki_ts and loki_ts.status == "enabled" -%}
  * For any logs, including for investigating kubernetes problems, use Loki
@@ -34,7 +35,15 @@ Tools to search and fetch logs from Coralogix.
  ### datadog/logs
  #### Datadog Logs Toolset
  Tools to search and fetch logs from Datadog.
- {% include '_default_log_prompt.jinja2' %}
+ * Use the tool `fetch_pod_logs` to access an application's logs.
+ * Do fetch application logs yourself and DO NOT ask users to do so
+ * If you have an alert/monitor try to figure out the time it fired
+ ** Then, use `start_time=-300` (5 minutes before `end_time`) and `end_time=<time monitor started firing>` when calling `fetch_pod_logs`.
+ ** If there are too many logs, or not enough, narrow or widen the timestamps
+ * If the user did not explicitly ask about a given timeframe, ignore the `start_time` and `end_time` so it will use the default.
+ * IMPORTANT: ALWAYS inform the user about the actual time period fetched (e.g., "Looking at logs from the last <X> days")
+ * IMPORTANT: If a limit was applied, ALWAYS tell the user how many logs were shown vs total (e.g., "Showing latest <Y> of <Z> logs")
+ * IMPORTANT: If any filters were applied, ALWAYS mention them explicitly
  {%- elif k8s_yaml_ts and k8s_yaml_ts.status == "enabled" -%}
  ### kubernetes/logs
  #### Kubernetes Logs Toolset