holmesgpt 0.14.4a0__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff compares the contents of publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of holmesgpt might be problematic.

Files changed (37)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +12 -10
  3. holmes/common/env_vars.py +22 -0
  4. holmes/config.py +51 -4
  5. holmes/core/conversations.py +3 -2
  6. holmes/core/llm.py +226 -72
  7. holmes/core/openai_formatting.py +13 -0
  8. holmes/core/supabase_dal.py +33 -42
  9. holmes/core/tool_calling_llm.py +185 -282
  10. holmes/core/tools.py +21 -1
  11. holmes/core/tools_utils/token_counting.py +2 -1
  12. holmes/core/tools_utils/tool_context_window_limiter.py +32 -30
  13. holmes/core/truncation/compaction.py +59 -0
  14. holmes/core/truncation/input_context_window_limiter.py +218 -0
  15. holmes/interactive.py +17 -7
  16. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  17. holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
  18. holmes/plugins/toolsets/__init__.py +4 -0
  19. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +0 -1
  20. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
  21. holmes/plugins/toolsets/grafana/grafana_api.py +1 -1
  22. holmes/plugins/toolsets/investigator/core_investigation.py +34 -24
  23. holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  24. holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
  25. holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
  26. holmes/plugins/toolsets/prometheus/prometheus.py +1 -1
  27. holmes/plugins/toolsets/robusta/robusta.py +35 -8
  28. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +4 -3
  29. holmes/plugins/toolsets/service_discovery.py +1 -1
  30. holmes/plugins/toolsets/servicenow/servicenow.py +0 -1
  31. holmes/utils/stream.py +31 -1
  32. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/METADATA +6 -2
  33. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/RECORD +36 -31
  34. holmes/core/performance_timing.py +0 -72
  35. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/LICENSE.txt +0 -0
  36. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/WHEEL +0 -0
  37. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/entry_points.txt +0 -0
holmes/__init__.py CHANGED
@@ -1,5 +1,5 @@
 # This is patched by github actions during release
-__version__ = "0.14.4-alpha"
+__version__ = "0.16.0"
 
 # Re-export version functions from version module for backward compatibility
 from .version import (
holmes/clients/robusta_client.py CHANGED
@@ -1,8 +1,8 @@
 import logging
-from typing import List, Optional, Dict, Any
+from typing import Optional, Dict, Any
 import requests  # type: ignore
 from functools import cache
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict
 from holmes.common.env_vars import ROBUSTA_API_ENDPOINT
 
 HOLMES_GET_INFO_URL = f"{ROBUSTA_API_ENDPOINT}/api/holmes/get_info"
@@ -14,13 +14,15 @@ class HolmesInfo(BaseModel):
     latest_version: Optional[str] = None
 
 
-class RobustaModelsResponse(BaseModel):
+class RobustaModel(BaseModel):
     model_config = ConfigDict(extra="ignore")
-    models: List[str]
-    models_args: Dict[str, Any] = Field(
-        default_factory=dict, alias="models_holmes_args"
-    )
-    default_model: Optional[str] = None
+    model: str
+    holmes_args: Optional[dict[str, Any]] = None
+    is_default: bool = False
+
+
+class RobustaModelsResponse(BaseModel):
+    models: Dict[str, RobustaModel]
 
 
 @cache
@@ -30,13 +32,13 @@ def fetch_robusta_models(
     try:
         session_request = {"session_token": token, "account_id": account_id}
         resp = requests.post(
-            f"{ROBUSTA_API_ENDPOINT}/api/llm/models",
+            f"{ROBUSTA_API_ENDPOINT}/api/llm/models/v2",
            json=session_request,
            timeout=10,
        )
         resp.raise_for_status()
         response_json = resp.json()
-        return RobustaModelsResponse(**response_json)
+        return RobustaModelsResponse(**{"models": response_json})
     except Exception:
         logging.exception("Failed to fetch robusta models")
         return None
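Note on the API change above: fetch_robusta_models now wraps the raw JSON as {"models": response_json}, which implies the v2 endpoint returns a flat mapping of model name to model entry. A minimal sketch of that parsing; the sample payload is hypothetical (model names and values are illustrative, not from the release):

from typing import Any, Dict, Optional
from pydantic import BaseModel, ConfigDict


class RobustaModel(BaseModel):
    model_config = ConfigDict(extra="ignore")
    model: str
    holmes_args: Optional[dict[str, Any]] = None
    is_default: bool = False


class RobustaModelsResponse(BaseModel):
    models: Dict[str, RobustaModel]


# Hypothetical v2 payload: a flat name -> entry mapping (values illustrative)
response_json = {
    "gpt-4.1": {"model": "gpt-4.1", "is_default": True},
    "opus": {"model": "anthropic/claude-opus-4-1-20250805", "holmes_args": {"thinking": "high"}},
}
parsed = RobustaModelsResponse(**{"models": response_json})
default_key = next((k for k, m in parsed.models.items() if m.is_default), None)
print(default_key)  # -> gpt-4.1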
holmes/common/env_vars.py CHANGED
@@ -2,6 +2,16 @@ import os
 import json
 from typing import Optional
 
+# Recommended models for different providers
+RECOMMENDED_OPENAI_MODEL = "gpt-4.1"
+RECOMMENDED_ANTHROPIC_MODEL = "anthropic/claude-opus-4-1-20250805"
+
+# Default model for HolmesGPT
+DEFAULT_MODEL = RECOMMENDED_OPENAI_MODEL
+FALLBACK_CONTEXT_WINDOW_SIZE = (
+    200000  # Fallback context window size if it can't be determined from the model
+)
+
 
 def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:
     env_value = os.environ.get(env_var)
@@ -38,6 +48,7 @@ DEVELOPMENT_MODE = load_bool("DEVELOPMENT_MODE", False)
 SENTRY_DSN = os.environ.get("SENTRY_DSN", "")
 SENTRY_TRACES_SAMPLE_RATE = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.0"))
 
+EXTRA_HEADERS = os.environ.get("EXTRA_HEADERS", "")
 THINKING = os.environ.get("THINKING", "")
 REASONING_EFFORT = os.environ.get("REASONING_EFFORT", "").strip().lower()
 TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.00000001"))
@@ -82,8 +93,19 @@ TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
     os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
 )
 
+# Absolute max tokens to allocate for a single tool response
+TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS = 25000
+
 MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION = int(
     os.environ.get("MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION", 3000)
 )
 
+ENABLE_CONVERSATION_HISTORY_COMPACTION = load_bool(
+    "ENABLE_CONVERSATION_HISTORY_COMPACTION", default=True
+)
+
 DISABLE_PROMETHEUS_TOOLSET = load_bool("DISABLE_PROMETHEUS_TOOLSET", False)
+
+RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION = load_bool(
+    "RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION", True
+)
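The new toggles above (ENABLE_CONVERSATION_HISTORY_COMPACTION, RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION, DISABLE_PROMETHEUS_TOOLSET) all go through load_bool, whose body is cut off in this diff. A plausible reconstruction, shown only to illustrate the parsing; this is an assumption, not the released code:

import json
import os
from typing import Optional


def load_bool(env_var: str, default: Optional[bool]) -> Optional[bool]:
    env_value = os.environ.get(env_var)
    if env_value is None:
        return default
    # Assumed behavior: accept "true"/"false" in any case, parsed like JSON
    return json.loads(env_value.lower())


os.environ["ENABLE_CONVERSATION_HISTORY_COMPACTION"] = "false"
assert load_bool("ENABLE_CONVERSATION_HISTORY_COMPACTION", default=True) is False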
holmes/config.py CHANGED
@@ -45,6 +45,9 @@ class SupportedTicketSources(str, Enum):
 
 class Config(RobustaBaseConfig):
     model: Optional[str] = None
+    api_key: Optional[SecretStr] = (
+        None  # if None, read from OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT env var
+    )
     api_base: Optional[str] = None
     api_version: Optional[str] = None
     fast_model: Optional[str] = None
@@ -95,6 +98,7 @@ class Config(RobustaBaseConfig):
     mcp_servers: Optional[dict[str, dict[str, Any]]] = None
 
     _server_tool_executor: Optional[ToolExecutor] = None
+    _agui_tool_executor: Optional[ToolExecutor] = None
 
     # TODO: Separate those fields to facade class, this shouldn't be part of the config.
     _toolset_manager: Optional[ToolsetManager] = PrivateAttr(None)
@@ -242,6 +246,23 @@ class Config(RobustaBaseConfig):
         )
         return ToolExecutor(cli_toolsets)
 
+    def create_agui_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
+        """
+        Creates ToolExecutor for the AG-UI server endpoints
+        """
+
+        if self._agui_tool_executor:
+            return self._agui_tool_executor
+
+        # Use same toolset as CLI for AG-UI front-end.
+        agui_toolsets = self.toolset_manager.list_console_toolsets(
+            dal=dal, refresh_status=True
+        )
+
+        self._agui_tool_executor = ToolExecutor(agui_toolsets)
+
+        return self._agui_tool_executor
+
     def create_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
         """
         Creates ToolExecutor for the server endpoints
@@ -273,6 +294,19 @@ class Config(RobustaBaseConfig):
             tool_executor, self.max_steps, self._get_llm(tracer=tracer)
         )
 
+    def create_agui_toolcalling_llm(
+        self,
+        dal: Optional["SupabaseDal"] = None,
+        model: Optional[str] = None,
+        tracer=None,
+    ) -> "ToolCallingLLM":
+        tool_executor = self.create_agui_tool_executor(dal)
+        from holmes.core.tool_calling_llm import ToolCallingLLM
+
+        return ToolCallingLLM(
+            tool_executor, self.max_steps, self._get_llm(model, tracer)
+        )
+
     def create_toolcalling_llm(
         self,
         dal: Optional["SupabaseDal"] = None,
@@ -441,7 +475,8 @@ class Config(RobustaBaseConfig):
     # TODO: move this to the llm model registry
     def _get_llm(self, model_key: Optional[str] = None, tracer=None) -> "DefaultLLM":
         sentry_sdk.set_tag("requested_model", model_key)
-        model_params = self.llm_model_registry.get_model_params(model_key)
+        model_entry = self.llm_model_registry.get_model_params(model_key)
+        model_params = model_entry.model_dump(exclude_none=True)
         api_base = self.api_base
         api_version = self.api_version
 
@@ -453,6 +488,8 @@ class Config(RobustaBaseConfig):
             api_key = f"{account_id} {token}"
         else:
             api_key = model_params.pop("api_key", None)
+            if api_key is not None:
+                api_key = api_key.get_secret_value()
 
         model = model_params.pop("model")
         # It's ok if the model does not have api base and api version, which are defaults to None.
@@ -463,10 +500,20 @@ class Config(RobustaBaseConfig):
         api_version = model_params.pop("api_version", api_version)
         model_name = model_params.pop("name", None) or model_key or model
         sentry_sdk.set_tag("model_name", model_name)
-        logging.info(f"Creating LLM with model: {model_name}")
-        return DefaultLLM(
-            model, api_key, api_base, api_version, model_params, tracer, model_name
+        llm = DefaultLLM(
+            model=model,
+            api_key=api_key,
+            api_base=api_base,
+            api_version=api_version,
+            args=model_params,
+            tracer=tracer,
+            name=model_name,
+            is_robusta_model=is_robusta_model,
         )  # type: ignore
+        logging.info(
+            f"Using model: {model_name} ({llm.get_context_window_size():,} total tokens, {llm.get_maximum_output_token():,} output tokens)"
+        )
+        return llm
 
     def get_models_list(self) -> List[str]:
         if self.llm_model_registry and self.llm_model_registry.models:
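The api_key handling above pairs with the new api_key: Optional[SecretStr] field on Config: pydantic masks a SecretStr everywhere except through an explicit accessor, so _get_llm must call get_secret_value() before the key reaches the LLM client. A standalone illustration of that pydantic behavior (not HolmesGPT code; the key string is fake):

from typing import Optional

from pydantic import BaseModel, SecretStr


class DemoConfig(BaseModel):  # stand-in for holmes.config.Config
    api_key: Optional[SecretStr] = None


cfg = DemoConfig(api_key="sk-example-not-a-real-key")
print(cfg.api_key)                     # **********  (masked, safe to log)
print(cfg.api_key.get_secret_value())  # the raw key, for the actual API call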
holmes/core/conversations.py CHANGED
@@ -26,7 +26,8 @@ def calculate_tool_size(
         return DEFAULT_TOOL_SIZE
 
     context_window = ai.llm.get_context_window_size()
-    message_size_without_tools = ai.llm.count_tokens_for_message(messages_without_tools)
+    tokens = ai.llm.count_tokens(messages_without_tools)
+    message_size_without_tools = tokens.total_tokens
     maximum_output_token = ai.llm.get_maximum_output_token()
 
     tool_size = min(
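Only a fragment of calculate_tool_size is visible here, but together with the new caps in env_vars.py it implies a per-tool token budget roughly like the sketch below. The exact min() expression is an inference from the visible names, not the released code:

# Caps mirrored from env_vars.py in this release; their combination is assumed.
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = 15.0
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS = 25000


def estimate_tool_size(context_window: int, message_tokens: int, max_output_tokens: int) -> int:
    remaining = context_window - message_tokens - max_output_tokens
    return max(0, min(
        int(context_window * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT / 100),
        TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS,
        remaining,
    ))


# 200k-token window, 20k of messages, 8k reserved for output:
print(estimate_tool_size(200_000, 20_000, 8_000))  # -> 25000 (absolute cap beats the 15% cap of 30000)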
@@ -372,13 +373,13 @@ def build_chat_messages(
     )
 
     ask = add_global_instructions_to_user_prompt(ask, global_instructions)
-
     conversation_history.append(  # type: ignore
         {
             "role": "user",
             "content": ask,
         },
     )
+
     number_of_tools = len(
         [message for message in conversation_history if message.get("role") == "tool"]  # type: ignore
     )
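For context, number_of_tools in the hunk above simply counts tool-role messages in an OpenAI-style history; its downstream use lies outside this hunk. The history below is illustrative:

conversation_history = [
    {"role": "system", "content": "You are HolmesGPT."},
    {"role": "user", "content": "why is my pod crash-looping?"},
    {"role": "assistant", "content": None, "tool_calls": [{"id": "call_1"}]},
    {"role": "tool", "tool_call_id": "call_1", "content": "kubectl describe output ..."},
]
number_of_tools = len(
    [message for message in conversation_history if message.get("role") == "tool"]
)
print(number_of_tools)  # -> 1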