holmesgpt 0.14.4a0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +12 -10
- holmes/common/env_vars.py +22 -0
- holmes/config.py +51 -4
- holmes/core/conversations.py +3 -2
- holmes/core/llm.py +226 -72
- holmes/core/openai_formatting.py +13 -0
- holmes/core/supabase_dal.py +33 -42
- holmes/core/tool_calling_llm.py +185 -282
- holmes/core/tools.py +21 -1
- holmes/core/tools_utils/token_counting.py +2 -1
- holmes/core/tools_utils/tool_context_window_limiter.py +32 -30
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +17 -7
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/toolsets/__init__.py +4 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +0 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/grafana/grafana_api.py +1 -1
- holmes/plugins/toolsets/investigator/core_investigation.py +34 -24
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1 -1
- holmes/plugins/toolsets/robusta/robusta.py +35 -8
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +4 -3
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -1
- holmes/utils/stream.py +31 -1
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/METADATA +6 -2
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/RECORD +36 -31
- holmes/core/performance_timing.py +0 -72
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/entry_points.txt +0 -0
holmes/__init__.py
CHANGED
holmes/clients/robusta_client.py
CHANGED
@@ -1,8 +1,8 @@
 import logging
-from typing import
+from typing import Optional, Dict, Any
 import requests  # type: ignore
 from functools import cache
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict
 from holmes.common.env_vars import ROBUSTA_API_ENDPOINT

 HOLMES_GET_INFO_URL = f"{ROBUSTA_API_ENDPOINT}/api/holmes/get_info"
@@ -14,13 +14,15 @@ class HolmesInfo(BaseModel):
     latest_version: Optional[str] = None


-class
+class RobustaModel(BaseModel):
     model_config = ConfigDict(extra="ignore")
-
-
-
-
-
+    model: str
+    holmes_args: Optional[dict[str, Any]] = None
+    is_default: bool = False
+
+
+class RobustaModelsResponse(BaseModel):
+    models: Dict[str, RobustaModel]


 @cache
@@ -30,13 +32,13 @@ def fetch_robusta_models(
     try:
         session_request = {"session_token": token, "account_id": account_id}
         resp = requests.post(
-            f"{ROBUSTA_API_ENDPOINT}/api/llm/models",
+            f"{ROBUSTA_API_ENDPOINT}/api/llm/models/v2",
             json=session_request,
             timeout=10,
         )
         resp.raise_for_status()
         response_json = resp.json()
-        return RobustaModelsResponse(**response_json)
+        return RobustaModelsResponse(**{"models": response_json})
     except Exception:
         logging.exception("Failed to fetch robusta models")
         return None
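The move from /api/llm/models to /api/llm/models/v2 changes the response shape: v2 returns a JSON object keyed by model name, with per-model metadata, so the client now wraps the raw JSON under a "models" key before validation. A minimal sketch of how such a payload parses, using the new model classes from the diff above and a hypothetical response body:

from typing import Any, Dict, Optional
from pydantic import BaseModel, ConfigDict

class RobustaModel(BaseModel):
    model_config = ConfigDict(extra="ignore")
    model: str
    holmes_args: Optional[dict[str, Any]] = None
    is_default: bool = False

class RobustaModelsResponse(BaseModel):
    models: Dict[str, RobustaModel]

# Hypothetical v2 payload: keys are model display names, values carry
# the underlying model id plus per-model overrides.
response_json = {
    "gpt-4.1": {"model": "gpt-4.1", "is_default": True},
    "sonnet": {"model": "anthropic/claude-sonnet-4", "holmes_args": {"temperature": 0}},
}

parsed = RobustaModelsResponse(**{"models": response_json})
assert parsed.models["gpt-4.1"].is_default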
holmes/common/env_vars.py
CHANGED
@@ -2,6 +2,16 @@ import os
 import json
 from typing import Optional

+# Recommended models for different providers
+RECOMMENDED_OPENAI_MODEL = "gpt-4.1"
+RECOMMENDED_ANTHROPIC_MODEL = "anthropic/claude-opus-4-1-20250805"
+
+# Default model for HolmesGPT
+DEFAULT_MODEL = RECOMMENDED_OPENAI_MODEL
+FALLBACK_CONTEXT_WINDOW_SIZE = (
+    200000  # Fallback context window size if it can't be determined from the model
+)
+

 def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:
     env_value = os.environ.get(env_var)
@@ -38,6 +48,7 @@ DEVELOPMENT_MODE = load_bool("DEVELOPMENT_MODE", False)
 SENTRY_DSN = os.environ.get("SENTRY_DSN", "")
 SENTRY_TRACES_SAMPLE_RATE = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.0"))

+EXTRA_HEADERS = os.environ.get("EXTRA_HEADERS", "")
 THINKING = os.environ.get("THINKING", "")
 REASONING_EFFORT = os.environ.get("REASONING_EFFORT", "").strip().lower()
 TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.00000001"))
@@ -82,8 +93,19 @@ TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
     os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
 )

+# Absolute max tokens to allocate for a single tool response
+TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS = 25000
+
 MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION = int(
     os.environ.get("MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION", 3000)
 )

+ENABLE_CONVERSATION_HISTORY_COMPACTION = load_bool(
+    "ENABLE_CONVERSATION_HISTORY_COMPACTION", default=True
+)
+
 DISABLE_PROMETHEUS_TOOLSET = load_bool("DISABLE_PROMETHEUS_TOOLSET", False)
+
+RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION = load_bool(
+    "RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION", True
+)
holmes/config.py
CHANGED
@@ -45,6 +45,9 @@ class SupportedTicketSources(str, Enum):

 class Config(RobustaBaseConfig):
     model: Optional[str] = None
+    api_key: Optional[SecretStr] = (
+        None  # if None, read from OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT env var
+    )
     api_base: Optional[str] = None
     api_version: Optional[str] = None
     fast_model: Optional[str] = None
@@ -95,6 +98,7 @@ class Config(RobustaBaseConfig):
     mcp_servers: Optional[dict[str, dict[str, Any]]] = None

     _server_tool_executor: Optional[ToolExecutor] = None
+    _agui_tool_executor: Optional[ToolExecutor] = None

     # TODO: Separate those fields to facade class, this shouldn't be part of the config.
     _toolset_manager: Optional[ToolsetManager] = PrivateAttr(None)
@@ -242,6 +246,23 @@ class Config(RobustaBaseConfig):
         )
         return ToolExecutor(cli_toolsets)

+    def create_agui_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
+        """
+        Creates ToolExecutor for the AG-UI server endpoints
+        """
+
+        if self._agui_tool_executor:
+            return self._agui_tool_executor
+
+        # Use same toolset as CLI for AG-UI front-end.
+        agui_toolsets = self.toolset_manager.list_console_toolsets(
+            dal=dal, refresh_status=True
+        )
+
+        self._agui_tool_executor = ToolExecutor(agui_toolsets)
+
+        return self._agui_tool_executor
+
     def create_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
         """
         Creates ToolExecutor for the server endpoints
@@ -273,6 +294,19 @@ class Config(RobustaBaseConfig):
             tool_executor, self.max_steps, self._get_llm(tracer=tracer)
         )

+    def create_agui_toolcalling_llm(
+        self,
+        dal: Optional["SupabaseDal"] = None,
+        model: Optional[str] = None,
+        tracer=None,
+    ) -> "ToolCallingLLM":
+        tool_executor = self.create_agui_tool_executor(dal)
+        from holmes.core.tool_calling_llm import ToolCallingLLM
+
+        return ToolCallingLLM(
+            tool_executor, self.max_steps, self._get_llm(model, tracer)
+        )
+
     def create_toolcalling_llm(
         self,
         dal: Optional["SupabaseDal"] = None,
@@ -441,7 +475,8 @@ class Config(RobustaBaseConfig):
     # TODO: move this to the llm model registry
     def _get_llm(self, model_key: Optional[str] = None, tracer=None) -> "DefaultLLM":
         sentry_sdk.set_tag("requested_model", model_key)
-
+        model_entry = self.llm_model_registry.get_model_params(model_key)
+        model_params = model_entry.model_dump(exclude_none=True)
         api_base = self.api_base
         api_version = self.api_version

@@ -453,6 +488,8 @@
             api_key = f"{account_id} {token}"
         else:
             api_key = model_params.pop("api_key", None)
+            if api_key is not None:
+                api_key = api_key.get_secret_value()

         model = model_params.pop("model")
         # It's ok if the model does not have api base and api version, which are defaults to None.
@@ -463,10 +500,20 @@
         api_version = model_params.pop("api_version", api_version)
         model_name = model_params.pop("name", None) or model_key or model
         sentry_sdk.set_tag("model_name", model_name)
-
-
-
+        llm = DefaultLLM(
+            model=model,
+            api_key=api_key,
+            api_base=api_base,
+            api_version=api_version,
+            args=model_params,
+            tracer=tracer,
+            name=model_name,
+            is_robusta_model=is_robusta_model,
         )  # type: ignore
+        logging.info(
+            f"Using model: {model_name} ({llm.get_context_window_size():,} total tokens, {llm.get_maximum_output_token():,} output tokens)"
+        )
+        return llm

     def get_models_list(self) -> List[str]:
         if self.llm_model_registry and self.llm_model_registry.models:
holmes/core/conversations.py
CHANGED
@@ -26,7 +26,8 @@ def calculate_tool_size(
         return DEFAULT_TOOL_SIZE

     context_window = ai.llm.get_context_window_size()
-
+    tokens = ai.llm.count_tokens(messages_without_tools)
+    message_size_without_tools = tokens.total_tokens
     maximum_output_token = ai.llm.get_maximum_output_token()

     tool_size = min(
@@ -372,13 +373,13 @@ def build_chat_messages(
     )

     ask = add_global_instructions_to_user_prompt(ask, global_instructions)
-
     conversation_history.append(  # type: ignore
         {
             "role": "user",
             "content": ask,
         },
     )
+
     number_of_tools = len(
         [message for message in conversation_history if message.get("role") == "tool"]  # type: ignore
     )
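In calculate_tool_size, the message size now comes from ai.llm.count_tokens, which returns a structured result (hence the .total_tokens access) rather than a bare integer. The tool_size = min(...) expression is truncated in this diff, so the arithmetic below is only an illustrative assumption: the context left over after the prompt and the reserved output, split across tool responses and capped at a default size.

from dataclasses import dataclass

@dataclass
class TokenCountResult:
    # Hypothetical stand-in for the object count_tokens returns.
    total_tokens: int

def tool_size_sketch(
    context_window: int,
    maximum_output_token: int,
    message_size_without_tools: int,
    number_of_tools: int,
    default_tool_size: int = 10_000,
) -> int:
    # Illustrative budget split; the real min(...) formula is not shown here.
    available = context_window - maximum_output_token - message_size_without_tools
    return min(default_tool_size, max(0, available // max(1, number_of_tools)))

tokens = TokenCountResult(total_tokens=12_000)
print(tool_size_sketch(128_000, 16_000, tokens.total_tokens, number_of_tools=5))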
|