holmesgpt 0.14.4a0__py3-none-any.whl → 0.16.0__py3-none-any.whl

This diff compares the contents of publicly released versions of the package as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of holmesgpt might be problematic.

Files changed (37)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +12 -10
  3. holmes/common/env_vars.py +22 -0
  4. holmes/config.py +51 -4
  5. holmes/core/conversations.py +3 -2
  6. holmes/core/llm.py +226 -72
  7. holmes/core/openai_formatting.py +13 -0
  8. holmes/core/supabase_dal.py +33 -42
  9. holmes/core/tool_calling_llm.py +185 -282
  10. holmes/core/tools.py +21 -1
  11. holmes/core/tools_utils/token_counting.py +2 -1
  12. holmes/core/tools_utils/tool_context_window_limiter.py +32 -30
  13. holmes/core/truncation/compaction.py +59 -0
  14. holmes/core/truncation/input_context_window_limiter.py +218 -0
  15. holmes/interactive.py +17 -7
  16. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  17. holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
  18. holmes/plugins/toolsets/__init__.py +4 -0
  19. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +0 -1
  20. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
  21. holmes/plugins/toolsets/grafana/grafana_api.py +1 -1
  22. holmes/plugins/toolsets/investigator/core_investigation.py +34 -24
  23. holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  24. holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
  25. holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
  26. holmes/plugins/toolsets/prometheus/prometheus.py +1 -1
  27. holmes/plugins/toolsets/robusta/robusta.py +35 -8
  28. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +4 -3
  29. holmes/plugins/toolsets/service_discovery.py +1 -1
  30. holmes/plugins/toolsets/servicenow/servicenow.py +0 -1
  31. holmes/utils/stream.py +31 -1
  32. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/METADATA +6 -2
  33. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/RECORD +36 -31
  34. holmes/core/performance_timing.py +0 -72
  35. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/LICENSE.txt +0 -0
  36. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/WHEEL +0 -0
  37. {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/entry_points.txt +0 -0
holmes/__init__.py CHANGED
@@ -1,5 +1,5 @@
 # This is patched by github actions during release
-__version__ = "0.14.4-alpha"
+__version__ = "0.16.0"
 
 # Re-export version functions from version module for backward compatibility
 from .version import (
holmes/clients/robusta_client.py CHANGED
@@ -1,8 +1,8 @@
 import logging
-from typing import List, Optional, Dict, Any
+from typing import Optional, Dict, Any
 import requests  # type: ignore
 from functools import cache
-from pydantic import BaseModel, ConfigDict, Field
+from pydantic import BaseModel, ConfigDict
 from holmes.common.env_vars import ROBUSTA_API_ENDPOINT
 
 HOLMES_GET_INFO_URL = f"{ROBUSTA_API_ENDPOINT}/api/holmes/get_info"
@@ -14,13 +14,15 @@ class HolmesInfo(BaseModel):
     latest_version: Optional[str] = None
 
 
-class RobustaModelsResponse(BaseModel):
+class RobustaModel(BaseModel):
     model_config = ConfigDict(extra="ignore")
-    models: List[str]
-    models_args: Dict[str, Any] = Field(
-        default_factory=dict, alias="models_holmes_args"
-    )
-    default_model: Optional[str] = None
+    model: str
+    holmes_args: Optional[dict[str, Any]] = None
+    is_default: bool = False
+
+
+class RobustaModelsResponse(BaseModel):
+    models: Dict[str, RobustaModel]
 
 
 @cache
@@ -30,13 +32,13 @@ def fetch_robusta_models(
     try:
         session_request = {"session_token": token, "account_id": account_id}
         resp = requests.post(
-            f"{ROBUSTA_API_ENDPOINT}/api/llm/models",
+            f"{ROBUSTA_API_ENDPOINT}/api/llm/models/v2",
            json=session_request,
            timeout=10,
        )
         resp.raise_for_status()
         response_json = resp.json()
-        return RobustaModelsResponse(**response_json)
+        return RobustaModelsResponse(**{"models": response_json})
     except Exception:
         logging.exception("Failed to fetch robusta models")
         return None
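Note on the API change above: fetch_robusta_models now wraps the raw JSON as {"models": response_json}, which implies the v2 endpoint returns a flat mapping of model name to model entry. A minimal sketch of that parsing; the sample payload is hypothetical (model names and values are illustrative, not from the release):

from typing import Any, Dict, Optional
from pydantic import BaseModel, ConfigDict


class RobustaModel(BaseModel):
    model_config = ConfigDict(extra="ignore")
    model: str
    holmes_args: Optional[dict[str, Any]] = None
    is_default: bool = False


class RobustaModelsResponse(BaseModel):
    models: Dict[str, RobustaModel]


# Hypothetical v2 payload: a flat name -> entry mapping (values illustrative)
response_json = {
    "gpt-4.1": {"model": "gpt-4.1", "is_default": True},
    "opus": {"model": "anthropic/claude-opus-4-1-20250805", "holmes_args": {"thinking": "high"}},
}
parsed = RobustaModelsResponse(**{"models": response_json})
default_key = next((k for k, m in parsed.models.items() if m.is_default), None)
print(default_key)  # -> gpt-4.1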
holmes/common/env_vars.py CHANGED
@@ -2,6 +2,16 @@ import os
 import json
 from typing import Optional
 
+# Recommended models for different providers
+RECOMMENDED_OPENAI_MODEL = "gpt-4.1"
+RECOMMENDED_ANTHROPIC_MODEL = "anthropic/claude-opus-4-1-20250805"
+
+# Default model for HolmesGPT
+DEFAULT_MODEL = RECOMMENDED_OPENAI_MODEL
+FALLBACK_CONTEXT_WINDOW_SIZE = (
+    200000  # Fallback context window size if it can't be determined from the model
+)
+
 
 def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:
     env_value = os.environ.get(env_var)
@@ -38,6 +48,7 @@ DEVELOPMENT_MODE = load_bool("DEVELOPMENT_MODE", False)
 SENTRY_DSN = os.environ.get("SENTRY_DSN", "")
 SENTRY_TRACES_SAMPLE_RATE = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.0"))
 
+EXTRA_HEADERS = os.environ.get("EXTRA_HEADERS", "")
 THINKING = os.environ.get("THINKING", "")
 REASONING_EFFORT = os.environ.get("REASONING_EFFORT", "").strip().lower()
 TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.00000001"))
@@ -82,8 +93,19 @@ TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
     os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
 )
 
+# Absolute max tokens to allocate for a single tool response
+TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS = 25000
+
 MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION = int(
     os.environ.get("MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION", 3000)
 )
 
+ENABLE_CONVERSATION_HISTORY_COMPACTION = load_bool(
+    "ENABLE_CONVERSATION_HISTORY_COMPACTION", default=True
+)
+
 DISABLE_PROMETHEUS_TOOLSET = load_bool("DISABLE_PROMETHEUS_TOOLSET", False)
+
+RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION = load_bool(
+    "RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION", True
+)
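The new toggles above (ENABLE_CONVERSATION_HISTORY_COMPACTION, RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION, DISABLE_PROMETHEUS_TOOLSET) all go through load_bool, whose body is cut off in this diff. A plausible reconstruction, shown only to illustrate the parsing; this is an assumption, not the released code:

import json
import os
from typing import Optional


def load_bool(env_var: str, default: Optional[bool]) -> Optional[bool]:
    env_value = os.environ.get(env_var)
    if env_value is None:
        return default
    # Assumed behavior: accept "true"/"false" in any case, parsed like JSON
    return json.loads(env_value.lower())


os.environ["ENABLE_CONVERSATION_HISTORY_COMPACTION"] = "false"
assert load_bool("ENABLE_CONVERSATION_HISTORY_COMPACTION", default=True) is False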
holmes/config.py CHANGED
@@ -45,6 +45,9 @@ class SupportedTicketSources(str, Enum):
 
 class Config(RobustaBaseConfig):
     model: Optional[str] = None
+    api_key: Optional[SecretStr] = (
+        None  # if None, read from OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT env var
+    )
     api_base: Optional[str] = None
     api_version: Optional[str] = None
     fast_model: Optional[str] = None
@@ -95,6 +98,7 @@ class Config(RobustaBaseConfig):
     mcp_servers: Optional[dict[str, dict[str, Any]]] = None
 
     _server_tool_executor: Optional[ToolExecutor] = None
+    _agui_tool_executor: Optional[ToolExecutor] = None
 
     # TODO: Separate those fields to facade class, this shouldn't be part of the config.
     _toolset_manager: Optional[ToolsetManager] = PrivateAttr(None)
@@ -242,6 +246,23 @@ class Config(RobustaBaseConfig):
         )
         return ToolExecutor(cli_toolsets)
 
+    def create_agui_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
+        """
+        Creates ToolExecutor for the AG-UI server endpoints
+        """
+
+        if self._agui_tool_executor:
+            return self._agui_tool_executor
+
+        # Use same toolset as CLI for AG-UI front-end.
+        agui_toolsets = self.toolset_manager.list_console_toolsets(
+            dal=dal, refresh_status=True
+        )
+
+        self._agui_tool_executor = ToolExecutor(agui_toolsets)
+
+        return self._agui_tool_executor
+
     def create_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
         """
         Creates ToolExecutor for the server endpoints
@@ -273,6 +294,19 @@ class Config(RobustaBaseConfig):
             tool_executor, self.max_steps, self._get_llm(tracer=tracer)
         )
 
+    def create_agui_toolcalling_llm(
+        self,
+        dal: Optional["SupabaseDal"] = None,
+        model: Optional[str] = None,
+        tracer=None,
+    ) -> "ToolCallingLLM":
+        tool_executor = self.create_agui_tool_executor(dal)
+        from holmes.core.tool_calling_llm import ToolCallingLLM
+
+        return ToolCallingLLM(
+            tool_executor, self.max_steps, self._get_llm(model, tracer)
+        )
+
     def create_toolcalling_llm(
         self,
         dal: Optional["SupabaseDal"] = None,
@@ -441,7 +475,8 @@ class Config(RobustaBaseConfig):
     # TODO: move this to the llm model registry
     def _get_llm(self, model_key: Optional[str] = None, tracer=None) -> "DefaultLLM":
         sentry_sdk.set_tag("requested_model", model_key)
-        model_params = self.llm_model_registry.get_model_params(model_key)
+        model_entry = self.llm_model_registry.get_model_params(model_key)
+        model_params = model_entry.model_dump(exclude_none=True)
         api_base = self.api_base
         api_version = self.api_version
 
@@ -453,6 +488,8 @@ class Config(RobustaBaseConfig):
             api_key = f"{account_id} {token}"
         else:
             api_key = model_params.pop("api_key", None)
+            if api_key is not None:
+                api_key = api_key.get_secret_value()
 
         model = model_params.pop("model")
         # It's ok if the model does not have api base and api version, which are defaults to None.
@@ -463,10 +500,20 @@ class Config(RobustaBaseConfig):
         api_version = model_params.pop("api_version", api_version)
         model_name = model_params.pop("name", None) or model_key or model
         sentry_sdk.set_tag("model_name", model_name)
-        logging.info(f"Creating LLM with model: {model_name}")
-        return DefaultLLM(
-            model, api_key, api_base, api_version, model_params, tracer, model_name
+        llm = DefaultLLM(
+            model=model,
+            api_key=api_key,
+            api_base=api_base,
+            api_version=api_version,
+            args=model_params,
+            tracer=tracer,
+            name=model_name,
+            is_robusta_model=is_robusta_model,
         )  # type: ignore
+        logging.info(
+            f"Using model: {model_name} ({llm.get_context_window_size():,} total tokens, {llm.get_maximum_output_token():,} output tokens)"
+        )
+        return llm
 
     def get_models_list(self) -> List[str]:
         if self.llm_model_registry and self.llm_model_registry.models:
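The api_key handling above pairs with the new api_key: Optional[SecretStr] field on Config: pydantic masks a SecretStr everywhere except through an explicit accessor, so _get_llm must call get_secret_value() before the key reaches the LLM client. A standalone illustration of that pydantic behavior (not HolmesGPT code; the key string is fake):

from typing import Optional

from pydantic import BaseModel, SecretStr


class DemoConfig(BaseModel):  # stand-in for holmes.config.Config
    api_key: Optional[SecretStr] = None


cfg = DemoConfig(api_key="sk-example-not-a-real-key")
print(cfg.api_key)                     # **********  (masked, safe to log)
print(cfg.api_key.get_secret_value())  # the raw key, for the actual API call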
holmes/core/conversations.py CHANGED
@@ -26,7 +26,8 @@ def calculate_tool_size(
         return DEFAULT_TOOL_SIZE
 
     context_window = ai.llm.get_context_window_size()
-    message_size_without_tools = ai.llm.count_tokens_for_message(messages_without_tools)
+    tokens = ai.llm.count_tokens(messages_without_tools)
+    message_size_without_tools = tokens.total_tokens
     maximum_output_token = ai.llm.get_maximum_output_token()
 
     tool_size = min(
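Only a fragment of calculate_tool_size is visible here, but together with the new caps in env_vars.py it implies a per-tool token budget roughly like the sketch below. The exact min() expression is an inference from the visible names, not the released code:

# Caps mirrored from env_vars.py in this release; their combination is assumed.
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = 15.0
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS = 25000


def estimate_tool_size(context_window: int, message_tokens: int, max_output_tokens: int) -> int:
    remaining = context_window - message_tokens - max_output_tokens
    return max(0, min(
        int(context_window * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT / 100),
        TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS,
        remaining,
    ))


# 200k-token window, 20k of messages, 8k reserved for output:
print(estimate_tool_size(200_000, 20_000, 8_000))  # -> 25000 (absolute cap beats the 15% cap of 30000)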
@@ -372,13 +373,13 @@ def build_chat_messages(
     )
 
     ask = add_global_instructions_to_user_prompt(ask, global_instructions)
-
     conversation_history.append(  # type: ignore
         {
             "role": "user",
             "content": ask,
         },
     )
+
     number_of_tools = len(
         [message for message in conversation_history if message.get("role") == "tool"]  # type: ignore
     )
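For context, number_of_tools in the hunk above simply counts tool-role messages in an OpenAI-style history; its downstream use lies outside this hunk. The history below is illustrative:

conversation_history = [
    {"role": "system", "content": "You are HolmesGPT."},
    {"role": "user", "content": "why is my pod crash-looping?"},
    {"role": "assistant", "content": None, "tool_calls": [{"id": "call_1"}]},
    {"role": "tool", "tool_call_id": "call_1", "content": "kubectl describe output ..."},
]
number_of_tools = len(
    [message for message in conversation_history if message.get("role") == "tool"]
)
print(number_of_tools)  # -> 1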