holmesgpt 0.13.2__py3-none-any.whl → 0.16.2a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +17 -4
  3. holmes/common/env_vars.py +40 -1
  4. holmes/config.py +114 -144
  5. holmes/core/conversations.py +53 -14
  6. holmes/core/feedback.py +191 -0
  7. holmes/core/investigation.py +18 -22
  8. holmes/core/llm.py +489 -88
  9. holmes/core/models.py +103 -1
  10. holmes/core/openai_formatting.py +13 -0
  11. holmes/core/prompt.py +1 -1
  12. holmes/core/safeguards.py +4 -4
  13. holmes/core/supabase_dal.py +293 -100
  14. holmes/core/tool_calling_llm.py +423 -323
  15. holmes/core/tools.py +311 -33
  16. holmes/core/tools_utils/token_counting.py +14 -0
  17. holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
  18. holmes/core/tools_utils/tool_executor.py +13 -8
  19. holmes/core/toolset_manager.py +155 -4
  20. holmes/core/tracing.py +6 -1
  21. holmes/core/transformers/__init__.py +23 -0
  22. holmes/core/transformers/base.py +62 -0
  23. holmes/core/transformers/llm_summarize.py +174 -0
  24. holmes/core/transformers/registry.py +122 -0
  25. holmes/core/transformers/transformer.py +31 -0
  26. holmes/core/truncation/compaction.py +59 -0
  27. holmes/core/truncation/dal_truncation_utils.py +23 -0
  28. holmes/core/truncation/input_context_window_limiter.py +218 -0
  29. holmes/interactive.py +177 -24
  30. holmes/main.py +7 -4
  31. holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
  32. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  33. holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
  34. holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
  35. holmes/plugins/prompts/generic_ask.jinja2 +2 -4
  36. holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
  37. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
  38. holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
  39. holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
  40. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
  41. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
  42. holmes/plugins/runbooks/__init__.py +117 -18
  43. holmes/plugins/runbooks/catalog.json +2 -0
  44. holmes/plugins/toolsets/__init__.py +21 -8
  45. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  46. holmes/plugins/toolsets/aks.yaml +64 -0
  47. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
  48. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
  49. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
  50. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
  51. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
  52. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
  53. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
  54. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
  55. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
  56. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
  57. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
  58. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
  59. holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
  60. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  61. holmes/plugins/toolsets/cilium.yaml +284 -0
  62. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  63. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  64. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  65. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
  66. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
  67. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
  68. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
  69. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
  70. holmes/plugins/toolsets/git.py +51 -46
  71. holmes/plugins/toolsets/grafana/common.py +15 -3
  72. holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
  73. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
  74. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
  75. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
  76. holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
  77. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
  78. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  79. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
  80. holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
  81. holmes/plugins/toolsets/internet/internet.py +6 -7
  82. holmes/plugins/toolsets/internet/notion.py +5 -6
  83. holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
  84. holmes/plugins/toolsets/kafka.py +25 -36
  85. holmes/plugins/toolsets/kubernetes.yaml +58 -84
  86. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  87. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  88. holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
  89. holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
  90. holmes/plugins/toolsets/newrelic/__init__.py +0 -0
  91. holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
  92. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
  93. holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
  94. holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
  95. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  96. holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  97. holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
  98. holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
  99. holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
  100. holmes/plugins/toolsets/openshift.yaml +283 -0
  101. holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
  102. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
  103. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  104. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
  105. holmes/plugins/toolsets/robusta/robusta.py +236 -65
  106. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  107. holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
  108. holmes/plugins/toolsets/service_discovery.py +1 -1
  109. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  110. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  111. holmes/plugins/toolsets/utils.py +88 -0
  112. holmes/utils/config_utils.py +91 -0
  113. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  114. holmes/utils/env.py +7 -0
  115. holmes/utils/global_instructions.py +75 -10
  116. holmes/utils/holmes_status.py +2 -1
  117. holmes/utils/holmes_sync_toolsets.py +0 -2
  118. holmes/utils/krr_utils.py +188 -0
  119. holmes/utils/sentry_helper.py +41 -0
  120. holmes/utils/stream.py +61 -7
  121. holmes/version.py +34 -14
  122. holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
  123. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
  124. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
  125. holmes/core/performance_timing.py +0 -72
  126. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  127. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  128. holmes/plugins/toolsets/newrelic.py +0 -231
  129. holmes/plugins/toolsets/servicenow/install.md +0 -37
  130. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  131. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  132. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  133. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
  134. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
holmes/__init__.py CHANGED
@@ -1,5 +1,5 @@
1
1
  # This is patched by github actions during release
2
- __version__ = "0.13.2"
2
+ __version__ = "0.16.2-alpha"
3
3
 
4
4
  # Re-export version functions from version module for backward compatibility
5
5
  from .version import (
holmes/clients/robusta_client.py CHANGED
@@ -1,5 +1,5 @@
1
1
  import logging
2
- from typing import List, Optional
2
+ from typing import Optional, Dict, Any
3
3
  import requests # type: ignore
4
4
  from functools import cache
5
5
  from pydantic import BaseModel, ConfigDict
@@ -14,18 +14,31 @@ class HolmesInfo(BaseModel):
14
14
  latest_version: Optional[str] = None
15
15
 
16
16
 
17
+ class RobustaModel(BaseModel):
18
+ model_config = ConfigDict(extra="ignore")
19
+ model: str
20
+ holmes_args: Optional[dict[str, Any]] = None
21
+ is_default: bool = False
22
+
23
+
24
+ class RobustaModelsResponse(BaseModel):
25
+ models: Dict[str, RobustaModel]
26
+
27
+
17
28
  @cache
18
- def fetch_robusta_models(account_id, token) -> Optional[List[str]]:
29
+ def fetch_robusta_models(
30
+ account_id: str, token: str
31
+ ) -> Optional[RobustaModelsResponse]:
19
32
  try:
20
33
  session_request = {"session_token": token, "account_id": account_id}
21
34
  resp = requests.post(
22
- f"{ROBUSTA_API_ENDPOINT}/api/llm/models",
35
+ f"{ROBUSTA_API_ENDPOINT}/api/llm/models/v2",
23
36
  json=session_request,
24
37
  timeout=10,
25
38
  )
26
39
  resp.raise_for_status()
27
40
  response_json = resp.json()
28
- return response_json.get("models")
41
+ return RobustaModelsResponse(**{"models": response_json})
29
42
  except Exception:
30
43
  logging.exception("Failed to fetch robusta models")
31
44
  return None
holmes/common/env_vars.py CHANGED
@@ -2,6 +2,16 @@ import os
2
2
  import json
3
3
  from typing import Optional
4
4
 
5
+ # Recommended models for different providers
6
+ RECOMMENDED_OPENAI_MODEL = "gpt-4.1"
7
+ RECOMMENDED_ANTHROPIC_MODEL = "anthropic/claude-opus-4-1-20250805"
8
+
9
+ # Default model for HolmesGPT
10
+ DEFAULT_MODEL = RECOMMENDED_OPENAI_MODEL
11
+ FALLBACK_CONTEXT_WINDOW_SIZE = (
12
+ 200000 # Fallback context window size if it can't be determined from the model
13
+ )
14
+
5
15
 
6
16
  def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:
7
17
  env_value = os.environ.get(env_var)
@@ -38,6 +48,7 @@ DEVELOPMENT_MODE = load_bool("DEVELOPMENT_MODE", False)
38
48
  SENTRY_DSN = os.environ.get("SENTRY_DSN", "")
39
49
  SENTRY_TRACES_SAMPLE_RATE = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.0"))
40
50
 
51
+ EXTRA_HEADERS = os.environ.get("EXTRA_HEADERS", "")
41
52
  THINKING = os.environ.get("THINKING", "")
42
53
  REASONING_EFFORT = os.environ.get("REASONING_EFFORT", "").strip().lower()
43
54
  TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.00000001"))
@@ -73,4 +84,32 @@ LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
73
84
  # For CLI only, enable user approval for potentially sensitive commands that would otherwise be rejected
74
85
  ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
75
86
 
76
- MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 300))
87
+ MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 100))
88
+
89
+ # Limit each tool response to N% of the total context window.
90
+ # Number between 0 and 100
91
+ # Setting to either 0 or any number above 100 disables the logic that limits tool response size
92
+ TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
93
+ os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
94
+ )
95
+
96
+ # Absolute max tokens to allocate for a single tool response
97
+ TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS = int(
98
+ os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS", 25000)
99
+ )
100
+
101
+ MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION = int(
102
+ os.environ.get("MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION", 3000)
103
+ )
104
+
105
+ ENABLE_CONVERSATION_HISTORY_COMPACTION = load_bool(
106
+ "ENABLE_CONVERSATION_HISTORY_COMPACTION", default=True
107
+ )
108
+
109
+ DISABLE_PROMETHEUS_TOOLSET = load_bool("DISABLE_PROMETHEUS_TOOLSET", False)
110
+
111
+ RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION = load_bool(
112
+ "RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION", True
113
+ )
114
+
115
+ SSE_READ_TIMEOUT = float(os.environ.get("SSE_READ_TIMEOUT", "120"))
holmes/config.py CHANGED
@@ -1,4 +1,3 @@
1
- import json
2
1
  import logging
3
2
  import os
4
3
  import os.path
@@ -6,18 +5,12 @@ from enum import Enum
6
5
  from pathlib import Path
7
6
  from typing import TYPE_CHECKING, Any, List, Optional, Union
8
7
 
8
+ import sentry_sdk
9
9
  import yaml # type: ignore
10
- from pydantic import BaseModel, ConfigDict, FilePath, SecretStr
10
+ from pydantic import BaseModel, ConfigDict, FilePath, PrivateAttr, SecretStr
11
11
 
12
-
13
- from holmes.clients.robusta_client import fetch_robusta_models
14
- from holmes.core.llm import DefaultLLM
15
- from holmes.common.env_vars import (
16
- ROBUSTA_AI,
17
- LOAD_ALL_ROBUSTA_MODELS,
18
- ROBUSTA_API_ENDPOINT,
19
- ROBUSTA_CONFIG_PATH,
20
- )
12
+ from holmes.common.env_vars import ROBUSTA_CONFIG_PATH
13
+ from holmes.core.llm import DefaultLLM, LLMModelRegistry
21
14
  from holmes.core.tools_utils.tool_executor import ToolExecutor
22
15
  from holmes.core.toolset_manager import ToolsetManager
23
16
  from holmes.plugins.runbooks import (
@@ -29,7 +22,6 @@ from holmes.plugins.runbooks import (
29
22
 
30
23
  # Source plugin imports moved to their respective create methods to speed up startup
31
24
  if TYPE_CHECKING:
32
- from holmes.core.llm import LLM
33
25
  from holmes.core.tool_calling_llm import IssueInvestigator, ToolCallingLLM
34
26
  from holmes.plugins.destinations.slack import SlackDestination
35
27
  from holmes.plugins.sources.github import GitHubSource
@@ -38,18 +30,12 @@ if TYPE_CHECKING:
38
30
  from holmes.plugins.sources.pagerduty import PagerDutySource
39
31
  from holmes.plugins.sources.prometheus.plugin import AlertManagerSource
40
32
 
41
- from holmes.core.supabase_dal import SupabaseDal
42
33
  from holmes.core.config import config_path_dir
34
+ from holmes.core.supabase_dal import SupabaseDal
43
35
  from holmes.utils.definitions import RobustaConfig
44
- from holmes.utils.env import replace_env_vars_values
45
- from holmes.utils.file_utils import load_yaml_file
46
36
  from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file
47
37
 
48
38
  DEFAULT_CONFIG_LOCATION = os.path.join(config_path_dir, "config.yaml")
49
- MODEL_LIST_FILE_LOCATION = os.environ.get(
50
- "MODEL_LIST_FILE_LOCATION", "/etc/holmes/config/model_list.yaml"
51
- )
52
- ROBUSTA_AI_MODEL_NAME = "Robusta"
53
39
 
54
40
 
55
41
  class SupportedTicketSources(str, Enum):
@@ -57,32 +43,14 @@ class SupportedTicketSources(str, Enum):
57
43
  PAGERDUTY = "pagerduty"
58
44
 
59
45
 
60
- def is_old_toolset_config(
61
- toolsets: Union[dict[str, dict[str, Any]], List[dict[str, Any]]],
62
- ) -> bool:
63
- # old config is a list of toolsets
64
- if isinstance(toolsets, list):
65
- return True
66
- return False
67
-
68
-
69
- def parse_models_file(path: str):
70
- models = load_yaml_file(path, raise_error=False, warn_not_found=False)
71
-
72
- for _, params in models.items():
73
- params = replace_env_vars_values(params)
74
-
75
- return models
76
-
77
-
78
46
  class Config(RobustaBaseConfig):
47
+ model: Optional[str] = None
79
48
  api_key: Optional[SecretStr] = (
80
49
  None # if None, read from OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT env var
81
50
  )
82
- account_id: Optional[str] = None
83
- session_token: Optional[SecretStr] = None
84
-
85
- model: Optional[str] = "gpt-4o"
51
+ api_base: Optional[str] = None
52
+ api_version: Optional[str] = None
53
+ fast_model: Optional[str] = None
86
54
  max_steps: int = 40
87
55
  cluster_name: Optional[str] = None
88
56
 
@@ -123,14 +91,19 @@ class Config(RobustaBaseConfig):
123
91
  # custom_toolsets_from_cli is passed from CLI option `--custom-toolsets` as 'experimental' custom toolsets.
124
92
  # The status of toolset here won't be cached, so the toolset from cli will always be loaded when specified in the CLI.
125
93
  custom_toolsets_from_cli: Optional[List[FilePath]] = None
126
- should_try_robusta_ai: bool = False # if True, we will try to load the Robusta AI model, in cli we aren't trying to load it.
94
+ # If True, we try to load the Robusta AI model; the CLI does not try to load it.
95
+ should_try_robusta_ai: bool = False
127
96
 
128
97
  toolsets: Optional[dict[str, dict[str, Any]]] = None
129
98
  mcp_servers: Optional[dict[str, dict[str, Any]]] = None
130
99
 
131
100
  _server_tool_executor: Optional[ToolExecutor] = None
101
+ _agui_tool_executor: Optional[ToolExecutor] = None
132
102
 
133
- _toolset_manager: Optional[ToolsetManager] = None
103
+ # TODO: Move these fields to a facade class; they shouldn't be part of the config.
104
+ _toolset_manager: Optional[ToolsetManager] = PrivateAttr(None)
105
+ _llm_model_registry: Optional[LLMModelRegistry] = PrivateAttr(None)
106
+ _dal: Optional[SupabaseDal] = PrivateAttr(None)
134
107
 
135
108
  @property
136
109
  def toolset_manager(self) -> ToolsetManager:
@@ -140,80 +113,29 @@ class Config(RobustaBaseConfig):
140
113
  mcp_servers=self.mcp_servers,
141
114
  custom_toolsets=self.custom_toolsets,
142
115
  custom_toolsets_from_cli=self.custom_toolsets_from_cli,
116
+ global_fast_model=self.fast_model,
143
117
  )
144
118
  return self._toolset_manager
145
119
 
146
- def model_post_init(self, __context: Any) -> None:
147
- self._model_list = parse_models_file(MODEL_LIST_FILE_LOCATION)
148
-
149
- if not self._should_load_robusta_ai():
150
- return
151
-
152
- self.configure_robusta_ai_model()
153
-
154
- def configure_robusta_ai_model(self) -> None:
155
- try:
156
- if not self.cluster_name or not LOAD_ALL_ROBUSTA_MODELS:
157
- self._load_default_robusta_config()
158
- return
159
-
160
- if not self.api_key:
161
- dal = SupabaseDal(self.cluster_name)
162
- self.load_robusta_api_key(dal)
163
-
164
- if not self.account_id or not self.session_token:
165
- self._load_default_robusta_config()
166
- return
120
+ @property
121
+ def dal(self) -> SupabaseDal:
122
+ if not self._dal:
123
+ self._dal = SupabaseDal(self.cluster_name) # type: ignore
124
+ return self._dal
167
125
 
168
- models = fetch_robusta_models(
169
- self.account_id, self.session_token.get_secret_value()
170
- )
171
- if not models:
172
- self._load_default_robusta_config()
173
- return
174
-
175
- for model in models:
176
- logging.info(f"Loading Robusta AI model: {model}")
177
- self._model_list[model] = {
178
- "base_url": f"{ROBUSTA_API_ENDPOINT}/llm/{model}",
179
- "is_robusta_model": True,
180
- }
181
-
182
- except Exception:
183
- logging.exception("Failed to get all robusta models")
184
- # fallback to default behavior
185
- self._load_default_robusta_config()
186
-
187
- def _load_default_robusta_config(self):
188
- if self._should_load_robusta_ai() and self.api_key:
189
- logging.info("Loading default Robusta AI model")
190
- self._model_list[ROBUSTA_AI_MODEL_NAME] = {
191
- "base_url": ROBUSTA_API_ENDPOINT,
192
- "is_robusta_model": True,
193
- }
194
-
195
- def _should_load_robusta_ai(self) -> bool:
196
- if not self.should_try_robusta_ai:
197
- return False
198
-
199
- # ROBUSTA_AI were set in the env vars, so we can use it directly
200
- if ROBUSTA_AI is not None:
201
- return ROBUSTA_AI
202
-
203
- # MODEL is set in the env vars, e.g. the user is using a custom model
204
- # so we don't need to load the robusta AI model and keep the behavior backward compatible
205
- if "MODEL" in os.environ:
206
- return False
207
-
208
- # if the user has provided a model list, we don't need to load the robusta AI model
209
- if self._model_list:
210
- return False
211
-
212
- return True
126
+ @property
127
+ def llm_model_registry(self) -> LLMModelRegistry:
128
+ if not self._llm_model_registry:
129
+ self._llm_model_registry = LLMModelRegistry(self, dal=self.dal)
130
+ return self._llm_model_registry
213
131
 
214
132
  def log_useful_info(self):
215
- if self._model_list:
216
- logging.info(f"loaded models: {list(self._model_list.keys())}")
133
+ if self.llm_model_registry.models:
134
+ logging.info(
135
+ f"Loaded models: {list(self.llm_model_registry.models.keys())}"
136
+ )
137
+ else:
138
+ logging.warning("No llm models were loaded")
217
139
 
218
140
  @classmethod
219
141
  def load_from_file(cls, config_file: Optional[Path], **kwargs) -> "Config":
@@ -227,6 +149,7 @@ class Config(RobustaBaseConfig):
227
149
  Returns:
228
150
  Config instance with merged settings
229
151
  """
152
+
230
153
  config_from_file: Optional[Config] = None
231
154
  if config_file is not None and config_file.exists():
232
155
  logging.debug(f"Loading config from {config_file}")
@@ -250,7 +173,10 @@ class Config(RobustaBaseConfig):
250
173
  kwargs = {}
251
174
  for field_name in [
252
175
  "model",
176
+ "fast_model",
253
177
  "api_key",
178
+ "api_base",
179
+ "api_version",
254
180
  "max_steps",
255
181
  "alertmanager_url",
256
182
  "alertmanager_username",
@@ -297,10 +223,9 @@ class Config(RobustaBaseConfig):
297
223
 
298
224
  return None
299
225
 
300
- @staticmethod
301
- def get_runbook_catalog() -> Optional[RunbookCatalog]:
226
+ def get_runbook_catalog(self) -> Optional[RunbookCatalog]:
302
227
  # TODO(mainred): besides the built-in runbooks, we need to allow the user to bring their own runbooks
303
- runbook_catalog = load_runbook_catalog()
228
+ runbook_catalog = load_runbook_catalog(dal=self.dal)
304
229
  return runbook_catalog
305
230
 
306
231
  def create_console_tool_executor(
@@ -320,6 +245,23 @@ class Config(RobustaBaseConfig):
320
245
  )
321
246
  return ToolExecutor(cli_toolsets)
322
247
 
248
+ def create_agui_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
249
+ """
250
+ Creates ToolExecutor for the AG-UI server endpoints
251
+ """
252
+
253
+ if self._agui_tool_executor:
254
+ return self._agui_tool_executor
255
+
256
+ # Use same toolset as CLI for AG-UI front-end.
257
+ agui_toolsets = self.toolset_manager.list_console_toolsets(
258
+ dal=dal, refresh_status=True
259
+ )
260
+
261
+ self._agui_tool_executor = ToolExecutor(agui_toolsets)
262
+
263
+ return self._agui_tool_executor
264
+
323
265
  def create_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
324
266
  """
325
267
  Creates ToolExecutor for the server endpoints
@@ -351,6 +293,19 @@ class Config(RobustaBaseConfig):
351
293
  tool_executor, self.max_steps, self._get_llm(tracer=tracer)
352
294
  )
353
295
 
296
+ def create_agui_toolcalling_llm(
297
+ self,
298
+ dal: Optional["SupabaseDal"] = None,
299
+ model: Optional[str] = None,
300
+ tracer=None,
301
+ ) -> "ToolCallingLLM":
302
+ tool_executor = self.create_agui_tool_executor(dal)
303
+ from holmes.core.tool_calling_llm import ToolCallingLLM
304
+
305
+ return ToolCallingLLM(
306
+ tool_executor, self.max_steps, self._get_llm(model, tracer)
307
+ )
308
+
354
309
  def create_toolcalling_llm(
355
310
  self,
356
311
  dal: Optional["SupabaseDal"] = None,
@@ -516,39 +471,54 @@ class Config(RobustaBaseConfig):
516
471
  raise ValueError("--slack-channel must be specified")
517
472
  return SlackDestination(self.slack_token.get_secret_value(), self.slack_channel)
518
473
 
519
- def _get_llm(self, model_key: Optional[str] = None, tracer=None) -> "LLM":
520
- api_key: Optional[str] = None
521
- model = self.model
522
- model_params = {}
523
- if self._model_list:
524
- # get requested model or the first credentials if no model requested.
525
- model_params = (
526
- self._model_list.get(model_key, {}).copy()
527
- if model_key
528
- else next(iter(self._model_list.values())).copy()
529
- )
530
- is_robusta_model = model_params.pop("is_robusta_model", False)
531
- if is_robusta_model and self.api_key:
532
- # we set here the api_key since it is being refresh when exprided and not as part of the model loading.
533
- api_key = self.api_key.get_secret_value()
534
- else:
535
- api_key = model_params.pop("api_key", api_key)
536
- model = model_params.pop("model", model)
537
-
538
- return DefaultLLM(model, api_key, model_params, tracer) # type: ignore
474
+ # TODO: move this to the llm model registry
475
+ def _get_llm(self, model_key: Optional[str] = None, tracer=None) -> "DefaultLLM":
476
+ sentry_sdk.set_tag("requested_model", model_key)
477
+ model_entry = self.llm_model_registry.get_model_params(model_key)
478
+ model_params = model_entry.model_dump(exclude_none=True)
479
+ api_base = self.api_base
480
+ api_version = self.api_version
481
+
482
+ is_robusta_model = model_params.pop("is_robusta_model", False)
483
+ sentry_sdk.set_tag("is_robusta_model", is_robusta_model)
484
+ if is_robusta_model:
485
+ # We set the api_key here because it is refreshed when it expires, rather than being set as part of the model loading.
486
+ account_id, token = self.dal.get_ai_credentials()
487
+ api_key = f"{account_id} {token}"
488
+ else:
489
+ api_key = model_params.pop("api_key", None)
490
+ if api_key is not None:
491
+ api_key = api_key.get_secret_value()
492
+
493
+ model = model_params.pop("model")
494
+ # It's ok if the model does not have api_base and api_version; both default to None.
495
+ # Handle both api_base and base_url - api_base takes precedence
496
+ model_api_base = model_params.pop("api_base", None)
497
+ model_base_url = model_params.pop("base_url", None)
498
+ api_base = model_api_base or model_base_url or api_base
499
+ api_version = model_params.pop("api_version", api_version)
500
+ model_name = model_params.pop("name", None) or model_key or model
501
+ sentry_sdk.set_tag("model_name", model_name)
502
+ llm = DefaultLLM(
503
+ model=model,
504
+ api_key=api_key,
505
+ api_base=api_base,
506
+ api_version=api_version,
507
+ args=model_params,
508
+ tracer=tracer,
509
+ name=model_name,
510
+ is_robusta_model=is_robusta_model,
511
+ ) # type: ignore
512
+ logging.info(
513
+ f"Using model: {model_name} ({llm.get_context_window_size():,} total tokens, {llm.get_maximum_output_token():,} output tokens)"
514
+ )
515
+ return llm
539
516
 
540
517
  def get_models_list(self) -> List[str]:
541
- if self._model_list:
542
- return json.dumps(list(self._model_list.keys())) # type: ignore
543
-
544
- return json.dumps([self.model]) # type: ignore
518
+ if self.llm_model_registry and self.llm_model_registry.models:
519
+ return list(self.llm_model_registry.models.keys())
545
520
 
546
- def load_robusta_api_key(self, dal: SupabaseDal):
547
- if ROBUSTA_AI:
548
- account_id, token = dal.get_ai_credentials()
549
- self.api_key = SecretStr(f"{account_id} {token}")
550
- self.account_id = account_id
551
- self.session_token = SecretStr(token)
521
+ return []
552
522
 
553
523
 
554
524
  class TicketSource(BaseModel):