holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188) hide show
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +20 -6
  3. holmes/common/env_vars.py +58 -3
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +123 -148
  6. holmes/core/conversations.py +71 -15
  7. holmes/core/feedback.py +191 -0
  8. holmes/core/investigation.py +31 -39
  9. holmes/core/investigation_structured_output.py +3 -3
  10. holmes/core/issue.py +1 -1
  11. holmes/core/llm.py +508 -88
  12. holmes/core/models.py +108 -4
  13. holmes/core/openai_formatting.py +14 -1
  14. holmes/core/prompt.py +48 -3
  15. holmes/core/runbooks.py +1 -0
  16. holmes/core/safeguards.py +8 -6
  17. holmes/core/supabase_dal.py +295 -100
  18. holmes/core/tool_calling_llm.py +489 -428
  19. holmes/core/tools.py +325 -56
  20. holmes/core/tools_utils/token_counting.py +21 -0
  21. holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
  22. holmes/core/tools_utils/tool_executor.py +0 -13
  23. holmes/core/tools_utils/toolset_utils.py +1 -0
  24. holmes/core/toolset_manager.py +191 -5
  25. holmes/core/tracing.py +19 -3
  26. holmes/core/transformers/__init__.py +23 -0
  27. holmes/core/transformers/base.py +63 -0
  28. holmes/core/transformers/llm_summarize.py +175 -0
  29. holmes/core/transformers/registry.py +123 -0
  30. holmes/core/transformers/transformer.py +32 -0
  31. holmes/core/truncation/compaction.py +94 -0
  32. holmes/core/truncation/dal_truncation_utils.py +23 -0
  33. holmes/core/truncation/input_context_window_limiter.py +219 -0
  34. holmes/interactive.py +228 -31
  35. holmes/main.py +23 -40
  36. holmes/plugins/interfaces.py +2 -1
  37. holmes/plugins/prompts/__init__.py +2 -1
  38. holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
  39. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
  41. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  42. holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
  43. holmes/plugins/prompts/generic_ask.jinja2 +0 -4
  44. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
  45. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
  46. holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
  47. holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
  48. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
  49. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
  50. holmes/plugins/runbooks/__init__.py +145 -17
  51. holmes/plugins/runbooks/catalog.json +2 -0
  52. holmes/plugins/sources/github/__init__.py +4 -2
  53. holmes/plugins/sources/prometheus/models.py +1 -0
  54. holmes/plugins/toolsets/__init__.py +44 -27
  55. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  56. holmes/plugins/toolsets/aks.yaml +64 -0
  57. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
  58. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  59. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  60. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  61. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  62. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  63. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
  64. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
  65. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
  66. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
  67. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
  68. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
  69. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
  70. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
  71. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
  72. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
  73. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
  74. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  75. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  76. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  77. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  78. holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
  79. holmes/plugins/toolsets/bash/common/bash.py +23 -13
  80. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  81. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  82. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  83. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  84. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  85. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  86. holmes/plugins/toolsets/cilium.yaml +284 -0
  87. holmes/plugins/toolsets/connectivity_check.py +124 -0
  88. holmes/plugins/toolsets/coralogix/api.py +132 -119
  89. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  90. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  91. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  92. holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
  93. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
  94. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  95. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  96. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  97. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  98. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
  99. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
  100. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
  101. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
  102. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  103. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  104. holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  105. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
  106. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
  107. holmes/plugins/toolsets/git.py +54 -50
  108. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  109. holmes/plugins/toolsets/grafana/common.py +13 -29
  110. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
  111. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
  112. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
  113. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  114. holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
  115. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
  116. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  117. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
  118. holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
  119. holmes/plugins/toolsets/internet/internet.py +15 -16
  120. holmes/plugins/toolsets/internet/notion.py +9 -11
  121. holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
  122. holmes/plugins/toolsets/investigator/model.py +3 -1
  123. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  124. holmes/plugins/toolsets/kafka.py +36 -42
  125. holmes/plugins/toolsets/kubernetes.yaml +317 -113
  126. holmes/plugins/toolsets/kubernetes_logs.py +9 -9
  127. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  128. holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
  129. holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
  130. holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
  131. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
  132. holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
  133. holmes/plugins/toolsets/openshift.yaml +283 -0
  134. holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
  135. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
  136. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  137. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  138. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
  139. holmes/plugins/toolsets/robusta/robusta.py +239 -68
  140. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  141. holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
  142. holmes/plugins/toolsets/service_discovery.py +1 -1
  143. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  144. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  145. holmes/plugins/toolsets/utils.py +88 -0
  146. holmes/utils/config_utils.py +91 -0
  147. holmes/utils/connection_utils.py +31 -0
  148. holmes/utils/console/result.py +10 -0
  149. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  150. holmes/utils/env.py +7 -0
  151. holmes/utils/file_utils.py +2 -1
  152. holmes/utils/global_instructions.py +60 -11
  153. holmes/utils/holmes_status.py +6 -4
  154. holmes/utils/holmes_sync_toolsets.py +0 -2
  155. holmes/utils/krr_utils.py +188 -0
  156. holmes/utils/log.py +15 -0
  157. holmes/utils/markdown_utils.py +2 -3
  158. holmes/utils/memory_limit.py +58 -0
  159. holmes/utils/sentry_helper.py +64 -0
  160. holmes/utils/stream.py +69 -8
  161. holmes/utils/tags.py +4 -3
  162. holmes/version.py +37 -15
  163. holmesgpt-0.18.4.dist-info/LICENSE +178 -0
  164. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
  165. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  166. holmes/core/performance_timing.py +0 -72
  167. holmes/plugins/toolsets/aws.yaml +0 -80
  168. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
  169. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  170. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
  171. holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
  172. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  173. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  174. holmes/plugins/toolsets/newrelic.py +0 -231
  175. holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
  176. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  177. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
  178. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  179. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  180. holmes/plugins/toolsets/servicenow/install.md +0 -37
  181. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  182. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  183. holmes/utils/keygen_utils.py +0 -6
  184. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  185. holmesgpt-0.13.2.dist-info/RECORD +0 -234
  186. /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
  187. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  188. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/__init__.py CHANGED
@@ -1,8 +1,6 @@
1
1
  # This is patched by github actions during release
2
- __version__ = "0.13.2"
2
+ __version__ = "0.18.4"
3
3
 
4
4
  # Re-export version functions from version module for backward compatibility
5
- from .version import (
6
- get_version as get_version,
7
- is_official_release as is_official_release,
8
- )
5
+ from .version import get_version as get_version
6
+ from .version import is_official_release as is_official_release
@@ -1,8 +1,10 @@
1
1
  import logging
2
- from typing import List, Optional
3
- import requests # type: ignore
4
2
  from functools import cache
3
+ from typing import Any, Dict, Optional
4
+
5
+ import requests # type: ignore
5
6
  from pydantic import BaseModel, ConfigDict
7
+
6
8
  from holmes.common.env_vars import ROBUSTA_API_ENDPOINT
7
9
 
8
10
  HOLMES_GET_INFO_URL = f"{ROBUSTA_API_ENDPOINT}/api/holmes/get_info"
@@ -14,18 +16,30 @@ class HolmesInfo(BaseModel):
14
16
  latest_version: Optional[str] = None
15
17
 
16
18
 
17
- @cache
18
- def fetch_robusta_models(account_id, token) -> Optional[List[str]]:
19
+ class RobustaModel(BaseModel):
20
+ model_config = ConfigDict(extra="ignore")
21
+ model: str
22
+ holmes_args: Optional[dict[str, Any]] = None
23
+ is_default: bool = False
24
+
25
+
26
+ class RobustaModelsResponse(BaseModel):
27
+ models: Dict[str, RobustaModel]
28
+
29
+
30
+ def fetch_robusta_models(
31
+ account_id: str, token: str
32
+ ) -> Optional[RobustaModelsResponse]:
19
33
  try:
20
34
  session_request = {"session_token": token, "account_id": account_id}
21
35
  resp = requests.post(
22
- f"{ROBUSTA_API_ENDPOINT}/api/llm/models",
36
+ f"{ROBUSTA_API_ENDPOINT}/api/llm/models/v2",
23
37
  json=session_request,
24
38
  timeout=10,
25
39
  )
26
40
  resp.raise_for_status()
27
41
  response_json = resp.json()
28
- return response_json.get("models")
42
+ return RobustaModelsResponse(**{"models": response_json})
29
43
  except Exception:
30
44
  logging.exception("Failed to fetch robusta models")
31
45
  return None
holmes/common/env_vars.py CHANGED
@@ -1,7 +1,18 @@
1
- import os
2
1
  import json
2
+ import os
3
+ import platform
3
4
  from typing import Optional
4
5
 
6
+ # Recommended models for different providers
7
+ RECOMMENDED_OPENAI_MODEL = "gpt-4.1"
8
+ RECOMMENDED_ANTHROPIC_MODEL = "anthropic/claude-opus-4-1-20250805"
9
+
10
+ # Default model for HolmesGPT
11
+ DEFAULT_MODEL = RECOMMENDED_OPENAI_MODEL
12
+ FALLBACK_CONTEXT_WINDOW_SIZE = (
13
+ 200000 # Fallback context window size if it can't be determined from the model
14
+ )
15
+
5
16
 
6
17
  def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:
7
18
  env_value = os.environ.get(env_var)
@@ -25,7 +36,6 @@ STORE_URL = os.environ.get("STORE_URL", "")
25
36
  STORE_API_KEY = os.environ.get("STORE_API_KEY", "")
26
37
  STORE_EMAIL = os.environ.get("STORE_EMAIL", "")
27
38
  STORE_PASSWORD = os.environ.get("STORE_PASSWORD", "")
28
- HOLMES_POST_PROCESSING_PROMPT = os.environ.get("HOLMES_POST_PROCESSING_PROMPT", "")
29
39
  ROBUSTA_AI = load_bool("ROBUSTA_AI", None)
30
40
  LOAD_ALL_ROBUSTA_MODELS = load_bool("LOAD_ALL_ROBUSTA_MODELS", True)
31
41
  ROBUSTA_API_ENDPOINT = os.environ.get("ROBUSTA_API_ENDPOINT", "https://api.robusta.dev")
@@ -38,10 +48,20 @@ DEVELOPMENT_MODE = load_bool("DEVELOPMENT_MODE", False)
38
48
  SENTRY_DSN = os.environ.get("SENTRY_DSN", "")
39
49
  SENTRY_TRACES_SAMPLE_RATE = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.0"))
40
50
 
51
+ EXTRA_HEADERS = os.environ.get("EXTRA_HEADERS", "")
41
52
  THINKING = os.environ.get("THINKING", "")
42
53
  REASONING_EFFORT = os.environ.get("REASONING_EFFORT", "").strip().lower()
43
54
  TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.00000001"))
44
55
 
56
+ # Set default memory limit based on CPU architecture
57
+ # ARM architectures typically need more memory
58
+ _default_memory_limit = (
59
+ 1500 if platform.machine().lower() in ("arm64", "aarch64", "arm") else 800
60
+ )
61
+ TOOL_MEMORY_LIMIT_MB = int(
62
+ os.environ.get("TOOL_MEMORY_LIMIT_MB", _default_memory_limit)
63
+ )
64
+
45
65
  STREAM_CHUNKS_PER_PARSE = int(
46
66
  os.environ.get("STREAM_CHUNKS_PER_PARSE", 80)
47
67
  ) # Empirical value with 6~ parsing calls. Consider using larger value if LLM response is long as to reduce markdown to section calls.
@@ -73,4 +93,39 @@ LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
73
93
  # For CLI only, enable user approval for potentially sensitive commands that would otherwise be rejected
74
94
  ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
75
95
 
76
- MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 300))
96
+ MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 100))
97
+
98
+ # Limit each tool response to N% of the total context window.
99
+ # Number between 0 and 100
100
+ # Setting to either 0 or any number above 100 disables the logic that limits tool response size
101
+ TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
102
+ os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
103
+ )
104
+
105
+ # Absolute max tokens to allocate for a single tool response
106
+ TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS = int(
107
+ os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS", 25000)
108
+ )
109
+
110
+ MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION = int(
111
+ os.environ.get("MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION", 3000)
112
+ )
113
+
114
+ ENABLE_CONVERSATION_HISTORY_COMPACTION = load_bool(
115
+ "ENABLE_CONVERSATION_HISTORY_COMPACTION", default=True
116
+ )
117
+
118
+ DISABLE_PROMETHEUS_TOOLSET = load_bool("DISABLE_PROMETHEUS_TOOLSET", False)
119
+
120
+ RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION = load_bool(
121
+ "RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION", True
122
+ )
123
+
124
+ SSE_READ_TIMEOUT = float(os.environ.get("SSE_READ_TIMEOUT", "120"))
125
+
126
+ LLM_REQUEST_TIMEOUT = float(os.environ.get("LLM_REQUEST_TIMEOUT", "600"))
127
+
128
+ ENABLE_CONNECTION_KEEPALIVE = load_bool("ENABLE_CONNECTION_KEEPALIVE", False)
129
+ KEEPALIVE_IDLE = int(os.environ.get("KEEPALIVE_IDLE", 2))
130
+ KEEPALIVE_INTVL = int(os.environ.get("KEEPALIVE_INTVL", 2))
131
+ KEEPALIVE_CNT = int(os.environ.get("KEEPALIVE_CNT", 5))
@@ -1,5 +1,5 @@
1
- from typing import Optional
2
1
  import os
2
+ from typing import Optional
3
3
 
4
4
  # NOTE: This one will be mounted if openshift is enabled in values.yaml
5
5
  TOKEN_LOCATION = os.environ.get(
holmes/config.py CHANGED
@@ -1,4 +1,3 @@
1
- import json
2
1
  import logging
3
2
  import os
4
3
  import os.path
@@ -6,18 +5,12 @@ from enum import Enum
6
5
  from pathlib import Path
7
6
  from typing import TYPE_CHECKING, Any, List, Optional, Union
8
7
 
8
+ import sentry_sdk
9
9
  import yaml # type: ignore
10
- from pydantic import BaseModel, ConfigDict, FilePath, SecretStr
10
+ from pydantic import BaseModel, ConfigDict, FilePath, PrivateAttr, SecretStr
11
11
 
12
-
13
- from holmes.clients.robusta_client import fetch_robusta_models
14
- from holmes.core.llm import DefaultLLM
15
- from holmes.common.env_vars import (
16
- ROBUSTA_AI,
17
- LOAD_ALL_ROBUSTA_MODELS,
18
- ROBUSTA_API_ENDPOINT,
19
- ROBUSTA_CONFIG_PATH,
20
- )
12
+ from holmes.common.env_vars import ROBUSTA_CONFIG_PATH
13
+ from holmes.core.llm import DefaultLLM, LLMModelRegistry
21
14
  from holmes.core.tools_utils.tool_executor import ToolExecutor
22
15
  from holmes.core.toolset_manager import ToolsetManager
23
16
  from holmes.plugins.runbooks import (
@@ -29,7 +22,6 @@ from holmes.plugins.runbooks import (
29
22
 
30
23
  # Source plugin imports moved to their respective create methods to speed up startup
31
24
  if TYPE_CHECKING:
32
- from holmes.core.llm import LLM
33
25
  from holmes.core.tool_calling_llm import IssueInvestigator, ToolCallingLLM
34
26
  from holmes.plugins.destinations.slack import SlackDestination
35
27
  from holmes.plugins.sources.github import GitHubSource
@@ -38,18 +30,12 @@ if TYPE_CHECKING:
38
30
  from holmes.plugins.sources.pagerduty import PagerDutySource
39
31
  from holmes.plugins.sources.prometheus.plugin import AlertManagerSource
40
32
 
41
- from holmes.core.supabase_dal import SupabaseDal
42
33
  from holmes.core.config import config_path_dir
34
+ from holmes.core.supabase_dal import SupabaseDal
43
35
  from holmes.utils.definitions import RobustaConfig
44
- from holmes.utils.env import replace_env_vars_values
45
- from holmes.utils.file_utils import load_yaml_file
46
36
  from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file
47
37
 
48
38
  DEFAULT_CONFIG_LOCATION = os.path.join(config_path_dir, "config.yaml")
49
- MODEL_LIST_FILE_LOCATION = os.environ.get(
50
- "MODEL_LIST_FILE_LOCATION", "/etc/holmes/config/model_list.yaml"
51
- )
52
- ROBUSTA_AI_MODEL_NAME = "Robusta"
53
39
 
54
40
 
55
41
  class SupportedTicketSources(str, Enum):
@@ -57,32 +43,14 @@ class SupportedTicketSources(str, Enum):
57
43
  PAGERDUTY = "pagerduty"
58
44
 
59
45
 
60
- def is_old_toolset_config(
61
- toolsets: Union[dict[str, dict[str, Any]], List[dict[str, Any]]],
62
- ) -> bool:
63
- # old config is a list of toolsets
64
- if isinstance(toolsets, list):
65
- return True
66
- return False
67
-
68
-
69
- def parse_models_file(path: str):
70
- models = load_yaml_file(path, raise_error=False, warn_not_found=False)
71
-
72
- for _, params in models.items():
73
- params = replace_env_vars_values(params)
74
-
75
- return models
76
-
77
-
78
46
  class Config(RobustaBaseConfig):
47
+ model: Optional[str] = None
79
48
  api_key: Optional[SecretStr] = (
80
49
  None # if None, read from OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT env var
81
50
  )
82
- account_id: Optional[str] = None
83
- session_token: Optional[SecretStr] = None
84
-
85
- model: Optional[str] = "gpt-4o"
51
+ api_base: Optional[str] = None
52
+ api_version: Optional[str] = None
53
+ fast_model: Optional[str] = None
86
54
  max_steps: int = 40
87
55
  cluster_name: Optional[str] = None
88
56
 
@@ -116,6 +84,7 @@ class Config(RobustaBaseConfig):
116
84
  opsgenie_query: Optional[str] = None
117
85
 
118
86
  custom_runbooks: List[FilePath] = []
87
+ custom_runbook_catalogs: List[Union[str, FilePath]] = []
119
88
 
120
89
  # custom_toolsets is passed from config file, and be used to override built-in toolsets, provides 'stable' customized toolset.
121
90
  # The status of custom toolsets can be cached.
@@ -123,14 +92,19 @@ class Config(RobustaBaseConfig):
123
92
  # custom_toolsets_from_cli is passed from CLI option `--custom-toolsets` as 'experimental' custom toolsets.
124
93
  # The status of toolset here won't be cached, so the toolset from cli will always be loaded when specified in the CLI.
125
94
  custom_toolsets_from_cli: Optional[List[FilePath]] = None
126
- should_try_robusta_ai: bool = False # if True, we will try to load the Robusta AI model, in cli we aren't trying to load it.
95
+ # if True, we will try to load the Robusta AI model, in cli we aren't trying to load it.
96
+ should_try_robusta_ai: bool = False
127
97
 
128
98
  toolsets: Optional[dict[str, dict[str, Any]]] = None
129
99
  mcp_servers: Optional[dict[str, dict[str, Any]]] = None
130
100
 
131
101
  _server_tool_executor: Optional[ToolExecutor] = None
102
+ _agui_tool_executor: Optional[ToolExecutor] = None
132
103
 
133
- _toolset_manager: Optional[ToolsetManager] = None
104
+ # TODO: Separate those fields to facade class, this shouldn't be part of the config.
105
+ _toolset_manager: Optional[ToolsetManager] = PrivateAttr(None)
106
+ _llm_model_registry: Optional[LLMModelRegistry] = PrivateAttr(None)
107
+ _dal: Optional[SupabaseDal] = PrivateAttr(None)
134
108
 
135
109
  @property
136
110
  def toolset_manager(self) -> ToolsetManager:
@@ -140,80 +114,30 @@ class Config(RobustaBaseConfig):
140
114
  mcp_servers=self.mcp_servers,
141
115
  custom_toolsets=self.custom_toolsets,
142
116
  custom_toolsets_from_cli=self.custom_toolsets_from_cli,
117
+ global_fast_model=self.fast_model,
118
+ custom_runbook_catalogs=self.custom_runbook_catalogs,
143
119
  )
144
120
  return self._toolset_manager
145
121
 
146
- def model_post_init(self, __context: Any) -> None:
147
- self._model_list = parse_models_file(MODEL_LIST_FILE_LOCATION)
148
-
149
- if not self._should_load_robusta_ai():
150
- return
151
-
152
- self.configure_robusta_ai_model()
153
-
154
- def configure_robusta_ai_model(self) -> None:
155
- try:
156
- if not self.cluster_name or not LOAD_ALL_ROBUSTA_MODELS:
157
- self._load_default_robusta_config()
158
- return
159
-
160
- if not self.api_key:
161
- dal = SupabaseDal(self.cluster_name)
162
- self.load_robusta_api_key(dal)
163
-
164
- if not self.account_id or not self.session_token:
165
- self._load_default_robusta_config()
166
- return
122
+ @property
123
+ def dal(self) -> SupabaseDal:
124
+ if not self._dal:
125
+ self._dal = SupabaseDal(self.cluster_name) # type: ignore
126
+ return self._dal
167
127
 
168
- models = fetch_robusta_models(
169
- self.account_id, self.session_token.get_secret_value()
170
- )
171
- if not models:
172
- self._load_default_robusta_config()
173
- return
174
-
175
- for model in models:
176
- logging.info(f"Loading Robusta AI model: {model}")
177
- self._model_list[model] = {
178
- "base_url": f"{ROBUSTA_API_ENDPOINT}/llm/{model}",
179
- "is_robusta_model": True,
180
- }
181
-
182
- except Exception:
183
- logging.exception("Failed to get all robusta models")
184
- # fallback to default behavior
185
- self._load_default_robusta_config()
186
-
187
- def _load_default_robusta_config(self):
188
- if self._should_load_robusta_ai() and self.api_key:
189
- logging.info("Loading default Robusta AI model")
190
- self._model_list[ROBUSTA_AI_MODEL_NAME] = {
191
- "base_url": ROBUSTA_API_ENDPOINT,
192
- "is_robusta_model": True,
193
- }
194
-
195
- def _should_load_robusta_ai(self) -> bool:
196
- if not self.should_try_robusta_ai:
197
- return False
198
-
199
- # ROBUSTA_AI were set in the env vars, so we can use it directly
200
- if ROBUSTA_AI is not None:
201
- return ROBUSTA_AI
202
-
203
- # MODEL is set in the env vars, e.g. the user is using a custom model
204
- # so we don't need to load the robusta AI model and keep the behavior backward compatible
205
- if "MODEL" in os.environ:
206
- return False
207
-
208
- # if the user has provided a model list, we don't need to load the robusta AI model
209
- if self._model_list:
210
- return False
211
-
212
- return True
128
+ @property
129
+ def llm_model_registry(self) -> LLMModelRegistry:
130
+ if not self._llm_model_registry:
131
+ self._llm_model_registry = LLMModelRegistry(self, dal=self.dal)
132
+ return self._llm_model_registry
213
133
 
214
134
  def log_useful_info(self):
215
- if self._model_list:
216
- logging.info(f"loaded models: {list(self._model_list.keys())}")
135
+ if self.llm_model_registry.models:
136
+ logging.info(
137
+ f"Loaded models: {list(self.llm_model_registry.models.keys())}"
138
+ )
139
+ else:
140
+ logging.warning("No llm models were loaded")
217
141
 
218
142
  @classmethod
219
143
  def load_from_file(cls, config_file: Optional[Path], **kwargs) -> "Config":
@@ -227,6 +151,7 @@ class Config(RobustaBaseConfig):
227
151
  Returns:
228
152
  Config instance with merged settings
229
153
  """
154
+
230
155
  config_from_file: Optional[Config] = None
231
156
  if config_file is not None and config_file.exists():
232
157
  logging.debug(f"Loading config from {config_file}")
@@ -250,7 +175,10 @@ class Config(RobustaBaseConfig):
250
175
  kwargs = {}
251
176
  for field_name in [
252
177
  "model",
178
+ "fast_model",
253
179
  "api_key",
180
+ "api_base",
181
+ "api_version",
254
182
  "max_steps",
255
183
  "alertmanager_url",
256
184
  "alertmanager_username",
@@ -297,10 +225,10 @@ class Config(RobustaBaseConfig):
297
225
 
298
226
  return None
299
227
 
300
- @staticmethod
301
- def get_runbook_catalog() -> Optional[RunbookCatalog]:
302
- # TODO(mainred): besides the built-in runbooks, we need to allow the user to bring their own runbooks
303
- runbook_catalog = load_runbook_catalog()
228
+ def get_runbook_catalog(self) -> Optional[RunbookCatalog]:
229
+ runbook_catalog = load_runbook_catalog(
230
+ dal=self.dal, custom_catalog_paths=self.custom_runbook_catalogs
231
+ )
304
232
  return runbook_catalog
305
233
 
306
234
  def create_console_tool_executor(
@@ -320,6 +248,23 @@ class Config(RobustaBaseConfig):
320
248
  )
321
249
  return ToolExecutor(cli_toolsets)
322
250
 
251
+ def create_agui_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
252
+ """
253
+ Creates ToolExecutor for the AG-UI server endpoints
254
+ """
255
+
256
+ if self._agui_tool_executor:
257
+ return self._agui_tool_executor
258
+
259
+ # Use same toolset as CLI for AG-UI front-end.
260
+ agui_toolsets = self.toolset_manager.list_console_toolsets(
261
+ dal=dal, refresh_status=True
262
+ )
263
+
264
+ self._agui_tool_executor = ToolExecutor(agui_toolsets)
265
+
266
+ return self._agui_tool_executor
267
+
323
268
  def create_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
324
269
  """
325
270
  Creates ToolExecutor for the server endpoints
@@ -343,12 +288,28 @@ class Config(RobustaBaseConfig):
343
288
  dal: Optional["SupabaseDal"] = None,
344
289
  refresh_toolsets: bool = False,
345
290
  tracer=None,
291
+ model_name: Optional[str] = None,
346
292
  ) -> "ToolCallingLLM":
347
293
  tool_executor = self.create_console_tool_executor(dal, refresh_toolsets)
348
294
  from holmes.core.tool_calling_llm import ToolCallingLLM
349
295
 
350
296
  return ToolCallingLLM(
351
- tool_executor, self.max_steps, self._get_llm(tracer=tracer)
297
+ tool_executor,
298
+ self.max_steps,
299
+ self._get_llm(tracer=tracer, model_key=model_name),
300
+ )
301
+
302
+ def create_agui_toolcalling_llm(
303
+ self,
304
+ dal: Optional["SupabaseDal"] = None,
305
+ model: Optional[str] = None,
306
+ tracer=None,
307
+ ) -> "ToolCallingLLM":
308
+ tool_executor = self.create_agui_tool_executor(dal)
309
+ from holmes.core.tool_calling_llm import ToolCallingLLM
310
+
311
+ return ToolCallingLLM(
312
+ tool_executor, self.max_steps, self._get_llm(model, tracer)
352
313
  )
353
314
 
354
315
  def create_toolcalling_llm(
@@ -389,7 +350,7 @@ class Config(RobustaBaseConfig):
389
350
  )
390
351
 
391
352
  def create_console_issue_investigator(
392
- self, dal: Optional["SupabaseDal"] = None
353
+ self, dal: Optional["SupabaseDal"] = None, model_name: Optional[str] = None
393
354
  ) -> "IssueInvestigator":
394
355
  all_runbooks = load_builtin_runbooks()
395
356
  for runbook_path in self.custom_runbooks:
@@ -405,7 +366,7 @@ class Config(RobustaBaseConfig):
405
366
  tool_executor=tool_executor,
406
367
  runbook_manager=runbook_manager,
407
368
  max_steps=self.max_steps,
408
- llm=self._get_llm(),
369
+ llm=self._get_llm(model_key=model_name),
409
370
  cluster_name=self.cluster_name,
410
371
  )
411
372
 
@@ -516,39 +477,53 @@ class Config(RobustaBaseConfig):
516
477
  raise ValueError("--slack-channel must be specified")
517
478
  return SlackDestination(self.slack_token.get_secret_value(), self.slack_channel)
518
479
 
519
- def _get_llm(self, model_key: Optional[str] = None, tracer=None) -> "LLM":
520
- api_key: Optional[str] = None
521
- model = self.model
522
- model_params = {}
523
- if self._model_list:
524
- # get requested model or the first credentials if no model requested.
525
- model_params = (
526
- self._model_list.get(model_key, {}).copy()
527
- if model_key
528
- else next(iter(self._model_list.values())).copy()
529
- )
530
- is_robusta_model = model_params.pop("is_robusta_model", False)
531
- if is_robusta_model and self.api_key:
532
- # we set here the api_key since it is being refresh when exprided and not as part of the model loading.
533
- api_key = self.api_key.get_secret_value()
534
- else:
535
- api_key = model_params.pop("api_key", api_key)
536
- model = model_params.pop("model", model)
537
-
538
- return DefaultLLM(model, api_key, model_params, tracer) # type: ignore
480
+ # TODO: move this to the llm model registry
481
+ def _get_llm(self, model_key: Optional[str] = None, tracer=None) -> "DefaultLLM":
482
+ sentry_sdk.set_tag("requested_model", model_key)
483
+ model_entry = self.llm_model_registry.get_model_params(model_key)
484
+ model_params = model_entry.model_dump(exclude_none=True)
485
+ api_base = self.api_base
486
+ api_version = self.api_version
487
+ is_robusta_model = model_params.pop("is_robusta_model", False)
488
+ sentry_sdk.set_tag("is_robusta_model", is_robusta_model)
489
+ if is_robusta_model:
490
+ # we set here the api_key since it is being refresh when exprided and not as part of the model loading.
491
+ account_id, token = self.dal.get_ai_credentials()
492
+ api_key = f"{account_id} {token}"
493
+ else:
494
+ api_key = model_params.pop("api_key", None)
495
+ if api_key is not None:
496
+ api_key = api_key.get_secret_value()
497
+
498
+ model = model_params.pop("model")
499
+ # It's ok if the model does not have api base and api version, which are defaults to None.
500
+ # Handle both api_base and base_url - api_base takes precedence
501
+ model_api_base = model_params.pop("api_base", None)
502
+ model_base_url = model_params.pop("base_url", None)
503
+ api_base = model_api_base or model_base_url or api_base
504
+ api_version = model_params.pop("api_version", api_version)
505
+ model_name = model_params.pop("name", None) or model_key or model
506
+ sentry_sdk.set_tag("model_name", model_name)
507
+ llm = DefaultLLM(
508
+ model=model,
509
+ api_key=api_key,
510
+ api_base=api_base,
511
+ api_version=api_version,
512
+ args=model_params,
513
+ tracer=tracer,
514
+ name=model_name,
515
+ is_robusta_model=is_robusta_model,
516
+ ) # type: ignore
517
+ logging.info(
518
+ f"Using model: {model_name} ({llm.get_context_window_size():,} total tokens, {llm.get_maximum_output_token():,} output tokens)"
519
+ )
520
+ return llm
539
521
 
540
522
  def get_models_list(self) -> List[str]:
541
- if self._model_list:
542
- return json.dumps(list(self._model_list.keys())) # type: ignore
543
-
544
- return json.dumps([self.model]) # type: ignore
523
+ if self.llm_model_registry and self.llm_model_registry.models:
524
+ return list(self.llm_model_registry.models.keys())
545
525
 
546
- def load_robusta_api_key(self, dal: SupabaseDal):
547
- if ROBUSTA_AI:
548
- account_id, token = dal.get_ai_credentials()
549
- self.api_key = SecretStr(f"{account_id} {token}")
550
- self.account_id = account_id
551
- self.session_token = SecretStr(token)
526
+ return []
552
527
 
553
528
 
554
529
  class TicketSource(BaseModel):