holmesgpt 0.13.2__py3-none-any.whl → 0.16.2a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +17 -4
- holmes/common/env_vars.py +40 -1
- holmes/config.py +114 -144
- holmes/core/conversations.py +53 -14
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +18 -22
- holmes/core/llm.py +489 -88
- holmes/core/models.py +103 -1
- holmes/core/openai_formatting.py +13 -0
- holmes/core/prompt.py +1 -1
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +293 -100
- holmes/core/tool_calling_llm.py +423 -323
- holmes/core/tools.py +311 -33
- holmes/core/tools_utils/token_counting.py +14 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
- holmes/core/tools_utils/tool_executor.py +13 -8
- holmes/core/toolset_manager.py +155 -4
- holmes/core/tracing.py +6 -1
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +177 -24
- holmes/main.py +7 -4
- holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/prompts/generic_ask.jinja2 +2 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
- holmes/plugins/runbooks/__init__.py +117 -18
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/toolsets/__init__.py +21 -8
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
- holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
- holmes/plugins/toolsets/git.py +51 -46
- holmes/plugins/toolsets/grafana/common.py +15 -3
- holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
- holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
- holmes/plugins/toolsets/internet/internet.py +6 -7
- holmes/plugins/toolsets/internet/notion.py +5 -6
- holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
- holmes/plugins/toolsets/kafka.py +25 -36
- holmes/plugins/toolsets/kubernetes.yaml +58 -84
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
- holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
- holmes/plugins/toolsets/robusta/robusta.py +236 -65
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/global_instructions.py +75 -10
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +61 -7
- holmes/version.py +34 -14
- holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
holmes/__init__.py
CHANGED
holmes/clients/robusta_client.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import logging
|
|
2
|
-
from typing import
|
|
2
|
+
from typing import Optional, Dict, Any
|
|
3
3
|
import requests # type: ignore
|
|
4
4
|
from functools import cache
|
|
5
5
|
from pydantic import BaseModel, ConfigDict
|
|
@@ -14,18 +14,31 @@ class HolmesInfo(BaseModel):
|
|
|
14
14
|
latest_version: Optional[str] = None
|
|
15
15
|
|
|
16
16
|
|
|
17
|
+
class RobustaModel(BaseModel):
|
|
18
|
+
model_config = ConfigDict(extra="ignore")
|
|
19
|
+
model: str
|
|
20
|
+
holmes_args: Optional[dict[str, Any]] = None
|
|
21
|
+
is_default: bool = False
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class RobustaModelsResponse(BaseModel):
|
|
25
|
+
models: Dict[str, RobustaModel]
|
|
26
|
+
|
|
27
|
+
|
|
17
28
|
@cache
|
|
18
|
-
def fetch_robusta_models(
|
|
29
|
+
def fetch_robusta_models(
|
|
30
|
+
account_id: str, token: str
|
|
31
|
+
) -> Optional[RobustaModelsResponse]:
|
|
19
32
|
try:
|
|
20
33
|
session_request = {"session_token": token, "account_id": account_id}
|
|
21
34
|
resp = requests.post(
|
|
22
|
-
f"{ROBUSTA_API_ENDPOINT}/api/llm/models",
|
|
35
|
+
f"{ROBUSTA_API_ENDPOINT}/api/llm/models/v2",
|
|
23
36
|
json=session_request,
|
|
24
37
|
timeout=10,
|
|
25
38
|
)
|
|
26
39
|
resp.raise_for_status()
|
|
27
40
|
response_json = resp.json()
|
|
28
|
-
return
|
|
41
|
+
return RobustaModelsResponse(**{"models": response_json})
|
|
29
42
|
except Exception:
|
|
30
43
|
logging.exception("Failed to fetch robusta models")
|
|
31
44
|
return None
|
holmes/common/env_vars.py
CHANGED
|
@@ -2,6 +2,16 @@ import os
|
|
|
2
2
|
import json
|
|
3
3
|
from typing import Optional
|
|
4
4
|
|
|
5
|
+
# Recommended models for different providers
|
|
6
|
+
RECOMMENDED_OPENAI_MODEL = "gpt-4.1"
|
|
7
|
+
RECOMMENDED_ANTHROPIC_MODEL = "anthropic/claude-opus-4-1-20250805"
|
|
8
|
+
|
|
9
|
+
# Default model for HolmesGPT
|
|
10
|
+
DEFAULT_MODEL = RECOMMENDED_OPENAI_MODEL
|
|
11
|
+
FALLBACK_CONTEXT_WINDOW_SIZE = (
|
|
12
|
+
200000 # Fallback context window size if it can't be determined from the model
|
|
13
|
+
)
|
|
14
|
+
|
|
5
15
|
|
|
6
16
|
def load_bool(env_var, default: Optional[bool]) -> Optional[bool]:
|
|
7
17
|
env_value = os.environ.get(env_var)
|
|
@@ -38,6 +48,7 @@ DEVELOPMENT_MODE = load_bool("DEVELOPMENT_MODE", False)
|
|
|
38
48
|
SENTRY_DSN = os.environ.get("SENTRY_DSN", "")
|
|
39
49
|
SENTRY_TRACES_SAMPLE_RATE = float(os.environ.get("SENTRY_TRACES_SAMPLE_RATE", "0.0"))
|
|
40
50
|
|
|
51
|
+
EXTRA_HEADERS = os.environ.get("EXTRA_HEADERS", "")
|
|
41
52
|
THINKING = os.environ.get("THINKING", "")
|
|
42
53
|
REASONING_EFFORT = os.environ.get("REASONING_EFFORT", "").strip().lower()
|
|
43
54
|
TEMPERATURE = float(os.environ.get("TEMPERATURE", "0.00000001"))
|
|
@@ -73,4 +84,32 @@ LOG_LLM_USAGE_RESPONSE = load_bool("LOG_LLM_USAGE_RESPONSE", False)
|
|
|
73
84
|
# For CLI only, enable user approval for potentially sensitive commands that would otherwise be rejected
|
|
74
85
|
ENABLE_CLI_TOOL_APPROVAL = load_bool("ENABLE_CLI_TOOL_APPROVAL", True)
|
|
75
86
|
|
|
76
|
-
MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS",
|
|
87
|
+
MAX_GRAPH_POINTS = float(os.environ.get("MAX_GRAPH_POINTS", 100))
|
|
88
|
+
|
|
89
|
+
# Limit each tool response to N% of the total context window.
|
|
90
|
+
# Number between 0 and 100
|
|
91
|
+
# Setting to either 0 or any number above 100 disables the logic that limits tool response size
|
|
92
|
+
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT = float(
|
|
93
|
+
os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT", 15)
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
# Absolute max tokens to allocate for a single tool response
|
|
97
|
+
TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS = int(
|
|
98
|
+
os.environ.get("TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS", 25000)
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION = int(
|
|
102
|
+
os.environ.get("MAX_EVIDENCE_DATA_CHARACTERS_BEFORE_TRUNCATION", 3000)
|
|
103
|
+
)
|
|
104
|
+
|
|
105
|
+
ENABLE_CONVERSATION_HISTORY_COMPACTION = load_bool(
|
|
106
|
+
"ENABLE_CONVERSATION_HISTORY_COMPACTION", default=True
|
|
107
|
+
)
|
|
108
|
+
|
|
109
|
+
DISABLE_PROMETHEUS_TOOLSET = load_bool("DISABLE_PROMETHEUS_TOOLSET", False)
|
|
110
|
+
|
|
111
|
+
RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION = load_bool(
|
|
112
|
+
"RESET_REPEATED_TOOL_CALL_CHECK_AFTER_COMPACTION", True
|
|
113
|
+
)
|
|
114
|
+
|
|
115
|
+
SSE_READ_TIMEOUT = float(os.environ.get("SSE_READ_TIMEOUT", "120"))
|
holmes/config.py
CHANGED
|
@@ -1,4 +1,3 @@
|
|
|
1
|
-
import json
|
|
2
1
|
import logging
|
|
3
2
|
import os
|
|
4
3
|
import os.path
|
|
@@ -6,18 +5,12 @@ from enum import Enum
|
|
|
6
5
|
from pathlib import Path
|
|
7
6
|
from typing import TYPE_CHECKING, Any, List, Optional, Union
|
|
8
7
|
|
|
8
|
+
import sentry_sdk
|
|
9
9
|
import yaml # type: ignore
|
|
10
|
-
from pydantic import BaseModel, ConfigDict, FilePath, SecretStr
|
|
10
|
+
from pydantic import BaseModel, ConfigDict, FilePath, PrivateAttr, SecretStr
|
|
11
11
|
|
|
12
|
-
|
|
13
|
-
from holmes.
|
|
14
|
-
from holmes.core.llm import DefaultLLM
|
|
15
|
-
from holmes.common.env_vars import (
|
|
16
|
-
ROBUSTA_AI,
|
|
17
|
-
LOAD_ALL_ROBUSTA_MODELS,
|
|
18
|
-
ROBUSTA_API_ENDPOINT,
|
|
19
|
-
ROBUSTA_CONFIG_PATH,
|
|
20
|
-
)
|
|
12
|
+
from holmes.common.env_vars import ROBUSTA_CONFIG_PATH
|
|
13
|
+
from holmes.core.llm import DefaultLLM, LLMModelRegistry
|
|
21
14
|
from holmes.core.tools_utils.tool_executor import ToolExecutor
|
|
22
15
|
from holmes.core.toolset_manager import ToolsetManager
|
|
23
16
|
from holmes.plugins.runbooks import (
|
|
@@ -29,7 +22,6 @@ from holmes.plugins.runbooks import (
|
|
|
29
22
|
|
|
30
23
|
# Source plugin imports moved to their respective create methods to speed up startup
|
|
31
24
|
if TYPE_CHECKING:
|
|
32
|
-
from holmes.core.llm import LLM
|
|
33
25
|
from holmes.core.tool_calling_llm import IssueInvestigator, ToolCallingLLM
|
|
34
26
|
from holmes.plugins.destinations.slack import SlackDestination
|
|
35
27
|
from holmes.plugins.sources.github import GitHubSource
|
|
@@ -38,18 +30,12 @@ if TYPE_CHECKING:
|
|
|
38
30
|
from holmes.plugins.sources.pagerduty import PagerDutySource
|
|
39
31
|
from holmes.plugins.sources.prometheus.plugin import AlertManagerSource
|
|
40
32
|
|
|
41
|
-
from holmes.core.supabase_dal import SupabaseDal
|
|
42
33
|
from holmes.core.config import config_path_dir
|
|
34
|
+
from holmes.core.supabase_dal import SupabaseDal
|
|
43
35
|
from holmes.utils.definitions import RobustaConfig
|
|
44
|
-
from holmes.utils.env import replace_env_vars_values
|
|
45
|
-
from holmes.utils.file_utils import load_yaml_file
|
|
46
36
|
from holmes.utils.pydantic_utils import RobustaBaseConfig, load_model_from_file
|
|
47
37
|
|
|
48
38
|
DEFAULT_CONFIG_LOCATION = os.path.join(config_path_dir, "config.yaml")
|
|
49
|
-
MODEL_LIST_FILE_LOCATION = os.environ.get(
|
|
50
|
-
"MODEL_LIST_FILE_LOCATION", "/etc/holmes/config/model_list.yaml"
|
|
51
|
-
)
|
|
52
|
-
ROBUSTA_AI_MODEL_NAME = "Robusta"
|
|
53
39
|
|
|
54
40
|
|
|
55
41
|
class SupportedTicketSources(str, Enum):
|
|
@@ -57,32 +43,14 @@ class SupportedTicketSources(str, Enum):
|
|
|
57
43
|
PAGERDUTY = "pagerduty"
|
|
58
44
|
|
|
59
45
|
|
|
60
|
-
def is_old_toolset_config(
|
|
61
|
-
toolsets: Union[dict[str, dict[str, Any]], List[dict[str, Any]]],
|
|
62
|
-
) -> bool:
|
|
63
|
-
# old config is a list of toolsets
|
|
64
|
-
if isinstance(toolsets, list):
|
|
65
|
-
return True
|
|
66
|
-
return False
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
def parse_models_file(path: str):
|
|
70
|
-
models = load_yaml_file(path, raise_error=False, warn_not_found=False)
|
|
71
|
-
|
|
72
|
-
for _, params in models.items():
|
|
73
|
-
params = replace_env_vars_values(params)
|
|
74
|
-
|
|
75
|
-
return models
|
|
76
|
-
|
|
77
|
-
|
|
78
46
|
class Config(RobustaBaseConfig):
|
|
47
|
+
model: Optional[str] = None
|
|
79
48
|
api_key: Optional[SecretStr] = (
|
|
80
49
|
None # if None, read from OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT env var
|
|
81
50
|
)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
model: Optional[str] = "gpt-4o"
|
|
51
|
+
api_base: Optional[str] = None
|
|
52
|
+
api_version: Optional[str] = None
|
|
53
|
+
fast_model: Optional[str] = None
|
|
86
54
|
max_steps: int = 40
|
|
87
55
|
cluster_name: Optional[str] = None
|
|
88
56
|
|
|
@@ -123,14 +91,19 @@ class Config(RobustaBaseConfig):
|
|
|
123
91
|
# custom_toolsets_from_cli is passed from CLI option `--custom-toolsets` as 'experimental' custom toolsets.
|
|
124
92
|
# The status of toolset here won't be cached, so the toolset from cli will always be loaded when specified in the CLI.
|
|
125
93
|
custom_toolsets_from_cli: Optional[List[FilePath]] = None
|
|
126
|
-
|
|
94
|
+
# if True, we will try to load the Robusta AI model, in cli we aren't trying to load it.
|
|
95
|
+
should_try_robusta_ai: bool = False
|
|
127
96
|
|
|
128
97
|
toolsets: Optional[dict[str, dict[str, Any]]] = None
|
|
129
98
|
mcp_servers: Optional[dict[str, dict[str, Any]]] = None
|
|
130
99
|
|
|
131
100
|
_server_tool_executor: Optional[ToolExecutor] = None
|
|
101
|
+
_agui_tool_executor: Optional[ToolExecutor] = None
|
|
132
102
|
|
|
133
|
-
|
|
103
|
+
# TODO: Separate those fields to facade class, this shouldn't be part of the config.
|
|
104
|
+
_toolset_manager: Optional[ToolsetManager] = PrivateAttr(None)
|
|
105
|
+
_llm_model_registry: Optional[LLMModelRegistry] = PrivateAttr(None)
|
|
106
|
+
_dal: Optional[SupabaseDal] = PrivateAttr(None)
|
|
134
107
|
|
|
135
108
|
@property
|
|
136
109
|
def toolset_manager(self) -> ToolsetManager:
|
|
@@ -140,80 +113,29 @@ class Config(RobustaBaseConfig):
|
|
|
140
113
|
mcp_servers=self.mcp_servers,
|
|
141
114
|
custom_toolsets=self.custom_toolsets,
|
|
142
115
|
custom_toolsets_from_cli=self.custom_toolsets_from_cli,
|
|
116
|
+
global_fast_model=self.fast_model,
|
|
143
117
|
)
|
|
144
118
|
return self._toolset_manager
|
|
145
119
|
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
self.configure_robusta_ai_model()
|
|
153
|
-
|
|
154
|
-
def configure_robusta_ai_model(self) -> None:
|
|
155
|
-
try:
|
|
156
|
-
if not self.cluster_name or not LOAD_ALL_ROBUSTA_MODELS:
|
|
157
|
-
self._load_default_robusta_config()
|
|
158
|
-
return
|
|
159
|
-
|
|
160
|
-
if not self.api_key:
|
|
161
|
-
dal = SupabaseDal(self.cluster_name)
|
|
162
|
-
self.load_robusta_api_key(dal)
|
|
163
|
-
|
|
164
|
-
if not self.account_id or not self.session_token:
|
|
165
|
-
self._load_default_robusta_config()
|
|
166
|
-
return
|
|
120
|
+
@property
|
|
121
|
+
def dal(self) -> SupabaseDal:
|
|
122
|
+
if not self._dal:
|
|
123
|
+
self._dal = SupabaseDal(self.cluster_name) # type: ignore
|
|
124
|
+
return self._dal
|
|
167
125
|
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
return
|
|
174
|
-
|
|
175
|
-
for model in models:
|
|
176
|
-
logging.info(f"Loading Robusta AI model: {model}")
|
|
177
|
-
self._model_list[model] = {
|
|
178
|
-
"base_url": f"{ROBUSTA_API_ENDPOINT}/llm/{model}",
|
|
179
|
-
"is_robusta_model": True,
|
|
180
|
-
}
|
|
181
|
-
|
|
182
|
-
except Exception:
|
|
183
|
-
logging.exception("Failed to get all robusta models")
|
|
184
|
-
# fallback to default behavior
|
|
185
|
-
self._load_default_robusta_config()
|
|
186
|
-
|
|
187
|
-
def _load_default_robusta_config(self):
|
|
188
|
-
if self._should_load_robusta_ai() and self.api_key:
|
|
189
|
-
logging.info("Loading default Robusta AI model")
|
|
190
|
-
self._model_list[ROBUSTA_AI_MODEL_NAME] = {
|
|
191
|
-
"base_url": ROBUSTA_API_ENDPOINT,
|
|
192
|
-
"is_robusta_model": True,
|
|
193
|
-
}
|
|
194
|
-
|
|
195
|
-
def _should_load_robusta_ai(self) -> bool:
|
|
196
|
-
if not self.should_try_robusta_ai:
|
|
197
|
-
return False
|
|
198
|
-
|
|
199
|
-
# ROBUSTA_AI were set in the env vars, so we can use it directly
|
|
200
|
-
if ROBUSTA_AI is not None:
|
|
201
|
-
return ROBUSTA_AI
|
|
202
|
-
|
|
203
|
-
# MODEL is set in the env vars, e.g. the user is using a custom model
|
|
204
|
-
# so we don't need to load the robusta AI model and keep the behavior backward compatible
|
|
205
|
-
if "MODEL" in os.environ:
|
|
206
|
-
return False
|
|
207
|
-
|
|
208
|
-
# if the user has provided a model list, we don't need to load the robusta AI model
|
|
209
|
-
if self._model_list:
|
|
210
|
-
return False
|
|
211
|
-
|
|
212
|
-
return True
|
|
126
|
+
@property
|
|
127
|
+
def llm_model_registry(self) -> LLMModelRegistry:
|
|
128
|
+
if not self._llm_model_registry:
|
|
129
|
+
self._llm_model_registry = LLMModelRegistry(self, dal=self.dal)
|
|
130
|
+
return self._llm_model_registry
|
|
213
131
|
|
|
214
132
|
def log_useful_info(self):
|
|
215
|
-
if self.
|
|
216
|
-
logging.info(
|
|
133
|
+
if self.llm_model_registry.models:
|
|
134
|
+
logging.info(
|
|
135
|
+
f"Loaded models: {list(self.llm_model_registry.models.keys())}"
|
|
136
|
+
)
|
|
137
|
+
else:
|
|
138
|
+
logging.warning("No llm models were loaded")
|
|
217
139
|
|
|
218
140
|
@classmethod
|
|
219
141
|
def load_from_file(cls, config_file: Optional[Path], **kwargs) -> "Config":
|
|
@@ -227,6 +149,7 @@ class Config(RobustaBaseConfig):
|
|
|
227
149
|
Returns:
|
|
228
150
|
Config instance with merged settings
|
|
229
151
|
"""
|
|
152
|
+
|
|
230
153
|
config_from_file: Optional[Config] = None
|
|
231
154
|
if config_file is not None and config_file.exists():
|
|
232
155
|
logging.debug(f"Loading config from {config_file}")
|
|
@@ -250,7 +173,10 @@ class Config(RobustaBaseConfig):
|
|
|
250
173
|
kwargs = {}
|
|
251
174
|
for field_name in [
|
|
252
175
|
"model",
|
|
176
|
+
"fast_model",
|
|
253
177
|
"api_key",
|
|
178
|
+
"api_base",
|
|
179
|
+
"api_version",
|
|
254
180
|
"max_steps",
|
|
255
181
|
"alertmanager_url",
|
|
256
182
|
"alertmanager_username",
|
|
@@ -297,10 +223,9 @@ class Config(RobustaBaseConfig):
|
|
|
297
223
|
|
|
298
224
|
return None
|
|
299
225
|
|
|
300
|
-
|
|
301
|
-
def get_runbook_catalog() -> Optional[RunbookCatalog]:
|
|
226
|
+
def get_runbook_catalog(self) -> Optional[RunbookCatalog]:
|
|
302
227
|
# TODO(mainred): besides the built-in runbooks, we need to allow the user to bring their own runbooks
|
|
303
|
-
runbook_catalog = load_runbook_catalog()
|
|
228
|
+
runbook_catalog = load_runbook_catalog(dal=self.dal)
|
|
304
229
|
return runbook_catalog
|
|
305
230
|
|
|
306
231
|
def create_console_tool_executor(
|
|
@@ -320,6 +245,23 @@ class Config(RobustaBaseConfig):
|
|
|
320
245
|
)
|
|
321
246
|
return ToolExecutor(cli_toolsets)
|
|
322
247
|
|
|
248
|
+
def create_agui_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
|
|
249
|
+
"""
|
|
250
|
+
Creates ToolExecutor for the AG-UI server endpoints
|
|
251
|
+
"""
|
|
252
|
+
|
|
253
|
+
if self._agui_tool_executor:
|
|
254
|
+
return self._agui_tool_executor
|
|
255
|
+
|
|
256
|
+
# Use same toolset as CLI for AG-UI front-end.
|
|
257
|
+
agui_toolsets = self.toolset_manager.list_console_toolsets(
|
|
258
|
+
dal=dal, refresh_status=True
|
|
259
|
+
)
|
|
260
|
+
|
|
261
|
+
self._agui_tool_executor = ToolExecutor(agui_toolsets)
|
|
262
|
+
|
|
263
|
+
return self._agui_tool_executor
|
|
264
|
+
|
|
323
265
|
def create_tool_executor(self, dal: Optional["SupabaseDal"]) -> ToolExecutor:
|
|
324
266
|
"""
|
|
325
267
|
Creates ToolExecutor for the server endpoints
|
|
@@ -351,6 +293,19 @@ class Config(RobustaBaseConfig):
|
|
|
351
293
|
tool_executor, self.max_steps, self._get_llm(tracer=tracer)
|
|
352
294
|
)
|
|
353
295
|
|
|
296
|
+
def create_agui_toolcalling_llm(
|
|
297
|
+
self,
|
|
298
|
+
dal: Optional["SupabaseDal"] = None,
|
|
299
|
+
model: Optional[str] = None,
|
|
300
|
+
tracer=None,
|
|
301
|
+
) -> "ToolCallingLLM":
|
|
302
|
+
tool_executor = self.create_agui_tool_executor(dal)
|
|
303
|
+
from holmes.core.tool_calling_llm import ToolCallingLLM
|
|
304
|
+
|
|
305
|
+
return ToolCallingLLM(
|
|
306
|
+
tool_executor, self.max_steps, self._get_llm(model, tracer)
|
|
307
|
+
)
|
|
308
|
+
|
|
354
309
|
def create_toolcalling_llm(
|
|
355
310
|
self,
|
|
356
311
|
dal: Optional["SupabaseDal"] = None,
|
|
@@ -516,39 +471,54 @@ class Config(RobustaBaseConfig):
|
|
|
516
471
|
raise ValueError("--slack-channel must be specified")
|
|
517
472
|
return SlackDestination(self.slack_token.get_secret_value(), self.slack_channel)
|
|
518
473
|
|
|
519
|
-
|
|
520
|
-
|
|
521
|
-
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
474
|
+
# TODO: move this to the llm model registry
|
|
475
|
+
def _get_llm(self, model_key: Optional[str] = None, tracer=None) -> "DefaultLLM":
|
|
476
|
+
sentry_sdk.set_tag("requested_model", model_key)
|
|
477
|
+
model_entry = self.llm_model_registry.get_model_params(model_key)
|
|
478
|
+
model_params = model_entry.model_dump(exclude_none=True)
|
|
479
|
+
api_base = self.api_base
|
|
480
|
+
api_version = self.api_version
|
|
481
|
+
|
|
482
|
+
is_robusta_model = model_params.pop("is_robusta_model", False)
|
|
483
|
+
sentry_sdk.set_tag("is_robusta_model", is_robusta_model)
|
|
484
|
+
if is_robusta_model:
|
|
485
|
+
# we set here the api_key since it is being refresh when exprided and not as part of the model loading.
|
|
486
|
+
account_id, token = self.dal.get_ai_credentials()
|
|
487
|
+
api_key = f"{account_id} {token}"
|
|
488
|
+
else:
|
|
489
|
+
api_key = model_params.pop("api_key", None)
|
|
490
|
+
if api_key is not None:
|
|
491
|
+
api_key = api_key.get_secret_value()
|
|
492
|
+
|
|
493
|
+
model = model_params.pop("model")
|
|
494
|
+
# It's ok if the model does not have api base and api version, which are defaults to None.
|
|
495
|
+
# Handle both api_base and base_url - api_base takes precedence
|
|
496
|
+
model_api_base = model_params.pop("api_base", None)
|
|
497
|
+
model_base_url = model_params.pop("base_url", None)
|
|
498
|
+
api_base = model_api_base or model_base_url or api_base
|
|
499
|
+
api_version = model_params.pop("api_version", api_version)
|
|
500
|
+
model_name = model_params.pop("name", None) or model_key or model
|
|
501
|
+
sentry_sdk.set_tag("model_name", model_name)
|
|
502
|
+
llm = DefaultLLM(
|
|
503
|
+
model=model,
|
|
504
|
+
api_key=api_key,
|
|
505
|
+
api_base=api_base,
|
|
506
|
+
api_version=api_version,
|
|
507
|
+
args=model_params,
|
|
508
|
+
tracer=tracer,
|
|
509
|
+
name=model_name,
|
|
510
|
+
is_robusta_model=is_robusta_model,
|
|
511
|
+
) # type: ignore
|
|
512
|
+
logging.info(
|
|
513
|
+
f"Using model: {model_name} ({llm.get_context_window_size():,} total tokens, {llm.get_maximum_output_token():,} output tokens)"
|
|
514
|
+
)
|
|
515
|
+
return llm
|
|
539
516
|
|
|
540
517
|
def get_models_list(self) -> List[str]:
|
|
541
|
-
if self.
|
|
542
|
-
return
|
|
543
|
-
|
|
544
|
-
return json.dumps([self.model]) # type: ignore
|
|
518
|
+
if self.llm_model_registry and self.llm_model_registry.models:
|
|
519
|
+
return list(self.llm_model_registry.models.keys())
|
|
545
520
|
|
|
546
|
-
|
|
547
|
-
if ROBUSTA_AI:
|
|
548
|
-
account_id, token = dal.get_ai_credentials()
|
|
549
|
-
self.api_key = SecretStr(f"{account_id} {token}")
|
|
550
|
-
self.account_id = account_id
|
|
551
|
-
self.session_token = SecretStr(token)
|
|
521
|
+
return []
|
|
552
522
|
|
|
553
523
|
|
|
554
524
|
class TicketSource(BaseModel):
|