holmesgpt 0.14.0a0__py3-none-any.whl → 0.14.1__py3-none-any.whl
This diff compares the contents of publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.
Potentially problematic release: this version of holmesgpt might be problematic.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +15 -4
- holmes/common/env_vars.py +8 -1
- holmes/config.py +66 -139
- holmes/core/investigation.py +1 -2
- holmes/core/llm.py +295 -52
- holmes/core/models.py +2 -0
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +14 -8
- holmes/core/tool_calling_llm.py +110 -102
- holmes/core/tools.py +260 -25
- holmes/core/tools_utils/data_types.py +81 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
- holmes/core/tools_utils/tool_executor.py +2 -2
- holmes/core/toolset_manager.py +150 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/main.py +5 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
- holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +344 -205
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +189 -17
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +95 -30
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +10 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +20 -20
- holmes/plugins/toolsets/git.py +21 -21
- holmes/plugins/toolsets/grafana/common.py +2 -2
- holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +5 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +123 -23
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +165 -307
- holmes/plugins/toolsets/internet/internet.py +3 -3
- holmes/plugins/toolsets/internet/notion.py +3 -3
- holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
- holmes/plugins/toolsets/kafka.py +18 -18
- holmes/plugins/toolsets/kubernetes.yaml +58 -0
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +1 -1
- holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
- holmes/plugins/toolsets/newrelic.py +5 -5
- holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
- holmes/plugins/toolsets/prometheus/prometheus.py +841 -351
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +39 -2
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
- holmes/plugins/toolsets/robusta/robusta.py +10 -10
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
- holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/env.py +7 -0
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +9 -0
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/METADATA +10 -14
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/RECORD +82 -72
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/entry_points.txt +0 -0
holmes/core/llm.py
CHANGED
@@ -1,30 +1,38 @@
 import json
 import logging
 from abc import abstractmethod
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, List, Optional, Type, Union, TYPE_CHECKING

-from litellm.types.utils import ModelResponse
+from litellm.types.utils import ModelResponse, TextCompletionResponse
 import sentry_sdk

 from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
 from pydantic import BaseModel
 import litellm
 import os
+from holmes.clients.robusta_client import RobustaModelsResponse, fetch_robusta_models
 from holmes.common.env_vars import (
+    LOAD_ALL_ROBUSTA_MODELS,
     REASONING_EFFORT,
+    ROBUSTA_AI,
+    ROBUSTA_API_ENDPOINT,
     THINKING,
 )
+from holmes.core.supabase_dal import SupabaseDal
+from holmes.utils.env import environ_get_safe_int, replace_env_vars_values
+from holmes.utils.file_utils import load_yaml_file

+if TYPE_CHECKING:
+    from holmes.config import Config

-
-
-
-    except ValueError:
-        return int(default)
+MODEL_LIST_FILE_LOCATION = os.environ.get(
+    "MODEL_LIST_FILE_LOCATION", "/etc/holmes/config/model_list.yaml"
+)


 OVERRIDE_MAX_OUTPUT_TOKEN = environ_get_safe_int("OVERRIDE_MAX_OUTPUT_TOKEN")
 OVERRIDE_MAX_CONTENT_SIZE = environ_get_safe_int("OVERRIDE_MAX_CONTENT_SIZE")
+ROBUSTA_AI_MODEL_NAME = "Robusta"


 class LLM:
@@ -61,31 +69,43 @@ class LLM:
 class DefaultLLM(LLM):
     model: str
     api_key: Optional[str]
-
+    api_base: Optional[str]
+    api_version: Optional[str]
     args: Dict

     def __init__(
         self,
         model: str,
         api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        api_version: Optional[str] = None,
         args: Optional[Dict] = None,
-        tracer=None,
+        tracer: Optional[Any] = None,
+        name: Optional[str] = None,
     ):
         self.model = model
         self.api_key = api_key
+        self.api_base = api_base
+        self.api_version = api_version
         self.args = args or {}
         self.tracer = tracer
+        self.name = name
+        self.update_custom_args()
+        self.check_llm(self.model, self.api_key, self.api_base, self.api_version)

-
-
+    def update_custom_args(self):
+        self.max_context_size = self.args.get("custom_args", {}).get("max_context_size")
+        self.args.pop("custom_args", None)

-    def check_llm(
+    def check_llm(
+        self,
+        model: str,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        api_version: Optional[str],
+    ):
         logging.debug(f"Checking LiteLLM model {model}")
-
-        # so without this hack it always complains that the environment variable for the api key is missing
-        # to fix that, we always set an api key in the standard format that litellm expects (which is ${PROVIDER}_API_KEY)
-        # TODO: we can now handle this better - see https://github.com/BerriAI/litellm/issues/4375#issuecomment-2223684750
-        lookup = litellm.get_llm_provider(self.model)
+        lookup = litellm.get_llm_provider(model)
         if not lookup:
             raise Exception(f"Unknown provider for model {model}")
         provider = lookup[1]
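The reworked constructor above threads api_base and api_version through to litellm and pulls provider-specific overrides out of a custom_args dict before the remaining args are forwarded. A minimal sketch of how a caller might drive this, assuming only the constructor shown in the hunk (the endpoint, key, and deployment name below are placeholders, not values from the package):

from holmes.core.llm import DefaultLLM

# All values below are placeholders for illustration.
llm = DefaultLLM(
    model="azure/my-deployment",
    api_key="<api-key>",
    api_base="https://example.openai.azure.com",
    api_version="2024-02-01",
    args={"custom_args": {"max_context_size": 32768}},
)
# update_custom_args() records max_context_size and pops "custom_args" so it is
# never forwarded to litellm; get_context_window_size() then prefers this value
# over the litellm lookup (see the next hunk).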
@@ -124,51 +144,67 @@ class DefaultLLM(LLM):
         ):
             model_requirements = {"keys_in_environment": True, "missing_keys": []}
         else:
-
-
-
-
-
+            model_requirements = litellm.validate_environment(
+                model=model, api_key=api_key, api_base=api_base
+            )
+            # validate_environment does not accept api_version, and as a special case for Azure OpenAI Service,
+            # when all the other AZURE environments are set expect AZURE_API_VERSION, validate_environment complains
+            # the missing of it even after the api_version is set.
+            # TODO: There's an open PR in litellm to accept api_version in validate_environment, we can leverage this
+            # change if accepted to ignore the following check.
+            # https://github.com/BerriAI/litellm/pull/13808
+            if (
+                provider == "azure"
+                and ["AZURE_API_VERSION"] == model_requirements["missing_keys"]
+                and api_version is not None
+            ):
+                model_requirements["missing_keys"] = []
+                model_requirements["keys_in_environment"] = True

         if not model_requirements["keys_in_environment"]:
             raise Exception(
                 f"model {model} requires the following environment variables: {model_requirements['missing_keys']}"
             )

-    def
+    def _get_model_name_variants_for_lookup(self) -> list[str]:
         """
-
-
-        https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
+        Generate model name variants to try when looking up in litellm.model_cost.
+        Returns a list of names to try in order: exact, lowercase, without prefix, etc.
         """
-
-        prefixes = ["openai/", "bedrock/", "vertex_ai/", "anthropic/"]
+        names_to_try = [self.model, self.model.lower()]

-
-
-
+        # If there's a prefix, also try without it
+        if "/" in self.model:
+            base_model = self.model.split("/", 1)[1]
+            names_to_try.extend([base_model, base_model.lower()])

-
-
-        # this unfortunately does not seem to work for azure if the deployment name is not a well-known model name
-        # if not litellm.supports_function_calling(model=model):
-        #     raise Exception(f"model {model} does not support function calling. You must use HolmesGPT with a model that supports function calling.")
+        # Remove duplicates while preserving order (dict.fromkeys maintains insertion order in Python 3.7+)
+        return list(dict.fromkeys(names_to_try))

     def get_context_window_size(self) -> int:
+        if self.max_context_size:
+            return self.max_context_size
+
         if OVERRIDE_MAX_CONTENT_SIZE:
             logging.debug(
                 f"Using override OVERRIDE_MAX_CONTENT_SIZE {OVERRIDE_MAX_CONTENT_SIZE}"
             )
             return OVERRIDE_MAX_CONTENT_SIZE

-
-
-
-
-
-
-
-
+        # Try each name variant
+        for name in self._get_model_name_variants_for_lookup():
+            try:
+                return litellm.model_cost[name]["max_input_tokens"]
+            except Exception:
+                continue
+
+        # Log which lookups we tried
+        logging.warning(
+            f"Couldn't find model {self.model} in litellm's model list (tried: {', '.join(self._get_model_name_variants_for_lookup())}), "
+            f"using default 128k tokens for max_input_tokens. "
+            f"To override, set OVERRIDE_MAX_CONTENT_SIZE environment variable to the correct value for your model."
+        )
+        return 128000

     @sentry_sdk.trace
     def count_tokens_for_message(self, messages: list[dict]) -> int:
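The fallback chain for the context window is now: explicit max_context_size from custom_args, then OVERRIDE_MAX_CONTENT_SIZE, then a litellm.model_cost lookup over the name variants, and finally a 128k default. The variant generation is simple enough to trace by hand; this standalone sketch mirrors the method above (a copy for illustration, not an import from the package):

def model_name_variants(model: str) -> list[str]:
    # Mirrors DefaultLLM._get_model_name_variants_for_lookup from the hunk above.
    names_to_try = [model, model.lower()]
    if "/" in model:
        base_model = model.split("/", 1)[1]
        names_to_try.extend([base_model, base_model.lower()])
    # dict.fromkeys deduplicates while preserving insertion order
    return list(dict.fromkeys(names_to_try))

print(model_name_variants("bedrock/Claude-3-Sonnet"))
# ['bedrock/Claude-3-Sonnet', 'bedrock/claude-3-sonnet', 'Claude-3-Sonnet', 'claude-3-sonnet']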
@@ -237,6 +273,8 @@ class DefaultLLM(LLM):
         result = litellm_to_use.completion(
             model=self.model,
             api_key=self.api_key,
+            base_url=self.api_base,
+            api_version=self.api_version,
             messages=messages,
             response_format=response_format,
             drop_params=drop_params,
@@ -260,14 +298,20 @@ class DefaultLLM(LLM):
             )
             return OVERRIDE_MAX_OUTPUT_TOKEN

-
-
-
-
-
-
-
-
+        # Try each name variant
+        for name in self._get_model_name_variants_for_lookup():
+            try:
+                return litellm.model_cost[name]["max_output_tokens"]
+            except Exception:
+                continue
+
+        # Log which lookups we tried
+        logging.warning(
+            f"Couldn't find model {self.model} in litellm's model list (tried: {', '.join(self._get_model_name_variants_for_lookup())}), "
+            f"using default 4096 tokens for max_output_tokens. "
+            f"To override, set OVERRIDE_MAX_OUTPUT_TOKEN environment variable to the correct value for your model."
+        )
+        return 4096

     def _add_cache_control_to_last_message(
         self, messages: List[Dict[str, Any]]
@@ -325,3 +369,202 @@ class DefaultLLM(LLM):
         logging.debug(
             f"Added cache_control to {target_msg.get('role')} message (structured content)"
         )
+
+
+class LLMModelRegistry:
+    def __init__(self, config: "Config", dal: SupabaseDal) -> None:
+        self.config = config
+        self._llms: dict[str, dict[str, Any]] = {}
+        self._default_robusta_model = None
+        self.dal = dal
+
+        self._init_models()
+
+    @property
+    def default_robusta_model(self) -> Optional[str]:
+        return self._default_robusta_model
+
+    def _init_models(self):
+        self._llms = self._parse_models_file(MODEL_LIST_FILE_LOCATION)
+
+        if self._should_load_robusta_ai():
+            self.configure_robusta_ai_model()
+
+        if self._should_load_config_model():
+            self._llms[self.config.model] = self._create_model_entry(
+                model=self.config.model,
+                model_name=self.config.model,
+                base_url=self.config.api_base,
+                is_robusta_model=False,
+            )
+
+    def _should_load_config_model(self) -> bool:
+        if self.config.model is not None:
+            return True
+
+        # backward compatibility - in the past config.model was set by default to gpt-4o.
+        # so we need to check if the user has set an OPENAI_API_KEY to load the config model.
+        has_openai_key = os.environ.get("OPENAI_API_KEY")
+        if has_openai_key:
+            self.config.model = "gpt-4o"
+            return True
+
+        return False
+
+    def configure_robusta_ai_model(self) -> None:
+        try:
+            if not self.config.cluster_name or not LOAD_ALL_ROBUSTA_MODELS:
+                self._load_default_robusta_config()
+                return
+
+            if not self.dal.account_id or not self.dal.enabled:
+                self._load_default_robusta_config()
+                return
+
+            account_id, token = self.dal.get_ai_credentials()
+            robusta_models: RobustaModelsResponse | None = fetch_robusta_models(
+                account_id, token
+            )
+            if not robusta_models or not robusta_models.models:
+                self._load_default_robusta_config()
+                return
+
+            for model in robusta_models.models:
+                logging.info(f"Loading Robusta AI model: {model}")
+                args = robusta_models.models_args.get(model)
+                self._llms[model] = self._create_robusta_model_entry(model, args)
+
+            if robusta_models.default_model:
+                logging.info(
+                    f"Setting default Robusta AI model to: {robusta_models.default_model}"
+                )
+                self._default_robusta_model: str = robusta_models.default_model  # type: ignore
+
+        except Exception:
+            logging.exception("Failed to get all robusta models")
+            # fallback to default behavior
+            self._load_default_robusta_config()
+
+    def _load_default_robusta_config(self):
+        if self._should_load_robusta_ai():
+            logging.info("Loading default Robusta AI model")
+            self._llms[ROBUSTA_AI_MODEL_NAME] = {
+                "name": ROBUSTA_AI_MODEL_NAME,
+                "base_url": ROBUSTA_API_ENDPOINT,
+                "is_robusta_model": True,
+                "model": "gpt-4o",
+            }
+            self._default_robusta_model = ROBUSTA_AI_MODEL_NAME
+
+    def _should_load_robusta_ai(self) -> bool:
+        if not self.config.should_try_robusta_ai:
+            return False
+
+        # ROBUSTA_AI were set in the env vars, so we can use it directly
+        if ROBUSTA_AI is not None:
+            return ROBUSTA_AI
+
+        # MODEL is set in the env vars, e.g. the user is using a custom model
+        # so we don't need to load the robusta AI model and keep the behavior backward compatible
+        if "MODEL" in os.environ:
+            return False
+
+        # if the user has provided a model list, we don't need to load the robusta AI model
+        if self._llms:
+            return False
+
+        return True
+
+    def get_model_params(self, model_key: Optional[str] = None) -> dict:
+        if not self._llms:
+            raise Exception("No llm models were loaded")
+
+        if model_key:
+            model_params = self._llms.get(model_key)
+            if model_params is not None:
+                logging.info(f"Using selected model: {model_key}")
+                return model_params.copy()
+
+            logging.error(f"Couldn't find model: {model_key} in model list")
+
+        if self._default_robusta_model:
+            model_params = self._llms.get(self._default_robusta_model)
+            if model_params is not None:
+                logging.info(
+                    f"Using default Robusta AI model: {self._default_robusta_model}"
+                )
+                return model_params.copy()
+
+            logging.error(
+                f"Couldn't find default Robusta AI model: {self._default_robusta_model} in model list"
+            )
+
+        model_key, first_model_params = next(iter(self._llms.items()))
+        logging.debug(f"Using first available model: {model_key}")
+        return first_model_params.copy()
+
+    def get_llm(self, name: str) -> LLM:  # TODO: fix logic
+        return self._llms[name]  # type: ignore
+
+    @property
+    def models(self) -> dict[str, dict[str, Any]]:
+        return self._llms
+
+    def _parse_models_file(self, path: str):
+        models = load_yaml_file(path, raise_error=False, warn_not_found=False)
+        for _, params in models.items():
+            params = replace_env_vars_values(params)
+
+        return models
+
+    def _create_robusta_model_entry(
+        self, model_name: str, args: Optional[dict[str, Any]] = None
+    ) -> dict[str, Any]:
+        return self._create_model_entry(
+            model="gpt-4o",  # Robusta AI model is using openai like API.
+            model_name=model_name,
+            base_url=f"{ROBUSTA_API_ENDPOINT}/llm/{model_name}",
+            is_robusta_model=True,
+            args=args or {},
+        )
+
+    def _create_model_entry(
+        self,
+        model: str,
+        model_name: str,
+        base_url: Optional[str] = None,
+        is_robusta_model: Optional[bool] = None,
+        args: Optional[dict[str, Any]] = None,
+    ) -> dict[str, Any]:
+        entry = {
+            "name": model_name,
+            "base_url": base_url,
+            "is_robusta_model": is_robusta_model,
+            "model": model,
+        }
+        if args:
+            entry["custom_args"] = args  # type: ignore[assignment]
+
+        return entry
+
+
+def get_llm_usage(
+    llm_response: Union[ModelResponse, CustomStreamWrapper, TextCompletionResponse],
+) -> dict:
+    usage: dict = {}
+    if (
+        (
+            isinstance(llm_response, ModelResponse)
+            or isinstance(llm_response, TextCompletionResponse)
+        )
+        and hasattr(llm_response, "usage")
+        and llm_response.usage
+    ):  # type: ignore
+        usage["prompt_tokens"] = llm_response.usage.prompt_tokens  # type: ignore
+        usage["completion_tokens"] = llm_response.usage.completion_tokens  # type: ignore
+        usage["total_tokens"] = llm_response.usage.total_tokens  # type: ignore
+    elif isinstance(llm_response, CustomStreamWrapper):
+        complete_response = litellm.stream_chunk_builder(chunks=llm_response)  # type: ignore
+        if complete_response:
+            return get_llm_usage(complete_response)
+    return usage
holmes/core/models.py
CHANGED
@@ -10,6 +10,7 @@ class InvestigationResult(BaseModel):
     sections: Optional[Dict[str, Union[str, None]]] = None
     tool_calls: List[ToolCallResult] = []
     instructions: List[str] = []
+    metadata: Optional[Dict[Any, Any]] = None


 class InvestigateRequest(BaseModel):
@@ -145,6 +146,7 @@ class ChatResponse(BaseModel):
     conversation_history: list[dict]
     tool_calls: Optional[List[ToolCallResult]] = []
     follow_up_actions: Optional[List[FollowUpAction]] = []
+    metadata: Optional[Dict[Any, Any]] = None


 class WorkloadHealthInvestigationResult(BaseModel):
holmes/core/safeguards.py
CHANGED
@@ -5,7 +5,7 @@ from pydantic import ValidationError

 from holmes.common.env_vars import TOOL_CALL_SAFEGUARDS_ENABLED
 from holmes.plugins.toolsets.logging_utils.logging_api import POD_LOGGING_TOOL_NAME
-from holmes.core.tools import StructuredToolResult,
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
 from holmes.plugins.toolsets.logging_utils.logging_api import FetchPodLogsParams


@@ -39,7 +39,7 @@ def _has_previous_unfiltered_pod_logs_call(
     result = tool_call.get("result", {})
     if (
         tool_call.get("tool_name") == POD_LOGGING_TOOL_NAME
-        and result.get("status") ==
+        and result.get("status") == StructuredToolResultStatus.NO_DATA
         and result.get("params")
     ):
         params = FetchPodLogsParams(**result.get("params"))
@@ -94,7 +94,7 @@ def prevent_overly_repeated_tool_call(
     For example if Holmes checks if a resource is deployed, runs a command to deploy it and then checks again if it has deployed properly.
     """
     return StructuredToolResult(
-        status=
+        status=StructuredToolResultStatus.ERROR,
         error=(
             "Refusing to run this tool call because it has already been called during this session with the exact same parameters.\n"
             "Move on with your investigation to a different tool or change the parameter values."
@@ -106,7 +106,7 @@ def prevent_overly_repeated_tool_call(
         tool_name=tool_name, tool_params=tool_params, tool_calls=tool_calls
     ):
         return StructuredToolResult(
-            status=
+            status=StructuredToolResultStatus.ERROR,
             error=(
                 f"Refusing to run this tool call because the exact same {POD_LOGGING_TOOL_NAME} tool call without filter has already run and returned no data.\n"
                 "This tool call would also have returned no data.\n"
holmes/core/supabase_dal.py
CHANGED
@@ -37,6 +37,7 @@ from holmes.utils.global_instructions import Instructions
 SUPABASE_TIMEOUT_SECONDS = int(os.getenv("SUPABASE_TIMEOUT_SECONDS", 3600))

 ISSUES_TABLE = "Issues"
+GROUPED_ISSUES_TABLE = "GroupedIssues"
 EVIDENCE_TABLE = "Evidence"
 RUNBOOKS_TABLE = "HolmesRunbooks"
 SESSION_TOKENS_TABLE = "AuthTokens"
@@ -338,6 +339,14 @@ class SupabaseDal:
         data.extend(unzipped_files)
         return data

+    def get_issue_from_db(self, issue_id: str, table: str) -> Optional[Dict]:
+        issue_response = (
+            self.client.table(table).select("*").filter("id", "eq", issue_id).execute()
+        )
+        if len(issue_response.data):
+            return issue_response.data[0]
+        return None
+
     def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]:
         # TODO this could be done in a single atomic SELECT, but there is no
         # foreign key relation between Issues and Evidence.
@@ -347,14 +356,11 @@ class SupabaseDal:
             return None
         issue_data = None
         try:
-
-
-            .
-
-            .
-            )
-            if len(issue_response.data):
-                issue_data = issue_response.data[0]
+            issue_data = self.get_issue_from_db(issue_id, ISSUES_TABLE)
+            if issue_data and issue_data["source"] == "prometheus":
+                logging.debug("Getting alert %s from GroupedIssuesTable", issue_id)
+                # This issue will have the complete alert duration information
+                issue_data = self.get_issue_from_db(issue_id, GROUPED_ISSUES_TABLE)

         except Exception:  # e.g. invalid id format
             logging.exception("Supabase error while retrieving issue data")