holmesgpt 0.14.4a0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff reflects the content of publicly available package versions released to a supported registry and is provided for informational purposes only.
Potentially problematic release.
This version of holmesgpt might be problematic.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +12 -10
- holmes/common/env_vars.py +22 -0
- holmes/config.py +51 -4
- holmes/core/conversations.py +3 -2
- holmes/core/llm.py +226 -72
- holmes/core/openai_formatting.py +13 -0
- holmes/core/supabase_dal.py +33 -42
- holmes/core/tool_calling_llm.py +185 -282
- holmes/core/tools.py +21 -1
- holmes/core/tools_utils/token_counting.py +2 -1
- holmes/core/tools_utils/tool_context_window_limiter.py +32 -30
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +17 -7
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/toolsets/__init__.py +4 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +0 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/grafana/grafana_api.py +1 -1
- holmes/plugins/toolsets/investigator/core_investigation.py +34 -24
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1 -1
- holmes/plugins/toolsets/robusta/robusta.py +35 -8
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +4 -3
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -1
- holmes/utils/stream.py +31 -1
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/METADATA +6 -2
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/RECORD +36 -31
- holmes/core/performance_timing.py +0 -72
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.4a0.dist-info → holmesgpt-0.16.0.dist-info}/entry_points.txt +0 -0
holmes/core/llm.py
CHANGED
@@ -1,23 +1,33 @@
 import json
 import logging
+import os
 from abc import abstractmethod
 from math import floor
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union
 
+import litellm
+from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
 from litellm.types.utils import ModelResponse, TextCompletionResponse
 import sentry_sdk
+from pydantic import BaseModel, ConfigDict, SecretStr
+from typing_extensions import Self
+
+from holmes.clients.robusta_client import (
+    RobustaModel,
+    RobustaModelsResponse,
+    fetch_robusta_models,
+)
 
-from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
-from pydantic import BaseModel
-import litellm
-import os
-from holmes.clients.robusta_client import RobustaModelsResponse, fetch_robusta_models
 from holmes.common.env_vars import (
+    FALLBACK_CONTEXT_WINDOW_SIZE,
     LOAD_ALL_ROBUSTA_MODELS,
     REASONING_EFFORT,
     ROBUSTA_AI,
     ROBUSTA_API_ENDPOINT,
     THINKING,
+    EXTRA_HEADERS,
+    TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT,
+    TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS,
 )
 from holmes.core.supabase_dal import SupabaseDal
 from holmes.utils.env import environ_get_safe_int, replace_env_vars_values
@@ -33,9 +43,50 @@ MODEL_LIST_FILE_LOCATION = os.environ.get(
 
 OVERRIDE_MAX_OUTPUT_TOKEN = environ_get_safe_int("OVERRIDE_MAX_OUTPUT_TOKEN")
 OVERRIDE_MAX_CONTENT_SIZE = environ_get_safe_int("OVERRIDE_MAX_CONTENT_SIZE")
+
+
+def get_context_window_compaction_threshold_pct() -> int:
+    """Get the compaction threshold percentage at runtime to support test overrides."""
+    return environ_get_safe_int("CONTEXT_WINDOW_COMPACTION_THRESHOLD_PCT", default="95")
+
+
 ROBUSTA_AI_MODEL_NAME = "Robusta"
 
 
+class TokenCountMetadata(BaseModel):
+    total_tokens: int
+    tools_tokens: int
+    system_tokens: int
+    user_tokens: int
+    tools_to_call_tokens: int
+    assistant_tokens: int
+    other_tokens: int
+
+
+class ModelEntry(BaseModel):
+    """ModelEntry represents a single LLM model configuration."""
+
+    model: str
+    # TODO: the name field seems to be redundant, can we remove it?
+    name: Optional[str] = None
+    api_key: Optional[SecretStr] = None
+    base_url: Optional[str] = None
+    is_robusta_model: Optional[bool] = None
+    custom_args: Optional[Dict[str, Any]] = None
+
+    # LLM configurations used services like Azure OpenAI Service
+    api_base: Optional[str] = None
+    api_version: Optional[str] = None
+
+    model_config = ConfigDict(
+        extra="allow",
+    )
+
+    @classmethod
+    def load_from_dict(cls, data: dict) -> Self:
+        return cls.model_validate(data)
+
+
 class LLM:
     @abstractmethod
     def __init__(self):
@@ -49,8 +100,23 @@ class LLM:
     def get_maximum_output_token(self) -> int:
         pass
 
+    def get_max_token_count_for_single_tool(self) -> int:
+        if (
+            0 < TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT
+            and TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT <= 100
+        ):
+            context_window_size = self.get_context_window_size()
+            calculated_max_tokens = int(
+                context_window_size * TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT // 100
+            )
+            return min(calculated_max_tokens, TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS)
+        else:
+            return TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_TOKENS
+
     @abstractmethod
-    def
+    def count_tokens(
+        self, messages: list[dict], tools: Optional[list[dict[str, Any]]] = None
+    ) -> TokenCountMetadata:
         pass
 
     @abstractmethod
@@ -73,6 +139,7 @@ class DefaultLLM(LLM):
     api_base: Optional[str]
     api_version: Optional[str]
     args: Dict
+    is_robusta_model: bool
 
     def __init__(
         self,
@@ -83,6 +150,7 @@ class DefaultLLM(LLM):
         args: Optional[Dict] = None,
         tracer: Optional[Any] = None,
         name: Optional[str] = None,
+        is_robusta_model: bool = False,
     ):
         self.model = model
         self.api_key = api_key
@@ -91,8 +159,11 @@ class DefaultLLM(LLM):
         self.args = args or {}
         self.tracer = tracer
         self.name = name
+        self.is_robusta_model = is_robusta_model
         self.update_custom_args()
-        self.check_llm(
+        self.check_llm(
+            self.model, self.api_key, self.api_base, self.api_version, self.args
+        )
 
     def update_custom_args(self):
         self.max_context_size = self.args.get("custom_args", {}).get("max_context_size")
@@ -104,7 +175,14 @@ class DefaultLLM(LLM):
         api_key: Optional[str],
         api_base: Optional[str],
         api_version: Optional[str],
+        args: Optional[dict] = None,
     ):
+        if self.is_robusta_model:
+            # The model is assumed correctly configured if it is a robusta model
+            # For robusta models, this code would fail because Holmes has no knowledge of the API keys
+            # to azure or bedrock as all completion API calls go through robusta's LLM proxy
+            return
+        args = args or {}
         logging.debug(f"Checking LiteLLM model {model}")
         lookup = litellm.get_llm_provider(model)
         if not lookup:
@@ -140,10 +218,17 @@ class DefaultLLM(LLM):
                 "environment variable for proper functionality. For more information, refer to the documentation: "
                 "https://docs.litellm.ai/docs/providers/watsonx#usage---models-in-deployment-spaces"
             )
-        elif provider == "bedrock"
-            os.environ.get("AWS_PROFILE") or os.environ.get(
-
-
+        elif provider == "bedrock":
+            if os.environ.get("AWS_PROFILE") or os.environ.get(
+                "AWS_BEARER_TOKEN_BEDROCK"
+            ):
+                model_requirements = {"keys_in_environment": True, "missing_keys": []}
+            elif args.get("aws_access_key_id") and args.get("aws_secret_access_key"):
+                return  # break fast.
+            else:
+                model_requirements = litellm.validate_environment(
+                    model=model, api_key=api_key, api_base=api_base
+                )
         else:
             model_requirements = litellm.validate_environment(
                 model=model, api_key=api_key, api_base=api_base
@@ -202,39 +287,82 @@ class DefaultLLM(LLM):
             # Log which lookups we tried
             logging.warning(
                 f"Couldn't find model {self.model} in litellm's model list (tried: {', '.join(self._get_model_name_variants_for_lookup())}), "
-                f"using default
+                f"using default {FALLBACK_CONTEXT_WINDOW_SIZE} tokens for max_input_tokens. "
                 f"To override, set OVERRIDE_MAX_CONTENT_SIZE environment variable to the correct value for your model."
             )
-            return
+            return FALLBACK_CONTEXT_WINDOW_SIZE
 
     @sentry_sdk.trace
-    def
-
+    def count_tokens(
+        self, messages: list[dict], tools: Optional[list[dict[str, Any]]] = None
+    ) -> TokenCountMetadata:
+        # TODO: Add a recount:bool flag to save time. When the flag is false, reuse 'message["token_count"]' for individual messages.
+        # It's only necessary to recount message tokens at the beginning of a session because the LLM model may have changed.
+        # Changing the model requires recounting tokens because the tokenizer may be different
+        total_tokens = 0
+        tools_tokens = 0
+        system_tokens = 0
+        assistant_tokens = 0
+        user_tokens = 0
+        other_tokens = 0
+        tools_to_call_tokens = 0
         for message in messages:
-
-
+            # count message tokens individually because it gives us fine grain information about each tool call/message etc.
+            # However be aware that the sum of individual message tokens is not equal to the overall messages token
+            token_count = litellm.token_counter(  # type: ignore
+                model=self.model, messages=[message]
+            )
+            message["token_count"] = token_count
+            role = message.get("role")
+            if role == "system":
+                system_tokens += token_count
+            elif role == "user":
+                user_tokens += token_count
+            elif role == "tool":
+                tools_tokens += token_count
+            elif role == "assistant":
+                assistant_tokens += token_count
             else:
-                #
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+                # although this should not be needed,
+                # it is defensive code so that all tokens are accounted for
+                # and can potentially make debugging easier
+                other_tokens += token_count
+
+        messages_token_count_without_tools = litellm.token_counter(  # type: ignore
+            model=self.model, messages=messages
+        )
+
+        total_tokens = litellm.token_counter(  # type: ignore
+            model=self.model,
+            messages=messages,
+            tools=tools,  # type: ignore
+        )
+        tools_to_call_tokens = max(0, total_tokens - messages_token_count_without_tools)
+
+        return TokenCountMetadata(
+            total_tokens=total_tokens,
+            system_tokens=system_tokens,
+            user_tokens=user_tokens,
+            tools_tokens=tools_tokens,
+            tools_to_call_tokens=tools_to_call_tokens,
+            other_tokens=other_tokens,
+            assistant_tokens=assistant_tokens,
+        )
+
+    def get_litellm_corrected_name_for_robusta_ai(self) -> str:
+        if self.is_robusta_model:
+            # For robusta models, self.model is the underlying provider/model used by Robusta AI
+            # To avoid litellm modifying the API URL according to the provider, the provider name
+            # is replaced with 'openai/' just before doing a completion() call
+            # Cf. https://docs.litellm.ai/docs/providers/openai_compatible
+            split_model_name = self.model.split("/")
+            return (
+                split_model_name[0]
+                if len(split_model_name) == 1
+                else f"openai/{split_model_name[1]}"
+            )
+        else:
+            return self.model
 
     def completion(
         self,
@@ -256,6 +384,9 @@ class DefaultLLM(LLM):
         if THINKING:
             self.args.setdefault("thinking", json.loads(THINKING))
 
+        if EXTRA_HEADERS:
+            self.args.setdefault("extra_headers", json.loads(EXTRA_HEADERS))
+
         if self.args.get("thinking", None):
             litellm.modify_params = True
 
@@ -271,8 +402,10 @@ class DefaultLLM(LLM):
 
         # Get the litellm module to use (wrapped or unwrapped)
         litellm_to_use = self.tracer.wrap_llm(litellm) if self.tracer else litellm
+
+        litellm_model_name = self.get_litellm_corrected_name_for_robusta_ai()
         result = litellm_to_use.completion(
-            model=
+            model=litellm_model_name,
             api_key=self.api_key,
             base_url=self.api_base,
             api_version=self.api_version,
@@ -328,6 +461,12 @@ class DefaultLLM(LLM):
         Add cache_control to the last non-user message for Anthropic prompt caching.
         Removes any existing cache_control from previous messages to avoid accumulation.
         """
+        # Skip cache_control for VertexAI/Gemini models as they don't support it with tools
+        if self.model and (
+            "vertex" in self.model.lower() or "gemini" in self.model.lower()
+        ):
+            return
+
         # First, remove any existing cache_control from all messages
         for msg in messages:
             content = msg.get("content")
@@ -382,7 +521,7 @@ class DefaultLLM(LLM):
 class LLMModelRegistry:
     def __init__(self, config: "Config", dal: SupabaseDal) -> None:
         self.config = config
-        self._llms: dict[str,
+        self._llms: dict[str, ModelEntry] = {}
         self._default_robusta_model = None
         self.dal = dal
 
@@ -404,6 +543,8 @@ class LLMModelRegistry:
             model_name=self.config.model,
             base_url=self.config.api_base,
             is_robusta_model=False,
+            api_key=self.config.api_key,
+            api_version=self.config.api_version,
         )
 
     def _should_load_config_model(self) -> bool:
@@ -414,7 +555,7 @@ class LLMModelRegistry:
         # so we need to check if the user has set an OPENAI_API_KEY to load the config model.
         has_openai_key = os.environ.get("OPENAI_API_KEY")
         if has_openai_key:
-            self.config.model = "gpt-
+            self.config.model = "gpt-4.1"
             return True
 
         return False
@@ -430,6 +571,7 @@ class LLMModelRegistry:
             return
 
         account_id, token = self.dal.get_ai_credentials()
+
         robusta_models: RobustaModelsResponse | None = fetch_robusta_models(
             account_id, token
         )
@@ -437,16 +579,18 @@ class LLMModelRegistry:
             self._load_default_robusta_config()
             return
 
-
-
-
-            self._llms[
-
-            if robusta_models.default_model:
-                logging.info(
-                    f"Setting default Robusta AI model to: {robusta_models.default_model}"
+            default_model = None
+            for model_name, model_data in robusta_models.models.items():
+                logging.info(f"Loading Robusta AI model: {model_name}")
+                self._llms[model_name] = self._create_robusta_model_entry(
+                    model_name=model_name, model_data=model_data
                 )
-
+                if model_data.is_default:
+                    default_model = model_name
+
+            if default_model:
+                logging.info(f"Setting default Robusta AI model to: {default_model}")
+                self._default_robusta_model: str = default_model  # type: ignore
 
         except Exception:
             logging.exception("Failed to get all robusta models")
@@ -456,12 +600,12 @@ class LLMModelRegistry:
     def _load_default_robusta_config(self):
        if self._should_load_robusta_ai():
             logging.info("Loading default Robusta AI model")
-            self._llms[ROBUSTA_AI_MODEL_NAME] =
-
-                "
-
-
-
+            self._llms[ROBUSTA_AI_MODEL_NAME] = ModelEntry(
+                name=ROBUSTA_AI_MODEL_NAME,
+                model="gpt-4o",  # TODO: tech debt, this isn't really
+                base_url=ROBUSTA_API_ENDPOINT,
+                is_robusta_model=True,
+            )
             self._default_robusta_model = ROBUSTA_AI_MODEL_NAME
 
     def _should_load_robusta_ai(self) -> bool:
@@ -483,7 +627,7 @@ class LLMModelRegistry:
 
         return True
 
-    def get_model_params(self, model_key: Optional[str] = None) ->
+    def get_model_params(self, model_key: Optional[str] = None) -> ModelEntry:
         if not self._llms:
             raise Exception("No llm models were loaded")
 
@@ -515,26 +659,30 @@ class LLMModelRegistry:
         return self._llms[name]  # type: ignore
 
     @property
-    def models(self) -> dict[str,
+    def models(self) -> dict[str, ModelEntry]:
         return self._llms
 
-    def _parse_models_file(self, path: str):
+    def _parse_models_file(self, path: str) -> dict[str, ModelEntry]:
        models = load_yaml_file(path, raise_error=False, warn_not_found=False)
         for _, params in models.items():
             params = replace_env_vars_values(params)
 
-
+        llms = {}
+        for model_name, params in models.items():
+            llms[model_name] = ModelEntry.model_validate(params)
+
+        return llms
 
     def _create_robusta_model_entry(
-        self, model_name: str,
-    ) ->
+        self, model_name: str, model_data: RobustaModel
+    ) -> ModelEntry:
         entry = self._create_model_entry(
-            model=
+            model=model_data.model,
             model_name=model_name,
             base_url=f"{ROBUSTA_API_ENDPOINT}/llm/{model_name}",
             is_robusta_model=True,
         )
-        entry
+        entry.custom_args = model_data.holmes_args or {}  # type: ignore[assignment]
         return entry
 
     def _create_model_entry(
@@ -543,13 +691,19 @@ class LLMModelRegistry:
         model_name: str,
         base_url: Optional[str] = None,
         is_robusta_model: Optional[bool] = None,
-
-
-
-
-
-
-
+        api_key: Optional[SecretStr] = None,
+        api_base: Optional[str] = None,
+        api_version: Optional[str] = None,
+    ) -> ModelEntry:
+        return ModelEntry(
+            name=model_name,
+            model=model,
+            base_url=base_url,
+            is_robusta_model=is_robusta_model,
+            api_key=api_key,
+            api_base=api_base,
+            api_version=api_version,
+        )
 
 
     def get_llm_usage(
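The new ModelEntry pydantic model replaces the plain dictionaries previously held in LLMModelRegistry._llms. As a rough illustration of the fields introduced above, here is a minimal sketch of validating a hypothetical models-file entry (the values shown are invented, not taken from the package):

    from holmes.core.llm import ModelEntry

    # Hypothetical entry as it might appear in a models YAML file after env-var substitution
    raw_entry = {
        "model": "azure/gpt-4o",
        "api_key": "sk-example-key",                      # illustrative value only
        "api_base": "https://example.openai.azure.com",   # illustrative value only
        "api_version": "2024-02-01",
        "custom_args": {"max_context_size": 128000},
    }

    entry = ModelEntry.load_from_dict(raw_entry)  # same as ModelEntry.model_validate(raw_entry)
    print(entry.api_key)  # api_key is a SecretStr, so it prints masked rather than in plain text

Because model_config uses ConfigDict(extra="allow"), unknown keys in a models file are kept on the entry instead of raising a validation error.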
holmes/core/openai_formatting.py
CHANGED
@@ -80,6 +80,19 @@ def format_tool_to_open_ai_standard(
         )
         if param_attributes.description is not None:
             tool_properties[param_name]["description"] = param_attributes.description
+        # Add enum constraint if specified
+        if hasattr(param_attributes, "enum") and param_attributes.enum:
+            enum_values = list(
+                param_attributes.enum
+            )  # Create a copy to avoid modifying original
+            # In strict mode, optional parameters need None in their enum to match the type allowing null
+            if (
+                strict_mode
+                and not param_attributes.required
+                and None not in enum_values
+            ):
+                enum_values.append(None)
+            tool_properties[param_name]["enum"] = enum_values
 
     result: dict[str, Any] = {
         "type": "function",
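For context, the new enum handling affects the JSON schema emitted for tool parameters. Below is a sketch of the resulting property for a hypothetical optional "format" parameter declared with enum=["json", "yaml"] when strict mode is enabled; how the nullable type itself is rendered is produced elsewhere in format_tool_to_open_ai_standard and is an assumption here:

    # Assumed shape of tool_properties["format"] after the enum block runs in strict mode;
    # None is appended so the enum stays consistent with a type that also allows null.
    tool_properties = {
        "format": {
            "type": ["string", "null"],  # assumption: how the optional type is rendered in strict mode
            "description": "Output format",
            "enum": ["json", "yaml", None],
        }
    }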
holmes/core/supabase_dal.py
CHANGED
@@ -237,70 +237,61 @@ class SupabaseDal:
             logging.exception("Supabase error while retrieving efficiency data")
             return None
 
-    def
-        self,
+    def get_configuration_changes_metadata(
+        self,
+        start_datetime: str,
+        end_datetime: str,
+        limit: int = 100,
+        workload: Optional[str] = None,
+        ns: Optional[str] = None,
     ) -> Optional[List[Dict]]:
         if not self.enabled:
             return []
 
         try:
-
+            query = (
                 self.client.table(ISSUES_TABLE)
-                .select(
+                .select(
+                    "id",
+                    "title",
+                    "subject_name",
+                    "subject_namespace",
+                    "subject_type",
+                    "description",
+                    "starts_at",
+                    "ends_at",
+                )
                 .eq("account_id", self.account_id)
                 .eq("cluster", self.cluster)
                 .eq("finding_type", "configuration_change")
                 .gte("creation_date", start_datetime)
                 .lte("creation_date", end_datetime)
-                .
+                .limit(limit)
             )
-            if not len(changes_response.data):
-                return None
 
-
-
-
+            if workload:
+                query.eq("subject_name", workload)
+            if ns:
+                query.eq("subject_namespace", ns)
 
-
-
-            change_data_response = (
-                self.client.table(EVIDENCE_TABLE)
-                .select("*")
-                .eq("account_id", self.account_id)
-                .in_("issue_id", changes_ids)
-                .not_.in_("enrichment_type", ENRICHMENT_BLACKLIST)
-                .execute()
-            )
-            if not len(change_data_response.data):
+            res = query.execute()
+            if not res.data:
                 return None
 
-            truncate_evidences_entities_if_necessary(change_data_response.data)
-
         except Exception:
-            logging.exception("Supabase error while retrieving change
+            logging.exception("Supabase error while retrieving change data")
             return None
 
-        changes_data = []
-        change_data_map = {
-            change["issue_id"]: change for change in change_data_response.data
-        }
-
-        for change in changes_response.data:
-            change_content = change_data_map.get(change["id"])
-            if change_content:
-                changes_data.append(
-                    {
-                        "change": change_content["data"],
-                        "evidence_id": change_content["id"],
-                        **change,
-                    }
-                )
-
         logging.debug(
-            "Change history for %s-%s: %s",
+            "Change history metadata for %s-%s workload %s in ns %s: %s",
+            start_datetime,
+            end_datetime,
+            workload,
+            ns,
+            res.data,
         )
 
-        return
+        return res.data
 
     def unzip_evidence_file(self, data):
         try: