holmesgpt 0.13.3a0__py3-none-any.whl → 0.14.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of holmesgpt might be problematic.

Files changed (82)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +10 -2
  3. holmes/common/env_vars.py +8 -1
  4. holmes/config.py +66 -139
  5. holmes/core/investigation.py +1 -2
  6. holmes/core/llm.py +256 -51
  7. holmes/core/models.py +2 -0
  8. holmes/core/safeguards.py +4 -4
  9. holmes/core/supabase_dal.py +14 -8
  10. holmes/core/tool_calling_llm.py +193 -176
  11. holmes/core/tools.py +260 -25
  12. holmes/core/tools_utils/data_types.py +81 -0
  13. holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
  14. holmes/core/tools_utils/tool_executor.py +2 -2
  15. holmes/core/toolset_manager.py +150 -3
  16. holmes/core/tracing.py +6 -1
  17. holmes/core/transformers/__init__.py +23 -0
  18. holmes/core/transformers/base.py +62 -0
  19. holmes/core/transformers/llm_summarize.py +174 -0
  20. holmes/core/transformers/registry.py +122 -0
  21. holmes/core/transformers/transformer.py +31 -0
  22. holmes/main.py +5 -0
  23. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  24. holmes/plugins/toolsets/aks.yaml +64 -0
  25. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
  26. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
  27. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
  28. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
  29. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  30. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
  31. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
  32. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
  33. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
  34. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
  35. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
  36. holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
  37. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  38. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  39. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +16 -17
  40. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +9 -10
  41. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +21 -22
  42. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +8 -8
  43. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -19
  44. holmes/plugins/toolsets/git.py +22 -22
  45. holmes/plugins/toolsets/grafana/common.py +14 -2
  46. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +473 -0
  47. holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
  48. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +3 -3
  49. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  50. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +662 -290
  51. holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
  52. holmes/plugins/toolsets/internet/internet.py +3 -3
  53. holmes/plugins/toolsets/internet/notion.py +3 -3
  54. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  55. holmes/plugins/toolsets/kafka.py +18 -18
  56. holmes/plugins/toolsets/kubernetes.yaml +58 -0
  57. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  58. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  59. holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
  60. holmes/plugins/toolsets/newrelic.py +8 -8
  61. holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
  62. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  63. holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
  64. holmes/plugins/toolsets/prometheus/prometheus.py +172 -39
  65. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +25 -0
  66. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  67. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
  68. holmes/plugins/toolsets/robusta/robusta.py +10 -10
  69. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
  70. holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
  71. holmes/plugins/toolsets/utils.py +88 -0
  72. holmes/utils/config_utils.py +91 -0
  73. holmes/utils/env.py +7 -0
  74. holmes/utils/holmes_status.py +2 -1
  75. holmes/utils/sentry_helper.py +41 -0
  76. holmes/utils/stream.py +9 -0
  77. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1a0.dist-info}/METADATA +10 -14
  78. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1a0.dist-info}/RECORD +81 -71
  79. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  80. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1a0.dist-info}/LICENSE.txt +0 -0
  81. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1a0.dist-info}/WHEEL +0 -0
  82. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1a0.dist-info}/entry_points.txt +0 -0
holmes/core/llm.py CHANGED
@@ -1,7 +1,7 @@
 import json
 import logging
 from abc import abstractmethod
-from typing import Any, Dict, List, Optional, Type, Union
+from typing import Any, Dict, List, Optional, Type, Union, TYPE_CHECKING

 from litellm.types.utils import ModelResponse
 import sentry_sdk
@@ -10,21 +10,29 @@ from litellm.litellm_core_utils.streaming_handler import CustomStreamWrapper
 from pydantic import BaseModel
 import litellm
 import os
+from holmes.clients.robusta_client import RobustaModelsResponse, fetch_robusta_models
 from holmes.common.env_vars import (
+    LOAD_ALL_ROBUSTA_MODELS,
     REASONING_EFFORT,
+    ROBUSTA_AI,
+    ROBUSTA_API_ENDPOINT,
     THINKING,
 )
+from holmes.core.supabase_dal import SupabaseDal
+from holmes.utils.env import environ_get_safe_int, replace_env_vars_values
+from holmes.utils.file_utils import load_yaml_file

+if TYPE_CHECKING:
+    from holmes.config import Config

-def environ_get_safe_int(env_var, default="0"):
-    try:
-        return max(int(os.environ.get(env_var, default)), 0)
-    except ValueError:
-        return int(default)
+MODEL_LIST_FILE_LOCATION = os.environ.get(
+    "MODEL_LIST_FILE_LOCATION", "/etc/holmes/config/model_list.yaml"
+)


 OVERRIDE_MAX_OUTPUT_TOKEN = environ_get_safe_int("OVERRIDE_MAX_OUTPUT_TOKEN")
 OVERRIDE_MAX_CONTENT_SIZE = environ_get_safe_int("OVERRIDE_MAX_CONTENT_SIZE")
+ROBUSTA_AI_MODEL_NAME = "Robusta"


 class LLM:
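
The `environ_get_safe_int` helper has not disappeared; it moved to `holmes.utils.env` and is now imported from there. Its behavior, visible in the removed lines above, is worth pinning down since both OVERRIDE_* constants depend on it. A minimal sketch reproducing that behavior:

```python
import os

def environ_get_safe_int(env_var: str, default: str = "0") -> int:
    """Read an int env var, clamping negatives to 0 and falling back on parse errors."""
    try:
        return max(int(os.environ.get(env_var, default)), 0)
    except ValueError:
        return int(default)

os.environ["OVERRIDE_MAX_OUTPUT_TOKEN"] = "-5"
assert environ_get_safe_int("OVERRIDE_MAX_OUTPUT_TOKEN") == 0  # negatives clamp to 0
assert environ_get_safe_int("SOME_UNSET_VAR") == 0             # missing -> default
```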
@@ -61,31 +69,39 @@ class LLM:
 class DefaultLLM(LLM):
     model: str
     api_key: Optional[str]
-    base_url: Optional[str]
+    api_base: Optional[str]
+    api_version: Optional[str]
     args: Dict

     def __init__(
         self,
         model: str,
         api_key: Optional[str] = None,
+        api_base: Optional[str] = None,
+        api_version: Optional[str] = None,
         args: Optional[Dict] = None,
-        tracer=None,
+        tracer: Optional[Any] = None,
+        name: Optional[str] = None,
     ):
         self.model = model
         self.api_key = api_key
+        self.api_base = api_base
+        self.api_version = api_version
         self.args = args or {}
         self.tracer = tracer
+        self.name = name

-        if not self.args:
-            self.check_llm(self.model, self.api_key)
+        self.check_llm(self.model, self.api_key, self.api_base, self.api_version)

-    def check_llm(self, model: str, api_key: Optional[str]):
+    def check_llm(
+        self,
+        model: str,
+        api_key: Optional[str],
+        api_base: Optional[str],
+        api_version: Optional[str],
+    ):
         logging.debug(f"Checking LiteLLM model {model}")
-        # TODO: this WAS a hack to get around the fact that we can't pass in an api key to litellm.validate_environment
-        # so without this hack it always complains that the environment variable for the api key is missing
-        # to fix that, we always set an api key in the standard format that litellm expects (which is ${PROVIDER}_API_KEY)
-        # TODO: we can now handle this better - see https://github.com/BerriAI/litellm/issues/4375#issuecomment-2223684750
-        lookup = litellm.get_llm_provider(self.model)
+        lookup = litellm.get_llm_provider(model)
         if not lookup:
             raise Exception(f"Unknown provider for model {model}")
         provider = lookup[1]
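
With the widened constructor, an Azure OpenAI deployment can be configured without relying on environment variables for the endpoint and API version. A hedged sketch (the deployment name, endpoint, and key are placeholders, not values from this release; constructing the object triggers the `check_llm` validation shown above):

```python
from holmes.core.llm import DefaultLLM

# Hypothetical Azure deployment; check_llm() runs in __init__ and validates
# that litellm can resolve the provider and required credentials.
llm = DefaultLLM(
    model="azure/my-gpt4o-deployment",
    api_key="<AZURE_API_KEY>",
    api_base="https://my-endpoint.openai.azure.com",
    api_version="2024-02-15-preview",  # forwarded to litellm.completion below
)
```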
@@ -124,35 +140,42 @@ class DefaultLLM(LLM):
         ):
             model_requirements = {"keys_in_environment": True, "missing_keys": []}
         else:
-            #
-            api_key_env_var = f"{provider.upper()}_API_KEY"
-            if api_key:
-                os.environ[api_key_env_var] = api_key
-            model_requirements = litellm.validate_environment(model=model)
+            model_requirements = litellm.validate_environment(
+                model=model, api_key=api_key, api_base=api_base
+            )
+            # validate_environment does not accept api_version, and as a special case for Azure OpenAI Service,
+            # when all the other AZURE environment variables are set except AZURE_API_VERSION, validate_environment
+            # complains that it is missing even after api_version is set.
+            # TODO: There's an open PR in litellm to accept api_version in validate_environment; we can leverage
+            # that change, if accepted, to drop the following check.
+            # https://github.com/BerriAI/litellm/pull/13808
+            if (
+                provider == "azure"
+                and ["AZURE_API_VERSION"] == model_requirements["missing_keys"]
+                and api_version is not None
+            ):
+                model_requirements["missing_keys"] = []
+                model_requirements["keys_in_environment"] = True

         if not model_requirements["keys_in_environment"]:
             raise Exception(
                 f"model {model} requires the following environment variables: {model_requirements['missing_keys']}"
             )

-    def _strip_model_prefix(self) -> str:
+    def _get_model_name_variants_for_lookup(self) -> list[str]:
         """
-        Helper function to strip 'openai/' prefix from model name if it exists.
-        model cost is taken from here which does not have the openai prefix
-        https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json
+        Generate model name variants to try when looking up in litellm.model_cost.
+        Returns a list of names to try in order: exact, lowercase, without prefix, etc.
         """
-        model_name = self.model
-        prefixes = ["openai/", "bedrock/", "vertex_ai/", "anthropic/"]
-
-        for prefix in prefixes:
-            if model_name.startswith(prefix):
-                return model_name[len(prefix) :]
+        names_to_try = [self.model, self.model.lower()]

-        return model_name
+        # If there's a prefix, also try without it
+        if "/" in self.model:
+            base_model = self.model.split("/", 1)[1]
+            names_to_try.extend([base_model, base_model.lower()])

-        # this unfortunately does not seem to work for azure if the deployment name is not a well-known model name
-        # if not litellm.supports_function_calling(model=model):
-        #     raise Exception(f"model {model} does not support function calling. You must use HolmesGPT with a model that supports function calling.")
+        # Remove duplicates while preserving order (dict.fromkeys maintains insertion order in Python 3.7+)
+        return list(dict.fromkeys(names_to_try))

     def get_context_window_size(self) -> int:
         if OVERRIDE_MAX_CONTENT_SIZE:
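
The Azure special case above is easiest to see with the dict shape `litellm.validate_environment` returns. A small sketch of just that check (the `model_requirements` literal mimics litellm's return value under the stated conditions):

```python
# If the only "missing" key is AZURE_API_VERSION but an api_version was passed
# explicitly, treat the environment as valid.
model_requirements = {"keys_in_environment": False, "missing_keys": ["AZURE_API_VERSION"]}
provider, api_version = "azure", "2024-02-15-preview"

if (
    provider == "azure"
    and model_requirements["missing_keys"] == ["AZURE_API_VERSION"]
    and api_version is not None
):
    model_requirements["missing_keys"] = []
    model_requirements["keys_in_environment"] = True

assert model_requirements["keys_in_environment"]
```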
@@ -161,14 +184,20 @@ class DefaultLLM(LLM):
             )
             return OVERRIDE_MAX_CONTENT_SIZE

-        model_name = os.environ.get("MODEL_TYPE", self._strip_model_prefix())
-        try:
-            return litellm.model_cost[model_name]["max_input_tokens"]
-        except Exception:
-            logging.warning(
-                f"Couldn't find model's name {model_name} in litellm's model list, fallback to 128k tokens for max_input_tokens"
-            )
-            return 128000
+        # Try each name variant
+        for name in self._get_model_name_variants_for_lookup():
+            try:
+                return litellm.model_cost[name]["max_input_tokens"]
+            except Exception:
+                continue
+
+        # Log which lookups we tried
+        logging.warning(
+            f"Couldn't find model {self.model} in litellm's model list (tried: {', '.join(self._get_model_name_variants_for_lookup())}), "
+            f"using default 128k tokens for max_input_tokens. "
+            f"To override, set OVERRIDE_MAX_CONTENT_SIZE environment variable to the correct value for your model."
+        )
+        return 128000

     @sentry_sdk.trace
     def count_tokens_for_message(self, messages: list[dict]) -> int:
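
The variant lookup replaces the old fixed prefix list. A standalone sketch mirroring `_get_model_name_variants_for_lookup`, showing the order in which names are tried against `litellm.model_cost`:

```python
def model_name_variants(model: str) -> list[str]:
    # Exact name first, then lowercase, then the same pair with any
    # "provider/" prefix removed, de-duplicated in order.
    names = [model, model.lower()]
    if "/" in model:
        base = model.split("/", 1)[1]
        names.extend([base, base.lower()])
    return list(dict.fromkeys(names))

print(model_name_variants("azure/GPT-4o"))
# ['azure/GPT-4o', 'azure/gpt-4o', 'GPT-4o', 'gpt-4o']
```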
@@ -237,6 +266,8 @@ class DefaultLLM(LLM):
         result = litellm_to_use.completion(
             model=self.model,
             api_key=self.api_key,
+            base_url=self.api_base,
+            api_version=self.api_version,
             messages=messages,
             response_format=response_format,
             drop_params=drop_params,
@@ -260,14 +291,20 @@ class DefaultLLM(LLM):
             )
             return OVERRIDE_MAX_OUTPUT_TOKEN

-        model_name = os.environ.get("MODEL_TYPE", self._strip_model_prefix())
-        try:
-            return litellm.model_cost[model_name]["max_output_tokens"]
-        except Exception:
-            logging.warning(
-                f"Couldn't find model's name {model_name} in litellm's model list, fallback to 4096 tokens for max_output_tokens"
-            )
-            return 4096
+        # Try each name variant
+        for name in self._get_model_name_variants_for_lookup():
+            try:
+                return litellm.model_cost[name]["max_output_tokens"]
+            except Exception:
+                continue
+
+        # Log which lookups we tried
+        logging.warning(
+            f"Couldn't find model {self.model} in litellm's model list (tried: {', '.join(self._get_model_name_variants_for_lookup())}), "
+            f"using default 4096 tokens for max_output_tokens. "
+            f"To override, set OVERRIDE_MAX_OUTPUT_TOKEN environment variable to the correct value for your model."
+        )
+        return 4096

     def _add_cache_control_to_last_message(
         self, messages: List[Dict[str, Any]]
@@ -325,3 +362,171 @@ class DefaultLLM(LLM):
         logging.debug(
             f"Added cache_control to {target_msg.get('role')} message (structured content)"
         )
+
+
+class LLMModelRegistry:
+    def __init__(self, config: "Config", dal: SupabaseDal) -> None:
+        self.config = config
+        self._llms: dict[str, dict[str, Any]] = {}
+        self._default_robusta_model = None
+        self.dal = dal
+
+        self._init_models()
+
+    @property
+    def default_robusta_model(self) -> Optional[str]:
+        return self._default_robusta_model
+
+    def _init_models(self):
+        self._llms = self._parse_models_file(MODEL_LIST_FILE_LOCATION)
+
+        if self._should_load_robusta_ai():
+            self.configure_robusta_ai_model()
+
+        if self._should_load_config_model():
+            self._llms[self.config.model] = self._create_model_entry(
+                model=self.config.model,
+                model_name=self.config.model,
+                base_url=self.config.api_base,
+                is_robusta_model=False,
+            )
+
+    def _should_load_config_model(self) -> bool:
+        if self.config.model is not None:
+            return True
+
+        # backward compatibility - in the past config.model was set by default to gpt-4o,
+        # so we need to check if the user has set an OPENAI_API_KEY to load the config model.
+        has_openai_key = os.environ.get("OPENAI_API_KEY")
+        if has_openai_key:
+            self.config.model = "gpt-4o"
+            return True
+
+        return False
+
+    def configure_robusta_ai_model(self) -> None:
+        try:
+            if not self.config.cluster_name or not LOAD_ALL_ROBUSTA_MODELS:
+                self._load_default_robusta_config()
+                return
+
+            if not self.dal.account_id or not self.dal.enabled:
+                self._load_default_robusta_config()
+                return
+
+            account_id, token = self.dal.get_ai_credentials()
+            robusta_models: RobustaModelsResponse | None = fetch_robusta_models(
+                account_id, token
+            )
+            if not robusta_models or not robusta_models.models:
+                self._load_default_robusta_config()
+                return
+
+            for model in robusta_models.models:
+                logging.info(f"Loading Robusta AI model: {model}")
+                self._llms[model] = self._create_robusta_model_entry(model)
+
+            if robusta_models.default_model:
+                logging.info(
+                    f"Setting default Robusta AI model to: {robusta_models.default_model}"
+                )
+                self._default_robusta_model: str = robusta_models.default_model  # type: ignore
+
+        except Exception:
+            logging.exception("Failed to get all robusta models")
+            # fallback to default behavior
+            self._load_default_robusta_config()
+
+    def _load_default_robusta_config(self):
+        if self._should_load_robusta_ai():
+            logging.info("Loading default Robusta AI model")
+            self._llms[ROBUSTA_AI_MODEL_NAME] = {
+                "name": ROBUSTA_AI_MODEL_NAME,
+                "base_url": ROBUSTA_API_ENDPOINT,
+                "is_robusta_model": True,
+                "model": "gpt-4o",
+            }
+            self._default_robusta_model = ROBUSTA_AI_MODEL_NAME
+
+    def _should_load_robusta_ai(self) -> bool:
+        if not self.config.should_try_robusta_ai:
+            return False
+
+        # ROBUSTA_AI was set in the env vars, so we can use it directly
+        if ROBUSTA_AI is not None:
+            return ROBUSTA_AI
+
+        # MODEL is set in the env vars, e.g. the user is using a custom model,
+        # so we don't need to load the Robusta AI model; this keeps the behavior backward compatible
+        if "MODEL" in os.environ:
+            return False
+
+        # if the user has provided a model list, we don't need to load the Robusta AI model
+        if self._llms:
+            return False
+
+        return True
+
+    def get_model_params(self, model_key: Optional[str] = None) -> dict:
+        if not self._llms:
+            raise Exception("No llm models were loaded")
+
+        if model_key:
+            model_params = self._llms.get(model_key)
+            if model_params is not None:
+                logging.info(f"Using selected model: {model_key}")
+                return model_params.copy()
+
+            logging.error(f"Couldn't find model: {model_key} in model list")
+
+        if self._default_robusta_model:
+            model_params = self._llms.get(self._default_robusta_model)
+            if model_params is not None:
+                logging.info(
+                    f"Using default Robusta AI model: {self._default_robusta_model}"
+                )
+                return model_params.copy()
+
+            logging.error(
+                f"Couldn't find default Robusta AI model: {self._default_robusta_model} in model list"
+            )
+
+        model_key, first_model_params = next(iter(self._llms.items()))
+        logging.info(f"Using first available model: {model_key}")
+        return first_model_params.copy()
+
+    def get_llm(self, name: str) -> LLM:  # TODO: fix logic
+        return self._llms[name]  # type: ignore
+
+    @property
+    def models(self) -> dict[str, dict[str, Any]]:
+        return self._llms
+
+    def _parse_models_file(self, path: str):
+        models = load_yaml_file(path, raise_error=False, warn_not_found=False)
+        for _, params in models.items():
+            params = replace_env_vars_values(params)
+
+        return models
+
+    def _create_robusta_model_entry(self, model_name: str) -> dict[str, Any]:
+        return self._create_model_entry(
+            model="gpt-4o",  # The Robusta AI model uses an OpenAI-like API.
+            model_name=model_name,
+            base_url=f"{ROBUSTA_API_ENDPOINT}/llm/{model_name}",
+            is_robusta_model=True,
+        )
+
+    def _create_model_entry(
+        self,
+        model: str,
+        model_name: str,
+        base_url: Optional[str] = None,
+        is_robusta_model: Optional[bool] = None,
+    ) -> dict[str, Any]:
+        return {
+            "name": model_name,
+            "base_url": base_url,
+            "is_robusta_model": is_robusta_model,
+            "model": model,
+        }
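
`get_model_params` resolves a model in three steps: the explicitly requested key, then the default Robusta model, then the first loaded entry. A minimal sketch of that fallback order, using a plain dict in place of the registry's loaded model list (model names are illustrative):

```python
llms = {
    "gpt-4o": {"name": "gpt-4o", "model": "gpt-4o"},
    "Robusta": {"name": "Robusta", "model": "gpt-4o", "is_robusta_model": True},
}
default_robusta_model = "Robusta"

def get_model_params(model_key=None):
    if model_key and model_key in llms:          # 1. explicit selection
        return llms[model_key].copy()
    if default_robusta_model in llms:            # 2. default Robusta model
        return llms[default_robusta_model].copy()
    return next(iter(llms.values())).copy()      # 3. first available model

assert get_model_params("gpt-4o")["name"] == "gpt-4o"
assert get_model_params("missing")["name"] == "Robusta"  # falls back to default
```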
holmes/core/models.py CHANGED
@@ -10,6 +10,7 @@ class InvestigationResult(BaseModel):
     sections: Optional[Dict[str, Union[str, None]]] = None
     tool_calls: List[ToolCallResult] = []
     instructions: List[str] = []
+    metadata: Optional[Dict[Any, Any]] = None


 class InvestigateRequest(BaseModel):
@@ -145,6 +146,7 @@ class ChatResponse(BaseModel):
     conversation_history: list[dict]
     tool_calls: Optional[List[ToolCallResult]] = []
     follow_up_actions: Optional[List[FollowUpAction]] = []
+    metadata: Optional[Dict[Any, Any]] = None


 class WorkloadHealthInvestigationResult(BaseModel):
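
Both response models gain the same optional `metadata` field. A hedged illustration with a stand-in model of the same field shape (the real `InvestigationResult` and `ChatResponse` carry additional fields not shown in these hunks):

```python
from typing import Any, Dict, Optional
from pydantic import BaseModel

class Result(BaseModel):
    # Same shape as the added field: fully optional, defaults to None.
    metadata: Optional[Dict[Any, Any]] = None

assert Result().metadata is None
assert Result(metadata={"model": "gpt-4o"}).metadata == {"model": "gpt-4o"}
```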
holmes/core/safeguards.py CHANGED
@@ -5,7 +5,7 @@ from pydantic import ValidationError

 from holmes.common.env_vars import TOOL_CALL_SAFEGUARDS_ENABLED
 from holmes.plugins.toolsets.logging_utils.logging_api import POD_LOGGING_TOOL_NAME
-from holmes.core.tools import StructuredToolResult, ToolResultStatus
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
 from holmes.plugins.toolsets.logging_utils.logging_api import FetchPodLogsParams


@@ -39,7 +39,7 @@ def _has_previous_unfiltered_pod_logs_call(
         result = tool_call.get("result", {})
         if (
             tool_call.get("tool_name") == POD_LOGGING_TOOL_NAME
-            and result.get("status") == ToolResultStatus.NO_DATA
+            and result.get("status") == StructuredToolResultStatus.NO_DATA
             and result.get("params")
         ):
             params = FetchPodLogsParams(**result.get("params"))
@@ -94,7 +94,7 @@ def prevent_overly_repeated_tool_call(
     For example if Holmes checks if a resource is deployed, runs a command to deploy it and then checks again if it has deployed properly.
     """
     return StructuredToolResult(
-        status=ToolResultStatus.ERROR,
+        status=StructuredToolResultStatus.ERROR,
         error=(
             "Refusing to run this tool call because it has already been called during this session with the exact same parameters.\n"
             "Move on with your investigation to a different tool or change the parameter values."
@@ -106,7 +106,7 @@ def prevent_overly_repeated_tool_call(
         tool_name=tool_name, tool_params=tool_params, tool_calls=tool_calls
     ):
         return StructuredToolResult(
-            status=ToolResultStatus.ERROR,
+            status=StructuredToolResultStatus.ERROR,
            error=(
                f"Refusing to run this tool call because the exact same {POD_LOGGING_TOOL_NAME} tool call without filter has already run and returned no data.\n"
                "This tool call would also have returned no data.\n"
holmes/core/supabase_dal.py CHANGED
@@ -37,6 +37,7 @@ from holmes.utils.global_instructions import Instructions
 SUPABASE_TIMEOUT_SECONDS = int(os.getenv("SUPABASE_TIMEOUT_SECONDS", 3600))

 ISSUES_TABLE = "Issues"
+GROUPED_ISSUES_TABLE = "GroupedIssues"
 EVIDENCE_TABLE = "Evidence"
 RUNBOOKS_TABLE = "HolmesRunbooks"
 SESSION_TOKENS_TABLE = "AuthTokens"
@@ -338,6 +339,14 @@ class SupabaseDal:
             data.extend(unzipped_files)
         return data

+    def get_issue_from_db(self, issue_id: str, table: str) -> Optional[Dict]:
+        issue_response = (
+            self.client.table(table).select("*").filter("id", "eq", issue_id).execute()
+        )
+        if len(issue_response.data):
+            return issue_response.data[0]
+        return None
+
     def get_issue_data(self, issue_id: Optional[str]) -> Optional[Dict]:
         # TODO this could be done in a single atomic SELECT, but there is no
         # foreign key relation between Issues and Evidence.
@@ -347,14 +356,11 @@ class SupabaseDal:
             return None
         issue_data = None
         try:
-            issue_response = (
-                self.client.table(ISSUES_TABLE)
-                .select("*")
-                .filter("id", "eq", issue_id)
-                .execute()
-            )
-            if len(issue_response.data):
-                issue_data = issue_response.data[0]
+            issue_data = self.get_issue_from_db(issue_id, ISSUES_TABLE)
+            if issue_data and issue_data["source"] == "prometheus":
+                logging.debug("Getting alert %s from GroupedIssuesTable", issue_id)
+                # This issue will have the complete alert duration information
+                issue_data = self.get_issue_from_db(issue_id, GROUPED_ISSUES_TABLE)

         except Exception:  # e.g. invalid id format
             logging.exception("Supabase error while retrieving issue data")
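
The new flow re-fetches prometheus-sourced issues from `GroupedIssues`, which carries the complete alert duration. A minimal sketch of that lookup order, with `fetch` standing in for `SupabaseDal.get_issue_from_db` and an in-memory table in place of Supabase:

```python
def get_issue_data(issue_id, fetch):
    issue = fetch(issue_id, "Issues")
    if issue and issue["source"] == "prometheus":
        # The grouped row has the complete alert duration information.
        issue = fetch(issue_id, "GroupedIssues")
    return issue

rows = {
    ("abc", "Issues"): {"id": "abc", "source": "prometheus"},
    ("abc", "GroupedIssues"): {"id": "abc", "source": "prometheus", "duration": "15m"},
}
print(get_issue_data("abc", lambda i, t: rows.get((i, t))))  # grouped row wins
```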