holmesgpt 0.12.4__py3-none-any.whl → 0.13.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their public registries. It is provided for informational purposes only.

Potentially problematic release.



Files changed (86)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +19 -1
  3. holmes/common/env_vars.py +13 -0
  4. holmes/config.py +69 -9
  5. holmes/core/conversations.py +11 -0
  6. holmes/core/investigation.py +16 -3
  7. holmes/core/investigation_structured_output.py +12 -0
  8. holmes/core/llm.py +10 -0
  9. holmes/core/models.py +9 -1
  10. holmes/core/openai_formatting.py +72 -12
  11. holmes/core/prompt.py +13 -0
  12. holmes/core/supabase_dal.py +3 -0
  13. holmes/core/todo_manager.py +88 -0
  14. holmes/core/tool_calling_llm.py +121 -149
  15. holmes/core/tools.py +10 -1
  16. holmes/core/tools_utils/tool_executor.py +7 -2
  17. holmes/core/tools_utils/toolset_utils.py +7 -2
  18. holmes/core/tracing.py +8 -7
  19. holmes/interactive.py +1 -0
  20. holmes/main.py +2 -1
  21. holmes/plugins/prompts/__init__.py +7 -1
  22. holmes/plugins/prompts/_ai_safety.jinja2 +43 -0
  23. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  24. holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
  25. holmes/plugins/prompts/_fetch_logs.jinja2 +6 -1
  26. holmes/plugins/prompts/_general_instructions.jinja2 +16 -0
  27. holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
  28. holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
  29. holmes/plugins/prompts/generic_ask.jinja2 +4 -3
  30. holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
  31. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +4 -0
  32. holmes/plugins/toolsets/__init__.py +19 -6
  33. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
  34. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
  35. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
  36. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  37. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
  38. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
  39. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  40. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
  41. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
  42. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
  43. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
  44. holmes/plugins/toolsets/coralogix/api.py +6 -6
  45. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
  46. holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
  47. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
  48. holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
  49. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
  50. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
  51. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
  52. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
  53. holmes/plugins/toolsets/git.py +15 -15
  54. holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
  55. holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
  56. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
  57. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
  58. holmes/plugins/toolsets/internet/internet.py +2 -1
  59. holmes/plugins/toolsets/internet/notion.py +2 -1
  60. holmes/plugins/toolsets/investigator/__init__.py +0 -0
  61. holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
  62. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
  63. holmes/plugins/toolsets/investigator/model.py +15 -0
  64. holmes/plugins/toolsets/kafka.py +14 -7
  65. holmes/plugins/toolsets/kubernetes.yaml +7 -7
  66. holmes/plugins/toolsets/kubernetes_logs.py +454 -25
  67. holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
  68. holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
  69. holmes/plugins/toolsets/newrelic.py +8 -3
  70. holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
  71. holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
  72. holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
  73. holmes/plugins/toolsets/prometheus/prometheus.py +149 -44
  74. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
  75. holmes/plugins/toolsets/robusta/robusta.py +4 -4
  76. holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
  77. holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
  78. holmes/plugins/toolsets/utils.py +8 -1
  79. holmes/utils/llms.py +20 -0
  80. holmes/utils/stream.py +90 -0
  81. {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/METADATA +48 -35
  82. {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/RECORD +85 -75
  83. holmes/utils/robusta.py +0 -9
  84. {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/LICENSE.txt +0 -0
  85. {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/WHEEL +0 -0
  86. {holmesgpt-0.12.4.dist-info → holmesgpt-0.13.0.dist-info}/entry_points.txt +0 -0
holmes/plugins/toolsets/prometheus/prometheus.py CHANGED
@@ -3,12 +3,13 @@ import logging
 import os
 import re
 import time
-from typing import Any, Dict, List, Optional, Tuple, Union
+from typing import Any, Dict, List, Optional, Tuple, Type, Union
 from urllib.parse import urljoin
 
 import requests  # type: ignore
 from pydantic import BaseModel, field_validator, Field, model_validator
 from requests import RequestException
+from requests_aws4auth import AWS4Auth
 
 from holmes.core.tools import (
     CallablePrerequisite,
@@ -25,14 +26,17 @@ from holmes.plugins.toolsets.utils import (
     get_param_or_raise,
     process_timestamps_to_rfc3339,
     standard_start_datetime_tool_param_description,
+    toolset_name_for_one_liner,
 )
 from holmes.utils.cache import TTLCache
 from holmes.common.env_vars import IS_OPENSHIFT
 from holmes.common.openshift import load_openshift_token
+from holmes.plugins.toolsets.logging_utils.logging_api import (
+    DEFAULT_TIME_SPAN_SECONDS,
+)
 from holmes.utils.keygen_utils import generate_random_key
 
 PROMETHEUS_RULES_CACHE_KEY = "cached_prometheus_rules"
-DEFAULT_TIME_SPAN_SECONDS = 3600
 
 
 class PrometheusConfig(BaseModel):
@@ -49,6 +53,7 @@ class PrometheusConfig(BaseModel):
     headers: Dict = Field(default_factory=dict)
     rules_cache_duration_seconds: Union[int, None] = 1800  # 30 minutes
     additional_labels: Optional[Dict[str, str]] = None
+    prometheus_ssl_enabled: bool = True
 
     @field_validator("prometheus_url")
     def ensure_trailing_slash(cls, v: Optional[str]) -> Optional[str]:
@@ -73,6 +78,32 @@ class PrometheusConfig(BaseModel):
 
         return self
 
+    def is_amp(self) -> bool:
+        return False
+
+    def get_auth(self) -> Any:
+        return None
+
+
+class AMPConfig(PrometheusConfig):
+    aws_access_key: str
+    aws_secret_access_key: str
+    aws_region: str
+    aws_service_name: str = "aps"
+    healthcheck: str = "api/v1/query?query=up"  # Override for AMP
+    prometheus_ssl_enabled: bool = False
+
+    def is_amp(self) -> bool:
+        return True
+
+    def get_auth(self):
+        return AWS4Auth(
+            self.aws_access_key,  # type: ignore
+            self.aws_secret_access_key,  # type: ignore
+            self.aws_region,  # type: ignore
+            self.aws_service_name,  # type: ignore
+        )
+
 
 class BasePrometheusTool(Tool):
     toolset: "PrometheusToolset"
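The `AMPConfig` subclass above is what adds Amazon Managed Prometheus (AMP) support: `get_auth()` returns a `requests_aws4auth.AWS4Auth` SigV4 signer, which downstream code passes as the `auth=` argument to `requests`. A minimal sketch of the same idea, with placeholder credentials and a placeholder workspace URL (not real values):

    import requests
    from requests_aws4auth import AWS4Auth

    region = "us-east-1"  # placeholder region
    workspace = f"https://aps-workspaces.{region}.amazonaws.com/workspaces/ws-example/"

    # Equivalent of AMPConfig.get_auth(): sign requests for the "aps" service.
    auth = AWS4Auth("ACCESS_KEY_ID", "SECRET_ACCESS_KEY", region, "aps")

    # Equivalent of the AMP healthcheck override ("api/v1/query?query=up").
    # Note: AMPConfig defaults prometheus_ssl_enabled to False, which would
    # map to verify=False on this call.
    resp = requests.get(
        workspace + "api/v1/query",
        params={"query": "up"},
        auth=auth,
        timeout=10,
    )
    print(resp.status_code)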
@@ -99,10 +130,15 @@ def filter_metrics_by_name(metrics: Dict, pattern: str) -> Dict:
 METRICS_SUFFIXES_TO_STRIP = ["_bucket", "_count", "_sum"]
 
 
-def fetch_metadata(prometheus_url: str, headers: Optional[Dict]) -> Dict:
+def fetch_metadata(
+    prometheus_url: str,
+    headers: Optional[Dict],
+    auth=None,
+    verify_ssl: bool = True,
+) -> Dict:
     metadata_url = urljoin(prometheus_url, "api/v1/metadata")
     metadata_response = requests.get(
-        metadata_url, headers=headers, timeout=60, verify=True
+        metadata_url, headers=headers, timeout=60, verify=verify_ssl, auth=auth
     )
 
     metadata_response.raise_for_status()
@@ -124,13 +160,17 @@ def fetch_metadata(prometheus_url: str, headers: Optional[Dict]) -> Dict:
 
 
 def fetch_metadata_with_series_api(
-    prometheus_url: str, metric_name: str, headers: Dict
+    prometheus_url: str,
+    metric_name: str,
+    headers: Dict,
+    auth=None,
+    verify_ssl: bool = True,
 ) -> Dict:
     url = urljoin(prometheus_url, "api/v1/series")
     params: Dict = {"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"}
 
     response = requests.get(
-        url, headers=headers, timeout=60, params=params, verify=True
+        url, headers=headers, timeout=60, params=params, auth=auth, verify=verify_ssl
     )
     response.raise_for_status()
     metrics = response.json()["data"]
@@ -172,6 +212,8 @@ def fetch_metrics_labels_with_series_api(
     cache: Optional[TTLCache],
     metrics_labels_time_window_hrs: Union[int, None],
     metric_name: str,
+    auth=None,
+    verify_ssl: bool = True,
 ) -> dict:
     """This is a slow query. Takes 5+ seconds to run"""
     cache_key = f"metrics_labels_series_api:{metric_name}"
@@ -188,7 +230,12 @@ def fetch_metrics_labels_with_series_api(
         params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
 
     series_response = requests.get(
-        url=series_url, headers=headers, params=params, timeout=60, verify=True
+        url=series_url,
+        headers=headers,
+        params=params,
+        auth=auth,
+        timeout=60,
+        verify=verify_ssl,
     )
     series_response.raise_for_status()
     series = series_response.json()["data"]
@@ -214,6 +261,8 @@ def fetch_metrics_labels_with_labels_api(
     metrics_labels_time_window_hrs: Union[int, None],
     metric_names: List[str],
     headers: Dict,
+    auth=None,
+    verify_ssl: bool = True,
 ) -> dict:
     metrics_labels = {}
 
@@ -233,7 +282,12 @@ def fetch_metrics_labels_with_labels_api(
         params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
 
     response = requests.get(
-        url=url, headers=headers, params=params, timeout=60, verify=True
+        url=url,
+        headers=headers,
+        params=params,
+        auth=auth,
+        timeout=60,
+        verify=verify_ssl,
     )
     response.raise_for_status()
     labels = response.json()["data"]
@@ -254,16 +308,27 @@ def fetch_metrics(
     should_fetch_labels_with_labels_api: bool,
     should_fetch_metadata_with_series_api: bool,
     headers: Dict,
+    auth=None,
+    verify_ssl: bool = True,
 ) -> dict:
     metrics = None
     should_fetch_labels = True
     if should_fetch_metadata_with_series_api:
         metrics = fetch_metadata_with_series_api(
-            prometheus_url=prometheus_url, metric_name=metric_name, headers=headers
+            prometheus_url=prometheus_url,
+            metric_name=metric_name,
+            headers=headers,
+            auth=auth,
+            verify_ssl=verify_ssl,
         )
         should_fetch_labels = False  # series API returns the labels
     else:
-        metrics = fetch_metadata(prometheus_url=prometheus_url, headers=headers)
+        metrics = fetch_metadata(
+            prometheus_url=prometheus_url,
+            headers=headers,
+            auth=auth,
+            verify_ssl=verify_ssl,
+        )
         metrics = filter_metrics_by_name(metrics, metric_name)
 
     if should_fetch_labels:
@@ -275,6 +340,8 @@ def fetch_metrics(
             metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
             metric_names=list(metrics.keys()),
             headers=headers,
+            auth=auth,
+            verify_ssl=verify_ssl,
         )
     else:
         metrics_labels = fetch_metrics_labels_with_series_api(
@@ -283,6 +350,8 @@ def fetch_metrics(
             metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
             metric_name=metric_name,
             headers=headers,
+            auth=auth,
+            verify_ssl=verify_ssl,
        )
 
     for metric_name in metrics:
@@ -309,6 +378,12 @@ class ListPrometheusRules(BasePrometheusTool):
                 error="Prometheus is not configured. Prometheus URL is missing",
                 params=params,
             )
+        if self.toolset.config.is_amp():
+            return StructuredToolResult(
+                status=ToolResultStatus.ERROR,
+                error="Tool not supported in AMP",
+                params=params,
+            )
         if not self._cache and self.toolset.config.rules_cache_duration_seconds:
             self._cache = TTLCache(self.toolset.config.rules_cache_duration_seconds)  # type: ignore
         try:
@@ -330,8 +405,9 @@ class ListPrometheusRules(BasePrometheusTool):
             rules_response = requests.get(
                 url=rules_url,
                 params=params,
+                auth=self.toolset.config.get_auth(),
                 timeout=180,
-                verify=True,
+                verify=self.toolset.config.prometheus_ssl_enabled,
                 headers=self.toolset.config.headers,
             )
             rules_response.raise_for_status()
@@ -367,7 +443,7 @@ class ListPrometheusRules(BasePrometheusTool):
         )
 
     def get_parameterized_one_liner(self, params) -> str:
-        return "list available prometheus rules"
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Rules"
 
 
 class ListAvailableMetrics(BasePrometheusTool):
@@ -424,6 +500,8 @@ class ListAvailableMetrics(BasePrometheusTool):
                 should_fetch_labels_with_labels_api=self.toolset.config.fetch_labels_with_labels_api,
                 should_fetch_metadata_with_series_api=self.toolset.config.fetch_metadata_with_series_api,
                 headers=self.toolset.config.headers,
+                auth=self.toolset.config.get_auth(),
+                verify_ssl=self.toolset.config.prometheus_ssl_enabled,
             )
 
             if params.get("type_filter"):
@@ -470,7 +548,8 @@ class ListAvailableMetrics(BasePrometheusTool):
         )
 
     def get_parameterized_one_liner(self, params) -> str:
-        return f'Search Available Prometheus Metrics: name_filter="{params.get("name_filter", "<no filter>")}", type_filter="{params.get("type_filter", "<no filter>")}"'
+        name_filter = params.get("name_filter", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Metrics ({name_filter})"
 
 
 class ExecuteInstantQuery(BasePrometheusTool):
@@ -509,7 +588,11 @@ class ExecuteInstantQuery(BasePrometheusTool):
            payload = {"query": query}
 
            response = requests.post(
-                url=url, headers=self.toolset.config.headers, data=payload, timeout=60
+                url=url,
+                headers=self.toolset.config.headers,
+                auth=self.toolset.config.get_auth(),
+                data=payload,
+                timeout=60,
            )
 
            if response.status_code == 200:
@@ -579,9 +662,8 @@ class ExecuteInstantQuery(BasePrometheusTool):
        )
 
    def get_parameterized_one_liner(self, params) -> str:
-        query = params.get("query")
-        description = params.get("description")
-        return f"Execute Prometheus Query (instant): promql='{query}', description='{description}'"
+        description = params.get("description", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Query ({description})"
 
 
 class ExecuteRangeQuery(BasePrometheusTool):
@@ -654,7 +736,11 @@ class ExecuteRangeQuery(BasePrometheusTool):
            }
 
            response = requests.post(
-                url=url, headers=self.toolset.config.headers, data=payload, timeout=120
+                url=url,
+                headers=self.toolset.config.headers,
+                auth=self.toolset.config.get_auth(),
+                data=payload,
+                timeout=120,
            )
 
            if response.status_code == 200:
@@ -726,15 +812,13 @@ class ExecuteRangeQuery(BasePrometheusTool):
        )
 
    def get_parameterized_one_liner(self, params) -> str:
-        query = params.get("query")
-        start = params.get("start")
-        end = params.get("end")
-        step = params.get("step")
-        description = params.get("description")
-        return f"Execute Prometheus Query (range): promql='{query}', start={start}, end={end}, step={step}, description='{description}'"
+        description = params.get("description", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Query ({description})"
 
 
 class PrometheusToolset(Toolset):
+    config: Optional[Union[PrometheusConfig, AMPConfig]] = None
+
    def __init__(self):
        super().__init__(
            name="prometheus/metrics",
@@ -760,28 +844,45 @@ class PrometheusToolset(Toolset):
        )
        self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
 
-    def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
-        if config:
-            self.config = PrometheusConfig(**config)
-            self._reload_llm_instructions()
-            return self._is_healthy()
+    def determine_prometheus_class(
+        self, config: dict[str, Any]
+    ) -> Type[Union[PrometheusConfig, AMPConfig]]:
+        has_aws_credentials = (
+            "aws_access_key" in config or "aws_secret_access_key" in config
+        )
+        return AMPConfig if has_aws_credentials else PrometheusConfig
 
-        prometheus_url = os.environ.get("PROMETHEUS_URL")
-        if not prometheus_url:
-            prometheus_url = self.auto_detect_prometheus_url()
+    def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
+        try:
+            if config:
+                config_cls = self.determine_prometheus_class(config)
+                self.config = config_cls(**config)  # type: ignore
+
+                self._reload_llm_instructions()
+                return self._is_healthy()
+        except Exception:
+            logging.exception("Failed to create prometheus config")
+            return False, "Failed to create prometheus config"
+        try:
+            prometheus_url = os.environ.get("PROMETHEUS_URL")
            if not prometheus_url:
-                return (
-                    False,
-                    "Unable to auto-detect prometheus. Define prometheus_url in the configuration for tool prometheus/metrics",
-                )
+                prometheus_url = self.auto_detect_prometheus_url()
+                if not prometheus_url:
+                    return (
+                        False,
+                        "Unable to auto-detect prometheus. Define prometheus_url in the configuration for tool prometheus/metrics",
+                    )
 
-        self.config = PrometheusConfig(
-            prometheus_url=prometheus_url,
-            headers=add_prometheus_auth(os.environ.get("PROMETHEUS_AUTH_HEADER")),
-        )
-        logging.info(f"Prometheus auto discovered at url {prometheus_url}")
-        self._reload_llm_instructions()
-        return self._is_healthy()
+            self.config = PrometheusConfig(
+                prometheus_url=prometheus_url,
+                headers=add_prometheus_auth(os.environ.get("PROMETHEUS_AUTH_HEADER")),
+            )
+            logging.info(f"Prometheus auto discovered at url {prometheus_url}")
+            self._reload_llm_instructions()
+            return self._is_healthy()
+        except Exception as e:
+            logging.exception("Failed to set up prometheus")
+            return False, str(e)
 
    def auto_detect_prometheus_url(self) -> Optional[str]:
        url: Optional[str] = PrometheusDiscovery.find_prometheus_url()
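With `determine_prometheus_class`, the same `prometheus/metrics` config block can describe either a plain Prometheus server or an AMP workspace; the choice hinges only on whether AWS credentials are present. The dispatch rule in isolation (the example config dicts below are made up):

    def pick_config_class(config: dict) -> str:
        # Mirrors determine_prometheus_class: AWS credentials imply AMP.
        has_aws_credentials = (
            "aws_access_key" in config or "aws_secret_access_key" in config
        )
        return "AMPConfig" if has_aws_credentials else "PrometheusConfig"

    print(pick_config_class({"prometheus_url": "http://prometheus:9090/"}))
    # -> PrometheusConfig
    print(pick_config_class({"aws_access_key": "AKIA...", "aws_region": "us-east-1"}))
    # -> AMPConfig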
@@ -804,7 +905,11 @@ class PrometheusToolset(Toolset):
         url = urljoin(self.config.prometheus_url, self.config.healthcheck)
         try:
             response = requests.get(
-                url=url, headers=self.config.headers, timeout=10, verify=True
+                url=url,
+                headers=self.config.headers,
+                auth=self.config.get_auth(),
+                timeout=10,
+                verify=self.config.prometheus_ssl_enabled,
             )
 
             if response.status_code == 200:
holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py CHANGED
@@ -21,6 +21,7 @@ from holmes.plugins.toolsets.rabbitmq.api import (
     get_cluster_status,
     make_request,
 )
+from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
 
 
 class RabbitMQConfig(BaseModel):
@@ -80,7 +81,9 @@ class ListConfiguredClusters(BaseRabbitMQTool):
         )
 
     def get_parameterized_one_liner(self, params) -> str:
-        return "list configured RabbitMQ clusters"
+        return (
+            f"{toolset_name_for_one_liner(self.toolset.name)}: List RabbitMQ Clusters"
+        )
 
 
 class GetRabbitMQClusterStatus(BaseRabbitMQTool):
@@ -116,7 +119,10 @@ class GetRabbitMQClusterStatus(BaseRabbitMQTool):
         )
 
     def get_parameterized_one_liner(self, params) -> str:
-        return "get RabbitMQ cluster status and partition information"
+        cluster_id = params.get("cluster_id", "")
+        if cluster_id:
+            return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Cluster Status ({cluster_id})"
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Cluster Status"
 
 
 class RabbitMQToolset(Toolset):
holmes/plugins/toolsets/robusta/robusta.py CHANGED
@@ -74,7 +74,7 @@ class FetchRobustaFinding(Tool):
         )
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-        return "Fetch Alert Metadata"
+        return "Robusta: Fetch Alert Metadata"
 
 
 class FetchResourceRecommendation(Tool):
@@ -138,7 +138,7 @@ class FetchResourceRecommendation(Tool):
         )
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-        return f"Check Historical Resource Utilization: ({str(params)})"
+        return f"Robusta: Check Historical Resource Utilization: ({str(params)})"
 
 
 class FetchConfigurationChanges(Tool):
@@ -196,14 +196,14 @@ class FetchConfigurationChanges(Tool):
         )
 
     def get_parameterized_one_liner(self, params: Dict) -> str:
-        return f"Search Change History: ({str(params)})"
+        return "Robusta: Search Change History"
 
 
 class RobustaToolset(Toolset):
     def __init__(self, dal: Optional[SupabaseDal]):
         dal_prereq = StaticPrerequisite(
             enabled=True if dal else False,
-            disabled_reason="The data access layer is not available",
+            disabled_reason="Integration with Robusta cloud is disabled",
         )
         if dal:
             dal_prereq = StaticPrerequisite(
holmes/plugins/toolsets/runbook/runbook_fetcher.py CHANGED
@@ -12,6 +12,7 @@ from holmes.core.tools import (
 )
 
 from holmes.plugins.runbooks import get_runbook_by_path, DEFAULT_RUNBOOK_SEARCH_PATH
+from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
 
 
 # TODO(mainred): currently we support fetch runbooks hosted internally, in the future we may want to support fetching
@@ -82,10 +83,10 @@ class RunbookFetcher(Tool):
     4. ❌ *Could not analyze process mailbox sizes* - Observer tool not enabled in container. Enable remote shell or observer_cli for process introspection.
     5. ✅ *Check pod memory limits* - container limit 4Gi, requests 2Gi
     6. ✅ *Verify BEAM startup arguments* - `+S 4:4 +P 1048576`, no memory instrumentation flags enabled
-    7. ❌ *Could not retrieve APM traces* - Datadog traces toolset is disabled. You can enable it by following https://robusta-dev.github.io/holmesgpt/data-sources/builtin-toolsets/datadog/
-    8. ❌ *Could not query Erlang metrics* - Prometheus integration is not connected. Enable it via https://robusta-dev.github.io/holmesgpt/data-sources/builtin-toolsets/prometheus/
+    7. ❌ *Could not retrieve APM traces* - Datadog traces toolset is disabled. You can enable it by following https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/
+    8. ❌ *Could not query Erlang metrics* - Prometheus integration is not connected. Enable it via https://holmesgpt.dev/data-sources/builtin-toolsets/prometheus/
     9. ✅ *Examine recent deployments* - app version 2.1.3 deployed 4 hours ago, coincides with memory spike
-    10. ❌ *Could not check Stripe API status* - No toolset for Stripe integration exists. To monitor Stripe or similar third-party APIs, add a [custom toolset](https://robusta-dev.github.io/holmesgpt/data-sources/custom-toolsets/) or use a [remote MCP server](https://robusta-dev.github.io/holmesgpt/data-sources/remote-mcp-servers/)
+    10. ❌ *Could not check Stripe API status* - No toolset for Stripe integration exists. To monitor Stripe or similar third-party APIs, add a [custom toolset](https://holmesgpt.dev/data-sources/custom-toolsets/) or use a [remote MCP server](https://holmesgpt.dev/data-sources/remote-mcp-servers/)
 
     **Root cause:** Memory leak in `gen_server` logic introduced in v2.1.3. BEAM VM hitting memory limit, causing out-of-memory crashes.
 
@@ -107,8 +108,8 @@ class RunbookFetcher(Tool):
         )
 
     def get_parameterized_one_liner(self, params) -> str:
-        path: str = params["link"]
-        return f"fetched runbook {path}"
+        path: str = params.get("link", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Runbook {path}"
 
 
 class RunbookToolset(Toolset):
holmes/plugins/toolsets/servicenow/servicenow.py CHANGED
@@ -15,9 +15,11 @@ from holmes.core.tools import StructuredToolResult, ToolResultStatus
 from holmes.plugins.toolsets.utils import (
     process_timestamps_to_rfc3339,
     standard_start_datetime_tool_param_description,
+    toolset_name_for_one_liner,
+)
+from holmes.plugins.toolsets.logging_utils.logging_api import (
+    DEFAULT_TIME_SPAN_SECONDS,
 )
-
-DEFAULT_TIME_SPAN_SECONDS = 3600
 
 
 class ServiceNowConfig(BaseModel):
@@ -92,7 +94,8 @@ class ServiceNowBaseTool(Tool):
         )
 
     def get_parameterized_one_liner(self, params) -> str:
-        return f"ServiceNow {self.name} {params}"
+        # Default implementation - will be overridden by subclasses
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: ServiceNow {self.name} {params}"
 
 
 class ReturnChangesInTimerange(ServiceNowBaseTool):
@@ -108,6 +111,10 @@ class ReturnChangesInTimerange(ServiceNowBaseTool):
         )
     }
 
+    def get_parameterized_one_liner(self, params) -> str:
+        start = params.get("start", "last hour")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Requests ({start})"
+
     def _invoke(self, params: Any) -> StructuredToolResult:
         parsed_params = {}
         try:
@@ -147,6 +154,10 @@ class ReturnChange(ServiceNowBaseTool):
         )
     }
 
+    def get_parameterized_one_liner(self, params) -> str:
+        sys_id = params.get("sys_id", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Details ({sys_id})"
+
     def _invoke(self, params: Any) -> StructuredToolResult:
         try:
             url = "https://{instance}.service-now.com/api/now/v2/table/change_request/{sys_id}".format(
@@ -175,6 +186,10 @@ class ReturnChangesWithKeyword(ServiceNowBaseTool):
         )
     }
 
+    def get_parameterized_one_liner(self, params) -> str:
+        keyword = params.get("keyword", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Changes ({keyword})"
+
     def _invoke(self, params: Any) -> StructuredToolResult:
         parsed_params = {}
         try:
holmes/plugins/toolsets/utils.py CHANGED
@@ -2,7 +2,7 @@ import datetime
 import time
 from typing import Dict, Optional, Tuple, Union
 
-from dateutil import parser  # type: ignore
+from dateutil import parser
 
 
 def standard_start_datetime_tool_param_description(time_span_seconds: int):
@@ -139,3 +139,10 @@ def get_param_or_raise(dict: Dict, param: str) -> str:
     if not value:
         raise Exception(f'Missing param "{param}"')
     return value
+
+
+def toolset_name_for_one_liner(toolset_name: str) -> str:
+    name = toolset_name
+    if "/" in toolset_name:
+        name = toolset_name.split("/")[0]
+    return name.capitalize()
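This small helper produces the `Prometheus:` / `Robusta:` style prefixes used by the reworked one-liners above: it keeps the part of the toolset name before the first `/` and capitalizes it. Its behavior on a few example toolset names:

    print(toolset_name_for_one_liner("prometheus/metrics"))  # Prometheus
    print(toolset_name_for_one_liner("datadog/traces"))      # Datadog
    print(toolset_name_for_one_liner("runbook"))             # Runbook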
holmes/utils/llms.py ADDED
@@ -0,0 +1,20 @@
+import fnmatch
+from typing import List
+
+
+def model_matches_list(model: str, model_list: List[str]) -> bool:
+    """
+    Check if a model matches any pattern in a list of model patterns.
+
+    Args:
+        model: The name of an LLM model (e.g., "azure/gpt", "openai/gpt-4o")
+        model_list: List of model patterns that may include wildcards
+            (e.g., ["azure/*", "*/mistral", "openai/gpt-*"])
+
+    Returns:
+        True if the model matches any pattern in the list, False otherwise
+    """
+    for pattern in model_list:
+        if fnmatch.fnmatchcase(model, pattern):
+            return True
+    return False
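Because matching goes through `fnmatch.fnmatchcase`, the patterns are shell-style globs and matching is case-sensitive. A quick illustration of the semantics:

    import fnmatch

    patterns = ["azure/*", "*/mistral", "openai/gpt-*"]
    for model in ["azure/gpt-4o", "bedrock/mistral", "openai/o3"]:
        matched = any(fnmatch.fnmatchcase(model, p) for p in patterns)
        print(model, "->", matched)
    # azure/gpt-4o -> True
    # bedrock/mistral -> True
    # openai/o3 -> False (no pattern covers it; matching is case-sensitive)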
holmes/utils/stream.py ADDED
@@ -0,0 +1,90 @@
+import json
+from enum import Enum
+from typing import Generator, Optional, List
+import litellm
+from pydantic import BaseModel, Field
+from holmes.core.investigation_structured_output import process_response_into_sections
+from functools import partial
+
+
+class StreamEvents(str, Enum):
+    ANSWER_END = "ai_answer_end"
+    START_TOOL = "start_tool_calling"
+    TOOL_RESULT = "tool_calling_result"
+    ERROR = "error"
+    AI_MESSAGE = "ai_message"
+
+
+class StreamMessage(BaseModel):
+    event: StreamEvents
+    data: dict = Field(default={})
+
+
+def create_sse_message(event_type: str, data: Optional[dict] = None):
+    if data is None:
+        data = {}
+    return f"event: {event_type}\ndata: {json.dumps(data)}\n\n"
+
+
+def create_sse_error_message(description: str, error_code: int, msg: str):
+    return create_sse_message(
+        StreamEvents.ERROR.value,
+        {
+            "description": description,
+            "error_code": error_code,
+            "msg": msg,
+            "success": False,
+        },
+    )
+
+
+create_rate_limit_error_message = partial(
+    create_sse_error_message,
+    error_code=5204,
+    msg="Rate limit exceeded",
+)
+
+
+def stream_investigate_formatter(
+    call_stream: Generator[StreamMessage, None, None], runbooks
+):
+    try:
+        for message in call_stream:
+            if message.event == StreamEvents.ANSWER_END:
+                (text_response, sections) = process_response_into_sections(  # type: ignore
+                    message.data.get("content")
+                )
+
+                yield create_sse_message(
+                    StreamEvents.ANSWER_END.value,
+                    {
+                        "sections": sections or {},
+                        "analysis": text_response,
+                        "instructions": runbooks or [],
+                    },
+                )
+            else:
+                yield create_sse_message(message.event.value, message.data)
+    except litellm.exceptions.RateLimitError as e:
+        yield create_rate_limit_error_message(str(e))
+
+
+def stream_chat_formatter(
+    call_stream: Generator[StreamMessage, None, None],
+    followups: Optional[List[dict]] = None,
+):
+    try:
+        for message in call_stream:
+            if message.event == StreamEvents.ANSWER_END:
+                yield create_sse_message(
+                    StreamEvents.ANSWER_END.value,
+                    {
+                        "analysis": message.data.get("content"),
+                        "conversation_history": message.data.get("messages"),
+                        "follow_up_actions": followups,
+                    },
+                )
+            else:
+                yield create_sse_message(message.event.value, message.data)
+    except litellm.exceptions.RateLimitError as e:
+        yield create_rate_limit_error_message(str(e))
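The two formatters wrap a generator of `StreamMessage` objects and emit Server-Sent Events text, special-casing the final `ai_answer_end` event. A minimal sketch of driving `stream_chat_formatter` with a hand-rolled upstream stream (the fake generator below is illustrative, not HolmesGPT's real LLM stream):

    def fake_call_stream():
        # One tool-call event, then the final answer.
        yield StreamMessage(
            event=StreamEvents.START_TOOL, data={"tool_name": "kubectl_logs"}
        )
        yield StreamMessage(
            event=StreamEvents.ANSWER_END,
            data={"content": "Pod crashed due to OOMKill.", "messages": []},
        )

    for chunk in stream_chat_formatter(fake_call_stream(), followups=[]):
        print(chunk, end="")
    # Emits one "event: start_tool_calling" frame, then an "event: ai_answer_end"
    # frame whose data carries analysis, conversation_history and follow_up_actions.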