holmesgpt 0.12.6__py3-none-any.whl → 0.13.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic. Click here for more details.

Files changed (125)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +19 -1
  3. holmes/common/env_vars.py +17 -0
  4. holmes/config.py +69 -9
  5. holmes/core/conversations.py +11 -0
  6. holmes/core/investigation.py +16 -3
  7. holmes/core/investigation_structured_output.py +12 -0
  8. holmes/core/llm.py +13 -1
  9. holmes/core/models.py +9 -1
  10. holmes/core/openai_formatting.py +72 -12
  11. holmes/core/prompt.py +13 -0
  12. holmes/core/supabase_dal.py +3 -0
  13. holmes/core/todo_manager.py +88 -0
  14. holmes/core/tool_calling_llm.py +230 -157
  15. holmes/core/tools.py +10 -1
  16. holmes/core/tools_utils/tool_executor.py +7 -2
  17. holmes/core/tools_utils/toolset_utils.py +7 -2
  18. holmes/core/toolset_manager.py +1 -5
  19. holmes/core/tracing.py +4 -3
  20. holmes/interactive.py +1 -0
  21. holmes/main.py +9 -2
  22. holmes/plugins/prompts/__init__.py +7 -1
  23. holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
  24. holmes/plugins/prompts/_default_log_prompt.jinja2 +4 -2
  25. holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
  26. holmes/plugins/prompts/_general_instructions.jinja2 +14 -0
  27. holmes/plugins/prompts/_permission_errors.jinja2 +1 -1
  28. holmes/plugins/prompts/_toolsets_instructions.jinja2 +4 -4
  29. holmes/plugins/prompts/generic_ask.jinja2 +4 -3
  30. holmes/plugins/prompts/investigation_procedure.jinja2 +210 -0
  31. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -0
  32. holmes/plugins/runbooks/CLAUDE.md +85 -0
  33. holmes/plugins/runbooks/README.md +24 -0
  34. holmes/plugins/toolsets/__init__.py +19 -6
  35. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +27 -0
  36. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -2
  37. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -1
  38. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  39. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +2 -1
  40. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -1
  41. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  42. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -1
  43. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -1
  44. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -1
  45. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -1
  46. holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
  47. holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
  48. holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
  49. holmes/plugins/toolsets/bash/aws/constants.py +529 -0
  50. holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
  51. holmes/plugins/toolsets/bash/azure/constants.py +339 -0
  52. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
  53. holmes/plugins/toolsets/bash/bash_toolset.py +47 -13
  54. holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
  55. holmes/plugins/toolsets/bash/common/stringify.py +14 -1
  56. holmes/plugins/toolsets/bash/common/validators.py +91 -0
  57. holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
  58. holmes/plugins/toolsets/bash/docker/constants.py +255 -0
  59. holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
  60. holmes/plugins/toolsets/bash/helm/constants.py +92 -0
  61. holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
  62. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
  63. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
  64. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
  65. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
  66. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
  67. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
  68. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
  69. holmes/plugins/toolsets/bash/parse_command.py +106 -32
  70. holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
  71. holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
  72. holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
  73. holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
  74. holmes/plugins/toolsets/bash/utilities/head.py +12 -0
  75. holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
  76. holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
  77. holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
  78. holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
  79. holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
  80. holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
  81. holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
  82. holmes/plugins/toolsets/coralogix/api.py +6 -6
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +7 -1
  84. holmes/plugins/toolsets/datadog/datadog_api.py +20 -8
  85. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +8 -1
  86. holmes/plugins/toolsets/datadog/datadog_rds_instructions.jinja2 +82 -0
  87. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +12 -5
  88. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -11
  89. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +735 -0
  90. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -11
  91. holmes/plugins/toolsets/git.py +15 -15
  92. holmes/plugins/toolsets/grafana/grafana_api.py +12 -1
  93. holmes/plugins/toolsets/grafana/toolset_grafana.py +5 -1
  94. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +9 -4
  95. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +12 -5
  96. holmes/plugins/toolsets/internet/internet.py +2 -1
  97. holmes/plugins/toolsets/internet/notion.py +2 -1
  98. holmes/plugins/toolsets/investigator/__init__.py +0 -0
  99. holmes/plugins/toolsets/investigator/core_investigation.py +157 -0
  100. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +253 -0
  101. holmes/plugins/toolsets/investigator/model.py +15 -0
  102. holmes/plugins/toolsets/kafka.py +14 -7
  103. holmes/plugins/toolsets/kubernetes_logs.py +454 -25
  104. holmes/plugins/toolsets/logging_utils/logging_api.py +115 -55
  105. holmes/plugins/toolsets/mcp/toolset_mcp.py +1 -1
  106. holmes/plugins/toolsets/newrelic.py +8 -3
  107. holmes/plugins/toolsets/opensearch/opensearch.py +8 -4
  108. holmes/plugins/toolsets/opensearch/opensearch_logs.py +9 -2
  109. holmes/plugins/toolsets/opensearch/opensearch_traces.py +6 -2
  110. holmes/plugins/toolsets/prometheus/prometheus.py +179 -44
  111. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +8 -2
  112. holmes/plugins/toolsets/robusta/robusta.py +4 -4
  113. holmes/plugins/toolsets/runbook/runbook_fetcher.py +6 -5
  114. holmes/plugins/toolsets/servicenow/servicenow.py +18 -3
  115. holmes/plugins/toolsets/utils.py +8 -1
  116. holmes/utils/console/logging.py +6 -1
  117. holmes/utils/llms.py +20 -0
  118. holmes/utils/stream.py +90 -0
  119. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/METADATA +47 -34
  120. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/RECORD +123 -91
  121. holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
  122. holmes/utils/robusta.py +0 -9
  123. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/LICENSE.txt +0 -0
  124. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/WHEEL +0 -0
  125. {holmesgpt-0.12.6.dist-info → holmesgpt-0.13.1.dist-info}/entry_points.txt +0 -0
@@ -1,14 +1,16 @@
1
1
  import json
2
2
  import logging
3
+ import boto3
3
4
  import os
4
5
  import re
5
6
  import time
6
- from typing import Any, Dict, List, Optional, Tuple, Union
7
+ from typing import Any, Dict, List, Optional, Tuple, Type, Union
7
8
  from urllib.parse import urljoin
8
9
 
9
10
  import requests # type: ignore
10
11
  from pydantic import BaseModel, field_validator, Field, model_validator
11
12
  from requests import RequestException
13
+ from requests_aws4auth import AWS4Auth
12
14
 
13
15
  from holmes.core.tools import (
14
16
  CallablePrerequisite,
@@ -25,14 +27,17 @@ from holmes.plugins.toolsets.utils import (
25
27
  get_param_or_raise,
26
28
  process_timestamps_to_rfc3339,
27
29
  standard_start_datetime_tool_param_description,
30
+ toolset_name_for_one_liner,
28
31
  )
29
32
  from holmes.utils.cache import TTLCache
30
33
  from holmes.common.env_vars import IS_OPENSHIFT
31
34
  from holmes.common.openshift import load_openshift_token
35
+ from holmes.plugins.toolsets.logging_utils.logging_api import (
36
+ DEFAULT_TIME_SPAN_SECONDS,
37
+ )
32
38
  from holmes.utils.keygen_utils import generate_random_key
33
39
 
34
40
  PROMETHEUS_RULES_CACHE_KEY = "cached_prometheus_rules"
35
- DEFAULT_TIME_SPAN_SECONDS = 3600
36
41
 
37
42
 
38
43
  class PrometheusConfig(BaseModel):
@@ -49,6 +54,7 @@ class PrometheusConfig(BaseModel):
49
54
  headers: Dict = Field(default_factory=dict)
50
55
  rules_cache_duration_seconds: Union[int, None] = 1800 # 30 minutes
51
56
  additional_labels: Optional[Dict[str, str]] = None
57
+ prometheus_ssl_enabled: bool = True
52
58
 
53
59
  @field_validator("prometheus_url")
54
60
  def ensure_trailing_slash(cls, v: Optional[str]) -> Optional[str]:
@@ -73,6 +79,63 @@ class PrometheusConfig(BaseModel):
73
79
 
74
80
  return self
75
81
 
82
+ def is_amp(self) -> bool:
83
+ return False
84
+
85
+ def get_auth(self) -> Any:
86
+ return None
87
+
88
+
89
class AMPConfig(PrometheusConfig):
    """Prometheus settings for an AMP endpoint whose requests are signed with AWS4Auth.

    "AMP" presumably stands for Amazon Managed Service for Prometheus -- confirm.
    Credentials are resolved in get_auth(): boto3's own credential lookup is
    tried first and the static key pair below is only a fallback.
    """

    # Optional static key pair; used only when boto3 resolves no credentials.
    aws_access_key: Optional[str] = None
    aws_secret_access_key: Optional[str] = None
    # Region and service name handed to AWS4Auth when building a signer.
    aws_region: str
    aws_service_name: str = "aps"
    healthcheck: str = "api/v1/query?query=up"
    # NOTE(review): this flag is passed as requests' `verify=` elsewhere in
    # this module, so the False default turns certificate verification off
    # for AMP -- confirm that this is intended.
    prometheus_ssl_enabled: bool = False

    def is_amp(self) -> bool:
        """Report that this configuration targets AMP."""
        return True

    def _build_irsa_auth(self) -> Optional[AWS4Auth]:
        """Build a signer from whatever credentials a default boto3 session resolves.

        Returns None when boto3 finds no credentials at all.
        """
        aws_session = boto3.Session()
        resolved = aws_session.get_credentials()
        if resolved is None:
            return None
        # Take a frozen snapshot so key, secret and token are read together.
        snapshot = resolved.get_frozen_credentials()
        return AWS4Auth(
            snapshot.access_key,
            snapshot.secret_key,
            self.aws_region,
            self.aws_service_name,
            session_token=snapshot.token,
        )

    def _build_static_aws_auth(self) -> Optional[AWS4Auth]:
        """Build a signer from the configured static key pair, or None if either half is missing."""
        if not (self.aws_access_key and self.aws_secret_access_key):
            return None
        return AWS4Auth(
            self.aws_access_key,
            self.aws_secret_access_key,
            self.aws_region,
            self.aws_service_name,
        )

    def get_auth(self):
        """Return the request signer, preferring boto3-resolved credentials over static keys.

        Raises:
            RuntimeError: if neither source yields credentials.
        """
        # `or` short-circuits, so the static keys are consulted only when the
        # boto3 lookup produced nothing.
        signer = self._build_irsa_auth() or self._build_static_aws_auth()
        if signer:
            return signer
        raise RuntimeError(
            "No AWS credentials available. Tried IRSA and static keys. "
            "Ensure IRSA is configured on the service account or provide aws_access_key/aws_secret_access_key."
        )
138
+
76
139
 
77
140
  class BasePrometheusTool(Tool):
78
141
  toolset: "PrometheusToolset"
@@ -99,10 +162,15 @@ def filter_metrics_by_name(metrics: Dict, pattern: str) -> Dict:
99
162
  METRICS_SUFFIXES_TO_STRIP = ["_bucket", "_count", "_sum"]
100
163
 
101
164
 
102
- def fetch_metadata(prometheus_url: str, headers: Optional[Dict]) -> Dict:
165
+ def fetch_metadata(
166
+ prometheus_url: str,
167
+ headers: Optional[Dict],
168
+ auth=None,
169
+ verify_ssl: bool = True,
170
+ ) -> Dict:
103
171
  metadata_url = urljoin(prometheus_url, "api/v1/metadata")
104
172
  metadata_response = requests.get(
105
- metadata_url, headers=headers, timeout=60, verify=True
173
+ metadata_url, headers=headers, timeout=60, verify=verify_ssl, auth=auth
106
174
  )
107
175
 
108
176
  metadata_response.raise_for_status()
@@ -124,13 +192,17 @@ def fetch_metadata(prometheus_url: str, headers: Optional[Dict]) -> Dict:
124
192
 
125
193
 
126
194
  def fetch_metadata_with_series_api(
127
- prometheus_url: str, metric_name: str, headers: Dict
195
+ prometheus_url: str,
196
+ metric_name: str,
197
+ headers: Dict,
198
+ auth=None,
199
+ verify_ssl: bool = True,
128
200
  ) -> Dict:
129
201
  url = urljoin(prometheus_url, "api/v1/series")
130
202
  params: Dict = {"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"}
131
203
 
132
204
  response = requests.get(
133
- url, headers=headers, timeout=60, params=params, verify=True
205
+ url, headers=headers, timeout=60, params=params, auth=auth, verify=verify_ssl
134
206
  )
135
207
  response.raise_for_status()
136
208
  metrics = response.json()["data"]
@@ -172,6 +244,8 @@ def fetch_metrics_labels_with_series_api(
172
244
  cache: Optional[TTLCache],
173
245
  metrics_labels_time_window_hrs: Union[int, None],
174
246
  metric_name: str,
247
+ auth=None,
248
+ verify_ssl: bool = True,
175
249
  ) -> dict:
176
250
  """This is a slow query. Takes 5+ seconds to run"""
177
251
  cache_key = f"metrics_labels_series_api:{metric_name}"
@@ -188,7 +262,12 @@ def fetch_metrics_labels_with_series_api(
188
262
  params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
189
263
 
190
264
  series_response = requests.get(
191
- url=series_url, headers=headers, params=params, timeout=60, verify=True
265
+ url=series_url,
266
+ headers=headers,
267
+ params=params,
268
+ auth=auth,
269
+ timeout=60,
270
+ verify=verify_ssl,
192
271
  )
193
272
  series_response.raise_for_status()
194
273
  series = series_response.json()["data"]
@@ -214,6 +293,8 @@ def fetch_metrics_labels_with_labels_api(
214
293
  metrics_labels_time_window_hrs: Union[int, None],
215
294
  metric_names: List[str],
216
295
  headers: Dict,
296
+ auth=None,
297
+ verify_ssl: bool = True,
217
298
  ) -> dict:
218
299
  metrics_labels = {}
219
300
 
@@ -233,7 +314,12 @@ def fetch_metrics_labels_with_labels_api(
233
314
  params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
234
315
 
235
316
  response = requests.get(
236
- url=url, headers=headers, params=params, timeout=60, verify=True
317
+ url=url,
318
+ headers=headers,
319
+ params=params,
320
+ auth=auth,
321
+ timeout=60,
322
+ verify=verify_ssl,
237
323
  )
238
324
  response.raise_for_status()
239
325
  labels = response.json()["data"]
@@ -254,16 +340,27 @@ def fetch_metrics(
254
340
  should_fetch_labels_with_labels_api: bool,
255
341
  should_fetch_metadata_with_series_api: bool,
256
342
  headers: Dict,
343
+ auth=None,
344
+ verify_ssl: bool = True,
257
345
  ) -> dict:
258
346
  metrics = None
259
347
  should_fetch_labels = True
260
348
  if should_fetch_metadata_with_series_api:
261
349
  metrics = fetch_metadata_with_series_api(
262
- prometheus_url=prometheus_url, metric_name=metric_name, headers=headers
350
+ prometheus_url=prometheus_url,
351
+ metric_name=metric_name,
352
+ headers=headers,
353
+ auth=auth,
354
+ verify_ssl=verify_ssl,
263
355
  )
264
356
  should_fetch_labels = False # series API returns the labels
265
357
  else:
266
- metrics = fetch_metadata(prometheus_url=prometheus_url, headers=headers)
358
+ metrics = fetch_metadata(
359
+ prometheus_url=prometheus_url,
360
+ headers=headers,
361
+ auth=auth,
362
+ verify_ssl=verify_ssl,
363
+ )
267
364
  metrics = filter_metrics_by_name(metrics, metric_name)
268
365
 
269
366
  if should_fetch_labels:
@@ -275,6 +372,8 @@ def fetch_metrics(
275
372
  metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
276
373
  metric_names=list(metrics.keys()),
277
374
  headers=headers,
375
+ auth=auth,
376
+ verify_ssl=verify_ssl,
278
377
  )
279
378
  else:
280
379
  metrics_labels = fetch_metrics_labels_with_series_api(
@@ -283,6 +382,8 @@ def fetch_metrics(
283
382
  metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
284
383
  metric_name=metric_name,
285
384
  headers=headers,
385
+ auth=auth,
386
+ verify_ssl=verify_ssl,
286
387
  )
287
388
 
288
389
  for metric_name in metrics:
@@ -309,6 +410,12 @@ class ListPrometheusRules(BasePrometheusTool):
309
410
  error="Prometheus is not configured. Prometheus URL is missing",
310
411
  params=params,
311
412
  )
413
+ if self.toolset.config.is_amp():
414
+ return StructuredToolResult(
415
+ status=ToolResultStatus.ERROR,
416
+ error="Tool not supported in AMP",
417
+ params=params,
418
+ )
312
419
  if not self._cache and self.toolset.config.rules_cache_duration_seconds:
313
420
  self._cache = TTLCache(self.toolset.config.rules_cache_duration_seconds) # type: ignore
314
421
  try:
@@ -330,8 +437,9 @@ class ListPrometheusRules(BasePrometheusTool):
330
437
  rules_response = requests.get(
331
438
  url=rules_url,
332
439
  params=params,
440
+ auth=self.toolset.config.get_auth(),
333
441
  timeout=180,
334
- verify=True,
442
+ verify=self.toolset.config.prometheus_ssl_enabled,
335
443
  headers=self.toolset.config.headers,
336
444
  )
337
445
  rules_response.raise_for_status()
@@ -367,7 +475,7 @@ class ListPrometheusRules(BasePrometheusTool):
367
475
  )
368
476
 
369
477
  def get_parameterized_one_liner(self, params) -> str:
370
- return "list available prometheus rules"
478
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Rules"
371
479
 
372
480
 
373
481
  class ListAvailableMetrics(BasePrometheusTool):
@@ -424,6 +532,8 @@ class ListAvailableMetrics(BasePrometheusTool):
424
532
  should_fetch_labels_with_labels_api=self.toolset.config.fetch_labels_with_labels_api,
425
533
  should_fetch_metadata_with_series_api=self.toolset.config.fetch_metadata_with_series_api,
426
534
  headers=self.toolset.config.headers,
535
+ auth=self.toolset.config.get_auth(),
536
+ verify_ssl=self.toolset.config.prometheus_ssl_enabled,
427
537
  )
428
538
 
429
539
  if params.get("type_filter"):
@@ -470,7 +580,8 @@ class ListAvailableMetrics(BasePrometheusTool):
470
580
  )
471
581
 
472
582
  def get_parameterized_one_liner(self, params) -> str:
473
- return f'Search Available Prometheus Metrics: name_filter="{params.get("name_filter", "<no filter>")}", type_filter="{params.get("type_filter", "<no filter>")}"'
583
+ name_filter = params.get("name_filter", "")
584
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Metrics ({name_filter})"
474
585
 
475
586
 
476
587
  class ExecuteInstantQuery(BasePrometheusTool):
@@ -509,7 +620,11 @@ class ExecuteInstantQuery(BasePrometheusTool):
509
620
  payload = {"query": query}
510
621
 
511
622
  response = requests.post(
512
- url=url, headers=self.toolset.config.headers, data=payload, timeout=60
623
+ url=url,
624
+ headers=self.toolset.config.headers,
625
+ auth=self.toolset.config.get_auth(),
626
+ data=payload,
627
+ timeout=60,
513
628
  )
514
629
 
515
630
  if response.status_code == 200:
@@ -579,9 +694,8 @@ class ExecuteInstantQuery(BasePrometheusTool):
579
694
  )
580
695
 
581
696
  def get_parameterized_one_liner(self, params) -> str:
582
- query = params.get("query")
583
- description = params.get("description")
584
- return f"Execute Prometheus Query (instant): promql='{query}', description='{description}'"
697
+ description = params.get("description", "")
698
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Query ({description})"
585
699
 
586
700
 
587
701
  class ExecuteRangeQuery(BasePrometheusTool):
@@ -654,7 +768,11 @@ class ExecuteRangeQuery(BasePrometheusTool):
654
768
  }
655
769
 
656
770
  response = requests.post(
657
- url=url, headers=self.toolset.config.headers, data=payload, timeout=120
771
+ url=url,
772
+ headers=self.toolset.config.headers,
773
+ auth=self.toolset.config.get_auth(),
774
+ data=payload,
775
+ timeout=120,
658
776
  )
659
777
 
660
778
  if response.status_code == 200:
@@ -726,15 +844,13 @@ class ExecuteRangeQuery(BasePrometheusTool):
726
844
  )
727
845
 
728
846
  def get_parameterized_one_liner(self, params) -> str:
729
- query = params.get("query")
730
- start = params.get("start")
731
- end = params.get("end")
732
- step = params.get("step")
733
- description = params.get("description")
734
- return f"Execute Prometheus Query (range): promql='{query}', start={start}, end={end}, step={step}, description='{description}'"
847
+ description = params.get("description", "")
848
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Query ({description})"
735
849
 
736
850
 
737
851
  class PrometheusToolset(Toolset):
852
+ config: Optional[Union[PrometheusConfig, AMPConfig]] = None
853
+
738
854
  def __init__(self):
739
855
  super().__init__(
740
856
  name="prometheus/metrics",
@@ -760,28 +876,43 @@ class PrometheusToolset(Toolset):
760
876
  )
761
877
  self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
762
878
 
763
- def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
764
- if config:
765
- self.config = PrometheusConfig(**config)
766
- self._reload_llm_instructions()
767
- return self._is_healthy()
879
+ def determine_prometheus_class(
880
+ self, config: dict[str, Any]
881
+ ) -> Type[Union[PrometheusConfig, AMPConfig]]:
882
+ has_aws_fields = "aws_region" in config
883
+ return AMPConfig if has_aws_fields else PrometheusConfig
768
884
 
769
- prometheus_url = os.environ.get("PROMETHEUS_URL")
770
- if not prometheus_url:
771
- prometheus_url = self.auto_detect_prometheus_url()
885
+ def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
886
+ try:
887
+ if config:
888
+ config_cls = self.determine_prometheus_class(config)
889
+ self.config = config_cls(**config) # type: ignore
890
+
891
+ self._reload_llm_instructions()
892
+ return self._is_healthy()
893
+ except Exception:
894
+ logging.exception("Failed to create prometheus config")
895
+ return False, "Failed to create prometheus config"
896
+ try:
897
+ prometheus_url = os.environ.get("PROMETHEUS_URL")
772
898
  if not prometheus_url:
773
- return (
774
- False,
775
- "Unable to auto-detect prometheus. Define prometheus_url in the configuration for tool prometheus/metrics",
776
- )
899
+ prometheus_url = self.auto_detect_prometheus_url()
900
+ if not prometheus_url:
901
+ return (
902
+ False,
903
+ "Unable to auto-detect prometheus. Define prometheus_url in the configuration for tool prometheus/metrics",
904
+ )
777
905
 
778
- self.config = PrometheusConfig(
779
- prometheus_url=prometheus_url,
780
- headers=add_prometheus_auth(os.environ.get("PROMETHEUS_AUTH_HEADER")),
781
- )
782
- logging.info(f"Prometheus auto discovered at url {prometheus_url}")
783
- self._reload_llm_instructions()
784
- return self._is_healthy()
906
+ self.config = PrometheusConfig(
907
+ prometheus_url=prometheus_url,
908
+ headers=add_prometheus_auth(os.environ.get("PROMETHEUS_AUTH_HEADER")),
909
+ )
910
+ logging.info(f"Prometheus auto discovered at url {prometheus_url}")
911
+ self._reload_llm_instructions()
912
+ return self._is_healthy()
913
+ except Exception as e:
914
+ logging.exception("Failed to set up prometheus")
915
+ return False, str(e)
785
916
 
786
917
  def auto_detect_prometheus_url(self) -> Optional[str]:
787
918
  url: Optional[str] = PrometheusDiscovery.find_prometheus_url()
@@ -804,7 +935,11 @@ class PrometheusToolset(Toolset):
804
935
  url = urljoin(self.config.prometheus_url, self.config.healthcheck)
805
936
  try:
806
937
  response = requests.get(
807
- url=url, headers=self.config.headers, timeout=10, verify=True
938
+ url=url,
939
+ headers=self.config.headers,
940
+ auth=self.config.get_auth(),
941
+ timeout=10,
942
+ verify=self.config.prometheus_ssl_enabled,
808
943
  )
809
944
 
810
945
  if response.status_code == 200:
@@ -21,6 +21,7 @@ from holmes.plugins.toolsets.rabbitmq.api import (
21
21
  get_cluster_status,
22
22
  make_request,
23
23
  )
24
+ from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
24
25
 
25
26
 
26
27
  class RabbitMQConfig(BaseModel):
@@ -80,7 +81,9 @@ class ListConfiguredClusters(BaseRabbitMQTool):
80
81
  )
81
82
 
82
83
  def get_parameterized_one_liner(self, params) -> str:
83
- return "list configured RabbitMQ clusters"
84
+ return (
85
+ f"{toolset_name_for_one_liner(self.toolset.name)}: List RabbitMQ Clusters"
86
+ )
84
87
 
85
88
 
86
89
  class GetRabbitMQClusterStatus(BaseRabbitMQTool):
@@ -116,7 +119,10 @@ class GetRabbitMQClusterStatus(BaseRabbitMQTool):
116
119
  )
117
120
 
118
121
  def get_parameterized_one_liner(self, params) -> str:
119
- return "get RabbitMQ cluster status and partition information"
122
+ cluster_id = params.get("cluster_id", "")
123
+ if cluster_id:
124
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Cluster Status ({cluster_id})"
125
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Cluster Status"
120
126
 
121
127
 
122
128
  class RabbitMQToolset(Toolset):
@@ -74,7 +74,7 @@ class FetchRobustaFinding(Tool):
74
74
  )
75
75
 
76
76
  def get_parameterized_one_liner(self, params: Dict) -> str:
77
- return "Fetch Alert Metadata"
77
+ return "Robusta: Fetch Alert Metadata"
78
78
 
79
79
 
80
80
  class FetchResourceRecommendation(Tool):
@@ -138,7 +138,7 @@ class FetchResourceRecommendation(Tool):
138
138
  )
139
139
 
140
140
  def get_parameterized_one_liner(self, params: Dict) -> str:
141
- return f"Check Historical Resource Utilization: ({str(params)})"
141
+ return f"Robusta: Check Historical Resource Utilization: ({str(params)})"
142
142
 
143
143
 
144
144
  class FetchConfigurationChanges(Tool):
@@ -196,14 +196,14 @@ class FetchConfigurationChanges(Tool):
196
196
  )
197
197
 
198
198
  def get_parameterized_one_liner(self, params: Dict) -> str:
199
- return f"Search Change History: ({str(params)})"
199
+ return "Robusta: Search Change History"
200
200
 
201
201
 
202
202
  class RobustaToolset(Toolset):
203
203
  def __init__(self, dal: Optional[SupabaseDal]):
204
204
  dal_prereq = StaticPrerequisite(
205
205
  enabled=True if dal else False,
206
- disabled_reason="The data access layer is not available",
206
+ disabled_reason="Integration with Robusta cloud is disabled",
207
207
  )
208
208
  if dal:
209
209
  dal_prereq = StaticPrerequisite(
@@ -12,6 +12,7 @@ from holmes.core.tools import (
12
12
  )
13
13
 
14
14
  from holmes.plugins.runbooks import get_runbook_by_path, DEFAULT_RUNBOOK_SEARCH_PATH
15
+ from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
15
16
 
16
17
 
17
18
  # TODO(mainred): currently we support fetch runbooks hosted internally, in the future we may want to support fetching
@@ -82,10 +83,10 @@ class RunbookFetcher(Tool):
82
83
  4. ❌ *Could not analyze process mailbox sizes* - Observer tool not enabled in container. Enable remote shell or observer_cli for process introspection.
83
84
  5. ✅ *Check pod memory limits* - container limit 4Gi, requests 2Gi
84
85
  6. ✅ *Verify BEAM startup arguments* - `+S 4:4 +P 1048576`, no memory instrumentation flags enabled
85
- 7. ❌ *Could not retrieve APM traces* - Datadog traces toolset is disabled. You can enable it by following https://robusta-dev.github.io/holmesgpt/data-sources/builtin-toolsets/datadog/
86
- 8. ❌ *Could not query Erlang metrics* - Prometheus integration is not connected. Enable it via https://robusta-dev.github.io/holmesgpt/data-sources/builtin-toolsets/prometheus/
86
+ 7. ❌ *Could not retrieve APM traces* - Datadog traces toolset is disabled. You can enable it by following https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/
87
+ 8. ❌ *Could not query Erlang metrics* - Prometheus integration is not connected. Enable it via https://holmesgpt.dev/data-sources/builtin-toolsets/prometheus/
87
88
  9. ✅ *Examine recent deployments* - app version 2.1.3 deployed 4 hours ago, coincides with memory spike
88
- 10. ❌ *Could not check Stripe API status* - No toolset for Stripe integration exists. To monitor Stripe or similar third-party APIs, add a [custom toolset](https://robusta-dev.github.io/holmesgpt/data-sources/custom-toolsets/) or use a [remote MCP server](https://robusta-dev.github.io/holmesgpt/data-sources/remote-mcp-servers/)
89
+ 10. ❌ *Could not check Stripe API status* - No toolset for Stripe integration exists. To monitor Stripe or similar third-party APIs, add a [custom toolset](https://holmesgpt.dev/data-sources/custom-toolsets/) or use a [remote MCP server](https://holmesgpt.dev/data-sources/remote-mcp-servers/)
89
90
 
90
91
  **Root cause:** Memory leak in `gen_server` logic introduced in v2.1.3. BEAM VM hitting memory limit, causing out-of-memory crashes.
91
92
 
@@ -107,8 +108,8 @@ class RunbookFetcher(Tool):
107
108
  )
108
109
 
109
110
  def get_parameterized_one_liner(self, params) -> str:
110
- path: str = params["link"]
111
- return f"fetched runbook {path}"
111
+ path: str = params.get("link", "")
112
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Runbook {path}"
112
113
 
113
114
 
114
115
  class RunbookToolset(Toolset):
@@ -15,9 +15,11 @@ from holmes.core.tools import StructuredToolResult, ToolResultStatus
15
15
  from holmes.plugins.toolsets.utils import (
16
16
  process_timestamps_to_rfc3339,
17
17
  standard_start_datetime_tool_param_description,
18
+ toolset_name_for_one_liner,
19
+ )
20
+ from holmes.plugins.toolsets.logging_utils.logging_api import (
21
+ DEFAULT_TIME_SPAN_SECONDS,
18
22
  )
19
-
20
- DEFAULT_TIME_SPAN_SECONDS = 3600
21
23
 
22
24
 
23
25
  class ServiceNowConfig(BaseModel):
@@ -92,7 +94,8 @@ class ServiceNowBaseTool(Tool):
92
94
  )
93
95
 
94
96
  def get_parameterized_one_liner(self, params) -> str:
95
- return f"ServiceNow {self.name} {params}"
97
+ # Default implementation - will be overridden by subclasses
98
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: ServiceNow {self.name} {params}"
96
99
 
97
100
 
98
101
  class ReturnChangesInTimerange(ServiceNowBaseTool):
@@ -108,6 +111,10 @@ class ReturnChangesInTimerange(ServiceNowBaseTool):
108
111
  )
109
112
  }
110
113
 
114
+ def get_parameterized_one_liner(self, params) -> str:
115
+ start = params.get("start", "last hour")
116
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Requests ({start})"
117
+
111
118
  def _invoke(self, params: Any) -> StructuredToolResult:
112
119
  parsed_params = {}
113
120
  try:
@@ -147,6 +154,10 @@ class ReturnChange(ServiceNowBaseTool):
147
154
  )
148
155
  }
149
156
 
157
+ def get_parameterized_one_liner(self, params) -> str:
158
+ sys_id = params.get("sys_id", "")
159
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Details ({sys_id})"
160
+
150
161
  def _invoke(self, params: Any) -> StructuredToolResult:
151
162
  try:
152
163
  url = "https://{instance}.service-now.com/api/now/v2/table/change_request/{sys_id}".format(
@@ -175,6 +186,10 @@ class ReturnChangesWithKeyword(ServiceNowBaseTool):
175
186
  )
176
187
  }
177
188
 
189
+ def get_parameterized_one_liner(self, params) -> str:
190
+ keyword = params.get("keyword", "")
191
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Changes ({keyword})"
192
+
178
193
  def _invoke(self, params: Any) -> StructuredToolResult:
179
194
  parsed_params = {}
180
195
  try:
@@ -2,7 +2,7 @@ import datetime
2
2
  import time
3
3
  from typing import Dict, Optional, Tuple, Union
4
4
 
5
- from dateutil import parser # type: ignore
5
+ from dateutil import parser
6
6
 
7
7
 
8
8
  def standard_start_datetime_tool_param_description(time_span_seconds: int):
@@ -139,3 +139,10 @@ def get_param_or_raise(dict: Dict, param: str) -> str:
139
139
  if not value:
140
140
  raise Exception(f'Missing param "{param}"')
141
141
  return value
142
+
143
+
144
def toolset_name_for_one_liner(toolset_name: str) -> str:
    """Return the short display label used as a prefix in tool one-liners.

    Only the text before the first "/" is kept (the whole name when there is
    no "/"), and it is passed through ``str.capitalize``: first character
    upper-cased, the rest lower-cased.

    Args:
        toolset_name: Toolset identifier such as "prometheus/metrics".

    Returns:
        The capitalized leading segment, e.g. "Prometheus".
    """
    leading_segment, _, _ = toolset_name.partition("/")
    return leading_segment.capitalize()
@@ -41,9 +41,14 @@ def suppress_noisy_logs():
41
41
  warnings.filterwarnings("ignore", category=UserWarning, module="slack_sdk.*")
42
42
 
43
43
 
44
- def init_logging(verbose_flags: Optional[List[bool]] = None):
44
+ def init_logging(verbose_flags: Optional[List[bool]] = None, log_costs: bool = False):
45
45
  verbosity = cli_flags_to_verbosity(verbose_flags) # type: ignore
46
46
 
47
+ # Setup cost logger if requested
48
+ if log_costs:
49
+ cost_logger = logging.getLogger("holmes.costs")
50
+ cost_logger.setLevel(logging.DEBUG)
51
+
47
52
  if verbosity == Verbosity.VERY_VERBOSE:
48
53
  logging.basicConfig(
49
54
  force=True,
holmes/utils/llms.py ADDED
@@ -0,0 +1,20 @@
1
+ import fnmatch
2
+ from typing import List
3
+
4
+
5
def model_matches_list(model: str, model_list: List[str]) -> bool:
    """Tell whether an LLM model name matches at least one glob pattern.

    Matching uses ``fnmatch.fnmatchcase``, so it is case-sensitive and
    supports shell-style wildcards.

    Args:
        model: Model name to test (e.g., "azure/gpt", "openai/gpt-4o").
        model_list: Patterns to test against, wildcards allowed
            (e.g., ["azure/*", "*/mistral", "openai/gpt-*"]).

    Returns:
        True as soon as one pattern matches; False if none do, including
        when ``model_list`` is empty.
    """
    return any(fnmatch.fnmatchcase(model, candidate) for candidate in model_list)