holmesgpt-0.13.0-py3-none-any.whl → holmesgpt-0.13.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (118)
  1. holmes/__init__.py +1 -1
  2. holmes/common/env_vars.py +11 -0
  3. holmes/config.py +3 -1
  4. holmes/core/conversations.py +0 -11
  5. holmes/core/investigation.py +0 -6
  6. holmes/core/llm.py +63 -2
  7. holmes/core/prompt.py +0 -2
  8. holmes/core/supabase_dal.py +2 -2
  9. holmes/core/todo_tasks_formatter.py +51 -0
  10. holmes/core/tool_calling_llm.py +277 -101
  11. holmes/core/tools.py +20 -4
  12. holmes/core/toolset_manager.py +1 -5
  13. holmes/core/tracing.py +1 -1
  14. holmes/interactive.py +63 -2
  15. holmes/main.py +7 -2
  16. holmes/plugins/prompts/_fetch_logs.jinja2 +4 -0
  17. holmes/plugins/prompts/_general_instructions.jinja2 +3 -1
  18. holmes/plugins/prompts/investigation_procedure.jinja2 +3 -13
  19. holmes/plugins/runbooks/CLAUDE.md +85 -0
  20. holmes/plugins/runbooks/README.md +24 -0
  21. holmes/plugins/toolsets/__init__.py +5 -1
  22. holmes/plugins/toolsets/argocd.yaml +1 -1
  23. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +18 -6
  24. holmes/plugins/toolsets/aws.yaml +9 -5
  25. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +3 -1
  26. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +3 -1
  27. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
  28. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -1
  29. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +3 -1
  30. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
  31. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -1
  32. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -1
  33. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -1
  34. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -1
  35. holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
  36. holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
  37. holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
  38. holmes/plugins/toolsets/bash/aws/constants.py +529 -0
  39. holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
  40. holmes/plugins/toolsets/bash/azure/constants.py +339 -0
  41. holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
  42. holmes/plugins/toolsets/bash/bash_toolset.py +62 -17
  43. holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
  44. holmes/plugins/toolsets/bash/common/stringify.py +14 -1
  45. holmes/plugins/toolsets/bash/common/validators.py +91 -0
  46. holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
  47. holmes/plugins/toolsets/bash/docker/constants.py +255 -0
  48. holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
  49. holmes/plugins/toolsets/bash/helm/constants.py +92 -0
  50. holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
  51. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
  52. holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
  53. holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
  54. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
  55. holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
  56. holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
  57. holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
  58. holmes/plugins/toolsets/bash/parse_command.py +106 -32
  59. holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
  60. holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
  61. holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
  62. holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
  63. holmes/plugins/toolsets/bash/utilities/head.py +12 -0
  64. holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
  65. holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
  66. holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
  67. holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
  68. holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
  69. holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
  70. holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
  71. holmes/plugins/toolsets/confluence.yaml +1 -1
  72. holmes/plugins/toolsets/coralogix/api.py +3 -1
  73. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +4 -4
  74. holmes/plugins/toolsets/coralogix/utils.py +41 -14
  75. holmes/plugins/toolsets/datadog/datadog_api.py +45 -2
  76. holmes/plugins/toolsets/datadog/datadog_general_instructions.jinja2 +208 -0
  77. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +43 -0
  78. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +12 -9
  79. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +722 -0
  80. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +17 -6
  81. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +15 -7
  82. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +6 -2
  83. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +9 -3
  84. holmes/plugins/toolsets/docker.yaml +1 -1
  85. holmes/plugins/toolsets/git.py +15 -5
  86. holmes/plugins/toolsets/grafana/toolset_grafana.py +25 -4
  87. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +4 -4
  88. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +5 -3
  89. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -32
  90. holmes/plugins/toolsets/helm.yaml +1 -1
  91. holmes/plugins/toolsets/internet/internet.py +4 -2
  92. holmes/plugins/toolsets/internet/notion.py +4 -2
  93. holmes/plugins/toolsets/investigator/core_investigation.py +5 -17
  94. holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +1 -5
  95. holmes/plugins/toolsets/kafka.py +19 -7
  96. holmes/plugins/toolsets/kubernetes.yaml +5 -5
  97. holmes/plugins/toolsets/kubernetes_logs.py +4 -4
  98. holmes/plugins/toolsets/kubernetes_logs.yaml +1 -1
  99. holmes/plugins/toolsets/logging_utils/logging_api.py +15 -2
  100. holmes/plugins/toolsets/mcp/toolset_mcp.py +3 -1
  101. holmes/plugins/toolsets/newrelic.py +8 -4
  102. holmes/plugins/toolsets/opensearch/opensearch.py +13 -5
  103. holmes/plugins/toolsets/opensearch/opensearch_logs.py +4 -4
  104. holmes/plugins/toolsets/opensearch/opensearch_traces.py +9 -6
  105. holmes/plugins/toolsets/prometheus/prometheus.py +198 -57
  106. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +7 -3
  107. holmes/plugins/toolsets/robusta/robusta.py +10 -4
  108. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -2
  109. holmes/plugins/toolsets/servicenow/servicenow.py +9 -3
  110. holmes/plugins/toolsets/slab.yaml +1 -1
  111. holmes/utils/console/logging.py +6 -1
  112. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/METADATA +3 -2
  113. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/RECORD +116 -90
  114. holmes/core/todo_manager.py +0 -88
  115. holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
  116. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/LICENSE.txt +0 -0
  117. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/WHEEL +0 -0
  118. {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/entry_points.txt +0 -0
@@ -3,14 +3,15 @@ import logging
3
3
  import os
4
4
  import re
5
5
  import time
6
+ import dateutil.parser
6
7
  from typing import Any, Dict, List, Optional, Tuple, Type, Union
7
8
  from urllib.parse import urljoin
8
9
 
9
10
  import requests # type: ignore
10
11
  from pydantic import BaseModel, field_validator, Field, model_validator
11
12
  from requests import RequestException
12
- from requests_aws4auth import AWS4Auth
13
-
13
+ from prometrix.connect.aws_connect import AWSPrometheusConnect
14
+ from prometrix.models.prometheus_config import PrometheusConfig as BasePrometheusConfig
14
15
  from holmes.core.tools import (
15
16
  CallablePrerequisite,
16
17
  StructuredToolResult,
@@ -29,10 +30,10 @@ from holmes.plugins.toolsets.utils import (
29
30
  toolset_name_for_one_liner,
30
31
  )
31
32
  from holmes.utils.cache import TTLCache
32
- from holmes.common.env_vars import IS_OPENSHIFT
33
+ from holmes.common.env_vars import IS_OPENSHIFT, MAX_GRAPH_POINTS
33
34
  from holmes.common.openshift import load_openshift_token
34
35
  from holmes.plugins.toolsets.logging_utils.logging_api import (
35
- DEFAULT_TIME_SPAN_SECONDS,
36
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS,
36
37
  )
37
38
  from holmes.utils.keygen_utils import generate_random_key
38
39
 
@@ -81,34 +82,105 @@ class PrometheusConfig(BaseModel):
81
82
  def is_amp(self) -> bool:
82
83
  return False
83
84
 
84
- def get_auth(self) -> Any:
85
- return None
86
-
87
85
 
88
86
  class AMPConfig(PrometheusConfig):
89
- aws_access_key: str
90
- aws_secret_access_key: str
87
+ aws_access_key: Optional[str] = None
88
+ aws_secret_access_key: Optional[str] = None
91
89
  aws_region: str
92
90
  aws_service_name: str = "aps"
93
- healthcheck: str = "api/v1/query?query=up" # Override for AMP
91
+ healthcheck: str = "api/v1/query?query=up"
94
92
  prometheus_ssl_enabled: bool = False
93
+ assume_role_arn: Optional[str] = None
94
+
95
+ # Refresh the AWS client (and its STS creds) every N seconds (default: 15 minutes)
96
+ refresh_interval_seconds: int = 900
97
+
98
+ _aws_client: Optional[AWSPrometheusConnect] = None
99
+ _aws_client_created_at: float = 0.0
95
100
 
96
101
  def is_amp(self) -> bool:
97
102
  return True
98
103
 
99
- def get_auth(self):
100
- return AWS4Auth(
101
- self.aws_access_key, # type: ignore
102
- self.aws_secret_access_key, # type: ignore
103
- self.aws_region, # type: ignore
104
- self.aws_service_name, # type: ignore
105
- )
104
+ def _should_refresh_client(self) -> bool:
105
+ if not self._aws_client:
106
+ return True
107
+ return (
108
+ time.time() - self._aws_client_created_at
109
+ ) >= self.refresh_interval_seconds
110
+
111
+ def get_aws_client(self) -> Optional[AWSPrometheusConnect]:
112
+ if not self._aws_client or self._should_refresh_client():
113
+ try:
114
+ base_config = BasePrometheusConfig(
115
+ url=self.prometheus_url,
116
+ disable_ssl=not self.prometheus_ssl_enabled,
117
+ additional_labels=self.additional_labels,
118
+ )
119
+ self._aws_client = AWSPrometheusConnect(
120
+ access_key=self.aws_access_key,
121
+ secret_key=self.aws_secret_access_key,
122
+ token=None,
123
+ region=self.aws_region,
124
+ service_name=self.aws_service_name,
125
+ assume_role_arn=self.assume_role_arn,
126
+ config=base_config,
127
+ )
128
+ self._aws_client_created_at = time.time()
129
+ except Exception:
130
+ logging.exception("Failed to create/refresh AWS client")
131
+ return self._aws_client
132
+ return self._aws_client
106
133
 
107
134
 
108
135
  class BasePrometheusTool(Tool):
109
136
  toolset: "PrometheusToolset"
110
137
 
111
138
 
139
+ def do_request(
140
+ config, # PrometheusConfig | AMPConfig
141
+ url: str,
142
+ params: Optional[Dict] = None,
143
+ data: Optional[Dict] = None,
144
+ timeout: int = 60,
145
+ verify: Optional[bool] = None,
146
+ headers: Optional[Dict] = None,
147
+ method: str = "GET",
148
+ ) -> requests.Response:
149
+ """
150
+ Route a request through either:
151
+ - AWSPrometheusConnect (SigV4) when config is AMPConfig
152
+ - plain requests otherwise
153
+
154
+ method defaults to GET so callers can omit it for reads.
155
+ """
156
+ if verify is None:
157
+ verify = config.prometheus_ssl_enabled
158
+ if headers is None:
159
+ headers = config.headers or {}
160
+
161
+ if isinstance(config, AMPConfig):
162
+ client = config.get_aws_client() # cached AWSPrometheusConnect
163
+ return client.signed_request( # type: ignore
164
+ method=method,
165
+ url=url,
166
+ data=data,
167
+ params=params,
168
+ verify=verify,
169
+ headers=headers,
170
+ )
171
+
172
+ # Non-AMP: plain HTTP
173
+ return requests.request(
174
+ method=method,
175
+ url=url,
176
+ headers=headers,
177
+ params=params,
178
+ data=data,
179
+ timeout=timeout,
180
+ verify=verify,
181
+ )
182
+
183
+
112
184
  def filter_metrics_by_type(metrics: Dict, expected_type: str):
113
185
  return {
114
186
  metric_name: metric_data
@@ -133,14 +205,18 @@ METRICS_SUFFIXES_TO_STRIP = ["_bucket", "_count", "_sum"]
133
205
  def fetch_metadata(
134
206
  prometheus_url: str,
135
207
  headers: Optional[Dict],
136
- auth=None,
208
+ config,
137
209
  verify_ssl: bool = True,
138
210
  ) -> Dict:
139
211
  metadata_url = urljoin(prometheus_url, "api/v1/metadata")
140
- metadata_response = requests.get(
141
- metadata_url, headers=headers, timeout=60, verify=verify_ssl, auth=auth
212
+ metadata_response = do_request(
213
+ config=config,
214
+ url=metadata_url,
215
+ headers=headers,
216
+ timeout=60,
217
+ verify=verify_ssl,
218
+ method="GET",
142
219
  )
143
-
144
220
  metadata_response.raise_for_status()
145
221
 
146
222
  metadata = metadata_response.json()["data"]
@@ -163,14 +239,20 @@ def fetch_metadata_with_series_api(
163
239
  prometheus_url: str,
164
240
  metric_name: str,
165
241
  headers: Dict,
166
- auth=None,
242
+ config,
167
243
  verify_ssl: bool = True,
168
244
  ) -> Dict:
169
245
  url = urljoin(prometheus_url, "api/v1/series")
170
246
  params: Dict = {"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"}
171
247
 
172
- response = requests.get(
173
- url, headers=headers, timeout=60, params=params, auth=auth, verify=verify_ssl
248
+ response = do_request(
249
+ config=config,
250
+ url=url,
251
+ headers=headers,
252
+ params=params,
253
+ timeout=60,
254
+ verify=verify_ssl,
255
+ method="GET",
174
256
  )
175
257
  response.raise_for_status()
176
258
  metrics = response.json()["data"]
@@ -199,6 +281,42 @@ def result_has_data(result: Dict) -> bool:
199
281
  return False
200
282
 
201
283
 
284
+ def adjust_step_for_max_points(
285
+ start_timestamp: str,
286
+ end_timestamp: str,
287
+ step: float,
288
+ ) -> float:
289
+ """
290
+ Adjusts the step parameter to ensure the number of data points doesn't exceed max_points.
291
+ Max points is controlled by the PROMETHEUS_MAX_GRAPH_POINTS environment variable (default: 300).
292
+
293
+ Args:
294
+ start_timestamp: RFC3339 formatted start time
295
+ end_timestamp: RFC3339 formatted end time
296
+ step: The requested step duration in seconds
297
+
298
+ Returns:
299
+ Adjusted step value in seconds that ensures points <= max_points
300
+ """
301
+
302
+ start_dt = dateutil.parser.parse(start_timestamp)
303
+ end_dt = dateutil.parser.parse(end_timestamp)
304
+
305
+ time_range_seconds = (end_dt - start_dt).total_seconds()
306
+
307
+ current_points = time_range_seconds / step
308
+
309
+ # If current points exceed max, adjust the step
310
+ if current_points > MAX_GRAPH_POINTS:
311
+ adjusted_step = time_range_seconds / MAX_GRAPH_POINTS
312
+ logging.info(
313
+ f"Adjusting step from {step}s to {adjusted_step}s to limit points from {current_points:.0f} to {MAX_GRAPH_POINTS}"
314
+ )
315
+ return adjusted_step
316
+
317
+ return step
318
+
319
+
202
320
  def add_prometheus_auth(prometheus_auth_header: Optional[str]) -> Dict[str, Any]:
203
321
  results = {}
204
322
  if prometheus_auth_header:
@@ -212,7 +330,7 @@ def fetch_metrics_labels_with_series_api(
212
330
  cache: Optional[TTLCache],
213
331
  metrics_labels_time_window_hrs: Union[int, None],
214
332
  metric_name: str,
215
- auth=None,
333
+ config=None,
216
334
  verify_ssl: bool = True,
217
335
  ) -> dict:
218
336
  """This is a slow query. Takes 5+ seconds to run"""
@@ -229,13 +347,14 @@ def fetch_metrics_labels_with_series_api(
229
347
  params["end"] = int(time.time())
230
348
  params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
231
349
 
232
- series_response = requests.get(
350
+ series_response = do_request(
351
+ config=config,
233
352
  url=series_url,
234
353
  headers=headers,
235
354
  params=params,
236
- auth=auth,
237
355
  timeout=60,
238
356
  verify=verify_ssl,
357
+ method="GET",
239
358
  )
240
359
  series_response.raise_for_status()
241
360
  series = series_response.json()["data"]
@@ -261,7 +380,7 @@ def fetch_metrics_labels_with_labels_api(
261
380
  metrics_labels_time_window_hrs: Union[int, None],
262
381
  metric_names: List[str],
263
382
  headers: Dict,
264
- auth=None,
383
+ config=None,
265
384
  verify_ssl: bool = True,
266
385
  ) -> dict:
267
386
  metrics_labels = {}
@@ -281,13 +400,14 @@ def fetch_metrics_labels_with_labels_api(
281
400
  params["end"] = int(time.time())
282
401
  params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
283
402
 
284
- response = requests.get(
403
+ response = do_request(
404
+ config=config,
285
405
  url=url,
286
406
  headers=headers,
287
407
  params=params,
288
- auth=auth,
289
408
  timeout=60,
290
409
  verify=verify_ssl,
410
+ method="GET",
291
411
  )
292
412
  response.raise_for_status()
293
413
  labels = response.json()["data"]
@@ -308,7 +428,7 @@ def fetch_metrics(
308
428
  should_fetch_labels_with_labels_api: bool,
309
429
  should_fetch_metadata_with_series_api: bool,
310
430
  headers: Dict,
311
- auth=None,
431
+ config=None,
312
432
  verify_ssl: bool = True,
313
433
  ) -> dict:
314
434
  metrics = None
@@ -318,7 +438,7 @@ def fetch_metrics(
318
438
  prometheus_url=prometheus_url,
319
439
  metric_name=metric_name,
320
440
  headers=headers,
321
- auth=auth,
441
+ config=config,
322
442
  verify_ssl=verify_ssl,
323
443
  )
324
444
  should_fetch_labels = False # series API returns the labels
@@ -326,7 +446,7 @@ def fetch_metrics(
326
446
  metrics = fetch_metadata(
327
447
  prometheus_url=prometheus_url,
328
448
  headers=headers,
329
- auth=auth,
449
+ config=config,
330
450
  verify_ssl=verify_ssl,
331
451
  )
332
452
  metrics = filter_metrics_by_name(metrics, metric_name)
@@ -340,7 +460,7 @@ def fetch_metrics(
340
460
  metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
341
461
  metric_names=list(metrics.keys()),
342
462
  headers=headers,
343
- auth=auth,
463
+ config=config,
344
464
  verify_ssl=verify_ssl,
345
465
  )
346
466
  else:
@@ -350,7 +470,7 @@ def fetch_metrics(
350
470
  metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
351
471
  metric_name=metric_name,
352
472
  headers=headers,
353
- auth=auth,
473
+ config=config,
354
474
  verify_ssl=verify_ssl,
355
475
  )
356
476
 
@@ -371,7 +491,9 @@ class ListPrometheusRules(BasePrometheusTool):
371
491
  )
372
492
  self._cache = None
373
493
 
374
- def _invoke(self, params: Any) -> StructuredToolResult:
494
+ def _invoke(
495
+ self, params: dict, user_approved: bool = False
496
+ ) -> StructuredToolResult:
375
497
  if not self.toolset.config or not self.toolset.config.prometheus_url:
376
498
  return StructuredToolResult(
377
499
  status=ToolResultStatus.ERROR,
@@ -402,13 +524,14 @@ class ListPrometheusRules(BasePrometheusTool):
402
524
 
403
525
  rules_url = urljoin(prometheus_url, "api/v1/rules")
404
526
 
405
- rules_response = requests.get(
527
+ rules_response = do_request(
528
+ config=self.toolset.config,
406
529
  url=rules_url,
407
530
  params=params,
408
- auth=self.toolset.config.get_auth(),
409
531
  timeout=180,
410
532
  verify=self.toolset.config.prometheus_ssl_enabled,
411
533
  headers=self.toolset.config.headers,
534
+ method="GET",
412
535
  )
413
536
  rules_response.raise_for_status()
414
537
  data = rules_response.json()["data"]
@@ -467,7 +590,9 @@ class ListAvailableMetrics(BasePrometheusTool):
467
590
  )
468
591
  self._cache = None
469
592
 
470
- def _invoke(self, params: Any) -> StructuredToolResult:
593
+ def _invoke(
594
+ self, params: dict, user_approved: bool = False
595
+ ) -> StructuredToolResult:
471
596
  if not self.toolset.config or not self.toolset.config.prometheus_url:
472
597
  return StructuredToolResult(
473
598
  status=ToolResultStatus.ERROR,
@@ -500,12 +625,13 @@ class ListAvailableMetrics(BasePrometheusTool):
500
625
  should_fetch_labels_with_labels_api=self.toolset.config.fetch_labels_with_labels_api,
501
626
  should_fetch_metadata_with_series_api=self.toolset.config.fetch_metadata_with_series_api,
502
627
  headers=self.toolset.config.headers,
503
- auth=self.toolset.config.get_auth(),
628
+ config=self.toolset.config,
504
629
  verify_ssl=self.toolset.config.prometheus_ssl_enabled,
505
630
  )
506
631
 
507
- if params.get("type_filter"):
508
- metrics = filter_metrics_by_type(metrics, params.get("type_filter"))
632
+ type_filter = params.get("type_filter")
633
+ if type_filter:
634
+ metrics = filter_metrics_by_type(metrics, type_filter)
509
635
 
510
636
  output = ["Metric | Description | Type | Labels"]
511
637
  output.append("-" * 100)
@@ -572,7 +698,9 @@ class ExecuteInstantQuery(BasePrometheusTool):
572
698
  toolset=toolset,
573
699
  )
574
700
 
575
- def _invoke(self, params: Any) -> StructuredToolResult:
701
+ def _invoke(
702
+ self, params: dict, user_approved: bool = False
703
+ ) -> StructuredToolResult:
576
704
  if not self.toolset.config or not self.toolset.config.prometheus_url:
577
705
  return StructuredToolResult(
578
706
  status=ToolResultStatus.ERROR,
@@ -587,12 +715,14 @@ class ExecuteInstantQuery(BasePrometheusTool):
587
715
 
588
716
  payload = {"query": query}
589
717
 
590
- response = requests.post(
718
+ response = do_request(
719
+ config=self.toolset.config,
591
720
  url=url,
592
721
  headers=self.toolset.config.headers,
593
- auth=self.toolset.config.get_auth(),
594
722
  data=payload,
595
723
  timeout=60,
724
+ verify=self.toolset.config.prometheus_ssl_enabled,
725
+ method="POST",
596
726
  )
597
727
 
598
728
  if response.status_code == 200:
@@ -684,7 +814,7 @@ class ExecuteRangeQuery(BasePrometheusTool):
684
814
  ),
685
815
  "start": ToolParameter(
686
816
  description=standard_start_datetime_tool_param_description(
687
- DEFAULT_TIME_SPAN_SECONDS
817
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS
688
818
  ),
689
819
  type="string",
690
820
  required=False,
@@ -708,7 +838,9 @@ class ExecuteRangeQuery(BasePrometheusTool):
708
838
  toolset=toolset,
709
839
  )
710
840
 
711
- def _invoke(self, params: Any) -> StructuredToolResult:
841
+ def _invoke(
842
+ self, params: dict, user_approved: bool = False
843
+ ) -> StructuredToolResult:
712
844
  if not self.toolset.config or not self.toolset.config.prometheus_url:
713
845
  return StructuredToolResult(
714
846
  status=ToolResultStatus.ERROR,
@@ -723,9 +855,16 @@ class ExecuteRangeQuery(BasePrometheusTool):
723
855
  (start, end) = process_timestamps_to_rfc3339(
724
856
  start_timestamp=params.get("start"),
725
857
  end_timestamp=params.get("end"),
726
- default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
858
+ default_time_span_seconds=DEFAULT_GRAPH_TIME_SPAN_SECONDS,
727
859
  )
728
860
  step = params.get("step", "")
861
+
862
+ step = adjust_step_for_max_points(
863
+ start_timestamp=start,
864
+ end_timestamp=end,
865
+ step=float(step) if step else MAX_GRAPH_POINTS,
866
+ )
867
+
729
868
  description = params.get("description", "")
730
869
  output_type = params.get("output_type", "Plain")
731
870
  payload = {
@@ -735,12 +874,14 @@ class ExecuteRangeQuery(BasePrometheusTool):
735
874
  "step": step,
736
875
  }
737
876
 
738
- response = requests.post(
877
+ response = do_request(
878
+ config=self.toolset.config,
739
879
  url=url,
740
880
  headers=self.toolset.config.headers,
741
- auth=self.toolset.config.get_auth(),
742
881
  data=payload,
743
882
  timeout=120,
883
+ verify=self.toolset.config.prometheus_ssl_enabled,
884
+ method="POST",
744
885
  )
745
886
 
746
887
  if response.status_code == 200:
@@ -823,7 +964,7 @@ class PrometheusToolset(Toolset):
823
964
  super().__init__(
824
965
  name="prometheus/metrics",
825
966
  description="Prometheus integration to fetch metadata and execute PromQL queries",
826
- docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/prometheus.html",
967
+ docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/prometheus/",
827
968
  icon_url="https://upload.wikimedia.org/wikipedia/commons/3/38/Prometheus_software_logo.svg",
828
969
  prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
829
970
  tools=[
@@ -847,10 +988,8 @@ class PrometheusToolset(Toolset):
847
988
  def determine_prometheus_class(
848
989
  self, config: dict[str, Any]
849
990
  ) -> Type[Union[PrometheusConfig, AMPConfig]]:
850
- has_aws_credentials = (
851
- "aws_access_key" in config or "aws_secret_access_key" in config
852
- )
853
- return AMPConfig if has_aws_credentials else PrometheusConfig
991
+ has_aws_fields = "aws_region" in config
992
+ return AMPConfig if has_aws_fields else PrometheusConfig
854
993
 
855
994
  def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
856
995
  try:
@@ -904,12 +1043,13 @@ class PrometheusToolset(Toolset):
904
1043
 
905
1044
  url = urljoin(self.config.prometheus_url, self.config.healthcheck)
906
1045
  try:
907
- response = requests.get(
1046
+ response = do_request(
1047
+ config=self.config,
908
1048
  url=url,
909
1049
  headers=self.config.headers,
910
- auth=self.config.get_auth(),
911
1050
  timeout=10,
912
1051
  verify=self.config.prometheus_ssl_enabled,
1052
+ method="GET",
913
1053
  )
914
1054
 
915
1055
  if response.status_code == 200:
@@ -926,6 +1066,7 @@ class PrometheusToolset(Toolset):
926
1066
  f"Failed to initialize using url={url}",
927
1067
  )
928
1068
  except Exception as e:
1069
+ logging.exception("Failed to initialize Prometheus")
929
1070
  return (
930
1071
  False,
931
1072
  f"Failed to initialize using url={url}. Unexpected error: {str(e)}",
@@ -63,7 +63,9 @@ class ListConfiguredClusters(BaseRabbitMQTool):
63
63
  toolset=toolset,
64
64
  )
65
65
 
66
- def _invoke(self, params: Any) -> StructuredToolResult:
66
+ def _invoke(
67
+ self, params: dict, user_approved: bool = False
68
+ ) -> StructuredToolResult:
67
69
  if not self.toolset.config:
68
70
  raise ValueError("RabbitMQ is not configured.")
69
71
 
@@ -101,7 +103,9 @@ class GetRabbitMQClusterStatus(BaseRabbitMQTool):
101
103
  toolset=toolset,
102
104
  )
103
105
 
104
- def _invoke(self, params: Any) -> StructuredToolResult:
106
+ def _invoke(
107
+ self, params: dict, user_approved: bool = False
108
+ ) -> StructuredToolResult:
105
109
  try:
106
110
  # Fetch node details which include partition info
107
111
  cluster_config = self._get_cluster_config(
@@ -130,7 +134,7 @@ class RabbitMQToolset(Toolset):
130
134
  super().__init__(
131
135
  name="rabbitmq/core",
132
136
  description="Provides tools to interact with RabbitMQ to diagnose cluster health, node status, and specifically network partitions (split-brain).",
133
- docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/rabbitmq.html",
137
+ docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/rabbitmq/",
134
138
  icon_url="https://cdn.worldvectorlogo.com/logos/rabbitmq.svg",
135
139
  prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
136
140
  tools=[
@@ -45,7 +45,9 @@ class FetchRobustaFinding(Tool):
45
45
  logging.error(error)
46
46
  return {"error": error}
47
47
 
48
- def _invoke(self, params: Dict) -> StructuredToolResult:
48
+ def _invoke(
49
+ self, params: dict, user_approved: bool = False
50
+ ) -> StructuredToolResult:
49
51
  finding_id = params[PARAM_FINDING_ID]
50
52
  try:
51
53
  finding = self._fetch_finding(finding_id)
@@ -113,7 +115,9 @@ class FetchResourceRecommendation(Tool):
113
115
  )
114
116
  return None
115
117
 
116
- def _invoke(self, params: Dict) -> StructuredToolResult:
118
+ def _invoke(
119
+ self, params: dict, user_approved: bool = False
120
+ ) -> StructuredToolResult:
117
121
  try:
118
122
  recommendations = self._resource_recommendation(params)
119
123
  if recommendations:
@@ -171,7 +175,9 @@ class FetchConfigurationChanges(Tool):
171
175
  )
172
176
  return None
173
177
 
174
- def _invoke(self, params: Dict) -> StructuredToolResult:
178
+ def _invoke(
179
+ self, params: dict, user_approved: bool = False
180
+ ) -> StructuredToolResult:
175
181
  try:
176
182
  changes = self._fetch_change_history(params)
177
183
  if changes:
@@ -213,7 +219,7 @@ class RobustaToolset(Toolset):
213
219
  super().__init__(
214
220
  icon_url="https://cdn.prod.website-files.com/633e9bac8f71dfb7a8e4c9a6/646be7710db810b14133bdb5_logo.svg",
215
221
  description="Fetches alerts metadata and change history",
216
- docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/robusta.html",
222
+ docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/robusta/",
217
223
  name="robusta",
218
224
  prerequisites=[dal_prereq],
219
225
  tools=[
@@ -35,7 +35,9 @@ class RunbookFetcher(Tool):
35
35
  toolset=toolset, # type: ignore
36
36
  )
37
37
 
38
- def _invoke(self, params: Any) -> StructuredToolResult:
38
+ def _invoke(
39
+ self, params: dict, user_approved: bool = False
40
+ ) -> StructuredToolResult:
39
41
  link: str = params["link"]
40
42
 
41
43
  search_paths = [DEFAULT_RUNBOOK_SEARCH_PATH]
@@ -126,7 +128,7 @@ class RunbookToolset(Toolset):
126
128
  tools=[
127
129
  RunbookFetcher(self),
128
130
  ],
129
- docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/runbook.html",
131
+ docs_url="https://holmesgpt.dev/data-sources/",
130
132
  tags=[
131
133
  ToolsetTag.CORE,
132
134
  ],
@@ -115,7 +115,9 @@ class ReturnChangesInTimerange(ServiceNowBaseTool):
115
115
  start = params.get("start", "last hour")
116
116
  return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Requests ({start})"
117
117
 
118
- def _invoke(self, params: Any) -> StructuredToolResult:
118
+ def _invoke(
119
+ self, params: dict, user_approved: bool = False
120
+ ) -> StructuredToolResult:
119
121
  parsed_params = {}
120
122
  try:
121
123
  (start, _) = process_timestamps_to_rfc3339(
@@ -158,7 +160,9 @@ class ReturnChange(ServiceNowBaseTool):
158
160
  sys_id = params.get("sys_id", "")
159
161
  return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Details ({sys_id})"
160
162
 
161
- def _invoke(self, params: Any) -> StructuredToolResult:
163
+ def _invoke(
164
+ self, params: dict, user_approved: bool = False
165
+ ) -> StructuredToolResult:
162
166
  try:
163
167
  url = "https://{instance}.service-now.com/api/now/v2/table/change_request/{sys_id}".format(
164
168
  instance=self.toolset.config.get("instance"),
@@ -190,7 +194,9 @@ class ReturnChangesWithKeyword(ServiceNowBaseTool):
190
194
  keyword = params.get("keyword", "")
191
195
  return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Changes ({keyword})"
192
196
 
193
- def _invoke(self, params: Any) -> StructuredToolResult:
197
+ def _invoke(
198
+ self, params: dict, user_approved: bool = False
199
+ ) -> StructuredToolResult:
194
200
  parsed_params = {}
195
201
  try:
196
202
  url = f"https://{self.toolset.config.get('instance')}.service-now.com/api/now/v2/table/change_request"
@@ -1,7 +1,7 @@
1
1
  toolsets:
2
2
  slab:
3
3
  description: "Fetches slab pages"
4
- docs_url: "https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/slab.html"
4
+ docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/slab/"
5
5
  icon_url: "https://platform.robusta.dev/demos/slab-mark.svg"
6
6
  tags:
7
7
  - core
@@ -41,9 +41,14 @@ def suppress_noisy_logs():
41
41
  warnings.filterwarnings("ignore", category=UserWarning, module="slack_sdk.*")
42
42
 
43
43
 
44
- def init_logging(verbose_flags: Optional[List[bool]] = None):
44
+ def init_logging(verbose_flags: Optional[List[bool]] = None, log_costs: bool = False):
45
45
  verbosity = cli_flags_to_verbosity(verbose_flags) # type: ignore
46
46
 
47
+ # Setup cost logger if requested
48
+ if log_costs:
49
+ cost_logger = logging.getLogger("holmes.costs")
50
+ cost_logger.setLevel(logging.DEBUG)
51
+
47
52
  if verbosity == Verbosity.VERY_VERBOSE:
48
53
  logging.basicConfig(
49
54
  force=True,