holmesgpt 0.13.1__py3-none-any.whl → 0.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +7 -0
- holmes/config.py +3 -1
- holmes/core/conversations.py +0 -11
- holmes/core/investigation.py +0 -6
- holmes/core/llm.py +60 -1
- holmes/core/prompt.py +0 -2
- holmes/core/supabase_dal.py +2 -2
- holmes/core/todo_tasks_formatter.py +51 -0
- holmes/core/tool_calling_llm.py +166 -91
- holmes/core/tools.py +20 -4
- holmes/interactive.py +63 -2
- holmes/main.py +0 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +3 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +3 -13
- holmes/plugins/toolsets/__init__.py +5 -1
- holmes/plugins/toolsets/argocd.yaml +1 -1
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +18 -6
- holmes/plugins/toolsets/aws.yaml +9 -5
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -1
- holmes/plugins/toolsets/bash/bash_toolset.py +31 -20
- holmes/plugins/toolsets/confluence.yaml +1 -1
- holmes/plugins/toolsets/coralogix/api.py +3 -1
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +4 -4
- holmes/plugins/toolsets/coralogix/utils.py +41 -14
- holmes/plugins/toolsets/datadog/datadog_api.py +45 -2
- holmes/plugins/toolsets/datadog/datadog_general_instructions.jinja2 +208 -0
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +43 -0
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +12 -9
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +722 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +17 -6
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +15 -7
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +6 -2
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +9 -3
- holmes/plugins/toolsets/docker.yaml +1 -1
- holmes/plugins/toolsets/git.py +15 -5
- holmes/plugins/toolsets/grafana/toolset_grafana.py +25 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +5 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -32
- holmes/plugins/toolsets/helm.yaml +1 -1
- holmes/plugins/toolsets/internet/internet.py +4 -2
- holmes/plugins/toolsets/internet/notion.py +4 -2
- holmes/plugins/toolsets/investigator/core_investigation.py +5 -17
- holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +1 -5
- holmes/plugins/toolsets/kafka.py +19 -7
- holmes/plugins/toolsets/kubernetes.yaml +5 -5
- holmes/plugins/toolsets/kubernetes_logs.py +4 -4
- holmes/plugins/toolsets/kubernetes_logs.yaml +1 -1
- holmes/plugins/toolsets/logging_utils/logging_api.py +15 -2
- holmes/plugins/toolsets/mcp/toolset_mcp.py +3 -1
- holmes/plugins/toolsets/newrelic.py +8 -4
- holmes/plugins/toolsets/opensearch/opensearch.py +13 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +4 -4
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +9 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +193 -82
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +7 -3
- holmes/plugins/toolsets/robusta/robusta.py +10 -4
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -2
- holmes/plugins/toolsets/servicenow/servicenow.py +9 -3
- holmes/plugins/toolsets/slab.yaml +1 -1
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3.dist-info}/METADATA +3 -2
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3.dist-info}/RECORD +75 -72
- holmes/core/todo_manager.py +0 -88
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3.dist-info}/entry_points.txt +0 -0
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
|
-
import boto3
|
|
4
3
|
import os
|
|
5
4
|
import re
|
|
6
5
|
import time
|
|
6
|
+
import dateutil.parser
|
|
7
7
|
from typing import Any, Dict, List, Optional, Tuple, Type, Union
|
|
8
8
|
from urllib.parse import urljoin
|
|
9
9
|
|
|
10
10
|
import requests # type: ignore
|
|
11
11
|
from pydantic import BaseModel, field_validator, Field, model_validator
|
|
12
12
|
from requests import RequestException
|
|
13
|
-
from
|
|
14
|
-
|
|
13
|
+
from prometrix.connect.aws_connect import AWSPrometheusConnect
|
|
14
|
+
from prometrix.models.prometheus_config import PrometheusConfig as BasePrometheusConfig
|
|
15
15
|
from holmes.core.tools import (
|
|
16
16
|
CallablePrerequisite,
|
|
17
17
|
StructuredToolResult,
|
|
@@ -30,10 +30,10 @@ from holmes.plugins.toolsets.utils import (
|
|
|
30
30
|
toolset_name_for_one_liner,
|
|
31
31
|
)
|
|
32
32
|
from holmes.utils.cache import TTLCache
|
|
33
|
-
from holmes.common.env_vars import IS_OPENSHIFT
|
|
33
|
+
from holmes.common.env_vars import IS_OPENSHIFT, MAX_GRAPH_POINTS
|
|
34
34
|
from holmes.common.openshift import load_openshift_token
|
|
35
35
|
from holmes.plugins.toolsets.logging_utils.logging_api import (
|
|
36
|
-
|
|
36
|
+
DEFAULT_GRAPH_TIME_SPAN_SECONDS,
|
|
37
37
|
)
|
|
38
38
|
from holmes.utils.keygen_utils import generate_random_key
|
|
39
39
|
|
|
@@ -82,9 +82,6 @@ class PrometheusConfig(BaseModel):
|
|
|
82
82
|
def is_amp(self) -> bool:
|
|
83
83
|
return False
|
|
84
84
|
|
|
85
|
-
def get_auth(self) -> Any:
|
|
86
|
-
return None
|
|
87
|
-
|
|
88
85
|
|
|
89
86
|
class AMPConfig(PrometheusConfig):
|
|
90
87
|
aws_access_key: Optional[str] = None
|
|
@@ -93,54 +90,97 @@ class AMPConfig(PrometheusConfig):
|
|
|
93
90
|
aws_service_name: str = "aps"
|
|
94
91
|
healthcheck: str = "api/v1/query?query=up"
|
|
95
92
|
prometheus_ssl_enabled: bool = False
|
|
93
|
+
assume_role_arn: Optional[str] = None
|
|
94
|
+
|
|
95
|
+
# Refresh the AWS client (and its STS creds) every N seconds (default: 15 minutes)
|
|
96
|
+
refresh_interval_seconds: int = 900
|
|
97
|
+
|
|
98
|
+
_aws_client: Optional[AWSPrometheusConnect] = None
|
|
99
|
+
_aws_client_created_at: float = 0.0
|
|
96
100
|
|
|
97
101
|
def is_amp(self) -> bool:
|
|
98
102
|
return True
|
|
99
103
|
|
|
100
|
-
def
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
if irsa_auth:
|
|
130
|
-
return irsa_auth
|
|
131
|
-
static_auth = self._build_static_aws_auth()
|
|
132
|
-
if static_auth:
|
|
133
|
-
return static_auth
|
|
134
|
-
raise RuntimeError(
|
|
135
|
-
"No AWS credentials available. Tried IRSA and static keys. "
|
|
136
|
-
"Ensure IRSA is configured on the service account or provide aws_access_key/aws_secret_access_key."
|
|
137
|
-
)
|
|
104
|
+
def _should_refresh_client(self) -> bool:
    """Tell whether the cached AWS client has to be (re)built.

    Returns:
        True when no client is cached yet, or when the cached client is at
        least ``refresh_interval_seconds`` old; False otherwise.
    """
    client_age_seconds = time.time() - self._aws_client_created_at
    return not self._aws_client or client_age_seconds >= self.refresh_interval_seconds


def get_aws_client(self) -> Optional[AWSPrometheusConnect]:
    """Return the cached AWSPrometheusConnect, rebuilding it when it is due.

    A rebuild creates a BasePrometheusConfig from ``prometheus_url``,
    ``prometheus_ssl_enabled`` (inverted into ``disable_ssl``) and
    ``additional_labels``, then an AWSPrometheusConnect from the configured
    keys, region, service name and ``assume_role_arn``.

    Best effort: if the rebuild raises, the error is logged and whatever
    client was cached before is returned.

    Returns:
        The current client, or None when none could ever be created.
    """
    # Fast path: the cached client is still within its refresh interval.
    if not self._should_refresh_client():
        return self._aws_client

    try:
        prometheus_settings = BasePrometheusConfig(
            url=self.prometheus_url,
            disable_ssl=not self.prometheus_ssl_enabled,
            additional_labels=self.additional_labels,
        )
        self._aws_client = AWSPrometheusConnect(
            access_key=self.aws_access_key,
            secret_key=self.aws_secret_access_key,
            token=None,
            region=self.aws_region,
            service_name=self.aws_service_name,
            assume_role_arn=self.assume_role_arn,
            config=prometheus_settings,
        )
        # Only stamp the creation time after a successful build, so a failed
        # attempt is retried on the next call.
        self._aws_client_created_at = time.time()
    except Exception:
        logging.exception("Failed to create/refresh AWS client")

    return self._aws_client
|
|
138
133
|
|
|
139
134
|
|
|
140
135
|
class BasePrometheusTool(Tool):
|
|
141
136
|
toolset: "PrometheusToolset"
|
|
142
137
|
|
|
143
138
|
|
|
139
|
+
def do_request(
    config,  # PrometheusConfig | AMPConfig
    url: str,
    params: Optional[Dict] = None,
    data: Optional[Dict] = None,
    timeout: int = 60,
    verify: Optional[bool] = None,
    headers: Optional[Dict] = None,
    method: str = "GET",
) -> requests.Response:
    """
    Route a request through either:
    - AWSPrometheusConnect (SigV4) when config is AMPConfig
    - plain requests otherwise (including ``config=None``)

    method defaults to GET so callers can omit it for reads.

    Args:
        config: Toolset configuration; may be None, in which case ``verify``
            and ``headers`` fall back to TLS verification on and no headers.
        url: Absolute URL to call.
        params: Query-string parameters.
        data: Request body (form data).
        timeout: Timeout in seconds; applied on the plain ``requests`` path only.
        verify: TLS verification flag; defaults to ``config.prometheus_ssl_enabled``.
        headers: HTTP headers; defaults to ``config.headers`` or an empty dict.
        method: HTTP verb.

    Returns:
        The response object of the underlying call.

    Raises:
        RuntimeError: ``config`` is an AMPConfig but no AWS client is available
            (creation failed; the cause is logged by ``get_aws_client``).
    """
    if verify is None:
        # Without a config there is nothing to consult; keep TLS verification on.
        verify = config.prometheus_ssl_enabled if config is not None else True
    if headers is None:
        headers = (config.headers if config is not None else None) or {}

    if isinstance(config, AMPConfig):
        client = config.get_aws_client()  # cached AWSPrometheusConnect
        if client is None:
            # get_aws_client() is Optional: fail with a clear message instead of
            # "'NoneType' object has no attribute 'signed_request'".
            raise RuntimeError(
                "AWS Prometheus client is not available; "
                "check the AWS credentials/role configuration and the logs"
            )
        # NOTE(review): `timeout` is not forwarded here -- confirm whether
        # signed_request accepts a timeout before passing it through.
        return client.signed_request(  # type: ignore
            method=method,
            url=url,
            data=data,
            params=params,
            verify=verify,
            headers=headers,
        )

    # Non-AMP: plain HTTP
    return requests.request(
        method=method,
        url=url,
        headers=headers,
        params=params,
        data=data,
        timeout=timeout,
        verify=verify,
    )
|
|
182
|
+
|
|
183
|
+
|
|
144
184
|
def filter_metrics_by_type(metrics: Dict, expected_type: str):
|
|
145
185
|
return {
|
|
146
186
|
metric_name: metric_data
|
|
@@ -165,14 +205,18 @@ METRICS_SUFFIXES_TO_STRIP = ["_bucket", "_count", "_sum"]
|
|
|
165
205
|
def fetch_metadata(
|
|
166
206
|
prometheus_url: str,
|
|
167
207
|
headers: Optional[Dict],
|
|
168
|
-
|
|
208
|
+
config,
|
|
169
209
|
verify_ssl: bool = True,
|
|
170
210
|
) -> Dict:
|
|
171
211
|
metadata_url = urljoin(prometheus_url, "api/v1/metadata")
|
|
172
|
-
metadata_response =
|
|
173
|
-
|
|
212
|
+
metadata_response = do_request(
|
|
213
|
+
config=config,
|
|
214
|
+
url=metadata_url,
|
|
215
|
+
headers=headers,
|
|
216
|
+
timeout=60,
|
|
217
|
+
verify=verify_ssl,
|
|
218
|
+
method="GET",
|
|
174
219
|
)
|
|
175
|
-
|
|
176
220
|
metadata_response.raise_for_status()
|
|
177
221
|
|
|
178
222
|
metadata = metadata_response.json()["data"]
|
|
@@ -195,14 +239,20 @@ def fetch_metadata_with_series_api(
|
|
|
195
239
|
prometheus_url: str,
|
|
196
240
|
metric_name: str,
|
|
197
241
|
headers: Dict,
|
|
198
|
-
|
|
242
|
+
config,
|
|
199
243
|
verify_ssl: bool = True,
|
|
200
244
|
) -> Dict:
|
|
201
245
|
url = urljoin(prometheus_url, "api/v1/series")
|
|
202
246
|
params: Dict = {"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"}
|
|
203
247
|
|
|
204
|
-
response =
|
|
205
|
-
|
|
248
|
+
response = do_request(
|
|
249
|
+
config=config,
|
|
250
|
+
url=url,
|
|
251
|
+
headers=headers,
|
|
252
|
+
params=params,
|
|
253
|
+
timeout=60,
|
|
254
|
+
verify=verify_ssl,
|
|
255
|
+
method="GET",
|
|
206
256
|
)
|
|
207
257
|
response.raise_for_status()
|
|
208
258
|
metrics = response.json()["data"]
|
|
@@ -231,6 +281,42 @@ def result_has_data(result: Dict) -> bool:
|
|
|
231
281
|
return False
|
|
232
282
|
|
|
233
283
|
|
|
284
|
+
def adjust_step_for_max_points(
    start_timestamp: str,
    end_timestamp: str,
    step: float,
) -> float:
    """
    Adjusts the step parameter so the number of data points doesn't exceed MAX_GRAPH_POINTS.

    MAX_GRAPH_POINTS is imported from holmes.common.env_vars (the previous
    docstring names the PROMETHEUS_MAX_GRAPH_POINTS environment variable with
    a default of 300 -- confirm there).

    Args:
        start_timestamp: RFC3339 formatted start time
        end_timestamp: RFC3339 formatted end time
        step: The requested step duration in seconds

    Returns:
        ``step`` unchanged when ``time_range / step`` is within the limit,
        otherwise ``time_range / MAX_GRAPH_POINTS``. A non-positive ``step`` is
        replaced by ``time_range / MAX_GRAPH_POINTS`` when the range is
        positive, and returned unchanged when it is not.
    """

    start_dt = dateutil.parser.parse(start_timestamp)
    end_dt = dateutil.parser.parse(end_timestamp)

    time_range_seconds = (end_dt - start_dt).total_seconds()

    if step <= 0:
        # A zero step used to raise ZeroDivisionError in the division below and
        # a negative one slipped through untouched; neither is a usable
        # resolution, so derive one from the range when that is possible.
        if time_range_seconds <= 0:
            return step
        fallback_step = time_range_seconds / MAX_GRAPH_POINTS
        logging.warning(
            f"Invalid step {step}s requested; using {fallback_step}s to limit points to {MAX_GRAPH_POINTS}"
        )
        return fallback_step

    current_points = time_range_seconds / step

    # If current points exceed max, adjust the step
    if current_points > MAX_GRAPH_POINTS:
        adjusted_step = time_range_seconds / MAX_GRAPH_POINTS
        logging.info(
            f"Adjusting step from {step}s to {adjusted_step}s to limit points from {current_points:.0f} to {MAX_GRAPH_POINTS}"
        )
        return adjusted_step

    return step
|
|
318
|
+
|
|
319
|
+
|
|
234
320
|
def add_prometheus_auth(prometheus_auth_header: Optional[str]) -> Dict[str, Any]:
|
|
235
321
|
results = {}
|
|
236
322
|
if prometheus_auth_header:
|
|
@@ -244,7 +330,7 @@ def fetch_metrics_labels_with_series_api(
|
|
|
244
330
|
cache: Optional[TTLCache],
|
|
245
331
|
metrics_labels_time_window_hrs: Union[int, None],
|
|
246
332
|
metric_name: str,
|
|
247
|
-
|
|
333
|
+
config=None,
|
|
248
334
|
verify_ssl: bool = True,
|
|
249
335
|
) -> dict:
|
|
250
336
|
"""This is a slow query. Takes 5+ seconds to run"""
|
|
@@ -261,13 +347,14 @@ def fetch_metrics_labels_with_series_api(
|
|
|
261
347
|
params["end"] = int(time.time())
|
|
262
348
|
params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
|
|
263
349
|
|
|
264
|
-
series_response =
|
|
350
|
+
series_response = do_request(
|
|
351
|
+
config=config,
|
|
265
352
|
url=series_url,
|
|
266
353
|
headers=headers,
|
|
267
354
|
params=params,
|
|
268
|
-
auth=auth,
|
|
269
355
|
timeout=60,
|
|
270
356
|
verify=verify_ssl,
|
|
357
|
+
method="GET",
|
|
271
358
|
)
|
|
272
359
|
series_response.raise_for_status()
|
|
273
360
|
series = series_response.json()["data"]
|
|
@@ -293,7 +380,7 @@ def fetch_metrics_labels_with_labels_api(
|
|
|
293
380
|
metrics_labels_time_window_hrs: Union[int, None],
|
|
294
381
|
metric_names: List[str],
|
|
295
382
|
headers: Dict,
|
|
296
|
-
|
|
383
|
+
config=None,
|
|
297
384
|
verify_ssl: bool = True,
|
|
298
385
|
) -> dict:
|
|
299
386
|
metrics_labels = {}
|
|
@@ -313,13 +400,14 @@ def fetch_metrics_labels_with_labels_api(
|
|
|
313
400
|
params["end"] = int(time.time())
|
|
314
401
|
params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
|
|
315
402
|
|
|
316
|
-
response =
|
|
403
|
+
response = do_request(
|
|
404
|
+
config=config,
|
|
317
405
|
url=url,
|
|
318
406
|
headers=headers,
|
|
319
407
|
params=params,
|
|
320
|
-
auth=auth,
|
|
321
408
|
timeout=60,
|
|
322
409
|
verify=verify_ssl,
|
|
410
|
+
method="GET",
|
|
323
411
|
)
|
|
324
412
|
response.raise_for_status()
|
|
325
413
|
labels = response.json()["data"]
|
|
@@ -340,7 +428,7 @@ def fetch_metrics(
|
|
|
340
428
|
should_fetch_labels_with_labels_api: bool,
|
|
341
429
|
should_fetch_metadata_with_series_api: bool,
|
|
342
430
|
headers: Dict,
|
|
343
|
-
|
|
431
|
+
config=None,
|
|
344
432
|
verify_ssl: bool = True,
|
|
345
433
|
) -> dict:
|
|
346
434
|
metrics = None
|
|
@@ -350,7 +438,7 @@ def fetch_metrics(
|
|
|
350
438
|
prometheus_url=prometheus_url,
|
|
351
439
|
metric_name=metric_name,
|
|
352
440
|
headers=headers,
|
|
353
|
-
|
|
441
|
+
config=config,
|
|
354
442
|
verify_ssl=verify_ssl,
|
|
355
443
|
)
|
|
356
444
|
should_fetch_labels = False # series API returns the labels
|
|
@@ -358,7 +446,7 @@ def fetch_metrics(
|
|
|
358
446
|
metrics = fetch_metadata(
|
|
359
447
|
prometheus_url=prometheus_url,
|
|
360
448
|
headers=headers,
|
|
361
|
-
|
|
449
|
+
config=config,
|
|
362
450
|
verify_ssl=verify_ssl,
|
|
363
451
|
)
|
|
364
452
|
metrics = filter_metrics_by_name(metrics, metric_name)
|
|
@@ -372,7 +460,7 @@ def fetch_metrics(
|
|
|
372
460
|
metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
|
|
373
461
|
metric_names=list(metrics.keys()),
|
|
374
462
|
headers=headers,
|
|
375
|
-
|
|
463
|
+
config=config,
|
|
376
464
|
verify_ssl=verify_ssl,
|
|
377
465
|
)
|
|
378
466
|
else:
|
|
@@ -382,7 +470,7 @@ def fetch_metrics(
|
|
|
382
470
|
metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
|
|
383
471
|
metric_name=metric_name,
|
|
384
472
|
headers=headers,
|
|
385
|
-
|
|
473
|
+
config=config,
|
|
386
474
|
verify_ssl=verify_ssl,
|
|
387
475
|
)
|
|
388
476
|
|
|
@@ -403,7 +491,9 @@ class ListPrometheusRules(BasePrometheusTool):
|
|
|
403
491
|
)
|
|
404
492
|
self._cache = None
|
|
405
493
|
|
|
406
|
-
def _invoke(
|
|
494
|
+
def _invoke(
|
|
495
|
+
self, params: dict, user_approved: bool = False
|
|
496
|
+
) -> StructuredToolResult:
|
|
407
497
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
408
498
|
return StructuredToolResult(
|
|
409
499
|
status=ToolResultStatus.ERROR,
|
|
@@ -434,13 +524,14 @@ class ListPrometheusRules(BasePrometheusTool):
|
|
|
434
524
|
|
|
435
525
|
rules_url = urljoin(prometheus_url, "api/v1/rules")
|
|
436
526
|
|
|
437
|
-
rules_response =
|
|
527
|
+
rules_response = do_request(
|
|
528
|
+
config=self.toolset.config,
|
|
438
529
|
url=rules_url,
|
|
439
530
|
params=params,
|
|
440
|
-
auth=self.toolset.config.get_auth(),
|
|
441
531
|
timeout=180,
|
|
442
532
|
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
443
533
|
headers=self.toolset.config.headers,
|
|
534
|
+
method="GET",
|
|
444
535
|
)
|
|
445
536
|
rules_response.raise_for_status()
|
|
446
537
|
data = rules_response.json()["data"]
|
|
@@ -499,7 +590,9 @@ class ListAvailableMetrics(BasePrometheusTool):
|
|
|
499
590
|
)
|
|
500
591
|
self._cache = None
|
|
501
592
|
|
|
502
|
-
def _invoke(
|
|
593
|
+
def _invoke(
|
|
594
|
+
self, params: dict, user_approved: bool = False
|
|
595
|
+
) -> StructuredToolResult:
|
|
503
596
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
504
597
|
return StructuredToolResult(
|
|
505
598
|
status=ToolResultStatus.ERROR,
|
|
@@ -532,12 +625,13 @@ class ListAvailableMetrics(BasePrometheusTool):
|
|
|
532
625
|
should_fetch_labels_with_labels_api=self.toolset.config.fetch_labels_with_labels_api,
|
|
533
626
|
should_fetch_metadata_with_series_api=self.toolset.config.fetch_metadata_with_series_api,
|
|
534
627
|
headers=self.toolset.config.headers,
|
|
535
|
-
|
|
628
|
+
config=self.toolset.config,
|
|
536
629
|
verify_ssl=self.toolset.config.prometheus_ssl_enabled,
|
|
537
630
|
)
|
|
538
631
|
|
|
539
|
-
|
|
540
|
-
|
|
632
|
+
type_filter = params.get("type_filter")
|
|
633
|
+
if type_filter:
|
|
634
|
+
metrics = filter_metrics_by_type(metrics, type_filter)
|
|
541
635
|
|
|
542
636
|
output = ["Metric | Description | Type | Labels"]
|
|
543
637
|
output.append("-" * 100)
|
|
@@ -604,7 +698,9 @@ class ExecuteInstantQuery(BasePrometheusTool):
|
|
|
604
698
|
toolset=toolset,
|
|
605
699
|
)
|
|
606
700
|
|
|
607
|
-
def _invoke(
|
|
701
|
+
def _invoke(
|
|
702
|
+
self, params: dict, user_approved: bool = False
|
|
703
|
+
) -> StructuredToolResult:
|
|
608
704
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
609
705
|
return StructuredToolResult(
|
|
610
706
|
status=ToolResultStatus.ERROR,
|
|
@@ -619,12 +715,14 @@ class ExecuteInstantQuery(BasePrometheusTool):
|
|
|
619
715
|
|
|
620
716
|
payload = {"query": query}
|
|
621
717
|
|
|
622
|
-
response =
|
|
718
|
+
response = do_request(
|
|
719
|
+
config=self.toolset.config,
|
|
623
720
|
url=url,
|
|
624
721
|
headers=self.toolset.config.headers,
|
|
625
|
-
auth=self.toolset.config.get_auth(),
|
|
626
722
|
data=payload,
|
|
627
723
|
timeout=60,
|
|
724
|
+
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
725
|
+
method="POST",
|
|
628
726
|
)
|
|
629
727
|
|
|
630
728
|
if response.status_code == 200:
|
|
@@ -716,7 +814,7 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
716
814
|
),
|
|
717
815
|
"start": ToolParameter(
|
|
718
816
|
description=standard_start_datetime_tool_param_description(
|
|
719
|
-
|
|
817
|
+
DEFAULT_GRAPH_TIME_SPAN_SECONDS
|
|
720
818
|
),
|
|
721
819
|
type="string",
|
|
722
820
|
required=False,
|
|
@@ -740,7 +838,9 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
740
838
|
toolset=toolset,
|
|
741
839
|
)
|
|
742
840
|
|
|
743
|
-
def _invoke(
|
|
841
|
+
def _invoke(
|
|
842
|
+
self, params: dict, user_approved: bool = False
|
|
843
|
+
) -> StructuredToolResult:
|
|
744
844
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
745
845
|
return StructuredToolResult(
|
|
746
846
|
status=ToolResultStatus.ERROR,
|
|
@@ -755,9 +855,16 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
755
855
|
(start, end) = process_timestamps_to_rfc3339(
|
|
756
856
|
start_timestamp=params.get("start"),
|
|
757
857
|
end_timestamp=params.get("end"),
|
|
758
|
-
default_time_span_seconds=
|
|
858
|
+
default_time_span_seconds=DEFAULT_GRAPH_TIME_SPAN_SECONDS,
|
|
759
859
|
)
|
|
760
860
|
step = params.get("step", "")
|
|
861
|
+
|
|
862
|
+
step = adjust_step_for_max_points(
|
|
863
|
+
start_timestamp=start,
|
|
864
|
+
end_timestamp=end,
|
|
865
|
+
step=float(step) if step else MAX_GRAPH_POINTS,
|
|
866
|
+
)
|
|
867
|
+
|
|
761
868
|
description = params.get("description", "")
|
|
762
869
|
output_type = params.get("output_type", "Plain")
|
|
763
870
|
payload = {
|
|
@@ -767,12 +874,14 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
767
874
|
"step": step,
|
|
768
875
|
}
|
|
769
876
|
|
|
770
|
-
response =
|
|
877
|
+
response = do_request(
|
|
878
|
+
config=self.toolset.config,
|
|
771
879
|
url=url,
|
|
772
880
|
headers=self.toolset.config.headers,
|
|
773
|
-
auth=self.toolset.config.get_auth(),
|
|
774
881
|
data=payload,
|
|
775
882
|
timeout=120,
|
|
883
|
+
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
884
|
+
method="POST",
|
|
776
885
|
)
|
|
777
886
|
|
|
778
887
|
if response.status_code == 200:
|
|
@@ -855,7 +964,7 @@ class PrometheusToolset(Toolset):
|
|
|
855
964
|
super().__init__(
|
|
856
965
|
name="prometheus/metrics",
|
|
857
966
|
description="Prometheus integration to fetch metadata and execute PromQL queries",
|
|
858
|
-
docs_url="https://
|
|
967
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/prometheus/",
|
|
859
968
|
icon_url="https://upload.wikimedia.org/wikipedia/commons/3/38/Prometheus_software_logo.svg",
|
|
860
969
|
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
|
|
861
970
|
tools=[
|
|
@@ -934,12 +1043,13 @@ class PrometheusToolset(Toolset):
|
|
|
934
1043
|
|
|
935
1044
|
url = urljoin(self.config.prometheus_url, self.config.healthcheck)
|
|
936
1045
|
try:
|
|
937
|
-
response =
|
|
1046
|
+
response = do_request(
|
|
1047
|
+
config=self.config,
|
|
938
1048
|
url=url,
|
|
939
1049
|
headers=self.config.headers,
|
|
940
|
-
auth=self.config.get_auth(),
|
|
941
1050
|
timeout=10,
|
|
942
1051
|
verify=self.config.prometheus_ssl_enabled,
|
|
1052
|
+
method="GET",
|
|
943
1053
|
)
|
|
944
1054
|
|
|
945
1055
|
if response.status_code == 200:
|
|
@@ -956,6 +1066,7 @@ class PrometheusToolset(Toolset):
|
|
|
956
1066
|
f"Failed to initialize using url={url}",
|
|
957
1067
|
)
|
|
958
1068
|
except Exception as e:
|
|
1069
|
+
logging.exception("Failed to initialize Prometheus")
|
|
959
1070
|
return (
|
|
960
1071
|
False,
|
|
961
1072
|
f"Failed to initialize using url={url}. Unexpected error: {str(e)}",
|
|
@@ -63,7 +63,9 @@ class ListConfiguredClusters(BaseRabbitMQTool):
|
|
|
63
63
|
toolset=toolset,
|
|
64
64
|
)
|
|
65
65
|
|
|
66
|
-
def _invoke(
|
|
66
|
+
def _invoke(
|
|
67
|
+
self, params: dict, user_approved: bool = False
|
|
68
|
+
) -> StructuredToolResult:
|
|
67
69
|
if not self.toolset.config:
|
|
68
70
|
raise ValueError("RabbitMQ is not configured.")
|
|
69
71
|
|
|
@@ -101,7 +103,9 @@ class GetRabbitMQClusterStatus(BaseRabbitMQTool):
|
|
|
101
103
|
toolset=toolset,
|
|
102
104
|
)
|
|
103
105
|
|
|
104
|
-
def _invoke(
|
|
106
|
+
def _invoke(
|
|
107
|
+
self, params: dict, user_approved: bool = False
|
|
108
|
+
) -> StructuredToolResult:
|
|
105
109
|
try:
|
|
106
110
|
# Fetch node details which include partition info
|
|
107
111
|
cluster_config = self._get_cluster_config(
|
|
@@ -130,7 +134,7 @@ class RabbitMQToolset(Toolset):
|
|
|
130
134
|
super().__init__(
|
|
131
135
|
name="rabbitmq/core",
|
|
132
136
|
description="Provides tools to interact with RabbitMQ to diagnose cluster health, node status, and specifically network partitions (split-brain).",
|
|
133
|
-
docs_url="https://
|
|
137
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/rabbitmq/",
|
|
134
138
|
icon_url="https://cdn.worldvectorlogo.com/logos/rabbitmq.svg",
|
|
135
139
|
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
|
|
136
140
|
tools=[
|
|
@@ -45,7 +45,9 @@ class FetchRobustaFinding(Tool):
|
|
|
45
45
|
logging.error(error)
|
|
46
46
|
return {"error": error}
|
|
47
47
|
|
|
48
|
-
def _invoke(
|
|
48
|
+
def _invoke(
|
|
49
|
+
self, params: dict, user_approved: bool = False
|
|
50
|
+
) -> StructuredToolResult:
|
|
49
51
|
finding_id = params[PARAM_FINDING_ID]
|
|
50
52
|
try:
|
|
51
53
|
finding = self._fetch_finding(finding_id)
|
|
@@ -113,7 +115,9 @@ class FetchResourceRecommendation(Tool):
|
|
|
113
115
|
)
|
|
114
116
|
return None
|
|
115
117
|
|
|
116
|
-
def _invoke(
|
|
118
|
+
def _invoke(
|
|
119
|
+
self, params: dict, user_approved: bool = False
|
|
120
|
+
) -> StructuredToolResult:
|
|
117
121
|
try:
|
|
118
122
|
recommendations = self._resource_recommendation(params)
|
|
119
123
|
if recommendations:
|
|
@@ -171,7 +175,9 @@ class FetchConfigurationChanges(Tool):
|
|
|
171
175
|
)
|
|
172
176
|
return None
|
|
173
177
|
|
|
174
|
-
def _invoke(
|
|
178
|
+
def _invoke(
|
|
179
|
+
self, params: dict, user_approved: bool = False
|
|
180
|
+
) -> StructuredToolResult:
|
|
175
181
|
try:
|
|
176
182
|
changes = self._fetch_change_history(params)
|
|
177
183
|
if changes:
|
|
@@ -213,7 +219,7 @@ class RobustaToolset(Toolset):
|
|
|
213
219
|
super().__init__(
|
|
214
220
|
icon_url="https://cdn.prod.website-files.com/633e9bac8f71dfb7a8e4c9a6/646be7710db810b14133bdb5_logo.svg",
|
|
215
221
|
description="Fetches alerts metadata and change history",
|
|
216
|
-
docs_url="https://
|
|
222
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/robusta/",
|
|
217
223
|
name="robusta",
|
|
218
224
|
prerequisites=[dal_prereq],
|
|
219
225
|
tools=[
|
|
@@ -35,7 +35,9 @@ class RunbookFetcher(Tool):
|
|
|
35
35
|
toolset=toolset, # type: ignore
|
|
36
36
|
)
|
|
37
37
|
|
|
38
|
-
def _invoke(
|
|
38
|
+
def _invoke(
|
|
39
|
+
self, params: dict, user_approved: bool = False
|
|
40
|
+
) -> StructuredToolResult:
|
|
39
41
|
link: str = params["link"]
|
|
40
42
|
|
|
41
43
|
search_paths = [DEFAULT_RUNBOOK_SEARCH_PATH]
|
|
@@ -126,7 +128,7 @@ class RunbookToolset(Toolset):
|
|
|
126
128
|
tools=[
|
|
127
129
|
RunbookFetcher(self),
|
|
128
130
|
],
|
|
129
|
-
docs_url="https://
|
|
131
|
+
docs_url="https://holmesgpt.dev/data-sources/",
|
|
130
132
|
tags=[
|
|
131
133
|
ToolsetTag.CORE,
|
|
132
134
|
],
|
|
@@ -115,7 +115,9 @@ class ReturnChangesInTimerange(ServiceNowBaseTool):
|
|
|
115
115
|
start = params.get("start", "last hour")
|
|
116
116
|
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Requests ({start})"
|
|
117
117
|
|
|
118
|
-
def _invoke(
|
|
118
|
+
def _invoke(
|
|
119
|
+
self, params: dict, user_approved: bool = False
|
|
120
|
+
) -> StructuredToolResult:
|
|
119
121
|
parsed_params = {}
|
|
120
122
|
try:
|
|
121
123
|
(start, _) = process_timestamps_to_rfc3339(
|
|
@@ -158,7 +160,9 @@ class ReturnChange(ServiceNowBaseTool):
|
|
|
158
160
|
sys_id = params.get("sys_id", "")
|
|
159
161
|
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Details ({sys_id})"
|
|
160
162
|
|
|
161
|
-
def _invoke(
|
|
163
|
+
def _invoke(
|
|
164
|
+
self, params: dict, user_approved: bool = False
|
|
165
|
+
) -> StructuredToolResult:
|
|
162
166
|
try:
|
|
163
167
|
url = "https://{instance}.service-now.com/api/now/v2/table/change_request/{sys_id}".format(
|
|
164
168
|
instance=self.toolset.config.get("instance"),
|
|
@@ -190,7 +194,9 @@ class ReturnChangesWithKeyword(ServiceNowBaseTool):
|
|
|
190
194
|
keyword = params.get("keyword", "")
|
|
191
195
|
return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Changes ({keyword})"
|
|
192
196
|
|
|
193
|
-
def _invoke(
|
|
197
|
+
def _invoke(
|
|
198
|
+
self, params: dict, user_approved: bool = False
|
|
199
|
+
) -> StructuredToolResult:
|
|
194
200
|
parsed_params = {}
|
|
195
201
|
try:
|
|
196
202
|
url = f"https://{self.toolset.config.get('instance')}.service-now.com/api/now/v2/table/change_request"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
toolsets:
|
|
2
2
|
slab:
|
|
3
3
|
description: "Fetches slab pages"
|
|
4
|
-
docs_url: "https://
|
|
4
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/slab/"
|
|
5
5
|
icon_url: "https://platform.robusta.dev/demos/slab-mark.svg"
|
|
6
6
|
tags:
|
|
7
7
|
- core
|