holmesgpt 0.13.0__py3-none-any.whl → 0.13.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +11 -0
- holmes/config.py +3 -1
- holmes/core/conversations.py +0 -11
- holmes/core/investigation.py +0 -6
- holmes/core/llm.py +63 -2
- holmes/core/prompt.py +0 -2
- holmes/core/supabase_dal.py +2 -2
- holmes/core/todo_tasks_formatter.py +51 -0
- holmes/core/tool_calling_llm.py +277 -101
- holmes/core/tools.py +20 -4
- holmes/core/toolset_manager.py +1 -5
- holmes/core/tracing.py +1 -1
- holmes/interactive.py +63 -2
- holmes/main.py +7 -2
- holmes/plugins/prompts/_fetch_logs.jinja2 +4 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +3 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +3 -13
- holmes/plugins/runbooks/CLAUDE.md +85 -0
- holmes/plugins/runbooks/README.md +24 -0
- holmes/plugins/toolsets/__init__.py +5 -1
- holmes/plugins/toolsets/argocd.yaml +1 -1
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +18 -6
- holmes/plugins/toolsets/aws.yaml +9 -5
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -1
- holmes/plugins/toolsets/bash/argocd/__init__.py +65 -0
- holmes/plugins/toolsets/bash/argocd/constants.py +120 -0
- holmes/plugins/toolsets/bash/aws/__init__.py +66 -0
- holmes/plugins/toolsets/bash/aws/constants.py +529 -0
- holmes/plugins/toolsets/bash/azure/__init__.py +56 -0
- holmes/plugins/toolsets/bash/azure/constants.py +339 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +6 -7
- holmes/plugins/toolsets/bash/bash_toolset.py +62 -17
- holmes/plugins/toolsets/bash/common/bash_command.py +131 -0
- holmes/plugins/toolsets/bash/common/stringify.py +14 -1
- holmes/plugins/toolsets/bash/common/validators.py +91 -0
- holmes/plugins/toolsets/bash/docker/__init__.py +59 -0
- holmes/plugins/toolsets/bash/docker/constants.py +255 -0
- holmes/plugins/toolsets/bash/helm/__init__.py +61 -0
- holmes/plugins/toolsets/bash/helm/constants.py +92 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +80 -79
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -14
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +38 -56
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +28 -76
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +39 -99
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +34 -15
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +38 -77
- holmes/plugins/toolsets/bash/parse_command.py +106 -32
- holmes/plugins/toolsets/bash/utilities/__init__.py +0 -0
- holmes/plugins/toolsets/bash/utilities/base64_util.py +12 -0
- holmes/plugins/toolsets/bash/utilities/cut.py +12 -0
- holmes/plugins/toolsets/bash/utilities/grep/__init__.py +10 -0
- holmes/plugins/toolsets/bash/utilities/head.py +12 -0
- holmes/plugins/toolsets/bash/utilities/jq.py +79 -0
- holmes/plugins/toolsets/bash/utilities/sed.py +164 -0
- holmes/plugins/toolsets/bash/utilities/sort.py +15 -0
- holmes/plugins/toolsets/bash/utilities/tail.py +12 -0
- holmes/plugins/toolsets/bash/utilities/tr.py +57 -0
- holmes/plugins/toolsets/bash/utilities/uniq.py +12 -0
- holmes/plugins/toolsets/bash/utilities/wc.py +12 -0
- holmes/plugins/toolsets/confluence.yaml +1 -1
- holmes/plugins/toolsets/coralogix/api.py +3 -1
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +4 -4
- holmes/plugins/toolsets/coralogix/utils.py +41 -14
- holmes/plugins/toolsets/datadog/datadog_api.py +45 -2
- holmes/plugins/toolsets/datadog/datadog_general_instructions.jinja2 +208 -0
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +43 -0
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +12 -9
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +722 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +17 -6
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +15 -7
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +6 -2
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +9 -3
- holmes/plugins/toolsets/docker.yaml +1 -1
- holmes/plugins/toolsets/git.py +15 -5
- holmes/plugins/toolsets/grafana/toolset_grafana.py +25 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +5 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -32
- holmes/plugins/toolsets/helm.yaml +1 -1
- holmes/plugins/toolsets/internet/internet.py +4 -2
- holmes/plugins/toolsets/internet/notion.py +4 -2
- holmes/plugins/toolsets/investigator/core_investigation.py +5 -17
- holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +1 -5
- holmes/plugins/toolsets/kafka.py +19 -7
- holmes/plugins/toolsets/kubernetes.yaml +5 -5
- holmes/plugins/toolsets/kubernetes_logs.py +4 -4
- holmes/plugins/toolsets/kubernetes_logs.yaml +1 -1
- holmes/plugins/toolsets/logging_utils/logging_api.py +15 -2
- holmes/plugins/toolsets/mcp/toolset_mcp.py +3 -1
- holmes/plugins/toolsets/newrelic.py +8 -4
- holmes/plugins/toolsets/opensearch/opensearch.py +13 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +4 -4
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +9 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +198 -57
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +7 -3
- holmes/plugins/toolsets/robusta/robusta.py +10 -4
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -2
- holmes/plugins/toolsets/servicenow/servicenow.py +9 -3
- holmes/plugins/toolsets/slab.yaml +1 -1
- holmes/utils/console/logging.py +6 -1
- {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/METADATA +3 -2
- {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/RECORD +116 -90
- holmes/core/todo_manager.py +0 -88
- holmes/plugins/toolsets/bash/grep/__init__.py +0 -52
- {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.0.dist-info → holmesgpt-0.13.2.dist-info}/entry_points.txt +0 -0
|
@@ -3,14 +3,15 @@ import logging
|
|
|
3
3
|
import os
|
|
4
4
|
import re
|
|
5
5
|
import time
|
|
6
|
+
import dateutil.parser
|
|
6
7
|
from typing import Any, Dict, List, Optional, Tuple, Type, Union
|
|
7
8
|
from urllib.parse import urljoin
|
|
8
9
|
|
|
9
10
|
import requests # type: ignore
|
|
10
11
|
from pydantic import BaseModel, field_validator, Field, model_validator
|
|
11
12
|
from requests import RequestException
|
|
12
|
-
from
|
|
13
|
-
|
|
13
|
+
from prometrix.connect.aws_connect import AWSPrometheusConnect
|
|
14
|
+
from prometrix.models.prometheus_config import PrometheusConfig as BasePrometheusConfig
|
|
14
15
|
from holmes.core.tools import (
|
|
15
16
|
CallablePrerequisite,
|
|
16
17
|
StructuredToolResult,
|
|
@@ -29,10 +30,10 @@ from holmes.plugins.toolsets.utils import (
|
|
|
29
30
|
toolset_name_for_one_liner,
|
|
30
31
|
)
|
|
31
32
|
from holmes.utils.cache import TTLCache
|
|
32
|
-
from holmes.common.env_vars import IS_OPENSHIFT
|
|
33
|
+
from holmes.common.env_vars import IS_OPENSHIFT, MAX_GRAPH_POINTS
|
|
33
34
|
from holmes.common.openshift import load_openshift_token
|
|
34
35
|
from holmes.plugins.toolsets.logging_utils.logging_api import (
|
|
35
|
-
|
|
36
|
+
DEFAULT_GRAPH_TIME_SPAN_SECONDS,
|
|
36
37
|
)
|
|
37
38
|
from holmes.utils.keygen_utils import generate_random_key
|
|
38
39
|
|
|
@@ -81,34 +82,105 @@ class PrometheusConfig(BaseModel):
|
|
|
81
82
|
def is_amp(self) -> bool:
|
|
82
83
|
return False
|
|
83
84
|
|
|
84
|
-
def get_auth(self) -> Any:
|
|
85
|
-
return None
|
|
86
|
-
|
|
87
85
|
|
|
88
86
|
class AMPConfig(PrometheusConfig):
|
|
89
|
-
aws_access_key: str
|
|
90
|
-
aws_secret_access_key: str
|
|
87
|
+
aws_access_key: Optional[str] = None
|
|
88
|
+
aws_secret_access_key: Optional[str] = None
|
|
91
89
|
aws_region: str
|
|
92
90
|
aws_service_name: str = "aps"
|
|
93
|
-
healthcheck: str = "api/v1/query?query=up"
|
|
91
|
+
healthcheck: str = "api/v1/query?query=up"
|
|
94
92
|
prometheus_ssl_enabled: bool = False
|
|
93
|
+
assume_role_arn: Optional[str] = None
|
|
94
|
+
|
|
95
|
+
# Refresh the AWS client (and its STS creds) every N seconds (default: 15 minutes)
|
|
96
|
+
refresh_interval_seconds: int = 900
|
|
97
|
+
|
|
98
|
+
_aws_client: Optional[AWSPrometheusConnect] = None
|
|
99
|
+
_aws_client_created_at: float = 0.0
|
|
95
100
|
|
|
96
101
|
def is_amp(self) -> bool:
|
|
97
102
|
return True
|
|
98
103
|
|
|
99
|
-
def
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
104
|
+
def _should_refresh_client(self) -> bool:
|
|
105
|
+
if not self._aws_client:
|
|
106
|
+
return True
|
|
107
|
+
return (
|
|
108
|
+
time.time() - self._aws_client_created_at
|
|
109
|
+
) >= self.refresh_interval_seconds
|
|
110
|
+
|
|
111
|
+
def get_aws_client(self) -> Optional[AWSPrometheusConnect]:
|
|
112
|
+
if not self._aws_client or self._should_refresh_client():
|
|
113
|
+
try:
|
|
114
|
+
base_config = BasePrometheusConfig(
|
|
115
|
+
url=self.prometheus_url,
|
|
116
|
+
disable_ssl=not self.prometheus_ssl_enabled,
|
|
117
|
+
additional_labels=self.additional_labels,
|
|
118
|
+
)
|
|
119
|
+
self._aws_client = AWSPrometheusConnect(
|
|
120
|
+
access_key=self.aws_access_key,
|
|
121
|
+
secret_key=self.aws_secret_access_key,
|
|
122
|
+
token=None,
|
|
123
|
+
region=self.aws_region,
|
|
124
|
+
service_name=self.aws_service_name,
|
|
125
|
+
assume_role_arn=self.assume_role_arn,
|
|
126
|
+
config=base_config,
|
|
127
|
+
)
|
|
128
|
+
self._aws_client_created_at = time.time()
|
|
129
|
+
except Exception:
|
|
130
|
+
logging.exception("Failed to create/refresh AWS client")
|
|
131
|
+
return self._aws_client
|
|
132
|
+
return self._aws_client
|
|
106
133
|
|
|
107
134
|
|
|
108
135
|
class BasePrometheusTool(Tool):
|
|
109
136
|
toolset: "PrometheusToolset"
|
|
110
137
|
|
|
111
138
|
|
|
139
|
+
def do_request(
|
|
140
|
+
config, # PrometheusConfig | AMPConfig
|
|
141
|
+
url: str,
|
|
142
|
+
params: Optional[Dict] = None,
|
|
143
|
+
data: Optional[Dict] = None,
|
|
144
|
+
timeout: int = 60,
|
|
145
|
+
verify: Optional[bool] = None,
|
|
146
|
+
headers: Optional[Dict] = None,
|
|
147
|
+
method: str = "GET",
|
|
148
|
+
) -> requests.Response:
|
|
149
|
+
"""
|
|
150
|
+
Route a request through either:
|
|
151
|
+
- AWSPrometheusConnect (SigV4) when config is AMPConfig
|
|
152
|
+
- plain requests otherwise
|
|
153
|
+
|
|
154
|
+
method defaults to GET so callers can omit it for reads.
|
|
155
|
+
"""
|
|
156
|
+
if verify is None:
|
|
157
|
+
verify = config.prometheus_ssl_enabled
|
|
158
|
+
if headers is None:
|
|
159
|
+
headers = config.headers or {}
|
|
160
|
+
|
|
161
|
+
if isinstance(config, AMPConfig):
|
|
162
|
+
client = config.get_aws_client() # cached AWSPrometheusConnect
|
|
163
|
+
return client.signed_request( # type: ignore
|
|
164
|
+
method=method,
|
|
165
|
+
url=url,
|
|
166
|
+
data=data,
|
|
167
|
+
params=params,
|
|
168
|
+
verify=verify,
|
|
169
|
+
headers=headers,
|
|
170
|
+
)
|
|
171
|
+
|
|
172
|
+
# Non-AMP: plain HTTP
|
|
173
|
+
return requests.request(
|
|
174
|
+
method=method,
|
|
175
|
+
url=url,
|
|
176
|
+
headers=headers,
|
|
177
|
+
params=params,
|
|
178
|
+
data=data,
|
|
179
|
+
timeout=timeout,
|
|
180
|
+
verify=verify,
|
|
181
|
+
)
|
|
182
|
+
|
|
183
|
+
|
|
112
184
|
def filter_metrics_by_type(metrics: Dict, expected_type: str):
|
|
113
185
|
return {
|
|
114
186
|
metric_name: metric_data
|
|
@@ -133,14 +205,18 @@ METRICS_SUFFIXES_TO_STRIP = ["_bucket", "_count", "_sum"]
|
|
|
133
205
|
def fetch_metadata(
|
|
134
206
|
prometheus_url: str,
|
|
135
207
|
headers: Optional[Dict],
|
|
136
|
-
|
|
208
|
+
config,
|
|
137
209
|
verify_ssl: bool = True,
|
|
138
210
|
) -> Dict:
|
|
139
211
|
metadata_url = urljoin(prometheus_url, "api/v1/metadata")
|
|
140
|
-
metadata_response =
|
|
141
|
-
|
|
212
|
+
metadata_response = do_request(
|
|
213
|
+
config=config,
|
|
214
|
+
url=metadata_url,
|
|
215
|
+
headers=headers,
|
|
216
|
+
timeout=60,
|
|
217
|
+
verify=verify_ssl,
|
|
218
|
+
method="GET",
|
|
142
219
|
)
|
|
143
|
-
|
|
144
220
|
metadata_response.raise_for_status()
|
|
145
221
|
|
|
146
222
|
metadata = metadata_response.json()["data"]
|
|
@@ -163,14 +239,20 @@ def fetch_metadata_with_series_api(
|
|
|
163
239
|
prometheus_url: str,
|
|
164
240
|
metric_name: str,
|
|
165
241
|
headers: Dict,
|
|
166
|
-
|
|
242
|
+
config,
|
|
167
243
|
verify_ssl: bool = True,
|
|
168
244
|
) -> Dict:
|
|
169
245
|
url = urljoin(prometheus_url, "api/v1/series")
|
|
170
246
|
params: Dict = {"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"}
|
|
171
247
|
|
|
172
|
-
response =
|
|
173
|
-
|
|
248
|
+
response = do_request(
|
|
249
|
+
config=config,
|
|
250
|
+
url=url,
|
|
251
|
+
headers=headers,
|
|
252
|
+
params=params,
|
|
253
|
+
timeout=60,
|
|
254
|
+
verify=verify_ssl,
|
|
255
|
+
method="GET",
|
|
174
256
|
)
|
|
175
257
|
response.raise_for_status()
|
|
176
258
|
metrics = response.json()["data"]
|
|
@@ -199,6 +281,42 @@ def result_has_data(result: Dict) -> bool:
|
|
|
199
281
|
return False
|
|
200
282
|
|
|
201
283
|
|
|
284
|
+
def adjust_step_for_max_points(
|
|
285
|
+
start_timestamp: str,
|
|
286
|
+
end_timestamp: str,
|
|
287
|
+
step: float,
|
|
288
|
+
) -> float:
|
|
289
|
+
"""
|
|
290
|
+
Adjusts the step parameter to ensure the number of data points doesn't exceed max_points.
|
|
291
|
+
Max points is controlled by the PROMETHEUS_MAX_GRAPH_POINTS environment variable (default: 300).
|
|
292
|
+
|
|
293
|
+
Args:
|
|
294
|
+
start_timestamp: RFC3339 formatted start time
|
|
295
|
+
end_timestamp: RFC3339 formatted end time
|
|
296
|
+
step: The requested step duration in seconds
|
|
297
|
+
|
|
298
|
+
Returns:
|
|
299
|
+
Adjusted step value in seconds that ensures points <= max_points
|
|
300
|
+
"""
|
|
301
|
+
|
|
302
|
+
start_dt = dateutil.parser.parse(start_timestamp)
|
|
303
|
+
end_dt = dateutil.parser.parse(end_timestamp)
|
|
304
|
+
|
|
305
|
+
time_range_seconds = (end_dt - start_dt).total_seconds()
|
|
306
|
+
|
|
307
|
+
current_points = time_range_seconds / step
|
|
308
|
+
|
|
309
|
+
# If current points exceed max, adjust the step
|
|
310
|
+
if current_points > MAX_GRAPH_POINTS:
|
|
311
|
+
adjusted_step = time_range_seconds / MAX_GRAPH_POINTS
|
|
312
|
+
logging.info(
|
|
313
|
+
f"Adjusting step from {step}s to {adjusted_step}s to limit points from {current_points:.0f} to {MAX_GRAPH_POINTS}"
|
|
314
|
+
)
|
|
315
|
+
return adjusted_step
|
|
316
|
+
|
|
317
|
+
return step
|
|
318
|
+
|
|
319
|
+
|
|
202
320
|
def add_prometheus_auth(prometheus_auth_header: Optional[str]) -> Dict[str, Any]:
|
|
203
321
|
results = {}
|
|
204
322
|
if prometheus_auth_header:
|
|
@@ -212,7 +330,7 @@ def fetch_metrics_labels_with_series_api(
|
|
|
212
330
|
cache: Optional[TTLCache],
|
|
213
331
|
metrics_labels_time_window_hrs: Union[int, None],
|
|
214
332
|
metric_name: str,
|
|
215
|
-
|
|
333
|
+
config=None,
|
|
216
334
|
verify_ssl: bool = True,
|
|
217
335
|
) -> dict:
|
|
218
336
|
"""This is a slow query. Takes 5+ seconds to run"""
|
|
@@ -229,13 +347,14 @@ def fetch_metrics_labels_with_series_api(
|
|
|
229
347
|
params["end"] = int(time.time())
|
|
230
348
|
params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
|
|
231
349
|
|
|
232
|
-
series_response =
|
|
350
|
+
series_response = do_request(
|
|
351
|
+
config=config,
|
|
233
352
|
url=series_url,
|
|
234
353
|
headers=headers,
|
|
235
354
|
params=params,
|
|
236
|
-
auth=auth,
|
|
237
355
|
timeout=60,
|
|
238
356
|
verify=verify_ssl,
|
|
357
|
+
method="GET",
|
|
239
358
|
)
|
|
240
359
|
series_response.raise_for_status()
|
|
241
360
|
series = series_response.json()["data"]
|
|
@@ -261,7 +380,7 @@ def fetch_metrics_labels_with_labels_api(
|
|
|
261
380
|
metrics_labels_time_window_hrs: Union[int, None],
|
|
262
381
|
metric_names: List[str],
|
|
263
382
|
headers: Dict,
|
|
264
|
-
|
|
383
|
+
config=None,
|
|
265
384
|
verify_ssl: bool = True,
|
|
266
385
|
) -> dict:
|
|
267
386
|
metrics_labels = {}
|
|
@@ -281,13 +400,14 @@ def fetch_metrics_labels_with_labels_api(
|
|
|
281
400
|
params["end"] = int(time.time())
|
|
282
401
|
params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
|
|
283
402
|
|
|
284
|
-
response =
|
|
403
|
+
response = do_request(
|
|
404
|
+
config=config,
|
|
285
405
|
url=url,
|
|
286
406
|
headers=headers,
|
|
287
407
|
params=params,
|
|
288
|
-
auth=auth,
|
|
289
408
|
timeout=60,
|
|
290
409
|
verify=verify_ssl,
|
|
410
|
+
method="GET",
|
|
291
411
|
)
|
|
292
412
|
response.raise_for_status()
|
|
293
413
|
labels = response.json()["data"]
|
|
@@ -308,7 +428,7 @@ def fetch_metrics(
|
|
|
308
428
|
should_fetch_labels_with_labels_api: bool,
|
|
309
429
|
should_fetch_metadata_with_series_api: bool,
|
|
310
430
|
headers: Dict,
|
|
311
|
-
|
|
431
|
+
config=None,
|
|
312
432
|
verify_ssl: bool = True,
|
|
313
433
|
) -> dict:
|
|
314
434
|
metrics = None
|
|
@@ -318,7 +438,7 @@ def fetch_metrics(
|
|
|
318
438
|
prometheus_url=prometheus_url,
|
|
319
439
|
metric_name=metric_name,
|
|
320
440
|
headers=headers,
|
|
321
|
-
|
|
441
|
+
config=config,
|
|
322
442
|
verify_ssl=verify_ssl,
|
|
323
443
|
)
|
|
324
444
|
should_fetch_labels = False # series API returns the labels
|
|
@@ -326,7 +446,7 @@ def fetch_metrics(
|
|
|
326
446
|
metrics = fetch_metadata(
|
|
327
447
|
prometheus_url=prometheus_url,
|
|
328
448
|
headers=headers,
|
|
329
|
-
|
|
449
|
+
config=config,
|
|
330
450
|
verify_ssl=verify_ssl,
|
|
331
451
|
)
|
|
332
452
|
metrics = filter_metrics_by_name(metrics, metric_name)
|
|
@@ -340,7 +460,7 @@ def fetch_metrics(
|
|
|
340
460
|
metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
|
|
341
461
|
metric_names=list(metrics.keys()),
|
|
342
462
|
headers=headers,
|
|
343
|
-
|
|
463
|
+
config=config,
|
|
344
464
|
verify_ssl=verify_ssl,
|
|
345
465
|
)
|
|
346
466
|
else:
|
|
@@ -350,7 +470,7 @@ def fetch_metrics(
|
|
|
350
470
|
metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
|
|
351
471
|
metric_name=metric_name,
|
|
352
472
|
headers=headers,
|
|
353
|
-
|
|
473
|
+
config=config,
|
|
354
474
|
verify_ssl=verify_ssl,
|
|
355
475
|
)
|
|
356
476
|
|
|
@@ -371,7 +491,9 @@ class ListPrometheusRules(BasePrometheusTool):
|
|
|
371
491
|
)
|
|
372
492
|
self._cache = None
|
|
373
493
|
|
|
374
|
-
def _invoke(
|
|
494
|
+
def _invoke(
|
|
495
|
+
self, params: dict, user_approved: bool = False
|
|
496
|
+
) -> StructuredToolResult:
|
|
375
497
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
376
498
|
return StructuredToolResult(
|
|
377
499
|
status=ToolResultStatus.ERROR,
|
|
@@ -402,13 +524,14 @@ class ListPrometheusRules(BasePrometheusTool):
|
|
|
402
524
|
|
|
403
525
|
rules_url = urljoin(prometheus_url, "api/v1/rules")
|
|
404
526
|
|
|
405
|
-
rules_response =
|
|
527
|
+
rules_response = do_request(
|
|
528
|
+
config=self.toolset.config,
|
|
406
529
|
url=rules_url,
|
|
407
530
|
params=params,
|
|
408
|
-
auth=self.toolset.config.get_auth(),
|
|
409
531
|
timeout=180,
|
|
410
532
|
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
411
533
|
headers=self.toolset.config.headers,
|
|
534
|
+
method="GET",
|
|
412
535
|
)
|
|
413
536
|
rules_response.raise_for_status()
|
|
414
537
|
data = rules_response.json()["data"]
|
|
@@ -467,7 +590,9 @@ class ListAvailableMetrics(BasePrometheusTool):
|
|
|
467
590
|
)
|
|
468
591
|
self._cache = None
|
|
469
592
|
|
|
470
|
-
def _invoke(
|
|
593
|
+
def _invoke(
|
|
594
|
+
self, params: dict, user_approved: bool = False
|
|
595
|
+
) -> StructuredToolResult:
|
|
471
596
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
472
597
|
return StructuredToolResult(
|
|
473
598
|
status=ToolResultStatus.ERROR,
|
|
@@ -500,12 +625,13 @@ class ListAvailableMetrics(BasePrometheusTool):
|
|
|
500
625
|
should_fetch_labels_with_labels_api=self.toolset.config.fetch_labels_with_labels_api,
|
|
501
626
|
should_fetch_metadata_with_series_api=self.toolset.config.fetch_metadata_with_series_api,
|
|
502
627
|
headers=self.toolset.config.headers,
|
|
503
|
-
|
|
628
|
+
config=self.toolset.config,
|
|
504
629
|
verify_ssl=self.toolset.config.prometheus_ssl_enabled,
|
|
505
630
|
)
|
|
506
631
|
|
|
507
|
-
|
|
508
|
-
|
|
632
|
+
type_filter = params.get("type_filter")
|
|
633
|
+
if type_filter:
|
|
634
|
+
metrics = filter_metrics_by_type(metrics, type_filter)
|
|
509
635
|
|
|
510
636
|
output = ["Metric | Description | Type | Labels"]
|
|
511
637
|
output.append("-" * 100)
|
|
@@ -572,7 +698,9 @@ class ExecuteInstantQuery(BasePrometheusTool):
|
|
|
572
698
|
toolset=toolset,
|
|
573
699
|
)
|
|
574
700
|
|
|
575
|
-
def _invoke(
|
|
701
|
+
def _invoke(
|
|
702
|
+
self, params: dict, user_approved: bool = False
|
|
703
|
+
) -> StructuredToolResult:
|
|
576
704
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
577
705
|
return StructuredToolResult(
|
|
578
706
|
status=ToolResultStatus.ERROR,
|
|
@@ -587,12 +715,14 @@ class ExecuteInstantQuery(BasePrometheusTool):
|
|
|
587
715
|
|
|
588
716
|
payload = {"query": query}
|
|
589
717
|
|
|
590
|
-
response =
|
|
718
|
+
response = do_request(
|
|
719
|
+
config=self.toolset.config,
|
|
591
720
|
url=url,
|
|
592
721
|
headers=self.toolset.config.headers,
|
|
593
|
-
auth=self.toolset.config.get_auth(),
|
|
594
722
|
data=payload,
|
|
595
723
|
timeout=60,
|
|
724
|
+
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
725
|
+
method="POST",
|
|
596
726
|
)
|
|
597
727
|
|
|
598
728
|
if response.status_code == 200:
|
|
@@ -684,7 +814,7 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
684
814
|
),
|
|
685
815
|
"start": ToolParameter(
|
|
686
816
|
description=standard_start_datetime_tool_param_description(
|
|
687
|
-
|
|
817
|
+
DEFAULT_GRAPH_TIME_SPAN_SECONDS
|
|
688
818
|
),
|
|
689
819
|
type="string",
|
|
690
820
|
required=False,
|
|
@@ -708,7 +838,9 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
708
838
|
toolset=toolset,
|
|
709
839
|
)
|
|
710
840
|
|
|
711
|
-
def _invoke(
|
|
841
|
+
def _invoke(
|
|
842
|
+
self, params: dict, user_approved: bool = False
|
|
843
|
+
) -> StructuredToolResult:
|
|
712
844
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
713
845
|
return StructuredToolResult(
|
|
714
846
|
status=ToolResultStatus.ERROR,
|
|
@@ -723,9 +855,16 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
723
855
|
(start, end) = process_timestamps_to_rfc3339(
|
|
724
856
|
start_timestamp=params.get("start"),
|
|
725
857
|
end_timestamp=params.get("end"),
|
|
726
|
-
default_time_span_seconds=
|
|
858
|
+
default_time_span_seconds=DEFAULT_GRAPH_TIME_SPAN_SECONDS,
|
|
727
859
|
)
|
|
728
860
|
step = params.get("step", "")
|
|
861
|
+
|
|
862
|
+
step = adjust_step_for_max_points(
|
|
863
|
+
start_timestamp=start,
|
|
864
|
+
end_timestamp=end,
|
|
865
|
+
step=float(step) if step else MAX_GRAPH_POINTS,
|
|
866
|
+
)
|
|
867
|
+
|
|
729
868
|
description = params.get("description", "")
|
|
730
869
|
output_type = params.get("output_type", "Plain")
|
|
731
870
|
payload = {
|
|
@@ -735,12 +874,14 @@ class ExecuteRangeQuery(BasePrometheusTool):
|
|
|
735
874
|
"step": step,
|
|
736
875
|
}
|
|
737
876
|
|
|
738
|
-
response =
|
|
877
|
+
response = do_request(
|
|
878
|
+
config=self.toolset.config,
|
|
739
879
|
url=url,
|
|
740
880
|
headers=self.toolset.config.headers,
|
|
741
|
-
auth=self.toolset.config.get_auth(),
|
|
742
881
|
data=payload,
|
|
743
882
|
timeout=120,
|
|
883
|
+
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
884
|
+
method="POST",
|
|
744
885
|
)
|
|
745
886
|
|
|
746
887
|
if response.status_code == 200:
|
|
@@ -823,7 +964,7 @@ class PrometheusToolset(Toolset):
|
|
|
823
964
|
super().__init__(
|
|
824
965
|
name="prometheus/metrics",
|
|
825
966
|
description="Prometheus integration to fetch metadata and execute PromQL queries",
|
|
826
|
-
docs_url="https://
|
|
967
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/prometheus/",
|
|
827
968
|
icon_url="https://upload.wikimedia.org/wikipedia/commons/3/38/Prometheus_software_logo.svg",
|
|
828
969
|
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
|
|
829
970
|
tools=[
|
|
@@ -847,10 +988,8 @@ class PrometheusToolset(Toolset):
|
|
|
847
988
|
def determine_prometheus_class(
|
|
848
989
|
self, config: dict[str, Any]
|
|
849
990
|
) -> Type[Union[PrometheusConfig, AMPConfig]]:
|
|
850
|
-
|
|
851
|
-
|
|
852
|
-
)
|
|
853
|
-
return AMPConfig if has_aws_credentials else PrometheusConfig
|
|
991
|
+
has_aws_fields = "aws_region" in config
|
|
992
|
+
return AMPConfig if has_aws_fields else PrometheusConfig
|
|
854
993
|
|
|
855
994
|
def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
|
|
856
995
|
try:
|
|
@@ -904,12 +1043,13 @@ class PrometheusToolset(Toolset):
|
|
|
904
1043
|
|
|
905
1044
|
url = urljoin(self.config.prometheus_url, self.config.healthcheck)
|
|
906
1045
|
try:
|
|
907
|
-
response =
|
|
1046
|
+
response = do_request(
|
|
1047
|
+
config=self.config,
|
|
908
1048
|
url=url,
|
|
909
1049
|
headers=self.config.headers,
|
|
910
|
-
auth=self.config.get_auth(),
|
|
911
1050
|
timeout=10,
|
|
912
1051
|
verify=self.config.prometheus_ssl_enabled,
|
|
1052
|
+
method="GET",
|
|
913
1053
|
)
|
|
914
1054
|
|
|
915
1055
|
if response.status_code == 200:
|
|
@@ -926,6 +1066,7 @@ class PrometheusToolset(Toolset):
|
|
|
926
1066
|
f"Failed to initialize using url={url}",
|
|
927
1067
|
)
|
|
928
1068
|
except Exception as e:
|
|
1069
|
+
logging.exception("Failed to initialize Prometheus")
|
|
929
1070
|
return (
|
|
930
1071
|
False,
|
|
931
1072
|
f"Failed to initialize using url={url}. Unexpected error: {str(e)}",
|
|
@@ -63,7 +63,9 @@ class ListConfiguredClusters(BaseRabbitMQTool):
|
|
|
63
63
|
toolset=toolset,
|
|
64
64
|
)
|
|
65
65
|
|
|
66
|
-
def _invoke(
|
|
66
|
+
def _invoke(
|
|
67
|
+
self, params: dict, user_approved: bool = False
|
|
68
|
+
) -> StructuredToolResult:
|
|
67
69
|
if not self.toolset.config:
|
|
68
70
|
raise ValueError("RabbitMQ is not configured.")
|
|
69
71
|
|
|
@@ -101,7 +103,9 @@ class GetRabbitMQClusterStatus(BaseRabbitMQTool):
|
|
|
101
103
|
toolset=toolset,
|
|
102
104
|
)
|
|
103
105
|
|
|
104
|
-
def _invoke(
|
|
106
|
+
def _invoke(
|
|
107
|
+
self, params: dict, user_approved: bool = False
|
|
108
|
+
) -> StructuredToolResult:
|
|
105
109
|
try:
|
|
106
110
|
# Fetch node details which include partition info
|
|
107
111
|
cluster_config = self._get_cluster_config(
|
|
@@ -130,7 +134,7 @@ class RabbitMQToolset(Toolset):
|
|
|
130
134
|
super().__init__(
|
|
131
135
|
name="rabbitmq/core",
|
|
132
136
|
description="Provides tools to interact with RabbitMQ to diagnose cluster health, node status, and specifically network partitions (split-brain).",
|
|
133
|
-
docs_url="https://
|
|
137
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/rabbitmq/",
|
|
134
138
|
icon_url="https://cdn.worldvectorlogo.com/logos/rabbitmq.svg",
|
|
135
139
|
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
|
|
136
140
|
tools=[
|
|
@@ -45,7 +45,9 @@ class FetchRobustaFinding(Tool):
|
|
|
45
45
|
logging.error(error)
|
|
46
46
|
return {"error": error}
|
|
47
47
|
|
|
48
|
-
def _invoke(
|
|
48
|
+
def _invoke(
|
|
49
|
+
self, params: dict, user_approved: bool = False
|
|
50
|
+
) -> StructuredToolResult:
|
|
49
51
|
finding_id = params[PARAM_FINDING_ID]
|
|
50
52
|
try:
|
|
51
53
|
finding = self._fetch_finding(finding_id)
|
|
@@ -113,7 +115,9 @@ class FetchResourceRecommendation(Tool):
|
|
|
113
115
|
)
|
|
114
116
|
return None
|
|
115
117
|
|
|
116
|
-
def _invoke(
|
|
118
|
+
def _invoke(
|
|
119
|
+
self, params: dict, user_approved: bool = False
|
|
120
|
+
) -> StructuredToolResult:
|
|
117
121
|
try:
|
|
118
122
|
recommendations = self._resource_recommendation(params)
|
|
119
123
|
if recommendations:
|
|
@@ -171,7 +175,9 @@ class FetchConfigurationChanges(Tool):
|
|
|
171
175
|
)
|
|
172
176
|
return None
|
|
173
177
|
|
|
174
|
-
def _invoke(
|
|
178
|
+
def _invoke(
|
|
179
|
+
self, params: dict, user_approved: bool = False
|
|
180
|
+
) -> StructuredToolResult:
|
|
175
181
|
try:
|
|
176
182
|
changes = self._fetch_change_history(params)
|
|
177
183
|
if changes:
|
|
@@ -213,7 +219,7 @@ class RobustaToolset(Toolset):
|
|
|
213
219
|
super().__init__(
|
|
214
220
|
icon_url="https://cdn.prod.website-files.com/633e9bac8f71dfb7a8e4c9a6/646be7710db810b14133bdb5_logo.svg",
|
|
215
221
|
description="Fetches alerts metadata and change history",
|
|
216
|
-
docs_url="https://
|
|
222
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/robusta/",
|
|
217
223
|
name="robusta",
|
|
218
224
|
prerequisites=[dal_prereq],
|
|
219
225
|
tools=[
|
|
@@ -35,7 +35,9 @@ class RunbookFetcher(Tool):
|
|
|
35
35
|
toolset=toolset, # type: ignore
|
|
36
36
|
)
|
|
37
37
|
|
|
38
|
-
def _invoke(
|
|
38
|
+
def _invoke(
|
|
39
|
+
self, params: dict, user_approved: bool = False
|
|
40
|
+
) -> StructuredToolResult:
|
|
39
41
|
link: str = params["link"]
|
|
40
42
|
|
|
41
43
|
search_paths = [DEFAULT_RUNBOOK_SEARCH_PATH]
|
|
@@ -126,7 +128,7 @@ class RunbookToolset(Toolset):
|
|
|
126
128
|
tools=[
|
|
127
129
|
RunbookFetcher(self),
|
|
128
130
|
],
|
|
129
|
-
docs_url="https://
|
|
131
|
+
docs_url="https://holmesgpt.dev/data-sources/",
|
|
130
132
|
tags=[
|
|
131
133
|
ToolsetTag.CORE,
|
|
132
134
|
],
|
|
@@ -115,7 +115,9 @@ class ReturnChangesInTimerange(ServiceNowBaseTool):
|
|
|
115
115
|
start = params.get("start", "last hour")
|
|
116
116
|
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Requests ({start})"
|
|
117
117
|
|
|
118
|
-
def _invoke(
|
|
118
|
+
def _invoke(
|
|
119
|
+
self, params: dict, user_approved: bool = False
|
|
120
|
+
) -> StructuredToolResult:
|
|
119
121
|
parsed_params = {}
|
|
120
122
|
try:
|
|
121
123
|
(start, _) = process_timestamps_to_rfc3339(
|
|
@@ -158,7 +160,9 @@ class ReturnChange(ServiceNowBaseTool):
|
|
|
158
160
|
sys_id = params.get("sys_id", "")
|
|
159
161
|
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Change Details ({sys_id})"
|
|
160
162
|
|
|
161
|
-
def _invoke(
|
|
163
|
+
def _invoke(
|
|
164
|
+
self, params: dict, user_approved: bool = False
|
|
165
|
+
) -> StructuredToolResult:
|
|
162
166
|
try:
|
|
163
167
|
url = "https://{instance}.service-now.com/api/now/v2/table/change_request/{sys_id}".format(
|
|
164
168
|
instance=self.toolset.config.get("instance"),
|
|
@@ -190,7 +194,9 @@ class ReturnChangesWithKeyword(ServiceNowBaseTool):
|
|
|
190
194
|
keyword = params.get("keyword", "")
|
|
191
195
|
return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Changes ({keyword})"
|
|
192
196
|
|
|
193
|
-
def _invoke(
|
|
197
|
+
def _invoke(
|
|
198
|
+
self, params: dict, user_approved: bool = False
|
|
199
|
+
) -> StructuredToolResult:
|
|
194
200
|
parsed_params = {}
|
|
195
201
|
try:
|
|
196
202
|
url = f"https://{self.toolset.config.get('instance')}.service-now.com/api/now/v2/table/change_request"
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
toolsets:
|
|
2
2
|
slab:
|
|
3
3
|
description: "Fetches slab pages"
|
|
4
|
-
docs_url: "https://
|
|
4
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/slab/"
|
|
5
5
|
icon_url: "https://platform.robusta.dev/demos/slab-mark.svg"
|
|
6
6
|
tags:
|
|
7
7
|
- core
|
holmes/utils/console/logging.py
CHANGED
|
@@ -41,9 +41,14 @@ def suppress_noisy_logs():
|
|
|
41
41
|
warnings.filterwarnings("ignore", category=UserWarning, module="slack_sdk.*")
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
def init_logging(verbose_flags: Optional[List[bool]] = None):
|
|
44
|
+
def init_logging(verbose_flags: Optional[List[bool]] = None, log_costs: bool = False):
|
|
45
45
|
verbosity = cli_flags_to_verbosity(verbose_flags) # type: ignore
|
|
46
46
|
|
|
47
|
+
# Setup cost logger if requested
|
|
48
|
+
if log_costs:
|
|
49
|
+
cost_logger = logging.getLogger("holmes.costs")
|
|
50
|
+
cost_logger.setLevel(logging.DEBUG)
|
|
51
|
+
|
|
47
52
|
if verbosity == Verbosity.VERY_VERBOSE:
|
|
48
53
|
logging.basicConfig(
|
|
49
54
|
force=True,
|