holmesgpt 0.11.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/.git_archival.json +7 -0
- holmes/__init__.py +76 -0
- holmes/__init__.py.bak +76 -0
- holmes/clients/robusta_client.py +24 -0
- holmes/common/env_vars.py +47 -0
- holmes/config.py +526 -0
- holmes/core/__init__.py +0 -0
- holmes/core/conversations.py +578 -0
- holmes/core/investigation.py +152 -0
- holmes/core/investigation_structured_output.py +264 -0
- holmes/core/issue.py +54 -0
- holmes/core/llm.py +250 -0
- holmes/core/models.py +157 -0
- holmes/core/openai_formatting.py +51 -0
- holmes/core/performance_timing.py +72 -0
- holmes/core/prompt.py +42 -0
- holmes/core/resource_instruction.py +17 -0
- holmes/core/runbooks.py +26 -0
- holmes/core/safeguards.py +120 -0
- holmes/core/supabase_dal.py +540 -0
- holmes/core/tool_calling_llm.py +798 -0
- holmes/core/tools.py +566 -0
- holmes/core/tools_utils/__init__.py +0 -0
- holmes/core/tools_utils/tool_executor.py +65 -0
- holmes/core/tools_utils/toolset_utils.py +52 -0
- holmes/core/toolset_manager.py +418 -0
- holmes/interactive.py +229 -0
- holmes/main.py +1041 -0
- holmes/plugins/__init__.py +0 -0
- holmes/plugins/destinations/__init__.py +6 -0
- holmes/plugins/destinations/slack/__init__.py +2 -0
- holmes/plugins/destinations/slack/plugin.py +163 -0
- holmes/plugins/interfaces.py +32 -0
- holmes/plugins/prompts/__init__.py +48 -0
- holmes/plugins/prompts/_current_date_time.jinja2 +1 -0
- holmes/plugins/prompts/_default_log_prompt.jinja2 +11 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +36 -0
- holmes/plugins/prompts/_general_instructions.jinja2 +86 -0
- holmes/plugins/prompts/_global_instructions.jinja2 +12 -0
- holmes/plugins/prompts/_runbook_instructions.jinja2 +13 -0
- holmes/plugins/prompts/_toolsets_instructions.jinja2 +56 -0
- holmes/plugins/prompts/generic_ask.jinja2 +36 -0
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +32 -0
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +50 -0
- holmes/plugins/prompts/generic_investigation.jinja2 +42 -0
- holmes/plugins/prompts/generic_post_processing.jinja2 +13 -0
- holmes/plugins/prompts/generic_ticket.jinja2 +12 -0
- holmes/plugins/prompts/investigation_output_format.jinja2 +32 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +84 -0
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +39 -0
- holmes/plugins/runbooks/README.md +22 -0
- holmes/plugins/runbooks/__init__.py +100 -0
- holmes/plugins/runbooks/catalog.json +14 -0
- holmes/plugins/runbooks/jira.yaml +12 -0
- holmes/plugins/runbooks/kube-prometheus-stack.yaml +10 -0
- holmes/plugins/runbooks/networking/dns_troubleshooting_instructions.md +66 -0
- holmes/plugins/runbooks/upgrade/upgrade_troubleshooting_instructions.md +44 -0
- holmes/plugins/sources/github/__init__.py +77 -0
- holmes/plugins/sources/jira/__init__.py +123 -0
- holmes/plugins/sources/opsgenie/__init__.py +93 -0
- holmes/plugins/sources/pagerduty/__init__.py +147 -0
- holmes/plugins/sources/prometheus/__init__.py +0 -0
- holmes/plugins/sources/prometheus/models.py +104 -0
- holmes/plugins/sources/prometheus/plugin.py +154 -0
- holmes/plugins/toolsets/__init__.py +171 -0
- holmes/plugins/toolsets/aks-node-health.yaml +65 -0
- holmes/plugins/toolsets/aks.yaml +86 -0
- holmes/plugins/toolsets/argocd.yaml +70 -0
- holmes/plugins/toolsets/atlas_mongodb/instructions.jinja2 +8 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +307 -0
- holmes/plugins/toolsets/aws.yaml +76 -0
- holmes/plugins/toolsets/azure_sql/__init__.py +0 -0
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +600 -0
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +309 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +445 -0
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +251 -0
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +317 -0
- holmes/plugins/toolsets/azure_sql/azure_base_toolset.py +55 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_instructions.jinja2 +137 -0
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +183 -0
- holmes/plugins/toolsets/azure_sql/install.md +66 -0
- holmes/plugins/toolsets/azure_sql/tools/__init__.py +1 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +324 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +243 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +205 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +249 -0
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +373 -0
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +237 -0
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +172 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +170 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +188 -0
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +180 -0
- holmes/plugins/toolsets/azure_sql/utils.py +83 -0
- holmes/plugins/toolsets/bash/__init__.py +0 -0
- holmes/plugins/toolsets/bash/bash_instructions.jinja2 +14 -0
- holmes/plugins/toolsets/bash/bash_toolset.py +208 -0
- holmes/plugins/toolsets/bash/common/bash.py +52 -0
- holmes/plugins/toolsets/bash/common/config.py +14 -0
- holmes/plugins/toolsets/bash/common/stringify.py +25 -0
- holmes/plugins/toolsets/bash/common/validators.py +24 -0
- holmes/plugins/toolsets/bash/grep/__init__.py +52 -0
- holmes/plugins/toolsets/bash/kubectl/__init__.py +100 -0
- holmes/plugins/toolsets/bash/kubectl/constants.py +96 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_describe.py +66 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_events.py +88 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +108 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_logs.py +20 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_run.py +46 -0
- holmes/plugins/toolsets/bash/kubectl/kubectl_top.py +81 -0
- holmes/plugins/toolsets/bash/parse_command.py +103 -0
- holmes/plugins/toolsets/confluence.yaml +19 -0
- holmes/plugins/toolsets/consts.py +5 -0
- holmes/plugins/toolsets/coralogix/api.py +158 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +103 -0
- holmes/plugins/toolsets/coralogix/utils.py +181 -0
- holmes/plugins/toolsets/datadog.py +153 -0
- holmes/plugins/toolsets/docker.yaml +46 -0
- holmes/plugins/toolsets/git.py +756 -0
- holmes/plugins/toolsets/grafana/__init__.py +0 -0
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +54 -0
- holmes/plugins/toolsets/grafana/common.py +68 -0
- holmes/plugins/toolsets/grafana/grafana_api.py +31 -0
- holmes/plugins/toolsets/grafana/loki_api.py +89 -0
- holmes/plugins/toolsets/grafana/tempo_api.py +124 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +102 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +10 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -0
- holmes/plugins/toolsets/grafana/trace_parser.py +195 -0
- holmes/plugins/toolsets/helm.yaml +42 -0
- holmes/plugins/toolsets/internet/internet.py +275 -0
- holmes/plugins/toolsets/internet/notion.py +137 -0
- holmes/plugins/toolsets/kafka.py +638 -0
- holmes/plugins/toolsets/kubernetes.yaml +255 -0
- holmes/plugins/toolsets/kubernetes_logs.py +426 -0
- holmes/plugins/toolsets/kubernetes_logs.yaml +42 -0
- holmes/plugins/toolsets/logging_utils/__init__.py +0 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +217 -0
- holmes/plugins/toolsets/logging_utils/types.py +0 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +135 -0
- holmes/plugins/toolsets/newrelic.py +222 -0
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +245 -0
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +151 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +211 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +12 -0
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +166 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +818 -0
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +38 -0
- holmes/plugins/toolsets/rabbitmq/api.py +398 -0
- holmes/plugins/toolsets/rabbitmq/rabbitmq_instructions.jinja2 +37 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +222 -0
- holmes/plugins/toolsets/robusta/__init__.py +0 -0
- holmes/plugins/toolsets/robusta/robusta.py +235 -0
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +24 -0
- holmes/plugins/toolsets/runbook/__init__.py +0 -0
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +78 -0
- holmes/plugins/toolsets/service_discovery.py +92 -0
- holmes/plugins/toolsets/servicenow/install.md +37 -0
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +3 -0
- holmes/plugins/toolsets/servicenow/servicenow.py +198 -0
- holmes/plugins/toolsets/slab.yaml +20 -0
- holmes/plugins/toolsets/utils.py +137 -0
- holmes/plugins/utils.py +14 -0
- holmes/utils/__init__.py +0 -0
- holmes/utils/cache.py +84 -0
- holmes/utils/cert_utils.py +40 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +44 -0
- holmes/utils/definitions.py +13 -0
- holmes/utils/env.py +53 -0
- holmes/utils/file_utils.py +56 -0
- holmes/utils/global_instructions.py +20 -0
- holmes/utils/holmes_status.py +22 -0
- holmes/utils/holmes_sync_toolsets.py +80 -0
- holmes/utils/markdown_utils.py +55 -0
- holmes/utils/pydantic_utils.py +54 -0
- holmes/utils/robusta.py +10 -0
- holmes/utils/tags.py +97 -0
- holmesgpt-0.11.5.dist-info/LICENSE.txt +21 -0
- holmesgpt-0.11.5.dist-info/METADATA +400 -0
- holmesgpt-0.11.5.dist-info/RECORD +183 -0
- holmesgpt-0.11.5.dist-info/WHEEL +4 -0
- holmesgpt-0.11.5.dist-info/entry_points.txt +3 -0
|
@@ -0,0 +1,818 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import random
|
|
5
|
+
import re
|
|
6
|
+
import string
|
|
7
|
+
import time
|
|
8
|
+
from typing import Any, Dict, List, Optional, Tuple, Union
|
|
9
|
+
from urllib.parse import urljoin
|
|
10
|
+
|
|
11
|
+
import requests # type: ignore
|
|
12
|
+
from pydantic import BaseModel, field_validator
|
|
13
|
+
from requests import RequestException
|
|
14
|
+
|
|
15
|
+
from holmes.core.tools import (
|
|
16
|
+
CallablePrerequisite,
|
|
17
|
+
StructuredToolResult,
|
|
18
|
+
Tool,
|
|
19
|
+
ToolParameter,
|
|
20
|
+
ToolResultStatus,
|
|
21
|
+
Toolset,
|
|
22
|
+
ToolsetTag,
|
|
23
|
+
)
|
|
24
|
+
from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
|
|
25
|
+
from holmes.plugins.toolsets.service_discovery import PrometheusDiscovery
|
|
26
|
+
from holmes.plugins.toolsets.utils import (
|
|
27
|
+
get_param_or_raise,
|
|
28
|
+
process_timestamps_to_rfc3339,
|
|
29
|
+
standard_start_datetime_tool_param_description,
|
|
30
|
+
)
|
|
31
|
+
from holmes.utils.cache import TTLCache
|
|
32
|
+
|
|
33
|
+
# Cache key under which the full /api/v1/rules payload is stored (see ListPrometheusRules).
PROMETHEUS_RULES_CACHE_KEY = "cached_prometheus_rules"
# Default look-back window (seconds) used by range-query tool parameter descriptions.
DEFAULT_TIME_SPAN_SECONDS = 3600
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class PrometheusConfig(BaseModel):
    """Configuration for the Prometheus toolset: server URL, caching and fetch behavior."""

    # URL is optional because it can be set with an env var
    prometheus_url: Optional[str]
    # Path (relative to prometheus_url) used to check the server is reachable.
    healthcheck: str = "-/healthy"
    # Setting to None will remove the time window from the request for labels
    metrics_labels_time_window_hrs: Union[int, None] = 48
    # Setting to None will disable the cache
    metrics_labels_cache_duration_hrs: Union[int, None] = 12
    # When True, metric labels are fetched via /api/v1/labels instead of /api/v1/series.
    fetch_labels_with_labels_api: bool = False
    # When True, metric metadata is approximated via /api/v1/series (labels included, type unknown).
    fetch_metadata_with_series_api: bool = False
    # When False, query tools omit the raw Prometheus result payload from their output.
    tool_calls_return_data: bool = True
    # Extra HTTP headers (e.g. authorization) sent with every Prometheus request.
    headers: Dict = {}
    rules_cache_duration_seconds: Union[int, None] = 1800  # 30 minutes
    # NOTE(review): not referenced in this module's visible code — presumably merged
    # into queries or requests elsewhere; confirm before relying on it.
    additional_labels: Optional[Dict[str, str]] = None

    @field_validator("prometheus_url")
    def ensure_trailing_slash(cls, v: Optional[str]) -> Optional[str]:
        """Normalize the URL so urljoin() appends API paths instead of replacing the last segment."""
        if v is not None and not v.endswith("/"):
            return v + "/"
        return v
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class BasePrometheusTool(Tool):
    """Base class for Prometheus tools; holds a back-reference to the owning toolset
    so each tool can read the shared PrometheusConfig at invocation time."""

    # Forward reference: PrometheusToolset is defined later in this module.
    toolset: "PrometheusToolset"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def generate_random_key():
    """Return a short random identifier: 4 characters drawn from [a-zA-Z0-9]."""
    alphabet = string.ascii_letters + string.digits
    return "".join(random.choice(alphabet) for _ in range(4))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def filter_metrics_by_type(metrics: Dict, expected_type: str) -> Dict:
    """Return the subset of `metrics` whose type matches `expected_type`.

    A metric is kept when `expected_type` is a substring of its reported type,
    or when its type is unknown ("?", as produced by the series-API metadata
    fallback) so that metrics lacking metadata are never filtered out.

    Args:
        metrics: Mapping of metric name -> metadata dict (with a "type" key).
        expected_type: One of counter, gauge, histogram, summary.

    Returns:
        A new dict containing only the matching entries.
    """
    # Added the missing `-> Dict` return annotation for consistency with
    # filter_metrics_by_name; behavior is unchanged.
    return {
        metric_name: metric_data
        for metric_name, metric_data in metrics.items()
        if expected_type in metric_data.get("type", "")
        or metric_data.get("type", "") == "?"
    }
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
def filter_metrics_by_name(metrics: Dict, pattern: str) -> Dict:
    """Keep only the metrics whose name matches the regular expression `pattern`.

    Matching uses re.search, so the pattern may match anywhere in the name.
    """
    matcher = re.compile(pattern)
    filtered: Dict = {}
    for name, data in metrics.items():
        if matcher.search(name):
            filtered[name] = data
    return filtered
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
# Suffixes of histogram/summary component series (e.g. `foo_bucket` belongs to `foo`).
# NOTE(review): not referenced in this module's visible code — presumably used by
# callers to collapse component series to their parent metric; confirm before removing.
METRICS_SUFFIXES_TO_STRIP = ["_bucket", "_count", "_sum"]
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
def fetch_metadata(prometheus_url: str, headers: Optional[Dict]) -> Dict:
    """Fetch metric metadata from Prometheus' /api/v1/metadata endpoint.

    Returns a mapping of metric name -> {"type", "description", "labels"} where
    "labels" starts out as an empty set (populated later by the callers).
    Raises requests.HTTPError on a non-2xx response.
    """
    response = requests.get(
        urljoin(prometheus_url, "api/v1/metadata"),
        headers=headers,
        timeout=60,
        verify=True,
    )
    response.raise_for_status()

    raw_metadata = response.json()["data"]

    # Each metadata entry is a list; take the first element's type/help,
    # defaulting to "unknown" when absent. Metrics with empty lists are skipped.
    return {
        name: {
            "type": entries[0].get("type", "unknown"),
            "description": entries[0].get("help", "unknown"),
            "labels": set(),
        }
        for name, entries in raw_metadata.items()
        if entries
    }
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def fetch_metadata_with_series_api(
    prometheus_url: str, metric_name: str, headers: Dict
) -> Dict:
    """Approximate metric metadata via Prometheus' /api/v1/series endpoint.

    Unlike /api/v1/metadata this also yields the label names observed on each
    series, but type and description are unknown and reported as "?".
    Matches any metric whose name contains `metric_name`.
    """
    response = requests.get(
        urljoin(prometheus_url, "api/v1/series"),
        headers=headers,
        timeout=60,
        params={"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"},
        verify=True,
    )
    response.raise_for_status()
    series_list = response.json()["data"]

    metadata: Dict = {}
    for series in series_list:
        name = series.get("__name__")
        if not name:
            continue
        # Create the entry on first sight, then accumulate label names across
        # every series of the same metric.
        entry = metadata.setdefault(
            name, {"description": "?", "type": "?", "labels": set()}
        )
        entry["labels"].update(k for k in series.keys() if k != "__name__")

    return metadata
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
def result_has_data(result: Dict) -> bool:
    """Return True when a Prometheus query response contains at least one series."""
    series = result.get("data", {}).get("result", [])
    return len(series) > 0
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def add_prometheus_auth(prometheus_auth_header: Optional[str]) -> Dict[str, Any]:
    """Build HTTP headers carrying the optional Prometheus Authorization value.

    Returns an empty dict when no auth header is configured.
    """
    if not prometheus_auth_header:
        return {}
    return {"Authorization": prometheus_auth_header}
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
def fetch_metrics_labels_with_series_api(
    prometheus_url: str,
    headers: Dict[str, str],
    cache: Optional[TTLCache],
    metrics_labels_time_window_hrs: Union[int, None],
    metric_name: str,
) -> dict:
    """This is a slow query. Takes 5+ seconds to run"""
    # Serve from the TTL cache when possible — the series query is expensive.
    cache_key = f"metrics_labels_series_api:{metric_name}"
    if cache:
        hit = cache.get(cache_key)
        if hit:
            return hit

    query_params: dict = {
        "match[]": f'{{__name__=~".*{metric_name}.*"}}',
        "limit": "10000",
    }
    # Optionally restrict the series lookup to a recent time window.
    if metrics_labels_time_window_hrs is not None:
        now = int(time.time())
        query_params["end"] = now
        query_params["start"] = now - metrics_labels_time_window_hrs * 60 * 60

    series_response = requests.get(
        url=urljoin(prometheus_url, "api/v1/series"),
        headers=headers,
        params=query_params,
        timeout=60,
        verify=True,
    )
    series_response.raise_for_status()

    # Aggregate the label names of every series under its metric name.
    metrics_labels: dict = {}
    for series in series_response.json()["data"]:
        label_names = {k for k in series.keys() if k != "__name__"}
        metrics_labels.setdefault(series["__name__"], set()).update(label_names)

    if cache:
        cache.set(cache_key, metrics_labels)

    return metrics_labels
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def fetch_metrics_labels_with_labels_api(
    prometheus_url: str,
    cache: Optional[TTLCache],
    metrics_labels_time_window_hrs: Union[int, None],
    metric_names: List[str],
    headers: Dict,
) -> dict:
    """Fetch each metric's label names via /api/v1/labels (one request per metric).

    Args:
        prometheus_url: Base URL of the Prometheus server (with trailing slash).
        cache: Optional TTL cache; label sets are cached per metric name.
        metrics_labels_time_window_hrs: Look-back window for the query, or None
            to query without a time window.
        metric_names: The metrics to fetch labels for.
        headers: HTTP headers (e.g. auth) sent with each request.

    Returns:
        Mapping of metric name -> set of label names (excluding __name__).
    """
    metrics_labels = {}

    # Loop-invariant: the endpoint URL never changes per metric.
    url = urljoin(prometheus_url, "api/v1/labels")
    for metric_name in metric_names:
        cache_key = f"metrics_labels_labels_api:{metric_name}"
        if cache:
            cached_result = cache.get(cache_key)
            if cached_result:
                metrics_labels[metric_name] = cached_result
                # BUGFIX: previously execution fell through after a cache hit
                # and re-queried the labels API anyway, defeating the cache.
                continue

        params: dict = {
            "match[]": f'{{__name__="{metric_name}"}}',
        }
        if metrics_labels_time_window_hrs is not None:
            params["end"] = int(time.time())
            params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)

        response = requests.get(
            url=url, headers=headers, params=params, timeout=60, verify=True
        )
        response.raise_for_status()
        labels = response.json()["data"]
        # __name__ is a synthetic label; drop it from the result.
        filtered_labels = {label for label in labels if label != "__name__"}
        metrics_labels[metric_name] = filtered_labels

        if cache:
            cache.set(cache_key, filtered_labels)

    return metrics_labels
|
|
233
|
+
|
|
234
|
+
|
|
235
|
+
def fetch_metrics(
    prometheus_url: str,
    cache: Optional[TTLCache],
    metrics_labels_time_window_hrs: Union[int, None],
    metric_name: str,
    should_fetch_labels_with_labels_api: bool,
    should_fetch_metadata_with_series_api: bool,
    headers: Dict,
) -> dict:
    """Fetch metadata and label names for all metrics matching `metric_name`.

    Metadata comes either from the series API (which already includes labels)
    or from the metadata API, in which case labels are fetched separately via
    the labels API or the series API depending on the flags.
    """
    if should_fetch_metadata_with_series_api:
        # The series API response already carries each metric's labels,
        # so no separate label fetch is needed.
        return fetch_metadata_with_series_api(
            prometheus_url=prometheus_url, metric_name=metric_name, headers=headers
        )

    metrics = filter_metrics_by_name(
        fetch_metadata(prometheus_url=prometheus_url, headers=headers),
        metric_name,
    )

    if should_fetch_labels_with_labels_api:
        metrics_labels = fetch_metrics_labels_with_labels_api(
            prometheus_url=prometheus_url,
            cache=cache,
            metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
            metric_names=list(metrics.keys()),
            headers=headers,
        )
    else:
        metrics_labels = fetch_metrics_labels_with_series_api(
            prometheus_url=prometheus_url,
            cache=cache,
            metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
            metric_name=metric_name,
            headers=headers,
        )

    # Attach the fetched label sets to the metrics we kept.
    for name, labels in metrics_labels.items():
        if name in metrics:
            metrics[name]["labels"] = labels

    return metrics
|
|
279
|
+
|
|
280
|
+
|
|
281
|
+
class ListPrometheusRules(BasePrometheusTool):
    """Tool that returns every rule (recording + alerting) defined on the Prometheus server."""

    def __init__(self, toolset: "PrometheusToolset"):
        super().__init__(
            name="list_prometheus_rules",
            description="List all defined prometheus rules. Will show the prometheus rules description, expression and annotations",
            parameters={},
            toolset=toolset,
        )
        # TTL cache for the rules payload, created lazily in _invoke.
        self._cache = None

    def _invoke(self, params: Any) -> StructuredToolResult:
        """Fetch /api/v1/rules, serving from the TTL cache when a fresh copy exists."""
        if not self.toolset.config or not self.toolset.config.prometheus_url:
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error="Prometheus is not configured. Prometheus URL is missing",
                params=params,
            )
        # Create the cache on first use; a duration of None disables caching.
        if not self._cache and self.toolset.config.rules_cache_duration_seconds:
            self._cache = TTLCache(self.toolset.config.rules_cache_duration_seconds)  # type: ignore
        try:
            if self._cache:
                cached_rules = self._cache.get(PROMETHEUS_RULES_CACHE_KEY)
                if cached_rules:
                    logging.debug("rules returned from cache")

                    return StructuredToolResult(
                        status=ToolResultStatus.SUCCESS,
                        data=cached_rules,
                        params=params,
                    )

            prometheus_url = self.toolset.config.prometheus_url

            rules_url = urljoin(prometheus_url, "api/v1/rules")

            # NOTE(review): the tool's params dict is forwarded as HTTP query
            # params here. This tool declares no parameters, so it is normally
            # empty — confirm this forwarding is intentional.
            rules_response = requests.get(
                url=rules_url,
                params=params,
                timeout=180,
                verify=True,
                headers=self.toolset.config.headers,
            )
            rules_response.raise_for_status()
            data = rules_response.json()["data"]

            if self._cache:
                self._cache.set(PROMETHEUS_RULES_CACHE_KEY, data)
            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=data,
                params=params,
            )
        except requests.Timeout:
            logging.warning("Timeout while fetching prometheus rules", exc_info=True)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error="Request timed out while fetching rules",
                params=params,
            )
        except RequestException as e:
            logging.warning("Failed to fetch prometheus rules", exc_info=True)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=f"Network error while fetching rules: {str(e)}",
                params=params,
            )
        except Exception as e:
            # Catch-all boundary: the LLM caller expects a structured error,
            # never an uncaught exception.
            logging.warning("Failed to process prometheus rules", exc_info=True)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=f"Unexpected error: {str(e)}",
                params=params,
            )

    def get_parameterized_one_liner(self, params) -> str:
        """One-line human-readable description of this tool invocation."""
        return "list available prometheus rules"
|
|
357
|
+
|
|
358
|
+
|
|
359
|
+
class ListAvailableMetrics(BasePrometheusTool):
    """Tool that lists Prometheus metrics matching a name filter, with type and labels."""

    def __init__(self, toolset: "PrometheusToolset"):
        super().__init__(
            name="list_available_metrics",
            description="List all the available metrics to query from prometheus, including their types (counter, gauge, histogram, summary) and available labels.",
            parameters={
                "type_filter": ToolParameter(
                    description="Optional filter to only return a specific metric type. Can be one of counter, gauge, histogram, summary",
                    type="string",
                    required=False,
                ),
                "name_filter": ToolParameter(
                    description="Only the metrics partially or fully matching this name will be returned",
                    type="string",
                    required=True,
                ),
            },
            toolset=toolset,
        )
        # TTL cache for metric labels, created lazily in _invoke.
        self._cache = None

    def _invoke(self, params: Any) -> StructuredToolResult:
        """Fetch matching metrics and render a 'Metric | Description | Type | Labels' table."""
        if not self.toolset.config or not self.toolset.config.prometheus_url:
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error="Prometheus is not configured. Prometheus URL is missing",
                params=params,
            )
        # Create the cache on first use; a duration of None disables caching.
        if not self._cache and self.toolset.config.metrics_labels_cache_duration_hrs:
            self._cache = TTLCache(
                self.toolset.config.metrics_labels_cache_duration_hrs * 3600  # type: ignore
            )
        try:
            prometheus_url = self.toolset.config.prometheus_url
            metrics_labels_time_window_hrs = (
                self.toolset.config.metrics_labels_time_window_hrs
            )

            name_filter = params.get("name_filter")
            if not name_filter:
                return StructuredToolResult(
                    status=ToolResultStatus.ERROR,
                    error="Error: cannot run tool 'list_available_metrics'. The param 'name_filter' is required but is missing.",
                    params=params,
                )

            metrics = fetch_metrics(
                prometheus_url=prometheus_url,
                cache=self._cache,
                metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
                metric_name=name_filter,
                should_fetch_labels_with_labels_api=self.toolset.config.fetch_labels_with_labels_api,
                should_fetch_metadata_with_series_api=self.toolset.config.fetch_metadata_with_series_api,
                headers=self.toolset.config.headers,
            )

            if params.get("type_filter"):
                metrics = filter_metrics_by_type(metrics, params.get("type_filter"))

            output = ["Metric | Description | Type | Labels"]
            output.append("-" * 100)

            for metric, info in sorted(metrics.items()):
                labels_str = (
                    ", ".join(sorted(info["labels"])) if info["labels"] else "none"
                )
                output.append(
                    f"{metric} | {info['description']} | {info['type']} | {labels_str}"
                )

            table_output = "\n".join(output)
            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=table_output,
                params=params,
            )

        except requests.Timeout:
            # BUGFIX: logging.warn is a deprecated alias; use logging.warning
            # (consistent with ListPrometheusRules in this module).
            logging.warning("Timeout while fetching prometheus metrics", exc_info=True)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error="Request timed out while fetching metrics",
                params=params,
            )
        except RequestException as e:
            logging.warning("Failed to fetch prometheus metrics", exc_info=True)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=f"Network error while fetching metrics: {str(e)}",
                params=params,
            )
        except Exception as e:
            logging.warning("Failed to process prometheus metrics", exc_info=True)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=f"Unexpected error: {str(e)}",
                params=params,
            )

    def get_parameterized_one_liner(self, params) -> str:
        """One-line human-readable description of this tool invocation."""
        return f'Search Available Prometheus Metrics: name_filter="{params.get("name_filter", "<no filter>")}", type_filter="{params.get("type_filter", "<no filter>")}"'
|
|
460
|
+
|
|
461
|
+
|
|
462
|
+
class ExecuteInstantQuery(BasePrometheusTool):
|
|
463
|
+
def __init__(self, toolset: "PrometheusToolset"):
|
|
464
|
+
super().__init__(
|
|
465
|
+
name="execute_prometheus_instant_query",
|
|
466
|
+
description="Execute an instant PromQL query",
|
|
467
|
+
parameters={
|
|
468
|
+
"query": ToolParameter(
|
|
469
|
+
description="The PromQL query",
|
|
470
|
+
type="string",
|
|
471
|
+
required=True,
|
|
472
|
+
),
|
|
473
|
+
"description": ToolParameter(
|
|
474
|
+
description="Describes the query",
|
|
475
|
+
type="string",
|
|
476
|
+
required=True,
|
|
477
|
+
),
|
|
478
|
+
},
|
|
479
|
+
toolset=toolset,
|
|
480
|
+
)
|
|
481
|
+
|
|
482
|
+
def _invoke(self, params: Any) -> StructuredToolResult:
|
|
483
|
+
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
484
|
+
return StructuredToolResult(
|
|
485
|
+
status=ToolResultStatus.ERROR,
|
|
486
|
+
error="Prometheus is not configured. Prometheus URL is missing",
|
|
487
|
+
params=params,
|
|
488
|
+
)
|
|
489
|
+
try:
|
|
490
|
+
query = params.get("query", "")
|
|
491
|
+
description = params.get("description", "")
|
|
492
|
+
|
|
493
|
+
url = urljoin(self.toolset.config.prometheus_url, "api/v1/query")
|
|
494
|
+
|
|
495
|
+
payload = {"query": query}
|
|
496
|
+
|
|
497
|
+
response = requests.post(
|
|
498
|
+
url=url, headers=self.toolset.config.headers, data=payload, timeout=60
|
|
499
|
+
)
|
|
500
|
+
|
|
501
|
+
if response.status_code == 200:
|
|
502
|
+
data = response.json()
|
|
503
|
+
status = data.get("status")
|
|
504
|
+
error_message = None
|
|
505
|
+
if status == "success" and not result_has_data(data):
|
|
506
|
+
status = "Failed"
|
|
507
|
+
error_message = (
|
|
508
|
+
"The prometheus query returned no result. Is the query correct?"
|
|
509
|
+
)
|
|
510
|
+
response_data = {
|
|
511
|
+
"status": status,
|
|
512
|
+
"error_message": error_message,
|
|
513
|
+
"random_key": generate_random_key(),
|
|
514
|
+
"tool_name": self.name,
|
|
515
|
+
"description": description,
|
|
516
|
+
"query": query,
|
|
517
|
+
}
|
|
518
|
+
|
|
519
|
+
if self.toolset.config.tool_calls_return_data:
|
|
520
|
+
response_data["data"] = data.get("data")
|
|
521
|
+
|
|
522
|
+
data_str = json.dumps(response_data, indent=2)
|
|
523
|
+
return StructuredToolResult(
|
|
524
|
+
status=ToolResultStatus.SUCCESS,
|
|
525
|
+
data=data_str,
|
|
526
|
+
params=params,
|
|
527
|
+
)
|
|
528
|
+
|
|
529
|
+
# Handle known Prometheus error status codes
|
|
530
|
+
error_msg = "Unknown error occurred"
|
|
531
|
+
if response.status_code in [400, 429]:
|
|
532
|
+
try:
|
|
533
|
+
error_data = response.json()
|
|
534
|
+
error_msg = error_data.get(
|
|
535
|
+
"error", error_data.get("message", str(response.content))
|
|
536
|
+
)
|
|
537
|
+
except json.JSONDecodeError:
|
|
538
|
+
pass
|
|
539
|
+
return StructuredToolResult(
|
|
540
|
+
status=ToolResultStatus.ERROR,
|
|
541
|
+
error=f"Query execution failed. HTTP {response.status_code}: {error_msg}",
|
|
542
|
+
params=params,
|
|
543
|
+
)
|
|
544
|
+
|
|
545
|
+
# For other status codes, just return the status code and content
|
|
546
|
+
return StructuredToolResult(
|
|
547
|
+
status=ToolResultStatus.ERROR,
|
|
548
|
+
error=f"Query execution failed with unexpected status code: {response.status_code}. Response: {str(response.content)}",
|
|
549
|
+
params=params,
|
|
550
|
+
)
|
|
551
|
+
|
|
552
|
+
except RequestException as e:
|
|
553
|
+
logging.info("Failed to connect to Prometheus", exc_info=True)
|
|
554
|
+
return StructuredToolResult(
|
|
555
|
+
status=ToolResultStatus.ERROR,
|
|
556
|
+
error=f"Connection error to Prometheus: {str(e)}",
|
|
557
|
+
params=params,
|
|
558
|
+
)
|
|
559
|
+
except Exception as e:
|
|
560
|
+
logging.info("Failed to connect to Prometheus", exc_info=True)
|
|
561
|
+
return StructuredToolResult(
|
|
562
|
+
status=ToolResultStatus.ERROR,
|
|
563
|
+
error=f"Unexpected error executing query: {str(e)}",
|
|
564
|
+
params=params,
|
|
565
|
+
)
|
|
566
|
+
|
|
567
|
+
def get_parameterized_one_liner(self, params) -> str:
    """Build a one-line, human-readable summary of an instant-query invocation.

    Reads only from ``params``; does not touch instance state.
    """
    promql = params.get("query")
    desc = params.get("description")
    return (
        "Execute Prometheus Query (instant): "
        f"promql='{promql}', description='{desc}'"
    )
|
|
571
|
+
|
|
572
|
+
|
|
573
|
+
class ExecuteRangeQuery(BasePrometheusTool):
    """Tool that runs a PromQL range query (``/api/v1/query_range``).

    The result is wrapped in a JSON envelope (status, ``random_key``, the
    query window, ``output_type``, ...) serialized into
    ``StructuredToolResult.data`` so downstream consumers can render a graph.
    Raw Prometheus data is only included when
    ``config.tool_calls_return_data`` is enabled.
    """

    def __init__(self, toolset: "PrometheusToolset"):
        super().__init__(
            name="execute_prometheus_range_query",
            description="Generates a graph and Execute a PromQL range query",
            parameters={
                "query": ToolParameter(
                    description="The PromQL query",
                    type="string",
                    required=True,
                ),
                "description": ToolParameter(
                    description="Describes the query",
                    type="string",
                    required=True,
                ),
                "start": ToolParameter(
                    description=standard_start_datetime_tool_param_description(
                        DEFAULT_TIME_SPAN_SECONDS
                    ),
                    type="string",
                    required=False,
                ),
                "end": ToolParameter(
                    description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
                    type="string",
                    required=False,
                ),
                "step": ToolParameter(
                    description="Query resolution step width in duration format or float number of seconds",
                    type="number",
                    required=True,
                ),
                "output_type": ToolParameter(
                    description="Specifies how to interpret the Prometheus result. Use 'Plain' for raw values, 'Bytes' to format byte values, 'Percentage' to scale 0–1 values into 0–100%, or 'CPUUsage' to convert values to cores (e.g., 500 becomes 500m, 2000 becomes 2).",
                    type="string",
                    required=True,
                ),
            },
            toolset=toolset,
        )

    @staticmethod
    def _extract_error_message(response) -> str:
        """Best-effort extraction of the error message from a 400/429 response body.

        Prometheus returns a JSON body with an ``error`` (or ``message``) field
        for these status codes; fall back to a generic message if the body is
        not valid JSON.
        """
        error_msg = "Unknown error occurred"
        try:
            error_data = response.json()
            error_msg = error_data.get(
                "error", error_data.get("message", str(response.content))
            )
        except json.JSONDecodeError:
            pass
        return error_msg

    def _invoke(self, params: Any) -> StructuredToolResult:
        """Execute the range query and wrap the outcome in a StructuredToolResult.

        Never raises: missing configuration, Prometheus-side failures, and
        connection problems are all reported as ERROR results with ``params``
        attached for traceability.
        """
        if not self.toolset.config or not self.toolset.config.prometheus_url:
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error="Prometheus is not configured. Prometheus URL is missing",
                params=params,
            )

        try:
            url = urljoin(self.toolset.config.prometheus_url, "api/v1/query_range")

            query = get_param_or_raise(params, "query")
            (start, end) = process_timestamps_to_rfc3339(
                start_timestamp=params.get("start"),
                end_timestamp=params.get("end"),
                default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
            )
            step = params.get("step", "")
            description = params.get("description", "")
            output_type = params.get("output_type", "Plain")
            payload = {
                "query": query,
                "start": start,
                "end": end,
                "step": step,
            }

            response = requests.post(
                url=url, headers=self.toolset.config.headers, data=payload, timeout=120
            )

            if response.status_code == 200:
                data = response.json()
                status = data.get("status")
                error_message = None
                # Prometheus reports "success" even when the query matched no
                # series; surface that explicitly so the caller can self-correct.
                if status == "success" and not result_has_data(data):
                    status = "Failed"
                    error_message = (
                        "The prometheus query returned no result. Is the query correct?"
                    )
                response_data = {
                    "status": status,
                    "error_message": error_message,
                    "random_key": generate_random_key(),
                    "tool_name": self.name,
                    "description": description,
                    "query": query,
                    "start": start,
                    "end": end,
                    "step": step,
                    "output_type": output_type,
                }

                if self.toolset.config.tool_calls_return_data:
                    response_data["data"] = data.get("data")
                data_str = json.dumps(response_data, indent=2)
                return StructuredToolResult(
                    status=ToolResultStatus.SUCCESS,
                    data=data_str,
                    params=params,
                )

            # Handle known Prometheus error status codes (JSON error body).
            if response.status_code in [400, 429]:
                return StructuredToolResult(
                    status=ToolResultStatus.ERROR,
                    error=f"Query execution failed. HTTP {response.status_code}: {self._extract_error_message(response)}",
                    params=params,
                )

            # For other status codes, just return the status code and content.
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=f"Query execution failed with unexpected status code: {response.status_code}. Response: {str(response.content)}",
                params=params,
            )

        except RequestException as e:
            logging.info("Failed to connect to Prometheus", exc_info=True)
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=f"Connection error to Prometheus: {str(e)}",
                params=params,
            )
        except Exception as e:
            # Fixed: previously this path logged "Failed to connect to
            # Prometheus", which is misleading for non-connection failures
            # (e.g. a JSON decoding or parameter-processing error).
            logging.info(
                "Unexpected error executing Prometheus range query", exc_info=True
            )
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=f"Unexpected error executing query: {str(e)}",
                params=params,
            )

    def get_parameterized_one_liner(self, params) -> str:
        """Return a one-line, human-readable summary of this invocation."""
        query = params.get("query")
        start = params.get("start")
        end = params.get("end")
        step = params.get("step")
        description = params.get("description")
        return f"Execute Prometheus Query (range): promql='{query}', start={start}, end={end}, step={step}, description='{description}'"
|
|
720
|
+
|
|
721
|
+
|
|
722
|
+
class PrometheusToolset(Toolset):
    """Toolset exposing Prometheus metadata lookups and PromQL execution tools.

    Configuration is resolved in order of precedence: an explicit config dict,
    the ``PROMETHEUS_URL`` environment variable, then in-cluster auto-detection.
    """

    def __init__(self):
        super().__init__(
            name="prometheus/metrics",
            description="Prometheus integration to fetch metadata and execute PromQL queries",
            docs_url="https://docs.robusta.dev/master/configuration/holmesgpt/toolsets/prometheus.html",
            icon_url="https://upload.wikimedia.org/wikipedia/commons/3/38/Prometheus_software_logo.svg",
            prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
            tools=[
                ListPrometheusRules(toolset=self),
                ListAvailableMetrics(toolset=self),
                ExecuteInstantQuery(toolset=self),
                ExecuteRangeQuery(toolset=self),
            ],
            tags=[
                ToolsetTag.CORE,
            ],
        )
        self._reload_llm_instructions()

    def _reload_llm_instructions(self):
        """(Re)load the LLM instructions from the bundled Jinja2 template."""
        template_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "prometheus_instructions.jinja2")
        )
        self._load_llm_instructions(jinja_template=f"file://{template_path}")

    def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
        """Resolve configuration and verify the Prometheus endpoint is reachable.

        Returns ``(ok, message)`` as expected by ``CallablePrerequisite``.
        """
        # An explicit config dict takes precedence over any discovery mechanism.
        if config:
            self.config = PrometheusConfig(**config)
            self._reload_llm_instructions()
            return self._is_healthy()

        discovered_url = os.environ.get("PROMETHEUS_URL") or self.auto_detect_prometheus_url()
        if not discovered_url:
            return (
                False,
                "Unable to auto-detect prometheus. Define prometheus_url in the configuration for tool prometheus/metrics",
            )

        self.config = PrometheusConfig(
            prometheus_url=discovered_url,
            headers=add_prometheus_auth(os.environ.get("PROMETHEUS_AUTH_HEADER")),
        )
        logging.info(f"Prometheus auto discovered at url {discovered_url}")
        self._reload_llm_instructions()
        return self._is_healthy()

    def auto_detect_prometheus_url(self) -> Optional[str]:
        """Look for a Prometheus server in the cluster, falling back to VictoriaMetrics."""
        return PrometheusDiscovery.find_prometheus_url() or PrometheusDiscovery.find_vm_url()

    def _is_healthy(self) -> Tuple[bool, str]:
        """Probe the configured healthcheck endpoint and report ``(ok, message)``."""
        config = getattr(self, "config", None)
        if not config or not config.prometheus_url:
            return (
                False,
                f"Toolset {self.name} failed to initialize because prometheus is not configured correctly",
            )

        url = urljoin(config.prometheus_url, config.healthcheck)
        try:
            response = requests.get(
                url=url, headers=config.headers, timeout=10, verify=True
            )
        except RequestException:
            return (
                False,
                f"Failed to initialize using url={url}",
            )
        except Exception as e:
            return (
                False,
                f"Failed to initialize using url={url}. Unexpected error: {str(e)}",
            )

        if response.status_code == 200:
            return True, ""
        return (
            False,
            f"Failed to connect to Prometheus at {url}: HTTP {response.status_code}",
        )

    def get_example_config(self):
        """Return an example configuration dict for documentation purposes."""
        return PrometheusConfig(
            prometheus_url="http://robusta-kube-prometheus-st-prometheus:9090"
        ).model_dump()
|