holmesgpt-0.13.2-py3-none-any.whl → holmesgpt-0.16.2a0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +17 -4
- holmes/common/env_vars.py +40 -1
- holmes/config.py +114 -144
- holmes/core/conversations.py +53 -14
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +18 -22
- holmes/core/llm.py +489 -88
- holmes/core/models.py +103 -1
- holmes/core/openai_formatting.py +13 -0
- holmes/core/prompt.py +1 -1
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +293 -100
- holmes/core/tool_calling_llm.py +423 -323
- holmes/core/tools.py +311 -33
- holmes/core/tools_utils/token_counting.py +14 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
- holmes/core/tools_utils/tool_executor.py +13 -8
- holmes/core/toolset_manager.py +155 -4
- holmes/core/tracing.py +6 -1
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +177 -24
- holmes/main.py +7 -4
- holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/prompts/generic_ask.jinja2 +2 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
- holmes/plugins/runbooks/__init__.py +117 -18
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/toolsets/__init__.py +21 -8
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
- holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
- holmes/plugins/toolsets/git.py +51 -46
- holmes/plugins/toolsets/grafana/common.py +15 -3
- holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
- holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
- holmes/plugins/toolsets/internet/internet.py +6 -7
- holmes/plugins/toolsets/internet/notion.py +5 -6
- holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
- holmes/plugins/toolsets/kafka.py +25 -36
- holmes/plugins/toolsets/kubernetes.yaml +58 -84
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
- holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
- holmes/plugins/toolsets/robusta/robusta.py +236 -65
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/global_instructions.py +75 -10
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +61 -7
- holmes/version.py +34 -14
- holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
The diff body below covers holmes/plugins/toolsets/prometheus/prometheus.py (+915 -390). The opening hunks update the imports and add module-level constants for the metadata-API result limit, PromQL query timeouts, and the default metadata time window, together with the matching new `PrometheusConfig` fields (removed lines that the diff viewer truncated are shown as bare `-` or cut off mid-line):

```diff
@@ -1,10 +1,9 @@
 import json
 import logging
 import os
-import re
 import time
 import dateutil.parser
-from typing import Any, Dict,
+from typing import Any, Dict, Optional, Tuple, Type, Union
 from urllib.parse import urljoin

 import requests  # type: ignore
@@ -16,12 +15,16 @@ from holmes.core.tools import (
     CallablePrerequisite,
     StructuredToolResult,
     Tool,
+    ToolInvokeContext,
     ToolParameter,
-
+    StructuredToolResultStatus,
     Toolset,
     ToolsetTag,
 )
+from holmes.core.tools_utils.token_counting import count_tool_response_tokens
+from holmes.core.tools_utils.tool_context_window_limiter import get_pct_token_count
 from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
+from holmes.plugins.toolsets.prometheus.utils import parse_duration_to_seconds
 from holmes.plugins.toolsets.service_discovery import PrometheusDiscovery
 from holmes.plugins.toolsets.utils import (
     get_param_or_raise,
@@ -38,24 +41,64 @@ from holmes.plugins.toolsets.logging_utils.logging_api import (
 from holmes.utils.keygen_utils import generate_random_key

 PROMETHEUS_RULES_CACHE_KEY = "cached_prometheus_rules"
+PROMETHEUS_METADATA_API_LIMIT = 100  # Default limit for Prometheus metadata APIs (series, labels, metadata) to prevent overwhelming responses
+# Default timeout values for PromQL queries
+DEFAULT_QUERY_TIMEOUT_SECONDS = 20
+MAX_QUERY_TIMEOUT_SECONDS = 180
+# Default timeout for metadata API calls (discovery endpoints)
+DEFAULT_METADATA_TIMEOUT_SECONDS = 20
+MAX_METADATA_TIMEOUT_SECONDS = 60
+# Default time window for metadata APIs (in hours)
+DEFAULT_METADATA_TIME_WINDOW_HRS = 1


 class PrometheusConfig(BaseModel):
     # URL is optional because it can be set with an env var
     prometheus_url: Optional[str]
     healthcheck: str = "-/healthy"
-
-
-    #
-
-
-
+
+    # New config for default time window for metadata APIs
+    default_metadata_time_window_hrs: int = DEFAULT_METADATA_TIME_WINDOW_HRS  # Default: only show metrics active in the last hour
+
+    # Query timeout configuration
+    default_query_timeout_seconds: int = (
+        DEFAULT_QUERY_TIMEOUT_SECONDS  # Default timeout for PromQL queries
+    )
+    max_query_timeout_seconds: int = (
+        MAX_QUERY_TIMEOUT_SECONDS  # Maximum allowed timeout for PromQL queries
+    )
+
+    # Metadata API timeout configuration
+    default_metadata_timeout_seconds: int = (
+        DEFAULT_METADATA_TIMEOUT_SECONDS  # Default timeout for metadata/discovery APIs
+    )
+    max_metadata_timeout_seconds: int = (
+        MAX_METADATA_TIMEOUT_SECONDS  # Maximum allowed timeout for metadata APIs
+    )
+
+    # DEPRECATED: These config values are deprecated and will be removed in a future version
+    # Using None as default so we can detect if user explicitly set them
+    metrics_labels_time_window_hrs: Optional[int] = (
+        None  # DEPRECATED - use default_metadata_time_window_hrs instead
+    )
+    metrics_labels_cache_duration_hrs: Optional[int] = (
+        None  # DEPRECATED - no longer used
+    )
+    fetch_labels_with_labels_api: Optional[bool] = None  # DEPRECATED - no longer used
+    fetch_metadata_with_series_api: Optional[bool] = None  # DEPRECATED - no longer used
+
     tool_calls_return_data: bool = True
     headers: Dict = Field(default_factory=dict)
-    rules_cache_duration_seconds:
+    rules_cache_duration_seconds: Optional[int] = 1800  # 30 minutes
     additional_labels: Optional[Dict[str, str]] = None
     prometheus_ssl_enabled: bool = True

+    # Custom limit to the max number of tokens that a query result can take to proactively
+    # prevent token limit issues. Expressed in % of the model's context window.
+    # This limit only overrides the global limit for all tools (TOOL_MAX_ALLOCATED_CONTEXT_WINDOW_PCT)
+    # if it is lower.
+    query_response_size_limit_pct: Optional[int] = None
+
     @field_validator("prometheus_url")
     def ensure_trailing_slash(cls, v: Optional[str]) -> Optional[str]:
         if v is not None and not v.endswith("/"):
```
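The new knobs are ordinary Pydantic fields, so they can be set anywhere the toolset config is built. A minimal sketch (the values are illustrative, not recommendations):

```python
from holmes.plugins.toolsets.prometheus.prometheus import PrometheusConfig

# Illustrative values; every new field has a sensible default if omitted.
config = PrometheusConfig(
    prometheus_url="http://prometheus.monitoring.svc:9090",
    default_query_timeout_seconds=30,    # per-query default (DEFAULT_QUERY_TIMEOUT_SECONDS is 20)
    max_query_timeout_seconds=120,       # upper bound for the per-call timeout parameter
    default_metadata_time_window_hrs=2,  # widen the "recently active" window for discovery APIs
)

# The field_validator shown above appends a trailing slash:
assert config.prometheus_url == "http://prometheus.monitoring.svc:9090/"
```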
A model validator now warns when any of the deprecated config values is explicitly set:

```diff
@@ -64,6 +107,26 @@ class PrometheusConfig(BaseModel):

     @model_validator(mode="after")
     def validate_prom_config(self):
+        # Check for deprecated config values and print warnings
+        deprecated_configs = []
+        if self.metrics_labels_time_window_hrs is not None:  # Check if explicitly set
+            deprecated_configs.append(
+                "metrics_labels_time_window_hrs (use default_metadata_time_window_hrs instead)"
+            )
+        if (
+            self.metrics_labels_cache_duration_hrs is not None
+        ):  # Check if explicitly set
+            deprecated_configs.append("metrics_labels_cache_duration_hrs")
+        if self.fetch_labels_with_labels_api is not None:  # Check if explicitly set
+            deprecated_configs.append("fetch_labels_with_labels_api")
+        if self.fetch_metadata_with_series_api is not None:  # Check if explicitly set
+            deprecated_configs.append("fetch_metadata_with_series_api")
+
+        if deprecated_configs:
+            logging.warning(
+                f"WARNING: The following Prometheus config values are deprecated and will be removed in a future version: "
+                f"{', '.join(deprecated_configs)}. These configs no longer affect behavior."
+            )
         # If openshift is enabled, and the user didn't configure auth headers, we will try to load the token from the service account.
         if IS_OPENSHIFT:
             if self.healthcheck == "-/healthy":
```
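Because every deprecated field defaults to `None`, the validator only fires when one is set explicitly. A short sketch of the observable behavior:

```python
import logging

from holmes.plugins.toolsets.prometheus.prometheus import PrometheusConfig

logging.basicConfig(level=logging.WARNING)

# Explicitly setting a deprecated field triggers the warning path in
# validate_prom_config; the value itself no longer affects behavior.
PrometheusConfig(
    prometheus_url="http://prometheus:9090",
    metrics_labels_time_window_hrs=4,  # deprecated; use default_metadata_time_window_hrs
)
# logs: WARNING: The following Prometheus config values are deprecated and will be
# removed in a future version: metrics_labels_time_window_hrs (use
# default_metadata_time_window_hrs instead). These configs no longer affect behavior.
```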
Next, `do_request` gains a caveat for AMP, two generations of metadata-fetching helpers are deleted, and `adjust_step_for_max_points` learns to auto-calculate a step and accept a bounded max-points override:

```diff
@@ -160,6 +223,8 @@ def do_request(

     if isinstance(config, AMPConfig):
         client = config.get_aws_client()  # cached AWSPrometheusConnect
+        # Note: timeout parameter is not supported by prometrix's signed_request
+        # AWS/AMP requests will not respect the timeout setting
         return client.signed_request(  # type: ignore
             method=method,
             url=url,
@@ -181,99 +246,6 @@ def do_request(
     )


-def filter_metrics_by_type(metrics: Dict, expected_type: str):
-    return {
-        metric_name: metric_data
-        for metric_name, metric_data in metrics.items()
-        if expected_type in metric_data.get("type", "")
-        or metric_data.get("type", "") == "?"
-    }
-
-
-def filter_metrics_by_name(metrics: Dict, pattern: str) -> Dict:
-    regex = re.compile(pattern)
-    return {
-        metric_name: metric_data
-        for metric_name, metric_data in metrics.items()
-        if regex.search(metric_name)
-    }
-
-
-METRICS_SUFFIXES_TO_STRIP = ["_bucket", "_count", "_sum"]
-
-
-def fetch_metadata(
-    prometheus_url: str,
-    headers: Optional[Dict],
-    config,
-    verify_ssl: bool = True,
-) -> Dict:
-    metadata_url = urljoin(prometheus_url, "api/v1/metadata")
-    metadata_response = do_request(
-        config=config,
-        url=metadata_url,
-        headers=headers,
-        timeout=60,
-        verify=verify_ssl,
-        method="GET",
-    )
-    metadata_response.raise_for_status()
-
-    metadata = metadata_response.json()["data"]
-
-    metrics = {}
-    for metric_name, meta_list in metadata.items():
-        if meta_list:
-            metric_type = meta_list[0].get("type", "unknown")
-            metric_description = meta_list[0].get("help", "unknown")
-            metrics[metric_name] = {
-                "type": metric_type,
-                "description": metric_description,
-                "labels": set(),
-            }
-
-    return metrics
-
-
-def fetch_metadata_with_series_api(
-    prometheus_url: str,
-    metric_name: str,
-    headers: Dict,
-    config,
-    verify_ssl: bool = True,
-) -> Dict:
-    url = urljoin(prometheus_url, "api/v1/series")
-    params: Dict = {"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"}
-
-    response = do_request(
-        config=config,
-        url=url,
-        headers=headers,
-        params=params,
-        timeout=60,
-        verify=verify_ssl,
-        method="GET",
-    )
-    response.raise_for_status()
-    metrics = response.json()["data"]
-
-    metadata: Dict = {}
-    for metric_data in metrics:
-        metric_name = metric_data.get("__name__")
-        if not metric_name:
-            continue
-
-        metric = metadata.get(metric_name)
-        if not metric:
-            metric = {"description": "?", "type": "?", "labels": set()}
-            metadata[metric_name] = metric
-
-        labels = {k for k in metric_data.keys() if k != "__name__"}
-        metric["labels"].update(labels)
-
-    return metadata
-
-
 def result_has_data(result: Dict) -> bool:
     data = result.get("data", {})
     if len(data.get("result", [])) > 0:
@@ -284,33 +256,58 @@ def result_has_data(result: Dict) -> bool:
 def adjust_step_for_max_points(
     start_timestamp: str,
     end_timestamp: str,
-    step: float,
+    step: Optional[float] = None,
+    max_points_override: Optional[float] = None,
 ) -> float:
     """
     Adjusts the step parameter to ensure the number of data points doesn't exceed max_points.
-    Max points is controlled by the PROMETHEUS_MAX_GRAPH_POINTS environment variable (default: 300).

     Args:
         start_timestamp: RFC3339 formatted start time
         end_timestamp: RFC3339 formatted end time
-        step: The requested step duration in seconds
+        step: The requested step duration in seconds (None for auto-calculation)
+        max_points_override: Optional override for max points (must be <= MAX_GRAPH_POINTS)

     Returns:
         Adjusted step value in seconds that ensures points <= max_points
     """
+    # Use override if provided and valid, otherwise use default
+    max_points = MAX_GRAPH_POINTS
+    if max_points_override is not None:
+        if max_points_override > MAX_GRAPH_POINTS:
+            logging.warning(
+                f"max_points override ({max_points_override}) exceeds system limit ({MAX_GRAPH_POINTS}), using {MAX_GRAPH_POINTS}"
+            )
+            max_points = MAX_GRAPH_POINTS
+        elif max_points_override < 1:
+            logging.warning(
+                f"max_points override ({max_points_override}) is invalid, using default {MAX_GRAPH_POINTS}"
+            )
+            max_points = MAX_GRAPH_POINTS
+        else:
+            max_points = max_points_override
+            logging.debug(f"Using max_points override: {max_points}")

     start_dt = dateutil.parser.parse(start_timestamp)
     end_dt = dateutil.parser.parse(end_timestamp)

     time_range_seconds = (end_dt - start_dt).total_seconds()

+    # If no step provided, calculate a reasonable default
+    # Aim for ~60 data points across the time range (1 per minute for hourly, etc)
+    if step is None:
+        step = max(1, time_range_seconds / 60)
+        logging.debug(
+            f"No step provided, defaulting to {step}s for {time_range_seconds}s range"
+        )
+
     current_points = time_range_seconds / step

     # If current points exceed max, adjust the step
-    if current_points >
-        adjusted_step = time_range_seconds /
+    if current_points > max_points:
+        adjusted_step = time_range_seconds / max_points
         logging.info(
-            f"Adjusting step from {step}s to {adjusted_step}s to limit points from {current_points:.0f} to {
+            f"Adjusting step from {step}s to {adjusted_step}s to limit points from {current_points:.0f} to {max_points}"
         )
         return adjusted_step

```
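The step math is straightforward: points = range / step, and the step is stretched just enough to stay under the cap. A worked example, assuming the default cap of 300 points that the removed docstring line mentioned for PROMETHEUS_MAX_GRAPH_POINTS:

```python
# Six hours of data at a 10s step would be 2160 points.
time_range_seconds = 6 * 3600               # 21600
step = 10.0
current_points = time_range_seconds / step  # 2160.0

MAX_GRAPH_POINTS = 300                      # assumed default, per the old docstring
if current_points > MAX_GRAPH_POINTS:
    step = time_range_seconds / MAX_GRAPH_POINTS  # 72.0 -> one sample every 72s

# With step=None, the new auto-default targets ~60 points:
auto_step = max(1, time_range_seconds / 60)       # 360.0 -> one sample every 6 minutes
```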
The old label-fetching helpers (partially truncated by the diff viewer) give way to a summarizer for oversized query results, a typed response model, and a converter to `StructuredToolResult`; `ListPrometheusRules` also moves to the new `_invoke` signature:

```diff
@@ -324,185 +321,143 @@ def add_prometheus_auth(prometheus_auth_header: Optional[str]) -> Dict[str, Any]
     return results


-def
-
-
-
-
-    metric_name: str,
-    config=None,
-    verify_ssl: bool = True,
-) -> dict:
-    """This is a slow query. Takes 5+ seconds to run"""
-    cache_key = f"metrics_labels_series_api:{metric_name}"
-    if cache:
-        cached_result = cache.get(cache_key)
-        if cached_result:
-            return cached_result
-
-    series_url = urljoin(prometheus_url, "api/v1/series")
-    params: dict = {"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"}
-
-    if metrics_labels_time_window_hrs is not None:
-        params["end"] = int(time.time())
-        params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
-
-    series_response = do_request(
-        config=config,
-        url=series_url,
-        headers=headers,
-        params=params,
-        timeout=60,
-        verify=verify_ssl,
-        method="GET",
-    )
-    series_response.raise_for_status()
-    series = series_response.json()["data"]
-
-    metrics_labels: dict = {}
-    for serie in series:
-        metric_name = serie["__name__"]
-        # Add all labels except __name__
-        labels = {k for k in serie.keys() if k != "__name__"}
-        if metric_name in metrics_labels:
-            metrics_labels[metric_name].update(labels)
-        else:
-            metrics_labels[metric_name] = labels
-    if cache:
-        cache.set(cache_key, metrics_labels)
-
-    return metrics_labels
-
-
-def fetch_metrics_labels_with_labels_api(
-    prometheus_url: str,
-    cache: Optional[TTLCache],
-    metrics_labels_time_window_hrs: Union[int, None],
-    metric_names: List[str],
-    headers: Dict,
-    config=None,
-    verify_ssl: bool = True,
-) -> dict:
-    metrics_labels = {}
-
-    for metric_name in metric_names:
-        cache_key = f"metrics_labels_labels_api:{metric_name}"
-        if cache:
-            cached_result = cache.get(cache_key)
-            if cached_result:
-                metrics_labels[metric_name] = cached_result
-
-        url = urljoin(prometheus_url, "api/v1/labels")
-        params: dict = {
-            "match[]": f'{{__name__="{metric_name}"}}',
-        }
-        if metrics_labels_time_window_hrs is not None:
-            params["end"] = int(time.time())
-            params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
+def create_data_summary_for_large_result(
+    result_data: Dict, query: str, data_size_tokens: int, is_range_query: bool = False
+) -> Dict[str, Any]:
+    """
+    Create a summary for large Prometheus results instead of returning full data.

-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-    )
-
-
-
-
-        prometheus_url=prometheus_url,
-        metric_name=metric_name,
-        headers=headers,
-        config=config,
-        verify_ssl=verify_ssl,
+    Args:
+        result_data: The Prometheus data result
+        query: The original PromQL query
+        data_size_tokens: Size of the data in tokens
+        is_range_query: Whether this is a range query (vs instant query)
+
+    Returns:
+        Dictionary with summary information and suggestions
+    """
+    if is_range_query:
+        series_list = result_data.get("result", [])
+        num_items = len(series_list)
+
+        # Calculate exact total data points across all series
+        total_points = 0
+        for series in series_list:  # Iterate through ALL series for exact count
+            points = len(series.get("values", []))
+            total_points += points
+
+        # Analyze label keys and their cardinality
+        label_cardinality: Dict[str, set] = {}
+        for series in series_list:
+            metric = series.get("metric", {})
+            for label_key, label_value in metric.items():
+                if label_key not in label_cardinality:
+                    label_cardinality[label_key] = set()
+                label_cardinality[label_key].add(label_value)
+
+        # Convert sets to counts for the summary
+        label_summary = {
+            label: len(values) for label, values in label_cardinality.items()
+        }
+        # Sort by cardinality (highest first) for better insights
+        label_summary = dict(
+            sorted(label_summary.items(), key=lambda x: x[1], reverse=True)
         )
-
+
+        return {
+            "message": f"Data too large to return ({data_size_tokens:,} tokens). Query returned {num_items} time series with {total_points:,} total data points.",
+            "series_count": num_items,
+            "total_data_points": total_points,
+            "data_size_tokens": data_size_tokens,
+            "label_cardinality": label_summary,
+            "suggestion": f'Consider using topk({min(5, num_items)}, {query}) to limit results to the top {min(5, num_items)} series. To also capture remaining data as \'other\': topk({min(5, num_items)}, {query}) or label_replace((sum({query}) - sum(topk({min(5, num_items)}, {query}))), "pod", "other", "", "")',
+        }
     else:
-
-
-
-
-
+        # Instant query
+        result_type = result_data.get("resultType", "")
+        result_list = result_data.get("result", [])
+        num_items = len(result_list)
+
+        # Analyze label keys and their cardinality
+        instant_label_cardinality: Dict[str, set] = {}
+        for item in result_list:
+            if isinstance(item, dict):
+                metric = item.get("metric", {})
+                for label_key, label_value in metric.items():
+                    if label_key not in instant_label_cardinality:
+                        instant_label_cardinality[label_key] = set()
+                    instant_label_cardinality[label_key].add(label_value)
+
+        # Convert sets to counts for the summary
+        label_summary = {
+            label: len(values) for label, values in instant_label_cardinality.items()
+        }
+        # Sort by cardinality (highest first) for better insights
+        label_summary = dict(
+            sorted(label_summary.items(), key=lambda x: x[1], reverse=True)
         )
-        metrics = filter_metrics_by_name(metrics, metric_name)

-
-
-
-
-
-
-
-
-            headers=headers,
-            config=config,
-            verify_ssl=verify_ssl,
-        )
-    else:
-        metrics_labels = fetch_metrics_labels_with_series_api(
-            prometheus_url=prometheus_url,
-            cache=cache,
-            metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
-            metric_name=metric_name,
-            headers=headers,
-            config=config,
-            verify_ssl=verify_ssl,
-        )
+        return {
+            "message": f"Data too large to return ({data_size_tokens:,} tokens). Query returned {num_items} results.",
+            "result_count": num_items,
+            "result_type": result_type,
+            "data_size_tokens": data_size_tokens,
+            "label_cardinality": label_summary,
+            "suggestion": f'Consider using topk({min(5, num_items)}, {query}) to limit results. To also capture remaining data as \'other\': topk({min(5, num_items)}, {query}) or label_replace((sum({query}) - sum(topk({min(5, num_items)}, {query}))), "instance", "other", "", "")',
+        }

-    for metric_name in metrics:
-        if metric_name in metrics_labels:
-            metrics[metric_name]["labels"] = metrics_labels[metric_name]

-
+class MetricsBasedResponse(BaseModel):
+    status: str
+    error_message: Optional[str] = None
+    data: Optional[str] = None
+    random_key: str
+    tool_name: str
+    description: str
+    query: str
+    start: Optional[str] = None
+    end: Optional[str] = None
+    step: Optional[float] = None
+    output_type: Optional[str] = None
+    data_summary: Optional[dict[str, Any]] = None
+
+
+def create_structured_tool_result(
+    params: dict, response: MetricsBasedResponse
+) -> StructuredToolResult:
+    status = StructuredToolResultStatus.SUCCESS
+    if response.error_message or response.status.lower() in ("failed", "error"):
+        status = StructuredToolResultStatus.ERROR
+    elif not response.data:
+        status = StructuredToolResultStatus.NO_DATA
+
+    return StructuredToolResult(
+        status=status,
+        data=response.model_dump_json(indent=2),
+        params=params,
+    )


 class ListPrometheusRules(BasePrometheusTool):
     def __init__(self, toolset: "PrometheusToolset"):
         super().__init__(
             name="list_prometheus_rules",
-            description="List all defined
+            description="List all defined Prometheus rules (api/v1/rules). Will show the Prometheus rules description, expression and annotations",
             parameters={},
             toolset=toolset,
         )
         self._cache = None

-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         if not self.toolset.config or not self.toolset.config.prometheus_url:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error="Prometheus is not configured. Prometheus URL is missing",
                 params=params,
             )
         if self.toolset.config.is_amp():
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error="Tool not supported in AMP",
                 params=params,
             )
```
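To see what the range-query branch of `create_data_summary_for_large_result` produces, here is a hand-made two-series input (the token count is illustrative):

```python
result_data = {
    "resultType": "matrix",
    "result": [
        {"metric": {"pod": "api-1", "namespace": "prod"},
         "values": [[1700000000, "1"], [1700000060, "2"]]},
        {"metric": {"pod": "api-2", "namespace": "prod"},
         "values": [[1700000000, "3"]]},
    ],
}

summary = create_data_summary_for_large_result(
    result_data,
    query="container_memory_usage_bytes",
    data_size_tokens=50_000,
    is_range_query=True,
)

assert summary["series_count"] == 2
assert summary["total_data_points"] == 3
assert summary["label_cardinality"] == {"pod": 2, "namespace": 1}
# summary["suggestion"] recommends topk(2, container_memory_usage_bytes) ...
```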
@@ -515,7 +470,7 @@ class ListPrometheusRules(BasePrometheusTool):
|
|
|
515
470
|
logging.debug("rules returned from cache")
|
|
516
471
|
|
|
517
472
|
return StructuredToolResult(
|
|
518
|
-
status=
|
|
473
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
519
474
|
data=cached_rules,
|
|
520
475
|
params=params,
|
|
521
476
|
)
|
|
@@ -528,7 +483,7 @@ class ListPrometheusRules(BasePrometheusTool):
|
|
|
528
483
|
config=self.toolset.config,
|
|
529
484
|
url=rules_url,
|
|
530
485
|
params=params,
|
|
531
|
-
timeout=
|
|
486
|
+
timeout=40,
|
|
532
487
|
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
533
488
|
headers=self.toolset.config.headers,
|
|
534
489
|
method="GET",
|
|
@@ -539,28 +494,28 @@ class ListPrometheusRules(BasePrometheusTool):
|
|
|
539
494
|
if self._cache:
|
|
540
495
|
self._cache.set(PROMETHEUS_RULES_CACHE_KEY, data)
|
|
541
496
|
return StructuredToolResult(
|
|
542
|
-
status=
|
|
497
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
543
498
|
data=data,
|
|
544
499
|
params=params,
|
|
545
500
|
)
|
|
546
501
|
except requests.Timeout:
|
|
547
502
|
logging.warning("Timeout while fetching prometheus rules", exc_info=True)
|
|
548
503
|
return StructuredToolResult(
|
|
549
|
-
status=
|
|
504
|
+
status=StructuredToolResultStatus.ERROR,
|
|
550
505
|
error="Request timed out while fetching rules",
|
|
551
506
|
params=params,
|
|
552
507
|
)
|
|
553
508
|
except RequestException as e:
|
|
554
509
|
logging.warning("Failed to fetch prometheus rules", exc_info=True)
|
|
555
510
|
return StructuredToolResult(
|
|
556
|
-
status=
|
|
511
|
+
status=StructuredToolResultStatus.ERROR,
|
|
557
512
|
error=f"Network error while fetching rules: {str(e)}",
|
|
558
513
|
params=params,
|
|
559
514
|
)
|
|
560
515
|
except Exception as e:
|
|
561
516
|
logging.warning("Failed to process prometheus rules", exc_info=True)
|
|
562
517
|
return StructuredToolResult(
|
|
563
|
-
status=
|
|
518
|
+
status=StructuredToolResultStatus.ERROR,
|
|
564
519
|
error=f"Unexpected error: {str(e)}",
|
|
565
520
|
params=params,
|
|
566
521
|
)
|
|
@@ -569,120 +524,553 @@ class ListPrometheusRules(BasePrometheusTool):
|
|
|
569
524
|
return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Rules"
|
|
570
525
|
|
|
571
526
|
|
|
572
|
-
class
|
|
527
|
+
class GetMetricNames(BasePrometheusTool):
|
|
528
|
+
"""Thin wrapper around /api/v1/label/__name__/values - the fastest way to discover metric names"""
|
|
529
|
+
|
|
573
530
|
def __init__(self, toolset: "PrometheusToolset"):
|
|
574
531
|
super().__init__(
|
|
575
|
-
name="
|
|
576
|
-
description=
|
|
532
|
+
name="get_metric_names",
|
|
533
|
+
description=(
|
|
534
|
+
"Get list of metric names using /api/v1/label/__name__/values. "
|
|
535
|
+
"FASTEST method for metric discovery when you need to explore available metrics. "
|
|
536
|
+
f"Returns up to {PROMETHEUS_METADATA_API_LIMIT} unique metric names (limit={PROMETHEUS_METADATA_API_LIMIT}). If {PROMETHEUS_METADATA_API_LIMIT} results returned, more may exist - use a more specific filter. "
|
|
537
|
+
f"ALWAYS use match[] parameter to filter metrics - without it you'll get random {PROMETHEUS_METADATA_API_LIMIT} metrics which is rarely useful. "
|
|
538
|
+
"Note: Does not return metric metadata (type, description, labels). "
|
|
539
|
+
"By default returns metrics active in the last 1 hour (configurable via default_metadata_time_window_hrs)."
|
|
540
|
+
),
|
|
577
541
|
parameters={
|
|
578
|
-
"
|
|
579
|
-
description=
|
|
542
|
+
"match": ToolParameter(
|
|
543
|
+
description=(
|
|
544
|
+
"REQUIRED: PromQL selector to filter metrics. Use regex OR (|) to check multiple patterns in one call - much faster than multiple calls! Examples: "
|
|
545
|
+
"'{__name__=~\"node_cpu.*|node_memory.*|node_disk.*\"}' for all node resource metrics, "
|
|
546
|
+
"'{__name__=~\"container_cpu.*|container_memory.*|container_network.*\"}' for all container metrics, "
|
|
547
|
+
"'{__name__=~\"kube_pod.*|kube_deployment.*|kube_service.*\"}' for multiple Kubernetes object metrics, "
|
|
548
|
+
"'{__name__=~\".*cpu.*|.*memory.*|.*disk.*\"}' for all resource metrics, "
|
|
549
|
+
"'{namespace=~\"kube-system|default|monitoring\"}' for metrics from multiple namespaces, "
|
|
550
|
+
"'{job=~\"prometheus|node-exporter|kube-state-metrics\"}' for metrics from multiple jobs."
|
|
551
|
+
),
|
|
552
|
+
type="string",
|
|
553
|
+
required=True,
|
|
554
|
+
),
|
|
555
|
+
"start": ToolParameter(
|
|
556
|
+
description="Start timestamp (RFC3339 or Unix). Default: 1 hour ago",
|
|
580
557
|
type="string",
|
|
581
558
|
required=False,
|
|
582
559
|
),
|
|
583
|
-
"
|
|
584
|
-
description="
|
|
560
|
+
"end": ToolParameter(
|
|
561
|
+
description="End timestamp (RFC3339 or Unix). Default: now",
|
|
585
562
|
type="string",
|
|
586
|
-
required=
|
|
563
|
+
required=False,
|
|
587
564
|
),
|
|
588
565
|
},
|
|
589
566
|
toolset=toolset,
|
|
590
567
|
)
|
|
591
|
-
self._cache = None
|
|
592
568
|
|
|
593
|
-
def _invoke(
|
|
594
|
-
self, params: dict, user_approved: bool = False
|
|
595
|
-
) -> StructuredToolResult:
|
|
569
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
596
570
|
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
597
571
|
return StructuredToolResult(
|
|
598
|
-
status=
|
|
572
|
+
status=StructuredToolResultStatus.ERROR,
|
|
599
573
|
error="Prometheus is not configured. Prometheus URL is missing",
|
|
600
574
|
params=params,
|
|
601
575
|
)
|
|
602
|
-
if not self._cache and self.toolset.config.metrics_labels_cache_duration_hrs:
|
|
603
|
-
self._cache = TTLCache(
|
|
604
|
-
self.toolset.config.metrics_labels_cache_duration_hrs * 3600 # type: ignore
|
|
605
|
-
)
|
|
606
576
|
try:
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
577
|
+
match_param = params.get("match")
|
|
578
|
+
if not match_param:
|
|
579
|
+
return StructuredToolResult(
|
|
580
|
+
status=StructuredToolResultStatus.ERROR,
|
|
581
|
+
error="Match parameter is required to filter metrics",
|
|
582
|
+
params=params,
|
|
583
|
+
)
|
|
584
|
+
|
|
585
|
+
url = urljoin(
|
|
586
|
+
self.toolset.config.prometheus_url, "api/v1/label/__name__/values"
|
|
610
587
|
)
|
|
588
|
+
query_params = {
|
|
589
|
+
"limit": str(PROMETHEUS_METADATA_API_LIMIT),
|
|
590
|
+
"match[]": match_param,
|
|
591
|
+
}
|
|
592
|
+
|
|
593
|
+
# Add time parameters - use provided values or defaults
|
|
594
|
+
if params.get("end"):
|
|
595
|
+
query_params["end"] = params["end"]
|
|
596
|
+
else:
|
|
597
|
+
query_params["end"] = str(int(time.time()))
|
|
598
|
+
|
|
599
|
+
if params.get("start"):
|
|
600
|
+
query_params["start"] = params["start"]
|
|
601
|
+
elif self.toolset.config.default_metadata_time_window_hrs:
|
|
602
|
+
# Use default time window
|
|
603
|
+
query_params["start"] = str(
|
|
604
|
+
int(time.time())
|
|
605
|
+
- (self.toolset.config.default_metadata_time_window_hrs * 3600)
|
|
606
|
+
)
|
|
607
|
+
|
|
608
|
+
response = do_request(
|
|
609
|
+
config=self.toolset.config,
|
|
610
|
+
url=url,
|
|
611
|
+
params=query_params,
|
|
612
|
+
timeout=self.toolset.config.default_metadata_timeout_seconds,
|
|
613
|
+
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
614
|
+
headers=self.toolset.config.headers,
|
|
615
|
+
method="GET",
|
|
616
|
+
)
|
|
617
|
+
response.raise_for_status()
|
|
618
|
+
data = response.json()
|
|
619
|
+
|
|
620
|
+
# Check if results were truncated
|
|
621
|
+
if (
|
|
622
|
+
"data" in data
|
|
623
|
+
and isinstance(data["data"], list)
|
|
624
|
+
and len(data["data"]) == PROMETHEUS_METADATA_API_LIMIT
|
|
625
|
+
):
|
|
626
|
+
data["_truncated"] = True
|
|
627
|
+
data["_message"] = (
|
|
628
|
+
f"Results truncated at limit={PROMETHEUS_METADATA_API_LIMIT}. Use a more specific match filter to see additional metrics."
|
|
629
|
+
)
|
|
630
|
+
|
|
631
|
+
return StructuredToolResult(
|
|
632
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
633
|
+
data=data,
|
|
634
|
+
params=params,
|
|
635
|
+
)
|
|
636
|
+
except Exception as e:
|
|
637
|
+
return StructuredToolResult(
|
|
638
|
+
status=StructuredToolResultStatus.ERROR,
|
|
639
|
+
error=str(e),
|
|
640
|
+
params=params,
|
|
641
|
+
)
|
|
642
|
+
|
|
643
|
+
def get_parameterized_one_liner(self, params) -> str:
|
|
644
|
+
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Metric Names"
|
|
645
|
+
|
|
646
|
+
|
|
647
|
+
class GetLabelValues(BasePrometheusTool):
|
|
648
|
+
"""Get values for a specific label across all metrics"""
|
|
649
|
+
|
|
650
|
+
def __init__(self, toolset: "PrometheusToolset"):
|
|
651
|
+
super().__init__(
|
|
652
|
+
name="get_label_values",
|
|
653
|
+
description=(
|
|
654
|
+
"Get all values for a specific label using /api/v1/label/{label}/values. "
|
|
655
|
+
"Use this to discover pods, namespaces, jobs, instances, etc. "
|
|
656
|
+
f"Returns up to {PROMETHEUS_METADATA_API_LIMIT} unique values (limit={PROMETHEUS_METADATA_API_LIMIT}). If {PROMETHEUS_METADATA_API_LIMIT} results returned, more may exist - use match[] to filter. "
|
|
657
|
+
"Supports optional match[] parameter to filter. "
|
|
658
|
+
"By default returns values from metrics active in the last 1 hour (configurable via default_metadata_time_window_hrs)."
|
|
659
|
+
),
|
|
660
|
+
parameters={
|
|
661
|
+
"label": ToolParameter(
|
|
662
|
+
description="Label name to get values for (e.g., 'pod', 'namespace', 'job', 'instance')",
|
|
663
|
+
type="string",
|
|
664
|
+
required=True,
|
|
665
|
+
),
|
|
666
|
+
"match": ToolParameter(
|
|
667
|
+
description=(
|
|
668
|
+
"Optional PromQL selector to filter (e.g., '{__name__=~\"kube.*\"}', "
|
|
669
|
+
"'{namespace=\"default\"}')."
|
|
670
|
+
),
|
|
671
|
+
type="string",
|
|
672
|
+
required=False,
|
|
673
|
+
),
|
|
674
|
+
"start": ToolParameter(
|
|
675
|
+
description="Start timestamp (RFC3339 or Unix). Default: 1 hour ago",
|
|
676
|
+
type="string",
|
|
677
|
+
required=False,
|
|
678
|
+
),
|
|
679
|
+
"end": ToolParameter(
|
|
680
|
+
description="End timestamp (RFC3339 or Unix). Default: now",
|
|
681
|
+
type="string",
|
|
682
|
+
required=False,
|
|
683
|
+
),
|
|
684
|
+
},
|
|
685
|
+
toolset=toolset,
|
|
686
|
+
)
|
|
611
687
|
|
|
612
|
-
|
|
613
|
-
|
|
688
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
689
|
+
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
690
|
+
return StructuredToolResult(
|
|
691
|
+
status=StructuredToolResultStatus.ERROR,
|
|
692
|
+
error="Prometheus is not configured. Prometheus URL is missing",
|
|
693
|
+
params=params,
|
|
694
|
+
)
|
|
695
|
+
try:
|
|
696
|
+
label = params.get("label")
|
|
697
|
+
if not label:
|
|
614
698
|
return StructuredToolResult(
|
|
615
|
-
status=
|
|
616
|
-
error="
|
|
699
|
+
status=StructuredToolResultStatus.ERROR,
|
|
700
|
+
error="Label parameter is required",
|
|
617
701
|
params=params,
|
|
618
702
|
)
|
|
619
703
|
|
|
620
|
-
|
|
621
|
-
prometheus_url
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
704
|
+
url = urljoin(
|
|
705
|
+
self.toolset.config.prometheus_url, f"api/v1/label/{label}/values"
|
|
706
|
+
)
|
|
707
|
+
query_params = {"limit": str(PROMETHEUS_METADATA_API_LIMIT)}
|
|
708
|
+
if params.get("match"):
|
|
709
|
+
query_params["match[]"] = params["match"]
|
|
710
|
+
|
|
711
|
+
# Add time parameters - use provided values or defaults
|
|
712
|
+
if params.get("end"):
|
|
713
|
+
query_params["end"] = params["end"]
|
|
714
|
+
else:
|
|
715
|
+
query_params["end"] = str(int(time.time()))
|
|
716
|
+
|
|
717
|
+
if params.get("start"):
|
|
718
|
+
query_params["start"] = params["start"]
|
|
719
|
+
elif self.toolset.config.default_metadata_time_window_hrs:
|
|
720
|
+
# Use default time window
|
|
721
|
+
query_params["start"] = str(
|
|
722
|
+
int(time.time())
|
|
723
|
+
- (self.toolset.config.default_metadata_time_window_hrs * 3600)
|
|
724
|
+
)
|
|
725
|
+
|
|
726
|
+
response = do_request(
|
|
727
|
+
config=self.toolset.config,
|
|
728
|
+
url=url,
|
|
729
|
+
params=query_params,
|
|
730
|
+
timeout=self.toolset.config.default_metadata_timeout_seconds,
|
|
731
|
+
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
627
732
|
headers=self.toolset.config.headers,
|
|
733
|
+
method="GET",
|
|
734
|
+
)
|
|
735
|
+
response.raise_for_status()
|
|
736
|
+
data = response.json()
|
|
737
|
+
|
|
738
|
+
# Check if results were truncated
|
|
739
|
+
if (
|
|
740
|
+
"data" in data
|
|
741
|
+
and isinstance(data["data"], list)
|
|
742
|
+
and len(data["data"]) == PROMETHEUS_METADATA_API_LIMIT
|
|
743
|
+
):
|
|
744
|
+
data["_truncated"] = True
|
|
745
|
+
data["_message"] = (
|
|
746
|
+
f"Results truncated at limit={PROMETHEUS_METADATA_API_LIMIT}. Use match[] parameter to filter label '{label}' values."
|
|
747
|
+
)
|
|
748
|
+
|
|
749
|
+
return StructuredToolResult(
|
|
750
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
751
|
+
data=data,
|
|
752
|
+
params=params,
|
|
753
|
+
)
|
|
754
|
+
except Exception as e:
|
|
755
|
+
return StructuredToolResult(
|
|
756
|
+
status=StructuredToolResultStatus.ERROR,
|
|
757
|
+
error=str(e),
|
|
758
|
+
params=params,
|
|
759
|
+
)
|
|
760
|
+
|
|
761
|
+
def get_parameterized_one_liner(self, params) -> str:
|
|
762
|
+
label = params.get("label", "")
|
|
763
|
+
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get {label} Values"
|
|
764
|
+
|
|
765
|
+
|
|
766
|
+
class GetAllLabels(BasePrometheusTool):
|
|
767
|
+
"""Get all label names that exist in Prometheus"""
|
|
768
|
+
|
|
769
|
+
def __init__(self, toolset: "PrometheusToolset"):
|
|
770
|
+
super().__init__(
|
|
771
|
+
name="get_all_labels",
|
|
772
|
+
description=(
|
|
773
|
+
"Get list of all label names using /api/v1/labels. "
|
|
774
|
+
"Use this to discover what labels are available across all metrics. "
|
|
775
|
+
f"Returns up to {PROMETHEUS_METADATA_API_LIMIT} label names (limit={PROMETHEUS_METADATA_API_LIMIT}). If {PROMETHEUS_METADATA_API_LIMIT} results returned, more may exist - use match[] to filter. "
|
|
776
|
+
"Supports optional match[] parameter to filter. "
|
|
777
|
+
"By default returns labels from metrics active in the last 1 hour (configurable via default_metadata_time_window_hrs)."
|
|
778
|
+
),
|
|
779
|
+
parameters={
|
|
780
|
+
"match": ToolParameter(
|
|
781
|
+
description=(
|
|
782
|
+
"Optional PromQL selector to filter (e.g., '{__name__=~\"kube.*\"}', "
|
|
783
|
+
"'{job=\"prometheus\"}')."
|
|
784
|
+
),
|
|
785
|
+
type="string",
|
|
786
|
+
required=False,
|
|
787
|
+
),
|
|
788
|
+
"start": ToolParameter(
|
|
789
|
+
description="Start timestamp (RFC3339 or Unix). Default: 1 hour ago",
|
|
790
|
+
type="string",
|
|
791
|
+
required=False,
|
|
792
|
+
),
|
|
793
|
+
"end": ToolParameter(
|
|
794
|
+
description="End timestamp (RFC3339 or Unix). Default: now",
|
|
795
|
+
type="string",
|
|
796
|
+
required=False,
|
|
797
|
+
),
|
|
798
|
+
},
|
|
799
|
+
toolset=toolset,
|
|
800
|
+
)
|
|
801
|
+
|
|
802
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
803
|
+
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
804
|
+
return StructuredToolResult(
|
|
805
|
+
status=StructuredToolResultStatus.ERROR,
|
|
806
|
+
error="Prometheus is not configured. Prometheus URL is missing",
|
|
807
|
+
params=params,
|
|
808
|
+
)
|
|
809
|
+
try:
|
|
810
|
+
url = urljoin(self.toolset.config.prometheus_url, "api/v1/labels")
|
|
811
|
+
query_params = {"limit": str(PROMETHEUS_METADATA_API_LIMIT)}
|
|
812
|
+
if params.get("match"):
|
|
813
|
+
query_params["match[]"] = params["match"]
|
|
814
|
+
|
|
815
|
+
# Add time parameters - use provided values or defaults
|
|
816
|
+
if params.get("end"):
|
|
817
|
+
query_params["end"] = params["end"]
|
|
818
|
+
else:
|
|
819
|
+
query_params["end"] = str(int(time.time()))
|
|
820
|
+
|
|
821
|
+
if params.get("start"):
|
|
822
|
+
query_params["start"] = params["start"]
|
|
823
|
+
elif self.toolset.config.default_metadata_time_window_hrs:
|
|
824
|
+
# Use default time window
|
|
825
|
+
query_params["start"] = str(
|
|
826
|
+
int(time.time())
|
|
827
|
+
- (self.toolset.config.default_metadata_time_window_hrs * 3600)
|
|
828
|
+
)
|
|
829
|
+
|
|
830
|
+
response = do_request(
|
|
628
831
|
config=self.toolset.config,
|
|
629
|
-
|
|
832
|
+
url=url,
|
|
833
|
+
params=query_params,
|
|
834
|
+
timeout=self.toolset.config.default_metadata_timeout_seconds,
|
|
835
|
+
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
836
|
+
headers=self.toolset.config.headers,
|
|
837
|
+
method="GET",
|
|
838
|
+
)
|
|
839
|
+
response.raise_for_status()
|
|
840
|
+
data = response.json()
|
|
841
|
+
|
|
842
|
+
# Check if results were truncated
|
|
843
|
+
if (
|
|
844
|
+
"data" in data
|
|
845
|
+
and isinstance(data["data"], list)
|
|
846
|
+
and len(data["data"]) == PROMETHEUS_METADATA_API_LIMIT
|
|
847
|
+
):
|
|
848
|
+
data["_truncated"] = True
|
|
849
|
+
data["_message"] = (
|
|
850
|
+
f"Results truncated at limit={PROMETHEUS_METADATA_API_LIMIT}. Use match[] parameter to filter labels."
|
|
851
|
+
)
|
|
852
|
+
|
|
853
|
+
return StructuredToolResult(
|
|
854
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
855
|
+
data=data,
|
|
856
|
+
params=params,
|
|
857
|
+
)
|
|
858
|
+
except Exception as e:
|
|
859
|
+
return StructuredToolResult(
|
|
860
|
+
status=StructuredToolResultStatus.ERROR,
|
|
861
|
+
error=str(e),
|
|
862
|
+
params=params,
|
|
630
863
|
)
|
|
631
864
|
|
|
632
|
-
|
|
633
|
-
|
|
634
|
-
metrics = filter_metrics_by_type(metrics, type_filter)
|
|
865
|
+
def get_parameterized_one_liner(self, params) -> str:
|
|
866
|
+
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get All Labels"
|
|
635
867
|
|
|
636
|
-
output = ["Metric | Description | Type | Labels"]
|
|
637
|
-
output.append("-" * 100)
|
|
638
868
|
|
|
639
|
-
|
|
640
|
-
|
|
641
|
-
|
|
869
|
+
class GetSeries(BasePrometheusTool):
|
|
870
|
+
"""Get time series matching a selector"""
|
|
871
|
+
|
|
872
|
+
def __init__(self, toolset: "PrometheusToolset"):
|
|
873
|
+
super().__init__(
|
|
874
|
+
name="get_series",
|
|
875
|
+
description=(
|
|
876
|
+
"Get time series using /api/v1/series. "
|
|
877
|
+
"Returns label sets for all time series matching the selector. "
|
|
878
|
+
"SLOWER than other discovery methods - use only when you need full label sets. "
|
|
879
|
+
f"Returns up to {PROMETHEUS_METADATA_API_LIMIT} series (limit={PROMETHEUS_METADATA_API_LIMIT}). If {PROMETHEUS_METADATA_API_LIMIT} results returned, more series exist - use more specific selector. "
|
|
880
|
+
"Requires match[] parameter with PromQL selector. "
|
|
881
|
+
"By default returns series active in the last 1 hour (configurable via default_metadata_time_window_hrs)."
|
|
882
|
+
),
|
|
883
|
+
parameters={
|
|
884
|
+
"match": ToolParameter(
|
|
885
|
+
description=(
|
|
886
|
+
"PromQL selector to match series (e.g., 'up', 'node_cpu_seconds_total', "
|
|
887
|
+
"'{__name__=~\"node.*\"}', '{job=\"prometheus\"}', "
|
|
888
|
+
'\'{__name__="up",job="prometheus"}\').'
|
|
889
|
+
),
|
|
890
|
+
type="string",
|
|
891
|
+
required=True,
|
|
892
|
+
),
|
|
893
|
+
"start": ToolParameter(
|
|
894
|
+
description="Start timestamp (RFC3339 or Unix). Default: 1 hour ago",
|
|
895
|
+
type="string",
|
|
896
|
+
required=False,
|
|
897
|
+
),
|
|
898
|
+
"end": ToolParameter(
|
|
899
|
+
description="End timestamp (RFC3339 or Unix). Default: now",
|
|
900
|
+
type="string",
|
|
901
|
+
required=False,
|
|
902
|
+
),
|
|
903
|
+
},
|
|
904
|
+
toolset=toolset,
|
|
905
|
+
)
|
|
906
|
+
|
|
907
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
908
|
+
if not self.toolset.config or not self.toolset.config.prometheus_url:
|
|
909
|
+
return StructuredToolResult(
|
|
910
|
+
status=StructuredToolResultStatus.ERROR,
|
|
911
|
+
error="Prometheus is not configured. Prometheus URL is missing",
|
|
912
|
+
params=params,
|
|
913
|
+
)
|
|
914
|
+
try:
|
|
915
|
+
match = params.get("match")
|
|
916
|
+
if not match:
|
|
917
|
+
return StructuredToolResult(
|
|
918
|
+
status=StructuredToolResultStatus.ERROR,
|
|
919
|
+
error="Match parameter is required",
|
|
920
|
+
params=params,
|
|
921
|
+
)
|
|
922
|
+
|
|
923
|
+
url = urljoin(self.toolset.config.prometheus_url, "api/v1/series")
|
|
924
|
+
query_params = {
|
|
925
|
+
"match[]": match,
|
|
926
|
+
"limit": str(PROMETHEUS_METADATA_API_LIMIT),
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
# Add time parameters - use provided values or defaults
|
|
930
|
+
if params.get("end"):
|
|
931
|
+
query_params["end"] = params["end"]
|
|
932
|
+
else:
|
|
933
|
+
query_params["end"] = str(int(time.time()))
|
|
934
|
+
|
|
935
|
+
if params.get("start"):
|
|
936
|
+
query_params["start"] = params["start"]
|
|
937
|
+
elif self.toolset.config.default_metadata_time_window_hrs:
|
|
938
|
+
# Use default time window
|
|
939
|
+
query_params["start"] = str(
|
|
940
|
+
int(time.time())
|
|
941
|
+
- (self.toolset.config.default_metadata_time_window_hrs * 3600)
|
|
642
942
|
)
|
|
643
|
-
|
|
644
|
-
|
|
943
|
+
|
|
944
|
+
response = do_request(
|
|
945
|
+
config=self.toolset.config,
|
|
946
|
+
url=url,
|
|
947
|
+
params=query_params,
|
|
948
|
+
timeout=self.toolset.config.default_metadata_timeout_seconds,
|
|
949
|
+
verify=self.toolset.config.prometheus_ssl_enabled,
|
|
950
|
+
headers=self.toolset.config.headers,
|
|
951
|
+
method="GET",
|
|
952
|
+
)
|
|
953
|
+
response.raise_for_status()
|
|
954
|
+
data = response.json()
|
|
955
|
+
|
|
956
|
+
# Check if results were truncated
|
|
957
|
+
if (
|
|
958
|
+
"data" in data
|
|
959
|
+
and isinstance(data["data"], list)
|
|
960
|
+
and len(data["data"]) == PROMETHEUS_METADATA_API_LIMIT
|
|
961
|
+
):
|
|
962
|
+
data["_truncated"] = True
|
|
963
|
+
data["_message"] = (
|
|
964
|
+
f"Results truncated at limit={PROMETHEUS_METADATA_API_LIMIT}. Use a more specific match selector to see additional series."
|
|
645
965
|
)
|
|
646
966
|
|
|
647
|
-
table_output = "\n".join(output)
|
|
648
967
|
return StructuredToolResult(
|
|
649
|
-
status=
|
|
650
|
-
data=
|
|
968
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
969
|
+
data=data,
|
|
970
|
+
params=params,
|
|
971
|
+
)
|
|
972
|
+
except Exception as e:
|
|
973
|
+
return StructuredToolResult(
|
|
974
|
+
status=StructuredToolResultStatus.ERROR,
|
|
975
|
+
error=str(e),
|
|
651
976
|
params=params,
|
|
652
977
|
)
|
|
653
978
|
|
|
654
|
-
|
|
655
|
-
|
|
979
|
+
def get_parameterized_one_liner(self, params) -> str:
|
|
980
|
+
return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Series"
|
|
981
|
+
|
|
982
|
+
+class GetMetricMetadata(BasePrometheusTool):
+    """Get metadata (type, description, unit) for metrics"""
+
+    def __init__(self, toolset: "PrometheusToolset"):
+        super().__init__(
+            name="get_metric_metadata",
+            description=(
+                "Get metric metadata using /api/v1/metadata. "
+                "Returns type, help text, and unit for metrics. "
+                "Use after discovering metric names to get their descriptions. "
+                f"Returns up to {PROMETHEUS_METADATA_API_LIMIT} metrics (limit={PROMETHEUS_METADATA_API_LIMIT}). If {PROMETHEUS_METADATA_API_LIMIT} results returned, more may exist - filter by specific metric name. "
+                "Supports optional metric name filter."
+            ),
+            parameters={
+                "metric": ToolParameter(
+                    description=(
+                        "Optional metric name to filter (e.g., 'up', 'node_cpu_seconds_total'). "
+                        "If not provided, returns metadata for all metrics."
+                    ),
+                    type="string",
+                    required=False,
+                ),
+            },
+            toolset=toolset,
+        )
+
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
+        if not self.toolset.config or not self.toolset.config.prometheus_url:
             return StructuredToolResult(
-                status=
-                error="
+                status=StructuredToolResultStatus.ERROR,
+                error="Prometheus is not configured. Prometheus URL is missing",
                 params=params,
             )
-
-
+        try:
+            url = urljoin(self.toolset.config.prometheus_url, "api/v1/metadata")
+            query_params = {"limit": str(PROMETHEUS_METADATA_API_LIMIT)}
+
+            if params.get("metric"):
+                query_params["metric"] = params["metric"]
+
+            response = do_request(
+                config=self.toolset.config,
+                url=url,
+                params=query_params,
+                timeout=self.toolset.config.default_metadata_timeout_seconds,
+                verify=self.toolset.config.prometheus_ssl_enabled,
+                headers=self.toolset.config.headers,
+                method="GET",
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            # Check if results were truncated (metadata endpoint returns a dict, not a list)
+            if (
+                "data" in data
+                and isinstance(data["data"], dict)
+                and len(data["data"]) == PROMETHEUS_METADATA_API_LIMIT
+            ):
+                data["_truncated"] = True
+                data["_message"] = (
+                    f"Results truncated at limit={PROMETHEUS_METADATA_API_LIMIT}. Use metric parameter to filter by specific metric name."
+                )
+
             return StructuredToolResult(
-                status=
-
+                status=StructuredToolResultStatus.SUCCESS,
+                data=data,
                 params=params,
             )
         except Exception as e:
-            logging.warn("Failed to process prometheus metrics", exc_info=True)
             return StructuredToolResult(
-                status=
-                error=
+                status=StructuredToolResultStatus.ERROR,
+                error=str(e),
                 params=params,
             )
 
     def get_parameterized_one_liner(self, params) -> str:
-
-        return
+        metric = params.get("metric", "all")
+        return (
+            f"{toolset_name_for_one_liner(self.toolset.name)}: Get Metadata ({metric})"
+        )
 
 
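`/api/v1/metadata` is a stock Prometheus HTTP API endpoint, and the new tool is a thin wrapper around it. The equivalent raw request, including the dict-shaped truncation check, looks roughly like this (URL and limit value are illustrative):

```python
import requests

PROM_URL = "http://localhost:9090"  # assumed endpoint
LIMIT = 1000  # mirrors PROMETHEUS_METADATA_API_LIMIT in the diff

params = {"limit": str(LIMIT)}
# Optional filter, matching the tool's "metric" parameter:
params["metric"] = "node_cpu_seconds_total"

resp = requests.get(f"{PROM_URL}/api/v1/metadata", params=params, timeout=10)
resp.raise_for_status()
data = resp.json()

# Unlike /api/v1/series, this endpoint keys results by metric name (a dict),
# so truncation is detected by the number of keys rather than list length.
if isinstance(data.get("data"), dict) and len(data["data"]) == LIMIT:
    data["_truncated"] = True

for name, entries in data.get("data", {}).items():
    for meta in entries:
        print(name, meta.get("type"), meta.get("help"), meta.get("unit"))
```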
 class ExecuteInstantQuery(BasePrometheusTool):
     def __init__(self, toolset: "PrometheusToolset"):
         super().__init__(
             name="execute_prometheus_instant_query",
-            description=
+            description=(
+                f"Execute an instant PromQL query (single point in time). "
+                f"Default timeout is {DEFAULT_QUERY_TIMEOUT_SECONDS} seconds "
+                f"but can be increased up to {MAX_QUERY_TIMEOUT_SECONDS} seconds for complex/slow queries."
+            ),
             parameters={
                 "query": ToolParameter(
                     description="The PromQL query",
@@ -694,16 +1082,23 @@ class ExecuteInstantQuery(BasePrometheusTool):
                     type="string",
                     required=True,
                 ),
+                "timeout": ToolParameter(
+                    description=(
+                        f"Query timeout in seconds. Default: {DEFAULT_QUERY_TIMEOUT_SECONDS}. "
+                        f"Maximum: {MAX_QUERY_TIMEOUT_SECONDS}. "
+                        f"Increase for complex queries that may take longer."
+                    ),
+                    type="number",
+                    required=False,
+                ),
             },
             toolset=toolset,
         )
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         if not self.toolset.config or not self.toolset.config.prometheus_url:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error="Prometheus is not configured. Prometheus URL is missing",
                 params=params,
             )
@@ -715,12 +1110,24 @@ class ExecuteInstantQuery(BasePrometheusTool):
 
             payload = {"query": query}
 
+            # Get timeout parameter and enforce limits
+            default_timeout = self.toolset.config.default_query_timeout_seconds
+            max_timeout = self.toolset.config.max_query_timeout_seconds
+            timeout = params.get("timeout", default_timeout)
+            if timeout > max_timeout:
+                timeout = max_timeout
+                logging.warning(
+                    f"Timeout requested ({params.get('timeout')}) exceeds maximum ({max_timeout}s), using {max_timeout}s"
+                )
+            elif timeout < 1:
+                timeout = default_timeout  # Min 1 second, but use default if invalid
+
             response = do_request(
                 config=self.toolset.config,
                 url=url,
                 headers=self.toolset.config.headers,
                 data=payload,
-                timeout=
+                timeout=timeout,
                 verify=self.toolset.config.prometheus_ssl_enabled,
                 method="POST",
             )
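The timeout handling added in this hunk (and duplicated verbatim in `ExecuteRangeQuery` below) reduces to a clamp into `[1, max]` with a fallback to the default for missing or invalid values. A standalone sketch of that logic, with illustrative constants standing in for the toolset config values (`default_query_timeout_seconds` / `max_query_timeout_seconds`):

```python
import logging
from typing import Optional

# Illustrative values; the real ones come from the toolset config.
DEFAULT_QUERY_TIMEOUT_SECONDS = 60
MAX_QUERY_TIMEOUT_SECONDS = 180

def clamp_timeout(requested: Optional[float]) -> float:
    """Clamp a caller-supplied timeout into [1, max]; fall back to the default."""
    if requested is None:
        return DEFAULT_QUERY_TIMEOUT_SECONDS
    if requested > MAX_QUERY_TIMEOUT_SECONDS:
        logging.warning(
            "Timeout requested (%s) exceeds maximum (%ss), using %ss",
            requested, MAX_QUERY_TIMEOUT_SECONDS, MAX_QUERY_TIMEOUT_SECONDS,
        )
        return MAX_QUERY_TIMEOUT_SECONDS
    if requested < 1:
        return DEFAULT_QUERY_TIMEOUT_SECONDS  # zero/negative values are treated as invalid
    return requested

assert clamp_timeout(None) == 60
assert clamp_timeout(600) == 180
assert clamp_timeout(0) == 60
```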
@@ -734,24 +1141,64 @@ class ExecuteInstantQuery(BasePrometheusTool):
                 error_message = (
                     "The prometheus query returned no result. Is the query correct?"
                 )
-            response_data =
-
-
-
-
-
-
-
-
+            response_data = MetricsBasedResponse(
+                status=status,
+                error_message=error_message,
+                random_key=generate_random_key(),
+                tool_name=self.name,
+                description=description,
+                query=query,
+            )
+            structured_tool_result: StructuredToolResult
+            # Check if data should be included based on size
             if self.toolset.config.tool_calls_return_data:
-
+                result_data = data.get("data", {})
+                response_data.data = result_data
 
-
-
-
-
-
+                structured_tool_result = create_structured_tool_result(
+                    params=params, response=response_data
+                )
+                token_count = count_tool_response_tokens(
+                    llm=context.llm, structured_tool_result=structured_tool_result
+                )
+
+                token_limit = context.max_token_count
+                if self.toolset.config.query_response_size_limit_pct:
+                    custom_token_limit = get_pct_token_count(
+                        percent_of_total_context_window=self.toolset.config.query_response_size_limit_pct,
+                        llm=context.llm,
+                    )
+                    if custom_token_limit < token_limit:
+                        token_limit = custom_token_limit
+
+                # Provide summary if data is too large
+                if token_count > token_limit:
+                    response_data.data = None
+                    response_data.data_summary = (
+                        create_data_summary_for_large_result(
+                            result_data,
+                            query,
+                            token_count,
+                            is_range_query=False,
+                        )
+                    )
+                    logging.info(
+                        f"Prometheus instant query returned large dataset: "
+                        f"{response_data.data_summary.get('result_count', 0)} results, "
+                        f"{token_count:,} tokens (limit: {token_limit:,}). "
+                        f"Returning summary instead of full data."
+                    )
+                    # Also add token info to the summary for debugging
+                    response_data.data_summary["_debug_info"] = (
+                        f"Data size: {token_count:,} tokens exceeded limit of {token_limit:,} tokens"
+                    )
+                else:
+                    response_data.data = result_data
+
+            structured_tool_result = create_structured_tool_result(
+                params=params, response=response_data
             )
+            return structured_tool_result
 
             # Handle known Prometheus error status codes
             error_msg = "Unknown error occurred"
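The sizing logic above takes the per-call token cap from the invocation context and optionally tightens it to a percentage of the model's context window before deciding whether to swap the full result for a summary. A sketch of that decision under stated assumptions: the ~4-characters-per-token count and the 0-100 percentage scale are guesses standing in for `count_tool_response_tokens` and `get_pct_token_count`, whose implementations are not shown in this diff:

```python
import json
from typing import Optional

def approx_token_count(payload: dict) -> int:
    # Crude stand-in for count_tool_response_tokens: ~4 characters per token.
    return len(json.dumps(payload)) // 4

def effective_token_limit(
    context_limit: int, context_window: int, size_limit_pct: Optional[float]
) -> int:
    # Mirrors the tightening logic above: start from the per-call cap
    # (context.max_token_count) and lower it if query_response_size_limit_pct
    # yields a smaller budget. Assumes the percentage is expressed as 0-100.
    limit = context_limit
    if size_limit_pct:
        pct_limit = int(context_window * size_limit_pct / 100)
        if pct_limit < limit:
            limit = pct_limit
    return limit

# A vector result with many series blows past a 5% budget of a 128k window.
result = {"result": [{"metric": {"pod": f"p{i}"}, "value": [0, "1"]} for i in range(2000)]}
limit = effective_token_limit(context_limit=10_000, context_window=128_000, size_limit_pct=5)
tokens = approx_token_count(result)
if tokens > limit:
    # Stand-in for create_data_summary_for_large_result: keep counts, drop samples.
    print({"result_count": len(result["result"]), "_debug_info": f"{tokens:,} > {limit:,}"})
```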
@@ -764,14 +1211,14 @@ class ExecuteInstantQuery(BasePrometheusTool):
                 except json.JSONDecodeError:
                     pass
                 return StructuredToolResult(
-                    status=
+                    status=StructuredToolResultStatus.ERROR,
                     error=f"Query execution failed. HTTP {response.status_code}: {error_msg}",
                     params=params,
                 )
 
             # For other status codes, just return the status code and content
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Query execution failed with unexpected status code: {response.status_code}. Response: {str(response.content)}",
                 params=params,
             )
@@ -779,14 +1226,14 @@ class ExecuteInstantQuery(BasePrometheusTool):
         except RequestException as e:
             logging.info("Failed to connect to Prometheus", exc_info=True)
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Connection error to Prometheus: {str(e)}",
                 params=params,
             )
         except Exception as e:
             logging.info("Failed to connect to Prometheus", exc_info=True)
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Unexpected error executing query: {str(e)}",
                 params=params,
             )
@@ -800,7 +1247,12 @@ class ExecuteRangeQuery(BasePrometheusTool):
     def __init__(self, toolset: "PrometheusToolset"):
         super().__init__(
             name="execute_prometheus_range_query",
-            description=
+            description=(
+                f"Generates a graph and Execute a PromQL range query. "
+                f"Default timeout is {DEFAULT_QUERY_TIMEOUT_SECONDS} seconds "
+                f"but can be increased up to {MAX_QUERY_TIMEOUT_SECONDS} seconds for complex/slow queries. "
+                f"Default time range is last 1 hour."
+            ),
             parameters={
                 "query": ToolParameter(
                     description="The PromQL query",
@@ -827,23 +1279,40 @@ class ExecuteRangeQuery(BasePrometheusTool):
                 "step": ToolParameter(
                     description="Query resolution step width in duration format or float number of seconds",
                     type="number",
-                    required=
+                    required=False,
                 ),
                 "output_type": ToolParameter(
                     description="Specifies how to interpret the Prometheus result. Use 'Plain' for raw values, 'Bytes' to format byte values, 'Percentage' to scale 0–1 values into 0–100%, or 'CPUUsage' to convert values to cores (e.g., 500 becomes 500m, 2000 becomes 2).",
                     type="string",
                     required=True,
                 ),
+                "timeout": ToolParameter(
+                    description=(
+                        f"Query timeout in seconds. Default: {DEFAULT_QUERY_TIMEOUT_SECONDS}. "
+                        f"Maximum: {MAX_QUERY_TIMEOUT_SECONDS}. "
+                        f"Increase for complex queries that may take longer."
+                    ),
+                    type="number",
+                    required=False,
+                ),
+                "max_points": ToolParameter(
+                    description=(
+                        f"Maximum number of data points to return. Default: {int(MAX_GRAPH_POINTS)}. "
+                        f"Can be reduced to get fewer data points (e.g., 50 for simpler graphs). "
+                        f"Cannot exceed system limit of {int(MAX_GRAPH_POINTS)}. "
+                        f"If your query would return more points than this limit, the step will be automatically adjusted."
+                    ),
+                    type="number",
+                    required=False,
+                ),
             },
             toolset=toolset,
         )
 
-    def _invoke(
-        self, params: dict, user_approved: bool = False
-    ) -> StructuredToolResult:
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
         if not self.toolset.config or not self.toolset.config.prometheus_url:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error="Prometheus is not configured. Prometheus URL is missing",
                 params=params,
             )
@@ -857,12 +1326,17 @@ class ExecuteRangeQuery(BasePrometheusTool):
                 end_timestamp=params.get("end"),
                 default_time_span_seconds=DEFAULT_GRAPH_TIME_SPAN_SECONDS,
             )
-            step = params.get("step"
+            step = parse_duration_to_seconds(params.get("step"))
+            max_points = params.get(
+                "max_points"
+            )  # Get the optional max_points parameter
 
+            # adjust_step_for_max_points handles None case and converts to float
             step = adjust_step_for_max_points(
                 start_timestamp=start,
                 end_timestamp=end,
-                step=
+                step=step,
+                max_points_override=max_points,
             )
 
             description = params.get("description", "")
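`adjust_step_for_max_points` itself is not shown in this diff, but the `max_points` parameter description pins down the constraint it enforces: `(end - start) / step` may not exceed the point limit, so the step is widened when necessary. A minimal sketch of that arithmetic; the constant value, the fallback step, and the function name are assumptions:

```python
import math
from typing import Optional

MAX_GRAPH_POINTS = 300  # illustrative stand-in for the system limit

def adjust_step(start: float, end: float, step: Optional[float],
                max_points_override: Optional[float] = None) -> float:
    """Widen step so that (end - start) / step <= max_points."""
    max_points = MAX_GRAPH_POINTS
    if max_points_override:
        # A caller-supplied value may reduce, but never exceed, the system limit.
        max_points = min(float(max_points_override), MAX_GRAPH_POINTS)
    if not step:
        step = 60.0  # assumed fallback resolution when no step was given
    min_step = (end - start) / max_points
    return max(float(step), math.ceil(min_step))

# A 24h range at 15s resolution would yield 5760 points; the step widens to 288s.
print(adjust_step(0, 86_400, 15))
```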
@@ -874,12 +1348,24 @@ class ExecuteRangeQuery(BasePrometheusTool):
                 "step": step,
             }
 
+            # Get timeout parameter and enforce limits
+            default_timeout = self.toolset.config.default_query_timeout_seconds
+            max_timeout = self.toolset.config.max_query_timeout_seconds
+            timeout = params.get("timeout", default_timeout)
+            if timeout > max_timeout:
+                timeout = max_timeout
+                logging.warning(
+                    f"Timeout requested ({params.get('timeout')}) exceeds maximum ({max_timeout}s), using {max_timeout}s"
+                )
+            elif timeout < 1:
+                timeout = default_timeout  # Min 1 second, but use default if invalid
+
             response = do_request(
                 config=self.toolset.config,
                 url=url,
                 headers=self.toolset.config.headers,
                 data=payload,
-                timeout=
+                timeout=timeout,
                 verify=self.toolset.config.prometheus_ssl_enabled,
                 method="POST",
             )
@@ -893,29 +1379,69 @@ class ExecuteRangeQuery(BasePrometheusTool):
                 error_message = (
                     "The prometheus query returned no result. Is the query correct?"
                 )
-            response_data =
-
-
-
-
-
-
-
-
-
-
+            response_data = MetricsBasedResponse(
+                status=status,
+                error_message=error_message,
+                random_key=generate_random_key(),
+                tool_name=self.name,
+                description=description,
+                query=query,
+                start=start,
+                end=end,
+                step=step,
+                output_type=output_type,
+            )
 
+            structured_tool_result: StructuredToolResult
+
+            # Check if data should be included based on size
             if self.toolset.config.tool_calls_return_data:
-
-
+                result_data = data.get("data", {})
+                response_data.data = result_data
+                structured_tool_result = create_structured_tool_result(
+                    params=params, response=response_data
+                )
 
-
-
-
-
+                token_count = count_tool_response_tokens(
+                    llm=context.llm, structured_tool_result=structured_tool_result
+                )
+
+                token_limit = context.max_token_count
+                if self.toolset.config.query_response_size_limit_pct:
+                    custom_token_limit = get_pct_token_count(
+                        percent_of_total_context_window=self.toolset.config.query_response_size_limit_pct,
+                        llm=context.llm,
+                    )
+                    if custom_token_limit < token_limit:
+                        token_limit = custom_token_limit
+
+                # Provide summary if data is too large
+                if token_count > token_limit:
+                    response_data.data = None
+                    response_data.data_summary = (
+                        create_data_summary_for_large_result(
+                            result_data, query, token_count, is_range_query=True
+                        )
+                    )
+                    logging.info(
+                        f"Prometheus range query returned large dataset: "
+                        f"{response_data.data_summary.get('series_count', 0)} series, "
+                        f"{token_count:,} tokens (limit: {token_limit:,}). "
+                        f"Returning summary instead of full data."
+                    )
+                    # Also add character info to the summary for debugging
+                    response_data.data_summary["_debug_info"] = (
+                        f"Data size: {token_count:,} tokens exceeded limit of {token_limit:,} tokens"
+                    )
+                else:
+                    response_data.data = result_data
+
+                structured_tool_result = create_structured_tool_result(
+                    params=params, response=response_data
                 )
 
+            return structured_tool_result
+
             error_msg = "Unknown error occurred"
             if response.status_code in [400, 429]:
                 try:
@@ -926,13 +1452,13 @@ class ExecuteRangeQuery(BasePrometheusTool):
                 except json.JSONDecodeError:
                     pass
                 return StructuredToolResult(
-                    status=
+                    status=StructuredToolResultStatus.ERROR,
                     error=f"Query execution failed. HTTP {response.status_code}: {error_msg}",
                     params=params,
                 )
 
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Query execution failed with unexpected status code: {response.status_code}. Response: {str(response.content)}",
                 params=params,
             )
@@ -940,14 +1466,14 @@ class ExecuteRangeQuery(BasePrometheusTool):
         except RequestException as e:
             logging.info("Failed to connect to Prometheus", exc_info=True)
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Connection error to Prometheus: {str(e)}",
                 params=params,
             )
         except Exception as e:
             logging.info("Failed to connect to Prometheus", exc_info=True)
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Unexpected error executing query: {str(e)}",
                 params=params,
             )
@@ -969,7 +1495,11 @@ class PrometheusToolset(Toolset):
             prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
             tools=[
                 ListPrometheusRules(toolset=self),
-
+                GetMetricNames(toolset=self),
+                GetLabelValues(toolset=self),
+                GetAllLabels(toolset=self),
+                GetSeries(toolset=self),
+                GetMetricMetadata(toolset=self),
                 ExecuteInstantQuery(toolset=self),
                 ExecuteRangeQuery(toolset=self),
             ],
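The five tools registered above give the model a discovery-then-query workflow: list metric names, inspect their metadata, then run PromQL. For orientation, the same flow against a raw Prometheus; `GetMetricNames` is not shown in this diff, so the `__name__` label-values endpoint used in step 1 is an assumption about how it works, and the URL is illustrative:

```python
import requests

PROM = "http://localhost:9090"  # assumed endpoint

def get(path: str, **params) -> dict:
    resp = requests.get(f"{PROM}{path}", params=params, timeout=10)
    resp.raise_for_status()
    return resp.json()["data"]

# 1. Discover metric names (plausibly what GetMetricNames wraps).
names = get("/api/v1/label/__name__/values")

# 2. Fetch metadata for one of them (GetMetricMetadata).
meta = get("/api/v1/metadata", metric=names[0])

# 3. Run an instant query (ExecuteInstantQuery).
value = get("/api/v1/query", query="up")

print(names[0], meta, value["resultType"])
```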
@@ -1060,13 +1590,8 @@ class PrometheusToolset(Toolset):
                     f"Failed to connect to Prometheus at {url}: HTTP {response.status_code}",
                 )
 
-        except RequestException:
-            return (
-                False,
-                f"Failed to initialize using url={url}",
-            )
         except Exception as e:
-            logging.
+            logging.debug("Failed to initialize Prometheus", exc_info=True)
             return (
                 False,
                 f"Failed to initialize using url={url}. Unexpected error: {str(e)}",