holmesgpt 0.13.3a0__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Note: the registry flagged this version of holmesgpt as potentially problematic.
Files changed (86)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +15 -4
  3. holmes/common/env_vars.py +8 -1
  4. holmes/config.py +66 -139
  5. holmes/core/investigation.py +1 -2
  6. holmes/core/llm.py +295 -52
  7. holmes/core/models.py +2 -0
  8. holmes/core/safeguards.py +4 -4
  9. holmes/core/supabase_dal.py +14 -8
  10. holmes/core/tool_calling_llm.py +202 -177
  11. holmes/core/tools.py +260 -25
  12. holmes/core/tools_utils/data_types.py +81 -0
  13. holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
  14. holmes/core/tools_utils/tool_executor.py +2 -2
  15. holmes/core/toolset_manager.py +150 -3
  16. holmes/core/tracing.py +6 -1
  17. holmes/core/transformers/__init__.py +23 -0
  18. holmes/core/transformers/base.py +62 -0
  19. holmes/core/transformers/llm_summarize.py +174 -0
  20. holmes/core/transformers/registry.py +122 -0
  21. holmes/core/transformers/transformer.py +31 -0
  22. holmes/main.py +5 -0
  23. holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
  24. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  25. holmes/plugins/toolsets/aks.yaml +64 -0
  26. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
  27. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
  28. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
  29. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
  30. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  31. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
  32. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
  33. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
  34. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
  35. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
  36. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
  37. holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
  38. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  39. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  40. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  41. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  42. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +345 -207
  43. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
  44. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +96 -32
  45. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +10 -10
  46. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +21 -22
  47. holmes/plugins/toolsets/git.py +22 -22
  48. holmes/plugins/toolsets/grafana/common.py +14 -2
  49. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +473 -0
  50. holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
  51. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +5 -4
  52. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  53. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +662 -290
  54. holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
  55. holmes/plugins/toolsets/internet/internet.py +3 -3
  56. holmes/plugins/toolsets/internet/notion.py +3 -3
  57. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  58. holmes/plugins/toolsets/kafka.py +18 -18
  59. holmes/plugins/toolsets/kubernetes.yaml +58 -0
  60. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  61. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  62. holmes/plugins/toolsets/logging_utils/logging_api.py +1 -1
  63. holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
  64. holmes/plugins/toolsets/newrelic.py +8 -8
  65. holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
  66. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  67. holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
  68. holmes/plugins/toolsets/prometheus/prometheus.py +841 -351
  69. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +39 -2
  70. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  71. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
  72. holmes/plugins/toolsets/robusta/robusta.py +10 -10
  73. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
  74. holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
  75. holmes/plugins/toolsets/utils.py +88 -0
  76. holmes/utils/config_utils.py +91 -0
  77. holmes/utils/env.py +7 -0
  78. holmes/utils/holmes_status.py +2 -1
  79. holmes/utils/sentry_helper.py +41 -0
  80. holmes/utils/stream.py +9 -0
  81. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/METADATA +11 -15
  82. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/RECORD +85 -75
  83. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  84. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/LICENSE.txt +0 -0
  85. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/WHEEL +0 -0
  86. {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1.dist-info}/entry_points.txt +0 -0
@@ -1,10 +1,9 @@
 import json
 import logging
 import os
-import re
 import time
 import dateutil.parser
-from typing import Any, Dict, List, Optional, Tuple, Type, Union
+from typing import Any, Dict, Optional, Tuple, Type, Union
 from urllib.parse import urljoin
 
 import requests  # type: ignore
@@ -17,11 +16,12 @@ from holmes.core.tools import (
     StructuredToolResult,
     Tool,
     ToolParameter,
-    ToolResultStatus,
+    StructuredToolResultStatus,
     Toolset,
     ToolsetTag,
 )
 from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
+from holmes.plugins.toolsets.prometheus.utils import parse_duration_to_seconds
 from holmes.plugins.toolsets.service_discovery import PrometheusDiscovery
 from holmes.plugins.toolsets.utils import (
     get_param_or_raise,
@@ -38,23 +38,64 @@ from holmes.plugins.toolsets.logging_utils.logging_api import (
 from holmes.utils.keygen_utils import generate_random_key
 
 PROMETHEUS_RULES_CACHE_KEY = "cached_prometheus_rules"
+PROMETHEUS_METADATA_API_LIMIT = 100  # Default limit for Prometheus metadata APIs (series, labels, metadata) to prevent overwhelming responses
+# Default timeout values for PromQL queries
+DEFAULT_QUERY_TIMEOUT_SECONDS = 20
+MAX_QUERY_TIMEOUT_SECONDS = 180
+# Default character limit for query responses to prevent token limit issues
+DEFAULT_QUERY_RESPONSE_SIZE_LIMIT = 20000
+# Default timeout for metadata API calls (discovery endpoints)
+DEFAULT_METADATA_TIMEOUT_SECONDS = 20
+MAX_METADATA_TIMEOUT_SECONDS = 60
+# Default time window for metadata APIs (in hours)
+DEFAULT_METADATA_TIME_WINDOW_HRS = 1
+# Sample size for data summaries when results are too large
+DATA_SUMMARY_SAMPLE_SIZE = 10
 
 
 class PrometheusConfig(BaseModel):
     # URL is optional because it can be set with an env var
     prometheus_url: Optional[str]
     healthcheck: str = "-/healthy"
-    # Setting to None will remove the time window from the request for labels
-    metrics_labels_time_window_hrs: Union[int, None] = 48
-    # Setting to None will disable the cache
-    metrics_labels_cache_duration_hrs: Union[int, None] = 12
-    fetch_labels_with_labels_api: bool = False
-    fetch_metadata_with_series_api: bool = False
+
+    # New config for default time window for metadata APIs
+    default_metadata_time_window_hrs: int = DEFAULT_METADATA_TIME_WINDOW_HRS  # Default: only show metrics active in the last hour
+
+    # Query timeout configuration
+    default_query_timeout_seconds: int = (
+        DEFAULT_QUERY_TIMEOUT_SECONDS  # Default timeout for PromQL queries
+    )
+    max_query_timeout_seconds: int = (
+        MAX_QUERY_TIMEOUT_SECONDS  # Maximum allowed timeout for PromQL queries
+    )
+
+    # Metadata API timeout configuration
+    default_metadata_timeout_seconds: int = (
+        DEFAULT_METADATA_TIMEOUT_SECONDS  # Default timeout for metadata/discovery APIs
+    )
+    max_metadata_timeout_seconds: int = (
+        MAX_METADATA_TIMEOUT_SECONDS  # Maximum allowed timeout for metadata APIs
+    )
+
+    # DEPRECATED: These config values are deprecated and will be removed in a future version
+    # Using None as default so we can detect if user explicitly set them
+    metrics_labels_time_window_hrs: Optional[int] = (
+        None  # DEPRECATED - use default_metadata_time_window_hrs instead
+    )
+    metrics_labels_cache_duration_hrs: Optional[int] = (
+        None  # DEPRECATED - no longer used
+    )
+    fetch_labels_with_labels_api: Optional[bool] = None  # DEPRECATED - no longer used
+    fetch_metadata_with_series_api: Optional[bool] = None  # DEPRECATED - no longer used
+
     tool_calls_return_data: bool = True
     headers: Dict = Field(default_factory=dict)
-    rules_cache_duration_seconds: Union[int, None] = 1800  # 30 minutes
+    rules_cache_duration_seconds: Optional[int] = 1800  # 30 minutes
     additional_labels: Optional[Dict[str, str]] = None
     prometheus_ssl_enabled: bool = True
+    query_response_size_limit: Optional[int] = (
+        DEFAULT_QUERY_RESPONSE_SIZE_LIMIT  # Limit the max number of characters in a query result to proactively prevent token limit issues (roughly 5-6k tokens)
+    )
 
     @field_validator("prometheus_url")
     def ensure_trailing_slash(cls, v: Optional[str]) -> Optional[str]:
@@ -64,6 +105,26 @@ class PrometheusConfig(BaseModel):
 
     @model_validator(mode="after")
     def validate_prom_config(self):
+        # Check for deprecated config values and print warnings
+        deprecated_configs = []
+        if self.metrics_labels_time_window_hrs is not None:  # Check if explicitly set
+            deprecated_configs.append(
+                "metrics_labels_time_window_hrs (use default_metadata_time_window_hrs instead)"
+            )
+        if (
+            self.metrics_labels_cache_duration_hrs is not None
+        ):  # Check if explicitly set
+            deprecated_configs.append("metrics_labels_cache_duration_hrs")
+        if self.fetch_labels_with_labels_api is not None:  # Check if explicitly set
+            deprecated_configs.append("fetch_labels_with_labels_api")
+        if self.fetch_metadata_with_series_api is not None:  # Check if explicitly set
+            deprecated_configs.append("fetch_metadata_with_series_api")
+
+        if deprecated_configs:
+            logging.warning(
+                f"WARNING: The following Prometheus config values are deprecated and will be removed in a future version: "
+                f"{', '.join(deprecated_configs)}. These configs no longer affect behavior."
+            )
         # If openshift is enabled, and the user didn't configure auth headers, we will try to load the token from the service account.
         if IS_OPENSHIFT:
            if self.healthcheck == "-/healthy":
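Per the validator above, an explicitly set deprecated field is accepted but ignored; the only effect is a logged warning. A sketch of the assumed behavior:

    # fetch_labels_with_labels_api no longer affects behavior:
    PrometheusConfig(
        prometheus_url="http://localhost:9090/",  # illustrative
        fetch_labels_with_labels_api=True,
    )
    # expected log (approximate): "WARNING: The following Prometheus config values
    # are deprecated and will be removed in a future version:
    # fetch_labels_with_labels_api. These configs no longer affect behavior."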
@@ -160,6 +221,8 @@ def do_request(
 
     if isinstance(config, AMPConfig):
         client = config.get_aws_client()  # cached AWSPrometheusConnect
+        # Note: timeout parameter is not supported by prometrix's signed_request
+        # AWS/AMP requests will not respect the timeout setting
         return client.signed_request(  # type: ignore
             method=method,
             url=url,
@@ -181,99 +244,6 @@ def do_request(
     )
 
 
-def filter_metrics_by_type(metrics: Dict, expected_type: str):
-    return {
-        metric_name: metric_data
-        for metric_name, metric_data in metrics.items()
-        if expected_type in metric_data.get("type", "")
-        or metric_data.get("type", "") == "?"
-    }
-
-
-def filter_metrics_by_name(metrics: Dict, pattern: str) -> Dict:
-    regex = re.compile(pattern)
-    return {
-        metric_name: metric_data
-        for metric_name, metric_data in metrics.items()
-        if regex.search(metric_name)
-    }
-
-
-METRICS_SUFFIXES_TO_STRIP = ["_bucket", "_count", "_sum"]
-
-
-def fetch_metadata(
-    prometheus_url: str,
-    headers: Optional[Dict],
-    config,
-    verify_ssl: bool = True,
-) -> Dict:
-    metadata_url = urljoin(prometheus_url, "api/v1/metadata")
-    metadata_response = do_request(
-        config=config,
-        url=metadata_url,
-        headers=headers,
-        timeout=60,
-        verify=verify_ssl,
-        method="GET",
-    )
-    metadata_response.raise_for_status()
-
-    metadata = metadata_response.json()["data"]
-
-    metrics = {}
-    for metric_name, meta_list in metadata.items():
-        if meta_list:
-            metric_type = meta_list[0].get("type", "unknown")
-            metric_description = meta_list[0].get("help", "unknown")
-            metrics[metric_name] = {
-                "type": metric_type,
-                "description": metric_description,
-                "labels": set(),
-            }
-
-    return metrics
-
-
-def fetch_metadata_with_series_api(
-    prometheus_url: str,
-    metric_name: str,
-    headers: Dict,
-    config,
-    verify_ssl: bool = True,
-) -> Dict:
-    url = urljoin(prometheus_url, "api/v1/series")
-    params: Dict = {"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"}
-
-    response = do_request(
-        config=config,
-        url=url,
-        headers=headers,
-        params=params,
-        timeout=60,
-        verify=verify_ssl,
-        method="GET",
-    )
-    response.raise_for_status()
-    metrics = response.json()["data"]
-
-    metadata: Dict = {}
-    for metric_data in metrics:
-        metric_name = metric_data.get("__name__")
-        if not metric_name:
-            continue
-
-        metric = metadata.get(metric_name)
-        if not metric:
-            metric = {"description": "?", "type": "?", "labels": set()}
-            metadata[metric_name] = metric
-
-        labels = {k for k in metric_data.keys() if k != "__name__"}
-        metric["labels"].update(labels)
-
-    return metadata
-
-
 def result_has_data(result: Dict) -> bool:
     data = result.get("data", {})
     if len(data.get("result", [])) > 0:
@@ -284,33 +254,58 @@ def result_has_data(result: Dict) -> bool:
 def adjust_step_for_max_points(
     start_timestamp: str,
     end_timestamp: str,
-    step: float,
+    step: Optional[float] = None,
+    max_points_override: Optional[float] = None,
 ) -> float:
     """
     Adjusts the step parameter to ensure the number of data points doesn't exceed max_points.
-    Max points is controlled by the PROMETHEUS_MAX_GRAPH_POINTS environment variable (default: 300).
 
     Args:
         start_timestamp: RFC3339 formatted start time
         end_timestamp: RFC3339 formatted end time
-        step: The requested step duration in seconds
+        step: The requested step duration in seconds (None for auto-calculation)
+        max_points_override: Optional override for max points (must be <= MAX_GRAPH_POINTS)
 
     Returns:
         Adjusted step value in seconds that ensures points <= max_points
     """
+    # Use override if provided and valid, otherwise use default
+    max_points = MAX_GRAPH_POINTS
+    if max_points_override is not None:
+        if max_points_override > MAX_GRAPH_POINTS:
+            logging.warning(
+                f"max_points override ({max_points_override}) exceeds system limit ({MAX_GRAPH_POINTS}), using {MAX_GRAPH_POINTS}"
+            )
+            max_points = MAX_GRAPH_POINTS
+        elif max_points_override < 1:
+            logging.warning(
+                f"max_points override ({max_points_override}) is invalid, using default {MAX_GRAPH_POINTS}"
+            )
+            max_points = MAX_GRAPH_POINTS
+        else:
+            max_points = max_points_override
+            logging.debug(f"Using max_points override: {max_points}")
 
     start_dt = dateutil.parser.parse(start_timestamp)
     end_dt = dateutil.parser.parse(end_timestamp)
 
     time_range_seconds = (end_dt - start_dt).total_seconds()
 
+    # If no step provided, calculate a reasonable default
+    # Aim for ~60 data points across the time range (1 per minute for hourly, etc)
+    if step is None:
+        step = max(1, time_range_seconds / 60)
+        logging.debug(
+            f"No step provided, defaulting to {step}s for {time_range_seconds}s range"
+        )
+
    current_points = time_range_seconds / step
 
     # If current points exceed max, adjust the step
-    if current_points > MAX_GRAPH_POINTS:
-        adjusted_step = time_range_seconds / MAX_GRAPH_POINTS
+    if current_points > max_points:
+        adjusted_step = time_range_seconds / max_points
         logging.info(
-            f"Adjusting step from {step}s to {adjusted_step}s to limit points from {current_points:.0f} to {MAX_GRAPH_POINTS}"
+            f"Adjusting step from {step}s to {adjusted_step}s to limit points from {current_points:.0f} to {max_points}"
         )
         return adjusted_step
 
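A worked example of the adjustment arithmetic above. The removed docstring line gave 300 as the historical default for MAX_GRAPH_POINTS, so 300 is assumed here:

    # 1-hour range (3600s) with step=1s would be 3600 points > 300,
    # so the step is stretched to 3600 / 300 = 12s:
    adjust_step_for_max_points("2024-01-01T00:00:00Z", "2024-01-01T01:00:00Z", step=1)  # 12.0
    # step=None now auto-targets ~60 points: max(1, 3600 / 60) = 60s
    # max_points_override=50 tightens the cap: 3600 / 50 = 72s
    adjust_step_for_max_points(
        "2024-01-01T00:00:00Z", "2024-01-01T01:00:00Z", step=1, max_points_override=50
    )  # 72.0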
@@ -324,168 +319,97 @@ def add_prometheus_auth(prometheus_auth_header: Optional[str]) -> Dict[str, Any]
     return results
 
 
-def fetch_metrics_labels_with_series_api(
-    prometheus_url: str,
-    headers: Dict[str, str],
-    cache: Optional[TTLCache],
-    metrics_labels_time_window_hrs: Union[int, None],
-    metric_name: str,
-    config=None,
-    verify_ssl: bool = True,
-) -> dict:
-    """This is a slow query. Takes 5+ seconds to run"""
-    cache_key = f"metrics_labels_series_api:{metric_name}"
-    if cache:
-        cached_result = cache.get(cache_key)
-        if cached_result:
-            return cached_result
-
-    series_url = urljoin(prometheus_url, "api/v1/series")
-    params: dict = {"match[]": f'{{__name__=~".*{metric_name}.*"}}', "limit": "10000"}
-
-    if metrics_labels_time_window_hrs is not None:
-        params["end"] = int(time.time())
-        params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
-
-    series_response = do_request(
-        config=config,
-        url=series_url,
-        headers=headers,
-        params=params,
-        timeout=60,
-        verify=verify_ssl,
-        method="GET",
-    )
-    series_response.raise_for_status()
-    series = series_response.json()["data"]
-
-    metrics_labels: dict = {}
-    for serie in series:
-        metric_name = serie["__name__"]
-        # Add all labels except __name__
-        labels = {k for k in serie.keys() if k != "__name__"}
-        if metric_name in metrics_labels:
-            metrics_labels[metric_name].update(labels)
-        else:
-            metrics_labels[metric_name] = labels
-    if cache:
-        cache.set(cache_key, metrics_labels)
-
-    return metrics_labels
-
-
-def fetch_metrics_labels_with_labels_api(
-    prometheus_url: str,
-    cache: Optional[TTLCache],
-    metrics_labels_time_window_hrs: Union[int, None],
-    metric_names: List[str],
-    headers: Dict,
-    config=None,
-    verify_ssl: bool = True,
-) -> dict:
-    metrics_labels = {}
-
-    for metric_name in metric_names:
-        cache_key = f"metrics_labels_labels_api:{metric_name}"
-        if cache:
-            cached_result = cache.get(cache_key)
-            if cached_result:
-                metrics_labels[metric_name] = cached_result
-
-        url = urljoin(prometheus_url, "api/v1/labels")
-        params: dict = {
-            "match[]": f'{{__name__="{metric_name}"}}',
-        }
-        if metrics_labels_time_window_hrs is not None:
-            params["end"] = int(time.time())
-            params["start"] = params["end"] - (metrics_labels_time_window_hrs * 60 * 60)
+def create_data_summary_for_large_result(
+    result_data: Dict, query: str, data_size_chars: int, is_range_query: bool = False
+) -> Dict[str, Any]:
+    """
+    Create a summary for large Prometheus results instead of returning full data.
 
-        response = do_request(
-            config=config,
-            url=url,
-            headers=headers,
-            params=params,
-            timeout=60,
-            verify=verify_ssl,
-            method="GET",
-        )
-        response.raise_for_status()
-        labels = response.json()["data"]
-        filtered_labels = {label for label in labels if label != "__name__"}
-        metrics_labels[metric_name] = filtered_labels
-
-        if cache:
-            cache.set(cache_key, filtered_labels)
-
-    return metrics_labels
-
-
-def fetch_metrics(
-    prometheus_url: str,
-    cache: Optional[TTLCache],
-    metrics_labels_time_window_hrs: Union[int, None],
-    metric_name: str,
-    should_fetch_labels_with_labels_api: bool,
-    should_fetch_metadata_with_series_api: bool,
-    headers: Dict,
-    config=None,
-    verify_ssl: bool = True,
-) -> dict:
-    metrics = None
-    should_fetch_labels = True
-    if should_fetch_metadata_with_series_api:
-        metrics = fetch_metadata_with_series_api(
-            prometheus_url=prometheus_url,
-            metric_name=metric_name,
-            headers=headers,
-            config=config,
-            verify_ssl=verify_ssl,
+    Args:
+        result_data: The Prometheus data result
+        query: The original PromQL query
+        data_size_chars: Size of the data in characters
+        is_range_query: Whether this is a range query (vs instant query)
+
+    Returns:
+        Dictionary with summary information and suggestions
+    """
+    if is_range_query:
+        series_list = result_data.get("result", [])
+        num_items = len(series_list)
+
+        # Calculate exact total data points across all series
+        total_points = 0
+        for series in series_list:  # Iterate through ALL series for exact count
+            points = len(series.get("values", []))
+            total_points += points
+
+        # Analyze label keys and their cardinality
+        label_cardinality: Dict[str, set] = {}
+        for series in series_list:
+            metric = series.get("metric", {})
+            for label_key, label_value in metric.items():
+                if label_key not in label_cardinality:
+                    label_cardinality[label_key] = set()
+                label_cardinality[label_key].add(label_value)
+
+        # Convert sets to counts for the summary
+        label_summary = {
+            label: len(values) for label, values in label_cardinality.items()
+        }
+        # Sort by cardinality (highest first) for better insights
+        label_summary = dict(
+            sorted(label_summary.items(), key=lambda x: x[1], reverse=True)
         )
-        should_fetch_labels = False  # series API returns the labels
+
+        return {
+            "message": f"Data too large to return ({data_size_chars:,} characters). Query returned {num_items} time series with {total_points:,} total data points.",
+            "series_count": num_items,
+            "total_data_points": total_points,
+            "data_size_characters": data_size_chars,
+            "label_cardinality": label_summary,
+            "suggestion": f'Consider using topk({min(5, num_items)}, {query}) to limit results to the top {min(5, num_items)} series. To also capture remaining data as \'other\': topk({min(5, num_items)}, {query}) or label_replace((sum({query}) - sum(topk({min(5, num_items)}, {query}))), "pod", "other", "", "")',
+        }
     else:
-        metrics = fetch_metadata(
-            prometheus_url=prometheus_url,
-            headers=headers,
-            config=config,
-            verify_ssl=verify_ssl,
+        # Instant query
+        result_type = result_data.get("resultType", "")
+        result_list = result_data.get("result", [])
+        num_items = len(result_list)
+
+        # Analyze label keys and their cardinality
+        instant_label_cardinality: Dict[str, set] = {}
+        for item in result_list:
+            if isinstance(item, dict):
+                metric = item.get("metric", {})
+                for label_key, label_value in metric.items():
+                    if label_key not in instant_label_cardinality:
+                        instant_label_cardinality[label_key] = set()
+                    instant_label_cardinality[label_key].add(label_value)
+
+        # Convert sets to counts for the summary
+        label_summary = {
+            label: len(values) for label, values in instant_label_cardinality.items()
+        }
+        # Sort by cardinality (highest first) for better insights
+        label_summary = dict(
+            sorted(label_summary.items(), key=lambda x: x[1], reverse=True)
         )
-        metrics = filter_metrics_by_name(metrics, metric_name)
-
-    if should_fetch_labels:
-        metrics_labels = {}
-        if should_fetch_labels_with_labels_api:
-            metrics_labels = fetch_metrics_labels_with_labels_api(
-                prometheus_url=prometheus_url,
-                cache=cache,
-                metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
-                metric_names=list(metrics.keys()),
-                headers=headers,
-                config=config,
-                verify_ssl=verify_ssl,
-            )
-        else:
-            metrics_labels = fetch_metrics_labels_with_series_api(
-                prometheus_url=prometheus_url,
-                cache=cache,
-                metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
-                metric_name=metric_name,
-                headers=headers,
-                config=config,
-                verify_ssl=verify_ssl,
-            )
-
-    for metric_name in metrics:
-        if metric_name in metrics_labels:
-            metrics[metric_name]["labels"] = metrics_labels[metric_name]
 
-    return metrics
+        return {
+            "message": f"Data too large to return ({data_size_chars:,} characters). Query returned {num_items} results.",
+            "result_count": num_items,
+            "result_type": result_type,
+            "data_size_characters": data_size_chars,
+            "label_cardinality": label_summary,
+            "suggestion": f'Consider using topk({min(5, num_items)}, {query}) to limit results. To also capture remaining data as \'other\': topk({min(5, num_items)}, {query}) or label_replace((sum({query}) - sum(topk({min(5, num_items)}, {query}))), "instance", "other", "", "")',
+        }
 
 
 class ListPrometheusRules(BasePrometheusTool):
     def __init__(self, toolset: "PrometheusToolset"):
         super().__init__(
             name="list_prometheus_rules",
-            description="List all defined prometheus rules. Will show the prometheus rules description, expression and annotations",
+            description="List all defined Prometheus rules (api/v1/rules). Will show the Prometheus rules description, expression and annotations",
             parameters={},
             toolset=toolset,
         )
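For orientation, the shape of the summary a range query now returns in place of oversized data (keys per the function above; the numbers are invented for illustration):

    {
        "message": "Data too large to return (85,000 characters). Query returned 40 time series with 12,000 total data points.",
        "series_count": 40,
        "total_data_points": 12000,
        "data_size_characters": 85000,
        "label_cardinality": {"pod": 40, "namespace": 3},  # sorted highest first
        "suggestion": "Consider using topk(5, <query>) to limit results to the top 5 series. ...",
    }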
@@ -496,13 +420,13 @@ class ListPrometheusRules(BasePrometheusTool):
     ) -> StructuredToolResult:
         if not self.toolset.config or not self.toolset.config.prometheus_url:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error="Prometheus is not configured. Prometheus URL is missing",
                 params=params,
             )
         if self.toolset.config.is_amp():
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error="Tool not supported in AMP",
                 params=params,
             )
@@ -515,7 +439,7 @@ class ListPrometheusRules(BasePrometheusTool):
                 logging.debug("rules returned from cache")
 
                 return StructuredToolResult(
-                    status=ToolResultStatus.SUCCESS,
+                    status=StructuredToolResultStatus.SUCCESS,
                     data=cached_rules,
                     params=params,
                 )
@@ -528,7 +452,7 @@ class ListPrometheusRules(BasePrometheusTool):
                 config=self.toolset.config,
                 url=rules_url,
                 params=params,
-                timeout=180,
+                timeout=40,
                 verify=self.toolset.config.prometheus_ssl_enabled,
                 headers=self.toolset.config.headers,
                 method="GET",
@@ -539,28 +463,28 @@ class ListPrometheusRules(BasePrometheusTool):
             if self._cache:
                 self._cache.set(PROMETHEUS_RULES_CACHE_KEY, data)
             return StructuredToolResult(
-                status=ToolResultStatus.SUCCESS,
+                status=StructuredToolResultStatus.SUCCESS,
                 data=data,
                 params=params,
             )
         except requests.Timeout:
             logging.warning("Timeout while fetching prometheus rules", exc_info=True)
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error="Request timed out while fetching rules",
                 params=params,
             )
         except RequestException as e:
             logging.warning("Failed to fetch prometheus rules", exc_info=True)
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Network error while fetching rules: {str(e)}",
                 params=params,
             )
         except Exception as e:
             logging.warning("Failed to process prometheus rules", exc_info=True)
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Unexpected error: {str(e)}",
                 params=params,
             )
@@ -569,120 +493,563 @@ class ListPrometheusRules(BasePrometheusTool):
         return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Rules"
 
 
-class ListAvailableMetrics(BasePrometheusTool):
+class GetMetricNames(BasePrometheusTool):
+    """Thin wrapper around /api/v1/label/__name__/values - the fastest way to discover metric names"""
+
     def __init__(self, toolset: "PrometheusToolset"):
         super().__init__(
-            name="list_available_metrics",
-            description="List all the available metrics to query from prometheus, including their types (counter, gauge, histogram, summary) and available labels.",
+            name="get_metric_names",
+            description=(
+                "Get list of metric names using /api/v1/label/__name__/values. "
+                "FASTEST method for metric discovery when you need to explore available metrics. "
+                f"Returns up to {PROMETHEUS_METADATA_API_LIMIT} unique metric names (limit={PROMETHEUS_METADATA_API_LIMIT}). If {PROMETHEUS_METADATA_API_LIMIT} results returned, more may exist - use a more specific filter. "
+                f"ALWAYS use match[] parameter to filter metrics - without it you'll get random {PROMETHEUS_METADATA_API_LIMIT} metrics which is rarely useful. "
+                "Note: Does not return metric metadata (type, description, labels). "
+                "By default returns metrics active in the last 1 hour (configurable via default_metadata_time_window_hrs)."
+            ),
             parameters={
-                "type_filter": ToolParameter(
-                    description="Optional filter to only return a specific metric type. Can be one of counter, gauge, histogram, summary",
+                "match": ToolParameter(
+                    description=(
+                        "REQUIRED: PromQL selector to filter metrics. Use regex OR (|) to check multiple patterns in one call - much faster than multiple calls! Examples: "
+                        "'{__name__=~\"node_cpu.*|node_memory.*|node_disk.*\"}' for all node resource metrics, "
+                        "'{__name__=~\"container_cpu.*|container_memory.*|container_network.*\"}' for all container metrics, "
+                        "'{__name__=~\"kube_pod.*|kube_deployment.*|kube_service.*\"}' for multiple Kubernetes object metrics, "
+                        "'{__name__=~\".*cpu.*|.*memory.*|.*disk.*\"}' for all resource metrics, "
+                        "'{namespace=~\"kube-system|default|monitoring\"}' for metrics from multiple namespaces, "
+                        "'{job=~\"prometheus|node-exporter|kube-state-metrics\"}' for metrics from multiple jobs."
+                    ),
+                    type="string",
+                    required=True,
+                ),
+                "start": ToolParameter(
+                    description="Start timestamp (RFC3339 or Unix). Default: 1 hour ago",
                     type="string",
                     required=False,
                 ),
-                "name_filter": ToolParameter(
-                    description="Only the metrics partially or fully matching this name will be returned",
+                "end": ToolParameter(
+                    description="End timestamp (RFC3339 or Unix). Default: now",
                     type="string",
-                    required=True,
+                    required=False,
                 ),
             },
             toolset=toolset,
         )
-        self._cache = None
 
     def _invoke(
         self, params: dict, user_approved: bool = False
     ) -> StructuredToolResult:
         if not self.toolset.config or not self.toolset.config.prometheus_url:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error="Prometheus is not configured. Prometheus URL is missing",
                 params=params,
             )
-        if not self._cache and self.toolset.config.metrics_labels_cache_duration_hrs:
-            self._cache = TTLCache(
-                self.toolset.config.metrics_labels_cache_duration_hrs * 3600  # type: ignore
-            )
         try:
-            prometheus_url = self.toolset.config.prometheus_url
-            metrics_labels_time_window_hrs = (
-                self.toolset.config.metrics_labels_time_window_hrs
+            match_param = params.get("match")
+            if not match_param:
+                return StructuredToolResult(
+                    status=StructuredToolResultStatus.ERROR,
+                    error="Match parameter is required to filter metrics",
+                    params=params,
+                )
+
+            url = urljoin(
+                self.toolset.config.prometheus_url, "api/v1/label/__name__/values"
            )
+            query_params = {
+                "limit": str(PROMETHEUS_METADATA_API_LIMIT),
+                "match[]": match_param,
+            }
+
+            # Add time parameters - use provided values or defaults
+            if params.get("end"):
+                query_params["end"] = params["end"]
+            else:
+                query_params["end"] = str(int(time.time()))
+
+            if params.get("start"):
+                query_params["start"] = params["start"]
+            elif self.toolset.config.default_metadata_time_window_hrs:
+                # Use default time window
+                query_params["start"] = str(
+                    int(time.time())
+                    - (self.toolset.config.default_metadata_time_window_hrs * 3600)
+                )
+
+            response = do_request(
+                config=self.toolset.config,
+                url=url,
+                params=query_params,
+                timeout=self.toolset.config.default_metadata_timeout_seconds,
+                verify=self.toolset.config.prometheus_ssl_enabled,
+                headers=self.toolset.config.headers,
+                method="GET",
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            # Check if results were truncated
+            if (
+                "data" in data
+                and isinstance(data["data"], list)
+                and len(data["data"]) == PROMETHEUS_METADATA_API_LIMIT
+            ):
+                data["_truncated"] = True
+                data["_message"] = (
+                    f"Results truncated at limit={PROMETHEUS_METADATA_API_LIMIT}. Use a more specific match filter to see additional metrics."
+                )
+
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.SUCCESS,
+                data=data,
+                params=params,
+            )
+        except Exception as e:
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error=str(e),
+                params=params,
+            )
+
+    def get_parameterized_one_liner(self, params) -> str:
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Metric Names"
+
+
+class GetLabelValues(BasePrometheusTool):
+    """Get values for a specific label across all metrics"""
+
+    def __init__(self, toolset: "PrometheusToolset"):
+        super().__init__(
+            name="get_label_values",
+            description=(
+                "Get all values for a specific label using /api/v1/label/{label}/values. "
+                "Use this to discover pods, namespaces, jobs, instances, etc. "
+                f"Returns up to {PROMETHEUS_METADATA_API_LIMIT} unique values (limit={PROMETHEUS_METADATA_API_LIMIT}). If {PROMETHEUS_METADATA_API_LIMIT} results returned, more may exist - use match[] to filter. "
+                "Supports optional match[] parameter to filter. "
+                "By default returns values from metrics active in the last 1 hour (configurable via default_metadata_time_window_hrs)."
+            ),
+            parameters={
+                "label": ToolParameter(
+                    description="Label name to get values for (e.g., 'pod', 'namespace', 'job', 'instance')",
+                    type="string",
+                    required=True,
+                ),
+                "match": ToolParameter(
+                    description=(
+                        "Optional PromQL selector to filter (e.g., '{__name__=~\"kube.*\"}', "
+                        "'{namespace=\"default\"}')."
+                    ),
+                    type="string",
+                    required=False,
+                ),
+                "start": ToolParameter(
+                    description="Start timestamp (RFC3339 or Unix). Default: 1 hour ago",
+                    type="string",
+                    required=False,
+                ),
+                "end": ToolParameter(
+                    description="End timestamp (RFC3339 or Unix). Default: now",
+                    type="string",
+                    required=False,
+                ),
+            },
+            toolset=toolset,
+        )
 
-            name_filter = params.get("name_filter")
-            if not name_filter:
+    def _invoke(
+        self, params: dict, user_approved: bool = False
+    ) -> StructuredToolResult:
+        if not self.toolset.config or not self.toolset.config.prometheus_url:
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error="Prometheus is not configured. Prometheus URL is missing",
+                params=params,
+            )
+        try:
+            label = params.get("label")
+            if not label:
                 return StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
-                    error="Error: cannot run tool 'list_available_metrics'. The param 'name_filter' is required but is missing.",
+                    status=StructuredToolResultStatus.ERROR,
+                    error="Label parameter is required",
                     params=params,
                 )
 
-            metrics = fetch_metrics(
-                prometheus_url=prometheus_url,
-                cache=self._cache,
-                metrics_labels_time_window_hrs=metrics_labels_time_window_hrs,
-                metric_name=name_filter,
-                should_fetch_labels_with_labels_api=self.toolset.config.fetch_labels_with_labels_api,
-                should_fetch_metadata_with_series_api=self.toolset.config.fetch_metadata_with_series_api,
+            url = urljoin(
+                self.toolset.config.prometheus_url, f"api/v1/label/{label}/values"
+            )
+            query_params = {"limit": str(PROMETHEUS_METADATA_API_LIMIT)}
+            if params.get("match"):
+                query_params["match[]"] = params["match"]
+
+            # Add time parameters - use provided values or defaults
+            if params.get("end"):
+                query_params["end"] = params["end"]
+            else:
+                query_params["end"] = str(int(time.time()))
+
+            if params.get("start"):
+                query_params["start"] = params["start"]
+            elif self.toolset.config.default_metadata_time_window_hrs:
+                # Use default time window
+                query_params["start"] = str(
+                    int(time.time())
+                    - (self.toolset.config.default_metadata_time_window_hrs * 3600)
+                )
+
+            response = do_request(
+                config=self.toolset.config,
+                url=url,
+                params=query_params,
+                timeout=self.toolset.config.default_metadata_timeout_seconds,
+                verify=self.toolset.config.prometheus_ssl_enabled,
                 headers=self.toolset.config.headers,
+                method="GET",
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            # Check if results were truncated
+            if (
+                "data" in data
+                and isinstance(data["data"], list)
+                and len(data["data"]) == PROMETHEUS_METADATA_API_LIMIT
+            ):
+                data["_truncated"] = True
+                data["_message"] = (
+                    f"Results truncated at limit={PROMETHEUS_METADATA_API_LIMIT}. Use match[] parameter to filter label '{label}' values."
+                )
+
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.SUCCESS,
+                data=data,
+                params=params,
+            )
+        except Exception as e:
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error=str(e),
+                params=params,
+            )
+
+    def get_parameterized_one_liner(self, params) -> str:
+        label = params.get("label", "")
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Get {label} Values"
+
+
+class GetAllLabels(BasePrometheusTool):
+    """Get all label names that exist in Prometheus"""
+
+    def __init__(self, toolset: "PrometheusToolset"):
+        super().__init__(
+            name="get_all_labels",
+            description=(
+                "Get list of all label names using /api/v1/labels. "
+                "Use this to discover what labels are available across all metrics. "
+                f"Returns up to {PROMETHEUS_METADATA_API_LIMIT} label names (limit={PROMETHEUS_METADATA_API_LIMIT}). If {PROMETHEUS_METADATA_API_LIMIT} results returned, more may exist - use match[] to filter. "
+                "Supports optional match[] parameter to filter. "
+                "By default returns labels from metrics active in the last 1 hour (configurable via default_metadata_time_window_hrs)."
+            ),
+            parameters={
+                "match": ToolParameter(
+                    description=(
+                        "Optional PromQL selector to filter (e.g., '{__name__=~\"kube.*\"}', "
+                        "'{job=\"prometheus\"}')."
+                    ),
+                    type="string",
+                    required=False,
+                ),
+                "start": ToolParameter(
+                    description="Start timestamp (RFC3339 or Unix). Default: 1 hour ago",
+                    type="string",
+                    required=False,
+                ),
+                "end": ToolParameter(
+                    description="End timestamp (RFC3339 or Unix). Default: now",
+                    type="string",
+                    required=False,
+                ),
+            },
+            toolset=toolset,
+        )
+
+    def _invoke(
+        self, params: dict, user_approved: bool = False
+    ) -> StructuredToolResult:
+        if not self.toolset.config or not self.toolset.config.prometheus_url:
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error="Prometheus is not configured. Prometheus URL is missing",
+                params=params,
+            )
+        try:
+            url = urljoin(self.toolset.config.prometheus_url, "api/v1/labels")
+            query_params = {"limit": str(PROMETHEUS_METADATA_API_LIMIT)}
+            if params.get("match"):
+                query_params["match[]"] = params["match"]
+
+            # Add time parameters - use provided values or defaults
+            if params.get("end"):
+                query_params["end"] = params["end"]
+            else:
+                query_params["end"] = str(int(time.time()))
+
+            if params.get("start"):
+                query_params["start"] = params["start"]
+            elif self.toolset.config.default_metadata_time_window_hrs:
+                # Use default time window
+                query_params["start"] = str(
+                    int(time.time())
+                    - (self.toolset.config.default_metadata_time_window_hrs * 3600)
+                )
+
+            response = do_request(
                 config=self.toolset.config,
-                verify_ssl=self.toolset.config.prometheus_ssl_enabled,
+                url=url,
+                params=query_params,
+                timeout=self.toolset.config.default_metadata_timeout_seconds,
+                verify=self.toolset.config.prometheus_ssl_enabled,
+                headers=self.toolset.config.headers,
+                method="GET",
             )
+            response.raise_for_status()
+            data = response.json()
+
+            # Check if results were truncated
+            if (
+                "data" in data
+                and isinstance(data["data"], list)
+                and len(data["data"]) == PROMETHEUS_METADATA_API_LIMIT
+            ):
+                data["_truncated"] = True
+                data["_message"] = (
+                    f"Results truncated at limit={PROMETHEUS_METADATA_API_LIMIT}. Use match[] parameter to filter labels."
+                )
 
-            type_filter = params.get("type_filter")
-            if type_filter:
-                metrics = filter_metrics_by_type(metrics, type_filter)
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.SUCCESS,
+                data=data,
+                params=params,
+            )
+        except Exception as e:
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error=str(e),
+                params=params,
+            )
 
-            output = ["Metric | Description | Type | Labels"]
-            output.append("-" * 100)
+    def get_parameterized_one_liner(self, params) -> str:
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Get All Labels"
+
+
+class GetSeries(BasePrometheusTool):
+    """Get time series matching a selector"""
 
-            for metric, info in sorted(metrics.items()):
-                labels_str = (
-                    ", ".join(sorted(info["labels"])) if info["labels"] else "none"
+    def __init__(self, toolset: "PrometheusToolset"):
+        super().__init__(
+            name="get_series",
+            description=(
+                "Get time series using /api/v1/series. "
+                "Returns label sets for all time series matching the selector. "
+                "SLOWER than other discovery methods - use only when you need full label sets. "
+                f"Returns up to {PROMETHEUS_METADATA_API_LIMIT} series (limit={PROMETHEUS_METADATA_API_LIMIT}). If {PROMETHEUS_METADATA_API_LIMIT} results returned, more series exist - use more specific selector. "
+                "Requires match[] parameter with PromQL selector. "
+                "By default returns series active in the last 1 hour (configurable via default_metadata_time_window_hrs)."
+            ),
+            parameters={
+                "match": ToolParameter(
+                    description=(
+                        "PromQL selector to match series (e.g., 'up', 'node_cpu_seconds_total', "
+                        "'{__name__=~\"node.*\"}', '{job=\"prometheus\"}', "
+                        '\'{__name__="up",job="prometheus"}\').'
+                    ),
+                    type="string",
+                    required=True,
+                ),
+                "start": ToolParameter(
+                    description="Start timestamp (RFC3339 or Unix). Default: 1 hour ago",
+                    type="string",
+                    required=False,
+                ),
+                "end": ToolParameter(
+                    description="End timestamp (RFC3339 or Unix). Default: now",
+                    type="string",
+                    required=False,
+                ),
+            },
+            toolset=toolset,
+        )
+
+    def _invoke(
+        self, params: dict, user_approved: bool = False
+    ) -> StructuredToolResult:
+        if not self.toolset.config or not self.toolset.config.prometheus_url:
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error="Prometheus is not configured. Prometheus URL is missing",
+                params=params,
+            )
+        try:
+            match = params.get("match")
+            if not match:
+                return StructuredToolResult(
+                    status=StructuredToolResultStatus.ERROR,
+                    error="Match parameter is required",
+                    params=params,
                 )
-                output.append(
-                    f"{metric} | {info['description']} | {info['type']} | {labels_str}"
+
+            url = urljoin(self.toolset.config.prometheus_url, "api/v1/series")
+            query_params = {
+                "match[]": match,
+                "limit": str(PROMETHEUS_METADATA_API_LIMIT),
+            }
+
+            # Add time parameters - use provided values or defaults
+            if params.get("end"):
+                query_params["end"] = params["end"]
+            else:
+                query_params["end"] = str(int(time.time()))
+
+            if params.get("start"):
+                query_params["start"] = params["start"]
+            elif self.toolset.config.default_metadata_time_window_hrs:
+                # Use default time window
+                query_params["start"] = str(
+                    int(time.time())
+                    - (self.toolset.config.default_metadata_time_window_hrs * 3600)
                 )
 
-            table_output = "\n".join(output)
+            response = do_request(
+                config=self.toolset.config,
+                url=url,
+                params=query_params,
+                timeout=self.toolset.config.default_metadata_timeout_seconds,
+                verify=self.toolset.config.prometheus_ssl_enabled,
+                headers=self.toolset.config.headers,
+                method="GET",
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            # Check if results were truncated
+            if (
+                "data" in data
+                and isinstance(data["data"], list)
+                and len(data["data"]) == PROMETHEUS_METADATA_API_LIMIT
+            ):
+                data["_truncated"] = True
+                data["_message"] = (
+                    f"Results truncated at limit={PROMETHEUS_METADATA_API_LIMIT}. Use a more specific match selector to see additional series."
+                )
+
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.SUCCESS,
+                data=data,
+                params=params,
+            )
+        except Exception as e:
             return StructuredToolResult(
-                status=ToolResultStatus.SUCCESS,
-                data=table_output,
+                status=StructuredToolResultStatus.ERROR,
+                error=str(e),
                 params=params,
             )
 
-        except requests.Timeout:
-            logging.warn("Timeout while fetching prometheus metrics", exc_info=True)
+    def get_parameterized_one_liner(self, params) -> str:
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Get Series"
+
+
+class GetMetricMetadata(BasePrometheusTool):
+    """Get metadata (type, description, unit) for metrics"""
+
+    def __init__(self, toolset: "PrometheusToolset"):
+        super().__init__(
+            name="get_metric_metadata",
+            description=(
+                "Get metric metadata using /api/v1/metadata. "
+                "Returns type, help text, and unit for metrics. "
+                "Use after discovering metric names to get their descriptions. "
+                f"Returns up to {PROMETHEUS_METADATA_API_LIMIT} metrics (limit={PROMETHEUS_METADATA_API_LIMIT}). If {PROMETHEUS_METADATA_API_LIMIT} results returned, more may exist - filter by specific metric name. "
+                "Supports optional metric name filter."
+            ),
+            parameters={
+                "metric": ToolParameter(
+                    description=(
+                        "Optional metric name to filter (e.g., 'up', 'node_cpu_seconds_total'). "
+                        "If not provided, returns metadata for all metrics."
+                    ),
+                    type="string",
+                    required=False,
+                ),
+            },
+            toolset=toolset,
+        )
+
+    def _invoke(
+        self, params: dict, user_approved: bool = False
+    ) -> StructuredToolResult:
+        if not self.toolset.config or not self.toolset.config.prometheus_url:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
-                error="Request timed out while fetching metrics",
+                status=StructuredToolResultStatus.ERROR,
+                error="Prometheus is not configured. Prometheus URL is missing",
                 params=params,
             )
-        except RequestException as e:
-            logging.warn("Failed to fetch prometheus metrics", exc_info=True)
+        try:
+            url = urljoin(self.toolset.config.prometheus_url, "api/v1/metadata")
+            query_params = {"limit": str(PROMETHEUS_METADATA_API_LIMIT)}
+
+            if params.get("metric"):
+                query_params["metric"] = params["metric"]
+
+            response = do_request(
+                config=self.toolset.config,
+                url=url,
+                params=query_params,
+                timeout=self.toolset.config.default_metadata_timeout_seconds,
+                verify=self.toolset.config.prometheus_ssl_enabled,
+                headers=self.toolset.config.headers,
+                method="GET",
+            )
+            response.raise_for_status()
+            data = response.json()
+
+            # Check if results were truncated (metadata endpoint returns a dict, not a list)
+            if (
+                "data" in data
+                and isinstance(data["data"], dict)
+                and len(data["data"]) == PROMETHEUS_METADATA_API_LIMIT
+            ):
+                data["_truncated"] = True
+                data["_message"] = (
+                    f"Results truncated at limit={PROMETHEUS_METADATA_API_LIMIT}. Use metric parameter to filter by specific metric name."
+                )
+
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
-                error=f"Network error while fetching metrics: {str(e)}",
+                status=StructuredToolResultStatus.SUCCESS,
+                data=data,
                 params=params,
             )
         except Exception as e:
-            logging.warn("Failed to process prometheus metrics", exc_info=True)
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
-                error=f"Unexpected error: {str(e)}",
+                status=StructuredToolResultStatus.ERROR,
+                error=str(e),
                 params=params,
             )
 
     def get_parameterized_one_liner(self, params) -> str:
-        name_filter = params.get("name_filter", "")
-        return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Metrics ({name_filter})"
+        metric = params.get("metric", "all")
+        return (
+            f"{toolset_name_for_one_liner(self.toolset.name)}: Get Metadata ({metric})"
+        )
 
 
 class ExecuteInstantQuery(BasePrometheusTool):
     def __init__(self, toolset: "PrometheusToolset"):
         super().__init__(
             name="execute_prometheus_instant_query",
-            description="Execute an instant PromQL query",
+            description=(
+                f"Execute an instant PromQL query (single point in time). "
+                f"Default timeout is {DEFAULT_QUERY_TIMEOUT_SECONDS} seconds "
+                f"but can be increased up to {MAX_QUERY_TIMEOUT_SECONDS} seconds for complex/slow queries."
+            ),
             parameters={
                 "query": ToolParameter(
                     description="The PromQL query",
@@ -694,6 +1061,15 @@ class ExecuteInstantQuery(BasePrometheusTool):
                     type="string",
                     required=True,
                 ),
+                "timeout": ToolParameter(
+                    description=(
+                        f"Query timeout in seconds. Default: {DEFAULT_QUERY_TIMEOUT_SECONDS}. "
+                        f"Maximum: {MAX_QUERY_TIMEOUT_SECONDS}. "
+                        f"Increase for complex queries that may take longer."
+                    ),
+                    type="number",
+                    required=False,
+                ),
             },
             toolset=toolset,
         )
@@ -703,7 +1079,7 @@ class ExecuteInstantQuery(BasePrometheusTool):
     ) -> StructuredToolResult:
         if not self.toolset.config or not self.toolset.config.prometheus_url:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error="Prometheus is not configured. Prometheus URL is missing",
                 params=params,
             )
@@ -715,12 +1091,24 @@ class ExecuteInstantQuery(BasePrometheusTool):
 
             payload = {"query": query}
 
+            # Get timeout parameter and enforce limits
+            default_timeout = self.toolset.config.default_query_timeout_seconds
+            max_timeout = self.toolset.config.max_query_timeout_seconds
+            timeout = params.get("timeout", default_timeout)
+            if timeout > max_timeout:
+                timeout = max_timeout
+                logging.warning(
+                    f"Timeout requested ({params.get('timeout')}) exceeds maximum ({max_timeout}s), using {max_timeout}s"
+                )
+            elif timeout < 1:
+                timeout = default_timeout  # Min 1 second, but use default if invalid
+
             response = do_request(
                 config=self.toolset.config,
                 url=url,
                 headers=self.toolset.config.headers,
                 data=payload,
-                timeout=60,
+                timeout=timeout,
                 verify=self.toolset.config.prometheus_ssl_enabled,
                 method="POST",
             )
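The clamp above reduces to a small pure function. A sketch mirroring it, with the defaults from the constants defined earlier:

    def clamp_timeout(requested, default=20, maximum=180):
        # mirrors the _invoke logic: cap at the maximum, fall back to the default if < 1
        if requested is None:
            return default
        if requested > maximum:
            return maximum  # a warning is logged in the real code
        if requested < 1:
            return default
        return requested

    assert clamp_timeout(None) == 20
    assert clamp_timeout(300) == 180
    assert clamp_timeout(0) == 20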
@@ -743,12 +1131,44 @@ class ExecuteInstantQuery(BasePrometheusTool):
743
1131
  "query": query,
744
1132
  }
745
1133
 
1134
+ # Check if data should be included based on size
746
1135
  if self.toolset.config.tool_calls_return_data:
747
- response_data["data"] = data.get("data")
1136
+ result_data = data.get("data", {})
1137
+
1138
+ # Estimate the size of the data
1139
+ data_str_preview = json.dumps(result_data)
1140
+ data_size_chars = len(data_str_preview)
1141
+
1142
+ # Provide summary if data is too large
1143
+ if (
1144
+ self.toolset.config.query_response_size_limit
1145
+ and data_size_chars
1146
+ > self.toolset.config.query_response_size_limit
1147
+ ):
1148
+ response_data["data_summary"] = (
1149
+ create_data_summary_for_large_result(
1150
+ result_data,
1151
+ query,
1152
+ data_size_chars,
1153
+ is_range_query=False,
1154
+ )
1155
+ )
1156
+ logging.info(
1157
+ f"Prometheus instant query returned large dataset: "
1158
+ f"{response_data['data_summary'].get('result_count', 0)} results, "
1159
+ f"{data_size_chars:,} characters (limit: {self.toolset.config.query_response_size_limit:,}). "
1160
+ f"Returning summary instead of full data."
1161
+ )
1162
+ # Also add character info to the summary for debugging
1163
+ response_data["data_summary"]["_debug_info"] = (
1164
+ f"Data size: {data_size_chars:,} chars exceeded limit of {self.toolset.config.query_response_size_limit:,} chars"
1165
+ )
1166
+ else:
1167
+ response_data["data"] = result_data
748
1168
 
749
1169
  data_str = json.dumps(response_data, indent=2)
750
1170
  return StructuredToolResult(
751
- status=ToolResultStatus.SUCCESS,
1171
+ status=StructuredToolResultStatus.SUCCESS,
752
1172
  data=data_str,
753
1173
  params=params,
754
1174
  )
@@ -764,14 +1184,14 @@ class ExecuteInstantQuery(BasePrometheusTool):
              except json.JSONDecodeError:
                  pass
              return StructuredToolResult(
-                 status=ToolResultStatus.ERROR,
+                 status=StructuredToolResultStatus.ERROR,
                  error=f"Query execution failed. HTTP {response.status_code}: {error_msg}",
                  params=params,
              )

          # For other status codes, just return the status code and content
          return StructuredToolResult(
-             status=ToolResultStatus.ERROR,
+             status=StructuredToolResultStatus.ERROR,
              error=f"Query execution failed with unexpected status code: {response.status_code}. Response: {str(response.content)}",
              params=params,
          )
@@ -779,14 +1199,14 @@ class ExecuteInstantQuery(BasePrometheusTool):
      except RequestException as e:
          logging.info("Failed to connect to Prometheus", exc_info=True)
          return StructuredToolResult(
-             status=ToolResultStatus.ERROR,
+             status=StructuredToolResultStatus.ERROR,
              error=f"Connection error to Prometheus: {str(e)}",
              params=params,
          )
      except Exception as e:
          logging.info("Failed to connect to Prometheus", exc_info=True)
          return StructuredToolResult(
-             status=ToolResultStatus.ERROR,
+             status=StructuredToolResultStatus.ERROR,
              error=f"Unexpected error executing query: {str(e)}",
              params=params,
          )
@@ -800,7 +1220,12 @@ class ExecuteRangeQuery(BasePrometheusTool):
  def __init__(self, toolset: "PrometheusToolset"):
      super().__init__(
          name="execute_prometheus_range_query",
-         description="Generates a graph and Execute a PromQL range query",
+         description=(
+             f"Generates a graph and executes a PromQL range query. "
+             f"Default timeout is {DEFAULT_QUERY_TIMEOUT_SECONDS} seconds "
+             f"but can be increased up to {MAX_QUERY_TIMEOUT_SECONDS} seconds for complex/slow queries. "
+             f"Default time range is last 1 hour."
+         ),
          parameters={
              "query": ToolParameter(
                  description="The PromQL query",
@@ -827,13 +1252,32 @@ class ExecuteRangeQuery(BasePrometheusTool):
              "step": ToolParameter(
                  description="Query resolution step width in duration format or float number of seconds",
                  type="number",
-                 required=True,
+                 required=False,
              ),
              "output_type": ToolParameter(
                  description="Specifies how to interpret the Prometheus result. Use 'Plain' for raw values, 'Bytes' to format byte values, 'Percentage' to scale 0–1 values into 0–100%, or 'CPUUsage' to convert values to cores (e.g., 500 becomes 500m, 2000 becomes 2).",
                  type="string",
                  required=True,
              ),
+             "timeout": ToolParameter(
+                 description=(
+                     f"Query timeout in seconds. Default: {DEFAULT_QUERY_TIMEOUT_SECONDS}. "
+                     f"Maximum: {MAX_QUERY_TIMEOUT_SECONDS}. "
+                     f"Increase for complex queries that may take longer."
+                 ),
+                 type="number",
+                 required=False,
+             ),
+             "max_points": ToolParameter(
+                 description=(
+                     f"Maximum number of data points to return. Default: {int(MAX_GRAPH_POINTS)}. "
+                     f"Can be reduced to get fewer data points (e.g., 50 for simpler graphs). "
+                     f"Cannot exceed system limit of {int(MAX_GRAPH_POINTS)}. "
+                     f"If your query would return more points than this limit, the step will be automatically adjusted."
+                 ),
+                 type="number",
+                 required=False,
+             ),
          },
          toolset=toolset,
      )
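Taken together, these schema changes mean a caller can now omit `step` and instead bound the result with `max_points`. An illustrative argument set for `execute_prometheus_range_query`; field names come from the schema above, the values are made up:

```python
params = {
    "query": 'sum(rate(container_cpu_usage_seconds_total[5m]))',
    "output_type": "CPUUsage",  # still required
    # "step" is now optional; when omitted it is derived from the point budget
    "timeout": 120,    # clamped to MAX_QUERY_TIMEOUT_SECONDS if set higher
    "max_points": 50,  # coarser resolution for a simpler graph
}
```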
@@ -843,7 +1287,7 @@ class ExecuteRangeQuery(BasePrometheusTool):
  ) -> StructuredToolResult:
      if not self.toolset.config or not self.toolset.config.prometheus_url:
          return StructuredToolResult(
-             status=ToolResultStatus.ERROR,
+             status=StructuredToolResultStatus.ERROR,
              error="Prometheus is not configured. Prometheus URL is missing",
              params=params,
          )
@@ -857,12 +1301,17 @@ class ExecuteRangeQuery(BasePrometheusTool):
          end_timestamp=params.get("end"),
          default_time_span_seconds=DEFAULT_GRAPH_TIME_SPAN_SECONDS,
      )
-     step = params.get("step", "")
+     step = parse_duration_to_seconds(params.get("step"))
+     max_points = params.get(
+         "max_points"
+     )  # Get the optional max_points parameter

+     # adjust_step_for_max_points handles None case and converts to float
      step = adjust_step_for_max_points(
          start_timestamp=start,
          end_timestamp=end,
-         step=float(step) if step else MAX_GRAPH_POINTS,
+         step=step,
+         max_points_override=max_points,
      )

      description = params.get("description", "")
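`adjust_step_for_max_points` itself is not shown in this diff. Below is a plausible reconstruction of what its call site implies; the argument names and `MAX_GRAPH_POINTS` come from the hunks, but the body and the constant's value are assumptions:

```python
from typing import Optional

MAX_GRAPH_POINTS = 300.0  # assumed value; the real constant is defined elsewhere

def adjust_step_for_max_points(
    start_timestamp: float,
    end_timestamp: float,
    step: Optional[float],
    max_points_override: Optional[float] = None,
) -> float:
    """Widen the step so (end - start) / step never exceeds the point budget."""
    span = end_timestamp - start_timestamp
    # The override can reduce the budget but never exceed the system limit.
    budget = min(max_points_override or MAX_GRAPH_POINTS, MAX_GRAPH_POINTS)
    if not step or step <= 0:
        step = span / budget          # no step given: spread the budget over the range
    return max(step, span / budget)   # too many points: widen the step

# One hour at 1s resolution would be 3600 points; a budget of 50 caps it:
assert adjust_step_for_max_points(0, 3600, 1.0, max_points_override=50) == 72.0
```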
@@ -874,12 +1323,24 @@ class ExecuteRangeQuery(BasePrometheusTool):
          "step": step,
      }

+     # Get timeout parameter and enforce limits
+     default_timeout = self.toolset.config.default_query_timeout_seconds
+     max_timeout = self.toolset.config.max_query_timeout_seconds
+     timeout = params.get("timeout", default_timeout)
+     if timeout > max_timeout:
+         timeout = max_timeout
+         logging.warning(
+             f"Timeout requested ({params.get('timeout')}) exceeds maximum ({max_timeout}s), using {max_timeout}s"
+         )
+     elif timeout < 1:
+         timeout = default_timeout  # Min 1 second, but use default if invalid
+
      response = do_request(
          config=self.toolset.config,
          url=url,
          headers=self.toolset.config.headers,
          data=payload,
-         timeout=120,
+         timeout=timeout,
          verify=self.toolset.config.prometheus_ssl_enabled,
          method="POST",
      )
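This is the same clamping logic as in the instant-query hunk, now replacing the old hard-coded 120s. For reference, the request the tool ultimately issues is a plain POST to Prometheus's `query_range` endpoint; a self-contained sketch against a local server, with the URL and values purely illustrative:

```python
import time
import requests

end = time.time()
resp = requests.post(
    "http://localhost:9090/api/v1/query_range",  # assumed local Prometheus
    data={
        "query": "up",
        "start": end - 3600,  # default time range is the last hour
        "end": end,
        "step": 60,
    },
    timeout=60,  # the clamped value computed above
)
resp.raise_for_status()
print(resp.json()["data"]["resultType"])  # "matrix" for range queries
```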
@@ -906,12 +1367,42 @@ class ExecuteRangeQuery(BasePrometheusTool):
          "output_type": output_type,
      }

+     # Check if data should be included based on size
      if self.toolset.config.tool_calls_return_data:
-         response_data["data"] = data.get("data")
+         result_data = data.get("data", {})
+
+         # Estimate the size of the data
+         data_str_preview = json.dumps(result_data)
+         data_size_chars = len(data_str_preview)
+
+         # Provide summary if data is too large
+         if (
+             self.toolset.config.query_response_size_limit
+             and data_size_chars
+             > self.toolset.config.query_response_size_limit
+         ):
+             response_data["data_summary"] = (
+                 create_data_summary_for_large_result(
+                     result_data, query, data_size_chars, is_range_query=True
+                 )
+             )
+             logging.info(
+                 f"Prometheus range query returned large dataset: "
+                 f"{response_data['data_summary'].get('series_count', 0)} series, "
+                 f"{data_size_chars:,} characters (limit: {self.toolset.config.query_response_size_limit:,}). "
+                 f"Returning summary instead of full data."
+             )
+             # Also add character info to the summary for debugging
+             response_data["data_summary"]["_debug_info"] = (
+                 f"Data size: {data_size_chars:,} chars exceeded limit of {self.toolset.config.query_response_size_limit:,} chars"
+             )
+         else:
+             response_data["data"] = result_data
+
      data_str = json.dumps(response_data, indent=2)

      return StructuredToolResult(
-         status=ToolResultStatus.SUCCESS,
+         status=StructuredToolResultStatus.SUCCESS,
          data=data_str,
          params=params,
      )
@@ -926,13 +1417,13 @@ class ExecuteRangeQuery(BasePrometheusTool):
              except json.JSONDecodeError:
                  pass
              return StructuredToolResult(
-                 status=ToolResultStatus.ERROR,
+                 status=StructuredToolResultStatus.ERROR,
                  error=f"Query execution failed. HTTP {response.status_code}: {error_msg}",
                  params=params,
              )

          return StructuredToolResult(
-             status=ToolResultStatus.ERROR,
+             status=StructuredToolResultStatus.ERROR,
              error=f"Query execution failed with unexpected status code: {response.status_code}. Response: {str(response.content)}",
              params=params,
          )
@@ -940,14 +1431,14 @@ class ExecuteRangeQuery(BasePrometheusTool):
      except RequestException as e:
          logging.info("Failed to connect to Prometheus", exc_info=True)
          return StructuredToolResult(
-             status=ToolResultStatus.ERROR,
+             status=StructuredToolResultStatus.ERROR,
              error=f"Connection error to Prometheus: {str(e)}",
              params=params,
          )
      except Exception as e:
          logging.info("Failed to connect to Prometheus", exc_info=True)
          return StructuredToolResult(
-             status=ToolResultStatus.ERROR,
+             status=StructuredToolResultStatus.ERROR,
              error=f"Unexpected error executing query: {str(e)}",
              params=params,
          )
@@ -969,7 +1460,11 @@ class PrometheusToolset(Toolset):
          prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
          tools=[
              ListPrometheusRules(toolset=self),
-             ListAvailableMetrics(toolset=self),
+             GetMetricNames(toolset=self),
+             GetLabelValues(toolset=self),
+             GetAllLabels(toolset=self),
+             GetSeries(toolset=self),
+             GetMetricMetadata(toolset=self),
              ExecuteInstantQuery(toolset=self),
              ExecuteRangeQuery(toolset=self),
          ],
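The single `ListAvailableMetrics` tool is split into five narrower metadata tools. Their names map naturally onto Prometheus's metadata HTTP API; the endpoint-to-tool pairing below is an inference, since the tool bodies fall outside these hunks:

```python
import requests

PROM = "http://localhost:9090"  # illustrative server

labels   = requests.get(f"{PROM}/api/v1/labels").json()                             # GetAllLabels
metrics  = requests.get(f"{PROM}/api/v1/label/__name__/values").json()              # GetMetricNames
pods     = requests.get(f"{PROM}/api/v1/label/pod/values").json()                   # GetLabelValues
series   = requests.get(f"{PROM}/api/v1/series", params={"match[]": "up"}).json()   # GetSeries
metadata = requests.get(f"{PROM}/api/v1/metadata").json()                           # GetMetricMetadata
```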
@@ -1060,13 +1555,8 @@ class PrometheusToolset(Toolset):
                  f"Failed to connect to Prometheus at {url}: HTTP {response.status_code}",
              )

-         except RequestException:
-             return (
-                 False,
-                 f"Failed to initialize using url={url}",
-             )
          except Exception as e:
-             logging.exception("Failed to initialize Prometheus")
+             logging.exception("Failed to initialize Prometheus", exc_info=True)
              return (
                  False,
                  f"Failed to initialize using url={url}. Unexpected error: {str(e)}",