holmesgpt 0.14.0a0__py3-none-any.whl → 0.14.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic. Click here for more details.

Files changed (82)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +15 -4
  3. holmes/common/env_vars.py +8 -1
  4. holmes/config.py +66 -139
  5. holmes/core/investigation.py +1 -2
  6. holmes/core/llm.py +295 -52
  7. holmes/core/models.py +2 -0
  8. holmes/core/safeguards.py +4 -4
  9. holmes/core/supabase_dal.py +14 -8
  10. holmes/core/tool_calling_llm.py +110 -102
  11. holmes/core/tools.py +260 -25
  12. holmes/core/tools_utils/data_types.py +81 -0
  13. holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
  14. holmes/core/tools_utils/tool_executor.py +2 -2
  15. holmes/core/toolset_manager.py +150 -3
  16. holmes/core/transformers/__init__.py +23 -0
  17. holmes/core/transformers/base.py +62 -0
  18. holmes/core/transformers/llm_summarize.py +174 -0
  19. holmes/core/transformers/registry.py +122 -0
  20. holmes/core/transformers/transformer.py +31 -0
  21. holmes/main.py +5 -0
  22. holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
  23. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  24. holmes/plugins/toolsets/aks.yaml +64 -0
  25. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
  26. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
  27. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
  28. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
  29. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  30. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
  31. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
  32. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
  33. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
  34. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
  35. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
  36. holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
  37. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  38. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  39. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  40. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  41. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +344 -205
  42. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +189 -17
  43. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +95 -30
  44. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +10 -10
  45. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +20 -20
  46. holmes/plugins/toolsets/git.py +21 -21
  47. holmes/plugins/toolsets/grafana/common.py +2 -2
  48. holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
  49. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +5 -4
  50. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +123 -23
  51. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +165 -307
  52. holmes/plugins/toolsets/internet/internet.py +3 -3
  53. holmes/plugins/toolsets/internet/notion.py +3 -3
  54. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  55. holmes/plugins/toolsets/kafka.py +18 -18
  56. holmes/plugins/toolsets/kubernetes.yaml +58 -0
  57. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  58. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  59. holmes/plugins/toolsets/logging_utils/logging_api.py +1 -1
  60. holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
  61. holmes/plugins/toolsets/newrelic.py +5 -5
  62. holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
  63. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  64. holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
  65. holmes/plugins/toolsets/prometheus/prometheus.py +841 -351
  66. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +39 -2
  67. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  68. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
  69. holmes/plugins/toolsets/robusta/robusta.py +10 -10
  70. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
  71. holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
  72. holmes/plugins/toolsets/utils.py +88 -0
  73. holmes/utils/config_utils.py +91 -0
  74. holmes/utils/env.py +7 -0
  75. holmes/utils/holmes_status.py +2 -1
  76. holmes/utils/sentry_helper.py +41 -0
  77. holmes/utils/stream.py +9 -0
  78. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/METADATA +10 -14
  79. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/RECORD +82 -72
  80. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/LICENSE.txt +0 -0
  81. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/WHEEL +0 -0
  82. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/entry_points.txt +0 -0
@@ -3,12 +3,13 @@ from enum import Enum
3
3
  import json
4
4
  import logging
5
5
  from typing import Any, Optional, Dict, Tuple, Set
6
+ from urllib.parse import urlencode
6
7
  from holmes.core.tools import (
7
8
  CallablePrerequisite,
8
9
  ToolsetTag,
9
10
  )
10
11
  from pydantic import BaseModel, Field
11
- from holmes.core.tools import StructuredToolResult, ToolResultStatus
12
+ from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
12
13
  from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
13
14
  from holmes.plugins.toolsets.datadog.datadog_api import (
14
15
  DatadogBaseConfig,
@@ -16,6 +17,8 @@ from holmes.plugins.toolsets.datadog.datadog_api import (
16
17
  execute_paginated_datadog_http_request,
17
18
  get_headers,
18
19
  MAX_RETRY_COUNT_ON_RATE_LIMIT,
20
+ enhance_error_message,
21
+ preprocess_time_fields,
19
22
  )
20
23
  from holmes.plugins.toolsets.logging_utils.logging_api import (
21
24
  DEFAULT_TIME_SPAN_SECONDS,
@@ -99,23 +102,28 @@ def fetch_paginated_logs(
99
102
  "page": {"limit": calculate_page_size(params, dd_config, [])},
100
103
  }
101
104
 
105
+ # Preprocess time fields to ensure correct format
106
+ processed_payload = preprocess_time_fields(payload, "/api/v2/logs/events/search")
107
+
102
108
  logs, cursor = execute_paginated_datadog_http_request(
103
109
  url=url,
104
110
  headers=headers,
105
- payload_or_params=payload,
111
+ payload_or_params=processed_payload,
106
112
  timeout=dd_config.request_timeout,
107
113
  )
108
114
 
109
115
  while cursor and len(logs) < limit:
110
- payload["page"]["cursor"] = cursor
116
+ processed_payload["page"]["cursor"] = cursor
117
+ processed_payload["page"]["limit"] = calculate_page_size(
118
+ params, dd_config, logs
119
+ )
111
120
  new_logs, cursor = execute_paginated_datadog_http_request(
112
121
  url=url,
113
122
  headers=headers,
114
- payload_or_params=payload,
123
+ payload_or_params=processed_payload,
115
124
  timeout=dd_config.request_timeout,
116
125
  )
117
126
  logs += new_logs
118
- payload["page"]["limit"] = calculate_page_size(params, dd_config, logs)
119
127
 
120
128
  # logs are fetched descending order. Unified logging API follows the pattern of kubectl logs where oldest logs are first
121
129
  logs.reverse()
@@ -129,14 +137,73 @@ def format_logs(raw_logs: list[dict]) -> str:
129
137
  logs = []
130
138
 
131
139
  for raw_log_item in raw_logs:
140
+ # Extract timestamp - Datadog returns it in ISO format
141
+ timestamp = raw_log_item.get("attributes", {}).get("timestamp", "")
142
+ if not timestamp:
143
+ # Fallback to @timestamp if timestamp is not in attributes
144
+ timestamp = raw_log_item.get("attributes", {}).get("@timestamp", "")
145
+
146
+ # Extract message
132
147
  message = raw_log_item.get("attributes", {}).get(
133
148
  "message", json.dumps(raw_log_item)
134
149
  )
135
- logs.append(message)
150
+
151
+ # Format as: [timestamp] message
152
+ if timestamp:
153
+ logs.append(f"[{timestamp}] {message}")
154
+ else:
155
+ logs.append(message)
136
156
 
137
157
  return "\n".join(logs)
138
158
 
139
159
 
160
+ def generate_datadog_logs_url(
161
+ dd_config: DatadogLogsConfig,
162
+ params: FetchPodLogsParams,
163
+ storage_tier: DataDogStorageTier,
164
+ ) -> str:
165
+ """Generate a Datadog web UI URL for the logs query."""
166
+ from holmes.plugins.toolsets.utils import process_timestamps_to_int
167
+ from holmes.plugins.toolsets.datadog.datadog_api import convert_api_url_to_app_url
168
+
169
+ # Convert API URL to app URL using the shared helper
170
+ base_url = convert_api_url_to_app_url(dd_config.site_api_url)
171
+
172
+ # Build the query string
173
+ query = f"{dd_config.labels.namespace}:{params.namespace}"
174
+ query += f" {dd_config.labels.pod}:{params.pod_name}"
175
+ if params.filter:
176
+ filter = params.filter.replace('"', '\\"')
177
+ query += f' "{filter}"'
178
+
179
+ # Process timestamps - get Unix timestamps in seconds
180
+ (from_time_seconds, to_time_seconds) = process_timestamps_to_int(
181
+ start=params.start_time,
182
+ end=params.end_time,
183
+ default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
184
+ )
185
+
186
+ # Convert to milliseconds for Datadog web UI
187
+ from_time_ms = from_time_seconds * 1000
188
+ to_time_ms = to_time_seconds * 1000
189
+
190
+ # Build URL parameters matching Datadog's web UI format
191
+ url_params = {
192
+ "query": query,
193
+ "from_ts": str(from_time_ms),
194
+ "to_ts": str(to_time_ms),
195
+ "live": "true",
196
+ "storage": storage_tier.value,
197
+ }
198
+
199
+ # Add indexes if not default
200
+ if dd_config.indexes != ["*"]:
201
+ url_params["index"] = ",".join(dd_config.indexes)
202
+
203
+ # Construct the full URL
204
+ return f"{base_url}/logs?{urlencode(url_params)}"
205
+
206
+
140
207
  class DatadogLogsToolset(BasePodLoggingToolset):
141
208
  dd_config: Optional[DatadogLogsConfig] = None
142
209
 
@@ -167,7 +234,7 @@ class DatadogLogsToolset(BasePodLoggingToolset):
167
234
  def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
168
235
  if not self.dd_config:
169
236
  return StructuredToolResult(
170
- status=ToolResultStatus.ERROR,
237
+ status=StructuredToolResultStatus.ERROR,
171
238
  data=TOOLSET_CONFIG_MISSING_ERROR,
172
239
  params=params.model_dump(),
173
240
  )
@@ -181,29 +248,134 @@ class DatadogLogsToolset(BasePodLoggingToolset):
181
248
 
182
249
  if raw_logs:
183
250
  logs_str = format_logs(raw_logs)
251
+ # Generate Datadog web UI URL
252
+ datadog_url = generate_datadog_logs_url(
253
+ self.dd_config, params, storage_tier
254
+ )
255
+ logs_with_link = f"{logs_str}\n\nView in Datadog: {datadog_url}"
184
256
  return StructuredToolResult(
185
- status=ToolResultStatus.SUCCESS,
186
- data=logs_str,
257
+ status=StructuredToolResultStatus.SUCCESS,
258
+ data=logs_with_link,
259
+ url=datadog_url,
187
260
  params=params.model_dump(),
188
261
  )
189
262
 
263
+ # Include detailed diagnostic context
264
+ query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
265
+ if params.filter:
266
+ query += f' "{params.filter}"'
267
+
268
+ # Get actual time range used
269
+ (from_time, to_time) = process_timestamps_to_rfc3339(
270
+ start_timestamp=params.start_time,
271
+ end_timestamp=params.end_time,
272
+ default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
273
+ )
274
+
275
+ # Generate Datadog web UI URL for the last storage tier checked
276
+ datadog_url = generate_datadog_logs_url(
277
+ self.dd_config, params, self.dd_config.storage_tiers[-1]
278
+ )
279
+
280
+ # Build diagnostic information
281
+ diagnostics: Dict[str, Any] = {
282
+ "query_executed": query,
283
+ "time_range": f"{from_time} to {to_time}",
284
+ "indexes_searched": self.dd_config.indexes,
285
+ "storage_tiers_checked": [
286
+ tier.value for tier in self.dd_config.storage_tiers
287
+ ],
288
+ "field_mappings": {
289
+ "namespace_field": self.dd_config.labels.namespace,
290
+ "pod_field": self.dd_config.labels.pod,
291
+ },
292
+ "limit": params.limit or self.dd_config.default_limit,
293
+ "datadog_url": datadog_url,
294
+ }
295
+
296
+ # Format diagnostic info as structured text
297
+ error_msg = (
298
+ f"No logs found.\n\n"
299
+ f"Diagnostic Information:\n"
300
+ f"----------------------\n"
301
+ f"Query executed: {diagnostics['query_executed']}\n"
302
+ f"Time range: {diagnostics['time_range']}\n"
303
+ f"Indexes searched: {diagnostics['indexes_searched']}\n"
304
+ f"Storage tiers checked: {', '.join(str(tier) for tier in diagnostics.get('storage_tiers_checked', []))}\n"
305
+ f"Field mappings:\n"
306
+ f" - Namespace field: {diagnostics.get('field_mappings', {}).get('namespace_field', 'N/A')}\n"
307
+ f" - Pod field: {diagnostics.get('field_mappings', {}).get('pod_field', 'N/A')}\n"
308
+ f"Limit: {diagnostics['limit']}\n\n"
309
+ f"View in Datadog: {diagnostics['datadog_url']}"
310
+ )
311
+
190
312
  return StructuredToolResult(
191
- status=ToolResultStatus.NO_DATA,
313
+ status=StructuredToolResultStatus.NO_DATA,
314
+ error=error_msg,
315
+ url=datadog_url,
192
316
  params=params.model_dump(),
193
317
  )
194
318
 
195
319
  except DataDogRequestError as e:
196
320
  logging.exception(e, exc_info=True)
197
321
 
322
+ # Always try to generate Datadog URL for debugging
323
+ try:
324
+ datadog_url = generate_datadog_logs_url(
325
+ self.dd_config, params, self.dd_config.storage_tiers[0]
326
+ )
327
+ except Exception:
328
+ datadog_url = None
329
+
198
330
  # Provide more specific error message for rate limiting failures
199
331
  if e.status_code == 429:
200
332
  error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
333
+ if datadog_url:
334
+ error_msg += f"\nView in Datadog: {datadog_url}"
335
+ elif e.status_code == 400:
336
+ # Use enhanced error message for validation errors
337
+ error_msg = enhance_error_message(
338
+ e,
339
+ "/api/v2/logs/events/search",
340
+ "POST",
341
+ str(self.dd_config.site_api_url),
342
+ )
343
+
344
+ # Add query context
345
+ query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
346
+ if params.filter:
347
+ query += f' "{params.filter}"'
348
+ error_msg += f"\n\nQuery attempted: {query}"
349
+
350
+ # Add Datadog web UI URL to error message
351
+ if datadog_url:
352
+ error_msg += f"\nView in Datadog: {datadog_url}"
201
353
  else:
202
- error_msg = f"Exception while querying Datadog: {str(e)}"
354
+ # Include full API error details and query context
355
+ error_msg = (
356
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
357
+ )
358
+ query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
359
+ if params.filter:
360
+ query += f' "{params.filter}"'
361
+ error_msg += f"\nQuery: {query}"
362
+
363
+ # Get actual time range used
364
+ (from_time, to_time) = process_timestamps_to_rfc3339(
365
+ start_timestamp=params.start_time,
366
+ end_timestamp=params.end_time,
367
+ default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
368
+ )
369
+ error_msg += f"\nTime range: {from_time} to {to_time}"
370
+
371
+ # Add Datadog web UI URL to error message
372
+ if datadog_url:
373
+ error_msg += f"\nView in Datadog: {datadog_url}"
203
374
 
204
375
  return StructuredToolResult(
205
- status=ToolResultStatus.ERROR,
376
+ status=StructuredToolResultStatus.ERROR,
206
377
  error=error_msg,
378
+ url=datadog_url,
207
379
  params=params.model_dump(),
208
380
  invocation=json.dumps(e.payload),
209
381
  )
@@ -213,7 +385,7 @@ class DatadogLogsToolset(BasePodLoggingToolset):
213
385
  f"Failed to query Datadog logs for params: {params}", exc_info=True
214
386
  )
215
387
  return StructuredToolResult(
216
- status=ToolResultStatus.ERROR,
388
+ status=StructuredToolResultStatus.ERROR,
217
389
  error=f"Exception while querying Datadog: {str(e)}",
218
390
  params=params.model_dump(),
219
391
  )
@@ -224,7 +396,7 @@ class DatadogLogsToolset(BasePodLoggingToolset):
224
396
  Returns (success, error_message).
225
397
  """
226
398
  try:
227
- logging.info("Performing Datadog configuration healthcheck...")
399
+ logging.debug("Performing Datadog configuration healthcheck...")
228
400
  healthcheck_params = FetchPodLogsParams(
229
401
  namespace="*",
230
402
  pod_name="*",
@@ -234,11 +406,11 @@ class DatadogLogsToolset(BasePodLoggingToolset):
234
406
 
235
407
  result = self.fetch_pod_logs(healthcheck_params)
236
408
 
237
- if result.status == ToolResultStatus.ERROR:
409
+ if result.status == StructuredToolResultStatus.ERROR:
238
410
  error_msg = result.error or "Unknown error during healthcheck"
239
411
  logging.error(f"Datadog healthcheck failed: {error_msg}")
240
412
  return False, f"Datadog healthcheck failed: {error_msg}"
241
- elif result.status == ToolResultStatus.NO_DATA:
413
+ elif result.status == StructuredToolResultStatus.NO_DATA:
242
414
  error_msg = "No logs were found in the last 48 hours using wildcards for pod and namespace. Is the configuration correct?"
243
415
  logging.error(f"Datadog healthcheck failed: {error_msg}")
244
416
  return False, f"Datadog healthcheck failed: {error_msg}"
@@ -254,7 +426,7 @@ class DatadogLogsToolset(BasePodLoggingToolset):
254
426
  if not config:
255
427
  return (
256
428
  False,
257
- TOOLSET_CONFIG_MISSING_ERROR,
429
+ "Datadog logs toolset requires configuration. Please provide: dd_api_key, dd_app_key, and site_api_url in your Holmes config. For more details, see https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
258
430
  )
259
431
 
260
432
  try:
@@ -7,7 +7,7 @@ from holmes.core.tools import (
7
7
  StructuredToolResult,
8
8
  Tool,
9
9
  ToolParameter,
10
- ToolResultStatus,
10
+ StructuredToolResultStatus,
11
11
  Toolset,
12
12
  ToolsetTag,
13
13
  )
@@ -54,7 +54,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
54
54
  def __init__(self, toolset: "DatadogMetricsToolset"):
55
55
  super().__init__(
56
56
  name="list_active_datadog_metrics",
57
- description=f"List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
57
+ description=f"[datadog/metrics toolset] List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
58
58
  parameters={
59
59
  "from_time": ToolParameter(
60
60
  description=f"Start time for listing metrics. Can be an RFC3339 formatted datetime (e.g. '2023-03-01T10:30:00Z') or a negative integer for relative seconds from now (e.g. -86400 for 24 hours ago). Defaults to {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours ago",
@@ -80,7 +80,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
80
80
  ) -> StructuredToolResult:
81
81
  if not self.toolset.dd_config:
82
82
  return StructuredToolResult(
83
- status=ToolResultStatus.ERROR,
83
+ status=StructuredToolResultStatus.ERROR,
84
84
  error=TOOLSET_CONFIG_MISSING_ERROR,
85
85
  params=params,
86
86
  )
@@ -121,7 +121,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
121
121
  metrics = data.get("metrics", [])
122
122
  if not metrics:
123
123
  return StructuredToolResult(
124
- status=ToolResultStatus.ERROR,
124
+ status=StructuredToolResultStatus.ERROR,
125
125
  data="Your filter returned no metrics. Change your filter and try again",
126
126
  params=params,
127
127
  )
@@ -133,7 +133,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
133
133
  output.append(metric)
134
134
 
135
135
  return StructuredToolResult(
136
- status=ToolResultStatus.SUCCESS,
136
+ status=StructuredToolResultStatus.SUCCESS,
137
137
  data="\n".join(output),
138
138
  params=params,
139
139
  )
@@ -149,10 +149,30 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
149
149
  f"and 'timeseries_query' permissions. Error: {str(e)}"
150
150
  )
151
151
  else:
152
- error_msg = f"Exception while querying Datadog: {str(e)}"
152
+ # Include full API error details for better debugging
153
+ error_msg = (
154
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
155
+ )
156
+ if params:
157
+ # ListActiveMetrics parameters: from_time, host, tag_filter
158
+ if params.get("host"):
159
+ error_msg += f"\nHost filter: {params.get('host')}"
160
+ if params.get("tag_filter"):
161
+ error_msg += f"\nTag filter: {params.get('tag_filter')}"
162
+
163
+ from_time_param = params.get("from_time")
164
+ if from_time_param:
165
+ time_desc = from_time_param
166
+ else:
167
+ time_desc = f"default (last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours)"
168
+ error_msg += f"\nTime range: {time_desc}"
169
+
170
+ # Note: We cannot generate a Datadog Metrics Explorer URL for ListActiveMetrics
171
+ # because the Metrics Explorer requires a specific metric query,
172
+ # while ListActiveMetrics just lists available metrics without querying any specific one
153
173
 
154
174
  return StructuredToolResult(
155
- status=ToolResultStatus.ERROR,
175
+ status=StructuredToolResultStatus.ERROR,
156
176
  error=error_msg,
157
177
  params=params,
158
178
  invocation=json.dumps({"url": url, "params": query_params})
@@ -165,7 +185,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
165
185
  f"Failed to query Datadog metrics for params: {params}", exc_info=True
166
186
  )
167
187
  return StructuredToolResult(
168
- status=ToolResultStatus.ERROR,
188
+ status=StructuredToolResultStatus.ERROR,
169
189
  error=f"Exception while querying Datadog: {str(e)}",
170
190
  params=params,
171
191
  )
@@ -184,7 +204,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
184
204
  def __init__(self, toolset: "DatadogMetricsToolset"):
185
205
  super().__init__(
186
206
  name="query_datadog_metrics",
187
- description="Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
207
+ description="[datadog/metrics toolset] Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
188
208
  parameters={
189
209
  "query": ToolParameter(
190
210
  description="The metric query string (e.g., 'system.cpu.user{host:myhost}')",
@@ -222,7 +242,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
222
242
  ) -> StructuredToolResult:
223
243
  if not self.toolset.dd_config:
224
244
  return StructuredToolResult(
225
- status=ToolResultStatus.ERROR,
245
+ status=StructuredToolResultStatus.ERROR,
226
246
  error=TOOLSET_CONFIG_MISSING_ERROR,
227
247
  params=params,
228
248
  )
@@ -261,9 +281,29 @@ class QueryMetrics(BaseDatadogMetricsTool):
261
281
  output_type = params.get("output_type", "Plain")
262
282
 
263
283
  if not series:
284
+ # Include detailed context in error message
285
+ from_time_param = params.get("from_time")
286
+ to_time_param = params.get("to_time")
287
+
288
+ if from_time_param:
289
+ from_desc = from_time_param
290
+ else:
291
+ from_desc = (
292
+ f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
293
+ )
294
+
295
+ to_desc = to_time_param or "now"
296
+
297
+ error_msg = (
298
+ f"The query returned no data.\n"
299
+ f"Query: {params.get('query', 'not specified')}\n"
300
+ f"Time range: {from_desc} to {to_desc}\n"
301
+ f"Please check your query syntax and ensure data exists for this time range."
302
+ )
303
+
264
304
  return StructuredToolResult(
265
- status=ToolResultStatus.NO_DATA,
266
- error="The query returned no data. Please check your query syntax and time range.",
305
+ status=StructuredToolResultStatus.NO_DATA,
306
+ error=error_msg,
267
307
  params=params,
268
308
  )
269
309
 
@@ -317,7 +357,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
317
357
 
318
358
  data_str = json.dumps(response_data, indent=2)
319
359
  return StructuredToolResult(
320
- status=ToolResultStatus.SUCCESS,
360
+ status=StructuredToolResultStatus.SUCCESS,
321
361
  data=data_str,
322
362
  params=params,
323
363
  )
@@ -333,10 +373,28 @@ class QueryMetrics(BaseDatadogMetricsTool):
333
373
  f"and 'timeseries_query' permissions. Error: {str(e)}"
334
374
  )
335
375
  else:
336
- error_msg = f"Exception while querying Datadog: {str(e)}"
376
+ # Include full API error details for better debugging
377
+ error_msg = (
378
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
379
+ )
380
+ if params:
381
+ error_msg += f"\nQuery: {params.get('query', 'not specified')}"
382
+
383
+ from_time_param = params.get("from_time")
384
+ to_time_param = params.get("to_time")
385
+
386
+ if from_time_param:
387
+ from_desc = from_time_param
388
+ else:
389
+ from_desc = (
390
+ f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
391
+ )
392
+
393
+ to_desc = to_time_param or "now"
394
+ error_msg += f"\nTime range: {from_desc} to {to_desc}"
337
395
 
338
396
  return StructuredToolResult(
339
- status=ToolResultStatus.ERROR,
397
+ status=StructuredToolResultStatus.ERROR,
340
398
  error=error_msg,
341
399
  params=params,
342
400
  invocation=json.dumps({"url": url, "params": query_params})
@@ -350,7 +408,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
350
408
  )
351
409
 
352
410
  return StructuredToolResult(
353
- status=ToolResultStatus.ERROR,
411
+ status=StructuredToolResultStatus.ERROR,
354
412
  error=f"Exception while querying Datadog: {str(e)}",
355
413
  params=params,
356
414
  )
@@ -364,7 +422,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
364
422
  def __init__(self, toolset: "DatadogMetricsToolset"):
365
423
  super().__init__(
366
424
  name="get_datadog_metric_metadata",
367
- description="Get metadata about one or more metrics including their type, description, unit, and other properties",
425
+ description="[datadog/metrics toolset] Get metadata about one or more metrics including their type, description, unit, and other properties",
368
426
  parameters={
369
427
  "metric_names": ToolParameter(
370
428
  description="Comma-separated list of metric names to get metadata for (e.g., 'system.cpu.user, system.mem.used')",
@@ -380,7 +438,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
380
438
  ) -> StructuredToolResult:
381
439
  if not self.toolset.dd_config:
382
440
  return StructuredToolResult(
383
- status=ToolResultStatus.ERROR,
441
+ status=StructuredToolResultStatus.ERROR,
384
442
  error=TOOLSET_CONFIG_MISSING_ERROR,
385
443
  params=params,
386
444
  )
@@ -396,7 +454,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
396
454
 
397
455
  if not metric_names:
398
456
  return StructuredToolResult(
399
- status=ToolResultStatus.ERROR,
457
+ status=StructuredToolResultStatus.ERROR,
400
458
  error="metric_names cannot be empty",
401
459
  params=params,
402
460
  )
@@ -442,14 +500,14 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
442
500
 
443
501
  if not results and errors:
444
502
  return StructuredToolResult(
445
- status=ToolResultStatus.ERROR,
503
+ status=StructuredToolResultStatus.ERROR,
446
504
  error="Failed to retrieve metadata for all metrics",
447
505
  data=json.dumps(response_data, indent=2),
448
506
  params=params,
449
507
  )
450
508
 
451
509
  return StructuredToolResult(
452
- status=ToolResultStatus.SUCCESS,
510
+ status=StructuredToolResultStatus.SUCCESS,
453
511
  data=json.dumps(response_data, indent=2),
454
512
  params=params,
455
513
  )
@@ -461,7 +519,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
461
519
  )
462
520
 
463
521
  return StructuredToolResult(
464
- status=ToolResultStatus.ERROR,
522
+ status=StructuredToolResultStatus.ERROR,
465
523
  error=f"Exception while querying Datadog: {str(e)}",
466
524
  params=params,
467
525
  )
@@ -480,7 +538,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
480
538
  def __init__(self, toolset: "DatadogMetricsToolset"):
481
539
  super().__init__(
482
540
  name="list_datadog_metric_tags",
483
- description="List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
541
+ description="[datadog/metrics toolset] List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
484
542
  parameters={
485
543
  "metric_name": ToolParameter(
486
544
  description="The name of the metric to get tags for (e.g., 'system.cpu.user', 'container.memory.usage')",
@@ -496,7 +554,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
496
554
  ) -> StructuredToolResult:
497
555
  if not self.toolset.dd_config:
498
556
  return StructuredToolResult(
499
- status=ToolResultStatus.ERROR,
557
+ status=StructuredToolResultStatus.ERROR,
500
558
  error=TOOLSET_CONFIG_MISSING_ERROR,
501
559
  params=params,
502
560
  )
@@ -519,7 +577,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
519
577
  )
520
578
 
521
579
  return StructuredToolResult(
522
- status=ToolResultStatus.SUCCESS,
580
+ status=StructuredToolResultStatus.SUCCESS,
523
581
  data=data,
524
582
  params=params,
525
583
  )
@@ -537,10 +595,17 @@ class ListMetricTags(BaseDatadogMetricsTool):
537
595
  f"permissions. Error: {str(e)}"
538
596
  )
539
597
  else:
540
- error_msg = f"Exception while querying Datadog: {str(e)}"
598
+ # Include full API error details for better debugging
599
+ error_msg = (
600
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
601
+ )
602
+ if params:
603
+ error_msg += (
604
+ f"\nMetric name: {params.get('metric_name', 'not specified')}"
605
+ )
541
606
 
542
607
  return StructuredToolResult(
543
- status=ToolResultStatus.ERROR,
608
+ status=StructuredToolResultStatus.ERROR,
544
609
  error=error_msg,
545
610
  params=params,
546
611
  invocation=json.dumps({"url": url, "params": query_params})
@@ -554,7 +619,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
554
619
  exc_info=True,
555
620
  )
556
621
  return StructuredToolResult(
557
- status=ToolResultStatus.ERROR,
622
+ status=StructuredToolResultStatus.ERROR,
558
623
  error=f"Exception while querying Datadog: {str(e)}",
559
624
  params=params,
560
625
  )
@@ -586,7 +651,7 @@ class DatadogMetricsToolset(Toolset):
586
651
 
587
652
  def _perform_healthcheck(self, dd_config: DatadogMetricsConfig) -> Tuple[bool, str]:
588
653
  try:
589
- logging.info("Performing Datadog metrics configuration healthcheck...")
654
+ logging.debug("Performing Datadog metrics configuration healthcheck...")
590
655
 
591
656
  url = f"{dd_config.site_api_url}/api/v1/validate"
592
657
  headers = get_headers(dd_config)
@@ -615,7 +680,7 @@ class DatadogMetricsToolset(Toolset):
615
680
  if not config:
616
681
  return (
617
682
  False,
618
- TOOLSET_CONFIG_MISSING_ERROR,
683
+ "Datadog metrics toolset requires configuration. Please provide: dd_api_key, dd_app_key, and site_api_url in your Holmes config. For more details, see https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
619
684
  )
620
685
 
621
686
  try: