holmesgpt 0.14.1a0__py3-none-any.whl → 0.14.3a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of holmesgpt might be problematic. Click here for more details.

Files changed (73)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +5 -2
  3. holmes/common/env_vars.py +8 -2
  4. holmes/config.py +4 -7
  5. holmes/core/conversations.py +12 -2
  6. holmes/core/feedback.py +191 -0
  7. holmes/core/llm.py +52 -10
  8. holmes/core/models.py +101 -1
  9. holmes/core/supabase_dal.py +23 -9
  10. holmes/core/tool_calling_llm.py +206 -16
  11. holmes/core/tools.py +20 -7
  12. holmes/core/tools_utils/token_counting.py +13 -0
  13. holmes/core/tools_utils/tool_context_window_limiter.py +45 -23
  14. holmes/core/tools_utils/tool_executor.py +11 -6
  15. holmes/core/toolset_manager.py +7 -3
  16. holmes/core/truncation/dal_truncation_utils.py +23 -0
  17. holmes/interactive.py +146 -14
  18. holmes/plugins/prompts/_fetch_logs.jinja2 +13 -1
  19. holmes/plugins/runbooks/__init__.py +6 -1
  20. holmes/plugins/toolsets/__init__.py +11 -4
  21. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +9 -20
  22. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +2 -3
  23. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +2 -3
  24. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +6 -4
  25. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +6 -4
  26. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +2 -3
  27. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -4
  28. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +2 -3
  29. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +2 -3
  30. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +2 -3
  31. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +2 -3
  32. holmes/plugins/toolsets/bash/bash_toolset.py +4 -7
  33. holmes/plugins/toolsets/cilium.yaml +284 -0
  34. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  35. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  36. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +333 -199
  37. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +181 -9
  38. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +80 -22
  39. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +5 -8
  40. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +7 -12
  41. holmes/plugins/toolsets/git.py +14 -12
  42. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +23 -42
  43. holmes/plugins/toolsets/grafana/toolset_grafana.py +2 -3
  44. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +2 -1
  45. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +21 -39
  46. holmes/plugins/toolsets/internet/internet.py +2 -3
  47. holmes/plugins/toolsets/internet/notion.py +2 -3
  48. holmes/plugins/toolsets/investigator/core_investigation.py +7 -9
  49. holmes/plugins/toolsets/kafka.py +7 -18
  50. holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
  51. holmes/plugins/toolsets/mcp/toolset_mcp.py +2 -3
  52. holmes/plugins/toolsets/newrelic/__init__.py +0 -0
  53. holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
  54. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
  55. holmes/plugins/toolsets/newrelic/newrelic.py +211 -0
  56. holmes/plugins/toolsets/opensearch/opensearch.py +5 -12
  57. holmes/plugins/toolsets/opensearch/opensearch_traces.py +3 -6
  58. holmes/plugins/toolsets/prometheus/prometheus.py +808 -419
  59. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +27 -11
  60. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +3 -6
  61. holmes/plugins/toolsets/robusta/robusta.py +4 -9
  62. holmes/plugins/toolsets/runbook/runbook_fetcher.py +93 -13
  63. holmes/plugins/toolsets/servicenow/servicenow.py +5 -10
  64. holmes/utils/sentry_helper.py +1 -1
  65. holmes/utils/stream.py +22 -7
  66. holmes/version.py +34 -14
  67. {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/METADATA +7 -9
  68. {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/RECORD +71 -65
  69. holmes/core/tools_utils/data_types.py +0 -81
  70. holmes/plugins/toolsets/newrelic.py +0 -231
  71. {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/LICENSE.txt +0 -0
  72. {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/WHEEL +0 -0
  73. {holmesgpt-0.14.1a0.dist-info → holmesgpt-0.14.3a0.dist-info}/entry_points.txt +0 -0
@@ -3,6 +3,7 @@ from enum import Enum
3
3
  import json
4
4
  import logging
5
5
  from typing import Any, Optional, Dict, Tuple, Set
6
+ from urllib.parse import urlencode
6
7
  from holmes.core.tools import (
7
8
  CallablePrerequisite,
8
9
  ToolsetTag,
@@ -16,6 +17,8 @@ from holmes.plugins.toolsets.datadog.datadog_api import (
16
17
  execute_paginated_datadog_http_request,
17
18
  get_headers,
18
19
  MAX_RETRY_COUNT_ON_RATE_LIMIT,
20
+ enhance_error_message,
21
+ preprocess_time_fields,
19
22
  )
20
23
  from holmes.plugins.toolsets.logging_utils.logging_api import (
21
24
  DEFAULT_TIME_SPAN_SECONDS,
@@ -99,23 +102,28 @@ def fetch_paginated_logs(
99
102
  "page": {"limit": calculate_page_size(params, dd_config, [])},
100
103
  }
101
104
 
105
+ # Preprocess time fields to ensure correct format
106
+ processed_payload = preprocess_time_fields(payload, "/api/v2/logs/events/search")
107
+
102
108
  logs, cursor = execute_paginated_datadog_http_request(
103
109
  url=url,
104
110
  headers=headers,
105
- payload_or_params=payload,
111
+ payload_or_params=processed_payload,
106
112
  timeout=dd_config.request_timeout,
107
113
  )
108
114
 
109
115
  while cursor and len(logs) < limit:
110
- payload["page"]["cursor"] = cursor
116
+ processed_payload["page"]["cursor"] = cursor
117
+ processed_payload["page"]["limit"] = calculate_page_size(
118
+ params, dd_config, logs
119
+ )
111
120
  new_logs, cursor = execute_paginated_datadog_http_request(
112
121
  url=url,
113
122
  headers=headers,
114
- payload_or_params=payload,
123
+ payload_or_params=processed_payload,
115
124
  timeout=dd_config.request_timeout,
116
125
  )
117
126
  logs += new_logs
118
- payload["page"]["limit"] = calculate_page_size(params, dd_config, logs)
119
127
 
120
128
  # logs are fetched descending order. Unified logging API follows the pattern of kubectl logs where oldest logs are first
121
129
  logs.reverse()
@@ -129,14 +137,73 @@ def format_logs(raw_logs: list[dict]) -> str:
129
137
  logs = []
130
138
 
131
139
  for raw_log_item in raw_logs:
140
+ # Extract timestamp - Datadog returns it in ISO format
141
+ timestamp = raw_log_item.get("attributes", {}).get("timestamp", "")
142
+ if not timestamp:
143
+ # Fallback to @timestamp if timestamp is not in attributes
144
+ timestamp = raw_log_item.get("attributes", {}).get("@timestamp", "")
145
+
146
+ # Extract message
132
147
  message = raw_log_item.get("attributes", {}).get(
133
148
  "message", json.dumps(raw_log_item)
134
149
  )
135
- logs.append(message)
150
+
151
+ # Format as: [timestamp] message
152
+ if timestamp:
153
+ logs.append(f"[{timestamp}] {message}")
154
+ else:
155
+ logs.append(message)
136
156
 
137
157
  return "\n".join(logs)
138
158
 
139
159
 
160
+ def generate_datadog_logs_url(
161
+ dd_config: DatadogLogsConfig,
162
+ params: FetchPodLogsParams,
163
+ storage_tier: DataDogStorageTier,
164
+ ) -> str:
165
+ """Generate a Datadog web UI URL for the logs query."""
166
+ from holmes.plugins.toolsets.utils import process_timestamps_to_int
167
+ from holmes.plugins.toolsets.datadog.datadog_api import convert_api_url_to_app_url
168
+
169
+ # Convert API URL to app URL using the shared helper
170
+ base_url = convert_api_url_to_app_url(dd_config.site_api_url)
171
+
172
+ # Build the query string
173
+ query = f"{dd_config.labels.namespace}:{params.namespace}"
174
+ query += f" {dd_config.labels.pod}:{params.pod_name}"
175
+ if params.filter:
176
+ filter = params.filter.replace('"', '\\"')
177
+ query += f' "{filter}"'
178
+
179
+ # Process timestamps - get Unix timestamps in seconds
180
+ (from_time_seconds, to_time_seconds) = process_timestamps_to_int(
181
+ start=params.start_time,
182
+ end=params.end_time,
183
+ default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
184
+ )
185
+
186
+ # Convert to milliseconds for Datadog web UI
187
+ from_time_ms = from_time_seconds * 1000
188
+ to_time_ms = to_time_seconds * 1000
189
+
190
+ # Build URL parameters matching Datadog's web UI format
191
+ url_params = {
192
+ "query": query,
193
+ "from_ts": str(from_time_ms),
194
+ "to_ts": str(to_time_ms),
195
+ "live": "true",
196
+ "storage": storage_tier.value,
197
+ }
198
+
199
+ # Add indexes if not default
200
+ if dd_config.indexes != ["*"]:
201
+ url_params["index"] = ",".join(dd_config.indexes)
202
+
203
+ # Construct the full URL
204
+ return f"{base_url}/logs?{urlencode(url_params)}"
205
+
206
+
140
207
  class DatadogLogsToolset(BasePodLoggingToolset):
141
208
  dd_config: Optional[DatadogLogsConfig] = None
142
209
 
@@ -181,29 +248,134 @@ class DatadogLogsToolset(BasePodLoggingToolset):
181
248
 
182
249
  if raw_logs:
183
250
  logs_str = format_logs(raw_logs)
251
+ # Generate Datadog web UI URL
252
+ datadog_url = generate_datadog_logs_url(
253
+ self.dd_config, params, storage_tier
254
+ )
255
+ logs_with_link = f"{logs_str}\n\nView in Datadog: {datadog_url}"
184
256
  return StructuredToolResult(
185
257
  status=StructuredToolResultStatus.SUCCESS,
186
- data=logs_str,
258
+ data=logs_with_link,
259
+ url=datadog_url,
187
260
  params=params.model_dump(),
188
261
  )
189
262
 
263
+ # Include detailed diagnostic context
264
+ query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
265
+ if params.filter:
266
+ query += f' "{params.filter}"'
267
+
268
+ # Get actual time range used
269
+ (from_time, to_time) = process_timestamps_to_rfc3339(
270
+ start_timestamp=params.start_time,
271
+ end_timestamp=params.end_time,
272
+ default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
273
+ )
274
+
275
+ # Generate Datadog web UI URL for the last storage tier checked
276
+ datadog_url = generate_datadog_logs_url(
277
+ self.dd_config, params, self.dd_config.storage_tiers[-1]
278
+ )
279
+
280
+ # Build diagnostic information
281
+ diagnostics: Dict[str, Any] = {
282
+ "query_executed": query,
283
+ "time_range": f"{from_time} to {to_time}",
284
+ "indexes_searched": self.dd_config.indexes,
285
+ "storage_tiers_checked": [
286
+ tier.value for tier in self.dd_config.storage_tiers
287
+ ],
288
+ "field_mappings": {
289
+ "namespace_field": self.dd_config.labels.namespace,
290
+ "pod_field": self.dd_config.labels.pod,
291
+ },
292
+ "limit": params.limit or self.dd_config.default_limit,
293
+ "datadog_url": datadog_url,
294
+ }
295
+
296
+ # Format diagnostic info as structured text
297
+ error_msg = (
298
+ f"No logs found.\n\n"
299
+ f"Diagnostic Information:\n"
300
+ f"----------------------\n"
301
+ f"Query executed: {diagnostics['query_executed']}\n"
302
+ f"Time range: {diagnostics['time_range']}\n"
303
+ f"Indexes searched: {diagnostics['indexes_searched']}\n"
304
+ f"Storage tiers checked: {', '.join(str(tier) for tier in diagnostics.get('storage_tiers_checked', []))}\n"
305
+ f"Field mappings:\n"
306
+ f" - Namespace field: {diagnostics.get('field_mappings', {}).get('namespace_field', 'N/A')}\n"
307
+ f" - Pod field: {diagnostics.get('field_mappings', {}).get('pod_field', 'N/A')}\n"
308
+ f"Limit: {diagnostics['limit']}\n\n"
309
+ f"View in Datadog: {diagnostics['datadog_url']}"
310
+ )
311
+
190
312
  return StructuredToolResult(
191
313
  status=StructuredToolResultStatus.NO_DATA,
314
+ error=error_msg,
315
+ url=datadog_url,
192
316
  params=params.model_dump(),
193
317
  )
194
318
 
195
319
  except DataDogRequestError as e:
196
320
  logging.exception(e, exc_info=True)
197
321
 
322
+ # Always try to generate Datadog URL for debugging
323
+ try:
324
+ datadog_url = generate_datadog_logs_url(
325
+ self.dd_config, params, self.dd_config.storage_tiers[0]
326
+ )
327
+ except Exception:
328
+ datadog_url = None
329
+
198
330
  # Provide more specific error message for rate limiting failures
199
331
  if e.status_code == 429:
200
332
  error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
333
+ if datadog_url:
334
+ error_msg += f"\nView in Datadog: {datadog_url}"
335
+ elif e.status_code == 400:
336
+ # Use enhanced error message for validation errors
337
+ error_msg = enhance_error_message(
338
+ e,
339
+ "/api/v2/logs/events/search",
340
+ "POST",
341
+ str(self.dd_config.site_api_url),
342
+ )
343
+
344
+ # Add query context
345
+ query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
346
+ if params.filter:
347
+ query += f' "{params.filter}"'
348
+ error_msg += f"\n\nQuery attempted: {query}"
349
+
350
+ # Add Datadog web UI URL to error message
351
+ if datadog_url:
352
+ error_msg += f"\nView in Datadog: {datadog_url}"
201
353
  else:
202
- error_msg = f"Exception while querying Datadog: {str(e)}"
354
+ # Include full API error details and query context
355
+ error_msg = (
356
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
357
+ )
358
+ query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
359
+ if params.filter:
360
+ query += f' "{params.filter}"'
361
+ error_msg += f"\nQuery: {query}"
362
+
363
+ # Get actual time range used
364
+ (from_time, to_time) = process_timestamps_to_rfc3339(
365
+ start_timestamp=params.start_time,
366
+ end_timestamp=params.end_time,
367
+ default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
368
+ )
369
+ error_msg += f"\nTime range: {from_time} to {to_time}"
370
+
371
+ # Add Datadog web UI URL to error message
372
+ if datadog_url:
373
+ error_msg += f"\nView in Datadog: {datadog_url}"
203
374
 
204
375
  return StructuredToolResult(
205
376
  status=StructuredToolResultStatus.ERROR,
206
377
  error=error_msg,
378
+ url=datadog_url,
207
379
  params=params.model_dump(),
208
380
  invocation=json.dumps(e.payload),
209
381
  )
@@ -224,7 +396,7 @@ class DatadogLogsToolset(BasePodLoggingToolset):
224
396
  Returns (success, error_message).
225
397
  """
226
398
  try:
227
- logging.info("Performing Datadog configuration healthcheck...")
399
+ logging.debug("Performing Datadog configuration healthcheck...")
228
400
  healthcheck_params = FetchPodLogsParams(
229
401
  namespace="*",
230
402
  pod_name="*",
@@ -254,7 +426,7 @@ class DatadogLogsToolset(BasePodLoggingToolset):
254
426
  if not config:
255
427
  return (
256
428
  False,
257
- TOOLSET_CONFIG_MISSING_ERROR,
429
+ "Missing config for dd_api_key, dd_app_key, or site_api_url. For details: https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
258
430
  )
259
431
 
260
432
  try:
@@ -6,6 +6,7 @@ from holmes.core.tools import (
6
6
  CallablePrerequisite,
7
7
  StructuredToolResult,
8
8
  Tool,
9
+ ToolInvokeContext,
9
10
  ToolParameter,
10
11
  StructuredToolResultStatus,
11
12
  Toolset,
@@ -54,7 +55,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
54
55
  def __init__(self, toolset: "DatadogMetricsToolset"):
55
56
  super().__init__(
56
57
  name="list_active_datadog_metrics",
57
- description=f"List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
58
+ description=f"[datadog/metrics toolset] List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
58
59
  parameters={
59
60
  "from_time": ToolParameter(
60
61
  description=f"Start time for listing metrics. Can be an RFC3339 formatted datetime (e.g. '2023-03-01T10:30:00Z') or a negative integer for relative seconds from now (e.g. -86400 for 24 hours ago). Defaults to {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours ago",
@@ -75,9 +76,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
75
76
  toolset=toolset,
76
77
  )
77
78
 
78
- def _invoke(
79
- self, params: dict, user_approved: bool = False
80
- ) -> StructuredToolResult:
79
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
81
80
  if not self.toolset.dd_config:
82
81
  return StructuredToolResult(
83
82
  status=StructuredToolResultStatus.ERROR,
@@ -149,7 +148,27 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
149
148
  f"and 'timeseries_query' permissions. Error: {str(e)}"
150
149
  )
151
150
  else:
152
- error_msg = f"Exception while querying Datadog: {str(e)}"
151
+ # Include full API error details for better debugging
152
+ error_msg = (
153
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
154
+ )
155
+ if params:
156
+ # ListActiveMetrics parameters: from_time, host, tag_filter
157
+ if params.get("host"):
158
+ error_msg += f"\nHost filter: {params.get('host')}"
159
+ if params.get("tag_filter"):
160
+ error_msg += f"\nTag filter: {params.get('tag_filter')}"
161
+
162
+ from_time_param = params.get("from_time")
163
+ if from_time_param:
164
+ time_desc = from_time_param
165
+ else:
166
+ time_desc = f"default (last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours)"
167
+ error_msg += f"\nTime range: {time_desc}"
168
+
169
+ # Note: We cannot generate a Datadog Metrics Explorer URL for ListActiveMetrics
170
+ # because the Metrics Explorer requires a specific metric query,
171
+ # while ListActiveMetrics just lists available metrics without querying any specific one
153
172
 
154
173
  return StructuredToolResult(
155
174
  status=StructuredToolResultStatus.ERROR,
@@ -184,7 +203,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
184
203
  def __init__(self, toolset: "DatadogMetricsToolset"):
185
204
  super().__init__(
186
205
  name="query_datadog_metrics",
187
- description="Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
206
+ description="[datadog/metrics toolset] Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
188
207
  parameters={
189
208
  "query": ToolParameter(
190
209
  description="The metric query string (e.g., 'system.cpu.user{host:myhost}')",
@@ -217,9 +236,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
217
236
  toolset=toolset,
218
237
  )
219
238
 
220
- def _invoke(
221
- self, params: dict, user_approved: bool = False
222
- ) -> StructuredToolResult:
239
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
223
240
  if not self.toolset.dd_config:
224
241
  return StructuredToolResult(
225
242
  status=StructuredToolResultStatus.ERROR,
@@ -261,9 +278,29 @@ class QueryMetrics(BaseDatadogMetricsTool):
261
278
  output_type = params.get("output_type", "Plain")
262
279
 
263
280
  if not series:
281
+ # Include detailed context in error message
282
+ from_time_param = params.get("from_time")
283
+ to_time_param = params.get("to_time")
284
+
285
+ if from_time_param:
286
+ from_desc = from_time_param
287
+ else:
288
+ from_desc = (
289
+ f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
290
+ )
291
+
292
+ to_desc = to_time_param or "now"
293
+
294
+ error_msg = (
295
+ f"The query returned no data.\n"
296
+ f"Query: {params.get('query', 'not specified')}\n"
297
+ f"Time range: {from_desc} to {to_desc}\n"
298
+ f"Please check your query syntax and ensure data exists for this time range."
299
+ )
300
+
264
301
  return StructuredToolResult(
265
302
  status=StructuredToolResultStatus.NO_DATA,
266
- error="The query returned no data. Please check your query syntax and time range.",
303
+ error=error_msg,
267
304
  params=params,
268
305
  )
269
306
 
@@ -333,7 +370,25 @@ class QueryMetrics(BaseDatadogMetricsTool):
333
370
  f"and 'timeseries_query' permissions. Error: {str(e)}"
334
371
  )
335
372
  else:
336
- error_msg = f"Exception while querying Datadog: {str(e)}"
373
+ # Include full API error details for better debugging
374
+ error_msg = (
375
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
376
+ )
377
+ if params:
378
+ error_msg += f"\nQuery: {params.get('query', 'not specified')}"
379
+
380
+ from_time_param = params.get("from_time")
381
+ to_time_param = params.get("to_time")
382
+
383
+ if from_time_param:
384
+ from_desc = from_time_param
385
+ else:
386
+ from_desc = (
387
+ f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
388
+ )
389
+
390
+ to_desc = to_time_param or "now"
391
+ error_msg += f"\nTime range: {from_desc} to {to_desc}"
337
392
 
338
393
  return StructuredToolResult(
339
394
  status=StructuredToolResultStatus.ERROR,
@@ -364,7 +419,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
364
419
  def __init__(self, toolset: "DatadogMetricsToolset"):
365
420
  super().__init__(
366
421
  name="get_datadog_metric_metadata",
367
- description="Get metadata about one or more metrics including their type, description, unit, and other properties",
422
+ description="[datadog/metrics toolset] Get metadata about one or more metrics including their type, description, unit, and other properties",
368
423
  parameters={
369
424
  "metric_names": ToolParameter(
370
425
  description="Comma-separated list of metric names to get metadata for (e.g., 'system.cpu.user, system.mem.used')",
@@ -375,9 +430,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
375
430
  toolset=toolset,
376
431
  )
377
432
 
378
- def _invoke(
379
- self, params: dict, user_approved: bool = False
380
- ) -> StructuredToolResult:
433
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
381
434
  if not self.toolset.dd_config:
382
435
  return StructuredToolResult(
383
436
  status=StructuredToolResultStatus.ERROR,
@@ -480,7 +533,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
480
533
  def __init__(self, toolset: "DatadogMetricsToolset"):
481
534
  super().__init__(
482
535
  name="list_datadog_metric_tags",
483
- description="List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
536
+ description="[datadog/metrics toolset] List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
484
537
  parameters={
485
538
  "metric_name": ToolParameter(
486
539
  description="The name of the metric to get tags for (e.g., 'system.cpu.user', 'container.memory.usage')",
@@ -491,9 +544,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
491
544
  toolset=toolset,
492
545
  )
493
546
 
494
- def _invoke(
495
- self, params: dict, user_approved: bool = False
496
- ) -> StructuredToolResult:
547
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
497
548
  if not self.toolset.dd_config:
498
549
  return StructuredToolResult(
499
550
  status=StructuredToolResultStatus.ERROR,
@@ -537,7 +588,14 @@ class ListMetricTags(BaseDatadogMetricsTool):
537
588
  f"permissions. Error: {str(e)}"
538
589
  )
539
590
  else:
540
- error_msg = f"Exception while querying Datadog: {str(e)}"
591
+ # Include full API error details for better debugging
592
+ error_msg = (
593
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
594
+ )
595
+ if params:
596
+ error_msg += (
597
+ f"\nMetric name: {params.get('metric_name', 'not specified')}"
598
+ )
541
599
 
542
600
  return StructuredToolResult(
543
601
  status=StructuredToolResultStatus.ERROR,
@@ -586,7 +644,7 @@ class DatadogMetricsToolset(Toolset):
586
644
 
587
645
  def _perform_healthcheck(self, dd_config: DatadogMetricsConfig) -> Tuple[bool, str]:
588
646
  try:
589
- logging.info("Performing Datadog metrics configuration healthcheck...")
647
+ logging.debug("Performing Datadog metrics configuration healthcheck...")
590
648
 
591
649
  url = f"{dd_config.site_api_url}/api/v1/validate"
592
650
  headers = get_headers(dd_config)
@@ -615,7 +673,7 @@ class DatadogMetricsToolset(Toolset):
615
673
  if not config:
616
674
  return (
617
675
  False,
618
- TOOLSET_CONFIG_MISSING_ERROR,
676
+ "Missing config for dd_api_key, dd_app_key, or site_api_url. For details: https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
619
677
  )
620
678
 
621
679
  try:
@@ -9,6 +9,7 @@ from holmes.core.tools import (
9
9
  CallablePrerequisite,
10
10
  StructuredToolResult,
11
11
  Tool,
12
+ ToolInvokeContext,
12
13
  ToolParameter,
13
14
  StructuredToolResultStatus,
14
15
  Toolset,
@@ -69,7 +70,7 @@ class GenerateRDSPerformanceReport(BaseDatadogRDSTool):
69
70
  def __init__(self, toolset: "DatadogRDSToolset"):
70
71
  super().__init__(
71
72
  name="datadog_rds_performance_report",
72
- description="Generate a comprehensive performance report for a specific RDS instance including latency, resource utilization, and storage metrics with analysis",
73
+ description="[datadog/rds toolset] Generate a comprehensive performance report for a specific RDS instance including latency, resource utilization, and storage metrics with analysis",
73
74
  parameters={
74
75
  "db_instance_identifier": ToolParameter(
75
76
  description="The RDS database instance identifier",
@@ -92,9 +93,7 @@ class GenerateRDSPerformanceReport(BaseDatadogRDSTool):
92
93
  toolset=toolset,
93
94
  )
94
95
 
95
- def _invoke(
96
- self, params: dict, user_approved: bool = False
97
- ) -> StructuredToolResult:
96
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
98
97
  if not self.toolset.dd_config:
99
98
  return StructuredToolResult(
100
99
  status=StructuredToolResultStatus.ERROR,
@@ -364,7 +363,7 @@ class GetTopWorstPerformingRDSInstances(BaseDatadogRDSTool):
364
363
  def __init__(self, toolset: "DatadogRDSToolset"):
365
364
  super().__init__(
366
365
  name="datadog_rds_top_worst_performing",
367
- description="Get a summarized report of the top worst performing RDS instances based on latency, CPU utilization, and error rates",
366
+ description="[datadog/rds toolset] Get a summarized report of the top worst performing RDS instances based on latency, CPU utilization, and error rates",
368
367
  parameters={
369
368
  "top_n": ToolParameter(
370
369
  description=f"Number of worst performing instances to return (default: {DEFAULT_TOP_INSTANCES})",
@@ -392,9 +391,7 @@ class GetTopWorstPerformingRDSInstances(BaseDatadogRDSTool):
392
391
  toolset=toolset,
393
392
  )
394
393
 
395
- def _invoke(
396
- self, params: dict, user_approved: bool = False
397
- ) -> StructuredToolResult:
394
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
398
395
  if not self.toolset.dd_config:
399
396
  return StructuredToolResult(
400
397
  status=StructuredToolResultStatus.ERROR,
@@ -9,6 +9,7 @@ from typing import Any, Dict, Optional, Tuple
9
9
  from holmes.core.tools import (
10
10
  CallablePrerequisite,
11
11
  Tool,
12
+ ToolInvokeContext,
12
13
  ToolParameter,
13
14
  Toolset,
14
15
  StructuredToolResult,
@@ -156,7 +157,7 @@ class FetchDatadogTracesList(BaseDatadogTracesTool):
156
157
  def __init__(self, toolset: "DatadogTracesToolset"):
157
158
  super().__init__(
158
159
  name="fetch_datadog_traces",
159
- description="Fetch a list of traces from Datadog with optional filters",
160
+ description="[datadog/traces toolset] Fetch a list of traces from Datadog with optional filters",
160
161
  parameters={
161
162
  "service": ToolParameter(
162
163
  description="Filter by service name",
@@ -210,9 +211,7 @@ class FetchDatadogTracesList(BaseDatadogTracesTool):
210
211
  filter_str = ", ".join(filters) if filters else "all"
211
212
  return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Traces ({filter_str})"
212
213
 
213
- def _invoke(
214
- self, params: dict, user_approved: bool = False
215
- ) -> StructuredToolResult:
214
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
216
215
  """Execute the tool to fetch traces."""
217
216
  if not self.toolset.dd_config:
218
217
  return StructuredToolResult(
@@ -360,7 +359,7 @@ class FetchDatadogTraceById(BaseDatadogTracesTool):
360
359
  def __init__(self, toolset: "DatadogTracesToolset"):
361
360
  super().__init__(
362
361
  name="fetch_datadog_trace_by_id",
363
- description="Fetch detailed information about a specific trace by its ID",
362
+ description="[datadog/traces toolset] Fetch detailed information about a specific trace by its ID",
364
363
  parameters={
365
364
  "trace_id": ToolParameter(
366
365
  description="The trace ID to fetch details for",
@@ -376,9 +375,7 @@ class FetchDatadogTraceById(BaseDatadogTracesTool):
376
375
  trace_id = params.get("trace_id", "unknown")
377
376
  return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Trace Details ({trace_id})"
378
377
 
379
- def _invoke(
380
- self, params: dict, user_approved: bool = False
381
- ) -> StructuredToolResult:
378
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
382
379
  """Execute the tool to fetch trace details."""
383
380
  if not self.toolset.dd_config:
384
381
  return StructuredToolResult(
@@ -499,7 +496,7 @@ class FetchDatadogSpansByFilter(BaseDatadogTracesTool):
499
496
  def __init__(self, toolset: "DatadogTracesToolset"):
500
497
  super().__init__(
501
498
  name="fetch_datadog_spans",
502
- description="Search for spans in Datadog with detailed filters",
499
+ description="[datadog/traces toolset] Search for spans in Datadog with detailed filters",
503
500
  parameters={
504
501
  "query": ToolParameter(
505
502
  description="Datadog search query (e.g., 'service:web-app @http.status_code:500')",
@@ -559,9 +556,7 @@ class FetchDatadogSpansByFilter(BaseDatadogTracesTool):
559
556
  filter_str = ", ".join(filters) if filters else "all"
560
557
  return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Spans ({filter_str})"
561
558
 
562
- def _invoke(
563
- self, params: dict, user_approved: bool = False
564
- ) -> StructuredToolResult:
559
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
565
560
  """Execute the tool to search spans."""
566
561
  if not self.toolset.dd_config:
567
562
  return StructuredToolResult(
@@ -4,7 +4,11 @@ import requests # type: ignore
4
4
  import os
5
5
  from typing import Any, Optional, Dict, List, Tuple
6
6
  from pydantic import BaseModel
7
- from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
7
+ from holmes.core.tools import (
8
+ StructuredToolResult,
9
+ StructuredToolResultStatus,
10
+ ToolInvokeContext,
11
+ )
8
12
 
9
13
  from holmes.core.tools import (
10
14
  Toolset,
@@ -250,7 +254,9 @@ class GitReadFileWithLineNumbers(Tool):
250
254
  )
251
255
 
252
256
  def _invoke(
253
- self, params: dict, user_approved: bool = False
257
+ self,
258
+ params: dict,
259
+ context: ToolInvokeContext,
254
260
  ) -> StructuredToolResult:
255
261
  filepath = params["filepath"]
256
262
  try:
@@ -296,7 +302,9 @@ class GitListFiles(Tool):
296
302
  )
297
303
 
298
304
  def _invoke(
299
- self, params: dict, user_approved: bool = False
305
+ self,
306
+ params: dict,
307
+ context: ToolInvokeContext,
300
308
  ) -> StructuredToolResult:
301
309
  try:
302
310
  headers = {"Authorization": f"token {self.toolset.git_credentials}"}
@@ -338,9 +346,7 @@ class GitListOpenPRs(Tool):
338
346
  toolset=toolset, # type: ignore
339
347
  )
340
348
 
341
- def _invoke(
342
- self, params: dict, user_approved: bool = False
343
- ) -> StructuredToolResult:
349
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
344
350
  try:
345
351
  prs = self.toolset.list_open_prs()
346
352
  formatted = [
@@ -408,9 +414,7 @@ class GitExecuteChanges(Tool):
408
414
  toolset=toolset, # type: ignore
409
415
  )
410
416
 
411
- def _invoke(
412
- self, params: dict, user_approved: bool = False
413
- ) -> StructuredToolResult:
417
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
414
418
  def error(msg: str) -> StructuredToolResult:
415
419
  return StructuredToolResult(
416
420
  status=StructuredToolResultStatus.ERROR,
@@ -628,9 +632,7 @@ class GitUpdatePR(Tool):
628
632
  toolset=toolset, # type: ignore
629
633
  )
630
634
 
631
- def _invoke(
632
- self, params: dict, user_approved: bool = False
633
- ) -> StructuredToolResult:
635
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
634
636
  try:
635
637
  line = params["line"]
636
638
  filename = params["filename"]