holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188):
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +20 -6
  3. holmes/common/env_vars.py +58 -3
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +123 -148
  6. holmes/core/conversations.py +71 -15
  7. holmes/core/feedback.py +191 -0
  8. holmes/core/investigation.py +31 -39
  9. holmes/core/investigation_structured_output.py +3 -3
  10. holmes/core/issue.py +1 -1
  11. holmes/core/llm.py +508 -88
  12. holmes/core/models.py +108 -4
  13. holmes/core/openai_formatting.py +14 -1
  14. holmes/core/prompt.py +48 -3
  15. holmes/core/runbooks.py +1 -0
  16. holmes/core/safeguards.py +8 -6
  17. holmes/core/supabase_dal.py +295 -100
  18. holmes/core/tool_calling_llm.py +489 -428
  19. holmes/core/tools.py +325 -56
  20. holmes/core/tools_utils/token_counting.py +21 -0
  21. holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
  22. holmes/core/tools_utils/tool_executor.py +0 -13
  23. holmes/core/tools_utils/toolset_utils.py +1 -0
  24. holmes/core/toolset_manager.py +191 -5
  25. holmes/core/tracing.py +19 -3
  26. holmes/core/transformers/__init__.py +23 -0
  27. holmes/core/transformers/base.py +63 -0
  28. holmes/core/transformers/llm_summarize.py +175 -0
  29. holmes/core/transformers/registry.py +123 -0
  30. holmes/core/transformers/transformer.py +32 -0
  31. holmes/core/truncation/compaction.py +94 -0
  32. holmes/core/truncation/dal_truncation_utils.py +23 -0
  33. holmes/core/truncation/input_context_window_limiter.py +219 -0
  34. holmes/interactive.py +228 -31
  35. holmes/main.py +23 -40
  36. holmes/plugins/interfaces.py +2 -1
  37. holmes/plugins/prompts/__init__.py +2 -1
  38. holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
  39. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
  41. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  42. holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
  43. holmes/plugins/prompts/generic_ask.jinja2 +0 -4
  44. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
  45. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
  46. holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
  47. holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
  48. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
  49. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
  50. holmes/plugins/runbooks/__init__.py +145 -17
  51. holmes/plugins/runbooks/catalog.json +2 -0
  52. holmes/plugins/sources/github/__init__.py +4 -2
  53. holmes/plugins/sources/prometheus/models.py +1 -0
  54. holmes/plugins/toolsets/__init__.py +44 -27
  55. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  56. holmes/plugins/toolsets/aks.yaml +64 -0
  57. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
  58. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  59. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  60. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  61. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  62. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  63. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
  64. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
  65. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
  66. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
  67. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
  68. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
  69. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
  70. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
  71. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
  72. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
  73. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
  74. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  75. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  76. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  77. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  78. holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
  79. holmes/plugins/toolsets/bash/common/bash.py +23 -13
  80. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  81. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  82. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  83. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  84. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  85. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  86. holmes/plugins/toolsets/cilium.yaml +284 -0
  87. holmes/plugins/toolsets/connectivity_check.py +124 -0
  88. holmes/plugins/toolsets/coralogix/api.py +132 -119
  89. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  90. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  91. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  92. holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
  93. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
  94. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  95. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  96. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  97. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  98. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
  99. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
  100. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
  101. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
  102. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  103. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  104. holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  105. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
  106. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
  107. holmes/plugins/toolsets/git.py +54 -50
  108. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  109. holmes/plugins/toolsets/grafana/common.py +13 -29
  110. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
  111. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
  112. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
  113. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  114. holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
  115. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
  116. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  117. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
  118. holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
  119. holmes/plugins/toolsets/internet/internet.py +15 -16
  120. holmes/plugins/toolsets/internet/notion.py +9 -11
  121. holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
  122. holmes/plugins/toolsets/investigator/model.py +3 -1
  123. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  124. holmes/plugins/toolsets/kafka.py +36 -42
  125. holmes/plugins/toolsets/kubernetes.yaml +317 -113
  126. holmes/plugins/toolsets/kubernetes_logs.py +9 -9
  127. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  128. holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
  129. holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
  130. holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
  131. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
  132. holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
  133. holmes/plugins/toolsets/openshift.yaml +283 -0
  134. holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
  135. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
  136. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  137. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  138. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
  139. holmes/plugins/toolsets/robusta/robusta.py +239 -68
  140. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  141. holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
  142. holmes/plugins/toolsets/service_discovery.py +1 -1
  143. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  144. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  145. holmes/plugins/toolsets/utils.py +88 -0
  146. holmes/utils/config_utils.py +91 -0
  147. holmes/utils/connection_utils.py +31 -0
  148. holmes/utils/console/result.py +10 -0
  149. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  150. holmes/utils/env.py +7 -0
  151. holmes/utils/file_utils.py +2 -1
  152. holmes/utils/global_instructions.py +60 -11
  153. holmes/utils/holmes_status.py +6 -4
  154. holmes/utils/holmes_sync_toolsets.py +0 -2
  155. holmes/utils/krr_utils.py +188 -0
  156. holmes/utils/log.py +15 -0
  157. holmes/utils/markdown_utils.py +2 -3
  158. holmes/utils/memory_limit.py +58 -0
  159. holmes/utils/sentry_helper.py +64 -0
  160. holmes/utils/stream.py +69 -8
  161. holmes/utils/tags.py +4 -3
  162. holmes/version.py +37 -15
  163. holmesgpt-0.18.4.dist-info/LICENSE +178 -0
  164. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
  165. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  166. holmes/core/performance_timing.py +0 -72
  167. holmes/plugins/toolsets/aws.yaml +0 -80
  168. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
  169. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  170. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
  171. holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
  172. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  173. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  174. holmes/plugins/toolsets/newrelic.py +0 -231
  175. holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
  176. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  177. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
  178. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  179. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  180. holmes/plugins/toolsets/servicenow/install.md +0 -37
  181. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  182. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  183. holmes/utils/keygen_utils.py +0 -6
  184. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  185. holmesgpt-0.13.2.dist-info/RECORD +0 -234
  186. /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
  187. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  188. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
@@ -1,26 +1,40 @@
1
1
  import json
2
2
  import logging
3
3
  import os
4
- from typing import Any, Optional, Dict, Tuple
4
+ from datetime import datetime
5
+ from typing import Any, Dict, Optional, Tuple
6
+
7
+ from pydantic import AnyUrl
8
+
5
9
  from holmes.core.tools import (
6
10
  CallablePrerequisite,
7
11
  StructuredToolResult,
12
+ StructuredToolResultStatus,
8
13
  Tool,
14
+ ToolInvokeContext,
9
15
  ToolParameter,
10
- ToolResultStatus,
11
16
  Toolset,
12
17
  ToolsetTag,
13
18
  )
14
19
  from holmes.plugins.toolsets.consts import (
15
- TOOLSET_CONFIG_MISSING_ERROR,
16
20
  STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
21
+ TOOLSET_CONFIG_MISSING_ERROR,
17
22
  )
18
23
  from holmes.plugins.toolsets.datadog.datadog_api import (
19
- DatadogBaseConfig,
24
+ MAX_RETRY_COUNT_ON_RATE_LIMIT,
20
25
  DataDogRequestError,
21
26
  execute_datadog_http_request,
22
27
  get_headers,
23
- MAX_RETRY_COUNT_ON_RATE_LIMIT,
28
+ )
29
+ from holmes.plugins.toolsets.datadog.datadog_models import DatadogMetricsConfig
30
+ from holmes.plugins.toolsets.datadog.datadog_url_utils import (
31
+ generate_datadog_metric_metadata_url,
32
+ generate_datadog_metric_tags_url,
33
+ generate_datadog_metrics_explorer_url,
34
+ generate_datadog_metrics_list_url,
35
+ )
36
+ from holmes.plugins.toolsets.logging_utils.logging_api import (
37
+ DEFAULT_TIME_SPAN_SECONDS,
24
38
  )
25
39
  from holmes.plugins.toolsets.utils import (
26
40
  get_param_or_raise,
@@ -28,18 +42,6 @@ from holmes.plugins.toolsets.utils import (
28
42
  standard_start_datetime_tool_param_description,
29
43
  toolset_name_for_one_liner,
30
44
  )
31
- from holmes.plugins.toolsets.logging_utils.logging_api import (
32
- DEFAULT_TIME_SPAN_SECONDS,
33
- DEFAULT_LOG_LIMIT,
34
- )
35
-
36
- from datetime import datetime
37
-
38
- from holmes.utils.keygen_utils import generate_random_key
39
-
40
-
41
- class DatadogMetricsConfig(DatadogBaseConfig):
42
- default_limit: int = DEFAULT_LOG_LIMIT
43
45
 
44
46
 
45
47
  class BaseDatadogMetricsTool(Tool):
@@ -54,7 +56,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
54
56
  def __init__(self, toolset: "DatadogMetricsToolset"):
55
57
  super().__init__(
56
58
  name="list_active_datadog_metrics",
57
- description=f"List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
59
+ description=f"[datadog/metrics toolset] List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
58
60
  parameters={
59
61
  "from_time": ToolParameter(
60
62
  description=f"Start time for listing metrics. Can be an RFC3339 formatted datetime (e.g. '2023-03-01T10:30:00Z') or a negative integer for relative seconds from now (e.g. -86400 for 24 hours ago). Defaults to {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours ago",
@@ -75,12 +77,10 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
75
77
  toolset=toolset,
76
78
  )
77
79
 
78
- def _invoke(
79
- self, params: dict, user_approved: bool = False
80
- ) -> StructuredToolResult:
80
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
81
81
  if not self.toolset.dd_config:
82
82
  return StructuredToolResult(
83
- status=ToolResultStatus.ERROR,
83
+ status=StructuredToolResultStatus.ERROR,
84
84
  error=TOOLSET_CONFIG_MISSING_ERROR,
85
85
  params=params,
86
86
  )
@@ -121,7 +121,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
121
121
  metrics = data.get("metrics", [])
122
122
  if not metrics:
123
123
  return StructuredToolResult(
124
- status=ToolResultStatus.ERROR,
124
+ status=StructuredToolResultStatus.ERROR,
125
125
  data="Your filter returned no metrics. Change your filter and try again",
126
126
  params=params,
127
127
  )
@@ -132,10 +132,18 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
132
132
  for metric in sorted(metrics):
133
133
  output.append(metric)
134
134
 
135
+ url = generate_datadog_metrics_list_url(
136
+ self.toolset.dd_config,
137
+ from_time,
138
+ params.get("host"),
139
+ params.get("tag_filter"),
140
+ )
141
+
135
142
  return StructuredToolResult(
136
- status=ToolResultStatus.SUCCESS,
143
+ status=StructuredToolResultStatus.SUCCESS,
137
144
  data="\n".join(output),
138
145
  params=params,
146
+ url=url,
139
147
  )
140
148
 
141
149
  except DataDogRequestError as e:
@@ -149,10 +157,30 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
149
157
  f"and 'timeseries_query' permissions. Error: {str(e)}"
150
158
  )
151
159
  else:
152
- error_msg = f"Exception while querying Datadog: {str(e)}"
160
+ # Include full API error details for better debugging
161
+ error_msg = (
162
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
163
+ )
164
+ if params:
165
+ # ListActiveMetrics parameters: from_time, host, tag_filter
166
+ if params.get("host"):
167
+ error_msg += f"\nHost filter: {params.get('host')}"
168
+ if params.get("tag_filter"):
169
+ error_msg += f"\nTag filter: {params.get('tag_filter')}"
170
+
171
+ from_time_param = params.get("from_time")
172
+ if from_time_param:
173
+ time_desc = from_time_param
174
+ else:
175
+ time_desc = f"default (last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours)"
176
+ error_msg += f"\nTime range: {time_desc}"
177
+
178
+ # Note: We cannot generate a Datadog Metrics Explorer URL for ListActiveMetrics
179
+ # because the Metrics Explorer requires a specific metric query,
180
+ # while ListActiveMetrics just lists available metrics without querying any specific one
153
181
 
154
182
  return StructuredToolResult(
155
- status=ToolResultStatus.ERROR,
183
+ status=StructuredToolResultStatus.ERROR,
156
184
  error=error_msg,
157
185
  params=params,
158
186
  invocation=json.dumps({"url": url, "params": query_params})
@@ -165,7 +193,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
165
193
  f"Failed to query Datadog metrics for params: {params}", exc_info=True
166
194
  )
167
195
  return StructuredToolResult(
168
- status=ToolResultStatus.ERROR,
196
+ status=StructuredToolResultStatus.ERROR,
169
197
  error=f"Exception while querying Datadog: {str(e)}",
170
198
  params=params,
171
199
  )
@@ -184,7 +212,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
184
212
  def __init__(self, toolset: "DatadogMetricsToolset"):
185
213
  super().__init__(
186
214
  name="query_datadog_metrics",
187
- description="Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
215
+ description="[datadog/metrics toolset] Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
188
216
  parameters={
189
217
  "query": ToolParameter(
190
218
  description="The metric query string (e.g., 'system.cpu.user{host:myhost}')",
@@ -217,12 +245,10 @@ class QueryMetrics(BaseDatadogMetricsTool):
217
245
  toolset=toolset,
218
246
  )
219
247
 
220
- def _invoke(
221
- self, params: dict, user_approved: bool = False
222
- ) -> StructuredToolResult:
248
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
223
249
  if not self.toolset.dd_config:
224
250
  return StructuredToolResult(
225
- status=ToolResultStatus.ERROR,
251
+ status=StructuredToolResultStatus.ERROR,
226
252
  error=TOOLSET_CONFIG_MISSING_ERROR,
227
253
  params=params,
228
254
  )
@@ -261,9 +287,29 @@ class QueryMetrics(BaseDatadogMetricsTool):
261
287
  output_type = params.get("output_type", "Plain")
262
288
 
263
289
  if not series:
290
+ # Include detailed context in error message
291
+ from_time_param = params.get("from_time")
292
+ to_time_param = params.get("to_time")
293
+
294
+ if from_time_param:
295
+ from_desc = from_time_param
296
+ else:
297
+ from_desc = (
298
+ f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
299
+ )
300
+
301
+ to_desc = to_time_param or "now"
302
+
303
+ error_msg = (
304
+ f"The query returned no data.\n"
305
+ f"Query: {params.get('query', 'not specified')}\n"
306
+ f"Time range: {from_desc} to {to_desc}\n"
307
+ f"Please check your query syntax and ensure data exists for this time range."
308
+ )
309
+
264
310
  return StructuredToolResult(
265
- status=ToolResultStatus.NO_DATA,
266
- error="The query returned no data. Please check your query syntax and time range.",
311
+ status=StructuredToolResultStatus.NO_DATA,
312
+ error=error_msg,
267
313
  params=params,
268
314
  )
269
315
 
@@ -304,7 +350,6 @@ class QueryMetrics(BaseDatadogMetricsTool):
304
350
  response_data = {
305
351
  "status": "success",
306
352
  "error_message": None,
307
- "random_key": generate_random_key(),
308
353
  "tool_name": self.name,
309
354
  "description": description,
310
355
  "query": query,
@@ -315,11 +360,18 @@ class QueryMetrics(BaseDatadogMetricsTool):
315
360
  "data": {"resultType": "matrix", "result": prometheus_result},
316
361
  }
317
362
 
318
- data_str = json.dumps(response_data, indent=2)
363
+ url = generate_datadog_metrics_explorer_url(
364
+ self.toolset.dd_config,
365
+ query,
366
+ from_time,
367
+ to_time,
368
+ )
369
+
319
370
  return StructuredToolResult(
320
- status=ToolResultStatus.SUCCESS,
321
- data=data_str,
371
+ status=StructuredToolResultStatus.SUCCESS,
372
+ data=response_data,
322
373
  params=params,
374
+ url=url,
323
375
  )
324
376
 
325
377
  except DataDogRequestError as e:
@@ -333,10 +385,28 @@ class QueryMetrics(BaseDatadogMetricsTool):
333
385
  f"and 'timeseries_query' permissions. Error: {str(e)}"
334
386
  )
335
387
  else:
336
- error_msg = f"Exception while querying Datadog: {str(e)}"
388
+ # Include full API error details for better debugging
389
+ error_msg = (
390
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
391
+ )
392
+ if params:
393
+ error_msg += f"\nQuery: {params.get('query', 'not specified')}"
394
+
395
+ from_time_param = params.get("from_time")
396
+ to_time_param = params.get("to_time")
397
+
398
+ if from_time_param:
399
+ from_desc = from_time_param
400
+ else:
401
+ from_desc = (
402
+ f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
403
+ )
404
+
405
+ to_desc = to_time_param or "now"
406
+ error_msg += f"\nTime range: {from_desc} to {to_desc}"
337
407
 
338
408
  return StructuredToolResult(
339
- status=ToolResultStatus.ERROR,
409
+ status=StructuredToolResultStatus.ERROR,
340
410
  error=error_msg,
341
411
  params=params,
342
412
  invocation=json.dumps({"url": url, "params": query_params})
@@ -350,7 +420,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
350
420
  )
351
421
 
352
422
  return StructuredToolResult(
353
- status=ToolResultStatus.ERROR,
423
+ status=StructuredToolResultStatus.ERROR,
354
424
  error=f"Exception while querying Datadog: {str(e)}",
355
425
  params=params,
356
426
  )
@@ -364,7 +434,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
364
434
  def __init__(self, toolset: "DatadogMetricsToolset"):
365
435
  super().__init__(
366
436
  name="get_datadog_metric_metadata",
367
- description="Get metadata about one or more metrics including their type, description, unit, and other properties",
437
+ description="[datadog/metrics toolset] Get metadata about one or more metrics including their type, description, unit, and other properties",
368
438
  parameters={
369
439
  "metric_names": ToolParameter(
370
440
  description="Comma-separated list of metric names to get metadata for (e.g., 'system.cpu.user, system.mem.used')",
@@ -375,12 +445,10 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
375
445
  toolset=toolset,
376
446
  )
377
447
 
378
- def _invoke(
379
- self, params: dict, user_approved: bool = False
380
- ) -> StructuredToolResult:
448
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
381
449
  if not self.toolset.dd_config:
382
450
  return StructuredToolResult(
383
- status=ToolResultStatus.ERROR,
451
+ status=StructuredToolResultStatus.ERROR,
384
452
  error=TOOLSET_CONFIG_MISSING_ERROR,
385
453
  params=params,
386
454
  )
@@ -396,7 +464,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
396
464
 
397
465
  if not metric_names:
398
466
  return StructuredToolResult(
399
- status=ToolResultStatus.ERROR,
467
+ status=StructuredToolResultStatus.ERROR,
400
468
  error="metric_names cannot be empty",
401
469
  params=params,
402
470
  )
@@ -408,10 +476,10 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
408
476
 
409
477
  for metric_name in metric_names:
410
478
  try:
411
- url = f"{self.toolset.dd_config.site_api_url}/api/v1/metrics/{metric_name}"
479
+ api_url = f"{self.toolset.dd_config.site_api_url}/api/v1/metrics/{metric_name}"
412
480
 
413
481
  data = execute_datadog_http_request(
414
- url=url,
482
+ url=api_url,
415
483
  headers=headers,
416
484
  payload_or_params={},
417
485
  timeout=self.toolset.dd_config.request_timeout,
@@ -440,18 +508,29 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
440
508
  "failed": len(errors),
441
509
  }
442
510
 
511
+ # Generate URL for the first metric (or a general metrics page if multiple)
512
+ if metric_names:
513
+ url = generate_datadog_metric_metadata_url(
514
+ self.toolset.dd_config,
515
+ metric_names[0],
516
+ )
517
+ else:
518
+ url = None
519
+
443
520
  if not results and errors:
444
521
  return StructuredToolResult(
445
- status=ToolResultStatus.ERROR,
522
+ status=StructuredToolResultStatus.ERROR,
446
523
  error="Failed to retrieve metadata for all metrics",
447
- data=json.dumps(response_data, indent=2),
524
+ data=response_data,
448
525
  params=params,
526
+ url=url,
449
527
  )
450
528
 
451
529
  return StructuredToolResult(
452
- status=ToolResultStatus.SUCCESS,
453
- data=json.dumps(response_data, indent=2),
530
+ status=StructuredToolResultStatus.SUCCESS,
531
+ data=response_data,
454
532
  params=params,
533
+ url=url,
455
534
  )
456
535
 
457
536
  except Exception as e:
@@ -461,7 +540,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
461
540
  )
462
541
 
463
542
  return StructuredToolResult(
464
- status=ToolResultStatus.ERROR,
543
+ status=StructuredToolResultStatus.ERROR,
465
544
  error=f"Exception while querying Datadog: {str(e)}",
466
545
  params=params,
467
546
  )
@@ -480,7 +559,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
480
559
  def __init__(self, toolset: "DatadogMetricsToolset"):
481
560
  super().__init__(
482
561
  name="list_datadog_metric_tags",
483
- description="List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
562
+ description="[datadog/metrics toolset] List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
484
563
  parameters={
485
564
  "metric_name": ToolParameter(
486
565
  description="The name of the metric to get tags for (e.g., 'system.cpu.user', 'container.memory.usage')",
@@ -491,37 +570,41 @@ class ListMetricTags(BaseDatadogMetricsTool):
491
570
  toolset=toolset,
492
571
  )
493
572
 
494
- def _invoke(
495
- self, params: dict, user_approved: bool = False
496
- ) -> StructuredToolResult:
573
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
497
574
  if not self.toolset.dd_config:
498
575
  return StructuredToolResult(
499
- status=ToolResultStatus.ERROR,
576
+ status=StructuredToolResultStatus.ERROR,
500
577
  error=TOOLSET_CONFIG_MISSING_ERROR,
501
578
  params=params,
502
579
  )
503
580
 
504
- url = None
581
+ api_url = None
505
582
  query_params = None
506
583
 
507
584
  try:
508
585
  metric_name = get_param_or_raise(params, "metric_name")
509
586
 
510
- url = f"{self.toolset.dd_config.site_api_url}/api/v2/metrics/{metric_name}/active-configurations"
587
+ api_url = f"{self.toolset.dd_config.site_api_url}/api/v2/metrics/{metric_name}/active-configurations"
511
588
  headers = get_headers(self.toolset.dd_config)
512
589
 
513
590
  data = execute_datadog_http_request(
514
- url=url,
591
+ url=api_url,
515
592
  headers=headers,
516
593
  timeout=self.toolset.dd_config.request_timeout,
517
594
  method="GET",
518
595
  payload_or_params={},
519
596
  )
520
597
 
598
+ web_url = generate_datadog_metric_tags_url(
599
+ self.toolset.dd_config,
600
+ metric_name,
601
+ )
602
+
521
603
  return StructuredToolResult(
522
- status=ToolResultStatus.SUCCESS,
604
+ status=StructuredToolResultStatus.SUCCESS,
523
605
  data=data,
524
606
  params=params,
607
+ url=web_url,
525
608
  )
526
609
 
527
610
  except DataDogRequestError as e:
@@ -537,14 +620,21 @@ class ListMetricTags(BaseDatadogMetricsTool):
537
620
  f"permissions. Error: {str(e)}"
538
621
  )
539
622
  else:
540
- error_msg = f"Exception while querying Datadog: {str(e)}"
623
+ # Include full API error details for better debugging
624
+ error_msg = (
625
+ f"Datadog API error (status {e.status_code}): {e.response_text}"
626
+ )
627
+ if params:
628
+ error_msg += (
629
+ f"\nMetric name: {params.get('metric_name', 'not specified')}"
630
+ )
541
631
 
542
632
  return StructuredToolResult(
543
- status=ToolResultStatus.ERROR,
633
+ status=StructuredToolResultStatus.ERROR,
544
634
  error=error_msg,
545
635
  params=params,
546
- invocation=json.dumps({"url": url, "params": query_params})
547
- if url and query_params
636
+ invocation=json.dumps({"url": api_url, "params": query_params})
637
+ if api_url and query_params
548
638
  else None,
549
639
  )
550
640
 
@@ -554,7 +644,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
554
644
  exc_info=True,
555
645
  )
556
646
  return StructuredToolResult(
557
- status=ToolResultStatus.ERROR,
647
+ status=StructuredToolResultStatus.ERROR,
558
648
  error=f"Exception while querying Datadog: {str(e)}",
559
649
  params=params,
560
650
  )
@@ -571,7 +661,7 @@ class DatadogMetricsToolset(Toolset):
571
661
  super().__init__(
572
662
  name="datadog/metrics",
573
663
  description="Toolset for fetching metrics and metadata from Datadog, including historical data for pods no longer in the cluster",
574
- docs_url="https://docs.datadoghq.com/api/latest/metrics/",
664
+ docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
575
665
  icon_url="https://imgix.datadoghq.com//img/about/presskit/DDlogo.jpg",
576
666
  prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
577
667
  tools=[
@@ -580,14 +670,13 @@ class DatadogMetricsToolset(Toolset):
580
670
  QueryMetricsMetadata(toolset=self),
581
671
  ListMetricTags(toolset=self),
582
672
  ],
583
- experimental=True,
584
673
  tags=[ToolsetTag.CORE],
585
674
  )
586
675
  self._reload_instructions()
587
676
 
588
677
  def _perform_healthcheck(self, dd_config: DatadogMetricsConfig) -> Tuple[bool, str]:
589
678
  try:
590
- logging.info("Performing Datadog metrics configuration healthcheck...")
679
+ logging.debug("Performing Datadog metrics configuration healthcheck...")
591
680
 
592
681
  url = f"{dd_config.site_api_url}/api/v1/validate"
593
682
  headers = get_headers(dd_config)
@@ -616,7 +705,7 @@ class DatadogMetricsToolset(Toolset):
616
705
  if not config:
617
706
  return (
618
707
  False,
619
- TOOLSET_CONFIG_MISSING_ERROR,
708
+ "Missing config for dd_api_key, dd_app_key, or site_api_url. For details: https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
620
709
  )
621
710
 
622
711
  try:
@@ -631,13 +720,12 @@ class DatadogMetricsToolset(Toolset):
631
720
  return (False, f"Failed to parse Datadog configuration: {str(e)}")
632
721
 
633
722
  def get_example_config(self) -> Dict[str, Any]:
634
- return {
635
- "dd_api_key": "your-datadog-api-key",
636
- "dd_app_key": "your-datadog-application-key",
637
- "site_api_url": "https://api.datadoghq.com",
638
- "default_limit": 1000,
639
- "request_timeout": 60,
640
- }
723
+ example_config = DatadogMetricsConfig(
724
+ dd_api_key="<your_datadog_api_key>",
725
+ dd_app_key="<your_datadog_app_key>",
726
+ site_api_url=AnyUrl("https://api.datadoghq.com"),
727
+ )
728
+ return example_config.model_dump(mode="json")
641
729
 
642
730
  def _reload_instructions(self):
643
731
  """Load Datadog metrics specific troubleshooting instructions."""