holmesgpt 0.14.0a0__py3-none-any.whl → 0.14.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +15 -4
- holmes/common/env_vars.py +8 -1
- holmes/config.py +66 -139
- holmes/core/investigation.py +1 -2
- holmes/core/llm.py +295 -52
- holmes/core/models.py +2 -0
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +14 -8
- holmes/core/tool_calling_llm.py +110 -102
- holmes/core/tools.py +260 -25
- holmes/core/tools_utils/data_types.py +81 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
- holmes/core/tools_utils/tool_executor.py +2 -2
- holmes/core/toolset_manager.py +150 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/main.py +5 -0
- holmes/plugins/prompts/_fetch_logs.jinja2 +10 -1
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
- holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +344 -205
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +189 -17
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +95 -30
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +10 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +20 -20
- holmes/plugins/toolsets/git.py +21 -21
- holmes/plugins/toolsets/grafana/common.py +2 -2
- holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +5 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +123 -23
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +165 -307
- holmes/plugins/toolsets/internet/internet.py +3 -3
- holmes/plugins/toolsets/internet/notion.py +3 -3
- holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
- holmes/plugins/toolsets/kafka.py +18 -18
- holmes/plugins/toolsets/kubernetes.yaml +58 -0
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +1 -1
- holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
- holmes/plugins/toolsets/newrelic.py +5 -5
- holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
- holmes/plugins/toolsets/prometheus/prometheus.py +841 -351
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +39 -2
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
- holmes/plugins/toolsets/robusta/robusta.py +10 -10
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
- holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/env.py +7 -0
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +9 -0
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/METADATA +10 -14
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/RECORD +82 -72
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/WHEEL +0 -0
- {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1.dist-info}/entry_points.txt +0 -0
holmes/plugins/toolsets/datadog/toolset_datadog_logs.py

@@ -3,12 +3,13 @@ from enum import Enum
 import json
 import logging
 from typing import Any, Optional, Dict, Tuple, Set
+from urllib.parse import urlencode
 from holmes.core.tools import (
     CallablePrerequisite,
     ToolsetTag,
 )
 from pydantic import BaseModel, Field
-from holmes.core.tools import StructuredToolResult,
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
 from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
 from holmes.plugins.toolsets.datadog.datadog_api import (
     DatadogBaseConfig,

@@ -16,6 +17,8 @@ from holmes.plugins.toolsets.datadog.datadog_api import (
     execute_paginated_datadog_http_request,
     get_headers,
     MAX_RETRY_COUNT_ON_RATE_LIMIT,
+    enhance_error_message,
+    preprocess_time_fields,
 )
 from holmes.plugins.toolsets.logging_utils.logging_api import (
     DEFAULT_TIME_SPAN_SECONDS,
@@ -99,23 +102,28 @@ def fetch_paginated_logs(
         "page": {"limit": calculate_page_size(params, dd_config, [])},
     }
 
+    # Preprocess time fields to ensure correct format
+    processed_payload = preprocess_time_fields(payload, "/api/v2/logs/events/search")
+
     logs, cursor = execute_paginated_datadog_http_request(
         url=url,
         headers=headers,
-        payload_or_params=
+        payload_or_params=processed_payload,
         timeout=dd_config.request_timeout,
     )
 
     while cursor and len(logs) < limit:
-
+        processed_payload["page"]["cursor"] = cursor
+        processed_payload["page"]["limit"] = calculate_page_size(
+            params, dd_config, logs
+        )
         new_logs, cursor = execute_paginated_datadog_http_request(
             url=url,
             headers=headers,
-            payload_or_params=
+            payload_or_params=processed_payload,
             timeout=dd_config.request_timeout,
         )
         logs += new_logs
-        payload["page"]["limit"] = calculate_page_size(params, dd_config, logs)
 
     # logs are fetched descending order. Unified logging API follows the pattern of kubectl logs where oldest logs are first
     logs.reverse()
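The pagination fix above is the substance of this hunk: the cursor and a freshly computed page limit are now written into the preprocessed payload before each follow-up request, where the old code updated the original payload only after the request had already been sent. A minimal sketch of the pattern, with a hypothetical fetch_page() standing in for execute_paginated_datadog_http_request() and a fixed cap standing in for calculate_page_size():

    from typing import Optional, Tuple

    def fetch_page(payload: dict) -> Tuple[list, Optional[str]]:
        # Stand-in for one POST to /api/v2/logs/events/search;
        # returns (events, next_cursor), where next_cursor is None on the last page.
        raise NotImplementedError

    def fetch_all(payload: dict, limit: int) -> list:
        items, cursor = fetch_page(payload)
        while cursor and len(items) < limit:
            # Thread the cursor through the same payload that will be re-sent,
            # and shrink the page size so the total never overshoots `limit`.
            payload["page"]["cursor"] = cursor
            payload["page"]["limit"] = min(1000, limit - len(items))
            new_items, cursor = fetch_page(payload)
            items += new_items
        items.reverse()  # newest-first from the API -> oldest-first, like kubectl logs
        return items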
@@ -129,14 +137,73 @@ def format_logs(raw_logs: list[dict]) -> str:
     logs = []
 
     for raw_log_item in raw_logs:
+        # Extract timestamp - Datadog returns it in ISO format
+        timestamp = raw_log_item.get("attributes", {}).get("timestamp", "")
+        if not timestamp:
+            # Fallback to @timestamp if timestamp is not in attributes
+            timestamp = raw_log_item.get("attributes", {}).get("@timestamp", "")
+
+        # Extract message
         message = raw_log_item.get("attributes", {}).get(
             "message", json.dumps(raw_log_item)
         )
-
+
+        # Format as: [timestamp] message
+        if timestamp:
+            logs.append(f"[{timestamp}] {message}")
+        else:
+            logs.append(message)
 
     return "\n".join(logs)
 
 
+def generate_datadog_logs_url(
+    dd_config: DatadogLogsConfig,
+    params: FetchPodLogsParams,
+    storage_tier: DataDogStorageTier,
+) -> str:
+    """Generate a Datadog web UI URL for the logs query."""
+    from holmes.plugins.toolsets.utils import process_timestamps_to_int
+    from holmes.plugins.toolsets.datadog.datadog_api import convert_api_url_to_app_url
+
+    # Convert API URL to app URL using the shared helper
+    base_url = convert_api_url_to_app_url(dd_config.site_api_url)
+
+    # Build the query string
+    query = f"{dd_config.labels.namespace}:{params.namespace}"
+    query += f" {dd_config.labels.pod}:{params.pod_name}"
+    if params.filter:
+        filter = params.filter.replace('"', '\\"')
+        query += f' "{filter}"'
+
+    # Process timestamps - get Unix timestamps in seconds
+    (from_time_seconds, to_time_seconds) = process_timestamps_to_int(
+        start=params.start_time,
+        end=params.end_time,
+        default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
+    )
+
+    # Convert to milliseconds for Datadog web UI
+    from_time_ms = from_time_seconds * 1000
+    to_time_ms = to_time_seconds * 1000
+
+    # Build URL parameters matching Datadog's web UI format
+    url_params = {
+        "query": query,
+        "from_ts": str(from_time_ms),
+        "to_ts": str(to_time_ms),
+        "live": "true",
+        "storage": storage_tier.value,
+    }
+
+    # Add indexes if not default
+    if dd_config.indexes != ["*"]:
+        url_params["index"] = ",".join(dd_config.indexes)
+
+    # Construct the full URL
+    return f"{base_url}/logs?{urlencode(url_params)}"
+
+
 class DatadogLogsToolset(BasePodLoggingToolset):
     dd_config: Optional[DatadogLogsConfig] = None
 
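To make the new generate_datadog_logs_url() helper concrete, here is roughly the kind of link it assembles, using urlencode from the import added at the top of the file. Every value below is invented for illustration; the real code derives them from dd_config, params, and process_timestamps_to_int():

    from urllib.parse import urlencode

    base_url = "https://app.datadoghq.com"  # assumed output of convert_api_url_to_app_url()
    url_params = {
        "query": 'kube_namespace:default pod_name:checkout-abc123 "timeout"',
        "from_ts": str(1700000000 * 1000),  # the web UI expects milliseconds
        "to_ts": str(1700003600 * 1000),
        "live": "true",
        "storage": "indexes",
    }
    print(f"{base_url}/logs?{urlencode(url_params)}")
    # https://app.datadoghq.com/logs?query=kube_namespace%3Adefault+pod_name%3A...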
@@ -167,7 +234,7 @@ class DatadogLogsToolset(BasePodLoggingToolset):
     def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
         if not self.dd_config:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 data=TOOLSET_CONFIG_MISSING_ERROR,
                 params=params.model_dump(),
             )
@@ -181,29 +248,134 @@ class DatadogLogsToolset(BasePodLoggingToolset):
 
             if raw_logs:
                 logs_str = format_logs(raw_logs)
+                # Generate Datadog web UI URL
+                datadog_url = generate_datadog_logs_url(
+                    self.dd_config, params, storage_tier
+                )
+                logs_with_link = f"{logs_str}\n\nView in Datadog: {datadog_url}"
                 return StructuredToolResult(
-                    status=
-                    data=
+                    status=StructuredToolResultStatus.SUCCESS,
+                    data=logs_with_link,
+                    url=datadog_url,
                     params=params.model_dump(),
                 )
 
+            # Include detailed diagnostic context
+            query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
+            if params.filter:
+                query += f' "{params.filter}"'
+
+            # Get actual time range used
+            (from_time, to_time) = process_timestamps_to_rfc3339(
+                start_timestamp=params.start_time,
+                end_timestamp=params.end_time,
+                default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
+            )
+
+            # Generate Datadog web UI URL for the last storage tier checked
+            datadog_url = generate_datadog_logs_url(
+                self.dd_config, params, self.dd_config.storage_tiers[-1]
+            )
+
+            # Build diagnostic information
+            diagnostics: Dict[str, Any] = {
+                "query_executed": query,
+                "time_range": f"{from_time} to {to_time}",
+                "indexes_searched": self.dd_config.indexes,
+                "storage_tiers_checked": [
+                    tier.value for tier in self.dd_config.storage_tiers
+                ],
+                "field_mappings": {
+                    "namespace_field": self.dd_config.labels.namespace,
+                    "pod_field": self.dd_config.labels.pod,
+                },
+                "limit": params.limit or self.dd_config.default_limit,
+                "datadog_url": datadog_url,
+            }
+
+            # Format diagnostic info as structured text
+            error_msg = (
+                f"No logs found.\n\n"
+                f"Diagnostic Information:\n"
+                f"----------------------\n"
+                f"Query executed: {diagnostics['query_executed']}\n"
+                f"Time range: {diagnostics['time_range']}\n"
+                f"Indexes searched: {diagnostics['indexes_searched']}\n"
+                f"Storage tiers checked: {', '.join(str(tier) for tier in diagnostics.get('storage_tiers_checked', []))}\n"
+                f"Field mappings:\n"
+                f"  - Namespace field: {diagnostics.get('field_mappings', {}).get('namespace_field', 'N/A')}\n"
+                f"  - Pod field: {diagnostics.get('field_mappings', {}).get('pod_field', 'N/A')}\n"
+                f"Limit: {diagnostics['limit']}\n\n"
+                f"View in Datadog: {diagnostics['datadog_url']}"
+            )
+
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.NO_DATA,
+                error=error_msg,
+                url=datadog_url,
                 params=params.model_dump(),
             )
 
         except DataDogRequestError as e:
             logging.exception(e, exc_info=True)
 
+            # Always try to generate Datadog URL for debugging
+            try:
+                datadog_url = generate_datadog_logs_url(
+                    self.dd_config, params, self.dd_config.storage_tiers[0]
+                )
+            except Exception:
+                datadog_url = None
+
             # Provide more specific error message for rate limiting failures
             if e.status_code == 429:
                 error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
+                if datadog_url:
+                    error_msg += f"\nView in Datadog: {datadog_url}"
+            elif e.status_code == 400:
+                # Use enhanced error message for validation errors
+                error_msg = enhance_error_message(
+                    e,
+                    "/api/v2/logs/events/search",
+                    "POST",
+                    str(self.dd_config.site_api_url),
+                )
+
+                # Add query context
+                query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
+                if params.filter:
+                    query += f' "{params.filter}"'
+                error_msg += f"\n\nQuery attempted: {query}"
+
+                # Add Datadog web UI URL to error message
+                if datadog_url:
+                    error_msg += f"\nView in Datadog: {datadog_url}"
             else:
-
+                # Include full API error details and query context
+                error_msg = (
+                    f"Datadog API error (status {e.status_code}): {e.response_text}"
+                )
+                query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
+                if params.filter:
+                    query += f' "{params.filter}"'
+                error_msg += f"\nQuery: {query}"
+
+                # Get actual time range used
+                (from_time, to_time) = process_timestamps_to_rfc3339(
+                    start_timestamp=params.start_time,
+                    end_timestamp=params.end_time,
+                    default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
+                )
+                error_msg += f"\nTime range: {from_time} to {to_time}"
+
+                # Add Datadog web UI URL to error message
+                if datadog_url:
+                    error_msg += f"\nView in Datadog: {datadog_url}"
 
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=error_msg,
+                url=datadog_url,
                 params=params.model_dump(),
                 invocation=json.dumps(e.payload),
             )
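fetch_pod_logs() now has three distinct outcomes: SUCCESS carrying the logs plus a "View in Datadog" link, NO_DATA carrying the diagnostic block, and ERROR carrying the enriched API error. A hedged sketch of how a caller might branch on them; the status names and the data/error/invocation fields come from this diff, while the function itself is illustrative:

    from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus

    def summarize(result: StructuredToolResult) -> str:
        if result.status == StructuredToolResultStatus.SUCCESS:
            return result.data   # log lines plus the "View in Datadog" link
        if result.status == StructuredToolResultStatus.NO_DATA:
            return result.error  # query, time range, field mappings, deep link
        # ERROR: message already enriched with status code, query and time range
        return f"{result.error}\n{result.invocation or ''}"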
@@ -213,7 +385,7 @@ class DatadogLogsToolset(BasePodLoggingToolset):
                 f"Failed to query Datadog logs for params: {params}", exc_info=True
             )
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Exception while querying Datadog: {str(e)}",
                 params=params.model_dump(),
             )

@@ -224,7 +396,7 @@ class DatadogLogsToolset(BasePodLoggingToolset):
         Returns (success, error_message).
         """
         try:
-            logging.
+            logging.debug("Performing Datadog configuration healthcheck...")
             healthcheck_params = FetchPodLogsParams(
                 namespace="*",
                 pod_name="*",

@@ -234,11 +406,11 @@ class DatadogLogsToolset(BasePodLoggingToolset):
 
             result = self.fetch_pod_logs(healthcheck_params)
 
-            if result.status ==
+            if result.status == StructuredToolResultStatus.ERROR:
                 error_msg = result.error or "Unknown error during healthcheck"
                 logging.error(f"Datadog healthcheck failed: {error_msg}")
                 return False, f"Datadog healthcheck failed: {error_msg}"
-            elif result.status ==
+            elif result.status == StructuredToolResultStatus.NO_DATA:
                 error_msg = "No logs were found in the last 48 hours using wildcards for pod and namespace. Is the configuration correct?"
                 logging.error(f"Datadog healthcheck failed: {error_msg}")
                 return False, f"Datadog healthcheck failed: {error_msg}"

@@ -254,7 +426,7 @@ class DatadogLogsToolset(BasePodLoggingToolset):
         if not config:
             return (
                 False,
-
+                "Datadog logs toolset requires configuration. Please provide: dd_api_key, dd_app_key, and site_api_url in your Holmes config. For more details, see https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
             )
 
         try:
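For reference, the three settings the new prerequisite message asks for, written out as a plain Python mapping. The key names are taken verbatim from the message above; the values are placeholders, and the actual Holmes configuration lives in a config file rather than in code:

    datadog_logs_config = {
        "dd_api_key": "<your Datadog API key>",
        "dd_app_key": "<your Datadog application key>",
        "site_api_url": "https://api.datadoghq.com",  # or your Datadog site's API endpoint
    }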
holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py

@@ -7,7 +7,7 @@ from holmes.core.tools import (
     StructuredToolResult,
     Tool,
     ToolParameter,
-
+    StructuredToolResultStatus,
     Toolset,
     ToolsetTag,
 )

@@ -54,7 +54,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
     def __init__(self, toolset: "DatadogMetricsToolset"):
         super().__init__(
             name="list_active_datadog_metrics",
-            description=f"List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
+            description=f"[datadog/metrics toolset] List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
             parameters={
                 "from_time": ToolParameter(
                     description=f"Start time for listing metrics. Can be an RFC3339 formatted datetime (e.g. '2023-03-01T10:30:00Z') or a negative integer for relative seconds from now (e.g. -86400 for 24 hours ago). Defaults to {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours ago",
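A change that recurs throughout this file appears here for the first time: every tool description now carries a "[datadog/metrics toolset]" prefix, presumably so the model can tell which toolset a tool belongs to when several are loaded. A tiny hypothetical helper illustrating the convention (the diff itself hardcodes the prefix in each string):

    def prefixed_description(toolset_name: str, text: str) -> str:
        # Mirrors the "[datadog/metrics toolset] ..." convention used above.
        return f"[{toolset_name} toolset] {text}"

    print(prefixed_description("datadog/metrics", "Query timeseries data from Datadog"))
    # [datadog/metrics toolset] Query timeseries data from Datadog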
@@ -80,7 +80,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
     ) -> StructuredToolResult:
         if not self.toolset.dd_config:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=TOOLSET_CONFIG_MISSING_ERROR,
                 params=params,
             )

@@ -121,7 +121,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
             metrics = data.get("metrics", [])
             if not metrics:
                 return StructuredToolResult(
-                    status=
+                    status=StructuredToolResultStatus.ERROR,
                     data="Your filter returned no metrics. Change your filter and try again",
                     params=params,
                 )

@@ -133,7 +133,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
                 output.append(metric)
 
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.SUCCESS,
                 data="\n".join(output),
                 params=params,
             )

@@ -149,10 +149,30 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
                     f"and 'timeseries_query' permissions. Error: {str(e)}"
                 )
             else:
-
+                # Include full API error details for better debugging
+                error_msg = (
+                    f"Datadog API error (status {e.status_code}): {e.response_text}"
+                )
+                if params:
+                    # ListActiveMetrics parameters: from_time, host, tag_filter
+                    if params.get("host"):
+                        error_msg += f"\nHost filter: {params.get('host')}"
+                    if params.get("tag_filter"):
+                        error_msg += f"\nTag filter: {params.get('tag_filter')}"
+
+                    from_time_param = params.get("from_time")
+                    if from_time_param:
+                        time_desc = from_time_param
+                    else:
+                        time_desc = f"default (last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours)"
+                    error_msg += f"\nTime range: {time_desc}"
+
+                # Note: We cannot generate a Datadog Metrics Explorer URL for ListActiveMetrics
+                # because the Metrics Explorer requires a specific metric query,
+                # while ListActiveMetrics just lists available metrics without querying any specific one
 
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=error_msg,
                 params=params,
                 invocation=json.dumps({"url": url, "params": query_params})

@@ -165,7 +185,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
                 f"Failed to query Datadog metrics for params: {params}", exc_info=True
             )
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Exception while querying Datadog: {str(e)}",
                 params=params,
             )

@@ -184,7 +204,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
     def __init__(self, toolset: "DatadogMetricsToolset"):
         super().__init__(
             name="query_datadog_metrics",
-            description="Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
+            description="[datadog/metrics toolset] Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
             parameters={
                 "query": ToolParameter(
                     description="The metric query string (e.g., 'system.cpu.user{host:myhost}')",

@@ -222,7 +242,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
     ) -> StructuredToolResult:
         if not self.toolset.dd_config:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=TOOLSET_CONFIG_MISSING_ERROR,
                 params=params,
             )

@@ -261,9 +281,29 @@ class QueryMetrics(BaseDatadogMetricsTool):
             output_type = params.get("output_type", "Plain")
 
             if not series:
+                # Include detailed context in error message
+                from_time_param = params.get("from_time")
+                to_time_param = params.get("to_time")
+
+                if from_time_param:
+                    from_desc = from_time_param
+                else:
+                    from_desc = (
+                        f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
+                    )
+
+                to_desc = to_time_param or "now"
+
+                error_msg = (
+                    f"The query returned no data.\n"
+                    f"Query: {params.get('query', 'not specified')}\n"
+                    f"Time range: {from_desc} to {to_desc}\n"
+                    f"Please check your query syntax and ensure data exists for this time range."
+                )
+
                 return StructuredToolResult(
-                    status=
-                    error=
+                    status=StructuredToolResultStatus.NO_DATA,
+                    error=error_msg,
                     params=params,
                 )
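The fallback wording above turns DEFAULT_TIME_SPAN_SECONDS into whole days with integer division. A standalone sketch of that logic; the constant's value is assumed here since the diff does not show it:

    from typing import Optional

    DEFAULT_TIME_SPAN_SECONDS = 7 * 86400  # assumed example value

    def describe_time_range(from_time: Optional[str], to_time: Optional[str]) -> str:
        from_desc = from_time or f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
        to_desc = to_time or "now"
        return f"Time range: {from_desc} to {to_desc}"

    print(describe_time_range(None, None))      # Time range: default (last 7 days) to now
    print(describe_time_range("-3600", "now"))  # Time range: -3600 to now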
@@ -317,7 +357,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
 
             data_str = json.dumps(response_data, indent=2)
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.SUCCESS,
                 data=data_str,
                 params=params,
             )

@@ -333,10 +373,28 @@ class QueryMetrics(BaseDatadogMetricsTool):
                     f"and 'timeseries_query' permissions. Error: {str(e)}"
                 )
             else:
-
+                # Include full API error details for better debugging
+                error_msg = (
+                    f"Datadog API error (status {e.status_code}): {e.response_text}"
+                )
+                if params:
+                    error_msg += f"\nQuery: {params.get('query', 'not specified')}"
+
+                    from_time_param = params.get("from_time")
+                    to_time_param = params.get("to_time")
+
+                    if from_time_param:
+                        from_desc = from_time_param
+                    else:
+                        from_desc = (
+                            f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
+                        )
+
+                    to_desc = to_time_param or "now"
+                    error_msg += f"\nTime range: {from_desc} to {to_desc}"
 
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=error_msg,
                 params=params,
                 invocation=json.dumps({"url": url, "params": query_params})

@@ -350,7 +408,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
             )
 
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Exception while querying Datadog: {str(e)}",
                 params=params,
             )

@@ -364,7 +422,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
     def __init__(self, toolset: "DatadogMetricsToolset"):
         super().__init__(
             name="get_datadog_metric_metadata",
-            description="Get metadata about one or more metrics including their type, description, unit, and other properties",
+            description="[datadog/metrics toolset] Get metadata about one or more metrics including their type, description, unit, and other properties",
             parameters={
                 "metric_names": ToolParameter(
                     description="Comma-separated list of metric names to get metadata for (e.g., 'system.cpu.user, system.mem.used')",

@@ -380,7 +438,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
     ) -> StructuredToolResult:
         if not self.toolset.dd_config:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=TOOLSET_CONFIG_MISSING_ERROR,
                 params=params,
             )

@@ -396,7 +454,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
 
         if not metric_names:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error="metric_names cannot be empty",
                 params=params,
             )

@@ -442,14 +500,14 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
 
         if not results and errors:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error="Failed to retrieve metadata for all metrics",
                 data=json.dumps(response_data, indent=2),
                 params=params,
             )
 
         return StructuredToolResult(
-            status=
+            status=StructuredToolResultStatus.SUCCESS,
             data=json.dumps(response_data, indent=2),
             params=params,
         )

@@ -461,7 +519,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
             )
 
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Exception while querying Datadog: {str(e)}",
                 params=params,
             )

@@ -480,7 +538,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
     def __init__(self, toolset: "DatadogMetricsToolset"):
         super().__init__(
             name="list_datadog_metric_tags",
-            description="List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
+            description="[datadog/metrics toolset] List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
             parameters={
                 "metric_name": ToolParameter(
                     description="The name of the metric to get tags for (e.g., 'system.cpu.user', 'container.memory.usage')",

@@ -496,7 +554,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
     ) -> StructuredToolResult:
         if not self.toolset.dd_config:
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=TOOLSET_CONFIG_MISSING_ERROR,
                 params=params,
             )

@@ -519,7 +577,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
             )
 
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.SUCCESS,
                 data=data,
                 params=params,
             )

@@ -537,10 +595,17 @@ class ListMetricTags(BaseDatadogMetricsTool):
                     f"permissions. Error: {str(e)}"
                 )
             else:
-
+                # Include full API error details for better debugging
+                error_msg = (
+                    f"Datadog API error (status {e.status_code}): {e.response_text}"
+                )
+                if params:
+                    error_msg += (
+                        f"\nMetric name: {params.get('metric_name', 'not specified')}"
+                    )
 
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=error_msg,
                 params=params,
                 invocation=json.dumps({"url": url, "params": query_params})

@@ -554,7 +619,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
                 exc_info=True,
             )
             return StructuredToolResult(
-                status=
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Exception while querying Datadog: {str(e)}",
                 params=params,
             )

@@ -586,7 +651,7 @@ class DatadogMetricsToolset(Toolset):
 
     def _perform_healthcheck(self, dd_config: DatadogMetricsConfig) -> Tuple[bool, str]:
         try:
-            logging.
+            logging.debug("Performing Datadog metrics configuration healthcheck...")
 
             url = f"{dd_config.site_api_url}/api/v1/validate"
             headers = get_headers(dd_config)

@@ -615,7 +680,7 @@ class DatadogMetricsToolset(Toolset):
         if not config:
             return (
                 False,
-
+                "Datadog metrics toolset requires configuration. Please provide: dd_api_key, dd_app_key, and site_api_url in your Holmes config. For more details, see https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
             )
 
         try:
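The metrics healthcheck above pings Datadog's key-validation endpoint. A minimal sketch of that round trip, assuming the requests library and Datadog's standard auth headers; the real code builds headers via get_headers() and goes through its own HTTP layer:

    import requests

    def validate_datadog_keys(site_api_url: str, api_key: str, app_key: str) -> bool:
        # GET /api/v1/validate answers {"valid": true} for a working API key.
        resp = requests.get(
            f"{site_api_url}/api/v1/validate",
            headers={"DD-API-KEY": api_key, "DD-APPLICATION-KEY": app_key},
            timeout=10,
        )
        return resp.status_code == 200 and resp.json().get("valid", False)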