holmesgpt 0.11.5__py3-none-any.whl → 0.12.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +8 -4
- holmes/config.py +52 -13
- holmes/core/investigation_structured_output.py +7 -0
- holmes/core/llm.py +14 -4
- holmes/core/models.py +24 -0
- holmes/core/tool_calling_llm.py +48 -6
- holmes/core/tools.py +7 -4
- holmes/core/toolset_manager.py +24 -5
- holmes/core/tracing.py +224 -0
- holmes/interactive.py +761 -44
- holmes/main.py +59 -127
- holmes/plugins/prompts/_fetch_logs.jinja2 +4 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -10
- holmes/plugins/toolsets/__init__.py +10 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +3 -0
- holmes/plugins/toolsets/datadog/datadog_api.py +161 -0
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +26 -0
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +310 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +51 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +267 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +488 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +689 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +3 -0
- holmes/plugins/toolsets/internet/internet.py +1 -1
- holmes/plugins/toolsets/logging_utils/logging_api.py +9 -3
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +3 -0
- holmes/plugins/toolsets/utils.py +6 -2
- holmes/utils/cache.py +4 -4
- holmes/utils/console/consts.py +2 -0
- holmes/utils/console/logging.py +95 -0
- holmes/utils/console/result.py +37 -0
- {holmesgpt-0.11.5.dist-info → holmesgpt-0.12.0.dist-info}/METADATA +3 -4
- {holmesgpt-0.11.5.dist-info → holmesgpt-0.12.0.dist-info}/RECORD +38 -29
- {holmesgpt-0.11.5.dist-info → holmesgpt-0.12.0.dist-info}/WHEEL +1 -1
- holmes/__init__.py.bak +0 -76
- holmes/plugins/toolsets/datadog.py +0 -153
- {holmesgpt-0.11.5.dist-info → holmesgpt-0.12.0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.11.5.dist-info → holmesgpt-0.12.0.dist-info}/entry_points.txt +0 -0
|
@@ -0,0 +1,689 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import logging
|
|
3
|
+
import os
|
|
4
|
+
import time
|
|
5
|
+
from typing import Any, Dict, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from holmes.core.tools import (
|
|
8
|
+
CallablePrerequisite,
|
|
9
|
+
Tool,
|
|
10
|
+
ToolParameter,
|
|
11
|
+
Toolset,
|
|
12
|
+
StructuredToolResult,
|
|
13
|
+
ToolResultStatus,
|
|
14
|
+
ToolsetTag,
|
|
15
|
+
)
|
|
16
|
+
from holmes.plugins.toolsets.datadog.datadog_api import (
|
|
17
|
+
DataDogRequestError,
|
|
18
|
+
DatadogBaseConfig,
|
|
19
|
+
execute_datadog_http_request,
|
|
20
|
+
get_headers,
|
|
21
|
+
MAX_RETRY_COUNT_ON_RATE_LIMIT,
|
|
22
|
+
)
|
|
23
|
+
from holmes.plugins.toolsets.utils import process_timestamps_to_int
|
|
24
|
+
from holmes.plugins.toolsets.datadog.datadog_traces_formatter import (
|
|
25
|
+
format_traces_list,
|
|
26
|
+
format_trace_hierarchy,
|
|
27
|
+
format_spans_search,
|
|
28
|
+
)
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
class DatadogTracesConfig(DatadogBaseConfig):
    """Configuration for the Datadog traces toolset.

    Extends the shared Datadog base config (API/app keys, site URL, timeout)
    with trace-specific settings.
    """

    # Span indexes to search; "*" means all indexes.
    indexes: list[str] = ["*"]
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class DatadogTracesToolset(Toolset):
    """Toolset for working with Datadog traces/APM data."""

    # Populated by prerequisites_callable once the user config parses.
    dd_config: Optional[DatadogTracesConfig] = None

    def __init__(self):
        super().__init__(
            name="datadog/traces",
            description="Toolset for interacting with Datadog APM to fetch and analyze traces",
            docs_url="https://docs.datadoghq.com/api/latest/spans/",
            icon_url="https://imgix.datadoghq.com//img/about/presskit/DDlogo.jpg",
            prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
            tools=[
                FetchDatadogTracesList(toolset=self),
                FetchDatadogTraceById(toolset=self),
                FetchDatadogSpansByFilter(toolset=self),
            ],
            experimental=True,
            tags=[ToolsetTag.CORE],
        )
        self._reload_instructions()

    def _reload_instructions(self):
        """Load Datadog traces specific troubleshooting instructions."""
        instructions_path = os.path.abspath(
            os.path.join(
                os.path.dirname(__file__), "instructions_datadog_traces.jinja2"
            )
        )
        self._load_llm_instructions(jinja_template=f"file://{instructions_path}")

    def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
        """Parse the user-supplied config and verify API connectivity.

        Returns a (success, error_message) pair.
        """
        if not config:
            return False, "No configuration provided for Datadog Traces toolset"

        try:
            parsed_config = DatadogTracesConfig(**config)
            self.dd_config = parsed_config
            return self._perform_healthcheck(parsed_config)
        except Exception as e:
            logging.exception("Failed to set up Datadog traces toolset")
            return False, f"Failed to parse Datadog configuration: {str(e)}"

    def _perform_healthcheck(self, dd_config: DatadogTracesConfig) -> Tuple[bool, str]:
        """Issue a minimal spans search to confirm credentials and reachability."""
        try:
            logging.info("Performing Datadog traces configuration healthcheck...")

            # The spans API uses POST, not GET
            body = {
                "data": {
                    "type": "search_request",
                    "attributes": {
                        "filter": {
                            "from": "now-1m",
                            "to": "now",
                            "query": "*",
                            "indexes": dd_config.indexes,
                        },
                        "page": {"limit": 1},
                    },
                }
            }

            # Use search endpoint instead
            execute_datadog_http_request(
                url=f"{dd_config.site_api_url}/api/v2/spans/events/search",
                headers=get_headers(dd_config),
                payload_or_params=body,
                timeout=dd_config.request_timeout,
                method="POST",
            )
            return True, ""

        except DataDogRequestError as e:
            logging.error(
                f"Datadog API error during healthcheck: {e.status_code} - {e.response_text}"
            )
            if e.status_code == 403:
                return (
                    False,
                    "API key lacks required permissions. Make sure your API key has 'apm_read' scope.",
                )
            return False, f"Datadog API error: {e.status_code} - {e.response_text}"
        except Exception as e:
            logging.exception("Failed during Datadog traces healthcheck")
            return False, f"Healthcheck failed with exception: {str(e)}"

    def get_example_config(self) -> Dict[str, Any]:
        """Get example configuration for this toolset."""
        example: Dict[str, Any] = {
            "dd_api_key": "<your_datadog_api_key>",
            "dd_app_key": "<your_datadog_app_key>",
            "site_api_url": "https://api.datadoghq.com",  # or https://api.datadoghq.eu for EU
            "request_timeout": 60,
        }
        return example
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
class BaseDatadogTracesTool(Tool):
    """Base class for Datadog traces tools."""

    # Back-reference to the owning toolset; subclasses read
    # self.toolset.dd_config to reach the validated Datadog configuration.
    toolset: "DatadogTracesToolset"
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
class FetchDatadogTracesList(BaseDatadogTracesTool):
    """Tool to fetch a list of traces from Datadog.

    Builds a spans search query from the optional filters, POSTs it to the
    Datadog spans search endpoint, and returns a formatted trace list.
    """

    def __init__(self, toolset: "DatadogTracesToolset"):
        super().__init__(
            name="fetch_datadog_traces",
            description="Fetch a list of traces from Datadog with optional filters",
            parameters={
                "service": ToolParameter(
                    description="Filter by service name",
                    type="string",
                    required=False,
                ),
                "operation": ToolParameter(
                    description="Filter by operation name",
                    type="string",
                    required=False,
                ),
                "resource": ToolParameter(
                    description="Filter by resource name",
                    type="string",
                    required=False,
                ),
                "min_duration": ToolParameter(
                    description="Minimum duration (e.g., '5s', '500ms', '1m')",
                    type="string",
                    required=False,
                ),
                "start_datetime": ToolParameter(
                    description="Start time in RFC3339 format or relative time in seconds (negative for past)",
                    type="string",
                    required=False,
                ),
                "end_datetime": ToolParameter(
                    description="End time in RFC3339 format or relative time in seconds (negative for past)",
                    type="string",
                    required=False,
                ),
                "limit": ToolParameter(
                    description="Maximum number of traces to return",
                    type="integer",
                    required=False,
                ),
            },
            toolset=toolset,
        )

    def get_parameterized_one_liner(self, params: dict) -> str:
        """Get a one-liner description of the tool invocation."""
        filters = []
        if "service" in params:
            filters.append(f"service={params['service']}")
        if "operation" in params:
            filters.append(f"operation={params['operation']}")
        if "min_duration" in params:
            filters.append(f"duration>{params['min_duration']}")

        filter_str = " AND ".join(filters) if filters else "all traces"
        return f"DataDog: fetch traces matching {filter_str}"

    @staticmethod
    def _parse_duration_ns(duration: str) -> int:
        """Convert a duration string ('5s', '500ms', '1m') to nanoseconds.

        A bare number with no unit suffix is interpreted as milliseconds.
        Raises ValueError for unparseable input (handled by the caller's
        generic exception path).
        """
        duration = duration.lower()
        # Check "ms" before "s": "500ms" also ends with "s".
        if duration.endswith("ms"):
            return int(float(duration[:-2]) * 1_000_000)
        if duration.endswith("s"):
            return int(float(duration[:-1]) * 1_000_000_000)
        if duration.endswith("m"):
            return int(float(duration[:-1]) * 60 * 1_000_000_000)
        # Assume milliseconds if no unit
        return int(float(duration) * 1_000_000)

    def _build_query(self, params: dict) -> str:
        """Build the Datadog spans search query string from tool parameters."""
        query_parts = []
        if params.get("service"):
            query_parts.append(f"service:{params['service']}")
        if params.get("operation"):
            query_parts.append(f"operation_name:{params['operation']}")
        if params.get("resource"):
            query_parts.append(f"resource_name:{params['resource']}")
        if params.get("min_duration"):
            duration_ns = self._parse_duration_ns(params["min_duration"])
            query_parts.append(f"@duration:>{duration_ns}")
        return " ".join(query_parts) if query_parts else "*"

    @staticmethod
    def _error_result(
        error_msg: str, params: Any, url: Optional[str], payload: Optional[dict]
    ) -> StructuredToolResult:
        """Build an ERROR result, attaching the request details when known."""
        return StructuredToolResult(
            status=ToolResultStatus.ERROR,
            error=error_msg,
            params=params,
            invocation=(
                json.dumps({"url": url, "payload": payload})
                if url and payload
                else None
            ),
        )

    def _invoke(self, params: Any) -> StructuredToolResult:
        """Execute the tool to fetch traces."""
        if not self.toolset.dd_config:
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error="Datadog configuration not initialized",
                params=params,
            )

        url = None
        payload = None

        try:
            # Resolve the (possibly relative) time range to epoch seconds.
            from_time_int, to_time_int = process_timestamps_to_int(
                start=params.get("start_datetime"),
                end=params.get("end_datetime"),
                default_time_span_seconds=3600,  # Default to 1 hour
            )

            query = self._build_query(params)

            # Prepare API request - use POST search endpoint
            url = f"{self.toolset.dd_config.site_api_url}/api/v2/spans/events/search"
            headers = get_headers(self.toolset.dd_config)

            payload = {
                "data": {
                    "type": "search_request",
                    "attributes": {
                        "filter": {
                            "query": query,
                            # Datadog expects millisecond timestamps as strings.
                            "from": str(from_time_int * 1000),
                            "to": str(to_time_int * 1000),
                            "indexes": self.toolset.dd_config.indexes,
                        },
                        "page": {"limit": params.get("limit", 50)},
                        "sort": "-timestamp",
                    },
                }
            }

            response = execute_datadog_http_request(
                url=url,
                headers=headers,
                payload_or_params=payload,
                timeout=self.toolset.dd_config.request_timeout,
                method="POST",
            )

            # Handle tuple response from POST requests
            if isinstance(response, tuple):
                spans, _ = response
            elif response:
                spans = response.get("data", [])
            else:
                spans = []

            # Format the traces using the formatter
            formatted_output = format_traces_list(spans, limit=params.get("limit", 50))
            if not formatted_output:
                return StructuredToolResult(
                    status=ToolResultStatus.NO_DATA,
                    params=params,
                    data="No matching traces found.",
                )

            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=formatted_output,
                params=params,
            )

        except DataDogRequestError as e:
            # logging.exception already records the traceback; no exc_info needed.
            logging.exception("Datadog API error while fetching traces")

            if e.status_code == 429:
                error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
            elif e.status_code == 403:
                error_msg = (
                    f"Permission denied. Ensure your Datadog Application Key has the 'apm_read' "
                    f"permission. Error: {str(e)}"
                )
            else:
                error_msg = f"Exception while querying Datadog: {str(e)}"

            return self._error_result(error_msg, params, url, payload)

        except Exception as e:
            logging.exception("Unexpected error while fetching Datadog traces")
            return self._error_result(f"Unexpected error: {str(e)}", params, url, payload)
|
|
346
|
+
|
|
347
|
+
|
|
348
|
+
class FetchDatadogTraceById(BaseDatadogTracesTool):
    """Tool to fetch detailed information about a specific trace."""

    def __init__(self, toolset: "DatadogTracesToolset"):
        super().__init__(
            name="fetch_datadog_trace_by_id",
            description="Fetch detailed information about a specific trace by its ID",
            parameters={
                "trace_id": ToolParameter(
                    description="The trace ID to fetch details for",
                    type="string",
                    required=True,
                ),
            },
            toolset=toolset,
        )

    def get_parameterized_one_liner(self, params: dict) -> str:
        """Get a one-liner description of the tool invocation."""
        trace_id = params.get("trace_id", "unknown")
        return f"DataDog: fetch trace details for ID {trace_id}"

    def _invoke(self, params: Any) -> StructuredToolResult:
        """Execute the tool to fetch trace details."""
        config = self.toolset.dd_config
        if not config:
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error="Datadog configuration not initialized",
                params=params,
            )

        trace_id = params.get("trace_id")
        if not trace_id:
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error="trace_id parameter is required",
                params=params,
            )

        url = None
        payload = None

        try:
            # Datadog has no "get trace" endpoint, so search for every span
            # carrying this trace_id within a 7-day lookback window.
            now_s = int(time.time())
            window_start_ms = (now_s - 604800) * 1000  # 7 days ago
            window_end_ms = now_s * 1000

            url = f"{config.site_api_url}/api/v2/spans/events/search"
            headers = get_headers(config)

            payload = {
                "data": {
                    "type": "search_request",
                    "attributes": {
                        "filter": {
                            "query": f"trace_id:{trace_id}",
                            "from": str(window_start_ms),
                            "to": str(window_end_ms),
                            "indexes": config.indexes,
                        },
                        "page": {"limit": 1000},  # Get all spans for the trace
                        "sort": "timestamp",
                    },
                }
            }

            response = execute_datadog_http_request(
                url=url,
                headers=headers,
                payload_or_params=payload,
                timeout=config.request_timeout,
                method="POST",
            )

            # POST helpers may return (spans, meta); GET-style returns a dict.
            if isinstance(response, tuple):
                spans = response[0]
            elif response:
                spans = response.get("data", [])
            else:
                spans = []

            rendered = format_trace_hierarchy(trace_id, spans)
            if not rendered:
                return StructuredToolResult(
                    status=ToolResultStatus.NO_DATA,
                    params=params,
                    data=f"No trace found for trace_id: {trace_id}",
                )

            return StructuredToolResult(
                status=ToolResultStatus.SUCCESS,
                data=rendered,
                params=params,
            )

        except DataDogRequestError as e:
            logging.exception(e, exc_info=True)

            if e.status_code == 429:
                error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
            elif e.status_code == 403:
                error_msg = (
                    f"Permission denied. Ensure your Datadog Application Key has the 'apm_read' "
                    f"permission. Error: {str(e)}"
                )
            else:
                error_msg = f"Exception while querying Datadog: {str(e)}"

            invocation = (
                json.dumps({"url": url, "payload": payload})
                if url and payload
                else None
            )
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=error_msg,
                params=params,
                invocation=invocation,
            )

        except Exception as e:
            logging.exception(e, exc_info=True)
            invocation = (
                json.dumps({"url": url, "payload": payload})
                if url and payload
                else None
            )
            return StructuredToolResult(
                status=ToolResultStatus.ERROR,
                error=f"Unexpected error: {str(e)}",
                params=params,
                invocation=invocation,
            )
|
|
484
|
+
|
|
485
|
+
|
|
486
|
+
class FetchDatadogSpansByFilter(BaseDatadogTracesTool):
|
|
487
|
+
"""Tool to search for spans with specific filters."""
|
|
488
|
+
|
|
489
|
+
def __init__(self, toolset: "DatadogTracesToolset"):
|
|
490
|
+
super().__init__(
|
|
491
|
+
name="fetch_datadog_spans",
|
|
492
|
+
description="Search for spans in Datadog with detailed filters",
|
|
493
|
+
parameters={
|
|
494
|
+
"query": ToolParameter(
|
|
495
|
+
description="Datadog search query (e.g., 'service:web-app @http.status_code:500')",
|
|
496
|
+
type="string",
|
|
497
|
+
required=False,
|
|
498
|
+
),
|
|
499
|
+
"service": ToolParameter(
|
|
500
|
+
description="Filter by service name",
|
|
501
|
+
type="string",
|
|
502
|
+
required=False,
|
|
503
|
+
),
|
|
504
|
+
"operation": ToolParameter(
|
|
505
|
+
description="Filter by operation name",
|
|
506
|
+
type="string",
|
|
507
|
+
required=False,
|
|
508
|
+
),
|
|
509
|
+
"resource": ToolParameter(
|
|
510
|
+
description="Filter by resource name",
|
|
511
|
+
type="string",
|
|
512
|
+
required=False,
|
|
513
|
+
),
|
|
514
|
+
"tags": ToolParameter(
|
|
515
|
+
description="Filter by tags (e.g., {'env': 'production', 'version': '1.2.3'})",
|
|
516
|
+
type="object",
|
|
517
|
+
required=False,
|
|
518
|
+
),
|
|
519
|
+
"start_datetime": ToolParameter(
|
|
520
|
+
description="Start time in RFC3339 format or relative time in seconds (negative for past)",
|
|
521
|
+
type="string",
|
|
522
|
+
required=False,
|
|
523
|
+
),
|
|
524
|
+
"end_datetime": ToolParameter(
|
|
525
|
+
description="End time in RFC3339 format or relative time in seconds (negative for past)",
|
|
526
|
+
type="string",
|
|
527
|
+
required=False,
|
|
528
|
+
),
|
|
529
|
+
"limit": ToolParameter(
|
|
530
|
+
description="Maximum number of spans to return",
|
|
531
|
+
type="integer",
|
|
532
|
+
required=False,
|
|
533
|
+
),
|
|
534
|
+
},
|
|
535
|
+
toolset=toolset,
|
|
536
|
+
)
|
|
537
|
+
|
|
538
|
+
def get_parameterized_one_liner(self, params: dict) -> str:
|
|
539
|
+
"""Get a one-liner description of the tool invocation."""
|
|
540
|
+
if "query" in params:
|
|
541
|
+
return f"DataDog: search spans with query: {params['query']}"
|
|
542
|
+
|
|
543
|
+
filters = []
|
|
544
|
+
if "service" in params:
|
|
545
|
+
filters.append(f"service={params['service']}")
|
|
546
|
+
if "operation" in params:
|
|
547
|
+
filters.append(f"operation={params['operation']}")
|
|
548
|
+
|
|
549
|
+
filter_str = " AND ".join(filters) if filters else "all spans"
|
|
550
|
+
return f"DataDog: search spans matching {filter_str}"
|
|
551
|
+
|
|
552
|
+
def _invoke(self, params: Any) -> StructuredToolResult:
|
|
553
|
+
"""Execute the tool to search spans."""
|
|
554
|
+
if not self.toolset.dd_config:
|
|
555
|
+
return StructuredToolResult(
|
|
556
|
+
status=ToolResultStatus.ERROR,
|
|
557
|
+
error="Datadog configuration not initialized",
|
|
558
|
+
params=params,
|
|
559
|
+
)
|
|
560
|
+
|
|
561
|
+
url = None
|
|
562
|
+
payload = None
|
|
563
|
+
|
|
564
|
+
try:
|
|
565
|
+
# Process timestamps
|
|
566
|
+
from_time_int, to_time_int = process_timestamps_to_int(
|
|
567
|
+
start=params.get("start_datetime"),
|
|
568
|
+
end=params.get("end_datetime"),
|
|
569
|
+
default_time_span_seconds=3600, # Default to 1 hour
|
|
570
|
+
)
|
|
571
|
+
|
|
572
|
+
# Convert to milliseconds for Datadog API
|
|
573
|
+
from_time_ms = from_time_int * 1000
|
|
574
|
+
to_time_ms = to_time_int * 1000
|
|
575
|
+
|
|
576
|
+
# Build search query
|
|
577
|
+
query_parts = []
|
|
578
|
+
|
|
579
|
+
# If a custom query is provided, use it as the base
|
|
580
|
+
if params.get("query"):
|
|
581
|
+
query_parts.append(params["query"])
|
|
582
|
+
|
|
583
|
+
# Add additional filters
|
|
584
|
+
if params.get("service"):
|
|
585
|
+
query_parts.append(f"service:{params['service']}")
|
|
586
|
+
|
|
587
|
+
if params.get("operation"):
|
|
588
|
+
query_parts.append(f"operation_name:{params['operation']}")
|
|
589
|
+
|
|
590
|
+
if params.get("resource"):
|
|
591
|
+
query_parts.append(f"resource_name:{params['resource']}")
|
|
592
|
+
|
|
593
|
+
# Add tag filters
|
|
594
|
+
if params.get("tags"):
|
|
595
|
+
tags = params["tags"]
|
|
596
|
+
if isinstance(tags, dict):
|
|
597
|
+
for key, value in tags.items():
|
|
598
|
+
query_parts.append(f"@{key}:{value}")
|
|
599
|
+
|
|
600
|
+
query = " ".join(query_parts) if query_parts else "*"
|
|
601
|
+
|
|
602
|
+
# Use POST endpoint for more complex searches
|
|
603
|
+
url = f"{self.toolset.dd_config.site_api_url}/api/v2/spans/events/search"
|
|
604
|
+
headers = get_headers(self.toolset.dd_config)
|
|
605
|
+
|
|
606
|
+
payload = {
|
|
607
|
+
"data": {
|
|
608
|
+
"type": "search_request",
|
|
609
|
+
"attributes": {
|
|
610
|
+
"filter": {
|
|
611
|
+
"query": query,
|
|
612
|
+
"from": str(from_time_ms),
|
|
613
|
+
"to": str(to_time_ms),
|
|
614
|
+
"indexes": self.toolset.dd_config.indexes,
|
|
615
|
+
},
|
|
616
|
+
"page": {
|
|
617
|
+
"limit": params.get("limit", 100),
|
|
618
|
+
},
|
|
619
|
+
"sort": "-timestamp",
|
|
620
|
+
},
|
|
621
|
+
}
|
|
622
|
+
}
|
|
623
|
+
|
|
624
|
+
response = execute_datadog_http_request(
|
|
625
|
+
url=url,
|
|
626
|
+
headers=headers,
|
|
627
|
+
payload_or_params=payload,
|
|
628
|
+
timeout=self.toolset.dd_config.request_timeout,
|
|
629
|
+
method="POST",
|
|
630
|
+
)
|
|
631
|
+
|
|
632
|
+
# Handle tuple response from POST requests
|
|
633
|
+
if isinstance(response, tuple):
|
|
634
|
+
spans, _ = response
|
|
635
|
+
elif response:
|
|
636
|
+
spans = response.get("data", [])
|
|
637
|
+
else:
|
|
638
|
+
spans = []
|
|
639
|
+
|
|
640
|
+
# Format the spans search results using the formatter
|
|
641
|
+
formatted_output = format_spans_search(spans)
|
|
642
|
+
if not formatted_output:
|
|
643
|
+
return StructuredToolResult(
|
|
644
|
+
status=ToolResultStatus.NO_DATA,
|
|
645
|
+
params=params,
|
|
646
|
+
data="No matching spans found.",
|
|
647
|
+
)
|
|
648
|
+
|
|
649
|
+
return StructuredToolResult(
|
|
650
|
+
status=ToolResultStatus.SUCCESS,
|
|
651
|
+
data=formatted_output,
|
|
652
|
+
params=params,
|
|
653
|
+
)
|
|
654
|
+
|
|
655
|
+
except DataDogRequestError as e:
|
|
656
|
+
logging.exception(e, exc_info=True)
|
|
657
|
+
if e.status_code == 429:
|
|
658
|
+
error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
|
|
659
|
+
elif e.status_code == 403:
|
|
660
|
+
error_msg = (
|
|
661
|
+
f"Permission denied. Ensure your Datadog Application Key has the 'apm_read' "
|
|
662
|
+
f"permission. Error: {str(e)}"
|
|
663
|
+
)
|
|
664
|
+
else:
|
|
665
|
+
error_msg = f"Exception while querying Datadog: {str(e)}"
|
|
666
|
+
|
|
667
|
+
return StructuredToolResult(
|
|
668
|
+
status=ToolResultStatus.ERROR,
|
|
669
|
+
error=error_msg,
|
|
670
|
+
params=params,
|
|
671
|
+
invocation=(
|
|
672
|
+
json.dumps({"url": url, "payload": payload})
|
|
673
|
+
if url and payload
|
|
674
|
+
else None
|
|
675
|
+
),
|
|
676
|
+
)
|
|
677
|
+
|
|
678
|
+
except Exception as e:
|
|
679
|
+
logging.exception(e, exc_info=True)
|
|
680
|
+
return StructuredToolResult(
|
|
681
|
+
status=ToolResultStatus.ERROR,
|
|
682
|
+
error=f"Unexpected error: {str(e)}",
|
|
683
|
+
params=params,
|
|
684
|
+
invocation=(
|
|
685
|
+
json.dumps({"url": url, "payload": payload})
|
|
686
|
+
if url and payload
|
|
687
|
+
else None
|
|
688
|
+
),
|
|
689
|
+
)
|
|
@@ -67,6 +67,9 @@ class GrafanaLokiToolset(BasePodLoggingToolset):
|
|
|
67
67
|
def grafana_config(self) -> GrafanaLokiConfig:
|
|
68
68
|
return cast(GrafanaLokiConfig, self.config)
|
|
69
69
|
|
|
70
|
+
def logger_name(self) -> str:
|
|
71
|
+
return "Loki"
|
|
72
|
+
|
|
70
73
|
def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
|
|
71
74
|
(start, end) = process_timestamps_to_rfc3339(
|
|
72
75
|
start_timestamp=params.start_time,
|