holmesgpt 0.16.2a0__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +4 -3
  3. holmes/common/env_vars.py +18 -2
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +11 -6
  6. holmes/core/conversations.py +30 -13
  7. holmes/core/investigation.py +21 -25
  8. holmes/core/investigation_structured_output.py +3 -3
  9. holmes/core/issue.py +1 -1
  10. holmes/core/llm.py +50 -31
  11. holmes/core/models.py +19 -17
  12. holmes/core/openai_formatting.py +1 -1
  13. holmes/core/prompt.py +47 -2
  14. holmes/core/runbooks.py +1 -0
  15. holmes/core/safeguards.py +4 -2
  16. holmes/core/supabase_dal.py +4 -2
  17. holmes/core/tool_calling_llm.py +102 -141
  18. holmes/core/tools.py +19 -28
  19. holmes/core/tools_utils/token_counting.py +9 -2
  20. holmes/core/tools_utils/tool_context_window_limiter.py +13 -30
  21. holmes/core/tools_utils/tool_executor.py +0 -18
  22. holmes/core/tools_utils/toolset_utils.py +1 -0
  23. holmes/core/toolset_manager.py +37 -2
  24. holmes/core/tracing.py +13 -2
  25. holmes/core/transformers/__init__.py +1 -1
  26. holmes/core/transformers/base.py +1 -0
  27. holmes/core/transformers/llm_summarize.py +3 -2
  28. holmes/core/transformers/registry.py +2 -1
  29. holmes/core/transformers/transformer.py +1 -0
  30. holmes/core/truncation/compaction.py +37 -2
  31. holmes/core/truncation/input_context_window_limiter.py +3 -2
  32. holmes/interactive.py +52 -8
  33. holmes/main.py +17 -37
  34. holmes/plugins/interfaces.py +2 -1
  35. holmes/plugins/prompts/__init__.py +2 -1
  36. holmes/plugins/prompts/_fetch_logs.jinja2 +5 -5
  37. holmes/plugins/prompts/_runbook_instructions.jinja2 +2 -1
  38. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  39. holmes/plugins/prompts/conversation_history_compaction.jinja2 +2 -1
  40. holmes/plugins/prompts/generic_ask.jinja2 +0 -2
  41. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -2
  42. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -2
  43. holmes/plugins/prompts/generic_investigation.jinja2 +0 -2
  44. holmes/plugins/prompts/investigation_procedure.jinja2 +2 -1
  45. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -2
  46. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -2
  47. holmes/plugins/runbooks/__init__.py +32 -3
  48. holmes/plugins/sources/github/__init__.py +4 -2
  49. holmes/plugins/sources/prometheus/models.py +1 -0
  50. holmes/plugins/toolsets/__init__.py +30 -26
  51. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +13 -12
  52. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  53. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  54. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  55. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  56. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  57. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -12
  58. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +7 -7
  59. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -7
  60. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -5
  61. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  62. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -7
  63. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -8
  64. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -3
  65. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -3
  66. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -3
  67. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -3
  68. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  69. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  70. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  71. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  72. holmes/plugins/toolsets/bash/bash_toolset.py +2 -3
  73. holmes/plugins/toolsets/bash/common/bash.py +19 -9
  74. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  75. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  76. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  77. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  78. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  79. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  80. holmes/plugins/toolsets/connectivity_check.py +124 -0
  81. holmes/plugins/toolsets/coralogix/api.py +132 -119
  82. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  84. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  85. holmes/plugins/toolsets/datadog/datadog_api.py +36 -3
  86. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +34 -1
  87. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  88. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  89. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  90. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  91. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +71 -28
  92. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +224 -375
  93. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +67 -36
  94. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +360 -343
  95. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  96. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  97. holmes/plugins/toolsets/git.py +7 -8
  98. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  99. holmes/plugins/toolsets/grafana/common.py +2 -30
  100. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +2 -1
  101. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +18 -2
  102. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +92 -18
  103. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  104. holmes/plugins/toolsets/grafana/toolset_grafana.py +109 -25
  105. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +22 -0
  106. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +201 -33
  107. holmes/plugins/toolsets/grafana/trace_parser.py +3 -2
  108. holmes/plugins/toolsets/internet/internet.py +10 -10
  109. holmes/plugins/toolsets/internet/notion.py +5 -6
  110. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  111. holmes/plugins/toolsets/investigator/model.py +3 -1
  112. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  113. holmes/plugins/toolsets/kafka.py +12 -7
  114. holmes/plugins/toolsets/kubernetes.yaml +260 -30
  115. holmes/plugins/toolsets/kubernetes_logs.py +3 -3
  116. holmes/plugins/toolsets/logging_utils/logging_api.py +16 -6
  117. holmes/plugins/toolsets/mcp/toolset_mcp.py +88 -60
  118. holmes/plugins/toolsets/newrelic/new_relic_api.py +41 -1
  119. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +24 -0
  120. holmes/plugins/toolsets/newrelic/newrelic.py +212 -55
  121. holmes/plugins/toolsets/prometheus/prometheus.py +358 -102
  122. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +11 -3
  123. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  124. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +5 -5
  125. holmes/plugins/toolsets/robusta/robusta.py +5 -5
  126. holmes/plugins/toolsets/runbook/runbook_fetcher.py +25 -6
  127. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +1 -1
  128. holmes/plugins/toolsets/utils.py +1 -1
  129. holmes/utils/config_utils.py +1 -1
  130. holmes/utils/connection_utils.py +31 -0
  131. holmes/utils/console/result.py +10 -0
  132. holmes/utils/file_utils.py +2 -1
  133. holmes/utils/global_instructions.py +10 -26
  134. holmes/utils/holmes_status.py +4 -3
  135. holmes/utils/log.py +15 -0
  136. holmes/utils/markdown_utils.py +2 -3
  137. holmes/utils/memory_limit.py +58 -0
  138. holmes/utils/sentry_helper.py +23 -0
  139. holmes/utils/stream.py +12 -5
  140. holmes/utils/tags.py +4 -3
  141. holmes/version.py +3 -1
  142. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +12 -10
  143. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  144. holmes/plugins/toolsets/aws.yaml +0 -80
  145. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -114
  146. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  147. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -736
  148. holmes/plugins/toolsets/grafana/grafana_api.py +0 -64
  149. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  150. holmes/plugins/toolsets/opensearch/opensearch.py +0 -250
  151. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  152. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -215
  153. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  154. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  155. holmes/utils/keygen_utils.py +0 -6
  156. holmesgpt-0.16.2a0.dist-info/RECORD +0 -258
  157. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_ppl_query_docs.jinja2 +0 -0
  158. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist.py +2 -2
  159. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist_instructions.jinja2 +0 -0
  160. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/LICENSE +0 -0
  161. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  162. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/plugins/toolsets/datadog/toolset_datadog_traces.py
@@ -1,44 +1,47 @@
 """Datadog Traces toolset for HolmesGPT."""
 
+import copy
 import json
 import logging
 import os
-import time
+import re
 from typing import Any, Dict, Optional, Tuple
 
+from pydantic import AnyUrl
+
 from holmes.core.tools import (
     CallablePrerequisite,
+    StructuredToolResult,
+    StructuredToolResultStatus,
     Tool,
     ToolInvokeContext,
     ToolParameter,
     Toolset,
-    StructuredToolResult,
-    StructuredToolResultStatus,
     ToolsetTag,
 )
+from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
 from holmes.plugins.toolsets.datadog.datadog_api import (
+    MAX_RETRY_COUNT_ON_RATE_LIMIT,
     DataDogRequestError,
-    DatadogBaseConfig,
     execute_datadog_http_request,
     get_headers,
-    MAX_RETRY_COUNT_ON_RATE_LIMIT,
 )
-from holmes.plugins.toolsets.utils import (
-    process_timestamps_to_int,
-    toolset_name_for_one_liner,
-)
-from holmes.plugins.toolsets.datadog.datadog_traces_formatter import (
-    format_traces_list,
-    format_trace_hierarchy,
-    format_spans_search,
+from holmes.plugins.toolsets.datadog.datadog_models import DatadogTracesConfig
+from holmes.plugins.toolsets.datadog.datadog_url_utils import (
+    generate_datadog_spans_analytics_url,
+    generate_datadog_spans_url,
 )
 from holmes.plugins.toolsets.logging_utils.logging_api import (
     DEFAULT_TIME_SPAN_SECONDS,
 )
+from holmes.plugins.toolsets.utils import (
+    process_timestamps_to_int,
+    standard_start_datetime_tool_param_description,
+    toolset_name_for_one_liner,
+)
 
-
-class DatadogTracesConfig(DatadogBaseConfig):
-    indexes: list[str] = ["*"]
+# Valid percentile aggregations supported by Datadog
+PERCENTILE_AGGREGATIONS = ["pc75", "pc90", "pc95", "pc98", "pc99"]
 
 
 class DatadogTracesToolset(Toolset):
@@ -54,22 +57,14 @@ class DatadogTracesToolset(Toolset):
             icon_url="https://imgix.datadoghq.com//img/about/presskit/DDlogo.jpg",
             prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
             tools=[
-                FetchDatadogTracesList(toolset=self),
-                FetchDatadogTraceById(toolset=self),
-                FetchDatadogSpansByFilter(toolset=self),
+                GetSpans(toolset=self),
+                AggregateSpans(toolset=self),
             ],
             tags=[ToolsetTag.CORE],
         )
-        self._reload_instructions()
-
-    def _reload_instructions(self):
-        """Load Datadog traces specific troubleshooting instructions."""
-        template_file_path = os.path.abspath(
-            os.path.join(
-                os.path.dirname(__file__), "instructions_datadog_traces.jinja2"
-            )
+        self._load_llm_instructions_from_file(
+            os.path.dirname(__file__), "instructions_datadog_traces.jinja2"
         )
-        self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
 
     def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
         """Check prerequisites with configuration."""
@@ -136,13 +131,12 @@ class DatadogTracesToolset(Toolset):
             return False, f"Healthcheck failed with exception: {str(e)}"
 
     def get_example_config(self) -> Dict[str, Any]:
-        """Get example configuration for this toolset."""
-        return {
-            "dd_api_key": "<your_datadog_api_key>",
-            "dd_app_key": "<your_datadog_app_key>",
-            "site_api_url": "https://api.datadoghq.com",  # or https://api.datadoghq.eu for EU
-            "request_timeout": 60,
-        }
+        example_config = DatadogTracesConfig(
+            dd_api_key="<your_datadog_api_key>",
+            dd_app_key="<your_datadog_app_key>",
+            site_api_url=AnyUrl("https://api.datadoghq.com"),
+        )
+        return example_config.model_dump(mode="json")
 
 
 class BaseDatadogTracesTool(Tool):
@@ -151,68 +145,88 @@ class BaseDatadogTracesTool(Tool):
     toolset: "DatadogTracesToolset"
 
 
-class FetchDatadogTracesList(BaseDatadogTracesTool):
-    """Tool to fetch a list of traces from Datadog."""
+# Schema defines what fields to keep in compact mode
+COMPACT_SCHEMA = {
+    "custom": {
+        "duration": True,
+        "http": {"status_code": True, "host": True, "method": True, "url": True},
+    },
+    "status": True,
+    "start_timestamp": True,
+    "end_timestamp": True,
+    "error": True,
+    "single_span": True,
+    "span_id": True,
+    "trace_id": True,
+    "parent_id": True,
+    "service": True,
+    "resource_name": True,
+    "tags": {"_filter": "startswith", "_values": ["pod_name:"]},  # Generic array filter
+}
+
+
+class GetSpans(BaseDatadogTracesTool):
+    """Tool to search for spans with specific filters."""
 
     def __init__(self, toolset: "DatadogTracesToolset"):
         super().__init__(
-            name="fetch_datadog_traces",
-            description="[datadog/traces toolset] Fetch a list of traces from Datadog with optional filters",
+            name="fetch_datadog_spans",
+            description="Search for spans in Datadog using span syntax. "
+            "Supports wildcards (*) for pattern matching: @http.route:*payment*, resource_name:*user*, service:*api*. "
+            "Uses the DataDog api endpoint: POST /api/v2/spans/events/search with 'query' parameter.",
             parameters={
-                "service": ToolParameter(
-                    description="Filter by service name",
-                    type="string",
-                    required=False,
-                ),
-                "operation": ToolParameter(
-                    description="Filter by operation name",
+                "query": ToolParameter(
+                    description="The search query following span syntax. Supports wildcards (*) for pattern matching. Examples: @http.route:*payment*, resource_name:*user*, service:*api*. Default: *",
                     type="string",
                     required=False,
                 ),
-                "resource": ToolParameter(
-                    description="Filter by resource name",
+                "start_datetime": ToolParameter(
+                    description=standard_start_datetime_tool_param_description(
+                        DEFAULT_TIME_SPAN_SECONDS
+                    ),
                     type="string",
                     required=False,
                 ),
-                "min_duration": ToolParameter(
-                    description="Minimum duration (e.g., '5s', '500ms', '1m')",
+                "end_datetime": ToolParameter(
+                    description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
                     type="string",
                     required=False,
                 ),
-                "start_datetime": ToolParameter(
-                    description="Start time in RFC3339 format or relative time in seconds (negative for past)",
+                "timezone": ToolParameter(
+                    description="The timezone can be specified as GMT, UTC, an offset from UTC (like UTC+1), or as a Timezone Database identifier (like America/New_York). default: UTC",
                    type="string",
                    required=False,
                ),
-                "end_datetime": ToolParameter(
-                    description="End time in RFC3339 format or relative time in seconds (negative for past)",
+                "cursor": ToolParameter(
+                    description="The returned paging point to use to get the next results. IMPORTANT: Cursors are single-use and stateful - never reuse the same cursor value multiple times or parallelize cursor-based calls. Each response provides a new cursor for the subsequent request.",
                     type="string",
                     required=False,
                 ),
                 "limit": ToolParameter(
-                    description="Maximum number of traces to return",
+                    description="Maximum number of spans to return. Default: 10. Warning: Using values higher than 10 may result in too much data and cause the tool call to fail.",
                     type="integer",
                     required=False,
                 ),
+                "sort_desc": ToolParameter(
+                    description="Get the results in descending order. default: true",
+                    type="boolean",
+                    required=False,
+                ),
+                "compact": ToolParameter(
+                    description="Return only essential fields to reduce output size. Use with higher limits (50-100) for initial exploration, then use compact=false with lower limits (5-10) for detailed investigation. Default: True",
+                    type="boolean",
+                    required=True,
+                ),
             },
             toolset=toolset,
         )
 
     def get_parameterized_one_liner(self, params: dict) -> str:
         """Get a one-liner description of the tool invocation."""
-        filters = []
-        if "service" in params:
-            filters.append(f"service={params['service']}")
-        if "operation" in params:
-            filters.append(f"operation={params['operation']}")
-        if "min_duration" in params:
-            filters.append(f"duration>{params['min_duration']}")
-
-        filter_str = ", ".join(filters) if filters else "all"
-        return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Traces ({filter_str})"
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Spans ({params['query'] if 'query' in params else ''})"
 
     def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
-        """Execute the tool to fetch traces."""
+        """Execute the tool to search spans."""
         if not self.toolset.dd_config:
             return StructuredToolResult(
                 status=StructuredToolResultStatus.ERROR,
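
Note: for orientation, a call to the new fetch_datadog_spans tool might carry parameters like these. This is an illustrative sketch only — the parameter names come from the schema above, while the query and values are invented:

    # Hypothetical fetch_datadog_spans parameters (values invented for illustration)
    params = {
        "query": "service:*checkout* @http.status_code:>=500",
        "limit": 10,          # keep small; larger values risk oversized tool output
        "sort_desc": True,    # newest spans first
        "compact": True,      # required; trims each span to the COMPACT_SCHEMA fields
    }
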
@@ -221,7 +235,7 @@ class FetchDatadogTracesList(BaseDatadogTracesTool):
             )
 
         url = None
-        payload = None
+        payload: Optional[Dict[str, Any]] = None
 
         try:
             # Process timestamps
@@ -235,36 +249,14 @@ class FetchDatadogTracesList(BaseDatadogTracesTool):
             from_time_ms = from_time_int * 1000
             to_time_ms = to_time_int * 1000
 
-            # Build search query
-            query_parts = []
-
-            if params.get("service"):
-                query_parts.append(f"service:{params['service']}")
-
-            if params.get("operation"):
-                query_parts.append(f"operation_name:{params['operation']}")
-
-            if params.get("resource"):
-                query_parts.append(f"resource_name:{params['resource']}")
-
-            if params.get("min_duration"):
-                # Parse duration string (e.g., "5s", "500ms", "1m")
-                duration_str = params["min_duration"].lower()
-                if duration_str.endswith("ms"):
-                    duration_ns = int(float(duration_str[:-2]) * 1_000_000)
-                elif duration_str.endswith("s"):
-                    duration_ns = int(float(duration_str[:-1]) * 1_000_000_000)
-                elif duration_str.endswith("m"):
-                    duration_ns = int(float(duration_str[:-1]) * 60 * 1_000_000_000)
-                else:
-                    # Assume milliseconds if no unit
-                    duration_ns = int(float(duration_str) * 1_000_000)
-
-                query_parts.append(f"@duration:>{duration_ns}")
-
-            query = " ".join(query_parts) if query_parts else "*"
+            query: str = params.get("query") if params.get("query") else "*"  # type: ignore
+            limit = params.get("limit") if params.get("limit") else 10
+            if params.get("sort_desc") is not None:
+                sort = "-timestamp" if params.get("sort_desc") else "timestamp"
+            else:
+                sort = "-timestamp"
 
-            # Prepare API request - use POST search endpoint
+            # Use POST endpoint for more complex searches
             url = f"{self.toolset.dd_config.site_api_url}/api/v2/spans/events/search"
             headers = get_headers(self.toolset.dd_config)
 
@@ -278,12 +270,17 @@ class FetchDatadogTracesList(BaseDatadogTracesTool):
                         "to": str(to_time_ms),
                         "indexes": self.toolset.dd_config.indexes,
                     },
-                    "page": {"limit": params.get("limit", 50)},
-                    "sort": "-timestamp",
+                    "page": {
+                        "limit": limit,
+                    },
+                    "sort": sort,
                 },
             }
         }
 
+            if params.get("cursor"):
+                payload["data"]["attributes"]["page"]["cursor"] = params["cursor"]
+
             response = execute_datadog_http_request(
                 url=url,
                 headers=headers,
@@ -292,32 +289,28 @@ class FetchDatadogTracesList(BaseDatadogTracesTool):
                 method="POST",
             )
 
-            # Handle tuple response from POST requests
-            if isinstance(response, tuple):
-                spans, _ = response
-            elif response:
-                spans = response.get("data", [])
-            else:
-                spans = []
-
-            # Format the traces using the formatter
-            formatted_output = format_traces_list(spans, limit=params.get("limit", 50))
-            if not formatted_output:
-                return StructuredToolResult(
-                    status=StructuredToolResultStatus.NO_DATA,
-                    params=params,
-                    data="No matching traces found.",
-                )
+            # Apply compact filtering if requested
+            if params.get("compact", False) and "data" in response:
+                response["data"] = [
+                    self._filter_span_attributes(span) for span in response["data"]
+                ]
+
+            web_url = generate_datadog_spans_url(
+                self.toolset.dd_config,
+                query,
+                from_time_ms,
+                to_time_ms,
+            )
 
             return StructuredToolResult(
                 status=StructuredToolResultStatus.SUCCESS,
-                data=formatted_output,
+                data=response,
                 params=params,
+                url=web_url,
             )
 
         except DataDogRequestError as e:
             logging.exception(e, exc_info=True)
-
             if e.status_code == 429:
                 error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
             elif e.status_code == 403:
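
Note: the cursor parameter above is meant to be threaded from one response into the next request, one page at a time. A hedged sketch — invoke_tool is a hypothetical stand-in for however the tool is called, and the meta.page.after path follows Datadog's v2 spans search response format:

    # Hypothetical two-page fetch honoring the single-use cursor contract
    first = invoke_tool("fetch_datadog_spans", {"query": "service:payments", "compact": True})
    cursor = first.get("meta", {}).get("page", {}).get("after")
    if cursor:
        # Never reuse a cursor; each response yields a fresh one
        second = invoke_tool(
            "fetch_datadog_spans",
            {"query": "service:payments", "compact": True, "cursor": cursor},
        )
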
@@ -352,212 +345,254 @@ class FetchDatadogTracesList(BaseDatadogTracesTool):
                 ),
             )
 
+    def _apply_compact_schema(self, source: dict, schema: dict) -> dict:
+        """Apply schema to filter fields from source dict."""
+        result: Dict[str, Any] = {}
+
+        for key, value in schema.items():
+            if key not in source:
+                continue
+
+            source_value = source[key]
+
+            if isinstance(value, dict):
+                # Check if it's a filter directive for arrays
+                if "_filter" in value and isinstance(source_value, list):
+                    filter_type = value["_filter"]
+                    filter_values = value.get("_values", [])
+
+                    if filter_type == "startswith":
+                        # Filter array items that start with any of the specified values
+                        filtered = [
+                            item
+                            for item in source_value
+                            if isinstance(item, str)
+                            and any(item.startswith(prefix) for prefix in filter_values)
+                        ]
+                        if filtered:
+                            result[key] = filtered
+
+                elif isinstance(source_value, dict):
+                    # Regular nested object - recurse
+                    nested_result = self._apply_compact_schema(source_value, value)
+                    if nested_result:
+                        result[key] = nested_result
+
+            elif value is True:
+                # Copy the field as-is
+                result[key] = source_value
+
+        return result
+
+    def _filter_span_attributes(self, span: dict) -> dict:
+        """Filter span to include only essential fields."""
+        filtered_span = {
+            "id": span.get("id"),
+            "type": span.get("type"),
+        }
 
-class FetchDatadogTraceById(BaseDatadogTracesTool):
-    """Tool to fetch detailed information about a specific trace."""
-
-    def __init__(self, toolset: "DatadogTracesToolset"):
-        super().__init__(
-            name="fetch_datadog_trace_by_id",
-            description="[datadog/traces toolset] Fetch detailed information about a specific trace by its ID",
-            parameters={
-                "trace_id": ToolParameter(
-                    description="The trace ID to fetch details for",
-                    type="string",
-                    required=True,
-                ),
-            },
-            toolset=toolset,
-        )
-
-    def get_parameterized_one_liner(self, params: dict) -> str:
-        """Get a one-liner description of the tool invocation."""
-        trace_id = params.get("trace_id", "unknown")
-        return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Trace Details ({trace_id})"
-
-    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
-        """Execute the tool to fetch trace details."""
-        if not self.toolset.dd_config:
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.ERROR,
-                error="Datadog configuration not initialized",
-                params=params,
-            )
-
-        trace_id = params.get("trace_id")
-        if not trace_id:
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.ERROR,
-                error="trace_id parameter is required",
-                params=params,
-            )
-
-        url = None
-        payload = None
-
-        try:
-            # For Datadog, we need to search for all spans with the given trace_id
-            # Using a reasonable time window (last 7 days by default)
-            current_time = int(time.time())
-            from_time_ms = (current_time - 604800) * 1000  # 7 days ago
-            to_time_ms = current_time * 1000
-
-            url = f"{self.toolset.dd_config.site_api_url}/api/v2/spans/events/search"
-            headers = get_headers(self.toolset.dd_config)
-
-            payload = {
-                "data": {
-                    "type": "search_request",
-                    "attributes": {
-                        "filter": {
-                            "query": f"trace_id:{trace_id}",
-                            "from": str(from_time_ms),
-                            "to": str(to_time_ms),
-                            "indexes": self.toolset.dd_config.indexes,
-                        },
-                        "page": {"limit": 1000},  # Get all spans for the trace
-                        "sort": "timestamp",
-                    },
-                }
-            }
-
-            response = execute_datadog_http_request(
-                url=url,
-                headers=headers,
-                payload_or_params=payload,
-                timeout=self.toolset.dd_config.request_timeout,
-                method="POST",
-            )
-
-            # Handle tuple response from POST requests
-            if isinstance(response, tuple):
-                spans, _ = response
-            elif response:
-                spans = response.get("data", [])
-            else:
-                spans = []
-
-            # Format the trace hierarchy using the formatter
-            formatted_output = format_trace_hierarchy(trace_id, spans)
-            if not formatted_output:
-                return StructuredToolResult(
-                    status=StructuredToolResultStatus.NO_DATA,
-                    params=params,
-                    data=f"No trace found for trace_id: {trace_id}",
-                )
-
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.SUCCESS,
-                data=formatted_output,
-                params=params,
-            )
-
-        except DataDogRequestError as e:
-            logging.exception(e, exc_info=True)
-
-            if e.status_code == 429:
-                error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
-            elif e.status_code == 403:
-                error_msg = (
-                    f"Permission denied. Ensure your Datadog Application Key has the 'apm_read' "
-                    f"permission. Error: {str(e)}"
-                )
-            else:
-                error_msg = f"Exception while querying Datadog: {str(e)}"
-
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.ERROR,
-                error=error_msg,
-                params=params,
-                invocation=(
-                    json.dumps({"url": url, "payload": payload})
-                    if url and payload
-                    else None
-                ),
+        if "attributes" in span:
+            filtered_span["attributes"] = self._apply_compact_schema(
+                span["attributes"], COMPACT_SCHEMA
             )
 
-        except Exception as e:
-            logging.exception(e, exc_info=True)
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.ERROR,
-                error=f"Unexpected error: {str(e)}",
-                params=params,
-                invocation=(
-                    json.dumps({"url": url, "payload": payload})
-                    if url and payload
-                    else None
-                ),
-            )
+        return filtered_span
 
 
-class FetchDatadogSpansByFilter(BaseDatadogTracesTool):
-    """Tool to search for spans with specific filters."""
+class AggregateSpans(BaseDatadogTracesTool):
+    """Tool to aggregate span data into buckets and compute metrics and timeseries."""
 
     def __init__(self, toolset: "DatadogTracesToolset"):
         super().__init__(
-            name="fetch_datadog_spans",
-            description="[datadog/traces toolset] Search for spans in Datadog with detailed filters",
+            name="aggregate_datadog_spans",
+            description="Aggregate spans into buckets and compute metrics and timeseries. "
+            "Uses the DataDog api endpoint: POST /api/v2/spans/analytics/aggregate",
             parameters={
                 "query": ToolParameter(
-                    description="Datadog search query (e.g., 'service:web-app @http.status_code:500')",
-                    type="string",
-                    required=False,
-                ),
-                "service": ToolParameter(
-                    description="Filter by service name",
+                    description="Search query following span syntax. Default: '*'",
                     type="string",
                     required=False,
                 ),
-                "operation": ToolParameter(
-                    description="Filter by operation name",
+                "start_datetime": ToolParameter(
+                    description=standard_start_datetime_tool_param_description(
+                        DEFAULT_TIME_SPAN_SECONDS
+                    ),
                     type="string",
                     required=False,
                 ),
-                "resource": ToolParameter(
-                    description="Filter by resource name",
+                "end_datetime": ToolParameter(
+                    description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
                     type="string",
                     required=False,
                 ),
-                "tags": ToolParameter(
-                    description="Filter by tags (e.g., {'env': 'production', 'version': '1.2.3'})",
-                    type="object",
-                    required=False,
+                "compute": ToolParameter(
+                    description="List of metrics to compute from the matching spans. Supports up to 10 computes at the same time.",
+                    type="array",
+                    required=True,
+                    items=ToolParameter(
+                        type="object",
+                        properties={
+                            "aggregation": ToolParameter(
+                                type="string",
+                                required=True,
+                                enum=[
+                                    "count",
+                                    "cardinality",
+                                    "sum",
+                                    "min",
+                                    "max",
+                                    "avg",
+                                    "median",
+                                ]
+                                + PERCENTILE_AGGREGATIONS,
+                                description="The aggregation method.",
+                            ),
+                            "metric": ToolParameter(
+                                type="string",
+                                required=False,
+                                description="The span attribute to aggregate. Required for all non-count aggregations",
+                            ),
+                            "type": ToolParameter(
+                                type="string",
+                                required=False,
+                                enum=["total", "timeseries"],
+                                description="Compute type for the aggregation. Default: 'total'",
+                            ),
+                            "interval": ToolParameter(
+                                type="string",
+                                required=False,
+                                description="The time buckets' size for timeseries results (e.g., '5m', '1h'; only used for type=timeseries). Defaults to a resolution of 150 points.",
+                            ),
+                        },
+                    ),
                 ),
-                "start_datetime": ToolParameter(
-                    description="Start time in RFC3339 format or relative time in seconds (negative for past)",
-                    type="string",
+                "group_by": ToolParameter(
+                    description="List of facets to split the aggregate data by",
+                    type="array",
                     required=False,
+                    items=ToolParameter(
+                        type="object",
+                        properties={
+                            "facet": ToolParameter(
+                                type="string",
+                                required=True,
+                                description="The span attribute to split by",
+                            ),
+                            "limit": ToolParameter(
+                                type="integer",
+                                required=False,
+                                description="Maximum number of facet groups to return. Default: 10",
+                            ),
+                            "missing": ToolParameter(
+                                type="string",
+                                required=False,
+                                description="The value to use for spans that don't have the facet",
+                            ),
+                            "sort": ToolParameter(
+                                type="object",
+                                required=False,
+                                description="Sort configuration for the groups",
+                                properties={
+                                    # Not working correctly
+                                    # "aggregation": ToolParameter(
+                                    #     type="string",
+                                    #     required=True,
+                                    #     description="The aggregation method to sort by",
+                                    # ),
+                                    "metric": ToolParameter(
+                                        type="string",
+                                        required=False,
+                                        description="The metric to sort by when using a metric aggregation. (only used for type=measure).",
+                                    ),
+                                    "type": ToolParameter(
+                                        type="string",
+                                        required=False,
+                                        enum=["alphabetical", "measure"],
+                                        description="The type of sorting to use",
+                                    ),
+                                    "order": ToolParameter(
+                                        type="string",
+                                        required=False,
+                                        enum=["asc", "desc"],
+                                        description="The sort order. Default: 'desc'",
+                                    ),
+                                },
+                            ),
+                            "total": ToolParameter(
+                                type="boolean",
+                                required=False,
+                                description="Whether to include a 'total' group with all non-faceted results",
+                            ),
+                            "histogram": ToolParameter(
+                                type="object",
+                                required=False,
+                                description="Histogram configuration for numeric facets",
+                                properties={
+                                    "interval": ToolParameter(
+                                        type="number",
+                                        required=True,
+                                        description="The bin size for the histogram",
+                                    ),
+                                    "min": ToolParameter(
+                                        type="number",
+                                        required=False,
+                                        description="The minimum value for the histogram",
+                                    ),
+                                    "max": ToolParameter(
+                                        type="number",
+                                        required=False,
+                                        description="The maximum value for the histogram",
+                                    ),
+                                },
+                            ),
+                        },
+                    ),
                 ),
-                "end_datetime": ToolParameter(
-                    description="End time in RFC3339 format or relative time in seconds (negative for past)",
+                "timezone": ToolParameter(
+                    description="The timezone for time-based results (e.g., 'GMT', 'UTC', 'America/New_York'). Default: 'UTC'",
                     type="string",
                     required=False,
                 ),
-                "limit": ToolParameter(
-                    description="Maximum number of spans to return",
-                    type="integer",
-                    required=False,
-                ),
             },
             toolset=toolset,
         )
 
     def get_parameterized_one_liner(self, params: dict) -> str:
         """Get a one-liner description of the tool invocation."""
-        if "query" in params:
-            return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Spans ({params['query']})"
-
-        filters = []
-        if "service" in params:
-            filters.append(f"service={params['service']}")
-        if "operation" in params:
-            filters.append(f"operation={params['operation']}")
-
-        filter_str = ", ".join(filters) if filters else "all"
-        return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Spans ({filter_str})"
+        query = params.get("query", "*")
+        compute_info = ""
+        if params.get("compute"):
+            aggregations = [c.get("aggregation", "") for c in params["compute"]]
+            compute_info = f" (computing: {', '.join(aggregations)})"
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Aggregate Spans ({query}){compute_info}"
+
+    def _fix_percentile_aggregations(self, compute_params: list) -> list:
+        """Fix common percentile format mistakes that the LLM makes when choosing from the enum (e.g., p95 -> pc95).
+
+        Args:
+            compute_params: List of compute parameter dictionaries
+
+        Returns:
+            List of compute parameters with corrected aggregation values
+        """
+        # Deep copy the entire compute params to avoid modifying the original
+        processed_compute = copy.deepcopy(compute_params)
+
+        # Simple replacement for each known percentile
+        for compute_item in processed_compute:
+            if isinstance(compute_item, dict) and "aggregation" in compute_item:
+                agg_value = compute_item["aggregation"]
+                # Check if it matches p\d\d pattern (e.g., p95)
+                if re.match(r"^p\d{2}$", agg_value):
+                    # Convert to pc format and check if it's valid
+                    pc_version = "pc" + agg_value[1:]
+                    if pc_version in PERCENTILE_AGGREGATIONS:
+                        compute_item["aggregation"] = pc_version
+
+        return processed_compute
 
     def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
-        """Execute the tool to search spans."""
+        """Execute the tool to aggregate spans."""
         if not self.toolset.dd_config:
             return StructuredToolResult(
                 status=StructuredToolResultStatus.ERROR,
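
Note: the percentile fix-up above is mechanical string normalization. A minimal standalone sketch of the same logic (names copied from the diff, runnable outside the class):

    import copy
    import re

    PERCENTILE_AGGREGATIONS = ["pc75", "pc90", "pc95", "pc98", "pc99"]

    def fix_percentile_aggregations(compute_params: list) -> list:
        # Map LLM-style "p95" to Datadog's "pc95"; leave other values untouched.
        processed = copy.deepcopy(compute_params)
        for item in processed:
            if isinstance(item, dict) and "aggregation" in item:
                agg = item["aggregation"]
                if re.match(r"^p\d{2}$", agg) and "pc" + agg[1:] in PERCENTILE_AGGREGATIONS:
                    item["aggregation"] = "pc" + agg[1:]
        return processed

    # "p95" is corrected; "count" passes through untouched.
    fixed = fix_percentile_aggregations([{"aggregation": "p95"}, {"aggregation": "count"}])
    assert fixed[0]["aggregation"] == "pc95" and fixed[1]["aggregation"] == "count"
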
@@ -580,51 +615,42 @@ class FetchDatadogSpansByFilter(BaseDatadogTracesTool):
             from_time_ms = from_time_int * 1000
             to_time_ms = to_time_int * 1000
 
-            # Build search query
-            query_parts = []
+            query = params.get("query", "*")
 
-            # If a custom query is provided, use it as the base
-            if params.get("query"):
-                query_parts.append(params["query"])
-
-            # Add additional filters
-            if params.get("service"):
-                query_parts.append(f"service:{params['service']}")
-
-            if params.get("operation"):
-                query_parts.append(f"operation_name:{params['operation']}")
+            # Build the request payload
+            url = f"{self.toolset.dd_config.site_api_url}/api/v2/spans/analytics/aggregate"
+            headers = get_headers(self.toolset.dd_config)
 
-            if params.get("resource"):
-                query_parts.append(f"resource_name:{params['resource']}")
+            # Build payload attributes first
+            # Process compute parameter to fix common p95->pc95 style mistakes
+            compute_params = params.get("compute", [])
+            processed_compute = self._fix_percentile_aggregations(compute_params)
+
+            attributes: Dict[str, Any] = {
+                "filter": {
+                    "query": query,
+                    "from": str(from_time_ms),
+                    "to": str(to_time_ms),
+                },
+                "compute": processed_compute,
+            }
 
-            # Add tag filters
-            if params.get("tags"):
-                tags = params["tags"]
-                if isinstance(tags, dict):
-                    for key, value in tags.items():
-                        query_parts.append(f"@{key}:{value}")
+            # Add optional fields
+            if params.get("group_by"):
+                attributes["group_by"] = params["group_by"]
 
-            query = " ".join(query_parts) if query_parts else "*"
+            # Add options if timezone is specified
+            options: Dict[str, Any] = {}
+            if params.get("timezone"):
+                options["timezone"] = params["timezone"]
 
-            # Use POST endpoint for more complex searches
-            url = f"{self.toolset.dd_config.site_api_url}/api/v2/spans/events/search"
-            headers = get_headers(self.toolset.dd_config)
+            if options:
+                attributes["options"] = options
 
             payload = {
                 "data": {
-                    "type": "search_request",
-                    "attributes": {
-                        "filter": {
-                            "query": query,
-                            "from": str(from_time_ms),
-                            "to": str(to_time_ms),
-                            "indexes": self.toolset.dd_config.indexes,
-                        },
-                        "page": {
-                            "limit": params.get("limit", 100),
-                        },
-                        "sort": "-timestamp",
-                    },
+                    "type": "aggregate_request",
+                    "attributes": attributes,
                 }
             }
 
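
Note: assembled for a p95-latency-by-service aggregation, the payload built above would look roughly like this (values invented; structure mirrors the code in this hunk):

    # Illustrative aggregate_datadog_spans request body
    payload = {
        "data": {
            "type": "aggregate_request",
            "attributes": {
                "filter": {
                    "query": "service:checkout",
                    "from": "1700000000000",  # epoch milliseconds, sent as strings
                    "to": "1700003600000",
                },
                "compute": [{"aggregation": "pc95", "metric": "@duration"}],
                "group_by": [{"facet": "service", "limit": 10}],
                "options": {"timezone": "UTC"},
            },
        }
    }
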
@@ -636,27 +662,18 @@ class FetchDatadogSpansByFilter(BaseDatadogTracesTool):
                 method="POST",
             )
 
-            # Handle tuple response from POST requests
-            if isinstance(response, tuple):
-                spans, _ = response
-            elif response:
-                spans = response.get("data", [])
-            else:
-                spans = []
-
-            # Format the spans search results using the formatter
-            formatted_output = format_spans_search(spans)
-            if not formatted_output:
-                return StructuredToolResult(
-                    status=StructuredToolResultStatus.NO_DATA,
-                    params=params,
-                    data="No matching spans found.",
-                )
+            web_url = generate_datadog_spans_analytics_url(
+                self.toolset.dd_config,
+                query,
+                from_time_ms,
+                to_time_ms,
+            )
 
             return StructuredToolResult(
                 status=StructuredToolResultStatus.SUCCESS,
-                data=formatted_output,
+                data=response,
                 params=params,
+                url=web_url,
             )
 
         except DataDogRequestError as e: