holmesgpt 0.16.2a0__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (162)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +4 -3
  3. holmes/common/env_vars.py +18 -2
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +11 -6
  6. holmes/core/conversations.py +30 -13
  7. holmes/core/investigation.py +21 -25
  8. holmes/core/investigation_structured_output.py +3 -3
  9. holmes/core/issue.py +1 -1
  10. holmes/core/llm.py +50 -31
  11. holmes/core/models.py +19 -17
  12. holmes/core/openai_formatting.py +1 -1
  13. holmes/core/prompt.py +47 -2
  14. holmes/core/runbooks.py +1 -0
  15. holmes/core/safeguards.py +4 -2
  16. holmes/core/supabase_dal.py +4 -2
  17. holmes/core/tool_calling_llm.py +102 -141
  18. holmes/core/tools.py +19 -28
  19. holmes/core/tools_utils/token_counting.py +9 -2
  20. holmes/core/tools_utils/tool_context_window_limiter.py +13 -30
  21. holmes/core/tools_utils/tool_executor.py +0 -18
  22. holmes/core/tools_utils/toolset_utils.py +1 -0
  23. holmes/core/toolset_manager.py +37 -2
  24. holmes/core/tracing.py +13 -2
  25. holmes/core/transformers/__init__.py +1 -1
  26. holmes/core/transformers/base.py +1 -0
  27. holmes/core/transformers/llm_summarize.py +3 -2
  28. holmes/core/transformers/registry.py +2 -1
  29. holmes/core/transformers/transformer.py +1 -0
  30. holmes/core/truncation/compaction.py +37 -2
  31. holmes/core/truncation/input_context_window_limiter.py +3 -2
  32. holmes/interactive.py +52 -8
  33. holmes/main.py +17 -37
  34. holmes/plugins/interfaces.py +2 -1
  35. holmes/plugins/prompts/__init__.py +2 -1
  36. holmes/plugins/prompts/_fetch_logs.jinja2 +5 -5
  37. holmes/plugins/prompts/_runbook_instructions.jinja2 +2 -1
  38. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  39. holmes/plugins/prompts/conversation_history_compaction.jinja2 +2 -1
  40. holmes/plugins/prompts/generic_ask.jinja2 +0 -2
  41. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -2
  42. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -2
  43. holmes/plugins/prompts/generic_investigation.jinja2 +0 -2
  44. holmes/plugins/prompts/investigation_procedure.jinja2 +2 -1
  45. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -2
  46. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -2
  47. holmes/plugins/runbooks/__init__.py +32 -3
  48. holmes/plugins/sources/github/__init__.py +4 -2
  49. holmes/plugins/sources/prometheus/models.py +1 -0
  50. holmes/plugins/toolsets/__init__.py +30 -26
  51. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +13 -12
  52. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  53. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  54. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  55. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  56. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  57. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -12
  58. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +7 -7
  59. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -7
  60. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -5
  61. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  62. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -7
  63. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -8
  64. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -3
  65. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -3
  66. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -3
  67. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -3
  68. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  69. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  70. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  71. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  72. holmes/plugins/toolsets/bash/bash_toolset.py +2 -3
  73. holmes/plugins/toolsets/bash/common/bash.py +19 -9
  74. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  75. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  76. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  77. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  78. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  79. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  80. holmes/plugins/toolsets/connectivity_check.py +124 -0
  81. holmes/plugins/toolsets/coralogix/api.py +132 -119
  82. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  83. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  84. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  85. holmes/plugins/toolsets/datadog/datadog_api.py +36 -3
  86. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +34 -1
  87. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  88. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  89. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  90. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  91. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +71 -28
  92. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +224 -375
  93. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +67 -36
  94. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +360 -343
  95. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  96. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  97. holmes/plugins/toolsets/git.py +7 -8
  98. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  99. holmes/plugins/toolsets/grafana/common.py +2 -30
  100. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +2 -1
  101. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +18 -2
  102. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +92 -18
  103. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  104. holmes/plugins/toolsets/grafana/toolset_grafana.py +109 -25
  105. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +22 -0
  106. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +201 -33
  107. holmes/plugins/toolsets/grafana/trace_parser.py +3 -2
  108. holmes/plugins/toolsets/internet/internet.py +10 -10
  109. holmes/plugins/toolsets/internet/notion.py +5 -6
  110. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  111. holmes/plugins/toolsets/investigator/model.py +3 -1
  112. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  113. holmes/plugins/toolsets/kafka.py +12 -7
  114. holmes/plugins/toolsets/kubernetes.yaml +260 -30
  115. holmes/plugins/toolsets/kubernetes_logs.py +3 -3
  116. holmes/plugins/toolsets/logging_utils/logging_api.py +16 -6
  117. holmes/plugins/toolsets/mcp/toolset_mcp.py +88 -60
  118. holmes/plugins/toolsets/newrelic/new_relic_api.py +41 -1
  119. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +24 -0
  120. holmes/plugins/toolsets/newrelic/newrelic.py +212 -55
  121. holmes/plugins/toolsets/prometheus/prometheus.py +358 -102
  122. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +11 -3
  123. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  124. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +5 -5
  125. holmes/plugins/toolsets/robusta/robusta.py +5 -5
  126. holmes/plugins/toolsets/runbook/runbook_fetcher.py +25 -6
  127. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +1 -1
  128. holmes/plugins/toolsets/utils.py +1 -1
  129. holmes/utils/config_utils.py +1 -1
  130. holmes/utils/connection_utils.py +31 -0
  131. holmes/utils/console/result.py +10 -0
  132. holmes/utils/file_utils.py +2 -1
  133. holmes/utils/global_instructions.py +10 -26
  134. holmes/utils/holmes_status.py +4 -3
  135. holmes/utils/log.py +15 -0
  136. holmes/utils/markdown_utils.py +2 -3
  137. holmes/utils/memory_limit.py +58 -0
  138. holmes/utils/sentry_helper.py +23 -0
  139. holmes/utils/stream.py +12 -5
  140. holmes/utils/tags.py +4 -3
  141. holmes/version.py +3 -1
  142. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +12 -10
  143. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  144. holmes/plugins/toolsets/aws.yaml +0 -80
  145. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -114
  146. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  147. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -736
  148. holmes/plugins/toolsets/grafana/grafana_api.py +0 -64
  149. holmes/plugins/toolsets/opensearch/__init__.py +0 -0
  150. holmes/plugins/toolsets/opensearch/opensearch.py +0 -250
  151. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  152. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -215
  153. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  154. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  155. holmes/utils/keygen_utils.py +0 -6
  156. holmesgpt-0.16.2a0.dist-info/RECORD +0 -258
  157. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_ppl_query_docs.jinja2 +0 -0
  158. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist.py +2 -2
  159. holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist_instructions.jinja2 +0 -0
  160. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/LICENSE +0 -0
  161. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  162. {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
@@ -1,219 +1,69 @@
-import os
-from enum import Enum
 import json
 import logging
-from typing import Any, Optional, Dict, Tuple, Set
-from urllib.parse import urlencode
+import os
+from typing import Any, Dict, Optional, Tuple
+
+from pydantic import AnyUrl
+
 from holmes.core.tools import (
     CallablePrerequisite,
+    StructuredToolResult,
+    StructuredToolResultStatus,
+    Tool,
+    ToolInvokeContext,
+    ToolParameter,
     ToolsetTag,
 )
-from pydantic import BaseModel, Field
-from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
-from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
+from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
 from holmes.plugins.toolsets.datadog.datadog_api import (
-    DatadogBaseConfig,
+    MAX_RETRY_COUNT_ON_RATE_LIMIT,
     DataDogRequestError,
-    execute_paginated_datadog_http_request,
+    execute_datadog_http_request,
     get_headers,
-    MAX_RETRY_COUNT_ON_RATE_LIMIT,
-    enhance_error_message,
-    preprocess_time_fields,
 )
+from holmes.plugins.toolsets.datadog.datadog_models import (
+    DatadogLogsConfig,
+)
+from holmes.plugins.toolsets.datadog.datadog_url_utils import generate_datadog_logs_url
 from holmes.plugins.toolsets.logging_utils.logging_api import (
-    DEFAULT_TIME_SPAN_SECONDS,
     DEFAULT_LOG_LIMIT,
-    BasePodLoggingToolset,
-    FetchPodLogsParams,
-    LoggingCapability,
-    PodLoggingTool,
+    DEFAULT_TIME_SPAN_SECONDS,
+    Toolset,
+)
+from holmes.plugins.toolsets.utils import (
+    process_timestamps_to_int,
+    standard_start_datetime_tool_param_description,
+    toolset_name_for_one_liner,
 )
-from holmes.plugins.toolsets.utils import process_timestamps_to_rfc3339
-
-
-class DataDogLabelsMapping(BaseModel):
-    pod: str = "pod_name"
-    namespace: str = "kube_namespace"
-
-
-class DataDogStorageTier(str, Enum):
-    INDEXES = "indexes"
-    ONLINE_ARCHIVES = "online-archives"
-    FLEX = "flex"
-
-
-DEFAULT_STORAGE_TIERS = [DataDogStorageTier.INDEXES]
-
-
-class DatadogLogsConfig(DatadogBaseConfig):
-    indexes: list[str] = ["*"]
-    # Ordered list of storage tiers. Works as fallback. Subsequent tiers are queried only if the previous tier yielded no result
-    storage_tiers: list[DataDogStorageTier] = Field(
-        default=DEFAULT_STORAGE_TIERS, min_length=1
-    )
-    labels: DataDogLabelsMapping = DataDogLabelsMapping()
-    page_size: int = 300
-    default_limit: int = DEFAULT_LOG_LIMIT
-
-
-def calculate_page_size(
-    params: FetchPodLogsParams, dd_config: DatadogLogsConfig, logs: list
-) -> int:
-    logs_count = len(logs)
-
-    max_logs_count = dd_config.default_limit
-    if params.limit:
-        max_logs_count = params.limit
-
-    return min(dd_config.page_size, max(0, max_logs_count - logs_count))
-
-
-def fetch_paginated_logs(
-    params: FetchPodLogsParams,
-    dd_config: DatadogLogsConfig,
-    storage_tier: DataDogStorageTier,
-) -> list[dict]:
-    limit = params.limit or dd_config.default_limit
-
-    (from_time, to_time) = process_timestamps_to_rfc3339(
-        start_timestamp=params.start_time,
-        end_timestamp=params.end_time,
-        default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
-    )
-
-    url = f"{dd_config.site_api_url}/api/v2/logs/events/search"
-    headers = get_headers(dd_config)
-
-    query = f"{dd_config.labels.namespace}:{params.namespace}"
-    query += f" {dd_config.labels.pod}:{params.pod_name}"
-    if params.filter:
-        filter = params.filter.replace('"', '\\"')
-        query += f' "{filter}"'
-
-    payload: Dict[str, Any] = {
-        "filter": {
-            "from": from_time,
-            "to": to_time,
-            "query": query,
-            "indexes": dd_config.indexes,
-            "storage_tier": storage_tier.value,
-        },
-        "sort": "-timestamp",
-        "page": {"limit": calculate_page_size(params, dd_config, [])},
-    }
-
-    # Preprocess time fields to ensure correct format
-    processed_payload = preprocess_time_fields(payload, "/api/v2/logs/events/search")
-
-    logs, cursor = execute_paginated_datadog_http_request(
-        url=url,
-        headers=headers,
-        payload_or_params=processed_payload,
-        timeout=dd_config.request_timeout,
-    )
-
-    while cursor and len(logs) < limit:
-        processed_payload["page"]["cursor"] = cursor
-        processed_payload["page"]["limit"] = calculate_page_size(
-            params, dd_config, logs
-        )
-        new_logs, cursor = execute_paginated_datadog_http_request(
-            url=url,
-            headers=headers,
-            payload_or_params=processed_payload,
-            timeout=dd_config.request_timeout,
-        )
-        logs += new_logs
-
-    # logs are fetched descending order. Unified logging API follows the pattern of kubectl logs where oldest logs are first
-    logs.reverse()
-
-    if len(logs) > limit:
-        logs = logs[-limit:]
-    return logs
 
 
 def format_logs(raw_logs: list[dict]) -> str:
+    # Use similar structure to Datadog Log Explorer
     logs = []
 
     for raw_log_item in raw_logs:
-        # Extract timestamp - Datadog returns it in ISO format
-        timestamp = raw_log_item.get("attributes", {}).get("timestamp", "")
-        if not timestamp:
-            # Fallback to @timestamp if timestamp is not in attributes
-            timestamp = raw_log_item.get("attributes", {}).get("@timestamp", "")
-
-        # Extract message
-        message = raw_log_item.get("attributes", {}).get(
-            "message", json.dumps(raw_log_item)
-        )
+        attrs = raw_log_item.get("attributes", {})
 
-        # Format as: [timestamp] message
-        if timestamp:
-            logs.append(f"[{timestamp}] {message}")
-        else:
-            logs.append(message)
+        timestamp = attrs.get("timestamp") or attrs.get("@timestamp", "")
+        host = attrs.get("host", "")
+        service = attrs.get("service", "")
+        status = attrs.get("attributes", {}).get("status") or attrs.get("status", "")
+        message = attrs.get("message", json.dumps(raw_log_item))
+        tags = attrs.get("tags", [])
 
-    return "\n".join(logs)
+        pod_name_tag = next((t for t in tags if t.startswith("pod_")), "")
 
+        log_line = f"{timestamp} {host} {pod_name_tag} {service} {status} {message}"
+        logs.append(log_line)
 
-def generate_datadog_logs_url(
-    dd_config: DatadogLogsConfig,
-    params: FetchPodLogsParams,
-    storage_tier: DataDogStorageTier,
-) -> str:
-    """Generate a Datadog web UI URL for the logs query."""
-    from holmes.plugins.toolsets.utils import process_timestamps_to_int
-    from holmes.plugins.toolsets.datadog.datadog_api import convert_api_url_to_app_url
-
-    # Convert API URL to app URL using the shared helper
-    base_url = convert_api_url_to_app_url(dd_config.site_api_url)
-
-    # Build the query string
-    query = f"{dd_config.labels.namespace}:{params.namespace}"
-    query += f" {dd_config.labels.pod}:{params.pod_name}"
-    if params.filter:
-        filter = params.filter.replace('"', '\\"')
-        query += f' "{filter}"'
-
-    # Process timestamps - get Unix timestamps in seconds
-    (from_time_seconds, to_time_seconds) = process_timestamps_to_int(
-        start=params.start_time,
-        end=params.end_time,
-        default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
-    )
-
-    # Convert to milliseconds for Datadog web UI
-    from_time_ms = from_time_seconds * 1000
-    to_time_ms = to_time_seconds * 1000
-
-    # Build URL parameters matching Datadog's web UI format
-    url_params = {
-        "query": query,
-        "from_ts": str(from_time_ms),
-        "to_ts": str(to_time_ms),
-        "live": "true",
-        "storage": storage_tier.value,
-    }
-
-    # Add indexes if not default
-    if dd_config.indexes != ["*"]:
-        url_params["index"] = ",".join(dd_config.indexes)
+    return "\n".join(logs)
 
-    # Construct the full URL
-    return f"{base_url}/logs?{urlencode(url_params)}"
 
+class DatadogLogsToolset(Toolset):
+    """Toolset for working with Datadog logs data."""
 
-class DatadogLogsToolset(BasePodLoggingToolset):
     dd_config: Optional[DatadogLogsConfig] = None
 
-    @property
-    def supported_capabilities(self) -> Set[LoggingCapability]:
-        """Datadog logs API supports historical data and substring matching"""
-        return {
-            LoggingCapability.HISTORICAL_DATA
-        }  # No regex support, no exclude filter, but supports historical data
-
     def __init__(self):
         super().__init__(
            name="datadog/logs",
@@ -225,201 +75,50 @@ class DatadogLogsToolset(BasePodLoggingToolset):
             tags=[ToolsetTag.CORE],
         )
         # Now that parent is initialized and self.name exists, create the tool
-        self.tools = [PodLoggingTool(self)]
+        self.tools = [GetLogs(toolset=self)]
         self._reload_instructions()
 
-    def logger_name(self) -> str:
-        return "DataDog"
-
-    def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
+    def _perform_healthcheck(self) -> Tuple[bool, str]:
+        """Perform health check on Datadog logs API."""
         if not self.dd_config:
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.ERROR,
-                data=TOOLSET_CONFIG_MISSING_ERROR,
-                params=params.model_dump(),
-            )
-
+            return False, "Datadog configuration not initialized"
         try:
-            raw_logs = []
-            for storage_tier in self.dd_config.storage_tiers:
-                raw_logs = fetch_paginated_logs(
-                    params, self.dd_config, storage_tier=storage_tier
-                )
-
-                if raw_logs:
-                    logs_str = format_logs(raw_logs)
-                    # Generate Datadog web UI URL
-                    datadog_url = generate_datadog_logs_url(
-                        self.dd_config, params, storage_tier
-                    )
-                    logs_with_link = f"{logs_str}\n\nView in Datadog: {datadog_url}"
-                    return StructuredToolResult(
-                        status=StructuredToolResultStatus.SUCCESS,
-                        data=logs_with_link,
-                        url=datadog_url,
-                        params=params.model_dump(),
-                    )
-
-            # Include detailed diagnostic context
-            query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
-            if params.filter:
-                query += f' "{params.filter}"'
-
-            # Get actual time range used
-            (from_time, to_time) = process_timestamps_to_rfc3339(
-                start_timestamp=params.start_time,
-                end_timestamp=params.end_time,
-                default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
-            )
-
-            # Generate Datadog web UI URL for the last storage tier checked
-            datadog_url = generate_datadog_logs_url(
-                self.dd_config, params, self.dd_config.storage_tiers[-1]
-            )
-
-            # Build diagnostic information
-            diagnostics: Dict[str, Any] = {
-                "query_executed": query,
-                "time_range": f"{from_time} to {to_time}",
-                "indexes_searched": self.dd_config.indexes,
-                "storage_tiers_checked": [
-                    tier.value for tier in self.dd_config.storage_tiers
-                ],
-                "field_mappings": {
-                    "namespace_field": self.dd_config.labels.namespace,
-                    "pod_field": self.dd_config.labels.pod,
+            logging.info("Performing Datadog logs configuration healthcheck...")
+            headers = get_headers(self.dd_config)
+            payload = {
+                "filter": {
+                    "from": "now-1m",
+                    "to": "now",
+                    "query": "*",
+                    "indexes": self.dd_config.indexes,
                 },
-                "limit": params.limit or self.dd_config.default_limit,
-                "datadog_url": datadog_url,
+                "page": {"limit": 1},
             }
 
-            # Format diagnostic info as structured text
-            error_msg = (
-                f"No logs found.\n\n"
-                f"Diagnostic Information:\n"
-                f"----------------------\n"
-                f"Query executed: {diagnostics['query_executed']}\n"
-                f"Time range: {diagnostics['time_range']}\n"
-                f"Indexes searched: {diagnostics['indexes_searched']}\n"
-                f"Storage tiers checked: {', '.join(str(tier) for tier in diagnostics.get('storage_tiers_checked', []))}\n"
-                f"Field mappings:\n"
-                f"  - Namespace field: {diagnostics.get('field_mappings', {}).get('namespace_field', 'N/A')}\n"
-                f"  - Pod field: {diagnostics.get('field_mappings', {}).get('pod_field', 'N/A')}\n"
-                f"Limit: {diagnostics['limit']}\n\n"
-                f"View in Datadog: {diagnostics['datadog_url']}"
+            search_url = f"{self.dd_config.site_api_url}/api/v2/logs/events/search"
+            execute_datadog_http_request(
+                url=search_url,
+                headers=headers,
+                payload_or_params=payload,
+                timeout=self.dd_config.request_timeout,
+                method="POST",
             )
 
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.NO_DATA,
-                error=error_msg,
-                url=datadog_url,
-                params=params.model_dump(),
-            )
+            return True, ""
 
         except DataDogRequestError as e:
-            logging.exception(e, exc_info=True)
-
-            # Always try to generate Datadog URL for debugging
-            try:
-                datadog_url = generate_datadog_logs_url(
-                    self.dd_config, params, self.dd_config.storage_tiers[0]
-                )
-            except Exception:
-                datadog_url = None
-
-            # Provide more specific error message for rate limiting failures
-            if e.status_code == 429:
-                error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
-                if datadog_url:
-                    error_msg += f"\nView in Datadog: {datadog_url}"
-            elif e.status_code == 400:
-                # Use enhanced error message for validation errors
-                error_msg = enhance_error_message(
-                    e,
-                    "/api/v2/logs/events/search",
-                    "POST",
-                    str(self.dd_config.site_api_url),
+            logging.error(
+                f"Datadog API error during healthcheck: {e.status_code} - {e.response_text}"
+            )
+            if e.status_code == 403:
+                return (
+                    False,
+                    "API key lacks required permissions. Make sure your API key has 'apm_read' scope.",
                 )
-
-                # Add query context
-                query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
-                if params.filter:
-                    query += f' "{params.filter}"'
-                error_msg += f"\n\nQuery attempted: {query}"
-
-                # Add Datadog web UI URL to error message
-                if datadog_url:
-                    error_msg += f"\nView in Datadog: {datadog_url}"
             else:
-                # Include full API error details and query context
-                error_msg = (
-                    f"Datadog API error (status {e.status_code}): {e.response_text}"
-                )
-                query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
-                if params.filter:
-                    query += f' "{params.filter}"'
-                error_msg += f"\nQuery: {query}"
-
-                # Get actual time range used
-                (from_time, to_time) = process_timestamps_to_rfc3339(
-                    start_timestamp=params.start_time,
-                    end_timestamp=params.end_time,
-                    default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
-                )
-                error_msg += f"\nTime range: {from_time} to {to_time}"
-
-                # Add Datadog web UI URL to error message
-                if datadog_url:
-                    error_msg += f"\nView in Datadog: {datadog_url}"
-
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.ERROR,
-                error=error_msg,
-                url=datadog_url,
-                params=params.model_dump(),
-                invocation=json.dumps(e.payload),
-            )
-
-        except Exception as e:
-            logging.exception(
-                f"Failed to query Datadog logs for params: {params}", exc_info=True
-            )
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.ERROR,
-                error=f"Exception while querying Datadog: {str(e)}",
-                params=params.model_dump(),
-            )
-
-    def _perform_healthcheck(self) -> Tuple[bool, str]:
-        """
-        Perform a healthcheck by fetching a single log from Datadog.
-        Returns (success, error_message).
-        """
-        try:
-            logging.debug("Performing Datadog configuration healthcheck...")
-            healthcheck_params = FetchPodLogsParams(
-                namespace="*",
-                pod_name="*",
-                limit=1,
-                start_time="-172800",  # 48 hours in seconds
-            )
-
-            result = self.fetch_pod_logs(healthcheck_params)
-
-            if result.status == StructuredToolResultStatus.ERROR:
-                error_msg = result.error or "Unknown error during healthcheck"
-                logging.error(f"Datadog healthcheck failed: {error_msg}")
-                return False, f"Datadog healthcheck failed: {error_msg}"
-            elif result.status == StructuredToolResultStatus.NO_DATA:
-                error_msg = "No logs were found in the last 48 hours using wildcards for pod and namespace. Is the configuration correct?"
-                logging.error(f"Datadog healthcheck failed: {error_msg}")
-                return False, f"Datadog healthcheck failed: {error_msg}"
-
-            logging.info("Datadog healthcheck completed successfully")
-            return True, ""
-
+            return False, f"Datadog API error: {e.status_code} - {e.response_text}"
         except Exception as e:
-            logging.exception("Failed during Datadog healthcheck")
+            logging.exception("Failed during Datadog traces healthcheck")
             return False, f"Healthcheck failed with exception: {str(e)}"
 
     def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
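
The healthcheck above no longer round-trips through pod-log fetching; it issues a single one-event search over the last minute. A standalone sketch of the equivalent raw request (get_headers is defined outside this diff; the auth header names below are the standard Datadog ones, stated here as an assumption):

# Sketch: the HTTP call the new _perform_healthcheck boils down to.
import requests

payload = {
    "filter": {"from": "now-1m", "to": "now", "query": "*", "indexes": ["*"]},
    "page": {"limit": 1},
}
resp = requests.post(
    "https://api.datadoghq.com/api/v2/logs/events/search",
    headers={
        "DD-API-KEY": "<your_datadog_api_key>",  # assumed header names;
        "DD-APPLICATION-KEY": "<your_datadog_app_key>",  # get_headers() is not shown in this diff
        "Content-Type": "application/json",
    },
    json=payload,
    timeout=60,
)
resp.raise_for_status()  # a 403 here maps to the permissions error handled above
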
@@ -433,7 +132,6 @@ class DatadogLogsToolset(BasePodLoggingToolset):
             dd_config = DatadogLogsConfig(**config)
             self.dd_config = dd_config
 
-            # Perform healthcheck
             success, error_msg = self._perform_healthcheck()
             return success, error_msg
 
@@ -442,11 +140,13 @@ class DatadogLogsToolset(BasePodLoggingToolset):
             return (False, f"Failed to parse Datadog configuration: {str(e)}")
 
     def get_example_config(self) -> Dict[str, Any]:
-        return {
-            "dd_api_key": "your-datadog-api-key",
-            "dd_app_key": "your-datadog-application-key",
-            "site_api_url": "https://api.datadoghq.com",
-        }
+        """Get example configuration for this toolset."""
+        example_config = DatadogLogsConfig(
+            dd_api_key="<your_datadog_api_key>",
+            dd_app_key="<your_datadog_app_key>",
+            site_api_url=AnyUrl("https://api.datadoghq.com"),
+        )
+        return example_config.model_dump(mode="json")
 
     def _reload_instructions(self):
         """Load Datadog logs specific troubleshooting instructions."""
@@ -454,3 +154,152 @@ class DatadogLogsToolset(BasePodLoggingToolset):
             os.path.join(os.path.dirname(__file__), "datadog_logs_instructions.jinja2")
         )
         self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
+
+
+class GetLogs(Tool):
+    """Tool to search for logs with specific search query."""
+
+    toolset: "DatadogLogsToolset"
+    name: str = "fetch_datadog_logs"
+    description: str = "Search for logs in Datadog using search query syntax"
+    "Uses the DataDog api endpoint: POST /api/v2/logs/events/search with 'query' parameter. (e.g., 'service:web-app @http.status_code:500')"
+    parameters: Dict[str, ToolParameter] = {
+        "query": ToolParameter(
+            description="The search query - following the logs search syntax. default: *",
+            type="string",
+            required=False,
+        ),
+        "start_datetime": ToolParameter(
+            description=standard_start_datetime_tool_param_description(
+                DEFAULT_TIME_SPAN_SECONDS
+            ),
+            type="string",
+            required=False,
+        ),
+        "end_datetime": ToolParameter(
+            description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
+            type="string",
+            required=False,
+        ),
+        "cursor": ToolParameter(
+            description="The returned paging point to use to get the next results. IMPORTANT: Cursors are single-use and stateful - never reuse the same cursor value multiple times or parallelize cursor-based calls. Each response provides a new cursor for the subsequent request.",
+            type="string",
+            required=False,
+        ),
+        "limit": ToolParameter(
+            description=f"Maximum number of log records to return. Defaults to {DEFAULT_LOG_LIMIT}. This value is user-configured and represents the maximum allowed limit.",
+            type="integer",
+            required=False,
+        ),
+        "sort_desc": ToolParameter(
+            description="Get the results in descending order. default: true",
+            type="boolean",
+            required=False,
+        ),
+    }
+
+    def get_parameterized_one_liner(self, params: dict) -> str:
+        """Get a one-liner description of the tool invocation."""
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Logs ({params['query'] if 'query' in params else ''})"
+
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
+        """Execute the tool to search logs."""
+        if not self.toolset.dd_config:
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error="Datadog configuration not initialized",
+                params=params,
+            )
+        url = None
+        payload: Optional[Dict[str, Any]] = None
+        try:
+            # Process timestamps
+            from_time_int, to_time_int = process_timestamps_to_int(
+                start=params.get("start_datetime"),
+                end=params.get("end_datetime"),
+                default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
+            )
+
+            # Convert to milliseconds for Datadog API
+            from_time_ms = from_time_int * 1000
+            to_time_ms = to_time_int * 1000
+
+            config_limit = self.toolset.dd_config.default_limit
+            limit = min(params.get("limit", config_limit), config_limit)
+            params["limit"] = limit
+            sort = "timestamp" if params.get("sort_desc", False) else "-timestamp"
+
+            url = f"{self.toolset.dd_config.site_api_url}/api/v2/logs/events/search"
+            headers = get_headers(self.toolset.dd_config)
+
+            storage = self.toolset.dd_config.storage_tiers[-1]
+            payload = {
+                "filter": {
+                    "query": params.get("query", "*"),
+                    "from": str(from_time_ms),
+                    "to": str(to_time_ms),
+                    "storage_tier": storage,
+                    "indexes": self.toolset.dd_config.indexes,
+                },
+                "page": {
+                    "limit": limit,
+                },
+                "sort": sort,
+            }
+
+            if params.get("cursor"):
+                payload["page"]["cursor"] = params["cursor"]
+
+            response = execute_datadog_http_request(
+                url=url,
+                headers=headers,
+                payload_or_params=payload,
+                timeout=self.toolset.dd_config.request_timeout,
+                method="POST",
+            )
+
+            if self.toolset.dd_config.compact_logs and response.get("data"):
+                response["data"] = format_logs(response["data"])
+
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.SUCCESS,
+                data=response,
+                params=params,
+                url=generate_datadog_logs_url(self.toolset.dd_config, payload),
+            )
+
+        except DataDogRequestError as e:
+            logging.exception(e, exc_info=True)
+            if e.status_code == 429:
+                error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
+            elif e.status_code == 403:
+                error_msg = (
+                    f"Permission denied. Ensure your Datadog Application Key has the 'apm_read' "
+                    f"permission. Error: {str(e)}"
+                )
+            else:
+                error_msg = f"Exception while querying Datadog: {str(e)}"
+
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error=error_msg,
+                params=params,
+                invocation=(
+                    json.dumps({"url": url, "payload": payload})
+                    if url and payload
+                    else None
+                ),
+            )
+
+        except Exception as e:
+            logging.exception(e, exc_info=True)
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error=f"Unexpected error: {str(e)}",
+                params=params,
+                invocation=(
+                    json.dumps({"url": url, "payload": payload})
+                    if url and payload
+                    else None
+                ),
+            )
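
The cursor parameter on the new fetch_datadog_logs tool is single-use: each response carries a fresh paging point, and a previous cursor must never be reused. A sketch of the intended loop over the raw search endpoint (post_search is a hypothetical helper standing in for execute_datadog_http_request; reading the cursor from meta.page.after follows Datadog's documented v2 response shape, stated here as an assumption):

# Sketch: single-use cursor pagination over /api/v2/logs/events/search.
from typing import Any, Callable, Dict, List

def fetch_all_pages(
    post_search: Callable[[Dict[str, Any]], Dict[str, Any]],
    payload: Dict[str, Any],
    max_pages: int = 5,
) -> List[dict]:
    events: List[dict] = []
    for _ in range(max_pages):
        response = post_search(payload)  # POSTs the payload, returns parsed JSON
        events.extend(response.get("data", []))
        # Each response provides a new cursor; never reuse or parallelize an old one.
        cursor = response.get("meta", {}).get("page", {}).get("after")
        if not cursor:
            break
        payload.setdefault("page", {})["cursor"] = cursor  # set once, then re-POST
    return events
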