holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +20 -6
  3. holmes/common/env_vars.py +58 -3
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +123 -148
  6. holmes/core/conversations.py +71 -15
  7. holmes/core/feedback.py +191 -0
  8. holmes/core/investigation.py +31 -39
  9. holmes/core/investigation_structured_output.py +3 -3
  10. holmes/core/issue.py +1 -1
  11. holmes/core/llm.py +508 -88
  12. holmes/core/models.py +108 -4
  13. holmes/core/openai_formatting.py +14 -1
  14. holmes/core/prompt.py +48 -3
  15. holmes/core/runbooks.py +1 -0
  16. holmes/core/safeguards.py +8 -6
  17. holmes/core/supabase_dal.py +295 -100
  18. holmes/core/tool_calling_llm.py +489 -428
  19. holmes/core/tools.py +325 -56
  20. holmes/core/tools_utils/token_counting.py +21 -0
  21. holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
  22. holmes/core/tools_utils/tool_executor.py +0 -13
  23. holmes/core/tools_utils/toolset_utils.py +1 -0
  24. holmes/core/toolset_manager.py +191 -5
  25. holmes/core/tracing.py +19 -3
  26. holmes/core/transformers/__init__.py +23 -0
  27. holmes/core/transformers/base.py +63 -0
  28. holmes/core/transformers/llm_summarize.py +175 -0
  29. holmes/core/transformers/registry.py +123 -0
  30. holmes/core/transformers/transformer.py +32 -0
  31. holmes/core/truncation/compaction.py +94 -0
  32. holmes/core/truncation/dal_truncation_utils.py +23 -0
  33. holmes/core/truncation/input_context_window_limiter.py +219 -0
  34. holmes/interactive.py +228 -31
  35. holmes/main.py +23 -40
  36. holmes/plugins/interfaces.py +2 -1
  37. holmes/plugins/prompts/__init__.py +2 -1
  38. holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
  39. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
  41. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  42. holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
  43. holmes/plugins/prompts/generic_ask.jinja2 +0 -4
  44. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
  45. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
  46. holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
  47. holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
  48. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
  49. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
  50. holmes/plugins/runbooks/__init__.py +145 -17
  51. holmes/plugins/runbooks/catalog.json +2 -0
  52. holmes/plugins/sources/github/__init__.py +4 -2
  53. holmes/plugins/sources/prometheus/models.py +1 -0
  54. holmes/plugins/toolsets/__init__.py +44 -27
  55. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  56. holmes/plugins/toolsets/aks.yaml +64 -0
  57. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
  58. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  59. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  60. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  61. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  62. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  63. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
  64. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
  65. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
  66. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
  67. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
  68. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
  69. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
  70. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
  71. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
  72. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
  73. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
  74. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  75. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  76. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  77. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  78. holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
  79. holmes/plugins/toolsets/bash/common/bash.py +23 -13
  80. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  81. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  82. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  83. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  84. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  85. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  86. holmes/plugins/toolsets/cilium.yaml +284 -0
  87. holmes/plugins/toolsets/connectivity_check.py +124 -0
  88. holmes/plugins/toolsets/coralogix/api.py +132 -119
  89. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  90. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  91. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  92. holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
  93. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
  94. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  95. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  96. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  97. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  98. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
  99. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
  100. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
  101. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
  102. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  103. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  104. holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  105. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
  106. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
  107. holmes/plugins/toolsets/git.py +54 -50
  108. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  109. holmes/plugins/toolsets/grafana/common.py +13 -29
  110. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
  111. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
  112. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
  113. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  114. holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
  115. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
  116. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  117. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
  118. holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
  119. holmes/plugins/toolsets/internet/internet.py +15 -16
  120. holmes/plugins/toolsets/internet/notion.py +9 -11
  121. holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
  122. holmes/plugins/toolsets/investigator/model.py +3 -1
  123. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  124. holmes/plugins/toolsets/kafka.py +36 -42
  125. holmes/plugins/toolsets/kubernetes.yaml +317 -113
  126. holmes/plugins/toolsets/kubernetes_logs.py +9 -9
  127. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  128. holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
  129. holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
  130. holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
  131. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
  132. holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
  133. holmes/plugins/toolsets/openshift.yaml +283 -0
  134. holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
  135. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
  136. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  137. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  138. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
  139. holmes/plugins/toolsets/robusta/robusta.py +239 -68
  140. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  141. holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
  142. holmes/plugins/toolsets/service_discovery.py +1 -1
  143. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  144. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  145. holmes/plugins/toolsets/utils.py +88 -0
  146. holmes/utils/config_utils.py +91 -0
  147. holmes/utils/connection_utils.py +31 -0
  148. holmes/utils/console/result.py +10 -0
  149. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  150. holmes/utils/env.py +7 -0
  151. holmes/utils/file_utils.py +2 -1
  152. holmes/utils/global_instructions.py +60 -11
  153. holmes/utils/holmes_status.py +6 -4
  154. holmes/utils/holmes_sync_toolsets.py +0 -2
  155. holmes/utils/krr_utils.py +188 -0
  156. holmes/utils/log.py +15 -0
  157. holmes/utils/markdown_utils.py +2 -3
  158. holmes/utils/memory_limit.py +58 -0
  159. holmes/utils/sentry_helper.py +64 -0
  160. holmes/utils/stream.py +69 -8
  161. holmes/utils/tags.py +4 -3
  162. holmes/version.py +37 -15
  163. holmesgpt-0.18.4.dist-info/LICENSE +178 -0
  164. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
  165. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  166. holmes/core/performance_timing.py +0 -72
  167. holmes/plugins/toolsets/aws.yaml +0 -80
  168. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
  169. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  170. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
  171. holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
  172. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  173. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  174. holmes/plugins/toolsets/newrelic.py +0 -231
  175. holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
  176. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  177. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
  178. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  179. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  180. holmes/plugins/toolsets/servicenow/install.md +0 -37
  181. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  182. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  183. holmes/utils/keygen_utils.py +0 -6
  184. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  185. holmesgpt-0.13.2.dist-info/RECORD +0 -234
  186. /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
  187. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  188. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
@@ -1,268 +1,137 @@
1
- import os
2
- from enum import Enum
3
1
  import json
4
2
  import logging
5
- from typing import Any, Optional, Dict, Tuple, Set
3
+ import os
4
+ from typing import Any, Dict, Optional, Tuple
5
+
6
+ from pydantic import AnyUrl
7
+
6
8
  from holmes.core.tools import (
7
9
  CallablePrerequisite,
10
+ StructuredToolResult,
11
+ StructuredToolResultStatus,
12
+ Tool,
13
+ ToolInvokeContext,
14
+ ToolParameter,
8
15
  ToolsetTag,
9
16
  )
10
- from pydantic import BaseModel, Field
11
- from holmes.core.tools import StructuredToolResult, ToolResultStatus
12
- from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
17
+ from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
13
18
  from holmes.plugins.toolsets.datadog.datadog_api import (
14
- DatadogBaseConfig,
19
+ MAX_RETRY_COUNT_ON_RATE_LIMIT,
15
20
  DataDogRequestError,
16
- execute_paginated_datadog_http_request,
21
+ execute_datadog_http_request,
17
22
  get_headers,
18
- MAX_RETRY_COUNT_ON_RATE_LIMIT,
19
23
  )
24
+ from holmes.plugins.toolsets.datadog.datadog_models import (
25
+ DatadogLogsConfig,
26
+ )
27
+ from holmes.plugins.toolsets.datadog.datadog_url_utils import generate_datadog_logs_url
20
28
  from holmes.plugins.toolsets.logging_utils.logging_api import (
21
- DEFAULT_TIME_SPAN_SECONDS,
22
29
  DEFAULT_LOG_LIMIT,
23
- BasePodLoggingToolset,
24
- FetchPodLogsParams,
25
- LoggingCapability,
26
- PodLoggingTool,
30
+ DEFAULT_TIME_SPAN_SECONDS,
31
+ Toolset,
32
+ )
33
+ from holmes.plugins.toolsets.utils import (
34
+ process_timestamps_to_int,
35
+ standard_start_datetime_tool_param_description,
36
+ toolset_name_for_one_liner,
27
37
  )
28
- from holmes.plugins.toolsets.utils import process_timestamps_to_rfc3339
29
-
30
-
31
- class DataDogLabelsMapping(BaseModel):
32
- pod: str = "pod_name"
33
- namespace: str = "kube_namespace"
34
-
35
-
36
- class DataDogStorageTier(str, Enum):
37
- INDEXES = "indexes"
38
- ONLINE_ARCHIVES = "online-archives"
39
- FLEX = "flex"
40
-
41
-
42
- DEFAULT_STORAGE_TIERS = [DataDogStorageTier.INDEXES]
43
-
44
-
45
- class DatadogLogsConfig(DatadogBaseConfig):
46
- indexes: list[str] = ["*"]
47
- # Ordered list of storage tiers. Works as fallback. Subsequent tiers are queried only if the previous tier yielded no result
48
- storage_tiers: list[DataDogStorageTier] = Field(
49
- default=DEFAULT_STORAGE_TIERS, min_length=1
50
- )
51
- labels: DataDogLabelsMapping = DataDogLabelsMapping()
52
- page_size: int = 300
53
- default_limit: int = DEFAULT_LOG_LIMIT
54
-
55
-
56
- def calculate_page_size(
57
- params: FetchPodLogsParams, dd_config: DatadogLogsConfig, logs: list
58
- ) -> int:
59
- logs_count = len(logs)
60
-
61
- max_logs_count = dd_config.default_limit
62
- if params.limit:
63
- max_logs_count = params.limit
64
-
65
- return min(dd_config.page_size, max(0, max_logs_count - logs_count))
66
-
67
-
68
- def fetch_paginated_logs(
69
- params: FetchPodLogsParams,
70
- dd_config: DatadogLogsConfig,
71
- storage_tier: DataDogStorageTier,
72
- ) -> list[dict]:
73
- limit = params.limit or dd_config.default_limit
74
-
75
- (from_time, to_time) = process_timestamps_to_rfc3339(
76
- start_timestamp=params.start_time,
77
- end_timestamp=params.end_time,
78
- default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
79
- )
80
-
81
- url = f"{dd_config.site_api_url}/api/v2/logs/events/search"
82
- headers = get_headers(dd_config)
83
-
84
- query = f"{dd_config.labels.namespace}:{params.namespace}"
85
- query += f" {dd_config.labels.pod}:{params.pod_name}"
86
- if params.filter:
87
- filter = params.filter.replace('"', '\\"')
88
- query += f' "{filter}"'
89
-
90
- payload: Dict[str, Any] = {
91
- "filter": {
92
- "from": from_time,
93
- "to": to_time,
94
- "query": query,
95
- "indexes": dd_config.indexes,
96
- "storage_tier": storage_tier.value,
97
- },
98
- "sort": "-timestamp",
99
- "page": {"limit": calculate_page_size(params, dd_config, [])},
100
- }
101
-
102
- logs, cursor = execute_paginated_datadog_http_request(
103
- url=url,
104
- headers=headers,
105
- payload_or_params=payload,
106
- timeout=dd_config.request_timeout,
107
- )
108
-
109
- while cursor and len(logs) < limit:
110
- payload["page"]["cursor"] = cursor
111
- new_logs, cursor = execute_paginated_datadog_http_request(
112
- url=url,
113
- headers=headers,
114
- payload_or_params=payload,
115
- timeout=dd_config.request_timeout,
116
- )
117
- logs += new_logs
118
- payload["page"]["limit"] = calculate_page_size(params, dd_config, logs)
119
-
120
- # logs are fetched descending order. Unified logging API follows the pattern of kubectl logs where oldest logs are first
121
- logs.reverse()
122
-
123
- if len(logs) > limit:
124
- logs = logs[-limit:]
125
- return logs
126
38
 
127
39
 
128
40
  def format_logs(raw_logs: list[dict]) -> str:
41
+ # Use similar structure to Datadog Log Explorer
129
42
  logs = []
130
43
 
131
44
  for raw_log_item in raw_logs:
132
- message = raw_log_item.get("attributes", {}).get(
133
- "message", json.dumps(raw_log_item)
134
- )
135
- logs.append(message)
45
+ attrs = raw_log_item.get("attributes", {})
46
+
47
+ timestamp = attrs.get("timestamp") or attrs.get("@timestamp", "")
48
+ host = attrs.get("host", "")
49
+ service = attrs.get("service", "")
50
+ status = attrs.get("attributes", {}).get("status") or attrs.get("status", "")
51
+ message = attrs.get("message", json.dumps(raw_log_item))
52
+ tags = attrs.get("tags", [])
53
+
54
+ pod_name_tag = next((t for t in tags if t.startswith("pod_")), "")
55
+
56
+ log_line = f"{timestamp} {host} {pod_name_tag} {service} {status} {message}"
57
+ logs.append(log_line)
136
58
 
137
59
  return "\n".join(logs)
138
60
 
139
61
 
140
- class DatadogLogsToolset(BasePodLoggingToolset):
141
- dd_config: Optional[DatadogLogsConfig] = None
62
+ class DatadogLogsToolset(Toolset):
63
+ """Toolset for working with Datadog logs data."""
142
64
 
143
- @property
144
- def supported_capabilities(self) -> Set[LoggingCapability]:
145
- """Datadog logs API supports historical data and substring matching"""
146
- return {
147
- LoggingCapability.HISTORICAL_DATA
148
- } # No regex support, no exclude filter, but supports historical data
65
+ dd_config: Optional[DatadogLogsConfig] = None
149
66
 
150
67
  def __init__(self):
151
68
  super().__init__(
152
69
  name="datadog/logs",
153
70
  description="Toolset for fetching logs from Datadog, including historical data for pods no longer in the cluster",
154
- docs_url="https://docs.datadoghq.com/api/latest/logs/",
71
+ docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
155
72
  icon_url="https://imgix.datadoghq.com//img/about/presskit/DDlogo.jpg",
156
73
  prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
157
74
  tools=[], # Initialize with empty tools first
158
- experimental=True,
159
75
  tags=[ToolsetTag.CORE],
160
76
  )
161
77
  # Now that parent is initialized and self.name exists, create the tool
162
- self.tools = [PodLoggingTool(self)]
78
+ self.tools = [GetLogs(toolset=self)]
163
79
  self._reload_instructions()
164
80
 
165
- def logger_name(self) -> str:
166
- return "DataDog"
167
-
168
- def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
169
- if not self.dd_config:
170
- return StructuredToolResult(
171
- status=ToolResultStatus.ERROR,
172
- data=TOOLSET_CONFIG_MISSING_ERROR,
173
- params=params.model_dump(),
174
- )
175
-
176
- try:
177
- raw_logs = []
178
- for storage_tier in self.dd_config.storage_tiers:
179
- raw_logs = fetch_paginated_logs(
180
- params, self.dd_config, storage_tier=storage_tier
181
- )
182
-
183
- if raw_logs:
184
- logs_str = format_logs(raw_logs)
185
- return StructuredToolResult(
186
- status=ToolResultStatus.SUCCESS,
187
- data=logs_str,
188
- params=params.model_dump(),
189
- )
190
-
191
- return StructuredToolResult(
192
- status=ToolResultStatus.NO_DATA,
193
- params=params.model_dump(),
194
- )
195
-
196
- except DataDogRequestError as e:
197
- logging.exception(e, exc_info=True)
198
-
199
- # Provide more specific error message for rate limiting failures
200
- if e.status_code == 429:
201
- error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
202
- else:
203
- error_msg = f"Exception while querying Datadog: {str(e)}"
204
-
205
- return StructuredToolResult(
206
- status=ToolResultStatus.ERROR,
207
- error=error_msg,
208
- params=params.model_dump(),
209
- invocation=json.dumps(e.payload),
210
- )
211
-
212
- except Exception as e:
213
- logging.exception(
214
- f"Failed to query Datadog logs for params: {params}", exc_info=True
215
- )
216
- return StructuredToolResult(
217
- status=ToolResultStatus.ERROR,
218
- error=f"Exception while querying Datadog: {str(e)}",
219
- params=params.model_dump(),
220
- )
221
-
222
81
  def _perform_healthcheck(self) -> Tuple[bool, str]:
223
- """
224
- Perform a healthcheck by fetching a single log from Datadog.
225
- Returns (success, error_message).
226
- """
82
+ """Perform health check on Datadog logs API."""
83
+ if not self.dd_config:
84
+ return False, "Datadog configuration not initialized"
227
85
  try:
228
- logging.info("Performing Datadog configuration healthcheck...")
229
- healthcheck_params = FetchPodLogsParams(
230
- namespace="*",
231
- pod_name="*",
232
- limit=1,
233
- start_time="-172800", # 48 hours in seconds
86
+ logging.info("Performing Datadog logs configuration healthcheck...")
87
+ headers = get_headers(self.dd_config)
88
+ payload = {
89
+ "filter": {
90
+ "from": "now-1m",
91
+ "to": "now",
92
+ "query": "*",
93
+ "indexes": self.dd_config.indexes,
94
+ },
95
+ "page": {"limit": 1},
96
+ }
97
+
98
+ search_url = f"{self.dd_config.site_api_url}/api/v2/logs/events/search"
99
+ execute_datadog_http_request(
100
+ url=search_url,
101
+ headers=headers,
102
+ payload_or_params=payload,
103
+ timeout=self.dd_config.request_timeout,
104
+ method="POST",
234
105
  )
235
106
 
236
- result = self.fetch_pod_logs(healthcheck_params)
237
-
238
- if result.status == ToolResultStatus.ERROR:
239
- error_msg = result.error or "Unknown error during healthcheck"
240
- logging.error(f"Datadog healthcheck failed: {error_msg}")
241
- return False, f"Datadog healthcheck failed: {error_msg}"
242
- elif result.status == ToolResultStatus.NO_DATA:
243
- error_msg = "No logs were found in the last 48 hours using wildcards for pod and namespace. Is the configuration correct?"
244
- logging.error(f"Datadog healthcheck failed: {error_msg}")
245
- return False, f"Datadog healthcheck failed: {error_msg}"
246
-
247
- logging.info("Datadog healthcheck completed successfully")
248
107
  return True, ""
249
108
 
109
+ except DataDogRequestError as e:
110
+ logging.error(
111
+ f"Datadog API error during healthcheck: {e.status_code} - {e.response_text}"
112
+ )
113
+ if e.status_code == 403:
114
+ return (
115
+ False,
116
+ "API key lacks required permissions. Make sure your API key has 'apm_read' scope.",
117
+ )
118
+ else:
119
+ return False, f"Datadog API error: {e.status_code} - {e.response_text}"
250
120
  except Exception as e:
251
- logging.exception("Failed during Datadog healthcheck")
121
+ logging.exception("Failed during Datadog traces healthcheck")
252
122
  return False, f"Healthcheck failed with exception: {str(e)}"
253
123
 
254
124
  def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
255
125
  if not config:
256
126
  return (
257
127
  False,
258
- TOOLSET_CONFIG_MISSING_ERROR,
128
+ "Missing config for dd_api_key, dd_app_key, or site_api_url. For details: https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
259
129
  )
260
130
 
261
131
  try:
262
132
  dd_config = DatadogLogsConfig(**config)
263
133
  self.dd_config = dd_config
264
134
 
265
- # Perform healthcheck
266
135
  success, error_msg = self._perform_healthcheck()
267
136
  return success, error_msg
268
137
 
@@ -271,11 +140,13 @@ class DatadogLogsToolset(BasePodLoggingToolset):
271
140
  return (False, f"Failed to parse Datadog configuration: {str(e)}")
272
141
 
273
142
  def get_example_config(self) -> Dict[str, Any]:
274
- return {
275
- "dd_api_key": "your-datadog-api-key",
276
- "dd_app_key": "your-datadog-application-key",
277
- "site_api_url": "https://api.datadoghq.com",
278
- }
143
+ """Get example configuration for this toolset."""
144
+ example_config = DatadogLogsConfig(
145
+ dd_api_key="<your_datadog_api_key>",
146
+ dd_app_key="<your_datadog_app_key>",
147
+ site_api_url=AnyUrl("https://api.datadoghq.com"),
148
+ )
149
+ return example_config.model_dump(mode="json")
279
150
 
280
151
  def _reload_instructions(self):
281
152
  """Load Datadog logs specific troubleshooting instructions."""
@@ -283,3 +154,152 @@ class DatadogLogsToolset(BasePodLoggingToolset):
283
154
  os.path.join(os.path.dirname(__file__), "datadog_logs_instructions.jinja2")
284
155
  )
285
156
  self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
157
+
158
+
159
+ class GetLogs(Tool):
160
+ """Tool to search for logs with specific search query."""
161
+
162
+ toolset: "DatadogLogsToolset"
163
+ name: str = "fetch_datadog_logs"
164
+ description: str = "Search for logs in Datadog using search query syntax"
165
+ "Uses the DataDog api endpoint: POST /api/v2/logs/events/search with 'query' parameter. (e.g., 'service:web-app @http.status_code:500')"
166
+ parameters: Dict[str, ToolParameter] = {
167
+ "query": ToolParameter(
168
+ description="The search query - following the logs search syntax. default: *",
169
+ type="string",
170
+ required=False,
171
+ ),
172
+ "start_datetime": ToolParameter(
173
+ description=standard_start_datetime_tool_param_description(
174
+ DEFAULT_TIME_SPAN_SECONDS
175
+ ),
176
+ type="string",
177
+ required=False,
178
+ ),
179
+ "end_datetime": ToolParameter(
180
+ description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
181
+ type="string",
182
+ required=False,
183
+ ),
184
+ "cursor": ToolParameter(
185
+ description="The returned paging point to use to get the next results. IMPORTANT: Cursors are single-use and stateful - never reuse the same cursor value multiple times or parallelize cursor-based calls. Each response provides a new cursor for the subsequent request.",
186
+ type="string",
187
+ required=False,
188
+ ),
189
+ "limit": ToolParameter(
190
+ description=f"Maximum number of log records to return. Defaults to {DEFAULT_LOG_LIMIT}. This value is user-configured and represents the maximum allowed limit.",
191
+ type="integer",
192
+ required=False,
193
+ ),
194
+ "sort_desc": ToolParameter(
195
+ description="Get the results in descending order. default: true",
196
+ type="boolean",
197
+ required=False,
198
+ ),
199
+ }
200
+
201
+ def get_parameterized_one_liner(self, params: dict) -> str:
202
+ """Get a one-liner description of the tool invocation."""
203
+ return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Logs ({params['query'] if 'query' in params else ''})"
204
+
205
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
206
+ """Execute the tool to search logs."""
207
+ if not self.toolset.dd_config:
208
+ return StructuredToolResult(
209
+ status=StructuredToolResultStatus.ERROR,
210
+ error="Datadog configuration not initialized",
211
+ params=params,
212
+ )
213
+ url = None
214
+ payload: Optional[Dict[str, Any]] = None
215
+ try:
216
+ # Process timestamps
217
+ from_time_int, to_time_int = process_timestamps_to_int(
218
+ start=params.get("start_datetime"),
219
+ end=params.get("end_datetime"),
220
+ default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
221
+ )
222
+
223
+ # Convert to milliseconds for Datadog API
224
+ from_time_ms = from_time_int * 1000
225
+ to_time_ms = to_time_int * 1000
226
+
227
+ config_limit = self.toolset.dd_config.default_limit
228
+ limit = min(params.get("limit", config_limit), config_limit)
229
+ params["limit"] = limit
230
+ sort = "timestamp" if params.get("sort_desc", False) else "-timestamp"
231
+
232
+ url = f"{self.toolset.dd_config.site_api_url}/api/v2/logs/events/search"
233
+ headers = get_headers(self.toolset.dd_config)
234
+
235
+ storage = self.toolset.dd_config.storage_tiers[-1]
236
+ payload = {
237
+ "filter": {
238
+ "query": params.get("query", "*"),
239
+ "from": str(from_time_ms),
240
+ "to": str(to_time_ms),
241
+ "storage_tier": storage,
242
+ "indexes": self.toolset.dd_config.indexes,
243
+ },
244
+ "page": {
245
+ "limit": limit,
246
+ },
247
+ "sort": sort,
248
+ }
249
+
250
+ if params.get("cursor"):
251
+ payload["page"]["cursor"] = params["cursor"]
252
+
253
+ response = execute_datadog_http_request(
254
+ url=url,
255
+ headers=headers,
256
+ payload_or_params=payload,
257
+ timeout=self.toolset.dd_config.request_timeout,
258
+ method="POST",
259
+ )
260
+
261
+ if self.toolset.dd_config.compact_logs and response.get("data"):
262
+ response["data"] = format_logs(response["data"])
263
+
264
+ return StructuredToolResult(
265
+ status=StructuredToolResultStatus.SUCCESS,
266
+ data=response,
267
+ params=params,
268
+ url=generate_datadog_logs_url(self.toolset.dd_config, payload),
269
+ )
270
+
271
+ except DataDogRequestError as e:
272
+ logging.exception(e, exc_info=True)
273
+ if e.status_code == 429:
274
+ error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
275
+ elif e.status_code == 403:
276
+ error_msg = (
277
+ f"Permission denied. Ensure your Datadog Application Key has the 'apm_read' "
278
+ f"permission. Error: {str(e)}"
279
+ )
280
+ else:
281
+ error_msg = f"Exception while querying Datadog: {str(e)}"
282
+
283
+ return StructuredToolResult(
284
+ status=StructuredToolResultStatus.ERROR,
285
+ error=error_msg,
286
+ params=params,
287
+ invocation=(
288
+ json.dumps({"url": url, "payload": payload})
289
+ if url and payload
290
+ else None
291
+ ),
292
+ )
293
+
294
+ except Exception as e:
295
+ logging.exception(e, exc_info=True)
296
+ return StructuredToolResult(
297
+ status=StructuredToolResultStatus.ERROR,
298
+ error=f"Unexpected error: {str(e)}",
299
+ params=params,
300
+ invocation=(
301
+ json.dumps({"url": url, "payload": payload})
302
+ if url and payload
303
+ else None
304
+ ),
305
+ )