holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +20 -6
- holmes/common/env_vars.py +58 -3
- holmes/common/openshift.py +1 -1
- holmes/config.py +123 -148
- holmes/core/conversations.py +71 -15
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +31 -39
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +508 -88
- holmes/core/models.py +108 -4
- holmes/core/openai_formatting.py +14 -1
- holmes/core/prompt.py +48 -3
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +8 -6
- holmes/core/supabase_dal.py +295 -100
- holmes/core/tool_calling_llm.py +489 -428
- holmes/core/tools.py +325 -56
- holmes/core/tools_utils/token_counting.py +21 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
- holmes/core/tools_utils/tool_executor.py +0 -13
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +191 -5
- holmes/core/tracing.py +19 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +63 -0
- holmes/core/transformers/llm_summarize.py +175 -0
- holmes/core/transformers/registry.py +123 -0
- holmes/core/transformers/transformer.py +32 -0
- holmes/core/truncation/compaction.py +94 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +219 -0
- holmes/interactive.py +228 -31
- holmes/main.py +23 -40
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
- holmes/plugins/prompts/generic_ask.jinja2 +0 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
- holmes/plugins/runbooks/__init__.py +145 -17
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +44 -27
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
- holmes/plugins/toolsets/bash/common/bash.py +23 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/git.py +54 -50
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +13 -29
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
- holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
- holmes/plugins/toolsets/internet/internet.py +15 -16
- holmes/plugins/toolsets/internet/notion.py +9 -11
- holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +36 -42
- holmes/plugins/toolsets/kubernetes.yaml +317 -113
- holmes/plugins/toolsets/kubernetes_logs.py +9 -9
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
- holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
- holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
- holmes/plugins/toolsets/robusta/robusta.py +239 -68
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +60 -11
- holmes/utils/holmes_status.py +6 -4
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +64 -0
- holmes/utils/stream.py +69 -8
- holmes/utils/tags.py +4 -3
- holmes/version.py +37 -15
- holmesgpt-0.18.4.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- holmesgpt-0.13.2.dist-info/RECORD +0 -234
- /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
|
@@ -1,26 +1,40 @@
|
|
|
1
1
|
import json
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
|
-
from
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any, Dict, Optional, Tuple
|
|
6
|
+
|
|
7
|
+
from pydantic import AnyUrl
|
|
8
|
+
|
|
5
9
|
from holmes.core.tools import (
|
|
6
10
|
CallablePrerequisite,
|
|
7
11
|
StructuredToolResult,
|
|
12
|
+
StructuredToolResultStatus,
|
|
8
13
|
Tool,
|
|
14
|
+
ToolInvokeContext,
|
|
9
15
|
ToolParameter,
|
|
10
|
-
ToolResultStatus,
|
|
11
16
|
Toolset,
|
|
12
17
|
ToolsetTag,
|
|
13
18
|
)
|
|
14
19
|
from holmes.plugins.toolsets.consts import (
|
|
15
|
-
TOOLSET_CONFIG_MISSING_ERROR,
|
|
16
20
|
STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
|
|
21
|
+
TOOLSET_CONFIG_MISSING_ERROR,
|
|
17
22
|
)
|
|
18
23
|
from holmes.plugins.toolsets.datadog.datadog_api import (
|
|
19
|
-
|
|
24
|
+
MAX_RETRY_COUNT_ON_RATE_LIMIT,
|
|
20
25
|
DataDogRequestError,
|
|
21
26
|
execute_datadog_http_request,
|
|
22
27
|
get_headers,
|
|
23
|
-
|
|
28
|
+
)
|
|
29
|
+
from holmes.plugins.toolsets.datadog.datadog_models import DatadogMetricsConfig
|
|
30
|
+
from holmes.plugins.toolsets.datadog.datadog_url_utils import (
|
|
31
|
+
generate_datadog_metric_metadata_url,
|
|
32
|
+
generate_datadog_metric_tags_url,
|
|
33
|
+
generate_datadog_metrics_explorer_url,
|
|
34
|
+
generate_datadog_metrics_list_url,
|
|
35
|
+
)
|
|
36
|
+
from holmes.plugins.toolsets.logging_utils.logging_api import (
|
|
37
|
+
DEFAULT_TIME_SPAN_SECONDS,
|
|
24
38
|
)
|
|
25
39
|
from holmes.plugins.toolsets.utils import (
|
|
26
40
|
get_param_or_raise,
|
|
@@ -28,18 +42,6 @@ from holmes.plugins.toolsets.utils import (
|
|
|
28
42
|
standard_start_datetime_tool_param_description,
|
|
29
43
|
toolset_name_for_one_liner,
|
|
30
44
|
)
|
|
31
|
-
from holmes.plugins.toolsets.logging_utils.logging_api import (
|
|
32
|
-
DEFAULT_TIME_SPAN_SECONDS,
|
|
33
|
-
DEFAULT_LOG_LIMIT,
|
|
34
|
-
)
|
|
35
|
-
|
|
36
|
-
from datetime import datetime
|
|
37
|
-
|
|
38
|
-
from holmes.utils.keygen_utils import generate_random_key
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
class DatadogMetricsConfig(DatadogBaseConfig):
|
|
42
|
-
default_limit: int = DEFAULT_LOG_LIMIT
|
|
43
45
|
|
|
44
46
|
|
|
45
47
|
class BaseDatadogMetricsTool(Tool):
|
|
@@ -54,7 +56,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
|
|
|
54
56
|
def __init__(self, toolset: "DatadogMetricsToolset"):
|
|
55
57
|
super().__init__(
|
|
56
58
|
name="list_active_datadog_metrics",
|
|
57
|
-
description=f"List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
|
|
59
|
+
description=f"[datadog/metrics toolset] List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
|
|
58
60
|
parameters={
|
|
59
61
|
"from_time": ToolParameter(
|
|
60
62
|
description=f"Start time for listing metrics. Can be an RFC3339 formatted datetime (e.g. '2023-03-01T10:30:00Z') or a negative integer for relative seconds from now (e.g. -86400 for 24 hours ago). Defaults to {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours ago",
|
|
@@ -75,12 +77,10 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
|
|
|
75
77
|
toolset=toolset,
|
|
76
78
|
)
|
|
77
79
|
|
|
78
|
-
def _invoke(
|
|
79
|
-
self, params: dict, user_approved: bool = False
|
|
80
|
-
) -> StructuredToolResult:
|
|
80
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
81
81
|
if not self.toolset.dd_config:
|
|
82
82
|
return StructuredToolResult(
|
|
83
|
-
status=
|
|
83
|
+
status=StructuredToolResultStatus.ERROR,
|
|
84
84
|
error=TOOLSET_CONFIG_MISSING_ERROR,
|
|
85
85
|
params=params,
|
|
86
86
|
)
|
|
@@ -121,7 +121,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
|
|
|
121
121
|
metrics = data.get("metrics", [])
|
|
122
122
|
if not metrics:
|
|
123
123
|
return StructuredToolResult(
|
|
124
|
-
status=
|
|
124
|
+
status=StructuredToolResultStatus.ERROR,
|
|
125
125
|
data="Your filter returned no metrics. Change your filter and try again",
|
|
126
126
|
params=params,
|
|
127
127
|
)
|
|
@@ -132,10 +132,18 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
|
|
|
132
132
|
for metric in sorted(metrics):
|
|
133
133
|
output.append(metric)
|
|
134
134
|
|
|
135
|
+
url = generate_datadog_metrics_list_url(
|
|
136
|
+
self.toolset.dd_config,
|
|
137
|
+
from_time,
|
|
138
|
+
params.get("host"),
|
|
139
|
+
params.get("tag_filter"),
|
|
140
|
+
)
|
|
141
|
+
|
|
135
142
|
return StructuredToolResult(
|
|
136
|
-
status=
|
|
143
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
137
144
|
data="\n".join(output),
|
|
138
145
|
params=params,
|
|
146
|
+
url=url,
|
|
139
147
|
)
|
|
140
148
|
|
|
141
149
|
except DataDogRequestError as e:
|
|
@@ -149,10 +157,30 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
|
|
|
149
157
|
f"and 'timeseries_query' permissions. Error: {str(e)}"
|
|
150
158
|
)
|
|
151
159
|
else:
|
|
152
|
-
|
|
160
|
+
# Include full API error details for better debugging
|
|
161
|
+
error_msg = (
|
|
162
|
+
f"Datadog API error (status {e.status_code}): {e.response_text}"
|
|
163
|
+
)
|
|
164
|
+
if params:
|
|
165
|
+
# ListActiveMetrics parameters: from_time, host, tag_filter
|
|
166
|
+
if params.get("host"):
|
|
167
|
+
error_msg += f"\nHost filter: {params.get('host')}"
|
|
168
|
+
if params.get("tag_filter"):
|
|
169
|
+
error_msg += f"\nTag filter: {params.get('tag_filter')}"
|
|
170
|
+
|
|
171
|
+
from_time_param = params.get("from_time")
|
|
172
|
+
if from_time_param:
|
|
173
|
+
time_desc = from_time_param
|
|
174
|
+
else:
|
|
175
|
+
time_desc = f"default (last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours)"
|
|
176
|
+
error_msg += f"\nTime range: {time_desc}"
|
|
177
|
+
|
|
178
|
+
# Note: We cannot generate a Datadog Metrics Explorer URL for ListActiveMetrics
|
|
179
|
+
# because the Metrics Explorer requires a specific metric query,
|
|
180
|
+
# while ListActiveMetrics just lists available metrics without querying any specific one
|
|
153
181
|
|
|
154
182
|
return StructuredToolResult(
|
|
155
|
-
status=
|
|
183
|
+
status=StructuredToolResultStatus.ERROR,
|
|
156
184
|
error=error_msg,
|
|
157
185
|
params=params,
|
|
158
186
|
invocation=json.dumps({"url": url, "params": query_params})
|
|
@@ -165,7 +193,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
|
|
|
165
193
|
f"Failed to query Datadog metrics for params: {params}", exc_info=True
|
|
166
194
|
)
|
|
167
195
|
return StructuredToolResult(
|
|
168
|
-
status=
|
|
196
|
+
status=StructuredToolResultStatus.ERROR,
|
|
169
197
|
error=f"Exception while querying Datadog: {str(e)}",
|
|
170
198
|
params=params,
|
|
171
199
|
)
|
|
@@ -184,7 +212,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
184
212
|
def __init__(self, toolset: "DatadogMetricsToolset"):
|
|
185
213
|
super().__init__(
|
|
186
214
|
name="query_datadog_metrics",
|
|
187
|
-
description="Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
|
|
215
|
+
description="[datadog/metrics toolset] Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
|
|
188
216
|
parameters={
|
|
189
217
|
"query": ToolParameter(
|
|
190
218
|
description="The metric query string (e.g., 'system.cpu.user{host:myhost}')",
|
|
@@ -217,12 +245,10 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
217
245
|
toolset=toolset,
|
|
218
246
|
)
|
|
219
247
|
|
|
220
|
-
def _invoke(
|
|
221
|
-
self, params: dict, user_approved: bool = False
|
|
222
|
-
) -> StructuredToolResult:
|
|
248
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
223
249
|
if not self.toolset.dd_config:
|
|
224
250
|
return StructuredToolResult(
|
|
225
|
-
status=
|
|
251
|
+
status=StructuredToolResultStatus.ERROR,
|
|
226
252
|
error=TOOLSET_CONFIG_MISSING_ERROR,
|
|
227
253
|
params=params,
|
|
228
254
|
)
|
|
@@ -261,9 +287,29 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
261
287
|
output_type = params.get("output_type", "Plain")
|
|
262
288
|
|
|
263
289
|
if not series:
|
|
290
|
+
# Include detailed context in error message
|
|
291
|
+
from_time_param = params.get("from_time")
|
|
292
|
+
to_time_param = params.get("to_time")
|
|
293
|
+
|
|
294
|
+
if from_time_param:
|
|
295
|
+
from_desc = from_time_param
|
|
296
|
+
else:
|
|
297
|
+
from_desc = (
|
|
298
|
+
f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
|
|
299
|
+
)
|
|
300
|
+
|
|
301
|
+
to_desc = to_time_param or "now"
|
|
302
|
+
|
|
303
|
+
error_msg = (
|
|
304
|
+
f"The query returned no data.\n"
|
|
305
|
+
f"Query: {params.get('query', 'not specified')}\n"
|
|
306
|
+
f"Time range: {from_desc} to {to_desc}\n"
|
|
307
|
+
f"Please check your query syntax and ensure data exists for this time range."
|
|
308
|
+
)
|
|
309
|
+
|
|
264
310
|
return StructuredToolResult(
|
|
265
|
-
status=
|
|
266
|
-
error=
|
|
311
|
+
status=StructuredToolResultStatus.NO_DATA,
|
|
312
|
+
error=error_msg,
|
|
267
313
|
params=params,
|
|
268
314
|
)
|
|
269
315
|
|
|
@@ -304,7 +350,6 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
304
350
|
response_data = {
|
|
305
351
|
"status": "success",
|
|
306
352
|
"error_message": None,
|
|
307
|
-
"random_key": generate_random_key(),
|
|
308
353
|
"tool_name": self.name,
|
|
309
354
|
"description": description,
|
|
310
355
|
"query": query,
|
|
@@ -315,11 +360,18 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
315
360
|
"data": {"resultType": "matrix", "result": prometheus_result},
|
|
316
361
|
}
|
|
317
362
|
|
|
318
|
-
|
|
363
|
+
url = generate_datadog_metrics_explorer_url(
|
|
364
|
+
self.toolset.dd_config,
|
|
365
|
+
query,
|
|
366
|
+
from_time,
|
|
367
|
+
to_time,
|
|
368
|
+
)
|
|
369
|
+
|
|
319
370
|
return StructuredToolResult(
|
|
320
|
-
status=
|
|
321
|
-
data=
|
|
371
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
372
|
+
data=response_data,
|
|
322
373
|
params=params,
|
|
374
|
+
url=url,
|
|
323
375
|
)
|
|
324
376
|
|
|
325
377
|
except DataDogRequestError as e:
|
|
@@ -333,10 +385,28 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
333
385
|
f"and 'timeseries_query' permissions. Error: {str(e)}"
|
|
334
386
|
)
|
|
335
387
|
else:
|
|
336
|
-
|
|
388
|
+
# Include full API error details for better debugging
|
|
389
|
+
error_msg = (
|
|
390
|
+
f"Datadog API error (status {e.status_code}): {e.response_text}"
|
|
391
|
+
)
|
|
392
|
+
if params:
|
|
393
|
+
error_msg += f"\nQuery: {params.get('query', 'not specified')}"
|
|
394
|
+
|
|
395
|
+
from_time_param = params.get("from_time")
|
|
396
|
+
to_time_param = params.get("to_time")
|
|
397
|
+
|
|
398
|
+
if from_time_param:
|
|
399
|
+
from_desc = from_time_param
|
|
400
|
+
else:
|
|
401
|
+
from_desc = (
|
|
402
|
+
f"default (last {DEFAULT_TIME_SPAN_SECONDS // 86400} days)"
|
|
403
|
+
)
|
|
404
|
+
|
|
405
|
+
to_desc = to_time_param or "now"
|
|
406
|
+
error_msg += f"\nTime range: {from_desc} to {to_desc}"
|
|
337
407
|
|
|
338
408
|
return StructuredToolResult(
|
|
339
|
-
status=
|
|
409
|
+
status=StructuredToolResultStatus.ERROR,
|
|
340
410
|
error=error_msg,
|
|
341
411
|
params=params,
|
|
342
412
|
invocation=json.dumps({"url": url, "params": query_params})
|
|
@@ -350,7 +420,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
350
420
|
)
|
|
351
421
|
|
|
352
422
|
return StructuredToolResult(
|
|
353
|
-
status=
|
|
423
|
+
status=StructuredToolResultStatus.ERROR,
|
|
354
424
|
error=f"Exception while querying Datadog: {str(e)}",
|
|
355
425
|
params=params,
|
|
356
426
|
)
|
|
@@ -364,7 +434,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
|
|
|
364
434
|
def __init__(self, toolset: "DatadogMetricsToolset"):
|
|
365
435
|
super().__init__(
|
|
366
436
|
name="get_datadog_metric_metadata",
|
|
367
|
-
description="Get metadata about one or more metrics including their type, description, unit, and other properties",
|
|
437
|
+
description="[datadog/metrics toolset] Get metadata about one or more metrics including their type, description, unit, and other properties",
|
|
368
438
|
parameters={
|
|
369
439
|
"metric_names": ToolParameter(
|
|
370
440
|
description="Comma-separated list of metric names to get metadata for (e.g., 'system.cpu.user, system.mem.used')",
|
|
@@ -375,12 +445,10 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
|
|
|
375
445
|
toolset=toolset,
|
|
376
446
|
)
|
|
377
447
|
|
|
378
|
-
def _invoke(
|
|
379
|
-
self, params: dict, user_approved: bool = False
|
|
380
|
-
) -> StructuredToolResult:
|
|
448
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
381
449
|
if not self.toolset.dd_config:
|
|
382
450
|
return StructuredToolResult(
|
|
383
|
-
status=
|
|
451
|
+
status=StructuredToolResultStatus.ERROR,
|
|
384
452
|
error=TOOLSET_CONFIG_MISSING_ERROR,
|
|
385
453
|
params=params,
|
|
386
454
|
)
|
|
@@ -396,7 +464,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
|
|
|
396
464
|
|
|
397
465
|
if not metric_names:
|
|
398
466
|
return StructuredToolResult(
|
|
399
|
-
status=
|
|
467
|
+
status=StructuredToolResultStatus.ERROR,
|
|
400
468
|
error="metric_names cannot be empty",
|
|
401
469
|
params=params,
|
|
402
470
|
)
|
|
@@ -408,10 +476,10 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
|
|
|
408
476
|
|
|
409
477
|
for metric_name in metric_names:
|
|
410
478
|
try:
|
|
411
|
-
|
|
479
|
+
api_url = f"{self.toolset.dd_config.site_api_url}/api/v1/metrics/{metric_name}"
|
|
412
480
|
|
|
413
481
|
data = execute_datadog_http_request(
|
|
414
|
-
url=
|
|
482
|
+
url=api_url,
|
|
415
483
|
headers=headers,
|
|
416
484
|
payload_or_params={},
|
|
417
485
|
timeout=self.toolset.dd_config.request_timeout,
|
|
@@ -440,18 +508,29 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
|
|
|
440
508
|
"failed": len(errors),
|
|
441
509
|
}
|
|
442
510
|
|
|
511
|
+
# Generate URL for the first metric (or a general metrics page if multiple)
|
|
512
|
+
if metric_names:
|
|
513
|
+
url = generate_datadog_metric_metadata_url(
|
|
514
|
+
self.toolset.dd_config,
|
|
515
|
+
metric_names[0],
|
|
516
|
+
)
|
|
517
|
+
else:
|
|
518
|
+
url = None
|
|
519
|
+
|
|
443
520
|
if not results and errors:
|
|
444
521
|
return StructuredToolResult(
|
|
445
|
-
status=
|
|
522
|
+
status=StructuredToolResultStatus.ERROR,
|
|
446
523
|
error="Failed to retrieve metadata for all metrics",
|
|
447
|
-
data=
|
|
524
|
+
data=response_data,
|
|
448
525
|
params=params,
|
|
526
|
+
url=url,
|
|
449
527
|
)
|
|
450
528
|
|
|
451
529
|
return StructuredToolResult(
|
|
452
|
-
status=
|
|
453
|
-
data=
|
|
530
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
531
|
+
data=response_data,
|
|
454
532
|
params=params,
|
|
533
|
+
url=url,
|
|
455
534
|
)
|
|
456
535
|
|
|
457
536
|
except Exception as e:
|
|
@@ -461,7 +540,7 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
|
|
|
461
540
|
)
|
|
462
541
|
|
|
463
542
|
return StructuredToolResult(
|
|
464
|
-
status=
|
|
543
|
+
status=StructuredToolResultStatus.ERROR,
|
|
465
544
|
error=f"Exception while querying Datadog: {str(e)}",
|
|
466
545
|
params=params,
|
|
467
546
|
)
|
|
@@ -480,7 +559,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
|
|
|
480
559
|
def __init__(self, toolset: "DatadogMetricsToolset"):
|
|
481
560
|
super().__init__(
|
|
482
561
|
name="list_datadog_metric_tags",
|
|
483
|
-
description="List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
|
|
562
|
+
description="[datadog/metrics toolset] List all available tags and aggregations for a specific metric. This helps in building queries by showing what dimensions are available for filtering.",
|
|
484
563
|
parameters={
|
|
485
564
|
"metric_name": ToolParameter(
|
|
486
565
|
description="The name of the metric to get tags for (e.g., 'system.cpu.user', 'container.memory.usage')",
|
|
@@ -491,37 +570,41 @@ class ListMetricTags(BaseDatadogMetricsTool):
|
|
|
491
570
|
toolset=toolset,
|
|
492
571
|
)
|
|
493
572
|
|
|
494
|
-
def _invoke(
|
|
495
|
-
self, params: dict, user_approved: bool = False
|
|
496
|
-
) -> StructuredToolResult:
|
|
573
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
497
574
|
if not self.toolset.dd_config:
|
|
498
575
|
return StructuredToolResult(
|
|
499
|
-
status=
|
|
576
|
+
status=StructuredToolResultStatus.ERROR,
|
|
500
577
|
error=TOOLSET_CONFIG_MISSING_ERROR,
|
|
501
578
|
params=params,
|
|
502
579
|
)
|
|
503
580
|
|
|
504
|
-
|
|
581
|
+
api_url = None
|
|
505
582
|
query_params = None
|
|
506
583
|
|
|
507
584
|
try:
|
|
508
585
|
metric_name = get_param_or_raise(params, "metric_name")
|
|
509
586
|
|
|
510
|
-
|
|
587
|
+
api_url = f"{self.toolset.dd_config.site_api_url}/api/v2/metrics/{metric_name}/active-configurations"
|
|
511
588
|
headers = get_headers(self.toolset.dd_config)
|
|
512
589
|
|
|
513
590
|
data = execute_datadog_http_request(
|
|
514
|
-
url=
|
|
591
|
+
url=api_url,
|
|
515
592
|
headers=headers,
|
|
516
593
|
timeout=self.toolset.dd_config.request_timeout,
|
|
517
594
|
method="GET",
|
|
518
595
|
payload_or_params={},
|
|
519
596
|
)
|
|
520
597
|
|
|
598
|
+
web_url = generate_datadog_metric_tags_url(
|
|
599
|
+
self.toolset.dd_config,
|
|
600
|
+
metric_name,
|
|
601
|
+
)
|
|
602
|
+
|
|
521
603
|
return StructuredToolResult(
|
|
522
|
-
status=
|
|
604
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
523
605
|
data=data,
|
|
524
606
|
params=params,
|
|
607
|
+
url=web_url,
|
|
525
608
|
)
|
|
526
609
|
|
|
527
610
|
except DataDogRequestError as e:
|
|
@@ -537,14 +620,21 @@ class ListMetricTags(BaseDatadogMetricsTool):
|
|
|
537
620
|
f"permissions. Error: {str(e)}"
|
|
538
621
|
)
|
|
539
622
|
else:
|
|
540
|
-
|
|
623
|
+
# Include full API error details for better debugging
|
|
624
|
+
error_msg = (
|
|
625
|
+
f"Datadog API error (status {e.status_code}): {e.response_text}"
|
|
626
|
+
)
|
|
627
|
+
if params:
|
|
628
|
+
error_msg += (
|
|
629
|
+
f"\nMetric name: {params.get('metric_name', 'not specified')}"
|
|
630
|
+
)
|
|
541
631
|
|
|
542
632
|
return StructuredToolResult(
|
|
543
|
-
status=
|
|
633
|
+
status=StructuredToolResultStatus.ERROR,
|
|
544
634
|
error=error_msg,
|
|
545
635
|
params=params,
|
|
546
|
-
invocation=json.dumps({"url":
|
|
547
|
-
if
|
|
636
|
+
invocation=json.dumps({"url": api_url, "params": query_params})
|
|
637
|
+
if api_url and query_params
|
|
548
638
|
else None,
|
|
549
639
|
)
|
|
550
640
|
|
|
@@ -554,7 +644,7 @@ class ListMetricTags(BaseDatadogMetricsTool):
|
|
|
554
644
|
exc_info=True,
|
|
555
645
|
)
|
|
556
646
|
return StructuredToolResult(
|
|
557
|
-
status=
|
|
647
|
+
status=StructuredToolResultStatus.ERROR,
|
|
558
648
|
error=f"Exception while querying Datadog: {str(e)}",
|
|
559
649
|
params=params,
|
|
560
650
|
)
|
|
@@ -571,7 +661,7 @@ class DatadogMetricsToolset(Toolset):
|
|
|
571
661
|
super().__init__(
|
|
572
662
|
name="datadog/metrics",
|
|
573
663
|
description="Toolset for fetching metrics and metadata from Datadog, including historical data for pods no longer in the cluster",
|
|
574
|
-
docs_url="https://
|
|
664
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
|
|
575
665
|
icon_url="https://imgix.datadoghq.com//img/about/presskit/DDlogo.jpg",
|
|
576
666
|
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
|
|
577
667
|
tools=[
|
|
@@ -580,14 +670,13 @@ class DatadogMetricsToolset(Toolset):
|
|
|
580
670
|
QueryMetricsMetadata(toolset=self),
|
|
581
671
|
ListMetricTags(toolset=self),
|
|
582
672
|
],
|
|
583
|
-
experimental=True,
|
|
584
673
|
tags=[ToolsetTag.CORE],
|
|
585
674
|
)
|
|
586
675
|
self._reload_instructions()
|
|
587
676
|
|
|
588
677
|
def _perform_healthcheck(self, dd_config: DatadogMetricsConfig) -> Tuple[bool, str]:
|
|
589
678
|
try:
|
|
590
|
-
logging.
|
|
679
|
+
logging.debug("Performing Datadog metrics configuration healthcheck...")
|
|
591
680
|
|
|
592
681
|
url = f"{dd_config.site_api_url}/api/v1/validate"
|
|
593
682
|
headers = get_headers(dd_config)
|
|
@@ -616,7 +705,7 @@ class DatadogMetricsToolset(Toolset):
|
|
|
616
705
|
if not config:
|
|
617
706
|
return (
|
|
618
707
|
False,
|
|
619
|
-
|
|
708
|
+
"Missing config for dd_api_key, dd_app_key, or site_api_url. For details: https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
|
|
620
709
|
)
|
|
621
710
|
|
|
622
711
|
try:
|
|
@@ -631,13 +720,12 @@ class DatadogMetricsToolset(Toolset):
|
|
|
631
720
|
return (False, f"Failed to parse Datadog configuration: {str(e)}")
|
|
632
721
|
|
|
633
722
|
def get_example_config(self) -> Dict[str, Any]:
|
|
634
|
-
|
|
635
|
-
|
|
636
|
-
|
|
637
|
-
|
|
638
|
-
|
|
639
|
-
|
|
640
|
-
}
|
|
723
|
+
example_config = DatadogMetricsConfig(
|
|
724
|
+
dd_api_key="<your_datadog_api_key>",
|
|
725
|
+
dd_app_key="<your_datadog_app_key>",
|
|
726
|
+
site_api_url=AnyUrl("https://api.datadoghq.com"),
|
|
727
|
+
)
|
|
728
|
+
return example_config.model_dump(mode="json")
|
|
641
729
|
|
|
642
730
|
def _reload_instructions(self):
|
|
643
731
|
"""Load Datadog metrics specific troubleshooting instructions."""
|