holmesgpt 0.16.2a0__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff compares the contents of two publicly released versions of the package as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +4 -3
- holmes/common/env_vars.py +18 -2
- holmes/common/openshift.py +1 -1
- holmes/config.py +11 -6
- holmes/core/conversations.py +30 -13
- holmes/core/investigation.py +21 -25
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +50 -31
- holmes/core/models.py +19 -17
- holmes/core/openai_formatting.py +1 -1
- holmes/core/prompt.py +47 -2
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +4 -2
- holmes/core/supabase_dal.py +4 -2
- holmes/core/tool_calling_llm.py +102 -141
- holmes/core/tools.py +19 -28
- holmes/core/tools_utils/token_counting.py +9 -2
- holmes/core/tools_utils/tool_context_window_limiter.py +13 -30
- holmes/core/tools_utils/tool_executor.py +0 -18
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +37 -2
- holmes/core/tracing.py +13 -2
- holmes/core/transformers/__init__.py +1 -1
- holmes/core/transformers/base.py +1 -0
- holmes/core/transformers/llm_summarize.py +3 -2
- holmes/core/transformers/registry.py +2 -1
- holmes/core/transformers/transformer.py +1 -0
- holmes/core/truncation/compaction.py +37 -2
- holmes/core/truncation/input_context_window_limiter.py +3 -2
- holmes/interactive.py +52 -8
- holmes/main.py +17 -37
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +5 -5
- holmes/plugins/prompts/_runbook_instructions.jinja2 +2 -1
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +2 -1
- holmes/plugins/prompts/generic_ask.jinja2 +0 -2
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -2
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -2
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -2
- holmes/plugins/prompts/investigation_procedure.jinja2 +2 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -2
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -2
- holmes/plugins/runbooks/__init__.py +32 -3
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +30 -26
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +13 -12
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +7 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -5
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -7
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +6 -8
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -3
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +2 -3
- holmes/plugins/toolsets/bash/common/bash.py +19 -9
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +36 -3
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +34 -1
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +71 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +224 -375
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +67 -36
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +360 -343
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/git.py +7 -8
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +2 -30
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +2 -1
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +18 -2
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +92 -18
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +109 -25
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +22 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +201 -33
- holmes/plugins/toolsets/grafana/trace_parser.py +3 -2
- holmes/plugins/toolsets/internet/internet.py +10 -10
- holmes/plugins/toolsets/internet/notion.py +5 -6
- holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +12 -7
- holmes/plugins/toolsets/kubernetes.yaml +260 -30
- holmes/plugins/toolsets/kubernetes_logs.py +3 -3
- holmes/plugins/toolsets/logging_utils/logging_api.py +16 -6
- holmes/plugins/toolsets/mcp/toolset_mcp.py +88 -60
- holmes/plugins/toolsets/newrelic/new_relic_api.py +41 -1
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +24 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +212 -55
- holmes/plugins/toolsets/prometheus/prometheus.py +358 -102
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +11 -3
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +5 -5
- holmes/plugins/toolsets/robusta/robusta.py +5 -5
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +25 -6
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +1 -1
- holmes/plugins/toolsets/utils.py +1 -1
- holmes/utils/config_utils.py +1 -1
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +10 -26
- holmes/utils/holmes_status.py +4 -3
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +23 -0
- holmes/utils/stream.py +12 -5
- holmes/utils/tags.py +4 -3
- holmes/version.py +3 -1
- {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +12 -10
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -114
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -736
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -64
- holmes/plugins/toolsets/opensearch/__init__.py +0 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -250
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -215
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.16.2a0.dist-info/RECORD +0 -258
- holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_ppl_query_docs.jinja2 +0 -0
- holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist.py +2 -2
- /holmes/plugins/toolsets/{opensearch → elasticsearch}/opensearch_query_assist_instructions.jinja2 +0 -0
- {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/LICENSE +0 -0
- {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.16.2a0.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
```diff
--- a/holmes/plugins/toolsets/datadog/toolset_datadog_logs.py
+++ b/holmes/plugins/toolsets/datadog/toolset_datadog_logs.py
@@ -1,219 +1,69 @@
-import os
-from enum import Enum
 import json
 import logging
-
-from
+import os
+from typing import Any, Dict, Optional, Tuple
+
+from pydantic import AnyUrl
+
 from holmes.core.tools import (
     CallablePrerequisite,
+    StructuredToolResult,
+    StructuredToolResultStatus,
+    Tool,
+    ToolInvokeContext,
+    ToolParameter,
     ToolsetTag,
 )
-from
-from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
-from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
+from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
 from holmes.plugins.toolsets.datadog.datadog_api import (
-
+    MAX_RETRY_COUNT_ON_RATE_LIMIT,
     DataDogRequestError,
-
+    execute_datadog_http_request,
     get_headers,
-    MAX_RETRY_COUNT_ON_RATE_LIMIT,
-    enhance_error_message,
-    preprocess_time_fields,
 )
+from holmes.plugins.toolsets.datadog.datadog_models import (
+    DatadogLogsConfig,
+)
+from holmes.plugins.toolsets.datadog.datadog_url_utils import generate_datadog_logs_url
 from holmes.plugins.toolsets.logging_utils.logging_api import (
-    DEFAULT_TIME_SPAN_SECONDS,
     DEFAULT_LOG_LIMIT,
-
-
-
-
+    DEFAULT_TIME_SPAN_SECONDS,
+    Toolset,
+)
+from holmes.plugins.toolsets.utils import (
+    process_timestamps_to_int,
+    standard_start_datetime_tool_param_description,
+    toolset_name_for_one_liner,
 )
-from holmes.plugins.toolsets.utils import process_timestamps_to_rfc3339
-
-
-class DataDogLabelsMapping(BaseModel):
-    pod: str = "pod_name"
-    namespace: str = "kube_namespace"
-
-
-class DataDogStorageTier(str, Enum):
-    INDEXES = "indexes"
-    ONLINE_ARCHIVES = "online-archives"
-    FLEX = "flex"
-
-
-DEFAULT_STORAGE_TIERS = [DataDogStorageTier.INDEXES]
-
-
-class DatadogLogsConfig(DatadogBaseConfig):
-    indexes: list[str] = ["*"]
-    # Ordered list of storage tiers. Works as fallback. Subsequent tiers are queried only if the previous tier yielded no result
-    storage_tiers: list[DataDogStorageTier] = Field(
-        default=DEFAULT_STORAGE_TIERS, min_length=1
-    )
-    labels: DataDogLabelsMapping = DataDogLabelsMapping()
-    page_size: int = 300
-    default_limit: int = DEFAULT_LOG_LIMIT
-
-
-def calculate_page_size(
-    params: FetchPodLogsParams, dd_config: DatadogLogsConfig, logs: list
-) -> int:
-    logs_count = len(logs)
-
-    max_logs_count = dd_config.default_limit
-    if params.limit:
-        max_logs_count = params.limit
-
-    return min(dd_config.page_size, max(0, max_logs_count - logs_count))
-
-
-def fetch_paginated_logs(
-    params: FetchPodLogsParams,
-    dd_config: DatadogLogsConfig,
-    storage_tier: DataDogStorageTier,
-) -> list[dict]:
-    limit = params.limit or dd_config.default_limit
-
-    (from_time, to_time) = process_timestamps_to_rfc3339(
-        start_timestamp=params.start_time,
-        end_timestamp=params.end_time,
-        default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
-    )
-
-    url = f"{dd_config.site_api_url}/api/v2/logs/events/search"
-    headers = get_headers(dd_config)
-
-    query = f"{dd_config.labels.namespace}:{params.namespace}"
-    query += f" {dd_config.labels.pod}:{params.pod_name}"
-    if params.filter:
-        filter = params.filter.replace('"', '\\"')
-        query += f' "{filter}"'
-
-    payload: Dict[str, Any] = {
-        "filter": {
-            "from": from_time,
-            "to": to_time,
-            "query": query,
-            "indexes": dd_config.indexes,
-            "storage_tier": storage_tier.value,
-        },
-        "sort": "-timestamp",
-        "page": {"limit": calculate_page_size(params, dd_config, [])},
-    }
-
-    # Preprocess time fields to ensure correct format
-    processed_payload = preprocess_time_fields(payload, "/api/v2/logs/events/search")
-
-    logs, cursor = execute_paginated_datadog_http_request(
-        url=url,
-        headers=headers,
-        payload_or_params=processed_payload,
-        timeout=dd_config.request_timeout,
-    )
-
-    while cursor and len(logs) < limit:
-        processed_payload["page"]["cursor"] = cursor
-        processed_payload["page"]["limit"] = calculate_page_size(
-            params, dd_config, logs
-        )
-        new_logs, cursor = execute_paginated_datadog_http_request(
-            url=url,
-            headers=headers,
-            payload_or_params=processed_payload,
-            timeout=dd_config.request_timeout,
-        )
-        logs += new_logs
-
-    # logs are fetched descending order. Unified logging API follows the pattern of kubectl logs where oldest logs are first
-    logs.reverse()
-
-    if len(logs) > limit:
-        logs = logs[-limit:]
-    return logs
 
 
 def format_logs(raw_logs: list[dict]) -> str:
+    # Use similar structure to Datadog Log Explorer
     logs = []
 
     for raw_log_item in raw_logs:
-
-        timestamp = raw_log_item.get("attributes", {}).get("timestamp", "")
-        if not timestamp:
-            # Fallback to @timestamp if timestamp is not in attributes
-            timestamp = raw_log_item.get("attributes", {}).get("@timestamp", "")
-
-        # Extract message
-        message = raw_log_item.get("attributes", {}).get(
-            "message", json.dumps(raw_log_item)
-        )
+        attrs = raw_log_item.get("attributes", {})
 
-
-
-
-
-
+        timestamp = attrs.get("timestamp") or attrs.get("@timestamp", "")
+        host = attrs.get("host", "")
+        service = attrs.get("service", "")
+        status = attrs.get("attributes", {}).get("status") or attrs.get("status", "")
+        message = attrs.get("message", json.dumps(raw_log_item))
+        tags = attrs.get("tags", [])
 
-
+        pod_name_tag = next((t for t in tags if t.startswith("pod_")), "")
 
+        log_line = f"{timestamp} {host} {pod_name_tag} {service} {status} {message}"
+        logs.append(log_line)
 
-
-    dd_config: DatadogLogsConfig,
-    params: FetchPodLogsParams,
-    storage_tier: DataDogStorageTier,
-) -> str:
-    """Generate a Datadog web UI URL for the logs query."""
-    from holmes.plugins.toolsets.utils import process_timestamps_to_int
-    from holmes.plugins.toolsets.datadog.datadog_api import convert_api_url_to_app_url
-
-    # Convert API URL to app URL using the shared helper
-    base_url = convert_api_url_to_app_url(dd_config.site_api_url)
-
-    # Build the query string
-    query = f"{dd_config.labels.namespace}:{params.namespace}"
-    query += f" {dd_config.labels.pod}:{params.pod_name}"
-    if params.filter:
-        filter = params.filter.replace('"', '\\"')
-        query += f' "{filter}"'
-
-    # Process timestamps - get Unix timestamps in seconds
-    (from_time_seconds, to_time_seconds) = process_timestamps_to_int(
-        start=params.start_time,
-        end=params.end_time,
-        default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
-    )
-
-    # Convert to milliseconds for Datadog web UI
-    from_time_ms = from_time_seconds * 1000
-    to_time_ms = to_time_seconds * 1000
-
-    # Build URL parameters matching Datadog's web UI format
-    url_params = {
-        "query": query,
-        "from_ts": str(from_time_ms),
-        "to_ts": str(to_time_ms),
-        "live": "true",
-        "storage": storage_tier.value,
-    }
-
-    # Add indexes if not default
-    if dd_config.indexes != ["*"]:
-        url_params["index"] = ",".join(dd_config.indexes)
+    return "\n".join(logs)
 
-    # Construct the full URL
-    return f"{base_url}/logs?{urlencode(url_params)}"
 
+class DatadogLogsToolset(Toolset):
+    """Toolset for working with Datadog logs data."""
 
-class DatadogLogsToolset(BasePodLoggingToolset):
     dd_config: Optional[DatadogLogsConfig] = None
 
-    @property
-    def supported_capabilities(self) -> Set[LoggingCapability]:
-        """Datadog logs API supports historical data and substring matching"""
-        return {
-            LoggingCapability.HISTORICAL_DATA
-        }  # No regex support, no exclude filter, but supports historical data
-
     def __init__(self):
         super().__init__(
             name="datadog/logs",
@@ -225,201 +75,50 @@ class DatadogLogsToolset(BasePodLoggingToolset):
             tags=[ToolsetTag.CORE],
         )
         # Now that parent is initialized and self.name exists, create the tool
-        self.tools = [
+        self.tools = [GetLogs(toolset=self)]
         self._reload_instructions()
 
-    def
-
-
-    def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
+    def _perform_healthcheck(self) -> Tuple[bool, str]:
+        """Perform health check on Datadog logs API."""
         if not self.dd_config:
-            return
-                status=StructuredToolResultStatus.ERROR,
-                data=TOOLSET_CONFIG_MISSING_ERROR,
-                params=params.model_dump(),
-            )
-
+            return False, "Datadog configuration not initialized"
         try:
-
-
-
-
-
-
-
-
-            # Generate Datadog web UI URL
-            datadog_url = generate_datadog_logs_url(
-                self.dd_config, params, storage_tier
-            )
-            logs_with_link = f"{logs_str}\n\nView in Datadog: {datadog_url}"
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.SUCCESS,
-                data=logs_with_link,
-                url=datadog_url,
-                params=params.model_dump(),
-            )
-
-            # Include detailed diagnostic context
-            query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
-            if params.filter:
-                query += f' "{params.filter}"'
-
-            # Get actual time range used
-            (from_time, to_time) = process_timestamps_to_rfc3339(
-                start_timestamp=params.start_time,
-                end_timestamp=params.end_time,
-                default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
-            )
-
-            # Generate Datadog web UI URL for the last storage tier checked
-            datadog_url = generate_datadog_logs_url(
-                self.dd_config, params, self.dd_config.storage_tiers[-1]
-            )
-
-            # Build diagnostic information
-            diagnostics: Dict[str, Any] = {
-                "query_executed": query,
-                "time_range": f"{from_time} to {to_time}",
-                "indexes_searched": self.dd_config.indexes,
-                "storage_tiers_checked": [
-                    tier.value for tier in self.dd_config.storage_tiers
-                ],
-                "field_mappings": {
-                    "namespace_field": self.dd_config.labels.namespace,
-                    "pod_field": self.dd_config.labels.pod,
+            logging.info("Performing Datadog logs configuration healthcheck...")
+            headers = get_headers(self.dd_config)
+            payload = {
+                "filter": {
+                    "from": "now-1m",
+                    "to": "now",
+                    "query": "*",
+                    "indexes": self.dd_config.indexes,
                 },
-                "
-                "datadog_url": datadog_url,
+                "page": {"limit": 1},
             }
 
-
-
-
-
-
-
-
-                f"Indexes searched: {diagnostics['indexes_searched']}\n"
-                f"Storage tiers checked: {', '.join(str(tier) for tier in diagnostics.get('storage_tiers_checked', []))}\n"
-                f"Field mappings:\n"
-                f"  - Namespace field: {diagnostics.get('field_mappings', {}).get('namespace_field', 'N/A')}\n"
-                f"  - Pod field: {diagnostics.get('field_mappings', {}).get('pod_field', 'N/A')}\n"
-                f"Limit: {diagnostics['limit']}\n\n"
-                f"View in Datadog: {diagnostics['datadog_url']}"
+            search_url = f"{self.dd_config.site_api_url}/api/v2/logs/events/search"
+            execute_datadog_http_request(
+                url=search_url,
+                headers=headers,
+                payload_or_params=payload,
+                timeout=self.dd_config.request_timeout,
+                method="POST",
             )
 
-            return
-                status=StructuredToolResultStatus.NO_DATA,
-                error=error_msg,
-                url=datadog_url,
-                params=params.model_dump(),
-            )
+            return True, ""
 
         except DataDogRequestError as e:
-            logging.
-
-
-
-
-
-
-            except Exception:
-                datadog_url = None
-
-            # Provide more specific error message for rate limiting failures
-            if e.status_code == 429:
-                error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
-                if datadog_url:
-                    error_msg += f"\nView in Datadog: {datadog_url}"
-            elif e.status_code == 400:
-                # Use enhanced error message for validation errors
-                error_msg = enhance_error_message(
-                    e,
-                    "/api/v2/logs/events/search",
-                    "POST",
-                    str(self.dd_config.site_api_url),
+            logging.error(
+                f"Datadog API error during healthcheck: {e.status_code} - {e.response_text}"
+            )
+            if e.status_code == 403:
+                return (
+                    False,
+                    "API key lacks required permissions. Make sure your API key has 'apm_read' scope.",
                 )
-
-                # Add query context
-                query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
-                if params.filter:
-                    query += f' "{params.filter}"'
-                error_msg += f"\n\nQuery attempted: {query}"
-
-                # Add Datadog web UI URL to error message
-                if datadog_url:
-                    error_msg += f"\nView in Datadog: {datadog_url}"
             else:
-
-                error_msg = (
-                    f"Datadog API error (status {e.status_code}): {e.response_text}"
-                )
-                query = f"{self.dd_config.labels.namespace}:{params.namespace} {self.dd_config.labels.pod}:{params.pod_name}"
-                if params.filter:
-                    query += f' "{params.filter}"'
-                error_msg += f"\nQuery: {query}"
-
-                # Get actual time range used
-                (from_time, to_time) = process_timestamps_to_rfc3339(
-                    start_timestamp=params.start_time,
-                    end_timestamp=params.end_time,
-                    default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
-                )
-                error_msg += f"\nTime range: {from_time} to {to_time}"
-
-                # Add Datadog web UI URL to error message
-                if datadog_url:
-                    error_msg += f"\nView in Datadog: {datadog_url}"
-
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.ERROR,
-                error=error_msg,
-                url=datadog_url,
-                params=params.model_dump(),
-                invocation=json.dumps(e.payload),
-            )
-
-        except Exception as e:
-            logging.exception(
-                f"Failed to query Datadog logs for params: {params}", exc_info=True
-            )
-            return StructuredToolResult(
-                status=StructuredToolResultStatus.ERROR,
-                error=f"Exception while querying Datadog: {str(e)}",
-                params=params.model_dump(),
-            )
-
-    def _perform_healthcheck(self) -> Tuple[bool, str]:
-        """
-        Perform a healthcheck by fetching a single log from Datadog.
-        Returns (success, error_message).
-        """
-        try:
-            logging.debug("Performing Datadog configuration healthcheck...")
-            healthcheck_params = FetchPodLogsParams(
-                namespace="*",
-                pod_name="*",
-                limit=1,
-                start_time="-172800",  # 48 hours in seconds
-            )
-
-            result = self.fetch_pod_logs(healthcheck_params)
-
-            if result.status == StructuredToolResultStatus.ERROR:
-                error_msg = result.error or "Unknown error during healthcheck"
-                logging.error(f"Datadog healthcheck failed: {error_msg}")
-                return False, f"Datadog healthcheck failed: {error_msg}"
-            elif result.status == StructuredToolResultStatus.NO_DATA:
-                error_msg = "No logs were found in the last 48 hours using wildcards for pod and namespace. Is the configuration correct?"
-                logging.error(f"Datadog healthcheck failed: {error_msg}")
-                return False, f"Datadog healthcheck failed: {error_msg}"
-
-            logging.info("Datadog healthcheck completed successfully")
-            return True, ""
-
+                return False, f"Datadog API error: {e.status_code} - {e.response_text}"
         except Exception as e:
-            logging.exception("Failed during Datadog healthcheck")
+            logging.exception("Failed during Datadog traces healthcheck")
             return False, f"Healthcheck failed with exception: {str(e)}"
 
     def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
@@ -433,7 +132,6 @@ class DatadogLogsToolset(BasePodLoggingToolset):
         dd_config = DatadogLogsConfig(**config)
         self.dd_config = dd_config
 
-        # Perform healthcheck
        success, error_msg = self._perform_healthcheck()
        return success, error_msg
 
@@ -442,11 +140,13 @@ class DatadogLogsToolset(BasePodLoggingToolset):
            return (False, f"Failed to parse Datadog configuration: {str(e)}")
 
     def get_example_config(self) -> Dict[str, Any]:
-
-
-        "
-        "
-
+        """Get example configuration for this toolset."""
+        example_config = DatadogLogsConfig(
+            dd_api_key="<your_datadog_api_key>",
+            dd_app_key="<your_datadog_app_key>",
+            site_api_url=AnyUrl("https://api.datadoghq.com"),
+        )
+        return example_config.model_dump(mode="json")
 
     def _reload_instructions(self):
        """Load Datadog logs specific troubleshooting instructions."""
@@ -454,3 +154,152 @@ class DatadogLogsToolset(BasePodLoggingToolset):
             os.path.join(os.path.dirname(__file__), "datadog_logs_instructions.jinja2")
         )
         self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
+
+
+class GetLogs(Tool):
+    """Tool to search for logs with specific search query."""
+
+    toolset: "DatadogLogsToolset"
+    name: str = "fetch_datadog_logs"
+    description: str = "Search for logs in Datadog using search query syntax"
+    "Uses the DataDog api endpoint: POST /api/v2/logs/events/search with 'query' parameter. (e.g., 'service:web-app @http.status_code:500')"
+    parameters: Dict[str, ToolParameter] = {
+        "query": ToolParameter(
+            description="The search query - following the logs search syntax. default: *",
+            type="string",
+            required=False,
+        ),
+        "start_datetime": ToolParameter(
+            description=standard_start_datetime_tool_param_description(
+                DEFAULT_TIME_SPAN_SECONDS
+            ),
+            type="string",
+            required=False,
+        ),
+        "end_datetime": ToolParameter(
+            description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
+            type="string",
+            required=False,
+        ),
+        "cursor": ToolParameter(
+            description="The returned paging point to use to get the next results. IMPORTANT: Cursors are single-use and stateful - never reuse the same cursor value multiple times or parallelize cursor-based calls. Each response provides a new cursor for the subsequent request.",
+            type="string",
+            required=False,
+        ),
+        "limit": ToolParameter(
+            description=f"Maximum number of log records to return. Defaults to {DEFAULT_LOG_LIMIT}. This value is user-configured and represents the maximum allowed limit.",
+            type="integer",
+            required=False,
+        ),
+        "sort_desc": ToolParameter(
+            description="Get the results in descending order. default: true",
+            type="boolean",
+            required=False,
+        ),
+    }
+
+    def get_parameterized_one_liner(self, params: dict) -> str:
+        """Get a one-liner description of the tool invocation."""
+        return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Logs ({params['query'] if 'query' in params else ''})"
+
+    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
+        """Execute the tool to search logs."""
+        if not self.toolset.dd_config:
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error="Datadog configuration not initialized",
+                params=params,
+            )
+        url = None
+        payload: Optional[Dict[str, Any]] = None
+        try:
+            # Process timestamps
+            from_time_int, to_time_int = process_timestamps_to_int(
+                start=params.get("start_datetime"),
+                end=params.get("end_datetime"),
+                default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
+            )
+
+            # Convert to milliseconds for Datadog API
+            from_time_ms = from_time_int * 1000
+            to_time_ms = to_time_int * 1000
+
+            config_limit = self.toolset.dd_config.default_limit
+            limit = min(params.get("limit", config_limit), config_limit)
+            params["limit"] = limit
+            sort = "timestamp" if params.get("sort_desc", False) else "-timestamp"
+
+            url = f"{self.toolset.dd_config.site_api_url}/api/v2/logs/events/search"
+            headers = get_headers(self.toolset.dd_config)
+
+            storage = self.toolset.dd_config.storage_tiers[-1]
+            payload = {
+                "filter": {
+                    "query": params.get("query", "*"),
+                    "from": str(from_time_ms),
+                    "to": str(to_time_ms),
+                    "storage_tier": storage,
+                    "indexes": self.toolset.dd_config.indexes,
+                },
+                "page": {
+                    "limit": limit,
+                },
+                "sort": sort,
+            }
+
+            if params.get("cursor"):
+                payload["page"]["cursor"] = params["cursor"]
+
+            response = execute_datadog_http_request(
+                url=url,
+                headers=headers,
+                payload_or_params=payload,
+                timeout=self.toolset.dd_config.request_timeout,
+                method="POST",
+            )
+
+            if self.toolset.dd_config.compact_logs and response.get("data"):
+                response["data"] = format_logs(response["data"])
+
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.SUCCESS,
+                data=response,
+                params=params,
+                url=generate_datadog_logs_url(self.toolset.dd_config, payload),
+            )
+
+        except DataDogRequestError as e:
+            logging.exception(e, exc_info=True)
+            if e.status_code == 429:
+                error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
+            elif e.status_code == 403:
+                error_msg = (
+                    f"Permission denied. Ensure your Datadog Application Key has the 'apm_read' "
+                    f"permission. Error: {str(e)}"
+                )
+            else:
+                error_msg = f"Exception while querying Datadog: {str(e)}"
+
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error=error_msg,
+                params=params,
+                invocation=(
+                    json.dumps({"url": url, "payload": payload})
+                    if url and payload
+                    else None
+                ),
+            )
+
+        except Exception as e:
+            logging.exception(e, exc_info=True)
+            return StructuredToolResult(
+                status=StructuredToolResultStatus.ERROR,
+                error=f"Unexpected error: {str(e)}",
+                params=params,
+                invocation=(
+                    json.dumps({"url": url, "payload": payload})
+                    if url and payload
+                    else None
+                ),
+            )
```