holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +20 -6
- holmes/common/env_vars.py +58 -3
- holmes/common/openshift.py +1 -1
- holmes/config.py +123 -148
- holmes/core/conversations.py +71 -15
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +31 -39
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +508 -88
- holmes/core/models.py +108 -4
- holmes/core/openai_formatting.py +14 -1
- holmes/core/prompt.py +48 -3
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +8 -6
- holmes/core/supabase_dal.py +295 -100
- holmes/core/tool_calling_llm.py +489 -428
- holmes/core/tools.py +325 -56
- holmes/core/tools_utils/token_counting.py +21 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
- holmes/core/tools_utils/tool_executor.py +0 -13
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +191 -5
- holmes/core/tracing.py +19 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +63 -0
- holmes/core/transformers/llm_summarize.py +175 -0
- holmes/core/transformers/registry.py +123 -0
- holmes/core/transformers/transformer.py +32 -0
- holmes/core/truncation/compaction.py +94 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +219 -0
- holmes/interactive.py +228 -31
- holmes/main.py +23 -40
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
- holmes/plugins/prompts/generic_ask.jinja2 +0 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
- holmes/plugins/runbooks/__init__.py +145 -17
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +44 -27
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
- holmes/plugins/toolsets/bash/common/bash.py +23 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/git.py +54 -50
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +13 -29
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
- holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
- holmes/plugins/toolsets/internet/internet.py +15 -16
- holmes/plugins/toolsets/internet/notion.py +9 -11
- holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +36 -42
- holmes/plugins/toolsets/kubernetes.yaml +317 -113
- holmes/plugins/toolsets/kubernetes_logs.py +9 -9
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
- holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
- holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
- holmes/plugins/toolsets/robusta/robusta.py +239 -68
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +60 -11
- holmes/utils/holmes_status.py +6 -4
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +64 -0
- holmes/utils/stream.py +69 -8
- holmes/utils/tags.py +4 -3
- holmes/version.py +37 -15
- holmesgpt-0.18.4.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- holmesgpt-0.13.2.dist-info/RECORD +0 -234
- /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
|
@@ -1,268 +1,137 @@
|
|
|
1
|
-
import os
|
|
2
|
-
from enum import Enum
|
|
3
1
|
import json
|
|
4
2
|
import logging
|
|
5
|
-
|
|
3
|
+
import os
|
|
4
|
+
from typing import Any, Dict, Optional, Tuple
|
|
5
|
+
|
|
6
|
+
from pydantic import AnyUrl
|
|
7
|
+
|
|
6
8
|
from holmes.core.tools import (
|
|
7
9
|
CallablePrerequisite,
|
|
10
|
+
StructuredToolResult,
|
|
11
|
+
StructuredToolResultStatus,
|
|
12
|
+
Tool,
|
|
13
|
+
ToolInvokeContext,
|
|
14
|
+
ToolParameter,
|
|
8
15
|
ToolsetTag,
|
|
9
16
|
)
|
|
10
|
-
from
|
|
11
|
-
from holmes.core.tools import StructuredToolResult, ToolResultStatus
|
|
12
|
-
from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
|
|
17
|
+
from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
|
|
13
18
|
from holmes.plugins.toolsets.datadog.datadog_api import (
|
|
14
|
-
|
|
19
|
+
MAX_RETRY_COUNT_ON_RATE_LIMIT,
|
|
15
20
|
DataDogRequestError,
|
|
16
|
-
|
|
21
|
+
execute_datadog_http_request,
|
|
17
22
|
get_headers,
|
|
18
|
-
MAX_RETRY_COUNT_ON_RATE_LIMIT,
|
|
19
23
|
)
|
|
24
|
+
from holmes.plugins.toolsets.datadog.datadog_models import (
|
|
25
|
+
DatadogLogsConfig,
|
|
26
|
+
)
|
|
27
|
+
from holmes.plugins.toolsets.datadog.datadog_url_utils import generate_datadog_logs_url
|
|
20
28
|
from holmes.plugins.toolsets.logging_utils.logging_api import (
|
|
21
|
-
DEFAULT_TIME_SPAN_SECONDS,
|
|
22
29
|
DEFAULT_LOG_LIMIT,
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
30
|
+
DEFAULT_TIME_SPAN_SECONDS,
|
|
31
|
+
Toolset,
|
|
32
|
+
)
|
|
33
|
+
from holmes.plugins.toolsets.utils import (
|
|
34
|
+
process_timestamps_to_int,
|
|
35
|
+
standard_start_datetime_tool_param_description,
|
|
36
|
+
toolset_name_for_one_liner,
|
|
27
37
|
)
|
|
28
|
-
from holmes.plugins.toolsets.utils import process_timestamps_to_rfc3339
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class DataDogLabelsMapping(BaseModel):
|
|
32
|
-
pod: str = "pod_name"
|
|
33
|
-
namespace: str = "kube_namespace"
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
class DataDogStorageTier(str, Enum):
|
|
37
|
-
INDEXES = "indexes"
|
|
38
|
-
ONLINE_ARCHIVES = "online-archives"
|
|
39
|
-
FLEX = "flex"
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
DEFAULT_STORAGE_TIERS = [DataDogStorageTier.INDEXES]
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
class DatadogLogsConfig(DatadogBaseConfig):
|
|
46
|
-
indexes: list[str] = ["*"]
|
|
47
|
-
# Ordered list of storage tiers. Works as fallback. Subsequent tiers are queried only if the previous tier yielded no result
|
|
48
|
-
storage_tiers: list[DataDogStorageTier] = Field(
|
|
49
|
-
default=DEFAULT_STORAGE_TIERS, min_length=1
|
|
50
|
-
)
|
|
51
|
-
labels: DataDogLabelsMapping = DataDogLabelsMapping()
|
|
52
|
-
page_size: int = 300
|
|
53
|
-
default_limit: int = DEFAULT_LOG_LIMIT
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
def calculate_page_size(
|
|
57
|
-
params: FetchPodLogsParams, dd_config: DatadogLogsConfig, logs: list
|
|
58
|
-
) -> int:
|
|
59
|
-
logs_count = len(logs)
|
|
60
|
-
|
|
61
|
-
max_logs_count = dd_config.default_limit
|
|
62
|
-
if params.limit:
|
|
63
|
-
max_logs_count = params.limit
|
|
64
|
-
|
|
65
|
-
return min(dd_config.page_size, max(0, max_logs_count - logs_count))
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
def fetch_paginated_logs(
|
|
69
|
-
params: FetchPodLogsParams,
|
|
70
|
-
dd_config: DatadogLogsConfig,
|
|
71
|
-
storage_tier: DataDogStorageTier,
|
|
72
|
-
) -> list[dict]:
|
|
73
|
-
limit = params.limit or dd_config.default_limit
|
|
74
|
-
|
|
75
|
-
(from_time, to_time) = process_timestamps_to_rfc3339(
|
|
76
|
-
start_timestamp=params.start_time,
|
|
77
|
-
end_timestamp=params.end_time,
|
|
78
|
-
default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
|
|
79
|
-
)
|
|
80
|
-
|
|
81
|
-
url = f"{dd_config.site_api_url}/api/v2/logs/events/search"
|
|
82
|
-
headers = get_headers(dd_config)
|
|
83
|
-
|
|
84
|
-
query = f"{dd_config.labels.namespace}:{params.namespace}"
|
|
85
|
-
query += f" {dd_config.labels.pod}:{params.pod_name}"
|
|
86
|
-
if params.filter:
|
|
87
|
-
filter = params.filter.replace('"', '\\"')
|
|
88
|
-
query += f' "{filter}"'
|
|
89
|
-
|
|
90
|
-
payload: Dict[str, Any] = {
|
|
91
|
-
"filter": {
|
|
92
|
-
"from": from_time,
|
|
93
|
-
"to": to_time,
|
|
94
|
-
"query": query,
|
|
95
|
-
"indexes": dd_config.indexes,
|
|
96
|
-
"storage_tier": storage_tier.value,
|
|
97
|
-
},
|
|
98
|
-
"sort": "-timestamp",
|
|
99
|
-
"page": {"limit": calculate_page_size(params, dd_config, [])},
|
|
100
|
-
}
|
|
101
|
-
|
|
102
|
-
logs, cursor = execute_paginated_datadog_http_request(
|
|
103
|
-
url=url,
|
|
104
|
-
headers=headers,
|
|
105
|
-
payload_or_params=payload,
|
|
106
|
-
timeout=dd_config.request_timeout,
|
|
107
|
-
)
|
|
108
|
-
|
|
109
|
-
while cursor and len(logs) < limit:
|
|
110
|
-
payload["page"]["cursor"] = cursor
|
|
111
|
-
new_logs, cursor = execute_paginated_datadog_http_request(
|
|
112
|
-
url=url,
|
|
113
|
-
headers=headers,
|
|
114
|
-
payload_or_params=payload,
|
|
115
|
-
timeout=dd_config.request_timeout,
|
|
116
|
-
)
|
|
117
|
-
logs += new_logs
|
|
118
|
-
payload["page"]["limit"] = calculate_page_size(params, dd_config, logs)
|
|
119
|
-
|
|
120
|
-
# logs are fetched descending order. Unified logging API follows the pattern of kubectl logs where oldest logs are first
|
|
121
|
-
logs.reverse()
|
|
122
|
-
|
|
123
|
-
if len(logs) > limit:
|
|
124
|
-
logs = logs[-limit:]
|
|
125
|
-
return logs
|
|
126
38
|
|
|
127
39
|
|
|
128
40
|
def format_logs(raw_logs: list[dict]) -> str:
    """Render raw Datadog log events as plain text, one line per event.

    Each line has the shape
    ``<timestamp> <host> <pod tag> <service> <status> <message>``;
    fields missing from an event are rendered as empty strings.

    Args:
        raw_logs: Log events; each is expected to carry its fields under
            an ``"attributes"`` key.

    Returns:
        The formatted lines joined with newlines (``""`` for no events).
    """
    # Use similar structure to Datadog Log Explorer
    lines = []

    for raw_log_item in raw_logs:
        # `or {}` / `or []` also cover keys that are present but null, which
        # would otherwise raise on the attribute accesses below.
        attrs = raw_log_item.get("attributes") or {}
        nested_attrs = attrs.get("attributes") or {}

        timestamp = attrs.get("timestamp") or attrs.get("@timestamp", "")
        host = attrs.get("host", "")
        service = attrs.get("service", "")
        # The nested status takes precedence over the top-level one.
        status = nested_attrs.get("status") or attrs.get("status", "")
        # Serialize the whole event only when it has no message, instead of
        # paying for json.dumps on every event.
        if "message" in attrs:
            message = attrs["message"]
        else:
            message = json.dumps(raw_log_item)
        tags = attrs.get("tags") or []

        # First tag whose name starts with "pod_", if any.
        pod_name_tag = next(
            (t for t in tags if isinstance(t, str) and t.startswith("pod_")), ""
        )

        lines.append(
            f"{timestamp} {host} {pod_name_tag} {service} {status} {message}"
        )

    return "\n".join(lines)
|
|
138
60
|
|
|
139
61
|
|
|
140
|
-
class DatadogLogsToolset(
|
|
141
|
-
|
|
62
|
+
class DatadogLogsToolset(Toolset):
|
|
63
|
+
"""Toolset for working with Datadog logs data."""
|
|
142
64
|
|
|
143
|
-
|
|
144
|
-
def supported_capabilities(self) -> Set[LoggingCapability]:
|
|
145
|
-
"""Datadog logs API supports historical data and substring matching"""
|
|
146
|
-
return {
|
|
147
|
-
LoggingCapability.HISTORICAL_DATA
|
|
148
|
-
} # No regex support, no exclude filter, but supports historical data
|
|
65
|
+
dd_config: Optional[DatadogLogsConfig] = None
|
|
149
66
|
|
|
150
67
|
def __init__(self):
    """Register the Datadog logs toolset and attach its single tool."""
    healthcheck_prerequisite = CallablePrerequisite(
        callable=self.prerequisites_callable
    )
    super().__init__(
        name="datadog/logs",
        description="Toolset for fetching logs from Datadog, including historical data for pods no longer in the cluster",
        docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
        icon_url="https://imgix.datadoghq.com//img/about/presskit/DDlogo.jpg",
        prerequisites=[healthcheck_prerequisite],
        tools=[],  # filled in below, once the parent initializer has run
        tags=[ToolsetTag.CORE],
    )
    # The tool keeps a reference back to this toolset, so it is created only
    # after the parent is initialized and self.name exists.
    self.tools = [GetLogs(toolset=self)]
    self._reload_instructions()
|
|
164
80
|
|
|
165
|
-
def logger_name(self) -> str:
|
|
166
|
-
return "DataDog"
|
|
167
|
-
|
|
168
|
-
def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
|
|
169
|
-
if not self.dd_config:
|
|
170
|
-
return StructuredToolResult(
|
|
171
|
-
status=ToolResultStatus.ERROR,
|
|
172
|
-
data=TOOLSET_CONFIG_MISSING_ERROR,
|
|
173
|
-
params=params.model_dump(),
|
|
174
|
-
)
|
|
175
|
-
|
|
176
|
-
try:
|
|
177
|
-
raw_logs = []
|
|
178
|
-
for storage_tier in self.dd_config.storage_tiers:
|
|
179
|
-
raw_logs = fetch_paginated_logs(
|
|
180
|
-
params, self.dd_config, storage_tier=storage_tier
|
|
181
|
-
)
|
|
182
|
-
|
|
183
|
-
if raw_logs:
|
|
184
|
-
logs_str = format_logs(raw_logs)
|
|
185
|
-
return StructuredToolResult(
|
|
186
|
-
status=ToolResultStatus.SUCCESS,
|
|
187
|
-
data=logs_str,
|
|
188
|
-
params=params.model_dump(),
|
|
189
|
-
)
|
|
190
|
-
|
|
191
|
-
return StructuredToolResult(
|
|
192
|
-
status=ToolResultStatus.NO_DATA,
|
|
193
|
-
params=params.model_dump(),
|
|
194
|
-
)
|
|
195
|
-
|
|
196
|
-
except DataDogRequestError as e:
|
|
197
|
-
logging.exception(e, exc_info=True)
|
|
198
|
-
|
|
199
|
-
# Provide more specific error message for rate limiting failures
|
|
200
|
-
if e.status_code == 429:
|
|
201
|
-
error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
|
|
202
|
-
else:
|
|
203
|
-
error_msg = f"Exception while querying Datadog: {str(e)}"
|
|
204
|
-
|
|
205
|
-
return StructuredToolResult(
|
|
206
|
-
status=ToolResultStatus.ERROR,
|
|
207
|
-
error=error_msg,
|
|
208
|
-
params=params.model_dump(),
|
|
209
|
-
invocation=json.dumps(e.payload),
|
|
210
|
-
)
|
|
211
|
-
|
|
212
|
-
except Exception as e:
|
|
213
|
-
logging.exception(
|
|
214
|
-
f"Failed to query Datadog logs for params: {params}", exc_info=True
|
|
215
|
-
)
|
|
216
|
-
return StructuredToolResult(
|
|
217
|
-
status=ToolResultStatus.ERROR,
|
|
218
|
-
error=f"Exception while querying Datadog: {str(e)}",
|
|
219
|
-
params=params.model_dump(),
|
|
220
|
-
)
|
|
221
|
-
|
|
222
81
|
def _perform_healthcheck(self) -> Tuple[bool, str]:
    """Check that the configured credentials can query the Datadog logs API.

    Sends a one-record search over the last minute to the logs search
    endpoint. Only the success or failure of the request matters; the
    response body is ignored.

    Returns:
        ``(True, "")`` when the request succeeds, otherwise ``(False, reason)``.
    """
    if not self.dd_config:
        return False, "Datadog configuration not initialized"

    try:
        logging.info("Performing Datadog logs configuration healthcheck...")
        headers = get_headers(self.dd_config)
        payload = {
            "filter": {
                "from": "now-1m",
                "to": "now",
                "query": "*",
                "indexes": self.dd_config.indexes,
            },
            "page": {"limit": 1},
        }

        search_url = f"{self.dd_config.site_api_url}/api/v2/logs/events/search"
        execute_datadog_http_request(
            url=search_url,
            headers=headers,
            payload_or_params=payload,
            timeout=self.dd_config.request_timeout,
            method="POST",
        )

        return True, ""

    except DataDogRequestError as e:
        logging.error(
            f"Datadog API error during healthcheck: {e.status_code} - {e.response_text}"
        )
        if e.status_code == 403:
            # The logs search endpoint is gated by 'logs_read_data';
            # 'apm_read' is the traces scope and would mislead the user.
            return (
                False,
                "API key lacks required permissions. Make sure your API key has 'logs_read_data' scope.",
            )
        return False, f"Datadog API error: {e.status_code} - {e.response_text}"
    except Exception as e:
        logging.exception("Failed during Datadog logs healthcheck")
        return False, f"Healthcheck failed with exception: {str(e)}"
|
|
253
123
|
|
|
254
124
|
def prerequisites_callable(self, config: dict[str, Any]) -> Tuple[bool, str]:
|
|
255
125
|
if not config:
|
|
256
126
|
return (
|
|
257
127
|
False,
|
|
258
|
-
|
|
128
|
+
"Missing config for dd_api_key, dd_app_key, or site_api_url. For details: https://holmesgpt.dev/data-sources/builtin-toolsets/datadog/",
|
|
259
129
|
)
|
|
260
130
|
|
|
261
131
|
try:
|
|
262
132
|
dd_config = DatadogLogsConfig(**config)
|
|
263
133
|
self.dd_config = dd_config
|
|
264
134
|
|
|
265
|
-
# Perform healthcheck
|
|
266
135
|
success, error_msg = self._perform_healthcheck()
|
|
267
136
|
return success, error_msg
|
|
268
137
|
|
|
@@ -271,11 +140,13 @@ class DatadogLogsToolset(BasePodLoggingToolset):
|
|
|
271
140
|
return (False, f"Failed to parse Datadog configuration: {str(e)}")
|
|
272
141
|
|
|
273
142
|
def get_example_config(self) -> Dict[str, Any]:
    """Return a placeholder configuration dumped in JSON mode."""
    return DatadogLogsConfig(
        dd_api_key="<your_datadog_api_key>",
        dd_app_key="<your_datadog_app_key>",
        site_api_url=AnyUrl("https://api.datadoghq.com"),
    ).model_dump(mode="json")
|
|
279
150
|
|
|
280
151
|
def _reload_instructions(self):
|
|
281
152
|
"""Load Datadog logs specific troubleshooting instructions."""
|
|
@@ -283,3 +154,152 @@ class DatadogLogsToolset(BasePodLoggingToolset):
|
|
|
283
154
|
os.path.join(os.path.dirname(__file__), "datadog_logs_instructions.jinja2")
|
|
284
155
|
)
|
|
285
156
|
self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
class GetLogs(Tool):
    """Tool that searches Datadog logs with a caller-supplied search query.

    One page of results is fetched per invocation via
    ``POST /api/v2/logs/events/search``; the ``cursor`` parameter is passed
    through so the caller can request the following page.
    """

    toolset: "DatadogLogsToolset"
    name: str = "fetch_datadog_logs"
    # Parenthesized so both literals form one description. As two separate
    # lines the second literal is a standalone expression statement and is
    # never part of the description.
    description: str = (
        "Search for logs in Datadog using search query syntax. "
        "Uses the DataDog api endpoint: POST /api/v2/logs/events/search with 'query' parameter. (e.g., 'service:web-app @http.status_code:500')"
    )
    parameters: Dict[str, ToolParameter] = {
        "query": ToolParameter(
            description="The search query - following the logs search syntax. default: *",
            type="string",
            required=False,
        ),
        "start_datetime": ToolParameter(
            description=standard_start_datetime_tool_param_description(
                DEFAULT_TIME_SPAN_SECONDS
            ),
            type="string",
            required=False,
        ),
        "end_datetime": ToolParameter(
            description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
            type="string",
            required=False,
        ),
        "cursor": ToolParameter(
            description="The returned paging point to use to get the next results. IMPORTANT: Cursors are single-use and stateful - never reuse the same cursor value multiple times or parallelize cursor-based calls. Each response provides a new cursor for the subsequent request.",
            type="string",
            required=False,
        ),
        "limit": ToolParameter(
            description=f"Maximum number of log records to return. Defaults to {DEFAULT_LOG_LIMIT}. This value is user-configured and represents the maximum allowed limit.",
            type="integer",
            required=False,
        ),
        "sort_desc": ToolParameter(
            description="Get the results in descending order. default: true",
            type="boolean",
            required=False,
        ),
    }

    def get_parameterized_one_liner(self, params: dict) -> str:
        """Get a one-liner description of the tool invocation."""
        return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Logs ({params.get('query', '')})"

    def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
        """Run one log search against Datadog and wrap the outcome.

        Args:
            params: Tool arguments as declared in ``parameters``. The
                effective ``limit`` is written back into this dict so the
                returned result reports the value actually used.
            context: Invocation context (not used by this tool).

        Returns:
            A SUCCESS result carrying the API response (with ``data``
            replaced by formatted text when ``compact_logs`` is enabled),
            or an ERROR result describing the failure. Exceptions are not
            propagated to the caller.
        """
        dd_config = self.toolset.dd_config
        if not dd_config:
            return StructuredToolResult(
                status=StructuredToolResultStatus.ERROR,
                error="Datadog configuration not initialized",
                params=params,
            )

        # Kept outside the try so the error handlers can report whatever
        # part of the request had been built before the failure.
        url = None
        payload: Optional[Dict[str, Any]] = None
        try:
            # Process timestamps
            from_time_int, to_time_int = process_timestamps_to_int(
                start=params.get("start_datetime"),
                end=params.get("end_datetime"),
                default_time_span_seconds=DEFAULT_TIME_SPAN_SECONDS,
            )

            # Convert to milliseconds for Datadog API
            from_time_ms = from_time_int * 1000
            to_time_ms = to_time_int * 1000

            # The configured limit is both the default and the ceiling. An
            # explicit null is treated like an omitted limit instead of
            # letting min() raise TypeError.
            config_limit = dd_config.default_limit
            requested_limit = params.get("limit")
            if requested_limit is None:
                limit = config_limit
            else:
                limit = min(requested_limit, config_limit)
            params["limit"] = limit

            # A leading "-" requests descending order; descending is the
            # documented default for sort_desc.
            sort = "-timestamp" if params.get("sort_desc", True) else "timestamp"

            url = f"{dd_config.site_api_url}/api/v2/logs/events/search"
            headers = get_headers(dd_config)

            # Only the last configured storage tier is queried.
            storage = dd_config.storage_tiers[-1]
            payload = {
                "filter": {
                    # Fall back to the match-all query for a missing,
                    # null or empty query.
                    "query": params.get("query") or "*",
                    "from": str(from_time_ms),
                    "to": str(to_time_ms),
                    "storage_tier": storage,
                    "indexes": dd_config.indexes,
                },
                "page": {
                    "limit": limit,
                },
                "sort": sort,
            }

            if params.get("cursor"):
                payload["page"]["cursor"] = params["cursor"]

            response = execute_datadog_http_request(
                url=url,
                headers=headers,
                payload_or_params=payload,
                timeout=dd_config.request_timeout,
                method="POST",
            )

            if dd_config.compact_logs and response.get("data"):
                response["data"] = format_logs(response["data"])

            return StructuredToolResult(
                status=StructuredToolResultStatus.SUCCESS,
                data=response,
                params=params,
                url=generate_datadog_logs_url(dd_config, payload),
            )

        except DataDogRequestError as e:
            logging.exception(e, exc_info=True)
            if e.status_code == 429:
                error_msg = f"Datadog API rate limit exceeded. Failed after {MAX_RETRY_COUNT_ON_RATE_LIMIT} retry attempts."
            elif e.status_code == 403:
                # Logs search is gated by 'logs_read_data'; 'apm_read' is
                # the traces permission.
                error_msg = (
                    "Permission denied. Ensure your Datadog Application Key has the 'logs_read_data' "
                    f"permission. Error: {str(e)}"
                )
            else:
                error_msg = f"Exception while querying Datadog: {str(e)}"

            return StructuredToolResult(
                status=StructuredToolResultStatus.ERROR,
                error=error_msg,
                params=params,
                invocation=(
                    json.dumps({"url": url, "payload": payload})
                    if url and payload
                    else None
                ),
            )

        except Exception as e:
            logging.exception(e, exc_info=True)
            return StructuredToolResult(
                status=StructuredToolResultStatus.ERROR,
                error=f"Unexpected error: {str(e)}",
                params=params,
                invocation=(
                    json.dumps({"url": url, "payload": payload})
                    if url and payload
                    else None
                ),
            )
|