holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +3 -5
- holmes/clients/robusta_client.py +20 -6
- holmes/common/env_vars.py +58 -3
- holmes/common/openshift.py +1 -1
- holmes/config.py +123 -148
- holmes/core/conversations.py +71 -15
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +31 -39
- holmes/core/investigation_structured_output.py +3 -3
- holmes/core/issue.py +1 -1
- holmes/core/llm.py +508 -88
- holmes/core/models.py +108 -4
- holmes/core/openai_formatting.py +14 -1
- holmes/core/prompt.py +48 -3
- holmes/core/runbooks.py +1 -0
- holmes/core/safeguards.py +8 -6
- holmes/core/supabase_dal.py +295 -100
- holmes/core/tool_calling_llm.py +489 -428
- holmes/core/tools.py +325 -56
- holmes/core/tools_utils/token_counting.py +21 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
- holmes/core/tools_utils/tool_executor.py +0 -13
- holmes/core/tools_utils/toolset_utils.py +1 -0
- holmes/core/toolset_manager.py +191 -5
- holmes/core/tracing.py +19 -3
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +63 -0
- holmes/core/transformers/llm_summarize.py +175 -0
- holmes/core/transformers/registry.py +123 -0
- holmes/core/transformers/transformer.py +32 -0
- holmes/core/truncation/compaction.py +94 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +219 -0
- holmes/interactive.py +228 -31
- holmes/main.py +23 -40
- holmes/plugins/interfaces.py +2 -1
- holmes/plugins/prompts/__init__.py +2 -1
- holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
- holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
- holmes/plugins/prompts/generic_ask.jinja2 +0 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
- holmes/plugins/runbooks/__init__.py +145 -17
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/sources/github/__init__.py +4 -2
- holmes/plugins/sources/prometheus/models.py +1 -0
- holmes/plugins/toolsets/__init__.py +44 -27
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
- holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
- holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
- holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
- holmes/plugins/toolsets/azure_sql/utils.py +0 -32
- holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
- holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
- holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
- holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
- holmes/plugins/toolsets/bash/common/bash.py +23 -13
- holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
- holmes/plugins/toolsets/bash/common/stringify.py +1 -1
- holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
- holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
- holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
- holmes/plugins/toolsets/bash/parse_command.py +12 -13
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/connectivity_check.py +124 -0
- holmes/plugins/toolsets/coralogix/api.py +132 -119
- holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
- holmes/plugins/toolsets/coralogix/utils.py +15 -79
- holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
- holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
- holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
- holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
- holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
- holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/git.py +54 -50
- holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
- holmes/plugins/toolsets/grafana/common.py +13 -29
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
- holmes/plugins/toolsets/grafana/loki_api.py +4 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
- holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
- holmes/plugins/toolsets/internet/internet.py +15 -16
- holmes/plugins/toolsets/internet/notion.py +9 -11
- holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
- holmes/plugins/toolsets/investigator/model.py +3 -1
- holmes/plugins/toolsets/json_filter_mixin.py +134 -0
- holmes/plugins/toolsets/kafka.py +36 -42
- holmes/plugins/toolsets/kubernetes.yaml +317 -113
- holmes/plugins/toolsets/kubernetes_logs.py +9 -9
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
- holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
- holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/api.py +23 -4
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
- holmes/plugins/toolsets/robusta/robusta.py +239 -68
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/connection_utils.py +31 -0
- holmes/utils/console/result.py +10 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/file_utils.py +2 -1
- holmes/utils/global_instructions.py +60 -11
- holmes/utils/holmes_status.py +6 -4
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/log.py +15 -0
- holmes/utils/markdown_utils.py +2 -3
- holmes/utils/memory_limit.py +58 -0
- holmes/utils/sentry_helper.py +64 -0
- holmes/utils/stream.py +69 -8
- holmes/utils/tags.py +4 -3
- holmes/version.py +37 -15
- holmesgpt-0.18.4.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
- holmesgpt-0.18.4.dist-info/RECORD +258 -0
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/aws.yaml +0 -80
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
- holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
- holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
- holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
- holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmes/utils/keygen_utils.py +0 -6
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- holmesgpt-0.13.2.dist-info/RECORD +0 -234
- /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
|
@@ -73,7 +73,7 @@ When investigating metrics-related issues:
|
|
|
73
73
|
|
|
74
74
|
# Handling queries results
|
|
75
75
|
* ALWAYS embed the execution results into your answer
|
|
76
|
-
* You only need to embed the partial result in your response. Include the "tool_name" and "
|
|
76
|
+
* You only need to embed the partial result in your response. Include the "tool_name" and "tool_call_id". For example: << {"type": "datadogql", "tool_name": "query_datadog_metrics", "tool_call_id": "92jf2hf"} >>
|
|
77
77
|
* Post processing will parse your response, re-run the query from the tool output and create a chart visible to the user
|
|
78
78
|
* You MUST ensure that the query is successful.
|
|
79
79
|
* ALWAYS embed a DataDog graph in the response. The graph should visualize data related to the incident.
|
|
@@ -81,6 +81,6 @@ When investigating metrics-related issues:
|
|
|
81
81
|
* When embedding multiple graphs, always add line spacing between them
|
|
82
82
|
For example:
|
|
83
83
|
|
|
84
|
-
<<{"type": "datadogql", "tool_name": "query_datadog_metrics", "
|
|
84
|
+
<<{"type": "datadogql", "tool_name": "query_datadog_metrics", "tool_call_id": "lBaA"}>>
|
|
85
85
|
|
|
86
|
-
<<{"type": "datadogql", "tool_name": "query_datadog_metrics", "
|
|
86
|
+
<<{"type": "datadogql", "tool_name": "query_datadog_metrics", "tool_call_id": "IKtq"}>>
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
from enum import Enum
|
|
2
|
+
|
|
3
|
+
from pydantic import Field
|
|
4
|
+
|
|
5
|
+
from holmes.plugins.toolsets.datadog.datadog_api import DatadogBaseConfig
|
|
6
|
+
from holmes.plugins.toolsets.logging_utils.logging_api import DEFAULT_LOG_LIMIT
|
|
7
|
+
|
|
8
|
+
# Constants for RDS toolset
|
|
9
|
+
DEFAULT_TIME_SPAN_SECONDS = 3600
|
|
10
|
+
DEFAULT_TOP_INSTANCES = 10
|
|
11
|
+
|
|
12
|
+
# Constants for general toolset
|
|
13
|
+
MAX_RESPONSE_SIZE = 10 * 1024 * 1024 # 10MB
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class DataDogStorageTier(str, Enum):
|
|
17
|
+
"""Storage tier enum for Datadog logs."""
|
|
18
|
+
|
|
19
|
+
INDEXES = "indexes"
|
|
20
|
+
ONLINE_ARCHIVES = "online-archives"
|
|
21
|
+
FLEX = "flex"
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
# Constants for logs toolset
|
|
25
|
+
DEFAULT_STORAGE_TIERS = [DataDogStorageTier.INDEXES]
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class DatadogMetricsConfig(DatadogBaseConfig):
|
|
29
|
+
"""Configuration for Datadog metrics toolset."""
|
|
30
|
+
|
|
31
|
+
default_limit: int = DEFAULT_LOG_LIMIT
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
class DatadogTracesConfig(DatadogBaseConfig):
|
|
35
|
+
"""Configuration for Datadog traces toolset."""
|
|
36
|
+
|
|
37
|
+
indexes: list[str] = ["*"]
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class DatadogLogsConfig(DatadogBaseConfig):
|
|
41
|
+
"""Configuration for Datadog logs toolset."""
|
|
42
|
+
|
|
43
|
+
indexes: list[str] = ["*"]
|
|
44
|
+
# TODO storage tier just works with first element. need to add support for multi stoarge tiers.
|
|
45
|
+
storage_tiers: list[DataDogStorageTier] = Field(
|
|
46
|
+
default_factory=lambda: [DataDogStorageTier.INDEXES], min_length=1
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
compact_logs: bool = True
|
|
50
|
+
default_limit: int = DEFAULT_LOG_LIMIT
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class DatadogGeneralConfig(DatadogBaseConfig):
|
|
54
|
+
"""Configuration for general-purpose Datadog toolset."""
|
|
55
|
+
|
|
56
|
+
max_response_size: int = MAX_RESPONSE_SIZE
|
|
57
|
+
allow_custom_endpoints: bool = (
|
|
58
|
+
False # If True, allows endpoints not in whitelist (still filtered for safety)
|
|
59
|
+
)
|
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
import re
|
|
2
|
+
from typing import Any, Dict, Optional
|
|
3
|
+
from urllib.parse import urlencode, urlparse
|
|
4
|
+
|
|
5
|
+
from holmes.plugins.toolsets.datadog.datadog_api import convert_api_url_to_app_url
|
|
6
|
+
from holmes.plugins.toolsets.datadog.datadog_models import (
|
|
7
|
+
DatadogGeneralConfig,
|
|
8
|
+
DatadogLogsConfig,
|
|
9
|
+
DatadogMetricsConfig,
|
|
10
|
+
DatadogTracesConfig,
|
|
11
|
+
)
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def generate_datadog_metrics_explorer_url(
|
|
15
|
+
dd_config: DatadogMetricsConfig,
|
|
16
|
+
query: str,
|
|
17
|
+
from_time: int,
|
|
18
|
+
to_time: int,
|
|
19
|
+
) -> str:
|
|
20
|
+
base_url = convert_api_url_to_app_url(dd_config.site_api_url)
|
|
21
|
+
|
|
22
|
+
params = {
|
|
23
|
+
"query": query,
|
|
24
|
+
"from_ts": from_time * 1000, # seconds -> ms
|
|
25
|
+
"to_ts": to_time * 1000, # seconds -> ms
|
|
26
|
+
"live": "true",
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
return f"{base_url}/metric/explorer?{urlencode(params)}"
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
def generate_datadog_metrics_list_url(
|
|
33
|
+
dd_config: DatadogMetricsConfig,
|
|
34
|
+
from_time: int,
|
|
35
|
+
host: Optional[str] = None,
|
|
36
|
+
tag_filter: Optional[str] = None,
|
|
37
|
+
metric_filter: Optional[str] = None,
|
|
38
|
+
) -> str:
|
|
39
|
+
base_url = convert_api_url_to_app_url(dd_config.site_api_url)
|
|
40
|
+
|
|
41
|
+
params = {}
|
|
42
|
+
if metric_filter:
|
|
43
|
+
params["filter"] = metric_filter
|
|
44
|
+
|
|
45
|
+
if host:
|
|
46
|
+
params["host"] = host
|
|
47
|
+
if tag_filter:
|
|
48
|
+
params["tag_filter"] = tag_filter
|
|
49
|
+
|
|
50
|
+
qs = urlencode(params) if params else ""
|
|
51
|
+
return f"{base_url}/metric/summary" + (f"?{qs}" if qs else "")
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def generate_datadog_metric_metadata_url(
|
|
55
|
+
dd_config: DatadogMetricsConfig,
|
|
56
|
+
metric_name: str,
|
|
57
|
+
) -> str:
|
|
58
|
+
base_url = convert_api_url_to_app_url(dd_config.site_api_url)
|
|
59
|
+
params = {"metric": metric_name}
|
|
60
|
+
return f"{base_url}/metric/summary?{urlencode(params)}"
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
def generate_datadog_metric_tags_url(
|
|
64
|
+
dd_config: DatadogMetricsConfig,
|
|
65
|
+
metric_name: str,
|
|
66
|
+
) -> str:
|
|
67
|
+
base_url = convert_api_url_to_app_url(dd_config.site_api_url)
|
|
68
|
+
params = {"metric": metric_name}
|
|
69
|
+
return f"{base_url}/metric/summary?{urlencode(params)}"
|
|
70
|
+
|
|
71
|
+
|
|
72
|
+
def generate_datadog_spans_url(
|
|
73
|
+
dd_config: DatadogTracesConfig,
|
|
74
|
+
query: str,
|
|
75
|
+
from_time_ms: int,
|
|
76
|
+
to_time_ms: int,
|
|
77
|
+
) -> str:
|
|
78
|
+
base_url = convert_api_url_to_app_url(dd_config.site_api_url)
|
|
79
|
+
|
|
80
|
+
url_params = {
|
|
81
|
+
"query": query,
|
|
82
|
+
"from_ts": from_time_ms,
|
|
83
|
+
"to_ts": to_time_ms,
|
|
84
|
+
"live": "true",
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
return f"{base_url}/apm/traces?{urlencode(url_params)}"
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def generate_datadog_spans_analytics_url(
|
|
91
|
+
dd_config: DatadogTracesConfig,
|
|
92
|
+
query: str,
|
|
93
|
+
from_time_ms: int,
|
|
94
|
+
to_time_ms: int,
|
|
95
|
+
) -> str:
|
|
96
|
+
base_url = convert_api_url_to_app_url(dd_config.site_api_url)
|
|
97
|
+
|
|
98
|
+
url_params = {
|
|
99
|
+
"query": query,
|
|
100
|
+
"from_ts": from_time_ms,
|
|
101
|
+
"to_ts": to_time_ms,
|
|
102
|
+
"live": "true",
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
return f"{base_url}/apm/analytics?{urlencode(url_params)}"
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
def generate_datadog_logs_url(
|
|
109
|
+
dd_config: DatadogLogsConfig,
|
|
110
|
+
params: dict,
|
|
111
|
+
) -> str:
|
|
112
|
+
base_url = convert_api_url_to_app_url(dd_config.site_api_url)
|
|
113
|
+
url_params = {
|
|
114
|
+
"query": params["filter"]["query"],
|
|
115
|
+
"from_ts": params["filter"]["from"],
|
|
116
|
+
"to_ts": params["filter"]["to"],
|
|
117
|
+
"live": "true",
|
|
118
|
+
"storage": params["filter"]["storage_tier"],
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
if dd_config.indexes != ["*"]:
|
|
122
|
+
url_params["index"] = ",".join(dd_config.indexes)
|
|
123
|
+
|
|
124
|
+
# Construct the full URL
|
|
125
|
+
return f"{base_url}/logs?{urlencode(url_params)}"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def _build_qs(
|
|
129
|
+
query_params: Optional[Dict[str, Any]], allowed: Optional[set] = None
|
|
130
|
+
) -> str:
|
|
131
|
+
if not query_params:
|
|
132
|
+
return ""
|
|
133
|
+
allowed = allowed or {
|
|
134
|
+
"filter",
|
|
135
|
+
"query",
|
|
136
|
+
"tags",
|
|
137
|
+
"status",
|
|
138
|
+
"start",
|
|
139
|
+
"end",
|
|
140
|
+
"from",
|
|
141
|
+
"to",
|
|
142
|
+
}
|
|
143
|
+
url_params = {}
|
|
144
|
+
for k, v in query_params.items():
|
|
145
|
+
if k not in allowed or v is None:
|
|
146
|
+
continue
|
|
147
|
+
if k in ("start", "from"):
|
|
148
|
+
url_params["from_ts"] = v * 1000
|
|
149
|
+
elif k in ("end", "to"):
|
|
150
|
+
url_params["to_ts"] = v * 1000
|
|
151
|
+
elif k in ("query", "filter", "tags"):
|
|
152
|
+
url_params["q"] = v
|
|
153
|
+
else:
|
|
154
|
+
url_params[k] = v
|
|
155
|
+
qs = urlencode(url_params) if url_params else ""
|
|
156
|
+
return f"?{qs}" if qs else ""
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def generate_datadog_general_url(
|
|
160
|
+
dd_config: DatadogGeneralConfig,
|
|
161
|
+
endpoint: str,
|
|
162
|
+
query_params: Optional[Dict[str, Any]] = None,
|
|
163
|
+
) -> Optional[str]:
|
|
164
|
+
base_url = convert_api_url_to_app_url(dd_config.site_api_url)
|
|
165
|
+
path = urlparse(endpoint).path
|
|
166
|
+
|
|
167
|
+
if "/logs" in path:
|
|
168
|
+
return f"{base_url}/logs{_build_qs(query_params, {'start', 'end'})}"
|
|
169
|
+
|
|
170
|
+
if "/monitor" in path:
|
|
171
|
+
qs = _build_qs(query_params, {"filter", "query", "tags", "status"})
|
|
172
|
+
monitor_id_match = re.search(r"/monitor/(\d+)", path)
|
|
173
|
+
if monitor_id_match:
|
|
174
|
+
return f"{base_url}/monitors/{monitor_id_match.group(1)}{qs}"
|
|
175
|
+
return f"{base_url}/monitors{qs}"
|
|
176
|
+
|
|
177
|
+
if "/dashboard" in path:
|
|
178
|
+
qs = _build_qs(query_params, {"filter", "query", "tags"})
|
|
179
|
+
if re.match(r"^/api/v\d+/dashboard/[^/]+", path):
|
|
180
|
+
return f"{base_url}/dashboard/{path.split('/')[-1]}{qs}"
|
|
181
|
+
return f"{base_url}/dashboard{qs}"
|
|
182
|
+
|
|
183
|
+
if "/slo" in path:
|
|
184
|
+
qs = _build_qs(query_params, {"filter", "query", "tags"})
|
|
185
|
+
if re.match(r"^/api/v\d+/slo/[^/]+", path):
|
|
186
|
+
return f"{base_url}/slo/{path.split('/')[-1]}{qs}"
|
|
187
|
+
return f"{base_url}/slo{qs}"
|
|
188
|
+
|
|
189
|
+
if "/events" in path:
|
|
190
|
+
return f"{base_url}/events{_build_qs(query_params, {'start', 'end'})}"
|
|
191
|
+
|
|
192
|
+
if "/incidents" in path:
|
|
193
|
+
qs = _build_qs(query_params, {"filter", "query", "status"})
|
|
194
|
+
if re.match(r"^/api/v\d+/incidents/[^/]+", path):
|
|
195
|
+
return f"{base_url}/incidents/{path.split('/')[-1]}{qs}"
|
|
196
|
+
return f"{base_url}/incidents{qs}"
|
|
197
|
+
|
|
198
|
+
if "/synthetics" in path:
|
|
199
|
+
qs = _build_qs(query_params, {"filter", "query", "tags", "status"})
|
|
200
|
+
if re.match(r"^/api/v\d+/synthetics/tests/[^/]+", path):
|
|
201
|
+
return f"{base_url}/synthetics/tests/{path.split('/')[-1]}{qs}"
|
|
202
|
+
return f"{base_url}/synthetics/tests{qs}"
|
|
203
|
+
|
|
204
|
+
if "/hosts" in path:
|
|
205
|
+
return f"{base_url}/infrastructure{_build_qs(query_params, {'filter', 'query', 'tags'})}"
|
|
206
|
+
|
|
207
|
+
if "/services" in path:
|
|
208
|
+
return f"{base_url}/apm/services{_build_qs(query_params, {'filter', 'query', 'tags'})}"
|
|
209
|
+
|
|
210
|
+
if "/metrics" in path or "/query" in path:
|
|
211
|
+
return f"{base_url}/metrics/explorer{_build_qs(query_params, {'from', 'to', 'query'})}"
|
|
212
|
+
|
|
213
|
+
return f"{base_url}/apm/home"
|
|
@@ -3,49 +3,186 @@
|
|
|
3
3
|
Tools to search and analyze distributed traces from Datadog APM.
|
|
4
4
|
|
|
5
5
|
### Available Tools:
|
|
6
|
-
- **fetch_datadog_traces** - List traces with filters (service, operation, duration)
|
|
7
|
-
- **fetch_datadog_trace_by_id** - Get detailed span hierarchy for a specific trace
|
|
8
6
|
- **fetch_datadog_spans** - Search spans with Datadog query syntax
|
|
7
|
+
- **aggregate_datadog_spans** - Aggregate span data into buckets and compute metrics
|
|
9
8
|
|
|
10
9
|
### Common Usage:
|
|
11
10
|
|
|
12
11
|
```python
|
|
13
|
-
#
|
|
14
|
-
|
|
12
|
+
# Search for errors using Datadog query syntax
|
|
13
|
+
fetch_datadog_spans(query="@http.status_code:500", limit=5)
|
|
14
|
+
fetch_datadog_spans(query="service:api status:error", limit=10)
|
|
15
|
+
```
|
|
15
16
|
|
|
16
|
-
|
|
17
|
-
fetch_datadog_trace_by_id(trace_id="6878d11e0000000064837efe7e97f5f8")
|
|
17
|
+
### Query Patterns:
|
|
18
18
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
fetch_datadog_spans(
|
|
19
|
+
```python
|
|
20
|
+
# Specific HTTP endpoint (any method)
|
|
21
|
+
fetch_datadog_spans(query="@http.route:/api/orders", limit=5)
|
|
22
|
+
|
|
23
|
+
# HTTP routes containing substring (wildcard search)
|
|
24
|
+
fetch_datadog_spans(query="@http.route:*payment*", limit=5)
|
|
25
|
+
|
|
26
|
+
# Broad search across all span types
|
|
27
|
+
fetch_datadog_spans(query="resource_name:*user*", limit=10)
|
|
28
|
+
|
|
29
|
+
# Errors by service with wildcard
|
|
30
|
+
fetch_datadog_spans(query="service:payment @http.status_code:5*", limit=5)
|
|
31
|
+
|
|
32
|
+
# Database queries with time range (last hour)
|
|
33
|
+
fetch_datadog_spans(
|
|
34
|
+
query="service:postgres @duration:>1000000000",
|
|
35
|
+
start_datetime="-3600", # 1 hour in seconds
|
|
36
|
+
limit=10
|
|
37
|
+
)
|
|
38
|
+
|
|
39
|
+
# Production errors
|
|
40
|
+
fetch_datadog_spans(query="env:production error:true", limit=5)
|
|
22
41
|
|
|
23
|
-
#
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
start_datetime="-
|
|
27
|
-
|
|
42
|
+
# Specific endpoint pattern with custom time range
|
|
43
|
+
fetch_datadog_spans(
|
|
44
|
+
query='@http.route:*/user/* @http.status_code:>=400',
|
|
45
|
+
start_datetime="-1800", # 30 minutes in seconds
|
|
46
|
+
limit=10
|
|
47
|
+
)
|
|
48
|
+
|
|
49
|
+
# Combining multiple conditions with wildcards
|
|
50
|
+
fetch_datadog_spans(
|
|
51
|
+
query='service:*api* @http.route:*/user/* @http.status_code:[400 TO 599]',
|
|
52
|
+
limit=10
|
|
28
53
|
)
|
|
29
54
|
```
|
|
30
55
|
|
|
31
|
-
###
|
|
56
|
+
### Aggregate Examples:
|
|
32
57
|
|
|
33
58
|
```python
|
|
34
|
-
#
|
|
35
|
-
|
|
59
|
+
# Count spans grouped by status code (last 15 minutes)
|
|
60
|
+
aggregate_datadog_spans(
|
|
61
|
+
query='resource_name:*api* @http.method:POST',
|
|
62
|
+
compute=[{"aggregation": "count", "type": "total"}],
|
|
63
|
+
group_by=[{"facet": "@http.status_code", "limit": 50}],
|
|
64
|
+
start_datetime="-900" # 15 minutes in seconds
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# Get average duration by service (last hour)
|
|
68
|
+
aggregate_datadog_spans(
|
|
69
|
+
query='service:*backend* OR service:*api*',
|
|
70
|
+
compute=[{"aggregation": "avg", "metric": "@duration", "type": "total"}],
|
|
71
|
+
group_by=[{"facet": "service", "limit": 50}],
|
|
72
|
+
start_datetime="-3600" # 1 hour in seconds
|
|
73
|
+
)
|
|
74
|
+
|
|
75
|
+
# Get P95 latency timeseries by service
|
|
76
|
+
aggregate_datadog_spans(
|
|
77
|
+
query='@http.route:*/api/* @http.status_code:[200 TO 299]',
|
|
78
|
+
compute=[{
|
|
79
|
+
"aggregation": "pc95",
|
|
80
|
+
"metric": "@duration",
|
|
81
|
+
"type": "timeseries",
|
|
82
|
+
"interval": "5m"
|
|
83
|
+
}],
|
|
84
|
+
group_by=[{"facet": "service", "limit": 50}]
|
|
85
|
+
)
|
|
86
|
+
|
|
87
|
+
# Complex aggregation with histogram
|
|
88
|
+
aggregate_datadog_spans(
|
|
89
|
+
query='resource_name:*product* OR resource_name:*catalog*',
|
|
90
|
+
compute=[
|
|
91
|
+
{"aggregation": "avg", "metric": "@duration", "type": "total"},
|
|
92
|
+
{"aggregation": "count", "type": "total"}
|
|
93
|
+
],
|
|
94
|
+
group_by=[{
|
|
95
|
+
"facet": "@duration",
|
|
96
|
+
"histogram": {"interval": 100, "min": 0, "max": 1000},
|
|
97
|
+
"limit": 50
|
|
98
|
+
}]
|
|
99
|
+
)
|
|
100
|
+
|
|
101
|
+
# Error rate calculation by endpoint
|
|
102
|
+
aggregate_datadog_spans(
|
|
103
|
+
query='@http.route:* @http.status_code:[400 TO 599]',
|
|
104
|
+
compute=[{"aggregation": "count", "type": "total"}],
|
|
105
|
+
group_by=[
|
|
106
|
+
{"facet": "resource_name", "limit": 50},
|
|
107
|
+
{"facet": "@http.status_code", "limit": 50}
|
|
108
|
+
]
|
|
109
|
+
)
|
|
110
|
+
```
|
|
111
|
+
|
|
112
|
+
### Query Pattern Tips:
|
|
113
|
+
|
|
114
|
+
| Your Goal | Use This Pattern |
|
|
115
|
+
|-----------|------------------|
|
|
116
|
+
| Specific HTTP endpoint, any method | `@http.route:/api/users` |
|
|
117
|
+
| HTTP routes containing substring | `@http.route:*payment*` |
|
|
118
|
+
| Broad search across all span types | `resource_name:*user*` |
|
|
119
|
+
| Service name patterns | `service:*api*` or `service:payment-*` |
|
|
120
|
+
| Multiple wildcards | `@http.route:*/user/*/profile` |
|
|
121
|
+
| Error status codes | `@http.status_code:5*` or `@http.status_code:[400 TO 599]` |
|
|
122
|
+
|
|
123
|
+
### General Tips:
|
|
124
|
+
- Wildcards (*) can be used in most fields for flexible pattern matching
|
|
125
|
+
- For aggregations: use @-prefixed attributes (e.g., @duration, @http.status_code)
|
|
126
|
+
- Keep fetch_datadog_spans limit low (5-10) to avoid too much data
|
|
127
|
+
- aggregate_datadog_spans can handle higher limits (50+) for group_by facets
|
|
128
|
+
|
|
129
|
+
### CRITICAL: Cursor Usage Rules
|
|
130
|
+
**NEVER parallelize cursor-based calls or reuse cursor values!**
|
|
131
|
+
|
|
132
|
+
Cursors are stateful pointers - each one is single-use and represents a unique position in the data stream.
|
|
133
|
+
|
|
134
|
+
**WRONG (causes duplicate data):**
|
|
135
|
+
```
|
|
136
|
+
Batch 1 → cursor_A
|
|
137
|
+
Then call Batch 2, 3, 4 ALL with cursor_A in parallel ❌
|
|
138
|
+
Result: Duplicate data, incomplete results
|
|
139
|
+
```
|
|
36
140
|
|
|
37
|
-
|
|
38
|
-
|
|
141
|
+
**CORRECT (sequential pagination):**
|
|
142
|
+
```
|
|
143
|
+
Batch 1 → cursor_A
|
|
144
|
+
Wait for response → use cursor_A for Batch 2 → cursor_B
|
|
145
|
+
Wait for response → use cursor_B for Batch 3 → cursor_C
|
|
146
|
+
Result: Complete unique data ✅
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
**Key Rules:**
|
|
150
|
+
- Each response provides a NEW cursor for the NEXT request
|
|
151
|
+
- NEVER reuse the same cursor value multiple times
|
|
152
|
+
- NEVER make parallel calls with the same cursor
|
|
153
|
+
- Always wait for response before using the returned cursor
|
|
154
|
+
|
|
155
|
+
### Compact Mode Strategy:
|
|
156
|
+
|
|
157
|
+
The `compact` parameter reduces output size by returning only essential fields. Use this strategy:
|
|
39
158
|
|
|
40
|
-
|
|
41
|
-
|
|
159
|
+
1. **Initial exploration**: Use compact=true with higher limits (50-100) to get an overview
|
|
160
|
+
2. **Detailed investigation**: Use compact=false with lower limits (5-10) for specific spans
|
|
42
161
|
|
|
43
|
-
|
|
44
|
-
|
|
162
|
+
```python
|
|
163
|
+
# STEP 1: Initial search with compact mode to find patterns
|
|
164
|
+
fetch_datadog_spans(
|
|
165
|
+
query="service:api @http.status_code:5*",
|
|
166
|
+
compact=true,
|
|
167
|
+
limit=100 # Higher limit safe with compact mode
|
|
168
|
+
)
|
|
169
|
+
|
|
170
|
+
# STEP 2: Detailed investigation of specific issues
|
|
171
|
+
fetch_datadog_spans(
|
|
172
|
+
query="service:api @http.status_code:500 resource_name:*/user/*",
|
|
173
|
+
compact=false, # Full details for deep analysis
|
|
174
|
+
limit=10
|
|
175
|
+
)
|
|
45
176
|
```
|
|
46
177
|
|
|
47
|
-
|
|
48
|
-
-
|
|
49
|
-
-
|
|
50
|
-
-
|
|
51
|
-
-
|
|
178
|
+
**When to use compact=true:**
|
|
179
|
+
- Initial searches to identify patterns
|
|
180
|
+
- When you need to scan many spans for errors or performance issues
|
|
181
|
+
- When looking for specific span IDs or trace IDs
|
|
182
|
+
- When the full span details aren't needed yet
|
|
183
|
+
|
|
184
|
+
**When to use compact=false (default):**
|
|
185
|
+
- Investigating specific errors
|
|
186
|
+
- Analyzing request/response headers
|
|
187
|
+
- Examining user agent details
|
|
188
|
+
- Debugging authentication issues or HTTP details
|