holmesgpt 0.13.1__py3-none-any.whl → 0.13.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +7 -0
- holmes/config.py +3 -1
- holmes/core/conversations.py +0 -11
- holmes/core/investigation.py +0 -6
- holmes/core/llm.py +60 -1
- holmes/core/prompt.py +0 -2
- holmes/core/supabase_dal.py +2 -2
- holmes/core/todo_tasks_formatter.py +51 -0
- holmes/core/tool_calling_llm.py +166 -91
- holmes/core/tools.py +20 -4
- holmes/interactive.py +63 -2
- holmes/main.py +0 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +3 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +3 -13
- holmes/plugins/toolsets/__init__.py +5 -1
- holmes/plugins/toolsets/argocd.yaml +1 -1
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +18 -6
- holmes/plugins/toolsets/aws.yaml +9 -5
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -1
- holmes/plugins/toolsets/bash/bash_toolset.py +31 -20
- holmes/plugins/toolsets/confluence.yaml +1 -1
- holmes/plugins/toolsets/coralogix/api.py +3 -1
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +4 -4
- holmes/plugins/toolsets/coralogix/utils.py +41 -14
- holmes/plugins/toolsets/datadog/datadog_api.py +45 -2
- holmes/plugins/toolsets/datadog/datadog_general_instructions.jinja2 +208 -0
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +43 -0
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +12 -9
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +722 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +17 -6
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +15 -7
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +6 -2
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +9 -3
- holmes/plugins/toolsets/docker.yaml +1 -1
- holmes/plugins/toolsets/git.py +15 -5
- holmes/plugins/toolsets/grafana/toolset_grafana.py +25 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +5 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -32
- holmes/plugins/toolsets/helm.yaml +1 -1
- holmes/plugins/toolsets/internet/internet.py +4 -2
- holmes/plugins/toolsets/internet/notion.py +4 -2
- holmes/plugins/toolsets/investigator/core_investigation.py +5 -17
- holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +1 -5
- holmes/plugins/toolsets/kafka.py +19 -7
- holmes/plugins/toolsets/kubernetes.yaml +5 -5
- holmes/plugins/toolsets/kubernetes_logs.py +4 -4
- holmes/plugins/toolsets/kubernetes_logs.yaml +1 -1
- holmes/plugins/toolsets/logging_utils/logging_api.py +15 -2
- holmes/plugins/toolsets/mcp/toolset_mcp.py +3 -1
- holmes/plugins/toolsets/newrelic.py +8 -4
- holmes/plugins/toolsets/opensearch/opensearch.py +13 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +4 -4
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +9 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +193 -82
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +7 -3
- holmes/plugins/toolsets/robusta/robusta.py +10 -4
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -2
- holmes/plugins/toolsets/servicenow/servicenow.py +9 -3
- holmes/plugins/toolsets/slab.yaml +1 -1
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.2.dist-info}/METADATA +3 -2
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.2.dist-info}/RECORD +75 -72
- holmes/core/todo_manager.py +0 -88
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.2.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.2.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.2.dist-info}/entry_points.txt +0 -0
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import os
|
|
1
2
|
from enum import Enum
|
|
2
3
|
import json
|
|
3
4
|
import logging
|
|
@@ -141,22 +142,25 @@ class DatadogLogsToolset(BasePodLoggingToolset):
|
|
|
141
142
|
|
|
142
143
|
@property
|
|
143
144
|
def supported_capabilities(self) -> Set[LoggingCapability]:
|
|
144
|
-
"""Datadog logs API
|
|
145
|
-
return
|
|
145
|
+
"""Datadog logs API supports historical data and substring matching"""
|
|
146
|
+
return {
|
|
147
|
+
LoggingCapability.HISTORICAL_DATA
|
|
148
|
+
} # No regex support, no exclude filter, but supports historical data
|
|
146
149
|
|
|
147
150
|
def __init__(self):
|
|
148
151
|
super().__init__(
|
|
149
152
|
name="datadog/logs",
|
|
150
|
-
description="Toolset for
|
|
153
|
+
description="Toolset for fetching logs from Datadog, including historical data for pods no longer in the cluster",
|
|
151
154
|
docs_url="https://docs.datadoghq.com/api/latest/logs/",
|
|
152
155
|
icon_url="https://imgix.datadoghq.com//img/about/presskit/DDlogo.jpg",
|
|
153
156
|
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
|
|
154
|
-
tools=[
|
|
155
|
-
PodLoggingTool(self),
|
|
156
|
-
],
|
|
157
|
+
tools=[], # Initialize with empty tools first
|
|
157
158
|
experimental=True,
|
|
158
159
|
tags=[ToolsetTag.CORE],
|
|
159
160
|
)
|
|
161
|
+
# Now that parent is initialized and self.name exists, create the tool
|
|
162
|
+
self.tools = [PodLoggingTool(self)]
|
|
163
|
+
self._reload_instructions()
|
|
160
164
|
|
|
161
165
|
def logger_name(self) -> str:
|
|
162
166
|
return "DataDog"
|
|
@@ -272,3 +276,10 @@ class DatadogLogsToolset(BasePodLoggingToolset):
|
|
|
272
276
|
"dd_app_key": "your-datadog-application-key",
|
|
273
277
|
"site_api_url": "https://api.datadoghq.com",
|
|
274
278
|
}
|
|
279
|
+
|
|
280
|
+
def _reload_instructions(self):
|
|
281
|
+
"""Load Datadog logs specific troubleshooting instructions."""
|
|
282
|
+
template_file_path = os.path.abspath(
|
|
283
|
+
os.path.join(os.path.dirname(__file__), "datadog_logs_instructions.jinja2")
|
|
284
|
+
)
|
|
285
|
+
self._load_llm_instructions(jinja_template=f"file://{template_file_path}")
|
|
@@ -54,7 +54,7 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
|
|
|
54
54
|
def __init__(self, toolset: "DatadogMetricsToolset"):
|
|
55
55
|
super().__init__(
|
|
56
56
|
name="list_active_datadog_metrics",
|
|
57
|
-
description=f"List active metrics from the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points.",
|
|
57
|
+
description=f"List active metrics from Datadog for the last {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours. This includes metrics that have actively reported data points, including from pods no longer in the cluster.",
|
|
58
58
|
parameters={
|
|
59
59
|
"from_time": ToolParameter(
|
|
60
60
|
description=f"Start time for listing metrics. Can be an RFC3339 formatted datetime (e.g. '2023-03-01T10:30:00Z') or a negative integer for relative seconds from now (e.g. -86400 for 24 hours ago). Defaults to {ACTIVE_METRICS_DEFAULT_LOOK_BACK_HOURS} hours ago",
|
|
@@ -75,7 +75,9 @@ class ListActiveMetrics(BaseDatadogMetricsTool):
|
|
|
75
75
|
toolset=toolset,
|
|
76
76
|
)
|
|
77
77
|
|
|
78
|
-
def _invoke(
|
|
78
|
+
def _invoke(
|
|
79
|
+
self, params: dict, user_approved: bool = False
|
|
80
|
+
) -> StructuredToolResult:
|
|
79
81
|
if not self.toolset.dd_config:
|
|
80
82
|
return StructuredToolResult(
|
|
81
83
|
status=ToolResultStatus.ERROR,
|
|
@@ -182,7 +184,7 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
182
184
|
def __init__(self, toolset: "DatadogMetricsToolset"):
|
|
183
185
|
super().__init__(
|
|
184
186
|
name="query_datadog_metrics",
|
|
185
|
-
description="Query timeseries data for a specific metric",
|
|
187
|
+
description="Query timeseries data from Datadog for a specific metric, including historical data for pods no longer in the cluster",
|
|
186
188
|
parameters={
|
|
187
189
|
"query": ToolParameter(
|
|
188
190
|
description="The metric query string (e.g., 'system.cpu.user{host:myhost}')",
|
|
@@ -215,7 +217,9 @@ class QueryMetrics(BaseDatadogMetricsTool):
|
|
|
215
217
|
toolset=toolset,
|
|
216
218
|
)
|
|
217
219
|
|
|
218
|
-
def _invoke(
|
|
220
|
+
def _invoke(
|
|
221
|
+
self, params: dict, user_approved: bool = False
|
|
222
|
+
) -> StructuredToolResult:
|
|
219
223
|
if not self.toolset.dd_config:
|
|
220
224
|
return StructuredToolResult(
|
|
221
225
|
status=ToolResultStatus.ERROR,
|
|
@@ -371,7 +375,9 @@ class QueryMetricsMetadata(BaseDatadogMetricsTool):
|
|
|
371
375
|
toolset=toolset,
|
|
372
376
|
)
|
|
373
377
|
|
|
374
|
-
def _invoke(
|
|
378
|
+
def _invoke(
|
|
379
|
+
self, params: dict, user_approved: bool = False
|
|
380
|
+
) -> StructuredToolResult:
|
|
375
381
|
if not self.toolset.dd_config:
|
|
376
382
|
return StructuredToolResult(
|
|
377
383
|
status=ToolResultStatus.ERROR,
|
|
@@ -485,7 +491,9 @@ class ListMetricTags(BaseDatadogMetricsTool):
|
|
|
485
491
|
toolset=toolset,
|
|
486
492
|
)
|
|
487
493
|
|
|
488
|
-
def _invoke(
|
|
494
|
+
def _invoke(
|
|
495
|
+
self, params: dict, user_approved: bool = False
|
|
496
|
+
) -> StructuredToolResult:
|
|
489
497
|
if not self.toolset.dd_config:
|
|
490
498
|
return StructuredToolResult(
|
|
491
499
|
status=ToolResultStatus.ERROR,
|
|
@@ -562,7 +570,7 @@ class DatadogMetricsToolset(Toolset):
|
|
|
562
570
|
def __init__(self):
|
|
563
571
|
super().__init__(
|
|
564
572
|
name="datadog/metrics",
|
|
565
|
-
description="Toolset for
|
|
573
|
+
description="Toolset for fetching metrics and metadata from Datadog, including historical data for pods no longer in the cluster",
|
|
566
574
|
docs_url="https://docs.datadoghq.com/api/latest/metrics/",
|
|
567
575
|
icon_url="https://imgix.datadoghq.com//img/about/presskit/DDlogo.jpg",
|
|
568
576
|
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
|
|
@@ -92,7 +92,9 @@ class GenerateRDSPerformanceReport(BaseDatadogRDSTool):
|
|
|
92
92
|
toolset=toolset,
|
|
93
93
|
)
|
|
94
94
|
|
|
95
|
-
def _invoke(
|
|
95
|
+
def _invoke(
|
|
96
|
+
self, params: dict, user_approved: bool = False
|
|
97
|
+
) -> StructuredToolResult:
|
|
96
98
|
if not self.toolset.dd_config:
|
|
97
99
|
return StructuredToolResult(
|
|
98
100
|
status=ToolResultStatus.ERROR,
|
|
@@ -390,7 +392,9 @@ class GetTopWorstPerformingRDSInstances(BaseDatadogRDSTool):
|
|
|
390
392
|
toolset=toolset,
|
|
391
393
|
)
|
|
392
394
|
|
|
393
|
-
def _invoke(
|
|
395
|
+
def _invoke(
|
|
396
|
+
self, params: dict, user_approved: bool = False
|
|
397
|
+
) -> StructuredToolResult:
|
|
394
398
|
if not self.toolset.dd_config:
|
|
395
399
|
return StructuredToolResult(
|
|
396
400
|
status=ToolResultStatus.ERROR,
|
|
@@ -211,7 +211,9 @@ class FetchDatadogTracesList(BaseDatadogTracesTool):
|
|
|
211
211
|
filter_str = ", ".join(filters) if filters else "all"
|
|
212
212
|
return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Traces ({filter_str})"
|
|
213
213
|
|
|
214
|
-
def _invoke(
|
|
214
|
+
def _invoke(
|
|
215
|
+
self, params: dict, user_approved: bool = False
|
|
216
|
+
) -> StructuredToolResult:
|
|
215
217
|
"""Execute the tool to fetch traces."""
|
|
216
218
|
if not self.toolset.dd_config:
|
|
217
219
|
return StructuredToolResult(
|
|
@@ -375,7 +377,9 @@ class FetchDatadogTraceById(BaseDatadogTracesTool):
|
|
|
375
377
|
trace_id = params.get("trace_id", "unknown")
|
|
376
378
|
return f"{toolset_name_for_one_liner(self.toolset.name)}: Fetch Trace Details ({trace_id})"
|
|
377
379
|
|
|
378
|
-
def _invoke(
|
|
380
|
+
def _invoke(
|
|
381
|
+
self, params: dict, user_approved: bool = False
|
|
382
|
+
) -> StructuredToolResult:
|
|
379
383
|
"""Execute the tool to fetch trace details."""
|
|
380
384
|
if not self.toolset.dd_config:
|
|
381
385
|
return StructuredToolResult(
|
|
@@ -556,7 +560,9 @@ class FetchDatadogSpansByFilter(BaseDatadogTracesTool):
|
|
|
556
560
|
filter_str = ", ".join(filters) if filters else "all"
|
|
557
561
|
return f"{toolset_name_for_one_liner(self.toolset.name)}: Search Spans ({filter_str})"
|
|
558
562
|
|
|
559
|
-
def _invoke(
|
|
563
|
+
def _invoke(
|
|
564
|
+
self, params: dict, user_approved: bool = False
|
|
565
|
+
) -> StructuredToolResult:
|
|
560
566
|
"""Execute the tool to search spans."""
|
|
561
567
|
if not self.toolset.dd_config:
|
|
562
568
|
return StructuredToolResult(
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
toolsets:
|
|
2
2
|
docker/core:
|
|
3
3
|
description: "Read access to Docker resources"
|
|
4
|
-
docs_url: "https://
|
|
4
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/docker/"
|
|
5
5
|
icon_url: "https://platform.robusta.dev/demos/docker.svg"
|
|
6
6
|
tags:
|
|
7
7
|
- cli
|
holmes/plugins/toolsets/git.py
CHANGED
|
@@ -249,7 +249,9 @@ class GitReadFileWithLineNumbers(Tool):
|
|
|
249
249
|
toolset=toolset, # type: ignore
|
|
250
250
|
)
|
|
251
251
|
|
|
252
|
-
def _invoke(
|
|
252
|
+
def _invoke(
|
|
253
|
+
self, params: dict, user_approved: bool = False
|
|
254
|
+
) -> StructuredToolResult:
|
|
253
255
|
filepath = params["filepath"]
|
|
254
256
|
try:
|
|
255
257
|
headers = {"Authorization": f"token {self.toolset.git_credentials}"}
|
|
@@ -293,7 +295,9 @@ class GitListFiles(Tool):
|
|
|
293
295
|
toolset=toolset, # type: ignore
|
|
294
296
|
)
|
|
295
297
|
|
|
296
|
-
def _invoke(
|
|
298
|
+
def _invoke(
|
|
299
|
+
self, params: dict, user_approved: bool = False
|
|
300
|
+
) -> StructuredToolResult:
|
|
297
301
|
try:
|
|
298
302
|
headers = {"Authorization": f"token {self.toolset.git_credentials}"}
|
|
299
303
|
url = f"https://api.github.com/repos/{self.toolset.git_repo}/git/trees/{self.toolset.git_branch}?recursive=1"
|
|
@@ -334,7 +338,9 @@ class GitListOpenPRs(Tool):
|
|
|
334
338
|
toolset=toolset, # type: ignore
|
|
335
339
|
)
|
|
336
340
|
|
|
337
|
-
def _invoke(
|
|
341
|
+
def _invoke(
|
|
342
|
+
self, params: dict, user_approved: bool = False
|
|
343
|
+
) -> StructuredToolResult:
|
|
338
344
|
try:
|
|
339
345
|
prs = self.toolset.list_open_prs()
|
|
340
346
|
formatted = [
|
|
@@ -402,7 +408,9 @@ class GitExecuteChanges(Tool):
|
|
|
402
408
|
toolset=toolset, # type: ignore
|
|
403
409
|
)
|
|
404
410
|
|
|
405
|
-
def _invoke(
|
|
411
|
+
def _invoke(
|
|
412
|
+
self, params: dict, user_approved: bool = False
|
|
413
|
+
) -> StructuredToolResult:
|
|
406
414
|
def error(msg: str) -> StructuredToolResult:
|
|
407
415
|
return StructuredToolResult(
|
|
408
416
|
status=ToolResultStatus.ERROR,
|
|
@@ -620,7 +628,9 @@ class GitUpdatePR(Tool):
|
|
|
620
628
|
toolset=toolset, # type: ignore
|
|
621
629
|
)
|
|
622
630
|
|
|
623
|
-
def _invoke(
|
|
631
|
+
def _invoke(
|
|
632
|
+
self, params: dict, user_approved: bool = False
|
|
633
|
+
) -> StructuredToolResult:
|
|
624
634
|
try:
|
|
625
635
|
line = params["line"]
|
|
626
636
|
filename = params["filename"]
|
|
@@ -1,6 +1,11 @@
|
|
|
1
1
|
from typing import Dict, List
|
|
2
2
|
from urllib.parse import urlencode, urljoin
|
|
3
|
-
from holmes.core.tools import
|
|
3
|
+
from holmes.core.tools import (
|
|
4
|
+
StructuredToolResult,
|
|
5
|
+
Tool,
|
|
6
|
+
ToolParameter,
|
|
7
|
+
ToolResultStatus,
|
|
8
|
+
)
|
|
4
9
|
from holmes.plugins.toolsets.grafana.base_grafana_toolset import BaseGrafanaToolset
|
|
5
10
|
import requests # type: ignore
|
|
6
11
|
import logging
|
|
@@ -38,7 +43,9 @@ class ListAndBuildGrafanaDashboardURLs(Tool):
|
|
|
38
43
|
)
|
|
39
44
|
self._toolset = toolset
|
|
40
45
|
|
|
41
|
-
def _invoke(
|
|
46
|
+
def _invoke(
|
|
47
|
+
self, params: dict, user_approved: bool = False
|
|
48
|
+
) -> StructuredToolResult:
|
|
42
49
|
url = urljoin(
|
|
43
50
|
self._toolset._grafana_config.url, "/api/search?query=&type=dash-db"
|
|
44
51
|
)
|
|
@@ -82,10 +89,24 @@ class ListAndBuildGrafanaDashboardURLs(Tool):
|
|
|
82
89
|
f"Title: {dash['title']}\nURL: {dashboard_url}\n"
|
|
83
90
|
)
|
|
84
91
|
|
|
85
|
-
return
|
|
92
|
+
return StructuredToolResult(
|
|
93
|
+
status=ToolResultStatus.SUCCESS
|
|
94
|
+
if formatted_dashboards
|
|
95
|
+
else ToolResultStatus.NO_DATA,
|
|
96
|
+
data="\n".join(formatted_dashboards)
|
|
97
|
+
if formatted_dashboards
|
|
98
|
+
else "No dashboards found.",
|
|
99
|
+
url=url,
|
|
100
|
+
params=params,
|
|
101
|
+
)
|
|
86
102
|
except requests.RequestException as e:
|
|
87
103
|
logging.error(f"Error fetching dashboards: {str(e)}")
|
|
88
|
-
return
|
|
104
|
+
return StructuredToolResult(
|
|
105
|
+
status=ToolResultStatus.ERROR,
|
|
106
|
+
error=f"Error fetching dashboards: {str(e)}",
|
|
107
|
+
url=url,
|
|
108
|
+
params=params,
|
|
109
|
+
)
|
|
89
110
|
|
|
90
111
|
def get_parameterized_one_liner(self, params: Dict) -> str:
|
|
91
112
|
return (
|
|
@@ -45,12 +45,12 @@ class GrafanaLokiToolset(BasePodLoggingToolset):
|
|
|
45
45
|
name="grafana/loki",
|
|
46
46
|
description="Fetches kubernetes pods logs from Loki",
|
|
47
47
|
icon_url="https://grafana.com/media/docs/loki/logo-grafana-loki.png",
|
|
48
|
-
docs_url="https://
|
|
48
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/grafanaloki/",
|
|
49
49
|
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
|
|
50
|
-
tools=[
|
|
51
|
-
PodLoggingTool(self),
|
|
52
|
-
],
|
|
50
|
+
tools=[], # Initialize with empty tools first
|
|
53
51
|
)
|
|
52
|
+
# Now that parent is initialized and self.name exists, create the tool
|
|
53
|
+
self.tools = [PodLoggingTool(self)]
|
|
54
54
|
|
|
55
55
|
def prerequisites_callable(self, config: dict[str, Any]) -> tuple[bool, str]:
|
|
56
56
|
if not config:
|
|
@@ -1,10 +1,12 @@
|
|
|
1
1
|
Use Tempo when investigating latency or performance issues. Tempo provides traces information for application running on the cluster.
|
|
2
2
|
Assume every application provides tempo traces.
|
|
3
3
|
1. Start by identifying an initial filter to use. This can be a pod name, a deployment name or a service name
|
|
4
|
-
2.
|
|
4
|
+
2. Call fetch_tempo_traces_comparative_sample first when investigating performance issues via traces. This tool provides comprehensive analysis for identifying patterns. For other issues not related to performance, you can start with fetch_tempo_traces.
|
|
5
|
+
3. Use `fetch_tempo_traces` setting the appropriate query params
|
|
5
6
|
- Use the min_duration filter to ensure you get traces that trigger the alert when you are investigating a performance issue
|
|
6
7
|
- If possible, use start and end date to narrow down your search.
|
|
7
8
|
- Use fetch_finding_by_id if you are provided with a finding/alert id. It will contain details about when the alert was triggered
|
|
8
9
|
- Use at least one of the following argument to ensure you get relevant traces: `service_name`, `pod_name` or `deployment_name`.
|
|
9
|
-
|
|
10
|
-
|
|
10
|
+
4. When you have a specific trace ID to investigate, use `fetch_tempo_trace_by_id` to get detailed information about that trace.
|
|
11
|
+
5. Look at the duration of each span in any single trace and deduce any issues.
|
|
12
|
+
6. ALWAYS fetch the logs for a pod once you identify a span that is taking a long time. There may be an explanation for the slowness in the logs.
|