holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188):
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +20 -6
  3. holmes/common/env_vars.py +58 -3
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +123 -148
  6. holmes/core/conversations.py +71 -15
  7. holmes/core/feedback.py +191 -0
  8. holmes/core/investigation.py +31 -39
  9. holmes/core/investigation_structured_output.py +3 -3
  10. holmes/core/issue.py +1 -1
  11. holmes/core/llm.py +508 -88
  12. holmes/core/models.py +108 -4
  13. holmes/core/openai_formatting.py +14 -1
  14. holmes/core/prompt.py +48 -3
  15. holmes/core/runbooks.py +1 -0
  16. holmes/core/safeguards.py +8 -6
  17. holmes/core/supabase_dal.py +295 -100
  18. holmes/core/tool_calling_llm.py +489 -428
  19. holmes/core/tools.py +325 -56
  20. holmes/core/tools_utils/token_counting.py +21 -0
  21. holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
  22. holmes/core/tools_utils/tool_executor.py +0 -13
  23. holmes/core/tools_utils/toolset_utils.py +1 -0
  24. holmes/core/toolset_manager.py +191 -5
  25. holmes/core/tracing.py +19 -3
  26. holmes/core/transformers/__init__.py +23 -0
  27. holmes/core/transformers/base.py +63 -0
  28. holmes/core/transformers/llm_summarize.py +175 -0
  29. holmes/core/transformers/registry.py +123 -0
  30. holmes/core/transformers/transformer.py +32 -0
  31. holmes/core/truncation/compaction.py +94 -0
  32. holmes/core/truncation/dal_truncation_utils.py +23 -0
  33. holmes/core/truncation/input_context_window_limiter.py +219 -0
  34. holmes/interactive.py +228 -31
  35. holmes/main.py +23 -40
  36. holmes/plugins/interfaces.py +2 -1
  37. holmes/plugins/prompts/__init__.py +2 -1
  38. holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
  39. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
  41. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  42. holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
  43. holmes/plugins/prompts/generic_ask.jinja2 +0 -4
  44. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
  45. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
  46. holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
  47. holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
  48. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
  49. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
  50. holmes/plugins/runbooks/__init__.py +145 -17
  51. holmes/plugins/runbooks/catalog.json +2 -0
  52. holmes/plugins/sources/github/__init__.py +4 -2
  53. holmes/plugins/sources/prometheus/models.py +1 -0
  54. holmes/plugins/toolsets/__init__.py +44 -27
  55. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  56. holmes/plugins/toolsets/aks.yaml +64 -0
  57. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
  58. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  59. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  60. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  61. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  62. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  63. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
  64. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
  65. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
  66. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
  67. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
  68. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
  69. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
  70. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
  71. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
  72. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
  73. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
  74. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  75. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  76. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  77. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  78. holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
  79. holmes/plugins/toolsets/bash/common/bash.py +23 -13
  80. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  81. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  82. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  83. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  84. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  85. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  86. holmes/plugins/toolsets/cilium.yaml +284 -0
  87. holmes/plugins/toolsets/connectivity_check.py +124 -0
  88. holmes/plugins/toolsets/coralogix/api.py +132 -119
  89. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  90. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  91. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  92. holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
  93. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
  94. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  95. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  96. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  97. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  98. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
  99. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
  100. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
  101. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
  102. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  103. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  104. holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  105. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
  106. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
  107. holmes/plugins/toolsets/git.py +54 -50
  108. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  109. holmes/plugins/toolsets/grafana/common.py +13 -29
  110. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
  111. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
  112. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
  113. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  114. holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
  115. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
  116. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  117. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
  118. holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
  119. holmes/plugins/toolsets/internet/internet.py +15 -16
  120. holmes/plugins/toolsets/internet/notion.py +9 -11
  121. holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
  122. holmes/plugins/toolsets/investigator/model.py +3 -1
  123. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  124. holmes/plugins/toolsets/kafka.py +36 -42
  125. holmes/plugins/toolsets/kubernetes.yaml +317 -113
  126. holmes/plugins/toolsets/kubernetes_logs.py +9 -9
  127. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  128. holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
  129. holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
  130. holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
  131. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
  132. holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
  133. holmes/plugins/toolsets/openshift.yaml +283 -0
  134. holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
  135. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
  136. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  137. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  138. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
  139. holmes/plugins/toolsets/robusta/robusta.py +239 -68
  140. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  141. holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
  142. holmes/plugins/toolsets/service_discovery.py +1 -1
  143. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  144. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  145. holmes/plugins/toolsets/utils.py +88 -0
  146. holmes/utils/config_utils.py +91 -0
  147. holmes/utils/connection_utils.py +31 -0
  148. holmes/utils/console/result.py +10 -0
  149. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  150. holmes/utils/env.py +7 -0
  151. holmes/utils/file_utils.py +2 -1
  152. holmes/utils/global_instructions.py +60 -11
  153. holmes/utils/holmes_status.py +6 -4
  154. holmes/utils/holmes_sync_toolsets.py +0 -2
  155. holmes/utils/krr_utils.py +188 -0
  156. holmes/utils/log.py +15 -0
  157. holmes/utils/markdown_utils.py +2 -3
  158. holmes/utils/memory_limit.py +58 -0
  159. holmes/utils/sentry_helper.py +64 -0
  160. holmes/utils/stream.py +69 -8
  161. holmes/utils/tags.py +4 -3
  162. holmes/version.py +37 -15
  163. holmesgpt-0.18.4.dist-info/LICENSE +178 -0
  164. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
  165. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  166. holmes/core/performance_timing.py +0 -72
  167. holmes/plugins/toolsets/aws.yaml +0 -80
  168. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
  169. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  170. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
  171. holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
  172. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  173. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  174. holmes/plugins/toolsets/newrelic.py +0 -231
  175. holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
  176. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  177. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
  178. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  179. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  180. holmes/plugins/toolsets/servicenow/install.md +0 -37
  181. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  182. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  183. holmes/utils/keygen_utils.py +0 -6
  184. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  185. holmesgpt-0.13.2.dist-info/RECORD +0 -234
  186. /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
  187. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  188. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
@@ -1,55 +1,136 @@
1
+ import json
1
2
  import os
2
- import re
3
- from typing import Any, Dict, List, cast
3
+ import time
4
+ import uuid
5
+ from typing import Any, Dict, List, Optional, Tuple, cast
6
+ from urllib.parse import quote
4
7
 
5
- import requests # type: ignore
6
- import yaml # type: ignore
7
- from pydantic import BaseModel
8
-
9
- from holmes.common.env_vars import load_bool
8
+ from holmes.common.env_vars import MAX_GRAPH_POINTS, load_bool
10
9
  from holmes.core.tools import (
11
10
  StructuredToolResult,
11
+ StructuredToolResultStatus,
12
12
  Tool,
13
+ ToolInvokeContext,
13
14
  ToolParameter,
14
- ToolResultStatus,
15
15
  )
16
+ from holmes.plugins.toolsets.consts import STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION
16
17
  from holmes.plugins.toolsets.grafana.base_grafana_toolset import BaseGrafanaToolset
17
18
  from holmes.plugins.toolsets.grafana.common import (
18
- GrafanaConfig,
19
- build_headers,
20
- get_base_url,
21
- )
22
- from holmes.plugins.toolsets.grafana.tempo_api import (
23
- query_tempo_trace_by_id,
24
- query_tempo_traces,
19
+ GrafanaTempoConfig,
25
20
  )
26
- from holmes.plugins.toolsets.grafana.trace_parser import format_traces_list
21
+ from holmes.plugins.toolsets.grafana.grafana_tempo_api import GrafanaTempoAPI
27
22
  from holmes.plugins.toolsets.logging_utils.logging_api import (
28
- DEFAULT_TIME_SPAN_SECONDS,
23
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS,
29
24
  )
30
25
  from holmes.plugins.toolsets.utils import (
31
- get_param_or_raise,
26
+ adjust_step_for_max_points,
27
+ duration_string_to_seconds,
32
28
  process_timestamps_to_int,
29
+ seconds_to_duration_string,
30
+ standard_start_datetime_tool_param_description,
33
31
  toolset_name_for_one_liner,
34
32
  )
35
33
 
36
34
  TEMPO_LABELS_ADD_PREFIX = load_bool("TEMPO_LABELS_ADD_PREFIX", True)
37
35
 
38
- ONE_HOUR_IN_SECONDS = 3600
39
- DEFAULT_TRACES_TIME_SPAN_SECONDS = DEFAULT_TIME_SPAN_SECONDS # 7 days
40
- DEFAULT_TAGS_TIME_SPAN_SECONDS = 8 * ONE_HOUR_IN_SECONDS # 8 hours
41
36
 
37
+ def _build_grafana_explore_tempo_url(
38
+ config: GrafanaTempoConfig,
39
+ query: Optional[str] = None,
40
+ start: Optional[int] = None,
41
+ end: Optional[int] = None,
42
+ limit: int = 20,
43
+ trace_id: Optional[str] = None,
44
+ filters: Optional[List[Dict[str, Any]]] = None,
45
+ tags: Optional[str] = None,
46
+ ) -> Optional[str]:
47
+ if not config.grafana_datasource_uid:
48
+ return None
49
+ try:
50
+ base_url = config.external_url or config.url
51
+ datasource_uid = config.grafana_datasource_uid
52
+ now_s = int(time.time())
53
+ start_ts = start if start else now_s - 3600
54
+ end_ts = end if end else now_s
55
+ start_delta = max(0, now_s - start_ts)
56
+ end_delta = max(0, now_s - end_ts)
57
+ from_str = f"now-{start_delta}s" if start_delta > 0 else "now-1h"
58
+ to_str = "now" if end_delta == 0 else f"now-{end_delta}s"
59
+ pane_id = "tmp"
60
+
61
+ if trace_id:
62
+ # Direct trace ID lookup - query is just the traceID string
63
+ query_obj = {
64
+ "refId": "A",
65
+ "datasource": {"type": "tempo", "uid": datasource_uid},
66
+ "queryType": "traceql",
67
+ "limit": limit,
68
+ "tableType": "traces",
69
+ "metricsQueryType": "range",
70
+ "query": trace_id,
71
+ }
72
+ elif tags:
73
+ # Build filters from tag name
74
+ scope = "resource" if tags.startswith("resource.") else "span"
75
+ filter_id = str(uuid.uuid4())[:8]
76
+ filters = [
77
+ {
78
+ "id": filter_id,
79
+ "operator": "=",
80
+ "scope": scope,
81
+ "tag": tags,
82
+ "value": [],
83
+ }
84
+ ]
85
+ query_obj = {
86
+ "refId": "A",
87
+ "datasource": {"type": "tempo", "uid": datasource_uid},
88
+ "queryType": "traceqlSearch",
89
+ "limit": limit,
90
+ "tableType": "traces",
91
+ "metricsQueryType": "range",
92
+ "query": "",
93
+ "filters": filters,
94
+ }
95
+ elif filters:
96
+ # Tag filters - use traceqlSearch with filters array
97
+ query_obj = {
98
+ "refId": "A",
99
+ "datasource": {"type": "tempo", "uid": datasource_uid},
100
+ "queryType": "traceqlSearch",
101
+ "limit": limit,
102
+ "tableType": "traces",
103
+ "metricsQueryType": "range",
104
+ "query": "",
105
+ "filters": filters,
106
+ }
107
+ else:
108
+ # Regular TraceQL query
109
+ safe_query = query if query else "{}"
110
+ query_obj = {
111
+ "refId": "A",
112
+ "datasource": {"type": "tempo", "uid": datasource_uid},
113
+ "queryType": "traceql",
114
+ "limit": limit,
115
+ "tableType": "traces",
116
+ "metricsQueryType": "range",
117
+ "query": safe_query,
118
+ }
42
119
 
43
- class GrafanaTempoLabelsConfig(BaseModel):
44
- pod: str = "k8s.pod.name"
45
- namespace: str = "k8s.namespace.name"
46
- deployment: str = "k8s.deployment.name"
47
- node: str = "k8s.node.name"
48
- service: str = "service.name"
49
-
120
+ panes = {
121
+ pane_id: {
122
+ "datasource": datasource_uid,
123
+ "queries": [query_obj],
124
+ "range": {"from": from_str, "to": to_str},
125
+ }
126
+ }
50
127
 
51
- class GrafanaTempoConfig(GrafanaConfig):
52
- labels: GrafanaTempoLabelsConfig = GrafanaTempoLabelsConfig()
128
+ panes_encoded = quote(
129
+ json.dumps(panes, separators=(",", ":"), ensure_ascii=False), safe=""
130
+ )
131
+ return f"{base_url}/explore?schemaVersion=1&panes={panes_encoded}&orgId=1"
132
+ except Exception:
133
+ return None
53
134
 
54
135
 
55
136
  class BaseGrafanaTempoToolset(BaseGrafanaToolset):
@@ -67,6 +148,18 @@ class BaseGrafanaTempoToolset(BaseGrafanaToolset):
67
148
  def grafana_config(self) -> GrafanaTempoConfig:
68
149
  return cast(GrafanaTempoConfig, self._grafana_config)
69
150
 
151
+ def health_check(self) -> Tuple[bool, str]:
152
+ """Test a dummy query to check if service available."""
153
+ try:
154
+ _ = GrafanaTempoAPI(self.grafana_config).search_traces_by_query(
155
+ q='{ .service.name = "test-endpoint" }',
156
+ limit=1,
157
+ )
158
+ except Exception as e:
159
+ return False, f"Unable to connect to Tempo.\n{str(e)}"
160
+
161
+ return True, ""
162
+
70
163
  def build_k8s_filters(
71
164
  self, params: Dict[str, Any], use_exact_match: bool
72
165
  ) -> List[str]:
@@ -107,228 +200,25 @@ class BaseGrafanaTempoToolset(BaseGrafanaToolset):
107
200
  escaped_value = value.replace('"', '\\"')
108
201
  filters.append(f'{prefix}{label}="{escaped_value}"')
109
202
  else:
110
- # Escape regex special characters for partial match
111
- escaped_value = re.escape(value)
112
- filters.append(f'{prefix}{label}=~".*{escaped_value}.*"')
203
+ # For partial match, use simple substring matching
204
+ # Don't escape anything - let Tempo handle the regex
205
+ filters.append(f'{prefix}{label}=~".*{value}.*"')
113
206
 
114
207
  return filters
115
208
 
116
-
117
- def validate_params(params: Dict[str, Any], expected_params: List[str]):
118
- for param in expected_params:
119
- if param in params and params[param] not in (None, "", [], {}):
120
- return None
121
-
122
- return f"At least one of the following argument is expected but none were set: {expected_params}"
123
-
124
-
125
- class GetTempoTraces(Tool):
126
- def __init__(self, toolset: BaseGrafanaTempoToolset):
127
- super().__init__(
128
- name="fetch_tempo_traces",
129
- description="""Lists Tempo traces. At least one of `service_name`, `pod_name` or `deployment_name` argument is required.""",
130
- parameters={
131
- "min_duration": ToolParameter(
132
- description="The minimum duration of traces to fetch, e.g., '5s' for 5 seconds.",
133
- type="string",
134
- required=True,
135
- ),
136
- "service_name": ToolParameter(
137
- description="Filter traces by service name",
138
- type="string",
139
- required=False,
140
- ),
141
- "pod_name": ToolParameter(
142
- description="Filter traces by pod name",
143
- type="string",
144
- required=False,
145
- ),
146
- "namespace_name": ToolParameter(
147
- description="Filter traces by namespace",
148
- type="string",
149
- required=False,
150
- ),
151
- "deployment_name": ToolParameter(
152
- description="Filter traces by deployment name",
153
- type="string",
154
- required=False,
155
- ),
156
- "node_name": ToolParameter(
157
- description="Filter traces by node",
158
- type="string",
159
- required=False,
160
- ),
161
- "start_datetime": ToolParameter(
162
- description=f"The beginning time boundary for the trace search period. String in RFC3339 format. If a negative integer, the number of seconds relative to the end_timestamp. Defaults to -{DEFAULT_TRACES_TIME_SPAN_SECONDS}",
163
- type="string",
164
- required=False,
165
- ),
166
- "end_datetime": ToolParameter(
167
- description="The ending time boundary for the trace search period. String in RFC3339 format. Defaults to NOW().",
168
- type="string",
169
- required=False,
170
- ),
171
- "limit": ToolParameter(
172
- description="Maximum number of traces to return. Defaults to 50",
173
- type="string",
174
- required=False,
175
- ),
176
- "sort": ToolParameter(
177
- description="One of 'descending', 'ascending' or 'none' for no sorting. Defaults to descending",
178
- type="string",
179
- required=False,
180
- ),
181
- },
182
- )
183
- self._toolset = toolset
184
-
185
- def _invoke(
186
- self, params: dict, user_approved: bool = False
187
- ) -> StructuredToolResult:
188
- api_key = self._toolset.grafana_config.api_key
189
- headers = self._toolset.grafana_config.headers
190
-
191
- invalid_params_error = validate_params(
192
- params, ["service_name", "pod_name", "deployment_name"]
209
+ @staticmethod
210
+ def adjust_start_end_time(params: Dict) -> Tuple[int, int]:
211
+ return process_timestamps_to_int(
212
+ start=params.get("start"),
213
+ end=params.get("end"),
214
+ default_time_span_seconds=DEFAULT_GRAPH_TIME_SPAN_SECONDS,
193
215
  )
194
- if invalid_params_error:
195
- return StructuredToolResult(
196
- status=ToolResultStatus.ERROR,
197
- error=invalid_params_error,
198
- params=params,
199
- )
200
-
201
- start, end = process_timestamps_to_int(
202
- params.get("start_datetime"),
203
- params.get("end_datetime"),
204
- default_time_span_seconds=DEFAULT_TRACES_TIME_SPAN_SECONDS,
205
- )
206
-
207
- filters = self._toolset.build_k8s_filters(params, use_exact_match=True)
208
-
209
- filters.append(f'duration>{get_param_or_raise(params, "min_duration")}')
210
-
211
- query = " && ".join(filters)
212
- query = f"{{{query}}}"
213
-
214
- base_url = get_base_url(self._toolset.grafana_config)
215
- traces = query_tempo_traces(
216
- base_url=base_url,
217
- api_key=api_key,
218
- headers=headers,
219
- query=query,
220
- start=start,
221
- end=end,
222
- limit=params.get("limit", 50),
223
- )
224
- return StructuredToolResult(
225
- status=ToolResultStatus.SUCCESS,
226
- data=format_traces_list(traces),
227
- params=params,
228
- invocation=query,
229
- )
230
-
231
- def get_parameterized_one_liner(self, params: Dict) -> str:
232
- return f"{toolset_name_for_one_liner(self._toolset.name)}: Fetched Tempo Traces (min_duration={params.get('min_duration')})"
233
-
234
-
235
- class GetTempoTags(Tool):
236
- def __init__(self, toolset: BaseGrafanaTempoToolset):
237
- super().__init__(
238
- name="fetch_tempo_tags",
239
- description="List the tags available in Tempo",
240
- parameters={
241
- "start_datetime": ToolParameter(
242
- description=f"The beginning time boundary for the search period. String in RFC3339 format. If a negative integer, the number of seconds relative to the end_timestamp. Defaults to -{DEFAULT_TAGS_TIME_SPAN_SECONDS}",
243
- type="string",
244
- required=False,
245
- ),
246
- "end_datetime": ToolParameter(
247
- description="The ending time boundary for the search period. String in RFC3339 format. Defaults to NOW().",
248
- type="string",
249
- required=False,
250
- ),
251
- },
252
- )
253
- self._toolset = toolset
254
-
255
- def _invoke(
256
- self, params: dict, user_approved: bool = False
257
- ) -> StructuredToolResult:
258
- api_key = self._toolset.grafana_config.api_key
259
- headers = self._toolset.grafana_config.headers
260
- start, end = process_timestamps_to_int(
261
- start=params.get("start_datetime"),
262
- end=params.get("end_datetime"),
263
- default_time_span_seconds=DEFAULT_TAGS_TIME_SPAN_SECONDS,
264
- )
265
-
266
- base_url = get_base_url(self._toolset.grafana_config)
267
- url = f"{base_url}/api/v2/search/tags?start={start}&end={end}"
268
-
269
- try:
270
- response = requests.get(
271
- url,
272
- headers=build_headers(api_key=api_key, additional_headers=headers),
273
- timeout=60,
274
- )
275
- response.raise_for_status() # Raise an error for non-2xx responses
276
- data = response.json()
277
- return StructuredToolResult(
278
- status=ToolResultStatus.SUCCESS,
279
- data=yaml.dump(data.get("scopes")),
280
- params=params,
281
- )
282
- except requests.exceptions.RequestException as e:
283
- raise Exception(f"Failed to retrieve tags: {e} \n for URL: {url}")
284
-
285
- def get_parameterized_one_liner(self, params: Dict) -> str:
286
- return f"{toolset_name_for_one_liner(self._toolset.name)}: Fetched Tempo tags"
287
-
288
-
289
- class GetTempoTraceById(Tool):
290
- def __init__(self, toolset: BaseGrafanaTempoToolset):
291
- super().__init__(
292
- name="fetch_tempo_trace_by_id",
293
- description="""Retrieves detailed information about a Tempo trace using its trace ID. Use this to investigate a trace.""",
294
- parameters={
295
- "trace_id": ToolParameter(
296
- description="The unique trace ID to fetch.",
297
- type="string",
298
- required=True,
299
- ),
300
- },
301
- )
302
- self._toolset = toolset
303
-
304
- def _invoke(
305
- self, params: dict, user_approved: bool = False
306
- ) -> StructuredToolResult:
307
- labels_mapping = self._toolset.grafana_config.labels
308
- labels = list(labels_mapping.model_dump().values())
309
-
310
- base_url = get_base_url(self._toolset.grafana_config)
311
- trace_data = query_tempo_trace_by_id(
312
- base_url=base_url,
313
- api_key=self._toolset.grafana_config.api_key,
314
- headers=self._toolset.grafana_config.headers,
315
- trace_id=get_param_or_raise(params, "trace_id"),
316
- key_labels=labels,
317
- )
318
- return StructuredToolResult(
319
- status=ToolResultStatus.SUCCESS,
320
- data=trace_data,
321
- params=params,
322
- )
323
-
324
- def get_parameterized_one_liner(self, params: Dict) -> str:
325
- return f"{toolset_name_for_one_liner(self._toolset.name)}: Fetched Tempo Trace (trace_id={params.get('trace_id')})"
326
216
 
327
217
 
328
218
  class FetchTracesSimpleComparison(Tool):
329
219
  def __init__(self, toolset: BaseGrafanaTempoToolset):
330
220
  super().__init__(
331
- name="fetch_tempo_traces_comparative_sample",
221
+ name="tempo_fetch_traces_comparative_sample",
332
222
  description="""Fetches statistics and representative samples of fast, slow, and typical traces for performance analysis. Requires either a `base_query` OR at least one of `service_name`, `pod_name`, `namespace_name`, `deployment_name`, `node_name`.
333
223
 
334
224
  Important: call this tool first when investigating performance issues via traces. This tool provides comprehensive analysis for identifying patterns.
@@ -364,7 +254,11 @@ Examples:
364
254
  required=False,
365
255
  ),
366
256
  "base_query": ToolParameter(
367
- description="Custom TraceQL filter",
257
+ description=(
258
+ "Custom TraceQL filter. Supports span/resource attributes, "
259
+ "duration, and aggregates (count(), avg(), min(), max(), sum()). "
260
+ "Examples: '{span.http.status_code>=400}', '{duration>100ms}'"
261
+ ),
368
262
  type="string",
369
263
  required=False,
370
264
  ),
@@ -373,13 +267,15 @@ Examples:
373
267
  type="integer",
374
268
  required=False,
375
269
  ),
376
- "start_datetime": ToolParameter(
377
- description=f"The beginning time boundary for the trace search period. String in RFC3339 format. If a negative integer, the number of seconds relative to the end_timestamp. Defaults to -{DEFAULT_TRACES_TIME_SPAN_SECONDS}",
270
+ "start": ToolParameter(
271
+ description=standard_start_datetime_tool_param_description(
272
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS
273
+ ),
378
274
  type="string",
379
275
  required=False,
380
276
  ),
381
- "end_datetime": ToolParameter(
382
- description="The ending time boundary for the trace search period. String in RFC3339 format. Defaults to NOW().",
277
+ "end": ToolParameter(
278
+ description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
383
279
  type="string",
384
280
  required=False,
385
281
  ),
@@ -387,9 +283,15 @@ Examples:
387
283
  )
388
284
  self._toolset = toolset
389
285
 
390
- def _invoke(
391
- self, params: dict, user_approved: bool = False
392
- ) -> StructuredToolResult:
286
+ @staticmethod
287
+ def validate_params(params: Dict[str, Any], expected_params: List[str]):
288
+ for param in expected_params:
289
+ if param in params and params[param] not in (None, "", [], {}):
290
+ return None
291
+
292
+ return f"At least one of the following argument is expected but none were set: {expected_params}"
293
+
294
+ def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
393
295
  try:
394
296
  # Build query
395
297
  if params.get("base_query"):
@@ -399,7 +301,7 @@ Examples:
399
301
  filters = self._toolset.build_k8s_filters(params, use_exact_match=False)
400
302
 
401
303
  # Validate that at least one parameter was provided
402
- invalid_params_error = validate_params(
304
+ invalid_params_error = FetchTracesSimpleComparison.validate_params(
403
305
  params,
404
306
  [
405
307
  "service_name",
@@ -411,7 +313,7 @@ Examples:
411
313
  )
412
314
  if invalid_params_error:
413
315
  return StructuredToolResult(
414
- status=ToolResultStatus.ERROR,
316
+ status=StructuredToolResultStatus.ERROR,
415
317
  error=invalid_params_error,
416
318
  params=params,
417
319
  )
@@ -420,30 +322,35 @@ Examples:
420
322
 
421
323
  sample_count = params.get("sample_count", 3)
422
324
 
423
- start, end = process_timestamps_to_int(
424
- params.get("start_datetime"),
425
- params.get("end_datetime"),
426
- default_time_span_seconds=DEFAULT_TRACES_TIME_SPAN_SECONDS,
427
- )
325
+ start, end = BaseGrafanaTempoToolset.adjust_start_end_time(params)
428
326
 
429
- base_url = get_base_url(self._toolset.grafana_config)
327
+ # Create API instance
328
+ api = GrafanaTempoAPI(self._toolset.grafana_config)
430
329
 
431
330
  # Step 1: Get all trace summaries
432
331
  stats_query = f"{{{base_query}}}"
433
- all_traces_response = query_tempo_traces(
434
- base_url=base_url,
435
- api_key=self._toolset.grafana_config.api_key,
436
- headers=self._toolset.grafana_config.headers,
437
- query=stats_query,
332
+
333
+ # Debug log the query (useful for troubleshooting)
334
+ import logging
335
+
336
+ logger = logging.getLogger(__name__)
337
+ logger.debug(f"Tempo query: {stats_query}")
338
+
339
+ logger.debug(f"start: {start}, end: {end}")
340
+
341
+ all_traces_response = api.search_traces_by_query(
342
+ q=stats_query,
438
343
  start=start,
439
344
  end=end,
440
345
  limit=1000,
441
346
  )
442
347
 
348
+ logger.debug(f"Response: {all_traces_response}")
349
+
443
350
  traces = all_traces_response.get("traces", [])
444
351
  if not traces:
445
352
  return StructuredToolResult(
446
- status=ToolResultStatus.SUCCESS,
353
+ status=StructuredToolResultStatus.SUCCESS,
447
354
  data="No traces found matching the query",
448
355
  params=params,
449
356
  )
@@ -488,44 +395,33 @@ Examples:
488
395
  return None
489
396
 
490
397
  try:
491
- url = f"{base_url}/api/traces/{trace_id}"
492
- response = requests.get(
493
- url,
494
- headers=build_headers(
495
- api_key=self._toolset.grafana_config.api_key,
496
- additional_headers=self._toolset.grafana_config.headers,
497
- ),
498
- timeout=5,
398
+ start_nano = trace_summary.get("startTimeUnixNano")
399
+ trace_start = (
400
+ int(int(start_nano) / 1_000_000_000) if start_nano else None
401
+ )
402
+
403
+ trace_data = api.query_trace_by_id_v2(
404
+ trace_id=trace_id, start=trace_start
499
405
  )
500
- response.raise_for_status()
501
406
  return {
502
407
  "traceID": trace_id,
503
408
  "durationMs": trace_summary.get("durationMs", 0),
504
409
  "rootServiceName": trace_summary.get(
505
410
  "rootServiceName", "unknown"
506
411
  ),
507
- "traceData": response.json(), # Raw trace data
412
+ "traceData": trace_data, # Raw trace data
508
413
  }
509
- except requests.exceptions.RequestException as e:
414
+ except Exception as e:
510
415
  error_msg = f"Failed to fetch full trace: {str(e)}"
511
- if hasattr(e, "response") and e.response is not None:
512
- error_msg += f" (Status: {e.response.status_code})"
513
416
  return {
514
417
  "traceID": trace_id,
515
418
  "durationMs": trace_summary.get("durationMs", 0),
516
419
  "error": error_msg,
517
420
  }
518
- except (ValueError, KeyError) as e:
519
- return {
520
- "traceID": trace_id,
521
- "durationMs": trace_summary.get("durationMs", 0),
522
- "error": f"Failed to parse trace data: {str(e)}",
523
- }
524
421
 
525
422
  # Fetch the selected traces
526
423
  result = {
527
424
  "statistics": stats,
528
- "all_trace_durations_ms": durations, # All durations for distribution analysis
529
425
  "fastest_traces": [
530
426
  fetch_full_trace(sorted_traces[i]) for i in fastest_indices
531
427
  ],
@@ -535,16 +431,23 @@ Examples:
535
431
  ],
536
432
  }
537
433
 
538
- # Return as YAML for readability
434
+ explore_url = _build_grafana_explore_tempo_url(
435
+ self._toolset.grafana_config,
436
+ query=f"{{{base_query}}}",
437
+ start=start,
438
+ end=end,
439
+ )
440
+
539
441
  return StructuredToolResult(
540
- status=ToolResultStatus.SUCCESS,
541
- data=yaml.dump(result, default_flow_style=False, sort_keys=False),
442
+ status=StructuredToolResultStatus.SUCCESS,
443
+ data=result,
542
444
  params=params,
445
+ url=explore_url,
543
446
  )
544
447
 
545
448
  except Exception as e:
546
449
  return StructuredToolResult(
547
- status=ToolResultStatus.ERROR,
450
+ status=StructuredToolResultStatus.ERROR,
548
451
  error=f"Error fetching traces: {str(e)}",
549
452
  params=params,
550
453
  )
@@ -553,6 +456,627 @@ Examples:
553
456
  return f"{toolset_name_for_one_liner(self._toolset.name)}: Simple Tempo Traces Comparison"
554
457
 
555
458
 
459
+ class SearchTracesByQuery(Tool):
460
+ def __init__(self, toolset: BaseGrafanaTempoToolset):
461
+ super().__init__(
462
+ name="tempo_search_traces_by_query",
463
+ description=(
464
+ "Search for traces using TraceQL query language. "
465
+ "Uses the Tempo API endpoint: GET /api/search with 'q' parameter.\n\n"
466
+ "TraceQL can select traces based on:\n"
467
+ "- Span and resource attributes\n"
468
+ "- Timing and duration\n"
469
+ "- Aggregate functions:\n"
470
+ " • count() - Count number of spans\n"
471
+ " • avg(attribute) - Calculate average\n"
472
+ " • min(attribute) - Find minimum value\n"
473
+ " • max(attribute) - Find maximum value\n"
474
+ " • sum(attribute) - Sum values\n\n"
475
+ "Examples:\n"
476
+ '- Specific operation: {resource.service.name = "frontend" && name = "POST /api/orders"}\n'
477
+ '- Error traces: {resource.service.name="frontend" && name = "POST /api/orders" && status = error}\n'
478
+ '- HTTP errors: {resource.service.name="frontend" && name = "POST /api/orders" && span.http.status_code >= 500}\n'
479
+ '- Multi-service: {span.service.name="frontend" && name = "GET /api/products/{id}"} && {span.db.system="postgresql"}\n'
480
+ "- With aggregates: { status = error } | by(resource.service.name) | count() > 1"
481
+ ),
482
+ parameters={
483
+ "q": ToolParameter(
484
+ description=(
485
+ "TraceQL query. Supports filtering by span/resource attributes, "
486
+ "duration, and aggregate functions (count(), avg(), min(), max(), sum()). "
487
+ "Examples: '{resource.service.name = \"frontend\"}', "
488
+ '\'{resource.service.name="frontend" && name = "POST /api/orders" && status = error}\', '
489
+ '\'{resource.service.name="frontend" && name = "POST /api/orders" && span.http.status_code >= 500}\', '
490
+ "'{} | count() > 10'"
491
+ ),
492
+ type="string",
493
+ required=True,
494
+ ),
495
+ "limit": ToolParameter(
496
+ description="Maximum number of traces to return",
497
+ type="integer",
498
+ required=False,
499
+ ),
500
+ "start": ToolParameter(
501
+ description=standard_start_datetime_tool_param_description(
502
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS
503
+ ),
504
+ type="string",
505
+ required=False,
506
+ ),
507
+ "end": ToolParameter(
508
+ description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
509
+ type="string",
510
+ required=False,
511
+ ),
512
+ "spss": ToolParameter(
513
+ description="Spans per span set",
514
+ type="integer",
515
+ required=False,
516
+ ),
517
+ },
518
+ )
519
+ self._toolset = toolset
520
+
521
+ def _invoke(self, params: Dict, context: ToolInvokeContext) -> StructuredToolResult:
522
+ api = GrafanaTempoAPI(self._toolset.grafana_config)
523
+
524
+ start, end = BaseGrafanaTempoToolset.adjust_start_end_time(params)
525
+
526
+ try:
527
+ result = api.search_traces_by_query(
528
+ q=params["q"],
529
+ limit=params.get("limit"),
530
+ start=start,
531
+ end=end,
532
+ spss=params.get("spss"),
533
+ )
534
+
535
+ explore_url = _build_grafana_explore_tempo_url(
536
+ self._toolset.grafana_config,
537
+ query=params["q"],
538
+ start=start,
539
+ end=end,
540
+ limit=params.get("limit") or 20,
541
+ )
542
+
543
+ return StructuredToolResult(
544
+ status=StructuredToolResultStatus.SUCCESS,
545
+ data=result,
546
+ params=params,
547
+ url=explore_url,
548
+ )
549
+ except Exception as e:
550
+ return StructuredToolResult(
551
+ status=StructuredToolResultStatus.ERROR,
552
+ error=str(e),
553
+ params=params,
554
+ )
555
+
556
+ def get_parameterized_one_liner(self, params: Dict) -> str:
557
+ return f"{toolset_name_for_one_liner(self._toolset.name)}: Searched traces with TraceQL"
558
+
559
+
560
+ class SearchTracesByTags(Tool):
561
+ def __init__(self, toolset: BaseGrafanaTempoToolset):
562
+ super().__init__(
563
+ name="tempo_search_traces_by_tags",
564
+ description=(
565
+ "Search for traces using logfmt-encoded tags. "
566
+ "Uses the Tempo API endpoint: GET /api/search with 'tags' parameter. "
567
+ 'Example: service.name="api" http.status_code="500"'
568
+ ),
569
+ parameters={
570
+ "tags": ToolParameter(
571
+ description='Logfmt-encoded span/process attributes (e.g., \'service.name="api" http.status_code="500"\')',
572
+ type="string",
573
+ required=True,
574
+ ),
575
+ "min_duration": ToolParameter(
576
+ description="Minimum trace duration (e.g., '5s', '100ms')",
577
+ type="string",
578
+ required=False,
579
+ ),
580
+ "max_duration": ToolParameter(
581
+ description="Maximum trace duration (e.g., '10s', '1000ms')",
582
+ type="string",
583
+ required=False,
584
+ ),
585
+ "limit": ToolParameter(
586
+ description="Maximum number of traces to return",
587
+ type="integer",
588
+ required=False,
589
+ ),
590
+ "start": ToolParameter(
591
+ description=standard_start_datetime_tool_param_description(
592
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS
593
+ ),
594
+ type="string",
595
+ required=False,
596
+ ),
597
+ "end": ToolParameter(
598
+ description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
599
+ type="string",
600
+ required=False,
601
+ ),
602
+ "spss": ToolParameter(
603
+ description="Spans per span set",
604
+ type="integer",
605
+ required=False,
606
+ ),
607
+ },
608
+ )
609
+ self._toolset = toolset
610
+
611
+ def _invoke(self, params: Dict, context: ToolInvokeContext) -> StructuredToolResult:
612
+ api = GrafanaTempoAPI(self._toolset.grafana_config)
613
+
614
+ start, end = BaseGrafanaTempoToolset.adjust_start_end_time(params)
615
+
616
+ try:
617
+ result = api.search_traces_by_tags(
618
+ tags=params["tags"],
619
+ min_duration=params.get("min_duration"),
620
+ max_duration=params.get("max_duration"),
621
+ limit=params.get("limit"),
622
+ start=start,
623
+ end=end,
624
+ spss=params.get("spss"),
625
+ )
626
+
627
+ tag_filters = params["tags"].replace(" ", " && ")
628
+ explore_url = _build_grafana_explore_tempo_url(
629
+ self._toolset.grafana_config,
630
+ query=f"{{{tag_filters}}}",
631
+ start=start,
632
+ end=end,
633
+ limit=params.get("limit") or 20,
634
+ )
635
+
636
+ return StructuredToolResult(
637
+ status=StructuredToolResultStatus.SUCCESS,
638
+ data=result,
639
+ params=params,
640
+ url=explore_url,
641
+ )
642
+ except Exception as e:
643
+ return StructuredToolResult(
644
+ status=StructuredToolResultStatus.ERROR,
645
+ error=str(e),
646
+ params=params,
647
+ )
648
+
649
+ def get_parameterized_one_liner(self, params: Dict) -> str:
650
+ return f"{toolset_name_for_one_liner(self._toolset.name)}: Searched traces with tags"
651
+
652
+
653
+ class QueryTraceById(Tool):
654
+ def __init__(self, toolset: BaseGrafanaTempoToolset):
655
+ super().__init__(
656
+ name="tempo_query_trace_by_id",
657
+ description=(
658
+ "Retrieve detailed trace information by trace ID. "
659
+ "Uses the Tempo API endpoint: GET /api/v2/traces/{trace_id}. "
660
+ "Returns the full trace data in OpenTelemetry format."
661
+ ),
662
+ parameters={
663
+ "trace_id": ToolParameter(
664
+ description="The unique trace ID to fetch",
665
+ type="string",
666
+ required=True,
667
+ ),
668
+ "start": ToolParameter(
669
+ description=standard_start_datetime_tool_param_description(
670
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS
671
+ ),
672
+ type="string",
673
+ required=False,
674
+ ),
675
+ "end": ToolParameter(
676
+ description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
677
+ type="string",
678
+ required=False,
679
+ ),
680
+ },
681
+ )
682
+ self._toolset = toolset
683
+
684
+ def _invoke(self, params: Dict, context: ToolInvokeContext) -> StructuredToolResult:
685
+ api = GrafanaTempoAPI(self._toolset.grafana_config)
686
+
687
+ start, end = BaseGrafanaTempoToolset.adjust_start_end_time(params)
688
+
689
+ try:
690
+ trace_data = api.query_trace_by_id_v2(
691
+ trace_id=params["trace_id"],
692
+ start=start,
693
+ end=end,
694
+ )
695
+
696
+ explore_url = _build_grafana_explore_tempo_url(
697
+ self._toolset.grafana_config,
698
+ trace_id=params["trace_id"],
699
+ start=start,
700
+ end=end,
701
+ )
702
+
703
+ return StructuredToolResult(
704
+ status=StructuredToolResultStatus.SUCCESS,
705
+ data=trace_data,
706
+ params=params,
707
+ url=explore_url,
708
+ )
709
+ except Exception as e:
710
+ return StructuredToolResult(
711
+ status=StructuredToolResultStatus.ERROR,
712
+ error=str(e),
713
+ params=params,
714
+ )
715
+
716
+ def get_parameterized_one_liner(self, params: Dict) -> str:
717
+ return f"{toolset_name_for_one_liner(self._toolset.name)}: Retrieved trace {params.get('trace_id')}"
718
+
719
+
720
+ class SearchTagNames(Tool):
721
+ def __init__(self, toolset: BaseGrafanaTempoToolset):
722
+ super().__init__(
723
+ name="tempo_search_tag_names",
724
+ description=(
725
+ "Discover available tag names across traces. "
726
+ "Uses the Tempo API endpoint: GET /api/v2/search/tags. "
727
+ "Returns tags organized by scope (resource, span, intrinsic)."
728
+ ),
729
+ parameters={
730
+ "scope": ToolParameter(
731
+ description="Filter by scope: 'resource', 'span', or 'intrinsic'",
732
+ type="string",
733
+ required=False,
734
+ ),
735
+ "q": ToolParameter(
736
+ description="TraceQL query to filter tags (e.g., '{resource.cluster=\"us-east-1\"}')",
737
+ type="string",
738
+ required=False,
739
+ ),
740
+ "start": ToolParameter(
741
+ description=standard_start_datetime_tool_param_description(
742
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS
743
+ ),
744
+ type="string",
745
+ required=False,
746
+ ),
747
+ "end": ToolParameter(
748
+ description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
749
+ type="string",
750
+ required=False,
751
+ ),
752
+ "limit": ToolParameter(
753
+ description="Maximum number of tag names to return",
754
+ type="integer",
755
+ required=False,
756
+ ),
757
+ "max_stale_values": ToolParameter(
758
+ description="Maximum stale values parameter",
759
+ type="integer",
760
+ required=False,
761
+ ),
762
+ },
763
+ )
764
+ self._toolset = toolset
765
+
766
+ def _invoke(self, params: Dict, context: ToolInvokeContext) -> StructuredToolResult:
767
+ api = GrafanaTempoAPI(self._toolset.grafana_config)
768
+
769
+ start, end = BaseGrafanaTempoToolset.adjust_start_end_time(params)
770
+
771
+ try:
772
+ result = api.search_tag_names_v2(
773
+ scope=params.get("scope"),
774
+ q=params.get("q"),
775
+ start=start,
776
+ end=end,
777
+ limit=params.get("limit"),
778
+ max_stale_values=params.get("max_stale_values"),
779
+ )
780
+
781
+ query_filter = params.get("q") or "{}"
782
+ explore_url = _build_grafana_explore_tempo_url(
783
+ self._toolset.grafana_config,
784
+ query=query_filter,
785
+ start=start,
786
+ end=end,
787
+ )
788
+
789
+ return StructuredToolResult(
790
+ status=StructuredToolResultStatus.SUCCESS,
791
+ data=result,
792
+ params=params,
793
+ url=explore_url,
794
+ )
795
+ except Exception as e:
796
+ return StructuredToolResult(
797
+ status=StructuredToolResultStatus.ERROR,
798
+ error=str(e),
799
+ params=params,
800
+ )
801
+
802
+ def get_parameterized_one_liner(self, params: Dict) -> str:
803
+ return f"{toolset_name_for_one_liner(self._toolset.name)}: Discovered tag names"
804
+
805
+
806
+ class SearchTagValues(Tool):
807
+ def __init__(self, toolset: BaseGrafanaTempoToolset):
808
+ super().__init__(
809
+ name="tempo_search_tag_values",
810
+ description=(
811
+ "Get all values for a specific tag. "
812
+ "Uses the Tempo API endpoint: GET /api/v2/search/tag/{tag}/values. "
813
+ "Useful for discovering what values exist for a given tag."
814
+ ),
815
+ parameters={
816
+ "tag": ToolParameter(
817
+ description="The tag name to get values for (e.g., 'resource.service.name', 'http.status_code')",
818
+ type="string",
819
+ required=True,
820
+ ),
821
+ "q": ToolParameter(
822
+ description="TraceQL query to filter tag values (e.g., '{resource.cluster=\"us-east-1\"}')",
823
+ type="string",
824
+ required=False,
825
+ ),
826
+ "start": ToolParameter(
827
+ description=standard_start_datetime_tool_param_description(
828
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS
829
+ ),
830
+ type="string",
831
+ required=False,
832
+ ),
833
+ "end": ToolParameter(
834
+ description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
835
+ type="string",
836
+ required=False,
837
+ ),
838
+ "limit": ToolParameter(
839
+ description="Maximum number of values to return",
840
+ type="integer",
841
+ required=False,
842
+ ),
843
+ "max_stale_values": ToolParameter(
844
+ description="Maximum stale values parameter",
845
+ type="integer",
846
+ required=False,
847
+ ),
848
+ },
849
+ )
850
+ self._toolset = toolset
851
+
852
+ def _invoke(self, params: Dict, context: ToolInvokeContext) -> StructuredToolResult:
853
+ api = GrafanaTempoAPI(self._toolset.grafana_config)
854
+
855
+ start, end = BaseGrafanaTempoToolset.adjust_start_end_time(params)
856
+
857
+ try:
858
+ result = api.search_tag_values_v2(
859
+ tag=params["tag"],
860
+ q=params.get("q"),
861
+ start=start,
862
+ end=end,
863
+ limit=params.get("limit"),
864
+ max_stale_values=params.get("max_stale_values"),
865
+ )
866
+
867
+ explore_url = _build_grafana_explore_tempo_url(
868
+ self._toolset.grafana_config,
869
+ start=start,
870
+ end=end,
871
+ tags=params["tag"],
872
+ )
873
+
874
+ return StructuredToolResult(
875
+ status=StructuredToolResultStatus.SUCCESS,
876
+ data=result,
877
+ params=params,
878
+ url=explore_url,
879
+ )
880
+ except Exception as e:
881
+ return StructuredToolResult(
882
+ status=StructuredToolResultStatus.ERROR,
883
+ error=str(e),
884
+ params=params,
885
+ )
886
+
887
+ def get_parameterized_one_liner(self, params: Dict) -> str:
888
+ return f"{toolset_name_for_one_liner(self._toolset.name)}: Retrieved values for tag '{params.get('tag')}'"
889
+
890
+
891
+ class QueryMetricsInstant(Tool):
892
+ def __init__(self, toolset: BaseGrafanaTempoToolset):
893
+ super().__init__(
894
+ name="tempo_query_metrics_instant",
895
+ description=(
896
+ "Compute a single TraceQL metric value across time range. "
897
+ "Uses the Tempo API endpoint: GET /api/metrics/query. "
898
+ "TraceQL metrics compute aggregated metrics from trace data. "
899
+ "Returns a single value for the entire time range. "
900
+ "Basic syntax: {selector} | function(attribute) [by (grouping)]\n\n"
901
+ "TraceQL metrics can help answer questions like:\n"
902
+ "- How many database calls across all systems are downstream of your application?\n"
903
+ "- What services beneath a given endpoint are failing?\n"
904
+ "- What services beneath an endpoint are slow?\n\n"
905
+ "TraceQL metrics help you answer these questions by parsing your traces in aggregate. "
906
+ "The instant version returns a single value for the query and is preferred over "
907
+ "query_metrics_range when you don't need the granularity of a full time-series but want "
908
+ "a total sum or single value computed across the whole time range."
909
+ ),
910
+ parameters={
911
+ "q": ToolParameter(
912
+ description=(
913
+ "TraceQL metrics query. Supported functions: rate, count_over_time, "
914
+ "sum_over_time, max_over_time, min_over_time, avg_over_time, "
915
+ "quantile_over_time, histogram_over_time, compare. "
916
+ "Can use topk or bottomk modifiers. "
917
+ "Syntax: {selector} | function(attribute) [by (grouping)]. "
918
+ 'Example: {resource.service.name="api"} | avg_over_time(duration)'
919
+ ),
920
+ type="string",
921
+ required=True,
922
+ ),
923
+ "start": ToolParameter(
924
+ description=standard_start_datetime_tool_param_description(
925
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS
926
+ ),
927
+ type="string",
928
+ required=False,
929
+ ),
930
+ "end": ToolParameter(
931
+ description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
932
+ type="string",
933
+ required=False,
934
+ ),
935
+ },
936
+ )
937
+ self._toolset = toolset
938
+
939
+ def _invoke(self, params: Dict, context: ToolInvokeContext) -> StructuredToolResult:
940
+ api = GrafanaTempoAPI(self._toolset.grafana_config)
941
+
942
+ start, end = BaseGrafanaTempoToolset.adjust_start_end_time(params)
943
+
944
+ try:
945
+ result = api.query_metrics_instant(
946
+ q=params["q"],
947
+ start=start,
948
+ end=end,
949
+ )
950
+
951
+ explore_url = _build_grafana_explore_tempo_url(
952
+ self._toolset.grafana_config,
953
+ query=params["q"],
954
+ start=start,
955
+ end=end,
956
+ )
957
+
958
+ return StructuredToolResult(
959
+ status=StructuredToolResultStatus.SUCCESS,
960
+ data=result,
961
+ params=params,
962
+ url=explore_url,
963
+ )
964
+ except Exception as e:
965
+ return StructuredToolResult(
966
+ status=StructuredToolResultStatus.ERROR,
967
+ error=str(e),
968
+ params=params,
969
+ )
970
+
971
+ def get_parameterized_one_liner(self, params: Dict) -> str:
972
+ return (
973
+ f"{toolset_name_for_one_liner(self._toolset.name)}: Computed TraceQL metric"
974
+ )
975
+
976
+
977
+ class QueryMetricsRange(Tool):
978
+ def __init__(self, toolset: BaseGrafanaTempoToolset):
979
+ super().__init__(
980
+ name="tempo_query_metrics_range",
981
+ description=(
982
+ "Get time series data from TraceQL metrics queries. "
983
+ "Uses the Tempo API endpoint: GET /api/metrics/query_range. "
984
+ "Returns metrics computed at regular intervals (controlled by 'step' parameter). "
985
+ "Use this for graphing metrics over time or analyzing trends. "
986
+ "Basic syntax: {selector} | function(attribute) [by (grouping)]\n\n"
987
+ "TraceQL metrics can help answer questions like:\n"
988
+ "- How many database calls across all systems are downstream of your application?\n"
989
+ "- What services beneath a given endpoint are failing?\n"
990
+ "- What services beneath an endpoint are slow?\n\n"
991
+ "TraceQL metrics help you answer these questions by parsing your traces in aggregate."
992
+ ),
993
+ parameters={
994
+ "q": ToolParameter(
995
+ description=(
996
+ "TraceQL metrics query. Supported functions: rate, count_over_time, "
997
+ "sum_over_time, max_over_time, min_over_time, avg_over_time, "
998
+ "quantile_over_time, histogram_over_time, compare. "
999
+ "Can use topk or bottomk modifiers. "
1000
+ "Syntax: {selector} | function(attribute) [by (grouping)]. "
1001
+ 'Example: {resource.service.name="api"} | avg_over_time(duration)'
1002
+ ),
1003
+ type="string",
1004
+ required=True,
1005
+ ),
1006
+ "step": ToolParameter(
1007
+ description="Time series granularity (e.g., '1m', '5m', '1h')",
1008
+ type="string",
1009
+ required=False,
1010
+ ),
1011
+ "start": ToolParameter(
1012
+ description=standard_start_datetime_tool_param_description(
1013
+ DEFAULT_GRAPH_TIME_SPAN_SECONDS
1014
+ ),
1015
+ type="string",
1016
+ required=False,
1017
+ ),
1018
+ "end": ToolParameter(
1019
+ description=STANDARD_END_DATETIME_TOOL_PARAM_DESCRIPTION,
1020
+ type="string",
1021
+ required=False,
1022
+ ),
1023
+ "exemplars": ToolParameter(
1024
+ description="Maximum number of exemplars to return",
1025
+ type="integer",
1026
+ required=False,
1027
+ ),
1028
+ },
1029
+ )
1030
+ self._toolset = toolset
1031
+
1032
+ def _invoke(self, params: Dict, context: ToolInvokeContext) -> StructuredToolResult:
1033
+ api = GrafanaTempoAPI(self._toolset.grafana_config)
1034
+
1035
+ start, end = BaseGrafanaTempoToolset.adjust_start_end_time(params)
1036
+
1037
+ # Calculate appropriate step
1038
+ step_param = params.get("step")
1039
+ step_seconds = duration_string_to_seconds(step_param) if step_param else None
1040
+ adjusted_step = adjust_step_for_max_points(
1041
+ end - start,
1042
+ int(MAX_GRAPH_POINTS),
1043
+ step_seconds,
1044
+ )
1045
+ step = seconds_to_duration_string(adjusted_step)
1046
+
1047
+ try:
1048
+ result = api.query_metrics_range(
1049
+ q=params["q"],
1050
+ step=step,
1051
+ start=start,
1052
+ end=end,
1053
+ exemplars=params.get("exemplars"),
1054
+ )
1055
+
1056
+ explore_url = _build_grafana_explore_tempo_url(
1057
+ self._toolset.grafana_config,
1058
+ query=params["q"],
1059
+ start=start,
1060
+ end=end,
1061
+ )
1062
+
1063
+ return StructuredToolResult(
1064
+ status=StructuredToolResultStatus.SUCCESS,
1065
+ data=result,
1066
+ params=params,
1067
+ url=explore_url,
1068
+ )
1069
+ except Exception as e:
1070
+ return StructuredToolResult(
1071
+ status=StructuredToolResultStatus.ERROR,
1072
+ error=str(e),
1073
+ params=params,
1074
+ )
1075
+
1076
+ def get_parameterized_one_liner(self, params: Dict) -> str:
1077
+ return f"{toolset_name_for_one_liner(self._toolset.name)}: Retrieved TraceQL metrics time series"
1078
+
1079
+
556
1080
  class GrafanaTempoToolset(BaseGrafanaTempoToolset):
557
1081
  def __init__(self):
558
1082
  super().__init__(
@@ -562,9 +1086,13 @@ class GrafanaTempoToolset(BaseGrafanaTempoToolset):
562
1086
  docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/grafanatempo/",
563
1087
  tools=[
564
1088
  FetchTracesSimpleComparison(self),
565
- GetTempoTraces(self),
566
- GetTempoTraceById(self),
567
- GetTempoTags(self),
1089
+ SearchTracesByQuery(self),
1090
+ SearchTracesByTags(self),
1091
+ QueryTraceById(self),
1092
+ SearchTagNames(self),
1093
+ SearchTagValues(self),
1094
+ QueryMetricsInstant(self),
1095
+ QueryMetricsRange(self),
568
1096
  ],
569
1097
  )
570
1098
  template_file_path = os.path.abspath(