holmesgpt 0.14.0a0__py3-none-any.whl → 0.14.1a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of holmesgpt might be problematic.

Files changed (78)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +10 -2
  3. holmes/common/env_vars.py +8 -1
  4. holmes/config.py +66 -139
  5. holmes/core/investigation.py +1 -2
  6. holmes/core/llm.py +256 -51
  7. holmes/core/models.py +2 -0
  8. holmes/core/safeguards.py +4 -4
  9. holmes/core/supabase_dal.py +14 -8
  10. holmes/core/tool_calling_llm.py +101 -101
  11. holmes/core/tools.py +260 -25
  12. holmes/core/tools_utils/data_types.py +81 -0
  13. holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
  14. holmes/core/tools_utils/tool_executor.py +2 -2
  15. holmes/core/toolset_manager.py +150 -3
  16. holmes/core/transformers/__init__.py +23 -0
  17. holmes/core/transformers/base.py +62 -0
  18. holmes/core/transformers/llm_summarize.py +174 -0
  19. holmes/core/transformers/registry.py +122 -0
  20. holmes/core/transformers/transformer.py +31 -0
  21. holmes/main.py +5 -0
  22. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  23. holmes/plugins/toolsets/aks.yaml +64 -0
  24. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
  25. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
  26. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
  27. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
  28. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
  29. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
  30. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
  31. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
  32. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
  33. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
  34. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
  35. holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
  36. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  37. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  38. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +15 -15
  39. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +8 -8
  40. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +20 -20
  41. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +8 -8
  42. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +17 -17
  43. holmes/plugins/toolsets/git.py +21 -21
  44. holmes/plugins/toolsets/grafana/common.py +2 -2
  45. holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
  46. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +3 -3
  47. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +123 -23
  48. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +165 -307
  49. holmes/plugins/toolsets/internet/internet.py +3 -3
  50. holmes/plugins/toolsets/internet/notion.py +3 -3
  51. holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
  52. holmes/plugins/toolsets/kafka.py +18 -18
  53. holmes/plugins/toolsets/kubernetes.yaml +58 -0
  54. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  55. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  56. holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
  57. holmes/plugins/toolsets/newrelic.py +5 -5
  58. holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
  59. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  60. holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
  61. holmes/plugins/toolsets/prometheus/prometheus.py +172 -39
  62. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +25 -0
  63. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  64. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
  65. holmes/plugins/toolsets/robusta/robusta.py +10 -10
  66. holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
  67. holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
  68. holmes/plugins/toolsets/utils.py +88 -0
  69. holmes/utils/config_utils.py +91 -0
  70. holmes/utils/env.py +7 -0
  71. holmes/utils/holmes_status.py +2 -1
  72. holmes/utils/sentry_helper.py +41 -0
  73. holmes/utils/stream.py +9 -0
  74. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1a0.dist-info}/METADATA +9 -13
  75. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1a0.dist-info}/RECORD +78 -68
  76. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1a0.dist-info}/LICENSE.txt +0 -0
  77. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1a0.dist-info}/WHEEL +0 -0
  78. {holmesgpt-0.14.0a0.dist-info → holmesgpt-0.14.1a0.dist-info}/entry_points.txt +0 -0
@@ -4,7 +4,7 @@ import requests # type: ignore
 import os
 from typing import Any, Optional, Dict, List, Tuple
 from pydantic import BaseModel
-from holmes.core.tools import StructuredToolResult, ToolResultStatus
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus

 from holmes.core.tools import (
     Toolset,
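
The recurring change throughout the hunks below is a rename of the `ToolResultStatus` enum to `StructuredToolResultStatus` in `holmes.core.tools`. As a rough illustration only (not code from the package), here is a minimal sketch of the call pattern after the rename, using the constructor keywords (`status`, `data`, `error`, `params`) and enum members (`SUCCESS`, `ERROR`, `NO_DATA`) visible in the diff; the helper function itself is hypothetical:

```python
# Hypothetical helper illustrating the renamed enum; not part of holmesgpt itself.
from typing import Any, Dict, Optional

from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus


def build_result(
    data: Any,
    params: Optional[Dict] = None,
    error: Optional[str] = None,
) -> StructuredToolResult:
    # Error path: report the failure with the renamed ERROR member.
    if error is not None:
        return StructuredToolResult(
            status=StructuredToolResultStatus.ERROR,
            error=error,
            params=params,
        )
    # Empty result: NO_DATA replaces the old ToolResultStatus.NO_DATA.
    if not data:
        return StructuredToolResult(
            status=StructuredToolResultStatus.NO_DATA,
            params=params,
        )
    # Success path.
    return StructuredToolResult(
        status=StructuredToolResultStatus.SUCCESS,
        data=data,
        params=params,
    )
```
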
@@ -259,7 +259,7 @@ class GitReadFileWithLineNumbers(Tool):
             resp = requests.get(url, headers=headers)
             if resp.status_code != 200:
                 return StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
+                    status=StructuredToolResultStatus.ERROR,
                     data=self.toolset._sanitize_error(
                         f"Error fetching file: {resp.text}"
                     ),
@@ -268,13 +268,13 @@ class GitReadFileWithLineNumbers(Tool):
             content = base64.b64decode(resp.json()["content"]).decode().splitlines()
             numbered = "\n".join(f"{i+1}: {line}" for i, line in enumerate(content))
             return StructuredToolResult(
-                status=ToolResultStatus.SUCCESS,
+                status=StructuredToolResultStatus.SUCCESS,
                 data=numbered,
                 params=params,
             )
         except Exception as e:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 data=self.toolset._sanitize_error(str(e)),
                 params=params,
             )
@@ -304,7 +304,7 @@ class GitListFiles(Tool):
             resp = requests.get(url, headers=headers)
             if resp.status_code != 200:
                 return StructuredToolResult(
-                    status=ToolResultStatus.ERROR,
+                    status=StructuredToolResultStatus.ERROR,
                     data=self.toolset._sanitize_error(
                         f"Error listing files: {resp.text}"
                     ),
@@ -312,13 +312,13 @@ class GitListFiles(Tool):
                 )
             paths = [entry["path"] for entry in resp.json()["tree"]]
             return StructuredToolResult(
-                status=ToolResultStatus.SUCCESS,
+                status=StructuredToolResultStatus.SUCCESS,
                 data=paths,
                 params=params,
             )
         except Exception as e:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 data=self.toolset._sanitize_error(str(e)),
                 params=params,
             )
@@ -353,13 +353,13 @@ class GitListOpenPRs(Tool):
                 for pr in prs
             ]
             return StructuredToolResult(
-                status=ToolResultStatus.SUCCESS,
+                status=StructuredToolResultStatus.SUCCESS,
                 data=formatted,
                 params=params,
             )
         except Exception as e:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 data=self.toolset._sanitize_error(str(e)),
                 params=params,
             )
@@ -413,14 +413,14 @@ class GitExecuteChanges(Tool):
     ) -> StructuredToolResult:
         def error(msg: str) -> StructuredToolResult:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 data=self.toolset._sanitize_error(msg),
                 params=params,
             )

         def success(msg: Any) -> StructuredToolResult:
             return StructuredToolResult(
-                status=ToolResultStatus.SUCCESS, data=msg, params=params
+                status=StructuredToolResultStatus.SUCCESS, data=msg, params=params
             )

         def modify_lines(lines: List[str]) -> List[str]:
@@ -643,24 +643,24 @@ class GitUpdatePR(Tool):
         # Validate inputs
         if not commit_message.strip():
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error="Tool call failed to run: Commit message cannot be empty",
             )
         if not filename.strip():
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error="Tool call failed to run: Filename cannot be empty",
             )
         if line < 1:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error="Tool call failed to run: Line number must be positive",
             )

         # Verify this is a PR created by our tool
         if not self.toolset.is_created_pr(pr_number):
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Tool call failed to run: PR #{pr_number} was not created by this tool. Only PRs created using git_execute_changes can be updated.",
             )

@@ -714,7 +714,7 @@ class GitUpdatePR(Tool):
                del content_lines[line - 1]
            else:
                return StructuredToolResult(
-                   status=ToolResultStatus.ERROR,
+                   status=StructuredToolResultStatus.ERROR,
                    error=f"Tool call failed to run: Invalid command: {command}",
                )

@@ -722,7 +722,7 @@ class GitUpdatePR(Tool):

            if dry_run:
                return StructuredToolResult(
-                   status=ToolResultStatus.SUCCESS,
+                   status=StructuredToolResultStatus.SUCCESS,
                    data=f"DRY RUN: Updated content for PR #{pr_number}:\n\n{updated_content}",
                )

@@ -731,13 +731,13 @@ class GitUpdatePR(Tool):
                    pr_number, filename, updated_content, commit_message
                )
                return StructuredToolResult(
-                   status=ToolResultStatus.SUCCESS,
+                   status=StructuredToolResultStatus.SUCCESS,
                    data=f"Added commit to PR #{pr_number} successfully",
                )

            except Exception as e:
                return StructuredToolResult(
-                   status=ToolResultStatus.ERROR,
+                   status=StructuredToolResultStatus.ERROR,
                    error=self.toolset._sanitize_error(
                        f"Tool call failed to run: Error updating PR: {str(e)}"
                    ),
@@ -745,14 +745,14 @@

         except requests.exceptions.RequestException as e:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=self.toolset._sanitize_error(
                     f"Tool call failed to run: Network error: {str(e)}"
                 ),
             )
         except Exception as e:
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=self.toolset._sanitize_error(
                     f"Tool call failed to run: Unexpected error: {str(e)}"
                 ),
@@ -3,7 +3,7 @@ from typing import Dict, Optional
 from pydantic import BaseModel
 import datetime

-from holmes.core.tools import StructuredToolResult, ToolResultStatus
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus


 class GrafanaConfig(BaseModel):
@@ -61,7 +61,7 @@ def ensure_grafana_uid_or_return_error_result(
 ) -> Optional[StructuredToolResult]:
     if not config.grafana_datasource_uid:
         return StructuredToolResult(
-            status=ToolResultStatus.ERROR,
+            status=StructuredToolResultStatus.ERROR,
             error="This tool only works when the toolset is configued ",
         )
     else:
@@ -4,7 +4,7 @@ from holmes.core.tools import (
     StructuredToolResult,
     Tool,
     ToolParameter,
-    ToolResultStatus,
+    StructuredToolResultStatus,
 )
 from holmes.plugins.toolsets.grafana.base_grafana_toolset import BaseGrafanaToolset
 import requests # type: ignore
@@ -90,9 +90,9 @@ class ListAndBuildGrafanaDashboardURLs(Tool):
             )

             return StructuredToolResult(
-                status=ToolResultStatus.SUCCESS
+                status=StructuredToolResultStatus.SUCCESS
                 if formatted_dashboards
-                else ToolResultStatus.NO_DATA,
+                else StructuredToolResultStatus.NO_DATA,
                 data="\n".join(formatted_dashboards)
                 if formatted_dashboards
                 else "No dashboards found.",
@@ -102,7 +102,7 @@ class ListAndBuildGrafanaDashboardURLs(Tool):
         except requests.RequestException as e:
             logging.error(f"Error fetching dashboards: {str(e)}")
             return StructuredToolResult(
-                status=ToolResultStatus.ERROR,
+                status=StructuredToolResultStatus.ERROR,
                 error=f"Error fetching dashboards: {str(e)}",
                 url=url,
                 params=params,
@@ -22,7 +22,7 @@ from holmes.plugins.toolsets.utils import (
 from holmes.plugins.toolsets.grafana.loki_api import (
     query_loki_logs_by_label,
 )
-from holmes.core.tools import StructuredToolResult, ToolResultStatus
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus


 class GrafanaLokiLabelsConfig(BaseModel):
@@ -99,12 +99,12 @@ class GrafanaLokiToolset(BasePodLoggingToolset):
         if logs:
             logs.sort(key=lambda x: x["timestamp"])
             return StructuredToolResult(
-                status=ToolResultStatus.SUCCESS,
+                status=StructuredToolResultStatus.SUCCESS,
                 data="\n".join([format_log(log) for log in logs]),
                 params=params.model_dump(),
             )
         else:
             return StructuredToolResult(
-                status=ToolResultStatus.NO_DATA,
+                status=StructuredToolResultStatus.NO_DATA,
                 params=params.model_dump(),
             )
@@ -5,43 +5,142 @@ Assume every application provides tempo traces.
 ## API Endpoints and Tool Mapping

 1. **Trace Search** (GET /api/search)
-   - `search_traces_by_query`: Use with 'q' parameter for TraceQL queries
-   - `search_traces_by_tags`: Use with 'tags' parameter for logfmt queries
+   - `tempo_search_traces_by_query`: Use with 'q' parameter for TraceQL queries
+   - `tempo_search_traces_by_tags`: Use with 'tags' parameter for logfmt queries

 2. **Trace Details** (GET /api/v2/traces/{trace_id})
-   - `query_trace_by_id`: Retrieve full trace data
+   - `tempo_query_trace_by_id`: Retrieve full trace data

 3. **Tag Discovery**
-   - `search_tag_names` (GET /api/v2/search/tags): List available tags
-   - `search_tag_values` (GET /api/v2/search/tag/{tag}/values): Get values for a tag
+   - `tempo_search_tag_names` (GET /api/v2/search/tags): List available tags
+   - `tempo_search_tag_values` (GET /api/v2/search/tag/{tag}/values): Get values for a tag

 4. **TraceQL Metrics**
-   - `query_metrics_instant` (GET /api/metrics/query): Single value computation
-   - `query_metrics_range` (GET /api/metrics/query_range): Time series data
+   - `tempo_query_metrics_instant` (GET /api/metrics/query): Single value computation
+   - `tempo_query_metrics_range` (GET /api/metrics/query_range): Time series data

 ## Usage Workflow

 ### 1. Discovering Available Data
 Start by understanding what tags and values exist:
-- Use `search_tag_names` to discover available tags
-- Use `search_tag_values` to see all values for a specific tag (e.g., service names)
+- Use `tempo_search_tag_names` to discover available tags
+- Use `tempo_search_tag_values` to see all values for a specific tag (e.g., service names)

 ### 2. Searching for Traces
+
 **TraceQL Search (recommended):**
-Use `search_traces_by_query` with TraceQL syntax for powerful filtering:
-- Find errors: `{span.http.status_code>=400}`
-- Service traces: `{resource.service.name="api"}`
-- Slow traces: `{duration>100ms}`
-- Complex queries: `{resource.service.name="api" && span.http.status_code=500 && duration>1s}`
+Use `tempo_search_traces_by_query` with TraceQL syntax for powerful filtering.
+
+**TraceQL Capabilities:**
+TraceQL can select traces based on the following:
+- **Span and resource attributes** - Filter by any attribute on spans or resources
+- **Timing and duration** - Filter by trace/span duration
+- **Basic aggregates** - Use aggregate functions to compute values across spans
+
+**Supported Aggregate Functions:**
+- `count()` - Count the number of spans matching the criteria
+- `avg(attribute)` - Calculate average of a numeric attribute across spans
+- `min(attribute)` - Find minimum value of a numeric attribute
+- `max(attribute)` - Find maximum value of a numeric attribute
+- `sum(attribute)` - Sum values of a numeric attribute across spans
+
+**Aggregate Function Usage:**
+Aggregates are used with the pipe operator `|` to filter traces based on computed values across their spans.
+
+**Aggregate Examples:**
+- `{ span.http.status_code = 200 } | count() > 3` - Find traces with more than 3 spans having HTTP 200 status
+- `{ } | sum(span.bytesProcessed) > 1000000000` - Find traces where total processed bytes exceed 1 GB
+- `{ status = error } | by(resource.service.name) | count() > 1` - Find services with more than 1 error
+
+**Select Function:**
+- `{ status = error } | select(span.http.status_code, span.http.url)` - Select specific attributes from error spans
+
+**TraceQL Query Structure:**
+TraceQL queries follow the pattern: `{span-selectors} | aggregate`
+
+**TraceQL Query Examples (from official docs):**
+
+1. **Find traces of a specific operation:**
+   ```
+   {resource.service.name = "frontend" && name = "POST /api/orders"}
+   ```
+   ```
+   {
+     resource.service.namespace = "ecommerce" &&
+     resource.service.name = "frontend" &&
+     resource.deployment.environment = "production" &&
+     name = "POST /api/orders"
+   }
+   ```
+
+2. **Find traces with a particular outcome:**
+   ```
+   {
+     resource.service.name="frontend" &&
+     name = "POST /api/orders" &&
+     status = error
+   }
+   ```
+   ```
+   {
+     resource.service.name="frontend" &&
+     name = "POST /api/orders" &&
+     span.http.status_code >= 500
+   }
+   ```
+
+3. **Find traces with a particular behavior:**
+   ```
+   {span.service.name="frontend" && name = "GET /api/products/{id}"} && {span.db.system="postgresql"}
+   ```
+
+4. **Find traces across environments:**
+   ```
+   { resource.deployment.environment = "production" } && { resource.deployment.environment = "staging" }
+   ```
+
+5. **Structural operators (advanced):**
+   ```
+   { resource.service.name="frontend" } >> { status = error } # Frontend spans followed by errors
+   { } !< { resource.service.name = "productcatalogservice" } # Traces without productcatalog as child
+   { resource.service.name = "productcatalogservice" } ~ { resource.service.name="frontend" } # Sibling spans
+   ```
+
+6. **Additional operator examples:**
+   ```
+   { span.http.method = "GET" && status = ok } && { span.http.method = "DELETE" && status != ok } # && for multiple conditions
+   ```
+
+   ```
+   { resource.deployment.environment =~ "prod-.*" && span.http.status_code = 200 } # =~ regex match
+   { span.http.method =~ "DELETE|GET" } # Regex match multiple values
+   { trace:rootName !~ ".*perf.*" } # !~ negated regex
+   { resource.cloud.region = "us-east-1" } || { resource.cloud.region = "us-west-1" } # || OR operator
+   ```
+
+   ```
+   { span.http.status_code >= 400 && span.http.status_code < 500 } # Client errors (4xx)
+   { span.http.url = "/path/of/api" } >> { span.db.name = "db-shard-001" } # >> descendant
+   { span.http.status_code = 200 } | select(resource.service.name) # Select specific attributes
+   ```
+
+**Common Attributes to Query:**
+- `resource.service.name` - Service name
+- `resource.k8s.*` - Kubernetes metadata (pod.name, namespace.name, deployment.name, etc.)
+- `span.http.*` - HTTP attributes (status_code, method, route, url, etc.)
+- `name` - Span name
+- `status` - Span status (error, ok)
+- `duration` - Span duration
+- `kind` - Span kind (server, client, producer, consumer, internal)

 **Tag-based Search (legacy):**
-Use `search_traces_by_tags` with logfmt format when you need min/max duration filters:
-- Example: `resource.service.name="api" http.status_code="500"`
+Use `tempo_search_traces_by_tags` with logfmt format when you need min/max duration filters:
+- Example: `service.name="api" http.status_code="500"`
 - Supports `min_duration` and `max_duration` parameters

 ### 3. Analyzing Specific Traces
 When you have trace IDs from search results:
-- Use `query_trace_by_id` to get full trace details
+- Use `tempo_query_trace_by_id` to get full trace details
 - Examine spans for errors, slow operations, and bottlenecks

 ### 4. Computing Metrics from Traces
@@ -115,26 +214,26 @@ TraceQL metrics parse your traces in aggregate to provide RED (Rate, Error, Dura
 ```

 10. **Using topk modifier** - Find top 10 endpoints by request rate:
-```
-{ resource.service.name = "foo" } | rate() by (span.http.url) | topk(10)
-```
+    ```
+    { resource.service.name = "foo" } | rate() by (span.http.url) | topk(10)
+    ```

 **Choosing Between Instant and Range Queries:**

-**Instant Metrics** (`query_metrics_instant`) - Returns a single aggregated value for the entire time range. Use this when:
+**Instant Metrics** (`tempo_query_metrics_instant`) - Returns a single aggregated value for the entire time range. Use this when:
 - You need a total count or sum across the whole period
 - You want a single metric value (e.g., total error count, average latency)
 - You don't need to see how the metric changes over time
 - You're computing a KPI or summary statistic

-**Time Series Metrics** (`query_metrics_range`) - Returns values at regular intervals controlled by the 'step' parameter. Use this when:
+**Time Series Metrics** (`tempo_query_metrics_range`) - Returns values at regular intervals controlled by the 'step' parameter. Use this when:
 - You need to graph metrics over time or analyze trends
 - You want to see patterns, spikes, or changes in metrics
 - You're troubleshooting time-based issues
 - You need to correlate metrics with specific time periods

 ## Special workflow for performance issues
-When investigating performance issues in kubernetes via traces, call fetch_tempo_traces_comparative_sample. This tool provides comprehensive analysis for identifying patterns.
+When investigating performance issues in kubernetes via traces, call tempo_fetch_traces_comparative_sample. This tool provides comprehensive analysis for identifying patterns.

 ## Important Notes
 - TraceQL is the modern query language - prefer it over tag-based search
@@ -145,3 +244,4 @@ When investigating performance issues in kubernetes via traces, call fetch_tempo
 - Use time filters (start/end) to improve query performance
 - To get information about Kubernetes resources try these first: resource.service.name, resource.k8s.pod.name, resource.k8s.namespace.name, resource.k8s.deployment.name, resource.k8s.node.name, resource.k8s.container.name
 - TraceQL and TraceQL metrics language are complex. If you get empty data, try to simplify your query and try again!
+- IMPORTANT: TraceQL is not the same as 'TraceQL metrics' - Make sure you use the correct syntax and functions