holmesgpt 0.13.2__py3-none-any.whl → 0.16.2a0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. holmes/__init__.py +1 -1
  2. holmes/clients/robusta_client.py +17 -4
  3. holmes/common/env_vars.py +40 -1
  4. holmes/config.py +114 -144
  5. holmes/core/conversations.py +53 -14
  6. holmes/core/feedback.py +191 -0
  7. holmes/core/investigation.py +18 -22
  8. holmes/core/llm.py +489 -88
  9. holmes/core/models.py +103 -1
  10. holmes/core/openai_formatting.py +13 -0
  11. holmes/core/prompt.py +1 -1
  12. holmes/core/safeguards.py +4 -4
  13. holmes/core/supabase_dal.py +293 -100
  14. holmes/core/tool_calling_llm.py +423 -323
  15. holmes/core/tools.py +311 -33
  16. holmes/core/tools_utils/token_counting.py +14 -0
  17. holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
  18. holmes/core/tools_utils/tool_executor.py +13 -8
  19. holmes/core/toolset_manager.py +155 -4
  20. holmes/core/tracing.py +6 -1
  21. holmes/core/transformers/__init__.py +23 -0
  22. holmes/core/transformers/base.py +62 -0
  23. holmes/core/transformers/llm_summarize.py +174 -0
  24. holmes/core/transformers/registry.py +122 -0
  25. holmes/core/transformers/transformer.py +31 -0
  26. holmes/core/truncation/compaction.py +59 -0
  27. holmes/core/truncation/dal_truncation_utils.py +23 -0
  28. holmes/core/truncation/input_context_window_limiter.py +218 -0
  29. holmes/interactive.py +177 -24
  30. holmes/main.py +7 -4
  31. holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
  32. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  33. holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
  34. holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
  35. holmes/plugins/prompts/generic_ask.jinja2 +2 -4
  36. holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
  37. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
  38. holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
  39. holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
  40. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
  41. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
  42. holmes/plugins/runbooks/__init__.py +117 -18
  43. holmes/plugins/runbooks/catalog.json +2 -0
  44. holmes/plugins/toolsets/__init__.py +21 -8
  45. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  46. holmes/plugins/toolsets/aks.yaml +64 -0
  47. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
  48. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
  49. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
  50. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
  51. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
  52. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
  53. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
  54. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
  55. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
  56. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
  57. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
  58. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
  59. holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
  60. holmes/plugins/toolsets/bash/common/bash.py +7 -7
  61. holmes/plugins/toolsets/cilium.yaml +284 -0
  62. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
  63. holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
  64. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
  65. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
  66. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
  67. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
  68. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
  69. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
  70. holmes/plugins/toolsets/git.py +51 -46
  71. holmes/plugins/toolsets/grafana/common.py +15 -3
  72. holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
  73. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
  74. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
  75. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
  76. holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
  77. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
  78. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  79. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
  80. holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
  81. holmes/plugins/toolsets/internet/internet.py +6 -7
  82. holmes/plugins/toolsets/internet/notion.py +5 -6
  83. holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
  84. holmes/plugins/toolsets/kafka.py +25 -36
  85. holmes/plugins/toolsets/kubernetes.yaml +58 -84
  86. holmes/plugins/toolsets/kubernetes_logs.py +6 -6
  87. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  88. holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
  89. holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
  90. holmes/plugins/toolsets/newrelic/__init__.py +0 -0
  91. holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
  92. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
  93. holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
  94. holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
  95. holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
  96. holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  97. holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
  98. holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
  99. holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
  100. holmes/plugins/toolsets/openshift.yaml +283 -0
  101. holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
  102. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
  103. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  104. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
  105. holmes/plugins/toolsets/robusta/robusta.py +236 -65
  106. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  107. holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
  108. holmes/plugins/toolsets/service_discovery.py +1 -1
  109. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  110. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  111. holmes/plugins/toolsets/utils.py +88 -0
  112. holmes/utils/config_utils.py +91 -0
  113. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  114. holmes/utils/env.py +7 -0
  115. holmes/utils/global_instructions.py +75 -10
  116. holmes/utils/holmes_status.py +2 -1
  117. holmes/utils/holmes_sync_toolsets.py +0 -2
  118. holmes/utils/krr_utils.py +188 -0
  119. holmes/utils/sentry_helper.py +41 -0
  120. holmes/utils/stream.py +61 -7
  121. holmes/version.py +34 -14
  122. holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
  123. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
  124. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
  125. holmes/core/performance_timing.py +0 -72
  126. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  127. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  128. holmes/plugins/toolsets/newrelic.py +0 -231
  129. holmes/plugins/toolsets/servicenow/install.md +0 -37
  130. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  131. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  132. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  133. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
  134. {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
holmes/plugins/toolsets/grafana/common.py
@@ -3,12 +3,12 @@ from typing import Dict, Optional
 from pydantic import BaseModel
 import datetime
 
-from holmes.core.tools import StructuredToolResult, ToolResultStatus
+from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
 
 
 class GrafanaConfig(BaseModel):
     """A config that represents one of the Grafana related tools like Loki or Tempo
-    If `grafana_datasource_uid` is set, then it is assume that Holmes will proxy all
+    If `grafana_datasource_uid` is set, then it is assumed that Holmes will proxy all
     requests through grafana. In this case `url` should be the grafana URL.
     If `grafana_datasource_uid` is not set, it is assumed that the `url` is the
     systems' URL
@@ -61,8 +61,20 @@ def ensure_grafana_uid_or_return_error_result(
 ) -> Optional[StructuredToolResult]:
     if not config.grafana_datasource_uid:
         return StructuredToolResult(
-            status=ToolResultStatus.ERROR,
+            status=StructuredToolResultStatus.ERROR,
             error="This tool only works when the toolset is configued ",
         )
     else:
         return None
+
+
+class GrafanaTempoLabelsConfig(BaseModel):
+    pod: str = "k8s.pod.name"
+    namespace: str = "k8s.namespace.name"
+    deployment: str = "k8s.deployment.name"
+    node: str = "k8s.node.name"
+    service: str = "service.name"
+
+
+class GrafanaTempoConfig(GrafanaConfig):
+    labels: GrafanaTempoLabelsConfig = GrafanaTempoLabelsConfig()
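
For reference, here is a minimal sketch of how the new label mapping can be consumed. The build_traceql_selector helper below is hypothetical and not part of the package; it only uses GrafanaTempoLabelsConfig as defined above and shows how overriding the defaults changes the TraceQL attribute names.

from holmes.plugins.toolsets.grafana.common import GrafanaTempoLabelsConfig

# Hypothetical helper (not in the package): build a TraceQL selector from the
# configured attribute names, so clusters that use non-default resource
# attributes can override the "k8s.*" defaults in GrafanaTempoLabelsConfig.
def build_traceql_selector(labels: GrafanaTempoLabelsConfig, namespace: str, pod: str) -> str:
    return f'{{resource.{labels.namespace}="{namespace}" && resource.{labels.pod}="{pod}"}}'

labels = GrafanaTempoLabelsConfig()  # defaults: k8s.namespace.name, k8s.pod.name, ...
print(build_traceql_selector(labels, "prod", "backend-api-68b7d9df9c-xyz12"))
# {resource.k8s.namespace.name="prod" && resource.k8s.pod.name="backend-api-68b7d9df9c-xyz12"}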
holmes/plugins/toolsets/grafana/grafana_api.py
@@ -6,37 +6,59 @@ import backoff
 from holmes.plugins.toolsets.grafana.common import (
     GrafanaConfig,
     build_headers,
-    get_base_url,
 )
 
 
 @backoff.on_exception(
-    backoff.expo,  # Exponential backoff
-    requests.exceptions.RequestException,  # Retry on request exceptions
-    max_tries=5,  # Maximum retries
+    backoff.expo,
+    requests.exceptions.RequestException,
+    max_tries=2,
     giveup=lambda e: isinstance(e, requests.exceptions.HTTPError)
     and e.response.status_code < 500,
 )
+def _try_health_url(url: str, headers: dict) -> None:
+    response = requests.get(url, headers=headers, timeout=5)
+    response.raise_for_status()
+
+
 def grafana_health_check(config: GrafanaConfig) -> Tuple[bool, str]:
-    base_url = get_base_url(config)
-    url = f"{base_url}/{config.healthcheck}"
-    try:
-        headers_ = build_headers(api_key=config.api_key, additional_headers=None)
-
-        response = requests.get(url, headers=headers_, timeout=10)  # Added timeout
-        response.raise_for_status()
-        return True, ""
-    except Exception as e:
-        logging.error(f"Failed to fetch grafana health status at {url}", exc_info=True)
-        error_msg = f"Failed to fetch grafana health status at {url}. {str(e)}"
-
-        # Add helpful hint if this looks like a common misconfiguration
-        if config.grafana_datasource_uid and ":3100" in config.url:
-            error_msg += (
-                "\n\nPossible configuration issue: grafana_datasource_uid is set but URL contains port 3100 "
-                "(typically used for direct Loki connections). Please verify:\n"
-                "- If connecting directly to Loki: remove grafana_datasource_uid from config\n"
-                "- If connecting via Grafana proxy: ensure URL points to Grafana (usually port 3000)"
+    """
+    Tests a healthcheck url for grafna loki.
+    1. When using grafana as proxy, grafana_datasource_uid is provided, use the data source health url (docs are added).
+    2. When using loki directly there are two cases.
+        a. Using loki cloud, health check is provided on the base url.
+        b. Using local loki, uses url/healthcheck default is url/ready
+        c. This function tries both direct loki cases for the user.
+    """
+    health_urls = []
+    if config.grafana_datasource_uid:
+        # https://grafana.com/docs/grafana/latest/developers/http_api/data_source/#check-data-source-health
+        health_urls.append(
+            f"{config.url}/api/datasources/uid/{config.grafana_datasource_uid}/health"
+        )
+    else:
+        health_urls.append(f"{config.url}/{config.healthcheck}")
+        health_urls.append(config.url)  # loki cloud uses no suffix.
+    g_headers = build_headers(api_key=config.api_key, additional_headers=config.headers)
+
+    error_msg = ""
+    for url in health_urls:
+        try:
+            _try_health_url(url, g_headers)
+            return True, ""
+        except Exception as e:
+            logging.debug(
+                f"Failed to fetch grafana health status at {url}", exc_info=True
             )
+            error_msg += f"Failed to fetch grafana health status at {url}. {str(e)}\n"
+
+    # Add helpful hint if this looks like a common misconfiguration
+    if config.grafana_datasource_uid and ":3100" in config.url:
+        error_msg += (
+            "\n\nPossible configuration issue: grafana_datasource_uid is set but URL contains port 3100 "
+            "(typically used for direct Loki connections). Please verify:\n"
+            "- If connecting directly to Loki: remove grafana_datasource_uid from config\n"
+            "- If connecting via Grafana proxy: ensure URL points to Grafana (usually port 3000)"
+        )
 
-        return False, error_msg
+    return False, error_msg
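
To make the new behavior concrete, here is a small standalone sketch of the URL-selection logic above. The candidate_health_urls helper is hypothetical and not part of the package; the "ready" default mirrors the docstring's url/ready note.

from typing import List, Optional

# Standalone sketch (not in the package) mirroring grafana_health_check's
# URL selection: proxy mode uses the Grafana data source health endpoint,
# direct mode tries the configured healthcheck path and then the bare URL.
def candidate_health_urls(url: str, datasource_uid: Optional[str], healthcheck: str = "ready") -> List[str]:
    if datasource_uid:
        return [f"{url}/api/datasources/uid/{datasource_uid}/health"]
    return [f"{url}/{healthcheck}", url]

print(candidate_health_urls("http://grafana:3000", "loki-datasource-uid"))
# ['http://grafana:3000/api/datasources/uid/loki-datasource-uid/health']
print(candidate_health_urls("http://loki:3100", None))
# ['http://loki:3100/ready', 'http://loki:3100']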
holmes/plugins/toolsets/grafana/grafana_tempo_api.py (new file)
@@ -0,0 +1,454 @@
"""Grafana Tempo API wrapper for querying traces and metrics."""

import logging
from typing import Any, Dict, Optional, Union
from urllib.parse import quote

import backoff
import requests  # type: ignore

from holmes.plugins.toolsets.grafana.common import (
    GrafanaTempoConfig,
    build_headers,
    get_base_url,
)


logger = logging.getLogger(__name__)


class TempoAPIError(Exception):
    """Custom exception for Tempo API errors with detailed response information."""

    def __init__(self, status_code: int, response_text: str, url: str):
        self.status_code = status_code
        self.response_text = response_text
        self.url = url

        # Try to extract error message from JSON response
        try:
            import json

            error_data = json.loads(response_text)
            # Tempo may return errors in different formats
            error_message = (
                error_data.get("error")
                or error_data.get("message")
                or error_data.get("errorType")
                or response_text
            )
        except (json.JSONDecodeError, TypeError):
            error_message = response_text

        super().__init__(f"Tempo API error {status_code}: {error_message}")


class GrafanaTempoAPI:
    """Python wrapper for Grafana Tempo REST API.

    This class provides a clean interface to all Tempo API endpoints.
    """

    def __init__(self, config: GrafanaTempoConfig):
        """Initialize the Tempo API wrapper.

        Args:
            config: GrafanaTempoConfig instance with connection details
        """
        self.config = config
        self.base_url = get_base_url(config)
        self.headers = build_headers(config.api_key, config.headers)

    def _make_request(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        path_params: Optional[Dict[str, str]] = None,
        timeout: int = 30,
        retries: int = 3,
    ) -> Dict[str, Any]:
        """Make HTTP request to Tempo API with retry logic.

        Args:
            endpoint: API endpoint path (e.g., "/api/echo")
            params: Query parameters
            path_params: Parameters to substitute in the endpoint path
            timeout: Request timeout in seconds
            retries: Number of retry attempts

        Returns:
            JSON response from the API

        Raises:
            Exception: If the request fails after all retries
        """
        # Format endpoint with path parameters
        if path_params:
            for key, value in path_params.items():
                endpoint = endpoint.replace(f"{{{key}}}", quote(str(value), safe=""))

        url = f"{self.base_url}{endpoint}"

        @backoff.on_exception(
            backoff.expo,
            requests.exceptions.RequestException,
            max_tries=retries,
            giveup=lambda e: isinstance(e, requests.exceptions.HTTPError)
            and getattr(e, "response", None) is not None
            and e.response.status_code < 500,
        )
        def make_request():
            # GET request with query parameters
            response = requests.get(
                url,
                headers=self.headers,
                params=params,
                timeout=timeout,
            )
            response.raise_for_status()
            return response.json()

        try:
            return make_request()
        except requests.exceptions.HTTPError as e:
            # Extract detailed error message from response
            response = e.response
            if response is not None:
                logger.error(
                    f"HTTP error {response.status_code} for {url}: {response.text}"
                )
                raise TempoAPIError(
                    status_code=response.status_code,
                    response_text=response.text,
                    url=url,
                )
            else:
                logger.error(f"Request failed for {url}: {e}")
                raise
        except requests.exceptions.RequestException as e:
            logger.error(f"Request failed for {url}: {e}")
            raise

    def query_echo_endpoint(self) -> bool:
        """Query the echo endpoint to check Tempo status.

        API Endpoint: GET /api/echo
        HTTP Method: GET

        Returns:
            bool: True if endpoint returns 200 status code, False otherwise
        """
        url = f"{self.base_url}/api/echo"

        try:
            response = requests.get(
                url,
                headers=self.headers,
                timeout=30,
            )

            # Just check status code, don't try to parse JSON
            return response.status_code == 200

        except requests.exceptions.RequestException as e:
            logger.error(f"Request failed for {url}: {e}")
            return False

    def query_trace_by_id_v2(
        self,
        trace_id: str,
        start: Optional[int] = None,
        end: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Query a trace by its ID.

        API Endpoint: GET /api/v2/traces/{trace_id}
        HTTP Method: GET

        Args:
            trace_id: The trace ID to retrieve
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds

        Returns:
            dict: OpenTelemetry format trace data
        """
        params = {}
        if start is not None:
            params["start"] = str(start)
        if end is not None:
            params["end"] = str(end)

        return self._make_request(
            "/api/v2/traces/{trace_id}",
            params=params,
            path_params={"trace_id": trace_id},
        )

    def _search_traces_common(
        self,
        search_params: Dict[str, Any],
        limit: Optional[int] = None,
        start: Optional[int] = None,
        end: Optional[int] = None,
        spss: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Common search implementation for both tag and TraceQL searches.

        Args:
            search_params: The search-specific parameters (tags or q)
            limit: Optional max number of traces to return
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds
            spss: Optional spans per span set

        Returns:
            dict: Search results with trace metadata
        """
        params = search_params.copy()

        if limit is not None:
            params["limit"] = str(limit)
        if start is not None:
            params["start"] = str(start)
        if end is not None:
            params["end"] = str(end)
        if spss is not None:
            params["spss"] = str(spss)

        return self._make_request("/api/search", params=params)

    def search_traces_by_tags(
        self,
        tags: str,
        min_duration: Optional[str] = None,
        max_duration: Optional[str] = None,
        limit: Optional[int] = None,
        start: Optional[int] = None,
        end: Optional[int] = None,
        spss: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Search for traces using tag-based search.

        API Endpoint: GET /api/search
        HTTP Method: GET

        Args:
            tags: logfmt-encoded span/process attributes (required)
            min_duration: Optional minimum trace duration (e.g., "5s")
            max_duration: Optional maximum trace duration
            limit: Optional max number of traces to return
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds
            spss: Optional spans per span set

        Returns:
            dict: Search results with trace metadata
        """
        search_params = {"tags": tags}

        # minDuration and maxDuration are only supported with tag-based search
        if min_duration is not None:
            search_params["minDuration"] = min_duration
        if max_duration is not None:
            search_params["maxDuration"] = max_duration

        return self._search_traces_common(
            search_params=search_params,
            limit=limit,
            start=start,
            end=end,
            spss=spss,
        )

    def search_traces_by_query(
        self,
        q: str,
        limit: Optional[int] = None,
        start: Optional[int] = None,
        end: Optional[int] = None,
        spss: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Search for traces using TraceQL query.

        API Endpoint: GET /api/search
        HTTP Method: GET

        Note: minDuration and maxDuration are not supported with TraceQL queries.
        Use the TraceQL query syntax to filter by duration instead.

        Args:
            q: TraceQL query (required)
            limit: Optional max number of traces to return
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds
            spss: Optional spans per span set

        Returns:
            dict: Search results with trace metadata
        """
        return self._search_traces_common(
            search_params={"q": q},
            limit=limit,
            start=start,
            end=end,
            spss=spss,
        )

    def search_tag_names_v2(
        self,
        scope: Optional[str] = None,
        q: Optional[str] = None,
        start: Optional[int] = None,
        end: Optional[int] = None,
        limit: Optional[int] = None,
        max_stale_values: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Search for available tag names.

        API Endpoint: GET /api/v2/search/tags
        HTTP Method: GET

        Args:
            scope: Optional scope filter ("resource", "span", or "intrinsic")
            q: Optional TraceQL query to filter tags
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds
            limit: Optional max number of tag names
            max_stale_values: Optional max stale values parameter

        Returns:
            dict: Available tag names organized by scope
        """
        params = {}
        if scope is not None:
            params["scope"] = scope
        if q is not None:
            params["q"] = q
        if start is not None:
            params["start"] = str(start)
        if end is not None:
            params["end"] = str(end)
        if limit is not None:
            params["limit"] = str(limit)
        if max_stale_values is not None:
            params["maxStaleValues"] = str(max_stale_values)

        return self._make_request("/api/v2/search/tags", params=params)

    def search_tag_values_v2(
        self,
        tag: str,
        q: Optional[str] = None,
        start: Optional[int] = None,
        end: Optional[int] = None,
        limit: Optional[int] = None,
        max_stale_values: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Search for values of a specific tag with optional TraceQL filtering.

        API Endpoint: GET /api/v2/search/tag/{tag}/values
        HTTP Method: GET

        Args:
            tag: The tag name to get values for (required)
            q: Optional TraceQL query to filter tag values (e.g., '{resource.cluster="us-east-1"}')
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds
            limit: Optional max number of values
            max_stale_values: Optional max stale values parameter

        Returns:
            dict: List of discovered values for the tag
        """
        params = {}
        if q is not None:
            params["q"] = q
        if start is not None:
            params["start"] = str(start)
        if end is not None:
            params["end"] = str(end)
        if limit is not None:
            params["limit"] = str(limit)
        if max_stale_values is not None:
            params["maxStaleValues"] = str(max_stale_values)

        return self._make_request(
            "/api/v2/search/tag/{tag}/values",
            params=params,
            path_params={"tag": tag},
        )

    def query_metrics_instant(
        self,
        q: str,
        start: Optional[Union[int, str]] = None,
        end: Optional[Union[int, str]] = None,
        since: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Query TraceQL metrics for an instant value.

        Computes a single value across the entire time range.

        API Endpoint: GET /api/metrics/query
        HTTP Method: GET

        Args:
            q: TraceQL metrics query (required)
            start: Optional start time (Unix seconds/nanoseconds/RFC3339)
            end: Optional end time (Unix seconds/nanoseconds/RFC3339)
            since: Optional duration string (e.g., "1h")

        Returns:
            dict: Single computed metric value
        """
        params = {"q": q}
        if start is not None:
            params["start"] = str(start)
        if end is not None:
            params["end"] = str(end)
        if since is not None:
            params["since"] = since

        return self._make_request("/api/metrics/query", params=params)

    def query_metrics_range(
        self,
        q: str,
        step: Optional[str] = None,
        start: Optional[Union[int, str]] = None,
        end: Optional[Union[int, str]] = None,
        since: Optional[str] = None,
        exemplars: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Query TraceQL metrics for a time series range.

        Returns metrics computed at regular intervals over the time range.

        API Endpoint: GET /api/metrics/query_range
        HTTP Method: GET

        Args:
            q: TraceQL metrics query (required)
            step: Optional time series granularity (e.g., "1m", "5m")
            start: Optional start time (Unix seconds/nanoseconds/RFC3339)
            end: Optional end time (Unix seconds/nanoseconds/RFC3339)
            since: Optional duration string (e.g., "3h")
            exemplars: Optional maximum number of exemplars to return

        Returns:
            dict: Time series of metric values
        """
        params = {"q": q}
        if step is not None:
            params["step"] = step
        if start is not None:
            params["start"] = str(start)
        if end is not None:
            params["end"] = str(end)
        if since is not None:
            params["since"] = since
        if exemplars is not None:
            params["exemplars"] = str(exemplars)

        return self._make_request("/api/metrics/query_range", params=params)
holmes/plugins/toolsets/grafana/loki/instructions.jinja2 (new file)
@@ -0,0 +1,9 @@
Grafana Loki is a multi-tenant log aggregation system designed to store and query logs from all your applications and infrastructure.

* Deleted K8s objects don’t delete their past logs. You can still find them by time-bounded queries on stable labels (e.g., namespace + app) or by regex on pod names.
* If you can't find the kubernetes workload in the cluster YOU SHOULD still try to find logs from it using non specific values on labels.
* If asked to check for logs, you must always try 1-2 of the best queries you can construct to search for the logs.

Loki indexes log lines using labels to help find relevant log lines.
For example a default Kubernetes labels setup would look like that
{namespace="prod", app="backend-api", container="api", pod="backend-api-68b7d9df9c-xyz12", stream="stdout"}