holmesgpt 0.13.2__py3-none-any.whl → 0.18.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (188)
  1. holmes/__init__.py +3 -5
  2. holmes/clients/robusta_client.py +20 -6
  3. holmes/common/env_vars.py +58 -3
  4. holmes/common/openshift.py +1 -1
  5. holmes/config.py +123 -148
  6. holmes/core/conversations.py +71 -15
  7. holmes/core/feedback.py +191 -0
  8. holmes/core/investigation.py +31 -39
  9. holmes/core/investigation_structured_output.py +3 -3
  10. holmes/core/issue.py +1 -1
  11. holmes/core/llm.py +508 -88
  12. holmes/core/models.py +108 -4
  13. holmes/core/openai_formatting.py +14 -1
  14. holmes/core/prompt.py +48 -3
  15. holmes/core/runbooks.py +1 -0
  16. holmes/core/safeguards.py +8 -6
  17. holmes/core/supabase_dal.py +295 -100
  18. holmes/core/tool_calling_llm.py +489 -428
  19. holmes/core/tools.py +325 -56
  20. holmes/core/tools_utils/token_counting.py +21 -0
  21. holmes/core/tools_utils/tool_context_window_limiter.py +40 -0
  22. holmes/core/tools_utils/tool_executor.py +0 -13
  23. holmes/core/tools_utils/toolset_utils.py +1 -0
  24. holmes/core/toolset_manager.py +191 -5
  25. holmes/core/tracing.py +19 -3
  26. holmes/core/transformers/__init__.py +23 -0
  27. holmes/core/transformers/base.py +63 -0
  28. holmes/core/transformers/llm_summarize.py +175 -0
  29. holmes/core/transformers/registry.py +123 -0
  30. holmes/core/transformers/transformer.py +32 -0
  31. holmes/core/truncation/compaction.py +94 -0
  32. holmes/core/truncation/dal_truncation_utils.py +23 -0
  33. holmes/core/truncation/input_context_window_limiter.py +219 -0
  34. holmes/interactive.py +228 -31
  35. holmes/main.py +23 -40
  36. holmes/plugins/interfaces.py +2 -1
  37. holmes/plugins/prompts/__init__.py +2 -1
  38. holmes/plugins/prompts/_fetch_logs.jinja2 +31 -6
  39. holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
  40. holmes/plugins/prompts/_runbook_instructions.jinja2 +24 -12
  41. holmes/plugins/prompts/base_user_prompt.jinja2 +7 -0
  42. holmes/plugins/prompts/conversation_history_compaction.jinja2 +89 -0
  43. holmes/plugins/prompts/generic_ask.jinja2 +0 -4
  44. holmes/plugins/prompts/generic_ask_conversation.jinja2 +0 -1
  45. holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +0 -1
  46. holmes/plugins/prompts/generic_investigation.jinja2 +0 -1
  47. holmes/plugins/prompts/investigation_procedure.jinja2 +50 -1
  48. holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +0 -1
  49. holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +0 -1
  50. holmes/plugins/runbooks/__init__.py +145 -17
  51. holmes/plugins/runbooks/catalog.json +2 -0
  52. holmes/plugins/sources/github/__init__.py +4 -2
  53. holmes/plugins/sources/prometheus/models.py +1 -0
  54. holmes/plugins/toolsets/__init__.py +44 -27
  55. holmes/plugins/toolsets/aks-node-health.yaml +46 -0
  56. holmes/plugins/toolsets/aks.yaml +64 -0
  57. holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +38 -47
  58. holmes/plugins/toolsets/azure_sql/apis/alert_monitoring_api.py +3 -2
  59. holmes/plugins/toolsets/azure_sql/apis/azure_sql_api.py +2 -1
  60. holmes/plugins/toolsets/azure_sql/apis/connection_failure_api.py +3 -2
  61. holmes/plugins/toolsets/azure_sql/apis/connection_monitoring_api.py +3 -1
  62. holmes/plugins/toolsets/azure_sql/apis/storage_analysis_api.py +3 -1
  63. holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +12 -13
  64. holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +15 -12
  65. holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +15 -12
  66. holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +11 -11
  67. holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +11 -9
  68. holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +15 -12
  69. holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +15 -15
  70. holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +11 -8
  71. holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +11 -8
  72. holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +11 -8
  73. holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +11 -8
  74. holmes/plugins/toolsets/azure_sql/utils.py +0 -32
  75. holmes/plugins/toolsets/bash/argocd/__init__.py +3 -3
  76. holmes/plugins/toolsets/bash/aws/__init__.py +4 -4
  77. holmes/plugins/toolsets/bash/azure/__init__.py +4 -4
  78. holmes/plugins/toolsets/bash/bash_toolset.py +11 -15
  79. holmes/plugins/toolsets/bash/common/bash.py +23 -13
  80. holmes/plugins/toolsets/bash/common/bash_command.py +1 -1
  81. holmes/plugins/toolsets/bash/common/stringify.py +1 -1
  82. holmes/plugins/toolsets/bash/kubectl/__init__.py +2 -1
  83. holmes/plugins/toolsets/bash/kubectl/constants.py +0 -1
  84. holmes/plugins/toolsets/bash/kubectl/kubectl_get.py +3 -4
  85. holmes/plugins/toolsets/bash/parse_command.py +12 -13
  86. holmes/plugins/toolsets/cilium.yaml +284 -0
  87. holmes/plugins/toolsets/connectivity_check.py +124 -0
  88. holmes/plugins/toolsets/coralogix/api.py +132 -119
  89. holmes/plugins/toolsets/coralogix/coralogix.jinja2 +14 -0
  90. holmes/plugins/toolsets/coralogix/toolset_coralogix.py +219 -0
  91. holmes/plugins/toolsets/coralogix/utils.py +15 -79
  92. holmes/plugins/toolsets/datadog/datadog_api.py +525 -26
  93. holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +55 -11
  94. holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +3 -3
  95. holmes/plugins/toolsets/datadog/datadog_models.py +59 -0
  96. holmes/plugins/toolsets/datadog/datadog_url_utils.py +213 -0
  97. holmes/plugins/toolsets/datadog/instructions_datadog_traces.jinja2 +165 -28
  98. holmes/plugins/toolsets/datadog/toolset_datadog_general.py +417 -241
  99. holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +234 -214
  100. holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +167 -79
  101. holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +374 -363
  102. holmes/plugins/toolsets/elasticsearch/__init__.py +6 -0
  103. holmes/plugins/toolsets/elasticsearch/elasticsearch.py +834 -0
  104. holmes/plugins/toolsets/elasticsearch/opensearch_ppl_query_docs.jinja2 +1616 -0
  105. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist.py +78 -0
  106. holmes/plugins/toolsets/elasticsearch/opensearch_query_assist_instructions.jinja2 +223 -0
  107. holmes/plugins/toolsets/git.py +54 -50
  108. holmes/plugins/toolsets/grafana/base_grafana_toolset.py +16 -4
  109. holmes/plugins/toolsets/grafana/common.py +13 -29
  110. holmes/plugins/toolsets/grafana/grafana_tempo_api.py +455 -0
  111. holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +25 -0
  112. holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +191 -0
  113. holmes/plugins/toolsets/grafana/loki_api.py +4 -0
  114. holmes/plugins/toolsets/grafana/toolset_grafana.py +293 -89
  115. holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +49 -0
  116. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
  117. holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +820 -292
  118. holmes/plugins/toolsets/grafana/trace_parser.py +4 -3
  119. holmes/plugins/toolsets/internet/internet.py +15 -16
  120. holmes/plugins/toolsets/internet/notion.py +9 -11
  121. holmes/plugins/toolsets/investigator/core_investigation.py +44 -36
  122. holmes/plugins/toolsets/investigator/model.py +3 -1
  123. holmes/plugins/toolsets/json_filter_mixin.py +134 -0
  124. holmes/plugins/toolsets/kafka.py +36 -42
  125. holmes/plugins/toolsets/kubernetes.yaml +317 -113
  126. holmes/plugins/toolsets/kubernetes_logs.py +9 -9
  127. holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
  128. holmes/plugins/toolsets/logging_utils/logging_api.py +94 -8
  129. holmes/plugins/toolsets/mcp/toolset_mcp.py +218 -64
  130. holmes/plugins/toolsets/newrelic/new_relic_api.py +165 -0
  131. holmes/plugins/toolsets/newrelic/newrelic.jinja2 +65 -0
  132. holmes/plugins/toolsets/newrelic/newrelic.py +320 -0
  133. holmes/plugins/toolsets/openshift.yaml +283 -0
  134. holmes/plugins/toolsets/prometheus/prometheus.py +1202 -421
  135. holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +54 -5
  136. holmes/plugins/toolsets/prometheus/utils.py +28 -0
  137. holmes/plugins/toolsets/rabbitmq/api.py +23 -4
  138. holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +13 -14
  139. holmes/plugins/toolsets/robusta/robusta.py +239 -68
  140. holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
  141. holmes/plugins/toolsets/runbook/runbook_fetcher.py +157 -27
  142. holmes/plugins/toolsets/service_discovery.py +1 -1
  143. holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
  144. holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
  145. holmes/plugins/toolsets/utils.py +88 -0
  146. holmes/utils/config_utils.py +91 -0
  147. holmes/utils/connection_utils.py +31 -0
  148. holmes/utils/console/result.py +10 -0
  149. holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
  150. holmes/utils/env.py +7 -0
  151. holmes/utils/file_utils.py +2 -1
  152. holmes/utils/global_instructions.py +60 -11
  153. holmes/utils/holmes_status.py +6 -4
  154. holmes/utils/holmes_sync_toolsets.py +0 -2
  155. holmes/utils/krr_utils.py +188 -0
  156. holmes/utils/log.py +15 -0
  157. holmes/utils/markdown_utils.py +2 -3
  158. holmes/utils/memory_limit.py +58 -0
  159. holmes/utils/sentry_helper.py +64 -0
  160. holmes/utils/stream.py +69 -8
  161. holmes/utils/tags.py +4 -3
  162. holmes/version.py +37 -15
  163. holmesgpt-0.18.4.dist-info/LICENSE +178 -0
  164. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/METADATA +35 -31
  165. holmesgpt-0.18.4.dist-info/RECORD +258 -0
  166. holmes/core/performance_timing.py +0 -72
  167. holmes/plugins/toolsets/aws.yaml +0 -80
  168. holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +0 -112
  169. holmes/plugins/toolsets/datadog/datadog_traces_formatter.py +0 -310
  170. holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +0 -739
  171. holmes/plugins/toolsets/grafana/grafana_api.py +0 -42
  172. holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
  173. holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
  174. holmes/plugins/toolsets/newrelic.py +0 -231
  175. holmes/plugins/toolsets/opensearch/opensearch.py +0 -257
  176. holmes/plugins/toolsets/opensearch/opensearch_logs.py +0 -161
  177. holmes/plugins/toolsets/opensearch/opensearch_traces.py +0 -218
  178. holmes/plugins/toolsets/opensearch/opensearch_traces_instructions.jinja2 +0 -12
  179. holmes/plugins/toolsets/opensearch/opensearch_utils.py +0 -166
  180. holmes/plugins/toolsets/servicenow/install.md +0 -37
  181. holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
  182. holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
  183. holmes/utils/keygen_utils.py +0 -6
  184. holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
  185. holmesgpt-0.13.2.dist-info/RECORD +0 -234
  186. /holmes/plugins/toolsets/{opensearch → newrelic}/__init__.py +0 -0
  187. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/WHEEL +0 -0
  188. {holmesgpt-0.13.2.dist-info → holmesgpt-0.18.4.dist-info}/entry_points.txt +0 -0
holmes/plugins/toolsets/grafana/base_grafana_toolset.py
@@ -1,12 +1,11 @@
  import logging
+ from abc import abstractmethod
  from typing import Any, ClassVar, Tuple, Type

  from holmes.core.tools import CallablePrerequisite, Tool, Toolset, ToolsetTag
  from holmes.plugins.toolsets.consts import TOOLSET_CONFIG_MISSING_ERROR
  from holmes.plugins.toolsets.grafana.common import GrafanaConfig

- from holmes.plugins.toolsets.grafana.grafana_api import grafana_health_check
-

  class BaseGrafanaToolset(Toolset):
      config_class: ClassVar[Type[GrafanaConfig]] = GrafanaConfig
@@ -39,16 +38,29 @@ class BaseGrafanaToolset(Toolset):

          try:
              self._grafana_config = self.config_class(**config)
-             return grafana_health_check(self._grafana_config)
+             return self.health_check()

          except Exception as e:
              logging.exception(f"Failed to set up grafana toolset {self.name}")
              return False, str(e)

+     @abstractmethod
+     def health_check(self) -> Tuple[bool, str]:
+         """
+         Check if the toolset is healthy and can connect to its data source.
+
+         Subclasses must implement this method to verify connectivity.
+         This method should NOT raise exceptions - catch them internally
+         and return (False, "error message") instead.
+
+         Returns:
+             Tuple[bool, str]: (True, "") on success, (False, "error message") on failure.
+         """
+         raise NotImplementedError("Subclasses must implement health_check()")
+
      def get_example_config(self):
          example_config = GrafanaConfig(
              api_key="YOUR API KEY",
              url="YOUR GRAFANA URL",
-             grafana_datasource_uid="UID OF DATASOURCE IN GRAFANA",
          )
          return example_config.model_dump()
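
For orientation, here is a minimal sketch of what the new `health_check()` contract asks of subclasses. It is not taken from the package: the class name and the `/api/health` probe are illustrative; only the `_grafana_config` attribute, the `verify_ssl` field, and the return-tuple convention come from the diff above.

```python
from typing import Tuple

import requests

from holmes.plugins.toolsets.grafana.base_grafana_toolset import BaseGrafanaToolset


class ExampleGrafanaToolset(BaseGrafanaToolset):  # hypothetical subclass for illustration
    def health_check(self) -> Tuple[bool, str]:
        # Contract from the abstract method: never raise; report failures as (False, message).
        try:
            response = requests.get(
                f"{self._grafana_config.url}/api/health",  # illustrative probe endpoint
                timeout=10,
                verify=self._grafana_config.verify_ssl,
            )
            if response.status_code == 200:
                return True, ""
            return False, f"Unexpected status code {response.status_code}"
        except requests.exceptions.RequestException as e:
            return False, str(e)
```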
holmes/plugins/toolsets/grafana/common.py
@@ -1,14 +1,11 @@
- import json
  from typing import Dict, Optional
- from pydantic import BaseModel
- import datetime

- from holmes.core.tools import StructuredToolResult, ToolResultStatus
+ from pydantic import BaseModel


  class GrafanaConfig(BaseModel):
      """A config that represents one of the Grafana related tools like Loki or Tempo
-     If `grafana_datasource_uid` is set, then it is assume that Holmes will proxy all
+     If `grafana_datasource_uid` is set, then it is assumed that Holmes will proxy all
      requests through grafana. In this case `url` should be the grafana URL.
      If `grafana_datasource_uid` is not set, it is assumed that the `url` is the
      systems' URL
@@ -19,7 +16,7 @@ class GrafanaConfig(BaseModel):
      url: str
      grafana_datasource_uid: Optional[str] = None
      external_url: Optional[str] = None
-     healthcheck: Optional[str] = "ready"
+     verify_ssl: bool = True


  def build_headers(api_key: Optional[str], additional_headers: Optional[Dict[str, str]]):
@@ -36,19 +33,6 @@ def build_headers(api_key: Optional[str], additional_headers: Optional[Dict[str,
      return headers


- def format_log(log: Dict) -> str:
-     log_str = log.get("log", "")
-     timestamp_nanoseconds = log.get("timestamp")
-     if timestamp_nanoseconds:
-         timestamp_seconds = int(timestamp_nanoseconds) // 1_000_000_000
-         dt = datetime.datetime.fromtimestamp(timestamp_seconds)
-         log_str = dt.strftime("%Y-%m-%dT%H:%M:%SZ") + " " + log_str
-     else:
-         log_str = json.dumps(log)
-
-     return log_str
-
-
  def get_base_url(config: GrafanaConfig) -> str:
      if config.grafana_datasource_uid:
          return f"{config.url}/api/datasources/proxy/uid/{config.grafana_datasource_uid}"
@@ -56,13 +40,13 @@ def get_base_url(config: GrafanaConfig) -> str:
      return config.url


- def ensure_grafana_uid_or_return_error_result(
-     config: GrafanaConfig,
- ) -> Optional[StructuredToolResult]:
-     if not config.grafana_datasource_uid:
-         return StructuredToolResult(
-             status=ToolResultStatus.ERROR,
-             error="This tool only works when the toolset is configued ",
-         )
-     else:
-         return None
+ class GrafanaTempoLabelsConfig(BaseModel):
+     pod: str = "k8s.pod.name"
+     namespace: str = "k8s.namespace.name"
+     deployment: str = "k8s.deployment.name"
+     node: str = "k8s.node.name"
+     service: str = "service.name"
+
+
+ class GrafanaTempoConfig(GrafanaConfig):
+     labels: GrafanaTempoLabelsConfig = GrafanaTempoLabelsConfig()
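
A short, illustrative construction of the new `GrafanaTempoConfig`, showing how the `GrafanaTempoLabelsConfig` defaults can be overridden. The URL, datasource UID, and label values are placeholders, not values from the package.

```python
from holmes.plugins.toolsets.grafana.common import (
    GrafanaTempoConfig,
    GrafanaTempoLabelsConfig,
)

# Placeholders throughout; setting grafana_datasource_uid means requests are
# proxied through Grafana, so `url` is the Grafana URL (see the docstring above).
config = GrafanaTempoConfig(
    api_key="YOUR API KEY",
    url="https://grafana.example.com",
    grafana_datasource_uid="tempo-datasource-uid",
    verify_ssl=True,
    labels=GrafanaTempoLabelsConfig(
        pod="k8s.pod.name",  # override only if your spans use different resource attributes
        namespace="k8s.namespace.name",
    ),
)
```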
holmes/plugins/toolsets/grafana/grafana_tempo_api.py
@@ -0,0 +1,455 @@
+ """Grafana Tempo API wrapper for querying traces and metrics."""
+
+ import logging
+ from typing import Any, Dict, Optional, Union
+ from urllib.parse import quote
+
+ import backoff
+ import requests  # type: ignore
+
+ from holmes.plugins.toolsets.grafana.common import (
+     GrafanaTempoConfig,
+     build_headers,
+     get_base_url,
+ )
+
+ logger = logging.getLogger(__name__)
+
+
+ class TempoAPIError(Exception):
+     """Custom exception for Tempo API errors with detailed response information."""
+
+     def __init__(self, status_code: int, response_text: str, url: str):
+         self.status_code = status_code
+         self.response_text = response_text
+         self.url = url
+
+         # Try to extract error message from JSON response
+         try:
+             import json
+
+             error_data = json.loads(response_text)
+             # Tempo may return errors in different formats
+             error_message = (
+                 error_data.get("error")
+                 or error_data.get("message")
+                 or error_data.get("errorType")
+                 or response_text
+             )
+         except (json.JSONDecodeError, TypeError):
+             error_message = response_text
+
+         super().__init__(f"Tempo API error {status_code}: {error_message}")
+
+
+ class GrafanaTempoAPI:
+     """Python wrapper for Grafana Tempo REST API.
+
+     This class provides a clean interface to all Tempo API endpoints.
+     """
+
+     def __init__(self, config: GrafanaTempoConfig):
+         """Initialize the Tempo API wrapper.
+
+         Args:
+             config: GrafanaTempoConfig instance with connection details
+         """
+         self.config = config
+         self.base_url = get_base_url(config)
+         self.headers = build_headers(config.api_key, config.headers)
+
+     def _make_request(
+         self,
+         endpoint: str,
+         params: Optional[Dict[str, Any]] = None,
+         path_params: Optional[Dict[str, str]] = None,
+         timeout: int = 30,
+         retries: int = 3,
+     ) -> Dict[str, Any]:
+         """Make HTTP request to Tempo API with retry logic.
+
+         Args:
+             endpoint: API endpoint path (e.g., "/api/echo")
+             params: Query parameters
+             path_params: Parameters to substitute in the endpoint path
+             timeout: Request timeout in seconds
+             retries: Number of retry attempts
+
+         Returns:
+             JSON response from the API
+
+         Raises:
+             Exception: If the request fails after all retries
+         """
+         # Format endpoint with path parameters
+         if path_params:
+             for key, value in path_params.items():
+                 endpoint = endpoint.replace(f"{{{key}}}", quote(str(value), safe=""))
+
+         url = f"{self.base_url}{endpoint}"
+
+         @backoff.on_exception(
+             backoff.expo,
+             requests.exceptions.RequestException,
+             max_tries=retries,
+             giveup=lambda e: isinstance(e, requests.exceptions.HTTPError)
+             and getattr(e, "response", None) is not None
+             and e.response.status_code < 500,
+         )
+         def make_request():
+             # GET request with query parameters
+             response = requests.get(
+                 url,
+                 headers=self.headers,
+                 params=params,
+                 timeout=timeout,
+                 verify=self.config.verify_ssl,
+             )
+             response.raise_for_status()
+             return response.json()
+
+         try:
+             return make_request()
+         except requests.exceptions.HTTPError as e:
+             # Extract detailed error message from response
+             response = e.response
+             if response is not None:
+                 logger.error(
+                     f"HTTP error {response.status_code} for {url}: {response.text}"
+                 )
+                 raise TempoAPIError(
+                     status_code=response.status_code,
+                     response_text=response.text,
+                     url=url,
+                 )
+             else:
+                 logger.error(f"Request failed for {url}: {e}")
+                 raise
+         except requests.exceptions.RequestException as e:
+             logger.error(f"Request failed for {url}: {e}")
+             raise
+
+     def query_echo_endpoint(self) -> bool:
+         """Query the echo endpoint to check Tempo status.
+
+         API Endpoint: GET /api/echo
+         HTTP Method: GET
+
+         Returns:
+             bool: True if endpoint returns 200 status code, False otherwise
+         """
+         url = f"{self.base_url}/api/echo"
+
+         try:
+             response = requests.get(
+                 url,
+                 headers=self.headers,
+                 timeout=30,
+                 verify=self.config.verify_ssl,
+             )
+
+             # Just check status code, don't try to parse JSON
+             return response.status_code == 200
+
+         except requests.exceptions.RequestException as e:
+             logger.error(f"Request failed for {url}: {e}")
+             return False
+
+     def query_trace_by_id_v2(
+         self,
+         trace_id: str,
+         start: Optional[int] = None,
+         end: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         """Query a trace by its ID.
+
+         API Endpoint: GET /api/v2/traces/{trace_id}
+         HTTP Method: GET
+
+         Args:
+             trace_id: The trace ID to retrieve
+             start: Optional start time in Unix epoch seconds
+             end: Optional end time in Unix epoch seconds
+
+         Returns:
+             dict: OpenTelemetry format trace data
+         """
+         params = {}
+         if start is not None:
+             params["start"] = str(start)
+         if end is not None:
+             params["end"] = str(end)
+
+         return self._make_request(
+             "/api/v2/traces/{trace_id}",
+             params=params,
+             path_params={"trace_id": trace_id},
+         )
+
+     def _search_traces_common(
+         self,
+         search_params: Dict[str, Any],
+         limit: Optional[int] = None,
+         start: Optional[int] = None,
+         end: Optional[int] = None,
+         spss: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         """Common search implementation for both tag and TraceQL searches.
+
+         Args:
+             search_params: The search-specific parameters (tags or q)
+             limit: Optional max number of traces to return
+             start: Optional start time in Unix epoch seconds
+             end: Optional end time in Unix epoch seconds
+             spss: Optional spans per span set
+
+         Returns:
+             dict: Search results with trace metadata
+         """
+         params = search_params.copy()
+
+         if limit is not None:
+             params["limit"] = str(limit)
+         if start is not None:
+             params["start"] = str(start)
+         if end is not None:
+             params["end"] = str(end)
+         if spss is not None:
+             params["spss"] = str(spss)
+
+         return self._make_request("/api/search", params=params)
+
+     def search_traces_by_tags(
+         self,
+         tags: str,
+         min_duration: Optional[str] = None,
+         max_duration: Optional[str] = None,
+         limit: Optional[int] = None,
+         start: Optional[int] = None,
+         end: Optional[int] = None,
+         spss: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         """Search for traces using tag-based search.
+
+         API Endpoint: GET /api/search
+         HTTP Method: GET
+
+         Args:
+             tags: logfmt-encoded span/process attributes (required)
+             min_duration: Optional minimum trace duration (e.g., "5s")
+             max_duration: Optional maximum trace duration
+             limit: Optional max number of traces to return
+             start: Optional start time in Unix epoch seconds
+             end: Optional end time in Unix epoch seconds
+             spss: Optional spans per span set
+
+         Returns:
+             dict: Search results with trace metadata
+         """
+         search_params = {"tags": tags}
+
+         # minDuration and maxDuration are only supported with tag-based search
+         if min_duration is not None:
+             search_params["minDuration"] = min_duration
+         if max_duration is not None:
+             search_params["maxDuration"] = max_duration
+
+         return self._search_traces_common(
+             search_params=search_params,
+             limit=limit,
+             start=start,
+             end=end,
+             spss=spss,
+         )
+
+     def search_traces_by_query(
+         self,
+         q: str,
+         limit: Optional[int] = None,
+         start: Optional[int] = None,
+         end: Optional[int] = None,
+         spss: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         """Search for traces using TraceQL query.
+
+         API Endpoint: GET /api/search
+         HTTP Method: GET
+
+         Note: minDuration and maxDuration are not supported with TraceQL queries.
+         Use the TraceQL query syntax to filter by duration instead.
+
+         Args:
+             q: TraceQL query (required)
+             limit: Optional max number of traces to return
+             start: Optional start time in Unix epoch seconds
+             end: Optional end time in Unix epoch seconds
+             spss: Optional spans per span set
+
+         Returns:
+             dict: Search results with trace metadata
+         """
+         return self._search_traces_common(
+             search_params={"q": q},
+             limit=limit,
+             start=start,
+             end=end,
+             spss=spss,
+         )
+
+     def search_tag_names_v2(
+         self,
+         scope: Optional[str] = None,
+         q: Optional[str] = None,
+         start: Optional[int] = None,
+         end: Optional[int] = None,
+         limit: Optional[int] = None,
+         max_stale_values: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         """Search for available tag names.
+
+         API Endpoint: GET /api/v2/search/tags
+         HTTP Method: GET
+
+         Args:
+             scope: Optional scope filter ("resource", "span", or "intrinsic")
+             q: Optional TraceQL query to filter tags
+             start: Optional start time in Unix epoch seconds
+             end: Optional end time in Unix epoch seconds
+             limit: Optional max number of tag names
+             max_stale_values: Optional max stale values parameter
+
+         Returns:
+             dict: Available tag names organized by scope
+         """
+         params = {}
+         if scope is not None:
+             params["scope"] = scope
+         if q is not None:
+             params["q"] = q
+         if start is not None:
+             params["start"] = str(start)
+         if end is not None:
+             params["end"] = str(end)
+         if limit is not None:
+             params["limit"] = str(limit)
+         if max_stale_values is not None:
+             params["maxStaleValues"] = str(max_stale_values)
+
+         return self._make_request("/api/v2/search/tags", params=params)
+
+     def search_tag_values_v2(
+         self,
+         tag: str,
+         q: Optional[str] = None,
+         start: Optional[int] = None,
+         end: Optional[int] = None,
+         limit: Optional[int] = None,
+         max_stale_values: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         """Search for values of a specific tag with optional TraceQL filtering.
+
+         API Endpoint: GET /api/v2/search/tag/{tag}/values
+         HTTP Method: GET
+
+         Args:
+             tag: The tag name to get values for (required)
+             q: Optional TraceQL query to filter tag values (e.g., '{resource.cluster="us-east-1"}')
+             start: Optional start time in Unix epoch seconds
+             end: Optional end time in Unix epoch seconds
+             limit: Optional max number of values
+             max_stale_values: Optional max stale values parameter
+
+         Returns:
+             dict: List of discovered values for the tag
+         """
+         params = {}
+         if q is not None:
+             params["q"] = q
+         if start is not None:
+             params["start"] = str(start)
+         if end is not None:
+             params["end"] = str(end)
+         if limit is not None:
+             params["limit"] = str(limit)
+         if max_stale_values is not None:
+             params["maxStaleValues"] = str(max_stale_values)
+
+         return self._make_request(
+             "/api/v2/search/tag/{tag}/values",
+             params=params,
+             path_params={"tag": tag},
+         )
+
+     def query_metrics_instant(
+         self,
+         q: str,
+         start: Optional[Union[int, str]] = None,
+         end: Optional[Union[int, str]] = None,
+         since: Optional[str] = None,
+     ) -> Dict[str, Any]:
+         """Query TraceQL metrics for an instant value.
+
+         Computes a single value across the entire time range.
+
+         API Endpoint: GET /api/metrics/query
+         HTTP Method: GET
+
+         Args:
+             q: TraceQL metrics query (required)
+             start: Optional start time (Unix seconds/nanoseconds/RFC3339)
+             end: Optional end time (Unix seconds/nanoseconds/RFC3339)
+             since: Optional duration string (e.g., "1h")
+
+         Returns:
+             dict: Single computed metric value
+         """
+         params = {"q": q}
+         if start is not None:
+             params["start"] = str(start)
+         if end is not None:
+             params["end"] = str(end)
+         if since is not None:
+             params["since"] = since
+
+         return self._make_request("/api/metrics/query", params=params)
+
+     def query_metrics_range(
+         self,
+         q: str,
+         step: Optional[str] = None,
+         start: Optional[Union[int, str]] = None,
+         end: Optional[Union[int, str]] = None,
+         since: Optional[str] = None,
+         exemplars: Optional[int] = None,
+     ) -> Dict[str, Any]:
+         """Query TraceQL metrics for a time series range.
+
+         Returns metrics computed at regular intervals over the time range.
+
+         API Endpoint: GET /api/metrics/query_range
+         HTTP Method: GET
+
+         Args:
+             q: TraceQL metrics query (required)
+             step: Optional time series granularity (e.g., "1m", "5m")
+             start: Optional start time (Unix seconds/nanoseconds/RFC3339)
+             end: Optional end time (Unix seconds/nanoseconds/RFC3339)
+             since: Optional duration string (e.g., "3h")
+             exemplars: Optional maximum number of exemplars to return
+
+         Returns:
+             dict: Time series of metric values
+         """
+         params = {"q": q}
+         if step is not None:
+             params["step"] = step
+         if start is not None:
+             params["start"] = str(start)
+         if end is not None:
+             params["end"] = str(end)
+         if since is not None:
+             params["since"] = since
+         if exemplars is not None:
+             params["exemplars"] = str(exemplars)
+
+         return self._make_request("/api/metrics/query_range", params=params)
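
To make the new wrapper concrete, here is a hedged usage sketch (not shipped with the package): it assumes a directly reachable Tempo endpoint at a placeholder URL, placeholder timestamps, and the standard Tempo search response shape with a `traces` list.

```python
from holmes.plugins.toolsets.grafana.common import GrafanaTempoConfig
from holmes.plugins.toolsets.grafana.grafana_tempo_api import GrafanaTempoAPI

# Placeholder connection details; with no grafana_datasource_uid, url is the Tempo URL itself.
config = GrafanaTempoConfig(api_key="YOUR API KEY", url="https://tempo.example.com")
api = GrafanaTempoAPI(config)

if api.query_echo_endpoint():  # cheap connectivity probe before heavier queries
    # TraceQL search: slow traces from one service within a one-hour window (epoch seconds).
    results = api.search_traces_by_query(
        q='{resource.service.name="checkout" && duration > 2s}',
        limit=20,
        start=1700000000,
        end=1700003600,
    )
    for trace in results.get("traces", []):
        print(trace.get("traceID"), trace.get("rootServiceName"), trace.get("durationMs"))
```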
holmes/plugins/toolsets/grafana/loki/instructions.jinja2
@@ -0,0 +1,25 @@
+ Grafana Loki is a multi-tenant log aggregation system designed to store and query logs from all your applications and infrastructure.
+
+ **IMPORTANT WILDCARD USAGE:**
+ - **ALWAYS use wildcards** when searching for pods unless you have the COMPLETE pod name with all suffixes
+ - Kubernetes pod names include deployment hash + replica ID (e.g., `nginx-ingress-7b9899-x2km9`, `frontend-5f4d3b2a1-abc123`)
+ - When user says "nginx pod" or "frontend pod", search for `nginx-*` or `frontend-*` NOT just `nginx` or `frontend`
+ - Loki supports wildcards: `*` matches any characters (e.g., `nginx-*`, `*ingress*`, `*-x2km9`)
+ - For partial matches, use wildcards on both sides: `*keyword*` to find logs from any pod containing "keyword"
+
+ **When user provides what looks like a complete pod name** (e.g., `my-workload-5f9d8b7c4d-x2km9`):
+ - Query Loki directly with that exact pod name
+ - Do NOT try to verify if the pod exists in Kubernetes first
+ - This allows querying historical pods that have been deleted/replaced
+
+ * If asked to check for logs, you must always try 1-2 of the best queries you can construct to search for the logs.
+
+ Loki indexes log lines using labels to help find relevant log lines.
+ For example, a default Kubernetes label setup would look like this:
+ {namespace="prod", app="backend-api", container="api", pod="backend-api-68b7d9df9c-xyz12", stream="stdout"}
+
+
+ ### Time Parameters
+ - Use RFC3339 format: `2023-03-01T10:30:00Z`
+ - Or relative seconds: `-3600` for 1 hour ago
+ - If no time range is specified, use the last 4 hours as the default time range.
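
For reference only (this block is not part of the prompt file above): the default label set and time-range defaults it describes map onto a raw Loki `query_range` call roughly as sketched below, with a LogQL regex matcher standing in for the `*`-style pod wildcard. The endpoint and parameters are the public Loki HTTP API; the URL and label values are placeholders.

```python
import time

import requests

LOKI_URL = "https://loki.example.com"  # placeholder

# "nginx pod" with a wildcard becomes pod=~"nginx-.*"; default window is the last 4 hours.
params = {
    "query": '{namespace="prod", pod=~"nginx-.*"} |= "error"',
    "start": int((time.time() - 4 * 3600) * 1e9),  # Loki accepts nanosecond epoch timestamps
    "end": int(time.time() * 1e9),
    "limit": 100,
}
response = requests.get(f"{LOKI_URL}/loki/api/v1/query_range", params=params, timeout=30)
response.raise_for_status()
for stream in response.json()["data"]["result"]:
    for ts, line in stream["values"]:
        print(ts, line)
```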