holmesgpt 0.13.2__py3-none-any.whl → 0.16.2a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +17 -4
- holmes/common/env_vars.py +40 -1
- holmes/config.py +114 -144
- holmes/core/conversations.py +53 -14
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +18 -22
- holmes/core/llm.py +489 -88
- holmes/core/models.py +103 -1
- holmes/core/openai_formatting.py +13 -0
- holmes/core/prompt.py +1 -1
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +293 -100
- holmes/core/tool_calling_llm.py +423 -323
- holmes/core/tools.py +311 -33
- holmes/core/tools_utils/token_counting.py +14 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
- holmes/core/tools_utils/tool_executor.py +13 -8
- holmes/core/toolset_manager.py +155 -4
- holmes/core/tracing.py +6 -1
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +177 -24
- holmes/main.py +7 -4
- holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/prompts/generic_ask.jinja2 +2 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
- holmes/plugins/runbooks/__init__.py +117 -18
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/toolsets/__init__.py +21 -8
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
- holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
- holmes/plugins/toolsets/git.py +51 -46
- holmes/plugins/toolsets/grafana/common.py +15 -3
- holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
- holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
- holmes/plugins/toolsets/internet/internet.py +6 -7
- holmes/plugins/toolsets/internet/notion.py +5 -6
- holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
- holmes/plugins/toolsets/kafka.py +25 -36
- holmes/plugins/toolsets/kubernetes.yaml +58 -84
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
- holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
- holmes/plugins/toolsets/robusta/robusta.py +236 -65
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/global_instructions.py +75 -10
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +61 -7
- holmes/version.py +34 -14
- holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
|
@@ -3,12 +3,12 @@ from typing import Dict, Optional
|
|
|
3
3
|
from pydantic import BaseModel
|
|
4
4
|
import datetime
|
|
5
5
|
|
|
6
|
-
from holmes.core.tools import StructuredToolResult,
|
|
6
|
+
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
|
|
7
7
|
|
|
8
8
|
|
|
9
9
|
class GrafanaConfig(BaseModel):
|
|
10
10
|
"""A config that represents one of the Grafana related tools like Loki or Tempo
|
|
11
|
-
If `grafana_datasource_uid` is set, then it is
|
|
11
|
+
If `grafana_datasource_uid` is set, then it is assumed that Holmes will proxy all
|
|
12
12
|
requests through grafana. In this case `url` should be the grafana URL.
|
|
13
13
|
If `grafana_datasource_uid` is not set, it is assumed that the `url` is the
|
|
14
14
|
systems' URL
|
|
@@ -61,8 +61,20 @@ def ensure_grafana_uid_or_return_error_result(
|
|
|
61
61
|
) -> Optional[StructuredToolResult]:
|
|
62
62
|
if not config.grafana_datasource_uid:
|
|
63
63
|
return StructuredToolResult(
|
|
64
|
-
status=
|
|
64
|
+
status=StructuredToolResultStatus.ERROR,
|
|
65
65
|
error="This tool only works when the toolset is configued ",
|
|
66
66
|
)
|
|
67
67
|
else:
|
|
68
68
|
return None
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
class GrafanaTempoLabelsConfig(BaseModel):
    """Names of the trace attributes/labels used for each kind of entity.

    Every field is a plain string with a default, so the model can be built
    with no arguments.

    NOTE(review): the defaults look like OpenTelemetry semantic-convention
    attribute names; how each field is consumed is not visible in this file —
    confirm against the Tempo toolset before relying on that.
    """

    # Attribute name that carries the Kubernetes pod name.
    pod: str = "k8s.pod.name"
    # Attribute name that carries the Kubernetes namespace.
    namespace: str = "k8s.namespace.name"
    # Attribute name that carries the Kubernetes deployment name.
    deployment: str = "k8s.deployment.name"
    # Attribute name that carries the Kubernetes node name.
    node: str = "k8s.node.name"
    # Attribute name that carries the service name.
    service: str = "service.name"
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
class GrafanaTempoConfig(GrafanaConfig):
    """GrafanaConfig extended with the label names to use for Tempo."""

    # Defaults to a GrafanaTempoLabelsConfig built from its own field defaults.
    labels: GrafanaTempoLabelsConfig = GrafanaTempoLabelsConfig()
|
|
@@ -6,37 +6,59 @@ import backoff
|
|
|
6
6
|
from holmes.plugins.toolsets.grafana.common import (
|
|
7
7
|
GrafanaConfig,
|
|
8
8
|
build_headers,
|
|
9
|
-
get_base_url,
|
|
10
9
|
)
|
|
11
10
|
|
|
12
11
|
|
|
13
12
|
def _is_non_retryable_http_error(exc: Exception) -> bool:
    """Return True for HTTP errors with a status below 500 (retrying will not help)."""
    return (
        isinstance(exc, requests.exceptions.HTTPError)
        and exc.response.status_code < 500
    )


@backoff.on_exception(
    backoff.expo,
    requests.exceptions.RequestException,
    max_tries=2,
    giveup=_is_non_retryable_http_error,
)
def _try_health_url(url: str, headers: dict) -> None:
    """Issue a GET against a health URL and raise if it does not succeed.

    The call is attempted at most twice with exponential backoff; HTTP errors
    with a status below 500 are not retried.

    Args:
        url: Health endpoint to probe.
        headers: HTTP headers to send with the request.

    Raises:
        requests.exceptions.RequestException: If the request fails or the
            response status indicates an error.
    """
    requests.get(url, headers=headers, timeout=5).raise_for_status()
|
|
22
|
+
|
|
23
|
+
|
|
20
24
|
def grafana_health_check(config: GrafanaConfig) -> Tuple[bool, str]:
    """
    Probe the health endpoint(s) of the configured Grafana/Loki backend.

    Candidate URLs, tried in order until one succeeds:
    1. Grafana proxy mode (`grafana_datasource_uid` is set): the Grafana
       data source health endpoint for that uid.
    2. Direct mode (no uid), two candidates:
       a. `{url}/{healthcheck}` for a self-hosted instance
          (NOTE(review): the original note says the default is `ready` — confirm in GrafanaConfig).
       b. the bare `{url}`, which is what loki cloud exposes.

    Returns:
        `(True, "")` as soon as one URL answers successfully, otherwise
        `(False, message)` where the message lists every failed attempt and,
        when a uid is combined with a `:3100` URL, a configuration hint.
    """
    if config.grafana_datasource_uid:
        # https://grafana.com/docs/grafana/latest/developers/http_api/data_source/#check-data-source-health
        candidates = [
            f"{config.url}/api/datasources/uid/{config.grafana_datasource_uid}/health"
        ]
    else:
        candidates = [
            f"{config.url}/{config.healthcheck}",
            config.url,  # loki cloud uses no suffix.
        ]
    request_headers = build_headers(
        api_key=config.api_key, additional_headers=config.headers
    )

    failures = []
    for candidate in candidates:
        try:
            _try_health_url(candidate, request_headers)
        except Exception as exc:
            logging.debug(
                f"Failed to fetch grafana health status at {candidate}", exc_info=True
            )
            failures.append(
                f"Failed to fetch grafana health status at {candidate}. {str(exc)}\n"
            )
        else:
            return True, ""

    error_msg = "".join(failures)

    # Add helpful hint if this looks like a common misconfiguration
    if config.grafana_datasource_uid and ":3100" in config.url:
        error_msg += (
            "\n\nPossible configuration issue: grafana_datasource_uid is set but URL contains port 3100 "
            "(typically used for direct Loki connections). Please verify:\n"
            "- If connecting directly to Loki: remove grafana_datasource_uid from config\n"
            "- If connecting via Grafana proxy: ensure URL points to Grafana (usually port 3000)"
        )

    return False, error_msg
|
|
@@ -0,0 +1,454 @@
|
|
|
1
|
+
"""Grafana Tempo API wrapper for querying traces and metrics."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
from typing import Any, Dict, Optional, Union
|
|
5
|
+
from urllib.parse import quote
|
|
6
|
+
|
|
7
|
+
import backoff
|
|
8
|
+
import requests # type: ignore
|
|
9
|
+
|
|
10
|
+
from holmes.plugins.toolsets.grafana.common import (
|
|
11
|
+
GrafanaTempoConfig,
|
|
12
|
+
build_headers,
|
|
13
|
+
get_base_url,
|
|
14
|
+
)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
logger = logging.getLogger(__name__)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class TempoAPIError(Exception):
    """Error raised for a failed Tempo API call, carrying the response details.

    The exception message has the form ``Tempo API error <status>: <detail>``.
    ``<detail>`` is taken from the ``error``, ``message`` or ``errorType`` key
    (first truthy one) when the response body is a JSON object; in every other
    case (non-JSON body, JSON that is not an object, no usable key) the raw
    response text is used.

    Attributes:
        status_code: HTTP status code of the failed response.
        response_text: Raw response body.
        url: URL that was requested.
    """

    def __init__(self, status_code: int, response_text: str, url: str):
        import json

        self.status_code = status_code
        self.response_text = response_text
        self.url = url

        # Fall back to the raw body unless a better message can be extracted.
        error_message = response_text
        try:
            error_data = json.loads(response_text)
        except (json.JSONDecodeError, TypeError):
            error_data = None

        # Valid JSON is not necessarily an object (e.g. a bare string or a
        # list); only dicts support the key lookups below. Without this guard
        # building the exception itself would raise AttributeError.
        if isinstance(error_data, dict):
            # Tempo may return errors in different formats
            error_message = (
                error_data.get("error")
                or error_data.get("message")
                or error_data.get("errorType")
                or response_text
            )

        super().__init__(f"Tempo API error {status_code}: {error_message}")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class GrafanaTempoAPI:
    """Thin client for the Grafana Tempo REST API.

    Each public method maps to one Tempo HTTP endpoint, assembles its query
    parameters and delegates the call to `_make_request`.
    """

    def __init__(self, config: GrafanaTempoConfig):
        """Store the config and derive the base URL and request headers from it.

        Args:
            config: GrafanaTempoConfig instance with connection details
        """
        self.config = config
        self.base_url = get_base_url(config)
        self.headers = build_headers(config.api_key, config.headers)

    @staticmethod
    def _as_text(value: Any) -> Optional[str]:
        """Return `str(value)`, or None when the value is None."""
        return None if value is None else str(value)

    @staticmethod
    def _drop_unset(candidates: Dict[str, Any]) -> Dict[str, Any]:
        """Return the entries whose value is not None, keeping their order."""
        return {key: value for key, value in candidates.items() if value is not None}

    def _make_request(
        self,
        endpoint: str,
        params: Optional[Dict[str, Any]] = None,
        path_params: Optional[Dict[str, str]] = None,
        timeout: int = 30,
        retries: int = 3,
    ) -> Dict[str, Any]:
        """Send a GET request to Tempo, retrying transient failures.

        Args:
            endpoint: API endpoint path (e.g., "/api/echo"); may contain
                `{name}` placeholders
            params: Query parameters
            path_params: Values for the `{name}` placeholders; each value is
                percent-encoded before substitution
            timeout: Request timeout in seconds
            retries: Maximum number of attempts made by the backoff decorator

        Returns:
            The decoded JSON body of the response

        Raises:
            TempoAPIError: If the final attempt fails with an HTTP error that
                carries a response
            requests.exceptions.RequestException: For any other request failure
        """
        # Substitute path placeholders with fully percent-encoded values.
        for placeholder, raw_value in (path_params or {}).items():
            endpoint = endpoint.replace(
                f"{{{placeholder}}}", quote(str(raw_value), safe="")
            )

        url = f"{self.base_url}{endpoint}"

        def _is_client_error(exc: Exception) -> bool:
            # HTTP errors below 500 will not be cured by retrying.
            return (
                isinstance(exc, requests.exceptions.HTTPError)
                and getattr(exc, "response", None) is not None
                and exc.response.status_code < 500
            )

        @backoff.on_exception(
            backoff.expo,
            requests.exceptions.RequestException,
            max_tries=retries,
            giveup=_is_client_error,
        )
        def make_request():
            # GET request with query parameters
            reply = requests.get(
                url, headers=self.headers, params=params, timeout=timeout
            )
            reply.raise_for_status()
            return reply.json()

        try:
            return make_request()
        except requests.exceptions.HTTPError as e:
            response = e.response
            if response is None:
                logger.error(f"Request failed for {url}: {e}")
                raise
            # Surface the response details through the dedicated error type.
            logger.error(
                f"HTTP error {response.status_code} for {url}: {response.text}"
            )
            raise TempoAPIError(
                status_code=response.status_code,
                response_text=response.text,
                url=url,
            )
        except requests.exceptions.RequestException as e:
            logger.error(f"Request failed for {url}: {e}")
            raise

    def query_echo_endpoint(self) -> bool:
        """Check Tempo status through the echo endpoint.

        API Endpoint: GET /api/echo
        HTTP Method: GET

        Returns:
            bool: True if the endpoint answers with status code 200, False on
            any other status or when the request itself fails
        """
        url = f"{self.base_url}/api/echo"

        try:
            response = requests.get(url, headers=self.headers, timeout=30)
        except requests.exceptions.RequestException as e:
            logger.error(f"Request failed for {url}: {e}")
            return False

        # Only the status code matters; the body is not parsed.
        return response.status_code == 200

    def query_trace_by_id_v2(
        self,
        trace_id: str,
        start: Optional[int] = None,
        end: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Fetch a single trace by its ID.

        API Endpoint: GET /api/v2/traces/{trace_id}
        HTTP Method: GET

        Args:
            trace_id: The trace ID to retrieve
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds

        Returns:
            dict: OpenTelemetry format trace data
        """
        window = self._drop_unset(
            {"start": self._as_text(start), "end": self._as_text(end)}
        )
        return self._make_request(
            "/api/v2/traces/{trace_id}",
            params=window,
            path_params={"trace_id": trace_id},
        )

    def _search_traces_common(
        self,
        search_params: Dict[str, Any],
        limit: Optional[int] = None,
        start: Optional[int] = None,
        end: Optional[int] = None,
        spss: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Shared implementation behind the tag-based and TraceQL searches.

        Args:
            search_params: The search-specific parameters (tags or q); the
                caller's dict is copied, not modified
            limit: Optional max number of traces to return
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds
            spss: Optional spans per span set

        Returns:
            dict: Search results with trace metadata
        """
        merged = search_params.copy()
        merged.update(
            self._drop_unset(
                {
                    "limit": self._as_text(limit),
                    "start": self._as_text(start),
                    "end": self._as_text(end),
                    "spss": self._as_text(spss),
                }
            )
        )
        return self._make_request("/api/search", params=merged)

    def search_traces_by_tags(
        self,
        tags: str,
        min_duration: Optional[str] = None,
        max_duration: Optional[str] = None,
        limit: Optional[int] = None,
        start: Optional[int] = None,
        end: Optional[int] = None,
        spss: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Search for traces by tags.

        API Endpoint: GET /api/search
        HTTP Method: GET

        Args:
            tags: logfmt-encoded span/process attributes (required)
            min_duration: Optional minimum trace duration (e.g., "5s")
            max_duration: Optional maximum trace duration
            limit: Optional max number of traces to return
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds
            spss: Optional spans per span set

        Returns:
            dict: Search results with trace metadata
        """
        tag_filters = {"tags": tags}
        # minDuration and maxDuration are only supported with tag-based search
        tag_filters.update(
            self._drop_unset(
                {"minDuration": min_duration, "maxDuration": max_duration}
            )
        )
        return self._search_traces_common(
            search_params=tag_filters,
            limit=limit,
            start=start,
            end=end,
            spss=spss,
        )

    def search_traces_by_query(
        self,
        q: str,
        limit: Optional[int] = None,
        start: Optional[int] = None,
        end: Optional[int] = None,
        spss: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Search for traces with a TraceQL query.

        API Endpoint: GET /api/search
        HTTP Method: GET

        Note: minDuration and maxDuration are not supported with TraceQL
        queries; filter by duration inside the TraceQL query instead.

        Args:
            q: TraceQL query (required)
            limit: Optional max number of traces to return
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds
            spss: Optional spans per span set

        Returns:
            dict: Search results with trace metadata
        """
        return self._search_traces_common(
            search_params={"q": q},
            limit=limit,
            start=start,
            end=end,
            spss=spss,
        )

    def search_tag_names_v2(
        self,
        scope: Optional[str] = None,
        q: Optional[str] = None,
        start: Optional[int] = None,
        end: Optional[int] = None,
        limit: Optional[int] = None,
        max_stale_values: Optional[int] = None,
    ) -> Dict[str, Any]:
        """List the available tag names.

        API Endpoint: GET /api/v2/search/tags
        HTTP Method: GET

        Args:
            scope: Optional scope filter ("resource", "span", or "intrinsic")
            q: Optional TraceQL query to filter tags
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds
            limit: Optional max number of tag names
            max_stale_values: Optional max stale values parameter

        Returns:
            dict: Available tag names organized by scope
        """
        filters = self._drop_unset(
            {
                "scope": scope,
                "q": q,
                "start": self._as_text(start),
                "end": self._as_text(end),
                "limit": self._as_text(limit),
                "maxStaleValues": self._as_text(max_stale_values),
            }
        )
        return self._make_request("/api/v2/search/tags", params=filters)

    def search_tag_values_v2(
        self,
        tag: str,
        q: Optional[str] = None,
        start: Optional[int] = None,
        end: Optional[int] = None,
        limit: Optional[int] = None,
        max_stale_values: Optional[int] = None,
    ) -> Dict[str, Any]:
        """List the values seen for one tag, optionally filtered by TraceQL.

        API Endpoint: GET /api/v2/search/tag/{tag}/values
        HTTP Method: GET

        Args:
            tag: The tag name to get values for (required)
            q: Optional TraceQL query to filter tag values (e.g., '{resource.cluster="us-east-1"}')
            start: Optional start time in Unix epoch seconds
            end: Optional end time in Unix epoch seconds
            limit: Optional max number of values
            max_stale_values: Optional max stale values parameter

        Returns:
            dict: List of discovered values for the tag
        """
        filters = self._drop_unset(
            {
                "q": q,
                "start": self._as_text(start),
                "end": self._as_text(end),
                "limit": self._as_text(limit),
                "maxStaleValues": self._as_text(max_stale_values),
            }
        )
        return self._make_request(
            "/api/v2/search/tag/{tag}/values",
            params=filters,
            path_params={"tag": tag},
        )

    def query_metrics_instant(
        self,
        q: str,
        start: Optional[Union[int, str]] = None,
        end: Optional[Union[int, str]] = None,
        since: Optional[str] = None,
    ) -> Dict[str, Any]:
        """Run a TraceQL metrics query that yields one value for the whole range.

        API Endpoint: GET /api/metrics/query
        HTTP Method: GET

        Args:
            q: TraceQL metrics query (required)
            start: Optional start time (Unix seconds/nanoseconds/RFC3339)
            end: Optional end time (Unix seconds/nanoseconds/RFC3339)
            since: Optional duration string (e.g., "1h")

        Returns:
            dict: Single computed metric value
        """
        query = {"q": q}
        query.update(
            self._drop_unset(
                {
                    "start": self._as_text(start),
                    "end": self._as_text(end),
                    "since": since,
                }
            )
        )
        return self._make_request("/api/metrics/query", params=query)

    def query_metrics_range(
        self,
        q: str,
        step: Optional[str] = None,
        start: Optional[Union[int, str]] = None,
        end: Optional[Union[int, str]] = None,
        since: Optional[str] = None,
        exemplars: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Run a TraceQL metrics query that yields a time series.

        Metrics are computed at regular intervals over the time range.

        API Endpoint: GET /api/metrics/query_range
        HTTP Method: GET

        Args:
            q: TraceQL metrics query (required)
            step: Optional time series granularity (e.g., "1m", "5m")
            start: Optional start time (Unix seconds/nanoseconds/RFC3339)
            end: Optional end time (Unix seconds/nanoseconds/RFC3339)
            since: Optional duration string (e.g., "3h")
            exemplars: Optional maximum number of exemplars to return

        Returns:
            dict: Time series of metric values
        """
        query = {"q": q}
        query.update(
            self._drop_unset(
                {
                    "step": step,
                    "start": self._as_text(start),
                    "end": self._as_text(end),
                    "since": since,
                    "exemplars": self._as_text(exemplars),
                }
            )
        )
        return self._make_request("/api/metrics/query_range", params=query)
|
|
@@ -0,0 +1,9 @@
|
|
|
1
|
+
Grafana Loki is a multi-tenant log aggregation system designed to store and query logs from all your applications and infrastructure.
|
|
2
|
+
|
|
3
|
+
* Deleted K8s objects don’t delete their past logs. You can still find them by time-bounded queries on stable labels (e.g., namespace + app) or by regex on pod names.
|
|
4
|
+
* If you can't find the Kubernetes workload in the cluster, YOU SHOULD still try to find logs from it using non-specific values on labels.
|
|
5
|
+
* If asked to check for logs, you must always try 1-2 of the best queries you can construct to search for the logs.
|
|
6
|
+
|
|
7
|
+
Loki indexes log lines using labels to help find relevant log lines.
|
|
8
|
+
For example, a default Kubernetes label setup would look like this:
|
|
9
|
+
{namespace="prod", app="backend-api", container="api", pod="backend-api-68b7d9df9c-xyz12", stream="stdout"}
|