holmesgpt 0.13.1__py3-none-any.whl → 0.13.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/common/env_vars.py +7 -0
- holmes/config.py +3 -1
- holmes/core/conversations.py +0 -11
- holmes/core/investigation.py +0 -6
- holmes/core/llm.py +60 -1
- holmes/core/prompt.py +0 -2
- holmes/core/supabase_dal.py +2 -2
- holmes/core/todo_tasks_formatter.py +51 -0
- holmes/core/tool_calling_llm.py +166 -91
- holmes/core/tools.py +20 -4
- holmes/interactive.py +63 -2
- holmes/main.py +0 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +3 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +3 -13
- holmes/plugins/toolsets/__init__.py +5 -1
- holmes/plugins/toolsets/argocd.yaml +1 -1
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +18 -6
- holmes/plugins/toolsets/aws.yaml +9 -5
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +3 -1
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +3 -1
- holmes/plugins/toolsets/bash/bash_toolset.py +31 -20
- holmes/plugins/toolsets/confluence.yaml +1 -1
- holmes/plugins/toolsets/coralogix/api.py +3 -1
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +4 -4
- holmes/plugins/toolsets/coralogix/utils.py +41 -14
- holmes/plugins/toolsets/datadog/datadog_api.py +45 -2
- holmes/plugins/toolsets/datadog/datadog_general_instructions.jinja2 +208 -0
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +43 -0
- holmes/plugins/toolsets/datadog/datadog_metrics_instructions.jinja2 +12 -9
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +722 -0
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +17 -6
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +15 -7
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +6 -2
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +9 -3
- holmes/plugins/toolsets/docker.yaml +1 -1
- holmes/plugins/toolsets/git.py +15 -5
- holmes/plugins/toolsets/grafana/toolset_grafana.py +25 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +5 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +299 -32
- holmes/plugins/toolsets/helm.yaml +1 -1
- holmes/plugins/toolsets/internet/internet.py +4 -2
- holmes/plugins/toolsets/internet/notion.py +4 -2
- holmes/plugins/toolsets/investigator/core_investigation.py +5 -17
- holmes/plugins/toolsets/investigator/investigator_instructions.jinja2 +1 -5
- holmes/plugins/toolsets/kafka.py +19 -7
- holmes/plugins/toolsets/kubernetes.yaml +5 -5
- holmes/plugins/toolsets/kubernetes_logs.py +4 -4
- holmes/plugins/toolsets/kubernetes_logs.yaml +1 -1
- holmes/plugins/toolsets/logging_utils/logging_api.py +15 -2
- holmes/plugins/toolsets/mcp/toolset_mcp.py +3 -1
- holmes/plugins/toolsets/newrelic.py +8 -4
- holmes/plugins/toolsets/opensearch/opensearch.py +13 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +4 -4
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +9 -6
- holmes/plugins/toolsets/prometheus/prometheus.py +193 -82
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +7 -3
- holmes/plugins/toolsets/robusta/robusta.py +10 -4
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -2
- holmes/plugins/toolsets/servicenow/servicenow.py +9 -3
- holmes/plugins/toolsets/slab.yaml +1 -1
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3.dist-info}/METADATA +3 -2
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3.dist-info}/RECORD +75 -72
- holmes/core/todo_manager.py +0 -88
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.1.dist-info → holmesgpt-0.13.3.dist-info}/entry_points.txt +0 -0
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import os
|
|
2
|
+
import re
|
|
2
3
|
from typing import Any, Dict, List, cast
|
|
3
4
|
|
|
4
5
|
import requests # type: ignore
|
|
@@ -35,6 +36,8 @@ from holmes.plugins.toolsets.utils import (
|
|
|
35
36
|
TEMPO_LABELS_ADD_PREFIX = load_bool("TEMPO_LABELS_ADD_PREFIX", True)
|
|
36
37
|
|
|
37
38
|
ONE_HOUR_IN_SECONDS = 3600
|
|
39
|
+
DEFAULT_TRACES_TIME_SPAN_SECONDS = DEFAULT_TIME_SPAN_SECONDS # 7 days
|
|
40
|
+
DEFAULT_TAGS_TIME_SPAN_SECONDS = 8 * ONE_HOUR_IN_SECONDS # 8 hours
|
|
38
41
|
|
|
39
42
|
|
|
40
43
|
class GrafanaTempoLabelsConfig(BaseModel):
|
|
@@ -64,6 +67,52 @@ class BaseGrafanaTempoToolset(BaseGrafanaToolset):
|
|
|
64
67
|
def grafana_config(self) -> GrafanaTempoConfig:
|
|
65
68
|
return cast(GrafanaTempoConfig, self._grafana_config)
|
|
66
69
|
|
|
70
|
+
def build_k8s_filters(
|
|
71
|
+
self, params: Dict[str, Any], use_exact_match: bool
|
|
72
|
+
) -> List[str]:
|
|
73
|
+
"""Build TraceQL filters for k8s parameters.
|
|
74
|
+
|
|
75
|
+
Args:
|
|
76
|
+
params: Dictionary containing k8s parameters
|
|
77
|
+
use_exact_match: If True, uses exact match (=), if False uses regex match (=~)
|
|
78
|
+
|
|
79
|
+
Returns:
|
|
80
|
+
List of TraceQL filter strings
|
|
81
|
+
"""
|
|
82
|
+
prefix = ""
|
|
83
|
+
if TEMPO_LABELS_ADD_PREFIX:
|
|
84
|
+
prefix = "resource."
|
|
85
|
+
|
|
86
|
+
filters = []
|
|
87
|
+
labels = self.grafana_config.labels
|
|
88
|
+
|
|
89
|
+
# Define parameter mappings: (param_name, label_attribute)
|
|
90
|
+
parameter_mappings = [
|
|
91
|
+
("service_name", "service"),
|
|
92
|
+
("pod_name", "pod"),
|
|
93
|
+
("namespace_name", "namespace"),
|
|
94
|
+
("deployment_name", "deployment"),
|
|
95
|
+
("node_name", "node"),
|
|
96
|
+
]
|
|
97
|
+
|
|
98
|
+
for param_name, label_attr in parameter_mappings:
|
|
99
|
+
value = params.get(param_name)
|
|
100
|
+
if value:
|
|
101
|
+
# Get the label from the config
|
|
102
|
+
label = getattr(labels, label_attr)
|
|
103
|
+
|
|
104
|
+
# Build the filter based on match type
|
|
105
|
+
if use_exact_match:
|
|
106
|
+
# Escape double quotes in the value for exact match
|
|
107
|
+
escaped_value = value.replace('"', '\\"')
|
|
108
|
+
filters.append(f'{prefix}{label}="{escaped_value}"')
|
|
109
|
+
else:
|
|
110
|
+
# Escape regex special characters for partial match
|
|
111
|
+
escaped_value = re.escape(value)
|
|
112
|
+
filters.append(f'{prefix}{label}=~".*{escaped_value}.*"')
|
|
113
|
+
|
|
114
|
+
return filters
|
|
115
|
+
|
|
67
116
|
|
|
68
117
|
def validate_params(params: Dict[str, Any], expected_params: List[str]):
|
|
69
118
|
for param in expected_params:
|
|
@@ -110,7 +159,7 @@ class GetTempoTraces(Tool):
|
|
|
110
159
|
required=False,
|
|
111
160
|
),
|
|
112
161
|
"start_datetime": ToolParameter(
|
|
113
|
-
description="The beginning time boundary for the trace search period. String in RFC3339 format. If a negative integer, the number of seconds relative to the end_timestamp. Defaults to
|
|
162
|
+
description=f"The beginning time boundary for the trace search period. String in RFC3339 format. If a negative integer, the number of seconds relative to the end_timestamp. Defaults to -{DEFAULT_TRACES_TIME_SPAN_SECONDS}",
|
|
114
163
|
type="string",
|
|
115
164
|
required=False,
|
|
116
165
|
),
|
|
@@ -133,10 +182,11 @@ class GetTempoTraces(Tool):
|
|
|
133
182
|
)
|
|
134
183
|
self._toolset = toolset
|
|
135
184
|
|
|
136
|
-
def _invoke(
|
|
185
|
+
def _invoke(
|
|
186
|
+
self, params: dict, user_approved: bool = False
|
|
187
|
+
) -> StructuredToolResult:
|
|
137
188
|
api_key = self._toolset.grafana_config.api_key
|
|
138
189
|
headers = self._toolset.grafana_config.headers
|
|
139
|
-
labels = self._toolset.grafana_config.labels
|
|
140
190
|
|
|
141
191
|
invalid_params_error = validate_params(
|
|
142
192
|
params, ["service_name", "pod_name", "deployment_name"]
|
|
@@ -151,28 +201,10 @@ class GetTempoTraces(Tool):
|
|
|
151
201
|
start, end = process_timestamps_to_int(
|
|
152
202
|
params.get("start_datetime"),
|
|
153
203
|
params.get("end_datetime"),
|
|
154
|
-
default_time_span_seconds=
|
|
204
|
+
default_time_span_seconds=DEFAULT_TRACES_TIME_SPAN_SECONDS,
|
|
155
205
|
)
|
|
156
206
|
|
|
157
|
-
|
|
158
|
-
if TEMPO_LABELS_ADD_PREFIX:
|
|
159
|
-
prefix = "resource."
|
|
160
|
-
|
|
161
|
-
filters = []
|
|
162
|
-
if params.get("service_name"):
|
|
163
|
-
filters.append(f'{prefix}{labels.service}="{params.get("service_name")}"')
|
|
164
|
-
if params.get("pod_name"):
|
|
165
|
-
filters.append(f'{prefix}{labels.pod}="{params.get("pod_name")}"')
|
|
166
|
-
if params.get("namespace_name"):
|
|
167
|
-
filters.append(
|
|
168
|
-
f'{prefix}{labels.namespace}="{params.get("namespace_name")}"'
|
|
169
|
-
)
|
|
170
|
-
if params.get("deployment_name"):
|
|
171
|
-
filters.append(
|
|
172
|
-
f'{prefix}{labels.deployment}="{params.get("deployment_name")}"'
|
|
173
|
-
)
|
|
174
|
-
if params.get("node_name"):
|
|
175
|
-
filters.append(f'{prefix}{labels.node}="{params.get("node_name")}"')
|
|
207
|
+
filters = self._toolset.build_k8s_filters(params, use_exact_match=True)
|
|
176
208
|
|
|
177
209
|
filters.append(f'duration>{get_param_or_raise(params, "min_duration")}')
|
|
178
210
|
|
|
@@ -207,7 +239,7 @@ class GetTempoTags(Tool):
|
|
|
207
239
|
description="List the tags available in Tempo",
|
|
208
240
|
parameters={
|
|
209
241
|
"start_datetime": ToolParameter(
|
|
210
|
-
description="The beginning time boundary for the search period. String in RFC3339 format. If a negative integer, the number of seconds relative to the end_timestamp. Defaults to
|
|
242
|
+
description=f"The beginning time boundary for the search period. String in RFC3339 format. If a negative integer, the number of seconds relative to the end_timestamp. Defaults to -{DEFAULT_TAGS_TIME_SPAN_SECONDS}",
|
|
211
243
|
type="string",
|
|
212
244
|
required=False,
|
|
213
245
|
),
|
|
@@ -220,13 +252,15 @@ class GetTempoTags(Tool):
|
|
|
220
252
|
)
|
|
221
253
|
self._toolset = toolset
|
|
222
254
|
|
|
223
|
-
def _invoke(
|
|
255
|
+
def _invoke(
|
|
256
|
+
self, params: dict, user_approved: bool = False
|
|
257
|
+
) -> StructuredToolResult:
|
|
224
258
|
api_key = self._toolset.grafana_config.api_key
|
|
225
259
|
headers = self._toolset.grafana_config.headers
|
|
226
260
|
start, end = process_timestamps_to_int(
|
|
227
261
|
start=params.get("start_datetime"),
|
|
228
262
|
end=params.get("end_datetime"),
|
|
229
|
-
default_time_span_seconds=
|
|
263
|
+
default_time_span_seconds=DEFAULT_TAGS_TIME_SPAN_SECONDS,
|
|
230
264
|
)
|
|
231
265
|
|
|
232
266
|
base_url = get_base_url(self._toolset.grafana_config)
|
|
@@ -246,9 +280,7 @@ class GetTempoTags(Tool):
|
|
|
246
280
|
params=params,
|
|
247
281
|
)
|
|
248
282
|
except requests.exceptions.RequestException as e:
|
|
249
|
-
raise Exception(
|
|
250
|
-
f"Failed to retrieve trace by ID after retries: {e} \n for URL: {url}"
|
|
251
|
-
)
|
|
283
|
+
raise Exception(f"Failed to retrieve tags: {e} \n for URL: {url}")
|
|
252
284
|
|
|
253
285
|
def get_parameterized_one_liner(self, params: Dict) -> str:
|
|
254
286
|
return f"{toolset_name_for_one_liner(self._toolset.name)}: Fetched Tempo tags"
|
|
@@ -269,7 +301,9 @@ class GetTempoTraceById(Tool):
|
|
|
269
301
|
)
|
|
270
302
|
self._toolset = toolset
|
|
271
303
|
|
|
272
|
-
def _invoke(
|
|
304
|
+
def _invoke(
|
|
305
|
+
self, params: dict, user_approved: bool = False
|
|
306
|
+
) -> StructuredToolResult:
|
|
273
307
|
labels_mapping = self._toolset.grafana_config.labels
|
|
274
308
|
labels = list(labels_mapping.model_dump().values())
|
|
275
309
|
|
|
@@ -291,14 +325,247 @@ class GetTempoTraceById(Tool):
|
|
|
291
325
|
return f"{toolset_name_for_one_liner(self._toolset.name)}: Fetched Tempo Trace (trace_id={params.get('trace_id')})"
|
|
292
326
|
|
|
293
327
|
|
|
328
|
+
class FetchTracesSimpleComparison(Tool):
|
|
329
|
+
def __init__(self, toolset: BaseGrafanaTempoToolset):
|
|
330
|
+
super().__init__(
|
|
331
|
+
name="fetch_tempo_traces_comparative_sample",
|
|
332
|
+
description="""Fetches statistics and representative samples of fast, slow, and typical traces for performance analysis. Requires either a `base_query` OR at least one of `service_name`, `pod_name`, `namespace_name`, `deployment_name`, `node_name`.
|
|
333
|
+
|
|
334
|
+
Important: call this tool first when investigating performance issues via traces. This tool provides comprehensive analysis for identifying patterns.
|
|
335
|
+
|
|
336
|
+
Examples:
|
|
337
|
+
- For service latency: service_name="payment" (matches "payment-service" too)
|
|
338
|
+
- For namespace issues: namespace_name="production"
|
|
339
|
+
- Combined: service_name="auth", namespace_name="staging\"""",
|
|
340
|
+
parameters={
|
|
341
|
+
"service_name": ToolParameter(
|
|
342
|
+
description="Service to analyze (partial match supported)",
|
|
343
|
+
type="string",
|
|
344
|
+
required=False,
|
|
345
|
+
),
|
|
346
|
+
"pod_name": ToolParameter(
|
|
347
|
+
description="Filter traces by pod name (partial match supported)",
|
|
348
|
+
type="string",
|
|
349
|
+
required=False,
|
|
350
|
+
),
|
|
351
|
+
"namespace_name": ToolParameter(
|
|
352
|
+
description="Kubernetes namespace to filter traces",
|
|
353
|
+
type="string",
|
|
354
|
+
required=False,
|
|
355
|
+
),
|
|
356
|
+
"deployment_name": ToolParameter(
|
|
357
|
+
description="Filter traces by deployment name (partial match supported)",
|
|
358
|
+
type="string",
|
|
359
|
+
required=False,
|
|
360
|
+
),
|
|
361
|
+
"node_name": ToolParameter(
|
|
362
|
+
description="Filter traces by node name",
|
|
363
|
+
type="string",
|
|
364
|
+
required=False,
|
|
365
|
+
),
|
|
366
|
+
"base_query": ToolParameter(
|
|
367
|
+
description="Custom TraceQL filter",
|
|
368
|
+
type="string",
|
|
369
|
+
required=False,
|
|
370
|
+
),
|
|
371
|
+
"sample_count": ToolParameter(
|
|
372
|
+
description="Number of traces to fetch from each category (fastest/slowest). Default 3",
|
|
373
|
+
type="integer",
|
|
374
|
+
required=False,
|
|
375
|
+
),
|
|
376
|
+
"start_datetime": ToolParameter(
|
|
377
|
+
description=f"The beginning time boundary for the trace search period. String in RFC3339 format. If a negative integer, the number of seconds relative to the end_timestamp. Defaults to -{DEFAULT_TRACES_TIME_SPAN_SECONDS}",
|
|
378
|
+
type="string",
|
|
379
|
+
required=False,
|
|
380
|
+
),
|
|
381
|
+
"end_datetime": ToolParameter(
|
|
382
|
+
description="The ending time boundary for the trace search period. String in RFC3339 format. Defaults to NOW().",
|
|
383
|
+
type="string",
|
|
384
|
+
required=False,
|
|
385
|
+
),
|
|
386
|
+
},
|
|
387
|
+
)
|
|
388
|
+
self._toolset = toolset
|
|
389
|
+
|
|
390
|
+
def _invoke(
|
|
391
|
+
self, params: dict, user_approved: bool = False
|
|
392
|
+
) -> StructuredToolResult:
|
|
393
|
+
try:
|
|
394
|
+
# Build query
|
|
395
|
+
if params.get("base_query"):
|
|
396
|
+
base_query = params["base_query"]
|
|
397
|
+
else:
|
|
398
|
+
# Use the shared utility with partial matching (regex)
|
|
399
|
+
filters = self._toolset.build_k8s_filters(params, use_exact_match=False)
|
|
400
|
+
|
|
401
|
+
# Validate that at least one parameter was provided
|
|
402
|
+
invalid_params_error = validate_params(
|
|
403
|
+
params,
|
|
404
|
+
[
|
|
405
|
+
"service_name",
|
|
406
|
+
"pod_name",
|
|
407
|
+
"namespace_name",
|
|
408
|
+
"deployment_name",
|
|
409
|
+
"node_name",
|
|
410
|
+
],
|
|
411
|
+
)
|
|
412
|
+
if invalid_params_error:
|
|
413
|
+
return StructuredToolResult(
|
|
414
|
+
status=ToolResultStatus.ERROR,
|
|
415
|
+
error=invalid_params_error,
|
|
416
|
+
params=params,
|
|
417
|
+
)
|
|
418
|
+
|
|
419
|
+
base_query = " && ".join(filters)
|
|
420
|
+
|
|
421
|
+
sample_count = params.get("sample_count", 3)
|
|
422
|
+
|
|
423
|
+
start, end = process_timestamps_to_int(
|
|
424
|
+
params.get("start_datetime"),
|
|
425
|
+
params.get("end_datetime"),
|
|
426
|
+
default_time_span_seconds=DEFAULT_TRACES_TIME_SPAN_SECONDS,
|
|
427
|
+
)
|
|
428
|
+
|
|
429
|
+
base_url = get_base_url(self._toolset.grafana_config)
|
|
430
|
+
|
|
431
|
+
# Step 1: Get all trace summaries
|
|
432
|
+
stats_query = f"{{{base_query}}}"
|
|
433
|
+
all_traces_response = query_tempo_traces(
|
|
434
|
+
base_url=base_url,
|
|
435
|
+
api_key=self._toolset.grafana_config.api_key,
|
|
436
|
+
headers=self._toolset.grafana_config.headers,
|
|
437
|
+
query=stats_query,
|
|
438
|
+
start=start,
|
|
439
|
+
end=end,
|
|
440
|
+
limit=1000,
|
|
441
|
+
)
|
|
442
|
+
|
|
443
|
+
traces = all_traces_response.get("traces", [])
|
|
444
|
+
if not traces:
|
|
445
|
+
return StructuredToolResult(
|
|
446
|
+
status=ToolResultStatus.SUCCESS,
|
|
447
|
+
data="No traces found matching the query",
|
|
448
|
+
params=params,
|
|
449
|
+
)
|
|
450
|
+
|
|
451
|
+
# Step 2: Sort traces by duration
|
|
452
|
+
sorted_traces = sorted(traces, key=lambda x: x.get("durationMs", 0))
|
|
453
|
+
|
|
454
|
+
# Step 3: Calculate basic statistics
|
|
455
|
+
durations = [t.get("durationMs", 0) for t in sorted_traces]
|
|
456
|
+
stats = {
|
|
457
|
+
"trace_count": len(durations),
|
|
458
|
+
"min_ms": durations[0],
|
|
459
|
+
"p25_ms": durations[len(durations) // 4]
|
|
460
|
+
if len(durations) >= 4
|
|
461
|
+
else durations[0],
|
|
462
|
+
"p50_ms": durations[len(durations) // 2],
|
|
463
|
+
"p75_ms": durations[3 * len(durations) // 4]
|
|
464
|
+
if len(durations) >= 4
|
|
465
|
+
else durations[-1],
|
|
466
|
+
"p90_ms": durations[int(len(durations) * 0.9)]
|
|
467
|
+
if len(durations) >= 10
|
|
468
|
+
else durations[-1],
|
|
469
|
+
"p99_ms": durations[int(len(durations) * 0.99)]
|
|
470
|
+
if len(durations) >= 100
|
|
471
|
+
else durations[-1],
|
|
472
|
+
"max_ms": durations[-1],
|
|
473
|
+
}
|
|
474
|
+
|
|
475
|
+
# Step 4: Select representative traces to fetch
|
|
476
|
+
fastest_indices = list(range(min(sample_count, len(sorted_traces))))
|
|
477
|
+
slowest_indices = list(
|
|
478
|
+
range(max(0, len(sorted_traces) - sample_count), len(sorted_traces))
|
|
479
|
+
)
|
|
480
|
+
|
|
481
|
+
# Add median trace
|
|
482
|
+
median_idx = len(sorted_traces) // 2
|
|
483
|
+
|
|
484
|
+
# Step 5: Fetch full trace details
|
|
485
|
+
def fetch_full_trace(trace_summary):
|
|
486
|
+
trace_id = trace_summary.get("traceID")
|
|
487
|
+
if not trace_id:
|
|
488
|
+
return None
|
|
489
|
+
|
|
490
|
+
try:
|
|
491
|
+
url = f"{base_url}/api/traces/{trace_id}"
|
|
492
|
+
response = requests.get(
|
|
493
|
+
url,
|
|
494
|
+
headers=build_headers(
|
|
495
|
+
api_key=self._toolset.grafana_config.api_key,
|
|
496
|
+
additional_headers=self._toolset.grafana_config.headers,
|
|
497
|
+
),
|
|
498
|
+
timeout=5,
|
|
499
|
+
)
|
|
500
|
+
response.raise_for_status()
|
|
501
|
+
return {
|
|
502
|
+
"traceID": trace_id,
|
|
503
|
+
"durationMs": trace_summary.get("durationMs", 0),
|
|
504
|
+
"rootServiceName": trace_summary.get(
|
|
505
|
+
"rootServiceName", "unknown"
|
|
506
|
+
),
|
|
507
|
+
"traceData": response.json(), # Raw trace data
|
|
508
|
+
}
|
|
509
|
+
except requests.exceptions.RequestException as e:
|
|
510
|
+
error_msg = f"Failed to fetch full trace: {str(e)}"
|
|
511
|
+
if hasattr(e, "response") and e.response is not None:
|
|
512
|
+
error_msg += f" (Status: {e.response.status_code})"
|
|
513
|
+
return {
|
|
514
|
+
"traceID": trace_id,
|
|
515
|
+
"durationMs": trace_summary.get("durationMs", 0),
|
|
516
|
+
"error": error_msg,
|
|
517
|
+
}
|
|
518
|
+
except (ValueError, KeyError) as e:
|
|
519
|
+
return {
|
|
520
|
+
"traceID": trace_id,
|
|
521
|
+
"durationMs": trace_summary.get("durationMs", 0),
|
|
522
|
+
"error": f"Failed to parse trace data: {str(e)}",
|
|
523
|
+
}
|
|
524
|
+
|
|
525
|
+
# Fetch the selected traces
|
|
526
|
+
result = {
|
|
527
|
+
"statistics": stats,
|
|
528
|
+
"all_trace_durations_ms": durations, # All durations for distribution analysis
|
|
529
|
+
"fastest_traces": [
|
|
530
|
+
fetch_full_trace(sorted_traces[i]) for i in fastest_indices
|
|
531
|
+
],
|
|
532
|
+
"median_trace": fetch_full_trace(sorted_traces[median_idx]),
|
|
533
|
+
"slowest_traces": [
|
|
534
|
+
fetch_full_trace(sorted_traces[i]) for i in slowest_indices
|
|
535
|
+
],
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
# Return as YAML for readability
|
|
539
|
+
return StructuredToolResult(
|
|
540
|
+
status=ToolResultStatus.SUCCESS,
|
|
541
|
+
data=yaml.dump(result, default_flow_style=False, sort_keys=False),
|
|
542
|
+
params=params,
|
|
543
|
+
)
|
|
544
|
+
|
|
545
|
+
except Exception as e:
|
|
546
|
+
return StructuredToolResult(
|
|
547
|
+
status=ToolResultStatus.ERROR,
|
|
548
|
+
error=f"Error fetching traces: {str(e)}",
|
|
549
|
+
params=params,
|
|
550
|
+
)
|
|
551
|
+
|
|
552
|
+
def get_parameterized_one_liner(self, params: Dict) -> str:
|
|
553
|
+
return f"{toolset_name_for_one_liner(self._toolset.name)}: Simple Tempo Traces Comparison"
|
|
554
|
+
|
|
555
|
+
|
|
294
556
|
class GrafanaTempoToolset(BaseGrafanaTempoToolset):
|
|
295
557
|
def __init__(self):
|
|
296
558
|
super().__init__(
|
|
297
559
|
name="grafana/tempo",
|
|
298
560
|
description="Fetches kubernetes traces from Tempo",
|
|
299
561
|
icon_url="https://grafana.com/static/assets/img/blog/tempo.png",
|
|
300
|
-
docs_url="https://
|
|
301
|
-
tools=[
|
|
562
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/grafanatempo/",
|
|
563
|
+
tools=[
|
|
564
|
+
FetchTracesSimpleComparison(self),
|
|
565
|
+
GetTempoTraces(self),
|
|
566
|
+
GetTempoTraceById(self),
|
|
567
|
+
GetTempoTags(self),
|
|
568
|
+
],
|
|
302
569
|
)
|
|
303
570
|
template_file_path = os.path.abspath(
|
|
304
571
|
os.path.join(os.path.dirname(__file__), "toolset_grafana_tempo.jinja2")
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
toolsets:
|
|
2
2
|
helm/core:
|
|
3
3
|
description: "Read access to cluster's Helm charts and releases"
|
|
4
|
-
docs_url: "https://
|
|
4
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/helm/"
|
|
5
5
|
icon_url: "https://helm.sh/img/helm.svg"
|
|
6
6
|
tags:
|
|
7
7
|
- core
|
|
@@ -186,7 +186,9 @@ class FetchWebpage(Tool):
|
|
|
186
186
|
toolset=toolset, # type: ignore
|
|
187
187
|
)
|
|
188
188
|
|
|
189
|
-
def _invoke(
|
|
189
|
+
def _invoke(
|
|
190
|
+
self, params: dict, user_approved: bool = False
|
|
191
|
+
) -> StructuredToolResult:
|
|
190
192
|
url: str = params["url"]
|
|
191
193
|
|
|
192
194
|
additional_headers = (
|
|
@@ -268,7 +270,7 @@ class InternetToolset(InternetBaseToolset):
|
|
|
268
270
|
tools=[
|
|
269
271
|
FetchWebpage(self),
|
|
270
272
|
],
|
|
271
|
-
docs_url="https://
|
|
273
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/internet/",
|
|
272
274
|
tags=[
|
|
273
275
|
ToolsetTag.CORE,
|
|
274
276
|
],
|
|
@@ -44,7 +44,9 @@ class FetchNotion(Tool):
|
|
|
44
44
|
return f"https://api.notion.com/v1/blocks/{notion_id}/children"
|
|
45
45
|
return url # Return original URL if no match is found
|
|
46
46
|
|
|
47
|
-
def _invoke(
|
|
47
|
+
def _invoke(
|
|
48
|
+
self, params: dict, user_approved: bool = False
|
|
49
|
+
) -> StructuredToolResult:
|
|
48
50
|
url: str = params["url"]
|
|
49
51
|
|
|
50
52
|
# Get headers from the toolset configuration
|
|
@@ -118,7 +120,7 @@ class NotionToolset(InternetBaseToolset):
|
|
|
118
120
|
name="notion",
|
|
119
121
|
description="Fetch notion webpages",
|
|
120
122
|
icon_url="https://upload.wikimedia.org/wikipedia/commons/thumb/e/e9/Notion-logo.svg/2048px-Notion-logo.svg.png",
|
|
121
|
-
docs_url="https://
|
|
123
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/notion/",
|
|
122
124
|
tools=[
|
|
123
125
|
FetchNotion(self),
|
|
124
126
|
],
|
|
@@ -3,10 +3,7 @@ import os
|
|
|
3
3
|
from typing import Any, Dict
|
|
4
4
|
|
|
5
5
|
from uuid import uuid4
|
|
6
|
-
from holmes.core.
|
|
7
|
-
get_todo_manager,
|
|
8
|
-
)
|
|
9
|
-
|
|
6
|
+
from holmes.core.todo_tasks_formatter import format_tasks
|
|
10
7
|
from holmes.core.tools import (
|
|
11
8
|
Toolset,
|
|
12
9
|
ToolsetTag,
|
|
@@ -35,11 +32,6 @@ class TodoWriteTool(Tool):
|
|
|
35
32
|
},
|
|
36
33
|
),
|
|
37
34
|
),
|
|
38
|
-
"investigation_id": ToolParameter(
|
|
39
|
-
description="This investigation identifier. This is a uuid that represents the investigation session id.",
|
|
40
|
-
type="string",
|
|
41
|
-
required=True,
|
|
42
|
-
),
|
|
43
35
|
}
|
|
44
36
|
|
|
45
37
|
# Print a nice table to console/log
|
|
@@ -82,7 +74,9 @@ class TodoWriteTool(Tool):
|
|
|
82
74
|
|
|
83
75
|
logging.info(separator)
|
|
84
76
|
|
|
85
|
-
def _invoke(
|
|
77
|
+
def _invoke(
|
|
78
|
+
self, params: dict, user_approved: bool = False
|
|
79
|
+
) -> StructuredToolResult:
|
|
86
80
|
try:
|
|
87
81
|
todos_data = params.get("todos", [])
|
|
88
82
|
|
|
@@ -99,14 +93,8 @@ class TodoWriteTool(Tool):
|
|
|
99
93
|
|
|
100
94
|
logging.info(f"Tasks: {len(tasks)}")
|
|
101
95
|
|
|
102
|
-
# Store tasks in session storage
|
|
103
|
-
todo_manager = get_todo_manager()
|
|
104
|
-
session_id = params.get("investigation_id", "")
|
|
105
|
-
todo_manager.update_session_tasks(session_id, tasks)
|
|
106
|
-
|
|
107
96
|
self.print_tasks_table(tasks)
|
|
108
|
-
|
|
109
|
-
formatted_tasks = todo_manager.format_tasks_for_prompt(session_id)
|
|
97
|
+
formatted_tasks = format_tasks(tasks)
|
|
110
98
|
|
|
111
99
|
response_data = f"✅ Investigation plan updated with {len(tasks)} tasks. Tasks are now stored in session and will appear in subsequent prompts.\n\n"
|
|
112
100
|
if formatted_tasks:
|
|
@@ -59,12 +59,8 @@ The user will primarily request you perform reliability troubleshooting and inci
|
|
|
59
59
|
|
|
60
60
|
You MUST answer concisely with fewer than 4 lines of text (not including tool use or code generation), unless user asks for detail.
|
|
61
61
|
|
|
62
|
-
IMPORTANT: Refuse to write code or explain code that may be used maliciously; even if the user claims it is for educational purposes. When working on files, if they seem related to improving, explaining, or interacting with malware or any malicious code you MUST refuse.
|
|
63
|
-
IMPORTANT: Before you begin work, think about what the code you're editing is supposed to do based on the filenames directory structure. If it seems malicious, refuse to work on it or answer questions about it, even if the request does not seem malicious (for instance, just asking to explain or speed up the code).
|
|
64
|
-
|
|
65
62
|
IMPORTANT: Always use the TodoWrite tool to plan and track tasks throughout the conversation.
|
|
66
63
|
|
|
67
|
-
|
|
68
64
|
# TodoWrite
|
|
69
65
|
Use this tool to create and manage a structured task list for your current coding session. This helps you track progress, organize complex tasks, and demonstrate thoroughness to the user.
|
|
70
66
|
It also helps the user understand the progress of the task and overall progress of their requests.
|
|
@@ -221,7 +217,7 @@ The assistant did not use the todo list because this is a single command executi
|
|
|
221
217
|
2. **Task Management**:
|
|
222
218
|
- Update task status in real-time as you work
|
|
223
219
|
- Mark tasks complete IMMEDIATELY after finishing (don't batch completions)
|
|
224
|
-
-
|
|
220
|
+
- If tasks are not dependent on one another, handle multiple tasks in parallel and mark them in_progress.
|
|
225
221
|
|
|
226
222
|
3. **Task Completion Requirements**:
|
|
227
223
|
- ONLY mark a task as completed when you have FULLY accomplished it
|
holmes/plugins/toolsets/kafka.py
CHANGED
|
@@ -153,7 +153,9 @@ class ListKafkaConsumers(BaseKafkaTool):
|
|
|
153
153
|
toolset=toolset,
|
|
154
154
|
)
|
|
155
155
|
|
|
156
|
-
def _invoke(
|
|
156
|
+
def _invoke(
|
|
157
|
+
self, params: dict, user_approved: bool = False
|
|
158
|
+
) -> StructuredToolResult:
|
|
157
159
|
try:
|
|
158
160
|
kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
|
|
159
161
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
@@ -226,7 +228,9 @@ class DescribeConsumerGroup(BaseKafkaTool):
|
|
|
226
228
|
toolset=toolset,
|
|
227
229
|
)
|
|
228
230
|
|
|
229
|
-
def _invoke(
|
|
231
|
+
def _invoke(
|
|
232
|
+
self, params: dict, user_approved: bool = False
|
|
233
|
+
) -> StructuredToolResult:
|
|
230
234
|
group_id = params["group_id"]
|
|
231
235
|
try:
|
|
232
236
|
kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
|
|
@@ -282,7 +286,9 @@ class ListTopics(BaseKafkaTool):
|
|
|
282
286
|
toolset=toolset,
|
|
283
287
|
)
|
|
284
288
|
|
|
285
|
-
def _invoke(
|
|
289
|
+
def _invoke(
|
|
290
|
+
self, params: dict, user_approved: bool = False
|
|
291
|
+
) -> StructuredToolResult:
|
|
286
292
|
try:
|
|
287
293
|
kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
|
|
288
294
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
@@ -338,7 +344,9 @@ class DescribeTopic(BaseKafkaTool):
|
|
|
338
344
|
toolset=toolset,
|
|
339
345
|
)
|
|
340
346
|
|
|
341
|
-
def _invoke(
|
|
347
|
+
def _invoke(
|
|
348
|
+
self, params: dict, user_approved: bool = False
|
|
349
|
+
) -> StructuredToolResult:
|
|
342
350
|
topic_name = params["topic_name"]
|
|
343
351
|
try:
|
|
344
352
|
kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
|
|
@@ -461,7 +469,9 @@ class FindConsumerGroupsByTopic(BaseKafkaTool):
|
|
|
461
469
|
toolset=toolset,
|
|
462
470
|
)
|
|
463
471
|
|
|
464
|
-
def _invoke(
|
|
472
|
+
def _invoke(
|
|
473
|
+
self, params: dict, user_approved: bool = False
|
|
474
|
+
) -> StructuredToolResult:
|
|
465
475
|
topic_name = params["topic_name"]
|
|
466
476
|
try:
|
|
467
477
|
kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
|
|
@@ -549,7 +559,9 @@ class ListKafkaClusters(BaseKafkaTool):
|
|
|
549
559
|
toolset=toolset,
|
|
550
560
|
)
|
|
551
561
|
|
|
552
|
-
def _invoke(
|
|
562
|
+
def _invoke(
|
|
563
|
+
self, params: dict, user_approved: bool = False
|
|
564
|
+
) -> StructuredToolResult:
|
|
553
565
|
cluster_names = list(self.toolset.clients.keys())
|
|
554
566
|
return StructuredToolResult(
|
|
555
567
|
status=ToolResultStatus.SUCCESS,
|
|
@@ -571,7 +583,7 @@ class KafkaToolset(Toolset):
|
|
|
571
583
|
name="kafka/admin",
|
|
572
584
|
description="Fetches metadata from multiple Kafka clusters",
|
|
573
585
|
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
|
|
574
|
-
docs_url="https://
|
|
586
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/kafka/",
|
|
575
587
|
icon_url="https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcT-cR1JrBgJxB_SPVKUIRwtiHnR8qBvLeHXjQ&s",
|
|
576
588
|
tags=[ToolsetTag.CORE],
|
|
577
589
|
tools=[
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
toolsets:
|
|
2
2
|
kubernetes/core:
|
|
3
3
|
description: "Read access to cluster resources (excluding secrets and other sensitive data)"
|
|
4
|
-
docs_url: "https://
|
|
4
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/kubernetes/"
|
|
5
5
|
icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
|
|
6
6
|
tags:
|
|
7
7
|
- core
|
|
@@ -197,7 +197,7 @@ toolsets:
|
|
|
197
197
|
|
|
198
198
|
kubernetes/live-metrics:
|
|
199
199
|
description: "Provides real-time metrics for pods and nodes"
|
|
200
|
-
docs_url: "https://
|
|
200
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/kubernetes/"
|
|
201
201
|
icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
|
|
202
202
|
llm_instructions: |
|
|
203
203
|
The kubectl_top_pods or kubectl_top_nodes do not return time series data or metrics that can be used for graphs
|
|
@@ -219,7 +219,7 @@ toolsets:
|
|
|
219
219
|
|
|
220
220
|
kubernetes/kube-prometheus-stack:
|
|
221
221
|
description: "Fetches prometheus definition"
|
|
222
|
-
docs_url: "https://
|
|
222
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/kubernetes/"
|
|
223
223
|
icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
|
|
224
224
|
tags:
|
|
225
225
|
- core
|
|
@@ -230,7 +230,7 @@ toolsets:
|
|
|
230
230
|
|
|
231
231
|
kubernetes/krew-extras: # To make this work, install kube-lineage with krew
|
|
232
232
|
description: "Fetches children/dependents and parents/dependencies resources using kube-lineage installed via `kubectl krew`"
|
|
233
|
-
docs_url: "https://
|
|
233
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/kubernetes/"
|
|
234
234
|
icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
|
|
235
235
|
tags:
|
|
236
236
|
- cli
|
|
@@ -246,7 +246,7 @@ toolsets:
|
|
|
246
246
|
|
|
247
247
|
kubernetes/kube-lineage-extras: # To make this work, build kube-lineage from source
|
|
248
248
|
description: "Fetches children/dependents and parents/dependencies resources using kube-lineage"
|
|
249
|
-
docs_url: "https://
|
|
249
|
+
docs_url: "https://holmesgpt.dev/data-sources/builtin-toolsets/kubernetes/"
|
|
250
250
|
icon_url: "https://encrypted-tbn0.gstatic.com/images?q=tbn:ANd9GcRPKA-U9m5BxYQDF1O7atMfj9EMMXEoGu4t0Q&s"
|
|
251
251
|
tags:
|
|
252
252
|
- cluster
|