holmesgpt 0.13.3a0__py3-none-any.whl → 0.14.1a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of holmesgpt might be problematic. Click here for more details.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +10 -2
- holmes/common/env_vars.py +8 -1
- holmes/config.py +66 -139
- holmes/core/investigation.py +1 -2
- holmes/core/llm.py +256 -51
- holmes/core/models.py +2 -0
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +14 -8
- holmes/core/tool_calling_llm.py +193 -176
- holmes/core/tools.py +260 -25
- holmes/core/tools_utils/data_types.py +81 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +33 -0
- holmes/core/tools_utils/tool_executor.py +2 -2
- holmes/core/toolset_manager.py +150 -3
- holmes/core/tracing.py +6 -1
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/main.py +5 -0
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +17 -15
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +8 -4
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +3 -3
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +4 -4
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +7 -3
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +7 -3
- holmes/plugins/toolsets/bash/bash_toolset.py +6 -6
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +16 -17
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +9 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +21 -22
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +8 -8
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +18 -19
- holmes/plugins/toolsets/git.py +22 -22
- holmes/plugins/toolsets/grafana/common.py +14 -2
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +473 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +4 -4
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +3 -3
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +662 -290
- holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
- holmes/plugins/toolsets/internet/internet.py +3 -3
- holmes/plugins/toolsets/internet/notion.py +3 -3
- holmes/plugins/toolsets/investigator/core_investigation.py +3 -3
- holmes/plugins/toolsets/kafka.py +18 -18
- holmes/plugins/toolsets/kubernetes.yaml +58 -0
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/mcp/toolset_mcp.py +4 -4
- holmes/plugins/toolsets/newrelic.py +8 -8
- holmes/plugins/toolsets/opensearch/opensearch.py +5 -5
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +10 -10
- holmes/plugins/toolsets/prometheus/prometheus.py +172 -39
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +25 -0
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +6 -4
- holmes/plugins/toolsets/robusta/robusta.py +10 -10
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +4 -4
- holmes/plugins/toolsets/servicenow/servicenow.py +6 -6
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/env.py +7 -0
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +9 -0
- {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1a0.dist-info}/METADATA +10 -14
- {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1a0.dist-info}/RECORD +81 -71
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1a0.dist-info}/LICENSE.txt +0 -0
- {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.3a0.dist-info → holmesgpt-0.14.1a0.dist-info}/entry_points.txt +0 -0
|
@@ -187,7 +187,7 @@ def format_traces_list(trace_data: Dict) -> str:
|
|
|
187
187
|
else "\n"
|
|
188
188
|
)
|
|
189
189
|
trace_str += f"\tstartTime={unix_nano_to_rfc3339(int(trace.get('startTimeUnixNano')))}"
|
|
190
|
-
trace_str += f" rootServiceName={trace.get('
|
|
190
|
+
trace_str += f" rootServiceName={trace.get('rootServiceName')}"
|
|
191
191
|
trace_str += f" rootTraceName={trace.get('rootTraceName')}"
|
|
192
192
|
traces_str.append(trace_str)
|
|
193
193
|
return "\n".join(traces_str)
|
|
@@ -15,7 +15,7 @@ from markdownify import markdownify
|
|
|
15
15
|
from bs4 import BeautifulSoup
|
|
16
16
|
|
|
17
17
|
import requests # type: ignore
|
|
18
|
-
from holmes.core.tools import StructuredToolResult,
|
|
18
|
+
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
|
|
19
19
|
from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
|
|
20
20
|
|
|
21
21
|
|
|
@@ -199,7 +199,7 @@ class FetchWebpage(Tool):
|
|
|
199
199
|
if not content:
|
|
200
200
|
logging.error(f"Failed to retrieve content from {url}")
|
|
201
201
|
return StructuredToolResult(
|
|
202
|
-
status=
|
|
202
|
+
status=StructuredToolResultStatus.ERROR,
|
|
203
203
|
error=f"Failed to retrieve content from {url}",
|
|
204
204
|
params=params,
|
|
205
205
|
)
|
|
@@ -211,7 +211,7 @@ class FetchWebpage(Tool):
|
|
|
211
211
|
content = html_to_markdown(content)
|
|
212
212
|
|
|
213
213
|
return StructuredToolResult(
|
|
214
|
-
status=
|
|
214
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
215
215
|
data=content,
|
|
216
216
|
params=params,
|
|
217
217
|
)
|
|
@@ -13,7 +13,7 @@ from holmes.plugins.toolsets.internet.internet import (
|
|
|
13
13
|
)
|
|
14
14
|
from holmes.core.tools import (
|
|
15
15
|
StructuredToolResult,
|
|
16
|
-
|
|
16
|
+
StructuredToolResultStatus,
|
|
17
17
|
)
|
|
18
18
|
from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
|
|
19
19
|
|
|
@@ -59,13 +59,13 @@ class FetchNotion(Tool):
|
|
|
59
59
|
if not content:
|
|
60
60
|
logging.error(f"Failed to retrieve content from {url}")
|
|
61
61
|
return StructuredToolResult(
|
|
62
|
-
status=
|
|
62
|
+
status=StructuredToolResultStatus.ERROR,
|
|
63
63
|
error=f"Failed to retrieve content from {url}",
|
|
64
64
|
params=params,
|
|
65
65
|
)
|
|
66
66
|
|
|
67
67
|
return StructuredToolResult(
|
|
68
|
-
status=
|
|
68
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
69
69
|
data=self.parse_notion_content(content),
|
|
70
70
|
params=params,
|
|
71
71
|
)
|
|
@@ -10,7 +10,7 @@ from holmes.core.tools import (
|
|
|
10
10
|
ToolParameter,
|
|
11
11
|
Tool,
|
|
12
12
|
StructuredToolResult,
|
|
13
|
-
|
|
13
|
+
StructuredToolResultStatus,
|
|
14
14
|
)
|
|
15
15
|
from holmes.plugins.toolsets.investigator.model import Task, TaskStatus
|
|
16
16
|
|
|
@@ -103,7 +103,7 @@ class TodoWriteTool(Tool):
|
|
|
103
103
|
response_data += "No tasks currently in the investigation plan."
|
|
104
104
|
|
|
105
105
|
return StructuredToolResult(
|
|
106
|
-
status=
|
|
106
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
107
107
|
data=response_data,
|
|
108
108
|
params=params,
|
|
109
109
|
)
|
|
@@ -111,7 +111,7 @@ class TodoWriteTool(Tool):
|
|
|
111
111
|
except Exception as e:
|
|
112
112
|
logging.exception("error using todowrite tool")
|
|
113
113
|
return StructuredToolResult(
|
|
114
|
-
status=
|
|
114
|
+
status=StructuredToolResultStatus.ERROR,
|
|
115
115
|
error=f"Failed to process tasks: {str(e)}",
|
|
116
116
|
params=params,
|
|
117
117
|
)
|
holmes/plugins/toolsets/kafka.py
CHANGED
|
@@ -28,7 +28,7 @@ from holmes.core.tools import (
|
|
|
28
28
|
StructuredToolResult,
|
|
29
29
|
Tool,
|
|
30
30
|
ToolParameter,
|
|
31
|
-
|
|
31
|
+
StructuredToolResultStatus,
|
|
32
32
|
Toolset,
|
|
33
33
|
ToolsetTag,
|
|
34
34
|
)
|
|
@@ -161,7 +161,7 @@ class ListKafkaConsumers(BaseKafkaTool):
|
|
|
161
161
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
162
162
|
if client is None:
|
|
163
163
|
return StructuredToolResult(
|
|
164
|
-
status=
|
|
164
|
+
status=StructuredToolResultStatus.ERROR,
|
|
165
165
|
error="No admin_client on toolset. This toolset is misconfigured.",
|
|
166
166
|
params=params,
|
|
167
167
|
)
|
|
@@ -190,7 +190,7 @@ class ListKafkaConsumers(BaseKafkaTool):
|
|
|
190
190
|
if errors_text:
|
|
191
191
|
result_text = result_text + "\n\n" + errors_text
|
|
192
192
|
return StructuredToolResult(
|
|
193
|
-
status=
|
|
193
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
194
194
|
data=result_text,
|
|
195
195
|
params=params,
|
|
196
196
|
)
|
|
@@ -198,7 +198,7 @@ class ListKafkaConsumers(BaseKafkaTool):
|
|
|
198
198
|
error_msg = f"Failed to list consumer groups: {str(e)}"
|
|
199
199
|
logging.error(error_msg)
|
|
200
200
|
return StructuredToolResult(
|
|
201
|
-
status=
|
|
201
|
+
status=StructuredToolResultStatus.ERROR,
|
|
202
202
|
error=error_msg,
|
|
203
203
|
params=params,
|
|
204
204
|
)
|
|
@@ -237,7 +237,7 @@ class DescribeConsumerGroup(BaseKafkaTool):
|
|
|
237
237
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
238
238
|
if client is None:
|
|
239
239
|
return StructuredToolResult(
|
|
240
|
-
status=
|
|
240
|
+
status=StructuredToolResultStatus.ERROR,
|
|
241
241
|
error="No admin_client on toolset. This toolset is misconfigured.",
|
|
242
242
|
params=params,
|
|
243
243
|
)
|
|
@@ -247,13 +247,13 @@ class DescribeConsumerGroup(BaseKafkaTool):
|
|
|
247
247
|
if futures.get(group_id):
|
|
248
248
|
group_metadata = futures.get(group_id).result()
|
|
249
249
|
return StructuredToolResult(
|
|
250
|
-
status=
|
|
250
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
251
251
|
data=yaml.dump(convert_to_dict(group_metadata)),
|
|
252
252
|
params=params,
|
|
253
253
|
)
|
|
254
254
|
else:
|
|
255
255
|
return StructuredToolResult(
|
|
256
|
-
status=
|
|
256
|
+
status=StructuredToolResultStatus.ERROR,
|
|
257
257
|
error="Group not found",
|
|
258
258
|
params=params,
|
|
259
259
|
)
|
|
@@ -261,7 +261,7 @@ class DescribeConsumerGroup(BaseKafkaTool):
|
|
|
261
261
|
error_msg = f"Failed to describe consumer group {group_id}: {str(e)}"
|
|
262
262
|
logging.error(error_msg)
|
|
263
263
|
return StructuredToolResult(
|
|
264
|
-
status=
|
|
264
|
+
status=StructuredToolResultStatus.ERROR,
|
|
265
265
|
error=error_msg,
|
|
266
266
|
params=params,
|
|
267
267
|
)
|
|
@@ -294,14 +294,14 @@ class ListTopics(BaseKafkaTool):
|
|
|
294
294
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
295
295
|
if client is None:
|
|
296
296
|
return StructuredToolResult(
|
|
297
|
-
status=
|
|
297
|
+
status=StructuredToolResultStatus.ERROR,
|
|
298
298
|
error="No admin_client on toolset. This toolset is misconfigured.",
|
|
299
299
|
params=params,
|
|
300
300
|
)
|
|
301
301
|
|
|
302
302
|
topics = client.list_topics()
|
|
303
303
|
return StructuredToolResult(
|
|
304
|
-
status=
|
|
304
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
305
305
|
data=yaml.dump(convert_to_dict(topics)),
|
|
306
306
|
params=params,
|
|
307
307
|
)
|
|
@@ -309,7 +309,7 @@ class ListTopics(BaseKafkaTool):
|
|
|
309
309
|
error_msg = f"Failed to list topics: {str(e)}"
|
|
310
310
|
logging.error(error_msg)
|
|
311
311
|
return StructuredToolResult(
|
|
312
|
-
status=
|
|
312
|
+
status=StructuredToolResultStatus.ERROR,
|
|
313
313
|
error=error_msg,
|
|
314
314
|
params=params,
|
|
315
315
|
)
|
|
@@ -353,7 +353,7 @@ class DescribeTopic(BaseKafkaTool):
|
|
|
353
353
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
354
354
|
if client is None:
|
|
355
355
|
return StructuredToolResult(
|
|
356
|
-
status=
|
|
356
|
+
status=StructuredToolResultStatus.ERROR,
|
|
357
357
|
error="No admin_client on toolset. This toolset is misconfigured.",
|
|
358
358
|
params=params,
|
|
359
359
|
)
|
|
@@ -373,7 +373,7 @@ class DescribeTopic(BaseKafkaTool):
|
|
|
373
373
|
result["configuration"] = convert_to_dict(config)
|
|
374
374
|
|
|
375
375
|
return StructuredToolResult(
|
|
376
|
-
status=
|
|
376
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
377
377
|
data=yaml.dump(result),
|
|
378
378
|
params=params,
|
|
379
379
|
)
|
|
@@ -381,7 +381,7 @@ class DescribeTopic(BaseKafkaTool):
|
|
|
381
381
|
error_msg = f"Failed to describe topic {topic_name}: {str(e)}"
|
|
382
382
|
logging.error(error_msg, exc_info=True)
|
|
383
383
|
return StructuredToolResult(
|
|
384
|
-
status=
|
|
384
|
+
status=StructuredToolResultStatus.ERROR,
|
|
385
385
|
error=error_msg,
|
|
386
386
|
params=params,
|
|
387
387
|
)
|
|
@@ -478,7 +478,7 @@ class FindConsumerGroupsByTopic(BaseKafkaTool):
|
|
|
478
478
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
479
479
|
if client is None:
|
|
480
480
|
return StructuredToolResult(
|
|
481
|
-
status=
|
|
481
|
+
status=StructuredToolResultStatus.ERROR,
|
|
482
482
|
error="No admin_client on toolset. This toolset is misconfigured.",
|
|
483
483
|
params=params,
|
|
484
484
|
)
|
|
@@ -530,7 +530,7 @@ class FindConsumerGroupsByTopic(BaseKafkaTool):
|
|
|
530
530
|
result_text = result_text + "\n\n" + errors_text
|
|
531
531
|
|
|
532
532
|
return StructuredToolResult(
|
|
533
|
-
status=
|
|
533
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
534
534
|
data=result_text,
|
|
535
535
|
params=params,
|
|
536
536
|
)
|
|
@@ -540,7 +540,7 @@ class FindConsumerGroupsByTopic(BaseKafkaTool):
|
|
|
540
540
|
)
|
|
541
541
|
logging.error(error_msg)
|
|
542
542
|
return StructuredToolResult(
|
|
543
|
-
status=
|
|
543
|
+
status=StructuredToolResultStatus.ERROR,
|
|
544
544
|
error=error_msg,
|
|
545
545
|
params=params,
|
|
546
546
|
)
|
|
@@ -564,7 +564,7 @@ class ListKafkaClusters(BaseKafkaTool):
|
|
|
564
564
|
) -> StructuredToolResult:
|
|
565
565
|
cluster_names = list(self.toolset.clients.keys())
|
|
566
566
|
return StructuredToolResult(
|
|
567
|
-
status=
|
|
567
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
568
568
|
data="Available Kafka Clusters:\n" + "\n".join(cluster_names),
|
|
569
569
|
params=params,
|
|
570
570
|
)
|
|
@@ -8,6 +8,10 @@ toolsets:
|
|
|
8
8
|
prerequisites:
|
|
9
9
|
- command: "kubectl version --client"
|
|
10
10
|
|
|
11
|
+
# Note: Many tools in this toolset use transformers with llm_summarize
|
|
12
|
+
# to automatically summarize large kubectl outputs when a fast model is configured.
|
|
13
|
+
# This reduces context window usage while preserving key information for debugging.
|
|
14
|
+
|
|
11
15
|
tools:
|
|
12
16
|
- name: "kubectl_describe"
|
|
13
17
|
description: >
|
|
@@ -17,6 +21,20 @@ toolsets:
|
|
|
17
21
|
- 'describe pod xyz-123'
|
|
18
22
|
- 'show service xyz-123 in namespace my-ns'
|
|
19
23
|
command: "kubectl describe {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
24
|
+
transformers:
|
|
25
|
+
- name: llm_summarize
|
|
26
|
+
config:
|
|
27
|
+
input_threshold: 1000
|
|
28
|
+
prompt: |
|
|
29
|
+
Summarize this kubectl describe output focusing on:
|
|
30
|
+
- What needs attention or immediate action
|
|
31
|
+
- Resource status and health indicators
|
|
32
|
+
- Any errors, warnings, or non-standard states
|
|
33
|
+
- Key configuration details that could affect functionality
|
|
34
|
+
- When possible, mention exact field names so the user can grep for specific details
|
|
35
|
+
- Be concise: aim for ≤ 50% of the original length; avoid repeating defaults/healthy/unchanged details
|
|
36
|
+
- Prefer aggregates and counts; list only outliers and actionable items
|
|
37
|
+
- Keep grep-friendly: include exact field names/values that matter
|
|
20
38
|
|
|
21
39
|
- name: "kubectl_get_by_name"
|
|
22
40
|
description: "Run `kubectl get <kind> <name> --show-labels`"
|
|
@@ -25,10 +43,36 @@ toolsets:
|
|
|
25
43
|
- name: "kubectl_get_by_kind_in_namespace"
|
|
26
44
|
description: "Run `kubectl get <kind> -n <namespace> --show-labels` to get all resources of a given type in namespace"
|
|
27
45
|
command: "kubectl get --show-labels -o wide {{ kind }} -n {{namespace}}"
|
|
46
|
+
transformers:
|
|
47
|
+
- name: llm_summarize
|
|
48
|
+
config:
|
|
49
|
+
input_threshold: 1000
|
|
50
|
+
prompt: |
|
|
51
|
+
Summarize this kubectl output focusing on:
|
|
52
|
+
- What needs attention or immediate action
|
|
53
|
+
- Group similar resources into aggregate descriptions
|
|
54
|
+
- Make sure to mention outliers, errors, and non-standard states
|
|
55
|
+
- List healthy resources as aggregate descriptions
|
|
56
|
+
- When listing unhealthy resources, also try to use aggregate descriptions when possible
|
|
57
|
+
- When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down
|
|
58
|
+
- Be concise and avoid expansion: target ≤ 50% of input size; prefer counts + outliers over full listings
|
|
28
59
|
|
|
29
60
|
- name: "kubectl_get_by_kind_in_cluster"
|
|
30
61
|
description: "Run `kubectl get -A <kind> --show-labels` to get all resources of a given type in the cluster"
|
|
31
62
|
command: "kubectl get -A --show-labels -o wide {{ kind }}"
|
|
63
|
+
transformers:
|
|
64
|
+
- name: llm_summarize
|
|
65
|
+
config:
|
|
66
|
+
input_threshold: 1000
|
|
67
|
+
prompt: |
|
|
68
|
+
Summarize this kubectl output focusing on:
|
|
69
|
+
- What needs attention or immediate action
|
|
70
|
+
- Group similar resources into a single line and description
|
|
71
|
+
- Make sure to mention outliers, errors, and non-standard states
|
|
72
|
+
- List healthy resources as aggregate descriptions
|
|
73
|
+
- When listing unhealthy resources, also try to use aggregate descriptions when possible
|
|
74
|
+
- When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down on the parts they care about
|
|
75
|
+
- Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
|
|
32
76
|
|
|
33
77
|
- name: "kubectl_find_resource"
|
|
34
78
|
description: "Run `kubectl get {{ kind }} -A --show-labels | grep {{ keyword }}` to find a resource where you know a substring of the name, IP, namespace, or labels"
|
|
@@ -131,6 +175,20 @@ toolsets:
|
|
|
131
175
|
description: >
|
|
132
176
|
Use kubectl to get json for all resources of a specific kind pipe the results to jq to filter them. Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give. e.g. give an expression like .items[] | .spec.containers[].image | select(test("^gcr.io/") | not)
|
|
133
177
|
command: kubectl get {{ kind }} --all-namespaces -o json | jq -r {{ jq_expr }}
|
|
178
|
+
transformers:
|
|
179
|
+
- name: llm_summarize
|
|
180
|
+
config:
|
|
181
|
+
input_threshold: 1000
|
|
182
|
+
prompt: |
|
|
183
|
+
Summarize this jq query output focusing on:
|
|
184
|
+
- Key patterns and commonalities in the data
|
|
185
|
+
- Notable outliers, anomalies, or items that need attention
|
|
186
|
+
- Group similar results into aggregate descriptions when possible
|
|
187
|
+
- Highlight any empty results, null values, or missing data
|
|
188
|
+
- When applicable, mention specific resource names, namespaces, or values that stand out
|
|
189
|
+
- Organize findings in a structured way that helps with troubleshooting
|
|
190
|
+
- Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
|
|
191
|
+
- Include grep-ready keys/values; avoid repeating entire objects or unchanged defaults
|
|
134
192
|
|
|
135
193
|
- name: "kubernetes_count"
|
|
136
194
|
user_description: "Count Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
|
|
@@ -10,7 +10,7 @@ from holmes.common.env_vars import KUBERNETES_LOGS_TIMEOUT_SECONDS
|
|
|
10
10
|
from holmes.core.tools import (
|
|
11
11
|
StaticPrerequisite,
|
|
12
12
|
StructuredToolResult,
|
|
13
|
-
|
|
13
|
+
StructuredToolResultStatus,
|
|
14
14
|
ToolsetTag,
|
|
15
15
|
)
|
|
16
16
|
from holmes.plugins.toolsets.logging_utils.logging_api import (
|
|
@@ -140,7 +140,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
|
|
|
140
140
|
# Ensure both results are not None (they should always be set by the loop)
|
|
141
141
|
if current_logs_result is None or previous_logs_result is None:
|
|
142
142
|
return StructuredToolResult(
|
|
143
|
-
status=
|
|
143
|
+
status=StructuredToolResultStatus.ERROR,
|
|
144
144
|
error="Internal error: Failed to fetch logs",
|
|
145
145
|
params=params.model_dump(),
|
|
146
146
|
)
|
|
@@ -162,7 +162,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
|
|
|
162
162
|
):
|
|
163
163
|
# Both commands failed - return error from current logs
|
|
164
164
|
return StructuredToolResult(
|
|
165
|
-
status=
|
|
165
|
+
status=StructuredToolResultStatus.ERROR,
|
|
166
166
|
error=current_logs_result.error,
|
|
167
167
|
params=params.model_dump(),
|
|
168
168
|
return_code=return_code,
|
|
@@ -206,7 +206,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
|
|
|
206
206
|
if len(filtered_logs) == 0:
|
|
207
207
|
# Return NO_DATA status when there are no logs
|
|
208
208
|
return StructuredToolResult(
|
|
209
|
-
status=
|
|
209
|
+
status=StructuredToolResultStatus.NO_DATA,
|
|
210
210
|
data="\n".join(
|
|
211
211
|
metadata_lines
|
|
212
212
|
), # Still include metadata for context
|
|
@@ -218,7 +218,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
|
|
|
218
218
|
response_data = formatted_logs + "\n" + "\n".join(metadata_lines)
|
|
219
219
|
|
|
220
220
|
return StructuredToolResult(
|
|
221
|
-
status=
|
|
221
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
222
222
|
data=response_data,
|
|
223
223
|
params=params.model_dump(),
|
|
224
224
|
return_code=return_code,
|
|
@@ -226,7 +226,7 @@ class KubernetesLogsToolset(BasePodLoggingToolset):
|
|
|
226
226
|
except Exception as e:
|
|
227
227
|
logging.exception(f"Error fetching logs for pod {params.pod_name}")
|
|
228
228
|
return StructuredToolResult(
|
|
229
|
-
status=
|
|
229
|
+
status=StructuredToolResultStatus.ERROR,
|
|
230
230
|
error=f"Error fetching logs: {str(e)}",
|
|
231
231
|
params=params.model_dump(),
|
|
232
232
|
)
|
|
@@ -8,6 +8,10 @@ toolsets:
|
|
|
8
8
|
prerequisites:
|
|
9
9
|
- command: "kubectl version --client"
|
|
10
10
|
|
|
11
|
+
# Note: Log tools use transformers with llm_summarize to automatically
|
|
12
|
+
# summarize large log outputs when a fast model is configured. This helps
|
|
13
|
+
# focus on errors, patterns, and key information while reducing context usage.
|
|
14
|
+
|
|
11
15
|
tools:
|
|
12
16
|
- name: "kubectl_previous_logs"
|
|
13
17
|
description: "Run `kubectl logs --previous` on a single Kubernetes pod. Used to fetch logs for a pod that crashed and see logs from before the crash. Never give a deployment name or a resource that is not a pod."
|
|
@@ -24,10 +28,38 @@ toolsets:
|
|
|
24
28
|
- name: "kubectl_logs"
|
|
25
29
|
description: "Run `kubectl logs` on a single Kubernetes pod. Never give a deployment name or a resource that is not a pod."
|
|
26
30
|
command: "kubectl logs {{pod_name}} -n {{ namespace }}"
|
|
31
|
+
transformers:
|
|
32
|
+
- name: llm_summarize
|
|
33
|
+
config:
|
|
34
|
+
input_threshold: 1000
|
|
35
|
+
prompt: |
|
|
36
|
+
Summarize these pod logs focusing on:
|
|
37
|
+
- Errors, exceptions, and warning messages
|
|
38
|
+
- Recent activity patterns and trends
|
|
39
|
+
- Any authentication, connection, or startup issues
|
|
40
|
+
- Performance indicators (response times, throughput)
|
|
41
|
+
- Group similar log entries together
|
|
42
|
+
- When possible, mention exact error codes or keywords for easier searching
|
|
43
|
+
- Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
|
|
44
|
+
- Include grep-ready keys/values; avoid repeating entire logs or unchanged defaults
|
|
27
45
|
|
|
28
46
|
- name: "kubectl_logs_all_containers"
|
|
29
47
|
description: "Run `kubectl logs` on all containers within a single Kubernetes pod."
|
|
30
48
|
command: "kubectl logs {{pod_name}} -n {{ namespace }} --all-containers"
|
|
49
|
+
transformers:
|
|
50
|
+
- name: llm_summarize
|
|
51
|
+
config:
|
|
52
|
+
input_threshold: 1000
|
|
53
|
+
prompt: |
|
|
54
|
+
Summarize these multi-container pod logs focusing on:
|
|
55
|
+
- Errors, exceptions, and warning messages by container
|
|
56
|
+
- Inter-container communication patterns
|
|
57
|
+
- Any authentication, connection, or startup issues
|
|
58
|
+
- Performance indicators and resource usage patterns
|
|
59
|
+
- Group similar log entries together by container
|
|
60
|
+
- When possible, mention exact error codes or keywords for easier searching
|
|
61
|
+
- Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
|
|
62
|
+
- Prioritize aggregates and actionable outliers over comprehensive details
|
|
31
63
|
|
|
32
64
|
- name: "kubectl_container_logs"
|
|
33
65
|
description: "Run `kubectl logs` on a single container within a Kubernetes pod. This is to get the logs of a specific container in a multi-container pod."
|
|
@@ -3,7 +3,7 @@ from holmes.core.tools import (
|
|
|
3
3
|
Tool,
|
|
4
4
|
ToolParameter,
|
|
5
5
|
StructuredToolResult,
|
|
6
|
-
|
|
6
|
+
StructuredToolResultStatus,
|
|
7
7
|
CallablePrerequisite,
|
|
8
8
|
)
|
|
9
9
|
|
|
@@ -31,7 +31,7 @@ class RemoteMCPTool(Tool):
|
|
|
31
31
|
return asyncio.run(self._invoke_async(params))
|
|
32
32
|
except Exception as e:
|
|
33
33
|
return StructuredToolResult(
|
|
34
|
-
status=
|
|
34
|
+
status=StructuredToolResultStatus.ERROR,
|
|
35
35
|
error=str(e.args),
|
|
36
36
|
params=params,
|
|
37
37
|
invocation=f"MCPtool {self.name} with params {params}",
|
|
@@ -48,9 +48,9 @@ class RemoteMCPTool(Tool):
|
|
|
48
48
|
)
|
|
49
49
|
return StructuredToolResult(
|
|
50
50
|
status=(
|
|
51
|
-
|
|
51
|
+
StructuredToolResultStatus.ERROR
|
|
52
52
|
if tool_result.isError
|
|
53
|
-
else
|
|
53
|
+
else StructuredToolResultStatus.SUCCESS
|
|
54
54
|
),
|
|
55
55
|
data=merged_text,
|
|
56
56
|
params=params,
|
|
@@ -9,7 +9,7 @@ from holmes.core.tools import (
|
|
|
9
9
|
ToolsetTag,
|
|
10
10
|
)
|
|
11
11
|
from pydantic import BaseModel
|
|
12
|
-
from holmes.core.tools import StructuredToolResult,
|
|
12
|
+
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
|
|
13
13
|
from holmes.plugins.toolsets.utils import get_param_or_raise, toolset_name_for_one_liner
|
|
14
14
|
|
|
15
15
|
|
|
@@ -42,14 +42,14 @@ class GetLogs(BaseNewRelicTool):
|
|
|
42
42
|
) -> StructuredToolResult:
|
|
43
43
|
def success(msg: Any) -> StructuredToolResult:
|
|
44
44
|
return StructuredToolResult(
|
|
45
|
-
status=
|
|
45
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
46
46
|
data=msg,
|
|
47
47
|
params=params,
|
|
48
48
|
)
|
|
49
49
|
|
|
50
50
|
def error(msg: str) -> StructuredToolResult:
|
|
51
51
|
return StructuredToolResult(
|
|
52
|
-
status=
|
|
52
|
+
status=StructuredToolResultStatus.ERROR,
|
|
53
53
|
data=msg,
|
|
54
54
|
params=params,
|
|
55
55
|
)
|
|
@@ -79,7 +79,7 @@ class GetLogs(BaseNewRelicTool):
|
|
|
79
79
|
|
|
80
80
|
try:
|
|
81
81
|
logging.info(f"Getting New Relic logs for app {app} since {since}")
|
|
82
|
-
response = requests.post(url, headers=headers, json=query)
|
|
82
|
+
response = requests.post(url, headers=headers, json=query) # type: ignore[arg-type]
|
|
83
83
|
|
|
84
84
|
if response.status_code == 200:
|
|
85
85
|
return success(response.json())
|
|
@@ -122,14 +122,14 @@ class GetTraces(BaseNewRelicTool):
|
|
|
122
122
|
) -> StructuredToolResult:
|
|
123
123
|
def success(msg: Any) -> StructuredToolResult:
|
|
124
124
|
return StructuredToolResult(
|
|
125
|
-
status=
|
|
125
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
126
126
|
data=msg,
|
|
127
127
|
params=params,
|
|
128
128
|
)
|
|
129
129
|
|
|
130
130
|
def error(msg: str) -> StructuredToolResult:
|
|
131
131
|
return StructuredToolResult(
|
|
132
|
-
status=
|
|
132
|
+
status=StructuredToolResultStatus.ERROR,
|
|
133
133
|
data=msg,
|
|
134
134
|
params=params,
|
|
135
135
|
)
|
|
@@ -164,7 +164,7 @@ class GetTraces(BaseNewRelicTool):
|
|
|
164
164
|
|
|
165
165
|
try:
|
|
166
166
|
logging.info(f"Getting New Relic traces with duration > {duration}s")
|
|
167
|
-
response = requests.post(url, headers=headers, json=query)
|
|
167
|
+
response = requests.post(url, headers=headers, json=query) # type: ignore[arg-type]
|
|
168
168
|
|
|
169
169
|
if response.status_code == 200:
|
|
170
170
|
return success(response.json())
|
|
@@ -197,7 +197,7 @@ class NewRelicToolset(Toolset):
|
|
|
197
197
|
super().__init__(
|
|
198
198
|
name="newrelic",
|
|
199
199
|
description="Toolset for interacting with New Relic to fetch logs and traces",
|
|
200
|
-
docs_url="https://
|
|
200
|
+
docs_url="https://holmesgpt.dev/data-sources/builtin-toolsets/newrelic/",
|
|
201
201
|
icon_url="https://companieslogo.com/img/orig/NEWR-de5fcb2e.png?t=1720244493",
|
|
202
202
|
prerequisites=[CallablePrerequisite(callable=self.prerequisites_callable)],
|
|
203
203
|
tools=[
|
|
@@ -9,7 +9,7 @@ from holmes.core.tools import (
|
|
|
9
9
|
StructuredToolResult,
|
|
10
10
|
Tool,
|
|
11
11
|
ToolParameter,
|
|
12
|
-
|
|
12
|
+
StructuredToolResultStatus,
|
|
13
13
|
Toolset,
|
|
14
14
|
ToolsetTag,
|
|
15
15
|
)
|
|
@@ -99,7 +99,7 @@ class ListShards(BaseOpenSearchTool):
|
|
|
99
99
|
client = get_client(self.toolset.clients, host=params.get("host", ""))
|
|
100
100
|
shards = client.client.cat.shards()
|
|
101
101
|
return StructuredToolResult(
|
|
102
|
-
status=
|
|
102
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
103
103
|
data=str(shards),
|
|
104
104
|
params=params,
|
|
105
105
|
)
|
|
@@ -132,7 +132,7 @@ class GetClusterSettings(BaseOpenSearchTool):
|
|
|
132
132
|
include_defaults=True, flat_settings=True
|
|
133
133
|
)
|
|
134
134
|
return StructuredToolResult(
|
|
135
|
-
status=
|
|
135
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
136
136
|
data=str(response),
|
|
137
137
|
params=params,
|
|
138
138
|
)
|
|
@@ -163,7 +163,7 @@ class GetClusterHealth(BaseOpenSearchTool):
|
|
|
163
163
|
client = get_client(self.toolset.clients, host=params.get("host", ""))
|
|
164
164
|
health = client.client.cluster.health()
|
|
165
165
|
return StructuredToolResult(
|
|
166
|
-
status=
|
|
166
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
167
167
|
data=str(health),
|
|
168
168
|
params=params,
|
|
169
169
|
)
|
|
@@ -187,7 +187,7 @@ class ListOpenSearchHosts(BaseOpenSearchTool):
|
|
|
187
187
|
) -> StructuredToolResult:
|
|
188
188
|
hosts = [host for client in self.toolset.clients for host in client.hosts]
|
|
189
189
|
return StructuredToolResult(
|
|
190
|
-
status=
|
|
190
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
191
191
|
data=str(hosts),
|
|
192
192
|
params=params,
|
|
193
193
|
)
|
|
@@ -8,7 +8,7 @@ from urllib.parse import urljoin
|
|
|
8
8
|
from holmes.core.tools import (
|
|
9
9
|
CallablePrerequisite,
|
|
10
10
|
StructuredToolResult,
|
|
11
|
-
|
|
11
|
+
StructuredToolResultStatus,
|
|
12
12
|
ToolsetTag,
|
|
13
13
|
)
|
|
14
14
|
from holmes.plugins.toolsets.logging_utils.logging_api import (
|
|
@@ -79,7 +79,7 @@ class OpenSearchLogsToolset(BasePodLoggingToolset):
|
|
|
79
79
|
def fetch_pod_logs(self, params: FetchPodLogsParams) -> StructuredToolResult:
|
|
80
80
|
if not self.opensearch_config:
|
|
81
81
|
return StructuredToolResult(
|
|
82
|
-
status=
|
|
82
|
+
status=StructuredToolResultStatus.ERROR,
|
|
83
83
|
error="Missing OpenSearch configuration",
|
|
84
84
|
params=params.model_dump(),
|
|
85
85
|
)
|
|
@@ -126,13 +126,13 @@ class OpenSearchLogsToolset(BasePodLoggingToolset):
|
|
|
126
126
|
config=self.opensearch_config,
|
|
127
127
|
)
|
|
128
128
|
return StructuredToolResult(
|
|
129
|
-
status=
|
|
129
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
130
130
|
data=logs,
|
|
131
131
|
params=params.model_dump(),
|
|
132
132
|
)
|
|
133
133
|
else:
|
|
134
134
|
return StructuredToolResult(
|
|
135
|
-
status=
|
|
135
|
+
status=StructuredToolResultStatus.ERROR,
|
|
136
136
|
return_code=logs_response.status_code,
|
|
137
137
|
error=logs_response.text,
|
|
138
138
|
params=params.model_dump(),
|
|
@@ -141,21 +141,21 @@ class OpenSearchLogsToolset(BasePodLoggingToolset):
|
|
|
141
141
|
except requests.Timeout:
|
|
142
142
|
logging.warning("Timeout while fetching OpenSearch logs", exc_info=True)
|
|
143
143
|
return StructuredToolResult(
|
|
144
|
-
status=
|
|
144
|
+
status=StructuredToolResultStatus.ERROR,
|
|
145
145
|
error="Request timed out while fetching OpenSearch logs",
|
|
146
146
|
params=params.model_dump(),
|
|
147
147
|
)
|
|
148
148
|
except RequestException as e:
|
|
149
149
|
logging.warning("Failed to fetch OpenSearch logs", exc_info=True)
|
|
150
150
|
return StructuredToolResult(
|
|
151
|
-
status=
|
|
151
|
+
status=StructuredToolResultStatus.ERROR,
|
|
152
152
|
error=f"Network error while fetching OpenSearch logs: {str(e)}",
|
|
153
153
|
params=params.model_dump(),
|
|
154
154
|
)
|
|
155
155
|
except Exception as e:
|
|
156
156
|
logging.warning("Failed to process OpenSearch logs", exc_info=True)
|
|
157
157
|
return StructuredToolResult(
|
|
158
|
-
status=
|
|
158
|
+
status=StructuredToolResultStatus.ERROR,
|
|
159
159
|
error=f"Unexpected error: {str(e)}",
|
|
160
160
|
params=params.model_dump(),
|
|
161
161
|
)
|