holmesgpt 0.13.2__py3-none-any.whl → 0.16.2a0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- holmes/__init__.py +1 -1
- holmes/clients/robusta_client.py +17 -4
- holmes/common/env_vars.py +40 -1
- holmes/config.py +114 -144
- holmes/core/conversations.py +53 -14
- holmes/core/feedback.py +191 -0
- holmes/core/investigation.py +18 -22
- holmes/core/llm.py +489 -88
- holmes/core/models.py +103 -1
- holmes/core/openai_formatting.py +13 -0
- holmes/core/prompt.py +1 -1
- holmes/core/safeguards.py +4 -4
- holmes/core/supabase_dal.py +293 -100
- holmes/core/tool_calling_llm.py +423 -323
- holmes/core/tools.py +311 -33
- holmes/core/tools_utils/token_counting.py +14 -0
- holmes/core/tools_utils/tool_context_window_limiter.py +57 -0
- holmes/core/tools_utils/tool_executor.py +13 -8
- holmes/core/toolset_manager.py +155 -4
- holmes/core/tracing.py +6 -1
- holmes/core/transformers/__init__.py +23 -0
- holmes/core/transformers/base.py +62 -0
- holmes/core/transformers/llm_summarize.py +174 -0
- holmes/core/transformers/registry.py +122 -0
- holmes/core/transformers/transformer.py +31 -0
- holmes/core/truncation/compaction.py +59 -0
- holmes/core/truncation/dal_truncation_utils.py +23 -0
- holmes/core/truncation/input_context_window_limiter.py +218 -0
- holmes/interactive.py +177 -24
- holmes/main.py +7 -4
- holmes/plugins/prompts/_fetch_logs.jinja2 +26 -1
- holmes/plugins/prompts/_general_instructions.jinja2 +1 -2
- holmes/plugins/prompts/_runbook_instructions.jinja2 +23 -12
- holmes/plugins/prompts/conversation_history_compaction.jinja2 +88 -0
- holmes/plugins/prompts/generic_ask.jinja2 +2 -4
- holmes/plugins/prompts/generic_ask_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_ask_for_issue_conversation.jinja2 +2 -1
- holmes/plugins/prompts/generic_investigation.jinja2 +2 -1
- holmes/plugins/prompts/investigation_procedure.jinja2 +48 -0
- holmes/plugins/prompts/kubernetes_workload_ask.jinja2 +2 -1
- holmes/plugins/prompts/kubernetes_workload_chat.jinja2 +2 -1
- holmes/plugins/runbooks/__init__.py +117 -18
- holmes/plugins/runbooks/catalog.json +2 -0
- holmes/plugins/toolsets/__init__.py +21 -8
- holmes/plugins/toolsets/aks-node-health.yaml +46 -0
- holmes/plugins/toolsets/aks.yaml +64 -0
- holmes/plugins/toolsets/atlas_mongodb/mongodb_atlas.py +26 -36
- holmes/plugins/toolsets/azure_sql/azure_sql_toolset.py +0 -1
- holmes/plugins/toolsets/azure_sql/tools/analyze_connection_failures.py +10 -7
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_connections.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_health_status.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_performance.py +8 -6
- holmes/plugins/toolsets/azure_sql/tools/analyze_database_storage.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_active_alerts.py +9 -7
- holmes/plugins/toolsets/azure_sql/tools/get_slow_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_cpu_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_data_io_queries.py +9 -6
- holmes/plugins/toolsets/azure_sql/tools/get_top_log_io_queries.py +9 -6
- holmes/plugins/toolsets/bash/bash_toolset.py +10 -13
- holmes/plugins/toolsets/bash/common/bash.py +7 -7
- holmes/plugins/toolsets/cilium.yaml +284 -0
- holmes/plugins/toolsets/coralogix/toolset_coralogix_logs.py +5 -3
- holmes/plugins/toolsets/datadog/datadog_api.py +490 -24
- holmes/plugins/toolsets/datadog/datadog_logs_instructions.jinja2 +21 -10
- holmes/plugins/toolsets/datadog/toolset_datadog_general.py +349 -216
- holmes/plugins/toolsets/datadog/toolset_datadog_logs.py +190 -19
- holmes/plugins/toolsets/datadog/toolset_datadog_metrics.py +101 -44
- holmes/plugins/toolsets/datadog/toolset_datadog_rds.py +13 -16
- holmes/plugins/toolsets/datadog/toolset_datadog_traces.py +25 -31
- holmes/plugins/toolsets/git.py +51 -46
- holmes/plugins/toolsets/grafana/common.py +15 -3
- holmes/plugins/toolsets/grafana/grafana_api.py +46 -24
- holmes/plugins/toolsets/grafana/grafana_tempo_api.py +454 -0
- holmes/plugins/toolsets/grafana/loki/instructions.jinja2 +9 -0
- holmes/plugins/toolsets/grafana/loki/toolset_grafana_loki.py +117 -0
- holmes/plugins/toolsets/grafana/toolset_grafana.py +211 -91
- holmes/plugins/toolsets/grafana/toolset_grafana_dashboard.jinja2 +27 -0
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.jinja2 +246 -11
- holmes/plugins/toolsets/grafana/toolset_grafana_tempo.py +653 -293
- holmes/plugins/toolsets/grafana/trace_parser.py +1 -1
- holmes/plugins/toolsets/internet/internet.py +6 -7
- holmes/plugins/toolsets/internet/notion.py +5 -6
- holmes/plugins/toolsets/investigator/core_investigation.py +42 -34
- holmes/plugins/toolsets/kafka.py +25 -36
- holmes/plugins/toolsets/kubernetes.yaml +58 -84
- holmes/plugins/toolsets/kubernetes_logs.py +6 -6
- holmes/plugins/toolsets/kubernetes_logs.yaml +32 -0
- holmes/plugins/toolsets/logging_utils/logging_api.py +80 -4
- holmes/plugins/toolsets/mcp/toolset_mcp.py +181 -55
- holmes/plugins/toolsets/newrelic/__init__.py +0 -0
- holmes/plugins/toolsets/newrelic/new_relic_api.py +125 -0
- holmes/plugins/toolsets/newrelic/newrelic.jinja2 +41 -0
- holmes/plugins/toolsets/newrelic/newrelic.py +163 -0
- holmes/plugins/toolsets/opensearch/opensearch.py +10 -17
- holmes/plugins/toolsets/opensearch/opensearch_logs.py +7 -7
- holmes/plugins/toolsets/opensearch/opensearch_ppl_query_docs.jinja2 +1616 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist.py +78 -0
- holmes/plugins/toolsets/opensearch/opensearch_query_assist_instructions.jinja2 +223 -0
- holmes/plugins/toolsets/opensearch/opensearch_traces.py +13 -16
- holmes/plugins/toolsets/openshift.yaml +283 -0
- holmes/plugins/toolsets/prometheus/prometheus.py +915 -390
- holmes/plugins/toolsets/prometheus/prometheus_instructions.jinja2 +43 -2
- holmes/plugins/toolsets/prometheus/utils.py +28 -0
- holmes/plugins/toolsets/rabbitmq/toolset_rabbitmq.py +9 -10
- holmes/plugins/toolsets/robusta/robusta.py +236 -65
- holmes/plugins/toolsets/robusta/robusta_instructions.jinja2 +26 -9
- holmes/plugins/toolsets/runbook/runbook_fetcher.py +137 -26
- holmes/plugins/toolsets/service_discovery.py +1 -1
- holmes/plugins/toolsets/servicenow_tables/instructions.jinja2 +83 -0
- holmes/plugins/toolsets/servicenow_tables/servicenow_tables.py +426 -0
- holmes/plugins/toolsets/utils.py +88 -0
- holmes/utils/config_utils.py +91 -0
- holmes/utils/default_toolset_installation_guide.jinja2 +1 -22
- holmes/utils/env.py +7 -0
- holmes/utils/global_instructions.py +75 -10
- holmes/utils/holmes_status.py +2 -1
- holmes/utils/holmes_sync_toolsets.py +0 -2
- holmes/utils/krr_utils.py +188 -0
- holmes/utils/sentry_helper.py +41 -0
- holmes/utils/stream.py +61 -7
- holmes/version.py +34 -14
- holmesgpt-0.16.2a0.dist-info/LICENSE +178 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/METADATA +29 -27
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/RECORD +126 -102
- holmes/core/performance_timing.py +0 -72
- holmes/plugins/toolsets/grafana/tempo_api.py +0 -124
- holmes/plugins/toolsets/grafana/toolset_grafana_loki.py +0 -110
- holmes/plugins/toolsets/newrelic.py +0 -231
- holmes/plugins/toolsets/servicenow/install.md +0 -37
- holmes/plugins/toolsets/servicenow/instructions.jinja2 +0 -3
- holmes/plugins/toolsets/servicenow/servicenow.py +0 -219
- holmesgpt-0.13.2.dist-info/LICENSE.txt +0 -21
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/WHEEL +0 -0
- {holmesgpt-0.13.2.dist-info → holmesgpt-0.16.2a0.dist-info}/entry_points.txt +0 -0
|
@@ -187,7 +187,7 @@ def format_traces_list(trace_data: Dict) -> str:
|
|
|
187
187
|
else "\n"
|
|
188
188
|
)
|
|
189
189
|
trace_str += f"\tstartTime={unix_nano_to_rfc3339(int(trace.get('startTimeUnixNano')))}"
|
|
190
|
-
trace_str += f" rootServiceName={trace.get('
|
|
190
|
+
trace_str += f" rootServiceName={trace.get('rootServiceName')}"
|
|
191
191
|
trace_str += f" rootTraceName={trace.get('rootTraceName')}"
|
|
192
192
|
traces_str.append(trace_str)
|
|
193
193
|
return "\n".join(traces_str)
|
|
@@ -6,6 +6,7 @@ from typing import Any, Optional, Tuple, Dict, List
|
|
|
6
6
|
from requests import RequestException, Timeout # type: ignore
|
|
7
7
|
from holmes.core.tools import (
|
|
8
8
|
Tool,
|
|
9
|
+
ToolInvokeContext,
|
|
9
10
|
ToolParameter,
|
|
10
11
|
Toolset,
|
|
11
12
|
ToolsetTag,
|
|
@@ -15,7 +16,7 @@ from markdownify import markdownify
|
|
|
15
16
|
from bs4 import BeautifulSoup
|
|
16
17
|
|
|
17
18
|
import requests # type: ignore
|
|
18
|
-
from holmes.core.tools import StructuredToolResult,
|
|
19
|
+
from holmes.core.tools import StructuredToolResult, StructuredToolResultStatus
|
|
19
20
|
from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
|
|
20
21
|
|
|
21
22
|
|
|
@@ -25,7 +26,7 @@ INTERNET_TOOLSET_USER_AGENT = os.environ.get(
|
|
|
25
26
|
"Mozilla/5.0 (X11; Linux x86_64; rv:128.0; holmesgpt;) Gecko/20100101 Firefox/128.0",
|
|
26
27
|
)
|
|
27
28
|
INTERNET_TOOLSET_TIMEOUT_SECONDS = int(
|
|
28
|
-
os.environ.get("INTERNET_TOOLSET_TIMEOUT_SECONDS", "
|
|
29
|
+
os.environ.get("INTERNET_TOOLSET_TIMEOUT_SECONDS", "5")
|
|
29
30
|
)
|
|
30
31
|
|
|
31
32
|
SELECTORS_TO_REMOVE = [
|
|
@@ -186,9 +187,7 @@ class FetchWebpage(Tool):
|
|
|
186
187
|
toolset=toolset, # type: ignore
|
|
187
188
|
)
|
|
188
189
|
|
|
189
|
-
def _invoke(
|
|
190
|
-
self, params: dict, user_approved: bool = False
|
|
191
|
-
) -> StructuredToolResult:
|
|
190
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
192
191
|
url: str = params["url"]
|
|
193
192
|
|
|
194
193
|
additional_headers = (
|
|
@@ -199,7 +198,7 @@ class FetchWebpage(Tool):
|
|
|
199
198
|
if not content:
|
|
200
199
|
logging.error(f"Failed to retrieve content from {url}")
|
|
201
200
|
return StructuredToolResult(
|
|
202
|
-
status=
|
|
201
|
+
status=StructuredToolResultStatus.ERROR,
|
|
203
202
|
error=f"Failed to retrieve content from {url}",
|
|
204
203
|
params=params,
|
|
205
204
|
)
|
|
@@ -211,7 +210,7 @@ class FetchWebpage(Tool):
|
|
|
211
210
|
content = html_to_markdown(content)
|
|
212
211
|
|
|
213
212
|
return StructuredToolResult(
|
|
214
|
-
status=
|
|
213
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
215
214
|
data=content,
|
|
216
215
|
params=params,
|
|
217
216
|
)
|
|
@@ -4,6 +4,7 @@ import json
|
|
|
4
4
|
from typing import Any, Dict, Tuple
|
|
5
5
|
from holmes.core.tools import (
|
|
6
6
|
Tool,
|
|
7
|
+
ToolInvokeContext,
|
|
7
8
|
ToolParameter,
|
|
8
9
|
ToolsetTag,
|
|
9
10
|
)
|
|
@@ -13,7 +14,7 @@ from holmes.plugins.toolsets.internet.internet import (
|
|
|
13
14
|
)
|
|
14
15
|
from holmes.core.tools import (
|
|
15
16
|
StructuredToolResult,
|
|
16
|
-
|
|
17
|
+
StructuredToolResultStatus,
|
|
17
18
|
)
|
|
18
19
|
from holmes.plugins.toolsets.utils import toolset_name_for_one_liner
|
|
19
20
|
|
|
@@ -44,9 +45,7 @@ class FetchNotion(Tool):
|
|
|
44
45
|
return f"https://api.notion.com/v1/blocks/{notion_id}/children"
|
|
45
46
|
return url # Return original URL if no match is found
|
|
46
47
|
|
|
47
|
-
def _invoke(
|
|
48
|
-
self, params: dict, user_approved: bool = False
|
|
49
|
-
) -> StructuredToolResult:
|
|
48
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
50
49
|
url: str = params["url"]
|
|
51
50
|
|
|
52
51
|
# Get headers from the toolset configuration
|
|
@@ -59,13 +58,13 @@ class FetchNotion(Tool):
|
|
|
59
58
|
if not content:
|
|
60
59
|
logging.error(f"Failed to retrieve content from {url}")
|
|
61
60
|
return StructuredToolResult(
|
|
62
|
-
status=
|
|
61
|
+
status=StructuredToolResultStatus.ERROR,
|
|
63
62
|
error=f"Failed to retrieve content from {url}",
|
|
64
63
|
params=params,
|
|
65
64
|
)
|
|
66
65
|
|
|
67
66
|
return StructuredToolResult(
|
|
68
|
-
status=
|
|
67
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
69
68
|
data=self.parse_notion_content(content),
|
|
70
69
|
params=params,
|
|
71
70
|
)
|
|
@@ -3,20 +3,39 @@ import os
|
|
|
3
3
|
from typing import Any, Dict
|
|
4
4
|
|
|
5
5
|
from uuid import uuid4
|
|
6
|
+
|
|
6
7
|
from holmes.core.todo_tasks_formatter import format_tasks
|
|
7
8
|
from holmes.core.tools import (
|
|
9
|
+
StructuredToolResult,
|
|
10
|
+
StructuredToolResultStatus,
|
|
11
|
+
Tool,
|
|
12
|
+
ToolInvokeContext,
|
|
13
|
+
ToolParameter,
|
|
8
14
|
Toolset,
|
|
9
15
|
ToolsetTag,
|
|
10
|
-
ToolParameter,
|
|
11
|
-
Tool,
|
|
12
|
-
StructuredToolResult,
|
|
13
|
-
ToolResultStatus,
|
|
14
16
|
)
|
|
15
17
|
from holmes.plugins.toolsets.investigator.model import Task, TaskStatus
|
|
16
18
|
|
|
19
|
+
TODO_WRITE_TOOL_NAME = "TodoWrite"
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def parse_tasks(todos_data: Any) -> list[Task]:
|
|
23
|
+
tasks = []
|
|
24
|
+
|
|
25
|
+
for todo_item in todos_data:
|
|
26
|
+
if isinstance(todo_item, dict):
|
|
27
|
+
task = Task(
|
|
28
|
+
id=todo_item.get("id", str(uuid4())),
|
|
29
|
+
content=todo_item.get("content", ""),
|
|
30
|
+
status=TaskStatus(todo_item.get("status", "pending")),
|
|
31
|
+
)
|
|
32
|
+
tasks.append(task)
|
|
33
|
+
|
|
34
|
+
return tasks
|
|
35
|
+
|
|
17
36
|
|
|
18
37
|
class TodoWriteTool(Tool):
|
|
19
|
-
name: str =
|
|
38
|
+
name: str = TODO_WRITE_TOOL_NAME
|
|
20
39
|
description: str = "Save investigation tasks to break down complex problems into manageable sub-tasks. ALWAYS provide the COMPLETE list of all tasks, not just the ones being updated."
|
|
21
40
|
parameters: Dict[str, ToolParameter] = {
|
|
22
41
|
"todos": ToolParameter(
|
|
@@ -28,7 +47,11 @@ class TodoWriteTool(Tool):
|
|
|
28
47
|
properties={
|
|
29
48
|
"id": ToolParameter(type="string", required=True),
|
|
30
49
|
"content": ToolParameter(type="string", required=True),
|
|
31
|
-
"status": ToolParameter(
|
|
50
|
+
"status": ToolParameter(
|
|
51
|
+
type="string",
|
|
52
|
+
required=True,
|
|
53
|
+
enum=["pending", "in_progress", "completed"],
|
|
54
|
+
),
|
|
32
55
|
},
|
|
33
56
|
),
|
|
34
57
|
),
|
|
@@ -57,41 +80,28 @@ class TodoWriteTool(Tool):
|
|
|
57
80
|
content_width = max(max_content_width, len("Content"))
|
|
58
81
|
status_width = max(max_status_display_width, len("Status"))
|
|
59
82
|
|
|
60
|
-
# Build table
|
|
61
83
|
separator = f"+{'-' * (id_width + 2)}+{'-' * (content_width + 2)}+{'-' * (status_width + 2)}+"
|
|
62
84
|
header = f"| {'ID':<{id_width}} | {'Content':<{content_width}} | {'Status':<{status_width}} |"
|
|
63
|
-
|
|
64
|
-
# Log the table
|
|
65
|
-
logging.info("Updated Investigation Tasks:")
|
|
66
|
-
logging.info(separator)
|
|
67
|
-
logging.info(header)
|
|
68
|
-
logging.info(separator)
|
|
85
|
+
tasks_to_display = []
|
|
69
86
|
|
|
70
87
|
for task in tasks:
|
|
71
88
|
status_display = f"{status_icons[task.status.value]} {task.status.value}"
|
|
72
89
|
row = f"| {task.id:<{id_width}} | {task.content:<{content_width}} | {status_display:<{status_width}} |"
|
|
73
|
-
|
|
90
|
+
tasks_to_display.append(row)
|
|
74
91
|
|
|
75
|
-
logging.info(
|
|
92
|
+
logging.info(
|
|
93
|
+
f"Task List:\n{separator}\n{header}\n{separator}\n"
|
|
94
|
+
+ "\n".join(tasks_to_display)
|
|
95
|
+
+ f"\n{separator}"
|
|
96
|
+
)
|
|
76
97
|
|
|
77
|
-
def _invoke(
|
|
78
|
-
self, params: dict, user_approved: bool = False
|
|
79
|
-
) -> StructuredToolResult:
|
|
98
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
80
99
|
try:
|
|
81
100
|
todos_data = params.get("todos", [])
|
|
82
101
|
|
|
83
|
-
tasks =
|
|
84
|
-
|
|
85
|
-
for todo_item in todos_data:
|
|
86
|
-
if isinstance(todo_item, dict):
|
|
87
|
-
task = Task(
|
|
88
|
-
id=todo_item.get("id", str(uuid4())),
|
|
89
|
-
content=todo_item.get("content", ""),
|
|
90
|
-
status=TaskStatus(todo_item.get("status", "pending")),
|
|
91
|
-
)
|
|
92
|
-
tasks.append(task)
|
|
102
|
+
tasks = parse_tasks(todos_data=todos_data)
|
|
93
103
|
|
|
94
|
-
logging.
|
|
104
|
+
logging.debug(f"Tasks: {len(tasks)}")
|
|
95
105
|
|
|
96
106
|
self.print_tasks_table(tasks)
|
|
97
107
|
formatted_tasks = format_tasks(tasks)
|
|
@@ -103,7 +113,7 @@ class TodoWriteTool(Tool):
|
|
|
103
113
|
response_data += "No tasks currently in the investigation plan."
|
|
104
114
|
|
|
105
115
|
return StructuredToolResult(
|
|
106
|
-
status=
|
|
116
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
107
117
|
data=response_data,
|
|
108
118
|
params=params,
|
|
109
119
|
)
|
|
@@ -111,14 +121,13 @@ class TodoWriteTool(Tool):
|
|
|
111
121
|
except Exception as e:
|
|
112
122
|
logging.exception("error using todowrite tool")
|
|
113
123
|
return StructuredToolResult(
|
|
114
|
-
status=
|
|
124
|
+
status=StructuredToolResultStatus.ERROR,
|
|
115
125
|
error=f"Failed to process tasks: {str(e)}",
|
|
116
126
|
params=params,
|
|
117
127
|
)
|
|
118
128
|
|
|
119
129
|
def get_parameterized_one_liner(self, params: Dict) -> str:
|
|
120
|
-
|
|
121
|
-
return f"Write {todos} investigation tasks"
|
|
130
|
+
return "Update investigation tasks"
|
|
122
131
|
|
|
123
132
|
|
|
124
133
|
class CoreInvestigationToolset(Toolset):
|
|
@@ -133,7 +142,6 @@ class CoreInvestigationToolset(Toolset):
|
|
|
133
142
|
tags=[ToolsetTag.CORE],
|
|
134
143
|
is_default=True,
|
|
135
144
|
)
|
|
136
|
-
logging.info("Core investigation toolset loaded")
|
|
137
145
|
|
|
138
146
|
def get_example_config(self) -> Dict[str, Any]:
|
|
139
147
|
return {}
|
holmes/plugins/toolsets/kafka.py
CHANGED
|
@@ -27,8 +27,9 @@ from holmes.core.tools import (
|
|
|
27
27
|
CallablePrerequisite,
|
|
28
28
|
StructuredToolResult,
|
|
29
29
|
Tool,
|
|
30
|
+
ToolInvokeContext,
|
|
30
31
|
ToolParameter,
|
|
31
|
-
|
|
32
|
+
StructuredToolResultStatus,
|
|
32
33
|
Toolset,
|
|
33
34
|
ToolsetTag,
|
|
34
35
|
)
|
|
@@ -153,15 +154,13 @@ class ListKafkaConsumers(BaseKafkaTool):
|
|
|
153
154
|
toolset=toolset,
|
|
154
155
|
)
|
|
155
156
|
|
|
156
|
-
def _invoke(
|
|
157
|
-
self, params: dict, user_approved: bool = False
|
|
158
|
-
) -> StructuredToolResult:
|
|
157
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
159
158
|
try:
|
|
160
159
|
kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
|
|
161
160
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
162
161
|
if client is None:
|
|
163
162
|
return StructuredToolResult(
|
|
164
|
-
status=
|
|
163
|
+
status=StructuredToolResultStatus.ERROR,
|
|
165
164
|
error="No admin_client on toolset. This toolset is misconfigured.",
|
|
166
165
|
params=params,
|
|
167
166
|
)
|
|
@@ -190,7 +189,7 @@ class ListKafkaConsumers(BaseKafkaTool):
|
|
|
190
189
|
if errors_text:
|
|
191
190
|
result_text = result_text + "\n\n" + errors_text
|
|
192
191
|
return StructuredToolResult(
|
|
193
|
-
status=
|
|
192
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
194
193
|
data=result_text,
|
|
195
194
|
params=params,
|
|
196
195
|
)
|
|
@@ -198,7 +197,7 @@ class ListKafkaConsumers(BaseKafkaTool):
|
|
|
198
197
|
error_msg = f"Failed to list consumer groups: {str(e)}"
|
|
199
198
|
logging.error(error_msg)
|
|
200
199
|
return StructuredToolResult(
|
|
201
|
-
status=
|
|
200
|
+
status=StructuredToolResultStatus.ERROR,
|
|
202
201
|
error=error_msg,
|
|
203
202
|
params=params,
|
|
204
203
|
)
|
|
@@ -228,16 +227,14 @@ class DescribeConsumerGroup(BaseKafkaTool):
|
|
|
228
227
|
toolset=toolset,
|
|
229
228
|
)
|
|
230
229
|
|
|
231
|
-
def _invoke(
|
|
232
|
-
self, params: dict, user_approved: bool = False
|
|
233
|
-
) -> StructuredToolResult:
|
|
230
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
234
231
|
group_id = params["group_id"]
|
|
235
232
|
try:
|
|
236
233
|
kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
|
|
237
234
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
238
235
|
if client is None:
|
|
239
236
|
return StructuredToolResult(
|
|
240
|
-
status=
|
|
237
|
+
status=StructuredToolResultStatus.ERROR,
|
|
241
238
|
error="No admin_client on toolset. This toolset is misconfigured.",
|
|
242
239
|
params=params,
|
|
243
240
|
)
|
|
@@ -247,13 +244,13 @@ class DescribeConsumerGroup(BaseKafkaTool):
|
|
|
247
244
|
if futures.get(group_id):
|
|
248
245
|
group_metadata = futures.get(group_id).result()
|
|
249
246
|
return StructuredToolResult(
|
|
250
|
-
status=
|
|
247
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
251
248
|
data=yaml.dump(convert_to_dict(group_metadata)),
|
|
252
249
|
params=params,
|
|
253
250
|
)
|
|
254
251
|
else:
|
|
255
252
|
return StructuredToolResult(
|
|
256
|
-
status=
|
|
253
|
+
status=StructuredToolResultStatus.ERROR,
|
|
257
254
|
error="Group not found",
|
|
258
255
|
params=params,
|
|
259
256
|
)
|
|
@@ -261,7 +258,7 @@ class DescribeConsumerGroup(BaseKafkaTool):
|
|
|
261
258
|
error_msg = f"Failed to describe consumer group {group_id}: {str(e)}"
|
|
262
259
|
logging.error(error_msg)
|
|
263
260
|
return StructuredToolResult(
|
|
264
|
-
status=
|
|
261
|
+
status=StructuredToolResultStatus.ERROR,
|
|
265
262
|
error=error_msg,
|
|
266
263
|
params=params,
|
|
267
264
|
)
|
|
@@ -286,22 +283,20 @@ class ListTopics(BaseKafkaTool):
|
|
|
286
283
|
toolset=toolset,
|
|
287
284
|
)
|
|
288
285
|
|
|
289
|
-
def _invoke(
|
|
290
|
-
self, params: dict, user_approved: bool = False
|
|
291
|
-
) -> StructuredToolResult:
|
|
286
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
292
287
|
try:
|
|
293
288
|
kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
|
|
294
289
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
295
290
|
if client is None:
|
|
296
291
|
return StructuredToolResult(
|
|
297
|
-
status=
|
|
292
|
+
status=StructuredToolResultStatus.ERROR,
|
|
298
293
|
error="No admin_client on toolset. This toolset is misconfigured.",
|
|
299
294
|
params=params,
|
|
300
295
|
)
|
|
301
296
|
|
|
302
297
|
topics = client.list_topics()
|
|
303
298
|
return StructuredToolResult(
|
|
304
|
-
status=
|
|
299
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
305
300
|
data=yaml.dump(convert_to_dict(topics)),
|
|
306
301
|
params=params,
|
|
307
302
|
)
|
|
@@ -309,7 +304,7 @@ class ListTopics(BaseKafkaTool):
|
|
|
309
304
|
error_msg = f"Failed to list topics: {str(e)}"
|
|
310
305
|
logging.error(error_msg)
|
|
311
306
|
return StructuredToolResult(
|
|
312
|
-
status=
|
|
307
|
+
status=StructuredToolResultStatus.ERROR,
|
|
313
308
|
error=error_msg,
|
|
314
309
|
params=params,
|
|
315
310
|
)
|
|
@@ -344,16 +339,14 @@ class DescribeTopic(BaseKafkaTool):
|
|
|
344
339
|
toolset=toolset,
|
|
345
340
|
)
|
|
346
341
|
|
|
347
|
-
def _invoke(
|
|
348
|
-
self, params: dict, user_approved: bool = False
|
|
349
|
-
) -> StructuredToolResult:
|
|
342
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
350
343
|
topic_name = params["topic_name"]
|
|
351
344
|
try:
|
|
352
345
|
kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
|
|
353
346
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
354
347
|
if client is None:
|
|
355
348
|
return StructuredToolResult(
|
|
356
|
-
status=
|
|
349
|
+
status=StructuredToolResultStatus.ERROR,
|
|
357
350
|
error="No admin_client on toolset. This toolset is misconfigured.",
|
|
358
351
|
params=params,
|
|
359
352
|
)
|
|
@@ -373,7 +366,7 @@ class DescribeTopic(BaseKafkaTool):
|
|
|
373
366
|
result["configuration"] = convert_to_dict(config)
|
|
374
367
|
|
|
375
368
|
return StructuredToolResult(
|
|
376
|
-
status=
|
|
369
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
377
370
|
data=yaml.dump(result),
|
|
378
371
|
params=params,
|
|
379
372
|
)
|
|
@@ -381,7 +374,7 @@ class DescribeTopic(BaseKafkaTool):
|
|
|
381
374
|
error_msg = f"Failed to describe topic {topic_name}: {str(e)}"
|
|
382
375
|
logging.error(error_msg, exc_info=True)
|
|
383
376
|
return StructuredToolResult(
|
|
384
|
-
status=
|
|
377
|
+
status=StructuredToolResultStatus.ERROR,
|
|
385
378
|
error=error_msg,
|
|
386
379
|
params=params,
|
|
387
380
|
)
|
|
@@ -469,16 +462,14 @@ class FindConsumerGroupsByTopic(BaseKafkaTool):
|
|
|
469
462
|
toolset=toolset,
|
|
470
463
|
)
|
|
471
464
|
|
|
472
|
-
def _invoke(
|
|
473
|
-
self, params: dict, user_approved: bool = False
|
|
474
|
-
) -> StructuredToolResult:
|
|
465
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
475
466
|
topic_name = params["topic_name"]
|
|
476
467
|
try:
|
|
477
468
|
kafka_cluster_name = get_param_or_raise(params, "kafka_cluster_name")
|
|
478
469
|
client = self.get_kafka_client(kafka_cluster_name)
|
|
479
470
|
if client is None:
|
|
480
471
|
return StructuredToolResult(
|
|
481
|
-
status=
|
|
472
|
+
status=StructuredToolResultStatus.ERROR,
|
|
482
473
|
error="No admin_client on toolset. This toolset is misconfigured.",
|
|
483
474
|
params=params,
|
|
484
475
|
)
|
|
@@ -530,7 +521,7 @@ class FindConsumerGroupsByTopic(BaseKafkaTool):
|
|
|
530
521
|
result_text = result_text + "\n\n" + errors_text
|
|
531
522
|
|
|
532
523
|
return StructuredToolResult(
|
|
533
|
-
status=
|
|
524
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
534
525
|
data=result_text,
|
|
535
526
|
params=params,
|
|
536
527
|
)
|
|
@@ -540,7 +531,7 @@ class FindConsumerGroupsByTopic(BaseKafkaTool):
|
|
|
540
531
|
)
|
|
541
532
|
logging.error(error_msg)
|
|
542
533
|
return StructuredToolResult(
|
|
543
|
-
status=
|
|
534
|
+
status=StructuredToolResultStatus.ERROR,
|
|
544
535
|
error=error_msg,
|
|
545
536
|
params=params,
|
|
546
537
|
)
|
|
@@ -559,12 +550,10 @@ class ListKafkaClusters(BaseKafkaTool):
|
|
|
559
550
|
toolset=toolset,
|
|
560
551
|
)
|
|
561
552
|
|
|
562
|
-
def _invoke(
|
|
563
|
-
self, params: dict, user_approved: bool = False
|
|
564
|
-
) -> StructuredToolResult:
|
|
553
|
+
def _invoke(self, params: dict, context: ToolInvokeContext) -> StructuredToolResult:
|
|
565
554
|
cluster_names = list(self.toolset.clients.keys())
|
|
566
555
|
return StructuredToolResult(
|
|
567
|
-
status=
|
|
556
|
+
status=StructuredToolResultStatus.SUCCESS,
|
|
568
557
|
data="Available Kafka Clusters:\n" + "\n".join(cluster_names),
|
|
569
558
|
params=params,
|
|
570
559
|
)
|
|
@@ -8,6 +8,10 @@ toolsets:
|
|
|
8
8
|
prerequisites:
|
|
9
9
|
- command: "kubectl version --client"
|
|
10
10
|
|
|
11
|
+
# Note: Many tools in this toolset use transformers with llm_summarize
|
|
12
|
+
# to automatically summarize large kubectl outputs when a fast model is configured.
|
|
13
|
+
# This reduces context window usage while preserving key information for debugging.
|
|
14
|
+
|
|
11
15
|
tools:
|
|
12
16
|
- name: "kubectl_describe"
|
|
13
17
|
description: >
|
|
@@ -17,6 +21,20 @@ toolsets:
|
|
|
17
21
|
- 'describe pod xyz-123'
|
|
18
22
|
- 'show service xyz-123 in namespace my-ns'
|
|
19
23
|
command: "kubectl describe {{ kind }} {{ name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
24
|
+
transformers:
|
|
25
|
+
- name: llm_summarize
|
|
26
|
+
config:
|
|
27
|
+
input_threshold: 1000
|
|
28
|
+
prompt: |
|
|
29
|
+
Summarize this kubectl describe output focusing on:
|
|
30
|
+
- What needs attention or immediate action
|
|
31
|
+
- Resource status and health indicators
|
|
32
|
+
- Any errors, warnings, or non-standard states
|
|
33
|
+
- Key configuration details that could affect functionality
|
|
34
|
+
- When possible, mention exact field names so the user can grep for specific details
|
|
35
|
+
- Be concise: aim for ≤ 50% of the original length; avoid repeating defaults/healthy/unchanged details
|
|
36
|
+
- Prefer aggregates and counts; list only outliers and actionable items
|
|
37
|
+
- Keep grep-friendly: include exact field names/values that matter
|
|
20
38
|
|
|
21
39
|
- name: "kubectl_get_by_name"
|
|
22
40
|
description: "Run `kubectl get <kind> <name> --show-labels`"
|
|
@@ -25,10 +43,36 @@ toolsets:
|
|
|
25
43
|
- name: "kubectl_get_by_kind_in_namespace"
|
|
26
44
|
description: "Run `kubectl get <kind> -n <namespace> --show-labels` to get all resources of a given type in namespace"
|
|
27
45
|
command: "kubectl get --show-labels -o wide {{ kind }} -n {{namespace}}"
|
|
46
|
+
transformers:
|
|
47
|
+
- name: llm_summarize
|
|
48
|
+
config:
|
|
49
|
+
input_threshold: 1000
|
|
50
|
+
prompt: |
|
|
51
|
+
Summarize this kubectl output focusing on:
|
|
52
|
+
- What needs attention or immediate action
|
|
53
|
+
- Group similar resources into aggregate descriptions
|
|
54
|
+
- Make sure to mention outliers, errors, and non-standard states
|
|
55
|
+
- List healthy resources as aggregate descriptions
|
|
56
|
+
- When listing unhealthy resources, also try to use aggregate descriptions when possible
|
|
57
|
+
- When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down
|
|
58
|
+
- Be concise and avoid expansion: target ≤ 50% of input size; prefer counts + outliers over full listings
|
|
28
59
|
|
|
29
60
|
- name: "kubectl_get_by_kind_in_cluster"
|
|
30
61
|
description: "Run `kubectl get -A <kind> --show-labels` to get all resources of a given type in the cluster"
|
|
31
62
|
command: "kubectl get -A --show-labels -o wide {{ kind }}"
|
|
63
|
+
transformers:
|
|
64
|
+
- name: llm_summarize
|
|
65
|
+
config:
|
|
66
|
+
input_threshold: 1000
|
|
67
|
+
prompt: |
|
|
68
|
+
Summarize this kubectl output focusing on:
|
|
69
|
+
- What needs attention or immediate action
|
|
70
|
+
- Group similar resources into a single line and description
|
|
71
|
+
- Make sure to mention outliers, errors, and non-standard states
|
|
72
|
+
- List healthy resources as aggregate descriptions
|
|
73
|
+
- When listing unhealthy resources, also try to use aggregate descriptions when possible
|
|
74
|
+
- When possible, mention exact keywords so the user can rerun the command with | grep <keyword> and drill down on the parts they care about
|
|
75
|
+
- Strive for ≤ 50% of the original size; keep results compact and grep-friendly (one line per aggregate)
|
|
32
76
|
|
|
33
77
|
- name: "kubectl_find_resource"
|
|
34
78
|
description: "Run `kubectl get {{ kind }} -A --show-labels | grep {{ keyword }}` to find a resource where you know a substring of the name, IP, namespace, or labels"
|
|
@@ -42,95 +86,25 @@ toolsets:
|
|
|
42
86
|
description: "Retrieve the events for a specific Kubernetes resource. `resource_type` can be any kubernetes resource type: 'pod', 'service', 'deployment', 'job', 'node', etc."
|
|
43
87
|
command: "kubectl events --for {{resource_type}}/{{ resource_name }}{% if namespace %} -n {{ namespace }}{% endif %}"
|
|
44
88
|
|
|
45
|
-
- name: "kubectl_memory_requests_all_namespaces"
|
|
46
|
-
description: "Fetch and display memory requests for all pods across all namespaces in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
|
|
47
|
-
command: |
|
|
48
|
-
kubectl get pods --all-namespaces -o custom-columns="NAMESPACE:.metadata.namespace,NAME:.metadata.name,MEMORY_REQUEST:.spec.containers[*].resources.requests.memory" --no-headers | \
|
|
49
|
-
awk '
|
|
50
|
-
function convert_to_mib(value) {
|
|
51
|
-
if (value ~ /^[0-9]+e[0-9]+$/) return (value + 0) / (1024 * 1024); # Scientific notation
|
|
52
|
-
if (value ~ /m$/) return (value + 0) / (1024^2 * 1000); # Millibytes (m)
|
|
53
|
-
if (value ~ /Ei$/) return (value + 0) * 1024^6 / (1024^2); # Binary units
|
|
54
|
-
if (value ~ /Pi$/) return (value + 0) * 1024^5 / (1024^2);
|
|
55
|
-
if (value ~ /Ti$/) return (value + 0) * 1024^4 / (1024^2);
|
|
56
|
-
if (value ~ /Gi$/) return (value + 0) * 1024^3 / (1024^2);
|
|
57
|
-
if (value ~ /Mi$/) return (value + 0);
|
|
58
|
-
if (value ~ /Ki$/) return (value + 0) / 1024;
|
|
59
|
-
if (value ~ /E$/) return (value + 0) * 1000^6 / (1024^2); # Decimal units
|
|
60
|
-
if (value ~ /P$/) return (value + 0) * 1000^5 / (1024^2);
|
|
61
|
-
if (value ~ /T$/) return (value + 0) * 1000^4 / (1024^2);
|
|
62
|
-
if (value ~ /G$/) return (value + 0) * 1000^3 / (1024^2);
|
|
63
|
-
if (value ~ /M$/) return (value + 0) * 1000^2 / (1024^2);
|
|
64
|
-
if (value ~ /k$/) return (value + 0) * 1000 / (1024^2);
|
|
65
|
-
return (value + 0) / (1024 * 1024); # Default: bytes
|
|
66
|
-
}
|
|
67
|
-
function sum_memory(requests) {
|
|
68
|
-
gsub(/^[ \t]+|[ \t]+$/, "", requests);
|
|
69
|
-
if (requests == "" || requests == "<none>") return 0;
|
|
70
|
-
split(requests, arr, ",");
|
|
71
|
-
total = 0;
|
|
72
|
-
for (i in arr) {
|
|
73
|
-
if (arr[i] != "<none>") total += convert_to_mib(arr[i]);
|
|
74
|
-
}
|
|
75
|
-
return total;
|
|
76
|
-
}
|
|
77
|
-
{
|
|
78
|
-
namespace = $1;
|
|
79
|
-
name = $2;
|
|
80
|
-
requests = $3;
|
|
81
|
-
for (i=4; i<=NF; i++) {
|
|
82
|
-
requests = requests " " $i;
|
|
83
|
-
}
|
|
84
|
-
print namespace, name, sum_memory(requests) " Mi";
|
|
85
|
-
}' | sort -k3 -nr
|
|
86
|
-
|
|
87
|
-
- name: "kubectl_memory_requests_namespace"
|
|
88
|
-
description: "Fetch and display memory requests for all pods in a specified namespace in MiB, summing requests across multiple containers where applicable and handling binary, decimal, and millibyte units correctly."
|
|
89
|
-
command: |
|
|
90
|
-
kubectl get pods -n {{ namespace }} -o custom-columns="NAMESPACE:.metadata.namespace,NAME:.metadata.name,MEMORY_REQUEST:.spec.containers[*].resources.requests.memory" --no-headers | \
|
|
91
|
-
awk '
|
|
92
|
-
function convert_to_mib(value) {
|
|
93
|
-
if (value ~ /^[0-9]+e[0-9]+$/) return (value + 0) / (1024 * 1024); # Scientific notation
|
|
94
|
-
if (value ~ /m$/) return (value + 0) / (1024^2 * 1000); # Millibytes (m)
|
|
95
|
-
if (value ~ /Ei$/) return (value + 0) * 1024^6 / (1024^2); # Binary units
|
|
96
|
-
if (value ~ /Pi$/) return (value + 0) * 1024^5 / (1024^2);
|
|
97
|
-
if (value ~ /Ti$/) return (value + 0) * 1024^4 / (1024^2);
|
|
98
|
-
if (value ~ /Gi$/) return (value + 0) * 1024^3 / (1024^2);
|
|
99
|
-
if (value ~ /Mi$/) return (value + 0);
|
|
100
|
-
if (value ~ /Ki$/) return (value + 0) / 1024;
|
|
101
|
-
if (value ~ /E$/) return (value + 0) * 1000^6 / (1024^2); # Decimal units
|
|
102
|
-
if (value ~ /P$/) return (value + 0) * 1000^5 / (1024^2);
|
|
103
|
-
if (value ~ /T$/) return (value + 0) * 1000^4 / (1024^2);
|
|
104
|
-
if (value ~ /G$/) return (value + 0) * 1000^3 / (1024^2);
|
|
105
|
-
if (value ~ /M$/) return (value + 0) * 1000^2 / (1024^2);
|
|
106
|
-
if (value ~ /k$/) return (value + 0) * 1000 / (1024^2);
|
|
107
|
-
return (value + 0) / (1024 * 1024); # Default: bytes
|
|
108
|
-
}
|
|
109
|
-
function sum_memory(requests) {
|
|
110
|
-
gsub(/^[ \t]+|[ \t]+$/, "", requests);
|
|
111
|
-
if (requests == "" || requests == "<none>") return 0;
|
|
112
|
-
split(requests, arr, ",");
|
|
113
|
-
total = 0;
|
|
114
|
-
for (i in arr) {
|
|
115
|
-
if (arr[i] != "<none>") total += convert_to_mib(arr[i]);
|
|
116
|
-
}
|
|
117
|
-
return total;
|
|
118
|
-
}
|
|
119
|
-
{
|
|
120
|
-
namespace = $1;
|
|
121
|
-
name = $2;
|
|
122
|
-
requests = $3;
|
|
123
|
-
for (i=4; i<=NF; i++) {
|
|
124
|
-
requests = requests " " $i;
|
|
125
|
-
}
|
|
126
|
-
print namespace, name, sum_memory(requests) " Mi";
|
|
127
|
-
}' | sort -k3 -nr
|
|
128
|
-
|
|
129
89
|
- name: "kubernetes_jq_query"
|
|
130
90
|
user_description: "Query Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -r {{jq_expr}}"
|
|
131
91
|
description: >
|
|
132
92
|
Use kubectl to get json for all resources of a specific kind pipe the results to jq to filter them. Do not worry about escaping the jq_expr it will be done by the system on an unescaped expression that you give. e.g. give an expression like .items[] | .spec.containers[].image | select(test("^gcr.io/") | not)
|
|
133
93
|
command: kubectl get {{ kind }} --all-namespaces -o json | jq -r {{ jq_expr }}
|
|
94
|
+
transformers:
|
|
95
|
+
- name: llm_summarize
|
|
96
|
+
config:
|
|
97
|
+
input_threshold: 1000
|
|
98
|
+
prompt: |
|
|
99
|
+
Summarize this jq query output focusing on:
|
|
100
|
+
- Key patterns and commonalities in the data
|
|
101
|
+
- Notable outliers, anomalies, or items that need attention
|
|
102
|
+
- Group similar results into aggregate descriptions when possible
|
|
103
|
+
- Highlight any empty results, null values, or missing data
|
|
104
|
+
- When applicable, mention specific resource names, namespaces, or values that stand out
|
|
105
|
+
- Organize findings in a structured way that helps with troubleshooting
|
|
106
|
+
- Be concise: aim for ≤ 50% of the original text; prioritize aggregates and actionable outliers
|
|
107
|
+
- Include grep-ready keys/values; avoid repeating entire objects or unchanged defaults
|
|
134
108
|
|
|
135
109
|
- name: "kubernetes_count"
|
|
136
110
|
user_description: "Count Kubernetes Resources: kubectl get {{kind}} --all-namespaces -o json | jq -c -r {{ jq_expr }}"
|