kubeagent-cli 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- kubeagent/__init__.py +4 -0
- kubeagent/__main__.py +6 -0
- kubeagent/agent/__init__.py +0 -0
- kubeagent/agent/agent.py +615 -0
- kubeagent/agent/deps.py +17 -0
- kubeagent/agent/memory.py +171 -0
- kubeagent/agent/model.py +83 -0
- kubeagent/agent/policy.py +212 -0
- kubeagent/agent/prompt_engine.py +133 -0
- kubeagent/agent/prompts.py +22 -0
- kubeagent/agent/subagent.py +359 -0
- kubeagent/cli/__init__.py +0 -0
- kubeagent/cli/headless.py +122 -0
- kubeagent/cli/main.py +176 -0
- kubeagent/cli/output.py +151 -0
- kubeagent/cli/repl.py +295 -0
- kubeagent/cli/setup_wizard.py +243 -0
- kubeagent/config/__init__.py +0 -0
- kubeagent/config/settings.py +110 -0
- kubeagent/hooks/__init__.py +5 -0
- kubeagent/hooks/engine.py +224 -0
- kubeagent/infra/__init__.py +0 -0
- kubeagent/infra/cluster.py +175 -0
- kubeagent/infra/executor.py +726 -0
- kubeagent/infra/kubectl.py +152 -0
- kubeagent/infra/model_router.py +312 -0
- kubeagent/infra/storage.py +78 -0
- kubeagent/mcp/__init__.py +5 -0
- kubeagent/mcp/cli.py +115 -0
- kubeagent/mcp/ecosystem/__init__.py +1 -0
- kubeagent/mcp/ecosystem/argocd.py +143 -0
- kubeagent/mcp/ecosystem/grafana.py +132 -0
- kubeagent/mcp/ecosystem/helm.py +207 -0
- kubeagent/mcp/ecosystem/istio.py +94 -0
- kubeagent/mcp/ecosystem/prometheus.py +137 -0
- kubeagent/mcp/server.py +192 -0
- kubeagent/plugins/__init__.py +10 -0
- kubeagent/plugins/cli.py +243 -0
- kubeagent/plugins/interface.py +101 -0
- kubeagent/plugins/manager.py +208 -0
- kubeagent/plugins/sandbox.py +107 -0
- kubeagent/plugins/user_tools.py +164 -0
- kubeagent/skills/__init__.py +6 -0
- kubeagent/skills/base.py +47 -0
- kubeagent/skills/builtin/__init__.py +8 -0
- kubeagent/skills/builtin/deploy.py +86 -0
- kubeagent/skills/builtin/diagnose.py +45 -0
- kubeagent/skills/builtin/rollback.py +61 -0
- kubeagent/skills/builtin/security_audit.py +46 -0
- kubeagent/skills/loader.py +92 -0
- kubeagent/skills/registry.py +66 -0
- kubeagent/tools/__init__.py +0 -0
- kubeagent/tools/base.py +31 -0
- kubeagent/tools/builtin/__init__.py +0 -0
- kubeagent/tools/builtin/apply.py +23 -0
- kubeagent/tools/builtin/configmaps.py +16 -0
- kubeagent/tools/builtin/delete.py +24 -0
- kubeagent/tools/builtin/describe.py +23 -0
- kubeagent/tools/builtin/events.py +32 -0
- kubeagent/tools/builtin/kubectl.py +76 -0
- kubeagent/tools/builtin/logs.py +33 -0
- kubeagent/tools/builtin/namespaces.py +16 -0
- kubeagent/tools/builtin/nodes.py +27 -0
- kubeagent/tools/builtin/nodes_ops.py +40 -0
- kubeagent/tools/builtin/pods.py +33 -0
- kubeagent/tools/builtin/restart.py +22 -0
- kubeagent/tools/builtin/scale.py +27 -0
- kubeagent/tools/builtin/services.py +27 -0
- kubeagent/tools/registry.py +100 -0
- kubeagent_cli-1.0.0.dist-info/METADATA +114 -0
- kubeagent_cli-1.0.0.dist-info/RECORD +74 -0
- kubeagent_cli-1.0.0.dist-info/WHEEL +4 -0
- kubeagent_cli-1.0.0.dist-info/entry_points.txt +2 -0
- kubeagent_cli-1.0.0.dist-info/licenses/LICENSE +21 -0
kubeagent/__init__.py
ADDED
kubeagent/__main__.py
ADDED
|
File without changes
|
kubeagent/agent/agent.py
ADDED
|
@@ -0,0 +1,615 @@
|
|
|
1
|
+
"""KubeAgent — Pydantic AI agent with Kubernetes tools."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import inspect
|
|
6
|
+
import json
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
from pydantic import BaseModel, Field
|
|
10
|
+
from pydantic_ai import Agent, RunContext
|
|
11
|
+
|
|
12
|
+
from kubeagent.agent.deps import KubeAgentDeps
|
|
13
|
+
from kubeagent.agent.model import get_agent_model
|
|
14
|
+
from kubeagent.agent.policy import PolicyDecision, build_impact_description, check_policy
|
|
15
|
+
from kubeagent.agent.prompts import SYSTEM_PROMPT
|
|
16
|
+
from kubeagent.config.settings import KubeAgentConfig, load_config
|
|
17
|
+
from kubeagent.infra.executor import PythonClientExecutor, SecurityLevel
|
|
18
|
+
from kubeagent.tools.registry import get_registry
|
|
19
|
+
|
|
20
|
+
# ---------------------------------------------------------------------------
|
|
21
|
+
# Input models for each tool (used for pydantic-ai tool schema generation)
|
|
22
|
+
# ---------------------------------------------------------------------------
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class GetPodsInput(BaseModel):
    """Input for get_pods."""

    # An empty string selects every namespace (see the field description).
    namespace: str = Field(
        description="Namespace to query. Empty = all namespaces.",
        default="",
    )
    # Optional label key/value pairs; None means no label filtering is requested.
    label_selector: dict[str, str] | None = Field(
        description="Filter by labels.",
        default=None,
    )
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class GetNodesInput(BaseModel):
    """Input for get_nodes."""

    # Intentionally field-less: the model declares no parameters.
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
class GetNamespacesInput(BaseModel):
    """Input for get_namespaces."""

    # Intentionally field-less: the model declares no parameters.
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
class GetServicesInput(BaseModel):
    """Input for get_services."""

    # Falls back to the "default" namespace when the caller gives none.
    namespace: str = Field(
        description="Namespace to query.",
        default="default",
    )
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
class GetConfigMapsInput(BaseModel):
    """Input for get_configmaps."""

    # Falls back to the "default" namespace when the caller gives none.
    namespace: str = Field(
        description="Namespace to query.",
        default="default",
    )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
class DescribeResourceInput(BaseModel):
    """Input for describe_resource."""

    # Required: which kind of resource to look up.
    kind: str = Field(
        description="Resource kind: pod, node, service, configmap, deployment, etc.",
    )
    # Required: the resource's name.
    name: str = Field(
        description="Resource name.",
    )
    # Optional: defaults to the "default" namespace.
    namespace: str = Field(
        description="Namespace.",
        default="default",
    )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
class GetEventsInput(BaseModel):
    """Input for get_events."""

    # An empty string selects every namespace (see the field description).
    namespace: str = Field(
        description="Namespace. Empty = all namespaces.",
        default="",
    )
    # Empty string by default, i.e. no field selector supplied.
    field_selector: str = Field(
        description="Kubernetes field selector filter.",
        default="",
    )
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class GetPodLogsInput(BaseModel):
    """Input for get_pod_logs."""

    # Required: the pod whose logs are requested.
    name: str = Field(
        description="Pod name.",
    )
    # Optional: defaults to the "default" namespace.
    namespace: str = Field(
        description="Namespace.",
        default="default",
    )
    # Optional: None leaves the container unspecified.
    container: str | None = Field(
        description="Container name.",
        default=None,
    )
    # Optional: 100 lines unless the caller asks for a different amount.
    tail_lines: int = Field(
        description="Number of log lines to fetch.",
        default=100,
    )
|
|
74
|
+
|
|
75
|
+
|
|
76
|
+
class ApplyYamlInput(BaseModel):
    """Input for apply_yaml."""

    # Required: the manifest text itself.
    yaml_content: str = Field(
        description="YAML content for the resource(s) to apply.",
    )
    # Optional: defaults to the "default" namespace.
    namespace: str = Field(
        description="Target namespace.",
        default="default",
    )
    # Optional: off by default, so a plain call requests a real apply.
    dry_run: bool = Field(
        description="Preview changes without applying.",
        default=False,
    )
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
class DeleteResourceInput(BaseModel):
    """Input for delete_resource."""

    # Required: kind of the resource to delete.
    kind: str = Field(
        description="Resource kind.",
    )
    # Required: name of the resource to delete.
    name: str = Field(
        description="Resource name.",
    )
    # Optional: defaults to the "default" namespace.
    namespace: str = Field(
        description="Namespace.",
        default="default",
    )
    # Optional: off by default, so a plain call requests a real deletion.
    dry_run: bool = Field(
        description="Preview deletion.",
        default=False,
    )
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
class ScaleResourceInput(BaseModel):
    """Input for scale_resource."""

    # Required: the workload kind to scale.
    kind: str = Field(
        description="Kind (deployment or statefulset).",
    )
    # Required: the workload's name.
    name: str = Field(
        description="Resource name.",
    )
    # Optional: defaults to the "default" namespace.
    namespace: str = Field(
        description="Namespace.",
        default="default",
    )
    # Required: the desired number of replicas.
    replicas: int = Field(
        description="Target replica count.",
    )
    # Optional: off by default, so a plain call requests a real scale.
    dry_run: bool = Field(
        description="Preview scaling.",
        default=False,
    )
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
class RestartPodInput(BaseModel):
    """Input for restart_pod."""

    # Required: the pod to restart.
    name: str = Field(
        description="Pod name.",
    )
    # Optional: defaults to the "default" namespace.
    namespace: str = Field(
        description="Namespace.",
        default="default",
    )
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
class CordonNodeInput(BaseModel):
    """Input for cordon_node."""

    # Required: the node to cordon.
    name: str = Field(
        description="Node name.",
    )
|
|
114
|
+
|
|
115
|
+
|
|
116
|
+
class UncordonNodeInput(BaseModel):
    """Input for uncordon_node."""

    # Required: the node to uncordon.
    name: str = Field(
        description="Node name.",
    )
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
class DrainNodeInput(BaseModel):
    """Input for drain_node."""

    # Required: the node to drain.
    name: str = Field(
        description="Node name.",
    )
    # Optional: off by default.
    force: bool = Field(
        description="Force delete non-daemonset pods.",
        default=False,
    )
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class KubectlExecInput(BaseModel):
    """Input for kubectl_exec."""

    # Required: the pod to run the command in.
    pod: str = Field(
        description="Pod name.",
    )
    # Optional: defaults to the "default" namespace.
    namespace: str = Field(
        description="Namespace.",
        default="default",
    )
    # Optional: None leaves the container unspecified.
    container: str | None = Field(
        description="Container name.",
        default=None,
    )
    # A factory is used so each instance gets its own ["/bin/sh"] list.
    command: list[str] = Field(
        description="Command to execute as a list of strings.",
        default_factory=lambda: ["/bin/sh"],
    )
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
class KubectlTopInput(BaseModel):
    """Input for kubectl_top."""

    # Optional: "pods" unless the caller picks another resource type.
    resource: str = Field(
        description="Resource type: pods or nodes.",
        default="pods",
    )
    # Optional: empty string by default, i.e. no namespace filter supplied.
    namespace: str = Field(
        description="Namespace filter.",
        default="",
    )
    # Optional: empty string by default, i.e. no label selector supplied.
    selector: str = Field(
        description="Label selector filter.",
        default="",
    )
|
|
147
|
+
|
|
148
|
+
|
|
149
|
+
class KubectlApplyFileInput(BaseModel):
    """Input for kubectl_apply_file."""

    # Required: where the manifest lives on disk.
    file_path: str = Field(
        description="Path to the YAML manifest file.",
    )
    # Optional: defaults to the "default" namespace.
    namespace: str = Field(
        description="Target namespace.",
        default="default",
    )
|
|
154
|
+
|
|
155
|
+
|
|
156
|
+
class DiagnoseIssueInput(BaseModel):
    """Input for diagnose_issue — parallel SubAgent diagnostic."""

    # Required: the free-text question to investigate.
    query: str = Field(
        description="The diagnostic question (e.g., 'Why is the payment-service crashing?').",
    )
    # Optional: defaults to the "default" namespace.
    namespace: str = Field(
        description="Target namespace.",
        default="default",
    )
|
|
163
|
+
|
|
164
|
+
|
|
165
|
+
# ---------------------------------------------------------------------------
|
|
166
|
+
# Agent creation
|
|
167
|
+
# ---------------------------------------------------------------------------
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def create_agent(
    config: KubeAgentConfig | None = None,
    system_prompt: str | None = None,
) -> Agent[KubeAgentDeps, str]:
    """Build a KubeAgent and bind the read, write and kubectl tool sets to it.

    Args:
        config: Configuration to use; when ``None`` it is obtained from
            ``load_config()``.
        system_prompt: Prompt override; any falsy value selects ``SYSTEM_PROMPT``.

    Returns:
        The configured agent with all tools registered.
    """
    effective_config = load_config() if config is None else config

    kube_agent: Agent[KubeAgentDeps, str] = Agent(
        model=get_agent_model(effective_config.model),
        system_prompt=system_prompt or SYSTEM_PROMPT,
        deps_type=KubeAgentDeps,
        output_type=str,
        retries=2,
        defer_model_check=True,
    )

    # Registration order matches the original: read, then write, then kubectl.
    for register in (_register_read_tools, _register_write_tools, _register_kubectl_tools):
        register(kube_agent)

    return kube_agent
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
# ---------------------------------------------------------------------------
|
|
198
|
+
# Tool registration helpers
|
|
199
|
+
# ---------------------------------------------------------------------------
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def _get_executor(ctx: RunContext[KubeAgentDeps]) -> PythonClientExecutor:
    """Build a new executor from the kubeconfig path stored in the run context's config."""
    cluster_settings = ctx.deps.config.cluster
    return PythonClientExecutor(kubeconfig_path=cluster_settings.kubeconfig)
|
|
205
|
+
|
|
206
|
+
|
|
207
|
+
def _format_result(result: Any) -> str:
|
|
208
|
+
"""Format a tool result for LLM consumption."""
|
|
209
|
+
if isinstance(result, str):
|
|
210
|
+
return result
|
|
211
|
+
if isinstance(result, list):
|
|
212
|
+
if not result:
|
|
213
|
+
return "No results found."
|
|
214
|
+
lines: list[str] = []
|
|
215
|
+
for i, item in enumerate(result):
|
|
216
|
+
if isinstance(item, dict):
|
|
217
|
+
parts = [f"{k}={v}" for k, v in item.items() if v is not None]
|
|
218
|
+
lines.append(f" {i + 1}. {' | '.join(parts)}")
|
|
219
|
+
else:
|
|
220
|
+
lines.append(f" {i + 1}. {item}")
|
|
221
|
+
return "\n".join(lines)
|
|
222
|
+
if isinstance(result, dict):
|
|
223
|
+
lines = []
|
|
224
|
+
for k, v in result.items():
|
|
225
|
+
if isinstance(v, list) and v and isinstance(v[0], dict):
|
|
226
|
+
lines.append(f"{k}:")
|
|
227
|
+
for item in v:
|
|
228
|
+
parts = [f"{ik}={iv}" for ik, iv in item.items() if iv is not None]
|
|
229
|
+
lines.append(f" - {' | '.join(parts)}")
|
|
230
|
+
elif isinstance(v, list):
|
|
231
|
+
lines.append(f"{k}: {', '.join(str(x) for x in v)}")
|
|
232
|
+
else:
|
|
233
|
+
lines.append(f"{k}: {v}")
|
|
234
|
+
return "\n".join(lines)
|
|
235
|
+
return str(result)
|
|
236
|
+
|
|
237
|
+
|
|
238
|
+
def _call_tool(tool_class: type, ctx: RunContext[KubeAgentDeps], **kwargs: Any) -> str:
    """Execute a tool behind the policy gate and return its formatted result.

    Steps:
        1. For tools that are not ``SecurityLevel.SAFE``, ask ``check_policy`` for a
           decision; ``DENY`` and ``CONFIRM`` short-circuit with a message.
        2. In dry-run mode, force ``dry_run=True`` when ``tool.execute`` declares that
           parameter. A non-SAFE tool without it is skipped rather than executed.
        3. Run the tool, format its result and, for non-SAFE tools with a memory
           manager attached, write an audit entry.

    Args:
        tool_class: Tool class to instantiate (called with no arguments).
        ctx: Run context; ``ctx.deps`` supplies ``auto_approve``, ``dry_run``,
            ``memory`` and the config used to build the executor.
        **kwargs: Arguments forwarded to ``tool.execute``.

    Returns:
        The formatted tool output, or a ``DENIED`` / ``CONFIRMATION REQUIRED`` /
        ``[DRY-RUN]`` / error message. Exceptions raised while building the executor
        or running the tool are reported as text instead of being propagated.
    """
    tool = tool_class()
    deps = ctx.deps
    needs_policy = tool.security_level != SecurityLevel.SAFE

    # --- Policy gate ---
    if needs_policy:
        registry = get_registry()
        decision = check_policy(
            tool.name,
            registry,
            args=kwargs,
            auto_approve=deps.auto_approve,
            dry_run=deps.dry_run,
        )
        if decision == PolicyDecision.DENY:
            return f"DENIED: Tool '{tool.name}' is not permitted."
        if decision == PolicyDecision.CONFIRM:
            impact = build_impact_description(tool.name, kwargs or {}, registry)
            return (
                f"CONFIRMATION REQUIRED: {impact} "
                "The user must confirm this operation before it can proceed. "
                "Ask the user to confirm, or suggest they use /yes to enable auto-approve mode."
            )

    # --- Dry-run override ---
    if deps.dry_run:
        if "dry_run" in inspect.signature(tool.execute).parameters:
            kwargs["dry_run"] = True
        elif needs_policy:
            # The tool cannot preview its effect, so running it would change the
            # cluster for real while the reply is labelled as a dry run.
            return (
                f"[DRY-RUN] Skipped '{tool.name}': this tool has no dry-run mode, "
                "so it was not executed."
            )

    try:
        executor = _get_executor(ctx)
        result = tool.execute(executor, **kwargs)
        formatted = _format_result(result)

        # --- Audit logging (non-SAFE operations only) ---
        if needs_policy and deps.memory is not None:
            deps.memory.audit.log(
                cluster=None,
                namespace=kwargs.get("namespace"),
                tool_name=tool.name,
                args=kwargs,
                result=formatted[:200],
                success=True,
            )

        if deps.dry_run:
            return f"[DRY-RUN] {formatted}"
        return formatted
    except ConnectionError as e:
        _audit_failure(tool, deps, kwargs, f"Cannot connect to cluster — {e}")
        return f"Error: Cannot connect to cluster — {e}"
    except RuntimeError as e:
        _audit_failure(tool, deps, kwargs, str(e))
        return f"Error: {e}"
    except Exception as e:
        # Last-resort boundary: the failure is audited and handed back as text.
        _audit_failure(tool, deps, kwargs, str(e))
        return f"Unexpected error: {e}"
|
|
296
|
+
|
|
297
|
+
|
|
298
|
+
def _audit_failure(tool: Any, deps: Any, kwargs: dict, error: str) -> None:
    """Record a failed tool run in the audit log.

    Nothing is written for ``SecurityLevel.SAFE`` tools or when ``deps.memory`` is
    ``None``. Only the first 200 characters of ``error`` are stored.
    """
    if tool.security_level == SecurityLevel.SAFE or deps.memory is None:
        return

    audit_entry = {
        "cluster": None,
        "namespace": kwargs.get("namespace"),
        "tool_name": tool.name,
        "args": kwargs,
        "result": error[:200],
        "success": False,
    }
    deps.memory.audit.log(**audit_entry)
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
# ---------------------------------------------------------------------------
|
|
312
|
+
# Read tools
|
|
313
|
+
# ---------------------------------------------------------------------------
|
|
314
|
+
|
|
315
|
+
|
|
316
|
+
def _register_read_tools(agent: Agent[KubeAgentDeps, str]) -> None:
    """Register all read-only tools.

    Every tool except ``diagnose_issue`` forwards the fields of its input model to
    ``_call_tool``. Tool classes are imported inside the function body rather than
    at module import time.

    Args:
        agent: The agent the tools are attached to.
    """
    from kubeagent.tools.builtin.configmaps import GetConfigMapsTool
    from kubeagent.tools.builtin.describe import DescribeResourceTool
    from kubeagent.tools.builtin.events import GetEventsTool
    from kubeagent.tools.builtin.logs import GetPodLogsTool
    from kubeagent.tools.builtin.namespaces import GetNamespacesTool
    from kubeagent.tools.builtin.nodes import GetNodesTool
    from kubeagent.tools.builtin.pods import GetPodsTool
    from kubeagent.tools.builtin.services import GetServicesTool

    @agent.tool(retries=1)
    async def get_pods(ctx: RunContext[KubeAgentDeps], input_data: GetPodsInput) -> str:
        """List pods with status, ready count, restarts, node, and age."""
        labels = input_data.label_selector
        if labels and isinstance(labels, str):
            # Tool input may come as JSON string
            labels = json.loads(labels)
        return _call_tool(GetPodsTool, ctx, namespace=input_data.namespace, label_selector=labels)

    @agent.tool(retries=1)
    async def get_nodes(ctx: RunContext[KubeAgentDeps]) -> str:
        """List all nodes with status, roles, version, and allocatable resources."""
        return _call_tool(GetNodesTool, ctx)

    @agent.tool(retries=1)
    async def get_namespaces(ctx: RunContext[KubeAgentDeps]) -> str:
        """List all namespaces in the cluster."""
        return _call_tool(GetNamespacesTool, ctx)

    @agent.tool(retries=1)
    async def get_services(ctx: RunContext[KubeAgentDeps], input_data: GetServicesInput) -> str:
        """List services in a namespace with type, cluster IP, and ports."""
        return _call_tool(GetServicesTool, ctx, namespace=input_data.namespace)

    @agent.tool(retries=1)
    async def get_configmaps(ctx: RunContext[KubeAgentDeps], input_data: GetConfigMapsInput) -> str:
        """List configmaps in a namespace."""
        return _call_tool(GetConfigMapsTool, ctx, namespace=input_data.namespace)

    @agent.tool(retries=1)
    async def describe_resource(
        ctx: RunContext[KubeAgentDeps], input_data: DescribeResourceInput
    ) -> str:
        """Get full details of a Kubernetes resource by kind and name."""
        return _call_tool(
            DescribeResourceTool,
            ctx,
            kind=input_data.kind,
            name=input_data.name,
            namespace=input_data.namespace,
        )

    @agent.tool(retries=1)
    async def get_events(ctx: RunContext[KubeAgentDeps], input_data: GetEventsInput) -> str:
        """Get recent events in a namespace or across the cluster."""
        return _call_tool(
            GetEventsTool,
            ctx,
            namespace=input_data.namespace,
            field_selector=input_data.field_selector,
        )

    @agent.tool(retries=1)
    async def get_pod_logs(ctx: RunContext[KubeAgentDeps], input_data: GetPodLogsInput) -> str:
        """Fetch logs from a pod container."""
        return _call_tool(
            GetPodLogsTool,
            ctx,
            name=input_data.name,
            namespace=input_data.namespace,
            container=input_data.container,
            tail_lines=input_data.tail_lines,
        )

    @agent.tool(retries=1)
    async def diagnose_issue(ctx: RunContext[KubeAgentDeps], input_data: DiagnoseIssueInput) -> str:
        """Diagnose a complex cluster issue using parallel SubAgents.

        Creates specialized SubAgents for different diagnostic angles:
        - Pod status and recent events
        - Logs and crash information
        - Resource usage and scheduling

        Use this for multi-faceted questions like 'Why is X crashing?'
        or 'Diagnose the payment service'.
        """
        from kubeagent.agent.subagent import SubAgentConfig, SubAgentDispatcher, SubAgentFactory
        from kubeagent.infra.model_router import get_router

        router = get_router(ctx.deps.config.model)
        subagent_model = router.select_model_for_subagent(input_data.query, [])
        factory = SubAgentFactory(config=ctx.deps.config.model)
        dispatcher = SubAgentDispatcher(factory)

        # Use router-selected model for all SubAgents
        # NOTE(review): the description above lists a logs angle, but only the two
        # SubAgents below are configured — confirm whether a third was intended.
        subagents = [
            SubAgentConfig(
                task=f"Check pod status and events in namespace {input_data.namespace}",
                tools=["get_pods", "get_events"],
                model=subagent_model,
                context={"namespace": input_data.namespace},
            ),
            SubAgentConfig(
                task=f"Check node status for pods in namespace {input_data.namespace}",
                tools=["get_nodes", "get_pods"],
                model=subagent_model,
                context={"namespace": input_data.namespace},
            ),
        ]

        results = await dispatcher.dispatch(subagents, timeout=60)
        synthesis = dispatcher.synthesize(results, main_task=input_data.query)
        return synthesis
|
|
448
|
+
|
|
449
|
+
|
|
450
|
+
# ---------------------------------------------------------------------------
|
|
451
|
+
# Write tools
|
|
452
|
+
# ---------------------------------------------------------------------------
|
|
453
|
+
|
|
454
|
+
|
|
455
|
+
def _register_write_tools(agent: Agent[KubeAgentDeps, str]) -> None:
    """Attach the cluster-mutating tools to ``agent``.

    Each tool gathers the fields of its input model into keyword arguments and
    delegates to ``_call_tool``. Tool classes are imported inside the function
    body rather than at module import time.
    """
    from kubeagent.tools.builtin.apply import ApplyYamlTool
    from kubeagent.tools.builtin.delete import DeleteResourceTool
    from kubeagent.tools.builtin.nodes_ops import CordonNodeTool, DrainNodeTool, UncordonNodeTool
    from kubeagent.tools.builtin.restart import RestartPodTool
    from kubeagent.tools.builtin.scale import ScaleResourceTool

    @agent.tool(retries=0)
    async def apply_yaml(ctx: RunContext[KubeAgentDeps], input_data: ApplyYamlInput) -> str:
        """Create or update resources from YAML. SENSITIVE: modifies cluster state."""
        params = {
            "yaml_content": input_data.yaml_content,
            "namespace": input_data.namespace,
            "dry_run": input_data.dry_run,
        }
        return _call_tool(ApplyYamlTool, ctx, **params)

    @agent.tool(retries=0)
    async def delete_resource(
        ctx: RunContext[KubeAgentDeps], input_data: DeleteResourceInput
    ) -> str:
        """Delete a resource by kind and name. DANGEROUS: irreversibly removes resources."""
        params = {
            "kind": input_data.kind,
            "name": input_data.name,
            "namespace": input_data.namespace,
            "dry_run": input_data.dry_run,
        }
        return _call_tool(DeleteResourceTool, ctx, **params)

    @agent.tool(retries=0)
    async def scale_resource(ctx: RunContext[KubeAgentDeps], input_data: ScaleResourceInput) -> str:
        """Scale a deployment or statefulset. SENSITIVE: changes replica count."""
        params = {
            "kind": input_data.kind,
            "name": input_data.name,
            "namespace": input_data.namespace,
            "replicas": input_data.replicas,
            "dry_run": input_data.dry_run,
        }
        return _call_tool(ScaleResourceTool, ctx, **params)

    @agent.tool(retries=0)
    async def restart_pod(ctx: RunContext[KubeAgentDeps], input_data: RestartPodInput) -> str:
        """Restart a pod by deleting it. DANGEROUS: causes pod disruption."""
        params = {"name": input_data.name, "namespace": input_data.namespace}
        return _call_tool(RestartPodTool, ctx, **params)

    @agent.tool(retries=0)
    async def cordon_node(ctx: RunContext[KubeAgentDeps], input_data: CordonNodeInput) -> str:
        """Mark a node as unschedulable. DANGEROUS: stops new pods from being scheduled."""
        node_name = input_data.name
        return _call_tool(CordonNodeTool, ctx, name=node_name)

    @agent.tool(retries=1)
    async def uncordon_node(ctx: RunContext[KubeAgentDeps], input_data: UncordonNodeInput) -> str:
        """Mark a node as schedulable again."""
        node_name = input_data.name
        return _call_tool(UncordonNodeTool, ctx, name=node_name)

    @agent.tool(retries=0)
    async def drain_node(ctx: RunContext[KubeAgentDeps], input_data: DrainNodeInput) -> str:
        """Drain a node: cordon + evict all non-daemonset pods. DANGEROUS: disrupts workloads."""
        params = {"name": input_data.name, "force": input_data.force}
        return _call_tool(DrainNodeTool, ctx, **params)
|
|
520
|
+
|
|
521
|
+
|
|
522
|
+
# ---------------------------------------------------------------------------
|
|
523
|
+
# kubectl tools
|
|
524
|
+
# ---------------------------------------------------------------------------
|
|
525
|
+
|
|
526
|
+
|
|
527
|
+
def _register_kubectl_tools(agent: Agent[KubeAgentDeps, str]) -> None:
    """Attach the kubectl wrapper tools (exec, top, apply -f) to ``agent``.

    Each tool gathers the fields of its input model into keyword arguments and
    delegates to ``_call_tool``.
    """
    from kubeagent.tools.builtin.kubectl import (
        KubectlApplyFileTool,
        KubectlExecTool,
        KubectlTopTool,
    )

    @agent.tool(retries=1)
    async def kubectl_exec(ctx: RunContext[KubeAgentDeps], input_data: KubectlExecInput) -> str:
        """Execute a command in a pod via kubectl exec."""
        params = {
            "pod": input_data.pod,
            "namespace": input_data.namespace,
            "container": input_data.container,
            "command": input_data.command,
        }
        return _call_tool(KubectlExecTool, ctx, **params)

    @agent.tool(retries=1)
    async def kubectl_top(ctx: RunContext[KubeAgentDeps], input_data: KubectlTopInput) -> str:
        """Show resource (CPU/memory) usage for pods or nodes."""
        params = {
            "resource": input_data.resource,
            "namespace": input_data.namespace,
            "selector": input_data.selector,
        }
        return _call_tool(KubectlTopTool, ctx, **params)

    @agent.tool(retries=0)
    async def kubectl_apply_file(
        ctx: RunContext[KubeAgentDeps], input_data: KubectlApplyFileInput
    ) -> str:
        """Apply a YAML manifest file via kubectl apply -f. SENSITIVE: modifies cluster state."""
        params = {
            "file_path": input_data.file_path,
            "namespace": input_data.namespace,
        }
        return _call_tool(KubectlApplyFileTool, ctx, **params)
|
|
569
|
+
|
|
570
|
+
|
|
571
|
+
# ---------------------------------------------------------------------------
|
|
572
|
+
# Public API
|
|
573
|
+
# ---------------------------------------------------------------------------
|
|
574
|
+
|
|
575
|
+
|
|
576
|
+
async def run_single_turn(
    prompt: str,
    config: KubeAgentConfig | None = None,
) -> str:
    """Answer one prompt with a freshly created agent.

    Args:
        prompt: User's natural language query.
        config: Configuration to use; when ``None`` it is obtained from
            ``load_config()``.

    Returns:
        The ``output`` of the agent run.
    """
    active_config = load_config() if config is None else config

    kube_agent = create_agent(active_config)
    run_result = await kube_agent.run(prompt, deps=KubeAgentDeps(config=active_config))
    return run_result.output
|
|
597
|
+
|
|
598
|
+
|
|
599
|
+
async def run_single_turn_stream(
    prompt: str,
    config: KubeAgentConfig | None = None,
):
    """Answer one prompt with a freshly created agent, streaming the reply.

    This is an async generator: each item is a text delta taken from
    ``response.stream_text(delta=True)``.

    Args:
        prompt: User's natural language query.
        config: Configuration to use; when ``None`` it is obtained from
            ``load_config()``.
    """
    active_config = load_config() if config is None else config

    kube_agent = create_agent(active_config)
    stream_ctx = kube_agent.run_stream(prompt, deps=KubeAgentDeps(config=active_config))

    async with stream_ctx as response:
        async for piece in response.stream_text(delta=True):
            yield piece
|
kubeagent/agent/deps.py
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
"""Core agent module for KubeAgent."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass
|
|
6
|
+
|
|
7
|
+
from kubeagent.config.settings import KubeAgentConfig
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
@dataclass
class KubeAgentDeps:
    """Per-run dependency bundle for the KubeAgent.

    Only ``config`` is required; every other field has a default.
    """

    # Loaded KubeAgent configuration.
    config: KubeAgentConfig
    # Off by default.
    auto_approve: bool = False
    # Off by default.
    dry_run: bool = False
    # MemoryManager instance; annotated as ``object`` to avoid a circular import.
    memory: object | None = None
|