triagent 0.1.0-alpha13 → 0.1.0-alpha18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/package.json +3 -4
  2. package/src/cli/config.ts +96 -0
  3. package/src/index.ts +201 -3
  4. package/src/integrations/elasticsearch/client.ts +210 -0
  5. package/src/integrations/grafana/client.ts +186 -0
  6. package/src/integrations/kubernetes/multi-cluster.ts +199 -0
  7. package/src/integrations/kubernetes/types.ts +24 -0
  8. package/src/integrations/loki/client.ts +219 -0
  9. package/src/integrations/prometheus/client.ts +163 -0
  10. package/src/integrations/slack/client.ts +265 -0
  11. package/src/integrations/teams/client.ts +199 -0
  12. package/src/mastra/agents/debugger.ts +152 -108
  13. package/src/mastra/tools/approval-store.ts +180 -0
  14. package/src/mastra/tools/cli.ts +94 -2
  15. package/src/mastra/tools/cost.ts +389 -0
  16. package/src/mastra/tools/logs.ts +210 -0
  17. package/src/mastra/tools/network.ts +253 -0
  18. package/src/mastra/tools/prometheus.ts +221 -0
  19. package/src/mastra/tools/remediation.ts +365 -0
  20. package/src/mastra/tools/runbook.ts +186 -0
  21. package/src/server/routes/history.ts +207 -0
  22. package/src/server/routes/notifications.ts +236 -0
  23. package/src/server/webhook.ts +36 -2
  24. package/src/storage/index.ts +3 -0
  25. package/src/storage/investigation-history.ts +277 -0
  26. package/src/storage/runbook-index.ts +330 -0
  27. package/src/storage/types.ts +72 -0
  28. package/src/tui/app.tsx +492 -76
  29. package/src/tui/components/approval-dialog.tsx +156 -0
  30. package/src/tui/components/approval-modal.tsx +278 -0
  31. package/src/tui/components/index.ts +38 -0
  32. package/src/tui/components/styled-span.tsx +24 -0
  33. package/src/tui/components/timeline.tsx +223 -0
  34. package/src/tui/components/toast.tsx +101 -0
@@ -1,123 +1,160 @@
1
1
  import { Agent } from "@mastra/core/agent";
2
2
  import { z } from "zod";
3
3
  import { cliTool } from "../tools/cli.js";
4
- import { gitTool } from "../tools/git.js";
5
- import { filesystemTool } from "../tools/filesystem.js";
6
- import { loadTriagentMd } from "../../cli/config.js";
4
+ import { loadTriagentMd, loadRunbookMd } from "../../cli/config.js";
7
5
  import type { Config } from "../../config.js";
8
6
 
9
- const DEBUGGER_INSTRUCTIONS = `You are an expert Kubernetes debugging agent named Triagent. Your role is to investigate and diagnose issues in Kubernetes clusters by analyzing resources, logs, code, and git history.
10
-
11
- ## Your Capabilities
12
-
13
- 1. **CLI Access** (cli tool):
14
- - Run any shell command including kubectl, grep, awk, jq, curl, etc.
15
- - Pipe commands together for powerful filtering and processing
16
- - Examples:
17
- - \`kubectl get pods -A | grep inventory\`
18
- - \`kubectl logs deploy/myapp --tail 100 | grep -i error\`
19
- - \`kubectl get pods -o json | jq '.items[].metadata.name'\`
20
- - \`kubectl describe pod mypod | grep -A10 Events\`
21
-
22
- 2. **Code Analysis** (filesystem tool):
23
- - Read source code files
24
- - List directory structures
25
- - Search for patterns in code
26
-
27
- 3. **Git History** (git tool):
28
- - View recent commits
29
- - Compare changes between commits
30
- - Show specific commit details
31
- - Blame files to find who changed what
7
+ const DEBUGGER_INSTRUCTIONS = `You are an expert Kubernetes debugging agent named Triagent. Your role is to investigate and diagnose issues in Kubernetes clusters using CLI tools.
8
+
9
+ ## Your Tool
10
+
11
+ You have access to a single powerful tool: **cli** - Execute any shell command. Use pipes, redirects, and command composition to accomplish complex tasks.
12
+
13
+ ## CLI Capabilities
14
+
15
+ ### Kubernetes (kubectl)
16
+ \`\`\`bash
17
+ # Resource discovery
18
+ kubectl get pods -A | grep -i <service>
19
+ kubectl get deploy,svc,pods -A -o wide
20
+ kubectl get pods -l app=<name> -n <namespace>
21
+
22
+ # Logs and events
23
+ kubectl logs deploy/<name> --tail 100 | grep -i error
24
+ kubectl logs <pod> -c <container> --since=1h
25
+ kubectl get events -A --sort-by='.lastTimestamp' | head -30
26
+
27
+ # Debugging
28
+ kubectl describe pod <name> -n <namespace>
29
+ kubectl get pod <name> -o yaml | grep -A20 status
30
+ kubectl top pods -n <namespace>
31
+ kubectl exec -it <pod> -- sh -c "command"
32
+
33
+ # Network debugging
34
+ kubectl exec <pod> -- nslookup <service>
35
+ kubectl exec <pod> -- nc -zv <host> <port>
36
+ kubectl get networkpolicy -A
37
+ kubectl get endpoints <service> -n <namespace>
38
+ \`\`\`
39
+
40
+ ### Git
41
+ \`\`\`bash
42
+ git log --oneline -20
43
+ git log --since="2 hours ago" --oneline
44
+ git diff HEAD~5
45
+ git show <commit>
46
+ git blame <file>
47
+ git log -p -- <file>
48
+ \`\`\`
49
+
50
+ ### Filesystem
51
+ \`\`\`bash
52
+ ls -la <path>
53
+ cat <file>
54
+ head -100 <file>
55
+ grep -r "pattern" <path>
56
+ find . -name "*.yaml" -exec grep -l "keyword" {} \\;
57
+ \`\`\`
58
+
59
+ ### Prometheus (via promtool or curl)
60
+ \`\`\`bash
61
+ # Query metrics
62
+ curl -s "http://prometheus:9090/api/v1/query?query=up" | jq .
63
+ curl -s "http://prometheus:9090/api/v1/query?query=container_cpu_usage_seconds_total{pod=~'myapp.*'}" | jq '.data.result[]'
64
+
65
+ # Get alerts
66
+ curl -s "http://prometheus:9090/api/v1/alerts" | jq '.data.alerts[] | {alertname: .labels.alertname, state: .state}'
67
+
68
+ # Check targets
69
+ curl -s "http://prometheus:9090/api/v1/targets" | jq '.data.activeTargets[] | {job: .labels.job, health: .health}'
70
+ \`\`\`
71
+
72
+ ### Loki (via logcli)
73
+ \`\`\`bash
74
+ # Query logs
75
+ logcli query '{namespace="production"}' --limit=100
76
+ logcli query '{app="myapp"} |= "error"' --since=1h
77
+ logcli query '{namespace="production"} | json | level="error"' --limit=50
78
+
79
+ # Tail logs
80
+ logcli query '{app="myapp"}' --tail
81
+ \`\`\`
82
+
83
+ ### Resource Analysis
84
+ \`\`\`bash
85
+ # Resource usage with jq
86
+ kubectl get pods -o json | jq '.items[] | {name: .metadata.name, cpu: .spec.containers[].resources.requests.cpu, memory: .spec.containers[].resources.requests.memory}'
87
+
88
+ # Count pods by status
89
+ kubectl get pods -A -o json | jq '.items | group_by(.status.phase) | map({status: .[0].status.phase, count: length})'
90
+ \`\`\`
32
91
 
33
92
  ## Resource Discovery Strategy
34
93
 
35
- When asked to find resources for a service (e.g., "inventory service"), DO NOT simply try one label like \`app=inventory\` and give up if not found. Instead, use a systematic discovery approach:
36
-
37
- 1. **Search by partial name match using grep**:
38
- - \`kubectl get pods -A | grep -i inventory\`
39
- - \`kubectl get deploy,svc -A | grep -i inventory\`
40
- - This finds resources with "inventory" anywhere in the name (e.g., \`inventory-api\`, \`svc-inventory\`)
41
-
42
- 2. **If grep returns no results, list all resources to browse**:
43
- - \`kubectl get pods,deploy,svc -A\` to see everything
44
- - \`kubectl get pods -n <namespace>\` if namespace is known
94
+ When asked to find resources for a service (e.g., "inventory service"), use systematic discovery:
45
95
 
46
- 3. **Try common label patterns**:
47
- - \`kubectl get pods -A -l app=inventory\`
48
- - \`kubectl get pods -A -l app.kubernetes.io/name=inventory\`
49
- - \`kubectl get pods -A -l component=inventory\`
50
-
51
- 4. **Follow the resource chain**:
52
- - Found a Service? \`kubectl describe svc <name> | grep Selector\` then find pods with that selector
53
- - Found a Deployment? \`kubectl get pods -l app=<deployment-name>\`
54
- - Use \`kubectl get endpoints <svc-name>\` to see which pods back a service
55
-
56
- 5. **Check events for context**:
57
- - \`kubectl get events -A --sort-by='.lastTimestamp' | grep -i inventory\`
58
- - \`kubectl get events -A --sort-by='.lastTimestamp' | head -20\` for recent cluster activity
59
-
60
- 6. **When you find a potential match**:
61
- - \`kubectl describe <resource> <name>\` to confirm it's the right one
62
- - Check related resources (pods for a deployment, endpoints for a service)
63
-
64
- Always report what you searched for and what you found, even if it's not an exact match. The user can confirm if you found the right resource.
96
+ 1. **Search by name**: \`kubectl get pods,deploy,svc -A | grep -i inventory\`
97
+ 2. **Try label patterns**: \`kubectl get pods -A -l app=inventory\` or \`app.kubernetes.io/name=inventory\`
98
+ 3. **Follow the chain**: Service Endpoints → Pods → Containers
99
+ 4. **Check events**: \`kubectl get events -A --sort-by='.lastTimestamp' | grep -i inventory\`
65
100
 
66
101
  ## Investigation Process
67
102
 
68
- When given an incident, follow this systematic approach:
69
-
70
- 1. **Understand the Issue**: Parse the incident description to identify:
71
- - What service/component is affected
72
- - What symptoms are being observed
73
- - When the issue started (if known)
74
-
75
- 2. **Discover Relevant Resources**:
76
- - Use the Resource Discovery Strategy above to find the affected resources
77
- - Don't assume exact names or labels - search broadly first
78
- - Follow the resource chain (Service → Deployment → Pods → Containers)
79
-
80
- 3. **Check Cluster State**:
81
- - Get pod status for discovered resources
82
- - Check for recent events related to those resources
83
- - Look at resource usage
84
-
85
- 4. **Analyze Logs**:
86
- - Fetch logs from affected pods (use \`--tail 100\` to get recent logs)
87
- - Look for errors, exceptions, or unusual patterns
88
- - If multiple containers, check each one
89
-
90
- 5. **Investigate Recent Changes**:
91
- - Check git log for recent commits
92
- - Review diffs of suspicious changes
93
- - Correlate timing with when issues started
94
-
95
- 6. **Examine Code**:
96
- - Read relevant configuration files
97
- - Check application code if needed
98
- - Look for misconfigurations
99
-
100
- 7. **Synthesize Findings**:
101
- - Identify the root cause
102
- - List affected resources
103
- - Provide actionable recommendations
103
+ 1. **Understand**: Parse incident for affected service, symptoms, timing
104
+ 2. **Discover**: Find affected resources using grep and label selectors
105
+ 3. **Check State**: Pod status, events, resource usage
106
+ 4. **Analyze Logs**: kubectl logs with grep for errors
107
+ 5. **Check Changes**: git log, git diff for recent commits
108
+ 6. **Examine Config**: Read manifests and application config
109
+ 7. **Synthesize**: Root cause, evidence, recommendations
104
110
 
105
111
  ## Output Format
106
112
 
107
- Always provide your findings in a clear, structured format:
108
- - **Summary**: Brief overview of the issue
109
- - **Root Cause**: The identified cause of the problem
110
- - **Evidence**: Specific data that supports your conclusion
111
- - **Affected Resources**: List of impacted K8s resources
112
- - **Recent Changes**: Relevant commits that might be related
113
- - **Recommendations**: Specific steps to remediate the issue
113
+ Provide findings in a structured format:
114
+ - **Summary**: Brief overview
115
+ - **Root Cause**: Identified cause
116
+ - **Evidence**: Supporting data
117
+ - **Affected Resources**: Impacted K8s resources
118
+ - **Recent Changes**: Relevant commits
119
+ - **Recommendations**: Remediation steps
120
+
121
+ ## Write Operations - AUTOMATIC UI APPROVAL
122
+
123
+ **IMPORTANT: Do NOT ask the user for permission in text. Just execute write commands directly.**
124
+
125
+ The CLI tool automatically detects write operations and triggers a UI-based approval prompt. Your job is to:
126
+ 1. **Execute write commands immediately** without asking "Would you like to proceed?" or similar
127
+ 2. The UI will show an approval dialog to the user
128
+ 3. If approved, you'll receive an approval token
129
+ 4. Retry the command with the provided \`approvalToken\`
130
+
131
+ **WRONG approach:**
132
+ \`\`\`
133
+ "Would you like me to scale the deployment? This requires your approval."
134
+ [Waiting for user to type "yes"]
135
+ \`\`\`
136
+
137
+ **CORRECT approach:**
138
+ \`\`\`
139
+ [Just execute the command]
140
+ kubectl scale deployment/myapp --replicas=2 -n prod
141
+ [UI shows approval prompt, user approves]
142
+ [Receive token, retry with token]
143
+ \`\`\`
144
+
145
+ **Write operations (automatically detected):**
146
+ - Kubernetes: \`kubectl delete|apply|create|patch|scale|rollout|drain|cordon\`
147
+ - Git: \`git commit|push|merge|rebase|reset\`
148
+ - File system: \`rm|mv|cp|mkdir|chmod\`
149
+
150
+ When you receive an approval token in the user's message, extract it and retry the command with \`approvalToken: "<token>"\`.
114
151
 
115
152
  ## Important Guidelines
116
153
 
117
- - Be thorough but efficient - don't run unnecessary commands
118
- - Focus on actionable insights
119
- - If unsure, state your confidence level
120
- - Prioritize quick wins that can restore service
154
+ - Use command composition with pipes for efficiency
155
+ - Be thorough but don't run unnecessary commands
156
+ - State confidence level when unsure
157
+ - Prioritize quick wins to restore service
121
158
  - Consider both application and infrastructure issues`;
122
159
 
123
160
  export const InvestigationResultSchema = z.object({
@@ -167,10 +204,19 @@ export async function createDebuggerAgent(config: Config) {
167
204
  // Load user instructions from ~/.config/triagent/TRIAGENT.md if present
168
205
  const userInstructions = await loadTriagentMd();
169
206
 
170
- // Combine user instructions with default instructions
171
- const instructions = userInstructions
172
- ? `## User-Provided Instructions\n\n${userInstructions}\n\n---\n\n${DEBUGGER_INSTRUCTIONS}`
173
- : DEBUGGER_INSTRUCTIONS;
207
+ // Load runbook from ~/.config/triagent/RUNBOOK.md if present
208
+ const runbook = await loadRunbookMd();
209
+
210
+ // Build instructions with optional user content and runbook
211
+ let instructions = DEBUGGER_INSTRUCTIONS;
212
+
213
+ if (userInstructions) {
214
+ instructions = `## User-Provided Instructions\n\n${userInstructions}\n\n---\n\n${instructions}`;
215
+ }
216
+
217
+ if (runbook) {
218
+ instructions = `${instructions}\n\n---\n\n## Runbook\n\nRefer to this runbook for standard operating procedures:\n\n${runbook}`;
219
+ }
174
220
 
175
221
  // Construct model config with API key and optional base URL
176
222
  const modelId = `${config.aiProvider}/${config.aiModel}` as const;
@@ -187,8 +233,6 @@ export async function createDebuggerAgent(config: Config) {
187
233
  model: modelConfig as any, // Mastra handles model routing
188
234
  tools: {
189
235
  cli: cliTool,
190
- git: gitTool,
191
- filesystem: filesystemTool,
192
236
  },
193
237
  });
194
238
  }
@@ -0,0 +1,180 @@
1
+ import { randomBytes } from "crypto";
2
+
3
/**
 * A write command that has been submitted for user approval.
 */
export interface PendingApproval {
  /** Identifier used to look up, approve, or reject this request. */
  id: string;

  /** The exact shell command the approval applies to. */
  command: string;

  /** Token that authorizes `command` once the request has been approved. */
  token: string;

  /** Severity bucket assigned to `command` when the request was created. */
  riskLevel: "low" | "medium" | "high" | "critical";

  /** Moment the request was created. */
  createdAt: Date;

  /** Moment after which the request is no longer valid. */
  expiresAt: Date;
}
11
+
12
/**
 * Contract for tracking approval requests for write commands and the
 * tokens issued once a request is approved.
 */
export interface ApprovalStore {
  /**
   * Register `command` as awaiting approval.
   * @returns the newly created pending approval record
   */
  requestApproval(command: string): PendingApproval;

  /**
   * Approve the pending request identified by `id`.
   * @returns the approval token, or `null` when no token could be issued
   */
  approve(id: string): string | null;

  /** Reject (discard) the pending request identified by `id`. */
  reject(id: string): void;

  /**
   * Check whether `token` authorizes `command`.
   * @returns `true` when the token is accepted for that command
   */
  validateToken(command: string, token: string): boolean;

  /** Look up a single pending approval by its ID. */
  getPending(id: string): PendingApproval | undefined;

  /** List every pending approval. */
  getAllPending(): PendingApproval[];

  /** Drop approvals whose expiry time has passed. */
  clearExpired(): void;
}
34
+
35
// Risk patterns - more dangerous commands = higher risk.
//
// kubectl, git and helm all accept tool-level flags *before* the verb
// (`kubectl -n prod delete ...`, `git -C repo push`, `helm -n ns uninstall`).
// This fragment matches zero or more such flags, each optionally followed by
// `=value` or by a separate value that does not itself start with `-`, so the
// verb is still recognised. Requiring the separate value not to start with
// `-` keeps the match unambiguous and avoids pathological backtracking.
const RISK_GLOBAL_FLAGS = String.raw`(?:--?[\w-]+(?:=\S+|\s+[^\s-]\S*)?\s+)*`;

/** Commands that can destroy cluster-wide or host-wide state. */
const CRITICAL_PATTERNS = [
  new RegExp(
    String.raw`\bkubectl\s+${RISK_GLOBAL_FLAGS}delete\s+(namespace|ns|node|pv|pvc|clusterrole)`,
    "i",
  ),
  // Recursive rm on an absolute path outside /tmp. Accepts the recursive flag
  // in any spelling/position (-r, -rf, -fr, -r -f, --recursive). The lookahead
  // only exempts /tmp itself or paths under /tmp/, not look-alikes such as
  // /tmpdata.
  /\brm\s+(?:-\S+\s+)*-(?:[a-z]*r[a-z]*|-recursive)\s+(?:-\S+\s+)*\/(?!tmp(?:[\/\s]|$))/i,
  // Force push, long form (also covers --force-with-lease).
  new RegExp(String.raw`\bgit\s+${RISK_GLOBAL_FLAGS}push\s+.*--force`, "i"),
  // Force push, short form: a single-dash flag group containing "f" (-f, -uf).
  new RegExp(
    String.raw`\bgit\s+${RISK_GLOBAL_FLAGS}push\s+(?:.*\s)?-[a-z]*f[a-z]*(?:\s|$)`,
    "i",
  ),
  new RegExp(String.raw`\bhelm\s+${RISK_GLOBAL_FLAGS}(uninstall|delete)\b`, "i"),
];

/** Destructive or hard-to-undo commands with a narrower blast radius. */
const HIGH_PATTERNS = [
  new RegExp(String.raw`\bkubectl\s+${RISK_GLOBAL_FLAGS}delete\b`, "i"),
  // apply from URL
  new RegExp(String.raw`\bkubectl\s+${RISK_GLOBAL_FLAGS}apply\s+-f\s+http`, "i"),
  new RegExp(String.raw`\bkubectl\s+${RISK_GLOBAL_FLAGS}drain\b`, "i"),
  new RegExp(String.raw`\bkubectl\s+${RISK_GLOBAL_FLAGS}cordon\b`, "i"),
  new RegExp(String.raw`\bgit\s+${RISK_GLOBAL_FLAGS}reset\s+--hard`, "i"),
  new RegExp(String.raw`\bgit\s+${RISK_GLOBAL_FLAGS}push\b`, "i"),
  new RegExp(String.raw`\bhelm\s+${RISK_GLOBAL_FLAGS}(install|upgrade)\b`, "i"),
];

/** State-changing commands that are routine during remediation. */
const MEDIUM_PATTERNS = [
  new RegExp(String.raw`\bkubectl\s+${RISK_GLOBAL_FLAGS}scale\b`, "i"),
  new RegExp(String.raw`\bkubectl\s+${RISK_GLOBAL_FLAGS}rollout\s+(restart|undo)`, "i"),
  new RegExp(String.raw`\bkubectl\s+${RISK_GLOBAL_FLAGS}(apply|create|patch)\b`, "i"),
  new RegExp(String.raw`\bgit\s+${RISK_GLOBAL_FLAGS}(commit|merge|rebase)`, "i"),
];

/**
 * Assign a risk level to a shell command.
 *
 * Tiers are checked from most to least severe and the first tier with a
 * matching pattern wins; a command matching no pattern is "low".
 *
 * @param command - the full shell command line to classify
 * @returns the highest risk tier whose patterns match `command`
 */
function classifyRisk(command: string): PendingApproval["riskLevel"] {
  if (CRITICAL_PATTERNS.some(p => p.test(command))) return "critical";
  if (HIGH_PATTERNS.some(p => p.test(command))) return "high";
  if (MEDIUM_PATTERNS.some(p => p.test(command))) return "medium";
  return "low";
}
66
+
67
/**
 * Produce a fresh approval token.
 *
 * @returns 16 random bytes rendered as a 32-character lowercase hex string
 */
function generateToken(): string {
  const entropy = randomBytes(16);
  return entropy.toString("hex");
}
70
+
71
/**
 * Produce a fresh approval request ID.
 *
 * @returns 8 random bytes rendered as a 16-character lowercase hex string
 */
function generateId(): string {
  const entropy = randomBytes(8);
  return entropy.toString("hex");
}
74
+
75
/** Lifetime of an approval request, counted from the moment it is created. */
const EXPIRATION_MS = 10 * 60 * 1000; // 10 minutes

/**
 * In-memory approval store.
 *
 * Requests live in `pending` (keyed by request ID) until they are approved,
 * rejected, or expire. Approving a request moves its token into
 * `approvedTokens` (keyed by token), where it stays until it is consumed by
 * a successful validation or expires.
 */
class ApprovalStoreImpl implements ApprovalStore {
  private pending: Map<string, PendingApproval> = new Map();
  private approvedTokens: Map<string, { command: string; expiresAt: Date }> = new Map();

  /** Create and remember a new pending approval for `command`. */
  requestApproval(command: string): PendingApproval {
    // Opportunistic cleanup so stale entries do not accumulate.
    this.clearExpired();

    const id = generateId();
    const token = generateToken();
    const createdAt = new Date();

    const request: PendingApproval = {
      id,
      command,
      token,
      riskLevel: classifyRisk(command),
      createdAt,
      expiresAt: new Date(createdAt.getTime() + EXPIRATION_MS),
    };

    this.pending.set(id, request);
    return request;
  }

  /**
   * Approve a pending request.
   *
   * @returns the request's token, or `null` if the ID is unknown or the
   *          request has already expired
   */
  approve(id: string): string | null {
    const request = this.pending.get(id);
    if (request === undefined) return null;

    // Approved or expired, the request leaves the pending set either way.
    this.pending.delete(id);

    if (Date.now() > request.expiresAt.getTime()) return null;

    // The token keeps the request's original expiry; approval does not extend it.
    this.approvedTokens.set(request.token, {
      command: request.command,
      expiresAt: request.expiresAt,
    });
    return request.token;
  }

  /** Discard a pending request without issuing a token. */
  reject(id: string): void {
    this.pending.delete(id);
  }

  /**
   * Check a token against a command.
   *
   * A token is accepted only if it was approved, has not expired, and was
   * issued for exactly this command string. An accepted token is removed so
   * it cannot be used twice; a token presented with a different command is
   * left in place.
   */
  validateToken(command: string, token: string): boolean {
    const grant = this.approvedTokens.get(token);
    if (grant === undefined) return false;

    if (Date.now() > grant.expiresAt.getTime()) {
      this.approvedTokens.delete(token);
      return false;
    }

    const sameCommand = grant.command === command;
    if (sameCommand) {
      // One-time use: consume on success.
      this.approvedTokens.delete(token);
    }
    return sameCommand;
  }

  /** Fetch a pending request by ID, evicting it first if it has expired. */
  getPending(id: string): PendingApproval | undefined {
    const request = this.pending.get(id);
    if (request === undefined) return undefined;

    if (Date.now() > request.expiresAt.getTime()) {
      this.pending.delete(id);
      return undefined;
    }
    return request;
  }

  /** Snapshot of all pending requests that have not expired. */
  getAllPending(): PendingApproval[] {
    this.clearExpired();
    return [...this.pending.values()];
  }

  /** Remove expired entries from both the pending and approved-token maps. */
  clearExpired(): void {
    const cutoff = Date.now();

    for (const [requestId, request] of this.pending) {
      if (cutoff > request.expiresAt.getTime()) this.pending.delete(requestId);
    }

    for (const [approvedToken, grant] of this.approvedTokens) {
      if (cutoff > grant.expiresAt.getTime()) this.approvedTokens.delete(approvedToken);
    }
  }
}

/** Single store instance exported by this module. */
export const approvalStore: ApprovalStore = new ApprovalStoreImpl();
@@ -1,11 +1,52 @@
1
1
  import { createTool } from "@mastra/core/tools";
2
2
  import { z } from "zod";
3
3
  import { execCommand } from "../../sandbox/bashlet.js";
4
+ import { approvalStore, type PendingApproval } from "./approval-store.js";
4
5
 
5
6
/** Result object returned by the CLI tool. */
interface CliOutput {
  /** Whether the tool call succeeded. */
  success: boolean;
  /** Output text of the command. */
  output: string;
  /** Error or status message, when there is one. */
  error?: string;
  /** Set when the command needs user approval. */
  requiresApproval?: boolean;
  /** The command this result refers to. */
  command?: string;

  // Token-based approval fields

  /** ID of the pending approval request created for the command. */
  approvalId?: string;
  /** Risk level of the pending approval request. */
  riskLevel?: PendingApproval["riskLevel"];
}
16
+
17
// kubectl, git and helm accept tool-level flags before the verb
// (`kubectl -n prod delete ...`, `git -C repo push`, `helm -n ns uninstall`).
// Without allowing for them, such commands would slip past the approval gate.
// This fragment matches zero or more flags, each optionally followed by
// `=value` or by a separate value that does not itself start with `-`
// (which keeps the match unambiguous and avoids pathological backtracking).
const CLI_GLOBAL_FLAGS = String.raw`(?:--?[\w-]+(?:=\S+|\s+[^\s-]\S*)?\s+)*`;

// Write command patterns that require user approval
const WRITE_COMMAND_PATTERNS = [
  // Kubernetes write operations
  new RegExp(
    String.raw`\bkubectl\s+${CLI_GLOBAL_FLAGS}(delete|apply|create|patch|edit|replace|set|label|annotate|taint|cordon|uncordon|drain)\b`,
    "i",
  ),
  new RegExp(
    String.raw`\bkubectl\s+${CLI_GLOBAL_FLAGS}rollout\s+(restart|undo|pause|resume)\b`,
    "i",
  ),
  new RegExp(String.raw`\bkubectl\s+${CLI_GLOBAL_FLAGS}scale\b`, "i"),
  // kubectl exec running a mutating program inside the container
  new RegExp(
    String.raw`\bkubectl\s+${CLI_GLOBAL_FLAGS}exec\b.*\s+--\s+.*(rm|mv|cp|chmod|chown|kill|pkill|shutdown|reboot|dd|mkfs|fdisk)\b`,
    "i",
  ),

  // Git write operations
  new RegExp(
    String.raw`\bgit\s+${CLI_GLOBAL_FLAGS}(commit|push|merge|rebase|reset|checkout|stash|tag|branch\s+-[dD]|cherry-pick|revert|am|pull)\b`,
    "i",
  ),

  // File system write operations
  /\b(rm|rmdir|mv|cp|mkdir|touch|chmod|chown|ln)\s+/i,
  // Redirects. Note the character class also contains `|`, so piping the
  // output of cat/echo/printf into another command is treated as a write too.
  /\b(cat|echo|printf)\s+.*[>|]/,
  /\btee\s+/i,
  /\bsed\s+-i/i, // In-place sed

  // Package managers
  /\b(apt|apt-get|yum|dnf|brew|npm|yarn|pip|cargo)\s+(install|remove|uninstall|update|upgrade)\b/i,

  // Service management
  /\b(systemctl|service)\s+(start|stop|restart|enable|disable)\b/i,

  // Docker/container write operations
  /\bdocker\s+(rm|rmi|stop|kill|prune|system\s+prune)\b/i,
  /\bdocker-compose\s+(down|rm|stop)\b/i,

  // Helm write operations
  new RegExp(
    String.raw`\bhelm\s+${CLI_GLOBAL_FLAGS}(install|upgrade|uninstall|delete|rollback)\b`,
    "i",
  ),
];

/**
 * Decide whether a shell command must go through user approval.
 *
 * @param command - the full shell command line about to be executed
 * @returns `true` if any write-operation pattern matches `command`
 */
function isWriteCommand(command: string): boolean {
  return WRITE_COMMAND_PATTERNS.some(pattern => pattern.test(command));
}
10
51
 
11
52
  function filterSensitiveData(output: string): string {
@@ -25,6 +66,9 @@ export const cliTool = createTool({
25
66
  Use this to run any CLI commands including kubectl, grep, awk, jq, curl, etc.
26
67
  Supports pipes and command chaining.
27
68
 
69
+ IMPORTANT: Write/modify commands require user approval before execution.
70
+ The tool will detect write operations and pause for confirmation.
71
+
28
72
  Examples:
29
73
  - List all pods: kubectl get pods -A
30
74
  - Find pods by name: kubectl get pods -A | grep inventory
@@ -35,10 +79,58 @@ Examples:
35
79
 
36
80
  inputSchema: z.object({
37
81
  command: z.string().describe("The shell command to execute"),
82
+ approvalToken: z.string().optional().describe("Approval token from user confirmation. Required for write operations."),
83
+ }),
84
+
85
+ outputSchema: z.object({
86
+ success: z.boolean(),
87
+ output: z.string(),
88
+ error: z.string().optional(),
89
+ requiresApproval: z.boolean().optional(),
90
+ command: z.string().optional(),
91
+ approvalId: z.string().optional(),
92
+ riskLevel: z.enum(["low", "medium", "high", "critical"]).optional(),
38
93
  }),
39
94
 
40
- execute: async ({ command }): Promise<CliOutput> => {
95
+ // eslint-disable-next-line @typescript-eslint/no-explicit-any
96
+ execute: (async (inputData: any): Promise<CliOutput> => {
97
+ const { command, approvalToken } = inputData;
98
+
41
99
  try {
100
+ // Check if this is a write command
101
+ if (isWriteCommand(command)) {
102
+ // If token provided, validate it
103
+ if (approvalToken) {
104
+ const isValid = approvalStore.validateToken(command, approvalToken);
105
+ if (!isValid) {
106
+ // Invalid or expired token - request new approval
107
+ const pending = approvalStore.requestApproval(command);
108
+ return {
109
+ success: false,
110
+ output: "",
111
+ requiresApproval: true,
112
+ command: command,
113
+ approvalId: pending.id,
114
+ riskLevel: pending.riskLevel,
115
+ error: `⚠️ APPROVAL TOKEN INVALID OR EXPIRED\n\nCommand: ${command}\nRisk Level: ${pending.riskLevel.toUpperCase()}\nApproval ID: ${pending.id}\n\nPlease wait for user to approve this operation. A new approval token will be provided.`,
116
+ };
117
+ }
118
+ // Token valid - proceed with execution
119
+ } else {
120
+ // No token - request approval
121
+ const pending = approvalStore.requestApproval(command);
122
+ return {
123
+ success: false,
124
+ output: "",
125
+ requiresApproval: true,
126
+ command: command,
127
+ approvalId: pending.id,
128
+ riskLevel: pending.riskLevel,
129
+ error: `⚠️ WRITE OPERATION DETECTED - APPROVAL REQUIRED\n\nCommand: ${command}\nRisk Level: ${pending.riskLevel.toUpperCase()}\nApproval ID: ${pending.id}\n\nThis command will modify state. Waiting for user approval...`,
130
+ };
131
+ }
132
+ }
133
+
42
134
  const result = await execCommand(command);
43
135
 
44
136
  if (result.exitCode !== 0) {
@@ -61,5 +153,5 @@ Examples:
61
153
  error: error instanceof Error ? error.message : String(error),
62
154
  };
63
155
  }
64
- },
156
+ }) as any,
65
157
  });