triagent 0.1.0-alpha9 → 0.1.0-beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -1
- package/package.json +9 -3
- package/src/cli/config.ts +118 -2
- package/src/config.ts +23 -3
- package/src/index.ts +262 -6
- package/src/integrations/elasticsearch/client.ts +210 -0
- package/src/integrations/grafana/client.ts +186 -0
- package/src/integrations/kubernetes/multi-cluster.ts +199 -0
- package/src/integrations/kubernetes/types.ts +24 -0
- package/src/integrations/loki/client.ts +219 -0
- package/src/integrations/prometheus/client.ts +163 -0
- package/src/integrations/slack/client.ts +265 -0
- package/src/integrations/teams/client.ts +199 -0
- package/src/mastra/agents/debugger.ts +164 -109
- package/src/mastra/index.ts +2 -2
- package/src/mastra/tools/approval-store.ts +180 -0
- package/src/mastra/tools/cli.ts +94 -2
- package/src/mastra/tools/cost.ts +389 -0
- package/src/mastra/tools/logs.ts +210 -0
- package/src/mastra/tools/network.ts +253 -0
- package/src/mastra/tools/prometheus.ts +221 -0
- package/src/mastra/tools/remediation.ts +365 -0
- package/src/mastra/tools/runbook.ts +186 -0
- package/src/sandbox/bashlet.ts +76 -10
- package/src/server/routes/history.ts +207 -0
- package/src/server/routes/notifications.ts +236 -0
- package/src/server/webhook.ts +36 -2
- package/src/storage/index.ts +3 -0
- package/src/storage/investigation-history.ts +277 -0
- package/src/storage/runbook-index.ts +330 -0
- package/src/storage/types.ts +72 -0
- package/src/tui/app.tsx +278 -197
- package/src/tui/components/approval-dialog.tsx +147 -0
- package/src/tui/components/approval-modal.tsx +278 -0
- package/src/tui/components/centered-layout.tsx +33 -0
- package/src/tui/components/editor.tsx +87 -0
- package/src/tui/components/header.tsx +53 -0
- package/src/tui/components/index.ts +55 -0
- package/src/tui/components/message-item.tsx +131 -0
- package/src/tui/components/messages-panel.tsx +71 -0
- package/src/tui/components/status-badge.tsx +20 -0
- package/src/tui/components/status-bar.tsx +39 -0
- package/src/tui/components/styled-span.tsx +24 -0
- package/src/tui/components/timeline.tsx +223 -0
- package/src/tui/components/toast.tsx +104 -0
- package/src/tui/theme/index.ts +21 -0
- package/src/tui/theme/tokens.ts +180 -0
|
@@ -1,122 +1,160 @@
|
|
|
1
1
|
import { Agent } from "@mastra/core/agent";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { cliTool } from "../tools/cli.js";
|
|
4
|
-
import {
|
|
5
|
-
import { filesystemTool } from "../tools/filesystem.js";
|
|
4
|
+
import { loadTriagentMd, loadRunbookMd } from "../../cli/config.js";
|
|
6
5
|
import type { Config } from "../../config.js";
|
|
7
6
|
|
|
8
|
-
const DEBUGGER_INSTRUCTIONS = `You are an expert Kubernetes debugging agent named Triagent. Your role is to investigate and diagnose issues in Kubernetes clusters
|
|
9
|
-
|
|
10
|
-
## Your
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
7
|
+
const DEBUGGER_INSTRUCTIONS = `You are an expert Kubernetes debugging agent named Triagent. Your role is to investigate and diagnose issues in Kubernetes clusters using CLI tools.
|
|
8
|
+
|
|
9
|
+
## Your Tool
|
|
10
|
+
|
|
11
|
+
You have access to a single powerful tool: **cli** - Execute any shell command. Use pipes, redirects, and command composition to accomplish complex tasks.
|
|
12
|
+
|
|
13
|
+
## CLI Capabilities
|
|
14
|
+
|
|
15
|
+
### Kubernetes (kubectl)
|
|
16
|
+
\`\`\`bash
|
|
17
|
+
# Resource discovery
|
|
18
|
+
kubectl get pods -A | grep -i <service>
|
|
19
|
+
kubectl get deploy,svc,pods -A -o wide
|
|
20
|
+
kubectl get pods -l app=<name> -n <namespace>
|
|
21
|
+
|
|
22
|
+
# Logs and events
|
|
23
|
+
kubectl logs deploy/<name> --tail 100 | grep -i error
|
|
24
|
+
kubectl logs <pod> -c <container> --since=1h
|
|
25
|
+
kubectl get events -A --sort-by='.lastTimestamp' | head -30
|
|
26
|
+
|
|
27
|
+
# Debugging
|
|
28
|
+
kubectl describe pod <name> -n <namespace>
|
|
29
|
+
kubectl get pod <name> -o yaml | grep -A20 status
|
|
30
|
+
kubectl top pods -n <namespace>
|
|
31
|
+
kubectl exec -it <pod> -- sh -c "command"
|
|
32
|
+
|
|
33
|
+
# Network debugging
|
|
34
|
+
kubectl exec <pod> -- nslookup <service>
|
|
35
|
+
kubectl exec <pod> -- nc -zv <host> <port>
|
|
36
|
+
kubectl get networkpolicy -A
|
|
37
|
+
kubectl get endpoints <service> -n <namespace>
|
|
38
|
+
\`\`\`
|
|
39
|
+
|
|
40
|
+
### Git
|
|
41
|
+
\`\`\`bash
|
|
42
|
+
git log --oneline -20
|
|
43
|
+
git log --since="2 hours ago" --oneline
|
|
44
|
+
git diff HEAD~5
|
|
45
|
+
git show <commit>
|
|
46
|
+
git blame <file>
|
|
47
|
+
git log -p -- <file>
|
|
48
|
+
\`\`\`
|
|
49
|
+
|
|
50
|
+
### Filesystem
|
|
51
|
+
\`\`\`bash
|
|
52
|
+
ls -la <path>
|
|
53
|
+
cat <file>
|
|
54
|
+
head -100 <file>
|
|
55
|
+
grep -r "pattern" <path>
|
|
56
|
+
find . -name "*.yaml" -exec grep -l "keyword" {} \\;
|
|
57
|
+
\`\`\`
|
|
58
|
+
|
|
59
|
+
### Prometheus (via promtool or curl)
|
|
60
|
+
\`\`\`bash
|
|
61
|
+
# Query metrics
|
|
62
|
+
curl -s "http://prometheus:9090/api/v1/query?query=up" | jq .
|
|
63
|
+
curl -s "http://prometheus:9090/api/v1/query?query=container_cpu_usage_seconds_total{pod=~'myapp.*'}" | jq '.data.result[]'
|
|
64
|
+
|
|
65
|
+
# Get alerts
|
|
66
|
+
curl -s "http://prometheus:9090/api/v1/alerts" | jq '.data.alerts[] | {alertname: .labels.alertname, state: .state}'
|
|
67
|
+
|
|
68
|
+
# Check targets
|
|
69
|
+
curl -s "http://prometheus:9090/api/v1/targets" | jq '.data.activeTargets[] | {job: .labels.job, health: .health}'
|
|
70
|
+
\`\`\`
|
|
71
|
+
|
|
72
|
+
### Loki (via logcli)
|
|
73
|
+
\`\`\`bash
|
|
74
|
+
# Query logs
|
|
75
|
+
logcli query '{namespace="production"}' --limit=100
|
|
76
|
+
logcli query '{app="myapp"} |= "error"' --since=1h
|
|
77
|
+
logcli query '{namespace="production"} | json | level="error"' --limit=50
|
|
78
|
+
|
|
79
|
+
# Tail logs
|
|
80
|
+
logcli query '{app="myapp"}' --tail
|
|
81
|
+
\`\`\`
|
|
82
|
+
|
|
83
|
+
### Resource Analysis
|
|
84
|
+
\`\`\`bash
|
|
85
|
+
# Resource usage with jq
|
|
86
|
+
kubectl get pods -o json | jq '.items[] | {name: .metadata.name, cpu: .spec.containers[].resources.requests.cpu, memory: .spec.containers[].resources.requests.memory}'
|
|
87
|
+
|
|
88
|
+
# Count pods by status
|
|
89
|
+
kubectl get pods -A -o json | jq '.items | group_by(.status.phase) | map({status: .[0].status.phase, count: length})'
|
|
90
|
+
\`\`\`
|
|
31
91
|
|
|
32
92
|
## Resource Discovery Strategy
|
|
33
93
|
|
|
34
|
-
When asked to find resources for a service (e.g., "inventory service"),
|
|
35
|
-
|
|
36
|
-
1. **Search by partial name match using grep**:
|
|
37
|
-
- \`kubectl get pods -A | grep -i inventory\`
|
|
38
|
-
- \`kubectl get deploy,svc -A | grep -i inventory\`
|
|
39
|
-
- This finds resources with "inventory" anywhere in the name (e.g., \`inventory-api\`, \`svc-inventory\`)
|
|
40
|
-
|
|
41
|
-
2. **If grep returns no results, list all resources to browse**:
|
|
42
|
-
- \`kubectl get pods,deploy,svc -A\` to see everything
|
|
43
|
-
- \`kubectl get pods -n <namespace>\` if namespace is known
|
|
94
|
+
When asked to find resources for a service (e.g., "inventory service"), use systematic discovery:
|
|
44
95
|
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
4. **Follow the resource chain**:
|
|
51
|
-
- Found a Service? \`kubectl describe svc <name> | grep Selector\` then find pods with that selector
|
|
52
|
-
- Found a Deployment? \`kubectl get pods -l app=<deployment-name>\`
|
|
53
|
-
- Use \`kubectl get endpoints <svc-name>\` to see which pods back a service
|
|
54
|
-
|
|
55
|
-
5. **Check events for context**:
|
|
56
|
-
- \`kubectl get events -A --sort-by='.lastTimestamp' | grep -i inventory\`
|
|
57
|
-
- \`kubectl get events -A --sort-by='.lastTimestamp' | head -20\` for recent cluster activity
|
|
58
|
-
|
|
59
|
-
6. **When you find a potential match**:
|
|
60
|
-
- \`kubectl describe <resource> <name>\` to confirm it's the right one
|
|
61
|
-
- Check related resources (pods for a deployment, endpoints for a service)
|
|
62
|
-
|
|
63
|
-
Always report what you searched for and what you found, even if it's not an exact match. The user can confirm if you found the right resource.
|
|
96
|
+
1. **Search by name**: \`kubectl get pods,deploy,svc -A | grep -i inventory\`
|
|
97
|
+
2. **Try label patterns**: \`kubectl get pods -A -l app=inventory\` or \`app.kubernetes.io/name=inventory\`
|
|
98
|
+
3. **Follow the chain**: Service → Endpoints → Pods → Containers
|
|
99
|
+
4. **Check events**: \`kubectl get events -A --sort-by='.lastTimestamp' | grep -i inventory\`
|
|
64
100
|
|
|
65
101
|
## Investigation Process
|
|
66
102
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
2. **Discover Relevant Resources**:
|
|
75
|
-
- Use the Resource Discovery Strategy above to find the affected resources
|
|
76
|
-
- Don't assume exact names or labels - search broadly first
|
|
77
|
-
- Follow the resource chain (Service → Deployment → Pods → Containers)
|
|
78
|
-
|
|
79
|
-
3. **Check Cluster State**:
|
|
80
|
-
- Get pod status for discovered resources
|
|
81
|
-
- Check for recent events related to those resources
|
|
82
|
-
- Look at resource usage
|
|
83
|
-
|
|
84
|
-
4. **Analyze Logs**:
|
|
85
|
-
- Fetch logs from affected pods (use \`--tail 100\` to get recent logs)
|
|
86
|
-
- Look for errors, exceptions, or unusual patterns
|
|
87
|
-
- If multiple containers, check each one
|
|
88
|
-
|
|
89
|
-
5. **Investigate Recent Changes**:
|
|
90
|
-
- Check git log for recent commits
|
|
91
|
-
- Review diffs of suspicious changes
|
|
92
|
-
- Correlate timing with when issues started
|
|
93
|
-
|
|
94
|
-
6. **Examine Code**:
|
|
95
|
-
- Read relevant configuration files
|
|
96
|
-
- Check application code if needed
|
|
97
|
-
- Look for misconfigurations
|
|
98
|
-
|
|
99
|
-
7. **Synthesize Findings**:
|
|
100
|
-
- Identify the root cause
|
|
101
|
-
- List affected resources
|
|
102
|
-
- Provide actionable recommendations
|
|
103
|
+
1. **Understand**: Parse incident for affected service, symptoms, timing
|
|
104
|
+
2. **Discover**: Find affected resources using grep and label selectors
|
|
105
|
+
3. **Check State**: Pod status, events, resource usage
|
|
106
|
+
4. **Analyze Logs**: kubectl logs with grep for errors
|
|
107
|
+
5. **Check Changes**: git log, git diff for recent commits
|
|
108
|
+
6. **Examine Config**: Read manifests and application config
|
|
109
|
+
7. **Synthesize**: Root cause, evidence, recommendations
|
|
103
110
|
|
|
104
111
|
## Output Format
|
|
105
112
|
|
|
106
|
-
|
|
107
|
-
- **Summary**: Brief overview
|
|
108
|
-
- **Root Cause**:
|
|
109
|
-
- **Evidence**:
|
|
110
|
-
- **Affected Resources**:
|
|
111
|
-
- **Recent Changes**: Relevant commits
|
|
112
|
-
- **Recommendations**:
|
|
113
|
+
Provide findings in a structured format:
|
|
114
|
+
- **Summary**: Brief overview
|
|
115
|
+
- **Root Cause**: Identified cause
|
|
116
|
+
- **Evidence**: Supporting data
|
|
117
|
+
- **Affected Resources**: Impacted K8s resources
|
|
118
|
+
- **Recent Changes**: Relevant commits
|
|
119
|
+
- **Recommendations**: Remediation steps
|
|
120
|
+
|
|
121
|
+
## Write Operations - AUTOMATIC UI APPROVAL
|
|
122
|
+
|
|
123
|
+
**IMPORTANT: Do NOT ask the user for permission in text. Just execute write commands directly.**
|
|
124
|
+
|
|
125
|
+
The CLI tool automatically detects write operations and triggers a UI-based approval prompt. Your job is to:
|
|
126
|
+
1. **Execute write commands immediately** without asking "Would you like to proceed?" or similar
|
|
127
|
+
2. The UI will show an approval dialog to the user
|
|
128
|
+
3. If approved, you'll receive an approval token
|
|
129
|
+
4. Retry the command with the provided \`approvalToken\`
|
|
130
|
+
|
|
131
|
+
**WRONG approach:**
|
|
132
|
+
\`\`\`
|
|
133
|
+
"Would you like me to scale the deployment? This requires your approval."
|
|
134
|
+
[Waiting for user to type "yes"]
|
|
135
|
+
\`\`\`
|
|
136
|
+
|
|
137
|
+
**CORRECT approach:**
|
|
138
|
+
\`\`\`
|
|
139
|
+
[Just execute the command]
|
|
140
|
+
kubectl scale deployment/myapp --replicas=2 -n prod
|
|
141
|
+
[UI shows approval prompt, user approves]
|
|
142
|
+
[Receive token, retry with token]
|
|
143
|
+
\`\`\`
|
|
144
|
+
|
|
145
|
+
**Write operations (automatically detected):**
|
|
146
|
+
- Kubernetes: \`kubectl delete|apply|create|patch|scale|rollout|drain|cordon\`
|
|
147
|
+
- Git: \`git commit|push|merge|rebase|reset\`
|
|
148
|
+
- File system: \`rm|mv|cp|mkdir|chmod\`
|
|
149
|
+
|
|
150
|
+
When you receive an approval token in the user's message, extract it and retry the command with \`approvalToken: "<token>"\`.
|
|
113
151
|
|
|
114
152
|
## Important Guidelines
|
|
115
153
|
|
|
116
|
-
-
|
|
117
|
-
-
|
|
118
|
-
-
|
|
119
|
-
- Prioritize quick wins
|
|
154
|
+
- Use command composition with pipes for efficiency
|
|
155
|
+
- Be thorough but don't run unnecessary commands
|
|
156
|
+
- State confidence level when unsure
|
|
157
|
+
- Prioritize quick wins to restore service
|
|
120
158
|
- Consider both application and infrastructure issues`;
|
|
121
159
|
|
|
122
160
|
export const InvestigationResultSchema = z.object({
|
|
@@ -162,22 +200,39 @@ export const InvestigationResultSchema = z.object({
|
|
|
162
200
|
|
|
163
201
|
export type InvestigationResult = z.infer<typeof InvestigationResultSchema>;
|
|
164
202
|
|
|
165
|
-
export function createDebuggerAgent(config: Config) {
|
|
166
|
-
//
|
|
203
|
+
export async function createDebuggerAgent(config: Config) {
|
|
204
|
+
// Load user instructions from ~/.config/triagent/TRIAGENT.md if present
|
|
205
|
+
const userInstructions = await loadTriagentMd();
|
|
206
|
+
|
|
207
|
+
// Load runbook from ~/.config/triagent/RUNBOOK.md if present
|
|
208
|
+
const runbook = await loadRunbookMd();
|
|
209
|
+
|
|
210
|
+
// Build instructions with optional user content and runbook
|
|
211
|
+
let instructions = DEBUGGER_INSTRUCTIONS;
|
|
212
|
+
|
|
213
|
+
if (userInstructions) {
|
|
214
|
+
instructions = `## User-Provided Instructions\n\n${userInstructions}\n\n---\n\n${instructions}`;
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
if (runbook) {
|
|
218
|
+
instructions = `${instructions}\n\n---\n\n## Runbook\n\nRefer to this runbook for standard operating procedures:\n\n${runbook}`;
|
|
219
|
+
}
|
|
220
|
+
|
|
221
|
+
// Construct model config with API key and optional base URL
|
|
167
222
|
const modelId = `${config.aiProvider}/${config.aiModel}` as const;
|
|
168
|
-
const modelConfig =
|
|
169
|
-
|
|
170
|
-
:
|
|
223
|
+
const modelConfig = {
|
|
224
|
+
id: modelId,
|
|
225
|
+
apiKey: config.apiKey,
|
|
226
|
+
...(config.baseUrl && { url: config.baseUrl }),
|
|
227
|
+
};
|
|
171
228
|
|
|
172
229
|
return new Agent({
|
|
173
230
|
id: "kubernetes-debugger",
|
|
174
231
|
name: "Kubernetes Debugger",
|
|
175
|
-
instructions
|
|
232
|
+
instructions,
|
|
176
233
|
model: modelConfig as any, // Mastra handles model routing
|
|
177
234
|
tools: {
|
|
178
235
|
cli: cliTool,
|
|
179
|
-
git: gitTool,
|
|
180
|
-
filesystem: filesystemTool,
|
|
181
236
|
},
|
|
182
237
|
});
|
|
183
238
|
}
|
package/src/mastra/index.ts
CHANGED
|
@@ -4,12 +4,12 @@ import type { Config } from "../config.js";
|
|
|
4
4
|
|
|
5
5
|
let mastraInstance: Mastra | null = null;
|
|
6
6
|
|
|
7
|
-
export function createMastraInstance(config: Config): Mastra {
|
|
7
|
+
export async function createMastraInstance(config: Config): Promise<Mastra> {
|
|
8
8
|
if (mastraInstance) {
|
|
9
9
|
return mastraInstance;
|
|
10
10
|
}
|
|
11
11
|
|
|
12
|
-
const debuggerAgent = createDebuggerAgent(config);
|
|
12
|
+
const debuggerAgent = await createDebuggerAgent(config);
|
|
13
13
|
|
|
14
14
|
mastraInstance = new Mastra({
|
|
15
15
|
agents: {
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
import { randomBytes } from "crypto";
|
|
2
|
+
|
|
3
|
+
/**
 * A write command that is waiting for (or has just received) a user decision.
 */
export interface PendingApproval {
  /** Identifier of this request; used to approve, reject or look it up. */
  id: string;

  /** The exact shell command the approval applies to. */
  command: string;

  /** One-time token that authorises `command` once the request is approved. */
  token: string;

  /** Risk classification derived from the command text. */
  riskLevel: "low" | "medium" | "high" | "critical";

  /** When the request was created. */
  createdAt: Date;

  /** When the request (and its token) stops being valid. */
  expiresAt: Date;
}
|
|
11
|
+
|
|
12
|
+
/**
 * Contract for the store that tracks approval requests for write commands
 * and the one-time tokens issued once a request is approved.
 */
export interface ApprovalStore {
  /**
   * Register a new approval request.
   * @param command The command that needs approval.
   * @returns The newly created pending approval.
   */
  requestApproval(command: string): PendingApproval;

  /**
   * Approve a pending request.
   * @param id Identifier of the pending request.
   * @returns The approval token, or `null` when the request cannot be approved.
   */
  approve(id: string): string | null;

  /**
   * Reject a pending request.
   * @param id Identifier of the pending request.
   */
  reject(id: string): void;

  /**
   * Check whether a token authorises a command.
   * @param command The command about to be executed.
   * @param token The approval token supplied with it.
   * @returns `true` when the token is valid for that command.
   */
  validateToken(command: string, token: string): boolean;

  /**
   * Look up a single pending approval.
   * @param id Identifier of the pending request.
   */
  getPending(id: string): PendingApproval | undefined;

  /** List every pending approval. */
  getAllPending(): PendingApproval[];

  /** Drop approvals that have expired. */
  clearExpired(): void;
}
|
|
34
|
+
|
|
35
|
+
// Risk patterns - more dangerous commands = higher risk
|
|
36
|
+
const CRITICAL_PATTERNS = [
  /\bkubectl\s+delete\s+(namespace|ns|node|pv|pvc|clusterrole)/i,
  // Recursive rm on an absolute path outside /tmp. The recursive flag may be
  // combined in any order (-rf, -fr), given separately (-r -f) or spelled out
  // (--recursive). Only /tmp itself and paths below /tmp/ are exempt, so
  // look-alikes such as /tmpfiles are still treated as critical.
  /\brm\s+(?:-\S+\s+)*?(?:-[a-z]*r[a-z]*|--recursive)\s+(?:-\S+\s+)*\/(?!tmp(?:\/|\s|$))/i,
  // Forced push, via the long flag (--force, --force-with-lease, ...) or a
  // short-flag group containing "f" (-f, -uf, ...).
  /\bgit\s+push\b.*\s(?:--force|-[a-z]*f[a-z]*)\b/i,
  /\bhelm\s+(uninstall|delete)\b/i,
];

const HIGH_PATTERNS = [
  /\bkubectl\s+delete\b/i,
  /\bkubectl\s+apply\s+-f\s+http/i, // apply from URL
  /\bkubectl\s+drain\b/i,
  /\bkubectl\s+cordon\b/i,
  /\bgit\s+reset\s+--hard/i,
  /\bgit\s+push\b/i,
  /\bhelm\s+(install|upgrade)\b/i,
];

const MEDIUM_PATTERNS = [
  /\bkubectl\s+scale\b/i,
  /\bkubectl\s+rollout\s+(restart|undo)/i,
  /\bkubectl\s+(apply|create|patch)\b/i,
  /\bgit\s+(commit|merge|rebase)/i,
];

/**
 * Classify how risky a shell command is.
 *
 * The pattern tables are checked from most to least severe and the first
 * table with a matching pattern decides the result, so a command matching
 * both a critical and a high pattern is reported as "critical".
 *
 * @param command The full command line to classify.
 * @returns "critical", "high" or "medium" when a pattern of that tier
 *   matches, otherwise "low".
 */
function classifyRisk(command: string): PendingApproval["riskLevel"] {
  if (CRITICAL_PATTERNS.some((pattern) => pattern.test(command))) return "critical";
  if (HIGH_PATTERNS.some((pattern) => pattern.test(command))) return "high";
  if (MEDIUM_PATTERNS.some((pattern) => pattern.test(command))) return "medium";
  return "low";
}
|
|
66
|
+
|
|
67
|
+
/**
 * Create a fresh approval token.
 *
 * @returns 16 random bytes rendered as a hex string.
 */
function generateToken(): string {
  const raw = randomBytes(16);
  return raw.toString("hex");
}
|
|
70
|
+
|
|
71
|
+
/**
 * Create a fresh approval request identifier.
 *
 * @returns 8 random bytes rendered as a hex string.
 */
function generateId(): string {
  const raw = randomBytes(8);
  return raw.toString("hex");
}
|
|
74
|
+
|
|
75
|
+
const EXPIRATION_MS = 10 * 60 * 1000; // 10 minutes
|
|
76
|
+
|
|
77
|
+
// Singleton store instance
|
|
78
|
+
/**
 * In-memory ApprovalStore.
 *
 * Requests live in `pending` until they are approved, rejected or expire.
 * Approving a request moves its token into `approvedTokens`, where it can be
 * consumed exactly once by `validateToken` for the exact same command.
 */
class ApprovalStoreImpl implements ApprovalStore {
  /** Requests awaiting a decision, keyed by approval id. */
  private pending: Map<string, PendingApproval> = new Map();
  /** Approved, not yet consumed tokens, keyed by the token itself. */
  private approvedTokens: Map<string, { command: string; expiresAt: Date }> = new Map();

  /**
   * Register a new request for `command`, after purging expired entries.
   * The request expires EXPIRATION_MS after its creation time.
   */
  requestApproval(command: string): PendingApproval {
    this.clearExpired();

    const createdAt = new Date();
    const approval: PendingApproval = {
      id: generateId(),
      command,
      token: generateToken(),
      riskLevel: classifyRisk(command),
      createdAt,
      expiresAt: new Date(createdAt.getTime() + EXPIRATION_MS),
    };

    this.pending.set(approval.id, approval);
    return approval;
  }

  /**
   * Approve the request with the given id.
   * Returns its token, or null when the id is unknown or the request has
   * expired. Either way the request is no longer pending afterwards.
   */
  approve(id: string): string | null {
    const request = this.pending.get(id);
    if (request === undefined) {
      return null;
    }

    // Expired and approved requests both leave the pending set.
    this.pending.delete(id);
    if (Date.now() > request.expiresAt.getTime()) {
      return null;
    }

    const { token, command, expiresAt } = request;
    this.approvedTokens.set(token, { command, expiresAt });
    return token;
  }

  /** Discard the pending request with the given id, if any. */
  reject(id: string): void {
    this.pending.delete(id);
  }

  /**
   * Check `token` against `command`.
   * An expired token is removed and rejected. A token presented with a
   * different command is rejected but kept. A matching token is consumed
   * (one-time use) and accepted.
   */
  validateToken(command: string, token: string): boolean {
    const grant = this.approvedTokens.get(token);
    if (grant === undefined) {
      return false;
    }

    const expired = Date.now() > grant.expiresAt.getTime();
    if (!expired && grant.command !== command) {
      // Wrong command: reject without consuming the token.
      return false;
    }

    this.approvedTokens.delete(token);
    return !expired;
  }

  /** Return the pending request for `id`, dropping it first if it has expired. */
  getPending(id: string): PendingApproval | undefined {
    const request = this.pending.get(id);
    if (request === undefined) {
      return undefined;
    }
    if (Date.now() > request.expiresAt.getTime()) {
      this.pending.delete(id);
      return undefined;
    }
    return request;
  }

  /** Return all requests that are still pending after purging expired ones. */
  getAllPending(): PendingApproval[] {
    this.clearExpired();
    return [...this.pending.values()];
  }

  /** Remove every expired pending request and every expired approved token. */
  clearExpired(): void {
    const nowMs = Date.now();

    this.pending.forEach((request, id) => {
      if (nowMs > request.expiresAt.getTime()) {
        this.pending.delete(id);
      }
    });

    this.approvedTokens.forEach((grant, token) => {
      if (nowMs > grant.expiresAt.getTime()) {
        this.approvedTokens.delete(token);
      }
    });
  }
}
|
|
178
|
+
|
|
179
|
+
// Export singleton instance
|
|
180
|
+
export const approvalStore: ApprovalStore = new ApprovalStoreImpl();
|
package/src/mastra/tools/cli.ts
CHANGED
|
@@ -1,11 +1,52 @@
|
|
|
1
1
|
import { createTool } from "@mastra/core/tools";
|
|
2
2
|
import { z } from "zod";
|
|
3
3
|
import { execCommand } from "../../sandbox/bashlet.js";
|
|
4
|
+
import { approvalStore, type PendingApproval } from "./approval-store.js";
|
|
4
5
|
|
|
5
6
|
/** Result object returned by the cli tool. */
interface CliOutput {
  /** Whether the tool call succeeded; false when approval is still required. */
  success: boolean;

  /** Command output; empty when the command was held back for approval. */
  output: string;

  /** Error text, or the approval notice when the command was held back. */
  error?: string;

  /** Set to true when the command was detected as a write and needs approval. */
  requiresApproval?: boolean;

  /** The command the approval request refers to. */
  command?: string;

  // Token-based approval fields

  /** Identifier of the pending approval created for the command. */
  approvalId?: string;

  /** Risk classification of the pending approval. */
  riskLevel?: PendingApproval["riskLevel"];
}
|
|
16
|
+
|
|
17
|
+
// Write command patterns that require user approval
|
|
18
|
+
const WRITE_COMMAND_PATTERNS = [
  // Kubernetes write operations
  /\bkubectl\s+(delete|apply|create|patch|edit|replace|set|label|annotate|taint|cordon|uncordon|drain)\b/i,
  /\bkubectl\s+rollout\s+(restart|undo|pause|resume)\b/i,
  /\bkubectl\s+scale\b/i,
  /\bkubectl\s+exec\b.*\s+--\s+.*(rm|mv|cp|chmod|chown|kill|pkill|shutdown|reboot|dd|mkfs|fdisk)\b/i,

  // Git write operations
  /\bgit\s+(commit|push|merge|rebase|reset|checkout|stash|tag|branch\s+-[dD]|cherry-pick|revert|am|pull)\b/i,

  // File system write operations
  /\b(rm|rmdir|mv|cp|mkdir|touch|chmod|chown|ln)\s+/i,
  /\b(cat|echo|printf)\s+.*[>|]/, // Redirects (the character class also matches a pipe)
  /\btee\s+/i,
  /\bsed\s+-i/i, // In-place sed

  // Package managers
  /\b(apt|apt-get|yum|dnf|brew|npm|yarn|pip|cargo)\s+(install|remove|uninstall|update|upgrade)\b/i,

  // Service management
  /\b(systemctl|service)\s+(start|stop|restart|enable|disable)\b/i,

  // Docker/container write operations
  /\bdocker\s+(rm|rmi|stop|kill|prune|system\s+prune)\b/i,
  /\bdocker-compose\s+(down|rm|stop)\b/i,

  // Helm write operations
  /\bhelm\s+(install|upgrade|uninstall|delete|rollback)\b/i,
];

// Single- or double-quoted shell text. It is blanked out before looking for
// redirections so that a ">" inside a jq filter or a query string is not
// mistaken for one.
const QUOTED_SEGMENT_PATTERN = /'[^']*'|"(?:\\.|[^"\\])*"/g;

// ">" or ">>" followed by a target that is neither a file-descriptor
// duplication (e.g. 2>&1) nor /dev/null, i.e. output written to a real file.
const FILE_REDIRECT_PATTERN = />{1,2}(?!>)\s*(?!&|\/dev\/null\b)\S/;

/**
 * Decide whether a shell command modifies state and therefore needs approval.
 *
 * A command counts as a write when it matches any entry of
 * WRITE_COMMAND_PATTERNS, or when it redirects output to a file outside of
 * quoted text. The latter covers commands such as
 * `kubectl get pods -o yaml > pods.yaml`, which the per-command patterns
 * miss. Redirections to /dev/null and descriptor duplications such as
 * `2>&1` do not count.
 *
 * @param command The full command line about to be executed.
 * @returns true when the command is considered a write operation.
 */
function isWriteCommand(command: string): boolean {
  if (WRITE_COMMAND_PATTERNS.some((pattern) => pattern.test(command))) {
    return true;
  }

  const unquoted = command.replace(QUOTED_SEGMENT_PATTERN, '""');
  return FILE_REDIRECT_PATTERN.test(unquoted);
}
|
|
10
51
|
|
|
11
52
|
function filterSensitiveData(output: string): string {
|
|
@@ -25,6 +66,9 @@ export const cliTool = createTool({
|
|
|
25
66
|
Use this to run any CLI commands including kubectl, grep, awk, jq, curl, etc.
|
|
26
67
|
Supports pipes and command chaining.
|
|
27
68
|
|
|
69
|
+
IMPORTANT: Write/modify commands require user approval before execution.
|
|
70
|
+
The tool will detect write operations and pause for confirmation.
|
|
71
|
+
|
|
28
72
|
Examples:
|
|
29
73
|
- List all pods: kubectl get pods -A
|
|
30
74
|
- Find pods by name: kubectl get pods -A | grep inventory
|
|
@@ -35,10 +79,58 @@ Examples:
|
|
|
35
79
|
|
|
36
80
|
inputSchema: z.object({
|
|
37
81
|
command: z.string().describe("The shell command to execute"),
|
|
82
|
+
approvalToken: z.string().optional().describe("Approval token from user confirmation. Required for write operations."),
|
|
83
|
+
}),
|
|
84
|
+
|
|
85
|
+
outputSchema: z.object({
|
|
86
|
+
success: z.boolean(),
|
|
87
|
+
output: z.string(),
|
|
88
|
+
error: z.string().optional(),
|
|
89
|
+
requiresApproval: z.boolean().optional(),
|
|
90
|
+
command: z.string().optional(),
|
|
91
|
+
approvalId: z.string().optional(),
|
|
92
|
+
riskLevel: z.enum(["low", "medium", "high", "critical"]).optional(),
|
|
38
93
|
}),
|
|
39
94
|
|
|
40
|
-
|
|
95
|
+
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
96
|
+
execute: (async (inputData: any): Promise<CliOutput> => {
|
|
97
|
+
const { command, approvalToken } = inputData;
|
|
98
|
+
|
|
41
99
|
try {
|
|
100
|
+
// Check if this is a write command
|
|
101
|
+
if (isWriteCommand(command)) {
|
|
102
|
+
// If token provided, validate it
|
|
103
|
+
if (approvalToken) {
|
|
104
|
+
const isValid = approvalStore.validateToken(command, approvalToken);
|
|
105
|
+
if (!isValid) {
|
|
106
|
+
// Invalid or expired token - request new approval
|
|
107
|
+
const pending = approvalStore.requestApproval(command);
|
|
108
|
+
return {
|
|
109
|
+
success: false,
|
|
110
|
+
output: "",
|
|
111
|
+
requiresApproval: true,
|
|
112
|
+
command: command,
|
|
113
|
+
approvalId: pending.id,
|
|
114
|
+
riskLevel: pending.riskLevel,
|
|
115
|
+
error: `⚠️ APPROVAL TOKEN INVALID OR EXPIRED\n\nCommand: ${command}\nRisk Level: ${pending.riskLevel.toUpperCase()}\nApproval ID: ${pending.id}\n\nPlease wait for user to approve this operation. A new approval token will be provided.`,
|
|
116
|
+
};
|
|
117
|
+
}
|
|
118
|
+
// Token valid - proceed with execution
|
|
119
|
+
} else {
|
|
120
|
+
// No token - request approval
|
|
121
|
+
const pending = approvalStore.requestApproval(command);
|
|
122
|
+
return {
|
|
123
|
+
success: false,
|
|
124
|
+
output: "",
|
|
125
|
+
requiresApproval: true,
|
|
126
|
+
command: command,
|
|
127
|
+
approvalId: pending.id,
|
|
128
|
+
riskLevel: pending.riskLevel,
|
|
129
|
+
error: `⚠️ WRITE OPERATION DETECTED - APPROVAL REQUIRED\n\nCommand: ${command}\nRisk Level: ${pending.riskLevel.toUpperCase()}\nApproval ID: ${pending.id}\n\nThis command will modify state. Waiting for user approval...`,
|
|
130
|
+
};
|
|
131
|
+
}
|
|
132
|
+
}
|
|
133
|
+
|
|
42
134
|
const result = await execCommand(command);
|
|
43
135
|
|
|
44
136
|
if (result.exitCode !== 0) {
|
|
@@ -61,5 +153,5 @@ Examples:
|
|
|
61
153
|
error: error instanceof Error ? error.message : String(error),
|
|
62
154
|
};
|
|
63
155
|
}
|
|
64
|
-
},
|
|
156
|
+
}) as any,
|
|
65
157
|
});
|