triagent 0.1.0-alpha13 → 0.1.0-alpha18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/package.json +3 -4
  2. package/src/cli/config.ts +96 -0
  3. package/src/index.ts +201 -3
  4. package/src/integrations/elasticsearch/client.ts +210 -0
  5. package/src/integrations/grafana/client.ts +186 -0
  6. package/src/integrations/kubernetes/multi-cluster.ts +199 -0
  7. package/src/integrations/kubernetes/types.ts +24 -0
  8. package/src/integrations/loki/client.ts +219 -0
  9. package/src/integrations/prometheus/client.ts +163 -0
  10. package/src/integrations/slack/client.ts +265 -0
  11. package/src/integrations/teams/client.ts +199 -0
  12. package/src/mastra/agents/debugger.ts +152 -108
  13. package/src/mastra/tools/approval-store.ts +180 -0
  14. package/src/mastra/tools/cli.ts +94 -2
  15. package/src/mastra/tools/cost.ts +389 -0
  16. package/src/mastra/tools/logs.ts +210 -0
  17. package/src/mastra/tools/network.ts +253 -0
  18. package/src/mastra/tools/prometheus.ts +221 -0
  19. package/src/mastra/tools/remediation.ts +365 -0
  20. package/src/mastra/tools/runbook.ts +186 -0
  21. package/src/server/routes/history.ts +207 -0
  22. package/src/server/routes/notifications.ts +236 -0
  23. package/src/server/webhook.ts +36 -2
  24. package/src/storage/index.ts +3 -0
  25. package/src/storage/investigation-history.ts +277 -0
  26. package/src/storage/runbook-index.ts +330 -0
  27. package/src/storage/types.ts +72 -0
  28. package/src/tui/app.tsx +492 -76
  29. package/src/tui/components/approval-dialog.tsx +156 -0
  30. package/src/tui/components/approval-modal.tsx +278 -0
  31. package/src/tui/components/index.ts +38 -0
  32. package/src/tui/components/styled-span.tsx +24 -0
  33. package/src/tui/components/timeline.tsx +223 -0
  34. package/src/tui/components/toast.tsx +101 -0
@@ -0,0 +1,365 @@
1
import { createTool } from "@mastra/core/tools";
import { z } from "zod";
import { exec, execFile } from "child_process";
import { promisify } from "util";
import { randomBytes } from "crypto";
6
+
7
+ const execAsync = promisify(exec);
8
+
9
/** One remediation action waiting to be executed, together with the token that unlocks it. */
interface PendingApproval {
  action: RemediationAction;
  token: string;
  expiresAt: Date;
  createdAt: Date;
}

// In-memory registry of approvals awaiting execution, keyed by approval ID.
// Every entry records its own expiry timestamp.
const pendingApprovals: Map<string, PendingApproval> = new Map();
16
+
17
/** The kinds of remediation this module can describe and run. */
type RemediationActionType =
  | "restart_pod"
  | "scale_deployment"
  | "rollback_deployment"
  | "delete_resource"
  | "apply_config";

/** Identifies the Kubernetes object an action operates on. */
interface RemediationTarget {
  kind: string;
  name: string;
  namespace: string;
}

/**
 * A single remediation request: what to do, which object to do it to, and any
 * action-specific settings (for example `replicas` or `revision`).
 */
interface RemediationAction {
  type: RemediationActionType;
  target: RemediationTarget;
  parameters?: Record<string, unknown>;
}
26
+
27
/**
 * Creates a fresh approval token.
 *
 * @returns 16 random bytes rendered as a 32-character lowercase hex string.
 */
function generateApprovalToken(): string {
  const entropy = randomBytes(16);
  return entropy.toString("hex");
}
30
+
31
/**
 * Classifies how risky a remediation action is.
 *
 * @param action - The action to classify.
 * @returns `"low"` for pod restarts; `"medium"` for scaling, rollbacks and pod
 *   deletion; `"high"` for deleting any other kind and for applying config;
 *   `"critical"` for any action type not listed above.
 */
function getRiskLevel(action: RemediationAction): "low" | "medium" | "high" | "critical" {
  const { type } = action;

  if (type === "restart_pod") {
    return "low";
  }
  if (type === "scale_deployment" || type === "rollback_deployment") {
    return "medium";
  }
  if (type === "delete_resource") {
    // Deleting a pod is treated as less risky than deleting any other kind.
    const targetsPod = action.target.kind.toLowerCase() === "pod";
    return targetsPod ? "medium" : "high";
  }
  if (type === "apply_config") {
    return "high";
  }

  // Anything unrecognized gets the highest rating.
  return "critical";
}
47
+
48
/**
 * Builds the one-line, human-readable summary of a remediation action.
 *
 * @param action - The action to describe.
 * @returns A sentence naming the operation and its target, formatted as
 *   `<kind>/<name> in <namespace>`. Unrecognized action types yield
 *   `Unknown action on <target>`.
 */
function getActionDescription(action: RemediationAction): string {
  const { kind, name, namespace } = action.target;
  const target = `${kind}/${name} in ${namespace}`;

  switch (action.type) {
    case "restart_pod":
      return `Restart pod ${target}`;

    case "scale_deployment": {
      // Only a missing or empty value falls back to "?"; a replica count of 0
      // (scale to zero) is a real value and must be shown as such.
      const requested = action.parameters?.replicas;
      const replicas = requested == null || requested === "" ? "?" : requested;
      return `Scale ${target} to ${replicas} replicas`;
    }

    case "rollback_deployment": {
      // Any falsy revision (absent, 0, "") is shown as "previous".
      const revision = action.parameters?.revision || "previous";
      return `Rollback ${target} to ${revision} revision`;
    }

    case "delete_resource":
      return `Delete ${target}`;

    case "apply_config":
      return `Apply configuration to ${target}`;

    default:
      return `Unknown action on ${target}`;
  }
}
68
+
69
/** Promisified `execFile`: runs a binary with an argument vector, without a shell. */
const execFileAsync = promisify(execFile);

/** Maximum time a single kubectl invocation may run, in milliseconds. */
const KUBECTL_TIMEOUT_MS = 60000;

/** Longest resource kind, name or namespace accepted from a caller. */
const MAX_IDENTIFIER_LENGTH = 253;

/**
 * Accepted shape for kinds, names and namespaces: must start and end with an
 * alphanumeric character, so a value can never be parsed by kubectl as a flag
 * (e.g. `--all`), and may contain only `.`, `_`, `:` and `-` in between.
 */
const SAFE_IDENTIFIER = /^[A-Za-z0-9](?:[A-Za-z0-9._:-]*[A-Za-z0-9])?$/;

/** Returns true when `value` is a string that is safe to pass to kubectl as an identifier. */
function isSafeIdentifier(value: unknown): value is string {
  return typeof value === "string" && value.length <= MAX_IDENTIFIER_LENGTH && SAFE_IDENTIFIER.test(value);
}

/** Parses a number or all-digit string into a non-negative safe integer; returns undefined otherwise. */
function parseNonNegativeInt(value: unknown): number | undefined {
  let candidate: number;
  if (typeof value === "number") {
    candidate = value;
  } else if (typeof value === "string" && /^\d+$/.test(value.trim())) {
    candidate = Number(value.trim());
  } else {
    return undefined;
  }
  return Number.isSafeInteger(candidate) && candidate >= 0 ? candidate : undefined;
}

/** Translates an action into kubectl arguments, or an error message when it cannot be run. */
function buildKubectlArgs(action: RemediationAction): { args: string[] } | { error: string } {
  const { type, target, parameters } = action;

  if (type === "apply_config") {
    // For apply, the config should be provided in parameters.
    if (!parameters?.config) {
      return { error: "No config provided for apply action" };
    }
    // This would need to write to a temp file and apply.
    return { error: "Apply config not yet implemented" };
  }

  const { kind, name, namespace } = target;
  if (!isSafeIdentifier(kind)) {
    return { error: `Invalid resource kind: ${JSON.stringify(kind)}` };
  }
  if (!isSafeIdentifier(name)) {
    return { error: `Invalid resource name: ${JSON.stringify(name)}` };
  }
  if (!isSafeIdentifier(namespace)) {
    return { error: `Invalid namespace: ${JSON.stringify(namespace)}` };
  }

  const resource = kind.toLowerCase();
  const scope = ["-n", namespace];

  switch (type) {
    case "restart_pod":
      // A bare pod is restarted by deleting it; every other kind gets a rollout restart.
      return resource === "pod"
        ? { args: ["delete", "pod", name, ...scope] }
        : { args: ["rollout", "restart", `${resource}/${name}`, ...scope] };

    case "scale_deployment": {
      const requested = parameters?.replicas;
      // Default to 1 only when no count was supplied; 0 is a valid scale-to-zero request.
      const replicas = requested == null || requested === "" ? 1 : parseNonNegativeInt(requested);
      if (replicas === undefined) {
        return { error: `Invalid replica count: ${JSON.stringify(requested)}` };
      }
      return { args: ["scale", `${resource}/${name}`, ...scope, `--replicas=${replicas}`] };
    }

    case "rollback_deployment": {
      const args = ["rollout", "undo", `${resource}/${name}`, ...scope];
      const requested = parameters?.revision;
      // A falsy revision (absent, 0, "") means "roll back to the previous revision".
      if (requested) {
        const revision = parseNonNegativeInt(requested);
        if (revision === undefined) {
          return { error: `Invalid revision: ${JSON.stringify(requested)}` };
        }
        args.push(`--to-revision=${revision}`);
      }
      return { args };
    }

    case "delete_resource":
      return { args: ["delete", resource, name, ...scope] };

    default:
      return { error: `Unknown action type: ${String(type)}` };
  }
}

/**
 * Runs a remediation action through kubectl.
 *
 * The command is executed with `execFile` and an argument vector rather than a
 * shell string, and the kind, name, namespace, replica count and revision are
 * validated first, so caller-supplied values cannot inject shell commands or
 * extra kubectl flags.
 *
 * @param action - The action to run.
 * @returns `success: true` with kubectl's stdout (stderr appended after
 *   `Warnings:` when present), or `success: false` with a reason: a validation
 *   message, the "not implemented" notice for `apply_config`, or the error
 *   message from the failed command. This function does not throw.
 */
async function executeAction(action: RemediationAction): Promise<{ success: boolean; output: string }> {
  const plan = buildKubectlArgs(action);
  if ("error" in plan) {
    return { success: false, output: plan.error };
  }

  try {
    const { stdout, stderr } = await execFileAsync("kubectl", plan.args, { timeout: KUBECTL_TIMEOUT_MS });
    return {
      success: true,
      output: stdout + (stderr ? `\nWarnings: ${stderr}` : ""),
    };
  } catch (error) {
    return {
      success: false,
      output: error instanceof Error ? error.message : String(error),
    };
  }
}
128
+
129
/**
 * Mastra tool that proposes and runs Kubernetes remediation actions behind an
 * approval gate.
 *
 * Operations:
 * - `suggest`  - stores the action as a pending approval (10-minute lifetime)
 *                and returns its risk level, approval ID and approval token.
 * - `execute`  - redeems an approval token and runs the stored action. A token
 *                is consumed when redeemed, whether or not the action succeeds.
 * - `rollback` - runs a `rollback_deployment` against the supplied target.
 * - `status`   - lists approvals that have not expired yet.
 *
 * Every outcome is reported through the result object (`success`, `data`,
 * `error`); the handler catches exceptions instead of throwing.
 */
export const remediationTool = createTool({
  id: "remediation",
  description: `Execute remediation actions on Kubernetes resources with approval workflow.
Use this tool to:
- Suggest fixes based on diagnosis
- Execute approved remediation actions
- Rollback changes if needed

IMPORTANT: All destructive actions require user approval. The workflow is:
1. Call with operation="suggest" to propose an action
2. User reviews and approves (generates approval token)
3. Call with operation="execute" and the approval token

Available action types:
- restart_pod: Restart a pod (or rollout restart for deployments)
- scale_deployment: Change replica count
- rollback_deployment: Rollback to previous or specific revision
- delete_resource: Delete a resource (use with caution)`,
  inputSchema: z.object({
    operation: z.enum(["suggest", "execute", "rollback", "status"]).describe(
      "Operation: suggest (propose action), execute (run with approval), rollback (undo last action), status (check pending approvals)"
    ),
    action: z.object({
      type: z.enum(["restart_pod", "scale_deployment", "rollback_deployment", "delete_resource", "apply_config"]).describe("Type of remediation action"),
      target: z.object({
        kind: z.string().describe("Kubernetes resource kind (Pod, Deployment, etc.)"),
        name: z.string().describe("Resource name"),
        namespace: z.string().describe("Resource namespace"),
      }),
      parameters: z.record(z.unknown()).optional().describe("Action-specific parameters (e.g., replicas, revision)"),
    }).optional().describe("The remediation action to perform"),
    approvalToken: z.string().optional().describe("Approval token for executing actions"),
  }),
  outputSchema: z.object({
    success: z.boolean(),
    data: z.string(),
    requiresApproval: z.boolean().optional(),
    approvalId: z.string().optional(),
    riskLevel: z.enum(["low", "medium", "high", "critical"]).optional(),
    error: z.string().optional(),
  }),
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  execute: (async ({ operation, action, approvalToken }: any) => {
    try {
      // Evict approvals whose window has closed so abandoned suggestions do not
      // accumulate for the lifetime of the process. The entry matching the
      // presented token is kept so "execute" can still report that it expired.
      const nowMs = Date.now();
      for (const [id, approval] of pendingApprovals) {
        if (nowMs > approval.expiresAt.getTime() && approval.token !== approvalToken) {
          pendingApprovals.delete(id);
        }
      }

      switch (operation) {
        case "suggest": {
          if (!action) {
            return {
              success: false,
              data: "",
              error: "Action is required for suggest operation",
            };
          }

          const riskLevel = getRiskLevel(action);
          const description = getActionDescription(action);
          const token = generateApprovalToken();
          const approvalId = randomBytes(8).toString("hex");

          // Store pending approval (expires in 10 minutes)
          pendingApprovals.set(approvalId, {
            action,
            token,
            expiresAt: new Date(Date.now() + 10 * 60 * 1000),
            createdAt: new Date(),
          });

          const riskEmoji = {
            low: "🟢",
            medium: "🟡",
            high: "🟠",
            critical: "🔴",
          }[riskLevel];

          return {
            success: true,
            data: `Suggested remediation action:

${riskEmoji} Risk Level: ${riskLevel.toUpperCase()}

Action: ${description}
Target: ${action.target.kind}/${action.target.name} in namespace ${action.target.namespace}
${action.parameters ? `Parameters: ${JSON.stringify(action.parameters)}` : ""}

To execute this action, approve it and call remediation with:
- operation: "execute"
- approvalToken: "${token}"

Approval ID: ${approvalId}
Expires: 10 minutes`,
            requiresApproval: true,
            approvalId,
            riskLevel,
          };
        }

        case "execute": {
          if (!approvalToken) {
            return {
              success: false,
              data: "",
              error: "Approval token is required to execute actions",
            };
          }

          // Find the pending approval with this token
          const match = Array.from(pendingApprovals.entries()).find(
            ([, approval]) => approval.token === approvalToken,
          );

          if (!match) {
            return {
              success: false,
              data: "",
              error: "Invalid approval token. Please suggest the action first.",
            };
          }

          const [approvalId, approval] = match;

          // Consume the approval before doing any asynchronous work. Removing
          // it only after the (up to 60 s) kubectl run would let a second
          // "execute" call redeem the same token while the first is in flight.
          pendingApprovals.delete(approvalId);

          if (new Date() > approval.expiresAt) {
            return {
              success: false,
              data: "",
              error: "Approval token has expired. Please suggest the action again.",
            };
          }

          // Execute the action
          const result = await executeAction(approval.action);

          if (result.success) {
            return {
              success: true,
              data: `✅ Action executed successfully:

${getActionDescription(approval.action)}

Output:
${result.output}`,
            };
          }

          return {
            success: false,
            data: "",
            error: `Action failed: ${result.output}`,
          };
        }

        case "rollback": {
          if (!action) {
            return {
              success: false,
              data: "",
              error: "Action with target is required for rollback",
            };
          }

          // NOTE(review): this path runs a rollback immediately, without an
          // approval token, although the tool description states that all
          // destructive actions require approval. Confirm this is intended.
          // For rollback, we create a rollback action
          const rollbackAction: RemediationAction = {
            type: "rollback_deployment",
            target: action.target,
            parameters: action.parameters,
          };

          const result = await executeAction(rollbackAction);

          if (result.success) {
            return {
              success: true,
              data: `✅ Rollback executed:

${getActionDescription(rollbackAction)}

Output:
${result.output}`,
            };
          }

          return {
            success: false,
            data: "",
            error: `Rollback failed: ${result.output}`,
          };
        }

        case "status": {
          const pending = Array.from(pendingApprovals.entries())
            .filter(([, a]) => new Date() < a.expiresAt)
            .map(([id, a]) => ({
              id,
              action: getActionDescription(a.action),
              risk: getRiskLevel(a.action),
              // Remaining lifetime, rounded to whole minutes.
              expiresIn: Math.round((a.expiresAt.getTime() - Date.now()) / 1000 / 60),
            }));

          if (pending.length === 0) {
            return {
              success: true,
              data: "No pending approval requests",
            };
          }

          const lines = ["Pending approval requests:\n"];
          for (const p of pending) {
            lines.push(`ID: ${p.id}`);
            lines.push(` Action: ${p.action}`);
            lines.push(` Risk: ${p.risk}`);
            lines.push(` Expires in: ${p.expiresIn} minutes\n`);
          }

          return {
            success: true,
            data: lines.join("\n"),
          };
        }

        default:
          return { success: false, data: "", error: `Unknown operation: ${operation}` };
      }
    } catch (error) {
      return {
        success: false,
        data: "",
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }) as any,
});
@@ -0,0 +1,186 @@
1
+ import { createTool } from "@mastra/core/tools";
2
+ import { z } from "zod";
3
+ import { getRunbookIndexer } from "../../storage/runbook-index.js";
4
+ import { readFile } from "fs/promises";
5
+
6
/**
 * Mastra tool for looking up runbooks held by the runbook indexer.
 *
 * Operations:
 * - `search` - finds runbooks by symptoms, then by query, then by tags
 *              (the first of these that was supplied wins).
 * - `get`    - decodes `runbookId` from base64 into a file path and returns
 *              that file's contents.
 * - `list`   - reports index statistics and up to `limit` runbooks.
 * - `index`  - returns a notice that indexing is triggered elsewhere.
 *
 * Every outcome is reported through the result object (`success`, `data`,
 * `error`); the handler catches exceptions instead of throwing.
 */
export const runbookTool = createTool({
  id: "runbook",
  description: `Search and retrieve runbooks and SOPs (Standard Operating Procedures).
Use this tool to:
- Find relevant runbooks by symptoms or keywords
- Look up established procedures for common issues
- Get step-by-step remediation guides

Runbooks are indexed from configured paths and searched using TF-IDF similarity.
Configure runbook paths in triagent config.`,
  inputSchema: z.object({
    operation: z.enum(["search", "get", "list", "index"]).describe(
      "Operation: search (find by query), get (read specific runbook), list (show all), index (re-index runbooks)"
    ),
    query: z.string().optional().describe("Search query for finding runbooks"),
    symptoms: z.array(z.string()).optional().describe("List of symptoms to match against runbooks"),
    tags: z.array(z.string()).optional().describe("Filter runbooks by tags"),
    runbookId: z.string().optional().describe("Specific runbook ID to retrieve"),
    limit: z.number().default(5).describe("Maximum number of results to return"),
  }),
  outputSchema: z.object({
    success: z.boolean(),
    data: z.string(),
    runbooks: z.array(z.object({
      id: z.string(),
      title: z.string(),
      path: z.string(),
      tags: z.array(z.string()),
      excerpt: z.string().optional(),
    })).optional(),
    error: z.string().optional(),
  }),
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  execute: (async ({ operation, query, symptoms, tags, runbookId, limit }: any) => {
    try {
      const indexer = getRunbookIndexer();

      if (operation === "search") {
        // Symptoms take precedence over a free-text query, which in turn
        // takes precedence over a tag filter.
        let matches;
        if (symptoms && symptoms.length > 0) {
          matches = indexer.searchBySymptoms(symptoms, limit);
        } else if (query) {
          matches = indexer.search(query, limit);
        } else if (tags && tags.length > 0) {
          matches = indexer.getByTags(tags).slice(0, limit);
        } else {
          return {
            success: false,
            data: "",
            error: "Query, symptoms, or tags required for search",
          };
        }

        if (matches.length === 0) {
          return {
            success: true,
            data: "No matching runbooks found",
            runbooks: [],
          };
        }

        const report: string[] = [`Found ${matches.length} runbook(s):\n`];
        const summaries: Array<{ id: string; title: string; path: string; tags: string[]; excerpt: string }> = [];

        for (const entry of matches) {
          // First 200 characters of the body, flattened onto a single line.
          const flattened = entry.content.slice(0, 200).replace(/\n/g, " ");
          const excerpt = flattened + "...";

          report.push(
            `📖 ${entry.title}`,
            ` Path: ${entry.path}`,
            ` Tags: ${entry.tags.join(", ") || "none"}`,
            ` ${excerpt}\n`,
          );
          summaries.push({
            id: entry.id,
            title: entry.title,
            path: entry.path,
            tags: entry.tags,
            excerpt,
          });
        }

        return {
          success: true,
          data: report.join("\n"),
          runbooks: summaries,
        };
      }

      if (operation === "get") {
        if (!runbookId) {
          return {
            success: false,
            data: "",
            error: "runbookId is required for get operation",
          };
        }

        // The runbook ID is the base64 encoding of the runbook's file path.
        // NOTE(review): the decoded path is read as-is, so any base64-encoded
        // path supplied by the caller will be opened - confirm that IDs can
        // only originate from the indexer, or restrict to indexed paths.
        const filePath = Buffer.from(runbookId, "base64").toString("utf-8");

        try {
          const body = await readFile(filePath, "utf-8");
          return {
            success: true,
            data: body,
          };
        } catch {
          return {
            success: false,
            data: "",
            error: `Runbook not found at path: ${filePath}`,
          };
        }
      }

      if (operation === "list") {
        const stats = indexer.getStats();

        if (stats.totalRunbooks === 0) {
          return {
            success: true,
            data: "No runbooks indexed. Configure runbook paths and run 'index' operation.",
            runbooks: [],
          };
        }

        // With a tag filter, list by tags; otherwise approximate "everything"
        // with a broad search.
        const filterByTags = tags && tags.length > 0;
        let catalogue = filterByTags ? indexer.getByTags(tags) : indexer.search("*", 100);

        // Fall back to a generic symptom search when the first attempt is empty.
        if (catalogue.length === 0) {
          catalogue = indexer.searchBySymptoms(["error", "issue", "problem"], 100);
        }

        const report: string[] = [
          `Runbook Index Stats:`,
          ` Total runbooks: ${stats.totalRunbooks}`,
          ` Last indexed: ${stats.lastIndexed.toISOString()}`,
          `\nRunbooks:\n`,
        ];
        const summaries: Array<{ id: string; title: string; path: string; tags: string[] }> = [];

        for (const entry of catalogue.slice(0, limit)) {
          report.push(`📖 ${entry.title}`, ` Tags: ${entry.tags.join(", ") || "none"}`);
          summaries.push({
            id: entry.id,
            title: entry.title,
            path: entry.path,
            tags: entry.tags,
          });
        }

        return {
          success: true,
          data: report.join("\n"),
          runbooks: summaries,
        };
      }

      if (operation === "index") {
        return {
          success: true,
          data: "Runbook indexing should be triggered via CLI or startup. Use 'triagent config' to set runbook paths.",
        };
      }

      return { success: false, data: "", error: `Unknown operation: ${operation}` };
    } catch (error) {
      return {
        success: false,
        data: "",
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }) as any,
});