triagent 0.1.0-alpha13 → 0.1.0-alpha18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. package/package.json +3 -4
  2. package/src/cli/config.ts +96 -0
  3. package/src/index.ts +201 -3
  4. package/src/integrations/elasticsearch/client.ts +210 -0
  5. package/src/integrations/grafana/client.ts +186 -0
  6. package/src/integrations/kubernetes/multi-cluster.ts +199 -0
  7. package/src/integrations/kubernetes/types.ts +24 -0
  8. package/src/integrations/loki/client.ts +219 -0
  9. package/src/integrations/prometheus/client.ts +163 -0
  10. package/src/integrations/slack/client.ts +265 -0
  11. package/src/integrations/teams/client.ts +199 -0
  12. package/src/mastra/agents/debugger.ts +152 -108
  13. package/src/mastra/tools/approval-store.ts +180 -0
  14. package/src/mastra/tools/cli.ts +94 -2
  15. package/src/mastra/tools/cost.ts +389 -0
  16. package/src/mastra/tools/logs.ts +210 -0
  17. package/src/mastra/tools/network.ts +253 -0
  18. package/src/mastra/tools/prometheus.ts +221 -0
  19. package/src/mastra/tools/remediation.ts +365 -0
  20. package/src/mastra/tools/runbook.ts +186 -0
  21. package/src/server/routes/history.ts +207 -0
  22. package/src/server/routes/notifications.ts +236 -0
  23. package/src/server/webhook.ts +36 -2
  24. package/src/storage/index.ts +3 -0
  25. package/src/storage/investigation-history.ts +277 -0
  26. package/src/storage/runbook-index.ts +330 -0
  27. package/src/storage/types.ts +72 -0
  28. package/src/tui/app.tsx +492 -76
  29. package/src/tui/components/approval-dialog.tsx +156 -0
  30. package/src/tui/components/approval-modal.tsx +278 -0
  31. package/src/tui/components/index.ts +38 -0
  32. package/src/tui/components/styled-span.tsx +24 -0
  33. package/src/tui/components/timeline.tsx +223 -0
  34. package/src/tui/components/toast.tsx +101 -0
@@ -0,0 +1,253 @@
1
+ import { createTool } from "@mastra/core/tools";
2
+ import { z } from "zod";
3
+ import { exec } from "child_process";
4
+ import { promisify } from "util";
5
+
6
+ const execAsync = promisify(exec);
7
+
8
/** Optional source pod coordinates accepted by the network tool. */
interface NetworkSourceInput {
  pod?: string;
  namespace?: string;
}

/** Optional target coordinates accepted by the network tool. */
interface NetworkTargetInput {
  host?: string;
  port?: number;
  service?: string;
  namespace?: string;
}

/**
 * Allow-list for every string that gets interpolated into a shell command
 * below. It admits hostnames, IPv4/IPv6 literals and Kubernetes object names,
 * and rejects shell metacharacters (spaces, quotes, `;`, `|`, `&`, `$`,
 * backticks, ...). The first character may not be `-`, so a value can never
 * be parsed as a command-line option by kubectl or the in-pod utilities.
 */
const SHELL_SAFE_TOKEN = /^[A-Za-z0-9_:][A-Za-z0-9._:-]*$/;

/**
 * Checks every caller-supplied value that may end up inside a shell command.
 *
 * Empty or missing values are skipped because the tool already treats them as
 * "not provided" (all of its checks are truthiness based).
 *
 * @param source - Source pod/namespace as received by the tool.
 * @param target - Target host/port/service/namespace as received by the tool.
 * @returns A human-readable error message for the first unsafe value, or
 *          `undefined` when everything is safe to interpolate.
 */
function findUnsafeNetworkInput(
  source: NetworkSourceInput | null | undefined,
  target: NetworkTargetInput | null | undefined
): string | undefined {
  const candidates: Array<[string, unknown]> = [
    ["source.pod", source?.pod],
    ["source.namespace", source?.namespace],
    ["target.host", target?.host],
    ["target.service", target?.service],
    ["target.namespace", target?.namespace],
  ];

  for (const [label, value] of candidates) {
    if (value === undefined || value === null || value === "") {
      continue;
    }
    if (typeof value !== "string" || !SHELL_SAFE_TOKEN.test(value)) {
      return `Invalid ${label}: only letters, digits, '.', '_', ':' and '-' are allowed, and the value must not start with '-' or '.'`;
    }
  }

  // A falsy port (undefined, 0, NaN) falls back to 80 in the connectivity
  // test, so only a truthy value needs to be range-checked here.
  const port: unknown = target?.port;
  if (port && !(typeof port === "number" && Number.isInteger(port) && port >= 1 && port <= 65535)) {
    return "Invalid target.port: expected an integer between 1 and 65535";
  }

  return undefined;
}

/**
 * Mastra tool that runs kubectl-based network diagnostics.
 *
 * Supported operations:
 * - `dns`: resolve a name from inside a pod (nslookup, then getent), or grep
 *   the cluster's services when no source pod is given.
 * - `connectivity`: probe host:port from a source pod with nc, wget and curl,
 *   stopping at the first probe that produces output.
 * - `policies`: list NetworkPolicies for a namespace (or all namespaces).
 * - `endpoints`: show a service and its endpoints, or all endpoints.
 * - `trace`: show a service's ClusterIP/backends and run traceroute/tracepath
 *   from a source pod when one is given.
 *
 * Every branch resolves to `{ success, data, error? }`; failures are reported
 * through that object rather than thrown.
 *
 * Security: commands are built as strings and run through `exec`, so every
 * interpolated input is validated by `findUnsafeNetworkInput` first and the
 * call is rejected before any command runs if a value is unsafe.
 */
export const networkTool = createTool({
  id: "network",
  description: `Debug network connectivity and policies in Kubernetes.
Use this tool to:
- Test DNS resolution from within pods
- Check connectivity between services
- Analyze NetworkPolicies
- View service endpoints
- Trace network paths

This tool executes kubectl commands to inspect network-related resources
and can run network diagnostics inside pods using kubectl exec.`,
  inputSchema: z.object({
    operation: z.enum(["dns", "connectivity", "policies", "endpoints", "trace"]).describe(
      "Operation: dns (resolve names), connectivity (test connection), policies (list NetworkPolicies), endpoints (show service endpoints), trace (network path)"
    ),
    source: z.object({
      pod: z.string().optional().describe("Source pod name for tests"),
      namespace: z.string().optional().describe("Source namespace"),
    }).optional().describe("Source for network tests"),
    target: z.object({
      host: z.string().optional().describe("Target hostname or IP"),
      port: z.number().optional().describe("Target port"),
      service: z.string().optional().describe("Target service name"),
      namespace: z.string().optional().describe("Target namespace"),
    }).optional().describe("Target for network tests"),
  }),
  outputSchema: z.object({
    success: z.boolean(),
    data: z.string(),
    error: z.string().optional(),
  }),
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  execute: (async ({ operation, source, target }: any) => {
    try {
      // Refuse to build any command from values that could alter the shell
      // command line (metacharacters) or be read as an option (leading "-").
      const unsafeInput = findUnsafeNetworkInput(source, target);
      if (unsafeInput) {
        return { success: false, data: "", error: unsafeInput };
      }

      switch (operation) {
        case "dns": {
          const hostname = target?.host || target?.service;
          if (!hostname) {
            return {
              success: false,
              data: "",
              error: "Target host or service is required for DNS lookup",
            };
          }

          // Try to resolve DNS from within a pod if specified
          if (source?.pod && source?.namespace) {
            // getent is the fallback for images that do not ship nslookup.
            const cmd = `kubectl exec -n ${source.namespace} ${source.pod} -- nslookup ${hostname} 2>&1 || kubectl exec -n ${source.namespace} ${source.pod} -- getent hosts ${hostname} 2>&1`;
            const { stdout, stderr } = await execAsync(cmd, { timeout: 30000 });
            return {
              success: true,
              data: `DNS lookup for ${hostname} from ${source.namespace}/${source.pod}:\n${stdout}${stderr ? `\nErrors: ${stderr}` : ""}`,
            };
          }

          // Without a source pod, fall back to searching the service list.
          const { stdout } = await execAsync(
            `kubectl get svc -A -o json | grep -i "${hostname}" || echo "Service not found in cluster"`,
            { timeout: 30000 }
          );
          return {
            success: true,
            data: `DNS/Service lookup for ${hostname}:\n${stdout}`,
          };
        }

        case "connectivity": {
          if (!source?.pod || !source?.namespace) {
            return {
              success: false,
              data: "",
              error: "Source pod and namespace are required for connectivity test",
            };
          }

          const targetHost = target?.host || target?.service;
          // `||` (not `??`) is intentional: a port of 0 also falls back to 80.
          const targetPort = target?.port || 80;

          if (!targetHost) {
            return {
              success: false,
              data: "",
              error: "Target host or service is required",
            };
          }

          // Build full service name if namespace provided
          const fullTarget = target?.namespace && target?.service
            ? `${target.service}.${target.namespace}.svc.cluster.local`
            : targetHost;

          // Try different connectivity tools; images differ in what they ship.
          const tests = [
            `kubectl exec -n ${source.namespace} ${source.pod} -- nc -zv ${fullTarget} ${targetPort} 2>&1`,
            `kubectl exec -n ${source.namespace} ${source.pod} -- wget -q --spider --timeout=5 http://${fullTarget}:${targetPort} 2>&1`,
            `kubectl exec -n ${source.namespace} ${source.pod} -- curl -s -o /dev/null -w "%{http_code}" --connect-timeout 5 http://${fullTarget}:${targetPort} 2>&1`,
          ];

          const results: string[] = [`Connectivity test from ${source.namespace}/${source.pod} to ${fullTarget}:${targetPort}\n`];

          for (const test of tests) {
            try {
              const { stdout, stderr } = await execAsync(test, { timeout: 15000 });
              // The first probe that exits cleanly with output ends the run.
              if (stdout.trim() || stderr.trim()) {
                results.push(`✓ ${stdout.trim()}${stderr ? ` ${stderr.trim()}` : ""}`);
                break;
              }
            } catch (e) {
              results.push(`✗ Connection failed: ${e instanceof Error ? e.message : String(e)}`);
            }
          }

          // `success` reports that the test ran; per-probe outcomes are in `data`.
          return {
            success: true,
            data: results.join("\n"),
          };
        }

        case "policies": {
          const namespace = source?.namespace || target?.namespace;
          const nsArg = namespace ? `-n ${namespace}` : "-A";

          const { stdout: policies } = await execAsync(
            `kubectl get networkpolicy ${nsArg} -o wide 2>&1`,
            { timeout: 30000 }
          );

          let details = "";
          if (namespace) {
            try {
              const { stdout } = await execAsync(
                `kubectl get networkpolicy ${nsArg} -o yaml 2>&1`,
                { timeout: 30000 }
              );
              // Extract just the `name:` lines from the YAML dump.
              const policyNames = stdout.match(/name:\s+(\S+)/g) || [];
              details = `\nPolicy details:\n${policyNames.join("\n")}`;
            } catch {
              // Details are best-effort; the table above is still returned.
            }
          }

          return {
            success: true,
            data: `NetworkPolicies${namespace ? ` in ${namespace}` : " (all namespaces)"}:\n${policies}${details}`,
          };
        }

        case "endpoints": {
          const service = target?.service;
          const namespace = target?.namespace || "default";

          if (!service) {
            // List all endpoints
            const { stdout } = await execAsync(
              `kubectl get endpoints -A -o wide 2>&1`,
              { timeout: 30000 }
            );
            return {
              success: true,
              data: `All endpoints:\n${stdout}`,
            };
          }

          const { stdout: endpoints } = await execAsync(
            `kubectl get endpoints ${service} -n ${namespace} -o yaml 2>&1`,
            { timeout: 30000 }
          );

          const { stdout: svc } = await execAsync(
            `kubectl get svc ${service} -n ${namespace} -o wide 2>&1`,
            { timeout: 30000 }
          );

          return {
            success: true,
            data: `Service: ${service} in ${namespace}\n\n${svc}\n\nEndpoints:\n${endpoints}`,
          };
        }

        case "trace": {
          const targetHost = target?.host || target?.service;
          if (!targetHost) {
            return {
              success: false,
              data: "",
              error: "Target host or service is required for trace",
            };
          }

          const results: string[] = [`Network trace to ${targetHost}:\n`];

          // Get service details if it's a service name
          if (target?.service) {
            const ns = target.namespace || "default";
            try {
              const { stdout: svc } = await execAsync(
                `kubectl get svc ${target.service} -n ${ns} -o jsonpath='{.spec.clusterIP}:{.spec.ports[0].port}' 2>&1`,
                { timeout: 10000 }
              );
              results.push(`Service ClusterIP: ${svc}`);

              const { stdout: endpoints } = await execAsync(
                `kubectl get endpoints ${target.service} -n ${ns} -o jsonpath='{.subsets[*].addresses[*].ip}' 2>&1`,
                { timeout: 10000 }
              );
              results.push(`Backend pods: ${endpoints || "None"}`);
            } catch {
              results.push("Could not get service details");
            }
          }

          // If we have a source pod, trace from there
          if (source?.pod && source?.namespace) {
            try {
              // tracepath is the fallback when traceroute is not installed.
              const { stdout } = await execAsync(
                `kubectl exec -n ${source.namespace} ${source.pod} -- traceroute -n -m 10 ${targetHost} 2>&1 || kubectl exec -n ${source.namespace} ${source.pod} -- tracepath ${targetHost} 2>&1`,
                { timeout: 60000 }
              );
              results.push(`\nTraceroute from ${source.namespace}/${source.pod}:\n${stdout}`);
            } catch (e) {
              results.push(`\nTraceroute not available: ${e instanceof Error ? e.message : String(e)}`);
            }
          }

          return {
            success: true,
            data: results.join("\n"),
          };
        }

        default:
          return { success: false, data: "", error: `Unknown operation: ${operation}` };
      }
    } catch (error) {
      // Any kubectl failure or timeout not handled above lands here.
      return {
        success: false,
        data: "",
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }) as any,
});
@@ -0,0 +1,221 @@
1
+ import { createTool } from "@mastra/core/tools";
2
+ import { z } from "zod";
3
+ import { getPrometheusClient } from "../../integrations/prometheus/client.js";
4
+ import { getGrafanaClient } from "../../integrations/grafana/client.js";
5
+
6
/**
 * Mastra tool exposing Prometheus queries, alerts and scrape targets, plus a
 * Grafana dashboard search.
 *
 * Every operation resolves to `{ success, data, error? }`. A missing client
 * or a missing required argument is reported through that object, and any
 * exception raised while talking to a backend is converted into the same
 * shape by the outer `catch`.
 */
export const prometheusTool = createTool({
  id: "prometheus",
  description: `Query Prometheus metrics and alerts. Use this tool to:
- Query current metric values with PromQL
- Query metric ranges over time
- Get active alerts
- Check scrape targets health

Example queries:
- CPU usage: container_cpu_usage_seconds_total{pod=~"myapp.*"}
- Memory: container_memory_usage_bytes{namespace="production"}
- Request rate: rate(http_requests_total[5m])
- Error rate: sum(rate(http_requests_total{status=~"5.."}[5m])) / sum(rate(http_requests_total[5m]))`,
  inputSchema: z.object({
    operation: z.enum(["query", "query_range", "alerts", "targets", "dashboards"]).describe(
      "Operation: query (instant), query_range (time series), alerts (active alerts), targets (scrape health), dashboards (Grafana dashboards)"
    ),
    query: z.string().optional().describe("PromQL query for query/query_range operations"),
    start: z.string().optional().describe("Start time for query_range (ISO 8601 or relative like '1h')"),
    end: z.string().optional().describe("End time for query_range (ISO 8601 or 'now')"),
    step: z.string().optional().describe("Step interval for query_range (e.g., '1m', '5m')"),
    dashboardSearch: z.string().optional().describe("Search term for Grafana dashboards"),
  }),
  outputSchema: z.object({
    success: z.boolean(),
    data: z.string(),
    error: z.string().optional(),
  }),
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  execute: (async ({ operation, query, start, end, step, dashboardSearch }: any) => {
    // Builds the failure shape shared by every error path in this tool.
    const failure = (error: string) => ({ success: false, data: "", error });
    const prometheusMissing = "Prometheus not configured. Set prometheus.url in config.";

    try {
      // Both clients are looked up on every call, whichever operation runs.
      const prom = getPrometheusClient();
      const grafana = getGrafanaClient();

      // Instant query.
      if (operation === "query") {
        if (!prom) {
          return failure(prometheusMissing);
        }
        if (!query) {
          return failure("Query is required for 'query' operation");
        }
        const response = await prom.query(query);
        return {
          success: response.status === "success",
          data: prom.formatQueryResult(response),
          error: response.error,
        };
      }

      // Range query over [start, end] sampled every `step`.
      if (operation === "query_range") {
        if (!prom) {
          return failure(prometheusMissing);
        }
        if (!(query && start && end)) {
          return failure("Query, start, and end are required for 'query_range' operation");
        }

        // Relative inputs such as "1h" become absolute ISO timestamps.
        const rangeStart = parseRelativeTime(start);
        const rangeEnd = end === "now" ? new Date().toISOString() : parseRelativeTime(end);
        const response = await prom.queryRange(query, rangeStart, rangeEnd, step || "1m");
        return {
          success: response.status === "success",
          data: prom.formatQueryResult(response),
          error: response.error,
        };
      }

      // Alerts, rendered one per block with labels and annotations.
      if (operation === "alerts") {
        if (!prom) {
          return failure(prometheusMissing);
        }
        const alertsResponse = await prom.getAlerts();
        const active = alertsResponse.alerts || [];
        if (active.length === 0) {
          return { success: true, data: "No active alerts" };
        }

        const report: string[] = [`Active alerts (${active.length}):\n`];
        for (const entry of active) {
          const labelText = Object.entries(entry.labels)
            .map(([key, value]) => `${key}="${value}"`)
            .join(", ");
          report.push(`[${entry.state.toUpperCase()}] {${labelText}}`);
          if (entry.annotations.summary) {
            report.push(`  Summary: ${entry.annotations.summary}`);
          }
          if (entry.annotations.description) {
            report.push(`  Description: ${entry.annotations.description}`);
          }
          report.push("");
        }
        return { success: true, data: report.join("\n") };
      }

      // Scrape targets, grouped by scrape pool in first-seen order.
      if (operation === "targets") {
        if (!prom) {
          return failure(prometheusMissing);
        }
        const targetsResponse = await prom.getTargets();
        const targets = targetsResponse.activeTargets || [];

        const byPool = new Map<string, typeof targets>();
        for (const entry of targets) {
          const bucket = byPool.get(entry.scrapePool);
          if (bucket) {
            bucket.push(entry);
          } else {
            byPool.set(entry.scrapePool, [entry]);
          }
        }

        const report: string[] = [`Scrape targets (${targets.length} total):\n`];
        for (const [poolName, members] of byPool) {
          const healthy = members.filter((member) => member.health === "up").length;
          report.push(`${poolName}: ${healthy}/${members.length} up`);
          for (const member of members) {
            const marker = member.health === "up" ? "✓" : "✗";
            report.push(`  ${marker} ${member.scrapeUrl}`);
            if (member.lastError) {
              report.push(`    Error: ${member.lastError}`);
            }
          }
          report.push("");
        }
        return { success: true, data: report.join("\n") };
      }

      // Grafana dashboard search.
      if (operation === "dashboards") {
        if (!grafana) {
          return failure("Grafana not configured. Set grafana.url and grafana.apiKey in config.");
        }
        const found = await grafana.searchDashboards(dashboardSearch);
        if (found.length === 0) {
          return { success: true, data: "No dashboards found" };
        }

        const report: string[] = [`Found ${found.length} dashboards:\n`];
        for (const board of found) {
          const tagText = board.tags.length > 0 ? ` [${board.tags.join(", ")}]` : "";
          const folderText = board.folderTitle ? ` (${board.folderTitle})` : "";
          report.push(`- ${board.title}${folderText}${tagText}`);
          report.push(`  URL: ${grafana.getDashboardUrl(board.uid)}`);
        }
        return { success: true, data: report.join("\n") };
      }

      return failure(`Unknown operation: ${operation}`);
    } catch (error) {
      // Client and network errors are surfaced as a failed result, not thrown.
      return failure(error instanceof Error ? error.message : String(error));
    }
  }) as any,
});
197
+
198
/** Milliseconds represented by each supported relative-time unit suffix. */
const RELATIVE_TIME_UNIT_MS = {
  s: 1000,
  m: 60 * 1000,
  h: 60 * 60 * 1000,
  d: 24 * 60 * 60 * 1000,
  w: 7 * 24 * 60 * 60 * 1000,
} as const;

/**
 * Converts a relative time expression into an absolute ISO 8601 timestamp.
 *
 * Recognized forms (surrounding whitespace is ignored):
 * - `"now"`: the current time.
 * - `"<n><unit>"`, e.g. `"1h"`, `"30m"`, `"2d"`: that long before now.
 * - `"now-<n><unit>"`, e.g. `"now-1h"`: same meaning as `"<n><unit>"`.
 *
 * Units are `s`, `m`, `h`, `d` and `w` (lowercase only). Anything else, such
 * as an ISO 8601 timestamp or a Unix timestamp, is returned trimmed but
 * otherwise unchanged.
 *
 * @param timeStr - Relative expression or absolute timestamp.
 * @returns An ISO 8601 timestamp for recognized relative input, otherwise the
 *          trimmed input.
 */
function parseRelativeTime(timeStr: string): string {
  const input = timeStr.trim();

  if (input === "now") {
    return new Date().toISOString();
  }

  // The pattern is fully anchored, so absolute timestamps never match and
  // fall through unchanged without needing a separate "looks like ISO" check.
  const match = input.match(/^(?:now\s*-\s*)?(\d+)([smhdw])$/);
  if (!match) {
    return input;
  }

  const amount = parseInt(match[1] ?? "", 10);
  // The regex only admits keys of RELATIVE_TIME_UNIT_MS in group 2.
  const unit = match[2] as keyof typeof RELATIVE_TIME_UNIT_MS;
  const resolved = new Date(Date.now() - amount * RELATIVE_TIME_UNIT_MS[unit]);

  // A huge amount overflows the Date range; toISOString() would throw a
  // RangeError on it, so hand the input back instead.
  if (Number.isNaN(resolved.getTime())) {
    return input;
  }

  return resolved.toISOString();
}