triagent 0.1.0-alpha8 → 0.1.0-beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +101 -1
- package/package.json +9 -3
- package/src/cli/config.ts +118 -2
- package/src/config.ts +23 -3
- package/src/index.ts +262 -6
- package/src/integrations/elasticsearch/client.ts +210 -0
- package/src/integrations/grafana/client.ts +186 -0
- package/src/integrations/kubernetes/multi-cluster.ts +199 -0
- package/src/integrations/kubernetes/types.ts +24 -0
- package/src/integrations/loki/client.ts +219 -0
- package/src/integrations/prometheus/client.ts +163 -0
- package/src/integrations/slack/client.ts +265 -0
- package/src/integrations/teams/client.ts +199 -0
- package/src/mastra/agents/debugger.ts +164 -109
- package/src/mastra/index.ts +2 -2
- package/src/mastra/tools/approval-store.ts +180 -0
- package/src/mastra/tools/cli.ts +94 -2
- package/src/mastra/tools/cost.ts +389 -0
- package/src/mastra/tools/logs.ts +210 -0
- package/src/mastra/tools/network.ts +253 -0
- package/src/mastra/tools/prometheus.ts +221 -0
- package/src/mastra/tools/remediation.ts +365 -0
- package/src/mastra/tools/runbook.ts +186 -0
- package/src/sandbox/bashlet.ts +76 -10
- package/src/server/routes/history.ts +207 -0
- package/src/server/routes/notifications.ts +236 -0
- package/src/server/webhook.ts +36 -2
- package/src/storage/index.ts +3 -0
- package/src/storage/investigation-history.ts +277 -0
- package/src/storage/runbook-index.ts +330 -0
- package/src/storage/types.ts +72 -0
- package/src/tui/app.tsx +278 -198
- package/src/tui/components/approval-dialog.tsx +147 -0
- package/src/tui/components/approval-modal.tsx +278 -0
- package/src/tui/components/centered-layout.tsx +33 -0
- package/src/tui/components/editor.tsx +87 -0
- package/src/tui/components/header.tsx +53 -0
- package/src/tui/components/index.ts +55 -0
- package/src/tui/components/message-item.tsx +131 -0
- package/src/tui/components/messages-panel.tsx +71 -0
- package/src/tui/components/status-badge.tsx +20 -0
- package/src/tui/components/status-bar.tsx +39 -0
- package/src/tui/components/styled-span.tsx +24 -0
- package/src/tui/components/timeline.tsx +223 -0
- package/src/tui/components/toast.tsx +104 -0
- package/src/tui/theme/index.ts +21 -0
- package/src/tui/theme/tokens.ts +180 -0
|
@@ -0,0 +1,389 @@
|
|
|
1
|
+
import { createTool } from "@mastra/core/tools";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { exec } from "child_process";
|
|
4
|
+
import { promisify } from "util";
|
|
5
|
+
import type { CostAnalysisConfig } from "../../cli/config.js";
|
|
6
|
+
|
|
7
|
+
const execAsync = promisify(exec);
|
|
8
|
+
|
|
9
|
+
/**
 * Built-in hourly prices in USD, based on typical cloud pricing.
 * Used whenever no rates have been supplied through the cost configuration.
 */
const DEFAULT_RATES = {
  /** Price of one vCPU for one hour. */
  cpu: 0.03,
  /** Price of one GB of memory for one hour. */
  memory: 0.004,
  /** Price of one GB of storage for one hour. */
  storage: 0.0001,
};
|
|
15
|
+
|
|
16
|
+
/** Resource footprint of a workload, expressed in cores and gigabytes. */
interface ResourceUsage {
  /** CPU in cores (millicore values are divided by 1000 when parsed). */
  cpuCores: number;
  /** Memory in GB. */
  memoryGB: number;
  /** Persistent storage in GB. */
  storageGB: number;
}
|
|
21
|
+
|
|
22
|
+
/** Priced footprint: totals for three horizons plus the hourly cost per resource. */
interface CostEstimate {
  /** Total cost for one hour (USD). */
  hourly: number;
  /** Total cost for 24 hours (USD). */
  daily: number;
  /** Total cost for 30 days (USD). */
  monthly: number;
  /** Hourly cost attributed to each resource type (USD). */
  breakdown: {
    cpu: number;
    memory: number;
    storage: number;
  };
}
|
|
32
|
+
|
|
33
|
+
/**
 * Cost-analysis settings currently in effect.
 * Stays `null` until `initCostConfig` is called with a configuration object.
 */
let costConfig: CostAnalysisConfig | null = null;

/**
 * Installs the configuration consulted by the cost calculations.
 *
 * @param config - Settings to use; when omitted the stored configuration is reset to `null`.
 */
export function initCostConfig(config?: CostAnalysisConfig): void {
  costConfig = config ? config : null;
}
|
|
39
|
+
|
|
40
|
+
/**
 * Resolves the hourly rates (USD) used for cost calculations.
 *
 * Each rate is taken from `costConfig.hourlyRates` when it is a finite,
 * non-negative number; otherwise the matching `DEFAULT_RATES` entry is used.
 * An explicit rate of `0` is honoured (for example to leave storage out of an
 * estimate) instead of being silently replaced by the default.
 *
 * @returns The effective per-hour rates for CPU, memory and storage.
 */
function getRates(): typeof DEFAULT_RATES {
  const configured = costConfig?.hourlyRates;
  if (!configured) {
    return DEFAULT_RATES;
  }

  // `value || fallback` would discard a configured 0, so validate explicitly.
  const pickRate = (value: unknown, fallback: number): number =>
    typeof value === "number" && Number.isFinite(value) && value >= 0 ? value : fallback;

  return {
    cpu: pickRate(configured.cpu, DEFAULT_RATES.cpu),
    memory: pickRate(configured.memory, DEFAULT_RATES.memory),
    storage: pickRate(configured.storage, DEFAULT_RATES.storage),
  };
}
|
|
50
|
+
|
|
51
|
+
/**
 * Prices a resource footprint with the rates returned by `getRates`.
 *
 * @param usage - CPU cores, memory (GB) and storage (GB) to price.
 * @returns Hourly total, daily total (24 hours), monthly total (30 days) and
 *          the hourly cost attributed to each resource.
 */
function calculateCost(usage: ResourceUsage): CostEstimate {
  const { cpu: cpuRate, memory: memoryRate, storage: storageRate } = getRates();

  const breakdown = {
    cpu: usage.cpuCores * cpuRate,
    memory: usage.memoryGB * memoryRate,
    storage: usage.storageGB * storageRate,
  };

  const perHour = breakdown.cpu + breakdown.memory + breakdown.storage;
  const perDay = perHour * 24;

  return {
    hourly: perHour,
    daily: perDay,
    monthly: perDay * 30,
    breakdown,
  };
}
|
|
71
|
+
|
|
72
|
+
/**
 * Renders an amount as a dollar string with four decimal places.
 *
 * @param amount - Value in USD.
 * @returns The amount prefixed with `$`, e.g. `$0.0300`.
 */
function formatCurrency(amount: number): string {
  const fixed = amount.toFixed(4);
  return "$" + fixed;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Values that may be interpolated into the kubectl command line: letters,
 * digits, and `.`, `_`, `-` in the interior. Anything else (spaces, quotes,
 * `;`, `$`, a leading `-`, ...) is rejected so it cannot reach the shell.
 */
const KUBECTL_SAFE_ARG = /^[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?$/;

/** Divisor that converts bytes to the (1024-based) GB figure reported by this module. */
const BYTES_PER_GB = 1024 ** 3;

/** Multiplier for each Kubernetes quantity suffix (decimal SI and binary). */
const QUANTITY_SUFFIX_MULTIPLIERS: Record<string, number> = {
  "": 1,
  k: 1e3,
  M: 1e6,
  G: 1e9,
  T: 1e12,
  P: 1e15,
  E: 1e18,
  Ki: 1024,
  Mi: 1024 ** 2,
  Gi: 1024 ** 3,
  Ti: 1024 ** 4,
  Pi: 1024 ** 5,
  Ei: 1024 ** 6,
};

/**
 * Converts a Kubernetes quantity string (e.g. `"500m"`, `"0.5"`, `"128Mi"`,
 * `"1G"`, `"12e6"`) to a plain number in base units (cores or bytes).
 * Returns 0 for anything that is not a recognisable quantity.
 */
function parseKubeQuantity(quantity: unknown): number {
  if (typeof quantity !== "string") {
    return 0;
  }
  const match = /^(\d+(?:\.\d+)?)(?:[eE]([+-]?\d+))?(m|k|M|G|T|P|E|Ki|Mi|Gi|Ti|Pi|Ei)?$/.exec(
    quantity.trim()
  );
  if (!match) {
    return 0;
  }

  const mantissa = Number(match[1]);
  const exponent = match[2] ? Number(match[2]) : 0;
  const suffix = match[3] ?? "";
  const base = exponent === 0 ? mantissa : mantissa * 10 ** exponent;

  // Milli-units are divided rather than multiplied by 0.001 to keep values like 500m exact.
  if (suffix === "m") {
    return base / 1000;
  }
  return base * (QUANTITY_SUFFIX_MULTIPLIERS[suffix] ?? 1);
}

/**
 * Parses kubectl's jsonpath output for `containers[*].resources`: one JSON
 * object per container, separated by whitespace. Output that cannot be parsed
 * is treated as "no resource information" (best effort).
 */
function parseContainerResources(stdout: string): Array<Record<string, unknown>> {
  const text = stdout.trim();
  if (text === "") {
    return [];
  }
  try {
    const parsed: unknown = JSON.parse(`[${text.replace(/\}\s+\{/g, "},{")}]`);
    if (!Array.isArray(parsed)) {
      return [];
    }
    return parsed.filter(
      (entry): entry is Record<string, unknown> => typeof entry === "object" && entry !== null
    );
  } catch {
    return [];
  }
}

/** Reads `requests.<resource>` for a container, falling back to `limits.<resource>`. */
function readContainerQuantity(
  resources: Record<string, unknown>,
  resource: "cpu" | "memory"
): unknown {
  const read = (section: unknown): unknown =>
    typeof section === "object" && section !== null
      ? (section as Record<string, unknown>)[resource]
      : undefined;
  return read(resources.requests) ?? read(resources.limits);
}

/**
 * Reads the resources declared for one replica of a Kubernetes object.
 *
 * CPU and memory are summed over all containers of the pod template (or of the
 * pod itself when `kind` is a Pod), using each container's request and falling
 * back to its limit. Storage is the sum of the requests of all PVCs in the
 * namespace labelled `app=<name>`.
 *
 * @param kind - Resource kind; lower-cased before being passed to kubectl.
 * @param name - Resource name; also used as the `app` label value for the PVC lookup.
 * @param namespace - Namespace containing the resource.
 * @returns The per-replica usage, or `null` when an argument is not a plain
 *          identifier or the kubectl lookup fails.
 */
async function getResourceUsage(kind: string, name: string, namespace: string): Promise<ResourceUsage | null> {
  // The arguments end up inside a shell command, so refuse anything that is
  // not a plain identifier instead of trying to escape it.
  if (![kind, name, namespace].every((arg) => KUBECTL_SAFE_ARG.test(arg))) {
    return null;
  }

  try {
    // Both container paths are queried in one call: kubectl ignores missing
    // jsonpath keys by default, so the path that does not exist for this kind
    // simply prints nothing. (A shell `||` fallback would never run for Pods,
    // because the first command exits successfully with empty output.)
    const { stdout } = await execAsync(
      `kubectl get ${kind.toLowerCase()} ${name} -n ${namespace} -o jsonpath='{.spec.template.spec.containers[*].resources} {.spec.containers[*].resources}' 2>/dev/null`,
      { timeout: 10000 }
    );

    let cpuCores = 0;
    let memoryGB = 0;
    for (const resources of parseContainerResources(stdout)) {
      cpuCores += parseKubeQuantity(readContainerQuantity(resources, "cpu"));
      memoryGB += parseKubeQuantity(readContainerQuantity(resources, "memory")) / BYTES_PER_GB;
    }

    // PVC lookup is best effort: a failing kubectl call yields an empty list.
    const { stdout: pvcStdout } = await execAsync(
      `kubectl get pvc -n ${namespace} -l app=${name} -o jsonpath='{.items[*].spec.resources.requests.storage}' 2>/dev/null || echo ""`,
      { timeout: 10000 }
    );

    let storageGB = 0;
    for (const quantity of pvcStdout.split(/\s+/).filter(Boolean)) {
      storageGB += parseKubeQuantity(quantity) / BYTES_PER_GB;
    }

    return { cpuCores, memoryGB, storageGB };
  } catch {
    // Callers treat null as "usage unavailable" and report it themselves.
    return null;
  }
}
|
|
135
|
+
|
|
136
|
+
/**
 * Reads `.spec.replicas` of a Kubernetes object via kubectl.
 *
 * A workload scaled to zero is reported as `0` so that it is not priced as if
 * one replica were running. When the field is absent, unparsable or negative,
 * when an argument is not a plain identifier, or when the lookup fails, the
 * count defaults to `1`.
 *
 * @param kind - Resource kind; lower-cased before being passed to kubectl.
 * @param name - Resource name.
 * @param namespace - Namespace containing the resource.
 * @returns The replica count (>= 0).
 */
async function getReplicaCount(kind: string, name: string, namespace: string): Promise<number> {
  // The arguments are interpolated into a shell command; only plain
  // identifiers (letters, digits, interior `.`, `_`, `-`) are allowed through.
  const safeArg = /^[A-Za-z0-9](?:[A-Za-z0-9._-]*[A-Za-z0-9])?$/;
  if (![kind, name, namespace].every((arg) => safeArg.test(arg))) {
    return 1;
  }

  try {
    const { stdout } = await execAsync(
      `kubectl get ${kind.toLowerCase()} ${name} -n ${namespace} -o jsonpath='{.spec.replicas}' 2>/dev/null || echo "1"`,
      { timeout: 10000 }
    );
    const replicas = Number.parseInt(stdout.trim(), 10);
    // `parseInt(...) || 1` would turn a legitimate 0 into 1, so test for NaN instead.
    return Number.isNaN(replicas) || replicas < 0 ? 1 : replicas;
  } catch {
    return 1;
  }
}
|
|
147
|
+
|
|
148
|
+
/** Kubernetes object that a cost operation is scoped to. */
interface CostToolTarget {
  kind: string;
  name: string;
  namespace: string;
}

/** Incident window as supplied to the tool. */
interface CostToolTimeRange {
  start: string;
  end?: string;
}

/** Result shape shared by all cost operations; mirrors the tool's `outputSchema`. */
interface CostToolResult {
  success: boolean;
  data: string;
  costs?: {
    hourly?: number;
    daily?: number;
    monthly?: number;
    incident?: number;
    business?: number;
  };
  error?: string;
}

/**
 * Looks up the target's per-replica usage and scales it to the whole workload:
 * CPU and memory are multiplied by the replica count, storage is used as reported.
 *
 * @returns The replica count and total usage, or `null` when usage is unavailable.
 */
async function loadWorkloadUsage(
  target: CostToolTarget
): Promise<{ replicas: number; totalUsage: ResourceUsage } | null> {
  const usage = await getResourceUsage(target.kind, target.name, target.namespace);
  if (!usage) {
    return null;
  }

  const replicas = await getReplicaCount(target.kind, target.name, target.namespace);
  return {
    replicas,
    totalUsage: {
      cpuCores: usage.cpuCores * replicas,
      memoryGB: usage.memoryGB * replicas,
      storageGB: usage.storageGB,
    },
  };
}

/** `resource` operation: estimates the hourly, daily and monthly cost of one workload. */
async function estimateWorkloadCost(target?: CostToolTarget): Promise<CostToolResult> {
  if (!target) {
    return {
      success: false,
      data: "",
      error: "Target resource is required for cost estimation",
    };
  }

  const workload = await loadWorkloadUsage(target);
  if (!workload) {
    return {
      success: false,
      data: "",
      error: `Could not get resource usage for ${target.kind}/${target.name}`,
    };
  }

  const { replicas, totalUsage } = workload;
  const cost = calculateCost(totalUsage);
  const rates = getRates();

  const lines = [
    `💰 Cost Analysis: ${target.kind}/${target.name}`,
    ` Namespace: ${target.namespace}`,
    ` Replicas: ${replicas}`,
    ``,
    `📊 Resource Usage (total):`,
    ` CPU: ${totalUsage.cpuCores.toFixed(2)} cores`,
    ` Memory: ${totalUsage.memoryGB.toFixed(2)} GB`,
    ` Storage: ${totalUsage.storageGB.toFixed(2)} GB`,
    ``,
    `💵 Cost Estimate:`,
    ` Hourly: ${formatCurrency(cost.hourly)}`,
    ` Daily: ${formatCurrency(cost.daily)}`,
    ` Monthly: ${formatCurrency(cost.monthly)}`,
    ``,
    `📈 Breakdown (hourly):`,
    ` CPU: ${formatCurrency(cost.breakdown.cpu)} (${formatCurrency(rates.cpu)}/core/hr)`,
    ` Memory: ${formatCurrency(cost.breakdown.memory)} (${formatCurrency(rates.memory)}/GB/hr)`,
    ` Storage: ${formatCurrency(cost.breakdown.storage)} (${formatCurrency(rates.storage)}/GB/hr)`,
  ];

  return {
    success: true,
    data: lines.join("\n"),
    costs: {
      hourly: cost.hourly,
      daily: cost.daily,
      monthly: cost.monthly,
    },
  };
}

/**
 * `incident` operation: prices an incident window.
 *
 * The resource cost is the target's hourly cost times the incident duration
 * (0 when no target is given or its usage is unavailable). The business impact
 * is `costConfig.businessImpact.revenuePerMinute` times the duration in
 * minutes, when that rate is configured.
 *
 * The window is validated before any lookup runs: timestamps that cannot be
 * parsed and windows whose end precedes their start are reported as errors
 * instead of producing an opaque exception or negative costs.
 */
async function estimateIncidentCost(
  timeRange?: CostToolTimeRange,
  target?: CostToolTarget
): Promise<CostToolResult> {
  if (!timeRange) {
    return {
      success: false,
      data: "",
      error: "Time range is required for incident cost calculation",
    };
  }

  const startTime = new Date(timeRange.start);
  // A missing end, or the literal "now", means the incident is still ongoing.
  const endTime = timeRange.end === "now" || !timeRange.end
    ? new Date()
    : new Date(timeRange.end);

  if (Number.isNaN(startTime.getTime()) || Number.isNaN(endTime.getTime())) {
    return {
      success: false,
      data: "",
      error: "Invalid time range: start and end must be ISO 8601 timestamps (end may also be 'now')",
    };
  }

  const durationMs = endTime.getTime() - startTime.getTime();
  if (durationMs < 0) {
    return {
      success: false,
      data: "",
      error: "Invalid time range: end is before start",
    };
  }
  const durationMinutes = durationMs / 60000;
  const durationHours = durationMs / 3600000;

  // Resource cost during the incident, if a target was provided.
  let resourceCost = 0;
  let usageLines: string[] = [];

  if (target) {
    const workload = await loadWorkloadUsage(target);
    if (workload) {
      resourceCost = calculateCost(workload.totalUsage).hourly * durationHours;
      usageLines = [
        ` Affected: ${target.kind}/${target.name}`,
        ` Resource cost during incident: ${formatCurrency(resourceCost)}`,
      ];
    }
  }

  // Business impact, if a revenue rate is configured.
  let businessImpact = 0;
  let businessLines: string[] = [];

  const revenuePerMinute = costConfig?.businessImpact?.revenuePerMinute;
  if (revenuePerMinute) {
    businessImpact = revenuePerMinute * durationMinutes;
    businessLines = [
      ``,
      `📉 Business Impact:`,
      ` Revenue rate: ${formatCurrency(revenuePerMinute)}/min`,
      ` Estimated lost revenue: ${formatCurrency(businessImpact)}`,
    ];
  }

  const totalCost = resourceCost + businessImpact;

  const lines = [
    `⏱️ Incident Duration Analysis`,
    ``,
    `📅 Time Range:`,
    ` Start: ${startTime.toISOString()}`,
    ` End: ${endTime.toISOString()}`,
    ` Duration: ${durationMinutes.toFixed(0)} minutes (${durationHours.toFixed(2)} hours)`,
    ``,
    `💰 Resource Cost:`,
    ...usageLines,
    ...businessLines,
    ``,
    `📊 Total Incident Cost: ${formatCurrency(totalCost)}`,
  ];

  return {
    success: true,
    data: lines.join("\n"),
    costs: {
      incident: resourceCost,
      business: businessImpact,
    },
  };
}

/**
 * `optimization` operation: lists deployments in all namespaces that have more
 * than 3 replicas or whose first container lacks a CPU or memory request.
 */
async function suggestCostOptimizations(): Promise<CostToolResult> {
  // One record per deployment: namespace,name,replicas,cpu,memory - separated by a space.
  const { stdout } = await execAsync(
    `kubectl get deployments -A -o jsonpath='{range .items[*]}{.metadata.namespace},{.metadata.name},{.spec.replicas},{.spec.template.spec.containers[0].resources.requests.cpu},{.spec.template.spec.containers[0].resources.requests.memory}{" "}' 2>/dev/null || echo ""`,
    { timeout: 30000 }
  );

  const suggestions: string[] = [];
  const items = stdout.trim().split(" ").filter(Boolean);

  for (const item of items) {
    const [namespace, name, replicas, cpu, memory] = item.split(",");

    // Many fixed replicas: a candidate for autoscaling.
    if (parseInt(replicas, 10) > 3) {
      suggestions.push(`• ${namespace}/${name}: Consider autoscaling (currently ${replicas} replicas)`);
    }

    // Missing requests on the first container.
    if (!cpu || !memory) {
      suggestions.push(`• ${namespace}/${name}: Add resource requests for better scheduling`);
    }
  }

  if (suggestions.length === 0) {
    return {
      success: true,
      data: "✅ No obvious cost optimization opportunities found.\n\nConsider:\n- Reviewing unused PVCs\n- Right-sizing node pools\n- Using spot/preemptible instances",
    };
  }

  const lines = [
    `💡 Cost Optimization Suggestions:`,
    ``,
    ...suggestions,
    ``,
    `General recommendations:`,
    `- Review unused PVCs and delete if not needed`,
    `- Consider using horizontal pod autoscaling`,
    `- Use spot/preemptible instances for non-critical workloads`,
    `- Right-size resource requests based on actual usage`,
  ];

  return {
    success: true,
    data: lines.join("\n"),
  };
}

/**
 * Agent tool that estimates workload cost, prices incidents and lists
 * cost-optimization candidates. Every operation resolves to a
 * `{ success, data, costs?, error? }` object; exceptions raised while running
 * an operation are converted into `success: false` results.
 */
export const costTool = createTool({
  id: "cost",
  description: `Analyze resource costs and incident impact.
Use this tool to:
- Estimate resource costs for Kubernetes workloads
- Calculate incident cost impact (downtime costs)
- Suggest cost optimization opportunities

Costs are calculated based on configured hourly rates or default cloud pricing.`,
  inputSchema: z.object({
    operation: z.enum(["resource", "incident", "optimization"]).describe(
      "Operation: resource (estimate workload cost), incident (calculate downtime impact), optimization (find savings)"
    ),
    target: z.object({
      kind: z.string().describe("Kubernetes resource kind"),
      name: z.string().describe("Resource name"),
      namespace: z.string().describe("Resource namespace"),
    }).optional().describe("Target resource for cost analysis"),
    timeRange: z.object({
      start: z.string().describe("Incident start time (ISO 8601)"),
      end: z.string().optional().describe("Incident end time (ISO 8601 or 'now')"),
    }).optional().describe("Time range for incident cost calculation"),
  }),
  outputSchema: z.object({
    success: z.boolean(),
    data: z.string(),
    costs: z.object({
      hourly: z.number().optional(),
      daily: z.number().optional(),
      monthly: z.number().optional(),
      incident: z.number().optional(),
      business: z.number().optional(),
    }).optional(),
    error: z.string().optional(),
  }),
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  execute: (async ({ operation, target, timeRange }: any) => {
    try {
      // `return await` keeps rejections inside this try block.
      switch (operation) {
        case "resource":
          return await estimateWorkloadCost(target);
        case "incident":
          return await estimateIncidentCost(timeRange, target);
        case "optimization":
          return await suggestCostOptimizations();
        default:
          return { success: false, data: "", error: `Unknown operation: ${operation}` };
      }
    } catch (error) {
      return {
        success: false,
        data: "",
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }) as any,
});
|
|
@@ -0,0 +1,210 @@
|
|
|
1
|
+
import { createTool } from "@mastra/core/tools";
|
|
2
|
+
import { z } from "zod";
|
|
3
|
+
import { getElasticsearchClient } from "../../integrations/elasticsearch/client.js";
|
|
4
|
+
import { getLokiClient } from "../../integrations/loki/client.js";
|
|
5
|
+
|
|
6
|
+
/**
 * Agent tool for querying centralized logs.
 *
 * Uses the Elasticsearch client when one is available and the Loki client
 * otherwise. Every operation resolves to `{ success, data, count?, error? }`;
 * exceptions are converted into `success: false` results.
 */
export const logsTool = createTool({
  id: "logs",
  description: `Search and aggregate logs from centralized log systems (Elasticsearch or Loki).
Use this tool for:
- Searching logs beyond kubectl retention
- Cross-pod log correlation
- Historical log analysis
- Log aggregation and statistics

The query syntax depends on the configured log provider:
- Elasticsearch: Lucene query syntax (e.g., "error AND pod:myapp*")
- Loki: LogQL syntax (e.g., '{namespace="production"} |= "error"')`,
  inputSchema: z.object({
    operation: z.enum(["search", "tail", "aggregate"]).describe(
      "Operation: search (query logs), tail (recent logs), aggregate (group by field)"
    ),
    query: z.string().describe(
      "Search query. For ES use Lucene syntax, for Loki use LogQL"
    ),
    timeRange: z.object({
      start: z.string().describe("Start time (ISO 8601 or relative like '1h')"),
      end: z.string().optional().describe("End time (ISO 8601, 'now', or omit for current)"),
    }).optional().describe("Time range for search"),
    limit: z.number().default(100).describe("Maximum number of logs to return"),
    aggregateField: z.string().optional().describe("Field to aggregate by (for aggregate operation)"),
  }),
  outputSchema: z.object({
    success: z.boolean(),
    data: z.string(),
    count: z.number().optional(),
    error: z.string().optional(),
  }),
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  execute: (async ({ operation, query, timeRange, limit, aggregateField }: any) => {
    try {
      const esClient = getElasticsearchClient();
      const lokiClient = getLokiClient();

      // Without either provider there is nothing to query.
      if (!esClient && !lokiClient) {
        return {
          success: false,
          data: "",
          error: "No log provider configured. Set elasticsearch or loki in config.",
        };
      }

      // Time range for the Elasticsearch calls, with both bounds passed through
      // parseRelativeTime. Loki calls receive the raw values instead.
      const elasticTimeRange = () =>
        timeRange
          ? {
              start: parseRelativeTime(timeRange.start),
              end: timeRange.end ? parseRelativeTime(timeRange.end) : undefined,
            }
          : undefined;

      switch (operation) {
        case "search": {
          if (esClient) {
            const hits = await esClient.search({
              query,
              timeRange: elasticTimeRange(),
              limit,
            });
            return {
              success: true,
              data: hits.length > 0 ? esClient.formatLogs(hits) : "No logs found",
              count: hits.length,
            };
          }

          const entries = await lokiClient!.query({
            query,
            start: timeRange?.start,
            end: timeRange?.end,
            limit,
          });
          return {
            success: true,
            data: entries.length > 0 ? lokiClient!.formatLogs(entries) : "No logs found",
            count: entries.length,
          };
        }

        case "tail": {
          if (esClient) {
            // Elasticsearch has no dedicated tail call here: search newest-first instead.
            const hits = await esClient.search({
              query,
              limit,
              sort: "desc",
            });
            return {
              success: true,
              data: hits.length > 0 ? esClient.formatLogs(hits) : "No recent logs",
              count: hits.length,
            };
          }

          const entries = await lokiClient!.tail({
            query,
            limit,
          });
          return {
            success: true,
            data: entries.length > 0 ? lokiClient!.formatLogs(entries) : "No recent logs",
            count: entries.length,
          };
        }

        case "aggregate": {
          if (!aggregateField) {
            return {
              success: false,
              data: "",
              error: "aggregateField is required for aggregate operation",
            };
          }

          const report = [`Aggregation by ${aggregateField}:\n`];

          if (esClient) {
            const buckets = await esClient.aggregate({
              query,
              field: aggregateField,
              timeRange: elasticTimeRange(),
            });
            for (const bucket of buckets) {
              report.push(` ${bucket.key}: ${bucket.count} logs`);
            }
            return {
              success: true,
              data: report.join("\n"),
              count: buckets.length,
            };
          }

          // Loki path: count the matching series per value of the requested label.
          const series = await lokiClient!.getSeries([query]);
          const streamsPerValue = new Map<string, number>();
          for (const labels of series) {
            const labelValue = labels[aggregateField] || "unknown";
            streamsPerValue.set(labelValue, (streamsPerValue.get(labelValue) || 0) + 1);
          }
          for (const [labelValue, streams] of streamsPerValue) {
            report.push(` ${labelValue}: ${streams} streams`);
          }
          return {
            success: true,
            data: report.join("\n"),
            count: streamsPerValue.size,
          };
        }

        default:
          return { success: false, data: "", error: `Unknown operation: ${operation}` };
      }
    } catch (error) {
      return {
        success: false,
        data: "",
        error: error instanceof Error ? error.message : String(error),
      };
    }
  }) as any,
});
|
|
182
|
+
|
|
183
|
+
/** Milliseconds per unit accepted in relative time expressions such as `30m` or `2d`. */
const RELATIVE_TIME_UNIT_MS: Record<"s" | "m" | "h" | "d" | "w", number> = {
  s: 1000,
  m: 60 * 1000,
  h: 60 * 60 * 1000,
  d: 24 * 60 * 60 * 1000,
  w: 7 * 24 * 60 * 60 * 1000,
};

/**
 * Converts a time expression into an ISO 8601 timestamp where possible.
 *
 * - `"now"` resolves to the reference time.
 * - `<number><unit>` with unit `s`, `m`, `h`, `d` or `w` resolves to that far
 *   before the reference time (e.g. `"1h"` is one hour ago).
 * - Anything containing `T` or `-` is assumed to be an absolute timestamp
 *   already and is returned unchanged, as is any other unrecognised input.
 *
 * Surrounding whitespace is ignored, so `" 1h "` is handled like `"1h"`.
 *
 * @param timeStr - Time expression to convert.
 * @param now - Reference time for `"now"` and relative expressions; defaults to
 *              the current time. Pass the same value for both ends of a range
 *              to resolve them against one clock reading.
 * @returns An ISO 8601 timestamp, or the trimmed input when it is not a
 *          recognised relative expression.
 */
function parseRelativeTime(timeStr: string, now: Date = new Date()): string {
  const expression = timeStr.trim();

  if (expression === "now") {
    return now.toISOString();
  }

  // Heuristic for "already absolute": ISO timestamps contain a `T` or dashes.
  if (expression.includes("T") || expression.includes("-")) {
    return expression;
  }

  const match = /^(\d+)([smhdw])$/.exec(expression);
  if (!match) {
    return expression;
  }

  const amount = parseInt(match[1], 10);
  const unitMs = RELATIVE_TIME_UNIT_MS[match[2] as keyof typeof RELATIVE_TIME_UNIT_MS];

  return new Date(now.getTime() - amount * unitMs).toISOString();
}
|