kubeagent 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +72 -0
- package/README.md +154 -0
- package/dist/auth.d.ts +23 -0
- package/dist/auth.js +162 -0
- package/dist/cli.d.ts +2 -0
- package/dist/cli.js +447 -0
- package/dist/config.d.ts +50 -0
- package/dist/config.js +79 -0
- package/dist/debug.d.ts +10 -0
- package/dist/debug.js +18 -0
- package/dist/diagnoser/index.d.ts +17 -0
- package/dist/diagnoser/index.js +251 -0
- package/dist/diagnoser/tools.d.ts +119 -0
- package/dist/diagnoser/tools.js +108 -0
- package/dist/kb/loader.d.ts +1 -0
- package/dist/kb/loader.js +41 -0
- package/dist/kb/writer.d.ts +11 -0
- package/dist/kb/writer.js +36 -0
- package/dist/kubectl-config.d.ts +7 -0
- package/dist/kubectl-config.js +47 -0
- package/dist/kubectl.d.ts +13 -0
- package/dist/kubectl.js +57 -0
- package/dist/monitor/checks.d.ts +71 -0
- package/dist/monitor/checks.js +167 -0
- package/dist/monitor/index.d.ts +7 -0
- package/dist/monitor/index.js +126 -0
- package/dist/monitor/types.d.ts +11 -0
- package/dist/monitor/types.js +1 -0
- package/dist/notify/index.d.ts +5 -0
- package/dist/notify/index.js +40 -0
- package/dist/notify/setup.d.ts +4 -0
- package/dist/notify/setup.js +88 -0
- package/dist/notify/slack.d.ts +4 -0
- package/dist/notify/slack.js +76 -0
- package/dist/notify/telegram.d.ts +8 -0
- package/dist/notify/telegram.js +63 -0
- package/dist/notify/webhook.d.ts +3 -0
- package/dist/notify/webhook.js +49 -0
- package/dist/onboard/cluster-scan.d.ts +42 -0
- package/dist/onboard/cluster-scan.js +103 -0
- package/dist/onboard/code-scan.d.ts +9 -0
- package/dist/onboard/code-scan.js +114 -0
- package/dist/onboard/index.d.ts +1 -0
- package/dist/onboard/index.js +328 -0
- package/dist/onboard/interview.d.ts +12 -0
- package/dist/onboard/interview.js +71 -0
- package/dist/onboard/project-matcher.d.ts +25 -0
- package/dist/onboard/project-matcher.js +149 -0
- package/dist/orchestrator.d.ts +3 -0
- package/dist/orchestrator.js +222 -0
- package/dist/proxy-client.d.ts +15 -0
- package/dist/proxy-client.js +72 -0
- package/dist/render.d.ts +5 -0
- package/dist/render.js +143 -0
- package/dist/verifier.d.ts +9 -0
- package/dist/verifier.js +17 -0
- package/package.json +39 -0
package/dist/kubectl.js
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import { execFile } from "node:child_process";
|
|
2
|
+
import { promisify } from "node:util";
|
|
3
|
+
import { dbg } from "./debug.js";
|
|
4
|
+
const execFileAsync = promisify(execFile);

/**
 * Error raised for any kubectl failure: non-zero exit, timeout,
 * stderr-only output, or unparseable JSON.
 */
export class KubectlError extends Error {
    stderr;
    exitCode;
    constructor(message, stderr, exitCode) {
        super(message);
        this.name = "KubectlError";
        this.exitCode = exitCode;
        this.stderr = stderr;
    }
}

/**
 * Parses raw kubectl stdout as JSON.
 *
 * @param raw stdout captured from a kubectl invocation.
 * @returns The parsed JSON value.
 * @throws KubectlError when the output is blank or not valid JSON
 *         (the message carries the first 200 chars for debugging).
 */
export function parseKubectlJson(raw) {
    const trimmed = raw.trim();
    if (trimmed.length === 0) {
        throw new KubectlError("Empty kubectl output");
    }
    try {
        return JSON.parse(raw);
    }
    catch {
        throw new KubectlError(`Failed to parse kubectl output: ${raw.slice(0, 200)}`);
    }
}
|
|
26
|
+
/**
 * Runs `kubectl` with the given args and resolves with its stdout.
 *
 * `options.namespace` / `options.context` are prepended as `-n` and
 * `--context` flags. Rejects with KubectlError on failure, timeout
 * (default 30s), or when kubectl writes only to stderr.
 */
export async function kubectl(args, options = {}) {
    // Build scoping flags; final argv order is: -n <ns> --context <ctx> <args>.
    const prefix = [];
    if (options.namespace) {
        prefix.push("-n", options.namespace);
    }
    if (options.context) {
        prefix.push("--context", options.context);
    }
    const fullArgs = [...prefix, ...args];
    dbg("kubectl", `kubectl ${fullArgs.join(" ")}`);
    try {
        const { stdout, stderr } = await execFileAsync("kubectl", fullArgs, {
            timeout: (options.timeout ?? 30) * 1000,
            maxBuffer: 10 * 1024 * 1024,
        });
        dbg("kubectl", `exit 0, stdout ${stdout.length}B${stderr ? `, stderr: ${stderr.slice(0, 200)}` : ""}`);
        // stderr alongside stdout is tolerated (warnings); stderr-only is an error.
        if (stderr && !stdout) {
            throw new KubectlError(`kubectl error: ${stderr}`, stderr);
        }
        return stdout;
    }
    catch (err) {
        if (err instanceof KubectlError)
            throw err;
        const e = err;
        dbg("kubectl", `failed: ${e.message}`, { stderr: e.stderr, code: e.code });
        throw new KubectlError(e.message ?? "kubectl failed", e.stderr, e.code);
    }
}
|
|
54
|
+
/**
 * Runs kubectl with `-o json` appended and returns the parsed result.
 * @throws KubectlError on command failure or unparseable output.
 */
export async function kubectlJson(args, options = {}) {
    const raw = await kubectl([...args, "-o", "json"], options);
    return parseKubectlJson(raw);
}
|
|
@@ -0,0 +1,71 @@
|
|
|
1
|
+
import type { Issue } from "./types.js";
/**
 * Minimal subset of a Kubernetes Pod object consumed by findPodIssues,
 * as produced by `kubectl get pods -o json`.
 */
interface PodStatus {
    metadata: {
        name: string;
        namespace: string;
    };
    status: {
        /** Pod lifecycle phase, e.g. "Pending", "Running", "Failed", "Unknown". */
        phase: string;
        /** Top-level reason; "Evicted" for evicted pods. */
        reason?: string;
        containerStatuses?: Array<{
            name: string;
            state: {
                waiting?: {
                    /** e.g. "CrashLoopBackOff", "ImagePullBackOff", "ErrImagePull". */
                    reason?: string;
                };
                terminated?: {
                    reason?: string;
                };
                running?: Record<string, unknown>;
            };
            /** Previous container state — used to detect OOMKilled restarts. */
            lastState?: {
                terminated?: {
                    reason?: string;
                };
            };
            restartCount: number;
        }>;
    };
}
/** Minimal subset of a Kubernetes Node object consumed by findNodeIssues. */
interface NodeStatus {
    metadata: {
        name: string;
    };
    status: {
        conditions: Array<{
            /** e.g. "Ready", "MemoryPressure", "DiskPressure", "PIDPressure". */
            type: string;
            /** "True" / "False" / "Unknown". */
            status: string;
            reason?: string;
        }>;
    };
}
/** Returns an Issue for every pod in a problematic state. */
export declare function findPodIssues(podList: {
    items: PodStatus[];
}): Issue[];
/** Minimal subset of a Kubernetes Job object consumed by findJobIssues. */
interface JobStatus {
    metadata: {
        name: string;
        namespace: string;
    };
    status: {
        failed?: number;
        succeeded?: number;
        active?: number;
        conditions?: Array<{
            type: string;
            status: string;
            reason?: string;
            message?: string;
        }>;
    };
    spec?: {
        backoffLimit?: number;
    };
}
/** Returns an Issue for every Job with a Failed=True condition. */
export declare function findJobIssues(jobList: {
    items: JobStatus[];
}): Issue[];
/** Returns an Issue for NotReady nodes and nodes under resource pressure. */
export declare function findNodeIssues(nodeList: {
    items: NodeStatus[];
}): Issue[];
export {};
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
/**
 * Scans a pod listing (from `kubectl get pods -o json`) and returns an
 * Issue for every pod in a bad state: Pending, Failed (incl. Evicted),
 * Unknown, or with a container in CrashLoopBackOff / image-pull backoff /
 * OOMKilled / Error states.
 *
 * @param podList Pod list object with an `items` array.
 * @returns One Issue per detected problem; all share a single timestamp.
 */
export function findPodIssues(podList) {
    const issues = [];
    const now = new Date();
    for (const pod of podList.items) {
        const ns = pod.metadata.namespace;
        const name = pod.metadata.name;
        // Pending phase. Image-pull failures occur while the pod is still
        // Pending, so inspect container states first — previously the early
        // `continue` meant ImagePullBackOff/ErrImagePull pods were only
        // ever reported as a generic pod_pending.
        if (pod.status.phase === "Pending") {
            const pullFailures = (pod.status.containerStatuses ?? []).filter((cs) => {
                const r = cs.state.waiting?.reason;
                return r === "ImagePullBackOff" || r === "ErrImagePull";
            });
            if (pullFailures.length > 0) {
                for (const cs of pullFailures) {
                    const reason = cs.state.waiting?.reason;
                    issues.push({
                        kind: "pod_image_pull",
                        severity: "critical",
                        namespace: ns,
                        resource: name,
                        message: `Container ${cs.name} in ${name} has ${reason}`,
                        details: { container: cs.name, reason },
                        timestamp: now,
                    });
                }
            }
            else {
                issues.push({
                    kind: "pod_pending",
                    severity: "warning",
                    namespace: ns,
                    resource: name,
                    message: `Pod ${name} is Pending in ${ns}`,
                    details: { phase: pod.status.phase },
                    timestamp: now,
                });
            }
            continue;
        }
        // Failed phase; Evicted is reported separately at lower severity.
        if (pod.status.phase === "Failed") {
            if (pod.status.reason === "Evicted") {
                issues.push({
                    kind: "pod_evicted",
                    severity: "warning",
                    namespace: ns,
                    resource: name,
                    message: `Pod ${name} was Evicted in ${ns}`,
                    details: { reason: "Evicted" },
                    timestamp: now,
                });
            }
            else {
                issues.push({
                    kind: "pod_failed",
                    severity: "critical",
                    namespace: ns,
                    resource: name,
                    message: `Pod ${name} is in Failed phase in ${ns}`,
                    details: { phase: "Failed", reason: pod.status.reason },
                    timestamp: now,
                });
            }
            continue;
        }
        // Unknown phase usually means the kubelet stopped reporting.
        if (pod.status.phase === "Unknown") {
            issues.push({
                kind: "pod_failed",
                severity: "critical",
                namespace: ns,
                resource: name,
                message: `Pod ${name} is in Unknown phase in ${ns} (node may be unreachable)`,
                details: { phase: "Unknown" },
                timestamp: now,
            });
            continue;
        }
        // Remaining phases: inspect each container's current/last state.
        for (const cs of pod.status.containerStatuses ?? []) {
            const waitReason = cs.state.waiting?.reason;
            const lastTermReason = cs.lastState?.terminated?.reason;
            if (waitReason === "CrashLoopBackOff") {
                issues.push({
                    kind: "pod_crashloop",
                    severity: "critical",
                    namespace: ns,
                    resource: name,
                    message: `Container ${cs.name} in ${name} is CrashLoopBackOff (${cs.restartCount} restarts)`,
                    details: { container: cs.name, restartCount: cs.restartCount },
                    timestamp: now,
                });
            }
            else if (waitReason === "ImagePullBackOff" || waitReason === "ErrImagePull") {
                issues.push({
                    kind: "pod_image_pull",
                    severity: "critical",
                    namespace: ns,
                    resource: name,
                    message: `Container ${cs.name} in ${name} has ${waitReason}`,
                    details: { container: cs.name, reason: waitReason },
                    timestamp: now,
                });
            }
            else if (lastTermReason === "OOMKilled") {
                issues.push({
                    kind: "pod_oom",
                    severity: "critical",
                    namespace: ns,
                    resource: name,
                    message: `Container ${cs.name} in ${name} was OOMKilled (${cs.restartCount} restarts)`,
                    details: { container: cs.name, restartCount: cs.restartCount },
                    timestamp: now,
                });
            }
            else if (waitReason === "Error" || cs.state.terminated?.reason === "Error") {
                issues.push({
                    kind: "pod_error",
                    severity: "warning",
                    namespace: ns,
                    resource: name,
                    message: `Container ${cs.name} in ${name} is in Error state`,
                    details: { container: cs.name },
                    timestamp: now,
                });
            }
        }
    }
    return issues;
}
|
|
110
|
+
/**
 * Returns an Issue for every Job whose conditions include Failed=True.
 *
 * @param jobList Job list object with an `items` array.
 * @returns Critical `job_failed` issues with reason/message/failed count.
 */
export function findJobIssues(jobList) {
    const now = new Date();
    const issues = [];
    for (const job of jobList.items) {
        const ns = job.metadata.namespace;
        const name = job.metadata.name;
        const failedCondition = (job.status.conditions ?? []).find((cond) => cond.type === "Failed" && cond.status === "True");
        if (!failedCondition)
            continue;
        issues.push({
            kind: "job_failed",
            severity: "critical",
            namespace: ns,
            resource: name,
            message: `Job ${name} failed in ${ns}: ${failedCondition.reason ?? "unknown reason"}`,
            details: {
                reason: failedCondition.reason,
                message: failedCondition.message,
                failed: job.status.failed ?? 0,
            },
            timestamp: now,
        });
    }
    return issues;
}
|
|
135
|
+
/**
 * Returns an Issue for every node that is NotReady (critical) or under
 * memory/disk/PID pressure (warning). Node issues use an empty namespace
 * because nodes are cluster-scoped.
 */
export function findNodeIssues(nodeList) {
    const now = new Date();
    const issues = [];
    const PRESSURE = new Set(["MemoryPressure", "DiskPressure", "PIDPressure"]);
    for (const node of nodeList.items) {
        const nodeName = node.metadata.name;
        for (const condition of node.status.conditions) {
            if (condition.type === "Ready" && condition.status !== "True") {
                issues.push({
                    kind: "node_not_ready",
                    severity: "critical",
                    namespace: "",
                    resource: nodeName,
                    message: `Node ${nodeName} is NotReady: ${condition.reason ?? "unknown"}`,
                    details: { reason: condition.reason },
                    timestamp: now,
                });
            }
            if (condition.status === "True" && PRESSURE.has(condition.type)) {
                issues.push({
                    kind: "node_pressure",
                    severity: "warning",
                    namespace: "",
                    resource: nodeName,
                    message: `Node ${nodeName} has ${condition.type}`,
                    details: { conditionType: condition.type },
                    timestamp: now,
                });
            }
        }
    }
    return issues;
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
import { type KubectlOptions } from "../kubectl.js";
import type { Issue } from "./types.js";
/** Callback invoked with each batch of reportable issues found by the monitor. */
export type IssueCallback = (issues: Issue[]) => void | Promise<void>;
/** Runs every cluster check once and returns the deduplicated issues. */
export declare function runChecks(options: KubectlOptions): Promise<Issue[]>;
/**
 * Starts periodic checks every `intervalMs`, passing issues to `onIssues`.
 * Returns a handle whose `stop()` cancels all timers.
 */
export declare function startMonitor(options: KubectlOptions, intervalMs: number, onIssues: IssueCallback): {
    stop: () => void;
};
|
|
@@ -0,0 +1,126 @@
|
|
|
1
|
+
import { kubectlJson } from "../kubectl.js";
|
|
2
|
+
import { findPodIssues, findNodeIssues, findJobIssues } from "./checks.js";
|
|
3
|
+
import chalk from "chalk";
|
|
4
|
+
/**
 * Collapses a raw issue list in two passes:
 *  1. drop exact duplicates (same kind + namespace + resource);
 *  2. for groups sharing kind + namespace, keep only the first issue and
 *     annotate its message (mutated in place) with the number of similar
 *     issues suppressed, so downstream consumers know it is representative.
 */
function deduplicateIssues(issues) {
    // Pass 1: keep only the first issue per exact kind:ns:resource key.
    const byExactKey = new Map();
    for (const issue of issues) {
        const exactKey = `${issue.kind}:${issue.namespace}:${issue.resource}`;
        if (!byExactKey.has(exactKey))
            byExactKey.set(exactKey, issue);
    }
    const deduped = [...byExactKey.values()];
    // Pass 2: count group sizes per kind:namespace.
    const groupCounts = new Map();
    for (const issue of deduped) {
        const groupKey = `${issue.kind}:${issue.namespace}`;
        groupCounts.set(groupKey, (groupCounts.get(groupKey) ?? 0) + 1);
    }
    const represented = new Set();
    const result = [];
    for (const issue of deduped) {
        const groupKey = `${issue.kind}:${issue.namespace}`;
        const count = groupCounts.get(groupKey) ?? 1;
        if (count <= 1) {
            result.push(issue);
            continue;
        }
        if (represented.has(groupKey))
            continue;
        represented.add(groupKey);
        // Annotate the representative issue so the suppressed count is visible.
        issue.message = `${issue.message} (+${count - 1} similar in ${issue.namespace})`;
        result.push(issue);
    }
    return result;
}
|
|
36
|
+
/**
 * Runs all monitor checks once: pods and nodes (errors propagate), plus
 * failed jobs (best-effort — some clusters reject the field selector, so
 * that query falls back to an empty list). Returns deduplicated issues.
 */
export async function runChecks(options) {
    const pods = await kubectlJson(["get", "pods", "--all-namespaces"], options);
    const nodes = await kubectlJson(["get", "nodes"], options);
    const jobs = await kubectlJson(["get", "jobs", "--all-namespaces", "--field-selector=status.failed>0"], options).catch(() => ({ items: [] }));
    const collected = [
        ...findPodIssues(pods),
        ...findNodeIssues(nodes),
        ...findJobIssues(jobs),
    ];
    return deduplicateIssues(collected);
}
|
|
49
|
+
// How long a pod must be Pending before it's reported as an issue.
const PENDING_GRACE_MS = 60_000;
/**
 * Starts the monitor loop: runs checks immediately, then every `intervalMs`.
 *
 * Pending pods get a grace period — a pod seen Pending for the first time is
 * only noted on the console; it becomes a reportable issue if still Pending
 * after PENDING_GRACE_MS (a one-shot timer triggers the recheck). All other
 * issue kinds are reported to `onIssues` immediately.
 *
 * Returns { stop } which halts the loop and clears both timers.
 */
export function startMonitor(options, intervalMs, onIssues) {
    let running = true;
    // Guards against overlapping check runs when a tick outlasts the interval.
    let inFlight = false;
    // Tracks when each pending pod was first seen: "namespace/name" -> Date
    const pendingSince = new Map();
    let pendingRecheckTimer = null;
    const tick = async () => {
        if (!running || inFlight)
            return;
        inFlight = true;
        const ts = new Date().toLocaleTimeString("en-GB", { hour: "2-digit", minute: "2-digit", second: "2-digit" });
        process.stdout.write(chalk.dim(` ${ts} checking...`));
        try {
            const allIssues = await runChecks(options);
            const now = new Date();
            // Pending issues are handled by the grace-period machinery below;
            // everything else is reportable right away.
            const pendingIssues = allIssues.filter((i) => i.kind === "pod_pending");
            const otherIssues = allIssues.filter((i) => i.kind !== "pod_pending");
            // Remove pods that are no longer pending
            const currentPendingKeys = new Set(pendingIssues.map((i) => `${i.namespace}/${i.resource}`));
            for (const key of pendingSince.keys()) {
                if (!currentPendingKeys.has(key))
                    pendingSince.delete(key);
            }
            // Separate newly pending from those that have exceeded the grace period
            let hasNewPending = false;
            const reportablePending = [];
            for (const issue of pendingIssues) {
                const key = `${issue.namespace}/${issue.resource}`;
                if (!pendingSince.has(key)) {
                    // First sighting: start the grace clock, don't report yet.
                    pendingSince.set(key, now);
                    hasNewPending = true;
                    process.stdout.write(chalk.dim(`\n ○ ${issue.resource} (${issue.namespace}) is Pending — rechecking in ${PENDING_GRACE_MS / 1000}s`));
                }
                else if (now.getTime() - pendingSince.get(key).getTime() >= PENDING_GRACE_MS) {
                    reportablePending.push(issue);
                }
            }
            // Schedule a one-shot recheck for newly discovered pending pods
            if (hasNewPending && !pendingRecheckTimer && running) {
                pendingRecheckTimer = setTimeout(() => {
                    pendingRecheckTimer = null;
                    void tick();
                }, PENDING_GRACE_MS);
            }
            const reportableIssues = [...otherIssues, ...reportablePending];
            if (reportableIssues.length > 0 && running) {
                // Clear the "checking..." status line before the callback prints.
                process.stdout.write("\r\x1b[K");
                await onIssues(reportableIssues);
            }
            else if (!hasNewPending) {
                process.stdout.write(chalk.dim(` ✔ all clear\n`));
            }
            else {
                process.stdout.write("\n");
            }
        }
        catch (err) {
            process.stdout.write("\n");
            console.error(chalk.red("Monitor check failed:"), err.message);
        }
        finally {
            // Always release the re-entrancy guard, even after an error.
            inFlight = false;
        }
    };
    // Run immediately, then on interval
    tick();
    const timer = setInterval(tick, intervalMs);
    return {
        stop: () => {
            running = false;
            clearInterval(timer);
            if (pendingRecheckTimer)
                clearTimeout(pendingRecheckTimer);
        },
    };
}
|
|
@@ -0,0 +1,11 @@
|
|
|
1
|
+
/** Alert severity; notify code orders these as info < warning < critical. */
export type Severity = "info" | "warning" | "critical";
/** Machine-readable category of a detected cluster problem. */
export type IssueKind = "pod_crashloop" | "pod_oom" | "pod_image_pull" | "pod_pending" | "pod_failed" | "pod_evicted" | "pod_error" | "node_not_ready" | "node_pressure" | "cert_expiring" | "resource_high" | "job_failed" | "rollout_stuck";
/** A single problem detected by a monitor check. */
export interface Issue {
    kind: IssueKind;
    severity: Severity;
    /** Empty string for cluster-scoped resources such as nodes. */
    namespace: string;
    /** Name of the affected pod/node/job. */
    resource: string;
    /** Human-readable one-line summary. */
    message: string;
    /** Kind-specific extra fields (reason, container name, restart count, …). */
    details: Record<string, unknown>;
    /** When the check observed the problem. */
    timestamp: Date;
}
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
// Compiled output of a type-only module: all declarations were erased at
// compile time, leaving only this empty export marker.
export {};
|
|
@@ -0,0 +1,5 @@
|
|
|
1
|
+
import type { Issue } from "../monitor/types.js";
import type { KubeAgentConfig, NotificationChannel } from "../config.js";
/**
 * Fans issues out to every configured channel, filtering each channel's
 * issues by its minimum severity.
 */
export declare function sendNotification(issues: Issue[], config: KubeAgentConfig, clusterContext?: string): Promise<void>;
/** Sends an agent question (optionally with numbered choices) to all channels. */
export declare function broadcastQuestion(question: string, choices: string[] | undefined, config: KubeAgentConfig, clusterContext?: string): Promise<void>;
/** One-line human-readable description of a configured channel. */
export declare function describeChannel(channel: NotificationChannel): string;
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { sendSlack, sendSlackQuestion } from "./slack.js";
|
|
2
|
+
import { sendTelegram, sendTelegramQuestion } from "./telegram.js";
|
|
3
|
+
const SEVERITY_ORDER = { info: 0, warning: 1, critical: 2 };
/**
 * Delivers issues to every configured channel in parallel. Each channel
 * only receives issues at or above its configured minimum severity;
 * channels with nothing relevant are skipped.
 */
export async function sendNotification(issues, config, clusterContext) {
    const channels = config.notifications.channels;
    if (!channels.length)
        return;
    const deliveries = channels.map(async (channel) => {
        const threshold = SEVERITY_ORDER[channel.severity];
        const relevant = issues.filter((issue) => SEVERITY_ORDER[issue.severity] >= threshold);
        if (!relevant.length)
            return;
        if (channel.type === "slack") {
            await sendSlack(relevant, channel, clusterContext);
        }
        else if (channel.type === "telegram") {
            await sendTelegram(relevant, channel, clusterContext);
        }
    });
    await Promise.all(deliveries);
}
|
|
22
|
+
/**
 * Posts an agent question (optionally with numbered choices) to every
 * configured channel in parallel. Severity filtering does not apply here.
 */
export async function broadcastQuestion(question, choices, config, clusterContext) {
    const channels = config.notifications.channels;
    if (!channels.length)
        return;
    const deliveries = channels.map((channel) => {
        if (channel.type === "slack")
            return sendSlackQuestion(channel, question, choices, clusterContext);
        if (channel.type === "telegram")
            return sendTelegramQuestion(channel, question, choices, clusterContext);
        return undefined;
    });
    await Promise.all(deliveries);
}
|
|
32
|
+
/**
 * Returns a one-line description of a channel for display: type, optional
 * label, destination (truncated webhook URL or chat id), and min severity.
 */
export function describeChannel(channel) {
    const label = channel.label ? ` (${channel.label})` : "";
    switch (channel.type) {
        case "slack": {
            const shortUrl = channel.webhook_url.slice(0, 40);
            return `Slack${label} ${shortUrl}… min: ${channel.severity}`;
        }
        case "telegram":
            return `Telegram${label} chat: ${channel.chat_id} min: ${channel.severity}`;
    }
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { NotificationChannel, SlackChannel, TelegramChannel } from "../config.js";
/** Interactive Slack webhook setup; resolves null if cancelled or unconfirmed. */
export declare function setupSlack(): Promise<SlackChannel | null>;
/** Interactive Telegram bot setup; resolves null if cancelled or the test send fails. */
export declare function setupTelegram(): Promise<TelegramChannel | null>;
/** Prompts for a channel type and runs the matching setup flow. */
export declare function interactiveAddChannel(): Promise<NotificationChannel | null>;
|
|
@@ -0,0 +1,88 @@
|
|
|
1
|
+
import readline from "node:readline";
|
|
2
|
+
import chalk from "chalk";
|
|
3
|
+
import { sendSlack } from "./slack.js";
|
|
4
|
+
import { testTelegramCredentials } from "./telegram.js";
|
|
5
|
+
/**
 * Prompts on stdin/stdout and resolves with the trimmed answer.
 * A fresh readline interface is created and closed per question so the
 * terminal is released between prompts.
 */
async function ask(question, hint) {
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    const hintStr = hint ? chalk.dim(` (${hint})`) : "";
    const prompt = chalk.cyan(` ${question}${hintStr}: `);
    return new Promise((resolve) => {
        rl.question(prompt, (answer) => {
            rl.close();
            resolve(answer.trim());
        });
    });
}
|
|
15
|
+
/**
 * Asks the user for a minimum notification severity.
 * Defaults to "warning" for an empty or unrecognised answer.
 */
async function pickSeverity() {
    console.log(chalk.dim("\n Minimum severity to notify:"));
    console.log(` ${chalk.cyan("1")}. warning ${chalk.dim("— warning + critical")}`);
    console.log(` ${chalk.cyan("2")}. critical ${chalk.dim("— critical only")}`);
    console.log(` ${chalk.cyan("3")}. info ${chalk.dim("— everything")}`);
    const answer = await ask("Choice", "1");
    switch (answer) {
        case "2":
            return "critical";
        case "3":
            return "info";
        default:
            return "warning";
    }
}
|
|
24
|
+
/**
 * Interactive Slack setup: prompts for a webhook URL, optional label, and
 * minimum severity, then sends a synthetic test issue through the webhook.
 * Returns the channel object only after the user confirms receipt;
 * returns null on empty URL or unconfirmed delivery.
 */
export async function setupSlack() {
    console.log(chalk.bold("\n Slack Setup"));
    console.log(chalk.dim(" Create an incoming webhook at: https://api.slack.com/apps\n"));
    const webhook_url = await ask("Webhook URL");
    if (!webhook_url)
        return null;
    const label = await ask("Label (optional)", "e.g. #alerts");
    const severity = await pickSeverity();
    // label is normalized to undefined (not "") when left blank.
    const channel = { type: "slack", webhook_url, severity, label: label || undefined };
    // Test it
    process.stdout.write(chalk.dim("\n Sending test message..."));
    await sendSlack([{ kind: "pod_pending", severity: "warning", namespace: "test", resource: "kubeagent-test", message: "KubeAgent Slack integration working ✓", details: {}, timestamp: new Date() }], channel);
    console.log(chalk.green(" sent!"));
    // Delivery can fail silently (sendSlack logs instead of throwing), so
    // require explicit user confirmation before saving the channel.
    const confirmed = await ask("Did you receive it? [y/N]");
    if (confirmed.toLowerCase() !== "y") {
        console.log(chalk.yellow(" Webhook not confirmed — channel not saved."));
        return null;
    }
    return channel;
}
|
|
44
|
+
/**
 * Interactive Telegram setup: prompts for a bot token, chat id, optional
 * label, and minimum severity, then verifies the credentials by sending a
 * test message. Returns the channel only after the credential test passes
 * AND the user confirms receipt; returns null otherwise.
 */
export async function setupTelegram() {
    console.log(chalk.bold("\n Telegram Setup"));
    console.log(chalk.dim(" 1. Create a bot via @BotFather and copy the token"));
    console.log(chalk.dim(" 2. Start a chat with your bot (or add it to a group)"));
    console.log(chalk.dim(" 3. Get your chat ID: message the bot, then visit:"));
    console.log(chalk.dim(" https://api.telegram.org/bot<TOKEN>/getUpdates\n"));
    const bot_token = await ask("Bot token");
    if (!bot_token)
        return null;
    const chat_id = await ask("Chat ID");
    if (!chat_id)
        return null;
    const label = await ask("Label (optional)", "e.g. ops-alerts");
    const severity = await pickSeverity();
    // Test credentials
    process.stdout.write(chalk.dim("\n Sending test message..."));
    const result = await testTelegramCredentials(bot_token, chat_id);
    if (!result.ok) {
        console.log(chalk.red(` failed: ${result.error}`));
        console.log(chalk.dim(" Check your bot token and chat ID."));
        return null;
    }
    console.log(chalk.green(" sent!"));
    // API success doesn't guarantee the right chat — ask the user to confirm.
    const confirmed = await ask("Did you receive it? [y/N]");
    if (confirmed.toLowerCase() !== "y") {
        console.log(chalk.yellow(" Not confirmed — channel not saved."));
        return null;
    }
    // label normalized to undefined (not "") when left blank.
    return { type: "telegram", bot_token, chat_id, severity, label: label || undefined };
}
|
|
74
|
+
/**
 * Shows the channel-type menu and dispatches to the matching setup flow.
 * Empty input or "3" cancels; "2" runs Telegram setup; anything else
 * (including the default "1") runs Slack setup.
 */
export async function interactiveAddChannel() {
    console.log(chalk.bold("\nAdd notification channel:\n"));
    console.log(` ${chalk.cyan("1")}. Slack`);
    console.log(` ${chalk.cyan("2")}. Telegram`);
    console.log(` ${chalk.cyan("3")}. Cancel\n`);
    const rl = readline.createInterface({ input: process.stdin, output: process.stdout });
    const choice = await new Promise((resolve) => {
        rl.question(chalk.cyan(" Choice [1]: "), (a) => {
            rl.close();
            resolve(a.trim());
        });
    });
    switch (choice) {
        case "":
        case "3":
            return null;
        case "2":
            return setupTelegram();
        default:
            return setupSlack();
    }
}
|
|
@@ -0,0 +1,4 @@
|
|
|
1
|
+
import type { Issue } from "../monitor/types.js";
import type { SlackChannel } from "../config.js";
/** Posts a question (with optional numbered choices) to a Slack incoming webhook. */
export declare function sendSlackQuestion(channel: SlackChannel, question: string, choices: string[] | undefined, clusterContext?: string): Promise<void>;
/** Posts a formatted alert for the given issues to a Slack incoming webhook. */
export declare function sendSlack(issues: Issue[], channel: SlackChannel, clusterContext?: string): Promise<void>;
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
/**
 * Builds the Slack Block Kit payload for an alert: a header, a summary
 * section (cluster context + critical/warning counts), a divider, and one
 * icon-prefixed line per issue.
 */
function formatSlackMessage(issues, clusterContext) {
    const criticalCount = issues.filter((issue) => issue.severity === "critical").length;
    const warningCount = issues.filter((issue) => issue.severity === "warning").length;
    const header = clusterContext
        ? `*KubeAgent Alert* — \`${clusterContext}\``
        : `*KubeAgent Alert*`;
    const summaryParts = [];
    if (criticalCount)
        summaryParts.push(`${criticalCount} critical`);
    if (warningCount)
        summaryParts.push(`${warningCount} warning`);
    const summary = summaryParts.join(", ");
    const lines = [];
    for (const issue of issues) {
        const icon = issue.severity === "critical" ? "🔴" : "🟡";
        const nsSuffix = issue.namespace ? ` \`${issue.namespace}\`` : "";
        lines.push(`${icon} ${issue.message}${nsSuffix}`);
    }
    return {
        blocks: [
            {
                type: "header",
                text: { type: "plain_text", text: "⚠ KubeAgent Alert", emoji: true },
            },
            {
                type: "section",
                text: { type: "mrkdwn", text: `${header}\n${summary}` },
            },
            { type: "divider" },
            {
                type: "section",
                text: { type: "mrkdwn", text: lines.join("\n") },
            },
        ],
    };
}
|
|
34
|
+
/**
 * POSTs a JSON payload to the channel's webhook URL. All failures
 * (malformed URL, bad protocol, HTTP error, network error) are logged to
 * stderr and swallowed — notification delivery is best-effort.
 */
async function postToSlack(channel, payload) {
    // Validate the webhook URL up front so we fail with a clear message.
    let parsed;
    try {
        parsed = new URL(channel.webhook_url);
    }
    catch {
        console.error(`Slack: invalid webhook URL`);
        return;
    }
    if (parsed.protocol !== "https:" && parsed.protocol !== "http:") {
        console.error(`Slack: invalid URL protocol`);
        return;
    }
    try {
        const response = await fetch(channel.webhook_url, {
            method: "POST",
            headers: { "Content-Type": "application/json" },
            body: JSON.stringify(payload),
        });
        if (!response.ok) {
            console.error(`Slack webhook failed: ${response.status} ${response.statusText}`);
        }
    }
    catch (err) {
        console.error(`Slack webhook error: ${err.message}`);
    }
}
|
|
60
|
+
/**
 * Posts an agent question to Slack. Choices (if any) are rendered as a
 * numbered list; the footer reminds the user that answers are collected
 * via the terminal.
 */
export async function sendSlackQuestion(channel, question, choices, clusterContext) {
    const ctx = clusterContext ? ` — \`${clusterContext}\`` : "";
    const numbered = (choices ?? []).map((choice, idx) => `${idx + 1}. ${choice}`);
    const body = numbered.length > 0
        ? `${question}\n\n${numbered.join("\n")}\n\n_Answer via terminal or reply in your incident channel._`
        : `${question}\n\n_Answer required via terminal._`;
    await postToSlack(channel, {
        blocks: [
            { type: "header", text: { type: "plain_text", text: "❓ KubeAgent needs input", emoji: true } },
            { type: "section", text: { type: "mrkdwn", text: `*Question*${ctx}` } },
            { type: "section", text: { type: "mrkdwn", text: body } },
        ],
    });
}
|
|
74
|
+
/** Formats the issues as a Block Kit alert and posts it to the webhook. */
export async function sendSlack(issues, channel, clusterContext) {
    const payload = formatSlackMessage(issues, clusterContext);
    await postToSlack(channel, payload);
}
|