@glassmkr/crucible 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/alerts/__tests__/rules.test.d.ts +1 -0
- package/dist/alerts/__tests__/rules.test.js +325 -0
- package/dist/alerts/__tests__/rules.test.js.map +1 -0
- package/dist/alerts/rules.d.ts +8 -0
- package/dist/alerts/rules.js +139 -32
- package/dist/alerts/rules.js.map +1 -1
- package/dist/api.d.ts +2 -0
- package/dist/api.js +7 -0
- package/dist/api.js.map +1 -0
- package/dist/collect/__tests__/dmi.test.d.ts +1 -0
- package/dist/collect/__tests__/dmi.test.js +114 -0
- package/dist/collect/__tests__/dmi.test.js.map +1 -0
- package/dist/collect/__tests__/ipmi.test.js +47 -1
- package/dist/collect/__tests__/ipmi.test.js.map +1 -1
- package/dist/collect/__tests__/thermal.test.d.ts +1 -0
- package/dist/collect/__tests__/thermal.test.js +164 -0
- package/dist/collect/__tests__/thermal.test.js.map +1 -0
- package/dist/collect/dmi.d.ts +19 -0
- package/dist/collect/dmi.js +109 -0
- package/dist/collect/dmi.js.map +1 -0
- package/dist/collect/ipmi.d.ts +27 -2
- package/dist/collect/ipmi.js +90 -2
- package/dist/collect/ipmi.js.map +1 -1
- package/dist/collect/thermal.d.ts +10 -0
- package/dist/collect/thermal.js +187 -0
- package/dist/collect/thermal.js.map +1 -0
- package/dist/config.d.ts +10 -0
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/index.js +51 -1
- package/dist/index.js.map +1 -1
- package/dist/lib/__tests__/capability.test.d.ts +1 -0
- package/dist/lib/__tests__/capability.test.js +87 -0
- package/dist/lib/__tests__/capability.test.js.map +1 -0
- package/dist/lib/__tests__/vendor-sensors.test.d.ts +1 -0
- package/dist/lib/__tests__/vendor-sensors.test.js +49 -0
- package/dist/lib/__tests__/vendor-sensors.test.js.map +1 -0
- package/dist/lib/capability.d.ts +21 -0
- package/dist/lib/capability.js +110 -0
- package/dist/lib/capability.js.map +1 -0
- package/dist/lib/cpu-thermal-chips.d.ts +2 -0
- package/dist/lib/cpu-thermal-chips.js +28 -0
- package/dist/lib/cpu-thermal-chips.js.map +1 -0
- package/dist/lib/types.d.ts +58 -0
- package/dist/lib/vendor-sensors.d.ts +27 -0
- package/dist/lib/vendor-sensors.js +63 -0
- package/dist/lib/vendor-sensors.js.map +1 -0
- package/dist/notify/telegram.js +1 -1
- package/dist/notify/telegram.js.map +1 -1
- package/package.json +16 -1
- package/rule-ids.json +29 -0
- package/.dockerignore +0 -13
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -24
- package/.github/ISSUE_TEMPLATE/no_data.md +0 -26
- package/.github/workflows/docker.yml +0 -53
- package/.github/workflows/publish.yml +0 -25
- package/Dockerfile +0 -59
- package/config/collector.example.yaml +0 -43
- package/docker-compose.yml +0 -26
- package/scripts/sign-release.sh +0 -29
- package/src/__tests__/cli.test.ts +0 -74
- package/src/__tests__/reboot-marker.test.ts +0 -122
- package/src/alerts/evaluator.ts +0 -15
- package/src/alerts/rules.ts +0 -283
- package/src/alerts/state.ts +0 -92
- package/src/cli.ts +0 -112
- package/src/collect/__tests__/ipmi.test.ts +0 -96
- package/src/collect/__tests__/smart.test.ts +0 -68
- package/src/collect/__tests__/system.test.ts +0 -29
- package/src/collect/__tests__/zfs.test.ts +0 -72
- package/src/collect/conntrack.ts +0 -27
- package/src/collect/cpu.ts +0 -92
- package/src/collect/disks.ts +0 -91
- package/src/collect/fd.ts +0 -31
- package/src/collect/io-errors.ts +0 -23
- package/src/collect/io-latency.ts +0 -103
- package/src/collect/ipmi.ts +0 -207
- package/src/collect/memory.ts +0 -30
- package/src/collect/network.ts +0 -193
- package/src/collect/ntp.ts +0 -114
- package/src/collect/os-alerts.ts +0 -43
- package/src/collect/raid.ts +0 -40
- package/src/collect/security.ts +0 -268
- package/src/collect/smart.ts +0 -72
- package/src/collect/system.ts +0 -32
- package/src/collect/systemd.ts +0 -33
- package/src/collect/zfs.ts +0 -66
- package/src/config.ts +0 -65
- package/src/index.ts +0 -221
- package/src/lib/__tests__/parse.test.ts +0 -28
- package/src/lib/exec.ts +0 -16
- package/src/lib/parse.ts +0 -29
- package/src/lib/reboot-marker.ts +0 -88
- package/src/lib/types.ts +0 -226
- package/src/lib/version-check.ts +0 -39
- package/src/lib/version.ts +0 -33
- package/src/metrics-server.ts +0 -123
- package/src/notify/email.ts +0 -69
- package/src/notify/slack.ts +0 -47
- package/src/notify/telegram.ts +0 -65
- package/src/push/forge.ts +0 -109
- package/tsconfig.json +0 -15
- package/vitest.config.ts +0 -12
|
@@ -1,29 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
import { readOsReleaseField } from "../system.js";
|
|
3
|
-
|
|
4
|
-
// Spec for readOsReleaseField(contents, key): covers quoted and unquoted values,
// lower-casing of the result, missing keys, and exact key matching.
describe("readOsReleaseField", () => {
  it("parses unquoted Ubuntu values", () => {
    const osRelease = 'NAME="Ubuntu"\nID=ubuntu\nID_LIKE=debian\nVERSION_ID="24.04"';
    const id = readOsReleaseField(osRelease, "ID");
    expect(id).toBe("ubuntu");
    const idLike = readOsReleaseField(osRelease, "ID_LIKE");
    expect(idLike).toBe("debian");
  });

  it("parses quoted RHEL-family values", () => {
    const osRelease = 'NAME="Rocky Linux"\nID="rocky"\nID_LIKE="rhel centos fedora"';
    const id = readOsReleaseField(osRelease, "ID");
    expect(id).toBe("rocky");
    const idLike = readOsReleaseField(osRelease, "ID_LIKE");
    expect(idLike).toBe("rhel centos fedora");
  });

  it("lowercases the result (some distros uppercase their ID)", () => {
    const id = readOsReleaseField("ID=Alpine", "ID");
    expect(id).toBe("alpine");
  });

  it("returns undefined for a missing key", () => {
    const idLike = readOsReleaseField("ID=arch", "ID_LIKE");
    expect(idLike).toBeUndefined();
  });

  it("does not confuse ID with VERSION_ID", () => {
    // VERSION_ID comes first so a naive substring search for "ID=" would hit it.
    const osRelease = 'VERSION_ID="24.04"\nID=ubuntu';
    const id = readOsReleaseField(osRelease, "ID");
    expect(id).toBe("ubuntu");
  });
});
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
import { describe, it, expect } from "vitest";
|
|
2
|
-
import { parseZpoolStatus } from "../zfs.js";
|
|
3
|
-
|
|
4
|
-
// Spec for parseZpoolStatus(raw): pool name/state, scrub summary, error text,
// never-scrubbed detection, empty input and multi-pool input.
// NOTE(review): indentation inside the fixtures appears collapsed in this view —
// confirm the literals against real `zpool status` output before relying on them.
describe("parseZpoolStatus", () => {
  it("parses a healthy pool", () => {
    const statusText = ` pool: tank
state: ONLINE
scan: scrub repaired 0B in 01:23:45 with 0 errors on Sun Apr 5 12:34:56 2026
config:

NAME STATE READ WRITE CKSUM
tank ONLINE 0 0 0
mirror-0 ONLINE 0 0 0

errors: No known data errors
`;
    const parsed = parseZpoolStatus(statusText);
    expect(parsed).toHaveLength(1);

    const expectedFields = {
      name: "tank",
      state: "ONLINE",
      errors_text: "No known data errors",
      scrub_errors: 0,
      scrub_repaired: "0B",
    };
    expect(parsed[0]).toMatchObject(expectedFields);
    expect(parsed[0].last_scrub_date).toContain("2026");
  });

  it("parses a DEGRADED pool", () => {
    const statusText = ` pool: tank
state: DEGRADED
scan: scrub repaired 16K in 02:00:00 with 3 errors on Sun Apr 5 12:34:56 2026

errors: 3 data errors, use '-v' for a list
`;
    const degraded = parseZpoolStatus(statusText)[0];
    expect(degraded.state).toBe("DEGRADED");
    expect(degraded.scrub_errors).toBe(3);
    expect(degraded.scrub_repaired).toBe("16K");
  });

  it("flags never-scrubbed pools", () => {
    const statusText = ` pool: tank
state: ONLINE
scan: none requested

errors: No known data errors
`;
    const unscrubbed = parseZpoolStatus(statusText)[0];
    expect(unscrubbed.scrub_never_run).toBe(true);
    expect(unscrubbed.scrub_errors).toBeUndefined();
  });

  it("returns empty for no pools", () => {
    const parsed = parseZpoolStatus("no pools available");
    expect(parsed).toEqual([]);
  });

  it("parses multiple pools", () => {
    const statusText = ` pool: tank
state: ONLINE
scan: none requested
errors: No known data errors
pool: data
state: FAULTED
scan: none requested
errors: 2 data errors
`;
    const parsed = parseZpoolStatus(statusText);
    const names = parsed.map((pool) => pool.name);
    expect(names).toEqual(["tank", "data"]);
    expect(parsed[1].state).toBe("FAULTED");
  });
});
|
package/src/collect/conntrack.ts
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import { readProcFile } from "../lib/parse.js";
|
|
2
|
-
|
|
3
|
-
/** Usage of the netfilter connection-tracking table as read from /proc. */
export interface ConntrackData {
  /** False when either proc file is unreadable/empty or holds an unusable number. */
  available: boolean;
  /** Value of nf_conntrack_count (0 when unavailable). */
  count: number;
  /** Value of nf_conntrack_max (0 when unavailable). */
  max: number;
  /** count / max as a percentage, rounded to one decimal place. */
  percent: number;
}

/**
 * Reads nf_conntrack_count and nf_conntrack_max and reports table utilisation.
 *
 * @returns An `available: false` record with all-zero figures when either file
 *   is missing or empty, either value is not a number, or the maximum is 0.
 */
export function collectConntrack(): ConntrackData {
  const unavailable: ConntrackData = { available: false, count: 0, max: 0, percent: 0 };

  const countText = readProcFile("/proc/sys/net/netfilter/nf_conntrack_count");
  const maxText = readProcFile("/proc/sys/net/netfilter/nf_conntrack_max");
  if (!(countText && maxText)) return unavailable;

  const count = parseInt(countText.trim(), 10);
  const max = parseInt(maxText.trim(), 10);

  // max === 0 is rejected as well so the division below is always defined.
  const usable = !isNaN(count) && !isNaN(max) && max !== 0;
  if (!usable) return unavailable;

  const tenthsOfPercent = Math.round(((count / max) * 100) * 10);
  return { available: true, count, max, percent: tenthsOfPercent / 10 };
}
|
package/src/collect/cpu.ts
DELETED
|
@@ -1,92 +0,0 @@
|
|
|
1
|
-
import { readProcFile, sleep } from "../lib/parse.js";
|
|
2
|
-
import type { CpuInfo, CpuCoreInfo } from "../lib/types.js";
|
|
3
|
-
|
|
4
|
-
/** The first eight tick counters of a /proc/stat `cpu` line, in file order. */
interface CpuStat {
  user: number;
  nice: number;
  system: number;
  idle: number;
  iowait: number;
  irq: number;
  softirq: number;
  steal: number;
}

/**
 * Parses one `cpu`/`cpuN` line of /proc/stat.
 *
 * The leading label is discarded; missing or non-numeric columns become 0 and
 * any columns after the eighth are ignored.
 */
function parseLine(line: string): CpuStat {
  const [, ...columns] = line.split(/\s+/);
  const tick = (index: number): number => Number(columns[index]) || 0;

  return {
    user: tick(0),
    nice: tick(1),
    system: tick(2),
    idle: tick(3),
    iowait: tick(4),
    irq: tick(5),
    softirq: tick(6),
    steal: tick(7),
  };
}
|
|
16
|
-
|
|
17
|
-
/**
 * Reads /proc/stat and returns the aggregate `cpu` counters plus one entry per
 * `cpuN` line, in the order the lines appear in the file.
 *
 * An unreadable file yields all-zero aggregate counters and no cores.
 */
function parseProcStat(): { aggregate: CpuStat; cores: CpuStat[] } {
  const statLines = (readProcFile("/proc/stat") || "").split("\n");

  // The aggregate line is "cpu" followed by a space; per-core lines are "cpuN".
  const totalLine = statLines.find((text) => text.startsWith("cpu "));
  const zeroed: CpuStat = { user: 0, nice: 0, system: 0, idle: 0, iowait: 0, irq: 0, softirq: 0, steal: 0 };

  const perCoreLines = statLines.filter((text) => /^cpu\d+\s/.test(text));

  return {
    aggregate: totalLine ? parseLine(totalLine) : zeroed,
    cores: perCoreLines.map((text) => parseLine(text)),
  };
}
|
|
32
|
-
|
|
33
|
-
/**
 * Converts tick counters into percentages of their own sum, rounded to two
 * decimal places.
 *
 * `user` includes `nice`. `steal` counts towards the total but is not reported.
 * A zero total is replaced by 1 so that every share comes out as 0.
 */
function calcPercents(d: CpuStat): { user: number; system: number; iowait: number; idle: number; irq: number; softirq: number } {
  let total = 0;
  for (const ticks of Object.values(d)) {
    total += ticks;
  }
  if (!total) total = 1;

  const share = (ticks: number): number => {
    const hundredths = Math.round((ticks / total) * 10000);
    return hundredths / 100;
  };

  const user = share(d.user + d.nice);
  const system = share(d.system);
  const iowait = share(d.iowait);
  const idle = share(d.idle);
  const irq = share(d.irq);
  const softirq = share(d.softirq);
  return { user, system, iowait, idle, irq, softirq };
}
|
|
45
|
-
|
|
46
|
-
/**
 * Field-wise difference `b - a` between two /proc/stat samples.
 *
 * Each difference is clamped at 0: a counter that steps backwards between
 * samples (the kernel documents that iowait may decrease, and per-CPU counters
 * can restart after hot-plug) would otherwise produce negative tick counts and,
 * downstream, negative or >100% percentages.
 *
 * @param a Earlier sample.
 * @param b Later sample.
 */
function delta(a: CpuStat, b: CpuStat): CpuStat {
  const elapsed = (later: number, earlier: number): number => Math.max(0, later - earlier);
  return {
    user: elapsed(b.user, a.user),
    nice: elapsed(b.nice, a.nice),
    system: elapsed(b.system, a.system),
    idle: elapsed(b.idle, a.idle),
    iowait: elapsed(b.iowait, a.iowait),
    irq: elapsed(b.irq, a.irq),
    softirq: elapsed(b.softirq, a.softirq),
    steal: elapsed(b.steal, a.steal),
  };
}
|
|
54
|
-
|
|
55
|
-
/**
 * Measures CPU utilisation over a one-second window.
 *
 * Takes two /proc/stat samples separated by `sleep(1000)`, converts the
 * difference into percentages for the aggregate line and for each core, and
 * adds the three load averages from /proc/loadavg (0 when unreadable).
 *
 * Cores are paired by their position in the two samples and numbered by that
 * position.
 * NOTE(review): position only equals the kernel CPU id when the `cpuN` lines
 * are contiguous — confirm behaviour with offline CPUs.
 */
export async function collectCpu(): Promise<CpuInfo> {
  const before = parseProcStat();
  await sleep(1000);
  const after = parseProcStat();

  const overall = calcPercents(delta(before.aggregate, after.aggregate));

  const loadFields = (readProcFile("/proc/loadavg") || "0 0 0").trim().split(" ");
  const loadAt = (index: number): number => parseFloat(loadFields[index]) || 0;

  // Only cores present in both samples can be compared.
  const pairedCores = Math.min(before.cores.length, after.cores.length);
  const cores: CpuCoreInfo[] = Array.from({ length: pairedCores }, (_unused, core): CpuCoreInfo => {
    const usage = calcPercents(delta(before.cores[core], after.cores[core]));
    return {
      core,
      user_percent: usage.user,
      system_percent: usage.system,
      iowait_percent: usage.iowait,
      idle_percent: usage.idle,
      irq_percent: usage.irq,
      softirq_percent: usage.softirq,
    };
  });

  return {
    user_percent: overall.user,
    system_percent: overall.system,
    iowait_percent: overall.iowait,
    idle_percent: overall.idle,
    load_1m: loadAt(0),
    load_5m: loadAt(1),
    load_15m: loadAt(2),
    cores,
  };
}
|
package/src/collect/disks.ts
DELETED
|
@@ -1,91 +0,0 @@
|
|
|
1
|
-
import { run } from "../lib/exec.js";
|
|
2
|
-
import { readProcFile } from "../lib/parse.js";
|
|
3
|
-
import type { DiskInfo } from "../lib/types.js";
|
|
4
|
-
|
|
5
|
-
/** One row of /proc/mounts (first four columns only). */
interface MountInfo {
  device: string;
  mount: string;
  fstype: string;
  options: string;
}

/**
 * Decodes the three-digit octal escapes the kernel uses in /proc/mounts for
 * characters that would break the space-separated format (`\040` space,
 * `\011` tab, `\012` newline, `\134` backslash).
 */
function decodeMountField(field: string): string {
  return field.replace(/\\([0-7]{3})/g, (_match, octal: string) => String.fromCharCode(parseInt(octal, 8)));
}

/**
 * Parses /proc/mounts into device / mount point / filesystem type / options.
 *
 * Device and mount point are unescaped so that a path such as
 * `/mnt/my\040disk` is returned as `/mnt/my disk`, the form other tools print.
 * Rows with fewer than four columns (including the trailing empty line) are
 * skipped. An unreadable file yields an empty list.
 */
function parseMounts(): MountInfo[] {
  const raw = readProcFile("/proc/mounts") || "";
  const result: MountInfo[] = [];
  for (const line of raw.split("\n")) {
    const parts = line.split(" ");
    if (parts.length < 4) continue;
    result.push({
      device: decodeMountField(parts[0]),
      mount: decodeMountField(parts[1]),
      fstype: parts[2],
      options: parts[3],
    });
  }
  return result;
}
|
|
27
|
-
|
|
28
|
-
/**
 * Collects capacity and inode usage for mounted `/dev/*` filesystems.
 *
 * Runs `df -B1 --output=…` for byte counts and `df -i` for inode counts
 * (tmpfs, devtmpfs and squashfs excluded), then attaches filesystem type and
 * mount options from `parseMounts()`.
 *
 * Rows are parsed by anchoring on the fixed columns rather than splitting on
 * whitespace, so a mount point that contains spaces no longer shifts the
 * numeric columns.
 *
 * @returns One entry per `/dev/*` row of the first `df` call; an empty list
 *   when that call produced no output. Inode and mount fields are `undefined`
 *   when no row with the same mount point was found in the other sources.
 */
export async function collectDisks(): Promise<DiskInfo[]> {
  const dfOutput = await run("df", ["-B1", "--output=source,target,size,used,avail,pcent", "-x", "tmpfs", "-x", "devtmpfs", "-x", "squashfs"]);
  if (!dfOutput) return [];

  // Inode data comes from plain `df -i`:
  // Filesystem Inodes IUsed IFree IUse% Mounted_on
  const dfInodeOutput = await run("df", ["-i", "-x", "tmpfs", "-x", "devtmpfs", "-x", "squashfs"]);
  const inodeMap = new Map<string, { total: number; used: number; free: number }>();
  if (dfInodeOutput) {
    // Five single-token columns, then the mount point as the remainder of the line.
    const inodeRow = /^(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(.+)$/;
    for (const line of dfInodeOutput.trim().split("\n").slice(1)) {
      const cols = inodeRow.exec(line.trim());
      if (!cols) continue;
      inodeMap.set(cols[6], {
        total: parseInt(cols[2], 10) || 0,
        used: parseInt(cols[3], 10) || 0,
        free: parseInt(cols[4], 10) || 0,
      });
    }
  }

  // Filesystem type and options are looked up by mount point; this relies on
  // parseMounts() reporting mount points in the same form df prints them.
  const mountMap = new Map<string, MountInfo>();
  for (const m of parseMounts()) {
    mountMap.set(m.mount, m);
  }

  // source, then the mount point (may contain spaces), then four trailing tokens:
  // size, used, avail, pcent.
  const usageRow = /^(\S+)\s+(.+?)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)$/;
  const bytesPerGiB = 1073741824;
  const toGiB = (bytes: number): number => Math.round((bytes / bytesPerGiB) * 100) / 100;

  const disks: DiskInfo[] = [];
  for (const line of dfOutput.trim().split("\n").slice(1)) {
    const cols = usageRow.exec(line.trim());
    if (!cols) continue;

    const device = cols[1];
    if (!device.startsWith("/dev/")) continue;

    const mount = cols[2];
    const totalBytes = parseInt(cols[3], 10) || 0;
    const usedBytes = parseInt(cols[4], 10) || 0;
    const availBytes = parseInt(cols[5], 10) || 0;
    const percent = parseInt(cols[6].replace("%", ""), 10) || 0;

    const mountInfo = mountMap.get(mount);
    const inodes = inodeMap.get(mount);

    disks.push({
      device,
      mount,
      total_gb: toGiB(totalBytes),
      used_gb: toGiB(usedBytes),
      available_gb: toGiB(availBytes),
      percent_used: percent,
      fstype: mountInfo?.fstype,
      options: mountInfo?.options,
      inodes_total: inodes?.total,
      inodes_used: inodes?.used,
      inodes_free: inodes?.free,
    });
  }

  return disks;
}
|
package/src/collect/fd.ts
DELETED
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
import { readProcFile } from "../lib/parse.js";
|
|
2
|
-
|
|
3
|
-
/** System-wide file-handle usage taken from /proc/sys/fs/file-nr. */
export interface FileDescriptorData {
  /** First column of file-nr. */
  allocated: number;
  /** Second column of file-nr (0 when it is not a number). */
  free: number;
  /** Third column of file-nr. */
  max: number;
  /** allocated / max as a percentage, rounded to one decimal place. */
  percent: number;
}

/**
 * Reads /proc/sys/fs/file-nr and reports file-handle utilisation.
 *
 * @returns All-zero figures when the file is unreadable, has fewer than three
 *   columns, `allocated` or `max` is not a number, or `max` is 0.
 */
export function collectFileDescriptors(): FileDescriptorData {
  const empty: FileDescriptorData = { allocated: 0, free: 0, max: 0, percent: 0 };

  const contents = readProcFile("/proc/sys/fs/file-nr");
  if (!contents) return empty;

  const columns = contents.trim().split(/\s+/);
  if (columns.length < 3) return empty;

  const [allocated, freeHandles, max] = columns.slice(0, 3).map((text) => parseInt(text, 10));

  // A zero maximum is rejected too, so the division below is always defined.
  const usable = !isNaN(allocated) && !isNaN(max) && max !== 0;
  if (!usable) return empty;

  const tenthsOfPercent = Math.round(((allocated / max) * 100) * 10);
  return {
    allocated,
    free: isNaN(freeHandles) ? 0 : freeHandles,
    max,
    percent: tenthsOfPercent / 10,
  };
}
|
package/src/collect/io-errors.ts
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import { run } from "../lib/exec.js";
|
|
2
|
-
|
|
3
|
-
/**
 * Scans the last ten minutes of the kernel log for block I/O errors.
 *
 * The shell pipeline greps `dmesg -T --since "10 minutes ago"` for
 * "I/O error", "Buffer I/O error" and "blk_update_request … error" lines; the
 * ten-minute window is meant to cover the 5-minute collection interval.
 *
 * Device names are captured including hyphens, so device-mapper names such as
 * `dm-0` are reported whole instead of being cut at the hyphen.
 *
 * @returns `null` when the command yields no matching lines; otherwise the
 *   number of matching lines and the distinct device names found in them, in
 *   first-seen order.
 */
export async function collectIoErrors(): Promise<{ count: number; devices: string[] } | null> {
  const output = await run("bash", ["-c", 'dmesg -T --since "10 minutes ago" 2>/dev/null | grep -i "I/O error\\|Buffer I/O error\\|blk_update_request.*error"'], 5000);
  if (!output || !output.trim()) return null;

  const lines = output.trim().split("\n").filter((l) => l.trim());
  if (lines.length === 0) return null;

  // "blk_update_request: I/O error, dev sda, sector 12345"
  const devPattern = /dev\s+([\w-]+)/;
  // "Buffer I/O error on device sda1"
  const bufferPattern = /on device\s+([\w-]+)/;

  const deviceSet = new Set<string>();
  for (const line of lines) {
    const devMatch = devPattern.exec(line);
    if (devMatch) deviceSet.add(devMatch[1]);
    const bufMatch = bufferPattern.exec(line);
    if (bufMatch) deviceSet.add(bufMatch[1]);
  }

  return { count: lines.length, devices: Array.from(deviceSet) };
}
|
|
@@ -1,103 +0,0 @@
|
|
|
1
|
-
import { readProcFile } from "../lib/parse.js";
|
|
2
|
-
|
|
3
|
-
/** Per-device I/O figures derived from two successive /proc/diskstats samples. */
export interface IoLatencyInfo {
  /** Kernel device name, e.g. `sda` or `nvme0n1`. */
  device: string;
  /** Mean time per completed read in ms; null on the first sample or when no reads completed. */
  avg_read_latency_ms: number | null;
  /** Mean time per completed write in ms; null on the first sample or when no writes completed. */
  avg_write_latency_ms: number | null;
  /** Reads completed since the previous sample (a count, not divided by elapsed time). */
  read_iops: number;
  /** Writes completed since the previous sample (a count, not divided by elapsed time). */
  write_iops: number;
}

/** The cumulative /proc/diskstats counters kept between samples. */
interface DiskstatsCounters {
  reads_completed: number;
  read_time_ms: number;
  writes_completed: number;
  write_time_ms: number;
}

// Cumulative counters from the previous collection, keyed by device name;
// used as the baseline when computing per-interval deltas.
const previousCounters: Map<string, DiskstatsCounters> = new Map();
|
|
20
|
-
|
|
21
|
-
/**
 * Tells whether a /proc/diskstats device name is a whole disk or array rather
 * than a partition or another virtual device.
 *
 * Accepted shapes:
 *  - `sd*`, `vd*`, `xvd*` followed by letters only (no partition number)
 *  - `nvme<N>n<M>` without a `p<K>` partition suffix
 *  - `md<N>` (RAID arrays)
 */
function isPhysicalDevice(name: string): boolean {
  const wholeDeviceShapes = [
    /^(sd|vd|xvd)[a-z]+$/,
    /^nvme\d+n\d+$/,
    /^md\d+$/,
  ];
  return wholeDeviceShapes.some((shape) => shape.test(name));
}
|
|
31
|
-
|
|
32
|
-
/**
 * Reads /proc/diskstats and returns the cumulative read/write counters for
 * every device accepted by `isPhysicalDevice`, keyed by device name.
 *
 * Rows with fewer than 11 columns are ignored; non-numeric columns count as 0.
 * An unreadable file yields an empty record.
 */
function parseDiskstats(): Record<string, DiskstatsCounters> {
  const counters: Record<string, DiskstatsCounters> = {};
  const toCount = (text: string): number => Number(text) || 0;

  const rows = (readProcFile("/proc/diskstats") || "")
    .split("\n")
    .map((row) => row.trim().split(/\s+/));

  for (const cols of rows) {
    // Column 2 is the device name; 3/6 are reads completed / read time and
    // 7/10 are writes completed / write time.
    if (cols.length < 11 || !isPhysicalDevice(cols[2])) continue;

    counters[cols[2]] = {
      reads_completed: toCount(cols[3]),
      read_time_ms: toCount(cols[6]),
      writes_completed: toCount(cols[7]),
      write_time_ms: toCount(cols[10]),
    };
  }

  return counters;
}
|
|
53
|
-
|
|
54
|
-
/**
 * Increase of a cumulative counter since the previous sample.
 *
 * When the counter went backwards (wrapped or was reset) the current value
 * itself is taken as the increase.
 */
function delta(current: number, previous: number): number {
  const movedForward = current >= previous;
  return movedForward ? current - previous : current;
}
|
|
58
|
-
|
|
59
|
-
/**
 * Reports per-device I/O latency and operation counts since the previous call.
 *
 * Each call stores the current /proc/diskstats counters in `previousCounters`.
 * A device seen for the first time has no baseline, so it is reported with
 * null latencies and zero operation counts. Devices that disappeared since the
 * last call are dropped from the stored baseline.
 *
 * Latency is elapsed I/O time divided by completed operations, rounded to two
 * decimals, or null when no operation completed in the interval.
 */
export function collectIoLatency(): IoLatencyInfo[] {
  const snapshot = parseDiskstats();
  const seen = new Set<string>();
  const report: IoLatencyInfo[] = [];

  const meanMs = (elapsedMs: number, operations: number): number | null =>
    operations > 0 ? Math.round((elapsedMs / operations) * 100) / 100 : null;

  for (const [device, now] of Object.entries(snapshot)) {
    seen.add(device);

    // Fetch the old baseline before overwriting it with this sample.
    const baseline = previousCounters.get(device);
    previousCounters.set(device, { ...now });

    if (!baseline) {
      report.push({
        device,
        avg_read_latency_ms: null,
        avg_write_latency_ms: null,
        read_iops: 0,
        write_iops: 0,
      });
      continue;
    }

    const reads = delta(now.reads_completed, baseline.reads_completed);
    const readMs = delta(now.read_time_ms, baseline.read_time_ms);
    const writes = delta(now.writes_completed, baseline.writes_completed);
    const writeMs = delta(now.write_time_ms, baseline.write_time_ms);

    report.push({
      device,
      avg_read_latency_ms: meanMs(readMs, reads),
      avg_write_latency_ms: meanMs(writeMs, writes),
      read_iops: reads,
      write_iops: writes,
    });
  }

  // Forget baselines for devices that are no longer listed.
  for (const device of previousCounters.keys()) {
    if (!seen.has(device)) previousCounters.delete(device);
  }

  return report;
}
|
package/src/collect/ipmi.ts
DELETED
|
@@ -1,207 +0,0 @@
|
|
|
1
|
-
import { run } from "../lib/exec.js";
|
|
2
|
-
import type { IpmiInfo, SelEvent, FanStatus } from "../lib/types.js";
|
|
3
|
-
|
|
4
|
-
/**
 * Collects BMC data through `ipmitool`: sensor readings, ECC error totals,
 * the SEL entry count, recent SEL events and fan status.
 *
 * ECC totals are summed from numeric sensors whose name mentions
 * "uncorrectable" or "correctable". The two are tested in that order and are
 * mutually exclusive, because the word "uncorrectable" itself contains
 * "correctable" and would otherwise be counted in both totals.
 *
 * @returns `available: false` with empty collections when `ipmitool sensor`
 *   produces no output; otherwise the collected data.
 */
export async function collectIpmi(): Promise<IpmiInfo> {
  const sensorRaw = await run("ipmitool", ["sensor"]);
  if (!sensorRaw) {
    return { available: false, sensors: [], ecc_errors: { correctable: 0, uncorrectable: 0 }, sel_entries_count: 0, sel_events_recent: [], fans: [] };
  }

  // Sensor rows are pipe-separated: name | value | unit | status | thresholds…
  const sensors: IpmiInfo["sensors"] = [];
  for (const line of sensorRaw.split("\n")) {
    const parts = line.split("|").map((s) => s.trim());
    if (parts.length < 4) continue;
    const name = parts[0];
    const rawValue = parts[1];
    const unit = parts[2];
    const status = parts[3];

    // Keep the raw text (e.g. "na") when the reading is not numeric.
    const numValue = parseFloat(rawValue);
    const value: number | string = isNaN(numValue) ? rawValue : numValue;

    // Column 8 is taken as the upper-critical threshold when it is numeric.
    let upperCritical: number | undefined;
    if (parts[8]) {
      const uc = parseFloat(parts[8]);
      if (!isNaN(uc)) upperCritical = uc;
    }

    sensors.push({ name, value, unit, status, upper_critical: upperCritical });
  }

  // ECC errors from memory-type sensors.
  let correctable = 0;
  let uncorrectable = 0;
  for (const sensor of sensors) {
    if (typeof sensor.value !== "number") continue;
    const name = sensor.name.toLowerCase();
    if (name.includes("uncorrectable")) {
      uncorrectable += sensor.value;
    } else if (name.includes("correctable")) {
      correctable += sensor.value;
    }
  }

  // SEL entry count from the "Entries : N" line of `sel info`.
  let selCount = 0;
  const selInfo = await run("ipmitool", ["sel", "info"]);
  if (selInfo) {
    const match = selInfo.match(/Entries\s*:\s*(\d+)/i);
    if (match) selCount = parseInt(match[1], 10);
  }

  const selEvents = await collectSelEvents();
  const fans = await collectFanStatus();

  return {
    available: true,
    sensors,
    ecc_errors: { correctable, uncorrectable },
    sel_entries_count: selCount,
    sel_events_recent: selEvents,
    fans,
  };
}
|
|
68
|
-
|
|
69
|
-
/**
 * Reads the System Event Log via `ipmitool sel elist` and returns the last 20
 * entries, most recent first (the listing is assumed to be oldest-first).
 *
 * No time-window filtering is applied here: every parsed row is a candidate
 * and only the 20-entry cap limits the result.
 *
 * Record IDs are parsed as hexadecimal.
 * NOTE(review): ipmitool prints SEL record IDs with a hex format — confirm
 * against the ipmitool version deployed.
 *
 * @returns An empty list when the command produces no output.
 */
async function collectSelEvents(): Promise<SelEvent[]> {
  const output = await run("ipmitool", ["sel", "elist"]);
  if (!output) return [];

  const events: SelEvent[] = [];

  for (const line of output.trim().split("\n")) {
    // id | date | time | sensor | event | direction
    const parts = line.split("|").map((s) => s.trim());
    if (parts.length < 5) continue;

    const [idStr, date, time, sensor, event, direction] = parts;

    const sensorType = classifySensor(sensor);

    events.push({
      id: parseInt(idStr, 16) || 0,
      timestamp: parseSelTimestamp(date, time),
      sensor,
      sensor_type: sensorType,
      event,
      // Rows without a sixth column are treated as assertions.
      direction: direction || "Asserted",
      severity: deriveSelSeverity(event, sensorType),
    });
  }

  return events.slice(-20).reverse();
}
|
|
106
|
-
|
|
107
|
-
/**
 * Builds an ISO-8601-style timestamp from SEL date and time columns.
 *
 * @param date Date as `MM/DD/YYYY`, e.g. "04/05/2026".
 * @param time Time as `HH:MM:SS`, e.g. "14:23:05"; copied through unchanged.
 * @returns `YYYY-MM-DDT<time>Z` with month and day zero-padded to two digits.
 *   When either argument is empty, or the date does not have exactly three
 *   `/`-separated parts, the current time is returned instead.
 *   NOTE(review): the `Z` suffix labels the SEL time as UTC — confirm the BMC
 *   clock is UTC.
 */
export function parseSelTimestamp(date: string, time: string): string {
  const now = (): string => new Date().toISOString();

  if (!(date && time)) return now();

  const pieces = date.split("/");
  if (pieces.length !== 3) return now();

  const month = pieces[0].padStart(2, "0");
  const day = pieces[1].padStart(2, "0");
  const year = pieces[2];
  return [year, month, day].join("-") + "T" + time + "Z";
}
|
|
115
|
-
|
|
116
|
-
/**
 * Maps a sensor name to a coarse category by case-insensitive keyword match.
 *
 * Rules are tried top to bottom and the first hit wins, so a name matching
 * several rules takes the earliest one (e.g. "CPU Temp" is "processor", not
 * "temperature"). Names matching nothing are "other".
 */
export function classifySensor(sensor: string): string {
  const rules: ReadonlyArray<readonly [category: string, keywords: readonly string[]]> = [
    ["memory", ["memory", "dimm"]],
    ["power", ["power supply", "psu"]],
    ["fan", ["fan"]],
    ["watchdog", ["watchdog"]],
    ["processor", ["processor", "cpu"]],
    ["temperature", ["temperature", "temp"]],
    ["voltage", ["voltage"]],
    ["storage", ["drive", "disk"]],
    ["chassis", ["chassis", "intrusion"]],
  ];

  const needle = sensor.toLowerCase();
  for (const [category, keywords] of rules) {
    if (keywords.some((keyword) => needle.includes(keyword))) return category;
  }
  return "other";
}
|
|
129
|
-
|
|
130
|
-
/**
 * Derives a severity ("critical", "warning" or "info") for a SEL event from
 * keywords in its description.
 *
 * Precedence: critical keywords, then warning keywords, then info keywords.
 * When nothing matches, events from memory, power, fan and processor sensors
 * default to "warning" and everything else to "info".
 *
 * The bare word "critical" only counts when it is not part of "non-critical":
 * threshold events such as "Upper Non-critical going high" are warnings.
 * "Lower/Upper Critical going low/high" events remain "critical", as they
 * always resolved to via the "critical" keyword.
 *
 * @param event Event description text from the SEL listing.
 * @param sensorType Category as produced by `classifySensor`; used only for
 *   the fallback.
 */
export function deriveSelSeverity(event: string, sensorType: string): string {
  const lower = event.toLowerCase();
  const mentions = (keyword: string): boolean => lower.includes(keyword);

  const criticalKeywords = [
    "uncorrectable",
    "failure detected",
    "ac lost",
    "hard reset",
    "power off",
    "non-recoverable",
    "thermal trip",
    "processor disabled",
    "machine check",
  ];
  // "critical" on its own, but not as the tail of "non-critical".
  const standaloneCritical = /(?<!non-)critical/;
  if (criticalKeywords.some(mentions) || standaloneCritical.test(lower)) return "critical";

  const warningKeywords = [
    "correctable ecc",
    "logging limit",
    "non-critical",
    "redundancy lost",
    "predictive failure",
    "degraded",
  ];
  if (warningKeywords.some(mentions)) return "warning";

  const infoKeywords = ["presence detected", "power cycle", "oem"];
  if (infoKeywords.some(mentions)) return "info";

  // Unrecognised events from hardware that usually matters are still surfaced.
  return ["memory", "power", "fan", "processor"].includes(sensorType) ? "warning" : "info";
}
|
|
162
|
-
|
|
163
|
-
/**
 * Runs `ipmitool sdr type Fan` and parses the result with `parseFanStatus`.
 *
 * @returns An empty list when the command produces no output.
 */
async function collectFanStatus(): Promise<FanStatus[]> {
  const sdrListing = await run("ipmitool", ["sdr", "type", "Fan"]);
  return sdrListing ? parseFanStatus(sdrListing) : [];
}
|
|
168
|
-
|
|
169
|
-
/**
 * Parses pipe-separated fan rows (as printed by `ipmitool sdr type Fan`) into
 * name / rpm / status records.
 *
 * RPM is taken from the first "<number> RPM" found anywhere in the row, since
 * the column layout varies by BMC. Fractional readings such as "4900.25 RPM"
 * are read whole and rounded to the nearest integer; rows without an RPM
 * figure report 0.
 *
 * Status, in order of precedence, from the columns after the name:
 *  - "critical" for code `cr` or `nr`
 *  - "warning"  for code `nc`
 *  - "absent"   for code `ns` or a "no reading" row
 *  - "ok"       for code `ok`
 *  - with no recognised code: "critical" when rpm is 0, otherwise "ok"
 *
 * Rows with fewer than three columns are skipped.
 */
export function parseFanStatus(output: string): FanStatus[] {
  const fans: FanStatus[] = [];

  for (const line of output.trim().split("\n")) {
    const parts = line.split("|").map((s) => s.trim());
    if (parts.length < 3) continue;

    const name = parts[0];
    const fullLine = parts.join(" ");

    // Accept an optional fractional part so "4900.25 RPM" is not read as 25.
    const rpmMatch = fullLine.match(/(\d+(?:\.\d+)?)\s*RPM/i);
    const rpm = rpmMatch ? Math.round(parseFloat(rpmMatch[1])) : 0;

    const hasNoReading = fullLine.toLowerCase().includes("no reading");
    const statusCodes = parts.slice(1).map((p) => p.toLowerCase());
    const hasCode = (...codes: string[]): boolean => statusCodes.some((s) => codes.includes(s));

    let status = "ok";
    if (hasCode("cr", "nr")) status = "critical";
    else if (hasCode("nc")) status = "warning";
    else if (hasCode("ns") || hasNoReading) status = "absent";
    else if (hasCode("ok")) status = "ok";
    // No status code at all: a fan reporting 0 RPM is treated as failed.
    else if (rpm === 0) status = "critical";

    fans.push({ name, rpm, status });
  }

  return fans;
}
|