@glassmkr/crucible 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/alerts/__tests__/rules.test.d.ts +1 -0
- package/dist/alerts/__tests__/rules.test.js +325 -0
- package/dist/alerts/__tests__/rules.test.js.map +1 -0
- package/dist/alerts/rules.d.ts +8 -0
- package/dist/alerts/rules.js +139 -32
- package/dist/alerts/rules.js.map +1 -1
- package/dist/api.d.ts +2 -0
- package/dist/api.js +7 -0
- package/dist/api.js.map +1 -0
- package/dist/collect/__tests__/dmi.test.d.ts +1 -0
- package/dist/collect/__tests__/dmi.test.js +114 -0
- package/dist/collect/__tests__/dmi.test.js.map +1 -0
- package/dist/collect/__tests__/ipmi.test.js +47 -1
- package/dist/collect/__tests__/ipmi.test.js.map +1 -1
- package/dist/collect/__tests__/thermal.test.d.ts +1 -0
- package/dist/collect/__tests__/thermal.test.js +164 -0
- package/dist/collect/__tests__/thermal.test.js.map +1 -0
- package/dist/collect/dmi.d.ts +19 -0
- package/dist/collect/dmi.js +109 -0
- package/dist/collect/dmi.js.map +1 -0
- package/dist/collect/ipmi.d.ts +27 -2
- package/dist/collect/ipmi.js +90 -2
- package/dist/collect/ipmi.js.map +1 -1
- package/dist/collect/thermal.d.ts +10 -0
- package/dist/collect/thermal.js +187 -0
- package/dist/collect/thermal.js.map +1 -0
- package/dist/config.d.ts +10 -0
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/index.js +51 -1
- package/dist/index.js.map +1 -1
- package/dist/lib/__tests__/capability.test.d.ts +1 -0
- package/dist/lib/__tests__/capability.test.js +87 -0
- package/dist/lib/__tests__/capability.test.js.map +1 -0
- package/dist/lib/__tests__/vendor-sensors.test.d.ts +1 -0
- package/dist/lib/__tests__/vendor-sensors.test.js +49 -0
- package/dist/lib/__tests__/vendor-sensors.test.js.map +1 -0
- package/dist/lib/capability.d.ts +21 -0
- package/dist/lib/capability.js +110 -0
- package/dist/lib/capability.js.map +1 -0
- package/dist/lib/cpu-thermal-chips.d.ts +2 -0
- package/dist/lib/cpu-thermal-chips.js +28 -0
- package/dist/lib/cpu-thermal-chips.js.map +1 -0
- package/dist/lib/types.d.ts +58 -0
- package/dist/lib/vendor-sensors.d.ts +27 -0
- package/dist/lib/vendor-sensors.js +63 -0
- package/dist/lib/vendor-sensors.js.map +1 -0
- package/dist/notify/telegram.js +1 -1
- package/dist/notify/telegram.js.map +1 -1
- package/package.json +16 -1
- package/rule-ids.json +29 -0
- package/.dockerignore +0 -13
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -24
- package/.github/ISSUE_TEMPLATE/no_data.md +0 -26
- package/.github/workflows/docker.yml +0 -53
- package/.github/workflows/publish.yml +0 -25
- package/Dockerfile +0 -59
- package/config/collector.example.yaml +0 -43
- package/docker-compose.yml +0 -26
- package/scripts/sign-release.sh +0 -29
- package/src/__tests__/cli.test.ts +0 -74
- package/src/__tests__/reboot-marker.test.ts +0 -122
- package/src/alerts/evaluator.ts +0 -15
- package/src/alerts/rules.ts +0 -283
- package/src/alerts/state.ts +0 -92
- package/src/cli.ts +0 -112
- package/src/collect/__tests__/ipmi.test.ts +0 -96
- package/src/collect/__tests__/smart.test.ts +0 -68
- package/src/collect/__tests__/system.test.ts +0 -29
- package/src/collect/__tests__/zfs.test.ts +0 -72
- package/src/collect/conntrack.ts +0 -27
- package/src/collect/cpu.ts +0 -92
- package/src/collect/disks.ts +0 -91
- package/src/collect/fd.ts +0 -31
- package/src/collect/io-errors.ts +0 -23
- package/src/collect/io-latency.ts +0 -103
- package/src/collect/ipmi.ts +0 -207
- package/src/collect/memory.ts +0 -30
- package/src/collect/network.ts +0 -193
- package/src/collect/ntp.ts +0 -114
- package/src/collect/os-alerts.ts +0 -43
- package/src/collect/raid.ts +0 -40
- package/src/collect/security.ts +0 -268
- package/src/collect/smart.ts +0 -72
- package/src/collect/system.ts +0 -32
- package/src/collect/systemd.ts +0 -33
- package/src/collect/zfs.ts +0 -66
- package/src/config.ts +0 -65
- package/src/index.ts +0 -221
- package/src/lib/__tests__/parse.test.ts +0 -28
- package/src/lib/exec.ts +0 -16
- package/src/lib/parse.ts +0 -29
- package/src/lib/reboot-marker.ts +0 -88
- package/src/lib/types.ts +0 -226
- package/src/lib/version-check.ts +0 -39
- package/src/lib/version.ts +0 -33
- package/src/metrics-server.ts +0 -123
- package/src/notify/email.ts +0 -69
- package/src/notify/slack.ts +0 -47
- package/src/notify/telegram.ts +0 -65
- package/src/push/forge.ts +0 -109
- package/tsconfig.json +0 -15
- package/vitest.config.ts +0 -12
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,325 @@
|
|
|
1
|
+
import { describe, it, expect } from "vitest";
|
|
2
|
+
import { allRules } from "../rules.js";
|
|
3
|
+
const baseThresholds = {
|
|
4
|
+
ram_percent: 90,
|
|
5
|
+
swap_alert: true,
|
|
6
|
+
disk_percent: 85,
|
|
7
|
+
iowait_percent: 20,
|
|
8
|
+
nvme_wear_percent: 85,
|
|
9
|
+
disk_latency_nvme_ms: 50,
|
|
10
|
+
disk_latency_hdd_ms: 200,
|
|
11
|
+
cpu_temp_warning_c: 80,
|
|
12
|
+
cpu_temp_critical_c: 90,
|
|
13
|
+
interface_utilization_percent: 90,
|
|
14
|
+
};
|
|
15
|
+
function emptySnap() {
|
|
16
|
+
return {
|
|
17
|
+
collector_version: "test",
|
|
18
|
+
timestamp: "2026-01-01T00:00:00Z",
|
|
19
|
+
system: { hostname: "h", ip: "1.2.3.4", os: "linux", kernel: "6.0", uptime_seconds: 1000 },
|
|
20
|
+
cpu: { user_percent: 0, system_percent: 0, iowait_percent: 0, idle_percent: 100, load_1m: 0, load_5m: 0, load_15m: 0 },
|
|
21
|
+
memory: { total_mb: 16384, used_mb: 1000, available_mb: 15000, swap_total_mb: 0, swap_used_mb: 0 },
|
|
22
|
+
disks: [],
|
|
23
|
+
smart: [],
|
|
24
|
+
network: [],
|
|
25
|
+
raid: [],
|
|
26
|
+
ipmi: { available: false, sensors: [], ecc_errors: { correctable: 0, uncorrectable: 0 }, sel_entries_count: 0, sel_events_recent: [], fans: [] },
|
|
27
|
+
os_alerts: { oom_kills_recent: 0, zombie_processes: 0, time_drift_ms: 0 },
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
const diskLatencyRule = allRules.find(r => r.type === "disk_latency_high");
|
|
31
|
+
const swapHighRule = allRules.find(r => r.type === "swap_high");
|
|
32
|
+
describe("swap_high (formerly swap_active)", () => {
|
|
33
|
+
it("does not fire when no swap is in use", () => {
|
|
34
|
+
const snap = emptySnap();
|
|
35
|
+
expect(swapHighRule.evaluate(snap, baseThresholds)).toEqual([]);
|
|
36
|
+
});
|
|
37
|
+
it("fires warning when swap is in use", () => {
|
|
38
|
+
const snap = emptySnap();
|
|
39
|
+
snap.memory.swap_used_mb = 128;
|
|
40
|
+
const out = swapHighRule.evaluate(snap, baseThresholds);
|
|
41
|
+
expect(out).toHaveLength(1);
|
|
42
|
+
expect(out[0].severity).toBe("warning");
|
|
43
|
+
expect(out[0].type).toBe("swap_high");
|
|
44
|
+
expect(out[0].evidence.swap_used_mb).toBe(128);
|
|
45
|
+
});
|
|
46
|
+
it("respects t.swap_alert=false", () => {
|
|
47
|
+
const snap = emptySnap();
|
|
48
|
+
snap.memory.swap_used_mb = 128;
|
|
49
|
+
expect(swapHighRule.evaluate(snap, { ...baseThresholds, swap_alert: false })).toEqual([]);
|
|
50
|
+
});
|
|
51
|
+
});
|
|
52
|
+
const cpuTempRule = allRules.find(r => r.type === "cpu_temperature_high");
|
|
53
|
+
const eccRule = allRules.find(r => r.type === "ecc_errors");
|
|
54
|
+
const psuRule = allRules.find(r => r.type === "psu_redundancy_loss");
|
|
55
|
+
describe("ecc_errors (Dell-style SEL path)", () => {
|
|
56
|
+
it("fires when SEL ECC counts are higher than named-sensor counts", () => {
|
|
57
|
+
const snap = emptySnap();
|
|
58
|
+
snap.ipmi = {
|
|
59
|
+
available: true, sensors: [],
|
|
60
|
+
ecc_errors: { correctable: 0, uncorrectable: 0 },
|
|
61
|
+
ecc_errors_from_sel: { correctable: 3, uncorrectable: 0, newest_event_timestamp: "2026-04-05T14:31:00Z" },
|
|
62
|
+
sel_entries_count: 3, sel_events_recent: [], fans: [],
|
|
63
|
+
};
|
|
64
|
+
const out = eccRule.evaluate(snap, baseThresholds);
|
|
65
|
+
expect(out).toHaveLength(1);
|
|
66
|
+
expect(out[0].severity).toBe("warning");
|
|
67
|
+
expect(out[0].evidence.source).toBe("ipmi_sel");
|
|
68
|
+
expect(out[0].evidence.correctable).toBe(3);
|
|
69
|
+
});
|
|
70
|
+
it("uses named-sensor source when those counts exceed SEL", () => {
|
|
71
|
+
const snap = emptySnap();
|
|
72
|
+
snap.ipmi = {
|
|
73
|
+
available: true, sensors: [],
|
|
74
|
+
ecc_errors: { correctable: 5, uncorrectable: 0 },
|
|
75
|
+
ecc_errors_from_sel: { correctable: 0, uncorrectable: 0, newest_event_timestamp: null },
|
|
76
|
+
sel_entries_count: 0, sel_events_recent: [], fans: [],
|
|
77
|
+
};
|
|
78
|
+
const out = eccRule.evaluate(snap, baseThresholds);
|
|
79
|
+
expect(out[0].evidence.source).toBe("ipmi_sensors");
|
|
80
|
+
expect(out[0].evidence.correctable).toBe(5);
|
|
81
|
+
});
|
|
82
|
+
it("does not double-count when both sources are populated; uses max", () => {
|
|
83
|
+
const snap = emptySnap();
|
|
84
|
+
snap.ipmi = {
|
|
85
|
+
available: true, sensors: [],
|
|
86
|
+
ecc_errors: { correctable: 2, uncorrectable: 0 },
|
|
87
|
+
ecc_errors_from_sel: { correctable: 5, uncorrectable: 0, newest_event_timestamp: "2026-04-05T14:31:00Z" },
|
|
88
|
+
sel_entries_count: 5, sel_events_recent: [], fans: [],
|
|
89
|
+
};
|
|
90
|
+
const out = eccRule.evaluate(snap, baseThresholds);
|
|
91
|
+
expect(out[0].evidence.correctable).toBe(5); // max(2, 5)
|
|
92
|
+
});
|
|
93
|
+
it("escalates to critical on uncorrectable from SEL", () => {
|
|
94
|
+
const snap = emptySnap();
|
|
95
|
+
snap.ipmi = {
|
|
96
|
+
available: true, sensors: [],
|
|
97
|
+
ecc_errors: { correctable: 0, uncorrectable: 0 },
|
|
98
|
+
ecc_errors_from_sel: { correctable: 0, uncorrectable: 1, newest_event_timestamp: "2026-04-05T14:31:00Z" },
|
|
99
|
+
sel_entries_count: 1, sel_events_recent: [], fans: [],
|
|
100
|
+
};
|
|
101
|
+
const out = eccRule.evaluate(snap, baseThresholds);
|
|
102
|
+
expect(out[0].severity).toBe("critical");
|
|
103
|
+
});
|
|
104
|
+
it("does not fire when both sources are zero", () => {
|
|
105
|
+
const snap = emptySnap();
|
|
106
|
+
snap.ipmi.available = true;
|
|
107
|
+
expect(eccRule.evaluate(snap, baseThresholds)).toEqual([]);
|
|
108
|
+
});
|
|
109
|
+
});
|
|
110
|
+
describe("psu_redundancy_loss (Dell + Supermicro)", () => {
|
|
111
|
+
it("fires from aggregate redundancy state on Dell even if individual PS sensors look OK", () => {
|
|
112
|
+
const snap = emptySnap();
|
|
113
|
+
snap.dmi = { available: true, vendor: "dell", raw_vendor: "Dell Inc.", product_name: "PowerEdge R740", bios_version: "2.21", bios_date: "2024-08-15", is_virtual: false };
|
|
114
|
+
snap.ipmi = {
|
|
115
|
+
available: true,
|
|
116
|
+
sensors: [
|
|
117
|
+
{ name: "PS1 Status", value: "0x01", unit: "discrete", status: "ok" },
|
|
118
|
+
{ name: "PS2 Status", value: "0x01", unit: "discrete", status: "ok" },
|
|
119
|
+
],
|
|
120
|
+
ecc_errors: { correctable: 0, uncorrectable: 0 },
|
|
121
|
+
psu_redundancy_state: "redundancy_lost",
|
|
122
|
+
sel_entries_count: 0, sel_events_recent: [], fans: [],
|
|
123
|
+
};
|
|
124
|
+
const out = psuRule.evaluate(snap, baseThresholds);
|
|
125
|
+
expect(out).toHaveLength(1);
|
|
126
|
+
expect(out[0].evidence.source).toBe("aggregate_sensor");
|
|
127
|
+
expect(out[0].evidence.redundancy_state).toBe("redundancy_lost");
|
|
128
|
+
});
|
|
129
|
+
it("fires from per-PSU status on Dell when redundancy state is not set", () => {
|
|
130
|
+
const snap = emptySnap();
|
|
131
|
+
snap.dmi = { available: true, vendor: "dell", raw_vendor: "Dell Inc.", product_name: "PowerEdge R740", bios_version: null, bios_date: null, is_virtual: false };
|
|
132
|
+
snap.ipmi = {
|
|
133
|
+
available: true,
|
|
134
|
+
sensors: [
|
|
135
|
+
{ name: "PS1 Status", value: "0x01", unit: "discrete", status: "ok" },
|
|
136
|
+
{ name: "PS2 Status", value: "Failure detected", unit: "discrete", status: "failure" },
|
|
137
|
+
],
|
|
138
|
+
ecc_errors: { correctable: 0, uncorrectable: 0 },
|
|
139
|
+
sel_entries_count: 0, sel_events_recent: [], fans: [],
|
|
140
|
+
};
|
|
141
|
+
const out = psuRule.evaluate(snap, baseThresholds);
|
|
142
|
+
expect(out).toHaveLength(1);
|
|
143
|
+
expect(out[0].evidence.source).toBe("per_psu_sensors");
|
|
144
|
+
expect(out[0].evidence.vendor).toBe("dell");
|
|
145
|
+
});
|
|
146
|
+
it("fires on Supermicro PSU1 Status critical (no regression from old behaviour)", () => {
|
|
147
|
+
const snap = emptySnap();
|
|
148
|
+
snap.dmi = { available: true, vendor: "supermicro", raw_vendor: "Supermicro", product_name: "X11", bios_version: null, bios_date: null, is_virtual: false };
|
|
149
|
+
snap.ipmi = {
|
|
150
|
+
available: true,
|
|
151
|
+
sensors: [
|
|
152
|
+
{ name: "PSU1 Status", value: "absent", unit: "discrete", status: "absent" },
|
|
153
|
+
{ name: "PSU2 Status", value: "OK", unit: "discrete", status: "ok" },
|
|
154
|
+
],
|
|
155
|
+
ecc_errors: { correctable: 0, uncorrectable: 0 },
|
|
156
|
+
sel_entries_count: 0, sel_events_recent: [], fans: [],
|
|
157
|
+
};
|
|
158
|
+
const out = psuRule.evaluate(snap, baseThresholds);
|
|
159
|
+
expect(out).toHaveLength(1);
|
|
160
|
+
expect(out[0].evidence.vendor).toBe("supermicro");
|
|
161
|
+
});
|
|
162
|
+
it("does not fire on a VM with no PSU sensors", () => {
|
|
163
|
+
const snap = emptySnap();
|
|
164
|
+
snap.dmi = { available: true, vendor: "virtual", raw_vendor: "QEMU", product_name: "Standard PC", bios_version: null, bios_date: null, is_virtual: true };
|
|
165
|
+
snap.ipmi = {
|
|
166
|
+
available: true, sensors: [], ecc_errors: { correctable: 0, uncorrectable: 0 },
|
|
167
|
+
sel_entries_count: 0, sel_events_recent: [], fans: [],
|
|
168
|
+
};
|
|
169
|
+
expect(psuRule.evaluate(snap, baseThresholds)).toEqual([]);
|
|
170
|
+
});
|
|
171
|
+
it("does not fire when fully redundant on Dell", () => {
|
|
172
|
+
const snap = emptySnap();
|
|
173
|
+
snap.dmi = { available: true, vendor: "dell", raw_vendor: "Dell Inc.", product_name: "PowerEdge R740", bios_version: null, bios_date: null, is_virtual: false };
|
|
174
|
+
snap.ipmi = {
|
|
175
|
+
available: true,
|
|
176
|
+
sensors: [
|
|
177
|
+
{ name: "PS1 Status", value: "0x01", unit: "discrete", status: "ok" },
|
|
178
|
+
{ name: "PS2 Status", value: "0x01", unit: "discrete", status: "ok" },
|
|
179
|
+
],
|
|
180
|
+
ecc_errors: { correctable: 0, uncorrectable: 0 },
|
|
181
|
+
psu_redundancy_state: "fully_redundant",
|
|
182
|
+
sel_entries_count: 0, sel_events_recent: [], fans: [],
|
|
183
|
+
};
|
|
184
|
+
expect(psuRule.evaluate(snap, baseThresholds)).toEqual([]);
|
|
185
|
+
});
|
|
186
|
+
});
|
|
187
|
+
describe("cpu_temperature_high (hwmon path)", () => {
|
|
188
|
+
it("fires from hwmon when no IPMI is available (Pi)", () => {
|
|
189
|
+
const snap = emptySnap();
|
|
190
|
+
snap.thermal = {
|
|
191
|
+
available: true, source: "hwmon",
|
|
192
|
+
cpu_readings: [{ label: "cpu_thermal temp1", value_celsius: 85, source_chip: "cpu_thermal", source: "hwmon" }],
|
|
193
|
+
other_readings: [], max_cpu_celsius: 85,
|
|
194
|
+
};
|
|
195
|
+
const out = cpuTempRule.evaluate(snap, baseThresholds);
|
|
196
|
+
expect(out).toHaveLength(1);
|
|
197
|
+
expect(out[0].severity).toBe("warning");
|
|
198
|
+
expect(out[0].evidence.source).toBe("hwmon");
|
|
199
|
+
expect(out[0].evidence.chip).toBe("cpu_thermal");
|
|
200
|
+
});
|
|
201
|
+
it("fires critical when value_celsius >= cpu_temp_critical_c", () => {
|
|
202
|
+
const snap = emptySnap();
|
|
203
|
+
snap.thermal = {
|
|
204
|
+
available: true, source: "hwmon",
|
|
205
|
+
cpu_readings: [{ label: "coretemp Package id 0", value_celsius: 95, source_chip: "coretemp", source: "hwmon" }],
|
|
206
|
+
other_readings: [], max_cpu_celsius: 95,
|
|
207
|
+
};
|
|
208
|
+
const out = cpuTempRule.evaluate(snap, baseThresholds);
|
|
209
|
+
expect(out[0].severity).toBe("critical");
|
|
210
|
+
});
|
|
211
|
+
it("does not fire when max_cpu_celsius is null (VM)", () => {
|
|
212
|
+
const snap = emptySnap();
|
|
213
|
+
snap.thermal = { available: true, source: "none", cpu_readings: [], other_readings: [], max_cpu_celsius: null };
|
|
214
|
+
expect(cpuTempRule.evaluate(snap, baseThresholds)).toEqual([]);
|
|
215
|
+
});
|
|
216
|
+
it("falls back to IPMI substring filter when hwmon is unavailable", () => {
|
|
217
|
+
const snap = emptySnap();
|
|
218
|
+
snap.ipmi = {
|
|
219
|
+
available: true, ecc_errors: { correctable: 0, uncorrectable: 0 },
|
|
220
|
+
sel_entries_count: 0, sel_events_recent: [], fans: [],
|
|
221
|
+
sensors: [{ name: "CPU1 Temp", value: 85, unit: "degrees C", status: "ok" }],
|
|
222
|
+
};
|
|
223
|
+
const out = cpuTempRule.evaluate(snap, baseThresholds);
|
|
224
|
+
expect(out).toHaveLength(1);
|
|
225
|
+
expect(out[0].evidence.source).toBe("ipmi");
|
|
226
|
+
});
|
|
227
|
+
it("hwmon takes priority over IPMI when both are present", () => {
|
|
228
|
+
const snap = emptySnap();
|
|
229
|
+
snap.thermal = {
|
|
230
|
+
available: true, source: "hwmon",
|
|
231
|
+
cpu_readings: [{ label: "coretemp Package id 0", value_celsius: 88, source_chip: "coretemp", source: "hwmon" }],
|
|
232
|
+
other_readings: [], max_cpu_celsius: 88,
|
|
233
|
+
};
|
|
234
|
+
snap.ipmi = {
|
|
235
|
+
available: true, ecc_errors: { correctable: 0, uncorrectable: 0 },
|
|
236
|
+
sel_entries_count: 0, sel_events_recent: [], fans: [],
|
|
237
|
+
sensors: [{ name: "CPU1 Temp", value: 99, unit: "degrees C", status: "ok" }],
|
|
238
|
+
};
|
|
239
|
+
const out = cpuTempRule.evaluate(snap, baseThresholds);
|
|
240
|
+
expect(out).toHaveLength(1);
|
|
241
|
+
expect(out[0].evidence.source).toBe("hwmon");
|
|
242
|
+
expect(out[0].evidence.value).toBe(88);
|
|
243
|
+
});
|
|
244
|
+
});
|
|
245
|
+
describe("disk_latency_high", () => {
|
|
246
|
+
it("does not fire when io_latency is missing", () => {
|
|
247
|
+
const snap = emptySnap();
|
|
248
|
+
expect(diskLatencyRule.evaluate(snap, baseThresholds)).toEqual([]);
|
|
249
|
+
});
|
|
250
|
+
it("does not fire when io_latency is empty", () => {
|
|
251
|
+
const snap = emptySnap();
|
|
252
|
+
snap.io_latency = [];
|
|
253
|
+
expect(diskLatencyRule.evaluate(snap, baseThresholds)).toEqual([]);
|
|
254
|
+
});
|
|
255
|
+
it("does not fire on healthy NVMe (1ms)", () => {
|
|
256
|
+
const snap = emptySnap();
|
|
257
|
+
snap.io_latency = [
|
|
258
|
+
{ device: "nvme0n1", avg_read_latency_ms: 1, avg_write_latency_ms: 0.5, read_iops: 100, write_iops: 50 },
|
|
259
|
+
];
|
|
260
|
+
expect(diskLatencyRule.evaluate(snap, baseThresholds)).toEqual([]);
|
|
261
|
+
});
|
|
262
|
+
it("fires on hot NVMe (60ms read on default 50ms threshold)", () => {
|
|
263
|
+
const snap = emptySnap();
|
|
264
|
+
snap.io_latency = [
|
|
265
|
+
{ device: "nvme0n1", avg_read_latency_ms: 60, avg_write_latency_ms: 1, read_iops: 100, write_iops: 50 },
|
|
266
|
+
];
|
|
267
|
+
const out = diskLatencyRule.evaluate(snap, baseThresholds);
|
|
268
|
+
expect(out).toHaveLength(1);
|
|
269
|
+
expect(out[0].severity).toBe("warning");
|
|
270
|
+
expect(out[0].evidence.device).toBe("nvme0n1");
|
|
271
|
+
expect(out[0].evidence.threshold_ms).toBe(50);
|
|
272
|
+
});
|
|
273
|
+
it("fires on hot SATA HDD (250ms write on default 200ms threshold)", () => {
|
|
274
|
+
const snap = emptySnap();
|
|
275
|
+
snap.io_latency = [
|
|
276
|
+
{ device: "sda", avg_read_latency_ms: 10, avg_write_latency_ms: 250, read_iops: 5, write_iops: 20 },
|
|
277
|
+
];
|
|
278
|
+
const out = diskLatencyRule.evaluate(snap, baseThresholds);
|
|
279
|
+
expect(out).toHaveLength(1);
|
|
280
|
+
expect(out[0].evidence.device).toBe("sda");
|
|
281
|
+
expect(out[0].evidence.threshold_ms).toBe(200);
|
|
282
|
+
});
|
|
283
|
+
it("does not fire on borderline-cold SATA (180ms vs 200ms threshold)", () => {
|
|
284
|
+
const snap = emptySnap();
|
|
285
|
+
snap.io_latency = [
|
|
286
|
+
{ device: "sda", avg_read_latency_ms: 180, avg_write_latency_ms: 50, read_iops: 5, write_iops: 5 },
|
|
287
|
+
];
|
|
288
|
+
expect(diskLatencyRule.evaluate(snap, baseThresholds)).toEqual([]);
|
|
289
|
+
});
|
|
290
|
+
it("fires once per hot device when multiple devices present", () => {
|
|
291
|
+
const snap = emptySnap();
|
|
292
|
+
snap.io_latency = [
|
|
293
|
+
{ device: "nvme0n1", avg_read_latency_ms: 1, avg_write_latency_ms: 1, read_iops: 100, write_iops: 50 }, // healthy
|
|
294
|
+
{ device: "sda", avg_read_latency_ms: 300, avg_write_latency_ms: 10, read_iops: 5, write_iops: 5 }, // hot HDD
|
|
295
|
+
{ device: "nvme1n1", avg_read_latency_ms: 80, avg_write_latency_ms: 0.5, read_iops: 100, write_iops: 50 }, // hot NVMe
|
|
296
|
+
];
|
|
297
|
+
const out = diskLatencyRule.evaluate(snap, baseThresholds);
|
|
298
|
+
expect(out).toHaveLength(2);
|
|
299
|
+
expect(out.map(a => a.evidence.device).sort()).toEqual(["nvme1n1", "sda"]);
|
|
300
|
+
});
|
|
301
|
+
it("skips devices with zero IOPS over the interval (no samples)", () => {
|
|
302
|
+
const snap = emptySnap();
|
|
303
|
+
snap.io_latency = [
|
|
304
|
+
{ device: "sda", avg_read_latency_ms: null, avg_write_latency_ms: null, read_iops: 0, write_iops: 0 },
|
|
305
|
+
];
|
|
306
|
+
expect(diskLatencyRule.evaluate(snap, baseThresholds)).toEqual([]);
|
|
307
|
+
});
|
|
308
|
+
});
|
|
309
|
+
describe("ALL_RULE_IDS export sync", () => {
|
|
310
|
+
it("matches the actual rule definitions in allRules", async () => {
|
|
311
|
+
const { ALL_RULE_IDS } = await import("../rules.js");
|
|
312
|
+
const idsFromArray = allRules.map(r => r.type);
|
|
313
|
+
expect([...ALL_RULE_IDS]).toEqual(idsFromArray);
|
|
314
|
+
});
|
|
315
|
+
it("matches the static rule-ids.json file (npm-published metadata)", async () => {
|
|
316
|
+
const { ALL_RULE_IDS } = await import("../rules.js");
|
|
317
|
+
const fs = await import("node:fs/promises");
|
|
318
|
+
const url = await import("node:url");
|
|
319
|
+
const path = await import("node:path");
|
|
320
|
+
const here = path.dirname(url.fileURLToPath(import.meta.url));
|
|
321
|
+
const json = JSON.parse(await fs.readFile(path.resolve(here, "../../../rule-ids.json"), "utf-8"));
|
|
322
|
+
expect(json.rule_ids).toEqual([...ALL_RULE_IDS]);
|
|
323
|
+
});
|
|
324
|
+
});
|
|
325
|
+
//# sourceMappingURL=rules.test.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"rules.test.js","sourceRoot":"","sources":["../../../src/alerts/__tests__/rules.test.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,EAAE,EAAE,MAAM,EAAE,MAAM,QAAQ,CAAC;AAC9C,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAGvC,MAAM,cAAc,GAAG;IACrB,WAAW,EAAE,EAAE;IACf,UAAU,EAAE,IAAI;IAChB,YAAY,EAAE,EAAE;IAChB,cAAc,EAAE,EAAE;IAClB,iBAAiB,EAAE,EAAE;IACrB,oBAAoB,EAAE,EAAE;IACxB,mBAAmB,EAAE,GAAG;IACxB,kBAAkB,EAAE,EAAE;IACtB,mBAAmB,EAAE,EAAE;IACvB,6BAA6B,EAAE,EAAE;CAClC,CAAC;AAEF,SAAS,SAAS;IAChB,OAAO;QACL,iBAAiB,EAAE,MAAM;QACzB,SAAS,EAAE,sBAAsB;QACjC,MAAM,EAAE,EAAE,QAAQ,EAAE,GAAG,EAAE,EAAE,EAAE,SAAS,EAAE,EAAE,EAAE,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,cAAc,EAAE,IAAI,EAAE;QAC1F,GAAG,EAAE,EAAE,YAAY,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,cAAc,EAAE,CAAC,EAAE,YAAY,EAAE,GAAG,EAAE,OAAO,EAAE,CAAC,EAAE,OAAO,EAAE,CAAC,EAAE,QAAQ,EAAE,CAAC,EAAE;QACtH,MAAM,EAAE,EAAE,QAAQ,EAAE,KAAK,EAAE,OAAO,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,aAAa,EAAE,CAAC,EAAE,YAAY,EAAE,CAAC,EAAE;QAClG,KAAK,EAAE,EAAE;QACT,KAAK,EAAE,EAAE;QACT,OAAO,EAAE,EAAE;QACX,IAAI,EAAE,EAAE;QACR,IAAI,EAAE,EAAE,SAAS,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,EAAE,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE,EAAE;QAChJ,SAAS,EAAE,EAAE,gBAAgB,EAAE,CAAC,EAAE,gBAAgB,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;KAC1E,CAAC;AACJ,CAAC;AAED,MAAM,eAAe,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,mBAAmB,CAAE,CAAC;AAC5E,MAAM,YAAY,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,WAAW,CAAE,CAAC;AAEjE,QAAQ,CAAC,kCAAkC,EAAE,GAAG,EAAE;IAChD,EAAE,CAAC,sCAAsC,EAAE,GAAG,EAAE;QAC9C,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAClE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,mCAAmC,EAAE,GAAG,EAAE;QAC3C,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,MAAM,CAAC,YAAY,GAAG,GAAG,CAAC;QAC/B,MAAM,GAAG,GAAG,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACxD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM
,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,WAAW,CAAC,CAAC;QACtC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6BAA6B,EAAE,GAAG,EAAE;QACrC,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,MAAM,CAAC,YAAY,GAAG,GAAG,CAAC;QAC/B,MAAM,CAAC,YAAY,CAAC,QAAQ,CAAC,IAAI,EAAE,EAAE,GAAG,cAAc,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC5F,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AACH,MAAM,WAAW,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,sBAAsB,CAAE,CAAC;AAC3E,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,YAAY,CAAE,CAAC;AAC7D,MAAM,OAAO,GAAG,QAAQ,CAAC,IAAI,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,KAAK,qBAAqB,CAAE,CAAC;AAEtE,QAAQ,CAAC,kCAAkC,EAAE,GAAG,EAAE;IAChD,EAAE,CAAC,+DAA+D,EAAE,GAAG,EAAE;QACvE,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;YAC5B,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YAChD,mBAAmB,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,sBAAsB,EAAE,sBAAsB,EAAE;YACzG,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;SACtD,CAAC;QACF,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACnD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;QAChD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,uDAAuD,EAAE,GAAG,EAAE;QAC/D,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;YAC5B,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YAChD,mBAAmB,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,sBAAsB,EAAE,IAAI,EAAE;YACvF,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;SA
CtD,CAAC;QACF,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACnD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,cAAc,CAAC,CAAC;QACpD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iEAAiE,EAAE,GAAG,EAAE;QACzE,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;YAC5B,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YAChD,mBAAmB,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,sBAAsB,EAAE,sBAAsB,EAAE;YACzG,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;SACtD,CAAC;QACF,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACnD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAC,IAAI,CAAC,CAAC,CAAC,CAAC,CAAC,YAAY;IAC3D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE;YAC5B,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YAChD,mBAAmB,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE,sBAAsB,EAAE,sBAAsB,EAAE;YACzG,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;SACtD,CAAC;QACF,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACnD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,CAAC,SAAS,GAAG,IAAI,CAAC;QAC3B,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,yCAAyC,EAAE,GAAG,EAAE;IACvD,EAAE,CAAC,qFAAqF,EAAE,GAAG,EAAE;QAC7F,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,GAAG,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;QAC1K,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI;YACf,OAAO,EAAE;gBACP,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,M
AAM,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE;gBACrE,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE;aACtE;YACD,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YAChD,oBAAoB,EAAE,iBAAiB;YACvC,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;SACtD,CAAC;QACF,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACnD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,kBAAkB,CAAC,CAAC;QACxD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;IACnE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,oEAAoE,EAAE,GAAG,EAAE;QAC5E,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,GAAG,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,EAAE,gBAAgB,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;QAChK,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI;YACf,OAAO,EAAE;gBACP,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE;gBACrE,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,kBAAkB,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE;aACvF;YACD,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YAChD,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;SACtD,CAAC;QACF,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACnD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,iBAAiB,CAAC,CAAC;QACvD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6EAA6E,EAAE,GAAG,EAAE;QACrF,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,GAAG,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,YAAY,EAAE,UAAU,EAAE,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;QAC5J,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI;YACf,OAAO,EAAE;gBACP,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,QAAQ,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,QAAQ,E
AAE;gBAC5E,EAAE,IAAI,EAAE,aAAa,EAAE,KAAK,EAAE,IAAI,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE;aACrE;YACD,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YAChD,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;SACtD,CAAC;QACF,MAAM,GAAG,GAAG,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACnD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,YAAY,CAAC,CAAC;IACpD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,2CAA2C,EAAE,GAAG,EAAE;QACnD,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,GAAG,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,SAAS,EAAE,UAAU,EAAE,MAAM,EAAE,YAAY,EAAE,aAAa,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,IAAI,EAAE,CAAC;QAC1J,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI,EAAE,OAAO,EAAE,EAAE,EAAE,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YAC9E,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;SACtD,CAAC;QACF,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,4CAA4C,EAAE,GAAG,EAAE;QACpD,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,GAAG,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,UAAU,EAAE,WAAW,EAAE,YAAY,EAAE,gBAAgB,EAAE,YAAY,EAAE,IAAI,EAAE,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,KAAK,EAAE,CAAC;QAChK,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI;YACf,OAAO,EAAE;gBACP,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE;gBACrE,EAAE,IAAI,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,EAAE,IAAI,EAAE,UAAU,EAAE,MAAM,EAAE,IAAI,EAAE;aACtE;YACD,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YAChD,oBAAoB,EAAE,iBAAiB;YACvC,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;SACtD,CAAC;QACF,MAAM,CAAC,OAAO,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IAC7D,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,mCAAmC,EAAE,GAAG,EAAE;IACjD,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,OAAO,GAAG;YACb,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,OA
AO;YAChC,YAAY,EAAE,CAAC,EAAE,KAAK,EAAE,mBAAmB,EAAE,aAAa,EAAE,EAAE,EAAE,WAAW,EAAE,aAAa,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;YAC9G,cAAc,EAAE,EAAE,EAAE,eAAe,EAAE,EAAE;SACxC,CAAC;QACF,MAAM,GAAG,GAAG,WAAW,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC7C,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAC,IAAI,CAAC,aAAa,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,0DAA0D,EAAE,GAAG,EAAE;QAClE,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,OAAO,GAAG;YACb,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO;YAChC,YAAY,EAAE,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,aAAa,EAAE,EAAE,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;YAC/G,cAAc,EAAE,EAAE,EAAE,eAAe,EAAE,EAAE;SACxC,CAAC;QACF,MAAM,GAAG,GAAG,WAAW,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,UAAU,CAAC,CAAC;IAC3C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,iDAAiD,EAAE,GAAG,EAAE;QACzD,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,OAAO,GAAG,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,EAAE,YAAY,EAAE,EAAE,EAAE,cAAc,EAAE,EAAE,EAAE,eAAe,EAAE,IAAI,EAAE,CAAC;QAChH,MAAM,CAAC,WAAW,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACjE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,+DAA+D,EAAE,GAAG,EAAE;QACvE,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YACjE,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;YACrD,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;SAC7E,CAAC;QACF,MAAM,GAAG,GAAG,WAAW,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,MAAM,CAAC,CAAC;IAC9C,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,sDAAsD,EAAE,GAAG,EAAE
;QAC9D,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,OAAO,GAAG;YACb,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO;YAChC,YAAY,EAAE,CAAC,EAAE,KAAK,EAAE,uBAAuB,EAAE,aAAa,EAAE,EAAE,EAAE,WAAW,EAAE,UAAU,EAAE,MAAM,EAAE,OAAO,EAAE,CAAC;YAC/G,cAAc,EAAE,EAAE,EAAE,eAAe,EAAE,EAAE;SACxC,CAAC;QACF,IAAI,CAAC,IAAI,GAAG;YACV,SAAS,EAAE,IAAI,EAAE,UAAU,EAAE,EAAE,WAAW,EAAE,CAAC,EAAE,aAAa,EAAE,CAAC,EAAE;YACjE,iBAAiB,EAAE,CAAC,EAAE,iBAAiB,EAAE,EAAE,EAAE,IAAI,EAAE,EAAE;YACrD,OAAO,EAAE,CAAC,EAAE,IAAI,EAAE,WAAW,EAAE,KAAK,EAAE,EAAE,EAAE,IAAI,EAAE,WAAW,EAAE,MAAM,EAAE,IAAI,EAAE,CAAC;SAC7E,CAAC;QACF,MAAM,GAAG,GAAG,WAAW,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QACvD,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,OAAO,CAAC,CAAC;QAC7C,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IACzC,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,mBAAmB,EAAE,GAAG,EAAE;IACjC,EAAE,CAAC,0CAA0C,EAAE,GAAG,EAAE;QAClD,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,MAAM,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACrE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,wCAAwC,EAAE,GAAG,EAAE;QAChD,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,GAAG,EAAE,CAAC;QACrB,MAAM,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACrE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,qCAAqC,EAAE,GAAG,EAAE;QAC7C,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,GAAG;YAChB,EAAE,MAAM,EAAE,SAAS,EAAE,mBAAmB,EAAE,CAAC,EAAE,oBAAoB,EAAE,GAAG,EAAE,SAAS,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE;SACzG,CAAC;QACF,MAAM,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACrE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yDAAyD,EAAE,GAAG,EAAE;QACjE,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,GAAG;YAChB,EAAE,MAAM,EAAE,SAAS,EAAE,mBAAmB,EAAE,EAAE,EAAE,oBAAoB,EAAE,CAAC,EAAE,SAAS,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE;SACxG,CAAC;QACF,MAAM,GAAG,GAAG,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QAC3D,MAAM,CAAC,GAAG,CAAC,
CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QACxC,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,SAAS,CAAC,CAAC;QAC/C,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,EAAE,CAAC,CAAC;IAChD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gEAAgE,EAAE,GAAG,EAAE;QACxE,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,GAAG;YAChB,EAAE,MAAM,EAAE,KAAK,EAAE,mBAAmB,EAAE,EAAE,EAAE,oBAAoB,EAAE,GAAG,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,EAAE,EAAE;SACpG,CAAC;QACF,MAAM,GAAG,GAAG,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QAC3D,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,CAAC;QAC3C,MAAM,CAAC,GAAG,CAAC,CAAC,CAAC,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC;IACjD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,kEAAkE,EAAE,GAAG,EAAE;QAC1E,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,GAAG;YAChB,EAAE,MAAM,EAAE,KAAK,EAAE,mBAAmB,EAAE,GAAG,EAAE,oBAAoB,EAAE,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE;SACnG,CAAC;QACF,MAAM,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACrE,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,yDAAyD,EAAE,GAAG,EAAE;QACjE,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,GAAG;YAChB,EAAE,MAAM,EAAE,SAAS,EAAE,mBAAmB,EAAE,CAAC,EAAE,oBAAoB,EAAE,CAAC,EAAE,SAAS,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,EAAE,UAAU;YAClH,EAAE,MAAM,EAAE,KAAK,EAAE,mBAAmB,EAAE,GAAG,EAAE,oBAAoB,EAAE,EAAE,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE,EAAE,UAAU;YAC9G,EAAE,MAAM,EAAE,SAAS,EAAE,mBAAmB,EAAE,EAAE,EAAE,oBAAoB,EAAE,GAAG,EAAE,SAAS,EAAE,GAAG,EAAE,UAAU,EAAE,EAAE,EAAE,EAAE,WAAW;SACvH,CAAC;QACF,MAAM,GAAG,GAAG,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC;QAC3D,MAAM,CAAC,GAAG,CAAC,CAAC,YAAY,CAAC,CAAC,CAAC,CAAC;QAC5B,MAAM,CAAC,GAAG,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAC,IAAI,EAAE,CAAC,CAAC,OAAO,CAAC,CAAC,SAAS,EAAE,KAAK,CAAC,CAAC,CAAC;IAC7E,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,6DA
A6D,EAAE,GAAG,EAAE;QACrE,MAAM,IAAI,GAAG,SAAS,EAAE,CAAC;QACzB,IAAI,CAAC,UAAU,GAAG;YAChB,EAAE,MAAM,EAAE,KAAK,EAAE,mBAAmB,EAAE,IAAI,EAAE,oBAAoB,EAAE,IAAI,EAAE,SAAS,EAAE,CAAC,EAAE,UAAU,EAAE,CAAC,EAAE;SACtG,CAAC;QACF,MAAM,CAAC,eAAe,CAAC,QAAQ,CAAC,IAAI,EAAE,cAAc,CAAC,CAAC,CAAC,OAAO,CAAC,EAAE,CAAC,CAAC;IACrE,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC;AAEH,QAAQ,CAAC,0BAA0B,EAAE,GAAG,EAAE;IACxC,EAAE,CAAC,iDAAiD,EAAE,KAAK,IAAI,EAAE;QAC/D,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;QACrD,MAAM,YAAY,GAAG,QAAQ,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,CAAC,CAAC,CAAC,IAAI,CAAC,CAAC;QAC/C,MAAM,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC,OAAO,CAAC,YAAY,CAAC,CAAC;IAClD,CAAC,CAAC,CAAC;IAEH,EAAE,CAAC,gEAAgE,EAAE,KAAK,IAAI,EAAE;QAC9E,MAAM,EAAE,YAAY,EAAE,GAAG,MAAM,MAAM,CAAC,aAAa,CAAC,CAAC;QACrD,MAAM,EAAE,GAAG,MAAM,MAAM,CAAC,kBAAkB,CAAC,CAAC;QAC5C,MAAM,GAAG,GAAG,MAAM,MAAM,CAAC,UAAU,CAAC,CAAC;QACrC,MAAM,IAAI,GAAG,MAAM,MAAM,CAAC,WAAW,CAAC,CAAC;QACvC,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,CAAC,GAAG,CAAC,aAAa,CAAC,MAAM,CAAC,IAAI,CAAC,GAAG,CAAC,CAAC,CAAC;QAC9D,MAAM,IAAI,GAAG,IAAI,CAAC,KAAK,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,IAAI,CAAC,OAAO,CAAC,IAAI,EAAE,wBAAwB,CAAC,EAAE,OAAO,CAAC,CAAC,CAAC;QAClG,MAAM,CAAC,IAAI,CAAC,QAAQ,CAAC,CAAC,OAAO,CAAC,CAAC,GAAG,YAAY,CAAC,CAAC,CAAC;IACnD,CAAC,CAAC,CAAC;AACL,CAAC,CAAC,CAAC"}
|
package/dist/alerts/rules.d.ts
CHANGED
|
@@ -4,4 +4,12 @@ export interface AlertRule {
|
|
|
4
4
|
type: string;
|
|
5
5
|
evaluate(snap: Snapshot, thresholds: Config["thresholds"]): AlertResult[];
|
|
6
6
|
}
|
|
7
|
+
/**
|
|
8
|
+
* Stable, ordered list of every rule ID this collector ships. Exported as
|
|
9
|
+
* a public API so downstream tooling (Glassmkr's drift validator, Forge's
|
|
10
|
+
* UI registry) can verify both sides agree on what exists. When you add
|
|
11
|
+
* or remove a rule, keep this literal list in sync with `allRules` — and note that RULES.json in
|
|
12
|
+
* the Glassmkr monorepo is hand-maintained and must be updated separately.
|
|
13
|
+
*/
|
|
14
|
+
export declare const ALL_RULE_IDS: readonly string[];
|
|
7
15
|
export declare const allRules: AlertRule[];
|
package/dist/alerts/rules.js
CHANGED
|
@@ -1,6 +1,29 @@
|
|
|
1
|
-
//
|
|
2
|
-
//
|
|
3
|
-
//
|
|
1
|
+
// Collector-side alert rules. Currently 23 rules covering RAM/swap/disk,
|
|
2
|
+
// CPU, SMART, RAID, network, IPMI thermal/ECC/PSU/SEL/fan, and security.
|
|
3
|
+
// Forge runs an additional set of server-side rules on top of these
|
|
4
|
+
// (predictive, fleet-wide). See RULES_COUNT.md (TBD) for the canonical
|
|
5
|
+
// customer-facing total.
|
|
6
|
+
//
|
|
7
|
+
// When you add or remove a rule, also update:
|
|
8
|
+
// - RULE_AUDIT.md (one section per rule)
|
|
9
|
+
// - the count in this header comment
|
|
10
|
+
import { isPsuSensor } from "../lib/vendor-sensors.js";
|
|
11
|
+
/**
|
|
12
|
+
* Stable, ordered list of every rule ID this collector ships. Exported as
|
|
13
|
+
* a public API so downstream tooling (Glassmkr's drift validator, Forge's
|
|
14
|
+
* UI registry) can verify both sides agree on what exists. When you add
|
|
15
|
+
* or remove a rule, keep this literal list in sync with `allRules` — and note that RULES.json in
|
|
16
|
+
* the Glassmkr monorepo is hand-maintained and must be updated separately.
|
|
17
|
+
*/
|
|
18
|
+
export const ALL_RULE_IDS = [
|
|
19
|
+
"ram_high", "swap_high", "disk_space_high", "cpu_iowait_high", "oom_kills",
|
|
20
|
+
"smart_failing", "nvme_wear_high", "raid_degraded", "disk_latency_high",
|
|
21
|
+
"interface_errors", "link_speed_mismatch", "interface_saturation",
|
|
22
|
+
"cpu_temperature_high", "ecc_errors", "psu_redundancy_loss",
|
|
23
|
+
"ipmi_sel_critical", "ipmi_fan_failure",
|
|
24
|
+
"ssh_root_password", "no_firewall", "pending_security_updates",
|
|
25
|
+
"kernel_vulnerabilities", "kernel_needs_reboot", "unattended_upgrades_disabled",
|
|
26
|
+
];
|
|
4
27
|
export const allRules = [
|
|
5
28
|
// 1. RAM high
|
|
6
29
|
{ type: "ram_high", evaluate(snap, t) {
|
|
@@ -16,10 +39,10 @@ export const allRules = [
|
|
|
16
39
|
recommendation: "Check: ps aux --sort=-rss | head -20" }];
|
|
17
40
|
} },
|
|
18
41
|
// 2. Swap active
|
|
19
|
-
{ type: "
|
|
42
|
+
{ type: "swap_high", evaluate(snap, t) {
|
|
20
43
|
if (t.swap_alert === false || !snap.memory || snap.memory.swap_used_mb <= 0)
|
|
21
44
|
return [];
|
|
22
|
-
return [{ type: "
|
|
45
|
+
return [{ type: "swap_high", severity: "warning", title: `Swap in use: ${snap.memory.swap_used_mb}MB`,
|
|
23
46
|
message: "Server is using swap space, indicating memory pressure.",
|
|
24
47
|
evidence: { swap_used_mb: snap.memory.swap_used_mb },
|
|
25
48
|
recommendation: "Check: free -h && ps aux --sort=-rss | head -20" }];
|
|
@@ -87,19 +110,52 @@ export const allRules = [
|
|
|
87
110
|
recommendation: "Replace failed drive immediately." }));
|
|
88
111
|
} },
|
|
89
112
|
// 9. Disk latency
|
|
113
|
+
// Reads `snap.io_latency` (populated by `collectIoLatency` from /proc/diskstats
|
|
114
|
+
// deltas), not `snap.disks` (which never had a latency field populated). The
|
|
115
|
+
// pre-fix version of this rule referenced `d.latency_p99_ms` on `snap.disks`
|
|
116
|
+
// and never fired on any host, ever.
|
|
117
|
+
//
|
|
118
|
+
// io-latency reports avg_read_latency_ms / avg_write_latency_ms over the
|
|
119
|
+
// collection interval (not p99). We take max(read, write) per device and
|
|
120
|
+
// compare against the per-class threshold:
|
|
121
|
+
// nvme*: t.disk_latency_nvme_ms (default 50ms)
|
|
122
|
+
// everything else (sd*/vd*/xvd*/md*, ...): t.disk_latency_hdd_ms (default 200ms)
|
|
123
|
+
// 50ms is a generous ceiling for a healthy NVMe device; SATA SSDs and HDDs share the 200ms bucket.
|
|
90
124
|
{ type: "disk_latency_high", evaluate(snap, t) {
|
|
91
|
-
if (!snap.
|
|
125
|
+
if (!snap.io_latency || snap.io_latency.length === 0)
|
|
92
126
|
return [];
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
const
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
127
|
+
const findings = [];
|
|
128
|
+
for (const entry of snap.io_latency) {
|
|
129
|
+
const r = entry.avg_read_latency_ms;
|
|
130
|
+
const w = entry.avg_write_latency_ms;
|
|
131
|
+
if (r == null && w == null)
|
|
132
|
+
continue;
|
|
133
|
+
// No samples this interval (read_iops + write_iops both 0): skip silently.
|
|
134
|
+
if ((entry.read_iops ?? 0) === 0 && (entry.write_iops ?? 0) === 0)
|
|
135
|
+
continue;
|
|
136
|
+
const worst = Math.max(r ?? 0, w ?? 0);
|
|
137
|
+
if (worst <= 0)
|
|
138
|
+
continue;
|
|
139
|
+
const isNvme = entry.device.startsWith("nvme");
|
|
140
|
+
const thresh = isNvme ? (t.disk_latency_nvme_ms ?? 50) : (t.disk_latency_hdd_ms ?? 200);
|
|
141
|
+
if (worst < thresh)
|
|
142
|
+
continue;
|
|
143
|
+
findings.push({
|
|
144
|
+
type: "disk_latency_high", severity: "warning",
|
|
145
|
+
title: `Disk ${entry.device} latency ${worst.toFixed(1)}ms`,
|
|
146
|
+
message: `Average I/O latency on ${entry.device} is high (read ${r ?? 0}ms, write ${w ?? 0}ms over interval).`,
|
|
147
|
+
evidence: {
|
|
148
|
+
device: entry.device,
|
|
149
|
+
avg_read_latency_ms: r,
|
|
150
|
+
avg_write_latency_ms: w,
|
|
151
|
+
read_iops: entry.read_iops,
|
|
152
|
+
write_iops: entry.write_iops,
|
|
153
|
+
threshold_ms: thresh,
|
|
154
|
+
},
|
|
155
|
+
recommendation: "Check: iotop -oP",
|
|
156
|
+
});
|
|
157
|
+
}
|
|
158
|
+
return findings;
|
|
103
159
|
} },
|
|
104
160
|
// 10. Interface errors
|
|
105
161
|
{ type: "interface_errors", evaluate(snap) {
|
|
@@ -144,10 +200,28 @@ export const allRules = [
|
|
|
144
200
|
});
|
|
145
201
|
} },
|
|
146
202
|
// 13. CPU temperature
|
|
203
|
+
// Primary: /sys/class/hwmon (vendor-agnostic, works on Pi + Dell + everything).
|
|
204
|
+
// Fallback: IPMI sensors matched by the historical "cpu"-or-"temp" substring filter,
|
|
205
|
+
// used only when hwmon produced no usable CPU reading.
|
|
147
206
|
{ type: "cpu_temperature_high", evaluate(snap, t) {
|
|
207
|
+
const warn = t.cpu_temp_warning_c ?? 80;
|
|
208
|
+
const crit = t.cpu_temp_critical_c ?? 90;
|
|
209
|
+
// Primary path: hwmon
|
|
210
|
+
if (snap.thermal?.available && snap.thermal.cpu_readings.length > 0 && snap.thermal.max_cpu_celsius != null) {
|
|
211
|
+
return snap.thermal.cpu_readings
|
|
212
|
+
.filter(r => r.value_celsius >= warn)
|
|
213
|
+
.map(r => ({
|
|
214
|
+
type: "cpu_temperature_high",
|
|
215
|
+
severity: r.value_celsius >= crit ? "critical" : "warning",
|
|
216
|
+
title: `${r.label}: ${r.value_celsius}°C`,
|
|
217
|
+
message: `CPU temperature above warning threshold (${r.source} ${r.source_chip}).`,
|
|
218
|
+
evidence: { sensor: r.label, value: r.value_celsius, source: r.source, chip: r.source_chip },
|
|
219
|
+
recommendation: "Check cooling, fans, airflow.",
|
|
220
|
+
}));
|
|
221
|
+
}
|
|
222
|
+
// Fallback path: IPMI substring filter (Supermicro/ASRock-style names).
|
|
148
223
|
if (!snap.ipmi?.available || !snap.ipmi.sensors)
|
|
149
224
|
return [];
|
|
150
|
-
const warn = t.cpu_temp_warning_c ?? 80;
|
|
151
225
|
return snap.ipmi.sensors.filter(s => {
|
|
152
226
|
const n = s.name.toLowerCase();
|
|
153
227
|
if (!n.includes("cpu") && !n.includes("temp"))
|
|
@@ -156,35 +230,67 @@ export const allRules = [
|
|
|
156
230
|
return !isNaN(v) && v >= warn;
|
|
157
231
|
}).map(s => {
|
|
158
232
|
const v = typeof s.value === "number" ? s.value : parseFloat(String(s.value));
|
|
159
|
-
const
|
|
160
|
-
return { type: "cpu_temperature_high", severity: v >=
|
|
161
|
-
title: `${s.name}: ${v}${s.unit}`, message: `Temperature above warning threshold.`,
|
|
162
|
-
evidence: { sensor: s.name, value: v },
|
|
233
|
+
const sensorCrit = s.upper_critical ?? crit;
|
|
234
|
+
return { type: "cpu_temperature_high", severity: v >= sensorCrit ? "critical" : "warning",
|
|
235
|
+
title: `${s.name}: ${v}${s.unit}`, message: `Temperature above warning threshold (IPMI sensor).`,
|
|
236
|
+
evidence: { sensor: s.name, value: v, source: "ipmi" },
|
|
163
237
|
recommendation: "Check cooling, fans, airflow." };
|
|
164
238
|
});
|
|
165
239
|
} },
|
|
166
240
|
// 14. ECC errors
|
|
241
|
+
// Reads max(named-sensor counts, SEL-derived counts). Dell iDRAC does
|
|
242
|
+
// not expose ECC as named numeric sensors; SEL is the only signal.
|
|
243
|
+
// Supermicro / HPE / ASRockRack expose them as named sensors.
|
|
244
|
+
// Caveat: SEL counts are cumulative since the last SEL clear, not a rate.
|
|
167
245
|
{ type: "ecc_errors", evaluate(snap) {
|
|
168
246
|
if (!snap.ipmi?.ecc_errors)
|
|
169
247
|
return [];
|
|
170
|
-
const
|
|
248
|
+
const named = snap.ipmi.ecc_errors;
|
|
249
|
+
const sel = snap.ipmi.ecc_errors_from_sel ?? { correctable: 0, uncorrectable: 0, newest_event_timestamp: null };
|
|
250
|
+
const correctable = Math.max(named.correctable, sel.correctable);
|
|
251
|
+
const uncorrectable = Math.max(named.uncorrectable, sel.uncorrectable);
|
|
171
252
|
if (correctable <= 0 && uncorrectable <= 0)
|
|
172
253
|
return [];
|
|
254
|
+
const sourceUsed = (sel.correctable > named.correctable || sel.uncorrectable > named.uncorrectable) ? "ipmi_sel" : "ipmi_sensors";
|
|
255
|
+
const sourceLabel = sourceUsed === "ipmi_sel" ? "IPMI SEL events" : "IPMI named sensors";
|
|
173
256
|
if (uncorrectable > 0)
|
|
174
257
|
return [{ type: "ecc_errors", severity: "critical",
|
|
175
|
-
title: `${uncorrectable} uncorrectable ECC error(s)`,
|
|
176
|
-
|
|
177
|
-
|
|
258
|
+
title: `${uncorrectable} uncorrectable ECC error(s)`,
|
|
259
|
+
message: `${uncorrectable} uncorrectable ECC error(s) from ${sourceLabel}. Data corruption possible. DIMM failing.`,
|
|
260
|
+
evidence: { correctable, uncorrectable, source: sourceUsed, named, sel },
|
|
261
|
+
recommendation: "Replace DIMM immediately. Run: ipmitool sel elist | grep -i memory" }];
|
|
178
262
|
return [{ type: "ecc_errors", severity: "warning",
|
|
179
|
-
title: `${correctable} correctable ECC error(s)`,
|
|
180
|
-
|
|
181
|
-
|
|
263
|
+
title: `${correctable} correctable ECC error(s)`,
|
|
264
|
+
message: `${correctable} correctable ECC error(s) from ${sourceLabel}. Early warning of DIMM failure.`,
|
|
265
|
+
evidence: { correctable, uncorrectable, source: sourceUsed, named, sel },
|
|
266
|
+
recommendation: "Schedule DIMM replacement. Run: ipmitool sel elist | grep -i memory" }];
|
|
182
267
|
} },
|
|
183
268
|
// 15. PSU redundancy
|
|
269
|
+
// Two paths:
|
|
270
|
+
// A. Per-PSU status: filter individual PSU sensors via vendor-aware
|
|
271
|
+
// classifier (covers Supermicro `PSU1 Status`, HPE `Power Supply 1`,
|
|
272
|
+
// Dell `PS1 Status`). If 2+ PSUs and any has failed/absent, fire.
|
|
273
|
+
// B. Aggregate redundancy state (Dell `PS Redundancy` only today): if
|
|
274
|
+
// anything other than fully_redundant or unknown, fire — even when
|
|
275
|
+
// individual PS sensors look OK. This catches "redundancy degraded"
|
|
276
|
+
// cases the per-PSU path would miss.
|
|
184
277
|
{ type: "psu_redundancy_loss", evaluate(snap) {
|
|
185
|
-
if (!snap.ipmi?.available
|
|
278
|
+
if (!snap.ipmi?.available)
|
|
279
|
+
return [];
|
|
280
|
+
const vendor = snap.dmi?.vendor ?? "generic";
|
|
281
|
+
// Path B: aggregate redundancy state
|
|
282
|
+
const redundancyState = snap.ipmi.psu_redundancy_state;
|
|
283
|
+
if (redundancyState && redundancyState !== "fully_redundant" && redundancyState !== "unknown") {
|
|
284
|
+
return [{ type: "psu_redundancy_loss", severity: "critical",
|
|
285
|
+
title: "PSU redundancy lost",
|
|
286
|
+
message: `BMC reports redundancy state: ${redundancyState.replace(/_/g, " ")}.`,
|
|
287
|
+
evidence: { redundancy_state: redundancyState, source: "aggregate_sensor", vendor },
|
|
288
|
+
recommendation: "Replace failed PSU. Check power connections and BMC `ipmitool chassis status`." }];
|
|
289
|
+
}
|
|
290
|
+
// Path A: per-PSU sensor status
|
|
291
|
+
if (!snap.ipmi.sensors)
|
|
186
292
|
return [];
|
|
187
|
-
const psus = snap.ipmi.sensors.filter(s =>
|
|
293
|
+
const psus = snap.ipmi.sensors.filter(s => isPsuSensor(s.name, vendor));
|
|
188
294
|
if (psus.length < 2)
|
|
189
295
|
return [];
|
|
190
296
|
const failed = psus.filter(s => {
|
|
@@ -195,8 +301,9 @@ export const allRules = [
|
|
|
195
301
|
if (failed.length === 0)
|
|
196
302
|
return [];
|
|
197
303
|
return [{ type: "psu_redundancy_loss", severity: "critical",
|
|
198
|
-
title: "PSU redundancy lost",
|
|
199
|
-
|
|
304
|
+
title: "PSU redundancy lost",
|
|
305
|
+
message: `${failed.length} PSU(s) failed/absent: ${failed.map(p => p.name).join(", ")}.`,
|
|
306
|
+
evidence: { failed: failed.map(p => ({ name: p.name, status: p.status })), source: "per_psu_sensors", vendor },
|
|
200
307
|
recommendation: "Replace failed PSU. Check power connections." }];
|
|
201
308
|
} },
|
|
202
309
|
// 19. IPMI SEL critical events
|