@glassmkr/crucible 0.7.1 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/alerts/__tests__/rules.test.d.ts +1 -0
- package/dist/alerts/__tests__/rules.test.js +325 -0
- package/dist/alerts/__tests__/rules.test.js.map +1 -0
- package/dist/alerts/rules.d.ts +8 -0
- package/dist/alerts/rules.js +139 -32
- package/dist/alerts/rules.js.map +1 -1
- package/dist/api.d.ts +2 -0
- package/dist/api.js +7 -0
- package/dist/api.js.map +1 -0
- package/dist/collect/__tests__/dmi.test.d.ts +1 -0
- package/dist/collect/__tests__/dmi.test.js +114 -0
- package/dist/collect/__tests__/dmi.test.js.map +1 -0
- package/dist/collect/__tests__/ipmi.test.js +47 -1
- package/dist/collect/__tests__/ipmi.test.js.map +1 -1
- package/dist/collect/__tests__/thermal.test.d.ts +1 -0
- package/dist/collect/__tests__/thermal.test.js +164 -0
- package/dist/collect/__tests__/thermal.test.js.map +1 -0
- package/dist/collect/dmi.d.ts +19 -0
- package/dist/collect/dmi.js +109 -0
- package/dist/collect/dmi.js.map +1 -0
- package/dist/collect/ipmi.d.ts +27 -2
- package/dist/collect/ipmi.js +90 -2
- package/dist/collect/ipmi.js.map +1 -1
- package/dist/collect/thermal.d.ts +10 -0
- package/dist/collect/thermal.js +187 -0
- package/dist/collect/thermal.js.map +1 -0
- package/dist/config.d.ts +10 -0
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/index.js +51 -1
- package/dist/index.js.map +1 -1
- package/dist/lib/__tests__/capability.test.d.ts +1 -0
- package/dist/lib/__tests__/capability.test.js +87 -0
- package/dist/lib/__tests__/capability.test.js.map +1 -0
- package/dist/lib/__tests__/vendor-sensors.test.d.ts +1 -0
- package/dist/lib/__tests__/vendor-sensors.test.js +49 -0
- package/dist/lib/__tests__/vendor-sensors.test.js.map +1 -0
- package/dist/lib/capability.d.ts +21 -0
- package/dist/lib/capability.js +110 -0
- package/dist/lib/capability.js.map +1 -0
- package/dist/lib/cpu-thermal-chips.d.ts +2 -0
- package/dist/lib/cpu-thermal-chips.js +28 -0
- package/dist/lib/cpu-thermal-chips.js.map +1 -0
- package/dist/lib/types.d.ts +58 -0
- package/dist/lib/vendor-sensors.d.ts +27 -0
- package/dist/lib/vendor-sensors.js +63 -0
- package/dist/lib/vendor-sensors.js.map +1 -0
- package/dist/notify/telegram.js +1 -1
- package/dist/notify/telegram.js.map +1 -1
- package/package.json +16 -1
- package/rule-ids.json +29 -0
- package/.dockerignore +0 -13
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -24
- package/.github/ISSUE_TEMPLATE/no_data.md +0 -26
- package/.github/workflows/docker.yml +0 -53
- package/.github/workflows/publish.yml +0 -25
- package/Dockerfile +0 -59
- package/config/collector.example.yaml +0 -43
- package/docker-compose.yml +0 -26
- package/scripts/sign-release.sh +0 -29
- package/src/__tests__/cli.test.ts +0 -74
- package/src/__tests__/reboot-marker.test.ts +0 -122
- package/src/alerts/evaluator.ts +0 -15
- package/src/alerts/rules.ts +0 -283
- package/src/alerts/state.ts +0 -92
- package/src/cli.ts +0 -112
- package/src/collect/__tests__/ipmi.test.ts +0 -96
- package/src/collect/__tests__/smart.test.ts +0 -68
- package/src/collect/__tests__/system.test.ts +0 -29
- package/src/collect/__tests__/zfs.test.ts +0 -72
- package/src/collect/conntrack.ts +0 -27
- package/src/collect/cpu.ts +0 -92
- package/src/collect/disks.ts +0 -91
- package/src/collect/fd.ts +0 -31
- package/src/collect/io-errors.ts +0 -23
- package/src/collect/io-latency.ts +0 -103
- package/src/collect/ipmi.ts +0 -207
- package/src/collect/memory.ts +0 -30
- package/src/collect/network.ts +0 -193
- package/src/collect/ntp.ts +0 -114
- package/src/collect/os-alerts.ts +0 -43
- package/src/collect/raid.ts +0 -40
- package/src/collect/security.ts +0 -268
- package/src/collect/smart.ts +0 -72
- package/src/collect/system.ts +0 -32
- package/src/collect/systemd.ts +0 -33
- package/src/collect/zfs.ts +0 -66
- package/src/config.ts +0 -65
- package/src/index.ts +0 -221
- package/src/lib/__tests__/parse.test.ts +0 -28
- package/src/lib/exec.ts +0 -16
- package/src/lib/parse.ts +0 -29
- package/src/lib/reboot-marker.ts +0 -88
- package/src/lib/types.ts +0 -226
- package/src/lib/version-check.ts +0 -39
- package/src/lib/version.ts +0 -33
- package/src/metrics-server.ts +0 -123
- package/src/notify/email.ts +0 -69
- package/src/notify/slack.ts +0 -47
- package/src/notify/telegram.ts +0 -65
- package/src/push/forge.ts +0 -109
- package/tsconfig.json +0 -15
- package/vitest.config.ts +0 -12
package/src/lib/types.ts
DELETED
|
@@ -1,226 +0,0 @@
|
|
|
1
|
-
/**
 * One collection result: identifying metadata plus the output of each
 * collector. Sections marked optional may be absent from a given snapshot.
 */
export interface Snapshot {
  /** Version string of the collector that produced this snapshot. */
  collector_version: string;
  timestamp: string;

  // Sections that are always present.
  system: SystemInfo;
  cpu: CpuInfo;
  memory: MemoryInfo;
  disks: DiskInfo[];
  smart: SmartInfo[];
  network: NetworkInfo[];
  raid: RaidInfo[];
  ipmi: IpmiInfo;
  os_alerts: OsAlerts;

  // Sections that may be omitted.
  security?: SecurityData;
  zfs?: ZfsData;
  io_errors?: {
    count: number;
    devices: string[];
  };
  io_latency?: {
    device: string;
    avg_read_latency_ms: number | null;
    avg_write_latency_ms: number | null;
    read_iops: number;
    write_iops: number;
  }[];
  conntrack?: ConntrackData;
  systemd?: SystemdData;
  ntp?: NtpData;
  file_descriptors?: FileDescriptorData;

  /**
   * Planned-reboot flag: set only on the first snapshot after a reboot that
   * was marked with `crucible-agent mark-reboot` / `reboot`. Forge reads this
   * to suppress the `unexpected_reboot` rule. Single-use: subsequent
   * snapshots don't carry it.
   */
  expected_reboot?: boolean;
  expected_reboot_reason?: string;
}
|
|
28
|
-
|
|
29
|
-
/** Connection-tracking table usage reported in a snapshot. */
export interface ConntrackData {
  /** Whether conntrack data could be collected. */
  available: boolean;
  /** Current number of tracked entries. */
  count: number;
  /** Table capacity. */
  max: number;
  /** Usage as a percentage (presumably count relative to max). */
  percent: number;
}
|
|
35
|
-
|
|
36
|
-
/** Failed systemd units reported in a snapshot. */
export interface SystemdData {
  /** Names of the failed units. */
  failed_units: string[];
  /** Number of failed units. */
  failed_count: number;
}
|
|
40
|
-
|
|
41
|
-
/** Time-synchronisation status reported in a snapshot. */
export interface NtpData {
  /** Whether the clock is reported as synchronised. */
  synced: boolean;
  /** Clock offset, in seconds. */
  offset_seconds: number;
  /** Where the status came from (free-form string). */
  source: string;
  /** Whether a time-sync daemon was found running. */
  daemon_running: boolean;
}
|
|
47
|
-
|
|
48
|
-
/** System-wide file-descriptor usage reported in a snapshot. */
export interface FileDescriptorData {
  /** Descriptors currently allocated. */
  allocated: number;
  /** Descriptors reported as free. */
  free: number;
  /** Maximum number of descriptors. */
  max: number;
  /** Usage as a percentage. */
  percent: number;
}
|
|
54
|
-
|
|
55
|
-
/** Status of a single ZFS pool. The scrub-related fields are optional. */
export interface ZfsPool {
  name: string;
  /** Pool state as a free-form string. */
  state: string;
  /** Error text reported for the pool. */
  errors_text: string;

  // Scrub details; each may be absent.
  scrub_errors?: number;
  scrub_repaired?: string;
  last_scrub_date?: string;
  scrub_never_run?: boolean;
}
|
|
64
|
-
|
|
65
|
-
/** ZFS section of a snapshot: one entry per pool. */
export interface ZfsData {
  pools: ZfsPool[];
}
|
|
68
|
-
|
|
69
|
-
/**
 * Security-posture section of a snapshot. `ssh`, `pending_updates` and
 * `kernel_reboot` are nullable; the other sections are always present.
 */
export interface SecurityData {
  ssh: {
    permitRootLogin: string;
    passwordAuthentication: string;
    rootPasswordExposed: boolean;
  } | null;
  firewall: {
    active: boolean;
    source: string;
    details: string;
  };
  pending_updates: {
    distro: string;
    pendingCount: number;
    available: boolean;
  } | null;
  kernel_vulns: {
    name: string;
    status: string;
    mitigated: boolean;
  }[];
  kernel_reboot: {
    running: string;
    installed: string;
    needsReboot: boolean;
  } | null;
  auto_updates: {
    configured: boolean;
    mechanism: string;
    details: string;
  };
}
|
|
77
|
-
|
|
78
|
-
/** Host identity and operating-system details. */
export interface SystemInfo {
  hostname: string;
  ip: string;
  os: string;
  /**
   * `ID=` from /etc/os-release, lowercased,
   * e.g. "ubuntu", "debian", "rocky", "arch", "alpine".
   */
  os_id?: string;
  /**
   * `ID_LIKE=` from /etc/os-release, lowercased and space-separated. Forge
   * uses it to pick distro-family-specific fix command variants: on Rocky
   * this is "rhel centos fedora"; on Ubuntu it is "debian".
   */
  os_id_like?: string;
  kernel: string;
  uptime_seconds: number;
}
|
|
91
|
-
|
|
92
|
-
/** Utilisation breakdown for a single CPU core, as percentages. */
export interface CpuCoreInfo {
  /** Core number. */
  core: number;
  user_percent: number;
  system_percent: number;
  iowait_percent: number;
  idle_percent: number;
  irq_percent: number;
  softirq_percent: number;
}
|
|
101
|
-
|
|
102
|
-
/** Aggregate CPU utilisation and load averages, with an optional per-core breakdown. */
export interface CpuInfo {
  // Utilisation percentages.
  user_percent: number;
  system_percent: number;
  iowait_percent: number;
  idle_percent: number;

  // Load averages over 1, 5 and 15 minutes.
  load_1m: number;
  load_5m: number;
  load_15m: number;

  /** Per-core figures; may be absent. */
  cores?: CpuCoreInfo[];
}
|
|
112
|
-
|
|
113
|
-
/** RAM and swap figures, in megabytes. */
export interface MemoryInfo {
  total_mb: number;
  used_mb: number;
  available_mb: number;
  swap_total_mb: number;
  swap_used_mb: number;
}
|
|
120
|
-
|
|
121
|
-
/**
 * Usage figures for one mounted filesystem. Capacity fields are required;
 * filesystem, inode and I/O details are optional.
 */
export interface DiskInfo {
  device: string;
  mount: string;

  // Capacity, in gigabytes, and usage as a percentage.
  total_gb: number;
  used_gb: number;
  available_gb: number;
  percent_used: number;

  // Filesystem type and mount options.
  fstype?: string;
  options?: string;

  // Inode counts.
  inodes_total?: number;
  inodes_used?: number;
  inodes_free?: number;

  // Throughput (MB/s) and p99 latency (ms).
  io_read_mb_s?: number;
  io_write_mb_s?: number;
  latency_p99_ms?: number;
}
|
|
137
|
-
|
|
138
|
-
/** SMART data for one drive. Only device, model and health are always present. */
export interface SmartInfo {
  device: string;
  model: string;
  /** Health verdict as a free-form string. */
  health: string;

  // Individual attributes; each may be absent.
  temperature_c?: number;
  percentage_used?: number;
  reallocated_sectors?: number;
  pending_sectors?: number;
  power_on_hours?: number;
}
|
|
148
|
-
|
|
149
|
-
/**
 * Per-interface network statistics.
 *
 * The optional counters are declared `?: number`, so an unavailable counter
 * is represented by omitting the field. The earlier comments said "Null",
 * which the declared type does not allow.
 */
export interface NetworkInfo {
  interface: string;
  speed_mbps: number;
  rx_bytes_sec: number;
  tx_bytes_sec: number;
  /** Delta over the collection interval (rx_errors + any subtype counter). */
  rx_errors: number;
  tx_errors: number;
  rx_drops: number;
  tx_drops: number;
  /** Delta over the collection interval. Omitted if the counter is not available on this NIC. */
  rx_packets?: number;
  tx_packets?: number;
  /** Fine-grained RX hardware-error subtypes (deltas). Omitted if unavailable. */
  rx_crc_errors?: number;
  rx_frame_errors?: number;
  rx_length_errors?: number;
  /** TX physical-layer fault counter (delta). Omitted if unavailable. */
  tx_carrier_errors?: number;
  /** "up", "down", "unknown", etc. from /sys/class/net/{iface}/operstate. */
  operstate?: string;
  /** If this interface is a bond slave, the bond name. */
  bond_master?: string;
  /** True when this entry represents the bond aggregate. */
  is_bond_master?: boolean;
}
|
|
172
|
-
|
|
173
|
-
/** State of one RAID array. */
export interface RaidInfo {
  device: string;
  /** RAID level as a string. */
  level: string;
  status: string;
  /** Whether the array is degraded. */
  degraded: boolean;
  /** Member disks. */
  disks: string[];
  /** Members reported as failed. */
  failed_disks: string[];
}
|
|
181
|
-
|
|
182
|
-
/** One event entry; used by `IpmiInfo.sel_events_recent`. */
export interface SelEvent {
  id: number;
  timestamp: string;
  /** Sensor name. */
  sensor: string;
  /** Sensor type. */
  sensor_type: string;
  /** Event description. */
  event: string;
  direction: string;
  severity: string;
}
|
|
191
|
-
|
|
192
|
-
/** One fan reading; used by `IpmiInfo.fans`. */
export interface FanStatus {
  name: string;
  /** Fan speed in RPM. */
  rpm: number;
  status: string;
}
|
|
197
|
-
|
|
198
|
-
/** IPMI section of a snapshot: sensor readings, ECC counters, event log data and fans. */
export interface IpmiInfo {
  /** Whether IPMI data could be collected. */
  available: boolean;
  /** Sensor readings; `value` is a number or a string. */
  sensors: {
    name: string;
    value: number | string;
    unit: string;
    status: string;
    upper_critical?: number;
  }[];
  ecc_errors: {
    correctable: number;
    uncorrectable: number;
  };
  sel_entries_count: number;
  sel_events_recent: SelEvent[];
  fans: FanStatus[];
}
|
|
212
|
-
|
|
213
|
-
/** OS-level alert counters carried in a snapshot. */
export interface OsAlerts {
  /** Count of recent OOM kills. */
  oom_kills_recent: number;
  /** Count of zombie processes. */
  zombie_processes: number;
  /** Clock drift in milliseconds. */
  time_drift_ms: number;
}
|
|
218
|
-
|
|
219
|
-
/** One alert, in the shape consumed by the notification senders. */
export interface AlertResult {
  /** Alert type identifier (e.g. a key of the Telegram priority map). */
  type: string;
  severity: "critical" | "warning";
  /** Short headline. */
  title: string;
  /** Longer description. */
  message: string;
  /** Supporting data attached to the alert. */
  evidence: Record<string, unknown>;
  /** Suggested action. */
  recommendation: string;
}
|
package/src/lib/version-check.ts
DELETED
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
import { CRUCIBLE_VERSION as CURRENT_VERSION } from "./version.js";
|
|
2
|
-
|
|
3
|
-
/** Epoch-millisecond time of the most recent check attempt; 0 until the first attempt. */
let lastCheckTime = 0;

/** Outcome of the most recent completed check; null until one completes. */
let lastResult: {
  updateAvailable: boolean;
  latest: string;
  changelog: string;
} | null = null;

/** Minimum spacing between checks: 6 hours, in milliseconds. */
const CHECK_INTERVAL = 6 * 60 * 60 * 1000;
|
|
6
|
-
|
|
7
|
-
/**
 * Returns the version string of the running agent.
 *
 * @returns The value of the imported `CURRENT_VERSION` constant.
 */
export function getCurrentVersion(): string {
  return CURRENT_VERSION;
}
|
|
10
|
-
|
|
11
|
-
/**
 * Decides whether `candidate` is a higher version than `current`.
 *
 * Only plain dotted-numeric versions ("1.2.3", optionally prefixed with "v")
 * are compared; missing components count as 0.
 *
 * @returns true/false for comparable versions, or null when either string is
 *          not dotted-numeric (e.g. carries a pre-release tag).
 */
function isNewerVersion(candidate: string, current: string): boolean | null {
  const parse = (version: string): number[] | null => {
    const core = version.trim().replace(/^v/, "");
    return /^\d+(\.\d+)*$/.test(core) ? core.split(".").map(Number) : null;
  };

  const a = parse(candidate);
  const b = parse(current);
  if (!a || !b) return null;

  const length = Math.max(a.length, b.length);
  for (let i = 0; i < length; i++) {
    const diff = (a[i] ?? 0) - (b[i] ?? 0);
    if (diff !== 0) return diff > 0;
  }
  return false;
}

/**
 * Asks Forge for the latest published Crucible version and logs upgrade
 * instructions when a newer one exists. The outcome is stored for
 * `getUpdateStatus()`.
 *
 * The check runs at most once per `CHECK_INTERVAL`. The timestamp is recorded
 * before the request, so a failed attempt also waits a full interval before
 * the next try. All failures (network error, non-2xx response, missing
 * `latest` field) are ignored because the check is non-critical.
 *
 * @param forgeUrl Base URL of the Forge instance; defaults to
 *                 https://forge.glassmkr.com. Trailing slashes are ignored.
 */
export async function checkForUpdates(forgeUrl?: string): Promise<void> {
  const now = Date.now();
  if (now - lastCheckTime < CHECK_INTERVAL) return;
  lastCheckTime = now;

  // Strip trailing slashes so a configured "https://host/" does not yield "//api/...".
  const url = (forgeUrl || "https://forge.glassmkr.com").replace(/\/+$/, "");
  try {
    const res = await fetch(`${url}/api/v1/version`, { signal: AbortSignal.timeout(5000) });
    if (!res.ok) return;
    const data = await res.json() as { crucible?: { latest?: string; min_supported?: string; changelog_url?: string } };
    const latest = data.crucible?.latest;
    if (!latest) return;

    // Announce an update only when the advertised version is actually higher
    // than the running one. If either version is not plain dotted-numeric,
    // fall back to the plain inequality test.
    const updateAvailable = isNewerVersion(latest, CURRENT_VERSION) ?? latest !== CURRENT_VERSION;

    if (updateAvailable) {
      console.log(`[update] New Crucible version available: ${latest} (current: ${CURRENT_VERSION})`);
      console.log(`[update] Changelog: ${data.crucible?.changelog_url || "https://github.com/glassmkr/crucible/releases"}`);
      console.log(`[update] Run: npm update -g @glassmkr/crucible && sudo systemctl restart glassmkr-crucible`);
      lastResult = { updateAvailable: true, latest, changelog: data.crucible?.changelog_url || "" };
    } else {
      lastResult = { updateAvailable: false, latest, changelog: "" };
    }
  } catch {
    // Version check is non-critical, fail silently
  }
}
|
|
36
|
-
|
|
37
|
-
/**
 * Returns the stored outcome of the most recent completed update check.
 *
 * @returns The value of the module-level `lastResult`, or null when no check
 *          has stored a result yet.
 */
export function getUpdateStatus(): { updateAvailable: boolean; latest: string; changelog: string } | null {
  return lastResult;
}
|
package/src/lib/version.ts
DELETED
|
@@ -1,33 +0,0 @@
|
|
|
1
|
-
// Single source of truth for the Crucible version string at runtime.
|
|
2
|
-
// Read from package.json so a release bump propagates everywhere
|
|
3
|
-
// (notification footers, version-check log lines, --version flag, the
|
|
4
|
-
// `collector_version` field on every snapshot) without anyone having to
|
|
5
|
-
// remember to update a hardcoded constant.
|
|
6
|
-
//
|
|
7
|
-
// Returns "0.0.0" on read failure rather than throwing; the agent has to
|
|
8
|
-
// keep running even if its package.json is somehow missing.
|
|
9
|
-
|
|
10
|
-
import { readFileSync } from "node:fs";
|
|
11
|
-
import { fileURLToPath } from "node:url";
|
|
12
|
-
import { dirname, join } from "node:path";
|
|
13
|
-
|
|
14
|
-
/** Directory containing this module (ESM replacement for the CommonJS `__dirname`). */
const __dirname = dirname(fileURLToPath(import.meta.url));

/**
 * Reads the `version` field from the package's package.json.
 *
 * Two locations are probed in order: two levels above this module (the
 * expected location for both src/lib and dist/lib), then one level above.
 * Unreadable or unparsable files, and files without a string `version`, are
 * skipped.
 *
 * @returns The first version string found, or "0.0.0" when none is found.
 *          It never throws, so the agent keeps running without a package.json.
 */
function readPkgVersion(): string {
  const candidates = [
    join(__dirname, "..", "..", "package.json"),
    join(__dirname, "..", "package.json"),
  ];

  for (const manifestPath of candidates) {
    let manifest: unknown;
    try {
      manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
    } catch {
      continue; // try next
    }
    if (typeof manifest === "object" && manifest !== null) {
      const version = (manifest as { version?: unknown }).version;
      if (typeof version === "string") return version;
    }
  }

  return "0.0.0";
}

/** Version of the running agent, resolved once at module load. */
export const CRUCIBLE_VERSION = readPkgVersion();
|
package/src/metrics-server.ts
DELETED
|
@@ -1,123 +0,0 @@
|
|
|
1
|
-
import { createServer } from "http";
|
|
2
|
-
import type { Snapshot } from "./lib/types.js";
|
|
3
|
-
|
|
4
|
-
/** Snapshot most recently passed to `updateMetrics`; null until the first call. */
let latestSnapshot: Snapshot | null = null;

/**
 * Stores `snapshot` as the data the metrics endpoint will serve.
 *
 * @param snapshot The newest collection result; replaces the previous one.
 */
export function updateMetrics(snapshot: Snapshot): void {
  latestSnapshot = snapshot;
}
|
|
9
|
-
|
|
10
|
-
/**
 * Starts an HTTP server exposing the latest snapshot in Prometheus text format.
 *
 * Routes (matched on the path, so a query string is ignored):
 * - `GET /metrics`: 200 with the formatted snapshot, or 503 before the first
 *   snapshot has been stored.
 * - `/health`: 200 "ok" for any method.
 * - anything else: 404.
 *
 * The server listens on all interfaces (0.0.0.0). Server errors such as the
 * port already being in use are logged rather than left to surface as an
 * uncaught `error` event.
 *
 * @param port TCP port to listen on.
 */
export function startMetricsServer(port: number) {
  const server = createServer((req, res) => {
    // Compare the path only, so "/metrics?foo=bar" is not answered with 404.
    const path = (req.url ?? "").split("?", 1)[0];

    if (path === "/metrics" && req.method === "GET") {
      if (!latestSnapshot) {
        res.writeHead(503);
        res.end("# No data collected yet\n");
        return;
      }
      res.writeHead(200, { "Content-Type": "text/plain; version=0.0.4" });
      res.end(formatPrometheus(latestSnapshot));
    } else if (path === "/health") {
      res.writeHead(200);
      res.end("ok\n");
    } else {
      res.writeHead(404);
      res.end("Not found\n");
    }
  });

  // Without a listener, a listen failure (e.g. EADDRINUSE) is thrown as an
  // uncaught exception and takes the whole agent down.
  server.on("error", (err: Error) => {
    console.error(`[metrics] Server error on port ${port}: ${err.message}`);
  });

  server.listen(port, "0.0.0.0", () => {
    console.log(`[metrics] Prometheus endpoint listening on :${port}/metrics`);
  });
}
|
|
33
|
-
|
|
34
|
-
/**
 * Escapes a value for use inside a double-quoted Prometheus label:
 * backslash, double quote and line feed are backslash-escaped.
 */
function escapeLabelValue(value: unknown): string {
  return String(value)
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\n/g, "\\n");
}

/**
 * Renders a snapshot as Prometheus text exposition format.
 *
 * Sections are emitted in order: CPU, memory, disks, network, SMART, IPMI
 * (only when `snap.ipmi.available`), OS alerts, and security (only when
 * present). Label values are escaped so that quotes, backslashes or newlines
 * in device, mount, model, sensor or fan names cannot break the output.
 *
 * @param snap Snapshot to render.
 * @returns The metric lines joined with "\n", with a trailing newline.
 */
function formatPrometheus(snap: Snapshot): string {
  const lines: string[] = [];
  const esc = escapeLabelValue;

  // CPU
  lines.push("# HELP glassmkr_cpu_user_percent CPU user utilization");
  lines.push("# TYPE glassmkr_cpu_user_percent gauge");
  lines.push(`glassmkr_cpu_user_percent ${snap.cpu.user_percent}`);
  lines.push(`glassmkr_cpu_system_percent ${snap.cpu.system_percent}`);
  lines.push(`glassmkr_cpu_iowait_percent ${snap.cpu.iowait_percent}`);
  lines.push(`glassmkr_cpu_idle_percent ${snap.cpu.idle_percent}`);
  lines.push(`glassmkr_load_1m ${snap.cpu.load_1m}`);
  lines.push(`glassmkr_load_5m ${snap.cpu.load_5m}`);
  lines.push(`glassmkr_load_15m ${snap.cpu.load_15m}`);

  // Memory
  lines.push("# HELP glassmkr_memory_used_mb Memory used in MB");
  lines.push("# TYPE glassmkr_memory_used_mb gauge");
  lines.push(`glassmkr_memory_used_mb ${snap.memory.used_mb}`);
  lines.push(`glassmkr_memory_total_mb ${snap.memory.total_mb}`);
  lines.push(`glassmkr_memory_available_mb ${snap.memory.available_mb}`);
  lines.push(`glassmkr_swap_used_mb ${snap.memory.swap_used_mb}`);

  // Disks
  lines.push("# HELP glassmkr_disk_used_percent Disk usage percentage");
  lines.push("# TYPE glassmkr_disk_used_percent gauge");
  for (const disk of snap.disks) {
    const labels = `mount="${esc(disk.mount)}",device="${esc(disk.device)}"`;
    lines.push(`glassmkr_disk_used_percent{${labels}} ${disk.percent_used}`);
    lines.push(`glassmkr_disk_total_gb{${labels}} ${disk.total_gb}`);
    lines.push(`glassmkr_disk_used_gb{${labels}} ${disk.used_gb}`);
  }

  // Network
  lines.push("# HELP glassmkr_net_rx_bytes_sec Network receive bytes per second");
  lines.push("# TYPE glassmkr_net_rx_bytes_sec gauge");
  for (const iface of snap.network) {
    const labels = `interface="${esc(iface.interface)}"`;
    lines.push(`glassmkr_net_rx_bytes_sec{${labels}} ${iface.rx_bytes_sec}`);
    lines.push(`glassmkr_net_tx_bytes_sec{${labels}} ${iface.tx_bytes_sec}`);
    lines.push(`glassmkr_net_rx_errors{${labels}} ${iface.rx_errors}`);
    lines.push(`glassmkr_net_tx_errors{${labels}} ${iface.tx_errors}`);
    lines.push(`glassmkr_net_speed_mbps{${labels}} ${iface.speed_mbps}`);
  }

  // SMART: each attribute is emitted only when the drive reports it.
  for (const drive of snap.smart) {
    const labels = `device="${esc(drive.device)}",model="${esc(drive.model)}"`;
    if (drive.temperature_c != null) lines.push(`glassmkr_smart_temperature_c{${labels}} ${drive.temperature_c}`);
    if (drive.percentage_used != null) lines.push(`glassmkr_smart_percentage_used{${labels}} ${drive.percentage_used}`);
    if (drive.reallocated_sectors != null) lines.push(`glassmkr_smart_reallocated_sectors{${labels}} ${drive.reallocated_sectors}`);
  }

  // IPMI
  if (snap.ipmi?.available) {
    for (const sensor of snap.ipmi.sensors) {
      // String-valued sensors have no numeric sample and are skipped.
      if (typeof sensor.value === "number") {
        lines.push(`glassmkr_ipmi_sensor{sensor="${esc(sensor.name)}",unit="${esc(sensor.unit)}"} ${sensor.value}`);
      }
    }
    lines.push(`glassmkr_ipmi_ecc_correctable ${snap.ipmi.ecc_errors.correctable}`);
    lines.push(`glassmkr_ipmi_ecc_uncorrectable ${snap.ipmi.ecc_errors.uncorrectable}`);

    // Fans
    if (snap.ipmi.fans) {
      for (const fan of snap.ipmi.fans) {
        lines.push(`glassmkr_ipmi_fan_rpm{fan="${esc(fan.name)}",status="${esc(fan.status)}"} ${fan.rpm}`);
      }
    }
  }

  // OS alerts
  lines.push(`glassmkr_oom_kills_recent ${snap.os_alerts.oom_kills_recent}`);
  lines.push(`glassmkr_zombie_processes ${snap.os_alerts.zombie_processes}`);

  // Security
  if (snap.security) {
    lines.push(`glassmkr_ssh_root_password_exposed ${snap.security.ssh?.rootPasswordExposed ? 1 : 0}`);
    lines.push(`glassmkr_firewall_active ${snap.security.firewall.active ? 1 : 0}`);
    if (snap.security.pending_updates?.available) {
      lines.push(`glassmkr_pending_security_updates ${snap.security.pending_updates.pendingCount}`);
    }
    const unmitigated = snap.security.kernel_vulns.filter(v => !v.mitigated).length;
    lines.push(`glassmkr_kernel_vulns_unmitigated ${unmitigated}`);
    lines.push(`glassmkr_kernel_needs_reboot ${snap.security.kernel_reboot?.needsReboot ? 1 : 0}`);
    lines.push(`glassmkr_auto_updates_configured ${snap.security.auto_updates.configured ? 1 : 0}`);
  }

  return lines.join("\n") + "\n";
}
|
package/src/notify/email.ts
DELETED
|
@@ -1,69 +0,0 @@
|
|
|
1
|
-
import { execFile } from "child_process";
|
|
2
|
-
import { promisify } from "util";
|
|
3
|
-
import type { AlertResult } from "../lib/types.js";
|
|
4
|
-
import { CRUCIBLE_VERSION } from "../lib/version.js";
|
|
5
|
-
|
|
6
|
-
/** Promise-returning wrapper around `child_process.execFile`. */
const execFileAsync = promisify(execFile);

/**
 * Sends an alert digest by piping an RFC 822-style message to the local
 * `/usr/sbin/sendmail` binary.
 *
 * @param config         Recipient configuration; nothing is sent when `to` is empty.
 * @param newAlerts      Alerts that fired.
 * @param resolvedAlerts Alerts that cleared.
 * @param serverName     Used in the subject, the body and the From address.
 * @returns true when sendmail exited successfully; false when `to` is empty
 *          or sending failed (the failure is logged, never thrown).
 */
export async function sendEmail(
  config: { to: string },
  newAlerts: AlertResult[],
  resolvedAlerts: AlertResult[],
  serverName: string
): Promise<boolean> {
  if (!config.to) return false;

  // Header values must stay on one line: an embedded CR/LF would end the
  // header early and let the rest be read as extra headers or body.
  const oneLine = (value: string): string => value.replace(/[\r\n]+/g, " ");

  const subject = buildSubject(newAlerts, resolvedAlerts, serverName);
  const body = buildBody(newAlerts, resolvedAlerts, serverName);

  const email = [
    `To: ${oneLine(config.to)}`,
    `From: glassmkr-crucible@${oneLine(serverName)}`,
    `Subject: ${oneLine(subject)}`,
    `Content-Type: text/plain; charset=utf-8`,
    "",
    body,
  ].join("\n");

  try {
    // -t: take recipients from the headers.
    // -i: do not treat a line containing only "." as end of message, so such
    //     a line in the alert text cannot truncate the body.
    const child = execFileAsync("/usr/sbin/sendmail", ["-t", "-i"], { timeout: 10000 });
    const stdin = child.child.stdin;
    // If sendmail is missing or exits early, the write can fail with an
    // 'error' event on stdin. Without a listener that event is thrown as an
    // uncaught exception; the real failure is reported by the awaited promise.
    stdin?.on("error", () => {});
    stdin?.write(email);
    stdin?.end();
    await child;
    return true;
  } catch (err: unknown) {
    console.error("[email] Failed to send. Is sendmail/postfix/msmtp installed?");
    console.error(`[email] Reason: ${err instanceof Error ? err.message : String(err)}`);
    return false;
  }
}
|
|
39
|
-
|
|
40
|
-
/**
 * Builds the e-mail subject line.
 *
 * With at least one new alert the subject is tagged CRITICAL when any new
 * alert is critical and WARNING otherwise, followed by the new-alert count.
 * With no new alerts it reports the number of resolved alerts.
 */
function buildSubject(newAlerts: AlertResult[], resolvedAlerts: AlertResult[], serverName: string): string {
  if (newAlerts.length === 0) {
    return `[RESOLVED] ${serverName}: ${resolvedAlerts.length} alert(s) cleared`;
  }

  const hasCritical = newAlerts.some((alert) => alert.severity === "critical");
  const label = hasCritical ? "CRITICAL" : "WARNING";
  return `[${label}] ${serverName}: ${newAlerts.length} alert(s)`;
}
|
|
47
|
-
|
|
48
|
-
/**
 * Builds the plain-text e-mail body: a server/time header, one paragraph per
 * new alert (severity and title, message, recommended action), one line per
 * resolved alert, and a version footer.
 */
function buildBody(newAlerts: AlertResult[], resolvedAlerts: AlertResult[], serverName: string): string {
  const header = [`Server: ${serverName}`, `Time: ${new Date().toISOString()}`, ""];

  const active = newAlerts.flatMap((alert) => [
    `[${alert.severity.toUpperCase()}] ${alert.title}`,
    alert.message,
    `Action: ${alert.recommendation}`,
    "",
  ]);

  const cleared = resolvedAlerts.flatMap((alert) => [`[RESOLVED] ${alert.title}`, ""]);

  const footer = ["---", `Glassmkr Crucible v${CRUCIBLE_VERSION}`];

  return [...header, ...active, ...cleared, ...footer].join("\n");
}
|
package/src/notify/slack.ts
DELETED
|
@@ -1,47 +0,0 @@
|
|
|
1
|
-
import type { AlertResult } from "../lib/types.js";
|
|
2
|
-
import { CRUCIBLE_VERSION } from "../lib/version.js";
|
|
3
|
-
|
|
4
|
-
/**
 * Posts an alert digest to a Slack incoming webhook as Block Kit blocks.
 *
 * New alerts are grouped into a critical section and a warning section, each
 * with a header followed by one block per alert (title and recommendation).
 * Resolved alerts are summarised in a single block. A divider and a version
 * footer are appended.
 *
 * Text taken from alerts and the server name is escaped for Slack mrkdwn
 * (`&`, `<`, `>`), so such characters are shown literally instead of being
 * read as links or mentions.
 *
 * @param webhookUrl     Slack incoming-webhook URL.
 * @param newAlerts      Alerts that fired.
 * @param resolvedAlerts Alerts that cleared.
 * @param serverName     Server name shown in the section headers.
 * @returns true when there was nothing to send or Slack answered 2xx; false
 *          on a non-2xx answer or a network error/timeout (logged, not thrown).
 */
export async function sendSlack(
  webhookUrl: string,
  newAlerts: AlertResult[],
  resolvedAlerts: AlertResult[],
  serverName: string
): Promise<boolean> {
  type MrkdwnText = { type: "mrkdwn"; text: string };
  type SlackBlock =
    | { type: "section"; text: MrkdwnText }
    | { type: "divider" }
    | { type: "context"; elements: MrkdwnText[] };

  // Slack requires these three characters to be escaped in message text.
  const esc = (text: string): string =>
    text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
  const section = (text: string): SlackBlock => ({ type: "section", text: { type: "mrkdwn", text } });

  const blocks: SlackBlock[] = [];
  const server = esc(serverName);

  if (newAlerts.length > 0) {
    const criticals = newAlerts.filter((a) => a.severity === "critical");
    const warnings = newAlerts.filter((a) => a.severity === "warning");

    if (criticals.length > 0) {
      blocks.push(section(`\u{1F534} *${criticals.length} CRITICAL* on *${server}*`));
      for (const a of criticals) blocks.push(section(`*${esc(a.title)}*\n${esc(a.recommendation)}`));
    }
    if (warnings.length > 0) {
      blocks.push(section(`\u{1F7E1} *${warnings.length} WARNING* on *${server}*`));
      for (const a of warnings) blocks.push(section(`*${esc(a.title)}*\n${esc(a.recommendation)}`));
    }
  }

  if (resolvedAlerts.length > 0) {
    blocks.push(section(`\u2705 *${resolvedAlerts.length} resolved* on *${server}*`));
  }

  // Nothing to report counts as success.
  if (blocks.length === 0) return true;

  blocks.push({ type: "divider" });
  blocks.push({ type: "context", elements: [{ type: "mrkdwn", text: `Glassmkr Crucible v${CRUCIBLE_VERSION}` }] });

  try {
    const res = await fetch(webhookUrl, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ blocks }),
      signal: AbortSignal.timeout(10000),
    });
    return res.ok;
  } catch {
    console.error("[slack] Failed to send notification");
    return false;
  }
}
|
package/src/notify/telegram.ts
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
import type { AlertResult } from "../lib/types.js";
|
|
2
|
-
|
|
3
|
-
/**
 * Alert type → priority bucket ("P1" … "P4") used to group Telegram messages.
 * Types missing from this map are treated as "P3" by `getPriority`.
 */
const PRIORITY_MAP: Record<string, string> = {
  // P1
  raid_degraded: "P1",
  smart_failing: "P1",
  ecc_errors: "P1",
  psu_redundancy_loss: "P1",
  ipmi_fan_failure: "P1",

  // P2
  oom_kills: "P2",
  ram_high: "P2",
  disk_space_high: "P2",
  ipmi_sel_critical: "P2",
  disk_io_errors: "P2",
  zfs_pool_unhealthy: "P2",

  // P3
  cpu_iowait_high: "P3",
  nvme_wear_high: "P3",
  disk_latency_high: "P3",
  cpu_temperature_high: "P3",
  ssh_root_password: "P3",
  pending_security_updates: "P3",
  kernel_vulnerabilities: "P3",
  zfs_scrub_errors: "P3",

  // P4
  swap_active: "P4",
  no_firewall: "P4",
  kernel_needs_reboot: "P4",
  unattended_upgrades_disabled: "P4",
  interface_errors: "P4",
  link_speed_mismatch: "P4",
  interface_saturation: "P4",
};
|
|
11
|
-
|
|
12
|
-
/** Display label (coloured-circle emoji + text) for each priority bucket. */
const PRIORITY_LABELS: Record<string, string> = {
  P1: "\u{1F534} P1 Urgent",
  P2: "\u{1F7E0} P2 High",
  P3: "\u{1F7E1} P3 Medium",
  P4: "\u{1F535} P4 Low",
};
|
|
15
|
-
|
|
16
|
-
/**
 * Looks up the priority bucket for an alert type.
 *
 * @param alertType Alert type identifier.
 * @returns The bucket from `PRIORITY_MAP`, or "P3" when the type has no entry.
 */
function getPriority(alertType: string): string {
  const mapped = PRIORITY_MAP[alertType];
  return mapped ? mapped : "P3";
}
|
|
19
|
-
|
|
20
|
-
/**
 * Sends an alert digest to a Telegram chat through the Bot API `sendMessage`
 * method, using `parse_mode: "HTML"`.
 *
 * New alerts are grouped by priority bucket and listed P1 to P4 (title in
 * bold, then the recommendation). Resolved alerts follow as a bullet list of
 * titles.
 *
 * Alert titles, recommendations and the server name are HTML-escaped
 * (`&`, `<`, `>`). Without this, any such character in alert text, such as a
 * shell command containing `&&` or a `<device>` placeholder, makes Telegram
 * reject the whole message as unparsable.
 *
 * @param botToken       Bot token used in the API URL.
 * @param chatId         Target chat id.
 * @param newAlerts      Alerts that fired.
 * @param resolvedAlerts Alerts that cleared.
 * @param serverName     Server name shown in each group header.
 * @returns true when there was nothing to send or Telegram answered 2xx;
 *          false on a non-2xx answer or a network error/timeout (logged, not thrown).
 */
export async function sendTelegram(
  botToken: string,
  chatId: string,
  newAlerts: AlertResult[],
  resolvedAlerts: AlertResult[],
  serverName: string
): Promise<boolean> {
  // Escape the characters Telegram's HTML parse mode reserves for markup.
  const esc = (text: string): string =>
    text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

  const parts: string[] = [];
  const server = esc(serverName);

  if (newAlerts.length > 0) {
    // Group by priority
    const byPriority: Record<string, AlertResult[]> = {};
    for (const a of newAlerts) {
      const p = getPriority(a.type);
      if (!byPriority[p]) byPriority[p] = [];
      byPriority[p].push(a);
    }

    // Emit groups in fixed urgency order, skipping empty buckets.
    for (const p of ["P1", "P2", "P3", "P4"]) {
      const alerts = byPriority[p];
      if (!alerts?.length) continue;
      parts.push(`${PRIORITY_LABELS[p]} on <b>${server}</b>:\n`);
      for (const a of alerts) parts.push(` \u2022 <b>${esc(a.title)}</b>\n ${esc(a.recommendation)}\n`);
    }
  }

  if (resolvedAlerts.length > 0) {
    parts.push(`\u2705 <b>${resolvedAlerts.length} resolved</b> on <b>${server}</b>:\n`);
    for (const a of resolvedAlerts) parts.push(` \u2022 ${esc(a.title)}\n`);
  }

  // Nothing to report counts as success.
  if (parts.length === 0) return true;

  try {
    const res = await fetch(`https://api.telegram.org/bot${botToken}/sendMessage`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ chat_id: chatId, text: parts.join("\n"), parse_mode: "HTML", disable_web_page_preview: true }),
      signal: AbortSignal.timeout(10000),
    });
    return res.ok;
  } catch {
    console.error("[telegram] Failed to send notification");
    return false;
  }
}
|