@glassmkr/crucible 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103) hide show
  1. package/dist/alerts/__tests__/rules.test.d.ts +1 -0
  2. package/dist/alerts/__tests__/rules.test.js +325 -0
  3. package/dist/alerts/__tests__/rules.test.js.map +1 -0
  4. package/dist/alerts/rules.d.ts +8 -0
  5. package/dist/alerts/rules.js +139 -32
  6. package/dist/alerts/rules.js.map +1 -1
  7. package/dist/api.d.ts +2 -0
  8. package/dist/api.js +7 -0
  9. package/dist/api.js.map +1 -0
  10. package/dist/collect/__tests__/dmi.test.d.ts +1 -0
  11. package/dist/collect/__tests__/dmi.test.js +114 -0
  12. package/dist/collect/__tests__/dmi.test.js.map +1 -0
  13. package/dist/collect/__tests__/ipmi.test.js +47 -1
  14. package/dist/collect/__tests__/ipmi.test.js.map +1 -1
  15. package/dist/collect/__tests__/thermal.test.d.ts +1 -0
  16. package/dist/collect/__tests__/thermal.test.js +164 -0
  17. package/dist/collect/__tests__/thermal.test.js.map +1 -0
  18. package/dist/collect/dmi.d.ts +19 -0
  19. package/dist/collect/dmi.js +109 -0
  20. package/dist/collect/dmi.js.map +1 -0
  21. package/dist/collect/ipmi.d.ts +27 -2
  22. package/dist/collect/ipmi.js +90 -2
  23. package/dist/collect/ipmi.js.map +1 -1
  24. package/dist/collect/thermal.d.ts +10 -0
  25. package/dist/collect/thermal.js +187 -0
  26. package/dist/collect/thermal.js.map +1 -0
  27. package/dist/config.d.ts +10 -0
  28. package/dist/config.js +2 -0
  29. package/dist/config.js.map +1 -1
  30. package/dist/index.js +51 -1
  31. package/dist/index.js.map +1 -1
  32. package/dist/lib/__tests__/capability.test.d.ts +1 -0
  33. package/dist/lib/__tests__/capability.test.js +87 -0
  34. package/dist/lib/__tests__/capability.test.js.map +1 -0
  35. package/dist/lib/__tests__/vendor-sensors.test.d.ts +1 -0
  36. package/dist/lib/__tests__/vendor-sensors.test.js +49 -0
  37. package/dist/lib/__tests__/vendor-sensors.test.js.map +1 -0
  38. package/dist/lib/capability.d.ts +21 -0
  39. package/dist/lib/capability.js +110 -0
  40. package/dist/lib/capability.js.map +1 -0
  41. package/dist/lib/cpu-thermal-chips.d.ts +2 -0
  42. package/dist/lib/cpu-thermal-chips.js +28 -0
  43. package/dist/lib/cpu-thermal-chips.js.map +1 -0
  44. package/dist/lib/types.d.ts +58 -0
  45. package/dist/lib/vendor-sensors.d.ts +27 -0
  46. package/dist/lib/vendor-sensors.js +63 -0
  47. package/dist/lib/vendor-sensors.js.map +1 -0
  48. package/dist/notify/telegram.js +1 -1
  49. package/dist/notify/telegram.js.map +1 -1
  50. package/package.json +16 -1
  51. package/rule-ids.json +29 -0
  52. package/.dockerignore +0 -13
  53. package/.github/ISSUE_TEMPLATE/bug_report.md +0 -24
  54. package/.github/ISSUE_TEMPLATE/no_data.md +0 -26
  55. package/.github/workflows/docker.yml +0 -53
  56. package/.github/workflows/publish.yml +0 -25
  57. package/Dockerfile +0 -59
  58. package/config/collector.example.yaml +0 -43
  59. package/docker-compose.yml +0 -26
  60. package/scripts/sign-release.sh +0 -29
  61. package/src/__tests__/cli.test.ts +0 -74
  62. package/src/__tests__/reboot-marker.test.ts +0 -122
  63. package/src/alerts/evaluator.ts +0 -15
  64. package/src/alerts/rules.ts +0 -283
  65. package/src/alerts/state.ts +0 -92
  66. package/src/cli.ts +0 -112
  67. package/src/collect/__tests__/ipmi.test.ts +0 -96
  68. package/src/collect/__tests__/smart.test.ts +0 -68
  69. package/src/collect/__tests__/system.test.ts +0 -29
  70. package/src/collect/__tests__/zfs.test.ts +0 -72
  71. package/src/collect/conntrack.ts +0 -27
  72. package/src/collect/cpu.ts +0 -92
  73. package/src/collect/disks.ts +0 -91
  74. package/src/collect/fd.ts +0 -31
  75. package/src/collect/io-errors.ts +0 -23
  76. package/src/collect/io-latency.ts +0 -103
  77. package/src/collect/ipmi.ts +0 -207
  78. package/src/collect/memory.ts +0 -30
  79. package/src/collect/network.ts +0 -193
  80. package/src/collect/ntp.ts +0 -114
  81. package/src/collect/os-alerts.ts +0 -43
  82. package/src/collect/raid.ts +0 -40
  83. package/src/collect/security.ts +0 -268
  84. package/src/collect/smart.ts +0 -72
  85. package/src/collect/system.ts +0 -32
  86. package/src/collect/systemd.ts +0 -33
  87. package/src/collect/zfs.ts +0 -66
  88. package/src/config.ts +0 -65
  89. package/src/index.ts +0 -221
  90. package/src/lib/__tests__/parse.test.ts +0 -28
  91. package/src/lib/exec.ts +0 -16
  92. package/src/lib/parse.ts +0 -29
  93. package/src/lib/reboot-marker.ts +0 -88
  94. package/src/lib/types.ts +0 -226
  95. package/src/lib/version-check.ts +0 -39
  96. package/src/lib/version.ts +0 -33
  97. package/src/metrics-server.ts +0 -123
  98. package/src/notify/email.ts +0 -69
  99. package/src/notify/slack.ts +0 -47
  100. package/src/notify/telegram.ts +0 -65
  101. package/src/push/forge.ts +0 -109
  102. package/tsconfig.json +0 -15
  103. package/vitest.config.ts +0 -12
package/src/lib/types.ts DELETED
@@ -1,226 +0,0 @@
1
- export interface Snapshot { // Top-level record with one field per collector; fields marked `?` may be absent.
2
- collector_version: string; // Crucible version string (lib/version.ts documents that it is read from package.json).
3
- timestamp: string; // NOTE(review): presumably an ISO-8601 capture time — confirm against the code that builds the snapshot.
4
- system: SystemInfo;
5
- cpu: CpuInfo;
6
- memory: MemoryInfo;
7
- disks: DiskInfo[];
8
- smart: SmartInfo[];
9
- network: NetworkInfo[];
10
- raid: RaidInfo[];
11
- ipmi: IpmiInfo;
12
- os_alerts: OsAlerts;
13
- security?: SecurityData;
14
- zfs?: ZfsData;
15
- io_errors?: { count: number; devices: string[] };
16
- io_latency?: Array<{ device: string; avg_read_latency_ms: number | null; avg_write_latency_ms: number | null; read_iops: number; write_iops: number }>; // One entry per device; either latency may be null.
17
- conntrack?: ConntrackData;
18
- systemd?: SystemdData;
19
- ntp?: NtpData;
20
- file_descriptors?: FileDescriptorData;
21
- // Planned-reboot flag: set only on the first snapshot after a reboot
22
- // that was marked with `crucible-agent mark-reboot` / `reboot`. Forge
23
- // reads this to suppress the `unexpected_reboot` rule. Single-use:
24
- // subsequent snapshots don't carry it.
25
- expected_reboot?: boolean;
26
- expected_reboot_reason?: string; // NOTE(review): presumably only meaningful together with expected_reboot — confirm in lib/reboot-marker.ts.
27
- }
28
-
29
- export interface ConntrackData { // Shape of the optional Snapshot.conntrack field.
30
- available: boolean;
31
- count: number;
32
- max: number;
33
- percent: number; // NOTE(review): presumably count relative to max, as a percentage — confirm in collect/conntrack.ts.
34
- }
35
-
36
- export interface SystemdData { // Shape of the optional Snapshot.systemd field.
37
- failed_units: string[];
38
- failed_count: number; // NOTE(review): presumably equals failed_units.length — confirm in collect/systemd.ts.
39
- }
40
-
41
- export interface NtpData { // Shape of the optional Snapshot.ntp field.
42
- synced: boolean;
43
- offset_seconds: number; // Seconds, per the field name; the sign convention is not visible in this file.
44
- source: string;
45
- daemon_running: boolean;
46
- }
47
-
48
- export interface FileDescriptorData { // Shape of the optional Snapshot.file_descriptors field.
49
- allocated: number;
50
- free: number;
51
- max: number;
52
- percent: number; // NOTE(review): presumably allocated relative to max, as a percentage — confirm in collect/fd.ts.
53
- }
54
-
55
- export interface ZfsPool { // One entry of ZfsData.pools.
56
- name: string;
57
- state: string;
58
- errors_text: string;
59
- scrub_errors?: number;
60
- scrub_repaired?: string; // Kept as a string, unlike the numeric scrub_errors.
61
- last_scrub_date?: string;
62
- scrub_never_run?: boolean;
63
- }
64
-
65
- export interface ZfsData { // Shape of the optional Snapshot.zfs field.
66
- pools: ZfsPool[];
67
- }
68
-
69
- export interface SecurityData { // Shape of the optional Snapshot.security field; nested keys are camelCase while top-level keys are snake_case.
70
- ssh: { permitRootLogin: string; passwordAuthentication: string; rootPasswordExposed: boolean } | null; // May be null.
71
- firewall: { active: boolean; source: string; details: string };
72
- pending_updates: { distro: string; pendingCount: number; available: boolean } | null; // May be null; metrics-server.ts only exports pendingCount when available is true.
73
- kernel_vulns: Array<{ name: string; status: string; mitigated: boolean }>;
74
- kernel_reboot: { running: string; installed: string; needsReboot: boolean } | null; // May be null.
75
- auto_updates: { configured: boolean; mechanism: string; details: string };
76
- }
77
-
78
- export interface SystemInfo { // Shape of Snapshot.system: host identity and OS details.
79
- hostname: string;
80
- ip: string;
81
- os: string;
82
- /** `ID=` from /etc/os-release, lowercased. e.g. "ubuntu", "debian", "rocky", "arch", "alpine". */
83
- os_id?: string;
84
- /** `ID_LIKE=` from /etc/os-release, lowercased, space-separated. Used by Forge
85
- * to pick distro-family-specific fix command variants. e.g. on Rocky this
86
- * is "rhel centos fedora"; on Ubuntu it is "debian". */
87
- os_id_like?: string;
88
- kernel: string;
89
- uptime_seconds: number;
90
- }
91
-
92
- export interface CpuCoreInfo { // One entry of CpuInfo.cores.
93
- core: number; // NOTE(review): presumably the core index — confirm in collect/cpu.ts.
94
- user_percent: number;
95
- system_percent: number;
96
- iowait_percent: number;
97
- idle_percent: number;
98
- irq_percent: number;
99
- softirq_percent: number;
100
- }
101
-
102
- export interface CpuInfo { // Shape of Snapshot.cpu: utilisation percentages plus load averages.
103
- user_percent: number;
104
- system_percent: number;
105
- iowait_percent: number;
106
- idle_percent: number;
107
- load_1m: number;
108
- load_5m: number;
109
- load_15m: number;
110
- cores?: CpuCoreInfo[]; // Optional per-core breakdown; carries irq/softirq figures that the aggregate fields above do not.
111
- }
112
-
113
- export interface MemoryInfo { // Shape of Snapshot.memory; every field is in megabytes, per the `_mb` suffix.
114
- total_mb: number;
115
- used_mb: number;
116
- available_mb: number;
117
- swap_total_mb: number;
118
- swap_used_mb: number;
119
- }
120
-
121
- export interface DiskInfo { // One entry of Snapshot.disks; metrics-server.ts labels its disk metrics with mount and device.
122
- device: string;
123
- mount: string;
124
- total_gb: number;
125
- used_gb: number;
126
- available_gb: number;
127
- percent_used: number;
128
- fstype?: string;
129
- options?: string; // NOTE(review): presumably the mount options string — confirm in collect/disks.ts.
130
- inodes_total?: number;
131
- inodes_used?: number;
132
- inodes_free?: number;
133
- io_read_mb_s?: number;
134
- io_write_mb_s?: number;
135
- latency_p99_ms?: number;
136
- }
137
-
138
- export interface SmartInfo { // One entry of Snapshot.smart; the optional attributes are simply absent when not reported.
139
- device: string;
140
- model: string;
141
- health: string;
142
- temperature_c?: number;
143
- percentage_used?: number; // NOTE(review): presumably the NVMe wear indicator — confirm in collect/smart.ts.
144
- reallocated_sectors?: number;
145
- pending_sectors?: number;
146
- power_on_hours?: number;
147
- }
148
-
149
- export interface NetworkInfo { // One entry of Snapshot.network.
150
- interface: string;
151
- speed_mbps: number;
152
- rx_bytes_sec: number;
153
- tx_bytes_sec: number;
154
- /** Delta over the collection interval (rx_errors + any subtype counter). */
155
- rx_errors: number;
156
- tx_errors: number;
157
- rx_drops: number;
158
- tx_drops: number;
159
- /** Delta over the collection interval. Omitted (undefined, not null — the declared type is optional `number`) if the counter is not available on this NIC. */
160
- rx_packets?: number;
161
- tx_packets?: number;
162
- /** Fine-grained RX hardware-error subtypes (deltas). Omitted (undefined) if unavailable. */
163
- rx_crc_errors?: number;
164
- rx_frame_errors?: number;
165
- rx_length_errors?: number;
166
- /** TX physical-layer fault counter (delta). Omitted (undefined) if unavailable. */
167
- tx_carrier_errors?: number;
168
- operstate?: string; // "up", "down", "unknown", etc. from /sys/class/net/{iface}/operstate
169
- bond_master?: string; // if this interface is a bond slave, the bond name
170
- is_bond_master?: boolean; // true when this entry represents the bond aggregate
171
- }
172
-
173
- export interface RaidInfo { // One entry of Snapshot.raid.
174
- device: string;
175
- level: string;
176
- status: string;
177
- degraded: boolean;
178
- disks: string[];
179
- failed_disks: string[]; // NOTE(review): presumably a subset of disks — confirm in collect/raid.ts.
180
- }
181
-
182
- export interface SelEvent { // One entry of IpmiInfo.sel_events_recent.
183
- id: number;
184
- timestamp: string;
185
- sensor: string;
186
- sensor_type: string;
187
- event: string;
188
- direction: string;
189
- severity: string; // Free-form string here, unlike the literal union used by AlertResult.severity.
190
- }
191
-
192
- export interface FanStatus { // One entry of IpmiInfo.fans; metrics-server.ts exports rpm labelled with name and status.
193
- name: string;
194
- rpm: number;
195
- status: string;
196
- }
197
-
198
- export interface IpmiInfo { // Shape of Snapshot.ipmi; metrics-server.ts reads the remaining fields only when available is true.
199
- available: boolean;
200
- sensors: Array<{
201
- name: string;
202
- value: number | string; // metrics-server.ts exports only sensors whose value is a number.
203
- unit: string;
204
- status: string;
205
- upper_critical?: number;
206
- }>;
207
- ecc_errors: { correctable: number; uncorrectable: number };
208
- sel_entries_count: number;
209
- sel_events_recent: SelEvent[];
210
- fans: FanStatus[];
211
- }
212
-
213
- export interface OsAlerts { // Shape of Snapshot.os_alerts.
214
- oom_kills_recent: number; // NOTE(review): the window that "recent" covers is not visible here — confirm in collect/os-alerts.ts.
215
- zombie_processes: number;
216
- time_drift_ms: number;
217
- }
218
-
219
- export interface AlertResult { // One alert as consumed by the email, Slack and Telegram notifiers.
220
- type: string; // Rule identifier; telegram.ts maps it to a P1–P4 priority.
221
- severity: "critical" | "warning";
222
- title: string;
223
- message: string;
224
- evidence: Record<string, unknown>;
225
- recommendation: string; // Printed after "Action:" in emails and under the title in Slack/Telegram messages.
226
- }
@@ -1,39 +0,0 @@
1
- import { CRUCIBLE_VERSION as CURRENT_VERSION } from "./version.js";
2
-
3
- let lastCheckTime = 0; // Date.now() value of the most recent check attempt; 0 until checkForUpdates first runs.
4
- let lastResult: { updateAvailable: boolean; latest: string; changelog: string } | null = null; // Outcome of the last check that got a usable response; null until then.
5
- const CHECK_INTERVAL = 6 * 60 * 60 * 1000; // check every 6 hours
6
-
7
- export function getCurrentVersion(): string { // Returns the version imported from ./version.js (aliased in this module as CURRENT_VERSION).
8
- return CURRENT_VERSION;
9
- }
10
-
11
/**
 * Returns true when `candidate` is a strictly newer release than `current`.
 *
 * Versions are compared numerically, component by component, after dropping a
 * leading "v" and any pre-release/build suffix ("-beta.1", "+sha"). If the
 * numeric parts are equal, a `current` that carries a pre-release suffix is
 * considered older than a `candidate` without one. If either string is not a
 * dotted numeric version, the check degrades to plain inequality.
 */
function isNewerVersion(candidate: string, current: string): boolean {
  const core = (v: string): string => v.trim().replace(/^v/i, "").split(/[-+]/, 1)[0] ?? "";
  const parse = (v: string): number[] => core(v).split(".").map((part) => (/^\d+$/.test(part) ? Number(part) : NaN));
  const a = parse(candidate);
  const b = parse(current);
  if ([...a, ...b].some(Number.isNaN)) return candidate !== current;

  for (let i = 0; i < Math.max(a.length, b.length); i++) {
    const diff = (a[i] ?? 0) - (b[i] ?? 0);
    if (diff !== 0) return diff > 0;
  }
  const isPreRelease = (v: string): boolean => v.trim().includes("-");
  return isPreRelease(current) && !isPreRelease(candidate);
}

/**
 * Asks Forge which Crucible version is the latest and records the answer.
 *
 * At most one attempt is made per CHECK_INTERVAL; the attempt time is recorded
 * before the request, so a failed request is not retried until the interval
 * has elapsed. When a newer version is advertised, three `[update]` lines are
 * logged. The result is stored for getUpdateStatus().
 *
 * @param forgeUrl Base URL of the Forge instance; defaults to
 *   https://forge.glassmkr.com. Trailing slashes are ignored.
 * @returns A promise that always resolves; network and parse errors are
 *   swallowed because the version check is non-critical.
 */
export async function checkForUpdates(forgeUrl?: string): Promise<void> {
  const now = Date.now();
  if (now - lastCheckTime < CHECK_INTERVAL) return;
  lastCheckTime = now;

  // Strip trailing slashes so a configured "https://host/" does not produce "//api/...".
  const url = (forgeUrl || "https://forge.glassmkr.com").replace(/\/+$/, "");
  try {
    const res = await fetch(`${url}/api/v1/version`, { signal: AbortSignal.timeout(5000) });
    if (!res.ok) return;
    const data = await res.json() as { crucible?: { latest?: string; min_supported?: string; changelog_url?: string } };
    const latest = data.crucible?.latest;
    if (!latest) return;

    // Only announce an update when the advertised version is actually newer;
    // a plain inequality would also fire when this agent is ahead of Forge.
    if (isNewerVersion(latest, CURRENT_VERSION)) {
      console.log(`[update] New Crucible version available: ${latest} (current: ${CURRENT_VERSION})`);
      console.log(`[update] Changelog: ${data.crucible?.changelog_url || "https://github.com/glassmkr/crucible/releases"}`);
      console.log(`[update] Run: npm update -g @glassmkr/crucible && sudo systemctl restart glassmkr-crucible`);
      lastResult = { updateAvailable: true, latest, changelog: data.crucible?.changelog_url || "" };
    } else {
      lastResult = { updateAvailable: false, latest, changelog: "" };
    }
  } catch {
    // Version check is non-critical, fail silently
  }
}
36
-
37
- export function getUpdateStatus() { // Returns the module-level lastResult: null until a check has stored an outcome.
38
- return lastResult;
39
- }
@@ -1,33 +0,0 @@
1
- // Single source of truth for the Crucible version string at runtime.
2
- // Read from package.json so a release bump propagates everywhere
3
- // (notification footers, version-check log lines, --version flag, the
4
- // `collector_version` field on every snapshot) without anyone having to
5
- // remember to update a hardcoded constant.
6
- //
7
- // Returns "0.0.0" on read failure rather than throwing; the agent has to
8
- // keep running even if its package.json is somehow missing.
9
-
10
- import { readFileSync } from "node:fs";
11
- import { fileURLToPath } from "node:url";
12
- import { dirname, join } from "node:path";
13
-
14
- const __dirname = dirname(fileURLToPath(import.meta.url));
15
-
16
/**
 * Looks up the package version from a package.json near this module.
 *
 * Two locations are tried in order, both relative to this module's directory:
 * two levels up, then one level up. The first file that parses as JSON and has
 * a string `version` wins. Unreadable or malformed files are skipped.
 *
 * @returns The version string, or "0.0.0" when no candidate yields one.
 */
function readPkgVersion(): string {
  const versionAt = (manifestPath: string): string | undefined => {
    try {
      const manifest = JSON.parse(readFileSync(manifestPath, "utf8"));
      return manifest && typeof manifest.version === "string" ? manifest.version : undefined;
    } catch {
      return undefined; // missing or malformed: fall through to the next candidate
    }
  };

  for (const hops of [["..", ".."], [".."]]) {
    const found = versionAt(join(__dirname, ...hops, "package.json"));
    if (found !== undefined) return found;
  }
  return "0.0.0";
}
32
-
33
- export const CRUCIBLE_VERSION = readPkgVersion(); // Computed once when this module is first loaded.
@@ -1,123 +0,0 @@
1
- import { createServer } from "http";
2
- import type { Snapshot } from "./lib/types.js";
3
-
4
- let latestSnapshot: Snapshot | null = null; // Most recent snapshot handed to updateMetrics; null until the first one arrives.
5
-
6
- export function updateMetrics(snapshot: Snapshot) { // Replaces the snapshot that the /metrics endpoint renders.
7
- latestSnapshot = snapshot;
8
- }
9
-
10
/**
 * Starts the HTTP endpoint that exposes the latest snapshot to Prometheus.
 *
 * Routes (matched on the path only, so a query string is tolerated):
 *  - `GET /metrics` → 200 with the text exposition, or 503 before the first
 *    snapshot has been stored.
 *  - `/health` (any method) → 200 "ok".
 *  - anything else → 404.
 *
 * The server listens on all interfaces ("0.0.0.0"). A listen failure such as
 * EADDRINUSE is logged instead of surfacing as an uncaught "error" event.
 *
 * @param port TCP port to listen on.
 */
export function startMetricsServer(port: number) {
  const server = createServer((req, res) => {
    // Ignore any query string: scrapers may append parameters to the path.
    const path = (req.url ?? "").split("?", 1)[0];

    if (path === "/metrics" && req.method === "GET") {
      if (!latestSnapshot) {
        res.writeHead(503);
        res.end("# No data collected yet\n");
        return;
      }
      res.writeHead(200, { "Content-Type": "text/plain; version=0.0.4; charset=utf-8" });
      res.end(formatPrometheus(latestSnapshot));
    } else if (path === "/health") {
      res.writeHead(200);
      res.end("ok\n");
    } else {
      res.writeHead(404);
      res.end("Not found\n");
    }
  });

  // Without a listener, a bind failure would be thrown as an uncaught exception.
  server.on("error", (err: Error) => {
    console.error(`[metrics] Prometheus endpoint error on :${port}: ${err.message}`);
  });

  server.listen(port, "0.0.0.0", () => {
    console.log(`[metrics] Prometheus endpoint listening on :${port}/metrics`);
  });
}
33
-
34
/**
 * Escapes a value for use inside a double-quoted Prometheus label.
 * Backslash, double quote and line feed are the characters the text
 * exposition format requires to be escaped.
 */
function escapeLabelValue(value: unknown): string {
  return String(value).replace(/\\/g, "\\\\").replace(/"/g, '\\"').replace(/\n/g, "\\n");
}

/**
 * Renders a snapshot in the Prometheus text exposition format.
 *
 * Emits CPU, memory, per-disk, per-interface, SMART, IPMI (only when
 * `snap.ipmi.available`), OS-alert and, when present, security metrics. Label
 * values taken from the snapshot (mount points, device and model names, sensor
 * and fan names) are escaped so unusual characters cannot corrupt the output.
 *
 * @param snap Snapshot to render.
 * @returns Newline-terminated exposition text.
 */
function formatPrometheus(snap: Snapshot): string {
  const lines: string[] = [];
  const esc = escapeLabelValue;

  // CPU
  lines.push("# HELP glassmkr_cpu_user_percent CPU user utilization");
  lines.push("# TYPE glassmkr_cpu_user_percent gauge");
  lines.push(`glassmkr_cpu_user_percent ${snap.cpu.user_percent}`);
  lines.push(`glassmkr_cpu_system_percent ${snap.cpu.system_percent}`);
  lines.push(`glassmkr_cpu_iowait_percent ${snap.cpu.iowait_percent}`);
  lines.push(`glassmkr_cpu_idle_percent ${snap.cpu.idle_percent}`);
  lines.push(`glassmkr_load_1m ${snap.cpu.load_1m}`);
  lines.push(`glassmkr_load_5m ${snap.cpu.load_5m}`);
  lines.push(`glassmkr_load_15m ${snap.cpu.load_15m}`);

  // Memory
  lines.push("# HELP glassmkr_memory_used_mb Memory used in MB");
  lines.push("# TYPE glassmkr_memory_used_mb gauge");
  lines.push(`glassmkr_memory_used_mb ${snap.memory.used_mb}`);
  lines.push(`glassmkr_memory_total_mb ${snap.memory.total_mb}`);
  lines.push(`glassmkr_memory_available_mb ${snap.memory.available_mb}`);
  lines.push(`glassmkr_swap_used_mb ${snap.memory.swap_used_mb}`);

  // Disks
  lines.push("# HELP glassmkr_disk_used_percent Disk usage percentage");
  lines.push("# TYPE glassmkr_disk_used_percent gauge");
  for (const disk of snap.disks) {
    const labels = `mount="${esc(disk.mount)}",device="${esc(disk.device)}"`;
    lines.push(`glassmkr_disk_used_percent{${labels}} ${disk.percent_used}`);
    lines.push(`glassmkr_disk_total_gb{${labels}} ${disk.total_gb}`);
    lines.push(`glassmkr_disk_used_gb{${labels}} ${disk.used_gb}`);
  }

  // Network
  lines.push("# HELP glassmkr_net_rx_bytes_sec Network receive bytes per second");
  lines.push("# TYPE glassmkr_net_rx_bytes_sec gauge");
  for (const iface of snap.network) {
    const labels = `interface="${esc(iface.interface)}"`;
    lines.push(`glassmkr_net_rx_bytes_sec{${labels}} ${iface.rx_bytes_sec}`);
    lines.push(`glassmkr_net_tx_bytes_sec{${labels}} ${iface.tx_bytes_sec}`);
    lines.push(`glassmkr_net_rx_errors{${labels}} ${iface.rx_errors}`);
    lines.push(`glassmkr_net_tx_errors{${labels}} ${iface.tx_errors}`);
    lines.push(`glassmkr_net_speed_mbps{${labels}} ${iface.speed_mbps}`);
  }

  // SMART: optional attributes are emitted only when present.
  for (const drive of snap.smart) {
    const labels = `device="${esc(drive.device)}",model="${esc(drive.model)}"`;
    if (drive.temperature_c != null) lines.push(`glassmkr_smart_temperature_c{${labels}} ${drive.temperature_c}`);
    if (drive.percentage_used != null) lines.push(`glassmkr_smart_percentage_used{${labels}} ${drive.percentage_used}`);
    if (drive.reallocated_sectors != null) lines.push(`glassmkr_smart_reallocated_sectors{${labels}} ${drive.reallocated_sectors}`);
  }

  // IPMI
  if (snap.ipmi?.available) {
    for (const sensor of snap.ipmi.sensors) {
      // Non-numeric readings cannot be represented as a sample value.
      if (typeof sensor.value === "number") {
        lines.push(`glassmkr_ipmi_sensor{sensor="${esc(sensor.name)}",unit="${esc(sensor.unit)}"} ${sensor.value}`);
      }
    }
    lines.push(`glassmkr_ipmi_ecc_correctable ${snap.ipmi.ecc_errors.correctable}`);
    lines.push(`glassmkr_ipmi_ecc_uncorrectable ${snap.ipmi.ecc_errors.uncorrectable}`);

    // Fans
    if (snap.ipmi.fans) {
      for (const fan of snap.ipmi.fans) {
        lines.push(`glassmkr_ipmi_fan_rpm{fan="${esc(fan.name)}",status="${esc(fan.status)}"} ${fan.rpm}`);
      }
    }
  }

  // OS alerts
  lines.push(`glassmkr_oom_kills_recent ${snap.os_alerts.oom_kills_recent}`);
  lines.push(`glassmkr_zombie_processes ${snap.os_alerts.zombie_processes}`);

  // Security
  if (snap.security) {
    lines.push(`glassmkr_ssh_root_password_exposed ${snap.security.ssh?.rootPasswordExposed ? 1 : 0}`);
    lines.push(`glassmkr_firewall_active ${snap.security.firewall.active ? 1 : 0}`);
    if (snap.security.pending_updates?.available) {
      lines.push(`glassmkr_pending_security_updates ${snap.security.pending_updates.pendingCount}`);
    }
    const unmitigated = snap.security.kernel_vulns.filter(v => !v.mitigated).length;
    lines.push(`glassmkr_kernel_vulns_unmitigated ${unmitigated}`);
    lines.push(`glassmkr_kernel_needs_reboot ${snap.security.kernel_reboot?.needsReboot ? 1 : 0}`);
    lines.push(`glassmkr_auto_updates_configured ${snap.security.auto_updates.configured ? 1 : 0}`);
  }

  return lines.join("\n") + "\n";
}
@@ -1,69 +0,0 @@
1
- import { execFile } from "child_process";
2
- import { promisify } from "util";
3
- import type { AlertResult } from "../lib/types.js";
4
- import { CRUCIBLE_VERSION } from "../lib/version.js";
5
-
6
- const execFileAsync = promisify(execFile);
7
-
8
/**
 * Sends an alert digest through the local `/usr/sbin/sendmail -t`.
 *
 * The message is built from buildSubject/buildBody and written to sendmail's
 * stdin; recipients are taken from the `To:` header (`-t`).
 *
 * @param config `to` is the recipient address; an empty value disables email.
 * @param newAlerts Alerts that have just started firing.
 * @param resolvedAlerts Alerts that have just cleared.
 * @param serverName Used in the From address, subject and body.
 * @returns true when sendmail exited successfully, false when email is
 *   disabled or sending failed (a hint is logged in the latter case).
 */
export async function sendEmail(
  config: { to: string },
  newAlerts: AlertResult[],
  resolvedAlerts: AlertResult[],
  serverName: string
): Promise<boolean> {
  if (!config.to) return false;

  // Header values must stay on a single line; an embedded CR/LF would
  // otherwise start a new header (or the body) of the message.
  const headerSafe = (value: string): string => String(value).replace(/[\r\n]+/g, " ").trim();

  const subject = headerSafe(buildSubject(newAlerts, resolvedAlerts, serverName));
  const body = buildBody(newAlerts, resolvedAlerts, serverName);

  const email = [
    `To: ${headerSafe(config.to)}`,
    `From: glassmkr-crucible@${headerSafe(serverName)}`,
    `Subject: ${subject}`,
    `Content-Type: text/plain; charset=utf-8`,
    "",
    body,
  ].join("\n");

  try {
    const pending = execFileAsync("/usr/sbin/sendmail", ["-t"], { timeout: 10000 });
    const stdin = pending.child.stdin;
    // If sendmail is missing or exits early, the write fails with an "error"
    // event on stdin. Swallow it here; the failure itself is reported through
    // the awaited promise below.
    stdin?.on("error", () => {});
    stdin?.end(email);
    await pending;
    return true;
  } catch {
    console.error("[email] Failed to send. Is sendmail/postfix/msmtp installed?");
    return false;
  }
}
39
-
40
/**
 * Builds the one-line email subject.
 *
 * With new alerts the subject is tagged `[CRITICAL]` if any of them is
 * critical, otherwise `[WARNING]`, and counts the new alerts. Without new
 * alerts it is tagged `[RESOLVED]` and counts the resolved ones.
 *
 * @param newAlerts Alerts that have just started firing.
 * @param resolvedAlerts Alerts that have just cleared.
 * @param serverName Host label; CR/LF runs are collapsed to a space because
 *   the result is placed in a mail header.
 * @returns The subject text, without the "Subject:" prefix.
 */
function buildSubject(newAlerts: AlertResult[], resolvedAlerts: AlertResult[], serverName: string): string {
  const host = String(serverName).replace(/[\r\n]+/g, " ");
  if (newAlerts.length > 0) {
    const worst = newAlerts.some((a) => a.severity === "critical") ? "CRITICAL" : "WARNING";
    return `[${worst}] ${host}: ${newAlerts.length} alert(s)`;
  }
  return `[RESOLVED] ${host}: ${resolvedAlerts.length} alert(s) cleared`;
}
47
-
48
/**
 * Builds the plain-text email body.
 *
 * Layout: a "Server"/"Time" header, then for every new alert its severity and
 * title, message and recommended action, then one "[RESOLVED]" line per
 * cleared alert, and finally a footer with the Crucible version. Sections are
 * separated by blank lines.
 *
 * @param newAlerts Alerts that have just started firing.
 * @param resolvedAlerts Alerts that have just cleared.
 * @param serverName Host label shown on the first line.
 * @returns The body text, lines joined with "\n".
 */
function buildBody(newAlerts: AlertResult[], resolvedAlerts: AlertResult[], serverName: string): string {
  const header = [`Server: ${serverName}`, `Time: ${new Date().toISOString()}`, ""];

  const firing = newAlerts.flatMap((alert) => [
    `[${alert.severity.toUpperCase()}] ${alert.title}`,
    alert.message,
    `Action: ${alert.recommendation}`,
    "",
  ]);

  const cleared = resolvedAlerts.flatMap((alert) => [`[RESOLVED] ${alert.title}`, ""]);

  const footer = ["---", `Glassmkr Crucible v${CRUCIBLE_VERSION}`];

  return [...header, ...firing, ...cleared, ...footer].join("\n");
}
@@ -1,47 +0,0 @@
1
- import type { AlertResult } from "../lib/types.js";
2
- import { CRUCIBLE_VERSION } from "../lib/version.js";
3
-
4
/**
 * Posts an alert digest to a Slack incoming webhook as Block Kit blocks.
 *
 * New alerts are grouped into a CRITICAL and a WARNING section, each followed
 * by one block per alert (title + recommendation). Resolved alerts are
 * summarised as a count. A divider and a version footer close the message.
 * Alert text and the server name are escaped (`&`, `<`, `>`) so that Slack
 * does not interpret them as links, mentions or entities.
 *
 * @param webhookUrl Slack incoming-webhook URL.
 * @param newAlerts Alerts that have just started firing.
 * @param resolvedAlerts Alerts that have just cleared.
 * @param serverName Host label shown in the section headers.
 * @returns true when there was nothing to send or Slack answered with a 2xx
 *   status; false on a non-2xx status, network error or the 10 s timeout.
 */
export async function sendSlack(
  webhookUrl: string,
  newAlerts: AlertResult[],
  resolvedAlerts: AlertResult[],
  serverName: string
): Promise<boolean> {
  // Slack's mrkdwn treats these three characters as control characters.
  const esc = (text: string): string =>
    String(text).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
  const section = (text: string): Record<string, unknown> => ({ type: "section", text: { type: "mrkdwn", text } });

  const blocks: Array<Record<string, unknown>> = [];
  const host = esc(serverName);

  if (newAlerts.length > 0) {
    const criticals = newAlerts.filter((a) => a.severity === "critical");
    const warnings = newAlerts.filter((a) => a.severity === "warning");

    if (criticals.length > 0) {
      blocks.push(section(`\u{1F534} *${criticals.length} CRITICAL* on *${host}*`));
      for (const a of criticals) blocks.push(section(`*${esc(a.title)}*\n${esc(a.recommendation)}`));
    }
    if (warnings.length > 0) {
      blocks.push(section(`\u{1F7E1} *${warnings.length} WARNING* on *${host}*`));
      for (const a of warnings) blocks.push(section(`*${esc(a.title)}*\n${esc(a.recommendation)}`));
    }
  }

  if (resolvedAlerts.length > 0) {
    blocks.push(section(`\u2705 *${resolvedAlerts.length} resolved* on *${host}*`));
  }

  // Nothing to report counts as success.
  if (blocks.length === 0) return true;

  blocks.push({ type: "divider" });
  blocks.push({ type: "context", elements: [{ type: "mrkdwn", text: `Glassmkr Crucible v${CRUCIBLE_VERSION}` }] });

  try {
    const res = await fetch(webhookUrl, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ blocks }),
      signal: AbortSignal.timeout(10000),
    });
    return res.ok;
  } catch {
    console.error("[slack] Failed to send notification");
    return false;
  }
}
@@ -1,65 +0,0 @@
1
- import type { AlertResult } from "../lib/types.js";
2
-
3
/** Alert type → priority bucket. Types not listed here are treated as P3 by getPriority. */
const PRIORITY_MAP: Record<string, string> = {
  raid_degraded: "P1", smart_failing: "P1", ecc_errors: "P1", psu_redundancy_loss: "P1", ipmi_fan_failure: "P1",
  oom_kills: "P2", ram_high: "P2", disk_space_high: "P2", ipmi_sel_critical: "P2", disk_io_errors: "P2", zfs_pool_unhealthy: "P2",
  cpu_iowait_high: "P3", nvme_wear_high: "P3", disk_latency_high: "P3", cpu_temperature_high: "P3",
  ssh_root_password: "P3", pending_security_updates: "P3", kernel_vulnerabilities: "P3", zfs_scrub_errors: "P3",
  swap_active: "P4", no_firewall: "P4", kernel_needs_reboot: "P4", unattended_upgrades_disabled: "P4",
  interface_errors: "P4", link_speed_mismatch: "P4", interface_saturation: "P4",
};

/** Priority bucket → heading shown in the Telegram message. */
const PRIORITY_LABELS: Record<string, string> = {
  P1: "\u{1F534} P1 Urgent", P2: "\u{1F7E0} P2 High", P3: "\u{1F7E1} P3 Medium", P4: "\u{1F535} P4 Low",
};

/**
 * Maps an alert type to its priority bucket.
 *
 * @param alertType Value of AlertResult.type.
 * @returns "P1".."P4" for known types, "P3" for anything else.
 */
function getPriority(alertType: string): string {
  // Own-property check: a bare index would also find inherited members such
  // as "constructor" or "toString" and return a function instead of a bucket.
  if (!Object.prototype.hasOwnProperty.call(PRIORITY_MAP, alertType)) return "P3";
  return PRIORITY_MAP[alertType] || "P3";
}
19
-
20
/**
 * Sends an alert digest to a Telegram chat through the Bot API.
 *
 * New alerts are grouped by priority (P1 first), each group under its label,
 * with one bullet per alert (title + recommendation). Resolved alerts follow
 * as a bulleted list of titles. The message uses `parse_mode: "HTML"`, so all
 * dynamic text (titles, recommendations, server name) is HTML-escaped; only
 * the `<b>` tags added here are markup.
 *
 * @param botToken Telegram bot token.
 * @param chatId Target chat identifier.
 * @param newAlerts Alerts that have just started firing.
 * @param resolvedAlerts Alerts that have just cleared.
 * @param serverName Host label shown in the group headers.
 * @returns true when there was nothing to send or Telegram answered with a
 *   2xx status; false on a non-2xx status, network error or the 10 s timeout.
 */
export async function sendTelegram(
  botToken: string,
  chatId: string,
  newAlerts: AlertResult[],
  resolvedAlerts: AlertResult[],
  serverName: string
): Promise<boolean> {
  // Telegram's HTML mode requires <, > and & in plain text to be entities.
  const esc = (text: string): string =>
    String(text).replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");

  const parts: string[] = [];
  const host = esc(serverName);

  if (newAlerts.length > 0) {
    // Group by priority
    const byPriority: Record<string, AlertResult[]> = {};
    for (const a of newAlerts) {
      const p = getPriority(a.type);
      if (!byPriority[p]) byPriority[p] = [];
      byPriority[p].push(a);
    }

    for (const p of ["P1", "P2", "P3", "P4"]) {
      const alerts = byPriority[p];
      if (!alerts?.length) continue;
      parts.push(`${PRIORITY_LABELS[p]} on <b>${host}</b>:\n`);
      for (const a of alerts) parts.push(` \u2022 <b>${esc(a.title)}</b>\n ${esc(a.recommendation)}\n`);
    }
  }

  if (resolvedAlerts.length > 0) {
    parts.push(`\u2705 <b>${resolvedAlerts.length} resolved</b> on <b>${host}</b>:\n`);
    for (const a of resolvedAlerts) parts.push(` \u2022 ${esc(a.title)}\n`);
  }

  // Nothing to report counts as success.
  if (parts.length === 0) return true;

  try {
    const res = await fetch(`https://api.telegram.org/bot${botToken}/sendMessage`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ chat_id: chatId, text: parts.join("\n"), parse_mode: "HTML", disable_web_page_preview: true }),
      signal: AbortSignal.timeout(10000),
    });
    return res.ok;
  } catch {
    console.error("[telegram] Failed to send notification");
    return false;
  }
}