@glassmkr/crucible 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/alerts/__tests__/rules.test.d.ts +1 -0
- package/dist/alerts/__tests__/rules.test.js +325 -0
- package/dist/alerts/__tests__/rules.test.js.map +1 -0
- package/dist/alerts/rules.d.ts +8 -0
- package/dist/alerts/rules.js +139 -32
- package/dist/alerts/rules.js.map +1 -1
- package/dist/api.d.ts +2 -0
- package/dist/api.js +7 -0
- package/dist/api.js.map +1 -0
- package/dist/collect/__tests__/dmi.test.d.ts +1 -0
- package/dist/collect/__tests__/dmi.test.js +114 -0
- package/dist/collect/__tests__/dmi.test.js.map +1 -0
- package/dist/collect/__tests__/ipmi.test.js +47 -1
- package/dist/collect/__tests__/ipmi.test.js.map +1 -1
- package/dist/collect/__tests__/thermal.test.d.ts +1 -0
- package/dist/collect/__tests__/thermal.test.js +164 -0
- package/dist/collect/__tests__/thermal.test.js.map +1 -0
- package/dist/collect/dmi.d.ts +19 -0
- package/dist/collect/dmi.js +109 -0
- package/dist/collect/dmi.js.map +1 -0
- package/dist/collect/ipmi.d.ts +27 -2
- package/dist/collect/ipmi.js +90 -2
- package/dist/collect/ipmi.js.map +1 -1
- package/dist/collect/thermal.d.ts +10 -0
- package/dist/collect/thermal.js +187 -0
- package/dist/collect/thermal.js.map +1 -0
- package/dist/config.d.ts +10 -0
- package/dist/config.js +2 -0
- package/dist/config.js.map +1 -1
- package/dist/index.js +52 -14
- package/dist/index.js.map +1 -1
- package/dist/lib/__tests__/capability.test.d.ts +1 -0
- package/dist/lib/__tests__/capability.test.js +87 -0
- package/dist/lib/__tests__/capability.test.js.map +1 -0
- package/dist/lib/__tests__/vendor-sensors.test.d.ts +1 -0
- package/dist/lib/__tests__/vendor-sensors.test.js +49 -0
- package/dist/lib/__tests__/vendor-sensors.test.js.map +1 -0
- package/dist/lib/capability.d.ts +21 -0
- package/dist/lib/capability.js +110 -0
- package/dist/lib/capability.js.map +1 -0
- package/dist/lib/cpu-thermal-chips.d.ts +2 -0
- package/dist/lib/cpu-thermal-chips.js +28 -0
- package/dist/lib/cpu-thermal-chips.js.map +1 -0
- package/dist/lib/types.d.ts +58 -0
- package/dist/lib/vendor-sensors.d.ts +27 -0
- package/dist/lib/vendor-sensors.js +63 -0
- package/dist/lib/vendor-sensors.js.map +1 -0
- package/dist/lib/version-check.js +1 -1
- package/dist/lib/version-check.js.map +1 -1
- package/dist/lib/version.d.ts +1 -0
- package/dist/lib/version.js +32 -0
- package/dist/lib/version.js.map +1 -0
- package/dist/notify/email.js +2 -1
- package/dist/notify/email.js.map +1 -1
- package/dist/notify/slack.js +2 -1
- package/dist/notify/slack.js.map +1 -1
- package/dist/notify/telegram.js +1 -1
- package/dist/notify/telegram.js.map +1 -1
- package/package.json +16 -1
- package/rule-ids.json +29 -0
- package/.dockerignore +0 -13
- package/.github/ISSUE_TEMPLATE/bug_report.md +0 -24
- package/.github/ISSUE_TEMPLATE/no_data.md +0 -26
- package/.github/workflows/docker.yml +0 -53
- package/.github/workflows/publish.yml +0 -25
- package/Dockerfile +0 -59
- package/config/collector.example.yaml +0 -43
- package/docker-compose.yml +0 -26
- package/scripts/sign-release.sh +0 -29
- package/src/__tests__/cli.test.ts +0 -74
- package/src/__tests__/reboot-marker.test.ts +0 -122
- package/src/alerts/evaluator.ts +0 -15
- package/src/alerts/rules.ts +0 -283
- package/src/alerts/state.ts +0 -92
- package/src/cli.ts +0 -112
- package/src/collect/__tests__/ipmi.test.ts +0 -96
- package/src/collect/__tests__/smart.test.ts +0 -68
- package/src/collect/__tests__/system.test.ts +0 -29
- package/src/collect/__tests__/zfs.test.ts +0 -72
- package/src/collect/conntrack.ts +0 -27
- package/src/collect/cpu.ts +0 -92
- package/src/collect/disks.ts +0 -91
- package/src/collect/fd.ts +0 -31
- package/src/collect/io-errors.ts +0 -23
- package/src/collect/io-latency.ts +0 -103
- package/src/collect/ipmi.ts +0 -207
- package/src/collect/memory.ts +0 -30
- package/src/collect/network.ts +0 -193
- package/src/collect/ntp.ts +0 -114
- package/src/collect/os-alerts.ts +0 -43
- package/src/collect/raid.ts +0 -40
- package/src/collect/security.ts +0 -268
- package/src/collect/smart.ts +0 -72
- package/src/collect/system.ts +0 -32
- package/src/collect/systemd.ts +0 -33
- package/src/collect/zfs.ts +0 -66
- package/src/config.ts +0 -65
- package/src/index.ts +0 -233
- package/src/lib/__tests__/parse.test.ts +0 -28
- package/src/lib/exec.ts +0 -16
- package/src/lib/parse.ts +0 -29
- package/src/lib/reboot-marker.ts +0 -88
- package/src/lib/types.ts +0 -226
- package/src/lib/version-check.ts +0 -38
- package/src/metrics-server.ts +0 -123
- package/src/notify/email.ts +0 -68
- package/src/notify/slack.ts +0 -46
- package/src/notify/telegram.ts +0 -65
- package/src/push/forge.ts +0 -109
- package/tsconfig.json +0 -15
- package/vitest.config.ts +0 -12
package/src/lib/types.ts
DELETED
|
@@ -1,226 +0,0 @@
|
|
|
1
|
-
/**
 * Top-level payload describing one collection pass over a host.
 * Members marked optional may be absent from a given snapshot.
 */
export interface Snapshot {
  collector_version: string;
  timestamp: string;
  system: SystemInfo;
  cpu: CpuInfo;
  memory: MemoryInfo;
  disks: DiskInfo[];
  smart: SmartInfo[];
  network: NetworkInfo[];
  raid: RaidInfo[];
  ipmi: IpmiInfo;
  os_alerts: OsAlerts;
  security?: SecurityData;
  zfs?: ZfsData;
  io_errors?: {
    count: number;
    devices: string[];
  };
  io_latency?: {
    device: string;
    avg_read_latency_ms: number | null;
    avg_write_latency_ms: number | null;
    read_iops: number;
    write_iops: number;
  }[];
  conntrack?: ConntrackData;
  systemd?: SystemdData;
  ntp?: NtpData;
  file_descriptors?: FileDescriptorData;
  /**
   * Planned-reboot flag: set only on the first snapshot after a reboot that
   * was marked with `crucible-agent mark-reboot` / `reboot`. Forge reads this
   * to suppress the `unexpected_reboot` rule. Single-use: subsequent
   * snapshots don't carry it.
   */
  expected_reboot?: boolean;
  expected_reboot_reason?: string;
}
|
|
28
|
-
|
|
29
|
-
/** Connection-tracking table usage: current entry count against its maximum. */
export interface ConntrackData {
  /** Whether conntrack figures could be obtained at all. */
  available: boolean;
  count: number;
  max: number;
  percent: number;
}
|
|
35
|
-
|
|
36
|
-
/** Failed systemd units: their names plus a count. */
export interface SystemdData {
  failed_units: string[];
  failed_count: number;
}
|
|
40
|
-
|
|
41
|
-
/** Time-synchronisation status of the host. */
export interface NtpData {
  synced: boolean;
  offset_seconds: number;
  source: string;
  daemon_running: boolean;
}
|
|
47
|
-
|
|
48
|
-
/** File-descriptor usage: allocated and free handles against the maximum. */
export interface FileDescriptorData {
  allocated: number;
  free: number;
  max: number;
  percent: number;
}
|
|
54
|
-
|
|
55
|
-
/** Status of a single ZFS pool; the scrub-related members are optional. */
export interface ZfsPool {
  name: string;
  state: string;
  errors_text: string;
  scrub_errors?: number;
  scrub_repaired?: string;
  last_scrub_date?: string;
  scrub_never_run?: boolean;
}
|
|
64
|
-
|
|
65
|
-
/** Container for the ZFS pools reported in a snapshot. */
export interface ZfsData {
  pools: ZfsPool[];
}
|
|
68
|
-
|
|
69
|
-
/**
 * Security posture findings for the host. Sections typed `| null` may be
 * reported as null instead of an object.
 */
export interface SecurityData {
  ssh: {
    permitRootLogin: string;
    passwordAuthentication: string;
    rootPasswordExposed: boolean;
  } | null;
  firewall: {
    active: boolean;
    source: string;
    details: string;
  };
  pending_updates: {
    distro: string;
    pendingCount: number;
    available: boolean;
  } | null;
  kernel_vulns: {
    name: string;
    status: string;
    mitigated: boolean;
  }[];
  kernel_reboot: {
    running: string;
    installed: string;
    needsReboot: boolean;
  } | null;
  auto_updates: {
    configured: boolean;
    mechanism: string;
    details: string;
  };
}
|
|
77
|
-
|
|
78
|
-
/** Identity and OS details of the host being monitored. */
export interface SystemInfo {
  hostname: string;
  ip: string;
  os: string;
  /** `ID=` from /etc/os-release, lowercased. e.g. "ubuntu", "debian", "rocky", "arch", "alpine". */
  os_id?: string;
  /**
   * `ID_LIKE=` from /etc/os-release, lowercased, space-separated. Used by
   * Forge to pick distro-family-specific fix command variants. e.g. on Rocky
   * this is "rhel centos fedora"; on Ubuntu it is "debian".
   */
  os_id_like?: string;
  kernel: string;
  uptime_seconds: number;
}
|
|
91
|
-
|
|
92
|
-
/** Utilisation breakdown for one CPU core, identified by `core`. */
export interface CpuCoreInfo {
  core: number;
  user_percent: number;
  system_percent: number;
  iowait_percent: number;
  idle_percent: number;
  irq_percent: number;
  softirq_percent: number;
}
|
|
101
|
-
|
|
102
|
-
/** Aggregate CPU utilisation and load averages, with an optional per-core breakdown. */
export interface CpuInfo {
  user_percent: number;
  system_percent: number;
  iowait_percent: number;
  idle_percent: number;
  load_1m: number;
  load_5m: number;
  load_15m: number;
  cores?: CpuCoreInfo[];
}
|
|
112
|
-
|
|
113
|
-
/** RAM and swap figures for the host. */
export interface MemoryInfo {
  total_mb: number;
  used_mb: number;
  available_mb: number;
  swap_total_mb: number;
  swap_used_mb: number;
}
|
|
120
|
-
|
|
121
|
-
/**
 * Capacity figures for one mounted filesystem. Filesystem type, mount
 * options, inode counts and I/O figures are optional.
 */
export interface DiskInfo {
  device: string;
  mount: string;
  total_gb: number;
  used_gb: number;
  available_gb: number;
  percent_used: number;
  fstype?: string;
  options?: string;
  inodes_total?: number;
  inodes_used?: number;
  inodes_free?: number;
  io_read_mb_s?: number;
  io_write_mb_s?: number;
  latency_p99_ms?: number;
}
|
|
137
|
-
|
|
138
|
-
/** SMART report for one drive; the individual attributes are optional. */
export interface SmartInfo {
  device: string;
  model: string;
  health: string;
  temperature_c?: number;
  percentage_used?: number;
  reallocated_sectors?: number;
  pending_sectors?: number;
  power_on_hours?: number;
}
|
|
148
|
-
|
|
149
|
-
/**
 * Per-interface network figures for one snapshot.
 *
 * The optional counters are declared as `?: number`, so an unavailable
 * counter is represented by omitting the member (undefined); the declared
 * type does not admit `null`.
 */
export interface NetworkInfo {
  interface: string;
  speed_mbps: number;
  rx_bytes_sec: number;
  tx_bytes_sec: number;
  /** Delta over the collection interval (rx_errors + any subtype counter). */
  rx_errors: number;
  tx_errors: number;
  rx_drops: number;
  tx_drops: number;
  /** Delta over the collection interval. Omitted if the counter is not available on this NIC. */
  rx_packets?: number;
  tx_packets?: number;
  /** Fine-grained RX hardware-error subtypes (deltas). Omitted if unavailable. */
  rx_crc_errors?: number;
  rx_frame_errors?: number;
  rx_length_errors?: number;
  /** TX physical-layer fault counter (delta). Omitted if unavailable. */
  tx_carrier_errors?: number;
  /** "up", "down", "unknown", etc. from /sys/class/net/{iface}/operstate */
  operstate?: string;
  /** If this interface is a bond slave, the bond name. */
  bond_master?: string;
  /** True when this entry represents the bond aggregate. */
  is_bond_master?: boolean;
}
|
|
172
|
-
|
|
173
|
-
/** State of one RAID array, including its member and failed disks. */
export interface RaidInfo {
  device: string;
  level: string;
  status: string;
  degraded: boolean;
  disks: string[];
  failed_disks: string[];
}
|
|
181
|
-
|
|
182
|
-
/** One entry from the IPMI System Event Log as carried in `IpmiInfo.sel_events_recent`. */
export interface SelEvent {
  id: number;
  timestamp: string;
  sensor: string;
  sensor_type: string;
  event: string;
  direction: string;
  severity: string;
}
|
|
191
|
-
|
|
192
|
-
/** Reading for a single fan: name, speed in RPM and a status string. */
export interface FanStatus {
  name: string;
  rpm: number;
  status: string;
}
|
|
197
|
-
|
|
198
|
-
/**
 * IPMI/BMC data for the host. `available` signals whether IPMI data was
 * obtained; sensor values may be numeric or textual.
 */
export interface IpmiInfo {
  available: boolean;
  sensors: {
    name: string;
    value: number | string;
    unit: string;
    status: string;
    upper_critical?: number;
  }[];
  ecc_errors: {
    correctable: number;
    uncorrectable: number;
  };
  sel_entries_count: number;
  sel_events_recent: SelEvent[];
  fans: FanStatus[];
}
|
|
212
|
-
|
|
213
|
-
/** OS-level health counters included in every snapshot. */
export interface OsAlerts {
  oom_kills_recent: number;
  zombie_processes: number;
  time_drift_ms: number;
}
|
|
218
|
-
|
|
219
|
-
/**
 * A single alert. `type` is the rule identifier (the Telegram notifier maps
 * it to a priority); the text members are what the notifiers render.
 */
export interface AlertResult {
  type: string;
  severity: "critical" | "warning";
  title: string;
  message: string;
  evidence: Record<string, unknown>;
  recommendation: string;
}
|
package/src/lib/version-check.ts
DELETED
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
// NOTE(review): this literal looks stale next to the published package
// version (0.7.0 / 0.8.0 in the package header) — confirm where the version
// is supposed to come from.
const CURRENT_VERSION = "0.1.0";

/** Epoch milliseconds of the most recent update check; 0 means "never checked". */
let lastCheckTime = 0;

/** Outcome of the most recent successful check, or null when none has completed. */
let lastResult: { updateAvailable: boolean; latest: string; changelog: string } | null = null;

/** Minimum spacing between two update checks: six hours, in milliseconds. */
const CHECK_INTERVAL = 6 * 3600 * 1000;

/**
 * Reports the version string this build identifies itself with.
 *
 * @returns the module-level `CURRENT_VERSION` constant.
 */
export function getCurrentVersion(): string {
  return CURRENT_VERSION;
}
|
|
9
|
-
|
|
10
|
-
/**
 * Compares two dotted version strings numerically, segment by segment.
 * A leading "v" and any pre-release/build suffix ("-rc.1", "+sha") are
 * ignored; missing or non-numeric segments count as 0.
 *
 * @returns true only when `candidate` is strictly greater than `current`.
 */
function isNewerVersion(candidate: string, current: string): boolean {
  const toSegments = (version: string): number[] => {
    const core = version.trim().replace(/^v/i, "").split(/[-+]/)[0] ?? "";
    return core.split(".").map((segment) => Number.parseInt(segment, 10) || 0);
  };
  const left = toSegments(candidate);
  const right = toSegments(current);
  const length = Math.max(left.length, right.length);
  for (let i = 0; i < length; i++) {
    const delta = (left[i] ?? 0) - (right[i] ?? 0);
    if (delta !== 0) return delta > 0;
  }
  return false;
}

/**
 * Asks Forge which Crucible version is the latest and records the outcome
 * for `getUpdateStatus()`. Rate-limited to one attempt per `CHECK_INTERVAL`;
 * the attempt timestamp is taken before the request, so a failed attempt is
 * not retried until the interval has elapsed.
 *
 * An update is announced only when the advertised version is strictly newer
 * than the running one; a server advertising an older version is recorded as
 * "no update available" rather than prompting an upgrade.
 *
 * @param forgeUrl Base URL of the Forge instance; defaults to the hosted
 *   service. Trailing slashes are tolerated.
 * @returns nothing; network and parsing failures are swallowed because the
 *   check is best-effort.
 */
export async function checkForUpdates(forgeUrl?: string): Promise<void> {
  const now = Date.now();
  if (now - lastCheckTime < CHECK_INTERVAL) return;
  lastCheckTime = now;

  // Strip trailing slashes so the request path never becomes "//api/v1/version".
  const baseUrl = (forgeUrl || "https://forge.glassmkr.com").replace(/\/+$/, "");
  try {
    const res = await fetch(`${baseUrl}/api/v1/version`, { signal: AbortSignal.timeout(5000) });
    if (!res.ok) return;

    const data = (await res.json()) as {
      crucible?: { latest?: string; min_supported?: string; changelog_url?: string };
    } | null;
    const latest = data?.crucible?.latest;
    // The payload is untrusted JSON: only accept a non-empty string.
    if (typeof latest !== "string" || latest === "") return;
    const changelogUrl = data?.crucible?.changelog_url;

    if (isNewerVersion(latest, CURRENT_VERSION)) {
      console.log(`[update] New Crucible version available: ${latest} (current: ${CURRENT_VERSION})`);
      console.log(`[update] Changelog: ${changelogUrl || "https://github.com/glassmkr/crucible/releases"}`);
      console.log(`[update] Run: npm update -g @glassmkr/crucible && sudo systemctl restart glassmkr-crucible`);
      lastResult = { updateAvailable: true, latest, changelog: changelogUrl || "" };
    } else {
      lastResult = { updateAvailable: false, latest, changelog: "" };
    }
  } catch {
    // Version check is non-critical, fail silently
  }
}
|
|
35
|
-
|
|
36
|
-
/**
 * Exposes the outcome of the last completed update check.
 *
 * @returns the cached result object, or null when no check has completed yet.
 */
export function getUpdateStatus() {
  const cached = lastResult;
  return cached;
}
|
package/src/metrics-server.ts
DELETED
|
@@ -1,123 +0,0 @@
|
|
|
1
|
-
import { createServer } from "http";
|
|
2
|
-
import type { Snapshot } from "./lib/types.js";
|
|
3
|
-
|
|
4
|
-
/** Most recent snapshot handed to `updateMetrics`; null until the first one arrives. */
let latestSnapshot: Snapshot | null = null;

/**
 * Replaces the snapshot that the metrics endpoint serves.
 *
 * @param snapshot The newest collection result; stored by reference.
 */
export function updateMetrics(snapshot: Snapshot) {
  latestSnapshot = snapshot;
}
|
|
9
|
-
|
|
10
|
-
/**
 * Starts an HTTP server exposing the latest snapshot in Prometheus text
 * format.
 *
 * Routes (matched on the path only, so a query string is tolerated):
 * - `GET /metrics` — 200 with the formatted snapshot, 503 until the first
 *   snapshot exists, 500 if formatting throws.
 * - `/health` — 200 "ok" for any method.
 * - anything else — 404.
 *
 * Listen failures such as a port that is already in use are logged instead
 * of surfacing as an unhandled `error` event that would take the process
 * down.
 *
 * @param port TCP port to listen on; the server binds to all IPv4 interfaces.
 */
export function startMetricsServer(port: number) {
  const server = createServer((req, res) => {
    // Compare the path component only: "/metrics?x=1" must still match.
    const path = (req.url ?? "/").split("?", 1)[0];

    if (path === "/metrics" && req.method === "GET") {
      if (!latestSnapshot) {
        res.writeHead(503);
        res.end("# No data collected yet\n");
        return;
      }
      let payload: string;
      try {
        payload = formatPrometheus(latestSnapshot);
      } catch (err) {
        // A malformed snapshot must not leave the scraper hanging.
        const reason = err instanceof Error ? err.message : String(err);
        console.error(`[metrics] Failed to format snapshot: ${reason}`);
        res.writeHead(500);
        res.end("# Failed to format metrics\n");
        return;
      }
      res.writeHead(200, { "Content-Type": "text/plain; version=0.0.4" });
      res.end(payload);
    } else if (path === "/health") {
      res.writeHead(200);
      res.end("ok\n");
    } else {
      res.writeHead(404);
      res.end("Not found\n");
    }
  });

  // Without a listener, EADDRINUSE/EACCES would be thrown as an uncaught exception.
  server.on("error", (err) => {
    console.error(`[metrics] Server error on port ${port}: ${err.message}`);
  });

  server.listen(port, "0.0.0.0", () => {
    console.log(`[metrics] Prometheus endpoint listening on :${port}/metrics`);
  });
}
|
|
33
|
-
|
|
34
|
-
/**
 * Escapes a value for use inside a quoted Prometheus label: backslash,
 * double quote and line feed are the characters the text format requires
 * to be escaped.
 */
function escapeLabelValue(value: unknown): string {
  return String(value)
    .replace(/\\/g, "\\\\")
    .replace(/"/g, '\\"')
    .replace(/\n/g, "\\n");
}

/**
 * Renders a snapshot as Prometheus text exposition output.
 *
 * Sections are emitted in a fixed order: CPU, memory, disks, network, SMART,
 * IPMI (only when `snap.ipmi.available`), OS alerts and security (only when
 * present). Label values taken from the snapshot (mount points, device and
 * model names, sensor and fan names) are escaped so unusual characters
 * cannot produce an unparsable line.
 *
 * @param snap Snapshot to render.
 * @returns Newline-terminated exposition text.
 */
function formatPrometheus(snap: Snapshot): string {
  const lines: string[] = [];

  // CPU
  lines.push("# HELP glassmkr_cpu_user_percent CPU user utilization");
  lines.push("# TYPE glassmkr_cpu_user_percent gauge");
  lines.push(`glassmkr_cpu_user_percent ${snap.cpu.user_percent}`);
  lines.push(`glassmkr_cpu_system_percent ${snap.cpu.system_percent}`);
  lines.push(`glassmkr_cpu_iowait_percent ${snap.cpu.iowait_percent}`);
  lines.push(`glassmkr_cpu_idle_percent ${snap.cpu.idle_percent}`);
  lines.push(`glassmkr_load_1m ${snap.cpu.load_1m}`);
  lines.push(`glassmkr_load_5m ${snap.cpu.load_5m}`);
  lines.push(`glassmkr_load_15m ${snap.cpu.load_15m}`);

  // Memory
  lines.push("# HELP glassmkr_memory_used_mb Memory used in MB");
  lines.push("# TYPE glassmkr_memory_used_mb gauge");
  lines.push(`glassmkr_memory_used_mb ${snap.memory.used_mb}`);
  lines.push(`glassmkr_memory_total_mb ${snap.memory.total_mb}`);
  lines.push(`glassmkr_memory_available_mb ${snap.memory.available_mb}`);
  lines.push(`glassmkr_swap_used_mb ${snap.memory.swap_used_mb}`);

  // Disks
  lines.push("# HELP glassmkr_disk_used_percent Disk usage percentage");
  lines.push("# TYPE glassmkr_disk_used_percent gauge");
  for (const disk of snap.disks) {
    const labels = `mount="${escapeLabelValue(disk.mount)}",device="${escapeLabelValue(disk.device)}"`;
    lines.push(`glassmkr_disk_used_percent{${labels}} ${disk.percent_used}`);
    lines.push(`glassmkr_disk_total_gb{${labels}} ${disk.total_gb}`);
    lines.push(`glassmkr_disk_used_gb{${labels}} ${disk.used_gb}`);
  }

  // Network
  lines.push("# HELP glassmkr_net_rx_bytes_sec Network receive bytes per second");
  lines.push("# TYPE glassmkr_net_rx_bytes_sec gauge");
  for (const iface of snap.network) {
    const labels = `interface="${escapeLabelValue(iface.interface)}"`;
    lines.push(`glassmkr_net_rx_bytes_sec{${labels}} ${iface.rx_bytes_sec}`);
    lines.push(`glassmkr_net_tx_bytes_sec{${labels}} ${iface.tx_bytes_sec}`);
    lines.push(`glassmkr_net_rx_errors{${labels}} ${iface.rx_errors}`);
    lines.push(`glassmkr_net_tx_errors{${labels}} ${iface.tx_errors}`);
    lines.push(`glassmkr_net_speed_mbps{${labels}} ${iface.speed_mbps}`);
  }

  // SMART — attributes the drive did not report are skipped.
  for (const drive of snap.smart) {
    const labels = `device="${escapeLabelValue(drive.device)}",model="${escapeLabelValue(drive.model)}"`;
    if (drive.temperature_c != null) lines.push(`glassmkr_smart_temperature_c{${labels}} ${drive.temperature_c}`);
    if (drive.percentage_used != null) lines.push(`glassmkr_smart_percentage_used{${labels}} ${drive.percentage_used}`);
    if (drive.reallocated_sectors != null) lines.push(`glassmkr_smart_reallocated_sectors{${labels}} ${drive.reallocated_sectors}`);
  }

  // IPMI
  if (snap.ipmi?.available) {
    for (const sensor of snap.ipmi.sensors) {
      // Textual sensor readings have no numeric sample to expose.
      if (typeof sensor.value === "number") {
        lines.push(
          `glassmkr_ipmi_sensor{sensor="${escapeLabelValue(sensor.name)}",unit="${escapeLabelValue(sensor.unit)}"} ${sensor.value}`
        );
      }
    }
    lines.push(`glassmkr_ipmi_ecc_correctable ${snap.ipmi.ecc_errors.correctable}`);
    lines.push(`glassmkr_ipmi_ecc_uncorrectable ${snap.ipmi.ecc_errors.uncorrectable}`);

    // Fans
    if (snap.ipmi.fans) {
      for (const fan of snap.ipmi.fans) {
        lines.push(
          `glassmkr_ipmi_fan_rpm{fan="${escapeLabelValue(fan.name)}",status="${escapeLabelValue(fan.status)}"} ${fan.rpm}`
        );
      }
    }
  }

  // OS alerts
  lines.push(`glassmkr_oom_kills_recent ${snap.os_alerts.oom_kills_recent}`);
  lines.push(`glassmkr_zombie_processes ${snap.os_alerts.zombie_processes}`);

  // Security
  if (snap.security) {
    lines.push(`glassmkr_ssh_root_password_exposed ${snap.security.ssh?.rootPasswordExposed ? 1 : 0}`);
    lines.push(`glassmkr_firewall_active ${snap.security.firewall.active ? 1 : 0}`);
    if (snap.security.pending_updates?.available) {
      lines.push(`glassmkr_pending_security_updates ${snap.security.pending_updates.pendingCount}`);
    }
    const unmitigated = snap.security.kernel_vulns.filter((v) => !v.mitigated).length;
    lines.push(`glassmkr_kernel_vulns_unmitigated ${unmitigated}`);
    lines.push(`glassmkr_kernel_needs_reboot ${snap.security.kernel_reboot?.needsReboot ? 1 : 0}`);
    lines.push(`glassmkr_auto_updates_configured ${snap.security.auto_updates.configured ? 1 : 0}`);
  }

  return lines.join("\n") + "\n";
}
|
package/src/notify/email.ts
DELETED
|
@@ -1,68 +0,0 @@
|
|
|
1
|
-
import { execFile } from "child_process";
|
|
2
|
-
import { promisify } from "util";
|
|
3
|
-
import type { AlertResult } from "../lib/types.js";
|
|
4
|
-
|
|
5
|
-
const execFileAsync = promisify(execFile);

/**
 * Collapses CR/LF runs to a single space so that a value interpolated into
 * a mail header cannot start an additional header line.
 */
function sanitizeHeaderValue(value: string): string {
  return value.replace(/[\r\n]+/g, " ").trim();
}

/**
 * Sends an alert digest through the local `/usr/sbin/sendmail -t`.
 *
 * The recipient, the host part of the From address and the subject are
 * stripped of line breaks before they are written as headers; the message
 * body is passed through unchanged.
 *
 * @param config Mail settings; `to` is the recipient address.
 * @param newAlerts Alerts that just started firing.
 * @param resolvedAlerts Alerts that just cleared.
 * @param serverName Name of the reporting host, used in the subject, the
 *   From address and the body.
 * @returns true when sendmail exited successfully; false when no recipient
 *   is configured or the send failed (the failure is logged).
 */
export async function sendEmail(
  config: { to: string },
  newAlerts: AlertResult[],
  resolvedAlerts: AlertResult[],
  serverName: string
): Promise<boolean> {
  if (!config.to) return false;
  const recipient = sanitizeHeaderValue(config.to);
  if (!recipient) return false;

  const subject = sanitizeHeaderValue(buildSubject(newAlerts, resolvedAlerts, serverName));
  const body = buildBody(newAlerts, resolvedAlerts, serverName);

  const email = [
    `To: ${recipient}`,
    `From: glassmkr-crucible@${sanitizeHeaderValue(serverName)}`,
    `Subject: ${subject}`,
    `Content-Type: text/plain; charset=utf-8`,
    "",
    body,
  ].join("\n");

  try {
    const child = execFileAsync("/usr/sbin/sendmail", ["-t"], { timeout: 10000 });
    const stdin = child.child.stdin;
    // If sendmail is missing or exits early, the write fails on the pipe.
    // Without a listener that stream error would be an uncaught exception;
    // the awaited promise below already reports the real failure.
    stdin?.on("error", () => {});
    stdin?.write(email);
    stdin?.end();
    await child;
    return true;
  } catch {
    console.error("[email] Failed to send. Is sendmail/postfix/msmtp installed?");
    return false;
  }
}
|
|
38
|
-
|
|
39
|
-
/**
 * Builds the mail subject line.
 *
 * With at least one new alert the subject is tagged CRITICAL when any of
 * them is critical and WARNING otherwise, followed by the number of new
 * alerts. With no new alerts it reports how many alerts were resolved.
 *
 * @param newAlerts Alerts that just started firing.
 * @param resolvedAlerts Alerts that just cleared.
 * @param serverName Name of the reporting host.
 * @returns The subject text.
 */
function buildSubject(newAlerts: AlertResult[], resolvedAlerts: AlertResult[], serverName: string): string {
  if (newAlerts.length === 0) {
    return `[RESOLVED] ${serverName}: ${resolvedAlerts.length} alert(s) cleared`;
  }

  const hasCritical = newAlerts.some((alert) => alert.severity === "critical");
  const label = hasCritical ? "CRITICAL" : "WARNING";
  return `[${label}] ${serverName}: ${newAlerts.length} alert(s)`;
}
|
|
46
|
-
|
|
47
|
-
/**
 * Builds the plain-text mail body: a header with the server name and the
 * current time in ISO format, one paragraph per new alert (severity and
 * title, message, recommended action), one line per resolved alert, and a
 * footer.
 *
 * NOTE(review): the footer carries a hard-coded product/version string —
 * confirm whether it should track the running version.
 *
 * @param newAlerts Alerts that just started firing.
 * @param resolvedAlerts Alerts that just cleared.
 * @param serverName Name of the reporting host.
 * @returns The body text with "\n" line separators.
 */
function buildBody(newAlerts: AlertResult[], resolvedAlerts: AlertResult[], serverName: string): string {
  const header = [`Server: ${serverName}`, `Time: ${new Date().toISOString()}`, ""];

  const raised = newAlerts.flatMap((alert) => [
    `[${alert.severity.toUpperCase()}] ${alert.title}`,
    alert.message,
    `Action: ${alert.recommendation}`,
    "",
  ]);

  const cleared = resolvedAlerts.flatMap((alert) => [`[RESOLVED] ${alert.title}`, ""]);

  return [...header, ...raised, ...cleared, "---", "Glassmkr Collector v0.1.0"].join("\n");
}
|
package/src/notify/slack.ts
DELETED
|
@@ -1,46 +0,0 @@
|
|
|
1
|
-
import type { AlertResult } from "../lib/types.js";
|
|
2
|
-
|
|
3
|
-
/**
 * Escapes the three characters Slack treats as control characters in
 * mrkdwn text (`&`, `<`, `>`), so alert text cannot be misread as a link,
 * mention or entity.
 */
function escapeMrkdwn(text: string): string {
  return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
}

/**
 * Posts an alert digest to a Slack incoming webhook using Block Kit.
 *
 * Critical alerts are listed first, then warnings, then a single line with
 * the number of resolved alerts. Alert titles, recommendations and the
 * server name are escaped for mrkdwn before being embedded.
 *
 * @param webhookUrl Slack incoming-webhook URL.
 * @param newAlerts Alerts that just started firing.
 * @param resolvedAlerts Alerts that just cleared.
 * @param serverName Name of the reporting host.
 * @returns true when there was nothing to send or Slack answered with a 2xx
 *   status; false on a non-2xx answer or a network failure (logged).
 */
export async function sendSlack(
  webhookUrl: string,
  newAlerts: AlertResult[],
  resolvedAlerts: AlertResult[],
  serverName: string
): Promise<boolean> {
  const blocks: Record<string, unknown>[] = [];
  const host = escapeMrkdwn(serverName);
  const section = (text: string): Record<string, unknown> => ({
    type: "section",
    text: { type: "mrkdwn", text },
  });
  const alertSection = (a: AlertResult): Record<string, unknown> =>
    section(`*${escapeMrkdwn(a.title)}*\n${escapeMrkdwn(a.recommendation)}`);

  if (newAlerts.length > 0) {
    const criticals = newAlerts.filter((a) => a.severity === "critical");
    const warnings = newAlerts.filter((a) => a.severity === "warning");

    if (criticals.length > 0) {
      blocks.push(section(`\u{1F534} *${criticals.length} CRITICAL* on *${host}*`));
      for (const a of criticals) blocks.push(alertSection(a));
    }
    if (warnings.length > 0) {
      blocks.push(section(`\u{1F7E1} *${warnings.length} WARNING* on *${host}*`));
      for (const a of warnings) blocks.push(alertSection(a));
    }
  }

  if (resolvedAlerts.length > 0) {
    blocks.push(section(`\u2705 *${resolvedAlerts.length} resolved* on *${host}*`));
  }

  // Nothing to report counts as success, without touching the network.
  if (blocks.length === 0) return true;

  blocks.push({ type: "divider" });
  blocks.push({ type: "context", elements: [{ type: "mrkdwn", text: "Glassmkr Collector v0.1.0" }] });

  try {
    const res = await fetch(webhookUrl, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ blocks }),
      signal: AbortSignal.timeout(10000),
    });
    return res.ok;
  } catch {
    console.error("[slack] Failed to send notification");
    return false;
  }
}
|
package/src/notify/telegram.ts
DELETED
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
import type { AlertResult } from "../lib/types.js";
|
|
2
|
-
|
|
3
|
-
/** Alert rule identifier → priority bucket ("P1" is the most urgent). */
const PRIORITY_MAP: Record<string, string> = {
  raid_degraded: "P1", smart_failing: "P1", ecc_errors: "P1", psu_redundancy_loss: "P1", ipmi_fan_failure: "P1",
  oom_kills: "P2", ram_high: "P2", disk_space_high: "P2", ipmi_sel_critical: "P2", disk_io_errors: "P2", zfs_pool_unhealthy: "P2",
  cpu_iowait_high: "P3", nvme_wear_high: "P3", disk_latency_high: "P3", cpu_temperature_high: "P3",
  ssh_root_password: "P3", pending_security_updates: "P3", kernel_vulnerabilities: "P3", zfs_scrub_errors: "P3",
  swap_active: "P4", no_firewall: "P4", kernel_needs_reboot: "P4", unattended_upgrades_disabled: "P4",
  interface_errors: "P4", link_speed_mismatch: "P4", interface_saturation: "P4",
};

/** Priority bucket → heading text shown in the Telegram message. */
const PRIORITY_LABELS: Record<string, string> = {
  P1: "\u{1F534} P1 Urgent", P2: "\u{1F7E0} P2 High", P3: "\u{1F7E1} P3 Medium", P4: "\u{1F535} P4 Low",
};

/**
 * Resolves the priority bucket for an alert type.
 *
 * Only the table's own keys are consulted: a type that happens to match an
 * inherited object member (for example "constructor" or "toString") gets
 * the default instead of a non-string value from the prototype chain.
 *
 * @param alertType Rule identifier from `AlertResult.type`.
 * @returns The mapped bucket, or "P3" for unmapped types.
 */
function getPriority(alertType: string): string {
  const known = Object.prototype.hasOwnProperty.call(PRIORITY_MAP, alertType);
  const mapped = known ? PRIORITY_MAP[alertType] : undefined;
  return mapped || "P3";
}
|
|
19
|
-
|
|
20
|
-
/**
 * Escapes `&`, `<` and `>` for Telegram's HTML parse mode, which rejects a
 * message containing those characters outside of supported tags/entities.
 */
function escapeHtml(text: string): string {
  return text.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
}

/**
 * Sends an alert digest to a Telegram chat via the Bot API `sendMessage`
 * method, using HTML formatting.
 *
 * New alerts are grouped by priority (P1 → P4, as resolved by
 * `getPriority`), followed by the list of resolved alerts. Titles,
 * recommendations and the server name are HTML-escaped so that text such as
 * "usage < 5%" cannot make Telegram refuse the whole message.
 *
 * @param botToken Bot API token.
 * @param chatId Target chat identifier.
 * @param newAlerts Alerts that just started firing.
 * @param resolvedAlerts Alerts that just cleared.
 * @param serverName Name of the reporting host.
 * @returns true when there was nothing to send or Telegram answered with a
 *   2xx status; false on a non-2xx answer or a network failure (logged).
 */
export async function sendTelegram(
  botToken: string,
  chatId: string,
  newAlerts: AlertResult[],
  resolvedAlerts: AlertResult[],
  serverName: string
): Promise<boolean> {
  const parts: string[] = [];
  const host = escapeHtml(serverName);

  if (newAlerts.length > 0) {
    // Group by priority
    const byPriority: Record<string, AlertResult[]> = {};
    for (const a of newAlerts) {
      const p = getPriority(a.type);
      if (!byPriority[p]) byPriority[p] = [];
      byPriority[p].push(a);
    }

    for (const p of ["P1", "P2", "P3", "P4"]) {
      const alerts = byPriority[p];
      if (!alerts?.length) continue;
      parts.push(`${PRIORITY_LABELS[p]} on <b>${host}</b>:\n`);
      for (const a of alerts) {
        parts.push(` \u2022 <b>${escapeHtml(a.title)}</b>\n ${escapeHtml(a.recommendation)}\n`);
      }
    }
  }

  if (resolvedAlerts.length > 0) {
    parts.push(`\u2705 <b>${resolvedAlerts.length} resolved</b> on <b>${host}</b>:\n`);
    for (const a of resolvedAlerts) parts.push(` \u2022 ${escapeHtml(a.title)}\n`);
  }

  // Nothing to report counts as success, without touching the network.
  if (parts.length === 0) return true;

  try {
    const res = await fetch(`https://api.telegram.org/bot${botToken}/sendMessage`, {
      method: "POST",
      headers: { "Content-Type": "application/json" },
      body: JSON.stringify({ chat_id: chatId, text: parts.join("\n"), parse_mode: "HTML", disable_web_page_preview: true }),
      signal: AbortSignal.timeout(10000),
    });
    return res.ok;
  } catch {
    console.error("[telegram] Failed to send notification");
    return false;
  }
}
|
package/src/push/forge.ts
DELETED
|
@@ -1,109 +0,0 @@
|
|
|
1
|
-
import https from "https";
|
|
2
|
-
import tls from "tls";
|
|
3
|
-
import crypto from "crypto";
|
|
4
|
-
import type { Snapshot } from "../lib/types.js";
|
|
5
|
-
|
|
6
|
-
/** HTTPS agent enforcing the configured TLS pin; undefined when pinning is off. */
let agent: https.Agent | undefined;

/**
 * Configures certificate pinning for pushes to Forge.
 *
 * With a pin, an agent is created whose identity check first runs Node's
 * standard hostname verification and then compares the base64 SHA-256
 * digest of the certificate's `pubkey` with the pin. Without a pin, the
 * agent is cleared so that pushes use the default `fetch` path.
 *
 * @param tlsPin Expected base64 SHA-256 digest of the server public key;
 *   omit (or pass an empty string) to disable pinning.
 */
export function initForgeAgent(tlsPin?: string): void {
  if (tlsPin) {
    agent = new https.Agent({
      rejectUnauthorized: true,
      checkServerIdentity: (hostname: string, cert: any) => {
        // Standard hostname/SAN validation comes first; its error wins.
        const identityError = tls.checkServerIdentity(hostname, cert);
        if (identityError) return identityError;

        if (!cert.pubkey) return new Error("Certificate has no public key");

        const fingerprint = crypto.createHash("sha256").update(cert.pubkey).digest("base64");
        if (fingerprint === tlsPin) return undefined;

        return new Error(
          [
            `TLS pin mismatch for ${hostname}. `,
            `Expected: ${tlsPin}, Got: ${fingerprint}. `,
            `If the server certificate was rotated with a new key, update tls_pin in collector.yaml.`,
          ].join("")
        );
      },
    });
    return;
  }

  agent = undefined; // Use default (Node built-in fetch)
}
|
|
36
|
-
|
|
37
|
-
/**
 * Uploads a snapshot to Forge's ingest endpoint.
 *
 * When TLS pinning has been configured (a pinned agent exists) the request
 * goes through `pushWithAgent`; otherwise the built-in `fetch` is used.
 *
 * A 2xx answer counts as success even if its body is not valid JSON — the
 * body is only used to report the active-alert count — which matches the
 * pinned code path.
 *
 * @param url Base URL of the Forge instance.
 * @param apiKey Bearer token sent in the Authorization header.
 * @param snapshot Snapshot to upload.
 * @returns true on a 2xx answer; false on any other status or on a
 *   network/timeout failure (both are logged).
 */
export async function pushToForge(url: string, apiKey: string, snapshot: Snapshot): Promise<boolean> {
  // If TLS pinning is enabled, use https.request (fetch doesn't support custom agents)
  if (agent) {
    return pushWithAgent(url, apiKey, snapshot);
  }

  // Default: use fetch (no pinning)
  let response: Awaited<ReturnType<typeof fetch>>;
  try {
    response = await fetch(`${url}/api/v1/ingest`, {
      method: "POST",
      headers: { Authorization: `Bearer ${apiKey}`, "Content-Type": "application/json" },
      body: JSON.stringify(snapshot),
      signal: AbortSignal.timeout(10000),
    });
  } catch (err) {
    // Include the cause so connection, DNS and timeout failures can be told apart.
    const reason = err instanceof Error ? err.message : String(err);
    console.error(`[forge] Push failed, will retry next cycle: ${reason}`);
    return false;
  }

  if (!response.ok) {
    console.error(`[forge] Push failed: ${response.status} ${response.statusText}`);
    return false;
  }

  // The snapshot was accepted; an unreadable body must not turn that into a failure.
  let activeAlerts = 0;
  try {
    const data = (await response.json()) as { active_alerts?: number } | null;
    activeAlerts = data?.active_alerts ?? 0;
  } catch {
    /* ignore parse errors */
  }
  console.log(`[forge] Push successful. Active alerts: ${activeAlerts}`);
  return true;
}
|
|
63
|
-
|
|
64
|
-
/**
 * Uploads a snapshot through `https.request` with the pinned agent.
 *
 * The returned promise never rejects and settles exactly once: an invalid
 * URL, a request error, a timeout or a broken response stream all resolve
 * to false after being logged.
 *
 * @param url Base URL of the Forge instance.
 * @param apiKey Bearer token sent in the Authorization header.
 * @param snapshot Snapshot to upload.
 * @returns true on a 2xx answer, false otherwise.
 */
function pushWithAgent(url: string, apiKey: string, snapshot: Snapshot): Promise<boolean> {
  return new Promise((resolve) => {
    // Several failure events can fire for one request (e.g. timeout followed
    // by the error raised by destroy()); only the first outcome counts.
    let settled = false;
    const settle = (ok: boolean): void => {
      if (settled) return;
      settled = true;
      resolve(ok);
    };

    let endpoint: URL;
    try {
      endpoint = new URL(`${url}/api/v1/ingest`);
    } catch {
      // A throw inside the executor would reject the promise; report it like any other failure.
      console.error(`[forge] Push failed (pinned): invalid Forge URL`);
      settle(false);
      return;
    }
    const body = JSON.stringify(snapshot);

    const req = https.request({
      hostname: endpoint.hostname,
      port: endpoint.port ? parseInt(endpoint.port) : 443,
      path: endpoint.pathname,
      method: "POST",
      agent,
      headers: {
        Authorization: `Bearer ${apiKey}`,
        "Content-Type": "application/json",
        "Content-Length": Buffer.byteLength(body),
      },
      timeout: 10000,
    }, (res) => {
      let data = "";
      // Decode as UTF-8 at the stream level so multi-byte characters split
      // across chunks are reassembled correctly.
      res.setEncoding("utf8");
      res.on("data", (chunk: string) => {
        data += chunk;
      });
      res.on("error", (err) => {
        // Connection dropped mid-response: "end" will not fire.
        console.error(`[forge] Push failed (pinned): ${err.message}`);
        settle(false);
      });
      res.on("end", () => {
        const status = res.statusCode;
        if (status && status >= 200 && status < 300) {
          try {
            const reply = JSON.parse(data) as { active_alerts?: number } | null;
            console.log(`[forge] Push successful (pinned). Active alerts: ${reply?.active_alerts ?? 0}`);
          } catch { /* ignore parse errors */ }
          settle(true);
        } else {
          console.error(`[forge] Push failed (pinned): ${status}`);
          settle(false);
        }
      });
    });

    req.on("error", (err) => {
      console.error(`[forge] Push failed (pinned): ${err.message}`);
      settle(false);
    });
    req.on("timeout", () => {
      // destroy() with an error also triggers the "error" handler above, which logs it.
      req.destroy(new Error("Request timed out"));
      settle(false);
    });
    req.write(body);
    req.end();
  });
}
|
package/tsconfig.json
DELETED
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
{
|
|
2
|
-
"compilerOptions": {
|
|
3
|
-
"target": "ES2022",
|
|
4
|
-
"module": "NodeNext",
|
|
5
|
-
"moduleResolution": "NodeNext",
|
|
6
|
-
"strict": true,
|
|
7
|
-
"esModuleInterop": true,
|
|
8
|
-
"skipLibCheck": true,
|
|
9
|
-
"declaration": true,
|
|
10
|
-
"sourceMap": true,
|
|
11
|
-
"outDir": "./dist",
|
|
12
|
-
"rootDir": "./src"
|
|
13
|
-
},
|
|
14
|
-
"include": ["src/**/*"]
|
|
15
|
-
}
|