npm - @glassmkr/crucible - Versions diffs - 0.7.1 → 0.8.1 - Mend

@glassmkr/crucible 0.7.1 → 0.8.1

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (103)

package/dist/alerts/__tests__/rules.test.d.ts +1 -0
package/dist/alerts/__tests__/rules.test.js +437 -0
package/dist/alerts/__tests__/rules.test.js.map +1 -0
package/dist/alerts/rules.d.ts +8 -0
package/dist/alerts/rules.js +175 -34
package/dist/alerts/rules.js.map +1 -1
package/dist/api.d.ts +2 -0
package/dist/api.js +7 -0
package/dist/api.js.map +1 -0
package/dist/collect/__tests__/dmi.test.d.ts +1 -0
package/dist/collect/__tests__/dmi.test.js +133 -0
package/dist/collect/__tests__/dmi.test.js.map +1 -0
package/dist/collect/__tests__/ipmi.test.js +47 -1
package/dist/collect/__tests__/ipmi.test.js.map +1 -1
package/dist/collect/__tests__/thermal.test.d.ts +1 -0
package/dist/collect/__tests__/thermal.test.js +224 -0
package/dist/collect/__tests__/thermal.test.js.map +1 -0
package/dist/collect/dmi.d.ts +19 -0
package/dist/collect/dmi.js +118 -0
package/dist/collect/dmi.js.map +1 -0
package/dist/collect/ipmi.d.ts +27 -2
package/dist/collect/ipmi.js +90 -2
package/dist/collect/ipmi.js.map +1 -1
package/dist/collect/thermal.d.ts +10 -0
package/dist/collect/thermal.js +232 -0
package/dist/collect/thermal.js.map +1 -0
package/dist/config.d.ts +10 -0
package/dist/config.js +2 -0
package/dist/config.js.map +1 -1
package/dist/index.js +51 -1
package/dist/index.js.map +1 -1
package/dist/lib/__tests__/capability.test.d.ts +1 -0
package/dist/lib/__tests__/capability.test.js +87 -0
package/dist/lib/__tests__/capability.test.js.map +1 -0
package/dist/lib/__tests__/vendor-sensors.test.d.ts +1 -0
package/dist/lib/__tests__/vendor-sensors.test.js +49 -0
package/dist/lib/__tests__/vendor-sensors.test.js.map +1 -0
package/dist/lib/capability.d.ts +21 -0
package/dist/lib/capability.js +110 -0
package/dist/lib/capability.js.map +1 -0
package/dist/lib/cpu-thermal-chips.d.ts +2 -0
package/dist/lib/cpu-thermal-chips.js +28 -0
package/dist/lib/cpu-thermal-chips.js.map +1 -0
package/dist/lib/types.d.ts +58 -0
package/dist/lib/vendor-sensors.d.ts +27 -0
package/dist/lib/vendor-sensors.js +63 -0
package/dist/lib/vendor-sensors.js.map +1 -0
package/dist/notify/telegram.js +1 -1
package/dist/notify/telegram.js.map +1 -1
package/package.json +16 -1
package/rule-ids.json +29 -0
package/.dockerignore +0 -13
package/.github/ISSUE_TEMPLATE/bug_report.md +0 -24
package/.github/ISSUE_TEMPLATE/no_data.md +0 -26
package/.github/workflows/docker.yml +0 -53
package/.github/workflows/publish.yml +0 -25
package/Dockerfile +0 -59
package/config/collector.example.yaml +0 -43
package/docker-compose.yml +0 -26
package/scripts/sign-release.sh +0 -29
package/src/__tests__/cli.test.ts +0 -74
package/src/__tests__/reboot-marker.test.ts +0 -122
package/src/alerts/evaluator.ts +0 -15
package/src/alerts/rules.ts +0 -283
package/src/alerts/state.ts +0 -92
package/src/cli.ts +0 -112
package/src/collect/__tests__/ipmi.test.ts +0 -96
package/src/collect/__tests__/smart.test.ts +0 -68
package/src/collect/__tests__/system.test.ts +0 -29
package/src/collect/__tests__/zfs.test.ts +0 -72
package/src/collect/conntrack.ts +0 -27
package/src/collect/cpu.ts +0 -92
package/src/collect/disks.ts +0 -91
package/src/collect/fd.ts +0 -31
package/src/collect/io-errors.ts +0 -23
package/src/collect/io-latency.ts +0 -103
package/src/collect/ipmi.ts +0 -207
package/src/collect/memory.ts +0 -30
package/src/collect/network.ts +0 -193
package/src/collect/ntp.ts +0 -114
package/src/collect/os-alerts.ts +0 -43
package/src/collect/raid.ts +0 -40
package/src/collect/security.ts +0 -268
package/src/collect/smart.ts +0 -72
package/src/collect/system.ts +0 -32
package/src/collect/systemd.ts +0 -33
package/src/collect/zfs.ts +0 -66
package/src/config.ts +0 -65
package/src/index.ts +0 -221
package/src/lib/__tests__/parse.test.ts +0 -28
package/src/lib/exec.ts +0 -16
package/src/lib/parse.ts +0 -29
package/src/lib/reboot-marker.ts +0 -88
package/src/lib/types.ts +0 -226
package/src/lib/version-check.ts +0 -39
package/src/lib/version.ts +0 -33
package/src/metrics-server.ts +0 -123
package/src/notify/email.ts +0 -69
package/src/notify/slack.ts +0 -47
package/src/notify/telegram.ts +0 -65
package/src/push/forge.ts +0 -109
package/tsconfig.json +0 -15
package/vitest.config.ts +0 -12

package/config/collector.example.yaml DELETED Viewed

@@ -1,43 +0,0 @@
-# Glassmkr Crucible Configuration
-# Copy to /etc/glassmkr/crucible.yaml
-# Server identity
-server_name: "my-server"
-# Collection settings
-collection:
-  interval_seconds: 300        # How often to collect (default 5 minutes)
-  ipmi: true                   # Collect IPMI data (requires ipmitool)
-  smart: true                  # Collect SMART data (requires smartmontools)
-# Forge integration (optional dashboard)
-forge:
-  enabled: false
-  url: "https://forge.glassmkr.com"
-  api_key: ""                  # Get this from forge.glassmkr.com after registering a server
-# Alert thresholds (all optional, sensible defaults used if omitted)
-thresholds:
-  ram_percent: 90              # Alert when RAM usage exceeds this
-  swap_alert: true             # Alert on any swap usage
-  disk_percent: 85             # Alert when any disk exceeds this
-  iowait_percent: 20           # Alert when CPU iowait exceeds this
-  nvme_wear_percent: 85        # Alert when NVMe lifetime wear exceeds this
-  disk_latency_nvme_ms: 50     # p99 latency threshold for NVMe
-  disk_latency_hdd_ms: 200     # p99 latency threshold for HDD
-  cpu_temp_warning_c: 80       # CPU temperature warning
-  cpu_temp_critical_c: 90      # CPU temperature critical
-  interface_utilization_percent: 90  # Network saturation threshold
-# Notification channels (all optional)
-channels:
-  telegram:
-    enabled: false
-    bot_token: ""
-    chat_id: ""
-  email:
-    enabled: false
-    to: ""
-  slack:
-    enabled: false
-    webhook_url: ""

package/docker-compose.yml DELETED Viewed

@@ -1,26 +0,0 @@
-# Glassmkr Crucible - docker compose deployment
-#
-# Before starting, create /etc/glassmkr/collector.yaml on the host with your
-# Forge collector key. See https://forge.glassmkr.com/docs/getting-started.
-#
-# Why privileged + host network:
-# - privileged: true gives access to /dev/ipmi0 (IPMI sensors) and raw disk devices (SMART)
-# - network_mode: host lets the agent read the real host network interfaces and bond state
-# - /proc and /sys are mounted so the agent monitors the host, not the container
-services:
-  crucible:
-    image: ghcr.io/glassmkr/crucible:latest
-    container_name: glassmkr-crucible
-    restart: unless-stopped
-    privileged: true
-    network_mode: host
-    volumes:
-      - /etc/glassmkr:/etc/glassmkr:ro
-      - /proc:/host/proc:ro
-      - /sys:/host/sys:ro
-      - /dev:/dev:ro
-      - /run/dbus:/run/dbus:ro
-    environment:
-      - HOST_PROC=/host/proc
-      - HOST_SYS=/host/sys

package/scripts/sign-release.sh DELETED Viewed

@@ -1,29 +0,0 @@
-#!/bin/bash
-# Sign a Crucible release
-# Usage: ./scripts/sign-release.sh <version>
-VERSION=$1
-DIST_DIR="dist"
-if [ -z "$VERSION" ]; then
-  echo "Usage: ./scripts/sign-release.sh <version>"
-  echo "Example: ./scripts/sign-release.sh v0.2.0"
-  exit 1
-fi
-echo "Signing Crucible $VERSION"
-# Generate checksums
-cd "$DIST_DIR" || exit 1
-sha256sum *.tar.gz *.deb 2>/dev/null > SHA256SUMS || sha256sum *.js > SHA256SUMS
-# Sign the checksums file
-gpg --armor --detach-sign --local-user security@glassmkr.com SHA256SUMS
-echo ""
-echo "Release artifacts:"
-ls -la SHA256SUMS SHA256SUMS.asc
-echo ""
-echo "Verify with:"
-echo "  gpg --verify SHA256SUMS.asc SHA256SUMS"
-echo "  sha256sum -c SHA256SUMS"

package/src/__tests__/cli.test.ts DELETED Viewed

@@ -1,74 +0,0 @@
-import { describe, it, expect } from "vitest";
-import { parseCliArgs, helpText, DEFAULT_CONFIG_PATH } from "../cli.js";
-describe("parseCliArgs", () => {
-  it("--version returns version string and mode=version", () => {
-    const { result, output } = parseCliArgs(["--version"], "1.2.3");
-    expect(result.mode).toBe("version");
-    expect(output).toBe("glassmkr-crucible v1.2.3");
-  });
-  it("-v aliases --version", () => {
-    const { result, output } = parseCliArgs(["-v"], "1.2.3");
-    expect(result.mode).toBe("version");
-    expect(output).toBe("glassmkr-crucible v1.2.3");
-  });
-  it("--help returns help text and mode=help", () => {
-    const { result, output } = parseCliArgs(["--help"], "1.2.3");
-    expect(result.mode).toBe("help");
-    expect(output).toContain("glassmkr-crucible v1.2.3");
-    expect(output).toContain("Usage:");
-    expect(output).toContain("--version");
-    expect(output).toContain("--help");
-    expect(output).toContain("--config");
-  });
-  it("-h aliases --help", () => {
-    const { result } = parseCliArgs(["-h"], "1.2.3");
-    expect(result.mode).toBe("help");
-  });
-  it("no args returns mode=run with the default config path", () => {
-    const { result, output } = parseCliArgs([], "1.2.3");
-    expect(result.mode).toBe("run");
-    expect(result.configPath).toBe(DEFAULT_CONFIG_PATH);
-    expect(output).toBeNull();
-  });
-  it("-c accepts a path in the next argument", () => {
-    const { result } = parseCliArgs(["-c", "/tmp/a.yaml"], "1.2.3");
-    expect(result.configPath).toBe("/tmp/a.yaml");
-  });
-  it("--config accepts a path in the next argument", () => {
-    const { result } = parseCliArgs(["--config", "/tmp/b.yaml"], "1.2.3");
-    expect(result.configPath).toBe("/tmp/b.yaml");
-  });
-  it("--config=PATH form works", () => {
-    const { result } = parseCliArgs(["--config=/tmp/c.yaml"], "1.2.3");
-    expect(result.configPath).toBe("/tmp/c.yaml");
-  });
-  it("legacy positional argument still sets config path", () => {
-    const { result } = parseCliArgs(["/tmp/legacy.yaml"], "1.2.3");
-    expect(result.configPath).toBe("/tmp/legacy.yaml");
-  });
-  it("--version wins over a provided config path (no collector start)", () => {
-    const { result } = parseCliArgs(["--config", "/tmp/x.yaml", "--version"], "1.2.3");
-    expect(result.mode).toBe("version");
-  });
-});
-describe("helpText", () => {
-  it("mentions the binary name, default config path, and both flags", () => {
-    const txt = helpText("0.6.1");
-    expect(txt).toContain("glassmkr-crucible v0.6.1");
-    expect(txt).toContain(DEFAULT_CONFIG_PATH);
-    expect(txt).toContain("-v, --version");
-    expect(txt).toContain("-h, --help");
-    expect(txt).toContain("-c, --config");
-  });
-});

package/src/__tests__/reboot-marker.test.ts DELETED Viewed

@@ -1,122 +0,0 @@
-import { describe, it, expect, beforeEach, afterEach } from "vitest";
-import { mkdtempSync, existsSync, writeFileSync, statSync, rmSync, chmodSync } from "node:fs";
-import { tmpdir } from "node:os";
-import { join } from "node:path";
-import {
-  consumeRebootMarker,
-  writeRebootMarker,
-  parseDuration,
-} from "../lib/reboot-marker.js";
-import { parseCliArgs } from "../cli.js";
-let tmpDir: string;
-let path: string;
-beforeEach(() => {
-  tmpDir = mkdtempSync(join(tmpdir(), "crucible-test-"));
-  path = join(tmpDir, "reboot-expected");
-});
-afterEach(() => {
-  try { rmSync(tmpDir, { recursive: true, force: true }); } catch {}
-});
-describe("consumeRebootMarker", () => {
-  it("7. marker present, not expired: returns flag, deletes file", () => {
-    const future = new Date(Date.now() + 5 * 60_000).toISOString();
-    writeFileSync(path, JSON.stringify({ expires_at: future, reason: "kernel update" }));
-    const out = consumeRebootMarker(path);
-    expect(out).toEqual({ expected: true, reason: "kernel update" });
-    expect(existsSync(path)).toBe(false);
-  });
-  it("8. marker present, expired: returns null, deletes file", () => {
-    const past = new Date(Date.now() - 60_000).toISOString();
-    writeFileSync(path, JSON.stringify({ expires_at: past, reason: "stale" }));
-    expect(consumeRebootMarker(path)).toBeNull();
-    expect(existsSync(path)).toBe(false);
-  });
-  it("9. marker absent: returns null, no throw", () => {
-    expect(consumeRebootMarker(path)).toBeNull();
-  });
-  it("15. malformed JSON: returns null, file deleted, no crash", () => {
-    writeFileSync(path, "{not json at all");
-    expect(consumeRebootMarker(path)).toBeNull();
-    expect(existsSync(path)).toBe(false);
-  });
-  it("invalid expires_at (missing): returns null, file deleted", () => {
-    writeFileSync(path, JSON.stringify({ reason: "oops" }));
-    expect(consumeRebootMarker(path)).toBeNull();
-    expect(existsSync(path)).toBe(false);
-  });
-  it("consumed marker cannot be re-read (single-use)", () => {
-    const future = new Date(Date.now() + 60_000).toISOString();
-    writeFileSync(path, JSON.stringify({ expires_at: future }));
-    expect(consumeRebootMarker(path)).not.toBeNull();
-    expect(consumeRebootMarker(path)).toBeNull();
-  });
-});
-describe("writeRebootMarker", () => {
-  it("13. writes file at given path with correct TTL and reason, 0600 mode", () => {
-    const now = new Date("2026-04-21T22:00:00Z");
-    const res = writeRebootMarker({ path, reason: "kernel update", ttlMs: 10 * 60_000, now });
-    expect(res.path).toBe(path);
-    expect(res.expires_at).toBe("2026-04-21T22:10:00.000Z");
-    expect(existsSync(path)).toBe(true);
-    const mode = statSync(path).mode & 0o777;
-    expect(mode).toBe(0o600);
-    const round = consumeRebootMarker(path, new Date("2026-04-21T22:05:00Z"));
-    expect(round).toEqual({ expected: true, reason: "kernel update" });
-  });
-  it("default TTL is 10 minutes", () => {
-    const now = new Date("2026-04-21T22:00:00Z");
-    const res = writeRebootMarker({ path, now });
-    expect(res.expires_at).toBe("2026-04-21T22:10:00.000Z");
-  });
-});
-describe("parseDuration", () => {
-  it.each([
-    ["10m", 600_000],
-    ["2h", 7_200_000],
-    ["600s", 600_000],
-    ["500ms", 500],
-    ["30", 30_000], // bare number -> seconds
-  ])("%s -> %d ms", (input, ms) => {
-    expect(parseDuration(input)).toBe(ms);
-  });
-  it("rejects garbage", () => {
-    expect(parseDuration("forever")).toBeNull();
-    expect(parseDuration("-5m")).toBeNull();
-    expect(parseDuration("")).toBeNull();
-  });
-});
-describe("CLI parseCliArgs subcommands", () => {
-  it("14. `reboot` subcommand captured with flags", () => {
-    const { result } = parseCliArgs(["reboot", "--reason", "kernel update"], "1.0.0");
-    expect(result.mode).toBe("reboot");
-    expect(result.reason).toBe("kernel update");
-  });
-  it("`mark-reboot` with --ttl parsed through", () => {
-    const { result } = parseCliArgs(["mark-reboot", "--ttl=5m", "--reason=test"], "1.0.0");
-    expect(result.mode).toBe("mark-reboot");
-    expect(result.ttl).toBe("5m");
-    expect(result.reason).toBe("test");
-  });
-  it("`mark-reboot --help` returns help output without running", () => {
-    const { result, output } = parseCliArgs(["mark-reboot", "--help"], "1.0.0");
-    expect(result.mode).toBe("help");
-    expect(output).toContain("mark-reboot");
-  });
-  it("top-level help lists the new subcommands", () => {
-    const { output } = parseCliArgs(["--help"], "1.0.0");
-    expect(output).toMatch(/mark-reboot/);
-    expect(output).toMatch(/reboot/);
-  });
-});

package/src/alerts/evaluator.ts DELETED Viewed

@@ -1,15 +0,0 @@
-import { allRules } from "./rules.js";
-import type { Snapshot, AlertResult } from "../lib/types.js";
-import type { Config } from "../config.js";
-export function evaluateAlerts(snapshot: Snapshot, thresholds: Config["thresholds"]): AlertResult[] {
-  const results: AlertResult[] = [];
-  for (const rule of allRules) {
-    try {
-      results.push(...rule.evaluate(snapshot, thresholds));
-    } catch (err) {
-      console.error(`[alerts] Rule ${rule.type} error:`, err);
-    }
-  }
-  return results;
-}

package/src/alerts/rules.ts DELETED Viewed

@@ -1,283 +0,0 @@
-// Alert rules for the collector are identical to the Forge evaluator.
-// Re-export from a shared definition to avoid duplication.
-// For the collector, we use the same 15 rules but with local thresholds from config.
-import type { Snapshot, AlertResult } from "../lib/types.js";
-import type { Config } from "../config.js";
-export interface AlertRule {
-  type: string;
-  evaluate(snap: Snapshot, thresholds: Config["thresholds"]): AlertResult[];
-}
-export const allRules: AlertRule[] = [
-  // 1. RAM high
-  { type: "ram_high", evaluate(snap, t) {
-    if (!snap.memory?.total_mb) return [];
-    const pct = (snap.memory.used_mb / snap.memory.total_mb) * 100;
-    if (pct < (t.ram_percent ?? 90)) return [];
-    return [{ type: "ram_high", severity: pct >= 95 ? "critical" : "warning",
-      title: `RAM usage at ${pct.toFixed(1)}%`,
-      message: `Using ${snap.memory.used_mb}MB of ${snap.memory.total_mb}MB. ${snap.memory.available_mb}MB available.`,
-      evidence: { used_mb: snap.memory.used_mb, total_mb: snap.memory.total_mb, percent: Math.round(pct * 10) / 10 },
-      recommendation: "Check: ps aux --sort=-rss | head -20" }];
-  }},
-  // 2. Swap active
-  { type: "swap_active", evaluate(snap, t) {
-    if (t.swap_alert === false || !snap.memory || snap.memory.swap_used_mb <= 0) return [];
-    return [{ type: "swap_active", severity: "warning", title: `Swap in use: ${snap.memory.swap_used_mb}MB`,
-      message: "Server is using swap space, indicating memory pressure.",
-      evidence: { swap_used_mb: snap.memory.swap_used_mb },
-      recommendation: "Check: free -h && ps aux --sort=-rss | head -20" }];
-  }},
-  // 3. Disk space high
-  { type: "disk_space_high", evaluate(snap, t) {
-    if (!snap.disks) return [];
-    const threshold = t.disk_percent ?? 85;
-    return snap.disks.filter(d => d.percent_used >= threshold).map(d => ({
-      type: "disk_space_high", severity: d.percent_used >= 95 ? "critical" as const : "warning" as const,
-      title: `Disk ${d.mount} at ${d.percent_used}%`,
-      message: `${d.device}: ${d.used_gb}GB of ${d.total_gb}GB used. ${d.available_gb}GB available.`,
-      evidence: { device: d.device, mount: d.mount, percent_used: d.percent_used },
-      recommendation: "Check: du -sh /* | sort -rh | head -20" }));
-  }},
-  // 4. CPU iowait
-  { type: "cpu_iowait_high", evaluate(snap, t) {
-    if (!snap.cpu || snap.cpu.iowait_percent < (t.iowait_percent ?? 20)) return [];
-    return [{ type: "cpu_iowait_high", severity: "warning", title: `CPU iowait at ${snap.cpu.iowait_percent.toFixed(1)}%`,
-      message: `High I/O wait: CPU spending ${snap.cpu.iowait_percent.toFixed(1)}% waiting for disk.`,
-      evidence: { iowait_percent: snap.cpu.iowait_percent },
-      recommendation: "Check: iotop -oP or iostat -x 1 5" }];
-  }},
-  // 5. OOM kills
-  { type: "oom_kills", evaluate(snap) {
-    if (!snap.os_alerts || snap.os_alerts.oom_kills_recent <= 0) return [];
-    return [{ type: "oom_kills", severity: "critical", title: `${snap.os_alerts.oom_kills_recent} OOM kill(s)`,
-      message: `Kernel OOM killer terminated ${snap.os_alerts.oom_kills_recent} process(es).`,
-      evidence: { oom_kills_recent: snap.os_alerts.oom_kills_recent },
-      recommendation: "Check: dmesg | grep -i 'out of memory'" }];
-  }},
-  // 6. SMART failing
-  { type: "smart_failing", evaluate(snap) {
-    if (!snap.smart) return [];
-    return snap.smart.filter(d => d.health !== "PASSED" || (d.reallocated_sectors && d.reallocated_sectors > 0) || (d.pending_sectors && d.pending_sectors > 0))
-      .map(d => ({ type: "smart_failing", severity: "critical" as const,
-        title: `SMART failure: ${d.device}`, message: `${d.model}: drive showing signs of failure.`,
-        evidence: { device: d.device, health: d.health, reallocated_sectors: d.reallocated_sectors, pending_sectors: d.pending_sectors },
-        recommendation: `Back up data. Schedule replacement for ${d.device}.` }));
-  }},
-  // 7. NVMe wear
-  { type: "nvme_wear_high", evaluate(snap, t) {
-    if (!snap.smart) return [];
-    const threshold = t.nvme_wear_percent ?? 85;
-    return snap.smart.filter(d => d.percentage_used != null && d.percentage_used >= threshold)
-      .map(d => ({ type: "nvme_wear_high", severity: d.percentage_used! >= 95 ? "critical" as const : "warning" as const,
-        title: `NVMe ${d.device} wear at ${d.percentage_used}%`, message: `${d.model} at ${d.percentage_used}% lifetime wear.`,
-        evidence: { device: d.device, percentage_used: d.percentage_used },
-        recommendation: "Plan drive replacement." }));
-  }},
-  // 8. RAID degraded
-  { type: "raid_degraded", evaluate(snap) {
-    if (!snap.raid) return [];
-    return snap.raid.filter(r => r.degraded || r.failed_disks.length > 0)
-      .map(r => ({ type: "raid_degraded", severity: "critical" as const,
-        title: `RAID ${r.device} degraded`, message: `${r.device} (${r.level}) degraded. Failed: ${r.failed_disks.join(", ") || "unknown"}.`,
-        evidence: { device: r.device, failed_disks: r.failed_disks },
-        recommendation: "Replace failed drive immediately." }));
-  }},
-  // 9. Disk latency
-  { type: "disk_latency_high", evaluate(snap, t) {
-    if (!snap.disks) return [];
-    return snap.disks.filter(d => {
-      if (d.latency_p99_ms == null) return false;
-      const thresh = d.device.includes("nvme") ? (t.disk_latency_nvme_ms ?? 50) : (t.disk_latency_hdd_ms ?? 200);
-      return d.latency_p99_ms >= thresh;
-    }).map(d => ({ type: "disk_latency_high", severity: "warning" as const,
-      title: `Disk ${d.device} latency ${d.latency_p99_ms!.toFixed(1)}ms`,
-      message: `p99 I/O latency on ${d.device} is high.`,
-      evidence: { device: d.device, latency_p99_ms: d.latency_p99_ms },
-      recommendation: "Check: iotop -oP" }));
-  }},
-  // 10. Interface errors
-  { type: "interface_errors", evaluate(snap) {
-    if (!snap.network) return [];
-    return snap.network.filter(i => (i.rx_errors + i.tx_errors + i.rx_drops + i.tx_drops) > 0)
-      .map(i => ({ type: "interface_errors", severity: "warning" as const,
-        title: `${i.interface}: errors/drops detected`,
-        message: `RX errors=${i.rx_errors}, TX errors=${i.tx_errors}, RX drops=${i.rx_drops}, TX drops=${i.tx_drops}.`,
-        evidence: { interface: i.interface, rx_errors: i.rx_errors, tx_errors: i.tx_errors, rx_drops: i.rx_drops, tx_drops: i.tx_drops },
-        recommendation: "Check cables and SFP/transceiver." }));
-  }},
-  // 11. Link speed mismatch
-  { type: "link_speed_mismatch", evaluate(snap) {
-    if (!snap.network) return [];
-    return snap.network.filter(i => i.speed_mbps > 0 && i.speed_mbps < 1000)
-      .map(i => ({ type: "link_speed_mismatch", severity: "warning" as const,
-        title: `${i.interface} at ${i.speed_mbps} Mbps`,
-        message: `Interface negotiated below 1 Gbps.`,
-        evidence: { interface: i.interface, speed_mbps: i.speed_mbps },
-        recommendation: "Check cable, SFP, switch port config." }));
-  }},
-  // 12. Interface saturation
-  { type: "interface_saturation", evaluate(snap, t) {
-    if (!snap.network) return [];
-    const threshold = (t.interface_utilization_percent ?? 90) / 100;
-    return snap.network.filter(i => {
-      if (!i.speed_mbps) return false;
-      const maxBps = (i.speed_mbps * 1_000_000) / 8;
-      return Math.max(i.rx_bytes_sec, i.tx_bytes_sec) / maxBps >= threshold;
-    }).map(i => {
-      const maxBps = (i.speed_mbps * 1_000_000) / 8;
-      const util = Math.max(i.rx_bytes_sec, i.tx_bytes_sec) / maxBps * 100;
-      return { type: "interface_saturation", severity: "warning" as const,
-        title: `${i.interface} at ${util.toFixed(0)}% utilization`,
-        message: `Interface ${i.interface} (${i.speed_mbps} Mbps) near saturation.`,
-        evidence: { interface: i.interface, utilization_percent: Math.round(util * 10) / 10 },
-        recommendation: "Check: iftop or nload" };
-    });
-  }},
-  // 13. CPU temperature
-  { type: "cpu_temperature_high", evaluate(snap, t) {
-    if (!snap.ipmi?.available || !snap.ipmi.sensors) return [];
-    const warn = t.cpu_temp_warning_c ?? 80;
-    return snap.ipmi.sensors.filter(s => {
-      const n = s.name.toLowerCase();
-      if (!n.includes("cpu") && !n.includes("temp")) return false;
-      const v = typeof s.value === "number" ? s.value : parseFloat(String(s.value));
-      return !isNaN(v) && v >= warn;
-    }).map(s => {
-      const v = typeof s.value === "number" ? s.value : parseFloat(String(s.value));
-      const crit = s.upper_critical ?? (t.cpu_temp_critical_c ?? 90);
-      return { type: "cpu_temperature_high", severity: v >= crit ? "critical" as const : "warning" as const,
-        title: `${s.name}: ${v}${s.unit}`, message: `Temperature above warning threshold.`,
-        evidence: { sensor: s.name, value: v },
-        recommendation: "Check cooling, fans, airflow." };
-    });
-  }},
-  // 14. ECC errors
-  { type: "ecc_errors", evaluate(snap) {
-    if (!snap.ipmi?.ecc_errors) return [];
-    const { correctable, uncorrectable } = snap.ipmi.ecc_errors;
-    if (correctable <= 0 && uncorrectable <= 0) return [];
-    if (uncorrectable > 0) return [{ type: "ecc_errors", severity: "critical",
-      title: `${uncorrectable} uncorrectable ECC error(s)`, message: "Data corruption possible. DIMM failing.",
-      evidence: { correctable, uncorrectable },
-      recommendation: "Replace DIMM immediately. Run: ipmitool sdr type Memory" }];
-    return [{ type: "ecc_errors", severity: "warning",
-      title: `${correctable} correctable ECC error(s)`, message: "Early warning of DIMM failure.",
-      evidence: { correctable, uncorrectable },
-      recommendation: "Schedule DIMM replacement. Run: ipmitool sdr type Memory" }];
-  }},
-  // 15. PSU redundancy
-  { type: "psu_redundancy_loss", evaluate(snap) {
-    if (!snap.ipmi?.available || !snap.ipmi.sensors) return [];
-    const psus = snap.ipmi.sensors.filter(s => { const n = s.name.toLowerCase(); return n.includes("psu") || n.includes("power supply"); });
-    if (psus.length < 2) return [];
-    const failed = psus.filter(s => { const st = String(s.status).toLowerCase(); const v = String(s.value).toLowerCase();
-      return st.includes("fail") || st.includes("absent") || v.includes("fail") || v.includes("absent"); });
-    if (failed.length === 0) return [];
-    return [{ type: "psu_redundancy_loss", severity: "critical",
-      title: "PSU redundancy lost", message: `${failed.length} PSU(s) failed/absent: ${failed.map(p => p.name).join(", ")}.`,
-      evidence: { failed: failed.map(p => ({ name: p.name, status: p.status })) },
-      recommendation: "Replace failed PSU. Check power connections." }];
-  }},
-  // 19. IPMI SEL critical events
-  { type: "ipmi_sel_critical", evaluate(snap) {
-    if (!snap.ipmi?.available || !snap.ipmi.sel_events_recent?.length) return [];
-    const critical = snap.ipmi.sel_events_recent.filter(e => e.severity === "critical" && e.direction === "Asserted");
-    if (critical.length === 0) return [];
-    const byType: Record<string, typeof critical> = {};
-    for (const e of critical) { if (!byType[e.sensor_type]) byType[e.sensor_type] = []; byType[e.sensor_type].push(e); }
-    const details = Object.entries(byType).map(([t, evts]) => `${t}: ${evts.map(e => `${e.sensor}: ${e.event}`).join(", ")}`).join("; ");
-    const recs: string[] = [];
-    if (byType.memory) recs.push("Memory errors: identify slot with `ipmitool sel elist | grep -i memory`. Schedule DIMM replacement.");
-    if (byType.power) recs.push("PSU event: check physical PSU and connections. Verify redundancy: `ipmitool chassis status`.");
-    if (byType.watchdog) recs.push("Watchdog reset: OS or BMC became unresponsive. Check dmesg for root cause.");
-    if (byType.processor) recs.push("CPU event: check for thermal throttling or MCE. Run `dmesg | grep -i mce`.");
-    if (recs.length === 0) recs.push("Review full SEL: `ipmitool sel elist`.");
-    return [{ type: "ipmi_sel_critical", severity: "critical",
-      title: `IPMI: ${critical.length} critical hardware event(s)`,
-      message: `BMC System Event Log: ${critical.length} critical event(s). ${details}`,
-      evidence: { critical_events: critical, sensor_types: Object.keys(byType) },
-      recommendation: recs.join(" ") }];
-  }},
-  // 20. Fan failure
-  { type: "ipmi_fan_failure", evaluate(snap) {
-    if (!snap.ipmi?.available || !snap.ipmi.fans?.length) return [];
-    const failed = snap.ipmi.fans.filter(f => f.status === "critical" || (f.rpm === 0 && f.status !== "absent"));
-    if (failed.length === 0) return [];
-    const total = snap.ipmi.fans.filter(f => f.status !== "absent").length;
-    const names = failed.map(f => `${f.name} (${f.rpm} RPM)`).join(", ");
-    return [{ type: "ipmi_fan_failure", severity: "critical",
-      title: `Fan failure: ${failed.length} of ${total} fans`,
-      message: `${failed.length} fan(s) stopped or critically slow: ${names}. Reduced cooling capacity.`,
-      evidence: { failed_fans: failed, total_fans: total, all_fans: snap.ipmi.fans.filter(f => f.status !== "absent") },
-      recommendation: "Check physical fans. Monitor temps: `ipmitool sdr type Temperature`. Replace failed fan module." }];
-  }},
-  // === Security (6) ===
-  // 21. SSH root password login
-  { type: "ssh_root_password", evaluate(snap) {
-    if (!snap.security?.ssh?.rootPasswordExposed) return [];
-    return [{ type: "ssh_root_password", severity: "warning",
-      title: "SSH root login with password enabled",
-      message: `PermitRootLogin is "${snap.security.ssh.permitRootLogin}" and PasswordAuthentication is "${snap.security.ssh.passwordAuthentication}". Root can be brute-forced over SSH.`,
-      evidence: { permitRootLogin: snap.security.ssh.permitRootLogin, passwordAuthentication: snap.security.ssh.passwordAuthentication },
-      recommendation: 'Set "PermitRootLogin prohibit-password" in /etc/ssh/sshd_config and restart sshd. Key-based root login still works.' }];
-  }},
-  // 22. No firewall
-  { type: "no_firewall", evaluate(snap) {
-    if (!snap.security || snap.security.firewall.active) return [];
-    return [{ type: "no_firewall", severity: "warning" as const,
-      title: "No firewall active",
-      message: "No active firewall rules detected (checked UFW, firewalld, nftables, iptables). All ports are exposed unless protected by network-level ACLs.",
-      evidence: { source: snap.security.firewall.source },
-      recommendation: 'Enable a firewall: "sudo ufw enable" (Debian/Ubuntu) or "sudo systemctl start firewalld" (RHEL/Rocky).' }];
-  }},
-  // 23. Pending security updates
-  { type: "pending_security_updates", evaluate(snap, t) {
-    if (!snap.security?.pending_updates?.available) return [];
-    const maxPending = 10;
-    if (snap.security.pending_updates.pendingCount <= maxPending) return [];
-    const d = snap.security.pending_updates;
-    return [{ type: "pending_security_updates", severity: "warning",
-      title: `${d.pendingCount} security updates pending`,
-      message: `${d.pendingCount} security updates pending on this ${d.distro} server.`,
-      evidence: { pendingCount: d.pendingCount, distro: d.distro },
-      recommendation: d.distro === "ubuntu" || d.distro === "debian" ? 'Apply with: "sudo apt-get upgrade"' : 'Apply with: "sudo dnf update --security"' }];
-  }},
-  // 24. Kernel vulnerabilities
-  { type: "kernel_vulnerabilities", evaluate(snap) {
-    if (!snap.security?.kernel_vulns?.length) return [];
-    const unmitigated = snap.security.kernel_vulns.filter(v => !v.mitigated);
-    if (unmitigated.length === 0) return [];
-    const details = unmitigated.map(v => `${v.name}: ${v.status}`).join("; ");
-    return [{ type: "kernel_vulnerabilities", severity: "warning",
-      title: `${unmitigated.length} CPU vulnerability mitigations missing`,
-      message: `Unmitigated: ${details}. Update the kernel and CPU microcode to apply mitigations.`,
-      evidence: { unmitigated, total: snap.security.kernel_vulns.length },
-      recommendation: 'Check: "grep . /sys/devices/system/cpu/vulnerabilities/*". Update kernel and microcode packages.' }];
-  }},
-  // 25. Kernel needs reboot
-  { type: "kernel_needs_reboot", evaluate(snap) {
-    if (!snap.security?.kernel_reboot?.needsReboot) return [];
-    const k = snap.security.kernel_reboot;
-    return [{ type: "kernel_needs_reboot", severity: "warning" as const,
-      title: "Reboot required for kernel update",
-      message: `Running kernel: ${k.running}. Installed kernel: ${k.installed}. A reboot is needed to apply the newer kernel.`,
-      evidence: { running: k.running, installed: k.installed },
-      recommendation: "Schedule a reboot to apply the newer kernel. Security patches may not be active until then." }];
-  }},
-  // 26. Unattended upgrades disabled
-  { type: "unattended_upgrades_disabled", evaluate(snap) {
-    if (!snap.security || snap.security.auto_updates.configured) return [];
-    const a = snap.security.auto_updates;
-    const hint = a.mechanism === "unattended-upgrades" ? 'Enable: "sudo dpkg-reconfigure -plow unattended-upgrades"'
-      : a.mechanism === "dnf-automatic" ? 'Enable: "sudo systemctl enable --now dnf-automatic-install.timer"'
-      : 'Install: "sudo apt install unattended-upgrades" (Debian/Ubuntu) or "sudo dnf install dnf-automatic" (RHEL/Rocky)';
-    return [{ type: "unattended_upgrades_disabled", severity: "warning" as const,
-      title: "Automatic security updates not configured",
-      message: `${a.details}. Without automatic updates, security patches must be applied manually.`,
-      evidence: { mechanism: a.mechanism, details: a.details },
-      recommendation: hint }];
-  }},
-];