npm - @glassmkr/crucible - Versions diffs - 0.7.1 → 0.8.0 - Mend

@glassmkr/crucible 0.7.1 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (103) hide show

package/dist/alerts/__tests__/rules.test.d.ts +1 -0
package/dist/alerts/__tests__/rules.test.js +325 -0
package/dist/alerts/__tests__/rules.test.js.map +1 -0
package/dist/alerts/rules.d.ts +8 -0
package/dist/alerts/rules.js +139 -32
package/dist/alerts/rules.js.map +1 -1
package/dist/api.d.ts +2 -0
package/dist/api.js +7 -0
package/dist/api.js.map +1 -0
package/dist/collect/__tests__/dmi.test.d.ts +1 -0
package/dist/collect/__tests__/dmi.test.js +114 -0
package/dist/collect/__tests__/dmi.test.js.map +1 -0
package/dist/collect/__tests__/ipmi.test.js +47 -1
package/dist/collect/__tests__/ipmi.test.js.map +1 -1
package/dist/collect/__tests__/thermal.test.d.ts +1 -0
package/dist/collect/__tests__/thermal.test.js +164 -0
package/dist/collect/__tests__/thermal.test.js.map +1 -0
package/dist/collect/dmi.d.ts +19 -0
package/dist/collect/dmi.js +109 -0
package/dist/collect/dmi.js.map +1 -0
package/dist/collect/ipmi.d.ts +27 -2
package/dist/collect/ipmi.js +90 -2
package/dist/collect/ipmi.js.map +1 -1
package/dist/collect/thermal.d.ts +10 -0
package/dist/collect/thermal.js +187 -0
package/dist/collect/thermal.js.map +1 -0
package/dist/config.d.ts +10 -0
package/dist/config.js +2 -0
package/dist/config.js.map +1 -1
package/dist/index.js +51 -1
package/dist/index.js.map +1 -1
package/dist/lib/__tests__/capability.test.d.ts +1 -0
package/dist/lib/__tests__/capability.test.js +87 -0
package/dist/lib/__tests__/capability.test.js.map +1 -0
package/dist/lib/__tests__/vendor-sensors.test.d.ts +1 -0
package/dist/lib/__tests__/vendor-sensors.test.js +49 -0
package/dist/lib/__tests__/vendor-sensors.test.js.map +1 -0
package/dist/lib/capability.d.ts +21 -0
package/dist/lib/capability.js +110 -0
package/dist/lib/capability.js.map +1 -0
package/dist/lib/cpu-thermal-chips.d.ts +2 -0
package/dist/lib/cpu-thermal-chips.js +28 -0
package/dist/lib/cpu-thermal-chips.js.map +1 -0
package/dist/lib/types.d.ts +58 -0
package/dist/lib/vendor-sensors.d.ts +27 -0
package/dist/lib/vendor-sensors.js +63 -0
package/dist/lib/vendor-sensors.js.map +1 -0
package/dist/notify/telegram.js +1 -1
package/dist/notify/telegram.js.map +1 -1
package/package.json +16 -1
package/rule-ids.json +29 -0
package/.dockerignore +0 -13
package/.github/ISSUE_TEMPLATE/bug_report.md +0 -24
package/.github/ISSUE_TEMPLATE/no_data.md +0 -26
package/.github/workflows/docker.yml +0 -53
package/.github/workflows/publish.yml +0 -25
package/Dockerfile +0 -59
package/config/collector.example.yaml +0 -43
package/docker-compose.yml +0 -26
package/scripts/sign-release.sh +0 -29
package/src/__tests__/cli.test.ts +0 -74
package/src/__tests__/reboot-marker.test.ts +0 -122
package/src/alerts/evaluator.ts +0 -15
package/src/alerts/rules.ts +0 -283
package/src/alerts/state.ts +0 -92
package/src/cli.ts +0 -112
package/src/collect/__tests__/ipmi.test.ts +0 -96
package/src/collect/__tests__/smart.test.ts +0 -68
package/src/collect/__tests__/system.test.ts +0 -29
package/src/collect/__tests__/zfs.test.ts +0 -72
package/src/collect/conntrack.ts +0 -27
package/src/collect/cpu.ts +0 -92
package/src/collect/disks.ts +0 -91
package/src/collect/fd.ts +0 -31
package/src/collect/io-errors.ts +0 -23
package/src/collect/io-latency.ts +0 -103
package/src/collect/ipmi.ts +0 -207
package/src/collect/memory.ts +0 -30
package/src/collect/network.ts +0 -193
package/src/collect/ntp.ts +0 -114
package/src/collect/os-alerts.ts +0 -43
package/src/collect/raid.ts +0 -40
package/src/collect/security.ts +0 -268
package/src/collect/smart.ts +0 -72
package/src/collect/system.ts +0 -32
package/src/collect/systemd.ts +0 -33
package/src/collect/zfs.ts +0 -66
package/src/config.ts +0 -65
package/src/index.ts +0 -221
package/src/lib/__tests__/parse.test.ts +0 -28
package/src/lib/exec.ts +0 -16
package/src/lib/parse.ts +0 -29
package/src/lib/reboot-marker.ts +0 -88
package/src/lib/types.ts +0 -226
package/src/lib/version-check.ts +0 -39
package/src/lib/version.ts +0 -33
package/src/metrics-server.ts +0 -123
package/src/notify/email.ts +0 -69
package/src/notify/slack.ts +0 -47
package/src/notify/telegram.ts +0 -65
package/src/push/forge.ts +0 -109
package/tsconfig.json +0 -15
package/vitest.config.ts +0 -12

package/src/alerts/rules.ts DELETED Viewed

@@ -1,283 +0,0 @@
-// Alert rules for the collector are identical to the Forge evaluator.
-// Re-export from a shared definition to avoid duplication.
-// For the collector, we use the same 15 rules but with local thresholds from config.
-import type { Snapshot, AlertResult } from "../lib/types.js";
-import type { Config } from "../config.js";
-export interface AlertRule {
-  type: string;
-  evaluate(snap: Snapshot, thresholds: Config["thresholds"]): AlertResult[];
-}
-export const allRules: AlertRule[] = [
-  // 1. RAM high
-  { type: "ram_high", evaluate(snap, t) {
-    if (!snap.memory?.total_mb) return [];
-    const pct = (snap.memory.used_mb / snap.memory.total_mb) * 100;
-    if (pct < (t.ram_percent ?? 90)) return [];
-    return [{ type: "ram_high", severity: pct >= 95 ? "critical" : "warning",
-      title: `RAM usage at ${pct.toFixed(1)}%`,
-      message: `Using ${snap.memory.used_mb}MB of ${snap.memory.total_mb}MB. ${snap.memory.available_mb}MB available.`,
-      evidence: { used_mb: snap.memory.used_mb, total_mb: snap.memory.total_mb, percent: Math.round(pct * 10) / 10 },
-      recommendation: "Check: ps aux --sort=-rss | head -20" }];
-  }},
-  // 2. Swap active
-  { type: "swap_active", evaluate(snap, t) {
-    if (t.swap_alert === false || !snap.memory || snap.memory.swap_used_mb <= 0) return [];
-    return [{ type: "swap_active", severity: "warning", title: `Swap in use: ${snap.memory.swap_used_mb}MB`,
-      message: "Server is using swap space, indicating memory pressure.",
-      evidence: { swap_used_mb: snap.memory.swap_used_mb },
-      recommendation: "Check: free -h && ps aux --sort=-rss | head -20" }];
-  }},
-  // 3. Disk space high
-  { type: "disk_space_high", evaluate(snap, t) {
-    if (!snap.disks) return [];
-    const threshold = t.disk_percent ?? 85;
-    return snap.disks.filter(d => d.percent_used >= threshold).map(d => ({
-      type: "disk_space_high", severity: d.percent_used >= 95 ? "critical" as const : "warning" as const,
-      title: `Disk ${d.mount} at ${d.percent_used}%`,
-      message: `${d.device}: ${d.used_gb}GB of ${d.total_gb}GB used. ${d.available_gb}GB available.`,
-      evidence: { device: d.device, mount: d.mount, percent_used: d.percent_used },
-      recommendation: "Check: du -sh /* | sort -rh | head -20" }));
-  }},
-  // 4. CPU iowait
-  { type: "cpu_iowait_high", evaluate(snap, t) {
-    if (!snap.cpu || snap.cpu.iowait_percent < (t.iowait_percent ?? 20)) return [];
-    return [{ type: "cpu_iowait_high", severity: "warning", title: `CPU iowait at ${snap.cpu.iowait_percent.toFixed(1)}%`,
-      message: `High I/O wait: CPU spending ${snap.cpu.iowait_percent.toFixed(1)}% waiting for disk.`,
-      evidence: { iowait_percent: snap.cpu.iowait_percent },
-      recommendation: "Check: iotop -oP or iostat -x 1 5" }];
-  }},
-  // 5. OOM kills
-  { type: "oom_kills", evaluate(snap) {
-    if (!snap.os_alerts || snap.os_alerts.oom_kills_recent <= 0) return [];
-    return [{ type: "oom_kills", severity: "critical", title: `${snap.os_alerts.oom_kills_recent} OOM kill(s)`,
-      message: `Kernel OOM killer terminated ${snap.os_alerts.oom_kills_recent} process(es).`,
-      evidence: { oom_kills_recent: snap.os_alerts.oom_kills_recent },
-      recommendation: "Check: dmesg | grep -i 'out of memory'" }];
-  }},
-  // 6. SMART failing
-  { type: "smart_failing", evaluate(snap) {
-    if (!snap.smart) return [];
-    return snap.smart.filter(d => d.health !== "PASSED" || (d.reallocated_sectors && d.reallocated_sectors > 0) || (d.pending_sectors && d.pending_sectors > 0))
-      .map(d => ({ type: "smart_failing", severity: "critical" as const,
-        title: `SMART failure: ${d.device}`, message: `${d.model}: drive showing signs of failure.`,
-        evidence: { device: d.device, health: d.health, reallocated_sectors: d.reallocated_sectors, pending_sectors: d.pending_sectors },
-        recommendation: `Back up data. Schedule replacement for ${d.device}.` }));
-  }},
-  // 7. NVMe wear
-  { type: "nvme_wear_high", evaluate(snap, t) {
-    if (!snap.smart) return [];
-    const threshold = t.nvme_wear_percent ?? 85;
-    return snap.smart.filter(d => d.percentage_used != null && d.percentage_used >= threshold)
-      .map(d => ({ type: "nvme_wear_high", severity: d.percentage_used! >= 95 ? "critical" as const : "warning" as const,
-        title: `NVMe ${d.device} wear at ${d.percentage_used}%`, message: `${d.model} at ${d.percentage_used}% lifetime wear.`,
-        evidence: { device: d.device, percentage_used: d.percentage_used },
-        recommendation: "Plan drive replacement." }));
-  }},
-  // 8. RAID degraded
-  { type: "raid_degraded", evaluate(snap) {
-    if (!snap.raid) return [];
-    return snap.raid.filter(r => r.degraded || r.failed_disks.length > 0)
-      .map(r => ({ type: "raid_degraded", severity: "critical" as const,
-        title: `RAID ${r.device} degraded`, message: `${r.device} (${r.level}) degraded. Failed: ${r.failed_disks.join(", ") || "unknown"}.`,
-        evidence: { device: r.device, failed_disks: r.failed_disks },
-        recommendation: "Replace failed drive immediately." }));
-  }},
-  // 9. Disk latency
-  { type: "disk_latency_high", evaluate(snap, t) {
-    if (!snap.disks) return [];
-    return snap.disks.filter(d => {
-      if (d.latency_p99_ms == null) return false;
-      const thresh = d.device.includes("nvme") ? (t.disk_latency_nvme_ms ?? 50) : (t.disk_latency_hdd_ms ?? 200);
-      return d.latency_p99_ms >= thresh;
-    }).map(d => ({ type: "disk_latency_high", severity: "warning" as const,
-      title: `Disk ${d.device} latency ${d.latency_p99_ms!.toFixed(1)}ms`,
-      message: `p99 I/O latency on ${d.device} is high.`,
-      evidence: { device: d.device, latency_p99_ms: d.latency_p99_ms },
-      recommendation: "Check: iotop -oP" }));
-  }},
-  // 10. Interface errors
-  { type: "interface_errors", evaluate(snap) {
-    if (!snap.network) return [];
-    return snap.network.filter(i => (i.rx_errors + i.tx_errors + i.rx_drops + i.tx_drops) > 0)
-      .map(i => ({ type: "interface_errors", severity: "warning" as const,
-        title: `${i.interface}: errors/drops detected`,
-        message: `RX errors=${i.rx_errors}, TX errors=${i.tx_errors}, RX drops=${i.rx_drops}, TX drops=${i.tx_drops}.`,
-        evidence: { interface: i.interface, rx_errors: i.rx_errors, tx_errors: i.tx_errors, rx_drops: i.rx_drops, tx_drops: i.tx_drops },
-        recommendation: "Check cables and SFP/transceiver." }));
-  }},
-  // 11. Link speed mismatch
-  { type: "link_speed_mismatch", evaluate(snap) {
-    if (!snap.network) return [];
-    return snap.network.filter(i => i.speed_mbps > 0 && i.speed_mbps < 1000)
-      .map(i => ({ type: "link_speed_mismatch", severity: "warning" as const,
-        title: `${i.interface} at ${i.speed_mbps} Mbps`,
-        message: `Interface negotiated below 1 Gbps.`,
-        evidence: { interface: i.interface, speed_mbps: i.speed_mbps },
-        recommendation: "Check cable, SFP, switch port config." }));
-  }},
-  // 12. Interface saturation
-  { type: "interface_saturation", evaluate(snap, t) {
-    if (!snap.network) return [];
-    const threshold = (t.interface_utilization_percent ?? 90) / 100;
-    return snap.network.filter(i => {
-      if (!i.speed_mbps) return false;
-      const maxBps = (i.speed_mbps * 1_000_000) / 8;
-      return Math.max(i.rx_bytes_sec, i.tx_bytes_sec) / maxBps >= threshold;
-    }).map(i => {
-      const maxBps = (i.speed_mbps * 1_000_000) / 8;
-      const util = Math.max(i.rx_bytes_sec, i.tx_bytes_sec) / maxBps * 100;
-      return { type: "interface_saturation", severity: "warning" as const,
-        title: `${i.interface} at ${util.toFixed(0)}% utilization`,
-        message: `Interface ${i.interface} (${i.speed_mbps} Mbps) near saturation.`,
-        evidence: { interface: i.interface, utilization_percent: Math.round(util * 10) / 10 },
-        recommendation: "Check: iftop or nload" };
-    });
-  }},
-  // 13. CPU temperature
-  { type: "cpu_temperature_high", evaluate(snap, t) {
-    if (!snap.ipmi?.available || !snap.ipmi.sensors) return [];
-    const warn = t.cpu_temp_warning_c ?? 80;
-    return snap.ipmi.sensors.filter(s => {
-      const n = s.name.toLowerCase();
-      if (!n.includes("cpu") && !n.includes("temp")) return false;
-      const v = typeof s.value === "number" ? s.value : parseFloat(String(s.value));
-      return !isNaN(v) && v >= warn;
-    }).map(s => {
-      const v = typeof s.value === "number" ? s.value : parseFloat(String(s.value));
-      const crit = s.upper_critical ?? (t.cpu_temp_critical_c ?? 90);
-      return { type: "cpu_temperature_high", severity: v >= crit ? "critical" as const : "warning" as const,
-        title: `${s.name}: ${v}${s.unit}`, message: `Temperature above warning threshold.`,
-        evidence: { sensor: s.name, value: v },
-        recommendation: "Check cooling, fans, airflow." };
-    });
-  }},
-  // 14. ECC errors
-  { type: "ecc_errors", evaluate(snap) {
-    if (!snap.ipmi?.ecc_errors) return [];
-    const { correctable, uncorrectable } = snap.ipmi.ecc_errors;
-    if (correctable <= 0 && uncorrectable <= 0) return [];
-    if (uncorrectable > 0) return [{ type: "ecc_errors", severity: "critical",
-      title: `${uncorrectable} uncorrectable ECC error(s)`, message: "Data corruption possible. DIMM failing.",
-      evidence: { correctable, uncorrectable },
-      recommendation: "Replace DIMM immediately. Run: ipmitool sdr type Memory" }];
-    return [{ type: "ecc_errors", severity: "warning",
-      title: `${correctable} correctable ECC error(s)`, message: "Early warning of DIMM failure.",
-      evidence: { correctable, uncorrectable },
-      recommendation: "Schedule DIMM replacement. Run: ipmitool sdr type Memory" }];
-  }},
-  // 15. PSU redundancy
-  { type: "psu_redundancy_loss", evaluate(snap) {
-    if (!snap.ipmi?.available || !snap.ipmi.sensors) return [];
-    const psus = snap.ipmi.sensors.filter(s => { const n = s.name.toLowerCase(); return n.includes("psu") || n.includes("power supply"); });
-    if (psus.length < 2) return [];
-    const failed = psus.filter(s => { const st = String(s.status).toLowerCase(); const v = String(s.value).toLowerCase();
-      return st.includes("fail") || st.includes("absent") || v.includes("fail") || v.includes("absent"); });
-    if (failed.length === 0) return [];
-    return [{ type: "psu_redundancy_loss", severity: "critical",
-      title: "PSU redundancy lost", message: `${failed.length} PSU(s) failed/absent: ${failed.map(p => p.name).join(", ")}.`,
-      evidence: { failed: failed.map(p => ({ name: p.name, status: p.status })) },
-      recommendation: "Replace failed PSU. Check power connections." }];
-  }},
-  // 19. IPMI SEL critical events
-  { type: "ipmi_sel_critical", evaluate(snap) {
-    if (!snap.ipmi?.available || !snap.ipmi.sel_events_recent?.length) return [];
-    const critical = snap.ipmi.sel_events_recent.filter(e => e.severity === "critical" && e.direction === "Asserted");
-    if (critical.length === 0) return [];
-    const byType: Record<string, typeof critical> = {};
-    for (const e of critical) { if (!byType[e.sensor_type]) byType[e.sensor_type] = []; byType[e.sensor_type].push(e); }
-    const details = Object.entries(byType).map(([t, evts]) => `${t}: ${evts.map(e => `${e.sensor}: ${e.event}`).join(", ")}`).join("; ");
-    const recs: string[] = [];
-    if (byType.memory) recs.push("Memory errors: identify slot with `ipmitool sel elist | grep -i memory`. Schedule DIMM replacement.");
-    if (byType.power) recs.push("PSU event: check physical PSU and connections. Verify redundancy: `ipmitool chassis status`.");
-    if (byType.watchdog) recs.push("Watchdog reset: OS or BMC became unresponsive. Check dmesg for root cause.");
-    if (byType.processor) recs.push("CPU event: check for thermal throttling or MCE. Run `dmesg | grep -i mce`.");
-    if (recs.length === 0) recs.push("Review full SEL: `ipmitool sel elist`.");
-    return [{ type: "ipmi_sel_critical", severity: "critical",
-      title: `IPMI: ${critical.length} critical hardware event(s)`,
-      message: `BMC System Event Log: ${critical.length} critical event(s). ${details}`,
-      evidence: { critical_events: critical, sensor_types: Object.keys(byType) },
-      recommendation: recs.join(" ") }];
-  }},
-  // 20. Fan failure
-  { type: "ipmi_fan_failure", evaluate(snap) {
-    if (!snap.ipmi?.available || !snap.ipmi.fans?.length) return [];
-    const failed = snap.ipmi.fans.filter(f => f.status === "critical" || (f.rpm === 0 && f.status !== "absent"));
-    if (failed.length === 0) return [];
-    const total = snap.ipmi.fans.filter(f => f.status !== "absent").length;
-    const names = failed.map(f => `${f.name} (${f.rpm} RPM)`).join(", ");
-    return [{ type: "ipmi_fan_failure", severity: "critical",
-      title: `Fan failure: ${failed.length} of ${total} fans`,
-      message: `${failed.length} fan(s) stopped or critically slow: ${names}. Reduced cooling capacity.`,
-      evidence: { failed_fans: failed, total_fans: total, all_fans: snap.ipmi.fans.filter(f => f.status !== "absent") },
-      recommendation: "Check physical fans. Monitor temps: `ipmitool sdr type Temperature`. Replace failed fan module." }];
-  }},
-  // === Security (6) ===
-  // 21. SSH root password login
-  { type: "ssh_root_password", evaluate(snap) {
-    if (!snap.security?.ssh?.rootPasswordExposed) return [];
-    return [{ type: "ssh_root_password", severity: "warning",
-      title: "SSH root login with password enabled",
-      message: `PermitRootLogin is "${snap.security.ssh.permitRootLogin}" and PasswordAuthentication is "${snap.security.ssh.passwordAuthentication}". Root can be brute-forced over SSH.`,
-      evidence: { permitRootLogin: snap.security.ssh.permitRootLogin, passwordAuthentication: snap.security.ssh.passwordAuthentication },
-      recommendation: 'Set "PermitRootLogin prohibit-password" in /etc/ssh/sshd_config and restart sshd. Key-based root login still works.' }];
-  }},
-  // 22. No firewall
-  { type: "no_firewall", evaluate(snap) {
-    if (!snap.security || snap.security.firewall.active) return [];
-    return [{ type: "no_firewall", severity: "warning" as const,
-      title: "No firewall active",
-      message: "No active firewall rules detected (checked UFW, firewalld, nftables, iptables). All ports are exposed unless protected by network-level ACLs.",
-      evidence: { source: snap.security.firewall.source },
-      recommendation: 'Enable a firewall: "sudo ufw enable" (Debian/Ubuntu) or "sudo systemctl start firewalld" (RHEL/Rocky).' }];
-  }},
-  // 23. Pending security updates
-  { type: "pending_security_updates", evaluate(snap, t) {
-    if (!snap.security?.pending_updates?.available) return [];
-    const maxPending = 10;
-    if (snap.security.pending_updates.pendingCount <= maxPending) return [];
-    const d = snap.security.pending_updates;
-    return [{ type: "pending_security_updates", severity: "warning",
-      title: `${d.pendingCount} security updates pending`,
-      message: `${d.pendingCount} security updates pending on this ${d.distro} server.`,
-      evidence: { pendingCount: d.pendingCount, distro: d.distro },
-      recommendation: d.distro === "ubuntu" || d.distro === "debian" ? 'Apply with: "sudo apt-get upgrade"' : 'Apply with: "sudo dnf update --security"' }];
-  }},
-  // 24. Kernel vulnerabilities
-  { type: "kernel_vulnerabilities", evaluate(snap) {
-    if (!snap.security?.kernel_vulns?.length) return [];
-    const unmitigated = snap.security.kernel_vulns.filter(v => !v.mitigated);
-    if (unmitigated.length === 0) return [];
-    const details = unmitigated.map(v => `${v.name}: ${v.status}`).join("; ");
-    return [{ type: "kernel_vulnerabilities", severity: "warning",
-      title: `${unmitigated.length} CPU vulnerability mitigations missing`,
-      message: `Unmitigated: ${details}. Update the kernel and CPU microcode to apply mitigations.`,
-      evidence: { unmitigated, total: snap.security.kernel_vulns.length },
-      recommendation: 'Check: "grep . /sys/devices/system/cpu/vulnerabilities/*". Update kernel and microcode packages.' }];
-  }},
-  // 25. Kernel needs reboot
-  { type: "kernel_needs_reboot", evaluate(snap) {
-    if (!snap.security?.kernel_reboot?.needsReboot) return [];
-    const k = snap.security.kernel_reboot;
-    return [{ type: "kernel_needs_reboot", severity: "warning" as const,
-      title: "Reboot required for kernel update",
-      message: `Running kernel: ${k.running}. Installed kernel: ${k.installed}. A reboot is needed to apply the newer kernel.`,
-      evidence: { running: k.running, installed: k.installed },
-      recommendation: "Schedule a reboot to apply the newer kernel. Security patches may not be active until then." }];
-  }},
-  // 26. Unattended upgrades disabled
-  { type: "unattended_upgrades_disabled", evaluate(snap) {
-    if (!snap.security || snap.security.auto_updates.configured) return [];
-    const a = snap.security.auto_updates;
-    const hint = a.mechanism === "unattended-upgrades" ? 'Enable: "sudo dpkg-reconfigure -plow unattended-upgrades"'
-      : a.mechanism === "dnf-automatic" ? 'Enable: "sudo systemctl enable --now dnf-automatic-install.timer"'
-      : 'Install: "sudo apt install unattended-upgrades" (Debian/Ubuntu) or "sudo dnf install dnf-automatic" (RHEL/Rocky)';
-    return [{ type: "unattended_upgrades_disabled", severity: "warning" as const,
-      title: "Automatic security updates not configured",
-      message: `${a.details}. Without automatic updates, security patches must be applied manually.`,
-      evidence: { mechanism: a.mechanism, details: a.details },
-      recommendation: hint }];
-  }},
-];

package/src/alerts/state.ts DELETED Viewed

@@ -1,92 +0,0 @@
-import { readFileSync, writeFileSync, mkdirSync } from "fs";
-import type { AlertResult } from "../lib/types.js";
-const STATE_FILE = "/var/lib/glassmkr/alert-state.json";
-interface AlertState {
-  type: string;
-  first_seen: string;
-  last_seen: string;
-  notified: boolean;
-}
-let state: Map<string, AlertState> = new Map();
-function load() {
-  try {
-    const raw = readFileSync(STATE_FILE, "utf-8");
-    const data: Record<string, AlertState> = JSON.parse(raw);
-    state = new Map(Object.entries(data));
-  } catch {
-    state = new Map();
-  }
-}
-function save() {
-  try {
-    mkdirSync("/var/lib/glassmkr", { recursive: true });
-    const obj: Record<string, AlertState> = {};
-    for (const [k, v] of state) obj[k] = v;
-    writeFileSync(STATE_FILE, JSON.stringify(obj, null, 2));
-  } catch (err) {
-    console.error("[state] Failed to save alert state:", err);
-  }
-}
-// Initialize on import
-load();
-export function updateAlertState(currentAlerts: AlertResult[]): {
-  newAlerts: AlertResult[];
-  resolvedAlerts: AlertResult[];
-} {
-  const now = new Date().toISOString();
-  const currentTypes = new Set(currentAlerts.map((a) => a.type));
-  const newAlerts: AlertResult[] = [];
-  const resolvedAlerts: AlertResult[] = [];
-  // Check for new alerts
-  for (const alert of currentAlerts) {
-    const existing = state.get(alert.type);
-    if (!existing) {
-      // New alert
-      state.set(alert.type, { type: alert.type, first_seen: now, last_seen: now, notified: false });
-      newAlerts.push(alert);
-    } else {
-      // Existing alert, update last_seen
-      existing.last_seen = now;
-    }
-  }
-  // Check for resolved alerts
-  for (const [type, alertState] of state) {
-    if (!currentTypes.has(type)) {
-      resolvedAlerts.push({
-        type,
-        severity: "warning",
-        title: `Resolved: ${type}`,
-        message: `Condition cleared. Active for ${timeSince(alertState.first_seen)}.`,
-        evidence: {},
-        recommendation: "",
-      });
-      state.delete(type);
-    }
-  }
-  save();
-  return { newAlerts, resolvedAlerts };
-}
-function timeSince(isoDate: string): string {
-  const ms = Date.now() - new Date(isoDate).getTime();
-  const minutes = Math.floor(ms / 60000);
-  if (minutes < 60) return `${minutes} minute(s)`;
-  const hours = Math.floor(minutes / 60);
-  if (hours < 24) return `${hours} hour(s) ${minutes % 60} minute(s)`;
-  const days = Math.floor(hours / 24);
-  return `${days} day(s)`;
-}
-export function getActiveAlerts(): string[] {
-  return Array.from(state.keys());
-}

package/src/cli.ts DELETED Viewed

@@ -1,112 +0,0 @@
-// CLI argument handling for the Crucible binary. Runs before any config load
-// or collector initialization so --version and --help exit cleanly even when
-// the config file is missing or the host lacks the tools the collectors need.
-export type CliMode = "version" | "help" | "run" | "mark-reboot" | "reboot";
-export interface CliArgs {
-  mode: CliMode;
-  configPath: string;
-  reason?: string;
-  ttl?: string; // raw duration string, parsed by caller
-}
-export const DEFAULT_CONFIG_PATH = "/etc/glassmkr/collector.yaml";
-export function parseCliArgs(argv: string[], version: string): { result: CliArgs; output: string | null } {
-  // argv is typically process.argv.slice(2)
-  let configPath = DEFAULT_CONFIG_PATH;
-  // Subcommand dispatch: `mark-reboot` and `reboot` take their own flags
-  // (--reason, --ttl) but re-use --help.
-  if (argv[0] === "mark-reboot" || argv[0] === "reboot") {
-    const mode: "mark-reboot" | "reboot" = argv[0];
-    let reason: string | undefined;
-    let ttl: string | undefined;
-    for (let i = 1; i < argv.length; i++) {
-      const a = argv[i];
-      if (a === "--help" || a === "-h") {
-        return { result: { mode: "help", configPath: "" }, output: subcommandHelp(mode, version) };
-      }
-      if (a === "--reason") { reason = argv[++i]; continue; }
-      if (a.startsWith("--reason=")) { reason = a.slice("--reason=".length); continue; }
-      if (a === "--ttl") { ttl = argv[++i]; continue; }
-      if (a.startsWith("--ttl=")) { ttl = a.slice("--ttl=".length); continue; }
-    }
-    return { result: { mode, configPath: "", reason, ttl }, output: null };
-  }
-  for (let i = 0; i < argv.length; i++) {
-    const arg = argv[i];
-    if (arg === "--version" || arg === "-v") {
-      return { result: { mode: "version", configPath: "" }, output: `glassmkr-crucible v${version}` };
-    }
-    if (arg === "--help" || arg === "-h") {
-      return { result: { mode: "help", configPath: "" }, output: helpText(version) };
-    }
-    // -c <path> or --config <path>
-    if (arg === "-c" || arg === "--config") {
-      const next = argv[i + 1];
-      if (next) {
-        configPath = next;
-        i++;
-      }
-      continue;
-    }
-    // --config=<path>
-    if (arg.startsWith("--config=")) {
-      configPath = arg.slice("--config=".length);
-      continue;
-    }
-    // Legacy positional argument: first non-flag token
-    if (!arg.startsWith("-")) {
-      configPath = arg;
-    }
-  }
-  return { result: { mode: "run", configPath }, output: null };
-}
-export function helpText(version: string): string {
-  return [
-    `glassmkr-crucible v${version} - Bare metal server monitoring agent`,
-    "",
-    "Usage:",
-    "  glassmkr-crucible [options]",
-    "  glassmkr-crucible mark-reboot [--reason TEXT] [--ttl DURATION]",
-    "  glassmkr-crucible reboot      [--reason TEXT] [--ttl DURATION]",
-    "",
-    "Options:",
-    "  -v, --version    Print version and exit",
-    "  -h, --help       Print this help and exit",
-    `  -c, --config     Path to config file (default: ${DEFAULT_CONFIG_PATH})`,
-    "",
-    "Subcommands:",
-    "  mark-reboot      Write a planned-reboot marker so the next boot",
-    "                   does not fire `server_rebooted_unexpectedly`.",
-    "                   You run the reboot yourself afterwards.",
-    "  reboot           Write the marker, then invoke `systemctl reboot`.",
-    "",
-    "Without options, starts the collector daemon using the config file.",
-    "Docs: https://github.com/glassmkr/crucible",
-  ].join("\n");
-}
-function subcommandHelp(mode: "mark-reboot" | "reboot", version: string): string {
-  const action = mode === "reboot"
-    ? "Write a planned-reboot marker and invoke `systemctl reboot`."
-    : "Write a planned-reboot marker; operator triggers the reboot.";
-  return [
-    `glassmkr-crucible ${mode} - ${action}`,
-    "",
-    "Usage:",
-    `  glassmkr-crucible ${mode} [--reason TEXT] [--ttl DURATION]`,
-    "",
-    "Options:",
-    '  --reason TEXT    Free-text reason (e.g. "kernel update")',
-    "  --ttl DURATION   Expiry window; e.g. 5m, 10m, 1h (default 10m)",
-    "",
-    `Marker path: /var/lib/crucible/reboot-expected (requires root).`,
-    `v${version}`,
-  ].join("\n");
-}

package/src/collect/__tests__/ipmi.test.ts DELETED Viewed

@@ -1,96 +0,0 @@
-import { describe, it, expect } from "vitest";
-import { classifySensor, deriveSelSeverity, parseSelTimestamp, parseFanStatus } from "../ipmi.js";
-describe("classifySensor", () => {
-  it("recognizes memory sensors", () => {
-    expect(classifySensor("DIMM_A1")).toBe("memory");
-    expect(classifySensor("Memory ECC")).toBe("memory");
-  });
-  it("recognizes power supplies", () => {
-    expect(classifySensor("PSU1 Status")).toBe("power");
-    expect(classifySensor("Power Supply 1")).toBe("power");
-  });
-  it("recognizes fans, watchdog, processors, temps, voltage, storage, chassis", () => {
-    expect(classifySensor("Fan1")).toBe("fan");
-    expect(classifySensor("Watchdog")).toBe("watchdog");
-    expect(classifySensor("Processor 0")).toBe("processor");
-    // CPU-named temperature sensors classify as processor (cpu check wins over temp).
-    expect(classifySensor("CPU1 Temp")).toBe("processor");
-    expect(classifySensor("Inlet Temp")).toBe("temperature");
-    expect(classifySensor("VCore Voltage")).toBe("voltage");
-    expect(classifySensor("Drive Slot 1")).toBe("storage");
-    expect(classifySensor("Chassis Intrusion")).toBe("chassis");
-  });
-  it("falls back to 'other'", () => {
-    expect(classifySensor("Weird Sensor")).toBe("other");
-  });
-});
-describe("deriveSelSeverity", () => {
-  it("treats uncorrectable, thermal trip, AC lost as critical", () => {
-    expect(deriveSelSeverity("Uncorrectable ECC", "memory")).toBe("critical");
-    expect(deriveSelSeverity("Thermal trip", "processor")).toBe("critical");
-    expect(deriveSelSeverity("AC lost", "power")).toBe("critical");
-    expect(deriveSelSeverity("Machine check", "processor")).toBe("critical");
-  });
-  it("treats correctable ECC and redundancy lost as warning", () => {
-    expect(deriveSelSeverity("Correctable ECC", "memory")).toBe("warning");
-    expect(deriveSelSeverity("Redundancy lost", "power")).toBe("warning");
-  });
-  it("treats presence detected as info", () => {
-    expect(deriveSelSeverity("Presence detected", "memory")).toBe("info");
-  });
-  it("defaults to warning for memory/power/fan/processor sensor types", () => {
-    expect(deriveSelSeverity("Some odd event", "memory")).toBe("warning");
-    expect(deriveSelSeverity("Some odd event", "fan")).toBe("warning");
-  });
-  it("defaults to info for other sensor types", () => {
-    expect(deriveSelSeverity("Some odd event", "other")).toBe("info");
-  });
-});
-describe("parseSelTimestamp", () => {
-  it("formats a known date/time", () => {
-    expect(parseSelTimestamp("04/05/2026", "14:23:05")).toBe("2026-04-05T14:23:05Z");
-  });
-  it("pads single digit month/day", () => {
-    expect(parseSelTimestamp("4/5/2026", "09:00:00")).toBe("2026-04-05T09:00:00Z");
-  });
-  it("returns an ISO string for bad input (does not crash)", () => {
-    const out = parseSelTimestamp("", "");
-    expect(typeof out).toBe("string");
-    expect(out.length).toBeGreaterThan(10);
-  });
-});
-describe("parseFanStatus", () => {
-  it("parses healthy fan output", () => {
-    const raw = [
-      "FAN1       | 30h | ok  |  7.1 | 5000 RPM",
-      "FAN2       | 31h | ok  |  7.2 | 5100 RPM",
-    ].join("\n");
-    const fans = parseFanStatus(raw);
-    expect(fans).toHaveLength(2);
-    expect(fans[0]).toMatchObject({ name: "FAN1", rpm: 5000, status: "ok" });
-    expect(fans[1].rpm).toBe(5100);
-  });
-  it("marks critical fans (cr/nr) as critical", () => {
-    const raw = "FAN1 | 30h | cr  | 7.1 | 0 RPM";
-    const fans = parseFanStatus(raw);
-    expect(fans[0].status).toBe("critical");
-  });
-  it("marks absent/no-reading fans as absent", () => {
-    const raw = "FAN3 | 30h | ns  | 7.1 | no reading";
-    const fans = parseFanStatus(raw);
-    expect(fans[0].status).toBe("absent");
-    expect(fans[0].rpm).toBe(0);
-  });
-  it("treats 0 RPM with no explicit status as critical", () => {
-    const raw = "FAN1 | 30h | 7.1 | 0 RPM";
-    const fans = parseFanStatus(raw);
-    expect(fans[0].status).toBe("critical");
-  });
-});

package/src/collect/__tests__/smart.test.ts DELETED Viewed

@@ -1,68 +0,0 @@
-import { describe, it, expect } from "vitest";
-import { parseSmartctlJson } from "../smart.js";
-describe("parseSmartctlJson", () => {
-  it("parses a healthy SATA SSD", () => {
-    const data = {
-      model_name: "Samsung SSD 970 EVO 1TB",
-      smart_status: { passed: true },
-      temperature: { current: 38 },
-      power_on_time: { hours: 9000 },
-      ata_smart_attributes: {
-        table: [
-          { id: 5, name: "Reallocated_Sector_Ct", raw: { value: 0 } },
-          { id: 197, name: "Current_Pending_Sector", raw: { value: 0 } },
-        ],
-      },
-    };
-    const info = parseSmartctlJson(data, "/dev/sda");
-    expect(info).toMatchObject({
-      device: "/dev/sda",
-      model: "Samsung SSD 970 EVO 1TB",
-      health: "PASSED",
-      temperature_c: 38,
-      power_on_hours: 9000,
-      reallocated_sectors: 0,
-      pending_sectors: 0,
-    });
-  });
-  it("parses a failing SATA drive with reallocated sectors", () => {
-    const data = {
-      model_name: "WD Red 4TB",
-      smart_status: { passed: false },
-      ata_smart_attributes: {
-        table: [
-          { id: 5, raw: { value: 12 } },
-          { id: 197, raw: { value: 3 } },
-        ],
-      },
-    };
-    const info = parseSmartctlJson(data, "/dev/sdb");
-    expect(info.health).toBe("FAILED");
-    expect(info.reallocated_sectors).toBe(12);
-    expect(info.pending_sectors).toBe(3);
-  });
-  it("parses an NVMe drive with percentage_used", () => {
-    const data = {
-      model_name: "Samsung 980 PRO",
-      smart_status: { passed: true },
-      nvme_smart_health_information_log: { percentage_used: 22, temperature: 41 },
-    };
-    const info = parseSmartctlJson(data, "/dev/nvme0n1");
-    expect(info.percentage_used).toBe(22);
-    expect(info.temperature_c).toBe(41);
-    expect(info.health).toBe("PASSED");
-  });
-  it("falls back to 'unknown' model when absent", () => {
-    const info = parseSmartctlJson({ smart_status: { passed: true } }, "/dev/sdc");
-    expect(info.model).toBe("unknown");
-  });
-  it("treats missing smart_status as FAILED (safer default)", () => {
-    const info = parseSmartctlJson({}, "/dev/sdd");
-    expect(info.health).toBe("FAILED");
-  });
-});