openclaw-aegis 1.5.0 → 1.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -23,7 +23,7 @@ It stands between your gateway and disaster — a tireless sentinel that detects
23
23
 
24
24
  | | |
25
25
  |---|---|
26
- | **Detects** | 10 health probes scan process, port, HTTP, config, WebSocket, TUN, memory, CPU, disk, and logs every 10 seconds |
26
+ | **Detects** | 11 health probes scan process, port, HTTP, config, WebSocket, TUN, memory, CPU, disk, logs, and channel readiness every 10 seconds |
27
27
  | **Diagnoses** | 6 failure pattern matchers identify poison configs, stale PIDs, port conflicts, permission errors, corruption, and OOM kills |
28
28
  | **Heals** | L1 restart, L2 targeted repair, L3 deep repair (network, dependencies, safe mode, disk), config rollback — all automatic |
29
29
  | **Alerts** | 8 out-of-band providers (ntfy, Telegram, WhatsApp, Slack, Discord, Email, Pushover, webhook) that work even when the gateway is dead |
@@ -65,7 +65,7 @@ Your gateway is now protected.
65
65
  |---------|-------------|
66
66
  | `aegis init` | Interactive setup — walks you through everything |
67
67
  | `aegis init --auto` | Zero-config setup — detects gateway, sets defaults |
68
- | `aegis check` | Run all 10 probes, get a health verdict |
68
+ | `aegis check` | Run all 11 probes, get a health verdict |
69
69
  | `aegis check --json` | Machine-readable output for scripts and monitoring |
70
70
  | `aegis status` | Live dashboard — every probe, color-coded |
71
71
  | `aegis test-alert` | Fire a test alert to all configured channels |
@@ -80,7 +80,7 @@ Your gateway is now protected.
80
80
  ```
81
81
  OpenClaw Gateway Aegis Sidecar
82
82
  ┌─────────────────────┐ ┌──────────────────────────────┐
83
- │ │ │ Health Monitor (10 probes) │
83
+ │ │ │ Health Monitor (11 probes) │
84
84
  │ ~/.openclaw/ │◄────────►│ Config Guardian │
85
85
  │ openclaw.json │ │ Dead Man's Switch │
86
86
  │ logs/ │ │ Recovery Orchestrator │
package/dist/cli/index.js CHANGED
@@ -263,7 +263,8 @@ var PROBE_WEIGHTS = {
263
263
  memory: 1,
264
264
  cpu: 1,
265
265
  disk: 1,
266
- logTail: 1
266
+ logTail: 1,
267
+ channels: 1
267
268
  };
268
269
  var MAX_HEALTH_SCORE = Object.values(PROBE_WEIGHTS).reduce((a, b) => a + b, 0) * 2;
269
270
  var CRITICAL_CONFIG_KEYS = [
@@ -961,6 +962,110 @@ async function websocketProbe(target, port, timeoutMs = 5e3) {
961
962
  });
962
963
  }
963
964
 
965
+ // src/health/probes/channels.ts
966
+ var import_node_child_process6 = require("child_process");
967
+ var import_node_util3 = require("util");
968
+ var execFileAsync3 = (0, import_node_util3.promisify)(import_node_child_process6.execFile);
969
/**
 * Walks the `channels` map from `openclaw channels status --json` and counts
 * how many accounts are expected to be up versus how many actually are.
 *
 * Each map value may be a single account object or an array of them. An
 * account is counted when it is `enabled` or `configured`; it is ready when it
 * is `running`, is not reported as disconnected or unlinked, and carries no
 * `lastError`.
 *
 * @param {object} channels - The `channels` value from the CLI's JSON output.
 * @returns {{issues: string[], totalEnabled: number, totalReady: number}}
 *   Human-readable issue strings plus the two counters.
 */
function tallyChannelAccounts(channels) {
  const issues = [];
  let totalEnabled = 0;
  let totalReady = 0;
  for (const [channelName, accountData] of Object.entries(channels)) {
    const accounts = Array.isArray(accountData) ? accountData : [accountData];
    for (const account of accounts) {
      // A null (or otherwise non-object) entry carries no account state. Skip it
      // instead of letting a property access on null abort the whole probe.
      if (account === null || typeof account !== "object") continue;
      // NOTE(review): an account that is configured but explicitly disabled is
      // still counted here and will surface as "not running" — confirm intended.
      if (!account.enabled && !account.configured) continue;
      totalEnabled++;
      const label = account.accountId ? `${channelName}/${account.accountId}` : channelName;
      if (!account.running) {
        issues.push(`${label}: not running`);
        continue;
      }
      // `connected` / `linked` are only checked when the account reports them.
      if ("connected" in account && !account.connected) {
        issues.push(`${label}: running but not connected`);
        continue;
      }
      if ("linked" in account && !account.linked) {
        issues.push(`${label}: not linked`);
        continue;
      }
      if (account.lastError) {
        issues.push(`${label}: ${account.lastError}`);
        continue;
      }
      totalReady++;
    }
  }
  return { issues, totalEnabled, totalReady };
}

/**
 * Health probe that asks the `openclaw` CLI for channel status and grades the
 * result.
 *
 * Scores: 2 when every counted account is ready (or none are enabled), 1 when
 * only some are ready, 0 when none are ready, when the CLI prints no JSON, when
 * the JSON has no `channels` key, or when the command/parse fails.
 *
 * @param {*} _target - Unused; present to match the other probes' call shape.
 * @param {number} [timeoutMs=1e4] - Timeout passed to the CLI invocation.
 * @returns {Promise<{name: string, healthy: boolean, score: number, message: string, latencyMs: number}>}
 *   Always resolves; failures are reported in the result rather than thrown.
 */
async function channelsProbe(_target, timeoutMs = 1e4) {
  const start = Date.now();
  // Single place that shapes the probe result so every exit path agrees.
  const report = (healthy, score, message) => ({
    name: "channels",
    healthy,
    score,
    message,
    latencyMs: Date.now() - start
  });
  try {
    const { stdout } = await execFileAsync3("openclaw", ["channels", "status", "--json"], {
      timeout: timeoutMs
    });
    // The CLI may print non-JSON text before the payload; parse from the first "{".
    const jsonStart = stdout.indexOf("{");
    if (jsonStart === -1) {
      return report(false, 0, "No JSON output from openclaw channels status");
    }
    const data = JSON.parse(stdout.slice(jsonStart));
    if (!data.channels) {
      return report(false, 0, "No channels configured");
    }
    const { issues, totalEnabled, totalReady } = tallyChannelAccounts(data.channels);
    if (totalEnabled === 0) {
      return report(true, 2, "No channels enabled");
    }
    if (totalReady === totalEnabled) {
      return report(true, 2, `${totalReady}/${totalEnabled} channels ready`);
    }
    // Partial readiness earns half credit; zero ready accounts earns none.
    const score = totalReady > 0 ? 1 : 0;
    return report(
      false,
      score,
      `${totalReady}/${totalEnabled} channels ready. Issues: ${issues.join("; ")}`
    );
  } catch (err) {
    return report(
      false,
      0,
      `Channel probe failed: ${err instanceof Error ? err.message : String(err)}`
    );
  }
}
1068
+
964
1069
  // src/health/monitor.ts
965
1070
  var HealthMonitor = class extends import_node_events.EventEmitter {
966
1071
  constructor(config) {
@@ -1030,7 +1135,8 @@ var HealthMonitor = class extends import_node_events.EventEmitter {
1030
1135
  "disk"
1031
1136
  ),
1032
1137
  withTimeout(() => logTailProbe(target, this.config.gateway.logPath), "logTail"),
1033
- withTimeout(() => websocketProbe(target, this.config.gateway.port, timeout), "websocket")
1138
+ withTimeout(() => websocketProbe(target, this.config.gateway.port, timeout), "websocket"),
1139
+ withTimeout(() => channelsProbe(target, timeout), "channels")
1034
1140
  ]);
1035
1141
  const probeResults = results.map((r, i) => {
1036
1142
  if (r.status === "fulfilled") return r.value;
@@ -1044,7 +1150,8 @@ var HealthMonitor = class extends import_node_events.EventEmitter {
1044
1150
  "cpu",
1045
1151
  "disk",
1046
1152
  "logTail",
1047
- "websocket"
1153
+ "websocket",
1154
+ "channels"
1048
1155
  ];
1049
1156
  return {
1050
1157
  name: names[i] ?? "unknown",
@@ -3379,8 +3486,8 @@ var DeadManSwitch = class extends import_node_events2.EventEmitter {
3379
3486
 
3380
3487
  // src/recovery/orchestrator.ts
3381
3488
  var import_node_events3 = require("events");
3382
- var import_node_child_process7 = require("child_process");
3383
- var import_node_util4 = require("util");
3489
+ var import_node_child_process8 = require("child_process");
3490
+ var import_node_util5 = require("util");
3384
3491
  var fs16 = __toESM(require("fs"));
3385
3492
 
3386
3493
  // src/recovery/circuit-breaker.ts
@@ -3427,9 +3534,9 @@ var CircuitBreaker = class {
3427
3534
  var fs15 = __toESM(require("fs"));
3428
3535
  var os7 = __toESM(require("os"));
3429
3536
  var path6 = __toESM(require("path"));
3430
- var import_node_child_process6 = require("child_process");
3431
- var import_node_util3 = require("util");
3432
- var execFileAsync3 = (0, import_node_util3.promisify)(import_node_child_process6.execFile);
3537
+ var import_node_child_process7 = require("child_process");
3538
+ var import_node_util4 = require("util");
3539
+ var execFileAsync4 = (0, import_node_util4.promisify)(import_node_child_process7.execFile);
3433
3540
  function createL3Patterns() {
3434
3541
  return [
3435
3542
  // L3-1: Network Repair — DNS resolution, stale routes, TUN interface reset
@@ -3438,7 +3545,7 @@ function createL3Patterns() {
3438
3545
  name: "network-repair",
3439
3546
  async detect(_ctx) {
3440
3547
  try {
3441
- await execFileAsync3("getent", ["hosts", "localhost"], { timeout: 5e3 });
3548
+ await execFileAsync4("getent", ["hosts", "localhost"], { timeout: 5e3 });
3442
3549
  } catch {
3443
3550
  return true;
3444
3551
  }
@@ -3466,7 +3573,7 @@ function createL3Patterns() {
3466
3573
  try {
3467
3574
  const cmd = os7.platform() === "darwin" ? "netstat" : "ip";
3468
3575
  const args = os7.platform() === "darwin" ? ["-rn"] : ["route", "show", "default"];
3469
- const { stdout } = await execFileAsync3(cmd, args, { timeout: 5e3 });
3576
+ const { stdout } = await execFileAsync4(cmd, args, { timeout: 5e3 });
3470
3577
  if (!stdout.includes("default") && !stdout.includes("0.0.0.0")) return true;
3471
3578
  } catch {
3472
3579
  return true;
@@ -3478,9 +3585,9 @@ function createL3Patterns() {
3478
3585
  let fixed = false;
3479
3586
  try {
3480
3587
  if (os7.platform() === "darwin") {
3481
- await execFileAsync3("dscacheutil", ["-flushcache"], { timeout: 5e3 });
3588
+ await execFileAsync4("dscacheutil", ["-flushcache"], { timeout: 5e3 });
3482
3589
  } else {
3483
- await execFileAsync3("systemd-resolve", ["--flush-caches"], { timeout: 5e3 });
3590
+ await execFileAsync4("systemd-resolve", ["--flush-caches"], { timeout: 5e3 });
3484
3591
  }
3485
3592
  fixed = true;
3486
3593
  } catch {
@@ -3499,7 +3606,7 @@ function createL3Patterns() {
3499
3606
  if (fs15.existsSync(operstatePath)) {
3500
3607
  const state = fs15.readFileSync(operstatePath, "utf-8").trim();
3501
3608
  if (state === "down") {
3502
- await execFileAsync3("ip", ["link", "set", iface, "up"], { timeout: 5e3 });
3609
+ await execFileAsync4("ip", ["link", "set", iface, "up"], { timeout: 5e3 });
3503
3610
  fixed = true;
3504
3611
  }
3505
3612
  }
@@ -3523,7 +3630,7 @@ function createL3Patterns() {
3523
3630
  name: "process-resurrection",
3524
3631
  async detect(_ctx) {
3525
3632
  try {
3526
- await execFileAsync3("which", ["openclaw"], { timeout: 5e3 });
3633
+ await execFileAsync4("which", ["openclaw"], { timeout: 5e3 });
3527
3634
  return false;
3528
3635
  } catch {
3529
3636
  return true;
@@ -3532,11 +3639,11 @@ function createL3Patterns() {
3532
3639
  async fix(_ctx) {
3533
3640
  const start = Date.now();
3534
3641
  try {
3535
- await execFileAsync3("npm", ["install", "-g", "openclaw"], {
3642
+ await execFileAsync4("npm", ["install", "-g", "openclaw"], {
3536
3643
  timeout: 12e4,
3537
3644
  env: { ...process.env, NODE_ENV: "production" }
3538
3645
  });
3539
- await execFileAsync3("which", ["openclaw"], { timeout: 5e3 });
3646
+ await execFileAsync4("which", ["openclaw"], { timeout: 5e3 });
3540
3647
  return {
3541
3648
  level: "L3",
3542
3649
  action: "process-resurrection-reinstall",
@@ -3572,7 +3679,7 @@ function createL3Patterns() {
3572
3679
  }
3573
3680
  }
3574
3681
  try {
3575
- await execFileAsync3(
3682
+ await execFileAsync4(
3576
3683
  "node",
3577
3684
  ["-e", "try { require('openclaw') } catch(e) { if (e.code === 'MODULE_NOT_FOUND') process.exit(1) }"],
3578
3685
  { timeout: 1e4, cwd: installDir }
@@ -3598,7 +3705,7 @@ function createL3Patterns() {
3598
3705
  if (fs15.existsSync(nodeModules)) {
3599
3706
  fs15.rmSync(nodeModules, { recursive: true, force: true });
3600
3707
  }
3601
- await execFileAsync3("npm", ["install", "--production"], {
3708
+ await execFileAsync4("npm", ["install", "--production"], {
3602
3709
  timeout: 12e4,
3603
3710
  cwd: installDir
3604
3711
  });
@@ -3624,7 +3731,7 @@ function createL3Patterns() {
3624
3731
  name: "safe-mode-boot",
3625
3732
  async detect(ctx) {
3626
3733
  try {
3627
- const { stdout } = await execFileAsync3("pgrep", ["-f", "openclaw"], { timeout: 5e3 });
3734
+ const { stdout } = await execFileAsync4("pgrep", ["-f", "openclaw"], { timeout: 5e3 });
3628
3735
  if (stdout.trim().length > 0) return false;
3629
3736
  } catch {
3630
3737
  }
@@ -3643,14 +3750,14 @@ function createL3Patterns() {
3643
3750
  const safeModeConfigPath = ctx.configPath + ".safemode";
3644
3751
  try {
3645
3752
  fs15.writeFileSync(safeModeConfigPath, JSON.stringify(safeModeConfig, null, 2));
3646
- await execFileAsync3(
3753
+ await execFileAsync4(
3647
3754
  "openclaw",
3648
3755
  ["gateway", "start", "--config", safeModeConfigPath],
3649
3756
  { timeout: 3e4 }
3650
3757
  );
3651
3758
  await sleep(3e3);
3652
3759
  try {
3653
- await execFileAsync3("pgrep", ["-f", "openclaw"], { timeout: 5e3 });
3760
+ await execFileAsync4("pgrep", ["-f", "openclaw"], { timeout: 5e3 });
3654
3761
  } catch {
3655
3762
  return {
3656
3763
  level: "L3",
@@ -3687,7 +3794,7 @@ function createL3Patterns() {
3687
3794
  async detect(ctx) {
3688
3795
  try {
3689
3796
  const configDir = path6.dirname(ctx.configPath);
3690
- const { stdout } = await execFileAsync3("df", ["-BM", configDir], { timeout: 5e3 });
3797
+ const { stdout } = await execFileAsync4("df", ["-BM", configDir], { timeout: 5e3 });
3691
3798
  const lines = stdout.trim().split("\n");
3692
3799
  if (lines.length < 2) return false;
3693
3800
  const parts = lines[1].split(/\s+/);
@@ -3751,7 +3858,7 @@ function createL3Patterns() {
3751
3858
  }
3752
3859
  function resolveGatewayInstallDir() {
3753
3860
  try {
3754
- const output = (0, import_node_child_process6.execSync)("which openclaw", { encoding: "utf-8", timeout: 5e3 }).trim();
3861
+ const output = (0, import_node_child_process7.execSync)("which openclaw", { encoding: "utf-8", timeout: 5e3 }).trim();
3755
3862
  const realPath = fs15.realpathSync(output);
3756
3863
  let dir = path6.dirname(realPath);
3757
3864
  for (let i = 0; i < 5; i++) {
@@ -3782,7 +3889,7 @@ function sleep(ms) {
3782
3889
  }
3783
3890
 
3784
3891
  // src/recovery/orchestrator.ts
3785
- var execFileAsync4 = (0, import_node_util4.promisify)(import_node_child_process7.execFile);
3892
+ var execFileAsync5 = (0, import_node_util5.promisify)(import_node_child_process8.execFile);
3786
3893
  var RecoveryOrchestrator = class extends import_node_events3.EventEmitter {
3787
3894
  config;
3788
3895
  diagnosisEngine;
@@ -3868,7 +3975,7 @@ var RecoveryOrchestrator = class extends import_node_events3.EventEmitter {
3868
3975
  }
3869
3976
  const start = Date.now();
3870
3977
  try {
3871
- await execFileAsync4("openclaw", ["gateway", "restart"], { timeout: 3e4 });
3978
+ await execFileAsync5("openclaw", ["gateway", "restart"], { timeout: 3e4 });
3872
3979
  actions.push({
3873
3980
  level: "L1",
3874
3981
  action: "restart",
@@ -3978,9 +4085,9 @@ function sleep2(ms) {
3978
4085
 
3979
4086
  // src/diagnosis/engine.ts
3980
4087
  var fs17 = __toESM(require("fs"));
3981
- var import_node_child_process8 = require("child_process");
3982
- var import_node_util5 = require("util");
3983
- var execFileAsync5 = (0, import_node_util5.promisify)(import_node_child_process8.execFile);
4088
+ var import_node_child_process9 = require("child_process");
4089
+ var import_node_util6 = require("util");
4090
+ var execFileAsync6 = (0, import_node_util6.promisify)(import_node_child_process9.execFile);
3984
4091
  var DiagnosisEngine = class {
3985
4092
  patterns;
3986
4093
  constructor(backupManager) {
@@ -4064,7 +4171,7 @@ function createPatterns(backupManager) {
4064
4171
  name: "port-conflict",
4065
4172
  async detect(ctx) {
4066
4173
  try {
4067
- const { stdout } = await execFileAsync5("lsof", ["-i", `:${ctx.gatewayPort}`, "-t"]);
4174
+ const { stdout } = await execFileAsync6("lsof", ["-i", `:${ctx.gatewayPort}`, "-t"]);
4068
4175
  const pids = stdout.trim().split("\n").filter(Boolean);
4069
4176
  if (pids.length === 0) return false;
4070
4177
  if (fs17.existsSync(ctx.pidFile)) {
@@ -4079,7 +4186,7 @@ function createPatterns(backupManager) {
4079
4186
  async fix(ctx) {
4080
4187
  const start = Date.now();
4081
4188
  try {
4082
- const { stdout } = await execFileAsync5("lsof", ["-i", `:${ctx.gatewayPort}`, "-t"]);
4189
+ const { stdout } = await execFileAsync6("lsof", ["-i", `:${ctx.gatewayPort}`, "-t"]);
4083
4190
  const pids = stdout.trim().split("\n").filter(Boolean);
4084
4191
  return {
4085
4192
  level: "L2",
@@ -4158,7 +4265,7 @@ function createPatterns(backupManager) {
4158
4265
  name: "oom-kill",
4159
4266
  async detect(_ctx) {
4160
4267
  try {
4161
- const { stdout } = await execFileAsync5("dmesg", ["--time-format", "reltime"], {
4268
+ const { stdout } = await execFileAsync6("dmesg", ["--time-format", "reltime"], {
4162
4269
  timeout: 5e3
4163
4270
  });
4164
4271
  return /oom_kill_process|Out of memory/.test(stdout);