openclaw-aegis 1.5.0 → 1.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +3 -3
- package/dist/cli/index.js +138 -31
- package/dist/cli/index.js.map +1 -1
- package/dist/index.js +144 -35
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -23,7 +23,7 @@ It stands between your gateway and disaster — a tireless sentinel that detects
|
|
|
23
23
|
|
|
24
24
|
| | |
|
|
25
25
|
|---|---|
|
|
26
|
-
| **Detects** |
|
|
26
|
+
| **Detects** | 11 health probes scan process, port, HTTP, config, WebSocket, TUN, memory, CPU, disk, logs, and channel readiness every 10 seconds |
|
|
27
27
|
| **Diagnoses** | 6 failure pattern matchers identify poison configs, stale PIDs, port conflicts, permission errors, corruption, and OOM kills |
|
|
28
28
|
| **Heals** | L1 restart, L2 targeted repair, L3 deep repair (network, dependencies, safe mode, disk), config rollback — all automatic |
|
|
29
29
|
| **Alerts** | 8 out-of-band providers (ntfy, Telegram, WhatsApp, Slack, Discord, Email, Pushover, webhook) that work even when the gateway is dead |
|
|
@@ -65,7 +65,7 @@ Your gateway is now protected.
|
|
|
65
65
|
|---------|-------------|
|
|
66
66
|
| `aegis init` | Interactive setup — walks you through everything |
|
|
67
67
|
| `aegis init --auto` | Zero-config setup — detects gateway, sets defaults |
|
|
68
|
-
| `aegis check` | Run all
|
|
68
|
+
| `aegis check` | Run all 11 probes, get a health verdict |
|
|
69
69
|
| `aegis check --json` | Machine-readable output for scripts and monitoring |
|
|
70
70
|
| `aegis status` | Live dashboard — every probe, color-coded |
|
|
71
71
|
| `aegis test-alert` | Fire a test alert to all configured channels |
|
|
@@ -80,7 +80,7 @@ Your gateway is now protected.
|
|
|
80
80
|
```
|
|
81
81
|
OpenClaw Gateway Aegis Sidecar
|
|
82
82
|
┌─────────────────────┐ ┌──────────────────────────────┐
|
|
83
|
-
│ │ │ Health Monitor (
|
|
83
|
+
│ │ │ Health Monitor (11 probes) │
|
|
84
84
|
│ ~/.openclaw/ │◄────────►│ Config Guardian │
|
|
85
85
|
│ openclaw.json │ │ Dead Man's Switch │
|
|
86
86
|
│ logs/ │ │ Recovery Orchestrator │
|
package/dist/cli/index.js
CHANGED
|
@@ -263,7 +263,8 @@ var PROBE_WEIGHTS = {
|
|
|
263
263
|
memory: 1,
|
|
264
264
|
cpu: 1,
|
|
265
265
|
disk: 1,
|
|
266
|
-
logTail: 1
|
|
266
|
+
logTail: 1,
|
|
267
|
+
channels: 1
|
|
267
268
|
};
|
|
268
269
|
var MAX_HEALTH_SCORE = Object.values(PROBE_WEIGHTS).reduce((a, b) => a + b, 0) * 2;
|
|
269
270
|
var CRITICAL_CONFIG_KEYS = [
|
|
@@ -961,6 +962,110 @@ async function websocketProbe(target, port, timeoutMs = 5e3) {
|
|
|
961
962
|
});
|
|
962
963
|
}
|
|
963
964
|
|
|
965
|
+
// src/health/probes/channels.ts
|
|
966
|
+
var import_node_child_process6 = require("child_process");
|
|
967
|
+
var import_node_util3 = require("util");
|
|
968
|
+
var execFileAsync3 = (0, import_node_util3.promisify)(import_node_child_process6.execFile);
|
|
969
|
+
/**
 * Health probe that reports whether the gateway's messaging channels are ready.
 *
 * Runs `openclaw channels status --json` and inspects every account listed
 * under the `channels` key of the JSON output. An account is counted when it
 * is `enabled` or `configured`; it is "ready" when it is `running`, is not
 * explicitly disconnected (`connected: false`), is not explicitly unlinked
 * (`linked: false`), and carries no `lastError`.
 *
 * Scoring:
 *   - 2 (healthy):   every counted account is ready, or no account is counted.
 *   - 1 (unhealthy): some, but not all, counted accounts are ready.
 *   - 0 (unhealthy): no counted account is ready, the CLI produced no usable
 *                    JSON, the output has no `channels` entry, or the CLI failed.
 *
 * @param {*} _target - Unused; accepted so the probe matches the call shape of the other probes.
 * @param {number} [timeoutMs=1e4] - Timeout in milliseconds handed to the CLI invocation.
 * @returns {Promise<{name: string, healthy: boolean, score: number, message: string, latencyMs: number}>}
 *   Always resolves; any failure is reported through the result object.
 */
async function channelsProbe(_target, timeoutMs = 1e4) {
  const start = Date.now();
  // Single place that shapes the probe result, so every exit path reports
  // the same fields and measures latency the same way.
  const result = (healthy, score, message) => ({
    name: "channels",
    healthy,
    score,
    message,
    latencyMs: Date.now() - start
  });
  try {
    const { stdout } = await execFileAsync3("openclaw", ["channels", "status", "--json"], {
      timeout: timeoutMs
    });
    // The CLI may print non-JSON text around the payload; keep only the span
    // from the first "{" to the last "}".
    const jsonStart = stdout.indexOf("{");
    if (jsonStart === -1) {
      return result(false, 0, "No JSON output from openclaw channels status");
    }
    const jsonEnd = stdout.lastIndexOf("}");
    // With no closing brace after the opening one, pass the tail through
    // unchanged so JSON.parse reports the malformed output via the catch below.
    const json = jsonEnd > jsonStart ? stdout.slice(jsonStart, jsonEnd + 1) : stdout.slice(jsonStart);
    const data = JSON.parse(json);
    if (!data.channels) {
      return result(false, 0, "No channels configured");
    }
    const issues = [];
    let totalEnabled = 0;
    let totalReady = 0;
    for (const [channelName, accountData] of Object.entries(data.channels)) {
      // A channel maps either to a single account object or to a list of them.
      const accounts = Array.isArray(accountData) ? accountData : [accountData];
      for (const account of accounts) {
        // A null / non-object entry carries no status; skip it instead of
        // letting a property access throw and fail the whole probe.
        if (account === null || typeof account !== "object") continue;
        if (!account.enabled && !account.configured) continue;
        totalEnabled++;
        const label = account.accountId ? `${channelName}/${account.accountId}` : channelName;
        // Checks run from most to least fundamental; only the first failing
        // one is reported for each account.
        if (!account.running) {
          issues.push(`${label}: not running`);
          continue;
        }
        // `connected` / `linked` only count against an account that reports them.
        if ("connected" in account && !account.connected) {
          issues.push(`${label}: running but not connected`);
          continue;
        }
        if ("linked" in account && !account.linked) {
          issues.push(`${label}: not linked`);
          continue;
        }
        if (account.lastError) {
          issues.push(`${label}: ${account.lastError}`);
          continue;
        }
        totalReady++;
      }
    }
    if (totalEnabled === 0) {
      return result(true, 2, "No channels enabled");
    }
    if (totalReady === totalEnabled) {
      return result(true, 2, `${totalReady}/${totalEnabled} channels ready`);
    }
    if (totalReady > 0) {
      return result(false, 1, `${totalReady}/${totalEnabled} channels ready. Issues: ${issues.join("; ")}`);
    }
    return result(false, 0, `0/${totalEnabled} channels ready. Issues: ${issues.join("; ")}`);
  } catch (err) {
    // Covers a missing CLI, a non-zero exit, a timeout, and malformed JSON.
    return result(false, 0, `Channel probe failed: ${err instanceof Error ? err.message : String(err)}`);
  }
}
|
|
1068
|
+
|
|
964
1069
|
// src/health/monitor.ts
|
|
965
1070
|
var HealthMonitor = class extends import_node_events.EventEmitter {
|
|
966
1071
|
constructor(config) {
|
|
@@ -1030,7 +1135,8 @@ var HealthMonitor = class extends import_node_events.EventEmitter {
|
|
|
1030
1135
|
"disk"
|
|
1031
1136
|
),
|
|
1032
1137
|
withTimeout(() => logTailProbe(target, this.config.gateway.logPath), "logTail"),
|
|
1033
|
-
withTimeout(() => websocketProbe(target, this.config.gateway.port, timeout), "websocket")
|
|
1138
|
+
withTimeout(() => websocketProbe(target, this.config.gateway.port, timeout), "websocket"),
|
|
1139
|
+
withTimeout(() => channelsProbe(target, timeout), "channels")
|
|
1034
1140
|
]);
|
|
1035
1141
|
const probeResults = results.map((r, i) => {
|
|
1036
1142
|
if (r.status === "fulfilled") return r.value;
|
|
@@ -1044,7 +1150,8 @@ var HealthMonitor = class extends import_node_events.EventEmitter {
|
|
|
1044
1150
|
"cpu",
|
|
1045
1151
|
"disk",
|
|
1046
1152
|
"logTail",
|
|
1047
|
-
"websocket"
|
|
1153
|
+
"websocket",
|
|
1154
|
+
"channels"
|
|
1048
1155
|
];
|
|
1049
1156
|
return {
|
|
1050
1157
|
name: names[i] ?? "unknown",
|
|
@@ -3379,8 +3486,8 @@ var DeadManSwitch = class extends import_node_events2.EventEmitter {
|
|
|
3379
3486
|
|
|
3380
3487
|
// src/recovery/orchestrator.ts
|
|
3381
3488
|
var import_node_events3 = require("events");
|
|
3382
|
-
var
|
|
3383
|
-
var
|
|
3489
|
+
var import_node_child_process8 = require("child_process");
|
|
3490
|
+
var import_node_util5 = require("util");
|
|
3384
3491
|
var fs16 = __toESM(require("fs"));
|
|
3385
3492
|
|
|
3386
3493
|
// src/recovery/circuit-breaker.ts
|
|
@@ -3427,9 +3534,9 @@ var CircuitBreaker = class {
|
|
|
3427
3534
|
var fs15 = __toESM(require("fs"));
|
|
3428
3535
|
var os7 = __toESM(require("os"));
|
|
3429
3536
|
var path6 = __toESM(require("path"));
|
|
3430
|
-
var
|
|
3431
|
-
var
|
|
3432
|
-
var
|
|
3537
|
+
var import_node_child_process7 = require("child_process");
|
|
3538
|
+
var import_node_util4 = require("util");
|
|
3539
|
+
var execFileAsync4 = (0, import_node_util4.promisify)(import_node_child_process7.execFile);
|
|
3433
3540
|
function createL3Patterns() {
|
|
3434
3541
|
return [
|
|
3435
3542
|
// L3-1: Network Repair — DNS resolution, stale routes, TUN interface reset
|
|
@@ -3438,7 +3545,7 @@ function createL3Patterns() {
|
|
|
3438
3545
|
name: "network-repair",
|
|
3439
3546
|
async detect(_ctx) {
|
|
3440
3547
|
try {
|
|
3441
|
-
await
|
|
3548
|
+
await execFileAsync4("getent", ["hosts", "localhost"], { timeout: 5e3 });
|
|
3442
3549
|
} catch {
|
|
3443
3550
|
return true;
|
|
3444
3551
|
}
|
|
@@ -3466,7 +3573,7 @@ function createL3Patterns() {
|
|
|
3466
3573
|
try {
|
|
3467
3574
|
const cmd = os7.platform() === "darwin" ? "netstat" : "ip";
|
|
3468
3575
|
const args = os7.platform() === "darwin" ? ["-rn"] : ["route", "show", "default"];
|
|
3469
|
-
const { stdout } = await
|
|
3576
|
+
const { stdout } = await execFileAsync4(cmd, args, { timeout: 5e3 });
|
|
3470
3577
|
if (!stdout.includes("default") && !stdout.includes("0.0.0.0")) return true;
|
|
3471
3578
|
} catch {
|
|
3472
3579
|
return true;
|
|
@@ -3478,9 +3585,9 @@ function createL3Patterns() {
|
|
|
3478
3585
|
let fixed = false;
|
|
3479
3586
|
try {
|
|
3480
3587
|
if (os7.platform() === "darwin") {
|
|
3481
|
-
await
|
|
3588
|
+
await execFileAsync4("dscacheutil", ["-flushcache"], { timeout: 5e3 });
|
|
3482
3589
|
} else {
|
|
3483
|
-
await
|
|
3590
|
+
await execFileAsync4("systemd-resolve", ["--flush-caches"], { timeout: 5e3 });
|
|
3484
3591
|
}
|
|
3485
3592
|
fixed = true;
|
|
3486
3593
|
} catch {
|
|
@@ -3499,7 +3606,7 @@ function createL3Patterns() {
|
|
|
3499
3606
|
if (fs15.existsSync(operstatePath)) {
|
|
3500
3607
|
const state = fs15.readFileSync(operstatePath, "utf-8").trim();
|
|
3501
3608
|
if (state === "down") {
|
|
3502
|
-
await
|
|
3609
|
+
await execFileAsync4("ip", ["link", "set", iface, "up"], { timeout: 5e3 });
|
|
3503
3610
|
fixed = true;
|
|
3504
3611
|
}
|
|
3505
3612
|
}
|
|
@@ -3523,7 +3630,7 @@ function createL3Patterns() {
|
|
|
3523
3630
|
name: "process-resurrection",
|
|
3524
3631
|
async detect(_ctx) {
|
|
3525
3632
|
try {
|
|
3526
|
-
await
|
|
3633
|
+
await execFileAsync4("which", ["openclaw"], { timeout: 5e3 });
|
|
3527
3634
|
return false;
|
|
3528
3635
|
} catch {
|
|
3529
3636
|
return true;
|
|
@@ -3532,11 +3639,11 @@ function createL3Patterns() {
|
|
|
3532
3639
|
async fix(_ctx) {
|
|
3533
3640
|
const start = Date.now();
|
|
3534
3641
|
try {
|
|
3535
|
-
await
|
|
3642
|
+
await execFileAsync4("npm", ["install", "-g", "openclaw"], {
|
|
3536
3643
|
timeout: 12e4,
|
|
3537
3644
|
env: { ...process.env, NODE_ENV: "production" }
|
|
3538
3645
|
});
|
|
3539
|
-
await
|
|
3646
|
+
await execFileAsync4("which", ["openclaw"], { timeout: 5e3 });
|
|
3540
3647
|
return {
|
|
3541
3648
|
level: "L3",
|
|
3542
3649
|
action: "process-resurrection-reinstall",
|
|
@@ -3572,7 +3679,7 @@ function createL3Patterns() {
|
|
|
3572
3679
|
}
|
|
3573
3680
|
}
|
|
3574
3681
|
try {
|
|
3575
|
-
await
|
|
3682
|
+
await execFileAsync4(
|
|
3576
3683
|
"node",
|
|
3577
3684
|
["-e", "try { require('openclaw') } catch(e) { if (e.code === 'MODULE_NOT_FOUND') process.exit(1) }"],
|
|
3578
3685
|
{ timeout: 1e4, cwd: installDir }
|
|
@@ -3598,7 +3705,7 @@ function createL3Patterns() {
|
|
|
3598
3705
|
if (fs15.existsSync(nodeModules)) {
|
|
3599
3706
|
fs15.rmSync(nodeModules, { recursive: true, force: true });
|
|
3600
3707
|
}
|
|
3601
|
-
await
|
|
3708
|
+
await execFileAsync4("npm", ["install", "--production"], {
|
|
3602
3709
|
timeout: 12e4,
|
|
3603
3710
|
cwd: installDir
|
|
3604
3711
|
});
|
|
@@ -3624,7 +3731,7 @@ function createL3Patterns() {
|
|
|
3624
3731
|
name: "safe-mode-boot",
|
|
3625
3732
|
async detect(ctx) {
|
|
3626
3733
|
try {
|
|
3627
|
-
const { stdout } = await
|
|
3734
|
+
const { stdout } = await execFileAsync4("pgrep", ["-f", "openclaw"], { timeout: 5e3 });
|
|
3628
3735
|
if (stdout.trim().length > 0) return false;
|
|
3629
3736
|
} catch {
|
|
3630
3737
|
}
|
|
@@ -3643,14 +3750,14 @@ function createL3Patterns() {
|
|
|
3643
3750
|
const safeModeConfigPath = ctx.configPath + ".safemode";
|
|
3644
3751
|
try {
|
|
3645
3752
|
fs15.writeFileSync(safeModeConfigPath, JSON.stringify(safeModeConfig, null, 2));
|
|
3646
|
-
await
|
|
3753
|
+
await execFileAsync4(
|
|
3647
3754
|
"openclaw",
|
|
3648
3755
|
["gateway", "start", "--config", safeModeConfigPath],
|
|
3649
3756
|
{ timeout: 3e4 }
|
|
3650
3757
|
);
|
|
3651
3758
|
await sleep(3e3);
|
|
3652
3759
|
try {
|
|
3653
|
-
await
|
|
3760
|
+
await execFileAsync4("pgrep", ["-f", "openclaw"], { timeout: 5e3 });
|
|
3654
3761
|
} catch {
|
|
3655
3762
|
return {
|
|
3656
3763
|
level: "L3",
|
|
@@ -3687,7 +3794,7 @@ function createL3Patterns() {
|
|
|
3687
3794
|
async detect(ctx) {
|
|
3688
3795
|
try {
|
|
3689
3796
|
const configDir = path6.dirname(ctx.configPath);
|
|
3690
|
-
const { stdout } = await
|
|
3797
|
+
const { stdout } = await execFileAsync4("df", ["-BM", configDir], { timeout: 5e3 });
|
|
3691
3798
|
const lines = stdout.trim().split("\n");
|
|
3692
3799
|
if (lines.length < 2) return false;
|
|
3693
3800
|
const parts = lines[1].split(/\s+/);
|
|
@@ -3751,7 +3858,7 @@ function createL3Patterns() {
|
|
|
3751
3858
|
}
|
|
3752
3859
|
function resolveGatewayInstallDir() {
|
|
3753
3860
|
try {
|
|
3754
|
-
const output = (0,
|
|
3861
|
+
const output = (0, import_node_child_process7.execSync)("which openclaw", { encoding: "utf-8", timeout: 5e3 }).trim();
|
|
3755
3862
|
const realPath = fs15.realpathSync(output);
|
|
3756
3863
|
let dir = path6.dirname(realPath);
|
|
3757
3864
|
for (let i = 0; i < 5; i++) {
|
|
@@ -3782,7 +3889,7 @@ function sleep(ms) {
|
|
|
3782
3889
|
}
|
|
3783
3890
|
|
|
3784
3891
|
// src/recovery/orchestrator.ts
|
|
3785
|
-
var
|
|
3892
|
+
var execFileAsync5 = (0, import_node_util5.promisify)(import_node_child_process8.execFile);
|
|
3786
3893
|
var RecoveryOrchestrator = class extends import_node_events3.EventEmitter {
|
|
3787
3894
|
config;
|
|
3788
3895
|
diagnosisEngine;
|
|
@@ -3868,7 +3975,7 @@ var RecoveryOrchestrator = class extends import_node_events3.EventEmitter {
|
|
|
3868
3975
|
}
|
|
3869
3976
|
const start = Date.now();
|
|
3870
3977
|
try {
|
|
3871
|
-
await
|
|
3978
|
+
await execFileAsync5("openclaw", ["gateway", "restart"], { timeout: 3e4 });
|
|
3872
3979
|
actions.push({
|
|
3873
3980
|
level: "L1",
|
|
3874
3981
|
action: "restart",
|
|
@@ -3978,9 +4085,9 @@ function sleep2(ms) {
|
|
|
3978
4085
|
|
|
3979
4086
|
// src/diagnosis/engine.ts
|
|
3980
4087
|
var fs17 = __toESM(require("fs"));
|
|
3981
|
-
var
|
|
3982
|
-
var
|
|
3983
|
-
var
|
|
4088
|
+
var import_node_child_process9 = require("child_process");
|
|
4089
|
+
var import_node_util6 = require("util");
|
|
4090
|
+
var execFileAsync6 = (0, import_node_util6.promisify)(import_node_child_process9.execFile);
|
|
3984
4091
|
var DiagnosisEngine = class {
|
|
3985
4092
|
patterns;
|
|
3986
4093
|
constructor(backupManager) {
|
|
@@ -4064,7 +4171,7 @@ function createPatterns(backupManager) {
|
|
|
4064
4171
|
name: "port-conflict",
|
|
4065
4172
|
async detect(ctx) {
|
|
4066
4173
|
try {
|
|
4067
|
-
const { stdout } = await
|
|
4174
|
+
const { stdout } = await execFileAsync6("lsof", ["-i", `:${ctx.gatewayPort}`, "-t"]);
|
|
4068
4175
|
const pids = stdout.trim().split("\n").filter(Boolean);
|
|
4069
4176
|
if (pids.length === 0) return false;
|
|
4070
4177
|
if (fs17.existsSync(ctx.pidFile)) {
|
|
@@ -4079,7 +4186,7 @@ function createPatterns(backupManager) {
|
|
|
4079
4186
|
async fix(ctx) {
|
|
4080
4187
|
const start = Date.now();
|
|
4081
4188
|
try {
|
|
4082
|
-
const { stdout } = await
|
|
4189
|
+
const { stdout } = await execFileAsync6("lsof", ["-i", `:${ctx.gatewayPort}`, "-t"]);
|
|
4083
4190
|
const pids = stdout.trim().split("\n").filter(Boolean);
|
|
4084
4191
|
return {
|
|
4085
4192
|
level: "L2",
|
|
@@ -4158,7 +4265,7 @@ function createPatterns(backupManager) {
|
|
|
4158
4265
|
name: "oom-kill",
|
|
4159
4266
|
async detect(_ctx) {
|
|
4160
4267
|
try {
|
|
4161
|
-
const { stdout } = await
|
|
4268
|
+
const { stdout } = await execFileAsync6("dmesg", ["--time-format", "reltime"], {
|
|
4162
4269
|
timeout: 5e3
|
|
4163
4270
|
});
|
|
4164
4271
|
return /oom_kill_process|Out of memory/.test(stdout);
|