@steadwing/openalerts 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +90 -41
- package/dist/core/engine.d.ts +7 -0
- package/dist/core/engine.js +50 -6
- package/dist/core/evaluator.js +19 -2
- package/dist/core/index.d.ts +2 -1
- package/dist/core/index.js +2 -0
- package/dist/core/llm-enrichment.d.ts +14 -0
- package/dist/core/llm-enrichment.js +174 -0
- package/dist/core/rules.js +63 -25
- package/dist/core/types.d.ts +6 -1
- package/dist/core/types.js +1 -1
- package/dist/index.js +9 -4
- package/dist/plugin/adapter.d.ts +14 -2
- package/dist/plugin/adapter.js +70 -9
- package/dist/plugin/commands.js +2 -21
- package/dist/plugin/dashboard-html.js +14 -7
- package/dist/plugin/dashboard-routes.js +27 -2
- package/dist/plugin/log-bridge.js +27 -0
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -37,7 +37,9 @@ openclaw plugins install @steadwing/openalerts
|
|
|
37
37
|
|
|
38
38
|
### 2. Configure
|
|
39
39
|
|
|
40
|
-
|
|
40
|
+
If you already have a channel paired with OpenClaw (e.g. Telegram via `openclaw pair`), **no config is needed** — OpenAlerts auto-detects where to send alerts.
|
|
41
|
+
|
|
42
|
+
Otherwise, set it explicitly in `openclaw.json`:
|
|
41
43
|
|
|
42
44
|
```jsonc
|
|
43
45
|
{
|
|
@@ -55,69 +57,128 @@ Add to your `openclaw.json`:
|
|
|
55
57
|
}
|
|
56
58
|
```
|
|
57
59
|
|
|
60
|
+
**Auto-detection priority:** explicit config > static `allowFrom` in channel config > pairing store.
|
|
61
|
+
|
|
58
62
|
### 3. Restart & verify
|
|
59
63
|
|
|
60
64
|
```bash
|
|
61
65
|
openclaw gateway stop && openclaw gateway run
|
|
62
66
|
```
|
|
63
67
|
|
|
68
|
+
|
|
64
69
|
Send `/health` to your bot. You should get a live status report back — zero LLM tokens consumed.
|
|
65
70
|
|
|
66
71
|
That's it. OpenAlerts is now watching your agent.
|
|
67
72
|
|
|
73
|
+
## Dashboard
|
|
74
|
+
|
|
75
|
+
A real-time web dashboard is embedded in the gateway at:
|
|
76
|
+
|
|
77
|
+
```
|
|
78
|
+
http://127.0.0.1:18789/openalerts
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
- **Activity** — Live event timeline with session flows, tool calls, LLM usage
|
|
82
|
+
- **System Logs** — Filtered, structured logs with search
|
|
83
|
+
- **Health** — Rule status, alert history, system stats
|
|
84
|
+
|
|
68
85
|
## Alert Rules
|
|
69
86
|
|
|
70
|
-
|
|
87
|
+
Eight rules run against every event in real time:
|
|
71
88
|
|
|
72
89
|
| Rule | Watches for | Severity |
|
|
73
90
|
|---|---|---|
|
|
74
|
-
| **llm-errors** |
|
|
75
|
-
| **infra-errors** |
|
|
76
|
-
| **gateway-down** | No heartbeat for
|
|
91
|
+
| **llm-errors** | 1+ LLM/agent failure in 1 minute | ERROR |
|
|
92
|
+
| **infra-errors** | 1+ infrastructure error in 1 minute | ERROR |
|
|
93
|
+
| **gateway-down** | No heartbeat for 30+ seconds | CRITICAL |
|
|
77
94
|
| **session-stuck** | Session idle for 120+ seconds | WARN |
|
|
78
95
|
| **high-error-rate** | 50%+ of last 20 messages failed | ERROR |
|
|
79
96
|
| **queue-depth** | 10+ items queued | WARN |
|
|
97
|
+
| **tool-errors** | 1+ tool failure in 1 minute | WARN |
|
|
80
98
|
| **heartbeat-fail** | 3 consecutive heartbeat failures | ERROR |
|
|
81
99
|
|
|
82
|
-
All thresholds and cooldowns are [configurable per-rule](#configuration).
|
|
100
|
+
All thresholds and cooldowns are [configurable per-rule](#advanced-configuration).
|
|
83
101
|
|
|
84
|
-
##
|
|
102
|
+
## LLM-Enriched Alerts
|
|
85
103
|
|
|
86
|
-
|
|
104
|
+
By default, OpenAlerts uses your configured LLM model to enrich alerts with a human-friendly summary and an actionable suggestion. The enrichment is appended below the original alert detail:
|
|
105
|
+
|
|
106
|
+
```
|
|
107
|
+
1 agent error(s) on unknown in the last minute. Last: 401 Incorrect API key...
|
|
108
|
+
|
|
109
|
+
Summary: Your OpenAI API key is invalid or expired — the agent cannot make LLM calls.
|
|
110
|
+
Action: Update your API key in ~/.openclaw/.env with a valid key from platform.openai.com/api-keys
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
- **Model**: reads from `agents.defaults.model.primary` in your `openclaw.json` (e.g. `"openai/gpt-4o-mini"`)
|
|
114
|
+
- **API key**: reads from the corresponding environment variable (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GROQ_API_KEY`, etc.)
|
|
115
|
+
- **Supported providers**: OpenAI, Anthropic, Groq, Together, DeepSeek (a model string with any other provider prefix is skipped with a warning, and alerts are sent unenriched)
|
|
116
|
+
- **Graceful fallback**: if the LLM call fails or times out (10s), the original alert is sent unchanged
|
|
117
|
+
|
|
118
|
+
To disable LLM enrichment, set `"llmEnriched": false` in your plugin config:
|
|
87
119
|
|
|
88
120
|
```jsonc
|
|
89
121
|
{
|
|
90
|
-
"
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
"threshold": 120000 // override: 2 min instead of 90s
|
|
98
|
-
},
|
|
99
|
-
"high-error-rate": {
|
|
100
|
-
"enabled": false // disable a rule entirely
|
|
101
|
-
},
|
|
102
|
-
"llm-errors": {
|
|
103
|
-
"threshold": 5, // require 5 errors instead of 3
|
|
104
|
-
"cooldownMinutes": 30 // longer cooldown for this rule
|
|
122
|
+
"plugins": {
|
|
123
|
+
"entries": {
|
|
124
|
+
"openalerts": {
|
|
125
|
+
"config": {
|
|
126
|
+
"llmEnriched": false
|
|
127
|
+
}
|
|
128
|
+
}
|
|
105
129
|
}
|
|
106
130
|
}
|
|
107
131
|
}
|
|
108
132
|
```
|
|
109
133
|
|
|
110
|
-
##
|
|
134
|
+
## Advanced Configuration
|
|
111
135
|
|
|
112
|
-
|
|
136
|
+
Each rule can be individually tuned or disabled. You can also set global options like `cooldownMinutes` (default: `15`) and `quiet: true` for log-only mode.
|
|
113
137
|
|
|
138
|
+
**Step 1.** Add a `rules` object inside `plugins.entries.openalerts.config` in your `~/.openclaw/openclaw.json`:
|
|
139
|
+
|
|
140
|
+
```jsonc
|
|
141
|
+
{
|
|
142
|
+
"plugins": {
|
|
143
|
+
"entries": {
|
|
144
|
+
"openalerts": {
|
|
145
|
+
"enabled": true,
|
|
146
|
+
"config": {
|
|
147
|
+
"rules": {
|
|
148
|
+
"llm-errors": { "threshold": 5 },
|
|
149
|
+
"infra-errors": { "cooldownMinutes": 30 },
|
|
150
|
+
"high-error-rate": { "enabled": false },
|
|
151
|
+
"gateway-down": { "threshold": 60000 }
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
}
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
}
|
|
114
158
|
```
|
|
115
|
-
|
|
159
|
+
|
|
160
|
+
**Step 2.** Restart the gateway to apply:
|
|
161
|
+
|
|
162
|
+
```bash
|
|
163
|
+
openclaw gateway stop && openclaw gateway run
|
|
116
164
|
```
|
|
117
165
|
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
166
|
+
### Rule reference
|
|
167
|
+
|
|
168
|
+
| Rule | `threshold` unit | Default |
|
|
169
|
+
|---|---|---|
|
|
170
|
+
| `llm-errors` | Error count in 1 min window | `1` |
|
|
171
|
+
| `infra-errors` | Error count in 1 min window | `1` |
|
|
172
|
+
| `gateway-down` | Milliseconds without heartbeat | `30000` (30s) |
|
|
173
|
+
| `session-stuck` | Milliseconds idle | `120000` (2 min) |
|
|
174
|
+
| `high-error-rate` | Error percentage (0-100) | `50` |
|
|
175
|
+
| `queue-depth` | Number of queued items | `10` |
|
|
176
|
+
| `tool-errors` | Error count in 1 min window | `1` |
|
|
177
|
+
| `heartbeat-fail` | Consecutive failures | `3` |
|
|
178
|
+
|
|
179
|
+
Every rule also accepts:
|
|
180
|
+
- **`enabled`** — `false` to disable the rule (default: `true`)
|
|
181
|
+
- **`cooldownMinutes`** — minutes before the same rule can fire again (default: `15`)
|
|
121
182
|
|
|
122
183
|
## Commands
|
|
123
184
|
|
|
@@ -129,18 +190,6 @@ Zero-token chat commands available in any connected channel:
|
|
|
129
190
|
| `/alerts` | Recent alert history with severity and timestamps |
|
|
130
191
|
| `/dashboard` | Returns the dashboard URL |
|
|
131
192
|
|
|
132
|
-
## Architecture
|
|
133
|
-
|
|
134
|
-
```
|
|
135
|
-
src/core/ Framework-agnostic engine, zero dependencies
|
|
136
|
-
Rules engine, evaluator, event bus, state store, formatter
|
|
137
|
-
|
|
138
|
-
src/plugin/ OpenClaw adapter plugin
|
|
139
|
-
Event translation, alert routing, dashboard, chat commands
|
|
140
|
-
```
|
|
141
|
-
|
|
142
|
-
Everything ships as a single `@steadwing/openalerts` package. The core is completely framework-agnostic — adding monitoring for a new framework only requires writing an adapter.
|
|
143
|
-
|
|
144
193
|
## Development
|
|
145
194
|
|
|
146
195
|
```bash
|
package/dist/core/engine.d.ts
CHANGED
|
@@ -13,11 +13,14 @@ export declare class OpenAlertsEngine {
|
|
|
13
13
|
private stateDir;
|
|
14
14
|
private dispatcher;
|
|
15
15
|
private platform;
|
|
16
|
+
private enricher;
|
|
16
17
|
private logger;
|
|
17
18
|
private logPrefix;
|
|
18
19
|
private watchdogTimer;
|
|
19
20
|
private pruneTimer;
|
|
20
21
|
private running;
|
|
22
|
+
private eventRing;
|
|
23
|
+
private static readonly RING_MAX;
|
|
21
24
|
constructor(options: OpenAlertsInitOptions);
|
|
22
25
|
/** Start the engine: warm from history, start timers. */
|
|
23
26
|
start(): void;
|
|
@@ -30,12 +33,16 @@ export declare class OpenAlertsEngine {
|
|
|
30
33
|
readonly name: string;
|
|
31
34
|
send(alert: AlertEvent, formatted: string): Promise<void> | void;
|
|
32
35
|
}): void;
|
|
36
|
+
/** Fire a test alert to verify delivery. */
|
|
37
|
+
sendTestAlert(): void;
|
|
33
38
|
/** Whether the platform sync is connected. */
|
|
34
39
|
get platformConnected(): boolean;
|
|
35
40
|
/** Whether the engine is running. */
|
|
36
41
|
get isRunning(): boolean;
|
|
37
42
|
/** Read recent stored events (for /alerts command). */
|
|
38
43
|
getRecentEvents(limit?: number): StoredEvent[];
|
|
44
|
+
/** Get recent full events from the in-memory ring buffer (for dashboard history). */
|
|
45
|
+
getRecentLiveEvents(limit?: number): OpenAlertsEvent[];
|
|
39
46
|
private handleEvent;
|
|
40
47
|
private fireAlert;
|
|
41
48
|
}
|
package/dist/core/engine.js
CHANGED
|
@@ -17,14 +17,18 @@ export class OpenAlertsEngine {
|
|
|
17
17
|
stateDir;
|
|
18
18
|
dispatcher;
|
|
19
19
|
platform = null;
|
|
20
|
+
enricher;
|
|
20
21
|
logger;
|
|
21
22
|
logPrefix;
|
|
22
23
|
watchdogTimer = null;
|
|
23
24
|
pruneTimer = null;
|
|
24
25
|
running = false;
|
|
26
|
+
eventRing = [];
|
|
27
|
+
static RING_MAX = 500;
|
|
25
28
|
constructor(options) {
|
|
26
29
|
this.config = options.config;
|
|
27
30
|
this.stateDir = options.stateDir;
|
|
31
|
+
this.enricher = options.enricher ?? null;
|
|
28
32
|
this.logger = options.logger ?? console;
|
|
29
33
|
this.logPrefix = options.logPrefix ?? "openalerts";
|
|
30
34
|
this.bus = new OpenAlertsEventBus();
|
|
@@ -64,7 +68,9 @@ export class OpenAlertsEngine {
|
|
|
64
68
|
this.watchdogTimer = setInterval(() => {
|
|
65
69
|
const alerts = processWatchdogTick(this.state, this.config);
|
|
66
70
|
for (const alert of alerts) {
|
|
67
|
-
this.fireAlert(alert)
|
|
71
|
+
void this.fireAlert(alert).catch((err) => {
|
|
72
|
+
this.logger.error(`${this.logPrefix}: watchdog alert failed: ${String(err)}`);
|
|
73
|
+
});
|
|
68
74
|
}
|
|
69
75
|
}, DEFAULTS.watchdogIntervalMs);
|
|
70
76
|
// Prune timer (cleans old log entries every 6h)
|
|
@@ -82,7 +88,7 @@ export class OpenAlertsEngine {
|
|
|
82
88
|
const channelNames = this.dispatcher.hasChannels
|
|
83
89
|
? `${this.dispatcher.channelCount} channel(s)`
|
|
84
90
|
: "log-only (no alert channels)";
|
|
85
|
-
this.logger.info(`${this.logPrefix}: started, ${channelNames},
|
|
91
|
+
this.logger.info(`${this.logPrefix}: started, ${channelNames}, 8 rules active`);
|
|
86
92
|
}
|
|
87
93
|
/** Ingest a universal event. Can be called directly or via the event bus. */
|
|
88
94
|
ingest(event) {
|
|
@@ -109,6 +115,21 @@ export class OpenAlertsEngine {
|
|
|
109
115
|
addChannel(channel) {
|
|
110
116
|
this.dispatcher.addChannel(channel);
|
|
111
117
|
}
|
|
118
|
+
/** Fire a test alert to verify delivery. */
|
|
119
|
+
sendTestAlert() {
|
|
120
|
+
void this.fireAlert({
|
|
121
|
+
type: "alert",
|
|
122
|
+
id: `test:manual:${Date.now()}`,
|
|
123
|
+
ruleId: "test",
|
|
124
|
+
severity: "info",
|
|
125
|
+
title: "Test alert — delivery verified",
|
|
126
|
+
detail: "This is a test alert from /test_alert. If you see this, alert delivery is working.",
|
|
127
|
+
ts: Date.now(),
|
|
128
|
+
fingerprint: "test:manual",
|
|
129
|
+
}).catch((err) => {
|
|
130
|
+
this.logger.error(`${this.logPrefix}: test alert failed: ${String(err)}`);
|
|
131
|
+
});
|
|
132
|
+
}
|
|
112
133
|
/** Whether the platform sync is connected. */
|
|
113
134
|
get platformConnected() {
|
|
114
135
|
return this.platform?.isConnected() ?? false;
|
|
@@ -121,8 +142,17 @@ export class OpenAlertsEngine {
|
|
|
121
142
|
getRecentEvents(limit = 100) {
|
|
122
143
|
return readRecentEvents(this.stateDir, limit);
|
|
123
144
|
}
|
|
145
|
+
/** Get recent full events from the in-memory ring buffer (for dashboard history). */
|
|
146
|
+
getRecentLiveEvents(limit = 200) {
|
|
147
|
+
return this.eventRing.slice(-limit);
|
|
148
|
+
}
|
|
124
149
|
// ─── Internal ──────────────────────────────────────────────────────────────
|
|
125
150
|
handleEvent(event) {
|
|
151
|
+
// Add to in-memory ring buffer
|
|
152
|
+
this.eventRing.push(event);
|
|
153
|
+
if (this.eventRing.length > OpenAlertsEngine.RING_MAX) {
|
|
154
|
+
this.eventRing = this.eventRing.slice(-OpenAlertsEngine.RING_MAX);
|
|
155
|
+
}
|
|
126
156
|
// Persist as diagnostic snapshot
|
|
127
157
|
const snapshot = {
|
|
128
158
|
type: "diagnostic",
|
|
@@ -141,13 +171,15 @@ export class OpenAlertsEngine {
|
|
|
141
171
|
// Run through evaluator
|
|
142
172
|
const alerts = processEvent(this.state, this.config, event);
|
|
143
173
|
for (const alert of alerts) {
|
|
144
|
-
this.fireAlert(alert)
|
|
174
|
+
void this.fireAlert(alert).catch((err) => {
|
|
175
|
+
this.logger.error(`${this.logPrefix}: alert fire failed: ${String(err)}`);
|
|
176
|
+
});
|
|
145
177
|
}
|
|
146
178
|
// Forward to platform
|
|
147
179
|
this.platform?.enqueue(snapshot);
|
|
148
180
|
}
|
|
149
|
-
fireAlert(alert) {
|
|
150
|
-
// Persist alert
|
|
181
|
+
async fireAlert(alert) {
|
|
182
|
+
// Persist alert (original, before enrichment)
|
|
151
183
|
try {
|
|
152
184
|
appendEvent(this.stateDir, alert);
|
|
153
185
|
}
|
|
@@ -156,9 +188,21 @@ export class OpenAlertsEngine {
|
|
|
156
188
|
}
|
|
157
189
|
// Forward to platform
|
|
158
190
|
this.platform?.enqueue(alert);
|
|
191
|
+
// Enrich with LLM if enricher is available
|
|
192
|
+
let enriched = alert;
|
|
193
|
+
if (this.enricher) {
|
|
194
|
+
try {
|
|
195
|
+
const result = await this.enricher(alert);
|
|
196
|
+
if (result)
|
|
197
|
+
enriched = result;
|
|
198
|
+
}
|
|
199
|
+
catch (err) {
|
|
200
|
+
this.logger.warn(`${this.logPrefix}: llm enrichment failed, using original: ${String(err)}`);
|
|
201
|
+
}
|
|
202
|
+
}
|
|
159
203
|
// Dispatch to channels (unless quiet mode)
|
|
160
204
|
if (!this.config.quiet) {
|
|
161
|
-
void this.dispatcher.dispatch(
|
|
205
|
+
void this.dispatcher.dispatch(enriched).catch((err) => {
|
|
162
206
|
this.logger.error(`${this.logPrefix}: alert dispatch failed: ${String(err)}`);
|
|
163
207
|
});
|
|
164
208
|
}
|
package/dist/core/evaluator.js
CHANGED
|
@@ -65,7 +65,7 @@ export function processEvent(state, config, event) {
|
|
|
65
65
|
state.stats.totalCostUsd = 0;
|
|
66
66
|
state.stats.lastResetTs = now;
|
|
67
67
|
}
|
|
68
|
-
// Track event types in stats
|
|
68
|
+
// Track event types in stats (independent of rule enabled state)
|
|
69
69
|
if (event.type === "infra.error") {
|
|
70
70
|
state.stats.webhookErrors++;
|
|
71
71
|
}
|
|
@@ -83,6 +83,16 @@ export function processEvent(state, config, event) {
|
|
|
83
83
|
if (event.type === "session.start") {
|
|
84
84
|
state.stats.sessionsStarted++;
|
|
85
85
|
}
|
|
86
|
+
if (event.type === "session.stuck") {
|
|
87
|
+
state.stats.stuckSessions++;
|
|
88
|
+
}
|
|
89
|
+
if (event.type === "llm.call" || event.type === "llm.error" || event.type === "agent.error") {
|
|
90
|
+
state.stats.messagesProcessed++;
|
|
91
|
+
if (event.type === "llm.error" || event.type === "agent.error" ||
|
|
92
|
+
event.outcome === "error" || event.outcome === "timeout") {
|
|
93
|
+
state.stats.messageErrors++;
|
|
94
|
+
}
|
|
95
|
+
}
|
|
86
96
|
if (event.type === "llm.token_usage") {
|
|
87
97
|
if (typeof event.tokenCount === "number")
|
|
88
98
|
state.stats.totalTokens += event.tokenCount;
|
|
@@ -103,7 +113,14 @@ export function processEvent(state, config, event) {
|
|
|
103
113
|
const ctx = { state, config, now };
|
|
104
114
|
const fired = [];
|
|
105
115
|
for (const rule of ALL_RULES) {
|
|
106
|
-
|
|
116
|
+
let alert;
|
|
117
|
+
try {
|
|
118
|
+
alert = rule.evaluate(event, ctx);
|
|
119
|
+
}
|
|
120
|
+
catch {
|
|
121
|
+
// One broken rule must never block the rest
|
|
122
|
+
continue;
|
|
123
|
+
}
|
|
107
124
|
if (!alert)
|
|
108
125
|
continue;
|
|
109
126
|
// Check cooldown
|
package/dist/core/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export type { AlertChannel, AlertEvent, AlertRuleDefinition, AlertSeverity, AlertTarget, DiagnosticSnapshot, EvaluatorState, HeartbeatSnapshot, MonitorConfig, RuleContext, RuleOverride, OpenAlertsEvent, OpenAlertsEventType, OpenAlertsInitOptions, OpenAlertsLogger, StoredEvent, WindowEntry, } from "./types.js";
|
|
1
|
+
export type { AlertChannel, AlertEnricher, AlertEvent, AlertRuleDefinition, AlertSeverity, AlertTarget, DiagnosticSnapshot, EvaluatorState, HeartbeatSnapshot, MonitorConfig, RuleContext, RuleOverride, OpenAlertsEvent, OpenAlertsEventType, OpenAlertsInitOptions, OpenAlertsLogger, StoredEvent, WindowEntry, } from "./types.js";
|
|
2
2
|
export { DEFAULTS, LOG_FILENAME, STORE_DIR_NAME } from "./types.js";
|
|
3
3
|
export { OpenAlertsEngine } from "./engine.js";
|
|
4
4
|
export { OpenAlertsEventBus } from "./event-bus.js";
|
|
@@ -6,6 +6,7 @@ export { AlertDispatcher } from "./alert-channel.js";
|
|
|
6
6
|
export { createEvaluatorState, processEvent, processWatchdogTick, warmFromHistory, } from "./evaluator.js";
|
|
7
7
|
export { ALL_RULES } from "./rules.js";
|
|
8
8
|
export { appendEvent, pruneLog, readAllEvents, readRecentEvents, } from "./store.js";
|
|
9
|
+
export { createLlmEnricher, type LlmEnricherOptions } from "./llm-enrichment.js";
|
|
9
10
|
export { formatAlertMessage, formatAlertsOutput, formatHealthOutput, } from "./formatter.js";
|
|
10
11
|
export { createPlatformSync, type PlatformSync } from "./platform.js";
|
|
11
12
|
export { BoundedMap, type BoundedMapOptions, type BoundedMapStats, } from "./bounded-map.js";
|
package/dist/core/index.js
CHANGED
|
@@ -13,6 +13,8 @@ export { createEvaluatorState, processEvent, processWatchdogTick, warmFromHistor
|
|
|
13
13
|
export { ALL_RULES } from "./rules.js";
|
|
14
14
|
// Store
|
|
15
15
|
export { appendEvent, pruneLog, readAllEvents, readRecentEvents, } from "./store.js";
|
|
16
|
+
// LLM Enrichment
|
|
17
|
+
export { createLlmEnricher } from "./llm-enrichment.js";
|
|
16
18
|
// Formatter
|
|
17
19
|
export { formatAlertMessage, formatAlertsOutput, formatHealthOutput, } from "./formatter.js";
|
|
18
20
|
// Platform
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
import type { AlertEnricher, OpenAlertsLogger } from "./types.js";
|
|
2
|
+
export type LlmEnricherOptions = {
|
|
3
|
+
/** Model string from config, e.g. "openai/gpt-5-nano" */
|
|
4
|
+
modelString: string;
|
|
5
|
+
/** Logger for debug/warn messages */
|
|
6
|
+
logger?: OpenAlertsLogger;
|
|
7
|
+
/** Timeout in ms (default: 10000) */
|
|
8
|
+
timeoutMs?: number;
|
|
9
|
+
};
|
|
10
|
+
/**
|
|
11
|
+
* Create an AlertEnricher that calls an LLM to add a summary + action to alerts.
|
|
12
|
+
* Returns null if the model string is not in "provider/model" form, the provider is unknown, or the provider's API key is not set in the environment.
|
|
13
|
+
*/
|
|
14
|
+
export declare function createLlmEnricher(opts: LlmEnricherOptions): AlertEnricher | null;
|
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
const PROVIDER_MAP = {
|
|
2
|
+
openai: {
|
|
3
|
+
type: "openai-compatible",
|
|
4
|
+
baseUrl: "https://api.openai.com/v1",
|
|
5
|
+
apiKeyEnvVar: "OPENAI_API_KEY",
|
|
6
|
+
},
|
|
7
|
+
groq: {
|
|
8
|
+
type: "openai-compatible",
|
|
9
|
+
baseUrl: "https://api.groq.com/openai/v1",
|
|
10
|
+
apiKeyEnvVar: "GROQ_API_KEY",
|
|
11
|
+
},
|
|
12
|
+
together: {
|
|
13
|
+
type: "openai-compatible",
|
|
14
|
+
baseUrl: "https://api.together.xyz/v1",
|
|
15
|
+
apiKeyEnvVar: "TOGETHER_API_KEY",
|
|
16
|
+
},
|
|
17
|
+
deepseek: {
|
|
18
|
+
type: "openai-compatible",
|
|
19
|
+
baseUrl: "https://api.deepseek.com/v1",
|
|
20
|
+
apiKeyEnvVar: "DEEPSEEK_API_KEY",
|
|
21
|
+
},
|
|
22
|
+
anthropic: {
|
|
23
|
+
type: "anthropic",
|
|
24
|
+
baseUrl: "https://api.anthropic.com/v1",
|
|
25
|
+
apiKeyEnvVar: "ANTHROPIC_API_KEY",
|
|
26
|
+
},
|
|
27
|
+
};
|
|
28
|
+
// ─── Prompt ─────────────────────────────────────────────────────────────────
|
|
29
|
+
function buildPrompt(alert) {
|
|
30
|
+
return `You are a concise DevOps alert analyst. Given this monitoring alert, provide:
|
|
31
|
+
1. A brief human-friendly summary (1 sentence, plain language)
|
|
32
|
+
2. One actionable suggestion to resolve it
|
|
33
|
+
|
|
34
|
+
Alert:
|
|
35
|
+
- Rule: ${alert.ruleId}
|
|
36
|
+
- Severity: ${alert.severity}
|
|
37
|
+
- Title: ${alert.title}
|
|
38
|
+
- Detail: ${alert.detail}
|
|
39
|
+
|
|
40
|
+
Reply in exactly this format (2 lines only):
|
|
41
|
+
Summary: <your summary>
|
|
42
|
+
Action: <your suggestion>`;
|
|
43
|
+
}
|
|
44
|
+
// ─── Response Parsing ───────────────────────────────────────────────────────
|
|
45
|
+
function parseEnrichment(text) {
|
|
46
|
+
const lines = text.trim().split("\n");
|
|
47
|
+
let summary = "";
|
|
48
|
+
let action = "";
|
|
49
|
+
for (const line of lines) {
|
|
50
|
+
const trimmed = line.trim();
|
|
51
|
+
if (trimmed.toLowerCase().startsWith("summary:")) {
|
|
52
|
+
summary = trimmed.slice("summary:".length).trim();
|
|
53
|
+
}
|
|
54
|
+
else if (trimmed.toLowerCase().startsWith("action:")) {
|
|
55
|
+
action = trimmed.slice("action:".length).trim();
|
|
56
|
+
}
|
|
57
|
+
}
|
|
58
|
+
if (!summary && !action)
|
|
59
|
+
return null;
|
|
60
|
+
return { summary, action };
|
|
61
|
+
}
|
|
62
|
+
// ─── HTTP Calls ─────────────────────────────────────────────────────────────
|
|
63
|
+
async function callOpenAICompatible(baseUrl, apiKey, model, prompt, timeoutMs) {
|
|
64
|
+
const controller = new AbortController();
|
|
65
|
+
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
66
|
+
try {
|
|
67
|
+
const res = await fetch(`${baseUrl}/chat/completions`, {
|
|
68
|
+
method: "POST",
|
|
69
|
+
headers: {
|
|
70
|
+
"Content-Type": "application/json",
|
|
71
|
+
Authorization: `Bearer ${apiKey}`,
|
|
72
|
+
},
|
|
73
|
+
body: JSON.stringify({
|
|
74
|
+
model,
|
|
75
|
+
messages: [{ role: "user", content: prompt }],
|
|
76
|
+
max_tokens: 200,
|
|
77
|
+
temperature: 0.3,
|
|
78
|
+
}),
|
|
79
|
+
signal: controller.signal,
|
|
80
|
+
});
|
|
81
|
+
if (!res.ok)
|
|
82
|
+
return null;
|
|
83
|
+
const data = (await res.json());
|
|
84
|
+
return data.choices?.[0]?.message?.content ?? null;
|
|
85
|
+
}
|
|
86
|
+
catch {
|
|
87
|
+
return null;
|
|
88
|
+
}
|
|
89
|
+
finally {
|
|
90
|
+
clearTimeout(timer);
|
|
91
|
+
}
|
|
92
|
+
}
|
|
93
|
+
async function callAnthropic(baseUrl, apiKey, model, prompt, timeoutMs) {
|
|
94
|
+
const controller = new AbortController();
|
|
95
|
+
const timer = setTimeout(() => controller.abort(), timeoutMs);
|
|
96
|
+
try {
|
|
97
|
+
const res = await fetch(`${baseUrl}/messages`, {
|
|
98
|
+
method: "POST",
|
|
99
|
+
headers: {
|
|
100
|
+
"Content-Type": "application/json",
|
|
101
|
+
"x-api-key": apiKey,
|
|
102
|
+
"anthropic-version": "2023-06-01",
|
|
103
|
+
},
|
|
104
|
+
body: JSON.stringify({
|
|
105
|
+
model,
|
|
106
|
+
max_tokens: 200,
|
|
107
|
+
messages: [{ role: "user", content: prompt }],
|
|
108
|
+
}),
|
|
109
|
+
signal: controller.signal,
|
|
110
|
+
});
|
|
111
|
+
if (!res.ok)
|
|
112
|
+
return null;
|
|
113
|
+
const data = (await res.json());
|
|
114
|
+
const textBlock = data.content?.find((b) => b.type === "text");
|
|
115
|
+
return textBlock?.text ?? null;
|
|
116
|
+
}
|
|
117
|
+
catch {
|
|
118
|
+
return null;
|
|
119
|
+
}
|
|
120
|
+
finally {
|
|
121
|
+
clearTimeout(timer);
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
// ─── Factory ────────────────────────────────────────────────────────────────
|
|
125
|
+
/**
|
|
126
|
+
* Create an AlertEnricher that calls an LLM to add a summary + action to alerts.
|
|
127
|
+
* Returns null if the model string is not in "provider/model" form, the provider is unknown, or the provider's API key is not set in the environment.
|
|
128
|
+
*/
|
|
129
|
+
export function createLlmEnricher(opts) {
|
|
130
|
+
const { modelString, logger, timeoutMs = 10_000 } = opts;
|
|
131
|
+
// Parse "provider/model-name" format
|
|
132
|
+
const slashIdx = modelString.indexOf("/");
|
|
133
|
+
if (slashIdx < 1) {
|
|
134
|
+
logger?.warn(`openalerts: llm-enrichment skipped — invalid model string "${modelString}"`);
|
|
135
|
+
return null;
|
|
136
|
+
}
|
|
137
|
+
const providerKey = modelString.slice(0, slashIdx).toLowerCase();
|
|
138
|
+
const model = modelString.slice(slashIdx + 1);
|
|
139
|
+
const providerConfig = PROVIDER_MAP[providerKey];
|
|
140
|
+
if (!providerConfig) {
|
|
141
|
+
logger?.warn(`openalerts: llm-enrichment skipped — unknown provider "${providerKey}"`);
|
|
142
|
+
return null;
|
|
143
|
+
}
|
|
144
|
+
const apiKey = process.env[providerConfig.apiKeyEnvVar];
|
|
145
|
+
if (!apiKey) {
|
|
146
|
+
logger?.warn(`openalerts: llm-enrichment skipped — ${providerConfig.apiKeyEnvVar} not set in environment`);
|
|
147
|
+
return null;
|
|
148
|
+
}
|
|
149
|
+
logger?.info(`openalerts: llm-enrichment enabled (${providerKey}/${model})`);
|
|
150
|
+
return async (alert) => {
|
|
151
|
+
const prompt = buildPrompt(alert);
|
|
152
|
+
let responseText = null;
|
|
153
|
+
if (providerConfig.type === "anthropic") {
|
|
154
|
+
responseText = await callAnthropic(providerConfig.baseUrl, apiKey, model, prompt, timeoutMs);
|
|
155
|
+
}
|
|
156
|
+
else {
|
|
157
|
+
responseText = await callOpenAICompatible(providerConfig.baseUrl, apiKey, model, prompt, timeoutMs);
|
|
158
|
+
}
|
|
159
|
+
if (!responseText)
|
|
160
|
+
return null;
|
|
161
|
+
const parsed = parseEnrichment(responseText);
|
|
162
|
+
if (!parsed)
|
|
163
|
+
return null;
|
|
164
|
+
// Append enrichment to the original detail
|
|
165
|
+
let enrichedDetail = alert.detail;
|
|
166
|
+
if (parsed.summary) {
|
|
167
|
+
enrichedDetail += `\n\nSummary: ${parsed.summary}`;
|
|
168
|
+
}
|
|
169
|
+
if (parsed.action) {
|
|
170
|
+
enrichedDetail += `\nAction: ${parsed.action}`;
|
|
171
|
+
}
|
|
172
|
+
return { ...alert, detail: enrichedDetail };
|
|
173
|
+
};
|
|
174
|
+
}
|
package/dist/core/rules.js
CHANGED
|
@@ -32,7 +32,7 @@ function isRuleEnabled(ctx, ruleId) {
|
|
|
32
32
|
const infraErrors = {
|
|
33
33
|
id: "infra-errors",
|
|
34
34
|
defaultCooldownMs: 15 * 60 * 1000,
|
|
35
|
-
defaultThreshold:
|
|
35
|
+
defaultThreshold: 1,
|
|
36
36
|
evaluate(event, ctx) {
|
|
37
37
|
if (event.type !== "infra.error")
|
|
38
38
|
return null;
|
|
@@ -40,8 +40,8 @@ const infraErrors = {
|
|
|
40
40
|
return null;
|
|
41
41
|
const channel = event.channel ?? "unknown";
|
|
42
42
|
pushWindow(ctx, "infra-errors", { ts: ctx.now });
|
|
43
|
-
const threshold = getRuleThreshold(ctx, "infra-errors",
|
|
44
|
-
const windowMs =
|
|
43
|
+
const threshold = getRuleThreshold(ctx, "infra-errors", 1);
|
|
44
|
+
const windowMs = 60 * 1000; // 1 minute
|
|
45
45
|
const count = countInWindow(ctx, "infra-errors", windowMs);
|
|
46
46
|
if (count < threshold)
|
|
47
47
|
return null;
|
|
@@ -52,7 +52,7 @@ const infraErrors = {
|
|
|
52
52
|
ruleId: "infra-errors",
|
|
53
53
|
severity: "error",
|
|
54
54
|
title: "Infrastructure errors spike",
|
|
55
|
-
detail: `${count} infra
|
|
55
|
+
detail: `${count} infra error(s) on ${channel} in the last minute.${event.error ? ` Last: ${event.error}` : ""}`,
|
|
56
56
|
ts: ctx.now,
|
|
57
57
|
fingerprint,
|
|
58
58
|
};
|
|
@@ -62,32 +62,37 @@ const infraErrors = {
|
|
|
62
62
|
const llmErrors = {
|
|
63
63
|
id: "llm-errors",
|
|
64
64
|
defaultCooldownMs: 15 * 60 * 1000,
|
|
65
|
-
defaultThreshold:
|
|
65
|
+
defaultThreshold: 1,
|
|
66
66
|
evaluate(event, ctx) {
|
|
67
|
-
|
|
67
|
+
// Trigger on LLM call/error events AND agent errors (agent failing before/during LLM call)
|
|
68
|
+
if (event.type !== "llm.call" && event.type !== "llm.error" && event.type !== "agent.error")
|
|
68
69
|
return null;
|
|
69
70
|
if (!isRuleEnabled(ctx, "llm-errors"))
|
|
70
71
|
return null;
|
|
71
|
-
//
|
|
72
|
-
|
|
73
|
-
if (event.
|
|
74
|
-
|
|
75
|
-
|
|
72
|
+
// Stats are tracked in the evaluator (independent of rule state).
|
|
73
|
+
// Only proceed for actual errors:
|
|
74
|
+
if (event.type === "llm.call") {
|
|
75
|
+
// Only explicit error/timeout outcomes trigger alerting; undefined = OK
|
|
76
|
+
if (event.outcome !== "error" && event.outcome !== "timeout")
|
|
77
|
+
return null;
|
|
78
|
+
}
|
|
79
|
+
// llm.error and agent.error are always errors — no outcome check needed
|
|
76
80
|
const channel = event.channel ?? "unknown";
|
|
77
81
|
pushWindow(ctx, "llm-errors", { ts: ctx.now });
|
|
78
|
-
const threshold = getRuleThreshold(ctx, "llm-errors",
|
|
79
|
-
const windowMs =
|
|
82
|
+
const threshold = getRuleThreshold(ctx, "llm-errors", 1);
|
|
83
|
+
const windowMs = 60 * 1000; // 1 minute
|
|
80
84
|
const count = countInWindow(ctx, "llm-errors", windowMs);
|
|
81
85
|
if (count < threshold)
|
|
82
86
|
return null;
|
|
83
87
|
const fingerprint = `llm-errors:${channel}`;
|
|
88
|
+
const label = event.type === "agent.error" ? "agent error(s)" : "LLM error(s)";
|
|
84
89
|
return {
|
|
85
90
|
type: "alert",
|
|
86
91
|
id: makeAlertId("llm-errors", fingerprint, ctx.now),
|
|
87
92
|
ruleId: "llm-errors",
|
|
88
93
|
severity: "error",
|
|
89
94
|
title: "LLM call errors",
|
|
90
|
-
detail: `${count}
|
|
95
|
+
detail: `${count} ${label} on ${channel} in the last minute.${event.error ? ` Last: ${event.error}` : ""}`,
|
|
91
96
|
ts: ctx.now,
|
|
92
97
|
fingerprint,
|
|
93
98
|
};
|
|
@@ -103,7 +108,7 @@ const sessionStuck = {
|
|
|
103
108
|
return null;
|
|
104
109
|
if (!isRuleEnabled(ctx, "session-stuck"))
|
|
105
110
|
return null;
|
|
106
|
-
|
|
111
|
+
// Stats tracked in evaluator (independent of rule state)
|
|
107
112
|
const ageMs = event.ageMs ?? 0;
|
|
108
113
|
const threshold = getRuleThreshold(ctx, "session-stuck", 120_000);
|
|
109
114
|
if (ageMs < threshold)
|
|
@@ -153,10 +158,8 @@ const heartbeatFail = {
|
|
|
153
158
|
fingerprint,
|
|
154
159
|
};
|
|
155
160
|
}
|
|
156
|
-
// Reset on success
|
|
157
|
-
|
|
158
|
-
ctx.state.consecutives.set(counterKey, 0);
|
|
159
|
-
}
|
|
161
|
+
// Reset on any non-error (success, undefined, etc.)
|
|
162
|
+
ctx.state.consecutives.set(counterKey, 0);
|
|
160
163
|
return null;
|
|
161
164
|
},
|
|
162
165
|
};
|
|
@@ -169,12 +172,12 @@ const queueDepth = {
|
|
|
169
172
|
// Fire on heartbeat (which carries queue depth) and dedicated queue_depth events
|
|
170
173
|
if (event.type !== "infra.heartbeat" && event.type !== "infra.queue_depth")
|
|
171
174
|
return null;
|
|
172
|
-
|
|
173
|
-
return null;
|
|
174
|
-
// Update last heartbeat timestamp (used by gateway-down rule)
|
|
175
|
+
// Always update heartbeat timestamp regardless of rule state (gateway-down depends on it)
|
|
175
176
|
if (event.type === "infra.heartbeat") {
|
|
176
177
|
ctx.state.lastHeartbeatTs = ctx.now;
|
|
177
178
|
}
|
|
179
|
+
if (!isRuleEnabled(ctx, "queue-depth"))
|
|
180
|
+
return null;
|
|
178
181
|
const queued = event.queueDepth ?? 0;
|
|
179
182
|
const threshold = getRuleThreshold(ctx, "queue-depth", 10);
|
|
180
183
|
if (queued < threshold)
|
|
@@ -198,11 +201,15 @@ const highErrorRate = {
|
|
|
198
201
|
defaultCooldownMs: 30 * 60 * 1000,
|
|
199
202
|
defaultThreshold: 50, // percent
|
|
200
203
|
evaluate(event, ctx) {
|
|
201
|
-
if (event.type !== "llm.call")
|
|
204
|
+
if (event.type !== "llm.call" && event.type !== "llm.error" && event.type !== "agent.error")
|
|
202
205
|
return null;
|
|
203
206
|
if (!isRuleEnabled(ctx, "high-error-rate"))
|
|
204
207
|
return null;
|
|
205
|
-
|
|
208
|
+
// agent.error and llm.error are always errors; llm.call checks outcome (timeout counts as error)
|
|
209
|
+
const isError = event.type === "agent.error" ||
|
|
210
|
+
event.type === "llm.error" ||
|
|
211
|
+
event.outcome === "error" ||
|
|
212
|
+
event.outcome === "timeout";
|
|
206
213
|
pushWindow(ctx, "msg-outcomes", { ts: ctx.now, value: isError ? 1 : 0 });
|
|
207
214
|
const window = ctx.state.windows.get("msg-outcomes");
|
|
208
215
|
if (!window || window.length < 20)
|
|
@@ -227,11 +234,41 @@ const highErrorRate = {
|
|
|
227
234
|
};
|
|
228
235
|
},
|
|
229
236
|
};
|
|
237
|
+
// ─── Rule: tool-errors ───────────────────────────────────────────────────
|
|
238
|
+
const toolErrors = {
    id: "tool-errors",
    defaultCooldownMs: 15 * 60 * 1000,
    defaultThreshold: 1, // 1 tool error in 1 minute
    /**
     * Evaluate a single event against the tool-errors rule.
     *
     * Only "tool.error" events are considered. Each one is recorded in the
     * "tool-errors" window, and an alert is produced once the number of
     * entries from the last minute reaches the configured threshold.
     *
     * Note: the window key is the same for every tool, so the count covers
     * all tools, while the fingerprint carries the tool name of the event
     * that triggered the alert.
     *
     * @param event - incoming event; reads `type`, `error` and `meta.toolName`.
     * @param ctx - rule context; reads `now` and is passed to the shared helpers.
     * @returns an alert object, or null when the rule does not fire.
     */
    evaluate(event, ctx) {
        if (event.type !== "tool.error")
            return null;
        if (!isRuleEnabled(ctx, "tool-errors"))
            return null;
        pushWindow(ctx, "tool-errors", { ts: ctx.now });
        const threshold = getRuleThreshold(ctx, "tool-errors", 1);
        const windowMs = 60 * 1000; // 1 minute
        const count = countInWindow(ctx, "tool-errors", windowMs);
        if (count < threshold)
            return null;
        const toolName = event.meta?.toolName ?? "unknown";
        const fingerprint = `tool-errors:${toolName}`;
        // Name the tool in the text: the fingerprint is per tool, so the
        // operator should see which tool produced the triggering error.
        const lastError = event.error ? ` Last: ${event.error}` : "";
        return {
            type: "alert",
            id: makeAlertId("tool-errors", fingerprint, ctx.now),
            ruleId: "tool-errors",
            severity: "warn",
            title: "Tool errors spike",
            detail: `${count} tool error(s) in the last minute; most recent tool: ${toolName}.${lastError}`,
            ts: ctx.now,
            fingerprint,
        };
    },
};
|
|
230
267
|
// ─── Rule: gateway-down ──────────────────────────────────────────────────────
|
|
231
268
|
const gatewayDown = {
|
|
232
269
|
id: "gateway-down",
|
|
233
270
|
defaultCooldownMs: 60 * 60 * 1000,
|
|
234
|
-
defaultThreshold:
|
|
271
|
+
defaultThreshold: 30_000, // 30 seconds
|
|
235
272
|
evaluate(event, ctx) {
|
|
236
273
|
// This rule is called by the watchdog timer, not by events directly.
|
|
237
274
|
if (event.type !== "watchdog.tick")
|
|
@@ -270,5 +307,6 @@ export const ALL_RULES = [
|
|
|
270
307
|
heartbeatFail,
|
|
271
308
|
queueDepth,
|
|
272
309
|
highErrorRate,
|
|
310
|
+
toolErrors,
|
|
273
311
|
gatewayDown,
|
|
274
312
|
];
|
package/dist/core/types.d.ts
CHANGED
|
@@ -51,6 +51,8 @@ export type AlertTarget = {
|
|
|
51
51
|
to: string;
|
|
52
52
|
accountId?: string;
|
|
53
53
|
};
|
|
54
|
+
/** Enriches an alert with LLM-generated summary/action. Returns enriched alert or null to skip. */
|
|
55
|
+
export type AlertEnricher = (alert: AlertEvent) => Promise<AlertEvent | null>;
|
|
54
56
|
export type RuleOverride = {
|
|
55
57
|
enabled?: boolean;
|
|
56
58
|
threshold?: number;
|
|
@@ -65,6 +67,7 @@ export type MonitorConfig = {
|
|
|
65
67
|
maxLogSizeKb?: number;
|
|
66
68
|
maxLogAgeDays?: number;
|
|
67
69
|
quiet?: boolean;
|
|
70
|
+
llmEnriched?: boolean;
|
|
68
71
|
rules?: Record<string, RuleOverride>;
|
|
69
72
|
};
|
|
70
73
|
export type OpenAlertsInitOptions = {
|
|
@@ -80,6 +83,8 @@ export type OpenAlertsInitOptions = {
|
|
|
80
83
|
logPrefix?: string;
|
|
81
84
|
/** Diagnosis hint shown in critical alerts (e.g., 'Run "openclaw doctor"') */
|
|
82
85
|
diagnosisHint?: string;
|
|
86
|
+
/** Optional LLM enricher — adds smart summaries to alerts before dispatch */
|
|
87
|
+
enricher?: AlertEnricher;
|
|
83
88
|
};
|
|
84
89
|
export type OpenAlertsLogger = {
|
|
85
90
|
info: (msg: string) => void;
|
|
@@ -148,5 +153,5 @@ export declare const DEFAULTS: {
|
|
|
148
153
|
readonly pruneIntervalMs: number;
|
|
149
154
|
readonly platformFlushIntervalMs: number;
|
|
150
155
|
readonly platformBatchSize: 100;
|
|
151
|
-
readonly gatewayDownThresholdMs:
|
|
156
|
+
readonly gatewayDownThresholdMs: 30000;
|
|
152
157
|
};
|
package/dist/core/types.js
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { OpenAlertsEngine } from "./core/index.js";
|
|
2
2
|
import { onDiagnosticEvent, registerLogTransport } from "openclaw/plugin-sdk";
|
|
3
3
|
import { createLogBridge } from "./plugin/log-bridge.js";
|
|
4
|
-
import { OpenClawAlertChannel, parseConfig, resolveAlertTarget, translateOpenClawEvent, translateToolCallHook, translateAgentStartHook, translateAgentEndHook, translateSessionStartHook, translateSessionEndHook, translateMessageSentHook, translateMessageReceivedHook, translateBeforeToolCallHook, translateBeforeCompactionHook, translateAfterCompactionHook, translateMessageSendingHook, translateToolResultPersistHook, translateGatewayStartHook, translateGatewayStopHook, } from "./plugin/adapter.js";
|
|
4
|
+
import { OpenClawAlertChannel, createOpenClawEnricher, parseConfig, resolveAlertTarget, translateOpenClawEvent, translateToolCallHook, translateAgentStartHook, translateAgentEndHook, translateSessionStartHook, translateSessionEndHook, translateMessageSentHook, translateMessageReceivedHook, translateBeforeToolCallHook, translateBeforeCompactionHook, translateAfterCompactionHook, translateMessageSendingHook, translateToolResultPersistHook, translateGatewayStartHook, translateGatewayStopHook, } from "./plugin/adapter.js";
|
|
5
5
|
import { bindEngine, createMonitorCommands } from "./plugin/commands.js";
|
|
6
6
|
import { createDashboardHandler, closeDashboardConnections, } from "./plugin/dashboard-routes.js";
|
|
7
7
|
const PLUGIN_ID = "openalerts";
|
|
@@ -13,12 +13,16 @@ let logBridgeCleanup = null;
|
|
|
13
13
|
function createMonitorService(api) {
|
|
14
14
|
return {
|
|
15
15
|
id: PLUGIN_ID,
|
|
16
|
-
start(ctx) {
|
|
16
|
+
async start(ctx) {
|
|
17
17
|
const logger = ctx.logger;
|
|
18
18
|
const config = parseConfig(api.pluginConfig);
|
|
19
19
|
// Resolve alert target + create OpenClaw alert channel
|
|
20
|
-
const target = resolveAlertTarget(api, config);
|
|
20
|
+
const target = await resolveAlertTarget(api, config);
|
|
21
21
|
const channels = target ? [new OpenClawAlertChannel(api, target)] : [];
|
|
22
|
+
// Create LLM enricher if enabled (default: true)
|
|
23
|
+
const enricher = config.llmEnriched !== false
|
|
24
|
+
? createOpenClawEnricher(api, logger)
|
|
25
|
+
: null;
|
|
22
26
|
// Create and start the universal engine
|
|
23
27
|
engine = new OpenAlertsEngine({
|
|
24
28
|
stateDir: ctx.stateDir,
|
|
@@ -27,6 +31,7 @@ function createMonitorService(api) {
|
|
|
27
31
|
logger,
|
|
28
32
|
logPrefix: LOG_PREFIX,
|
|
29
33
|
diagnosisHint: 'Run "openclaw doctor" to diagnose.',
|
|
34
|
+
enricher: enricher ?? undefined,
|
|
30
35
|
});
|
|
31
36
|
engine.start();
|
|
32
37
|
// Wire commands to engine
|
|
@@ -169,7 +174,7 @@ function createMonitorService(api) {
|
|
|
169
174
|
const targetDesc = target
|
|
170
175
|
? `alerting to ${target.channel}:${target.to}`
|
|
171
176
|
: "log-only (no alert channel detected)";
|
|
172
|
-
logger.info(`${LOG_PREFIX}: started, ${targetDesc}, log-bridge active,
|
|
177
|
+
logger.info(`${LOG_PREFIX}: started, ${targetDesc}, log-bridge active, 8 rules active`);
|
|
173
178
|
},
|
|
174
179
|
stop() {
|
|
175
180
|
closeDashboardConnections();
|
package/dist/plugin/adapter.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { AlertChannel, AlertEvent, AlertTarget, MonitorConfig, OpenAlertsEvent } from "../core/index.js";
|
|
1
|
+
import type { AlertChannel, AlertEnricher, AlertEvent, AlertTarget, MonitorConfig, OpenAlertsEvent } from "../core/index.js";
|
|
2
2
|
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
|
3
3
|
/**
|
|
4
4
|
* Translate an OpenClaw diagnostic event into a universal OpenAlertsEvent.
|
|
@@ -128,11 +128,23 @@ export declare class OpenClawAlertChannel implements AlertChannel {
|
|
|
128
128
|
readonly name: string;
|
|
129
129
|
private api;
|
|
130
130
|
private target;
|
|
131
|
+
private warnedMissing;
|
|
131
132
|
constructor(api: OpenClawPluginApi, target: AlertTarget);
|
|
132
133
|
send(alert: AlertEvent, formatted: string): Promise<void>;
|
|
133
134
|
}
|
|
134
135
|
/**
|
|
135
136
|
* Resolve the alert target from plugin config or by auto-detecting from OpenClaw config.
|
|
136
137
|
*/
|
|
137
|
-
export declare function resolveAlertTarget(api: OpenClawPluginApi, pluginConfig: MonitorConfig): AlertTarget | null
|
|
138
|
+
export declare function resolveAlertTarget(api: OpenClawPluginApi, pluginConfig: MonitorConfig): Promise<AlertTarget | null>;
|
|
138
139
|
export declare function parseConfig(raw: Record<string, unknown> | undefined): MonitorConfig;
|
|
140
|
+
/**
|
|
141
|
+
* Create an AlertEnricher from the OpenClaw plugin API.
|
|
142
|
+
* Reads the model from api.config.agents.defaults.model.primary (e.g. "openai/gpt-5-nano")
|
|
143
|
+
* and resolves the API key from process.env.
|
|
144
|
+
* Returns null if no model is configured or enricher can't be created.
|
|
145
|
+
*/
|
|
146
|
+
export declare function createOpenClawEnricher(api: OpenClawPluginApi, logger?: {
|
|
147
|
+
info: (msg: string) => void;
|
|
148
|
+
warn: (msg: string) => void;
|
|
149
|
+
error: (msg: string) => void;
|
|
150
|
+
}): AlertEnricher | null;
|
package/dist/plugin/adapter.js
CHANGED
|
@@ -1,3 +1,7 @@
|
|
|
1
|
+
import { createLlmEnricher } from "../core/llm-enrichment.js";
|
|
2
|
+
import { readFile } from "node:fs/promises";
|
|
3
|
+
import { join } from "node:path";
|
|
4
|
+
import { homedir } from "node:os";
|
|
1
5
|
// ─── Diagnostic Event Translation ───────────────────────────────────────────
|
|
2
6
|
//
|
|
3
7
|
// OpenClaw emits 12 diagnostic event types through onDiagnosticEvent():
|
|
@@ -230,7 +234,7 @@ export function translateMessageSentHook(data, context) {
|
|
|
230
234
|
sessionKey: context.sessionId,
|
|
231
235
|
outcome: data.success ? "success" : "error",
|
|
232
236
|
error: data.error,
|
|
233
|
-
meta: { to: data.to, source: "hook:message_sent" },
|
|
237
|
+
meta: { to: data.to, content: data.content, source: "hook:message_sent" },
|
|
234
238
|
};
|
|
235
239
|
}
|
|
236
240
|
/** Translate gateway_start hook data into OpenAlertsEvent. */
|
|
@@ -261,6 +265,7 @@ export function translateMessageReceivedHook(data, context) {
|
|
|
261
265
|
outcome: "success",
|
|
262
266
|
meta: {
|
|
263
267
|
from: data.from,
|
|
268
|
+
content: data.content,
|
|
264
269
|
accountId: context.accountId,
|
|
265
270
|
openclawHook: "message_received",
|
|
266
271
|
source: "hook:message_received",
|
|
@@ -310,6 +315,7 @@ export function translateAfterCompactionHook(data, context) {
|
|
|
310
315
|
messageCount: data.messageCount,
|
|
311
316
|
tokenCount: data.tokenCount,
|
|
312
317
|
compactedCount: data.compactedCount,
|
|
318
|
+
compaction: true,
|
|
313
319
|
openclawHook: "after_compaction",
|
|
314
320
|
source: "hook:after_compaction",
|
|
315
321
|
},
|
|
@@ -324,6 +330,7 @@ export function translateMessageSendingHook(data, context) {
|
|
|
324
330
|
outcome: "success",
|
|
325
331
|
meta: {
|
|
326
332
|
to: data.to,
|
|
333
|
+
content: data.content,
|
|
327
334
|
accountId: context.accountId,
|
|
328
335
|
openclawHook: "message_sending",
|
|
329
336
|
source: "hook:message_sending",
|
|
@@ -356,6 +363,7 @@ export class OpenClawAlertChannel {
|
|
|
356
363
|
name;
|
|
357
364
|
api;
|
|
358
365
|
target;
|
|
366
|
+
warnedMissing = false;
|
|
359
367
|
constructor(api, target) {
|
|
360
368
|
this.api = api;
|
|
361
369
|
this.target = target;
|
|
@@ -364,8 +372,13 @@ export class OpenClawAlertChannel {
|
|
|
364
372
|
async send(alert, formatted) {
|
|
365
373
|
const runtime = this.api.runtime;
|
|
366
374
|
const channel = runtime.channel;
|
|
367
|
-
if (!channel)
|
|
375
|
+
if (!channel) {
|
|
376
|
+
if (!this.warnedMissing) {
|
|
377
|
+
this.warnedMissing = true;
|
|
378
|
+
throw new Error(`runtime.channel not available — alert dropped`);
|
|
379
|
+
}
|
|
368
380
|
return;
|
|
381
|
+
}
|
|
369
382
|
const opts = this.target.accountId
|
|
370
383
|
? { accountId: this.target.accountId }
|
|
371
384
|
: {};
|
|
@@ -377,20 +390,22 @@ export class OpenClawAlertChannel {
|
|
|
377
390
|
signal: "sendMessageSignal",
|
|
378
391
|
};
|
|
379
392
|
const methodName = channelMethods[this.target.channel];
|
|
380
|
-
if (!methodName)
|
|
381
|
-
|
|
393
|
+
if (!methodName) {
|
|
394
|
+
throw new Error(`unsupported channel "${this.target.channel}" — no send method mapped`);
|
|
395
|
+
}
|
|
382
396
|
const channelMod = channel[this.target.channel];
|
|
383
397
|
const sendFn = channelMod?.[methodName];
|
|
384
|
-
if (sendFn) {
|
|
385
|
-
|
|
398
|
+
if (!sendFn) {
|
|
399
|
+
throw new Error(`${this.target.channel}.${methodName} not found on runtime — alert dropped`);
|
|
386
400
|
}
|
|
401
|
+
await sendFn(this.target.to, formatted, opts);
|
|
387
402
|
}
|
|
388
403
|
}
|
|
389
404
|
// ─── Alert Target Resolution ────────────────────────────────────────────────
|
|
390
405
|
/**
|
|
391
406
|
* Resolve the alert target from plugin config or by auto-detecting from OpenClaw config.
|
|
392
407
|
*/
|
|
393
|
-
export function resolveAlertTarget(api, pluginConfig) {
|
|
408
|
+
export async function resolveAlertTarget(api, pluginConfig) {
|
|
394
409
|
// 1. Explicit config
|
|
395
410
|
if (pluginConfig.alertChannel && pluginConfig.alertTo) {
|
|
396
411
|
return {
|
|
@@ -400,7 +415,8 @@ export function resolveAlertTarget(api, pluginConfig) {
|
|
|
400
415
|
};
|
|
401
416
|
}
|
|
402
417
|
const cfg = api.config;
|
|
403
|
-
|
|
418
|
+
const channelsCfg = cfg.channels ??
|
|
419
|
+
{};
|
|
404
420
|
const channelKeys = [
|
|
405
421
|
"telegram",
|
|
406
422
|
"discord",
|
|
@@ -408,14 +424,33 @@ export function resolveAlertTarget(api, pluginConfig) {
|
|
|
408
424
|
"whatsapp",
|
|
409
425
|
"signal",
|
|
410
426
|
];
|
|
427
|
+
// 2. Auto-detect from static allowFrom in channel config
|
|
411
428
|
for (const channelKey of channelKeys) {
|
|
412
|
-
const channelConfig =
|
|
429
|
+
const channelConfig = channelsCfg[channelKey];
|
|
413
430
|
if (!channelConfig || typeof channelConfig !== "object")
|
|
414
431
|
continue;
|
|
415
432
|
const target = extractFirstAllowFrom(channelKey, channelConfig);
|
|
416
433
|
if (target)
|
|
417
434
|
return target;
|
|
418
435
|
}
|
|
436
|
+
// 3. Auto-detect from pairing store (runtime-paired users)
|
|
437
|
+
// The store lives at ~/.openclaw/credentials/<channel>-allowFrom.json
|
|
438
|
+
const credDir = join(process.env.OPENCLAW_HOME ?? join(homedir(), ".openclaw"), "credentials");
|
|
439
|
+
for (const channelKey of channelKeys) {
|
|
440
|
+
const channelConfig = channelsCfg[channelKey];
|
|
441
|
+
if (!channelConfig || typeof channelConfig !== "object")
|
|
442
|
+
continue;
|
|
443
|
+
try {
|
|
444
|
+
const raw = await readFile(join(credDir, `${channelKey}-allowFrom.json`), "utf-8");
|
|
445
|
+
const data = JSON.parse(raw);
|
|
446
|
+
if (Array.isArray(data.allowFrom) && data.allowFrom.length > 0) {
|
|
447
|
+
return { channel: channelKey, to: String(data.allowFrom[0]) };
|
|
448
|
+
}
|
|
449
|
+
}
|
|
450
|
+
catch {
|
|
451
|
+
// File doesn't exist or isn't valid — skip this channel
|
|
452
|
+
}
|
|
453
|
+
}
|
|
419
454
|
return null;
|
|
420
455
|
}
|
|
421
456
|
function extractFirstAllowFrom(channel, channelConfig) {
|
|
@@ -451,8 +486,34 @@ export function parseConfig(raw) {
|
|
|
451
486
|
maxLogSizeKb: typeof raw.maxLogSizeKb === "number" ? raw.maxLogSizeKb : undefined,
|
|
452
487
|
maxLogAgeDays: typeof raw.maxLogAgeDays === "number" ? raw.maxLogAgeDays : undefined,
|
|
453
488
|
quiet: typeof raw.quiet === "boolean" ? raw.quiet : undefined,
|
|
489
|
+
llmEnriched: typeof raw.llmEnriched === "boolean" ? raw.llmEnriched : undefined,
|
|
454
490
|
rules: raw.rules && typeof raw.rules === "object"
|
|
455
491
|
? raw.rules
|
|
456
492
|
: undefined,
|
|
457
493
|
};
|
|
458
494
|
}
|
|
495
|
+
// ─── LLM Enricher Factory ───────────────────────────────────────────────────
|
|
496
|
+
/**
 * Create an AlertEnricher from the OpenClaw plugin API.
 *
 * Reads the primary model id from api.config.agents.defaults.model.primary
 * (expected in "provider/model" form, e.g. "openai/gpt-5-nano") and passes it,
 * together with the logger, to createLlmEnricher.
 * NOTE(review): API-key resolution (process.env) presumably happens inside
 * createLlmEnricher; it is not done here — confirm there.
 *
 * @param api - OpenClaw plugin API; only `api.config` is read.
 * @param [logger] - optional logger; this function only calls `warn`.
 * @returns the value returned by createLlmEnricher, or null when no usable
 *          model id is configured or enricher setup throws.
 */
export function createOpenClawEnricher(api, logger) {
    try {
        // Optional chaining all the way down, so a missing config section
        // (including api.config itself) is reported as "no model configured"
        // and not as an unrelated TypeError from the catch below.
        const primary = api.config?.agents?.defaults?.model?.primary;
        if (typeof primary !== "string" || !primary.includes("/")) {
            logger?.warn("openalerts: llm-enrichment skipped — no model configured at agents.defaults.model.primary");
            return null;
        }
        return createLlmEnricher({ modelString: primary, logger });
    }
    catch (err) {
        // Enrichment is optional: report the failure and carry on without it.
        logger?.warn(`openalerts: llm-enrichment setup failed: ${String(err)}`);
        return null;
    }
}
|
package/dist/plugin/commands.js
CHANGED
|
@@ -29,7 +29,7 @@ export function createMonitorCommands(api) {
|
|
|
29
29
|
handler: () => handleDashboard(),
|
|
30
30
|
},
|
|
31
31
|
{
|
|
32
|
-
name: "
|
|
32
|
+
name: "test_alert",
|
|
33
33
|
description: "Send a test alert to verify alert delivery",
|
|
34
34
|
acceptsArgs: false,
|
|
35
35
|
handler: () => handleTestAlert(),
|
|
@@ -72,26 +72,7 @@ function handleTestAlert() {
|
|
|
72
72
|
if (!_engine) {
|
|
73
73
|
return { text: "OpenAlerts not initialized yet. Wait for gateway startup." };
|
|
74
74
|
}
|
|
75
|
-
|
|
76
|
-
// This won't fire an actual alert unless the threshold (3 errors) is reached,
|
|
77
|
-
// so we fire a one-off test alert directly through the engine.
|
|
78
|
-
const testEvent = {
|
|
79
|
-
type: "alert",
|
|
80
|
-
id: `test:manual:${Date.now()}`,
|
|
81
|
-
ruleId: "test",
|
|
82
|
-
severity: "info",
|
|
83
|
-
title: "Test alert — delivery verified",
|
|
84
|
-
detail: "This is a test alert from /test-alert. If you see this, alert delivery is working.",
|
|
85
|
-
ts: Date.now(),
|
|
86
|
-
fingerprint: `test:manual`,
|
|
87
|
-
};
|
|
88
|
-
// Ingest as a custom event so it appears in the dashboard
|
|
89
|
-
_engine.ingest({
|
|
90
|
-
type: "custom",
|
|
91
|
-
ts: Date.now(),
|
|
92
|
-
outcome: "success",
|
|
93
|
-
meta: { openclawLog: "test_alert", source: "command:test-alert" },
|
|
94
|
-
});
|
|
75
|
+
_engine.sendTestAlert();
|
|
95
76
|
return {
|
|
96
77
|
text: "Test alert sent. Check your alert channel (Telegram/Discord/etc) for delivery confirmation.\n\nIf you don't receive it, check /health for channel status.",
|
|
97
78
|
};
|
|
@@ -423,6 +423,8 @@ export function getDashboardHtml() {
|
|
|
423
423
|
var ft=ev.type||'?';
|
|
424
424
|
if(ft==='custom'&&m.openclawEventType==='session.state')ft='session.'+(m.sessionState||'state');
|
|
425
425
|
if(ft==='custom'&&m.openclawEventType==='message_sent')ft='msg.delivered';
|
|
426
|
+
if(ft==='custom'&&m.openclawHook==='message_received')ft='msg.in';
|
|
427
|
+
if(ft==='custom'&&m.openclawHook==='message_sending')ft='msg.out';
|
|
426
428
|
|
|
427
429
|
var h='<div class="r-main">';
|
|
428
430
|
h+='<span class="r-time">'+fT(ev.ts)+'</span>';
|
|
@@ -439,6 +441,7 @@ export function getDashboardHtml() {
|
|
|
439
441
|
if(m.model)h+='<span class="p m">'+esc(String(m.model))+'</span>';
|
|
440
442
|
if(ev.channel)h+='<span class="p ch">'+esc(ev.channel)+'</span>';
|
|
441
443
|
if(m.messageCount!=null)h+='<span class="p">'+m.messageCount+' msgs</span>';
|
|
444
|
+
if(m.content){var preview=String(m.content);if(preview.length>60)preview=preview.slice(0,57)+'...';h+='<span class="p">'+esc(preview)+'</span>'}
|
|
442
445
|
if(m.source&&String(m.source)!=='simulate')h+='<span class="p s">'+esc(String(m.source))+'</span>';
|
|
443
446
|
h+='</span></div>';
|
|
444
447
|
|
|
@@ -596,17 +599,21 @@ export function getDashboardHtml() {
|
|
|
596
599
|
// ─── SSE (OpenAlerts events + OpenClaw log tailing) ──────────────────────
|
|
597
600
|
function connectSSE(){
|
|
598
601
|
if(evSrc)evSrc.close();
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
602
|
+
try{
|
|
603
|
+
evSrc=new EventSource('/openalerts/events');
|
|
604
|
+
evSrc.addEventListener('openalerts',function(e){try{addEvent(JSON.parse(e.data))}catch(_){}});
|
|
605
|
+
evSrc.addEventListener('history',function(e){try{var evs=JSON.parse(e.data);for(var i=0;i<evs.length;i++)addEvent(evs[i])}catch(_){}});
|
|
606
|
+
evSrc.addEventListener('oclog',function(e){try{addLogEntry(JSON.parse(e.data))}catch(_){}});
|
|
607
|
+
evSrc.onopen=function(){$('sDot').className='dot live';$('sConn').textContent='live'};
|
|
608
|
+
evSrc.onerror=function(e){$('sDot').className='dot dead';$('sConn').textContent='err:'+evSrc.readyState};
|
|
609
|
+
}catch(e){$('sConn').textContent='SSE fail:'+e.message}
|
|
604
610
|
}
|
|
605
611
|
|
|
606
612
|
// ─── State polling ──────────────────────
|
|
607
613
|
var prevAl={};
|
|
608
614
|
function pollState(){
|
|
609
|
-
fetch('/openalerts/state').then(function(r){return r.json()}).then(function(s){
|
|
615
|
+
fetch('/openalerts/state').then(function(r){if(!r.ok)throw new Error('HTTP '+r.status);return r.json()}).catch(function(e){$('sUp').textContent='fetch err: '+e.message;return null}).then(function(s){
|
|
616
|
+
if(!s)return;
|
|
610
617
|
if(s.stats){
|
|
611
618
|
$('sMsgs').textContent=s.stats.messagesProcessed||0;
|
|
612
619
|
$('sErr').textContent=(s.stats.messageErrors||0)+(s.stats.webhookErrors||0)+(s.stats.toolErrors||0);
|
|
@@ -736,7 +743,7 @@ export function getDashboardHtml() {
|
|
|
736
743
|
var btn=row.querySelector('.log-copy');
|
|
737
744
|
if(btn)lines.push(btn.getAttribute('data-raw'));
|
|
738
745
|
});
|
|
739
|
-
var blob=new Blob([lines.join('
|
|
746
|
+
var blob=new Blob([lines.join('\\n')],{type:'text/plain'});
|
|
740
747
|
var url=URL.createObjectURL(blob);
|
|
741
748
|
var a=document.createElement('a');
|
|
742
749
|
a.href=url;a.download='openalerts-logs-'+Date.now()+'.txt';
|
|
@@ -26,14 +26,25 @@ const RULE_IDS = [
|
|
|
26
26
|
"heartbeat-fail",
|
|
27
27
|
"queue-depth",
|
|
28
28
|
"high-error-rate",
|
|
29
|
+
"tool-errors",
|
|
29
30
|
"gateway-down",
|
|
30
31
|
];
|
|
31
32
|
/**
 * Build the per-rule status list shown by the dashboard.
 *
 * A rule is reported as "fired" when the engine's cooldown map holds an
 * entry for it that is newer than 15 minutes; otherwise it is "ok".
 *
 * @param engine - engine whose `state.cooldowns` entries are inspected.
 * @returns one `{ id, status }` object per entry of RULE_IDS, in order.
 */
function getRuleStatuses(engine) {
    const cutoff = Date.now() - 15 * 60 * 1000;
    const cooldownEntries = [...engine.state.cooldowns];
    return RULE_IDS.map((id) => {
        // Cooldown keys are fingerprints such as "llm-errors:unknown", so
        // match either the bare rule id or any key prefixed with "<id>:".
        const prefix = `${id}:`;
        const fired = cooldownEntries.some(([key, ts]) => (key === id || key.startsWith(prefix)) && ts > cutoff);
        return { id, status: fired ? "fired" : "ok" };
    });
}
|
|
@@ -208,6 +219,20 @@ export function createDashboardHandler(getEngine) {
|
|
|
208
219
|
"Access-Control-Allow-Origin": "*",
|
|
209
220
|
});
|
|
210
221
|
res.flushHeaders();
|
|
222
|
+
// Send initial connection event so the browser knows the stream is live
|
|
223
|
+
res.write(`:ok\n\n`);
|
|
224
|
+
// Send current state snapshot as initial event
|
|
225
|
+
const state = engine.state;
|
|
226
|
+
res.write(`event: state\ndata: ${JSON.stringify({
|
|
227
|
+
uptimeMs: Date.now() - state.startedAt,
|
|
228
|
+
stats: state.stats,
|
|
229
|
+
rules: getRuleStatuses(engine),
|
|
230
|
+
})}\n\n`);
|
|
231
|
+
// Send event history so dashboard survives refreshes
|
|
232
|
+
const history = engine.getRecentLiveEvents(200);
|
|
233
|
+
if (history.length > 0) {
|
|
234
|
+
res.write(`event: history\ndata: ${JSON.stringify(history)}\n\n`);
|
|
235
|
+
}
|
|
211
236
|
// Subscribe to engine events
|
|
212
237
|
const unsub = engine.bus.on((event) => {
|
|
213
238
|
try {
|
|
@@ -272,6 +272,30 @@ export function createLogBridge(engine) {
|
|
|
272
272
|
},
|
|
273
273
|
});
|
|
274
274
|
}
|
|
275
|
+
// ── Lane task error (diagnostic) ────────────────────────────────────────────
|
|
276
|
+
// Safety net: catches lane-level errors from diagnostic logs.
|
|
277
|
+
// The agent_end hook already covers agent errors → llm-errors rule.
|
|
278
|
+
// This emits as infra.error to avoid double-counting in the llm-errors window
|
|
279
|
+
// while still ensuring infra-errors fires if the hook path fails.
|
|
280
|
+
// Format: "lane task error: lane=main durationMs=1 error="Error: ...""
|
|
281
|
+
/**
 * Turn a "lane task error" log record into an infra.error event.
 * A record is ingested at most once per lane/timestamp pair.
 *
 * @param rec - parsed log record; reads `rec.ts`, `rec.kvs.lane` and `rec.kvs.error`.
 */
function handleLaneTaskError(rec) {
    const fields = rec.kvs;
    const laneName = fields.lane;
    const failure = fields.error;
    const seenKey = `lane-error:${laneName}:${rec.ts}`;
    if (!dedupeSet.has(seenKey)) {
        dedupeSet.add(seenKey);
        const meta = {
            lane: laneName,
            source: "log-bridge",
            openclawLog: "lane_task_error",
        };
        ingest({
            type: "infra.error",
            ts: rec.ts,
            outcome: "error",
            error: failure,
            meta,
        });
    }
}
|
|
275
299
|
// ── Exec command (exec) ────────────────────────────────────────────────────
|
|
276
300
|
function handleExecCommand(rec) {
|
|
277
301
|
pendingCommand = rec.message;
|
|
@@ -312,6 +336,9 @@ export function createLogBridge(engine) {
|
|
|
312
336
|
if (msg.startsWith("session state:")) {
|
|
313
337
|
handleSessionState(rec);
|
|
314
338
|
}
|
|
339
|
+
else if (msg.startsWith("lane task error:")) {
|
|
340
|
+
handleLaneTaskError(rec);
|
|
341
|
+
}
|
|
315
342
|
}
|
|
316
343
|
else if (rec.subsystem === "exec") {
|
|
317
344
|
if (msg.startsWith("elevated command")) {
|