@steadwing/openalerts 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -37,7 +37,9 @@ openclaw plugins install @steadwing/openalerts
37
37
 
38
38
  ### 2. Configure
39
39
 
40
- Add to your `openclaw.json`:
40
+ If you already have a channel paired with OpenClaw (e.g. Telegram via `openclaw pair`), **no config is needed** — OpenAlerts auto-detects where to send alerts.
41
+
42
+ Otherwise, set it explicitly in `openclaw.json`:
41
43
 
42
44
  ```jsonc
43
45
  {
@@ -55,69 +57,128 @@ Add to your `openclaw.json`:
55
57
  }
56
58
  ```
57
59
 
60
+ **Auto-detection priority:** explicit config > static `allowFrom` in channel config > pairing store.
61
+
58
62
  ### 3. Restart & verify
59
63
 
60
64
  ```bash
61
65
  openclaw gateway stop && openclaw gateway run
62
66
  ```
63
67
 
68
+
64
69
  Send `/health` to your bot. You should get a live status report back — zero LLM tokens consumed.
65
70
 
66
71
  That's it. OpenAlerts is now watching your agent.
67
72
 
73
+ ## Dashboard
74
+
75
+ A real-time web dashboard is embedded in the gateway at:
76
+
77
+ ```
78
+ http://127.0.0.1:18789/openalerts
79
+ ```
80
+
81
+ - **Activity** — Live event timeline with session flows, tool calls, LLM usage
82
+ - **System Logs** — Filtered, structured logs with search
83
+ - **Health** — Rule status, alert history, system stats
84
+
68
85
  ## Alert Rules
69
86
 
70
- Seven rules run against every event in real-time:
87
+ Eight rules run against every event in real time:
71
88
 
72
89
  | Rule | Watches for | Severity |
73
90
  |---|---|---|
74
- | **llm-errors** | 3+ LLM failures in 5 minutes | ERROR |
75
- | **infra-errors** | 3+ infrastructure errors in 5 minutes | ERROR |
76
- | **gateway-down** | No heartbeat for 90+ seconds | CRITICAL |
91
+ | **llm-errors** | 1+ LLM/agent failure in 1 minute | ERROR |
92
+ | **infra-errors** | 1+ infrastructure error in 1 minute | ERROR |
93
+ | **gateway-down** | No heartbeat for 30+ seconds | CRITICAL |
77
94
  | **session-stuck** | Session idle for 120+ seconds | WARN |
78
95
  | **high-error-rate** | 50%+ of last 20 messages failed | ERROR |
79
96
  | **queue-depth** | 10+ items queued | WARN |
97
+ | **tool-errors** | 1+ tool failure in 1 minute | WARN |
80
98
  | **heartbeat-fail** | 3 consecutive heartbeat failures | ERROR |
81
99
 
82
- All thresholds and cooldowns are [configurable per-rule](#configuration).
100
+ All thresholds and cooldowns are [configurable per-rule](#advanced-configuration).
83
101
 
84
- ## Configuration
102
+ ## LLM-Enriched Alerts
85
103
 
86
- Full config reference under `plugins.entries.openalerts.config`:
104
+ By default, OpenAlerts uses your configured LLM model to enrich alerts with a human-friendly summary and an actionable suggestion. The enrichment is appended below the original alert detail:
105
+
106
+ ```
107
+ 1 agent error(s) on unknown in the last minute. Last: 401 Incorrect API key...
108
+
109
+ Summary: Your OpenAI API key is invalid or expired — the agent cannot make LLM calls.
110
+ Action: Update your API key in ~/.openclaw/.env with a valid key from platform.openai.com/api-keys
111
+ ```
112
+
113
+ - **Model**: reads from `agents.defaults.model.primary` in your `openclaw.json` (e.g. `"openai/gpt-4o-mini"`)
114
+ - **API key**: reads from the corresponding environment variable (`OPENAI_API_KEY`, `ANTHROPIC_API_KEY`, `GROQ_API_KEY`, etc.)
115
+ - **Supported providers**: OpenAI, Anthropic, Groq, Together, DeepSeek (the built-in provider list; for a model with any other provider prefix, enrichment is skipped with a warning)
116
+ - **Graceful fallback**: if the LLM call fails or times out (10s), the original alert is sent unchanged
117
+
118
+ To disable LLM enrichment, set `"llmEnriched": false` in your plugin config:
87
119
 
88
120
  ```jsonc
89
121
  {
90
- "alertChannel": "telegram", // telegram | discord | slack | whatsapp | signal
91
- "alertTo": "YOUR_CHAT_ID", // chat/user ID on that channel
92
- "cooldownMinutes": 15, // minutes between repeated alerts (default: 15)
93
- "quiet": false, // true = log only, no messages sent
94
-
95
- "rules": {
96
- "gateway-down": {
97
- "threshold": 120000 // override: 2 min instead of 90s
98
- },
99
- "high-error-rate": {
100
- "enabled": false // disable a rule entirely
101
- },
102
- "llm-errors": {
103
- "threshold": 5, // require 5 errors instead of 3
104
- "cooldownMinutes": 30 // longer cooldown for this rule
122
+ "plugins": {
123
+ "entries": {
124
+ "openalerts": {
125
+ "config": {
126
+ "llmEnriched": false
127
+ }
128
+ }
105
129
  }
106
130
  }
107
131
  }
108
132
  ```
109
133
 
110
- ## Dashboard
134
+ ## Advanced Configuration
111
135
 
112
- A real-time web dashboard is embedded in the gateway at:
136
+ Each rule can be individually tuned or disabled. You can also set global options like `cooldownMinutes` (default: `15`) and `quiet: true` for log-only mode.
113
137
 
138
+ **Step 1.** Add a `rules` object inside `plugins.entries.openalerts.config` in your `~/.openclaw/openclaw.json`:
139
+
140
+ ```jsonc
141
+ {
142
+ "plugins": {
143
+ "entries": {
144
+ "openalerts": {
145
+ "enabled": true,
146
+ "config": {
147
+ "rules": {
148
+ "llm-errors": { "threshold": 5 },
149
+ "infra-errors": { "cooldownMinutes": 30 },
150
+ "high-error-rate": { "enabled": false },
151
+ "gateway-down": { "threshold": 60000 }
152
+ }
153
+ }
154
+ }
155
+ }
156
+ }
157
+ }
114
158
  ```
115
- http://127.0.0.1:18789/openalerts
159
+
160
+ **Step 2.** Restart the gateway to apply:
161
+
162
+ ```bash
163
+ openclaw gateway stop && openclaw gateway run
116
164
  ```
117
165
 
118
- - **Activity** — Live event timeline with session flows, tool calls, LLM usage
119
- - **System Logs** — Filtered, structured logs with search
120
- - **Health** Rule status, alert history, system stats
166
+ ### Rule reference
167
+
168
+ | Rule | `threshold` unit | Default |
169
+ |---|---|---|
170
+ | `llm-errors` | Error count in 1 min window | `1` |
171
+ | `infra-errors` | Error count in 1 min window | `1` |
172
+ | `gateway-down` | Milliseconds without heartbeat | `30000` (30s) |
173
+ | `session-stuck` | Milliseconds idle | `120000` (2 min) |
174
+ | `high-error-rate` | Error percentage (0-100) | `50` |
175
+ | `queue-depth` | Number of queued items | `10` |
176
+ | `tool-errors` | Error count in 1 min window | `1` |
177
+ | `heartbeat-fail` | Consecutive failures | `3` |
178
+
179
+ Every rule also accepts:
180
+ - **`enabled`** — `false` to disable the rule (default: `true`)
181
+ - **`cooldownMinutes`** — minutes before the same rule can fire again (default: `15`)
121
182
 
122
183
  ## Commands
123
184
 
@@ -129,18 +190,6 @@ Zero-token chat commands available in any connected channel:
129
190
  | `/alerts` | Recent alert history with severity and timestamps |
130
191
  | `/dashboard` | Returns the dashboard URL |
131
192
 
132
- ## Architecture
133
-
134
- ```
135
- src/core/ Framework-agnostic engine, zero dependencies
136
- Rules engine, evaluator, event bus, state store, formatter
137
-
138
- src/plugin/ OpenClaw adapter plugin
139
- Event translation, alert routing, dashboard, chat commands
140
- ```
141
-
142
- Everything ships as a single `@steadwing/openalerts` package. The core is completely framework-agnostic — adding monitoring for a new framework only requires writing an adapter.
143
-
144
193
  ## Development
145
194
 
146
195
  ```bash
@@ -13,11 +13,14 @@ export declare class OpenAlertsEngine {
13
13
  private stateDir;
14
14
  private dispatcher;
15
15
  private platform;
16
+ private enricher;
16
17
  private logger;
17
18
  private logPrefix;
18
19
  private watchdogTimer;
19
20
  private pruneTimer;
20
21
  private running;
22
+ private eventRing;
23
+ private static readonly RING_MAX;
21
24
  constructor(options: OpenAlertsInitOptions);
22
25
  /** Start the engine: warm from history, start timers. */
23
26
  start(): void;
@@ -30,12 +33,16 @@ export declare class OpenAlertsEngine {
30
33
  readonly name: string;
31
34
  send(alert: AlertEvent, formatted: string): Promise<void> | void;
32
35
  }): void;
36
+ /** Fire a test alert to verify delivery. */
37
+ sendTestAlert(): void;
33
38
  /** Whether the platform sync is connected. */
34
39
  get platformConnected(): boolean;
35
40
  /** Whether the engine is running. */
36
41
  get isRunning(): boolean;
37
42
  /** Read recent stored events (for /alerts command). */
38
43
  getRecentEvents(limit?: number): StoredEvent[];
44
+ /** Get recent full events from the in-memory ring buffer (for dashboard history). */
45
+ getRecentLiveEvents(limit?: number): OpenAlertsEvent[];
39
46
  private handleEvent;
40
47
  private fireAlert;
41
48
  }
@@ -17,14 +17,18 @@ export class OpenAlertsEngine {
17
17
  stateDir;
18
18
  dispatcher;
19
19
  platform = null;
20
+ enricher;
20
21
  logger;
21
22
  logPrefix;
22
23
  watchdogTimer = null;
23
24
  pruneTimer = null;
24
25
  running = false;
26
+ eventRing = [];
27
+ static RING_MAX = 500;
25
28
  constructor(options) {
26
29
  this.config = options.config;
27
30
  this.stateDir = options.stateDir;
31
+ this.enricher = options.enricher ?? null;
28
32
  this.logger = options.logger ?? console;
29
33
  this.logPrefix = options.logPrefix ?? "openalerts";
30
34
  this.bus = new OpenAlertsEventBus();
@@ -64,7 +68,9 @@ export class OpenAlertsEngine {
64
68
  this.watchdogTimer = setInterval(() => {
65
69
  const alerts = processWatchdogTick(this.state, this.config);
66
70
  for (const alert of alerts) {
67
- this.fireAlert(alert);
71
+ void this.fireAlert(alert).catch((err) => {
72
+ this.logger.error(`${this.logPrefix}: watchdog alert failed: ${String(err)}`);
73
+ });
68
74
  }
69
75
  }, DEFAULTS.watchdogIntervalMs);
70
76
  // Prune timer (cleans old log entries every 6h)
@@ -82,7 +88,7 @@ export class OpenAlertsEngine {
82
88
  const channelNames = this.dispatcher.hasChannels
83
89
  ? `${this.dispatcher.channelCount} channel(s)`
84
90
  : "log-only (no alert channels)";
85
- this.logger.info(`${this.logPrefix}: started, ${channelNames}, 7 rules active`);
91
+ this.logger.info(`${this.logPrefix}: started, ${channelNames}, 8 rules active`);
86
92
  }
87
93
  /** Ingest a universal event. Can be called directly or via the event bus. */
88
94
  ingest(event) {
@@ -109,6 +115,21 @@ export class OpenAlertsEngine {
109
115
  addChannel(channel) {
110
116
  this.dispatcher.addChannel(channel);
111
117
  }
118
+ /** Fire a test alert to verify delivery. */
119
+ sendTestAlert() {
120
+ void this.fireAlert({
121
+ type: "alert",
122
+ id: `test:manual:${Date.now()}`,
123
+ ruleId: "test",
124
+ severity: "info",
125
+ title: "Test alert — delivery verified",
126
+ detail: "This is a test alert from /test_alert. If you see this, alert delivery is working.",
127
+ ts: Date.now(),
128
+ fingerprint: "test:manual",
129
+ }).catch((err) => {
130
+ this.logger.error(`${this.logPrefix}: test alert failed: ${String(err)}`);
131
+ });
132
+ }
112
133
  /** Whether the platform sync is connected. */
113
134
  get platformConnected() {
114
135
  return this.platform?.isConnected() ?? false;
@@ -121,8 +142,17 @@ export class OpenAlertsEngine {
121
142
  getRecentEvents(limit = 100) {
122
143
  return readRecentEvents(this.stateDir, limit);
123
144
  }
145
+ /** Get recent full events from the in-memory ring buffer (for dashboard history). */
146
+ getRecentLiveEvents(limit = 200) {
147
+ return this.eventRing.slice(-limit);
148
+ }
124
149
  // ─── Internal ──────────────────────────────────────────────────────────────
125
150
  handleEvent(event) {
151
+ // Add to in-memory ring buffer
152
+ this.eventRing.push(event);
153
+ if (this.eventRing.length > OpenAlertsEngine.RING_MAX) {
154
+ this.eventRing = this.eventRing.slice(-OpenAlertsEngine.RING_MAX);
155
+ }
126
156
  // Persist as diagnostic snapshot
127
157
  const snapshot = {
128
158
  type: "diagnostic",
@@ -141,13 +171,15 @@ export class OpenAlertsEngine {
141
171
  // Run through evaluator
142
172
  const alerts = processEvent(this.state, this.config, event);
143
173
  for (const alert of alerts) {
144
- this.fireAlert(alert);
174
+ void this.fireAlert(alert).catch((err) => {
175
+ this.logger.error(`${this.logPrefix}: alert fire failed: ${String(err)}`);
176
+ });
145
177
  }
146
178
  // Forward to platform
147
179
  this.platform?.enqueue(snapshot);
148
180
  }
149
- fireAlert(alert) {
150
- // Persist alert
181
+ async fireAlert(alert) {
182
+ // Persist alert (original, before enrichment)
151
183
  try {
152
184
  appendEvent(this.stateDir, alert);
153
185
  }
@@ -156,9 +188,21 @@ export class OpenAlertsEngine {
156
188
  }
157
189
  // Forward to platform
158
190
  this.platform?.enqueue(alert);
191
+ // Enrich with LLM if enricher is available
192
+ let enriched = alert;
193
+ if (this.enricher) {
194
+ try {
195
+ const result = await this.enricher(alert);
196
+ if (result)
197
+ enriched = result;
198
+ }
199
+ catch (err) {
200
+ this.logger.warn(`${this.logPrefix}: llm enrichment failed, using original: ${String(err)}`);
201
+ }
202
+ }
159
203
  // Dispatch to channels (unless quiet mode)
160
204
  if (!this.config.quiet) {
161
- void this.dispatcher.dispatch(alert).catch((err) => {
205
+ void this.dispatcher.dispatch(enriched).catch((err) => {
162
206
  this.logger.error(`${this.logPrefix}: alert dispatch failed: ${String(err)}`);
163
207
  });
164
208
  }
@@ -65,7 +65,7 @@ export function processEvent(state, config, event) {
65
65
  state.stats.totalCostUsd = 0;
66
66
  state.stats.lastResetTs = now;
67
67
  }
68
- // Track event types in stats
68
+ // Track event types in stats (independent of rule enabled state)
69
69
  if (event.type === "infra.error") {
70
70
  state.stats.webhookErrors++;
71
71
  }
@@ -83,6 +83,16 @@ export function processEvent(state, config, event) {
83
83
  if (event.type === "session.start") {
84
84
  state.stats.sessionsStarted++;
85
85
  }
86
+ if (event.type === "session.stuck") {
87
+ state.stats.stuckSessions++;
88
+ }
89
+ if (event.type === "llm.call" || event.type === "llm.error" || event.type === "agent.error") {
90
+ state.stats.messagesProcessed++;
91
+ if (event.type === "llm.error" || event.type === "agent.error" ||
92
+ event.outcome === "error" || event.outcome === "timeout") {
93
+ state.stats.messageErrors++;
94
+ }
95
+ }
86
96
  if (event.type === "llm.token_usage") {
87
97
  if (typeof event.tokenCount === "number")
88
98
  state.stats.totalTokens += event.tokenCount;
@@ -103,7 +113,14 @@ export function processEvent(state, config, event) {
103
113
  const ctx = { state, config, now };
104
114
  const fired = [];
105
115
  for (const rule of ALL_RULES) {
106
- const alert = rule.evaluate(event, ctx);
116
+ let alert;
117
+ try {
118
+ alert = rule.evaluate(event, ctx);
119
+ }
120
+ catch {
121
+ // One broken rule must never block the rest
122
+ continue;
123
+ }
107
124
  if (!alert)
108
125
  continue;
109
126
  // Check cooldown
@@ -1,4 +1,4 @@
1
- export type { AlertChannel, AlertEvent, AlertRuleDefinition, AlertSeverity, AlertTarget, DiagnosticSnapshot, EvaluatorState, HeartbeatSnapshot, MonitorConfig, RuleContext, RuleOverride, OpenAlertsEvent, OpenAlertsEventType, OpenAlertsInitOptions, OpenAlertsLogger, StoredEvent, WindowEntry, } from "./types.js";
1
+ export type { AlertChannel, AlertEnricher, AlertEvent, AlertRuleDefinition, AlertSeverity, AlertTarget, DiagnosticSnapshot, EvaluatorState, HeartbeatSnapshot, MonitorConfig, RuleContext, RuleOverride, OpenAlertsEvent, OpenAlertsEventType, OpenAlertsInitOptions, OpenAlertsLogger, StoredEvent, WindowEntry, } from "./types.js";
2
2
  export { DEFAULTS, LOG_FILENAME, STORE_DIR_NAME } from "./types.js";
3
3
  export { OpenAlertsEngine } from "./engine.js";
4
4
  export { OpenAlertsEventBus } from "./event-bus.js";
@@ -6,6 +6,7 @@ export { AlertDispatcher } from "./alert-channel.js";
6
6
  export { createEvaluatorState, processEvent, processWatchdogTick, warmFromHistory, } from "./evaluator.js";
7
7
  export { ALL_RULES } from "./rules.js";
8
8
  export { appendEvent, pruneLog, readAllEvents, readRecentEvents, } from "./store.js";
9
+ export { createLlmEnricher, type LlmEnricherOptions } from "./llm-enrichment.js";
9
10
  export { formatAlertMessage, formatAlertsOutput, formatHealthOutput, } from "./formatter.js";
10
11
  export { createPlatformSync, type PlatformSync } from "./platform.js";
11
12
  export { BoundedMap, type BoundedMapOptions, type BoundedMapStats, } from "./bounded-map.js";
@@ -13,6 +13,8 @@ export { createEvaluatorState, processEvent, processWatchdogTick, warmFromHistor
13
13
  export { ALL_RULES } from "./rules.js";
14
14
  // Store
15
15
  export { appendEvent, pruneLog, readAllEvents, readRecentEvents, } from "./store.js";
16
+ // LLM Enrichment
17
+ export { createLlmEnricher } from "./llm-enrichment.js";
16
18
  // Formatter
17
19
  export { formatAlertMessage, formatAlertsOutput, formatHealthOutput, } from "./formatter.js";
18
20
  // Platform
@@ -0,0 +1,14 @@
1
+ import type { AlertEnricher, OpenAlertsLogger } from "./types.js";
2
+ export type LlmEnricherOptions = {
3
+ /** Model string from config, e.g. "openai/gpt-5-nano" */
4
+ modelString: string;
5
+ /** Logger for debug/warn messages */
6
+ logger?: OpenAlertsLogger;
7
+ /** Timeout in ms (default: 10000) */
8
+ timeoutMs?: number;
9
+ };
10
+ /**
11
+ * Create an AlertEnricher that calls an LLM to add a summary + action to alerts.
12
+ * Returns null if the model string lacks a "provider/" prefix, the provider is not recognized, or the provider's API key environment variable is unset.
13
+ */
14
+ export declare function createLlmEnricher(opts: LlmEnricherOptions): AlertEnricher | null;
@@ -0,0 +1,174 @@
1
+ const PROVIDER_MAP = {
2
+ openai: {
3
+ type: "openai-compatible",
4
+ baseUrl: "https://api.openai.com/v1",
5
+ apiKeyEnvVar: "OPENAI_API_KEY",
6
+ },
7
+ groq: {
8
+ type: "openai-compatible",
9
+ baseUrl: "https://api.groq.com/openai/v1",
10
+ apiKeyEnvVar: "GROQ_API_KEY",
11
+ },
12
+ together: {
13
+ type: "openai-compatible",
14
+ baseUrl: "https://api.together.xyz/v1",
15
+ apiKeyEnvVar: "TOGETHER_API_KEY",
16
+ },
17
+ deepseek: {
18
+ type: "openai-compatible",
19
+ baseUrl: "https://api.deepseek.com/v1",
20
+ apiKeyEnvVar: "DEEPSEEK_API_KEY",
21
+ },
22
+ anthropic: {
23
+ type: "anthropic",
24
+ baseUrl: "https://api.anthropic.com/v1",
25
+ apiKeyEnvVar: "ANTHROPIC_API_KEY",
26
+ },
27
+ };
28
+ // ─── Prompt ─────────────────────────────────────────────────────────────────
29
+ function buildPrompt(alert) {
30
+ return `You are a concise DevOps alert analyst. Given this monitoring alert, provide:
31
+ 1. A brief human-friendly summary (1 sentence, plain language)
32
+ 2. One actionable suggestion to resolve it
33
+
34
+ Alert:
35
+ - Rule: ${alert.ruleId}
36
+ - Severity: ${alert.severity}
37
+ - Title: ${alert.title}
38
+ - Detail: ${alert.detail}
39
+
40
+ Reply in exactly this format (2 lines only):
41
+ Summary: <your summary>
42
+ Action: <your suggestion>`;
43
+ }
44
+ // ─── Response Parsing ───────────────────────────────────────────────────────
45
+ function parseEnrichment(text) {
46
+ const lines = text.trim().split("\n");
47
+ let summary = "";
48
+ let action = "";
49
+ for (const line of lines) {
50
+ const trimmed = line.trim();
51
+ if (trimmed.toLowerCase().startsWith("summary:")) {
52
+ summary = trimmed.slice("summary:".length).trim();
53
+ }
54
+ else if (trimmed.toLowerCase().startsWith("action:")) {
55
+ action = trimmed.slice("action:".length).trim();
56
+ }
57
+ }
58
+ if (!summary && !action)
59
+ return null;
60
+ return { summary, action };
61
+ }
62
+ // ─── HTTP Calls ─────────────────────────────────────────────────────────────
63
+ async function callOpenAICompatible(baseUrl, apiKey, model, prompt, timeoutMs) {
64
+ const controller = new AbortController();
65
+ const timer = setTimeout(() => controller.abort(), timeoutMs);
66
+ try {
67
+ const res = await fetch(`${baseUrl}/chat/completions`, {
68
+ method: "POST",
69
+ headers: {
70
+ "Content-Type": "application/json",
71
+ Authorization: `Bearer ${apiKey}`,
72
+ },
73
+ body: JSON.stringify({
74
+ model,
75
+ messages: [{ role: "user", content: prompt }],
76
+ max_tokens: 200,
77
+ temperature: 0.3,
78
+ }),
79
+ signal: controller.signal,
80
+ });
81
+ if (!res.ok)
82
+ return null;
83
+ const data = (await res.json());
84
+ return data.choices?.[0]?.message?.content ?? null;
85
+ }
86
+ catch {
87
+ return null;
88
+ }
89
+ finally {
90
+ clearTimeout(timer);
91
+ }
92
+ }
93
+ async function callAnthropic(baseUrl, apiKey, model, prompt, timeoutMs) {
94
+ const controller = new AbortController();
95
+ const timer = setTimeout(() => controller.abort(), timeoutMs);
96
+ try {
97
+ const res = await fetch(`${baseUrl}/messages`, {
98
+ method: "POST",
99
+ headers: {
100
+ "Content-Type": "application/json",
101
+ "x-api-key": apiKey,
102
+ "anthropic-version": "2023-06-01",
103
+ },
104
+ body: JSON.stringify({
105
+ model,
106
+ max_tokens: 200,
107
+ messages: [{ role: "user", content: prompt }],
108
+ }),
109
+ signal: controller.signal,
110
+ });
111
+ if (!res.ok)
112
+ return null;
113
+ const data = (await res.json());
114
+ const textBlock = data.content?.find((b) => b.type === "text");
115
+ return textBlock?.text ?? null;
116
+ }
117
+ catch {
118
+ return null;
119
+ }
120
+ finally {
121
+ clearTimeout(timer);
122
+ }
123
+ }
124
+ // ─── Factory ────────────────────────────────────────────────────────────────
125
+ /**
126
+ * Create an AlertEnricher that calls an LLM to add a summary + action to alerts.
127
+ * Returns null if the model string lacks a "provider/" prefix, the provider is not recognized, or the provider's API key environment variable is unset.
128
+ */
129
+ export function createLlmEnricher(opts) {
130
+ const { modelString, logger, timeoutMs = 10_000 } = opts;
131
+ // Parse "provider/model-name" format
132
+ const slashIdx = modelString.indexOf("/");
133
+ if (slashIdx < 1) {
134
+ logger?.warn(`openalerts: llm-enrichment skipped — invalid model string "${modelString}"`);
135
+ return null;
136
+ }
137
+ const providerKey = modelString.slice(0, slashIdx).toLowerCase();
138
+ const model = modelString.slice(slashIdx + 1);
139
+ const providerConfig = PROVIDER_MAP[providerKey];
140
+ if (!providerConfig) {
141
+ logger?.warn(`openalerts: llm-enrichment skipped — unknown provider "${providerKey}"`);
142
+ return null;
143
+ }
144
+ const apiKey = process.env[providerConfig.apiKeyEnvVar];
145
+ if (!apiKey) {
146
+ logger?.warn(`openalerts: llm-enrichment skipped — ${providerConfig.apiKeyEnvVar} not set in environment`);
147
+ return null;
148
+ }
149
+ logger?.info(`openalerts: llm-enrichment enabled (${providerKey}/${model})`);
150
+ return async (alert) => {
151
+ const prompt = buildPrompt(alert);
152
+ let responseText = null;
153
+ if (providerConfig.type === "anthropic") {
154
+ responseText = await callAnthropic(providerConfig.baseUrl, apiKey, model, prompt, timeoutMs);
155
+ }
156
+ else {
157
+ responseText = await callOpenAICompatible(providerConfig.baseUrl, apiKey, model, prompt, timeoutMs);
158
+ }
159
+ if (!responseText)
160
+ return null;
161
+ const parsed = parseEnrichment(responseText);
162
+ if (!parsed)
163
+ return null;
164
+ // Append enrichment to the original detail
165
+ let enrichedDetail = alert.detail;
166
+ if (parsed.summary) {
167
+ enrichedDetail += `\n\nSummary: ${parsed.summary}`;
168
+ }
169
+ if (parsed.action) {
170
+ enrichedDetail += `\nAction: ${parsed.action}`;
171
+ }
172
+ return { ...alert, detail: enrichedDetail };
173
+ };
174
+ }
@@ -32,7 +32,7 @@ function isRuleEnabled(ctx, ruleId) {
32
32
  const infraErrors = {
33
33
  id: "infra-errors",
34
34
  defaultCooldownMs: 15 * 60 * 1000,
35
- defaultThreshold: 3,
35
+ defaultThreshold: 1,
36
36
  evaluate(event, ctx) {
37
37
  if (event.type !== "infra.error")
38
38
  return null;
@@ -40,8 +40,8 @@ const infraErrors = {
40
40
  return null;
41
41
  const channel = event.channel ?? "unknown";
42
42
  pushWindow(ctx, "infra-errors", { ts: ctx.now });
43
- const threshold = getRuleThreshold(ctx, "infra-errors", 3);
44
- const windowMs = 5 * 60 * 1000; // 5 minutes
43
+ const threshold = getRuleThreshold(ctx, "infra-errors", 1);
44
+ const windowMs = 60 * 1000; // 1 minute
45
45
  const count = countInWindow(ctx, "infra-errors", windowMs);
46
46
  if (count < threshold)
47
47
  return null;
@@ -52,7 +52,7 @@ const infraErrors = {
52
52
  ruleId: "infra-errors",
53
53
  severity: "error",
54
54
  title: "Infrastructure errors spike",
55
- detail: `${count} infra errors on ${channel} in the last 5 minutes.`,
55
+ detail: `${count} infra error(s) on ${channel} in the last minute.${event.error ? ` Last: ${event.error}` : ""}`,
56
56
  ts: ctx.now,
57
57
  fingerprint,
58
58
  };
@@ -62,32 +62,37 @@ const infraErrors = {
62
62
  const llmErrors = {
63
63
  id: "llm-errors",
64
64
  defaultCooldownMs: 15 * 60 * 1000,
65
- defaultThreshold: 3,
65
+ defaultThreshold: 1,
66
66
  evaluate(event, ctx) {
67
- if (event.type !== "llm.call")
67
+ // Trigger on LLM call/error events AND agent errors (agent failing before/during LLM call)
68
+ if (event.type !== "llm.call" && event.type !== "llm.error" && event.type !== "agent.error")
68
69
  return null;
69
70
  if (!isRuleEnabled(ctx, "llm-errors"))
70
71
  return null;
71
- // Track all LLM calls for stats
72
- ctx.state.stats.messagesProcessed++;
73
- if (event.outcome !== "error")
74
- return null;
75
- ctx.state.stats.messageErrors++;
72
+ // Stats are tracked in the evaluator (independent of rule state).
73
+ // Only proceed for actual errors:
74
+ if (event.type === "llm.call") {
75
+ // Only explicit error/timeout outcomes trigger alerting; undefined = OK
76
+ if (event.outcome !== "error" && event.outcome !== "timeout")
77
+ return null;
78
+ }
79
+ // llm.error and agent.error are always errors — no outcome check needed
76
80
  const channel = event.channel ?? "unknown";
77
81
  pushWindow(ctx, "llm-errors", { ts: ctx.now });
78
- const threshold = getRuleThreshold(ctx, "llm-errors", 3);
79
- const windowMs = 5 * 60 * 1000;
82
+ const threshold = getRuleThreshold(ctx, "llm-errors", 1);
83
+ const windowMs = 60 * 1000; // 1 minute
80
84
  const count = countInWindow(ctx, "llm-errors", windowMs);
81
85
  if (count < threshold)
82
86
  return null;
83
87
  const fingerprint = `llm-errors:${channel}`;
88
+ const label = event.type === "agent.error" ? "agent error(s)" : "LLM error(s)";
84
89
  return {
85
90
  type: "alert",
86
91
  id: makeAlertId("llm-errors", fingerprint, ctx.now),
87
92
  ruleId: "llm-errors",
88
93
  severity: "error",
89
94
  title: "LLM call errors",
90
- detail: `${count} LLM errors on ${channel} in the last 5 minutes.`,
95
+ detail: `${count} ${label} on ${channel} in the last minute.${event.error ? ` Last: ${event.error}` : ""}`,
91
96
  ts: ctx.now,
92
97
  fingerprint,
93
98
  };
@@ -103,7 +108,7 @@ const sessionStuck = {
103
108
  return null;
104
109
  if (!isRuleEnabled(ctx, "session-stuck"))
105
110
  return null;
106
- ctx.state.stats.stuckSessions++;
111
+ // Stats tracked in evaluator (independent of rule state)
107
112
  const ageMs = event.ageMs ?? 0;
108
113
  const threshold = getRuleThreshold(ctx, "session-stuck", 120_000);
109
114
  if (ageMs < threshold)
@@ -153,10 +158,8 @@ const heartbeatFail = {
153
158
  fingerprint,
154
159
  };
155
160
  }
156
- // Reset on success
157
- if (event.outcome === "success") {
158
- ctx.state.consecutives.set(counterKey, 0);
159
- }
161
+ // Reset on any non-error (success, undefined, etc.)
162
+ ctx.state.consecutives.set(counterKey, 0);
160
163
  return null;
161
164
  },
162
165
  };
@@ -169,12 +172,12 @@ const queueDepth = {
169
172
  // Fire on heartbeat (which carries queue depth) and dedicated queue_depth events
170
173
  if (event.type !== "infra.heartbeat" && event.type !== "infra.queue_depth")
171
174
  return null;
172
- if (!isRuleEnabled(ctx, "queue-depth"))
173
- return null;
174
- // Update last heartbeat timestamp (used by gateway-down rule)
175
+ // Always update heartbeat timestamp regardless of rule state (gateway-down depends on it)
175
176
  if (event.type === "infra.heartbeat") {
176
177
  ctx.state.lastHeartbeatTs = ctx.now;
177
178
  }
179
+ if (!isRuleEnabled(ctx, "queue-depth"))
180
+ return null;
178
181
  const queued = event.queueDepth ?? 0;
179
182
  const threshold = getRuleThreshold(ctx, "queue-depth", 10);
180
183
  if (queued < threshold)
@@ -198,11 +201,15 @@ const highErrorRate = {
198
201
  defaultCooldownMs: 30 * 60 * 1000,
199
202
  defaultThreshold: 50, // percent
200
203
  evaluate(event, ctx) {
201
- if (event.type !== "llm.call")
204
+ if (event.type !== "llm.call" && event.type !== "llm.error" && event.type !== "agent.error")
202
205
  return null;
203
206
  if (!isRuleEnabled(ctx, "high-error-rate"))
204
207
  return null;
205
- const isError = event.outcome === "error";
208
+ // agent.error and llm.error are always errors; llm.call checks outcome (timeout counts as error)
209
+ const isError = event.type === "agent.error" ||
210
+ event.type === "llm.error" ||
211
+ event.outcome === "error" ||
212
+ event.outcome === "timeout";
206
213
  pushWindow(ctx, "msg-outcomes", { ts: ctx.now, value: isError ? 1 : 0 });
207
214
  const window = ctx.state.windows.get("msg-outcomes");
208
215
  if (!window || window.length < 20)
@@ -227,11 +234,41 @@ const highErrorRate = {
227
234
  };
228
235
  },
229
236
  };
237
+ // ─── Rule: tool-errors ───────────────────────────────────────────────────
238
+ const toolErrors = {
239
+ id: "tool-errors",
240
+ defaultCooldownMs: 15 * 60 * 1000,
241
+ defaultThreshold: 1, // 1 tool error in 1 minute
242
+ evaluate(event, ctx) {
243
+ if (event.type !== "tool.error")
244
+ return null;
245
+ if (!isRuleEnabled(ctx, "tool-errors"))
246
+ return null;
247
+ pushWindow(ctx, "tool-errors", { ts: ctx.now });
248
+ const threshold = getRuleThreshold(ctx, "tool-errors", 1);
249
+ const windowMs = 60 * 1000; // 1 minute
250
+ const count = countInWindow(ctx, "tool-errors", windowMs);
251
+ if (count < threshold)
252
+ return null;
253
+ const toolName = event.meta?.toolName ?? "unknown";
254
+ const fingerprint = `tool-errors:${toolName}`;
255
+ return {
256
+ type: "alert",
257
+ id: makeAlertId("tool-errors", fingerprint, ctx.now),
258
+ ruleId: "tool-errors",
259
+ severity: "warn",
260
+ title: "Tool errors spike",
261
+ detail: `${count} tool error(s) in the last minute.${event.error ? ` Last: ${event.error}` : ""}`,
262
+ ts: ctx.now,
263
+ fingerprint,
264
+ };
265
+ },
266
+ };
230
267
  // ─── Rule: gateway-down ──────────────────────────────────────────────────────
231
268
  const gatewayDown = {
232
269
  id: "gateway-down",
233
270
  defaultCooldownMs: 60 * 60 * 1000,
234
- defaultThreshold: 90_000, // 90 seconds
271
+ defaultThreshold: 30_000, // 30 seconds
235
272
  evaluate(event, ctx) {
236
273
  // This rule is called by the watchdog timer, not by events directly.
237
274
  if (event.type !== "watchdog.tick")
@@ -270,5 +307,6 @@ export const ALL_RULES = [
270
307
  heartbeatFail,
271
308
  queueDepth,
272
309
  highErrorRate,
310
+ toolErrors,
273
311
  gatewayDown,
274
312
  ];
@@ -51,6 +51,8 @@ export type AlertTarget = {
51
51
  to: string;
52
52
  accountId?: string;
53
53
  };
54
+ /** Enriches an alert with LLM-generated summary/action. Resolves to the enriched alert, or null to leave it unenriched (the original alert is dispatched as-is). */
55
+ export type AlertEnricher = (alert: AlertEvent) => Promise<AlertEvent | null>;
54
56
  export type RuleOverride = {
55
57
  enabled?: boolean;
56
58
  threshold?: number;
@@ -65,6 +67,7 @@ export type MonitorConfig = {
65
67
  maxLogSizeKb?: number;
66
68
  maxLogAgeDays?: number;
67
69
  quiet?: boolean;
70
+ llmEnriched?: boolean;
68
71
  rules?: Record<string, RuleOverride>;
69
72
  };
70
73
  export type OpenAlertsInitOptions = {
@@ -80,6 +83,8 @@ export type OpenAlertsInitOptions = {
80
83
  logPrefix?: string;
81
84
  /** Diagnosis hint shown in critical alerts (e.g., 'Run "openclaw doctor"') */
82
85
  diagnosisHint?: string;
86
+ /** Optional LLM enricher — adds smart summaries to alerts before dispatch */
87
+ enricher?: AlertEnricher;
83
88
  };
84
89
  export type OpenAlertsLogger = {
85
90
  info: (msg: string) => void;
@@ -148,5 +153,5 @@ export declare const DEFAULTS: {
148
153
  readonly pruneIntervalMs: number;
149
154
  readonly platformFlushIntervalMs: number;
150
155
  readonly platformBatchSize: 100;
151
- readonly gatewayDownThresholdMs: 90000;
156
+ readonly gatewayDownThresholdMs: 30000;
152
157
  };
@@ -13,5 +13,5 @@ export const DEFAULTS = {
13
13
  pruneIntervalMs: 6 * 60 * 60 * 1000, // 6 hours
14
14
  platformFlushIntervalMs: 5 * 60 * 1000, // 5 minutes
15
15
  platformBatchSize: 100,
16
- gatewayDownThresholdMs: 90_000, // 90 seconds
16
+ gatewayDownThresholdMs: 30_000, // 30 seconds
17
17
  };
package/dist/index.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import { OpenAlertsEngine } from "./core/index.js";
2
2
  import { onDiagnosticEvent, registerLogTransport } from "openclaw/plugin-sdk";
3
3
  import { createLogBridge } from "./plugin/log-bridge.js";
4
- import { OpenClawAlertChannel, parseConfig, resolveAlertTarget, translateOpenClawEvent, translateToolCallHook, translateAgentStartHook, translateAgentEndHook, translateSessionStartHook, translateSessionEndHook, translateMessageSentHook, translateMessageReceivedHook, translateBeforeToolCallHook, translateBeforeCompactionHook, translateAfterCompactionHook, translateMessageSendingHook, translateToolResultPersistHook, translateGatewayStartHook, translateGatewayStopHook, } from "./plugin/adapter.js";
4
+ import { OpenClawAlertChannel, createOpenClawEnricher, parseConfig, resolveAlertTarget, translateOpenClawEvent, translateToolCallHook, translateAgentStartHook, translateAgentEndHook, translateSessionStartHook, translateSessionEndHook, translateMessageSentHook, translateMessageReceivedHook, translateBeforeToolCallHook, translateBeforeCompactionHook, translateAfterCompactionHook, translateMessageSendingHook, translateToolResultPersistHook, translateGatewayStartHook, translateGatewayStopHook, } from "./plugin/adapter.js";
5
5
  import { bindEngine, createMonitorCommands } from "./plugin/commands.js";
6
6
  import { createDashboardHandler, closeDashboardConnections, } from "./plugin/dashboard-routes.js";
7
7
  const PLUGIN_ID = "openalerts";
@@ -13,12 +13,16 @@ let logBridgeCleanup = null;
13
13
  function createMonitorService(api) {
14
14
  return {
15
15
  id: PLUGIN_ID,
16
- start(ctx) {
16
+ async start(ctx) {
17
17
  const logger = ctx.logger;
18
18
  const config = parseConfig(api.pluginConfig);
19
19
  // Resolve alert target + create OpenClaw alert channel
20
- const target = resolveAlertTarget(api, config);
20
+ const target = await resolveAlertTarget(api, config);
21
21
  const channels = target ? [new OpenClawAlertChannel(api, target)] : [];
22
+ // Create LLM enricher if enabled (default: true)
23
+ const enricher = config.llmEnriched !== false
24
+ ? createOpenClawEnricher(api, logger)
25
+ : null;
22
26
  // Create and start the universal engine
23
27
  engine = new OpenAlertsEngine({
24
28
  stateDir: ctx.stateDir,
@@ -27,6 +31,7 @@ function createMonitorService(api) {
27
31
  logger,
28
32
  logPrefix: LOG_PREFIX,
29
33
  diagnosisHint: 'Run "openclaw doctor" to diagnose.',
34
+ enricher: enricher ?? undefined,
30
35
  });
31
36
  engine.start();
32
37
  // Wire commands to engine
@@ -169,7 +174,7 @@ function createMonitorService(api) {
169
174
  const targetDesc = target
170
175
  ? `alerting to ${target.channel}:${target.to}`
171
176
  : "log-only (no alert channel detected)";
172
- logger.info(`${LOG_PREFIX}: started, ${targetDesc}, log-bridge active, 7 rules active`);
177
+ logger.info(`${LOG_PREFIX}: started, ${targetDesc}, log-bridge active, 8 rules active`);
173
178
  },
174
179
  stop() {
175
180
  closeDashboardConnections();
@@ -1,4 +1,4 @@
1
- import type { AlertChannel, AlertEvent, AlertTarget, MonitorConfig, OpenAlertsEvent } from "../core/index.js";
1
+ import type { AlertChannel, AlertEnricher, AlertEvent, AlertTarget, MonitorConfig, OpenAlertsEvent } from "../core/index.js";
2
2
  import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
3
3
  /**
4
4
  * Translate an OpenClaw diagnostic event into a universal OpenAlertsEvent.
@@ -128,11 +128,23 @@ export declare class OpenClawAlertChannel implements AlertChannel {
128
128
  readonly name: string;
129
129
  private api;
130
130
  private target;
131
+ private warnedMissing;
131
132
  constructor(api: OpenClawPluginApi, target: AlertTarget);
132
133
  send(alert: AlertEvent, formatted: string): Promise<void>;
133
134
  }
134
135
  /**
135
136
  * Resolve the alert target from plugin config or by auto-detecting from OpenClaw config.
136
137
  */
137
- export declare function resolveAlertTarget(api: OpenClawPluginApi, pluginConfig: MonitorConfig): AlertTarget | null;
138
+ export declare function resolveAlertTarget(api: OpenClawPluginApi, pluginConfig: MonitorConfig): Promise<AlertTarget | null>;
138
139
  export declare function parseConfig(raw: Record<string, unknown> | undefined): MonitorConfig;
140
+ /**
141
+ * Create an AlertEnricher from the OpenClaw plugin API.
142
+ * Reads the model from api.config.agents.defaults.model.primary (e.g. "openai/gpt-5-nano")
143
+ * and resolves the API key from process.env.
144
+ * Returns null if no model is configured or enricher can't be created.
145
+ */
146
+ export declare function createOpenClawEnricher(api: OpenClawPluginApi, logger?: {
147
+ info: (msg: string) => void;
148
+ warn: (msg: string) => void;
149
+ error: (msg: string) => void;
150
+ }): AlertEnricher | null;
@@ -1,3 +1,7 @@
1
+ import { createLlmEnricher } from "../core/llm-enrichment.js";
2
+ import { readFile } from "node:fs/promises";
3
+ import { join } from "node:path";
4
+ import { homedir } from "node:os";
1
5
  // ─── Diagnostic Event Translation ───────────────────────────────────────────
2
6
  //
3
7
  // OpenClaw emits 12 diagnostic event types through onDiagnosticEvent():
@@ -230,7 +234,7 @@ export function translateMessageSentHook(data, context) {
230
234
  sessionKey: context.sessionId,
231
235
  outcome: data.success ? "success" : "error",
232
236
  error: data.error,
233
- meta: { to: data.to, source: "hook:message_sent" },
237
+ meta: { to: data.to, content: data.content, source: "hook:message_sent" },
234
238
  };
235
239
  }
236
240
  /** Translate gateway_start hook data into OpenAlertsEvent. */
@@ -261,6 +265,7 @@ export function translateMessageReceivedHook(data, context) {
261
265
  outcome: "success",
262
266
  meta: {
263
267
  from: data.from,
268
+ content: data.content,
264
269
  accountId: context.accountId,
265
270
  openclawHook: "message_received",
266
271
  source: "hook:message_received",
@@ -310,6 +315,7 @@ export function translateAfterCompactionHook(data, context) {
310
315
  messageCount: data.messageCount,
311
316
  tokenCount: data.tokenCount,
312
317
  compactedCount: data.compactedCount,
318
+ compaction: true,
313
319
  openclawHook: "after_compaction",
314
320
  source: "hook:after_compaction",
315
321
  },
@@ -324,6 +330,7 @@ export function translateMessageSendingHook(data, context) {
324
330
  outcome: "success",
325
331
  meta: {
326
332
  to: data.to,
333
+ content: data.content,
327
334
  accountId: context.accountId,
328
335
  openclawHook: "message_sending",
329
336
  source: "hook:message_sending",
@@ -356,6 +363,7 @@ export class OpenClawAlertChannel {
356
363
  name;
357
364
  api;
358
365
  target;
366
+ warnedMissing = false;
359
367
  constructor(api, target) {
360
368
  this.api = api;
361
369
  this.target = target;
@@ -364,8 +372,13 @@ export class OpenClawAlertChannel {
364
372
  async send(alert, formatted) {
365
373
  const runtime = this.api.runtime;
366
374
  const channel = runtime.channel;
367
- if (!channel)
375
+ if (!channel) {
376
+ if (!this.warnedMissing) {
377
+ this.warnedMissing = true;
378
+ throw new Error(`runtime.channel not available — alert dropped`);
379
+ }
368
380
  return;
381
+ }
369
382
  const opts = this.target.accountId
370
383
  ? { accountId: this.target.accountId }
371
384
  : {};
@@ -377,20 +390,22 @@ export class OpenClawAlertChannel {
377
390
  signal: "sendMessageSignal",
378
391
  };
379
392
  const methodName = channelMethods[this.target.channel];
380
- if (!methodName)
381
- return;
393
+ if (!methodName) {
394
+ throw new Error(`unsupported channel "${this.target.channel}" — no send method mapped`);
395
+ }
382
396
  const channelMod = channel[this.target.channel];
383
397
  const sendFn = channelMod?.[methodName];
384
- if (sendFn) {
385
- await sendFn(this.target.to, formatted, opts);
398
+ if (!sendFn) {
399
+ throw new Error(`${this.target.channel}.${methodName} not found on runtime — alert dropped`);
386
400
  }
401
+ await sendFn(this.target.to, formatted, opts);
387
402
  }
388
403
  }
389
404
  // ─── Alert Target Resolution ────────────────────────────────────────────────
390
405
  /**
391
406
  * Resolve the alert target from plugin config or by auto-detecting from OpenClaw config.
392
407
  */
393
- export function resolveAlertTarget(api, pluginConfig) {
408
+ export async function resolveAlertTarget(api, pluginConfig) {
394
409
  // 1. Explicit config
395
410
  if (pluginConfig.alertChannel && pluginConfig.alertTo) {
396
411
  return {
@@ -400,7 +415,8 @@ export function resolveAlertTarget(api, pluginConfig) {
400
415
  };
401
416
  }
402
417
  const cfg = api.config;
403
- // 2. Auto-detect from configured channels
418
+ const channelsCfg = cfg.channels ??
419
+ {};
404
420
  const channelKeys = [
405
421
  "telegram",
406
422
  "discord",
@@ -408,14 +424,33 @@ export function resolveAlertTarget(api, pluginConfig) {
408
424
  "whatsapp",
409
425
  "signal",
410
426
  ];
427
+ // 2. Auto-detect from static allowFrom in channel config
411
428
  for (const channelKey of channelKeys) {
412
- const channelConfig = cfg[channelKey];
429
+ const channelConfig = channelsCfg[channelKey];
413
430
  if (!channelConfig || typeof channelConfig !== "object")
414
431
  continue;
415
432
  const target = extractFirstAllowFrom(channelKey, channelConfig);
416
433
  if (target)
417
434
  return target;
418
435
  }
436
+ // 3. Auto-detect from pairing store (runtime-paired users)
437
+ // The store lives at ~/.openclaw/credentials/<channel>-allowFrom.json
438
+ const credDir = join(process.env.OPENCLAW_HOME ?? join(homedir(), ".openclaw"), "credentials");
439
+ for (const channelKey of channelKeys) {
440
+ const channelConfig = channelsCfg[channelKey];
441
+ if (!channelConfig || typeof channelConfig !== "object")
442
+ continue;
443
+ try {
444
+ const raw = await readFile(join(credDir, `${channelKey}-allowFrom.json`), "utf-8");
445
+ const data = JSON.parse(raw);
446
+ if (Array.isArray(data.allowFrom) && data.allowFrom.length > 0) {
447
+ return { channel: channelKey, to: String(data.allowFrom[0]) };
448
+ }
449
+ }
450
+ catch {
451
+ // File doesn't exist or isn't valid — skip this channel
452
+ }
453
+ }
419
454
  return null;
420
455
  }
421
456
  function extractFirstAllowFrom(channel, channelConfig) {
@@ -451,8 +486,34 @@ export function parseConfig(raw) {
451
486
  maxLogSizeKb: typeof raw.maxLogSizeKb === "number" ? raw.maxLogSizeKb : undefined,
452
487
  maxLogAgeDays: typeof raw.maxLogAgeDays === "number" ? raw.maxLogAgeDays : undefined,
453
488
  quiet: typeof raw.quiet === "boolean" ? raw.quiet : undefined,
489
+ llmEnriched: typeof raw.llmEnriched === "boolean" ? raw.llmEnriched : undefined,
454
490
  rules: raw.rules && typeof raw.rules === "object"
455
491
  ? raw.rules
456
492
  : undefined,
457
493
  };
458
494
  }
495
+ // ─── LLM Enricher Factory ───────────────────────────────────────────────────
496
+ /**
497
+ * Create an AlertEnricher from the OpenClaw plugin API.
498
+ * Reads the model from api.config.agents.defaults.model.primary (e.g. "openai/gpt-5-nano")
499
+ * and resolves the API key from process.env.
500
+ * Returns null if no model is configured or enricher can't be created.
501
+ */
502
+ export function createOpenClawEnricher(api, logger) {
503
+ try {
504
+ const cfg = api.config;
505
+ const agents = cfg.agents;
506
+ const defaults = agents?.defaults;
507
+ const model = defaults?.model;
508
+ const primary = model?.primary;
509
+ if (typeof primary !== "string" || !primary.includes("/")) {
510
+ logger?.warn("openalerts: llm-enrichment skipped — no model configured at agents.defaults.model.primary");
511
+ return null;
512
+ }
513
+ return createLlmEnricher({ modelString: primary, logger });
514
+ }
515
+ catch (err) {
516
+ logger?.warn(`openalerts: llm-enrichment setup failed: ${String(err)}`);
517
+ return null;
518
+ }
519
+ }
@@ -29,7 +29,7 @@ export function createMonitorCommands(api) {
29
29
  handler: () => handleDashboard(),
30
30
  },
31
31
  {
32
- name: "test-alert",
32
+ name: "test_alert",
33
33
  description: "Send a test alert to verify alert delivery",
34
34
  acceptsArgs: false,
35
35
  handler: () => handleTestAlert(),
@@ -72,26 +72,7 @@ function handleTestAlert() {
72
72
  if (!_engine) {
73
73
  return { text: "OpenAlerts not initialized yet. Wait for gateway startup." };
74
74
  }
75
- // Ingest a synthetic infra.error to trigger the infra-errors rule evaluation.
76
- // This won't fire an actual alert unless the threshold (3 errors) is reached,
77
- // so we fire a one-off test alert directly through the engine.
78
- const testEvent = {
79
- type: "alert",
80
- id: `test:manual:${Date.now()}`,
81
- ruleId: "test",
82
- severity: "info",
83
- title: "Test alert — delivery verified",
84
- detail: "This is a test alert from /test-alert. If you see this, alert delivery is working.",
85
- ts: Date.now(),
86
- fingerprint: `test:manual`,
87
- };
88
- // Ingest as a custom event so it appears in the dashboard
89
- _engine.ingest({
90
- type: "custom",
91
- ts: Date.now(),
92
- outcome: "success",
93
- meta: { openclawLog: "test_alert", source: "command:test-alert" },
94
- });
75
+ _engine.sendTestAlert();
95
76
  return {
96
77
  text: "Test alert sent. Check your alert channel (Telegram/Discord/etc) for delivery confirmation.\n\nIf you don't receive it, check /health for channel status.",
97
78
  };
@@ -423,6 +423,8 @@ export function getDashboardHtml() {
423
423
  var ft=ev.type||'?';
424
424
  if(ft==='custom'&&m.openclawEventType==='session.state')ft='session.'+(m.sessionState||'state');
425
425
  if(ft==='custom'&&m.openclawEventType==='message_sent')ft='msg.delivered';
426
+ if(ft==='custom'&&m.openclawHook==='message_received')ft='msg.in';
427
+ if(ft==='custom'&&m.openclawHook==='message_sending')ft='msg.out';
426
428
 
427
429
  var h='<div class="r-main">';
428
430
  h+='<span class="r-time">'+fT(ev.ts)+'</span>';
@@ -439,6 +441,7 @@ export function getDashboardHtml() {
439
441
  if(m.model)h+='<span class="p m">'+esc(String(m.model))+'</span>';
440
442
  if(ev.channel)h+='<span class="p ch">'+esc(ev.channel)+'</span>';
441
443
  if(m.messageCount!=null)h+='<span class="p">'+m.messageCount+' msgs</span>';
444
+ if(m.content){var preview=String(m.content);if(preview.length>60)preview=preview.slice(0,57)+'...';h+='<span class="p">'+esc(preview)+'</span>'}
442
445
  if(m.source&&String(m.source)!=='simulate')h+='<span class="p s">'+esc(String(m.source))+'</span>';
443
446
  h+='</span></div>';
444
447
 
@@ -596,17 +599,21 @@ export function getDashboardHtml() {
596
599
  // ─── SSE (OpenAlerts events + OpenClaw log tailing) ──────────────────────
597
600
  function connectSSE(){
598
601
  if(evSrc)evSrc.close();
599
- evSrc=new EventSource('/openalerts/events');
600
- evSrc.addEventListener('openalerts',function(e){try{addEvent(JSON.parse(e.data))}catch(_){}});
601
- evSrc.addEventListener('oclog',function(e){try{addLogEntry(JSON.parse(e.data))}catch(_){}});
602
- evSrc.onopen=function(){$('sDot').className='dot live';$('sConn').textContent='live'};
603
- evSrc.onerror=function(){$('sDot').className='dot dead';$('sConn').textContent='reconnecting...'};
602
+ try{
603
+ evSrc=new EventSource('/openalerts/events');
604
+ evSrc.addEventListener('openalerts',function(e){try{addEvent(JSON.parse(e.data))}catch(_){}});
605
+ evSrc.addEventListener('history',function(e){try{var evs=JSON.parse(e.data);for(var i=0;i<evs.length;i++)addEvent(evs[i])}catch(_){}});
606
+ evSrc.addEventListener('oclog',function(e){try{addLogEntry(JSON.parse(e.data))}catch(_){}});
607
+ evSrc.onopen=function(){$('sDot').className='dot live';$('sConn').textContent='live'};
608
+ evSrc.onerror=function(e){$('sDot').className='dot dead';$('sConn').textContent='err:'+evSrc.readyState};
609
+ }catch(e){$('sConn').textContent='SSE fail:'+e.message}
604
610
  }
605
611
 
606
612
  // ─── State polling ──────────────────────
607
613
  var prevAl={};
608
614
  function pollState(){
609
- fetch('/openalerts/state').then(function(r){return r.json()}).then(function(s){
615
+ fetch('/openalerts/state').then(function(r){if(!r.ok)throw new Error('HTTP '+r.status);return r.json()}).catch(function(e){$('sUp').textContent='fetch err: '+e.message;return null}).then(function(s){
616
+ if(!s)return;
610
617
  if(s.stats){
611
618
  $('sMsgs').textContent=s.stats.messagesProcessed||0;
612
619
  $('sErr').textContent=(s.stats.messageErrors||0)+(s.stats.webhookErrors||0)+(s.stats.toolErrors||0);
@@ -736,7 +743,7 @@ export function getDashboardHtml() {
736
743
  var btn=row.querySelector('.log-copy');
737
744
  if(btn)lines.push(btn.getAttribute('data-raw'));
738
745
  });
739
- var blob=new Blob([lines.join('\n')],{type:'text/plain'});
746
+ var blob=new Blob([lines.join('\\n')],{type:'text/plain'});
740
747
  var url=URL.createObjectURL(blob);
741
748
  var a=document.createElement('a');
742
749
  a.href=url;a.download='openalerts-logs-'+Date.now()+'.txt';
@@ -26,14 +26,25 @@ const RULE_IDS = [
26
26
  "heartbeat-fail",
27
27
  "queue-depth",
28
28
  "high-error-rate",
29
+ "tool-errors",
29
30
  "gateway-down",
30
31
  ];
31
32
  function getRuleStatuses(engine) {
32
33
  const state = engine.state;
33
34
  const now = Date.now();
35
+ const cooldownWindow = 15 * 60 * 1000;
34
36
  return RULE_IDS.map((id) => {
35
- const cooldownTs = state.cooldowns.get(id);
36
- const fired = cooldownTs != null && cooldownTs > now - 15 * 60 * 1000;
37
+ // Cooldown keys are fingerprints like "llm-errors:unknown", not bare rule IDs.
38
+ // Check if ANY cooldown key starting with this rule ID has fired recently.
39
+ let fired = false;
40
+ for (const [key, ts] of state.cooldowns) {
41
+ if (key === id || key.startsWith(id + ":")) {
42
+ if (ts > now - cooldownWindow) {
43
+ fired = true;
44
+ break;
45
+ }
46
+ }
47
+ }
37
48
  return { id, status: fired ? "fired" : "ok" };
38
49
  });
39
50
  }
@@ -208,6 +219,20 @@ export function createDashboardHandler(getEngine) {
208
219
  "Access-Control-Allow-Origin": "*",
209
220
  });
210
221
  res.flushHeaders();
222
+ // Send initial connection event so the browser knows the stream is live
223
+ res.write(`:ok\n\n`);
224
+ // Send current state snapshot as initial event
225
+ const state = engine.state;
226
+ res.write(`event: state\ndata: ${JSON.stringify({
227
+ uptimeMs: Date.now() - state.startedAt,
228
+ stats: state.stats,
229
+ rules: getRuleStatuses(engine),
230
+ })}\n\n`);
231
+ // Send event history so dashboard survives refreshes
232
+ const history = engine.getRecentLiveEvents(200);
233
+ if (history.length > 0) {
234
+ res.write(`event: history\ndata: ${JSON.stringify(history)}\n\n`);
235
+ }
211
236
  // Subscribe to engine events
212
237
  const unsub = engine.bus.on((event) => {
213
238
  try {
@@ -272,6 +272,30 @@ export function createLogBridge(engine) {
272
272
  },
273
273
  });
274
274
  }
275
+ // ── Lane task error (diagnostic) ────────────────────────────────────────────
276
+ // Safety net: catches lane-level errors from diagnostic logs.
277
+ // The agent_end hook already covers agent errors → llm-errors rule.
278
+ // This emits as infra.error to avoid double-counting in the llm-errors window
279
+ // while still ensuring infra-errors fires if the hook path fails.
280
+ // Format: "lane task error: lane=main durationMs=1 error="Error: ...""
281
+ function handleLaneTaskError(rec) {
282
+ const { lane, error: errorMsg } = rec.kvs;
283
+ const dedupeKey = `lane-error:${lane}:${rec.ts}`;
284
+ if (dedupeSet.has(dedupeKey))
285
+ return;
286
+ dedupeSet.add(dedupeKey);
287
+ ingest({
288
+ type: "infra.error",
289
+ ts: rec.ts,
290
+ outcome: "error",
291
+ error: errorMsg,
292
+ meta: {
293
+ lane,
294
+ source: "log-bridge",
295
+ openclawLog: "lane_task_error",
296
+ },
297
+ });
298
+ }
275
299
  // ── Exec command (exec) ────────────────────────────────────────────────────
276
300
  function handleExecCommand(rec) {
277
301
  pendingCommand = rec.message;
@@ -312,6 +336,9 @@ export function createLogBridge(engine) {
312
336
  if (msg.startsWith("session state:")) {
313
337
  handleSessionState(rec);
314
338
  }
339
+ else if (msg.startsWith("lane task error:")) {
340
+ handleLaneTaskError(rec);
341
+ }
315
342
  }
316
343
  else if (rec.subsystem === "exec") {
317
344
  if (msg.startsWith("elevated command")) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@steadwing/openalerts",
3
- "version": "0.2.0",
3
+ "version": "0.2.2",
4
4
  "type": "module",
5
5
  "description": "OpenAlerts — An alerting layer for agentic frameworks",
6
6
  "author": "Steadwing",