@steadwing/openalerts 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,155 @@
1
+ <p align="center">
2
+ <h1 align="center">OpenAlerts</h1>
3
+ <p align="center">
4
+ An alerting layer for agentic frameworks.
5
+ </p>
6
+ </p>
7
+
8
+ <p align="center">
9
+ <a href="https://www.npmjs.com/package/@steadwing/openalerts"><img src="https://img.shields.io/npm/v/@steadwing/openalerts?style=flat&color=blue" alt="npm"></a>
10
+ <a href="https://github.com/steadwing/openalerts/blob/main/LICENSE"><img src="https://img.shields.io/badge/license-Apache--2.0-green" alt="License"></a>
11
+ <a href="https://github.com/steadwing/openalerts/stargazers"><img src="https://img.shields.io/github/stars/steadwing/openalerts?style=flat" alt="GitHub stars"></a>
12
+ </p>
13
+
14
+ <p align="center">
15
+ <a href="#quickstart">Quickstart</a> &middot;
16
+ <a href="#alert-rules">Alert Rules</a> &middot;
17
+ <a href="#configuration">Configuration</a> &middot;
18
+ <a href="#dashboard">Dashboard</a> &middot;
19
+ <a href="#commands">Commands</a>
20
+ </p>
21
+
22
+ ---
23
+
24
+ AI agents fail silently. LLM errors, stuck sessions, gateway outages — nobody knows until a user complains.
25
+
26
+ OpenAlerts watches your agent in real time and alerts you the moment something goes wrong. It is built as a framework-agnostic core with adapter plugins — starting with [OpenClaw](https://github.com/openclaw/openclaw).
27
+
28
+ ## Quickstart
29
+
30
+ > Currently supports OpenClaw. More framework adapters coming soon.
31
+
32
+ ### 1. Install
33
+
34
+ ```bash
35
+ openclaw plugins install @steadwing/openalerts
36
+ ```
37
+
38
+ ### 2. Configure
39
+
40
+ Add to your `openclaw.json`:
41
+
42
+ ```jsonc
43
+ {
44
+ "plugins": {
45
+ "entries": {
46
+ "openalerts": {
47
+ "enabled": true,
48
+ "config": {
49
+ "alertChannel": "telegram", // telegram | discord | slack | whatsapp | signal
50
+ "alertTo": "YOUR_CHAT_ID"
51
+ }
52
+ }
53
+ }
54
+ }
55
+ }
56
+ ```
57
+
58
+ ### 3. Restart & verify
59
+
60
+ ```bash
61
+ openclaw gateway stop && openclaw gateway run
62
+ ```
63
+
64
+ Send `/health` to your bot. You should get a live status report back — zero LLM tokens consumed.
65
+
66
+ That's it. OpenAlerts is now watching your agent.
67
+
68
+ ## Alert Rules
69
+
70
+ Seven rules run against incoming events in real time (the gateway-down check runs on a periodic watchdog timer):
71
+
72
+ | Rule | Watches for | Severity |
73
+ |---|---|---|
74
+ | **llm-errors** | 3+ LLM failures in 5 minutes | ERROR |
75
+ | **infra-errors** | 3+ infrastructure errors in 5 minutes | ERROR |
76
+ | **gateway-down** | No heartbeat for 90+ seconds | CRITICAL |
77
+ | **session-stuck** | Session idle for 120+ seconds | WARN |
78
+ | **high-error-rate** | 50%+ of last 20 messages failed | ERROR |
79
+ | **queue-depth** | 10+ items queued | WARN |
80
+ | **heartbeat-fail** | 3 consecutive heartbeat failures | ERROR |
81
+
82
+ All thresholds and cooldowns are [configurable per-rule](#configuration).
83
+
84
+ ## Configuration
85
+
86
+ Full config reference under `plugins.entries.openalerts.config`:
87
+
88
+ ```jsonc
89
+ {
90
+ "alertChannel": "telegram", // telegram | discord | slack | whatsapp | signal
91
+ "alertTo": "YOUR_CHAT_ID", // chat/user ID on that channel
92
+ "cooldownMinutes": 15, // minutes between repeated alerts (default: 15)
93
+ "quiet": false, // true = log only, no messages sent
94
+
95
+ "rules": {
96
+ "gateway-down": {
97
+ "threshold": 120000 // override: 2 min instead of 90s
98
+ },
99
+ "high-error-rate": {
100
+ "enabled": false // disable a rule entirely
101
+ },
102
+ "llm-errors": {
103
+ "threshold": 5, // require 5 errors instead of 3
104
+ "cooldownMinutes": 30 // longer cooldown for this rule
105
+ }
106
+ }
107
+ }
108
+ ```
109
+
110
+ ## Dashboard
111
+
112
+ A real-time web dashboard is embedded in the gateway at:
113
+
114
+ ```
115
+ http://127.0.0.1:18789/openalerts
116
+ ```
117
+
118
+ - **Activity** — Live event timeline with session flows, tool calls, LLM usage
119
+ - **System Logs** — Filtered, structured logs with search
120
+ - **Health** — Rule status, alert history, system stats
121
+
122
+ ## Commands
123
+
124
+ Zero-token chat commands available in any connected channel:
125
+
126
+ | Command | What it does |
127
+ |---|---|
128
+ | `/health` | System health snapshot — uptime, active alerts, stats |
129
+ | `/alerts` | Recent alert history with severity and timestamps |
130
+ | `/dashboard` | Returns the dashboard URL |
131
+
132
+ ## Architecture
133
+
134
+ ```
135
+ src/core/ Framework-agnostic engine, zero dependencies
136
+ Rules engine, evaluator, event bus, state store, formatter
137
+
138
+ src/plugin/ OpenClaw adapter plugin
139
+ Event translation, alert routing, dashboard, chat commands
140
+ ```
141
+
142
+ Everything ships as a single `@steadwing/openalerts` package. The core is completely framework-agnostic — adding monitoring for a new framework only requires writing an adapter.
143
+
144
+ ## Development
145
+
146
+ ```bash
147
+ npm install # install dependencies
148
+ npm run build # compile TypeScript
149
+ npm run typecheck # type-check without emitting
150
+ npm run clean # remove dist/
151
+ ```
152
+
153
+ ## License
154
+
155
+ Apache-2.0
@@ -0,0 +1,23 @@
1
+ import type { AlertChannel, AlertEvent, OpenAlertsLogger } from "./types.js";
2
+ /**
3
+ * Fans each alert out to every registered channel. The message text is formatted once per dispatch and handed to each channel's `send`.
4
+ * Fire-and-forget: a failure in one channel is reported through the logger and does not block the others.
5
+ */
6
+ export declare class AlertDispatcher {
7
+ private channels;
8
+ private logger;
9
+ private diagnosisHint?;
10
+ constructor(opts: {
11
+ channels?: AlertChannel[]; // initial channels; defaults to none
12
+ logger?: OpenAlertsLogger; // defaults to `console`
13
+ diagnosisHint?: string; // passed through to the message formatter on every dispatch
14
+ });
15
+ /** Appends a channel; it receives alerts from subsequent `dispatch` calls. */
16
+ addChannel(channel: AlertChannel): void;
17
+ /** Sends the alert to every registered channel and resolves once all sends have settled. Does nothing when no channels are registered. */
18
+ dispatch(alert: AlertEvent): Promise<void>;
19
+ /** Whether at least one channel is registered. */
20
+ get hasChannels(): boolean;
21
+ /** Number of registered channels. */
22
+ get channelCount(): number;
23
+ }
@@ -0,0 +1,44 @@
1
+ import { formatAlertMessage } from "./formatter.js";
/**
 * Dispatches alerts to all registered channels.
 *
 * The alert is formatted once per dispatch and the same text is handed to every
 * channel. Fire-and-forget: a channel whose `send` throws or rejects is logged
 * via `logger.error` and does not prevent delivery to the other channels.
 */
export class AlertDispatcher {
    channels = [];
    logger;
    diagnosisHint;
    /**
     * @param opts.channels      Initial channels (optional, defaults to none).
     * @param opts.logger        Logger used to report channel failures (defaults to `console`).
     * @param opts.diagnosisHint Optional hint forwarded to the message formatter.
     */
    constructor(opts) {
        // Copy the list: addChannel() pushes onto it, and that must not mutate
        // an array the caller still owns (or shares with another dispatcher).
        this.channels = [...(opts.channels ?? [])];
        this.logger = opts.logger ?? console;
        this.diagnosisHint = opts.diagnosisHint;
    }
    /** Add a channel at runtime; it receives alerts from subsequent dispatches. */
    addChannel(channel) {
        this.channels.push(channel);
    }
    /**
     * Send an alert to all registered channels.
     *
     * Resolves once every channel's send has settled. Channel failures are
     * logged, never rethrown. Returns immediately when no channel is registered
     * (the alert is not even formatted in that case).
     */
    async dispatch(alert) {
        if (this.channels.length === 0)
            return;
        const formatted = formatAlertMessage(alert, {
            diagnosisHint: this.diagnosisHint,
        });
        // Each send runs inside its own async wrapper so that both synchronous
        // throws and rejected promises end up in the same catch.
        const results = this.channels.map(async (ch) => {
            try {
                await ch.send(alert, formatted);
            }
            catch (err) {
                this.logger.error(`[openalerts] alert channel "${ch.name}" failed: ${String(err)}`);
            }
        });
        await Promise.allSettled(results);
    }
    /** Whether any channels are registered. */
    get hasChannels() {
        return this.channels.length > 0;
    }
    /** Number of registered channels. */
    get channelCount() {
        return this.channels.length;
    }
}
@@ -0,0 +1,51 @@
1
+ /**
2
+ * Bounded Map with LRU Eviction
3
+ *
4
+ * Caps the number of stored entries: inserting a new key while the map is at `maxSize` first evicts the least recently used entry.
5
+ * An optional TTL additionally expires entries; expiry is checked lazily when an entry is looked up with `get()` or `has()`.
6
+ */
7
+ export type BoundedMapOptions = {
8
+ /** Maximum number of entries. The constructor throws if this is zero or negative. */
9
+ maxSize: number;
10
+ /** Optional callback invoked whenever an entry is removed: LRU eviction, TTL expiry, `delete()` and `clear()` all trigger it. */
11
+ onEvict?: (key: string, value: unknown) => void;
12
+ /** Optional TTL in ms, measured from when the entry was created. Omitted or 0 disables expiry. */
13
+ ttlMs?: number;
14
+ };
15
+ export type BoundedMapStats = {
16
+ size: number; // entries currently stored
17
+ maxSize: number; // configured capacity
18
+ evictionCount: number; // capacity (LRU) evictions only; TTL expiry and delete() are not counted
19
+ hitCount: number; // get() calls that returned a value
20
+ missCount: number; // get() calls that found nothing or found an expired entry
21
+ };
22
+ export declare class BoundedMap<K extends string, V> {
23
+ private readonly options;
24
+ private map;
25
+ private evictionCount;
26
+ private hitCount;
27
+ private missCount;
28
+ constructor(options: BoundedMapOptions);
29
+ /** Insert or overwrite a value. Inserting a new key at capacity evicts the least recently used entry first. Returns `this`. */
30
+ set(key: K, value: V): this;
31
+ /** Return the value and mark it as recently used, or `undefined` if the key is missing or expired (an expired entry is removed). Updates hit/miss counters. */
32
+ get(key: K): V | undefined;
33
+ /** Whether the key is present and not expired (an expired entry is removed). Does not refresh recency or touch hit/miss counters. */
34
+ has(key: K): boolean;
35
+ /** Remove a key, invoking `onEvict` if it was present. Returns whether an entry was removed. */
36
+ delete(key: K): boolean;
37
+ /** Remove all entries, invoking `onEvict` for each one. */
38
+ clear(): void;
39
+ /** Number of stored entries, including expired ones that have not been looked up yet. */
40
+ get size(): number;
41
+ /** Iterator over the stored keys. */
42
+ keys(): IterableIterator<K>;
43
+ /** Iterator over a snapshot of the stored values, taken when this method is called. */
44
+ values(): IterableIterator<V>;
45
+ /** Iterator over a snapshot of `[key, value]` pairs, taken when this method is called. */
46
+ entries(): IterableIterator<[K, V]>;
47
+ /** Current size, capacity and eviction/hit/miss counters. */
48
+ getStats(): BoundedMapStats;
49
+ /** Evict the least recently used entry and bump the eviction counter. */
50
+ private evictOldest;
51
+ }
@@ -0,0 +1,128 @@
/**
 * Bounded Map with LRU Eviction
 *
 * Prevents memory leaks by capping the number of stored entries at
 * `options.maxSize`: inserting a new key while at capacity first evicts the
 * least recently used entry. When `options.ttlMs` is set, entries additionally
 * expire that long after creation; expiry is checked lazily in `get()`/`has()`.
 *
 * `options.onEvict` is invoked for every removed entry, whether it was removed
 * by LRU eviction, TTL expiry, `delete()` or `clear()`.
 */
export class BoundedMap {
    options;
    /** key -> { value, createTs, accessSeq } */
    map = new Map();
    evictionCount = 0;
    hitCount = 0;
    missCount = 0;
    /**
     * Monotonic counter stamped on an entry each time it is written or read.
     * Wall-clock timestamps are not used for recency because several accesses
     * inside the same millisecond would tie and make eviction order arbitrary.
     */
    accessClock = 0;
    /**
     * @param options `{ maxSize, onEvict?, ttlMs? }`
     * @throws Error if `maxSize` is not a positive number.
     */
    constructor(options) {
        this.options = options;
        // `!(x > 0)` also rejects NaN, which `x <= 0` would let through and
        // which would silently disable the size bound.
        if (!(options.maxSize > 0)) {
            throw new Error("maxSize must be positive");
        }
    }
    /** Set a value. Returns `this` for chaining. */
    set(key, value) {
        const now = Date.now();
        const existing = this.map.get(key);
        if (existing) {
            // Overwriting a live entry keeps its creation time (an overwrite does
            // not extend the TTL). An entry that has already expired is treated
            // as new, otherwise the value just written would expire on the very
            // next lookup.
            if (this.isExpired(existing, now)) {
                existing.createTs = now;
            }
            existing.value = value;
            existing.accessSeq = ++this.accessClock;
            return this;
        }
        // If at capacity, evict the least recently used entry
        if (this.map.size >= this.options.maxSize) {
            this.evictOldest();
        }
        this.map.set(key, { value, createTs: now, accessSeq: ++this.accessClock });
        return this;
    }
    /**
     * Get a value and mark it as most recently used.
     * Returns `undefined` (and counts a miss) when the key is absent or expired;
     * an expired entry is removed.
     */
    get(key) {
        const entry = this.map.get(key);
        if (!entry) {
            this.missCount++;
            return undefined;
        }
        if (this.isExpired(entry, Date.now())) {
            this.delete(key);
            this.missCount++;
            return undefined;
        }
        entry.accessSeq = ++this.accessClock;
        this.hitCount++;
        return entry.value;
    }
    /**
     * Check if a key exists and has not expired (an expired entry is removed).
     * Does not refresh recency and does not touch the hit/miss counters.
     */
    has(key) {
        const entry = this.map.get(key);
        if (!entry)
            return false;
        if (this.isExpired(entry, Date.now())) {
            this.delete(key);
            return false;
        }
        return true;
    }
    /** Delete a key, notifying `onEvict` if it was present. Returns whether it existed. */
    delete(key) {
        const entry = this.map.get(key);
        if (entry) {
            this.options.onEvict?.(key, entry.value);
        }
        return this.map.delete(key);
    }
    /** Clear all entries, notifying `onEvict` for each. */
    clear() {
        if (this.options.onEvict) {
            for (const [key, entry] of this.map) {
                this.options.onEvict(key, entry.value);
            }
        }
        this.map.clear();
    }
    /** Get current size (includes expired entries that have not been looked up yet). */
    get size() {
        return this.map.size;
    }
    /** Get all keys */
    keys() {
        return this.map.keys();
    }
    /** Get all values (snapshot taken at call time). */
    values() {
        return Array.from(this.map.values()).map(e => e.value).values();
    }
    /** Get all entries as `[key, value]` pairs (snapshot taken at call time). */
    entries() {
        return Array.from(this.map.entries()).map(([k, e]) => [k, e.value]).values();
    }
    /** Get stats */
    getStats() {
        return {
            size: this.map.size,
            maxSize: this.options.maxSize,
            evictionCount: this.evictionCount,
            hitCount: this.hitCount,
            missCount: this.missCount,
        };
    }
    /** Whether `entry` has outlived the configured TTL at time `now` (always false without a TTL). */
    isExpired(entry, now) {
        const ttlMs = this.options.ttlMs;
        return ttlMs ? now - entry.createTs > ttlMs : false;
    }
    /** Evict oldest (least recently used) entry */
    evictOldest() {
        let oldestKey;
        let oldestSeq = Infinity;
        for (const [key, entry] of this.map) {
            if (entry.accessSeq < oldestSeq) {
                oldestSeq = entry.accessSeq;
                oldestKey = key;
            }
        }
        // Compare against undefined, not truthiness: "" is a valid key.
        if (oldestKey !== undefined) {
            this.delete(oldestKey);
            this.evictionCount++;
        }
    }
}
@@ -0,0 +1,41 @@
1
+ import { OpenAlertsEventBus } from "./event-bus.js";
2
+ import { type AlertEvent, type EvaluatorState, type OpenAlertsEvent, type OpenAlertsInitOptions, type StoredEvent } from "./types.js";
3
+ /**
4
+ * OpenAlertsEngine — central orchestrator for monitoring and alerting.
5
+ *
6
+ * Framework-agnostic. Adapters (OpenClaw, Nanobot, LangChain, etc.)
7
+ * translate their events into OpenAlertsEvent and feed them to `ingest()`. Each event is persisted, run through the rule evaluator, and any resulting alerts are dispatched to the registered channels.
8
+ */
9
+ export declare class OpenAlertsEngine {
10
+ readonly bus: OpenAlertsEventBus;
11
+ readonly state: EvaluatorState;
12
+ private config;
13
+ private stateDir;
14
+ private dispatcher;
15
+ private platform; // null until start() enables platform sync (requires `config.apiKey`)
16
+ private logger; // defaults to `console`
17
+ private logPrefix; // defaults to "openalerts"
18
+ private watchdogTimer;
19
+ private pruneTimer;
20
+ private running;
21
+ constructor(options: OpenAlertsInitOptions);
22
+ /** Start the engine (no-op if already running): replay persisted events into the evaluator state, enable platform sync when `config.apiKey` is set, and start the watchdog and log-prune timers. */
23
+ start(): void;
24
+ /** Ingest a universal event by emitting it on `bus`; the engine's bus listener persists it, evaluates the rules and forwards it to platform sync. */
25
+ ingest(event: OpenAlertsEvent): void;
26
+ /** Stop the engine (no-op if not running): clear both timers, stop platform sync, clear the bus. */
27
+ stop(): void;
28
+ /** Add a channel at runtime (e.g., after detecting available transports). */
29
+ addChannel(channel: {
30
+ readonly name: string;
31
+ send(alert: AlertEvent, formatted: string): Promise<void> | void;
32
+ }): void;
33
+ /** Whether the platform sync is connected; false when platform sync was never started. */
34
+ get platformConnected(): boolean;
35
+ /** Whether the engine is running, i.e. between `start()` and `stop()`. */
36
+ get isRunning(): boolean;
37
+ /** Read recent stored events from the state directory (for /alerts command). `limit` defaults to 100. */
38
+ getRecentEvents(limit?: number): StoredEvent[];
39
+ private handleEvent;
40
+ private fireAlert;
41
+ }
@@ -0,0 +1,167 @@
1
+ import { AlertDispatcher } from "./alert-channel.js";
2
+ import { OpenAlertsEventBus } from "./event-bus.js";
3
+ import { createEvaluatorState, processEvent, processWatchdogTick, warmFromHistory } from "./evaluator.js";
4
+ import { createPlatformSync } from "./platform.js";
5
+ import { appendEvent, pruneLog, readAllEvents, readRecentEvents } from "./store.js";
6
+ import { DEFAULTS, } from "./types.js";
/**
 * OpenAlertsEngine — central orchestrator for monitoring and alerting.
 *
 * Framework-agnostic. Adapters (OpenClaw, Nanobot, LangChain, etc.)
 * translate their events into OpenAlertsEvent and feed them to `ingest()`.
 *
 * Per event: persist a diagnostic snapshot, run the rule evaluator, fire any
 * resulting alerts, and forward the snapshot to platform sync (if enabled).
 * Persistence and dispatch failures are logged and never abort processing.
 */
export class OpenAlertsEngine {
    bus;
    state;
    config;
    stateDir;
    dispatcher;
    platform = null;
    logger;
    logPrefix;
    watchdogTimer = null;
    pruneTimer = null;
    running = false;
    /**
     * @param options Init options: `config`, `stateDir`, and optional `logger`
     *                (defaults to `console`), `logPrefix` (defaults to
     *                "openalerts"), `channels` and `diagnosisHint`.
     */
    constructor(options) {
        this.config = options.config;
        this.stateDir = options.stateDir;
        this.logger = options.logger ?? console;
        this.logPrefix = options.logPrefix ?? "openalerts";
        this.bus = new OpenAlertsEventBus();
        this.state = createEvaluatorState();
        this.dispatcher = new AlertDispatcher({
            channels: options.channels,
            logger: this.logger,
            diagnosisHint: options.diagnosisHint,
        });
        // Wire up: bus events → evaluator → dispatcher
        this.bus.on((event) => this.handleEvent(event));
    }
    /** Start the engine: warm from history, start timers. No-op if already running. */
    start() {
        if (this.running)
            return;
        this.running = true;
        // Warm from persisted events; a failure here only costs the warm start.
        try {
            const history = readAllEvents(this.stateDir);
            warmFromHistory(this.state, history);
            this.logger.info(`${this.logPrefix}: warmed from ${history.length} persisted events`);
        }
        catch (err) {
            this.logger.warn(`${this.logPrefix}: warm-start failed: ${String(err)}`);
        }
        // Start platform sync if apiKey present
        if (this.config.apiKey) {
            this.platform = createPlatformSync({
                apiKey: this.config.apiKey,
                logger: this.logger,
                logPrefix: this.logPrefix,
            });
            this.logger.info(`${this.logPrefix}: platform sync enabled`);
        }
        // Watchdog timer (checks for gateway-down every 30s)
        this.watchdogTimer = setInterval(() => {
            // An exception escaping a timer callback is an uncaught exception for
            // the whole host process, so guard it the same way as the prune timer.
            try {
                const alerts = processWatchdogTick(this.state, this.config);
                for (const alert of alerts) {
                    this.fireAlert(alert);
                }
            }
            catch (err) {
                this.logger.warn(`${this.logPrefix}: watchdog tick failed: ${String(err)}`);
            }
        }, DEFAULTS.watchdogIntervalMs);
        // Prune timer (cleans old log entries every 6h)
        this.pruneTimer = setInterval(() => {
            try {
                pruneLog(this.stateDir, {
                    maxAgeMs: (this.config.maxLogAgeDays ?? DEFAULTS.maxLogAgeDays) * 24 * 60 * 60 * 1000,
                    maxSizeKb: this.config.maxLogSizeKb ?? DEFAULTS.maxLogSizeKb,
                });
            }
            catch (err) {
                this.logger.warn(`${this.logPrefix}: prune failed: ${String(err)}`);
            }
        }, DEFAULTS.pruneIntervalMs);
        const channelNames = this.dispatcher.hasChannels
            ? `${this.dispatcher.channelCount} channel(s)`
            : "log-only (no alert channels)";
        this.logger.info(`${this.logPrefix}: started, ${channelNames}, 7 rules active`);
    }
    /** Ingest a universal event. Can be called directly or via the event bus. */
    ingest(event) {
        this.bus.emit(event);
    }
    /** Stop the engine: clear timers, flush platform, clear bus. No-op if not running. */
    stop() {
        if (!this.running)
            return;
        this.running = false;
        if (this.watchdogTimer) {
            clearInterval(this.watchdogTimer);
            this.watchdogTimer = null;
        }
        if (this.pruneTimer) {
            clearInterval(this.pruneTimer);
            this.pruneTimer = null;
        }
        this.platform?.stop();
        // NOTE(review): the bus listener is registered only in the constructor.
        // If `bus.clear()` removes listeners, a stop() → start() cycle leaves the
        // engine with no event handler — confirm against event-bus.js.
        this.bus.clear();
        this.logger.info(`${this.logPrefix}: stopped`);
    }
    /** Add a channel at runtime (e.g., after detecting available transports). */
    addChannel(channel) {
        this.dispatcher.addChannel(channel);
    }
    /** Whether the platform sync is connected (false when it was never started). */
    get platformConnected() {
        return this.platform?.isConnected() ?? false;
    }
    /** Whether the engine is running. */
    get isRunning() {
        return this.running;
    }
    /** Read recent stored events (for /alerts command). */
    getRecentEvents(limit = 100) {
        return readRecentEvents(this.stateDir, limit);
    }
    // ─── Internal ──────────────────────────────────────────────────────────────
    /** Bus listener: persist the event, evaluate rules, fire alerts, forward to platform. */
    handleEvent(event) {
        // Persist as diagnostic snapshot
        const snapshot = {
            type: "diagnostic",
            eventType: event.type,
            ts: event.ts,
            summary: `${event.type}${event.outcome ? `:${event.outcome}` : ""}`,
            channel: event.channel,
            sessionKey: event.sessionKey,
        };
        try {
            appendEvent(this.stateDir, snapshot);
        }
        catch (err) {
            this.logger.warn(`${this.logPrefix}: failed to persist event: ${String(err)}`);
        }
        // Run through evaluator
        const alerts = processEvent(this.state, this.config, event);
        for (const alert of alerts) {
            this.fireAlert(alert);
        }
        // Forward to platform
        this.platform?.enqueue(snapshot);
    }
    /** Persist an alert, forward it to platform sync, and dispatch it unless `config.quiet` is set. */
    fireAlert(alert) {
        // Persist alert
        try {
            appendEvent(this.stateDir, alert);
        }
        catch (err) {
            this.logger.warn(`${this.logPrefix}: failed to persist alert: ${String(err)}`);
        }
        // Forward to platform
        this.platform?.enqueue(alert);
        // Dispatch to channels (unless quiet mode); not awaited, failures are logged.
        if (!this.config.quiet) {
            void this.dispatcher.dispatch(alert).catch((err) => {
                this.logger.error(`${this.logPrefix}: alert dispatch failed: ${String(err)}`);
            });
        }
        this.logger.info(`${this.logPrefix}: [${alert.severity}] ${alert.title}`);
    }
}
@@ -0,0 +1,18 @@
1
+ import { type AlertEvent, type EvaluatorState, type MonitorConfig, type OpenAlertsEvent, type StoredEvent } from "./types.js";
2
+ /** Create a fresh evaluator state. The engine creates one per instance in its constructor. */
3
+ export declare function createEvaluatorState(): EvaluatorState;
4
+ /**
5
+ * Warm the evaluator state from persisted events (the engine passes everything read from its state directory on start).
6
+ * Replays recent events to rebuild windows/counters without re-firing alerts. Mutates `state`; returns nothing.
7
+ */
8
+ export declare function warmFromHistory(state: EvaluatorState, events: StoredEvent[]): void;
9
+ /**
10
+ * Process a single event through all rules. The engine calls this for every event emitted on its bus.
11
+ * Returns alerts that should be fired (already filtered by cooldown + hourly cap); the engine fires every alert returned.
12
+ */
13
+ export declare function processEvent(state: EvaluatorState, config: MonitorConfig, event: OpenAlertsEvent): AlertEvent[];
14
+ /**
15
+ * Run the watchdog tick — checks for gateway-down condition and returns the alerts to fire.
16
+ * Called by the engine on a timer at `DEFAULTS.watchdogIntervalMs` (every 30 seconds according to the engine's own comment).
17
+ */
18
+ export declare function processWatchdogTick(state: EvaluatorState, config: MonitorConfig): AlertEvent[];