blackwall-eliza-guardrail 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/README.md +84 -0
  2. package/package.json +35 -0
  3. package/src/index.mjs +294 -0
package/README.md ADDED
@@ -0,0 +1,84 @@
1
+ # blackwall-eliza-guardrail
2
+
3
+ Pre-action risk check for [ElizaOS](https://github.com/elizaOS/eliza) agents. Wraps every registered action handler so STOP-rated actions can abort *before* they run — without modifying your character or your other plugins.
4
+
5
+ Powered by [BLACK_WALL](https://blackwalltier.com). Get a free key at [blackwalltier.com/dashboard/keys](https://blackwalltier.com/dashboard/keys).
6
+
7
+ ## Install
8
+
9
+ ```bash
10
+ npm i blackwall-eliza-guardrail
11
+ ```
12
+
13
+ ```ts
14
+ // character.ts
15
+ import { blackwallGuardrail } from 'blackwall-eliza-guardrail';
16
+
17
+ export const character = {
18
+ // ...
19
+ plugins: [
20
+ ...otherPlugins,
21
+ blackwallGuardrail({ mode: 'enforce' }), // <-- list LAST
22
+ ],
23
+ };
24
+ ```
25
+
26
+ ```bash
27
+ # env
28
+ BLACKWALL_API_KEY=bw_live_xxx
29
+ ```
30
+
31
+ That's it. Every action your agent invokes is now scored by BLACK_WALL before it runs.
32
+
33
+ ## Modes
34
+
35
+ | Mode | Behavior |
36
+ |---|---|
37
+ | `observe` (default) | Score every action and log to BLACK_WALL; never abort. Zero behavior change — safe to drop in. |
38
+ | `enforce` | Score every action; **throw on STOP** verdicts. Eliza catches the throw and converts it to a failureResult. |
39
+
40
+ Start in `observe` for a few days to see what the verdicts look like on your traffic. Switch to `enforce` once you trust the scoring.
41
+
42
+ ## Why list it LAST
43
+
44
+ The plugin wraps `runtime.actions[*].handler` at `init()` time. Actions registered *after* this plugin's init won't be wrapped. Listing it last guarantees every action other plugins contribute is gated.
45
+
46
+ If an action is registered after init (rare), it bypasses the guardrail. Open an issue if this matters for your setup — a `Proxy`-based variant is on the roadmap.
47
+
48
+ ## Configuration
49
+
50
+ ```ts
51
+ blackwallGuardrail({
52
+ apiKey: process.env.BLACKWALL_API_KEY, // or set env BLACKWALL_API_KEY
53
+ baseUrl: 'https://blackwalltier.com', // override for self-hosted / staging
54
+ mode: 'enforce', // 'observe' | 'enforce'
55
+ shouldGate: (actionName) => actionName !== 'IGNORE', // per-action opt-out
56
+ maxInputBytes: 8 * 1024, // cap forecast payload size
57
+ onEvent: (event) => myTelemetry(event), // optional telemetry hook
58
+ });
59
+ ```
60
+
61
+ ### Telemetry events
62
+
63
+ `onEvent` fires for: `init`, `wrapped`, `skipped`, `forecast_error`, `stop`, `observe_error`. Useful for piping guardrail decisions into your own observability stack.
64
+
65
+ ## How it works
66
+
67
+ 1. At plugin `init`, walks `runtime.actions[]` and replaces each `handler` with a wrapper.
68
+ 2. Before each call, the wrapper sends `{action, inputs, context}` to BLACK_WALL `/api/v1/forecast`.
69
+ 3. In `enforce` mode, a `STOP` verdict throws — Eliza's dispatcher catches it and the action does not run.
70
+ 4. After the action runs (or after a STOP), the wrapper calls `/api/v1/forecast/:id/outcome` so BLACK_WALL can learn from real-world divergence.
71
+
72
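+ Fail-open: if BLACK_WALL is unreachable, the wrapper logs a warning and lets the action proceed — in both `observe` and `enforce` modes. Network glitches at BLACK_WALL won't take down your agent, but note that in `enforce` mode an outage also means actions run ungated until the service is reachable again.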
73
+
74
+ ## Why handler-wrap?
75
+
76
+ The only events `@elizaos/core@1.7.x` emits around action execution are `ACTION_STARTED` / `ACTION_COMPLETED`. Listener errors on those are caught and logged by `executePlannedToolCall` — they don't abort the action. There is no pre-tool-call hook with abort semantics in the public surface today (earlier versions declared `HOOK_TOOL_BEFORE` in the enum, but it was never wired up; in 1.7.x it's gone entirely).
77
+
78
+ Wrapping `runtime.actions[*].handler` at init time is the only path that actually gates execution. The wrap throws on STOP, Eliza's dispatcher catches the throw, the action does not run, and a `failureResult` is recorded — exactly the desired behavior.
79
+
80
+ If upstream adds a pre-action hook with proper abort semantics in a future release, this plugin will migrate to it and the load-order caveat goes away.
81
+
82
+ ## License
83
+
84
+ MIT
package/package.json ADDED
@@ -0,0 +1,35 @@
1
+ {
2
+ "name": "blackwall-eliza-guardrail",
3
+ "version": "0.1.0",
4
+ "description": "BLACK_WALL pre-action guardrail for ElizaOS agents. Wraps every action handler with a forecast() check so STOP-rated actions abort before they run.",
5
+ "type": "module",
6
+ "main": "./src/index.mjs",
7
+ "exports": {
8
+ ".": "./src/index.mjs"
9
+ },
10
+ "files": [
11
+ "src",
12
+ "README.md"
13
+ ],
14
+ "scripts": {
15
+ "check": "node --check src/index.mjs"
16
+ },
17
+ "keywords": [
18
+ "elizaos",
19
+ "eliza-plugin",
20
+ "ai-agents",
21
+ "guardrails",
22
+ "ai-safety",
23
+ "blackwall"
24
+ ],
25
+ "license": "MIT",
26
+ "dependencies": {
27
+ "blackwall-mcp": "^1.1.0"
28
+ },
29
+ "peerDependencies": {
30
+ "@elizaos/core": ">=1.0.0"
31
+ },
32
+ "engines": {
33
+ "node": ">=18"
34
+ }
35
+ }
package/src/index.mjs ADDED
@@ -0,0 +1,294 @@
1
+ /**
2
+ * blackwall-eliza-guardrail
3
+ * -------------------------
4
+ * ElizaOS plugin that puts BLACK_WALL in front of every action the agent can
5
+ * take. At init time we walk `runtime.actions[]` and replace each handler with
6
+ * a wrapper that:
7
+ *
8
+ * 1. calls forecast() with the action name + args
9
+ * 2. in `enforce` mode, throws if the verdict is STOP (Eliza's dispatcher
10
+ * catches the throw and converts it to a failureResult — clean abort)
11
+ * 3. runs the original handler
12
+ * 4. calls observe() with the actual outcome (matched / diverged / aborted)
13
+ *
14
+ * The HOOK_TOOL_BEFORE event Eliza declares in its EventType enum is NEVER
15
+ * emitted in the runtime (verified 2026-05-28). Handler-wrap at init is the
16
+ * only path that actually gates execution today.
17
+ *
18
+ * Load order matters: list this plugin LAST so it wraps every action other
19
+ * plugins contributed.
20
+ */
21
+
22
+ import { forecast, observe } from 'blackwall-mcp/lib';
23
+
24
+ // Cap how big a parameter blob we ship to forecast(). Large prompts / file
25
+ // payloads can balloon a single forecast call; the verdict only needs enough
26
+ // signal to reason about the action, not the full attachment.
27
+ const DEFAULT_MAX_INPUT_BYTES = 8 * 1024;
28
+
29
+ /**
30
+ * @typedef {'enforce' | 'observe'} GuardrailMode
31
+ */
32
+
33
+ /**
34
+ * @typedef {Object} BlackwallGuardrailConfig
35
+ * @property {string} [apiKey] BLACK_WALL API key. Defaults to env BLACKWALL_API_KEY.
36
+ * @property {string} [baseUrl] Defaults to env BLACKWALL_BASE_URL or https://blackwalltier.com.
37
+ * @property {GuardrailMode} [mode] 'observe' (default — log only, never abort) or 'enforce' (throw on STOP).
38
+ * @property {(actionName: string) => boolean} [shouldGate] Per-action opt-out. Return false to skip wrapping.
39
+ * @property {number} [maxInputBytes] Truncate forecast() inputs payload over this size. Default 8KB.
40
+ * @property {(event: GuardrailEvent) => void} [onEvent] Telemetry hook (logged on STOP, error, observe failure, etc.).
41
+ */
42
+
43
+ /**
44
+ * @typedef {Object} GuardrailEvent
45
+ * @property {'wrapped'|'forecast_error'|'stop'|'observe_error'|'skipped'|'init'} type
46
+ * @property {string} [actionName]
47
+ * @property {string} [forecastId]
48
+ * @property {string} [recommendation]
49
+ * @property {unknown} [error]
50
+ * @property {Record<string, any>} [extra]
51
+ */
52
+
/**
 * Resolve config at plugin construction time. Each field falls back to env so
 * the simplest install is `plugins: [blackwallGuardrail()]` with env set.
 *
 * Invalid values never throw; they degrade to the safe default instead:
 *   - a `null` / missing config behaves like `{}`
 *   - any mode other than (case-insensitive) "enforce" resolves to "observe"
 *   - a `maxInputBytes` that is not a non-negative number (NaN, negative,
 *     non-number) resolves to DEFAULT_MAX_INPUT_BYTES
 *
 * @param {BlackwallGuardrailConfig} [config]
 * @returns {{
 *   apiKey: string | undefined,
 *   baseUrl: string | undefined,
 *   mode: GuardrailMode,
 *   shouldGate: (actionName: string) => boolean,
 *   maxInputBytes: number,
 *   onEvent: ((event: GuardrailEvent) => void) | null,
 * }}
 */
function resolveConfig(config = {}) {
  // The `= {}` default only covers `undefined`; treat an explicit `null` alike.
  const source = config ?? {};
  const env = process.env;

  // String() keeps a non-string mode (e.g. a number) from throwing on
  // .toLowerCase(); trim() forgives stray whitespace in env values.
  const requestedMode = String(source.mode ?? env.BLACKWALL_MODE ?? 'observe')
    .trim()
    .toLowerCase();

  // NaN would make every size comparison false (always truncating) and a
  // negative cap is meaningless, so both fall back to the default.
  const cap = source.maxInputBytes;
  const hasUsableCap = typeof cap === 'number' && !Number.isNaN(cap) && cap >= 0;

  return {
    apiKey: source.apiKey ?? env.BLACKWALL_API_KEY,
    baseUrl: source.baseUrl ?? env.BLACKWALL_BASE_URL,
    mode: requestedMode === 'enforce' ? 'enforce' : 'observe',
    shouldGate: typeof source.shouldGate === 'function' ? source.shouldGate : () => true,
    maxInputBytes: hasUsableCap ? cap : DEFAULT_MAX_INPUT_BYTES,
    onEvent: typeof source.onEvent === 'function' ? source.onEvent : null,
  };
}
70
+
/**
 * Best-effort extraction of structured args from Eliza's variable handler
 * signature. Candidates are tried in priority order:
 *
 *   1. opts.parameters
 *   2. opts.args
 *   3. opts.input
 *   4. message.content.metadata.parameters
 *
 * The first candidate that is a non-null object (arrays included) wins. A
 * candidate that is present but not structured (e.g. a raw string) is skipped
 * rather than ending the search, so a later structured source is still found.
 * Falls back to {} rather than failing the whole wrap.
 *
 * @param {string} actionName Unused; kept so the call signature stays stable.
 * @param {any} message Inbound message (may be null/undefined).
 * @param {any} opts Handler options (may be null/undefined or a non-object).
 * @returns {object} The structured inputs, or an empty object.
 */
function extractActionInputs(actionName, message, opts) {
  const isStructured = (value) => value !== null && typeof value === 'object';

  const candidates = isStructured(opts) ? [opts.parameters, opts.args, opts.input] : [];
  candidates.push(message?.content?.metadata?.parameters);

  return candidates.find(isStructured) ?? {};
}
89
+
/**
 * Trim a payload if its serialized form exceeds `maxBytes` (measured as UTF-8
 * bytes of the JSON encoding, not UTF-16 string length).
 *
 * Strategy, from least to most lossy — the first result that fits is returned:
 *   1. Payload already fits: returned unchanged (same reference).
 *   2. Keep the top-level keys but cut string values longer than 200 chars,
 *      so the forecast still sees the *shape* of the action.
 *   3. Additionally replace nested objects/arrays with a small type marker.
 *   4. Drop the values entirely and report only key count and original size.
 *
 * Arrays and primitives that are too large are summarized directly. Values
 * that cannot be serialized at all (cycles, BigInt, `undefined`, functions)
 * yield `{ _truncated: true, _reason: 'unserializable' }`.
 *
 * @param {any} inputs Payload extracted for the action.
 * @param {number} maxBytes Maximum UTF-8 size of the serialized payload.
 * @returns {any} `inputs` itself when small enough, otherwise a reduced object
 *   carrying `_truncated: true`.
 */
function truncateInputs(inputs, maxBytes) {
  const byteSize = (text) => Buffer.byteLength(text, 'utf8');
  const unserializable = { _truncated: true, _reason: 'unserializable' };

  let serialized;
  try {
    serialized = JSON.stringify(inputs);
  } catch {
    return unserializable;
  }
  // JSON.stringify returns undefined (no throw) for undefined/functions/symbols.
  if (typeof serialized !== 'string') return unserializable;

  const originalBytes = byteSize(serialized);
  if (originalBytes <= maxBytes) return inputs;

  if (Array.isArray(inputs)) {
    return { _truncated: true, _length: inputs.length, _byteSize: originalBytes };
  }
  if (typeof inputs !== 'object' || inputs === null) {
    return { _truncated: true, _byteSize: originalBytes };
  }

  // Pass 1: shorten long top-level strings, keep everything else as-is.
  const trimmed = {};
  for (const [key, value] of Object.entries(inputs)) {
    trimmed[key] =
      typeof value === 'string' && value.length > 200
        ? `${value.slice(0, 200)}…<truncated ${value.length} chars>`
        : value;
  }
  trimmed._truncated = true;
  trimmed._original_bytes = originalBytes;
  if (byteSize(JSON.stringify(trimmed)) <= maxBytes) return trimmed;

  // Pass 2: the bulk lives in nested values — keep the keys, drop the contents.
  const compact = {};
  for (const [key, value] of Object.entries(trimmed)) {
    compact[key] =
      value !== null && typeof value === 'object'
        ? { _truncated: true, _type: Array.isArray(value) ? 'array' : 'object' }
        : value;
  }
  if (byteSize(JSON.stringify(compact)) <= maxBytes) return compact;

  // Last resort: even the key/primitive skeleton is too large for the cap.
  return { _truncated: true, _keys: Object.keys(inputs).length, _byteSize: originalBytes };
}
122
+
/**
 * Deliver a telemetry event to the user's `onEvent` hook, if one is configured.
 *
 * Telemetry is strictly best-effort: a broken hook must never take down the
 * wrap. That covers both failure shapes — a synchronous throw and, for async
 * hooks, a rejected promise (which would otherwise surface as an unhandled
 * rejection and can terminate the Node process).
 *
 * @param {((event: GuardrailEvent) => unknown) | null | undefined} onEvent
 * @param {GuardrailEvent} event
 * @returns {void}
 */
function emit(onEvent, event) {
  if (!onEvent) return;
  try {
    const pending = onEvent(event);
    if (typeof pending?.then === 'function') {
      // Async hook: attach a no-op rejection handler so a failure stays contained.
      Promise.resolve(pending).catch(() => {});
    }
  } catch {
    /* never let a broken telemetry hook take down the wrap */
  }
}
131
+
/**
 * Wrap a single action's `.handler` with a forecast/observe envelope.
 * Returns the patched action object (mutated in place; returned for clarity).
 *
 * The wrapper:
 *   1. builds the forecast payload and calls forecast(); any failure here is
 *      fail-open — it is logged, reported as `forecast_error`, and the
 *      original handler runs ungated
 *   2. in `enforce` mode, throws on a STOP verdict without running the handler
 *   3. otherwise runs the original handler and propagates its result or error
 *   4. reports the outcome (matched / diverged / aborted) through observe() as
 *      fire-and-forget; an observe() failure — thrown or rejected — is logged
 *      and reported as `observe_error` and never changes the action's outcome
 *
 * Wrapping is idempotent: a handler that was already wrapped is left alone
 * (emitted as `skipped` with reason `already-wrapped`), so calling this twice
 * on the same action object does not double-gate it.
 *
 * @param {{ name?: string, handler?: Function } | null | undefined} action
 * @param {ReturnType<typeof resolveConfig>} cfg Resolved guardrail config.
 * @param {{ warn?: Function } | null | undefined} logger
 * @returns {typeof action} The same `action` object.
 */
function wrapActionHandler(action, cfg, logger) {
  // Registry-wide symbol so the marker is recognized across plugin instances.
  const WRAPPED = Symbol.for('blackwall-guardrail.wrapped');

  const original = action?.handler;
  if (typeof original !== 'function') {
    emit(cfg.onEvent, { type: 'skipped', actionName: action?.name, extra: { reason: 'no-handler' } });
    return action;
  }
  if (original[WRAPPED]) {
    emit(cfg.onEvent, { type: 'skipped', actionName: action.name, extra: { reason: 'already-wrapped' } });
    return action;
  }
  if (!cfg.shouldGate(action.name)) {
    emit(cfg.onEvent, { type: 'skipped', actionName: action.name, extra: { reason: 'opt-out' } });
    return action;
  }

  // Fire-and-forget outcome report. Never throws and never rejects: reporting
  // is telemetry and must not alter what the caller sees from the action.
  const reportOutcome = (forecastId, payload) => {
    if (!forecastId) return;
    const onFailure = (err) => {
      logger?.warn?.(`[blackwall-guardrail] observe(${payload.outcome_class}) failed: ${err?.message ?? err}`);
      emit(cfg.onEvent, { type: 'observe_error', actionName: action.name, forecastId, error: err });
    };
    try {
      // Promise.resolve() tolerates a non-promise return; try/catch covers a
      // synchronous throw from observe() itself.
      Promise.resolve(
        observe(forecastId, payload, {
          apiKey: cfg.apiKey,
          baseUrl: cfg.baseUrl,
          reportedVia: 'eliza_guardrail',
        })
      ).catch(onFailure);
    } catch (err) {
      onFailure(err);
    }
  };

  async function blackwallWrappedHandler(runtime, message, state, opts, callback, responses) {
    // Arrow function so the handler keeps the `this` it was invoked with.
    const runOriginal = () => original.call(this, runtime, message, state, opts, callback, responses);

    let verdict;
    try {
      const inputs = truncateInputs(extractActionInputs(action.name, message, opts), cfg.maxInputBytes);
      const context = {
        ...(runtime?.character?.name ? { agent_role: runtime.character.name } : {}),
        ...(message?.content?.text ? { user_intent: message.content.text } : {}),
        source: 'elizaos',
      };
      verdict = await forecast(
        { action: action.name, inputs, context },
        { apiKey: cfg.apiKey, baseUrl: cfg.baseUrl }
      );
    } catch (err) {
      // Fail-open: never let a BLACK_WALL outage (or a payload we could not
      // build) break the agent. Log and let the action proceed ungated.
      logger?.warn?.(`[blackwall-guardrail] forecast() failed for action "${action.name}" — proceeding without gate: ${err?.message ?? err}`);
      emit(cfg.onEvent, { type: 'forecast_error', actionName: action.name, error: err });
      return runOriginal();
    }

    if (cfg.mode === 'enforce' && verdict?.recommendation === 'STOP') {
      emit(cfg.onEvent, {
        type: 'stop',
        actionName: action.name,
        forecastId: verdict?.id,
        recommendation: verdict.recommendation,
      });

      // Not awaited — the throw must reach Eliza's dispatcher promptly.
      reportOutcome(verdict?.id, {
        outcome_class: 'aborted',
        divergence_severity: 'none',
        details: 'blocked by enforce-mode guardrail',
      });

      const flagCodes = Array.isArray(verdict?.red_flags)
        ? verdict.red_flags.map((flag) => flag?.code).filter(Boolean).join(', ')
        : '';
      throw new Error(
        `BLACK_WALL blocked action "${action.name}": ${verdict?.recommendation}${flagCodes ? ` (${flagCodes})` : ''}`
      );
    }

    // observe mode (or non-STOP verdict): run the action, then report. Only
    // the handler call sits inside the try, so a reporting problem can never
    // be mistaken for an action failure.
    let result;
    try {
      result = await runOriginal();
    } catch (err) {
      reportOutcome(verdict?.id, {
        outcome_class: 'diverged',
        divergence_severity: 'medium',
        details: String(err?.message ?? err).slice(0, 500),
      });
      throw err;
    }
    reportOutcome(verdict?.id, { outcome_class: 'matched' });
    return result;
  }

  // Non-enumerable marker used by the idempotence check above.
  Object.defineProperty(blackwallWrappedHandler, WRAPPED, { value: true });
  action.handler = blackwallWrappedHandler;

  emit(cfg.onEvent, { type: 'wrapped', actionName: action.name });
  return action;
}
237
+
/**
 * Plugin factory. Returns an object matching the Eliza Plugin contract
 * (`isValidPluginShape`): `name` + at least one of init/actions/services/etc.
 *
 *   import { blackwallGuardrail } from 'blackwall-eliza-guardrail';
 *   export const character = {
 *     plugins: [
 *       ...otherPlugins,
 *       blackwallGuardrail({ mode: 'enforce' }),   // <-- LAST
 *     ],
 *   };
 *
 * Config is resolved once, when the factory is called. At `init(runtime)` the
 * plugin walks `runtime.actions` and wraps each action's handler, then logs
 * and emits an `init` event whose `extra.wrapped` is the number of handlers
 * that were actually replaced (actions skipped for having no handler or for
 * being opted out are not counted).
 *
 * @param {BlackwallGuardrailConfig} [config]
 * @returns {{ name: string, description: string, init: (runtime: any) => Promise<void> }}
 */
export function blackwallGuardrail(config = {}) {
  const cfg = resolveConfig(config);

  return {
    name: 'blackwall-guardrail',
    description:
      'BLACK_WALL pre-action guardrail — wraps every registered action handler with a ' +
      'forecast() check and (in enforce mode) aborts STOP-rated actions before they run.',
    init: async (runtime) => {
      const logger = runtime?.logger ?? console;

      if (!cfg.apiKey) {
        logger.warn?.(
          '[blackwall-guardrail] No apiKey configured (set BLACKWALL_API_KEY or pass { apiKey } to blackwallGuardrail()). ' +
            'Plugin will load but every forecast() call will fail and fall through.'
        );
      }

      const actions = runtime?.actions;
      if (!Array.isArray(actions) || actions.length === 0) {
        logger.warn?.(
          '[blackwall-guardrail] runtime.actions is empty at init time. ' +
            'List blackwall-eliza-guardrail LAST in your plugins array so other action-contributing plugins register first.'
        );
        emit(cfg.onEvent, { type: 'init', extra: { wrapped: 0, mode: cfg.mode } });
        return;
      }

      let wrapped = 0;
      for (const action of actions) {
        if (!action || typeof action !== 'object') continue;
        // wrapActionHandler leaves the handler untouched when it skips an
        // action, so a changed handler reference is what "wrapped" means.
        const handlerBefore = action.handler;
        wrapActionHandler(action, cfg, logger);
        if (action.handler !== handlerBefore) wrapped += 1;
      }

      logger.info?.(`[blackwall-guardrail] wrapped ${wrapped} action handler(s) · mode=${cfg.mode}`);
      emit(cfg.onEvent, { type: 'init', extra: { wrapped, mode: cfg.mode } });
    },
  };
}
290
+
291
+ // Default export: a plugin *instance* built by calling the factory with no
292
+ // arguments, so all settings come from env (observe mode unless BLACKWALL_MODE=enforce).
293
+ // `import guardrail from 'blackwall-eliza-guardrail'` gives a drop-in plugin, not the factory.
294
+ export default blackwallGuardrail();