agent-guard-openclaw 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,55 @@
1
+ # agent-guard-openclaw
2
+
3
+ Automatic prompt-injection screening for [OpenCLAW](https://openclaw.ai) tool
4
+ calls. This is the OpenCLAW plugin half of
5
+ [`agent-guard-plugins`](https://github.com/dannyliv/agent-guard-plugins).
6
+
7
+ ## What it does
8
+
9
+ Once installed, OpenCLAW discovers and activates this plugin automatically
10
+ (`activation.onStartup: true`). It registers a `before_tool_call` hook that
11
+ runs on every tool call. The hook collects the tool's textual params (web page
12
+ text, search results, email body, GitHub issue text, MCP tool output, and
13
+ similar untrusted content) and screens them with the Content Guard engine from
14
+ `agent-guard-plugins`. Risky content blocks the tool call before the agent
15
+ acts on it. Trusted / authorized channels are skipped per your Content Guard
16
+ config.
17
+
18
+ No manual wrapping. No AGENTS.md step. Install it and it is active.
19
+
20
+ ## Install
21
+
22
+ The plugin needs both the npm package (the OpenCLAW seam) and the Python
23
+ package (the screening engine):
24
+
25
+ ```bash
26
+ # Python screening engine — provides the `agent-guard-openclaw` bridge
27
+ pip install agent-guard-plugins
28
+
29
+ # OpenCLAW plugin — auto-registers the before_tool_call hook
30
+ openclaw plugins install agent-guard-openclaw
31
+ ```
32
+
33
+ ## Configuration
34
+
35
+ Screening policy (trust list, block threshold, block/warn mode) lives in the
36
+ Content Guard config file at `~/.agent-guard/content_guard.toml`. See the
37
+ `agent-guard-plugins` README for that file's schema. The plugin itself reads:
38
+
39
+ | Env var | Default | Purpose |
40
+ | --------------------------------- | --------- | ---------------------------------------------------------------- |
41
+ | `AGENT_GUARD_OPENCLAW_DISABLED` | unset | `1`/`true`/`yes`/`on` (case-insensitive) loads the plugin but screens nothing (kill switch). |
42
+ | `AGENT_GUARD_PYTHON` | `python3` | Python executable that has `agent_guard_plugins` installed. |
43
+ | `AGENT_GUARD_OPENCLAW_TIMEOUT_MS` | `15000` | Per-tool-call budget for the screening subprocess. |
44
+
45
+ ## Fail-open
46
+
47
+ If the screening bridge cannot run (Python missing, model load failure,
48
+ timeout), the hook returns no decision and the tool call proceeds. A broken
49
+ guard never wedges the agent.
50
+
51
+ ## Maintainer and license
52
+
53
+ Maintained by [@dannyliv](https://github.com/dannyliv). Report issues or
54
+ vulnerabilities on the [main repository](https://github.com/dannyliv/agent-guard-plugins).
55
+ Licensed under Apache-2.0.
package/index.mjs ADDED
@@ -0,0 +1,154 @@
1
+ // Agent Guard — OpenCLAW plugin (automatic prompt-injection screening).
2
+ //
3
+ // This is the auto-wiring layer the OpenCLAW maintainers' own feedback asked
4
+ // for: "build the OpenCLAW plugin that hooks into before_tool_call or wraps
5
+ // the web fetch/search tool results". Once this package is installed
6
+ // (`openclaw plugins install agent-guard-openclaw`), OpenCLAW discovers it via
7
+ // the `openclaw` field in package.json + the openclaw.plugin.json manifest,
8
+ // and `activation.onStartup: true` activates it. No manual wrapping, no
9
+ // AGENTS.md step.
10
+ //
11
+ // What it does: registers a `before_tool_call` hook. On every tool call,
12
+ // it collects the textual params (the untrusted content: web page text,
13
+ // search results, email body, GitHub issue text, MCP tool output, ...) and
14
+ // runs the agent-guard Content Guard screening engine on them. Risky content
15
+ // blocks the tool call; trusted/authorized channels are skipped per the
16
+ // existing ContentGuardConfig at ~/.agent-guard/content_guard.toml.
17
+ //
18
+ // Content Guard is Python; this plugin is the Node.js seam OpenCLAW loads. It
19
+ // bridges to Python by running the `agent_guard_plugins.integrations.openclaw_bridge`
20
+ // module shipped by the `agent-guard-plugins` Python package (one short-lived process
21
+ // per tool call, JSON in / JSON out).
22
+ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
23
+ import { execFileSync } from "node:child_process";
24
+
25
+ // ---------------------------------------------------------------------------
26
+ // Configuration (all optional — sensible defaults, fully overridable)
27
+ // ---------------------------------------------------------------------------
28
+ //
29
+ // AGENT_GUARD_OPENCLAW_DISABLED "1"/"true" -> plugin loads but screens
30
+ // nothing (kill switch; not forced on users).
31
+ // AGENT_GUARD_PYTHON python executable that has
32
+ // agent_guard_plugins installed. Default
33
+ // "python3".
34
+ // AGENT_GUARD_OPENCLAW_TIMEOUT_MS per-call budget for the Python bridge.
35
+ // Default 15000.
36
+ //
37
+ // Tool names treated as web-sourced (always screened even if the source is on
38
+ // the authorized-channels trust list). Matched as case-insensitive substrings
39
+ // so e.g. "web_search", "web_fetch", "browser_fetch_url" all qualify.
40
// Substrings that mark a tool name as web-sourced. Compared against the
// lower-cased tool name, so "web_search", "web_fetch" and
// "browser_fetch_url" all match.
const WEB_TOOL_HINTS = [
  "web",
  "fetch",
  "search",
  "browse",
  "url",
  "http",
  "crawl",
  "scrape",
];

// Kill switch: true when AGENT_GUARD_OPENCLAW_DISABLED is set to one of
// "1", "true", "yes" or "on" (surrounding whitespace and letter case are
// ignored). Unset or any other value means screening stays enabled.
function isDisabled() {
  const raw = process.env.AGENT_GUARD_OPENCLAW_DISABLED ?? "";
  const flag = String(raw).trim().toLowerCase();
  return ["1", "true", "yes", "on"].includes(flag);
}

// True when the tool name contains any WEB_TOOL_HINTS entry as a
// case-insensitive substring. A null/undefined name is treated as "".
function looksWebSourced(toolName) {
  const lowered = String(toolName ?? "").toLowerCase();
  for (const hint of WEB_TOOL_HINTS) {
    if (lowered.includes(hint)) return true;
  }
  return false;
}
62
+
63
// Collect the untrusted textual content from a tool call's params.
//
// Every non-empty string reachable through arrays and plain object values is
// returned, in depth-first insertion order. Numbers, booleans, null, object
// keys and binary views (Buffer / typed arrays) are ignored.
//
// The previous version only looked at top-level strings and strings one
// level inside arrays, so text carried in nested objects or arrays of objects
// (e.g. `messages: [{ content: "..." }]`) was never screened. For flat params
// the result is unchanged.
//
// params    the tool call's params (any value; null/undefined yields []).
//           A bare string is returned as a single part.
// maxDepth  how many container levels to descend (default 8). Together with
//           the visited set this bounds the walk on cyclic or very deep input.
// returns   array of non-empty strings.
function collectTextParts(params, maxDepth = 8) {
  const parts = [];
  // Containers already expanded: guards against cycles and avoids
  // re-screening the same shared object twice.
  const visited = new Set();

  const walk = (value, depth) => {
    if (typeof value === "string") {
      if (value) parts.push(value);
      return;
    }
    if (value === null || typeof value !== "object") return;
    // Binary payloads hold no text; iterating them would emit one number per
    // byte for nothing.
    if (ArrayBuffer.isView(value)) return;
    if (depth >= maxDepth || visited.has(value)) return;
    visited.add(value);

    const children = Array.isArray(value) ? value : Object.values(value);
    for (const child of children) walk(child, depth + 1);
  };

  walk(params, 0);
  return parts;
}
79
+
80
// One-shot call into the Python Content Guard bridge.
//
// Spawns `<python> -m agent_guard_plugins.integrations.openclaw_bridge`,
// writes a JSON request ({ parts, tool_name, web }) to its stdin and parses
// the last line of its stdout as the JSON verdict.
//
// parts     array of text fragments to screen.
// toolName  name of the tool being called ("unknown" when null/undefined).
// web       truthy when the tool is considered web-sourced.
// returns   the verdict object from the bridge, or a non-blocking verdict
//           ({ ok: false, block: false, blockReason: "", error }) on failure.
//
// Fails OPEN: any spawn, timeout or parse problem returns a non-blocking
// verdict so a broken guard never wedges the agent.
//
// Environment:
//   AGENT_GUARD_PYTHON               interpreter to run (default "python3").
//   AGENT_GUARD_OPENCLAW_TIMEOUT_MS  per-call budget in ms (default 15000).
//     Values that are not a positive finite number fall back to the default.
//     Previously such values either made execFileSync reject the options on
//     every call (silently disabling screening) or, for "0", removed the
//     timeout altogether.
function screenViaPython(parts, toolName, web) {
  const DEFAULT_TIMEOUT_MS = 15000;
  const failOpen = (reason) => ({
    ok: false,
    block: false,
    blockReason: "",
    error: `agent-guard bridge failed: ${reason}`,
  });

  const python = process.env.AGENT_GUARD_PYTHON || "python3";

  // execFileSync only accepts a non-negative integer timeout, so truncate
  // and validate before handing the value over.
  const requestedMs = Math.trunc(
    Number(process.env.AGENT_GUARD_OPENCLAW_TIMEOUT_MS),
  );
  const timeoutMs =
    Number.isFinite(requestedMs) && requestedMs > 0
      ? requestedMs
      : DEFAULT_TIMEOUT_MS;

  const request = JSON.stringify({
    parts,
    tool_name: toolName ?? "unknown",
    web: Boolean(web),
  });

  try {
    const out = execFileSync(
      python,
      ["-m", "agent_guard_plugins.integrations.openclaw_bridge"],
      {
        input: request,
        encoding: "utf8",
        timeout: timeoutMs,
        env: { ...process.env },
        maxBuffer: 4 * 1024 * 1024,
      },
    );
    // Only the last stdout line is the verdict; anything before it is
    // treated as incidental output from the bridge.
    const lastLine = out.trim().split("\n").pop() || "{}";
    const verdict = JSON.parse(lastLine);
    if (verdict === null || typeof verdict !== "object" || Array.isArray(verdict)) {
      return failOpen("bridge did not return a JSON object");
    }
    return verdict;
  } catch (err) {
    // Fail open. The reason is kept on the verdict for diagnostics only.
    return failOpen(err?.message ?? err);
  }
}
117
+
118
// Plugin entry: registers a single `before_tool_call` hook that screens the
// textual params of each tool call and blocks the call when the bridge says so.
export default definePluginEntry({
  id: "agent-guard",
  name: "Agent Guard",
  description:
    "Automatic prompt-injection screening for OpenCLAW tool calls. " +
    "Screens web fetch/search results and other untrusted tool content " +
    "before the agent acts on them.",
  register(api) {
    // Run early so a malicious tool call is screened before lower-priority
    // hooks observe it.
    const hookOptions = { priority: 90 };

    // Returns a block decision, or undefined to let the call proceed
    // (kill switch, nothing to screen, allow / warn-mode / trusted / fail-open).
    const onBeforeToolCall = (event) => {
      if (isDisabled()) return undefined;

      const toolName = event?.toolName ?? "unknown";
      const textParts = collectTextParts(event?.params);
      if (textParts.length === 0) return undefined;

      const verdict = screenViaPython(
        textParts,
        toolName,
        looksWebSourced(toolName),
      );
      if (verdict?.block !== true) return undefined;

      // Used only when the bridge did not supply its own reason.
      const fallbackReason =
        `agent-guard blocked tool '${toolName}': flagged as a possible ` +
        `prompt-injection attempt`;
      return {
        block: true,
        blockReason: verdict.blockReason || fallbackReason,
      };
    };

    api.on("before_tool_call", onBeforeToolCall, hookOptions);
  },
});
package/openclaw.plugin.json ADDED
@@ -0,0 +1,27 @@
1
+ {
2
+ "id": "agent-guard",
3
+ "name": "Agent Guard",
4
+ "description": "Automatic prompt-injection screening for OpenCLAW tool calls. Screens web fetch/search results and other untrusted tool content before the agent acts on them.",
5
+ "version": "0.4.0",
6
+ "activation": {
7
+ "onStartup": true
8
+ },
9
+ "configSchema": {
10
+ "type": "object",
11
+ "additionalProperties": false,
12
+ "properties": {
13
+ "disabled": {
14
+ "type": "boolean",
15
+ "description": "When true, the plugin loads but screens nothing. Equivalent to the AGENT_GUARD_OPENCLAW_DISABLED env var. Screening is on by default but not forced."
16
+ },
17
+ "python": {
18
+ "type": "string",
19
+ "description": "Path to the Python executable that has agent-guard-plugins installed. Defaults to AGENT_GUARD_PYTHON or python3."
20
+ },
21
+ "timeoutMs": {
22
+ "type": "integer",
23
+ "description": "Per-tool-call budget for the Content Guard screening subprocess. Defaults to 15000."
24
+ }
25
+ }
26
+ }
27
+ }
package/package.json ADDED
@@ -0,0 +1,50 @@
1
+ {
2
+ "name": "agent-guard-openclaw",
3
+ "version": "0.5.0",
4
+ "description": "Automatic prompt-injection screening for OpenCLAW tool calls. Auto-registers a before_tool_call hook that screens web fetch/search results and other untrusted tool content.",
5
+ "type": "module",
6
+ "license": "Apache-2.0",
7
+ "author": "dannyliv",
8
+ "homepage": "https://github.com/dannyliv/agent-guard-plugins",
9
+ "repository": {
10
+ "type": "git",
11
+ "url": "https://github.com/dannyliv/agent-guard-plugins.git",
12
+ "directory": "openclaw-plugin"
13
+ },
14
+ "keywords": [
15
+ "openclaw",
16
+ "openclaw-plugin",
17
+ "prompt-injection",
18
+ "security",
19
+ "agent-guard"
20
+ ],
21
+ "main": "./index.mjs",
22
+ "exports": {
23
+ ".": "./index.mjs"
24
+ },
25
+ "scripts": {
26
+ "test": "node --test \"test/**/*.test.mjs\""
27
+ },
28
+ "files": [
29
+ "index.mjs",
30
+ "openclaw.plugin.json",
31
+ "README.md"
32
+ ],
33
+ "engines": {
34
+ "node": ">=22"
35
+ },
36
+ "openclaw": {
37
+ "extensions": ["./index.mjs"],
38
+ "compat": {
39
+ "pluginApi": ">=2026.3.24-beta.2",
40
+ "minGatewayVersion": "2026.3.24-beta.2"
41
+ },
42
+ "build": {
43
+ "openclawVersion": "2026.5.7",
44
+ "pluginSdkVersion": "2026.5.7"
45
+ }
46
+ },
47
+ "peerDependencies": {
48
+ "openclaw": ">=2026.3.24-beta.2"
49
+ }
50
+ }