npm - agent-guard-openclaw - Versions diffs - 0.5.0 - Mend

agent-guard-openclaw 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

package/README.md ADDED Viewed

@@ -0,0 +1,55 @@
+# agent-guard-openclaw
+Automatic prompt-injection screening for [OpenCLAW](https://openclaw.ai) tool
+calls. This is the OpenCLAW plugin half of
+[`agent-guard-plugins`](https://github.com/dannyliv/agent-guard-plugins).
+## What it does
+Once installed, OpenCLAW discovers and activates this plugin automatically
+(`activation.onStartup: true`). It registers a `before_tool_call` hook that
+runs on every tool call. The hook collects the tool's textual params (web page
+text, search results, email body, GitHub issue text, MCP tool output, and
+similar untrusted content) and screens them with the Content Guard engine from
+`agent-guard-plugins`. Risky content blocks the tool call before the agent
+acts on it. Trusted / authorized channels are skipped per your Content Guard
+config.
+No manual wrapping. No AGENTS.md step. Install it and it is active.
+## Install
+The plugin needs both the npm package (the OpenCLAW seam) and the Python
+package (the screening engine):
+```bash
+# Python screening engine — provides the `agent-guard-openclaw` bridge
+pip install agent-guard-plugins
+# OpenCLAW plugin — auto-registers the before_tool_call hook
+openclaw plugins install agent-guard-openclaw
+```
+## Configuration
+Screening policy (trust list, block threshold, block/warn mode) lives in the
+Content Guard config file at `~/.agent-guard/content_guard.toml`. See the
+`agent-guard-plugins` README for that file's schema. The plugin itself reads:
+| Env var                           | Default   | Purpose                                                                   |
+| --------------------------------- | --------- | ------------------------------------------------------------------------- |
+| `AGENT_GUARD_OPENCLAW_DISABLED`   | unset     | `1`/`true`/`yes`/`on` loads the plugin but screens nothing (kill switch). |
+| `AGENT_GUARD_PYTHON`              | `python3` | Python executable that has `agent_guard_plugins` installed.               |
+| `AGENT_GUARD_OPENCLAW_TIMEOUT_MS` | `15000`   | Per-tool-call budget for the screening subprocess.                        |
+## Fail-open
+If the screening bridge cannot run (Python missing, model load failure,
+timeout), the hook returns no decision and the tool call proceeds. A broken
+guard never wedges the agent.
+## Maintainer and license
+Maintained by [@dannyliv](https://github.com/dannyliv). Report issues or
+vulnerabilities on the [main repository](https://github.com/dannyliv/agent-guard-plugins).
+Licensed under Apache-2.0.

package/index.mjs ADDED Viewed

@@ -0,0 +1,154 @@
+// Agent Guard — OpenCLAW plugin (automatic prompt-injection screening).
+//
+// This is the auto-wiring layer the OpenCLAW maintainers' own feedback asked
+// for: "build the OpenCLAW plugin that hooks into before_tool_call or wraps
+// the web fetch/search tool results". Once this package is installed
+// (`openclaw plugins install agent-guard-openclaw`), OpenCLAW discovers it via
+// the `openclaw` field in package.json + the openclaw.plugin.json manifest,
+// and `activation.onStartup: true` activates it. No manual wrapping, no
+// AGENTS.md step.
+//
+// What it does: registers a `before_tool_call` hook. On every tool call,
+// it collects the textual params (the untrusted content: web page text,
+// search results, email body, GitHub issue text, MCP tool output, ...) and
+// runs the agent-guard Content Guard screening engine on them. Risky content
+// blocks the tool call; trusted/authorized channels are skipped per the
+// existing ContentGuardConfig at ~/.agent-guard/content_guard.toml.
+//
+// Content Guard is Python; this plugin is the Node.js seam OpenCLAW loads. It
+// bridges to Python by running the bridge module
+// `agent_guard_plugins.integrations.openclaw_bridge` (shipped by the
+// `agent-guard-plugins` Python package) via `python -m`: one short-lived
+// process per tool call, JSON in / JSON out.
+import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
+import { execFileSync } from "node:child_process";
+// ---------------------------------------------------------------------------
+// Configuration (all optional — sensible defaults, fully overridable)
+// ---------------------------------------------------------------------------
+//
+// AGENT_GUARD_OPENCLAW_DISABLED   "1"/"true"/"yes"/"on" (case-insensitive) ->
+//                                 plugin loads but screens nothing (kill
+//                                 switch; not forced on users).
+// AGENT_GUARD_PYTHON              python executable that has
+//                                 agent_guard_plugins installed. Default
+//                                 "python3".
+// AGENT_GUARD_OPENCLAW_TIMEOUT_MS per-call budget for the Python bridge.
+//                                 Default 15000.
+//
+// Tool names treated as web-sourced (always screened even if the source is on
+// the authorized-channels trust list). Matched as case-insensitive substrings
+// so e.g. "web_search", "web_fetch", "browser_fetch_url" all qualify.
+const WEB_TOOL_HINTS = [
+  "web",
+  "fetch",
+  "search",
+  "browse",
+  "url",
+  "http",
+  "crawl",
+  "scrape",
+];
+function isDisabled() {
+  const v = String(process.env.AGENT_GUARD_OPENCLAW_DISABLED ?? "")
+    .trim()
+    .toLowerCase();
+  return v === "1" || v === "true" || v === "yes" || v === "on";
+}
+function looksWebSourced(toolName) {
+  const name = String(toolName ?? "").toLowerCase();
+  return WEB_TOOL_HINTS.some((hint) => name.includes(hint));
+}
+// Collect the untrusted textual content from a tool call's params. Strings
+// (and strings nested one level inside arrays) are the injection surface;
+// numbers / booleans / structural keys are ignored.
+function collectTextParts(params) {
+  const parts = [];
+  for (const value of Object.values(params ?? {})) {
+    if (typeof value === "string") {
+      if (value) parts.push(value);
+    } else if (Array.isArray(value)) {
+      for (const item of value) {
+        if (typeof item === "string" && item) parts.push(item);
+      }
+    }
+  }
+  return parts;
+}
+// One-shot call into the Python Content Guard bridge. Returns the verdict
+// object. Fails OPEN: any spawn/parse error returns a non-blocking verdict so
+// a broken guard never wedges the agent.
+function screenViaPython(parts, toolName, web) {
+  const python = process.env.AGENT_GUARD_PYTHON || "python3";
+  const timeoutMs = Number(
+    process.env.AGENT_GUARD_OPENCLAW_TIMEOUT_MS || 15000,
+  );
+  const request = JSON.stringify({
+    parts,
+    tool_name: toolName ?? "unknown",
+    web: Boolean(web),
+  });
+  try {
+    const out = execFileSync(
+      python,
+      ["-m", "agent_guard_plugins.integrations.openclaw_bridge"],
+      {
+        input: request,
+        encoding: "utf8",
+        timeout: timeoutMs,
+        env: { ...process.env },
+        maxBuffer: 4 * 1024 * 1024,
+      },
+    );
+    const verdict = JSON.parse(out.trim().split("\n").pop() || "{}");
+    return verdict;
+  } catch (err) {
+    // Fail open. Surface the reason on the verdict for diagnostics only.
+    return {
+      ok: false,
+      block: false,
+      blockReason: "",
+      error: `agent-guard bridge failed: ${err?.message ?? err}`,
+    };
+  }
+}
+export default definePluginEntry({
+  id: "agent-guard",
+  name: "Agent Guard",
+  description:
+    "Automatic prompt-injection screening for OpenCLAW tool calls. " +
+    "Screens web fetch/search results and other untrusted tool content " +
+    "before the agent acts on them.",
+  register(api) {
+    api.on(
+      "before_tool_call",
+      (event) => {
+        if (isDisabled()) return; // kill switch — observe nothing, block nothing
+        const toolName = event?.toolName ?? "unknown";
+        const parts = collectTextParts(event?.params);
+        if (parts.length === 0) return; // no text to screen — allow
+        const web = looksWebSourced(toolName);
+        const verdict = screenViaPython(parts, toolName, web);
+        if (verdict?.block === true) {
+          return {
+            block: true,
+            blockReason:
+              verdict.blockReason ||
+              `agent-guard blocked tool '${toolName}': flagged as a possible ` +
+                `prompt-injection attempt`,
+          };
+        }
+        // allow / warn-mode / trusted / fail-open -> return nothing
+      },
+      // Run early so a malicious tool call is screened before lower-priority
+      // hooks observe it.
+      { priority: 90 },
+    );
+  },
+});

package/openclaw.plugin.json ADDED Viewed

@@ -0,0 +1,27 @@
+{
+  "id": "agent-guard",
+  "name": "Agent Guard",
+  "description": "Automatic prompt-injection screening for OpenCLAW tool calls. Screens web fetch/search results and other untrusted tool content before the agent acts on them.",
+  "version": "0.4.0",
+  "activation": {
+    "onStartup": true
+  },
+  "configSchema": {
+    "type": "object",
+    "additionalProperties": false,
+    "properties": {
+      "disabled": {
+        "type": "boolean",
+        "description": "When true, the plugin loads but screens nothing. Equivalent to the AGENT_GUARD_OPENCLAW_DISABLED env var. Screening is on by default but not forced."
+      },
+      "python": {
+        "type": "string",
+        "description": "Path to the Python executable that has agent-guard-plugins installed. Defaults to AGENT_GUARD_PYTHON or python3."
+      },
+      "timeoutMs": {
+        "type": "integer",
+        "description": "Per-tool-call budget for the Content Guard screening subprocess. Defaults to 15000."
+      }
+    }
+  }
+}

package/package.json ADDED Viewed

@@ -0,0 +1,50 @@
+{
+  "name": "agent-guard-openclaw",
+  "version": "0.5.0",
+  "description": "Automatic prompt-injection screening for OpenCLAW tool calls. Auto-registers a before_tool_call hook that screens web fetch/search results and other untrusted tool content.",
+  "type": "module",
+  "license": "Apache-2.0",
+  "author": "dannyliv",
+  "homepage": "https://github.com/dannyliv/agent-guard-plugins",
+  "repository": {
+    "type": "git",
+    "url": "https://github.com/dannyliv/agent-guard-plugins.git",
+    "directory": "openclaw-plugin"
+  },
+  "keywords": [
+    "openclaw",
+    "openclaw-plugin",
+    "prompt-injection",
+    "security",
+    "agent-guard"
+  ],
+  "main": "./index.mjs",
+  "exports": {
+    ".": "./index.mjs"
+  },
+  "scripts": {
+    "test": "node --test \"test/**/*.test.mjs\""
+  },
+  "files": [
+    "index.mjs",
+    "openclaw.plugin.json",
+    "README.md"
+  ],
+  "engines": {
+    "node": ">=22"
+  },
+  "openclaw": {
+    "extensions": ["./index.mjs"],
+    "compat": {
+      "pluginApi": ">=2026.3.24-beta.2",
+      "minGatewayVersion": "2026.3.24-beta.2"
+    },
+    "build": {
+      "openclawVersion": "2026.5.7",
+      "pluginSdkVersion": "2026.5.7"
+    }
+  },
+  "peerDependencies": {
+    "openclaw": ">=2026.3.24-beta.2"
+  }
+}