openguardrails-instrumentation-openclaw 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,81 @@
1
+ # openguardrails-instrumentation-openclaw
2
+
3
+ Guard an [OpenClaw](https://github.com/openclaw/openclaw) assistant through the
4
+ **OpenGuardrails (OGR)** protocol — a vendor-neutral enforcement layer for AI
5
+ agent safety & security. It's the multi-channel counterpart of
6
+ [`openguardrails-instrumentation-opencode`](../instrumentation-opencode).
7
+
8
+ **No OpenClaw core changes.** This is a pure plugin built on OpenClaw's
9
+ in-process [plugin hooks](https://docs.openclaw.ai/plugins/hooks). It is
10
+ *restrict-only*: it can stop a would-run tool call or a would-send message,
11
+ never loosen one.
12
+
13
+ ## What it does
14
+
15
+ Each hooked event becomes an OGR `GuardEvent`, runs through a `Runtime` built
16
+ from **your own policy** (deterministic text/regex rules, plus optionally your
17
+ own model as an LLM judge), and the resulting `Verdict` is enforced:
18
+
19
+ | Hook | `allow` / `modify` / `redact` | `block` | `require_approval` |
20
+ | --- | --- | --- | --- |
21
+ | **`before_tool_call`** | proceed | `{ block }` | `{ requireApproval }` — native `/approve` human gate |
22
+ | **`message_sending`** (outbound) | deliver | `{ cancel }` | `{ cancel }` |
23
+
24
+ The human-confirm gate and enforcement stay **privilege-separated**: the plugin
25
+ *decides*, the user *approves*, the host *enforces*.
26
+
27
+ ## Install
28
+
29
+ ```bash
30
+ openclaw plugins install clawhub:openguardrails
31
+ # or, during the npm cutover:
32
+ openclaw plugins install openguardrails-instrumentation-openclaw
33
+ ```
34
+
35
+ ## Configure
36
+
37
+ The assistant configures its **own** guardrails. Resolution order (low → high):
38
+
39
+ 1. A safe default policy (curl-pipe-to-sh, `rm -rf /`, secret-file reads, …).
40
+ 2. `<workspace>/openguardrails.json` — an OGR `policy.json` the assistant can
41
+ edit to give itself guardrails. Override the path with `policyPath` (takes
42
+ precedence) or the `OPENGUARDRAILS_POLICY` env var.
43
+ 3. Inline plugin config (highest precedence), in your OpenClaw config:
44
+
45
+ ```json
46
+ {
47
+ "plugins": {
48
+ "entries": {
49
+ "openguardrails": {
50
+ "config": {
51
+ "judge": {
52
+ "baseURL": "http://localhost:11434/v1",
53
+ "model": "your-guard-model",
54
+ "apiKey": "..."
55
+ },
56
+ "guardMessages": true
57
+ }
58
+ }
59
+ }
60
+ }
61
+ }
62
+ ```
63
+
64
+ `judge` points the LLM-judge detector at any OpenAI-compatible chat endpoint —
65
+ the same model the assistant already uses, a cheaper sibling, or a dedicated
66
+ guard model. The policy format is identical across every OGR integration
67
+ (opencode, hermes, python), so one `policy.json` works everywhere.
68
+
69
+ ## Scope & follow-ups
70
+
71
+ - v0.1 treats the principal as trusted. Channel-inbound tainting (untrusted
72
+ message / web / mcp content → untrusted provenance, correlated into the tool
73
+ call) is the next step via `message_received`.
74
+ - Input guardrails on the prompt itself (`before_agent_run`) require the
75
+ operator to set `plugins.entries.openguardrails.hooks.allowConversationAccess`
76
+ — a config flag, not a code change. Not needed for the tool/message
77
+ enforcement above.
78
+
79
+ ## License
80
+
81
+ Apache-2.0
@@ -0,0 +1,33 @@
1
+ import type { Policy } from "@openguardrails/core";
2
+ /** "Use your own model as the guardrail" — any OpenAI-compatible chat endpoint. */
3
+ export interface JudgeConfig {
4
+ baseURL: string;
5
+ model: string;
6
+ apiKey?: string;
7
+ headers?: Record<string, string>;
8
+ }
9
+ /** Plugin config, delivered through OpenClaw `plugins.entries.openguardrails.config`. */
10
+ export interface GuardrailsOptions {
11
+ /** Inline OGR policy (overrides the file + default). */
12
+ policy?: Policy;
13
+ /** Path to a guardrails policy file (defaults to <workspace>/openguardrails.json). */
14
+ policyPath?: string;
15
+ /** Enable the LLM-judge detector backed by your own model. */
16
+ judge?: JudgeConfig;
17
+ /** Also evaluate outbound channel messages via `message_sending` (default true). */
18
+ guardMessages?: boolean;
19
+ }
20
+ /** Default text/regex guardrails — deterministic, no model required. */
21
+ export declare const DEFAULT_POLICY: Policy;
22
+ export interface ResolvedConfig {
23
+ policy: Policy;
24
+ judge?: JudgeConfig;
25
+ guardMessages: boolean;
26
+ }
27
+ /**
28
+ * Resolve the effective policy. `workspaceDir` is the OpenClaw workspace (known
29
+ * at `gateway_start`); when absent we fall back to `process.cwd()` so the plugin
30
+ * still resolves a file during early registration.
31
+ */
32
+ export declare function loadGuardrailsConfig(workspaceDir: string | undefined, options?: GuardrailsOptions): ResolvedConfig;
33
+ //# sourceMappingURL=config.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAmBA,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,sBAAsB,CAAA;AAElD,mFAAmF;AACnF,MAAM,WAAW,WAAW;IAC1B,OAAO,EAAE,MAAM,CAAA;IACf,KAAK,EAAE,MAAM,CAAA;IACb,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,OAAO,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAA;CACjC;AAED,yFAAyF;AACzF,MAAM,WAAW,iBAAiB;IAChC,wDAAwD;IACxD,MAAM,CAAC,EAAE,MAAM,CAAA;IACf,sFAAsF;IACtF,UAAU,CAAC,EAAE,MAAM,CAAA;IACnB,8DAA8D;IAC9D,KAAK,CAAC,EAAE,WAAW,CAAA;IACnB,sEAAsE;IACtE,aAAa,CAAC,EAAE,OAAO,CAAA;CACxB;AAED,wEAAwE;AACxE,eAAO,MAAM,cAAc,EAAE,MA0C5B,CAAA;AAED,MAAM,WAAW,cAAc;IAC7B,MAAM,EAAE,MAAM,CAAA;IACd,KAAK,CAAC,EAAE,WAAW,CAAA;IACnB,aAAa,EAAE,OAAO,CAAA;CACvB;AAED;;;;GAIG;AACH,wBAAgB,oBAAoB,CAAC,YAAY,EAAE,MAAM,GAAG,SAAS,EAAE,OAAO,CAAC,EAAE,iBAAiB,GAAG,cAAc,CAmBlH"}
package/dist/config.js ADDED
@@ -0,0 +1,87 @@
1
+ /**
2
+ * Guardrails configuration for the OpenClaw integration.
3
+ *
4
+ * The assistant configures its OWN guardrails — text + regex rules (no model
5
+ * needed), and optionally its own model as an LLM judge. Resolution order
6
+ * (lowest → highest precedence):
7
+ *
8
+ * 1. a sensible default policy (below)
9
+ * 2. `<workspace>/openguardrails.json` (agent-editable — this is how an
10
+ * assistant gives itself guardrails); path overridable via plugin config
11
+ * `policyPath` or the `OPENGUARDRAILS_POLICY` env var
12
+ * 3. inline plugin config `policy` (set in OpenClaw config under
13
+ * `plugins.entries.openguardrails.config`)
14
+ *
15
+ * The policy IS an OGR policy.json (composition + config_rules), so the same
16
+ * file format works across every OGR integration (opencode, hermes, python).
17
+ */
18
+ import { readFileSync, existsSync } from "node:fs";
19
+ import { join } from "node:path";
20
+ /** Default text/regex guardrails — deterministic, no model required. */
21
+ export const DEFAULT_POLICY = {
22
+ composition: {
23
+ "security.*": { strategy: "deny-wins", on_all_failed: "block" },
24
+ default: { strategy: "deny-wins" },
25
+ },
26
+ config_rules: {
27
+ secret_env_markers: ["SECRET", "TOKEN", "KEY", "PASSWORD", "AWS_", "PRIVATE", "CREDENTIAL"],
28
+ command_rules: [
29
+ {
30
+ id: "pipe-to-shell",
31
+ regex: "(curl|wget)\\b.*\\|\\s*(ba)?sh",
32
+ category: "security.malicious_command",
33
+ decision: "require_approval",
34
+ score: 0.85,
35
+ why: "remote script fetched and piped directly into a shell",
36
+ },
37
+ {
38
+ id: "rm-rf-root",
39
+ regex: "rm\\s+-rf\\s+/(\\s|$)",
40
+ category: "security.malicious_command",
41
+ decision: "block",
42
+ score: 1.0,
43
+ why: "destructive recursive delete of the filesystem root",
44
+ },
45
+ {
46
+ id: "secret-file-access",
47
+ regex: "(\\.env\\b|/\\.aws/credentials|/\\.ssh/id_|/\\.ssh/|auth\\.json|\\.netrc)",
48
+ category: "security.secret_leak",
49
+ decision: "block",
50
+ score: 0.9,
51
+ why: "command references a credential file — independent of the reader",
52
+ },
53
+ {
54
+ id: "pipe-to-sudo",
55
+ regex: "\\|\\s*sudo\\b",
56
+ category: "security.privilege_escalation",
57
+ decision: "require_approval",
58
+ score: 0.7,
59
+ why: "output piped into sudo",
60
+ },
61
+ ],
62
+ },
63
+ };
64
+ /**
65
+ * Resolve the effective policy. `workspaceDir` is the OpenClaw workspace (known
66
+ * at `gateway_start`); when absent we fall back to `process.cwd()` so the plugin
67
+ * still resolves a file during early registration.
68
+ */
69
+ export function loadGuardrailsConfig(workspaceDir, options) {
70
+ let policy = DEFAULT_POLICY;
71
+ const path = options?.policyPath ??
72
+ process.env["OPENGUARDRAILS_POLICY"] ??
73
+ join(workspaceDir ?? process.cwd(), "openguardrails.json");
74
+ if (existsSync(path)) {
75
+ try {
76
+ policy = JSON.parse(readFileSync(path, "utf8"));
77
+ }
78
+ catch {
79
+ // malformed file → keep the safe default rather than failing open silently
80
+ }
81
+ }
82
+ if (options?.policy)
83
+ policy = options.policy;
84
+ const judge = options?.judge ?? policy["judge"];
85
+ return { policy, judge, guardMessages: options?.guardMessages ?? true };
86
+ }
87
+ //# sourceMappingURL=config.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"config.js","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;GAgBG;AACH,OAAO,EAAE,YAAY,EAAE,UAAU,EAAE,MAAM,SAAS,CAAA;AAClD,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAA;AAuBhC,wEAAwE;AACxE,MAAM,CAAC,MAAM,cAAc,GAAW;IACpC,WAAW,EAAE;QACX,YAAY,EAAE,EAAE,QAAQ,EAAE,WAAW,EAAE,aAAa,EAAE,OAAO,EAAE;QAC/D,OAAO,EAAE,EAAE,QAAQ,EAAE,WAAW,EAAE;KACnC;IACD,YAAY,EAAE;QACZ,kBAAkB,EAAE,CAAC,QAAQ,EAAE,OAAO,EAAE,KAAK,EAAE,UAAU,EAAE,MAAM,EAAE,SAAS,EAAE,YAAY,CAAC;QAC3F,aAAa,EAAE;YACb;gBACE,EAAE,EAAE,eAAe;gBACnB,KAAK,EAAE,gCAAgC;gBACvC,QAAQ,EAAE,4BAA4B;gBACtC,QAAQ,EAAE,kBAAkB;gBAC5B,KAAK,EAAE,IAAI;gBACX,GAAG,EAAE,uDAAuD;aAC7D;YACD;gBACE,EAAE,EAAE,YAAY;gBAChB,KAAK,EAAE,uBAAuB;gBAC9B,QAAQ,EAAE,4BAA4B;gBACtC,QAAQ,EAAE,OAAO;gBACjB,KAAK,EAAE,GAAG;gBACV,GAAG,EAAE,qDAAqD;aAC3D;YACD;gBACE,EAAE,EAAE,oBAAoB;gBACxB,KAAK,EAAE,2EAA2E;gBAClF,QAAQ,EAAE,sBAAsB;gBAChC,QAAQ,EAAE,OAAO;gBACjB,KAAK,EAAE,GAAG;gBACV,GAAG,EAAE,kEAAkE;aACxE;YACD;gBACE,EAAE,EAAE,cAAc;gBAClB,KAAK,EAAE,gBAAgB;gBACvB,QAAQ,EAAE,+BAA+B;gBACzC,QAAQ,EAAE,kBAAkB;gBAC5B,KAAK,EAAE,GAAG;gBACV,GAAG,EAAE,wBAAwB;aAC9B;SACF;KACF;CACF,CAAA;AAQD;;;;GAIG;AACH,MAAM,UAAU,oBAAoB,CAAC,YAAgC,EAAE,OAA2B;IAChG,IAAI,MAAM,GAAW,cAAc,CAAA;IAEnC,MAAM,IAAI,GACR,OAAO,EAAE,UAAU;QACnB,OAAO,CAAC,GAAG,CAAC,uBAAuB,CAAC;QACpC,IAAI,CAAC,YAAY,IAAI,OAAO,CAAC,GAAG,EAAE,EAAE,qBAAqB,CAAC,CAAA;IAE5D,IAAI,UAAU,CAAC,IAAI,CAAC,EAAE,CAAC;QACrB,IAAI,CAAC;YACH,MAAM,GAAG,IAAI,CAAC,KAAK,CAAC,YAAY,CAAC,IAAI,EAAE,MAAM,CAAC,CAAW,CAAA;QAC3D,CAAC;QAAC,MAAM,CAAC;YACP,2EAA2E;QAC7E,CAAC;IACH,CAAC;IACD,IAAI,OAAO,EAAE,MAAM;QAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAA;IAE5C,MAAM,KAAK,GAAG,OAAO,EAAE,KAAK,IAAK,MAAM,CAAC,OAAO,CAA6B,CAAA;IAC5E,OAAO,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,OAAO,EAAE,aAAa,IAAI,IAAI,EAAE,CAAA;AACzE,CAAC"}
@@ -0,0 +1,30 @@
1
+ /**
2
+ * openguardrails-instrumentation-openclaw
3
+ *
4
+ * An OpenClaw plugin that guards an assistant through the OpenGuardrails (OGR)
5
+ * protocol — the multi-channel counterpart of
6
+ * `openguardrails-instrumentation-opencode`.
7
+ *
8
+ * It registers in-process plugin hooks, turns each event into an OGR
9
+ * `GuardEvent`, runs it through a `Runtime` built from the assistant's own
10
+ * guardrails policy (text/regex rules, plus optionally its own model as an LLM
11
+ * judge), and enforces the `Verdict`:
12
+ *
13
+ * before_tool_call allow | modify | redact → proceed
14
+ * block → { block }
15
+ * require_approval → { requireApproval } (human gate)
16
+ *
17
+ * message_sending allow | modify | redact → deliver
18
+ * block | require_approval → { cancel } (outbound guard)
19
+ *
20
+ * No OpenClaw core changes required. This is a "restrict-only" guard: it can
21
+ * stop a would-run tool call or a would-send message, never loosen one. The
22
+ * human-confirm gate (`requireApproval`) and enforcement stay privilege-
23
+ * separated: the plugin decides, the user approves, the host enforces.
24
+ */
25
+ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
26
+ declare const plugin: ReturnType<typeof definePluginEntry>;
27
+ export default plugin;
28
+ export { DEFAULT_POLICY } from "./config.js";
29
+ export type { GuardrailsOptions, JudgeConfig } from "./config.js";
30
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,OAAO,EAAE,iBAAiB,EAAE,MAAM,kCAAkC,CAAA;AAwEpE,QAAA,MAAM,MAAM,EAAE,UAAU,CAAC,OAAO,iBAAiB,CAyF/C,CAAA;AAEF,eAAe,MAAM,CAAA;AAErB,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA;AAC5C,YAAY,EAAE,iBAAiB,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA"}
package/dist/index.js ADDED
@@ -0,0 +1,165 @@
1
+ /**
2
+ * openguardrails-instrumentation-openclaw
3
+ *
4
+ * An OpenClaw plugin that guards an assistant through the OpenGuardrails (OGR)
5
+ * protocol — the multi-channel counterpart of
6
+ * `openguardrails-instrumentation-opencode`.
7
+ *
8
+ * It registers in-process plugin hooks, turns each event into an OGR
9
+ * `GuardEvent`, runs it through a `Runtime` built from the assistant's own
10
+ * guardrails policy (text/regex rules, plus optionally its own model as an LLM
11
+ * judge), and enforces the `Verdict`:
12
+ *
13
+ * before_tool_call allow | modify | redact → proceed
14
+ * block → { block }
15
+ * require_approval → { requireApproval } (human gate)
16
+ *
17
+ * message_sending allow | modify | redact → deliver
18
+ * block | require_approval → { cancel } (outbound guard)
19
+ *
20
+ * No OpenClaw core changes required. This is a "restrict-only" guard: it can
21
+ * stop a would-run tool call or a would-send message, never loosen one. The
22
+ * human-confirm gate (`requireApproval`) and enforcement stay privilege-
23
+ * separated: the plugin decides, the user approves, the host enforces.
24
+ */
25
+ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry";
26
+ import { Runtime, ConfigRulesDetector, LLMJudgeDetector, } from "@openguardrails/core";
27
+ import { loadGuardrailsConfig } from "./config.js";
28
+ import { openAICompatibleBackend } from "./own-model.js";
29
// Process-wide counter; each call to `id` bumps it before formatting.
let seq = 0;
/**
 * Build an identifier of the form `<prefix>-<counter>-<suffix>`.
 *
 * The counter is `seq` in base 36. The suffix is the first eight characters of
 * `crypto.randomUUID()` when that API exists, otherwise the counter left-padded
 * with zeros to eight characters.
 */
function id(prefix) {
    seq += 1;
    const counter = seq.toString(36);
    const uuid = globalThis.crypto?.randomUUID?.();
    const suffix = uuid?.slice(0, 8) ?? counter.padStart(8, "0");
    return [prefix, counter, suffix].join("-");
}
35
/**
 * Render a verdict as a one-line reason: `id(score)` for each category, then
 * the reasons that do not start with `[`, joined by an em dash. Falls back to
 * the bare decision when both parts are empty.
 *
 * This runs on the enforcement path, after a block/approval verdict has been
 * reached, so it tolerates a verdict with missing `categories` / `reasons` or
 * non-string reasons rather than throwing while formatting.
 */
function brief(v) {
    const cats = (v.categories ?? []).map((c) => `${c.id}(${c.score})`).join(", ");
    const why = (v.reasons ?? [])
        .filter((r) => typeof r === "string" && !r.startsWith("["))
        .join("; ");
    return [cats, why].filter(Boolean).join(" — ") || v.decision;
}
40
/**
 * Owns the OGR runtime and builds it on demand, caching the result. The policy
 * file lives in the workspace, which is only known at `gateway_start`; if a
 * tool/message hook fires before that, the runtime is built on first use from
 * whatever workspace/options are set at that point.
 */
class GuardManager {
    runtime;
    guardMessages = true;
    workspaceDir;
    options;
    /** Store the workspace and options, drop any cached runtime, rebuild now. */
    configure(workspaceDir, options) {
        this.workspaceDir = workspaceDir;
        this.options = options;
        // Clearing the cache is what forces `ensure` to rebuild.
        this.runtime = undefined;
        this.ensure();
    }
    /** Return the cached runtime, building it from the resolved config if needed. */
    ensure() {
        if (!this.runtime) {
            const resolved = loadGuardrailsConfig(this.workspaceDir, this.options);
            // Config rules are always active; the LLM judge only when configured.
            const detectors = [new ConfigRulesDetector(resolved.policy.config_rules ?? {})];
            if (resolved.judge) {
                detectors.push(new LLMJudgeDetector(openAICompatibleBackend(resolved.judge)));
            }
            this.guardMessages = resolved.guardMessages;
            this.runtime = new Runtime(detectors, resolved.policy);
        }
        return this.runtime;
    }
    /** Whether message guarding is on; builds the runtime first so the flag is resolved. */
    get messagesEnabled() {
        this.ensure();
        return this.guardMessages;
    }
    /** Evaluate one guard event against the (lazily built) runtime. */
    evaluate(ev) {
        const runtime = this.ensure();
        return runtime.evaluate(ev);
    }
}
75
/**
 * Best-effort lookup of this plugin's config in the OpenClaw config tree:
 * `config.plugins.entries.openguardrails.config`. Any missing level yields
 * `undefined` instead of throwing.
 */
function readOptions(config) {
    return config?.plugins?.entries?.["openguardrails"]?.config;
}
80
// In the TypeScript source this binding is annotated through the importable
// `definePluginEntry` symbol, so the emitted declaration does not inline
// OpenClaw's non-exported `DefinedPluginEntry` type (TS2742 portability).
const plugin = definePluginEntry({
    id: "openguardrails",
    name: "OpenGuardrails",
    description: "Enforce the OpenGuardrails (OGR) protocol on tool calls and channel traffic — block, rewrite, or require human approval under a policy you own.",
    /** Wire the three hooks to one shared, lazily built `GuardManager`. */
    register(api) {
        const guard = new GuardManager();

        // Resolve the workspace-scoped policy once the Gateway is up.
        const onGatewayStart = (_event, ctx) => {
            guard.configure(ctx.workspaceDir, readOptions(ctx.config));
        };

        // Core enforcement: every tool call, before it runs.
        const onBeforeToolCall = async (event, ctx) => {
            const ev = {
                kind: "tool_call",
                observationPoint: "agent_hook",
                subject: {
                    agent_id: ctx.agentId ?? "openclaw",
                    agent_type: "openclaw",
                    session_id: ctx.sessionKey,
                    channel: ctx.channelId,
                },
                payload: { name: event.toolName, arguments: event.params },
                eventId: id("evt"),
                // Reuse the host's tool-call id when it supplies one.
                guardId: event.toolCallId ?? id("ga"),
                timestamp: new Date().toISOString(),
                sessionId: ctx.sessionKey,
                // v0.1: the principal is trusted. Channel-inbound tainting
                // (untrusted message/web/mcp content → untrusted provenance) is a
                // follow-up via message_received correlation.
                provenance: [{ source: "user", trust: "trusted" }],
            };
            const verdict = await guard.evaluate(ev);
            switch (verdict.decision) {
                case "block":
                    return { block: true, blockReason: `[OpenGuardrails] ${brief(verdict)}` };
                case "require_approval":
                    return {
                        requireApproval: {
                            title: `Approve ${event.toolName}?`,
                            description: `[OpenGuardrails] ${brief(verdict)}`,
                            severity: "warning",
                            timeoutBehavior: "deny",
                            pluginId: "openguardrails",
                        },
                    };
                default:
                    // allow | modify | redact → proceed unchanged
                    return undefined;
            }
        };

        // Outbound guard: cancel a reply a deny verdict would forbid.
        const onMessageSending = async (event, ctx) => {
            if (!guard.messagesEnabled) {
                return undefined;
            }
            const ev = {
                kind: "model_output",
                observationPoint: "gateway",
                subject: {
                    agent_id: ctx.agentId ?? "openclaw",
                    agent_type: "openclaw",
                    session_id: ctx.sessionKey,
                },
                payload: { content: event.content ?? "", channel: ctx.messageProvider },
                eventId: id("evt"),
                guardId: id("ga"),
                timestamp: new Date().toISOString(),
                sessionId: ctx.sessionKey,
                provenance: [{ source: "model", trust: "unverified" }],
            };
            const verdict = await guard.evaluate(ev);
            // This hook only returns `cancel`, so an approval verdict cancels too.
            const denied = verdict.decision === "block" || verdict.decision === "require_approval";
            if (!denied) {
                return undefined;
            }
            return {
                cancel: true,
                cancelReason: `openguardrails:${verdict.decision}`,
                metadata: { reason: brief(verdict) },
            };
        };

        api.on("gateway_start", onGatewayStart);
        api.on("before_tool_call", onBeforeToolCall, { priority: 50 });
        api.on("message_sending", onMessageSending);
    },
});
163
+ export default plugin;
164
+ export { DEFAULT_POLICY } from "./config.js";
165
+ //# sourceMappingURL=index.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.js","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;;;;;;;;;;;;;GAuBG;AACH,OAAO,EAAE,iBAAiB,EAAE,MAAM,kCAAkC,CAAA;AACpE,OAAO,EACL,OAAO,EACP,mBAAmB,EACnB,gBAAgB,GAIjB,MAAM,sBAAsB,CAAA;AAC7B,OAAO,EAAE,oBAAoB,EAA0B,MAAM,aAAa,CAAA;AAC1E,OAAO,EAAE,uBAAuB,EAAE,MAAM,gBAAgB,CAAA;AAExD,IAAI,GAAG,GAAG,CAAC,CAAA;AACX,SAAS,EAAE,CAAC,MAAc;IACxB,GAAG,IAAI,CAAC,CAAA;IACR,MAAM,IAAI,GAAG,UAAU,CAAC,MAAM,EAAE,UAAU,EAAE,EAAE,CAAC,KAAK,CAAC,CAAC,EAAE,CAAC,CAAC,IAAI,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAC,QAAQ,CAAC,CAAC,EAAE,GAAG,CAAC,CAAA;IAC/F,OAAO,GAAG,MAAM,IAAI,GAAG,CAAC,QAAQ,CAAC,EAAE,CAAC,IAAI,IAAI,EAAE,CAAA;AAChD,CAAC;AAED,SAAS,KAAK,CAAC,CAAU;IACvB,MAAM,IAAI,GAAG,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,GAAG,CAAC,CAAC,EAAE,IAAI,CAAC,CAAC,KAAK,GAAG,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IACtE,MAAM,GAAG,GAAG,CAAC,CAAC,OAAO,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,EAAE,CAAC,CAAC,CAAC,CAAC,UAAU,CAAC,GAAG,CAAC,CAAC,CAAC,IAAI,CAAC,IAAI,CAAC,CAAA;IAClE,OAAO,CAAC,IAAI,EAAE,GAAG,CAAC,CAAC,MAAM,CAAC,OAAO,CAAC,CAAC,IAAI,CAAC,KAAK,CAAC,IAAI,CAAC,CAAC,QAAQ,CAAA;AAC9D,CAAC;AAED;;;;GAIG;AACH,MAAM,YAAY;IACR,OAAO,CAAqB;IAC5B,aAAa,GAAG,IAAI,CAAA;IACpB,YAAY,CAAoB;IAChC,OAAO,CAA+B;IAE9C,SAAS,CAAC,YAAgC,EAAE,OAAsC;QAChF,IAAI,CAAC,YAAY,GAAG,YAAY,CAAA;QAChC,IAAI,CAAC,OAAO,GAAG,OAAO,CAAA;QACtB,IAAI,CAAC,OAAO,GAAG,SAAS,CAAA,CAAC,+CAA+C;QACxE,IAAI,CAAC,MAAM,EAAE,CAAA;IACf,CAAC;IAEO,MAAM;QACZ,IAAI,IAAI,CAAC,OAAO;YAAE,OAAO,IAAI,CAAC,OAAO,CAAA;QACrC,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,aAAa,EAAE,GAAG,oBAAoB,CAAC,IAAI,CAAC,YAAY,EAAE,IAAI,CAAC,OAAO,CAAC,CAAA;QAC9F,MAAM,SAAS,GAAe,CAAC,IAAI,mBAAmB,CAAC,MAAM,CAAC,YAAY,IAAI,EAAE,CAAC,CAAC,CAAA;QAClF,IAAI,KAAK;YAAE,SAAS,CAAC,IAAI,CAAC,IAAI,gBAAgB,CAAC,uBAAuB,CAAC,KAAK,CAAC,CAAC,CAAC,CAAA;QAC/E,IAAI,CAAC,aAAa,GAAG,aAAa,CAAA;QAClC,IAAI,CAAC,OAAO,GAAG,IAAI,OAAO,CAAC,SAAS,EAAE,MAAM,CAAC,CAAA;QAC7C,OAAO,IAAI,CAAC,OAAO,CAAA;IACrB,CAAC;IAED,IAAI,eAAe;QACjB,IAAI,CAAC,MAAM,EAAE,CAAA;QACb,OAAO,IAAI,CAAC,aAAa,CAAA;IAC3B,CAAC;
IAED,QAAQ,CAAC,EAAc;QACrB,OAAO,IAAI,CAAC,MAAM,EAAE,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAA;IACnC,CAAC;CACF;AAED,gFAAgF;AAChF,SAAS,WAAW,CAAC,MAAe;IAClC,MAAM,OAAO,GAAI,MAA2E,EAAE,OAAO,EAAE,OAAO,CAAA;IAC9G,OAAO,OAAO,EAAE,CAAC,gBAAgB,CAAC,EAAE,MAAuC,CAAA;AAC7E,CAAC;AAED,wEAAwE;AACxE,2EAA2E;AAC3E,6BAA6B;AAC7B,MAAM,MAAM,GAAyC,iBAAiB,CAAC;IACrE,EAAE,EAAE,gBAAgB;IACpB,IAAI,EAAE,gBAAgB;IACtB,WAAW,EACT,iJAAiJ;IACnJ,QAAQ,CAAC,GAAG;QACV,MAAM,KAAK,GAAG,IAAI,YAAY,EAAE,CAAA;QAEhC,8DAA8D;QAC9D,GAAG,CAAC,EAAE,CAAC,eAAe,EAAE,CAAC,MAAM,EAAE,GAAG,EAAE,EAAE;YACtC,MAAM,CAAC,GAAG,GAAkD,CAAA;YAC5D,KAAK,CAAC,SAAS,CAAC,CAAC,CAAC,YAAY,EAAE,WAAW,CAAC,CAAC,CAAC,MAAM,CAAC,CAAC,CAAA;QACxD,CAAC,CAAC,CAAA;QAEF,qDAAqD;QACrD,GAAG,CAAC,EAAE,CACJ,kBAAkB,EAClB,KAAK,EAAE,KAAK,EAAE,GAAG,EAAE,EAAE;YACnB,MAAM,CAAC,GAAG,GAAoE,CAAA;YAC9E,MAAM,EAAE,GAAe;gBACrB,IAAI,EAAE,WAAW;gBACjB,gBAAgB,EAAE,YAAY;gBAC9B,OAAO,EAAE;oBACP,QAAQ,EAAE,CAAC,CAAC,OAAO,IAAI,UAAU;oBACjC,UAAU,EAAE,UAAU;oBACtB,UAAU,EAAE,CAAC,CAAC,UAAU;oBACxB,OAAO,EAAE,CAAC,CAAC,SAAS;iBACrB;gBACD,OAAO,EAAE,EAAE,IAAI,EAAE,KAAK,CAAC,QAAQ,EAAE,SAAS,EAAE,KAAK,CAAC,MAAM,EAAE;gBAC1D,OAAO,EAAE,EAAE,CAAC,KAAK,CAAC;gBAClB,OAAO,EAAE,KAAK,CAAC,UAAU,IAAI,EAAE,CAAC,IAAI,CAAC;gBACrC,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,SAAS,EAAE,CAAC,CAAC,UAAU;gBACvB,2DAA2D;gBAC3D,kEAAkE;gBAClE,8CAA8C;gBAC9C,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,MAAM,EAAE,KAAK,EAAE,SAAS,EAAE,CAAC;aACnD,CAAA;YAED,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAA;YAExC,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,EAAE,CAAC;gBACjC,OAAO,EAAE,KAAK,EAAE,IAAI,EAAE,WAAW,EAAE,oBAAoB,KAAK,CAAC,OAAO,CAAC,EAAE,EAAE,CAAA;YAC3E,CAAC;YACD,IAAI,OAAO,CAAC,QAAQ,KAAK,kBAAkB,EAAE,CAAC;gBAC5C,OAAO;oBACL,eAAe,EAAE;wBACf,KAAK,EAAE,WAAW,KAAK,CAAC,QAAQ,GAAG;wBACnC,WAAW,EAAE,oBAAoB,KAAK,CAAC,OAAO,CAAC,EAAE;wBACjD,QAAQ,EAAE,SAAS;wBACnB,eAAe,EAAE,MAAM;wBACvB,QAAQ,EAAE,gBAAgB;qBAC3B;iBACF,CAAA;YACH,CAAC;YACD,8CAA8C;YAC9C,OAAM;QACR,CAAC,EACD,EAAE,QAAQ,EAAE,EAAE,EAAE,CACjB,CAAA;QAED,8DAA8D;QAC9D,GAAG,CAAC,EAAE,CAAC,iBAAiB,EAAE,KAAK,EAAE,KAAK,EAAE,GA
AG,EAAE,EAAE;YAC7C,IAAI,CAAC,KAAK,CAAC,eAAe;gBAAE,OAAM;YAClC,MAAM,CAAC,GAAG,KAA6B,CAAA;YACvC,MAAM,CAAC,GAAG,GAA0E,CAAA;YACpF,MAAM,EAAE,GAAe;gBACrB,IAAI,EAAE,cAAc;gBACpB,gBAAgB,EAAE,SAAS;gBAC3B,OAAO,EAAE,EAAE,QAAQ,EAAE,CAAC,CAAC,OAAO,IAAI,UAAU,EAAE,UAAU,EAAE,UAAU,EAAE,UAAU,EAAE,CAAC,CAAC,UAAU,EAAE;gBAChG,OAAO,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC,OAAO,IAAI,EAAE,EAAE,OAAO,EAAE,CAAC,CAAC,eAAe,EAAE;gBACjE,OAAO,EAAE,EAAE,CAAC,KAAK,CAAC;gBAClB,OAAO,EAAE,EAAE,CAAC,IAAI,CAAC;gBACjB,SAAS,EAAE,IAAI,IAAI,EAAE,CAAC,WAAW,EAAE;gBACnC,SAAS,EAAE,CAAC,CAAC,UAAU;gBACvB,UAAU,EAAE,CAAC,EAAE,MAAM,EAAE,OAAO,EAAE,KAAK,EAAE,YAAY,EAAE,CAAC;aACvD,CAAA;YAED,MAAM,OAAO,GAAG,MAAM,KAAK,CAAC,QAAQ,CAAC,EAAE,CAAC,CAAA;YACxC,IAAI,OAAO,CAAC,QAAQ,KAAK,OAAO,IAAI,OAAO,CAAC,QAAQ,KAAK,kBAAkB,EAAE,CAAC;gBAC5E,OAAO;oBACL,MAAM,EAAE,IAAI;oBACZ,YAAY,EAAE,kBAAkB,OAAO,CAAC,QAAQ,EAAE;oBAClD,QAAQ,EAAE,EAAE,MAAM,EAAE,KAAK,CAAC,OAAO,CAAC,EAAE;iBACrC,CAAA;YACH,CAAC;YACD,OAAM;QACR,CAAC,CAAC,CAAA;IACJ,CAAC;CACF,CAAC,CAAA;AAEF,eAAe,MAAM,CAAA;AAErB,OAAO,EAAE,cAAc,EAAE,MAAM,aAAa,CAAA"}
@@ -0,0 +1,12 @@
1
+ /**
2
+ * "Use your own model as the guardrail" — an OGR LLMBackend that calls any
3
+ * OpenAI-compatible chat-completions endpoint. Point it at the same model the
4
+ * assistant already uses, a cheaper sibling, or a dedicated guard model.
5
+ *
6
+ * Identical contract to the opencode/hermes integrations: one OpenAI-compatible
7
+ * POST, OGR does the rest.
8
+ */
9
+ import type { LLMBackend } from "@openguardrails/core";
10
+ import type { JudgeConfig } from "./config.js";
11
+ export declare function openAICompatibleBackend(cfg: JudgeConfig): LLMBackend;
12
+ //# sourceMappingURL=own-model.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"own-model.d.ts","sourceRoot":"","sources":["../src/own-model.ts"],"names":[],"mappings":"AAAA;;;;;;;GAOG;AACH,OAAO,KAAK,EAAE,UAAU,EAAE,MAAM,sBAAsB,CAAA;AACtD,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,aAAa,CAAA;AAE9C,wBAAgB,uBAAuB,CAAC,GAAG,EAAE,WAAW,GAAG,UAAU,CA4BpE"}
@@ -0,0 +1,31 @@
1
+ export function openAICompatibleBackend(cfg) {
2
+ const url = cfg.baseURL.replace(/\/+$/, "") + "/chat/completions";
3
+ return {
4
+ name: `own-model:${cfg.model}`,
5
+ async complete(system, user) {
6
+ const res = await fetch(url, {
7
+ method: "POST",
8
+ headers: {
9
+ "content-type": "application/json",
10
+ ...(cfg.apiKey ? { authorization: `Bearer ${cfg.apiKey}` } : {}),
11
+ ...(cfg.headers ?? {}),
12
+ },
13
+ body: JSON.stringify({
14
+ model: cfg.model,
15
+ temperature: 0,
16
+ messages: [
17
+ { role: "system", content: system },
18
+ { role: "user", content: user },
19
+ ],
20
+ }),
21
+ });
22
+ if (!res.ok)
23
+ throw new Error(`guard model returned ${res.status}`);
24
+ const data = (await res.json());
25
+ const text = data.choices?.[0]?.message?.content ?? "";
26
+ // Strip a ```json fence if the model wrapped its reply.
27
+ return text.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/i, "").trim();
28
+ },
29
+ };
30
+ }
31
+ //# sourceMappingURL=own-model.js.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"own-model.js","sourceRoot":"","sources":["../src/own-model.ts"],"names":[],"mappings":"AAWA,MAAM,UAAU,uBAAuB,CAAC,GAAgB;IACtD,MAAM,GAAG,GAAG,GAAG,CAAC,OAAO,CAAC,OAAO,CAAC,MAAM,EAAE,EAAE,CAAC,GAAG,mBAAmB,CAAA;IACjE,OAAO;QACL,IAAI,EAAE,aAAa,GAAG,CAAC,KAAK,EAAE;QAC9B,KAAK,CAAC,QAAQ,CAAC,MAAc,EAAE,IAAY;YACzC,MAAM,GAAG,GAAG,MAAM,KAAK,CAAC,GAAG,EAAE;gBAC3B,MAAM,EAAE,MAAM;gBACd,OAAO,EAAE;oBACP,cAAc,EAAE,kBAAkB;oBAClC,GAAG,CAAC,GAAG,CAAC,MAAM,CAAC,CAAC,CAAC,EAAE,aAAa,EAAE,UAAU,GAAG,CAAC,MAAM,EAAE,EAAE,CAAC,CAAC,CAAC,EAAE,CAAC;oBAChE,GAAG,CAAC,GAAG,CAAC,OAAO,IAAI,EAAE,CAAC;iBACvB;gBACD,IAAI,EAAE,IAAI,CAAC,SAAS,CAAC;oBACnB,KAAK,EAAE,GAAG,CAAC,KAAK;oBAChB,WAAW,EAAE,CAAC;oBACd,QAAQ,EAAE;wBACR,EAAE,IAAI,EAAE,QAAQ,EAAE,OAAO,EAAE,MAAM,EAAE;wBACnC,EAAE,IAAI,EAAE,MAAM,EAAE,OAAO,EAAE,IAAI,EAAE;qBAChC;iBACF,CAAC;aACH,CAAC,CAAA;YACF,IAAI,CAAC,GAAG,CAAC,EAAE;gBAAE,MAAM,IAAI,KAAK,CAAC,wBAAwB,GAAG,CAAC,MAAM,EAAE,CAAC,CAAA;YAClE,MAAM,IAAI,GAAG,CAAC,MAAM,GAAG,CAAC,IAAI,EAAE,CAA4D,CAAA;YAC1F,MAAM,IAAI,GAAG,IAAI,CAAC,OAAO,EAAE,CAAC,CAAC,CAAC,EAAE,OAAO,EAAE,OAAO,IAAI,EAAE,CAAA;YACtD,wDAAwD;YACxD,OAAO,IAAI,CAAC,OAAO,CAAC,mBAAmB,EAAE,EAAE,CAAC,CAAC,OAAO,CAAC,UAAU,EAAE,EAAE,CAAC,CAAC,IAAI,EAAE,CAAA;QAC7E,CAAC;KACF,CAAA;AACH,CAAC"}
@@ -0,0 +1,38 @@
1
+ {
2
+ "id": "openguardrails",
3
+ "name": "OpenGuardrails",
4
+ "description": "Enforce the OpenGuardrails (OGR) protocol on tool calls and channel traffic — block, rewrite, or require human approval under a policy you own.",
5
+ "activation": {
6
+ "onStartup": true,
7
+ "onCapabilities": ["hook"]
8
+ },
9
+ "configSchema": {
10
+ "type": "object",
11
+ "additionalProperties": true,
12
+ "properties": {
13
+ "policyPath": {
14
+ "type": "string",
15
+ "description": "Path to an OGR policy.json. Defaults to <workspace>/openguardrails.json."
16
+ },
17
+ "policy": {
18
+ "type": "object",
19
+ "description": "Inline OGR policy (composition + config_rules). Overrides the file and the default.",
20
+ "additionalProperties": true
21
+ },
22
+ "judge": {
23
+ "type": "object",
24
+ "description": "Use your own model as an LLM-judge detector (any OpenAI-compatible chat endpoint).",
25
+ "additionalProperties": true,
26
+ "properties": {
27
+ "baseURL": { "type": "string" },
28
+ "model": { "type": "string" },
29
+ "apiKey": { "type": "string" }
30
+ }
31
+ },
32
+ "guardMessages": {
33
+ "type": "boolean",
34
+ "description": "Also evaluate outbound channel messages (defaults to true)."
35
+ }
36
+ }
37
+ }
38
+ }
package/package.json ADDED
@@ -0,0 +1,48 @@
1
+ {
2
+ "name": "openguardrails-instrumentation-openclaw",
3
+ "version": "0.1.0",
4
+ "description": "Guard an OpenClaw assistant's tool calls and channel traffic through the OpenGuardrails (OGR) protocol — agent-configurable text/regex guardrails, or use your own model as the judge. No core changes.",
5
+ "type": "module",
6
+ "license": "Apache-2.0",
7
+ "author": "OpenGuardrails",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js"
12
+ }
13
+ },
14
+ "main": "./dist/index.js",
15
+ "types": "./dist/index.d.ts",
16
+ "files": ["dist", "src", "openclaw.plugin.json"],
17
+ "openclaw": {
18
+ "extensions": ["./dist/index.js"],
19
+ "compat": {
20
+ "pluginApi": ">=2026.3.24-beta.2",
21
+ "minGatewayVersion": "2026.3.24-beta.2"
22
+ }
23
+ },
24
+ "scripts": {
25
+ "build": "tsc -b",
26
+ "clean": "tsc -b --clean"
27
+ },
28
+ "keywords": ["openclaw", "plugin", "ai", "agent", "security", "guardrails", "ogr", "openguardrails"],
29
+ "dependencies": {
30
+ "@openguardrails/core": "^0.1.0"
31
+ },
32
+ "peerDependencies": {
33
+ "openclaw": "*"
34
+ },
35
+ "devDependencies": {
36
+ "@types/node": "^22",
37
+ "openclaw": "^2026.6.10"
38
+ },
39
+ "repository": {
40
+ "type": "git",
41
+ "url": "git+https://github.com/openguardrails/openguardrails-js.git",
42
+ "directory": "packages/instrumentation-openclaw"
43
+ },
44
+ "homepage": "https://openguardrails.com",
45
+ "publishConfig": {
46
+ "access": "public"
47
+ }
48
+ }
package/src/config.ts ADDED
@@ -0,0 +1,117 @@
1
+ /**
2
+ * Guardrails configuration for the OpenClaw integration.
3
+ *
4
+ * The assistant configures its OWN guardrails — text + regex rules (no model
5
+ * needed), and optionally its own model as an LLM judge. Resolution order
6
+ * (lowest → highest precedence):
7
+ *
8
+ * 1. a sensible default policy (below)
9
+ * 2. `<workspace>/openguardrails.json` (agent-editable — this is how an
10
+ * assistant gives itself guardrails); path overridable via plugin config
11
+ * `policyPath` or the `OPENGUARDRAILS_POLICY` env var
12
+ * 3. inline plugin config `policy` (set in OpenClaw config under
13
+ * `plugins.entries.openguardrails.config`)
14
+ *
15
+ * The policy IS an OGR policy.json (composition + config_rules), so the same
16
+ * file format works across every OGR integration (opencode, hermes, python).
17
+ */
18
+ import { readFileSync, existsSync } from "node:fs"
19
+ import { join } from "node:path"
20
+ import type { Policy } from "@openguardrails/core"
21
+
22
+ /** Connection settings for "use your own model as the guardrail" — any OpenAI-compatible chat endpoint. */
23
+ export interface JudgeConfig {
24
+ baseURL: string // API root; the backend strips trailing slashes and appends "/chat/completions"
25
+ model: string // model name sent in the request body and echoed in the backend's name
26
+ apiKey?: string // when set, sent as an `authorization: Bearer …` header
27
+ headers?: Record<string, string> // extra request headers, spread last so they override the defaults
28
+ }
29
+
30
+ /** Plugin config, delivered through OpenClaw `plugins.entries.openguardrails.config`. */
31
+ export interface GuardrailsOptions {
32
+ /** Inline OGR policy; when set it replaces both the policy file and the default. */
33
+ policy?: Policy
34
+ /** Path to a guardrails policy file (defaults to <workspace>/openguardrails.json). */
35
+ policyPath?: string
36
+ /** Enable the LLM-judge detector backed by your own model; takes precedence over a `judge` key in the policy. */
37
+ judge?: JudgeConfig
38
+ /** Evaluate channel messages too (default true). Only the outbound `message_sending` hook is wired up in this version. */
39
+ guardMessages?: boolean
40
+ }
41
+
42
/** `curl`/`wget` output piped into `sh` or `bash` — needs a human to approve. */
const pipeToShellRule = {
  id: "pipe-to-shell",
  regex: "(curl|wget)\\b.*\\|\\s*(ba)?sh",
  category: "security.malicious_command",
  decision: "require_approval",
  score: 0.85,
  why: "remote script fetched and piped directly into a shell",
} as const

/** `rm -rf /` where the bare root is followed by whitespace or end of input — always blocked. */
const rmRfRootRule = {
  id: "rm-rf-root",
  regex: "rm\\s+-rf\\s+/(\\s|$)",
  category: "security.malicious_command",
  decision: "block",
  score: 1.0,
  why: "destructive recursive delete of the filesystem root",
} as const

/** Any command text that mentions a well-known credential file — always blocked. */
const secretFileAccessRule = {
  id: "secret-file-access",
  regex: "(\\.env\\b|/\\.aws/credentials|/\\.ssh/id_|/\\.ssh/|auth\\.json|\\.netrc)",
  category: "security.secret_leak",
  decision: "block",
  score: 0.9,
  why: "command references a credential file — independent of the reader",
} as const

/** Anything piped into `sudo` — needs a human to approve. */
const pipeToSudoRule = {
  id: "pipe-to-sudo",
  regex: "\\|\\s*sudo\\b",
  category: "security.privilege_escalation",
  decision: "require_approval",
  score: 0.7,
  why: "output piped into sudo",
} as const

/**
 * Built-in policy used when neither a policy file nor an inline policy is
 * supplied. Purely deterministic text/regex rules; no model is involved.
 */
export const DEFAULT_POLICY: Policy = {
  composition: {
    "security.*": { strategy: "deny-wins", on_all_failed: "block" },
    default: { strategy: "deny-wins" },
  },
  config_rules: {
    secret_env_markers: ["SECRET", "TOKEN", "KEY", "PASSWORD", "AWS_", "PRIVATE", "CREDENTIAL"],
    command_rules: [pipeToShellRule, rmRfRootRule, secretFileAccessRule, pipeToSudoRule],
  },
}
86
+
87
+ export interface ResolvedConfig { // what loadGuardrailsConfig returns
88
+ policy: Policy // inline policy, else the parsed policy file, else DEFAULT_POLICY
89
+ judge?: JudgeConfig // from the plugin options, else the policy's `judge` key
90
+ guardMessages: boolean // plugin option, defaulting to true
91
+ }
92
+
93
/**
 * Resolve the effective guardrails configuration.
 *
 * Policy precedence (lowest → highest): `DEFAULT_POLICY`, then the policy
 * file, then the inline `options.policy`. The policy file is looked up at
 * `options.policyPath`, else the `OPENGUARDRAILS_POLICY` env var, else
 * `<workspaceDir>/openguardrails.json`; empty-string overrides are ignored.
 *
 * A policy file that is missing, unreadable, not valid JSON, or whose top
 * level is not a JSON object never replaces the default policy: the default
 * stays in force and a warning is logged, so a broken file cannot silently
 * remove every rule (or crash later lookups on a `null` policy).
 *
 * @param workspaceDir OpenClaw workspace (known at `gateway_start`); when
 *   undefined, `process.cwd()` is used so a file can still be resolved during
 *   early registration.
 * @param options Plugin config; every field is optional.
 * @returns The policy, the optional judge (from `options.judge`, else the
 *   policy's `judge` key) and `guardMessages` (default `true`).
 */
export function loadGuardrailsConfig(workspaceDir: string | undefined, options?: GuardrailsOptions): ResolvedConfig {
  let policy: Policy = DEFAULT_POLICY

  // `||` rather than `??`: an empty override (e.g. `OPENGUARDRAILS_POLICY=`)
  // must fall through to the next source instead of shadowing the workspace file.
  const path =
    options?.policyPath ||
    process.env["OPENGUARDRAILS_POLICY"] ||
    join(workspaceDir ?? process.cwd(), "openguardrails.json")

  if (existsSync(path)) {
    try {
      const parsed: unknown = JSON.parse(readFileSync(path, "utf8"))
      // JSON.parse also accepts `null`, arrays and scalars — none of those is a policy.
      if (typeof parsed === "object" && parsed !== null && !Array.isArray(parsed)) {
        policy = parsed as Policy
      } else {
        console.warn(`[OpenGuardrails] ${path} is not a JSON object; keeping the default policy`)
      }
    } catch (err) {
      // Unreadable or malformed file → keep the safe default, but say so.
      const detail = err instanceof Error ? err.message : String(err)
      console.warn(`[OpenGuardrails] could not load ${path} (${detail}); keeping the default policy`)
    }
  }
  if (options?.policy) policy = options.policy

  const judge = options?.judge ?? (policy["judge"] as JudgeConfig | undefined)
  return { policy, judge, guardMessages: options?.guardMessages ?? true }
}
package/src/index.ts ADDED
@@ -0,0 +1,191 @@
1
+ /**
2
+ * openguardrails-instrumentation-openclaw
3
+ *
4
+ * An OpenClaw plugin that guards an assistant through the OpenGuardrails (OGR)
5
+ * protocol — the multi-channel counterpart of
6
+ * `openguardrails-instrumentation-opencode`.
7
+ *
8
+ * It registers in-process plugin hooks, turns each event into an OGR
9
+ * `GuardEvent`, runs it through a `Runtime` built from the assistant's own
10
+ * guardrails policy (text/regex rules, plus optionally its own model as an LLM
11
+ * judge), and enforces the `Verdict`:
12
+ *
13
+ * before_tool_call allow | modify | redact → proceed
14
+ * block → { block }
15
+ * require_approval → { requireApproval } (human gate)
16
+ *
17
+ * message_sending allow | modify | redact → deliver
18
+ * block | require_approval → { cancel } (outbound guard)
19
+ *
20
+ * No OpenClaw core changes required. This is a "restrict-only" guard: it can
21
+ * stop a would-run tool call or a would-send message, never loosen one. The
22
+ * human-confirm gate (`requireApproval`) and enforcement stay privilege-
23
+ * separated: the plugin decides, the user approves, the host enforces.
24
+ */
25
+ import { definePluginEntry } from "openclaw/plugin-sdk/plugin-entry"
26
+ import {
27
+ Runtime,
28
+ ConfigRulesDetector,
29
+ LLMJudgeDetector,
30
+ type Detector,
31
+ type GuardEvent,
32
+ type Verdict,
33
+ } from "@openguardrails/core"
34
+ import { loadGuardrailsConfig, type GuardrailsOptions } from "./config.js"
35
+ import { openAICompatibleBackend } from "./own-model.js"
36
+
37
/** Module-wide counter; bumped once per generated identifier. */
let seq = 0

/**
 * Mint an identifier shaped `<prefix>-<counter>-<suffix>`. The counter is the
 * base-36 call number; the suffix is the first 8 characters of a random UUID
 * when `crypto.randomUUID` exists, otherwise the zero-padded counter.
 */
function id(prefix: string): string {
  seq += 1
  const counter = seq.toString(36)
  const uuid = globalThis.crypto?.randomUUID?.()
  const suffix = uuid !== undefined ? uuid.slice(0, 8) : counter.padStart(8, "0")
  return [prefix, counter, suffix].join("-")
}
43
+
44
/**
 * One-line, human-readable summary of a verdict: scored categories, then the
 * reasons that do not start with "[", joined by an em dash. Falls back to the
 * bare decision when both parts are empty.
 */
function brief(v: Verdict): string {
  const parts: string[] = []

  const categories = v.categories.map((c) => `${c.id}(${c.score})`).join(", ")
  if (categories) parts.push(categories)

  const reasons = v.reasons.filter((r) => !r.startsWith("[")).join("; ")
  if (reasons) parts.push(reasons)

  return parts.length > 0 ? parts.join(" — ") : v.decision
}
49
+
50
/**
 * Owns the OGR runtime and builds it on demand. The policy file sits in the
 * workspace, which is only known at `gateway_start`, so the tool/message hooks
 * trigger a build themselves if they fire before `configure` has run.
 */
class GuardManager {
  private cached: Runtime | undefined
  private messageGuard = true
  private workspace: string | undefined
  private pluginOptions: GuardrailsOptions | undefined

  /** Store the workspace/options and rebuild the runtime from them immediately. */
  configure(workspaceDir: string | undefined, options: GuardrailsOptions | undefined): void {
    this.workspace = workspaceDir
    this.pluginOptions = options
    this.cached = undefined // drop the old runtime so the next line rebuilds it
    this.current()
  }

  /** Return the cached runtime, building (and caching) it on first use. */
  private current(): Runtime {
    if (this.cached === undefined) {
      const resolved = loadGuardrailsConfig(this.workspace, this.pluginOptions)
      const rules = new ConfigRulesDetector(resolved.policy.config_rules ?? {})
      // The LLM judge is only added when a judge config was resolved.
      const detectors: Detector[] = resolved.judge
        ? [rules, new LLMJudgeDetector(openAICompatibleBackend(resolved.judge))]
        : [rules]
      this.messageGuard = resolved.guardMessages
      this.cached = new Runtime(detectors, resolved.policy)
    }
    return this.cached
  }

  /** Whether channel messages should be evaluated; loads the config if needed. */
  get messagesEnabled(): boolean {
    this.current()
    return this.messageGuard
  }

  /** Run one guard event through the runtime and resolve to its verdict. */
  evaluate(ev: GuardEvent): Promise<Verdict> {
    return this.current().evaluate(ev)
  }
}
87
+
88
/**
 * Pull this plugin's options out of the OpenClaw config tree
 * (`plugins.entries.openguardrails.config`). Best effort: any missing level
 * yields `undefined`, and the value is cast without validation.
 */
function readOptions(config: unknown): GuardrailsOptions | undefined {
  type ConfigTree = { plugins?: { entries?: Record<string, { config?: unknown }> } }
  const tree = config as ConfigTree | null | undefined
  const ownEntry = tree?.plugins?.entries?.["openguardrails"]
  return ownEntry?.config as GuardrailsOptions | undefined
}
93
+
94
// The explicit `ReturnType<typeof definePluginEntry>` annotation keeps the
// emitted declaration from inlining OpenClaw's non-exported
// `DefinedPluginEntry` type (TS2742 portability).
const plugin: ReturnType<typeof definePluginEntry> = definePluginEntry({
  id: "openguardrails",
  name: "OpenGuardrails",
  description:
    "Enforce the OpenGuardrails (OGR) protocol on tool calls and channel traffic — block, rewrite, or require human approval under a policy you own.",
  register(api) {
    const guard = new GuardManager()

    // Gateway is up: the workspace is known, so (re)build from its policy.
    api.on("gateway_start", (_event, ctx) => {
      const { workspaceDir, config } = ctx as { workspaceDir?: string; config?: unknown }
      guard.configure(workspaceDir, readOptions(config))
    })

    // Tool-call gate: evaluated before every tool runs.
    api.on(
      "before_tool_call",
      async (event, ctx) => {
        const { agentId, sessionKey, channelId } = ctx as {
          agentId?: string
          sessionKey?: string
          channelId?: string
        }
        const toolCall: GuardEvent = {
          kind: "tool_call",
          observationPoint: "agent_hook",
          subject: {
            agent_id: agentId ?? "openclaw",
            agent_type: "openclaw",
            session_id: sessionKey,
            channel: channelId,
          },
          payload: { name: event.toolName, arguments: event.params },
          eventId: id("evt"),
          // Reuse the host's tool-call id when it provides one.
          guardId: event.toolCallId ?? id("ga"),
          timestamp: new Date().toISOString(),
          sessionId: sessionKey,
          // v0.1 treats the principal as trusted. Tainting from inbound
          // channel content (message/web/mcp → untrusted provenance) is a
          // follow-up that needs message_received correlation.
          provenance: [{ source: "user", trust: "trusted" }],
        }

        const verdict = await guard.evaluate(toolCall)

        switch (verdict.decision) {
          case "block":
            return { block: true, blockReason: `[OpenGuardrails] ${brief(verdict)}` }
          case "require_approval":
            // Hand the decision to the host's human-approval gate; a timeout denies.
            return {
              requireApproval: {
                title: `Approve ${event.toolName}?`,
                description: `[OpenGuardrails] ${brief(verdict)}`,
                severity: "warning",
                timeoutBehavior: "deny",
                pluginId: "openguardrails",
              },
            }
          default:
            // allow | modify | redact → the call proceeds unchanged
            return
        }
      },
      { priority: 50 },
    )

    // Outbound gate: cancel a reply that a deny verdict forbids.
    api.on("message_sending", async (event, ctx) => {
      if (!guard.messagesEnabled) return

      const { content } = event as { content?: string }
      const { agentId, sessionKey, messageProvider } = ctx as {
        agentId?: string
        sessionKey?: string
        messageProvider?: string
      }
      const outbound: GuardEvent = {
        kind: "model_output",
        observationPoint: "gateway",
        subject: { agent_id: agentId ?? "openclaw", agent_type: "openclaw", session_id: sessionKey },
        payload: { content: content ?? "", channel: messageProvider },
        eventId: id("evt"),
        guardId: id("ga"),
        timestamp: new Date().toISOString(),
        sessionId: sessionKey,
        provenance: [{ source: "model", trust: "unverified" }],
      }

      const verdict = await guard.evaluate(outbound)
      // There is no approval flow for messages, so require_approval cancels too.
      const denied = verdict.decision === "block" || verdict.decision === "require_approval"
      if (!denied) return
      return {
        cancel: true,
        cancelReason: `openguardrails:${verdict.decision}`,
        metadata: { reason: brief(verdict) },
      }
    })
  },
})
187
+
188
+ export default plugin
189
+
190
+ export { DEFAULT_POLICY } from "./config.js"
191
+ export type { GuardrailsOptions, JudgeConfig } from "./config.js"
package/src/own-model.ts ADDED
@@ -0,0 +1,40 @@
1
+ /**
2
+ * "Use your own model as the guardrail" — an OGR LLMBackend that calls any
3
+ * OpenAI-compatible chat-completions endpoint. Point it at the same model the
4
+ * assistant already uses, a cheaper sibling, or a dedicated guard model.
5
+ *
6
+ * Identical contract to the opencode/hermes integrations: one OpenAI-compatible
7
+ * POST, OGR does the rest.
8
+ */
9
+ import type { LLMBackend } from "@openguardrails/core"
10
+ import type { JudgeConfig } from "./config.js"
11
+
12
/**
 * Build an OGR `LLMBackend` that sends one POST to
 * `<baseURL>/chat/completions` on an OpenAI-compatible endpoint.
 *
 * @param cfg Judge settings. `baseURL` has trailing slashes stripped;
 *   `apiKey`, when set, becomes a Bearer `authorization` header; `headers`
 *   are spread last and therefore override the defaults.
 * @returns A backend named `own-model:<model>` whose `complete(system, user)`
 *   resolves to the first choice's message text, with a surrounding
 *   ``` / ```json fence removed and whitespace trimmed.
 * @throws Error at construction when `baseURL` is missing or empty (the config
 *   may come from unvalidated JSON), and from `complete` when the endpoint
 *   answers with a non-2xx status.
 */
export function openAICompatibleBackend(cfg: JudgeConfig): LLMBackend {
  if (typeof cfg.baseURL !== "string" || cfg.baseURL === "") {
    throw new Error("openguardrails judge config requires a non-empty `baseURL`")
  }
  const url = cfg.baseURL.replace(/\/+$/, "") + "/chat/completions"
  return {
    name: `own-model:${cfg.model}`,
    async complete(system: string, user: string): Promise<string> {
      const res = await fetch(url, {
        method: "POST",
        headers: {
          "content-type": "application/json",
          ...(cfg.apiKey ? { authorization: `Bearer ${cfg.apiKey}` } : {}),
          ...(cfg.headers ?? {}),
        },
        body: JSON.stringify({
          model: cfg.model,
          temperature: 0,
          messages: [
            { role: "system", content: system },
            { role: "user", content: user },
          ],
        }),
      })
      if (!res.ok) throw new Error(`guard model returned ${res.status}`)
      const data = (await res.json()) as { choices?: Array<{ message?: { content?: unknown } }> }
      const text = contentToText(data.choices?.[0]?.message?.content)
      // Strip a ```json fence if the model wrapped its reply.
      return text.replace(/^```(?:json)?\s*/i, "").replace(/\s*```$/i, "").trim()
    },
  }
}

/**
 * Flatten a chat-completion `message.content` value to plain text. Strings
 * pass through; an array is treated as content parts whose string `text`
 * fields are concatenated (some OpenAI-compatible servers reply this way);
 * anything else (null, missing, objects) becomes "".
 */
function contentToText(content: unknown): string {
  if (typeof content === "string") return content
  if (!Array.isArray(content)) return ""
  return content
    .map((part: unknown) => {
      if (typeof part === "string") return part
      const text = (part as { text?: unknown } | null)?.text
      return typeof text === "string" ? text : ""
    })
    .join("")
}