@safefence/openclaw-guardrails 0.6.1 → 0.6.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +414 -67
- package/dist/core/token-usage-store.d.ts +5 -0
- package/dist/core/token-usage-store.js +23 -19
- package/dist/plugin/event-adapter.d.ts +1 -9
- package/dist/plugin/openclaw-adapter.d.ts +1 -0
- package/dist/plugin/openclaw-adapter.js +6 -22
- package/dist/plugin/openclaw-extension.js +16 -28
- package/dist/plugin/version.d.ts +1 -0
- package/dist/plugin/version.js +1 -0
- package/dist/redaction/redact.js +18 -3
- package/openclaw.plugin.json +1 -1
- package/package.json +13 -2
package/README.md
CHANGED
|
@@ -1,5 +1,8 @@
|
|
|
1
1
|
# OpenClaw Guardrails
|
|
2
2
|
|
|
3
|
+
[npm package](https://www.npmjs.com/package/@safefence/openclaw-guardrails)
|
|
4
|
+
[npm provenance](https://docs.npmjs.com/generating-provenance-statements)
|
|
5
|
+
|
|
3
6
|
> **Experimental** -- This project is under active development and not yet production-ready. APIs, config schemas, and behavior may change without notice between releases.
|
|
4
7
|
|
|
5
8
|
Native TypeScript security kernel for OpenClaw (`>=2026.2.25`) with deterministic local enforcement, principal-aware authorization, and owner approval for group/multi-user safety.
|
|
@@ -19,6 +22,200 @@ Native TypeScript security kernel for OpenClaw (`>=2026.2.25`) with deterministi
|
|
|
19
22
|
- Zero runtime dependencies — uses only Node.js built-ins (`fetch()`, `fs`).
|
|
20
23
|
- Audit mode still applies redaction by default.
|
|
21
24
|
|
|
25
|
+
## How It Works
|
|
26
|
+
|
|
27
|
+
### Plugin ↔ Engine Flow
|
|
28
|
+
|
|
29
|
+
The plugin has three layers: `openclaw-extension.ts` registers typed hooks with OpenClaw, `event-adapter.ts` maps between OpenClaw's structured `(event, ctx)` pairs and the internal `OpenClawContext`, and `openclaw-adapter.ts` converts contexts into `GuardEvent`s for the engine.
|
|
30
|
+
|
|
31
|
+
```
|
|
32
|
+
OpenClaw Runtime
|
|
33
|
+
│
|
|
34
|
+
▼
|
|
35
|
+
openclaw-extension.ts ──► api.on(hookName, handler)
|
|
36
|
+
│
|
|
37
|
+
├──► event-adapter.ts: map*(event, ctx) → OpenClawContext
|
|
38
|
+
│
|
|
39
|
+
▼
|
|
40
|
+
openclaw-adapter.ts
|
|
41
|
+
│ toEvent(phase, ctx) → GuardEvent
|
|
42
|
+
│
|
|
43
|
+
▼
|
|
44
|
+
GuardrailsEngine ──► engine.evaluate(guardEvent, phase)
|
|
45
|
+
│
|
|
46
|
+
▼
|
|
47
|
+
GuardDecision
|
|
48
|
+
│
|
|
49
|
+
▼
|
|
50
|
+
openclaw-adapter.ts
|
|
51
|
+
│ applyRolloutPolicy()
|
|
52
|
+
│ updateMetrics()
|
|
53
|
+
│
|
|
54
|
+
▼
|
|
55
|
+
OpenClawHookResult
|
|
56
|
+
│
|
|
57
|
+
▼
|
|
58
|
+
event-adapter.ts: mapTo*Result(hookResult) → typed result
|
|
59
|
+
│
|
|
60
|
+
▼
|
|
61
|
+
OpenClaw Runtime ◄── hook-specific return value
|
|
62
|
+
```
|
|
63
|
+
|
|
64
|
+
### Hook Lifecycle
|
|
65
|
+
|
|
66
|
+
Six lifecycle hooks span the full agent interaction. Each hook has different blocking/redaction capabilities:
|
|
67
|
+
|
|
68
|
+
```
|
|
69
|
+
User / Channel OpenClaw Guardrails Plugin
|
|
70
|
+
│ │ │
|
|
71
|
+
│ ┌──────────────────────────────────────────────────────┐
|
|
72
|
+
│ │ 1. Agent Initialization │
|
|
73
|
+
│ │ OC ──► before_agent_start(prompt, agentCtx) │
|
|
74
|
+
│ │ OC ◄── { prependSystemContext: securityPolicy } │
|
|
75
|
+
│ │ Injects immutable security prompt │
|
|
76
|
+
│ └──────────────────────────────────────────────────────┘
|
|
77
|
+
│ │ │
|
|
78
|
+
│ ┌──────────────────────────────────────────────────────┐
|
|
79
|
+
│ │ 2. Inbound Message │
|
|
80
|
+
│ ──► │ OC ──► message_received(from, content, channelCtx)│
|
|
81
|
+
│ │ OC ◄── void (observe-only, cannot block) │
|
|
82
|
+
│ │ Audits violations, defers enforcement │
|
|
83
|
+
│ └──────────────────────────────────────────────────────┘
|
|
84
|
+
│ │ │
|
|
85
|
+
│ ┌──────────────────────────────────────────────────────┐
|
|
86
|
+
│ │ 3. Tool Execution Gate │
|
|
87
|
+
│ │ OC ──► before_tool_call(toolName, params, agentCtx)│
|
|
88
|
+
│ │ OC ◄── { block: true, blockReason } or {} │
|
|
89
|
+
│ │ *** Primary enforcement point *** │
|
|
90
|
+
│ └──────────────────────────────────────────────────────┘
|
|
91
|
+
│ │ │
|
|
92
|
+
│ ┌──────────────────────────────────────────────────────┐
|
|
93
|
+
│ │ 4. Tool Result Persistence │
|
|
94
|
+
│ │ OC ──► tool_result_persist(message, toolCtx) │
|
|
95
|
+
│ │ OC ◄── { message: { content: redacted } } or {} │
|
|
96
|
+
│ │ Sync regex redaction; async engine eval for audit │
|
|
97
|
+
│ └──────────────────────────────────────────────────────┘
|
|
98
|
+
│ │ │
|
|
99
|
+
│ ┌──────────────────────────────────────────────────────┐
|
|
100
|
+
│ │ 5. Outbound Message Gate │
|
|
101
|
+
│ │ OC ──► message_sending(content, channelCtx) │
|
|
102
|
+
│ │ OC ◄── { cancel: true } or { content: redacted } │
|
|
103
|
+
│ │ Blocks system prompt leaks │
|
|
104
|
+
│ │ Always enforced in stage_b rollout │
|
|
105
|
+
│ └──────────────────────────────────────────────────────┘
|
|
106
|
+
│ │ │
|
|
107
|
+
│ ┌──────────────────────────────────────────────────────┐
|
|
108
|
+
│ │ 6. Session End │
|
|
109
|
+
│ │ OC ──► agent_end(messages, success, agentCtx) │
|
|
110
|
+
│ │ OC ◄── void (observe-only) │
|
|
111
|
+
│ │ Emits metrics + monitoring snapshot │
|
|
112
|
+
│ └──────────────────────────────────────────────────────┘
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
### Hook Capability Matrix
|
|
116
|
+
|
|
117
|
+
| Hook | Can Block | Can Redact | Can Cancel | Return Type |
|
|
118
|
+
|---|---|---|---|---|
|
|
119
|
+
| `before_agent_start` | No | No | No | `{ prependSystemContext }` |
|
|
120
|
+
| `message_received` | No (void) | No | No | void |
|
|
121
|
+
| `before_tool_call` | **Yes** | No | No | `{ block, blockReason }` |
|
|
122
|
+
| `tool_result_persist` | No | **Yes** (sync) | No | `{ message }` |
|
|
123
|
+
| `message_sending` | **Yes** | **Yes** | **Yes** | `{ cancel }` or `{ content }` |
|
|
124
|
+
| `agent_end` | No (void) | No | No | void |
|
|
125
|
+
|
|
126
|
+
### Detector Pipeline
|
|
127
|
+
|
|
128
|
+
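The 12 detectors are evaluated in order for every `engine.evaluate()` call; detectors that are scoped to specific phases or conditions (see the diagram and table below) are skipped when they do not apply. No short-circuiting — an early DENY does not skip later detectors. All hits are merged, then `DENY > REDACT > ALLOW` precedence determines the outcome.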
|
|
129
|
+
|
|
130
|
+
```
|
|
131
|
+
Engine.evaluate()
|
|
132
|
+
│
|
|
133
|
+
│ normalizeGuardEvent(rawEvent)
|
|
134
|
+
│
|
|
135
|
+
├──► D1 Input Intent ── size limits, injection, exfil, context probes ──► hits[]
|
|
136
|
+
├──► D2 Command Policy ── tool allowlist, binary allowlist, shell ops ──► hits[]
|
|
137
|
+
│ (before_tool_call only)
|
|
138
|
+
├──► D3 Path Canonical ── path traversal, workspace boundary, symlinks ──► hits[]
|
|
139
|
+
│ (async realpath, before_tool_call only)
|
|
140
|
+
├──► D4 Network Egress ── host allowlist, private egress, DNS ──► hits[]
|
|
141
|
+
│ (async DNS, before_tool_call only)
|
|
142
|
+
├──► D5 Provenance ── supply chain trust + retrieval trust ──► hits[]
|
|
143
|
+
│ (async, before_tool_call only)
|
|
144
|
+
├──► D6 Principal Authz ── identity, RBAC, mention-gating ──► hits[] + approvalRequirement?
|
|
145
|
+
│ (anti-spoofing: owner/admin derived from config only)
|
|
146
|
+
├──► D7 Owner Approval ── challenge/verify approval token ──► hits[] + approvalChallenge?
|
|
147
|
+
│ (only runs if D6 returned approvalRequirement)
|
|
148
|
+
├──► D8 Sensitive Data ── secret patterns → PII patterns (cascaded) ──► hits[] + redactedContent?
|
|
149
|
+
├──► D9 Restricted Info ── data-class redaction for non-owner principals ──► hits[] + redactedContent?
|
|
150
|
+
├──► D10 Output Safety ── prompt leak, injected filenames, suspicious patterns ──► hits[] + redactedContent?
|
|
151
|
+
│ (receives pre-redacted content from D9/D8)
|
|
152
|
+
├──► D11 Budget ── requests/min + tool calls/min (sliding window) ──► hits[]
|
|
153
|
+
└──► D12 Extensions ── external HTTP + custom validators ──► hits[]
|
|
154
|
+
(concurrent via Promise.all, custom validators fail-open)
|
|
155
|
+
│
|
|
156
|
+
│ decideFromHits(): DENY > REDACT > ALLOW
|
|
157
|
+
│ aggregateRisk(): 1 - exp(-weighted_sum)
|
|
158
|
+
│ finalizeDecision(): audit mode override
|
|
159
|
+
│ auditSink.append() if enabled
|
|
160
|
+
▼
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
#### Detector Details
|
|
164
|
+
|
|
165
|
+
| # | Detector | Active Phases | What It Checks | Decision | Weight |
|
|
166
|
+
|---|---|---|---|---|---|
|
|
167
|
+
| 1 | Input Intent | All | Input size limits, prompt injection patterns, exfiltration patterns, context probing (injected filenames, workspace probing) | DENY | 0.75–0.95 |
|
|
168
|
+
| 2 | Command Policy | `before_tool_call` | Tool allowlist, binary allowlist, shell operators, destructive command patterns, arg pattern validation | DENY | 0.8–1.0 |
|
|
169
|
+
| 3 | Path Canonical | `before_tool_call` | Path traversal patterns, workspace boundary (realpath), symlink traversal | DENY | 0.9–0.95 |
|
|
170
|
+
| 4 | Network Egress | `before_tool_call` | Host allowlist, private/local IP blocking, DNS resolution, egress tool detection | DENY | 0.7–0.9 |
|
|
171
|
+
| 5 | Provenance | `before_tool_call` | Skill source trust, hash integrity, retrieval trust level, signed source | DENY | 0.7–0.85 |
|
|
172
|
+
| 6 | Principal Authz | All | Identity resolution, role-based tool policy, mention-gating, group channel enforcement, data-class restrictions | DENY | 0.7–0.95 |
|
|
173
|
+
| 7 | Owner Approval | Conditional | Challenge creation, token verification (TTL, digest, conversation, replay) | DENY | 0.8–0.9 |
|
|
174
|
+
| 8 | Sensitive Data | All | Secret patterns (AWS keys, GitHub PATs, PEM keys, etc.), PII patterns (emails, SSNs, credit cards) | REDACT | 0.5–0.7 |
|
|
175
|
+
| 9 | Restricted Info | `message_received`, `tool_result_persist`, `message_sending` | Data-class policy for non-owner principals, cross-principal redaction | DENY/REDACT | 0.7–0.9 |
|
|
176
|
+
| 10 | Output Safety | `message_received`, `tool_result_persist`, `message_sending` | System prompt leak patterns, injected filename references, suspicious patterns (script tags, bearer tokens) | DENY/REDACT | 0.55–0.95 |
|
|
177
|
+
| 11 | Budget | All (tool calls: `before_tool_call` only) | Requests/minute, tool calls/minute (sliding 60s window, per-principal partitioned) | DENY | 0.65–0.75 |
|
|
178
|
+
| 12 | Extensions | All | External HTTP validators (circuit breaker, timeout), custom validator functions (phase-filtered) | DENY | 0.5–0.7 |
|
|
179
|
+
|
|
180
|
+
### Risk Scoring
|
|
181
|
+
|
|
182
|
+
Risk score formula: `1 - exp(-Σ(clamp(weight, 0, 1) × multiplier))` where DENY multiplier = 1.0, REDACT multiplier = 0.6. This produces a diminishing-returns curve: many small hits converge toward 1.0 but never exceed it. Rounded to 4 decimal places.
|
|
183
|
+
|
|
184
|
+
### Decision Finalization
|
|
185
|
+
|
|
186
|
+
```
|
|
187
|
+
All RuleHits merged
|
|
188
|
+
│
|
|
189
|
+
▼
|
|
190
|
+
Any DENY hit? ──Yes──► decision = DENY ──┐
|
|
191
|
+
│ No │
|
|
192
|
+
▼ ▼
|
|
193
|
+
Any REDACT hit? ──Yes──► decision = REDACT ──► mode = audit?
|
|
194
|
+
│ No │
|
|
195
|
+
▼ Yes ─┤── No
|
|
196
|
+
decision = ALLOW │ │
|
|
197
|
+
│ ▼ ▼
|
|
198
|
+
│ Override to ALLOW Return as-is
|
|
199
|
+
│ + AUDIT_WOULD_DENY with enforcement
|
|
200
|
+
│ + redact only if │
|
|
201
|
+
│ applyInAuditMode │
|
|
202
|
+
│ │ │
|
|
203
|
+
└─────────────────────────────────────┴─────────────────┘
|
|
204
|
+
│
|
|
205
|
+
▼
|
|
206
|
+
Return GuardDecision
|
|
207
|
+
```
|
|
208
|
+
|
|
209
|
+
### Rollout Stages
|
|
210
|
+
|
|
211
|
+
```
|
|
212
|
+
stage_a_audit ──────────────────► stage_b_high_risk_enforce ──────────► stage_c_full_enforce ──► Production
|
|
213
|
+
All violations message_sending: always enforce All violations
|
|
214
|
+
audit-only before_tool_call: enforce if enforced
|
|
215
|
+
highRiskTools
|
|
216
|
+
others: audit-only
|
|
217
|
+
```
|
|
218
|
+
|
|
22
219
|
## Security Features
|
|
23
220
|
|
|
24
221
|
### Identity and Authorization
|
|
@@ -26,37 +223,143 @@ Native TypeScript security kernel for OpenClaw (`>=2026.2.25`) with deterministi
|
|
|
26
223
|
- **Anti-spoofing**: privileged roles (`owner`/`admin`) are derived exclusively from `principal.ownerIds`/`adminIds` in config — caller-supplied `metadata.role` values of `"owner"` or `"admin"` are downgraded to `"member"`.
|
|
27
224
|
- Group-aware authorization (mention-gating + role-based tool policy).
|
|
28
225
|
|
|
29
|
-
### Approval Workflow
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
226
|
+
### Owner Approval Workflow
|
|
227
|
+
|
|
228
|
+
```
|
|
229
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
230
|
+
│ Phase 1: Challenge │
|
|
231
|
+
│ │
|
|
232
|
+
│ Agent ──► Engine: before_tool_call (restricted tool, member role) │
|
|
233
|
+
│ Engine ──► D6 Principal Authz: evaluateAuthorization() │
|
|
234
|
+
│ D6 ◄──── approvalRequirement (requiredRole, reason) │
|
|
235
|
+
│ Engine ──► D7 Owner Approval: detectOwnerApproval(requirement) │
|
|
236
|
+
│ D7 ──► ApprovalBroker: createChallenge(toolName, args, requesterId)│
|
|
237
|
+
│ ApprovalBroker: requestId = randomUUID() │
|
|
238
|
+
│ ApprovalBroker: actionDigest = SHA-256({toolName, args, ...}) │
|
|
239
|
+
│ ApprovalBroker ──► ApprovalStore: save(record, expiresAt) │
|
|
240
|
+
│ ApprovalBroker ──► NotificationSink: notify({requestId, ...}) │
|
|
241
|
+
│ D7 ◄── { requestId, expiresAt, requiredRole } │
|
|
242
|
+
│ Engine ◄── DENY + approvalChallenge │
|
|
243
|
+
│ Agent ◄── DENY with approvalChallenge.requestId │
|
|
244
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
245
|
+
|
|
246
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
247
|
+
│ Phase 2: Approval │
|
|
248
|
+
│ │
|
|
249
|
+
│ Owner ──► Engine: /approve <requestId> │
|
|
250
|
+
│ Engine ──► ApprovalBroker: approveRequest(requestId, ownerId, "owner")│
|
|
251
|
+
│ ApprovalBroker ──► ApprovalStore: lookup(requestId) │
|
|
252
|
+
│ ApprovalBroker: Verify not expired, role sufficient, not self │
|
|
253
|
+
│ ApprovalBroker: Check quorum (approverIds.length >= ownerQuorum?) │
|
|
254
|
+
│ ApprovalBroker: Generate token: apr_<uuid> │
|
|
255
|
+
│ ApprovalBroker ──► ApprovalStore: setToken(requestId, token) │
|
|
256
|
+
│ Engine ◄── token string │
|
|
257
|
+
│ Owner ◄── "Approved. Token: apr_..." │
|
|
258
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
259
|
+
|
|
260
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
261
|
+
│ Phase 3: Redemption │
|
|
262
|
+
│ │
|
|
263
|
+
│ Agent ──► Engine: before_tool_call (same tool + approval.token) │
|
|
264
|
+
│ Engine ──► D7: detectOwnerApproval(requirement) │
|
|
265
|
+
│ D7 ──► ApprovalBroker: verifyAndConsumeToken(token) │
|
|
266
|
+
│ Verify: not expired, not used, conversation match │
|
|
267
|
+
│ Verify: action digest match (same tool + args) │
|
|
268
|
+
│ ApprovalStore: markUsed(requestId) │
|
|
269
|
+
│ D7 ◄── "valid" │
|
|
270
|
+
│ Engine ◄── no hits (ALLOW) │
|
|
271
|
+
│ Agent ◄── ALLOW │
|
|
272
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
273
|
+
|
|
274
|
+
┌─────────────────────────────────────────────────────────────────────────┐
|
|
275
|
+
│ Replay Prevention │
|
|
276
|
+
│ │
|
|
277
|
+
│ Agent ──► Engine: before_tool_call (same token again) │
|
|
278
|
+
│ D7 ──► ApprovalBroker: verifyAndConsumeToken(token) │
|
|
279
|
+
│ Token already has usedAt timestamp │
|
|
280
|
+
│ D7 ◄── "replayed" │
|
|
281
|
+
│ Engine ◄── DENY (OWNER_APPROVAL_REPLAYED) │
|
|
282
|
+
│ Agent ◄── DENY │
|
|
283
|
+
└─────────────────────────────────────────────────────────────────────────┘
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
**Approval verification checks** (in order):
|
|
287
|
+
1. Token exists and maps to a valid record
|
|
288
|
+
2. Record not expired (TTL from creation)
|
|
289
|
+
3. Token not already consumed (`usedAt` is null)
|
|
290
|
+
4. RequestId matches (if provided by caller)
|
|
291
|
+
5. Requester identity matches original requester
|
|
292
|
+
6. Conversation matches (if `bindToConversation` enabled)
|
|
293
|
+
7. Action digest matches (SHA-256 of tool + args + context)
|
|
294
|
+
|
|
295
|
+
### Outbound Guard (System Prompt Leak Prevention)
|
|
296
|
+
|
|
297
|
+
```
|
|
298
|
+
Agent ──► Adapter: message_sending(context)
|
|
299
|
+
│
|
|
300
|
+
│ extractOutboundContent()
|
|
301
|
+
│ (scans ALL string fields, not just "content")
|
|
302
|
+
│
|
|
303
|
+
▼
|
|
304
|
+
Adapter ──► Engine: evaluate(guardEvent, "message_sending")
|
|
305
|
+
│
|
|
306
|
+
▼
|
|
307
|
+
Engine ──► D10 Output Safety: check leak patterns + injected filenames
|
|
308
|
+
│
|
|
309
|
+
├── System prompt content detected:
|
|
310
|
+
│ D10 → DENY (SYSTEM_PROMPT_LEAK, weight 0.95)
|
|
311
|
+
│ Agent ◄── { cancel: true }
|
|
312
|
+
│
|
|
313
|
+
├── Suspicious patterns (script tags, tokens):
|
|
314
|
+
│ D10 → REDACT (UNTRUSTED_OUTPUT, weight 0.55)
|
|
315
|
+
│ Agent ◄── { content: redactedContent }
|
|
316
|
+
│
|
|
317
|
+
└── Clean:
|
|
318
|
+
D10 → no hits → ALLOW
|
|
319
|
+
Agent ◄── {}
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
### `tool_result_persist` — Split Sync/Async Strategy
|
|
323
|
+
|
|
324
|
+
This hook is synchronous in OpenClaw but the engine is async. The adapter splits the work:
|
|
325
|
+
|
|
326
|
+
```
|
|
327
|
+
OpenClaw (sync) ──► Extension: tool_result_persist(event, ctx)
|
|
328
|
+
│
|
|
329
|
+
├── [Sync path — returns to OpenClaw immediately]
|
|
330
|
+
│ Extension: redactWithPatterns(content, precompiled patterns)
|
|
331
|
+
│ OpenClaw ◄── { message: { content: redacted } } or {}
|
|
332
|
+
│
|
|
333
|
+
└── [Async path — fire-and-forget]
|
|
334
|
+
Extension ──► Adapter: hooks.tool_result_persist(oclCtx)
|
|
335
|
+
Adapter: engine.evaluate() + metrics
|
|
336
|
+
Adapter ──► AuditSink: auditSink.append()
|
|
337
|
+
(Promise .catch() logs errors)
|
|
338
|
+
```
|
|
339
|
+
|
|
340
|
+
### Reason Code Sanitization
|
|
341
|
+
|
|
342
|
+
Sensitive reason codes are replaced before reaching the client to prevent detection fingerprinting:
|
|
343
|
+
|
|
344
|
+
| Internal Code | Client-Facing Code |
|
|
345
|
+
|---|---|
|
|
346
|
+
| `SECRET_DETECTED` | `CONTENT_POLICY_VIOLATION` |
|
|
347
|
+
| `PII_DETECTED` | `CONTENT_POLICY_VIOLATION` |
|
|
348
|
+
| `EXFIL_PATTERN` | `CONTENT_POLICY_VIOLATION` |
|
|
349
|
+
| `SYSTEM_PROMPT_LEAK` | `CONTENT_POLICY_VIOLATION` |
|
|
350
|
+
|
|
351
|
+
All other reason codes pass through unchanged.
|
|
352
|
+
|
|
353
|
+
### Redaction Cascade
|
|
354
|
+
|
|
355
|
+
Sensitive data, restricted info, and output safety detectors produce redacted content in a priority chain:
|
|
356
|
+
|
|
357
|
+
```
|
|
358
|
+
D8: Sensitive Data ──► D9: Restricted Info ──► D10: Output Safety ──► Engine picks:
|
|
359
|
+
(secrets → PII) (data-class policy) (leak patterns) D10 > D9 > D8
|
|
360
|
+
│ │ │
|
|
361
|
+
└── redactedContent ──► └── redactedContent ──► └── Final redactedContent
|
|
362
|
+
```
|
|
60
363
|
|
|
61
364
|
## Architecture
|
|
62
365
|
|
|
@@ -101,27 +404,28 @@ src/
|
|
|
101
404
|
│ ├── restricted-info-detector.ts # Non-privileged group redaction
|
|
102
405
|
│ └── sensitive-data-detector.ts # Secret/PII detection
|
|
103
406
|
├── plugin/
|
|
104
|
-
│ ├──
|
|
105
|
-
│
|
|
407
|
+
│ ├── version.ts # Shared version constant
|
|
408
|
+
│ ├── event-adapter.ts # OpenClaw typed hook ↔ internal context mapping
|
|
409
|
+
│ ├── openclaw-adapter.ts # Core guardrails engine adapter + telemetry
|
|
410
|
+
│ └── openclaw-extension.ts # Plugin entry point (api.on() typed hooks)
|
|
106
411
|
├── redaction/
|
|
107
|
-
│ └── redact.ts # Secret/PII redaction engine
|
|
412
|
+
│ └── redact.ts # Secret/PII redaction engine (cached regex)
|
|
108
413
|
└── rules/
|
|
109
414
|
├── default-policy.ts # Default config factory + merge
|
|
110
415
|
└── patterns.ts # Detection pattern definitions
|
|
111
416
|
```
|
|
112
417
|
|
|
113
|
-
##
|
|
418
|
+
## Provenance
|
|
114
419
|
|
|
115
|
-
|
|
116
|
-
2. Engine returns `DENY` with `OWNER_APPROVAL_REQUIRED` and `approvalChallenge`.
|
|
117
|
-
3. Owner/admin approves out-of-band and issues one-time token.
|
|
118
|
-
4. Caller retries with `metadata.approval.token` (and optionally `requestId`).
|
|
119
|
-
5. Engine verifies TTL, digest, conversation binding, requester identity binding, requestId (when provided), and replay status.
|
|
120
|
-
6. Valid token allows reevaluation and execution.
|
|
420
|
+
This package is published with [npm provenance](https://docs.npmjs.com/generating-provenance-statements) via GitHub Actions. Every published version includes a signed attestation linking the tarball to the exact source commit and build workflow in this repository.
|
|
121
421
|
|
|
122
|
-
|
|
422
|
+
You can verify provenance for any version:
|
|
123
423
|
|
|
124
|
-
|
|
424
|
+
```bash
|
|
425
|
+
npm audit signatures
|
|
426
|
+
```
|
|
427
|
+
|
|
428
|
+
The publish workflow (`.github/workflows/publish.yml`) uses GitHub's OIDC token (`id-token: write`) to generate Sigstore-backed provenance statements automatically — no manual signing keys are involved.
|
|
125
429
|
|
|
126
430
|
## Install in OpenClaw
|
|
127
431
|
|
|
@@ -158,7 +462,13 @@ After changing plugin install/config, restart the OpenClaw service or gateway pr
|
|
|
158
462
|
Three main entry points:
|
|
159
463
|
|
|
160
464
|
```ts
|
|
161
|
-
// 1.
|
|
465
|
+
// 1. OpenClaw plugin — default export, auto-discovered by OpenClaw via
|
|
466
|
+
// package.json "openclaw.extensions". Registers all typed hooks via api.on().
|
|
467
|
+
import { openclawGuardrailsPlugin } from "@safefence/openclaw-guardrails";
|
|
468
|
+
// openclawGuardrailsPlugin.register(api) is called automatically by OpenClaw.
|
|
469
|
+
|
|
470
|
+
// 2. Plugin factory — returns a guardrails engine with hook handlers,
|
|
471
|
+
// useful for testing or manual integration.
|
|
162
472
|
import { createOpenClawGuardrailsPlugin } from "@safefence/openclaw-guardrails";
|
|
163
473
|
|
|
164
474
|
const plugin = createOpenClawGuardrailsPlugin({
|
|
@@ -170,10 +480,6 @@ const plugin = createOpenClawGuardrailsPlugin({
|
|
|
170
480
|
// Out-of-band owner approval
|
|
171
481
|
const token = plugin.approveRequest(requestId, "owner-user-id", "owner");
|
|
172
482
|
|
|
173
|
-
// 2. OpenClaw extension entry — auto-registers all hooks from plugin config
|
|
174
|
-
import { registerOpenClawGuardrails } from "@safefence/openclaw-guardrails";
|
|
175
|
-
registerOpenClawGuardrails(api);
|
|
176
|
-
|
|
177
483
|
// 3. Engine directly — for custom integrations outside OpenClaw
|
|
178
484
|
import { GuardrailsEngine } from "@safefence/openclaw-guardrails";
|
|
179
485
|
const engine = new GuardrailsEngine(config);
|
|
@@ -242,6 +548,58 @@ const engine = new GuardrailsEngine(config, { customValidators: [spendingLimit]
|
|
|
242
548
|
|
|
243
549
|
| Section | Key | Type | Default | Description |
|
|
244
550
|
|---------|-----|------|---------|-------------|
|
|
551
|
+
| *(root)* | `mode` | `"enforce" \| "audit"` | `"enforce"` | Whether violations block or just log |
|
|
552
|
+
| *(root)* | `failClosed` | `boolean` | `true` | On engine error: DENY (true) or ALLOW (false) |
|
|
553
|
+
| *(root)* | `workspaceRoot` | `string` | `process.cwd()` | Anchor for path resolution |
|
|
554
|
+
| `allow` | `tools` | `string[]` | 8 tools | Allowed tool names |
|
|
555
|
+
| `allow` | `commands` | `CommandEntry[]` | 6 binaries | Allowed binaries with optional argPattern |
|
|
556
|
+
| `allow` | `writablePaths` | `string[]` | `[workspaceRoot]` | Filesystem write boundary |
|
|
557
|
+
| `allow` | `networkHosts` | `string[]` | localhost only | Allowed egress hosts |
|
|
558
|
+
| `allow` | `allowPrivateEgress` | `boolean` | `false` | Allow RFC 1918 / loopback destinations |
|
|
559
|
+
| `deny` | `commandPatterns` | `string[]` | 8 patterns | Destructive command regexes |
|
|
560
|
+
| `deny` | `pathPatterns` | `string[]` | 8 patterns | Path traversal regexes |
|
|
561
|
+
| `deny` | `promptInjectionPatterns` | `string[]` | 6 patterns | Injection attempt regexes |
|
|
562
|
+
| `deny` | `exfiltrationPatterns` | `string[]` | 4 patterns | Data exfiltration regexes |
|
|
563
|
+
| `deny` | `shellOperatorPatterns` | `string[]` | 9 patterns | Shell chaining/redirect regexes |
|
|
564
|
+
| `redaction` | `secretPatterns` | `string[]` | 7 patterns | Secret detection regexes (AWS, GitHub, PEM, etc.) |
|
|
565
|
+
| `redaction` | `piiPatterns` | `string[]` | 4 patterns | PII detection regexes (email, SSN, CC, phone) |
|
|
566
|
+
| `redaction` | `replacement` | `string` | `"[REDACTED]"` | Replacement string for matches |
|
|
567
|
+
| `redaction` | `applyInAuditMode` | `boolean` | `true` | Redact even when mode=audit |
|
|
568
|
+
| `limits` | `maxInputChars` | `number` | `20000` | Max input content length |
|
|
569
|
+
| `limits` | `maxToolArgChars` | `number` | `10000` | Max serialized tool args length |
|
|
570
|
+
| `limits` | `maxOutputChars` | `number` | `50000` | Max tool output length |
|
|
571
|
+
| `limits` | `maxRequestsPerMinute` | `number` | `120` | Rate limit: requests per 60s window |
|
|
572
|
+
| `limits` | `maxToolCallsPerMinute` | `number` | `60` | Rate limit: tool calls per 60s window |
|
|
573
|
+
| `pathPolicy` | `enforceCanonicalRealpath` | `boolean` | `true` | Resolve symlinks and verify workspace boundary |
|
|
574
|
+
| `pathPolicy` | `denySymlinkTraversal` | `boolean` | `true` | Block symlinks that escape workspace |
|
|
575
|
+
| `supplyChain` | `trustedSkillSources` | `string[]` | — | Allowed skill installation domains |
|
|
576
|
+
| `supplyChain` | `requireSkillHash` | `boolean` | `true` | Require hash for remote skills |
|
|
577
|
+
| `supplyChain` | `allowedSkillHashes` | `string[]` | — | Pre-approved skill hashes |
|
|
578
|
+
| `principal` | `requireContext` | `boolean` | `true` | Require identity context |
|
|
579
|
+
| `principal` | `ownerIds` | `string[]` | `[]` | User IDs with owner privilege |
|
|
580
|
+
| `principal` | `adminIds` | `string[]` | `[]` | User IDs with admin privilege |
|
|
581
|
+
| `principal` | `failUnknownInGroup` | `boolean` | `true` | Deny unknown users in group channels |
|
|
582
|
+
| `authorization` | `defaultEffect` | `"deny" \| "allow"` | `"deny"` | Default when no explicit rule matches |
|
|
583
|
+
| `authorization` | `requireMentionInGroups` | `boolean` | `true` | Require @mention for group messages |
|
|
584
|
+
| `authorization` | `restrictedTools` | `string[]` | 6 tools | Tools requiring elevated role or approval |
|
|
585
|
+
| `authorization` | `restrictedDataClasses` | `string[]` | — | Data classes requiring elevated access |
|
|
586
|
+
| `authorization` | `toolAllowByRole` | `Record<Role, string[]>` | Role-tiered | Per-role tool access lists |
|
|
587
|
+
| `approval` | `enabled` | `boolean` | `true` | Enable owner approval workflow |
|
|
588
|
+
| `approval` | `ttlSeconds` | `number` | `300` | Approval challenge TTL |
|
|
589
|
+
| `approval` | `requireForTools` | `string[]` | 6 tools | Tools requiring approval |
|
|
590
|
+
| `approval` | `requireForDataClasses` | `string[]` | `["restricted", "secret"]` | Data classes requiring approval |
|
|
591
|
+
| `approval` | `ownerQuorum` | `number` | `1` | Number of approvers required |
|
|
592
|
+
| `approval` | `bindToConversation` | `boolean` | `true` | Bind token to originating conversation |
|
|
593
|
+
| `approval` | `storagePath` | `string?` | — | JSON file for persistent approvals |
|
|
594
|
+
| `tenancy` | `budgetKeyMode` | `string` | `"agent+principal+conversation"` | Budget partitioning strategy |
|
|
595
|
+
| `tenancy` | `redactCrossPrincipalOutput` | `boolean` | `true` | Redact vs deny for restricted data |
|
|
596
|
+
| `outboundGuard` | `enabled` | `boolean` | `true` | Enable outbound leak prevention |
|
|
597
|
+
| `outboundGuard` | `systemPromptLeakPatterns` | `string[]` | 8 patterns | Patterns indicating prompt leakage |
|
|
598
|
+
| `outboundGuard` | `injectedFileNames` | `string[]` | 9 names | Config filenames to block in output |
|
|
599
|
+
| `rollout` | `stage` | `RolloutStage` | `"stage_c_full_enforce"` | Current enforcement stage |
|
|
600
|
+
| `rollout` | `highRiskTools` | `string[]` | — | Tools enforced in stage B |
|
|
601
|
+
| `monitoring` | `falsePositiveThresholdPct` | `number` | `3` | False positive rate threshold |
|
|
602
|
+
| `monitoring` | `consecutiveDaysForTuning` | `number` | `2` | Days above threshold before signaling |
|
|
245
603
|
| `audit` | `enabled` | `boolean` | `false` | Enable JSONL audit trail |
|
|
246
604
|
| `audit` | `sinkPath` | `string?` | — | File path for JSONL audit events |
|
|
247
605
|
| `externalValidation` | `enabled` | `boolean` | `false` | Enable HTTP external validators |
|
|
@@ -254,27 +612,16 @@ const engine = new GuardrailsEngine(config, { customValidators: [spendingLimit]
|
|
|
254
612
|
| `notifications` | `enabled` | `boolean` | `false` | Enable approval notifications |
|
|
255
613
|
| `notifications` | `adminChannelId` | `string?` | — | Target channel for notifications |
|
|
256
614
|
|
|
257
|
-
##
|
|
258
|
-
|
|
259
|
-
Most config has secure defaults. Override only what you need:
|
|
260
|
-
|
|
261
|
-
```ts
|
|
262
|
-
const plugin = createOpenClawGuardrailsPlugin({
|
|
263
|
-
workspaceRoot: "/workspace/project",
|
|
264
|
-
principal: {
|
|
265
|
-
ownerIds: ["owner-user-id"],
|
|
266
|
-
adminIds: ["admin-user-id"]
|
|
267
|
-
},
|
|
268
|
-
approval: {
|
|
269
|
-
enabled: true,
|
|
270
|
-
storagePath: "/workspace/project/.openclaw/approval-store.json"
|
|
271
|
-
}
|
|
272
|
-
});
|
|
273
|
-
```
|
|
615
|
+
## Migration
|
|
274
616
|
|
|
275
|
-
|
|
617
|
+
### v0.6.0 → v0.6.1
|
|
276
618
|
|
|
277
|
-
|
|
619
|
+
1. **Plugin API alignment**: The plugin now uses OpenClaw's typed hook system (`api.on()`) instead of `api.registerHook()`. Security decisions (block, cancel, redact) are now properly honoured by OpenClaw's pipeline — previously they were silently discarded.
|
|
620
|
+
2. **New event adapter layer**: `src/plugin/event-adapter.ts` bridges OpenClaw's structured `(event, ctx)` hook pairs to the internal `OpenClawContext`. No changes needed for users of `createOpenClawGuardrailsPlugin()` or `GuardrailsEngine` directly.
|
|
621
|
+
3. **Plugin export**: The default export is now an `{ id, name, version, register }` object (compatible with `resolvePluginModuleExport()`). The `registerOpenClawGuardrails` named export is preserved for backward compatibility.
|
|
622
|
+
4. **`tool_result_persist` sync redaction**: Uses the existing `redactWithPatterns()` utility for synchronous redaction. Full async engine evaluation runs fire-and-forget for audit/metrics.
|
|
623
|
+
5. **Manifest cleaned**: Removed unrecognized `entry` and `hooks` fields from `openclaw.plugin.json`. Set `additionalProperties: false` on root config schema.
|
|
624
|
+
6. **Peer dependency**: `openclaw` is now declared as a `peerDependency` (`>=2026.2.25`).
|
|
278
625
|
|
|
279
626
|
### v0.5.x → v0.6.0
|
|
280
627
|
|
|
@@ -12,7 +12,12 @@ export interface TokenUsageRecord {
|
|
|
12
12
|
export declare class TokenUsageStore {
|
|
13
13
|
private records;
|
|
14
14
|
private writer;
|
|
15
|
+
private totalInput;
|
|
16
|
+
private totalOutput;
|
|
17
|
+
private totalTokens;
|
|
18
|
+
private byUser;
|
|
15
19
|
constructor(storagePath?: string);
|
|
20
|
+
private addToCounters;
|
|
16
21
|
record(entry: TokenUsageRecord): void;
|
|
17
22
|
getByUser(senderId: string): TokenUsageRecord[];
|
|
18
23
|
getSummary(): TokenUsageSummary;
|
|
@@ -2,41 +2,45 @@ import { JsonlWriter, readJsonlFile } from "./jsonl-writer.js";
|
|
|
2
2
|
export class TokenUsageStore {
|
|
3
3
|
records = [];
|
|
4
4
|
writer = null;
|
|
5
|
+
totalInput = 0;
|
|
6
|
+
totalOutput = 0;
|
|
7
|
+
totalTokens = 0;
|
|
8
|
+
byUser = {};
|
|
5
9
|
constructor(storagePath) {
|
|
6
10
|
if (storagePath) {
|
|
7
11
|
this.records = readJsonlFile(storagePath);
|
|
8
12
|
this.writer = new JsonlWriter(storagePath);
|
|
13
|
+
for (const r of this.records) {
|
|
14
|
+
this.addToCounters(r);
|
|
15
|
+
}
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
addToCounters(r) {
|
|
19
|
+
this.totalInput += r.inputTokens;
|
|
20
|
+
this.totalOutput += r.outputTokens;
|
|
21
|
+
this.totalTokens += r.totalTokens;
|
|
22
|
+
if (!this.byUser[r.senderId]) {
|
|
23
|
+
this.byUser[r.senderId] = { input: 0, output: 0, total: 0 };
|
|
9
24
|
}
|
|
25
|
+
this.byUser[r.senderId].input += r.inputTokens;
|
|
26
|
+
this.byUser[r.senderId].output += r.outputTokens;
|
|
27
|
+
this.byUser[r.senderId].total += r.totalTokens;
|
|
10
28
|
}
|
|
11
29
|
record(entry) {
|
|
12
30
|
this.records.push(entry);
|
|
31
|
+
this.addToCounters(entry);
|
|
13
32
|
this.writer?.append(entry);
|
|
14
33
|
}
|
|
15
34
|
getByUser(senderId) {
|
|
16
35
|
return this.records.filter((r) => r.senderId === senderId);
|
|
17
36
|
}
|
|
18
37
|
getSummary() {
|
|
19
|
-
const byUser = {};
|
|
20
|
-
let totalInput = 0;
|
|
21
|
-
let totalOutput = 0;
|
|
22
|
-
let totalTokens = 0;
|
|
23
|
-
for (const r of this.records) {
|
|
24
|
-
totalInput += r.inputTokens;
|
|
25
|
-
totalOutput += r.outputTokens;
|
|
26
|
-
totalTokens += r.totalTokens;
|
|
27
|
-
if (!byUser[r.senderId]) {
|
|
28
|
-
byUser[r.senderId] = { input: 0, output: 0, total: 0 };
|
|
29
|
-
}
|
|
30
|
-
byUser[r.senderId].input += r.inputTokens;
|
|
31
|
-
byUser[r.senderId].output += r.outputTokens;
|
|
32
|
-
byUser[r.senderId].total += r.totalTokens;
|
|
33
|
-
}
|
|
34
38
|
return {
|
|
35
|
-
totalInputTokens: totalInput,
|
|
36
|
-
totalOutputTokens: totalOutput,
|
|
37
|
-
totalTokens,
|
|
39
|
+
totalInputTokens: this.totalInput,
|
|
40
|
+
totalOutputTokens: this.totalOutput,
|
|
41
|
+
totalTokens: this.totalTokens,
|
|
38
42
|
recordCount: this.records.length,
|
|
39
|
-
byUser
|
|
43
|
+
byUser: { ...this.byUser }
|
|
40
44
|
};
|
|
41
45
|
}
|
|
42
46
|
close() {
|
|
@@ -101,15 +101,7 @@ export interface AgentEndEvent {
|
|
|
101
101
|
error?: string;
|
|
102
102
|
durationMs?: number;
|
|
103
103
|
}
|
|
104
|
-
export interface AgentEndContext {
|
|
105
|
-
agentId?: string;
|
|
106
|
-
sessionKey?: string;
|
|
107
|
-
sessionId?: string;
|
|
108
|
-
workspaceDir?: string;
|
|
109
|
-
messageProvider?: string;
|
|
110
|
-
trigger?: string;
|
|
111
|
-
channelId?: string;
|
|
112
|
-
}
|
|
104
|
+
export type AgentEndContext = BeforeAgentStartContext;
|
|
113
105
|
export declare function mapBeforeAgentStart(event: BeforeAgentStartEvent, ctx: BeforeAgentStartContext): OpenClawContext;
|
|
114
106
|
export declare function mapMessageReceived(event: MessageReceivedEvent, ctx: MessageReceivedContext): OpenClawContext;
|
|
115
107
|
export declare function mapBeforeToolCall(event: BeforeToolCallEvent, ctx: BeforeToolCallContext): OpenClawContext;
|
|
@@ -34,6 +34,7 @@ export interface OpenClawHookResult extends OpenClawContext {
|
|
|
34
34
|
export interface OpenClawPlugin {
|
|
35
35
|
name: string;
|
|
36
36
|
version: string;
|
|
37
|
+
config: GuardrailsConfig;
|
|
37
38
|
approveRequest: (requestId: string, approverId: string, approverRole: ApproverRole) => string | null;
|
|
38
39
|
hooks: {
|
|
39
40
|
before_agent_start: (context: OpenClawContext) => Promise<OpenClawHookResult>;
|
|
@@ -1,9 +1,10 @@
|
|
|
1
1
|
import { JsonlAuditSink, NoopAuditSink } from "../core/audit-sink.js";
|
|
2
2
|
import { GuardrailsEngine } from "../core/engine.js";
|
|
3
|
-
import { unique } from "../core/event-utils.js";
|
|
3
|
+
import { isObject, unique } from "../core/event-utils.js";
|
|
4
4
|
import { ConsoleNotificationSink } from "../core/notification-sink.js";
|
|
5
5
|
import { REASON_CODES } from "../core/reason-codes.js";
|
|
6
6
|
import { TokenUsageStore } from "../core/token-usage-store.js";
|
|
7
|
+
import { PLUGIN_VERSION } from "./version.js";
|
|
7
8
|
import { createDefaultConfig, mergeConfig } from "../rules/default-policy.js";
|
|
8
9
|
// Reason codes that reveal what type of sensitive content was detected.
|
|
9
10
|
// Map these to a generic code before exposing in hook results to prevent
|
|
@@ -249,7 +250,7 @@ function buildMonitoringSnapshot(config, metrics) {
|
|
|
249
250
|
};
|
|
250
251
|
}
|
|
251
252
|
function isPluginOptions(arg) {
|
|
252
|
-
if (!arg
|
|
253
|
+
if (!isObject(arg))
|
|
253
254
|
return false;
|
|
254
255
|
const obj = arg;
|
|
255
256
|
// PluginOptions has keys that never appear on GuardrailsConfig
|
|
@@ -282,7 +283,7 @@ export function createOpenClawGuardrailsPlugin(overridesOrOptions = {}) {
|
|
|
282
283
|
});
|
|
283
284
|
const metrics = createMetrics();
|
|
284
285
|
console.log("[guardrails] plugin created", {
|
|
285
|
-
version:
|
|
286
|
+
version: PLUGIN_VERSION,
|
|
286
287
|
outboundGuardEnabled: config.outboundGuard.enabled,
|
|
287
288
|
injectedFileNames: config.outboundGuard.injectedFileNames,
|
|
288
289
|
mode: config.mode
|
|
@@ -295,11 +296,11 @@ export function createOpenClawGuardrailsPlugin(overridesOrOptions = {}) {
|
|
|
295
296
|
};
|
|
296
297
|
return {
|
|
297
298
|
name: "openclaw-guardrails",
|
|
298
|
-
version:
|
|
299
|
+
version: PLUGIN_VERSION,
|
|
300
|
+
config,
|
|
299
301
|
approveRequest: (requestId, approverId, approverRole) => engine.approveRequest(requestId, approverId, approverRole),
|
|
300
302
|
hooks: {
|
|
301
303
|
async before_agent_start(context) {
|
|
302
|
-
console.log("[guardrails:before_agent_start] hook fired", { contextKeys: Object.keys(context) });
|
|
303
304
|
const decision = await evaluate("before_agent_start", context);
|
|
304
305
|
const guardPrompt = buildGuardPrompt(config);
|
|
305
306
|
const existingPrompt = typeof context.systemPrompt === "string"
|
|
@@ -323,10 +324,6 @@ export function createOpenClawGuardrailsPlugin(overridesOrOptions = {}) {
|
|
|
323
324
|
return output;
|
|
324
325
|
},
|
|
325
326
|
async message_received(context) {
|
|
326
|
-
console.log("[guardrails:message_received] hook fired", {
|
|
327
|
-
contextKeys: Object.keys(context),
|
|
328
|
-
contentPreview: typeof context.content === "string" ? context.content.slice(0, 120) : undefined
|
|
329
|
-
});
|
|
330
327
|
const decision = await evaluate("message_received", context);
|
|
331
328
|
const transformedContext = decision.redactedContent
|
|
332
329
|
? upsertContentField(context, decision.redactedContent)
|
|
@@ -396,19 +393,6 @@ export function createOpenClawGuardrailsPlugin(overridesOrOptions = {}) {
|
|
|
396
393
|
};
|
|
397
394
|
},
|
|
398
395
|
async message_sending(context) {
|
|
399
|
-
const aggregated = extractOutboundContent(context);
|
|
400
|
-
const stringFields = {};
|
|
401
|
-
for (const [k, v] of Object.entries(context)) {
|
|
402
|
-
if (typeof v === "string" && v.length > 0) {
|
|
403
|
-
stringFields[k] = v.length > 120 ? v.slice(0, 120) + "…" : v;
|
|
404
|
-
}
|
|
405
|
-
}
|
|
406
|
-
console.log("[guardrails:message_sending] hook fired", {
|
|
407
|
-
aggregatedLength: aggregated.length,
|
|
408
|
-
aggregatedPreview: aggregated.slice(0, 200),
|
|
409
|
-
stringFields,
|
|
410
|
-
contextKeys: Object.keys(context)
|
|
411
|
-
});
|
|
412
396
|
if (!config.outboundGuard.enabled) {
|
|
413
397
|
return { ...context };
|
|
414
398
|
}
|
|
@@ -7,8 +7,9 @@
|
|
|
7
7
|
* - `api.logger` for structured logging
|
|
8
8
|
* - `api.registerCommand()` for the /approve command
|
|
9
9
|
*/
|
|
10
|
-
import {
|
|
10
|
+
import { redactWithPatterns } from "../redaction/redact.js";
|
|
11
11
|
import { createOpenClawGuardrailsPlugin } from "./openclaw-adapter.js";
|
|
12
|
+
import { PLUGIN_VERSION } from "./version.js";
|
|
12
13
|
import { mapBeforeAgentStart, mapMessageReceived, mapBeforeToolCall, mapToolResultPersist, mapMessageSending, mapAgentEnd, mapToBeforeAgentStartResult, mapToBeforeToolCallResult, mapToMessageSendingResult, } from "./event-adapter.js";
|
|
13
14
|
// ---------------------------------------------------------------------------
|
|
14
15
|
// Plugin definition
|
|
@@ -16,12 +17,12 @@ import { mapBeforeAgentStart, mapMessageReceived, mapBeforeToolCall, mapToolResu
|
|
|
16
17
|
const plugin = {
|
|
17
18
|
id: "openclaw-guardrails",
|
|
18
19
|
name: "OpenClaw Guardrails",
|
|
19
|
-
version:
|
|
20
|
+
version: PLUGIN_VERSION,
|
|
20
21
|
register(api) {
|
|
21
22
|
const rawConfig = (api.pluginConfig ?? {});
|
|
22
23
|
const log = api.logger;
|
|
23
|
-
const mergedConfig = mergeConfig(createDefaultConfig(rawConfig.workspaceRoot ?? process.cwd()), rawConfig);
|
|
24
24
|
const guardrails = createOpenClawGuardrailsPlugin(rawConfig);
|
|
25
|
+
const mergedConfig = guardrails.config;
|
|
25
26
|
log.info(`[guardrails] plugin registered (v${guardrails.version}, mode=${mergedConfig.mode})`);
|
|
26
27
|
// ------------------------------------------------------------------
|
|
27
28
|
// before_agent_start — inject security policy prompt
|
|
@@ -64,6 +65,12 @@ const plugin = {
|
|
|
64
65
|
// Outbound content redaction is still enforced by the async
|
|
65
66
|
// `message_sending` hook, which catches leaks before they reach users.
|
|
66
67
|
// ------------------------------------------------------------------
|
|
68
|
+
// Pre-compile redaction patterns once (config is immutable after merge).
|
|
69
|
+
const allRedactionPatterns = [
|
|
70
|
+
...mergedConfig.redaction.secretPatterns,
|
|
71
|
+
...mergedConfig.redaction.piiPatterns,
|
|
72
|
+
];
|
|
73
|
+
const redactionReplacement = mergedConfig.redaction.replacement;
|
|
67
74
|
api.on("tool_result_persist", (event, ctx) => {
|
|
68
75
|
const oclCtx = mapToolResultPersist(event, ctx);
|
|
69
76
|
// Fire engine evaluation async for audit trail and metrics.
|
|
@@ -71,31 +78,12 @@ const plugin = {
|
|
|
71
78
|
guardrails.hooks.tool_result_persist(oclCtx).catch((err) => {
|
|
72
79
|
log.error(`[guardrails:tool_result_persist] async audit failed: ${String(err)}`);
|
|
73
80
|
});
|
|
74
|
-
// Sync redaction:
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
const allPatterns = [
|
|
81
|
-
...mergedConfig.redaction.secretPatterns,
|
|
82
|
-
...mergedConfig.redaction.piiPatterns,
|
|
83
|
-
];
|
|
84
|
-
if (allPatterns.length > 0) {
|
|
85
|
-
const replacement = mergedConfig.redaction.replacement;
|
|
86
|
-
let redacted = content;
|
|
87
|
-
for (const pattern of allPatterns) {
|
|
88
|
-
try {
|
|
89
|
-
const regex = new RegExp(pattern, "gi");
|
|
90
|
-
redacted = redacted.replace(regex, replacement);
|
|
91
|
-
}
|
|
92
|
-
catch {
|
|
93
|
-
// skip invalid patterns
|
|
94
|
-
}
|
|
95
|
-
}
|
|
96
|
-
if (redacted !== content) {
|
|
97
|
-
return { message: { ...event.message, content: redacted } };
|
|
98
|
-
}
|
|
81
|
+
// Sync redaction: reuse content already extracted by the mapper.
|
|
82
|
+
const content = oclCtx.output;
|
|
83
|
+
if (content && allRedactionPatterns.length > 0) {
|
|
84
|
+
const { redacted } = redactWithPatterns(content, allRedactionPatterns, redactionReplacement);
|
|
85
|
+
if (redacted !== content) {
|
|
86
|
+
return { message: { ...event.message, content: redacted } };
|
|
99
87
|
}
|
|
100
88
|
}
|
|
101
89
|
return {};
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const PLUGIN_VERSION = "0.6.2";
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export const PLUGIN_VERSION = "0.6.2";
|
package/dist/redaction/redact.js
CHANGED
|
@@ -1,4 +1,14 @@
|
|
|
1
1
|
import { compilePatterns } from "../rules/patterns.js";
|
|
2
|
+
const patternCache = new Map();
|
|
3
|
+
function getCachedPatterns(patterns, flags) {
|
|
4
|
+
const key = flags + "\0" + patterns.join("\0");
|
|
5
|
+
let cached = patternCache.get(key);
|
|
6
|
+
if (!cached) {
|
|
7
|
+
cached = compilePatterns(patterns, flags);
|
|
8
|
+
patternCache.set(key, cached);
|
|
9
|
+
}
|
|
10
|
+
return cached;
|
|
11
|
+
}
|
|
2
12
|
export function redactWithPatterns(input, patterns, replacement) {
|
|
3
13
|
if (!input) {
|
|
4
14
|
return {
|
|
@@ -8,13 +18,15 @@ export function redactWithPatterns(input, patterns, replacement) {
|
|
|
8
18
|
}
|
|
9
19
|
let redacted = input;
|
|
10
20
|
const matches = new Set();
|
|
11
|
-
const regexes = compilePatterns(patterns, "gi");
|
|
21
|
+
const regexes = getCachedPatterns(patterns, "gi");
|
|
12
22
|
for (const regex of regexes) {
|
|
23
|
+
regex.lastIndex = 0;
|
|
13
24
|
for (const match of input.matchAll(regex)) {
|
|
14
25
|
if (match[0]) {
|
|
15
26
|
matches.add(match[0]);
|
|
16
27
|
}
|
|
17
28
|
}
|
|
29
|
+
regex.lastIndex = 0;
|
|
18
30
|
redacted = redacted.replace(regex, replacement);
|
|
19
31
|
}
|
|
20
32
|
return {
|
|
@@ -26,6 +38,9 @@ export function hasPatternMatch(input, patterns) {
|
|
|
26
38
|
if (!input) {
|
|
27
39
|
return false;
|
|
28
40
|
}
|
|
29
|
-
const regexes = compilePatterns(patterns, "gi");
|
|
30
|
-
return regexes.some((regex) => regex.test(input));
|
|
41
|
+
const regexes = getCachedPatterns(patterns, "gi");
|
|
42
|
+
return regexes.some((regex) => {
|
|
43
|
+
regex.lastIndex = 0;
|
|
44
|
+
return regex.test(input);
|
|
45
|
+
});
|
|
31
46
|
}
|
package/openclaw.plugin.json
CHANGED
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@safefence/openclaw-guardrails",
|
|
3
|
-
"version": "0.6.1",
|
|
3
|
+
"version": "0.6.3",
|
|
4
4
|
"description": "Native deterministic guardrails plugin for OpenClaw",
|
|
5
5
|
"openclaw": {
|
|
6
6
|
"extensions": [
|
|
@@ -19,7 +19,10 @@
|
|
|
19
19
|
"build": "tsc -p tsconfig.json",
|
|
20
20
|
"test": "vitest run",
|
|
21
21
|
"test:coverage": "vitest run --coverage",
|
|
22
|
-
"test:watch": "vitest"
|
|
22
|
+
"test:watch": "vitest",
|
|
23
|
+
"preversion": "npm test && npm run build",
|
|
24
|
+
"version": "bash scripts/sync-version.sh",
|
|
25
|
+
"postversion": "echo '\nRun this to publish via CI:\n git push origin master --tags'"
|
|
23
26
|
},
|
|
24
27
|
"engines": {
|
|
25
28
|
"node": ">=20"
|
|
@@ -31,6 +34,14 @@
|
|
|
31
34
|
"owasp",
|
|
32
35
|
"llm"
|
|
33
36
|
],
|
|
37
|
+
"repository": {
|
|
38
|
+
"type": "git",
|
|
39
|
+
"url": "https://github.com/douglasswm/safefence.git",
|
|
40
|
+
"directory": "packages/openclaw-guardrails"
|
|
41
|
+
},
|
|
42
|
+
"publishConfig": {
|
|
43
|
+
"provenance": true
|
|
44
|
+
},
|
|
34
45
|
"license": "MIT",
|
|
35
46
|
"peerDependencies": {
|
|
36
47
|
"openclaw": ">=2026.2.25"
|