clawmoat 0.8.0 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. package/.dockerignore +9 -0
  2. package/CHANGELOG.md +18 -0
  3. package/DEMO.md +87 -0
  4. package/Dockerfile +5 -18
  5. package/README.md +232 -8
  6. package/THREAT_MODEL.md +129 -0
  7. package/agent/README.md +131 -0
  8. package/agent/index.js +471 -0
  9. package/agent/install-service.sh +94 -0
  10. package/agent/openclaw-hook.js +453 -0
  11. package/agent/provider-setup.js +649 -0
  12. package/agent/setup.js +274 -0
  13. package/assets/BADGE-USAGE.md +20 -0
  14. package/assets/clawmoat-badge.svg +21 -0
  15. package/bin/clawmoat.js +468 -111
  16. package/docs/affiliates/dashboard.html +124 -0
  17. package/docs/affiliates/index.html +236 -0
  18. package/docs/agent-install.html +183 -0
  19. package/docs/ai-agent-security-scanner.html +10 -6
  20. package/docs/badge/index.html +149 -0
  21. package/docs/badge/scanning.svg +23 -0
  22. package/docs/blog/386-malicious-skills.html +11 -4
  23. package/docs/blog/40000-exposed-openclaw-instances.html +11 -4
  24. package/docs/blog/agent-trust-protocol.html +5 -4
  25. package/docs/blog/ai-agent-earns-commissions.html +230 -0
  26. package/docs/blog/bugmageddon-agent-firewall.html +174 -0
  27. package/docs/blog/calculator-math.html +180 -0
  28. package/docs/blog/clawmoat-vs-llamafirewall-nemo-guardrails.html +10 -4
  29. package/docs/blog/host-guardian-launch.html +18 -8
  30. package/docs/blog/ibm-experts-agent-runtime-protection.html +15 -6
  31. package/docs/blog/index.html +67 -9
  32. package/docs/blog/langchain-security-tutorial.html +18 -8
  33. package/docs/blog/mcp-30-cves-security-crisis.html +11 -4
  34. package/docs/blog/meta-researcher-rogue-agent.html +201 -0
  35. package/docs/blog/microsoft-openclaw-workstation-security.html +5 -4
  36. package/docs/blog/nist-ai-agent-standards-clawmoat.html +16 -8
  37. package/docs/blog/oasis-websocket-hijack.html +11 -4
  38. package/docs/blog/ollama-openclaw-security.html +10 -4
  39. package/docs/blog/openclaw-enterprise-readiness-claw10.html +5 -4
  40. package/docs/blog/openclaw-security-reckoning-2026.html +11 -4
  41. package/docs/blog/owasp-agentic-ai-top10.html +18 -8
  42. package/docs/blog/securing-ai-agents.html +18 -8
  43. package/docs/blog/supply-chain-agents.html +18 -8
  44. package/docs/business/index.html +11 -16
  45. package/docs/business/install.html +21 -7
  46. package/docs/checklist.html +10 -4
  47. package/docs/compare/index.html +122 -0
  48. package/docs/compare/lakera/index.html +62 -0
  49. package/docs/compare/llm-guard/index.html +49 -0
  50. package/docs/compare/snyk-agent-scan/index.html +63 -0
  51. package/docs/compare.html +10 -6
  52. package/docs/dashboard/index.html +520 -0
  53. package/docs/finance/index.html +9 -6
  54. package/docs/guides/business-deployment.html +770 -0
  55. package/docs/hall-of-fame.html +11 -5
  56. package/docs/index.html +266 -137
  57. package/docs/integrations/langchain.html +14 -6
  58. package/docs/integrations/openai.html +14 -6
  59. package/docs/integrations/openclaw.html +55 -7
  60. package/docs/plans/2026-03-26-threat-intel-api.md +255 -0
  61. package/docs/plans/2026-04-14-bugmageddon-marketing-pack.md +329 -0
  62. package/docs/plans/2026-04-14-clawmoat-v1-bugmageddon.md +248 -0
  63. package/docs/plans/2026-04-14-v1-release-update.md +91 -0
  64. package/docs/plans/2026-04-19-supabase-audit.md +68 -0
  65. package/docs/plans/2026-05-12-sales-push.md +303 -0
  66. package/docs/playground/index.html +893 -0
  67. package/docs/playground.html +4 -7
  68. package/docs/rfcs/defense-in-depth.md +467 -0
  69. package/docs/scan/index.html +156 -12
  70. package/docs/services/case-study.html +255 -0
  71. package/docs/services/downloads/install-openclaw.bat +45 -0
  72. package/docs/services/downloads/install-openclaw.command +38 -0
  73. package/docs/services/downloads/install-openclaw.sh +38 -0
  74. package/docs/services/get-started.html +165 -0
  75. package/docs/services/index.html +598 -0
  76. package/docs/services/multi-agent-security.html +284 -0
  77. package/docs/services/one-pager.html +99 -0
  78. package/docs/services/pitch-deck.html +229 -0
  79. package/docs/services/roi-calculator.html +258 -0
  80. package/docs/sitemap.xml +62 -2
  81. package/docs/support/index.html +12 -1
  82. package/docs/templates/customer-service/HEARTBEAT.md +61 -0
  83. package/docs/templates/customer-service/MEMORY.md +89 -0
  84. package/docs/templates/customer-service/SOUL.md +41 -0
  85. package/docs/templates/customer-service/USER.md +56 -0
  86. package/docs/templates/executive/HEARTBEAT.md +86 -0
  87. package/docs/templates/executive/MEMORY.md +92 -0
  88. package/docs/templates/executive/SOUL.md +44 -0
  89. package/docs/templates/executive/USER.md +62 -0
  90. package/docs/templates/finance/HEARTBEAT.md +58 -0
  91. package/docs/templates/finance/MEMORY.md +87 -0
  92. package/docs/templates/finance/SOUL.md +38 -0
  93. package/docs/templates/finance/USER.md +53 -0
  94. package/docs/templates/index.html +115 -0
  95. package/docs/templates/operations/HEARTBEAT.md +63 -0
  96. package/docs/templates/operations/MEMORY.md +68 -0
  97. package/docs/templates/operations/SOUL.md +38 -0
  98. package/docs/templates/operations/USER.md +49 -0
  99. package/docs/templates/sales/HEARTBEAT.md +55 -0
  100. package/docs/templates/sales/MEMORY.md +89 -0
  101. package/docs/templates/sales/SOUL.md +34 -0
  102. package/docs/templates/sales/USER.md +54 -0
  103. package/eslint.config.js +32 -0
  104. package/evals/README.md +29 -0
  105. package/evals/cases.json +390 -0
  106. package/evals/results.md +68 -0
  107. package/evals/run.js +180 -0
  108. package/examples/demo-attack/demo.js +186 -0
  109. package/examples/python-quickstart/README.md +54 -0
  110. package/examples/python-quickstart/clawmoat_client.py +167 -0
  111. package/examples/video-demo/README.md +14 -0
  112. package/examples/video-demo/scene-a-normal.js +29 -0
  113. package/examples/video-demo/scene-b-attack-arrives.js +31 -0
  114. package/examples/video-demo/scene-c-hijack.js +44 -0
  115. package/examples/video-demo/scene-d-clawmoat.js +46 -0
  116. package/integrations/crewai/README.md +32 -0
  117. package/integrations/crewai/clawmoat_crewai/__init__.py +17 -0
  118. package/integrations/crewai/clawmoat_crewai/guard.py +103 -0
  119. package/integrations/crewai/pyproject.toml +21 -0
  120. package/integrations/langchain/README.md +91 -0
  121. package/integrations/langchain/clawmoat_langchain/__init__.py +17 -0
  122. package/integrations/langchain/clawmoat_langchain/callback.py +489 -0
  123. package/integrations/langchain/pyproject.toml +32 -0
  124. package/integrations/litellm/README.md +324 -0
  125. package/integrations/litellm/clawmoat_litellm/__init__.py +21 -0
  126. package/integrations/litellm/clawmoat_litellm/callback.py +329 -0
  127. package/integrations/litellm/clawmoat_litellm/proxy_middleware.py +224 -0
  128. package/integrations/litellm/pyproject.toml +74 -0
  129. package/integrations/openai-agents/README.md +392 -0
  130. package/integrations/openai-agents/clawmoat_openai_agents/__init__.py +20 -0
  131. package/integrations/openai-agents/clawmoat_openai_agents/guardrail.py +431 -0
  132. package/integrations/openai-agents/clawmoat_openai_agents/middleware.py +311 -0
  133. package/integrations/openai-agents/pyproject.toml +76 -0
  134. package/package.json +6 -5
  135. package/plugins/openclaw-adapter/PHASE1.md +439 -0
  136. package/plugins/openclaw-adapter/README.md +103 -0
  137. package/plugins/openclaw-adapter/SPEC.md +1644 -0
  138. package/plugins/openclaw-adapter/package.json +31 -0
  139. package/plugins/openclaw-adapter/src/index.test.ts +226 -0
  140. package/plugins/openclaw-adapter/src/index.ts +140 -0
  141. package/plugins/openclaw-adapter/tsconfig.json +14 -0
  142. package/server/data/threats.json +290 -0
  143. package/server/index.js +142 -7
  144. package/src/adapters/express.js +161 -0
  145. package/src/adapters/index.js +92 -0
  146. package/src/adapters/langchain.js +185 -0
  147. package/src/approval/index.js +456 -0
  148. package/src/ban-scanner.js +200 -0
  149. package/src/boundary-scanner.js +296 -0
  150. package/src/ci-scanner.js +279 -0
  151. package/src/code-scanner.js +245 -0
  152. package/src/enforce.js +166 -0
  153. package/src/formatters/json.js +80 -0
  154. package/src/formatters/sarif.js +388 -0
  155. package/src/guardian/alerts.js +34 -3
  156. package/src/guardian/index.js +41 -2
  157. package/src/index.js +102 -0
  158. package/src/integrations/agentmesh.js +501 -0
  159. package/src/language-detector.js +201 -0
  160. package/src/mcp-scanner.js +253 -0
  161. package/src/multimodal/index.js +579 -0
  162. package/src/obfuscation-scanner.js +457 -0
  163. package/src/policy-engine.js +402 -0
  164. package/src/scanners/dependency-attacks.js +128 -0
  165. package/src/scanners/prompt-injection.js +18 -0
  166. package/src/scanners/supply-chain.js +14 -0
  167. package/src/templates/default-config.yml +90 -0
  168. package/src/vuln-ops/exploitability.js +46 -0
  169. package/src/watch/live-monitor.js +720 -0
  170. package/clawmoat-0.8.0.tgz +0 -0
  171. package/server/index.js.patch +0 -1
@@ -0,0 +1,1644 @@
1
+ # Pluggable Sanitizer Interface
2
+
3
+ ### OpenClaw · Feature Spec · v1.2.1
4
+
5
+ ### Extension of: Input Validation Layers v2.3, Context-Aware Sanitization v2.1, MCP Trust Tier v2
6
+
7
+ ### Companion to: Audit Trail Enhancement v2.2, Audit Alerting v2.3, Tier 1 Pattern Library v1
8
+
9
+ ### Requires companion update: Context-Aware Sanitization v2.2 (plugin profile schema)
10
+
11
+ ---
12
+
13
+ ## Changelog (v1.2 → v1.2.1)
14
+
15
+ | # | Issue | Resolution |
16
+ | - | ----- | ---------- |
17
+ | 1 | Malformed-result block policy contradicts clamp/strip policy for out-of-range confidence and bad ruleId prefix | Split validation into recoverable (clamp/strip + warn) and unrecoverable (block). Error handling table updated with explicit categories. |
18
+ | 2 | Truth table does not account for frequency tier1+ forcing Stage 2 even when hard block would skip it | Added frequency override row to truth table. Tier1+ always forces Stage 2. Plugin `safe: false` still persists to final merge. |
19
+ | 3 | `plugin_config_loaded` emits at startup before any session exists, but spec says plugin events go to per-session audit JSONL | `plugin_config_loaded` writes to agent-level alert log, not session audit. Same pattern as `audit_config_loaded` in audit trail spec. |
20
+ | 4 | CJS export contract ambiguous: "default factory function" vs `module.exports` direct assignment | Both forms valid. Loader uses `mod.default \|\| mod` resolution. Documented explicitly. |
21
+ | 5 | Path containment `startsWith(configDir)` matches sibling dirs (e.g. `/config2` matches `/config`) | Append `path.sep` to config directory before prefix comparison. |
22
+ | 6 | Profile override key model contradicts: wildcard prefixes supported in frequency section but not in companion schema | Three key forms defined as authoritative: plugin id, full ruleId, prefix wildcard (`"id.*"`). Companion schema updated to match. |
23
+ | 7 | `pluginErrorSpike` aggregation scope not defined | Scoped to same agent, matching existing alerting patterns. |
24
+ ## Changelog (v1.1 → v1.2)
25
+ | # | Issue | Resolution |
26
+ | - | ----- | ---------- |
27
+ | 1 | Config shape contradiction: `plugins` defined as both array and parent of `.maxTotal` / `.maxPrePhase` / `.maxPostPhase` nested keys | Moved limits to sibling namespace `memory.sessions.sanitization.pluginLimits.*`. `plugins` is a clean array. |
28
+ | 2 | `safe: false` semantics contradictory: two-pass section says non-hard-block plugin fails are "flags that proceed," final merge says "ANY block → block" | Added Pre-Plugin Block Semantics truth table. Plugin `safe: false` always persists to final merge as a block. Stage 2 running is for enrichment, not rescue. |
29
+ | 3 | `priorPlugins` typed as `PluginResult[]` but Content Transformation section requires `transformApplied: true` which is not in `PluginResult` | Added `PluginResultMeta` wrapper type for `priorPlugins` that extends `PluginResult` with pipeline-set metadata fields. |
30
+ | 4 | Rule taxonomy registration undefined in executable terms: no interface method, conflicts with static taxonomy validation in context profiles | Added `ruleIdPrefix: string` to `SanitizerPlugin` interface. Taxonomy registration uses prefix. Dynamic ruleIds valid if they start with declared prefix. Added taxonomy integration section. |
31
+ | 5 | Trust tier interaction conflicts: draft says plugins bypassed for trusted servers, but trust tier spec requires Stage 1 prefilter runs for all results including trusted | Clarified: Stage 1 runs for all results (per trust tier spec), trust routing happens after Stage 1, plugins sit after the trust routing decision. Updated architecture diagram. |
32
+ | 6 | Worker concurrency unspecified: no queuing model, timeout start point, or "permanently failed" scope definition | Defined FIFO request queue per worker, timeout starts at post-to-worker, "permanently failed" scoped to process lifetime. |
33
+ | 7 | Transform schema validation under-specified for MCP: no definition of which tool schema/discriminant context to use | Specified: transform validation receives same `query` context (server, tool, params) and tool schema reference as original Stage 1B validation. |
34
+ | 8 | Audit event contract incomplete: `plugin_pass`/`plugin_flags` overlap undefined, payload schemas for `plugin_error`/`plugin_transform`/`plugin_config_loaded` not specified | Defined emission rule (flags-present → `plugin_flags` only, not both). Added full payload schemas for all six plugin event types. |
35
+ | 9 | Alerting requirement not wired to existing rule model: `plugin_error` described as "alert on occurrence" but existing `write_failed` uses aggregation (`writeFailSpike`) | Defined new `pluginErrorSpike` alert rule with aggregation (default: 3 errors in 5 minutes). Added config keys. |
36
+ | 10 | Module format ambiguous: `require/import` without specifying CJS/ESM behavior | Specified: plugins must export CommonJS. Loader uses `require()`. ESM/TS plugins must be transpiled to CJS before deployment. |
37
+ | 11 | Path validation not cross-platform: covers `..` and leading `/` but not Windows absolute paths, UNC paths, or `file://` URIs | Replaced enumerated checks with platform-agnostic containment: `path.resolve()` then verify resolved path starts with config directory's resolved path. |
38
+ | 12 | Confidence bounds unspecified: no clamping, no behavior for empty findings | Defined: clamp to [0.0, 1.0] with warning log. Empty ruleIds with `safe: true` is a clean pass — confidence not scored. |
39
+ | 13 | Profile override surface unclear: draft introduces `plugins:` profile block not present in current custom profile schema | Called out as required companion change: context-aware-sanitization-spec v2.2 must add `plugins` map to custom profile schema. Defined the schema extension. |
40
+
41
+ ## Changelog (v1 → v1.1)
42
+
43
+ | Issue | Resolution |
44
+ | ----- | ---------- |
45
+ | Timeout enforcement assumed cooperative async; sync loops block the event loop and prevent timeout from firing | Added Runtime Isolation section. Plugin `inspect` calls execute in `worker_threads` with `worker.terminate()` as the kill mechanism. |
46
+ | Single `confidence` field per `PluginResult` forces uniform confidence across multiple findings | Added optional `findingConfidence` map keyed by ruleId. Single `confidence` field retained as fallback for all unkeyed rules. |
47
+ | `transformed` output typed as `unknown` with no structural validation — could break downstream stages | Transformed output must pass Stage 1B schema validation before being accepted. Rejection produces `plugin_error` with `reason: "transform_schema_fail"`. |
48
+ | Transform visibility to subsequent plugins in the same phase was undefined | Clarified: later plugins in the same phase receive transformed content. `priorPlugins` entries include transform metadata. |
49
+ | SHA-256 audit hashes for transforms did not specify serialization — `JSON.stringify` key order is non-deterministic | Specified canonical JSON serialization: sorted keys, no whitespace, applied recursively. |
50
+ | "No node_modules resolution" prevented plugins from importing their own dependencies | Added Plugin Dependency Strategy section. Plugins must ship as single-file bundles or the loader sets `NODE_PATH` to the plugin's directory. |
51
+ | Path validation rejected `..` but not symlinks pointing outside the config directory | Added `fs.realpath()` resolution before traversal validation. |
52
+ | Default frequency weight of 3 had no documented rationale relative to built-in weight scale | Added weight scale reference table in Frequency Scoring section. |
53
+
54
+ ---
55
+
56
+ ## Origin
57
+
58
+ Community feedback on PR #35427 (sanitization hardening) suggested adding a
59
+ pluggable sanitizer interface so teams can bring their own inspection rules —
60
+ regex patterns, ML classifiers, external scanning tools — without modifying
61
+ the core pipeline. This spec formalizes that idea within the existing
62
+ architecture.
63
+
64
+ ---
65
+
66
+ ## Summary
67
+
68
+ Define a plugin interface that allows operator-provided inspection modules to
69
+ run alongside the built-in validation pipeline. Plugins slot into declared
70
+ phases of the existing Stage 1 → Stage 2 flow. They conform to a standard
71
+ contract, produce results in the same shape as built-in stages, and feed into
72
+ the same audit trail and alerting infrastructure.
73
+
74
+ Plugins do NOT replace built-in stages. They augment them. The built-in
75
+ syntactic filter, schema validator, and semantic sub-agent always run
76
+ (subject to existing config toggles). Plugins add additional inspection on
77
+ top.
78
+
79
+ ---
80
+
81
+ ## Design Goals
82
+
83
+ - **Additive only.** Plugins cannot disable, bypass, or weaken built-in
84
+ stages. They can add inspection, not remove it. A plugin that returns
85
+ `safe: true` does not override a built-in stage that returns `safe: false`.
86
+ - **Same contract, same audit trail.** Plugin results conform to the same
87
+ result types as built-in stages. Plugin-triggered rules appear in audit
88
+ events with the same structure as built-in rules. No separate audit path.
89
+ - **Fail closed on plugin error.** A plugin that throws, times out, or
90
+ returns unrecoverably malformed output is treated as a block, not a pass. The pipeline
91
+ does not degrade silently when a plugin fails.
92
+ - **Static loading only.** Plugins are declared in config and loaded at
93
+ startup. No runtime plugin installation, no hot-loading, no remote
94
+ plugin fetching. Same static guarantee as context profiles: resolved at
95
+ config load time, frozen for agent lifetime, never modified by user input.
96
+ - **Composable with context profiles.** Profiles can enable, disable, or
97
+ weight-adjust specific plugins per-rule. A plugin that is useful in
98
+ `code-generation` context may be noise in `research` context.
99
+ - **Bounded execution.** Plugins run under a configurable timeout. A plugin
100
+ that exceeds its timeout is killed and treated as a block. The pipeline
101
+ never waits indefinitely for a plugin.
102
+
103
+ ---
104
+
105
+ ## Runtime Isolation
106
+
107
+ Plugin `inspect` calls execute inside Node.js `worker_threads`, not on the
108
+ main event loop. This is the mechanism that makes timeout enforcement
109
+ reliable — without it, a plugin containing a synchronous tight loop would
110
+ block the event loop and prevent `setTimeout` from ever firing.
111
+
112
+ **How it works:**
113
+
114
+ - At startup, the plugin loader spawns one `Worker` per loaded plugin.
115
+ The worker imports the plugin module and holds the initialized plugin
116
+ instance.
117
+ - On each `inspect` call, the main thread posts a message to the worker
118
+ with the serialized `PluginInput`. The worker runs `inspect` and posts
119
+ back the `PluginResult`.
120
+ - The main thread sets a timer for `timeoutMs`. If the worker does not
121
+ respond before the timer fires, the main thread calls
122
+ `worker.terminate()`, which kills the worker thread regardless of
123
+ whether it is blocked in sync code. A `plugin_error` event is emitted
124
+ with `reason: "timeout"`.
125
+ - After a termination, the loader respawns the worker and re-initializes
126
+ the plugin. If re-initialization fails, the plugin is marked as
127
+ permanently failed for the remainder of the **process lifetime** (not
128
+ session — a process restart clears the failure state). All subsequent
129
+ inspect calls for that plugin produce `plugin_error` with
130
+ `reason: "worker_init_failed"` and are treated as blocks.
131
+
132
+ ### Request Queuing
133
+
134
+ Each worker processes one `inspect` call at a time. If a second request
135
+ arrives while the worker is busy (possible on high-throughput MCP paths),
136
+ it enters a FIFO queue on the main thread. The `timeoutMs` timer starts
137
+ when the message is **posted to the worker**, not when it enters the queue.
138
+ This means queue wait time does not consume the plugin's timeout budget —
139
+ the plugin gets its full configured time once the worker picks it up.
140
+
141
+ If the queue depth exceeds a configurable limit (`maxQueueDepth`, default
142
+ 10), additional requests are rejected immediately with `plugin_error`
143
+ `reason: "queue_full"` and treated as blocks. This prevents unbounded
144
+ memory growth from a stalled plugin.
145
+
146
+ **Serialization cost:** `PluginInput` and `PluginResult` cross the
147
+ `worker_threads` structured clone boundary. This adds serialization
148
+ overhead proportional to the size of `content.raw`. For typical payloads
149
+ (< 100KB), this is sub-millisecond. For unusually large payloads, the
150
+ Tier 1 structural size check (STRUCT-002, max 512KB default) bounds the
151
+ upper end.
152
+
153
+ **Initialize and shutdown** run on the worker thread, not the main thread.
154
+ The main thread communicates lifecycle events via message passing. This
155
+ means a plugin that does heavy synchronous work in `initialize` does not
156
+ block the main event loop during startup.
157
+
158
+ **Why not child_process?** `worker_threads` can share memory via
159
+ `SharedArrayBuffer` if needed in future, have lower spawn overhead, and
160
+ are sufficient for the isolation requirement (timeout enforcement). Full
161
+ process isolation (filesystem, network) is out of scope for v1 and
162
+ listed under Out of Scope as plugin sandboxing.
163
+
164
+ ---
165
+
166
+ ## Architecture
167
+
168
+ ```
169
+ Input arrives (transcript or MCP result)
170
+
171
+ Stage 1: Built-in Pre-Filter (unchanged, runs for ALL inputs including trusted MCP)
172
+ ┌──────────────────────┬────────────────────────────┐
173
+ │ Stage 1A: Syntactic │ Stage 1B: Schema │
174
+ └──────────┬───────────┴──────────┬─────────────────┘
175
+ └────── merge ────────┘
176
+
177
+ Stage 1 audit events emitted (per trust tier spec — always, even for trusted)
178
+
179
+ Terminated-session check (per trust tier spec — always, trusted results are not exempt)
180
+
181
+ Trust tier routing decision:
182
+ ├── TRUSTED → trusted_pass audit entry → result to manager (skip all below)
183
+ └── UNTRUSTED → continue ↓
184
+
185
+ Stage 1P: Plugin Pre-Filters (new — untrusted + transcript only)
186
+ Runs plugins declared with phase: "pre"
187
+ Sequential execution in declared order
188
+ Each plugin receives: raw content + Stage 1 results (flags, ruleIds)
189
+
190
+ Frequency scoring (built-in + plugin flags combined)
191
+
192
+ Two-pass gating (unchanged logic, but plugin flags contribute to score)
193
+
194
+ Stage 2: Semantic Sub-Agent (unchanged)
195
+ Sub-agent receives plugin flags as additional hints alongside
196
+ syntactic flags (same injection mechanism as existing flag passthrough)
197
+
198
+ Stage 2P: Plugin Post-Filters (new — untrusted + transcript only)
199
+ Runs plugins declared with phase: "post"
200
+ Sequential execution in declared order
201
+ Each plugin receives: raw content + Stage 1 results + Stage 2 output
202
+
203
+ Final merge: built-in result ∪ ALL plugin results (pre + post)
204
+ ANY safe: false from any source → block
205
+ Flags aggregated, deduplicated by ruleId
206
+
207
+ Audit events emitted (built-in + plugin events in unified stream)
208
+ ```
209
+
210
+ ### Why two plugin phases?
211
+
212
+ **Pre-plugins (Stage 1P)** run before the semantic sub-agent. They're for
213
+ fast, deterministic checks — regex libraries, pattern databases, format
214
+ validators. Their flags feed into frequency scoring and inform the sub-agent
215
+ via hint injection. If a pre-plugin blocks and its rule is in
216
+ `hardBlockRules`, the two-pass optimization can skip the sub-agent call
217
+ entirely (same logic as existing hard blocks).
218
+
219
+ **Post-plugins (Stage 2P)** run after the semantic sub-agent. They're for
220
+ checks that benefit from the sub-agent's structured output — ML classifiers
221
+ that analyze the sanitized result, external scanning tools that need the
222
+ final structured content, compliance validators that check what the sub-agent
223
+ decided to pass through.
224
+
225
+ A plugin declares exactly one phase. If an operator needs both pre and post
226
+ inspection from the same system, they register two plugins.
227
+
228
+ ### Pre-Plugin Block Semantics
229
+
230
+ A pre-plugin returning `safe: false` has two effects that operate at
231
+ different points in the pipeline. This truth table is the single source of
232
+ truth:
233
+
234
+ | Pre-plugin `safe` | ruleId in `hardBlockRules`? | Session at frequency tier1+? | Stage 2 runs? | Plugin `safe: false` persists to final merge? | Final outcome |
235
+ | --- | --- | --- | --- | --- | --- |
236
+ | `true` | n/a | n/a | yes | no | depends on Stage 2 + post-plugins |
237
+ | `false` | yes | no | **no** (two-pass skip) | yes | **block** |
238
+ | `false` | yes | **yes** | **yes** (frequency override) | yes | **block** |
239
+ | `false` | no | n/a | **yes** (flags injected as hints) | yes | **block** |
240
+
241
+ The third row is the frequency override case. The input validation spec
242
+ requires that frequency tier1+ forces the semantic pass to run even when
243
+ two-pass gating would otherwise skip it. This takes precedence over the
244
+ hard block skip. The rationale is that sustained suspicious activity
245
+ warrants full semantic analysis for audit enrichment, even when the
246
+ content is already definitively blocked.
247
+
248
+ The fourth row is the soft-block case. When a pre-plugin returns
249
+ `safe: false` but its rule is not in `hardBlockRules`:
250
+
251
+ - Stage 2 **runs** — the two-pass optimization does not skip it, because
252
+ the rule is not a hard block. The plugin's flags are injected into the
253
+ sub-agent prompt as additional scrutiny hints.
254
+ - The plugin's `safe: false` **persists** to the final merge. Stage 2
255
+ running is for audit enrichment and additional detection. It does not
256
+ "rescue" a plugin block. The final merge sees the plugin's `safe: false`
257
+ and the outcome is block.
258
+
259
+ **Post-plugin blocks** are simpler: any post-plugin returning `safe: false`
260
+ adds a block to the final merge. Two-pass gating is not involved (it runs
261
+ before Stage 2, post-plugins run after).
262
+
263
+ ---
264
+
265
+ ## Plugin Interface
266
+
267
+ ```typescript
268
+ /**
269
+ * The contract every plugin must implement.
270
+ * Plugins are CommonJS modules that export a factory function
271
+ * returning an object conforming to this interface.
272
+ *
273
+ * Module format: CommonJS. Both export forms are valid:
274
+ * - module.exports = createPlugin (direct assignment)
275
+ * - exports.default = createPlugin (default export — typical of transpiled TS)
276
+ * The loader resolves via: const factory = mod.default || mod
277
+ * This is the standard CJS interop pattern used by bundlers.
278
+ */
279
+ interface SanitizerPlugin {
280
+ /** Unique identifier. Must not collide with built-in rule prefixes
281
+ * (INJ-, CRED-, STRUCT-, TYPE-, ENC-, TEMPORAL-, schema.*, injection.*,
282
+ * credential.*, scope-creep.*). Recommended format: "org.pluginname"
283
+ * e.g. "acme.hipaa-redactor", "clawmoat.scanner" */
284
+ id: string;
285
+
286
+ /** Human-readable name for audit and logging */
287
+ name: string;
288
+
289
+ /** Which pipeline phase this plugin runs in */
290
+ phase: "pre" | "post";
291
+
292
+ /** Rule ID prefix for all rules this plugin produces.
293
+ * Must equal id — e.g. if id is "acme.hipaa-redactor", then
294
+ * ruleIdPrefix is "acme.hipaa-redactor" and all ruleIds in
295
+ * PluginResult must start with "acme.hipaa-redactor.".
296
+ * Validated at startup: collision with built-in prefixes → failure.
297
+ * The prefix is registered in the rule taxonomy and used for
298
+ * frequency weight lookups and profile override matching. */
299
+ ruleIdPrefix: string;
300
+
301
+ /** Called once at startup with the plugin's config block.
302
+ * Throw here to prevent startup (fail-closed on bad config).
303
+ * Async to allow one-time setup (loading models, compiling patterns). */
304
+ initialize(config: Record<string, unknown>): Promise<void>;
305
+
306
+ /** Called once on shutdown. Cleanup resources. Best-effort —
307
+ * errors in shutdown are logged but do not prevent process exit. */
308
+ shutdown(): Promise<void>;
309
+
310
+ /** The inspection function. Called once per content unit.
311
+ * Must resolve within the configured timeout or be killed. */
312
+ inspect(input: PluginInput): Promise<PluginResult>;
313
+ }
314
+
315
+ interface PluginInput {
316
+ /** The content being inspected */
317
+ content: {
318
+ /** "transcript" or "mcp" */
319
+ source: "transcript" | "mcp";
320
+ /** Raw content object (same shape the built-in stages receive).
321
+ * If a prior plugin in the same phase applied a transform,
322
+ * this is the transformed content, not the original. */
323
+ raw: unknown;
324
+ /** For MCP: the tool call that produced this result.
325
+ * Always present when source is "mcp", always absent for "transcript". */
326
+ query?: { server: string; tool: string; params: unknown };
327
+ };
328
+
329
+ /** Results from prior stages (available context depends on phase) */
330
+ priorResults: {
331
+ /** Stage 1A syntactic result (always available) */
332
+ syntactic: { pass: boolean; flags: string[]; ruleIds: string[] };
333
+ /** Stage 1B schema result (always available) */
334
+ schema: { pass: boolean; violations: string[]; ruleIds: string[] };
335
+ /** Stage 2 semantic result (only available for post-phase plugins).
336
+ * Undefined for pre-phase plugins — it is never set in that phase, so do not rely on it. */
337
+ semantic?: { safe: boolean; flags: string[]; structuredResult: unknown };
338
+ /** Results from prior plugins in the same phase (sequential ordering).
339
+ * Empty array for the first plugin in a phase. */
340
+ priorPlugins: PluginResultMeta[];
341
+ };
342
+
343
+ /** Active context profile id */
344
+ contextProfile: string;
345
+ }
346
+
347
+ /**
348
+ * What the plugin returns from inspect().
349
+ */
350
+ interface PluginResult {
351
+ /** Plugin id (must match the plugin's declared id) */
352
+ pluginId: string;
353
+
354
+ /** Did the plugin find the content acceptable? */
355
+ safe: boolean;
356
+
357
+ /** Rule IDs for any findings. Every entry must start with the
358
+ * plugin's declared ruleIdPrefix followed by ".".
359
+ * e.g. "acme.hipaa-redactor.ssn-detected"
360
+ * Empty array is valid (no findings). */
361
+ ruleIds: string[];
362
+
363
+ /** Human-readable descriptions of findings */
364
+ flags: string[];
365
+
366
+ /** Default confidence for all findings. Used for frequency weight
367
+ * calculation: effectiveWeight = weight × confidence.
368
+ * Range: [0.0, 1.0]. Values outside this range are clamped with
369
+ * a warning log — this is a recoverable validation error, not a
370
+ * block. The plugin's results are still applied with the clamped value.
371
+ * Built-in stages implicitly have confidence 1.0.
372
+ * Applies to all ruleIds unless overridden in findingConfidence.
373
+ * When ruleIds is empty and safe is true (clean pass), confidence
374
+ * is not factored into frequency scoring. */
375
+ confidence: number;
376
+
377
+ /** Optional per-finding confidence, keyed by ruleId.
378
+ * When present, the value for a given ruleId overrides the default
379
+ * confidence for frequency scoring purposes. Same [0.0, 1.0] range
380
+ * and clamping rules apply per entry.
381
+ * Use case: A plugin that runs multiple detection strategies in one
382
+ * inspect call — regex matches may be confidence 1.0 while an ML
383
+ * classifier hit may be 0.6.
384
+ * Example: { "acme.scanner.regex-match": 1.0, "acme.scanner.ml-flag": 0.6 }
385
+ * Keys must be a subset of the ruleIds array. Keys not in ruleIds
386
+ * are ignored. */
387
+ findingConfidence?: Record<string, number>;
388
+
389
+ /** Optional: transformed content. Only meaningful for pre-phase plugins.
390
+ * If provided, downstream stages receive this instead of the raw input.
391
+ * Must pass Stage 1B schema validation with the same context (source type,
392
+ * tool schema/discriminant for MCP) as the original content. If validation
393
+ * fails, the transform is rejected (plugin_error, reason:
394
+ * "transform_schema_fail") and the pipeline continues with the original.
395
+ * Use with extreme caution — transforms can mask content from the
396
+ * semantic sub-agent. See Content Transformation section. */
397
+ transformed?: unknown;
398
+ }
399
+
400
+ /**
401
+ * Extended result type used in priorPlugins array. The pipeline sets
402
+ * metadata fields after receiving PluginResult from the worker.
403
+ * Plugins return PluginResult; the pipeline wraps it as PluginResultMeta
404
+ * before passing to subsequent plugins.
405
+ */
406
+ interface PluginResultMeta extends PluginResult {
407
+ /** True if this plugin's transform was applied to content.raw.
408
+ * Only set when the plugin returned a transformed field AND the
409
+ * transform passed schema validation AND allowTransform was true.
410
+ * False or absent otherwise. */
411
+ transformApplied?: boolean;
412
+
413
+ /** True if the plugin timed out, threw, or returned malformed output.
414
+ * When true, safe/ruleIds/flags/confidence reflect the error-as-block
415
+ * state, not the plugin's actual judgment. */
416
+ errored?: boolean;
417
+ }
418
+ ```
419
+
420
+ ### Rule Taxonomy Integration
421
+
422
+ Plugins declare their `ruleIdPrefix` as a static field on the interface.
423
+ At startup, the loader:
424
+
425
+ 1. Validates that `ruleIdPrefix` equals `id` (enforced convention).
426
+ 2. Validates no collision with built-in prefixes (INJ-, CRED-, STRUCT-,
427
+ TYPE-, ENC-, TEMPORAL-, schema.*, structural.*, injection.*, credential.*,
428
+ scope-creep.*) or other loaded plugins.
429
+ 3. Registers the prefix in the rule taxonomy as a dynamic namespace.
430
+
431
+ At inspect time, every ruleId in `PluginResult.ruleIds` is validated to
432
+ start with `ruleIdPrefix + "."`. A ruleId that violates the prefix is
433
+ stripped from the result with a warning log (not a block — the plugin's
434
+ other findings are preserved).
435
+
436
+ **Interaction with profile frequency weight overrides:** Profile config
437
+ references plugin rules by full ruleId (e.g. `"acme.hipaa-redactor.ssn-detected": 15`)
438
+ or by prefix with wildcard (e.g. `"acme.hipaa-redactor.*": 10`). The
439
+ wildcard form applies to all rules under the prefix. Explicit ruleId
440
+ overrides take precedence over wildcards. This is the same pattern used
441
+ for built-in rules in the context-aware sanitization spec.
442
+
443
+ **Interaction with static taxonomy validation in context profiles:** The
444
+ existing context profile schema validates that frequency weight keys exist
445
+ in the rule taxonomy. Plugin prefixes are registered dynamically at startup,
446
+ before profile validation runs. If a profile references a plugin rule that
447
+ isn't loaded (plugin disabled or removed), the weight entry is ignored with
448
+ a warning log — not a startup failure. This prevents a profile from becoming
449
+ invalid when a plugin is removed.
450
+
451
+ ### Factory Pattern
452
+
453
+ Plugins are CommonJS modules that export a default factory function:
454
+
455
+ ```typescript
456
+ // Example: plugins/hipaa-redactor/index.ts (transpile to CJS before deploy)
457
+ import type { SanitizerPlugin } from "@openclaw/sanitizer-plugin";
458
+
459
+ export default function createPlugin(): SanitizerPlugin {
460
+ return {
461
+ id: "acme.hipaa-redactor",
462
+ name: "ACME HIPAA Redactor",
463
+ phase: "pre",
464
+ ruleIdPrefix: "acme.hipaa-redactor",
465
+
466
+ async initialize(config) {
467
+ // Load pattern database, warm up, validate config
468
+ },
469
+
470
+ async shutdown() {
471
+ // Release resources
472
+ },
473
+
474
+ async inspect(input) {
475
+ // Inspection logic
476
+ return {
477
+ pluginId: "acme.hipaa-redactor",
478
+ safe: true,
479
+ ruleIds: [],
480
+ flags: [],
481
+ confidence: 1.0,
482
+ };
483
+ },
484
+ };
485
+ }
486
+ ```
487
+
488
+ ---
489
+
490
+ ## Content Transformation
491
+
492
+ Pre-phase plugins may optionally return a `transformed` field containing a
493
+ modified version of the input content. If present, downstream stages
494
+ (including the semantic sub-agent) receive the transformed content instead
495
+ of the original.
496
+
497
+ **Use case:** A PII redactor that replaces SSNs with `[REDACTED-SSN]` before
498
+ the content reaches the sub-agent. The sub-agent then evaluates the redacted
499
+ version, never seeing the raw PII.
500
+
501
+ ### Transform Validation
502
+
503
+ Transformed output must pass Stage 1B schema validation before being
504
+ accepted. This prevents a plugin (buggy or malicious) from producing output
505
+ that breaks downstream stages — e.g., returning a string where an object was
506
+ expected, or dropping required fields.
507
+
508
+ The validation uses the same context as the original Stage 1B pass:
509
+
510
+ - **For transcript content:** Same transcript schema, same strictness level
511
+ from the active context profile.
512
+ - **For MCP content:** Same tool schema and discriminant context derived from
513
+ `content.query` (server, tool, params). The tool's declared output schema
514
+ is the validation target. Same strictness level from the active profile.
515
+
516
+ If the transformed output fails schema validation:
517
+
518
+ - The transform is rejected.
519
+ - A `plugin_error` event is emitted with `reason: "transform_schema_fail"`.
520
+ - The pipeline continues with the original (pre-transform) content.
521
+ - The plugin's other results (safe, ruleIds, flags) are still applied —
522
+ only the transform is discarded.
523
+
524
+ This validation runs synchronously on the main thread after the worker
525
+ returns the result.
526
+
527
+ ### Transform Visibility Within a Phase
528
+
529
+ When a pre-plugin with `allowTransform: true` returns a valid transform:
530
+
531
+ - **Later plugins in the same phase** receive the transformed content in
532
+ `content.raw`. They inspect what downstream stages will actually see.
533
+ - **The `priorPlugins` array** for later plugins includes the transforming
534
+ plugin's result as a `PluginResultMeta` with `transformApplied: true`.
535
+ - **Stage 2 (semantic sub-agent)** receives the transformed content.
536
+ - **Post-phase plugins** receive the transformed content in `content.raw`.
537
+
538
+ The original (pre-transform) content is always preserved in the raw mirror
539
+ sidecar. If an operator needs to audit what the original looked like,
540
+ it is available in the raw sidecar file regardless of transforms.
541
+
542
+ ### Audit Hashing
543
+
544
+ When a transform is applied, the `plugin_transform` audit entry includes
545
+ SHA-256 hashes of both the pre-transform and post-transform content. To
546
+ ensure deterministic hashing regardless of JSON key ordering, content is
547
+ serialized using **canonical JSON**: keys sorted recursively in
548
+ lexicographic order, no whitespace.
549
+
550
+ Implementation: Apply recursive key sorting before `JSON.stringify` with no
551
+ indentation. This is equivalent to:
552
+
553
+ ```typescript
554
+ function canonicalize(obj: unknown): string {
555
+ return JSON.stringify(obj, (_, v) =>
556
+ v && typeof v === "object" && !Array.isArray(v)
557
+ ? Object.fromEntries(Object.entries(v).sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0)))
558
+ : v
559
+ );
560
+ }
561
+ ```
562
+
563
+ The same canonicalization is used for the audit trail spec's `output_diff`
564
+ sha256 fields. If the audit trail spec does not currently specify
565
+ canonicalization, this should be back-ported as a consistency fix.
566
+
567
+ ### Risks
568
+
569
+ - A transform can hide content from the semantic sub-agent, reducing its
570
+ ability to detect threats that were present in the original.
571
+ - A malicious or buggy plugin transform could inject content into the
572
+ pipeline.
573
+ - Transforms are not composable in a predictable way — two plugins both
574
+ transforming the same content can produce unexpected results.
575
+
576
+ ### Mitigations
577
+
578
+ - Transforms are opt-in per plugin via config (`allowTransform: true`).
579
+ Default is `false` — even if a plugin returns `transformed`, it is
580
+ ignored unless the operator explicitly enables it.
581
+ - Transformed output must pass Stage 1B schema validation with the same
582
+ context as the original content. Invalid transforms are rejected without
583
+ blocking the pipeline.
584
+ - When a transform is applied, the audit entry includes both the pre-
585
+ and post-transform content hashes (canonical JSON SHA-256), same
586
+ format as the audit trail spec's `output_diff`.
587
+ - Only one plugin per phase may have `allowTransform: true`. If multiple
588
+ plugins in the same phase declare transforms, startup fails with an
589
+ explicit error. This eliminates the composition problem.
590
+ - The raw (pre-transform) content is always written to the raw mirror
591
+ sidecar regardless of transforms.
592
+
593
+ ---
594
+
595
+ ## Interaction with Trust Tiers
596
+
597
+ Plugins sit **after** the trust tier routing decision in the pipeline.
598
+ The trust tier spec requires that Stage 1 (syntactic + schema prefilter)
599
+ runs for all results including trusted, with audit events emitted. The
600
+ trust routing decision (trusted fast path vs full inspection) happens after
601
+ Stage 1. Plugins are part of the full inspection path and never run for
602
+ trusted results.
603
+
604
+ - **Trusted MCP servers:** Stage 1 runs and audits. Trust check passes.
605
+ `trusted_pass` audit entry. Result passed directly to manager. No
606
+ plugin inspection.
607
+ - **Untrusted MCP servers:** Stage 1 runs and audits. Trust check fails.
608
+ Full pipeline including plugins.
609
+ - **Transcript content:** No trust tier applies. Full pipeline including
610
+ plugins.
611
+
612
+ If an operator wants a plugin to run even on trusted server results (e.g.,
613
+ a compliance logger that must see everything), they should not use the trust
614
+ tier bypass for that server. The trust tier list is the single toggle — there
615
+ is no per-plugin override of trust tier routing.
616
+
617
+ ---
618
+
619
+ ## Interaction with Context Profiles
620
+
621
+ Context profiles can modulate plugin behavior through a new `plugins` map
622
+ in the custom profile schema. **This requires a companion update to the
623
+ context-aware-sanitization spec (v2.2)** — see Required Companion Changes.
624
+
625
+ ```yaml
626
+ # In a custom context profile:
627
+ plugins:
628
+ "acme.hipaa-redactor":
629
+ enabled: true
630
+ frequencyWeight: 15 # Override all rules under this plugin
631
+ "acme.hipaa-redactor.ssn-detected":
632
+ frequencyWeight: 20 # Override a specific rule (takes precedence)
633
+ "clawmoat.scanner":
634
+ enabled: false # Not useful in this deployment context
635
+ ```
636
+
637
+ **Rules:**
638
+
639
+ - `enabled: false` in a profile skips the plugin for sessions using that
640
+ profile. The plugin is still loaded at startup and initialized — it is
641
+ just not invoked. This allows fast profile switching on restart without
642
+ re-initializing plugins.
643
+ - `frequencyWeight` overrides the plugin's default frequency weight for
644
+ the active profile. Can be set at the plugin level (applies to all rules
645
+ under the prefix) or at the individual ruleId level. Individual ruleId
646
+ overrides take precedence over plugin-level overrides.
647
+ - Profile-level plugin config does not change the plugin's phase, timeout,
648
+ or transform permissions. Those are global and set at the top level.
649
+
650
+ **Precedence:** If a profile disables a plugin, it does not run. There is no
651
+ mechanism for a plugin to force itself to run regardless of profile. Operator
652
+ config always wins.
653
+
654
+ ---
655
+
656
+ ## Interaction with Frequency Scoring
657
+
658
+ Plugin findings contribute to the session's frequency score through the
659
+ existing exponential decay mechanism:
660
+
661
+ - Each plugin rule ID (`pluginId.ruleName`) is a scorable event, same as
662
+ built-in rule IDs.
663
+ - Default frequency weight for plugin rules is `3` (moderate). Operators
664
+ can override per-rule in global config or per-profile.
665
+ - Plugin `confidence` is multiplied against the frequency weight:
666
+ `effectiveWeight = weight × confidence`. A plugin that reports
667
+ `confidence: 0.6` contributes 60% of its configured weight to the
668
+ session score. This prevents low-confidence ML classifier hits from
669
+ rapidly escalating sessions.
670
+ - When `findingConfidence` is present, each ruleId uses its specific
671
+ confidence value instead of the default. Unkeyed ruleIds fall back to
672
+ the default `confidence` field. This allows a single inspect call to
673
+ contribute different weights for different finding types.
674
+ - **Clamping:** Confidence values below 0.0 are clamped to 0.0. Values
675
+ above 1.0 are clamped to 1.0. A warning log is emitted when clamping
676
+ occurs (likely a plugin bug). Same clamping applies to `findingConfidence`
677
+ entries.
678
+ - **Clean passes:** When `ruleIds` is empty and `safe` is `true`, the
679
+ result is a clean pass. Confidence is not factored into frequency scoring
680
+ — there are no findings to score.
681
+
682
+ ### Built-In Frequency Weight Scale (Reference)
683
+
684
+ For context on what the default plugin weight of `3` means relative to the
685
+ existing system:
686
+
687
+ | Weight | Examples | Interpretation |
688
+ | ------ | -------- | -------------- |
689
+ | 1 | `structural.encoding-trick` in `code-generation` profile | Very low — expected noise in context, barely registers |
690
+ | 3 | **Plugin default** | Moderate — noticeable if repeated, benign in isolation |
691
+ | 4 | `schema.undeclared-admin-reject` | Elevated — structurally suspicious |
692
+ | 5 | Most built-in defaults (e.g. `structural.*`, `schema.*`) | Standard built-in weight |
693
+ | 10–12 | `credential.*` in `code-generation` profile | High — credentials in unexpected context |
694
+ | 15 | `credential.*` in `customer-service` profile | Very high — credentials in support context are almost certainly a problem |
695
+
696
+ The default of `3` is deliberately below the standard built-in weight of `5`.
697
+ Plugin findings are additive signals — they should contribute to the session
698
+ score without rapidly dominating it. Operators running high-confidence plugins
699
+ (deterministic regex, known-good external scanners) should raise the weight.
700
+ Operators running experimental or noisy plugins should leave it low or reduce
701
+ it further.
702
+
703
+ ---
704
+
705
+ ## Interaction with Two-Pass Gating
706
+
707
+ - A pre-plugin that returns `safe: false` is treated identically to a
708
+ built-in Stage 1 failure for two-pass gating purposes.
709
+ - If the failing plugin rule ID is in `twoPass.hardBlockRules`, the
710
+ semantic sub-agent is skipped (same as built-in hard blocks) — **unless**
711
+ the session is at frequency tier1 or above, in which case Stage 2 is
712
+ forced to run regardless (per the input validation spec). See the
713
+ Pre-Plugin Block Semantics truth table for the complete matrix.
714
+ - If the failing plugin rule ID is NOT in `hardBlockRules`, the result
715
+ proceeds to the semantic pass with the plugin's flags injected as hints.
716
+ The plugin's `safe: false` persists to the final merge regardless of
717
+ Stage 2's outcome. See Pre-Plugin Block Semantics truth table.
718
+ - Plugin ruleIds are eligible for inclusion in `hardBlockRules`. Operators
719
+ add them using the full ruleId (e.g. `"acme.scanner.critical-threat"`).
720
+ Prefix wildcards are not supported in `hardBlockRules` — each rule must
721
+ be listed explicitly (same as built-in rules).
722
+ - Post-plugins are not relevant to two-pass gating (they run after
723
+ Stage 2).
724
+
725
+ ---
726
+
727
+ ## Interaction with Audit Trail
728
+
729
+ Plugin events are emitted into the same per-session audit JSONL as built-in
730
+ events.
731
+
732
+ ### Event Types and Emission Rules
733
+
734
+ | Event | Verbosity | Emitted when |
735
+ | -------------------- | --------- | ------------ |
736
+ | `plugin_block` | `minimal` | Plugin returned `safe: false` |
737
+ | `plugin_pass` | `standard`| Plugin returned `safe: true` with empty `flags` |
738
+ | `plugin_flags` | `standard`| Plugin returned `safe: true` with non-empty `flags` |
739
+ | `plugin_error` | `minimal` | Plugin threw, timed out, returned malformed output, or transform failed schema |
740
+ | `plugin_transform` | `high` | Plugin transform was applied (passed schema validation) |
741
+ | `plugin_config_loaded`| `minimal`| Plugin config resolved at startup (agent-level log, not session) |
742
+
743
+ **Emission rule for pass vs flags:** If a plugin returns `safe: true` with
744
+ non-empty `flags`, emit `plugin_flags` only — not both `plugin_flags` and
745
+ `plugin_pass`. This follows the same pattern as `syntactic_flags` vs
746
+ `syntactic_pass` in the audit trail spec. `plugin_pass` is only emitted
747
+ for clean passes with no findings.
748
+
749
+ **Destination note:** All per-turn plugin events (`plugin_block`,
750
+ `plugin_pass`, `plugin_flags`, `plugin_error`, `plugin_transform`) write
751
+ to the per-session audit JSONL at the standard path
752
+ (`~/.openclaw/agents/<agentId>/session-memory/audit/<sessionId>.jsonl`).
753
+ `plugin_config_loaded` is the exception — it fires at startup before any
754
+ session exists. It writes to the agent-level alert log at
755
+ `~/.openclaw/agents/<agentId>/alerts/alerts.jsonl`, same pattern as
756
+ `audit_config_loaded` in the audit trail spec.
757
+
758
+ ### Event Payload Schemas
759
+
760
+ **`plugin_block`**
761
+
762
+ ```jsonl
763
+ {
764
+ "event": "plugin_block",
765
+ "pluginId": "clawmoat.scanner",
766
+ "ruleIds": ["clawmoat.scanner.threat-detected"],
767
+ "flags": ["Real-time threat signature matched: CVE-2026-1234"],
768
+ "confidence": 0.95,
769
+ "findingConfidence": { "clawmoat.scanner.threat-detected": 0.95 },
770
+ "phase": "post",
771
+ "timestamp": "2026-03-07T14:22:00.000Z",
772
+ "sessionId": "sess-abc",
773
+ "agentId": "agent-xyz",
774
+ "messageId": "msg-123"
775
+ }
776
+ ```
777
+
778
+ **`plugin_pass`**
779
+
780
+ ```jsonl
781
+ {
782
+ "event": "plugin_pass",
783
+ "pluginId": "acme.hipaa-redactor",
784
+ "confidence": 1.0,
785
+ "phase": "pre",
786
+ "timestamp": "...",
787
+ "sessionId": "...",
788
+ "agentId": "...",
789
+ "messageId": "..."
790
+ }
791
+ ```
792
+
793
+ **`plugin_flags`**
794
+
795
+ ```jsonl
796
+ {
797
+ "event": "plugin_flags",
798
+ "pluginId": "acme.hipaa-redactor",
799
+ "ruleIds": ["acme.hipaa-redactor.ssn-detected"],
800
+ "flags": ["1 SSN pattern(s) detected"],
801
+ "confidence": 1.0,
802
+ "findingConfidence": { "acme.hipaa-redactor.ssn-detected": 1.0 },
803
+ "phase": "pre",
804
+ "timestamp": "...",
805
+ "sessionId": "...",
806
+ "agentId": "...",
807
+ "messageId": "..."
808
+ }
809
+ ```
810
+
811
+ **`plugin_error`**
812
+
813
+ ```jsonl
814
+ {
815
+ "event": "plugin_error",
816
+ "pluginId": "clawmoat.scanner",
817
+ "reason": "timeout" | "invalid_result" | "exception" | "transform_schema_fail" | "worker_init_failed" | "queue_full",
818
+ "detail": "Worker did not respond within 3000ms",
819
+ "phase": "post",
820
+ "timestamp": "...",
821
+ "sessionId": "...",
822
+ "agentId": "...",
823
+ "messageId": "..."
824
+ }
825
+ ```
826
+
827
+ **`plugin_transform`**
828
+
829
+ ```jsonl
830
+ {
831
+ "event": "plugin_transform",
832
+ "pluginId": "acme.hipaa-redactor",
833
+ "preTransformHash": "sha256:a1b2c3...",
834
+ "postTransformHash": "sha256:d4e5f6...",
835
+ "hashMethod": "sha256-canonical-json",
836
+ "phase": "pre",
837
+ "timestamp": "...",
838
+ "sessionId": "...",
839
+ "agentId": "...",
840
+ "messageId": "..."
841
+ }
842
+ ```
843
+
844
+ **`plugin_config_loaded`**
845
+
846
+ ```jsonl
847
+ {
848
+ "event": "plugin_config_loaded",
849
+ "pluginId": "acme.hipaa-redactor",
850
+ "name": "ACME HIPAA Redactor",
851
+ "phase": "pre",
852
+ "ruleIdPrefix": "acme.hipaa-redactor",
853
+ "timeoutMs": 1000,
854
+ "allowTransform": true,
855
+ "frequencyWeight": 10,
856
+ "enabled": true,
857
+ "timestamp": "...",
858
+ "agentId": "..."
859
+ }
860
+ ```
861
+
862
+ ### Rule Taxonomy
863
+
864
+ Plugin rule IDs are registered in the rule taxonomy at startup via the
865
+ `ruleIdPrefix` mechanism described in Rule Taxonomy Integration. At runtime,
866
+ specific ruleIds (e.g. `"acme.hipaa-redactor.ssn-detected"`) are validated
867
+ against the prefix and appear in `rule_triggered` events at `high` verbosity,
868
+ same as built-in rules.
869
+
870
+ Plugin rule IDs that do not start with the emitting plugin's own registered
871
+ prefix are stripped from results with a warning log — same treatment as unknown
872
+ built-in rule IDs.
873
+
874
+ ---
875
+
876
+ ## Interaction with Alerting
877
+
878
+ Plugin events are consumable by the alerting layer through the same event
879
+ stream. The existing rules (burst detection, frequency escalation, etc.)
880
+ naturally cover plugin-generated events because plugin flags contribute
881
+ to frequency scoring.
882
+
883
+ ### New Alert Rule: `pluginErrorSpike`
884
+
885
+ Plugin errors use aggregation-based alerting, consistent with the
886
+ existing `writeFailSpike` pattern. A single `plugin_error` may be a
887
+ transient issue (network blip for an external scanner, one-off timeout).
888
+ Sustained errors indicate an operational problem.
889
+
890
+ ```
891
+ Rule: pluginErrorSpike
892
+ Trigger: >= N plugin_error events within M minutes, scoped to the same agent
893
+ (matching existing alerting aggregation scope — see audit-alerting-spec)
894
+ Default: N = 3, M = 5
895
+ Severity: medium
896
+ Payload: includes pluginId, reason breakdown, and recentContext
897
+ Configurable: alerting.rules.pluginErrorSpike.count (default: 3)
898
+ alerting.rules.pluginErrorSpike.windowMinutes (default: 5)
899
+ alerting.rules.pluginErrorSpike.enabled (default: true)
900
+ ```
901
+
902
+ This rule aggregates across all plugins. If operators need per-plugin
903
+ alerting granularity, that requires custom alert rule definitions
904
+ (deferred — listed under Future Work in the alerting spec).
905
+
906
+ Operators who want plugin-specific alert rules beyond `pluginErrorSpike`
907
+ can define them when the alerting spec adds support for custom rule
908
+ definitions (currently out of scope in the alerting spec, listed under
909
+ Future Work).
910
+
911
+ ---
912
+
913
+ ## Config Surface
914
+
915
+ ```
916
+ memory.sessions.sanitization.plugins: PluginDeclaration[]
917
+ Default: []
918
+ Ordered list of plugin declarations. Execution order within each phase
919
+ follows declaration order.
920
+
921
+ Each entry:
922
+ module: string
923
+ Local file path to the plugin module (CommonJS). Relative to the
924
+ OpenClaw config directory. No remote URLs.
925
+ Path validation: resolve to absolute via path.resolve(configDir, module),
926
+ then resolve symlinks via fs.realpath(), then verify the resolved
927
+ absolute path starts with the config directory's resolved absolute path
928
+ plus path.sep. The trailing separator prevents false positives from
929
+ sibling directories (e.g. /app/config2 must not match /app/config).
930
+ This is platform-agnostic and catches all traversal variants including
931
+ "..", leading "/", Windows absolute paths (C:\), UNC paths (\\server),
932
+ file:// URIs, and symlinks pointing outside the boundary.
933
+
934
+ **Dependency resolution:** Plugins must ship as either:
935
+ (a) A single-file CommonJS bundle (e.g., built with esbuild, rollup,
936
+ or tsup) with all dependencies inlined. This is the recommended
937
+ approach — it eliminates resolution ambiguity entirely.
938
+ (b) A directory containing a CommonJS entry point and a local
939
+ node_modules. The loader sets NODE_PATH to the plugin's directory
940
+ before require(), allowing the plugin's own dependencies to
941
+ resolve. The plugin's node_modules must be vendored alongside the
942
+ plugin — the loader does not run npm install.
943
+
944
+ Option (a) is strongly preferred for distribution. Option (b) is
945
+ acceptable for in-house plugins where bundling is impractical.
946
+
947
+ phase: "pre" | "post"
948
+ Which pipeline phase. Must match the plugin's declared phase.
949
+ Mismatch between config and plugin declaration fails at startup.
950
+
951
+ enabled: boolean
952
+ Default: true
953
+ Master toggle for this plugin. When false, plugin is not loaded
954
+ or initialized.
955
+
956
+ config: Record<string, unknown>
957
+ Default: {}
958
+ Opaque config block passed to the plugin's initialize() method.
959
+ OpenClaw does not interpret this — it's the plugin's responsibility
960
+ to validate.
961
+
962
+ timeoutMs: number
963
+ Default: 1000
964
+ Maximum time the plugin's inspect() call may take before being
965
+ killed. Minimum: 100. Maximum: 10000.
966
+ Timer starts when the message is posted to the worker thread,
967
+ not when it enters the queue.
968
+ Killed plugins produce a plugin_error audit event and are treated
969
+ as blocks.
970
+
971
+ allowTransform: boolean
972
+ Default: false
973
+ Whether the plugin's transformed output is applied. See Content
974
+ Transformation section.
975
+
976
+ frequencyWeight: number
977
+ Default: 3
978
+ Base frequency weight for all rules this plugin produces.
979
+ Can be overridden per-rule in profile frequency weight config.
980
+
981
+ maxQueueDepth: number
982
+ Default: 10
983
+ Maximum pending inspect requests queued for this plugin's worker.
984
+ Requests exceeding this limit are rejected immediately with
985
+ plugin_error reason: "queue_full".
986
+
987
+ memory.sessions.sanitization.pluginLimits:
988
+ maxTotal: number
989
+ Default: 10
990
+ Maximum number of plugins (across both phases). Prevents unbounded
991
+ pipeline growth. Startup fails if exceeded.
992
+
993
+ maxPrePhase: number
994
+ Default: 5
995
+ Maximum pre-phase plugins.
996
+
997
+ maxPostPhase: number
998
+ Default: 5
999
+ Maximum post-phase plugins.
1000
+ ```
1001
+
1002
+ ---
1003
+
1004
+ ## Plugin Loading and Lifecycle
1005
+
1006
+ ```
1007
+ Config loads
1008
+
1009
+ Validate plugin declarations:
1010
+ - Path validation (resolve absolute, resolve symlinks via fs.realpath(),
1011
+ verify containment within config directory using trailing path.sep —
1012
+ platform-agnostic)
1013
+ - Phase consistency (config phase matches plugin declaration)
1014
+ - Transform uniqueness (max one allowTransform per phase)
1015
+ - Count limits (pluginLimits.maxTotal, .maxPrePhase, .maxPostPhase)
1016
+
1017
+ Load plugin modules (require() — CommonJS only)
1018
+ - Module must export a default factory function
1019
+ - Factory must return an object conforming to SanitizerPlugin
1020
+ - Missing exports or wrong shape → startup failure
1021
+
1022
+ Validate ruleIdPrefix:
1023
+ - Must equal plugin id
1024
+ - No collision with built-in prefixes or other plugins
1025
+
1026
+ Register ruleIdPrefix in rule taxonomy (dynamic namespace)
1027
+
1028
+ Validate profile plugin references:
1029
+ - Profile frequency weight keys referencing unloaded plugin prefixes
1030
+ produce warning log, not startup failure
1031
+
1032
+ Spawn worker_thread per plugin
1033
+
1034
+ Call initialize() on each plugin (in declaration order, on worker thread)
1035
+ - Pass plugin-specific config block
1036
+ - Plugin may throw → startup failure (fail closed)
1037
+ - Plugin may perform async setup (load models, compile patterns)
1038
+
1039
+ Emit plugin_config_loaded audit event per plugin
1040
+
1041
+ Pipeline ready. Plugins invoked per-turn in declared order within phase.
1042
+
1043
+ On shutdown: call shutdown() on each plugin (reverse declaration order,
1044
+ on worker threads). Errors logged but do not prevent exit.
1045
+ ```
1046
+
1047
+ Plugins are loaded once and persist for the agent's lifetime. There is no
1048
+ per-session plugin loading, no per-turn plugin loading, and no runtime
1049
+ plugin replacement.
1050
+
1051
+ ---
1052
+
1053
+ ## Error Handling
1054
+
1055
+ ### Recoverable Validation (warn, continue)
1056
+
1057
+ These issues are corrected automatically. The plugin's results are still
1058
+ applied with the corrected values. A warning log is emitted.
1059
+
1060
+ | Issue | Behavior |
1061
+ | ----- | -------- |
1062
+ | Confidence out of range | Clamped to [0.0, 1.0]. Warning log. Plugin results applied with clamped value. |
1063
+ | findingConfidence entry out of range | Same clamping per entry. |
1064
+ | ruleId does not start with ruleIdPrefix | ruleId stripped from result. Warning log. Other ruleIds and plugin results preserved. |
1065
+ | findingConfidence key not in ruleIds | Key ignored. No warning (explicitly allowed by spec). |
1066
+
1067
+ ### Unrecoverable Errors (block)
1068
+
1069
+ These cannot be corrected. A `plugin_error` audit event is emitted and the
1070
+ content is blocked — except for a transform schema failure, where only the transform is discarded.
1071
+
1072
+ | Failure Mode | Behavior |
1073
+ | ------------------------- | -------- |
1074
+ | Plugin throws in inspect | `plugin_error`, `reason: "exception"`. Content blocked. Pipeline continues to next plugin for audit completeness. |
1075
+ | Plugin exceeds timeout | Worker terminated via `worker.terminate()`. `plugin_error`, `reason: "timeout"`. Content blocked. Worker respawned and re-initialized. If re-init fails, permanently failed for process lifetime (`reason: "worker_init_failed"`). |
1076
+ | Plugin returns structurally invalid result | `plugin_error`, `reason: "invalid_result"`. Missing required fields (pluginId, safe, ruleIds, flags, confidence) or wrong types. Content blocked. |
1077
+ | Plugin transform fails schema | Transform rejected, pipeline continues with original content. `plugin_error`, `reason: "transform_schema_fail"`. Plugin's other results (safe, ruleIds, flags) still applied — only the transform is discarded. |
1078
+ | Plugin queue full | `plugin_error`, `reason: "queue_full"`. Content blocked. Worker not terminated. |
1079
+
1080
+ ### Startup Failures (agent does not start)
1081
+
1082
+ | Failure Mode | Behavior |
1083
+ | ------------------------- | -------- |
1084
+ | Plugin throws in initialize | Agent does not start. Operator must fix config or remove plugin. |
1085
+ | Plugin module not found | Clear error message with resolved path. |
1086
+ | Plugin id collision | Two plugins cannot declare the same id. |
1087
+ | Plugin ruleIdPrefix collision | Prefix collides with built-in or other plugin. |
1088
+ | Plugin path escapes config dir | Resolved path (post-realpath, with trailing `path.sep`) is outside config directory boundary. |
1089
+ | Plugin module is ESM | `require()` of ESM module throws. Error message suggests transpiling to CJS. |
1090
+
1091
+ **Pipeline continuation after block:** When a plugin blocks content, the
1092
+ pipeline still runs subsequent plugins in the same phase. This is for audit
1093
+ completeness — if two plugins both detect different issues, both findings
1094
+ should appear in the audit trail. The content is blocked regardless, so
1095
+ running additional plugins has no security cost.
1096
+
1097
+ ---
1098
+
1099
+ ## Example: ClawMoat Integration (Local Library)
1100
+
1101
+ ClawMoat (`npm install clawmoat`) is a zero-dependency Node.js library for
1102
+ prompt injection detection, secret scanning, and PII detection. It exposes
1103
+ `scan()` and `createPolicy()` as direct function calls — no network, no
1104
+ external service, sub-millisecond execution.
1105
+
1106
+ ```typescript
1107
+ // plugins/clawmoat-adapter/index.ts (transpile to CJS before deploy)
1108
+ //
1109
+ // Bundle with: npx esbuild index.ts --bundle --platform=node --outfile=index.js
1110
+ // This inlines the clawmoat dependency into a single CJS file.
1111
+
1112
+ import type { SanitizerPlugin } from "@openclaw/sanitizer-plugin";
1113
+ import { scan, createPolicy } from "clawmoat";
1114
+
1115
+ export default function createPlugin(): SanitizerPlugin {
1116
+ let policy: ReturnType<typeof createPolicy>;
1117
+
1118
+ return {
1119
+ id: "clawmoat.scanner",
1120
+ name: "ClawMoat Scanner",
1121
+ phase: "pre",
1122
+ ruleIdPrefix: "clawmoat.scanner",
1123
+
1124
+ async initialize(config) {
1125
+ // createPolicy accepts YAML-style rule config:
1126
+ // allowedTools, blockedCommands, secretPatterns, etc.
1127
+ policy = createPolicy(
1128
+ (config.policy as Record<string, unknown>) ?? {}
1129
+ );
1130
+ },
1131
+
1132
+ async shutdown() {
1133
+ // No resources to release — pure library, no connections
1134
+ },
1135
+
1136
+ async inspect(input) {
1137
+ const content = typeof input.content.raw === "string"
1138
+ ? input.content.raw
1139
+ : JSON.stringify(input.content.raw);
1140
+
1141
+ const result = scan(content, { policy });
1142
+
1143
+ return {
1144
+ pluginId: "clawmoat.scanner",
1145
+ safe: !result.blocked,
1146
+ ruleIds: result.threats.map(
1147
+ (t: { pattern: string }) => `clawmoat.scanner.${t.pattern}`
1148
+ ),
1149
+ flags: result.threats.map(
1150
+ (t: { pattern: string; match: string; severity: string }) =>
1151
+ `[${t.severity}] ${t.pattern}: ${t.match}`
1152
+ ),
1153
+ confidence: 1.0, // Pattern matching is deterministic
1154
+ // Per-finding confidence not needed — all findings are
1155
+ // regex/entropy based, all confidence 1.0
1156
+ };
1157
+ },
1158
+ };
1159
+ }
1160
+ ```
1161
+
1162
+ Config:
1163
+
1164
+ ```yaml
1165
+ memory.sessions.sanitization.plugins:
1166
+ - module: "./plugins/clawmoat-adapter/index.js"
1167
+ phase: "pre"
1168
+ # timeoutMs: 1000 is fine — scan() is sub-millisecond
1169
+ config:
1170
+ policy:
1171
+ secretPatterns: ["AWS_*", "GITHUB_TOKEN"]
1172
+ # Additional ClawMoat policy config passed through
1173
+ ```
1174
+
1175
+ **Why pre-phase?** ClawMoat's `scan()` is a pure synchronous function with
1176
+ sub-millisecond execution. It runs pattern matching and entropy analysis —
1177
+ no model calls, no network. This makes it ideal as a pre-phase plugin: its
1178
+ findings feed into frequency scoring and inform the semantic sub-agent via
1179
+ hint injection, adding detection coverage before the LLM call.
1180
+
1181
+ **Overlap with built-in Tier 1 patterns:** ClawMoat's injection detection
1182
+ and credential scanning overlap with OpenClaw's built-in Tier 1 patterns
1183
+ (INJ-*, CRED-*). This is acceptable — defense in depth. ClawMoat maintains
1184
+ its own pattern library (30+ credential patterns, OWASP coverage) which
1185
+ may catch patterns the built-in set misses, and vice versa. Duplicate
1186
+ detections are deduplicated by ruleId in the final merge.
1187
+
1188
+ ---
1189
+
1190
+ ## Example: External Threat Scanner (Network Service Pattern)
1191
+
1192
+ For scanning tools that expose an HTTP API rather than a local library
1193
+ (e.g., enterprise threat intelligence services, hosted ML classifiers):
1194
+
1195
+ ```typescript
1196
+ // plugins/external-scanner/index.ts (transpile to CJS before deploy)
1197
+ import type { SanitizerPlugin } from "@openclaw/sanitizer-plugin";
1198
+
1199
+ interface ScannerClient {
1200
+ scan(payload: { content: string; context: string }): Promise<{
1201
+ threats: Array<{ category: string; description: string; confidence: number }>;
1202
+ }>;
1203
+ healthCheck(): Promise<void>;
1204
+ close(): Promise<void>;
1205
+ }
1206
+
1207
+ export default function createPlugin(): SanitizerPlugin {
1208
+ let client: ScannerClient;
1209
+
1210
+ return {
1211
+ id: "acme.threat-scanner",
1212
+ name: "ACME Threat Intelligence Scanner",
1213
+ phase: "post",
1214
+ ruleIdPrefix: "acme.threat-scanner",
1215
+
1216
+ async initialize(config) {
1217
+ // Connect to external scanning service
1218
+ const endpoint = config.endpoint as string;
1219
+ const apiKey = config.apiKey as string;
1220
+ // ExternalScannerClient is your own adapter (implementing ScannerClient) to whatever HTTP API the service exposes
1221
+ client = new ExternalScannerClient({ endpoint, apiKey });
1222
+ await client.healthCheck();
1223
+ },
1224
+
1225
+ async shutdown() {
1226
+ await client.close();
1227
+ },
1228
+
1229
+ async inspect(input) {
1230
+ const scanResult = await client.scan({
1231
+ content: JSON.stringify(input.content.raw),
1232
+ context: input.contextProfile,
1233
+ });
1234
+
1235
+ return {
1236
+ pluginId: "acme.threat-scanner",
1237
+ safe: scanResult.threats.length === 0,
1238
+ ruleIds: scanResult.threats.map(
1239
+ (t) => `acme.threat-scanner.${t.category}`
1240
+ ),
1241
+ flags: scanResult.threats.map((t) => t.description),
1242
+ confidence: 0.85, // ML-based, not deterministic
1243
+ findingConfidence: Object.fromEntries(
1244
+ scanResult.threats.map((t) => [
1245
+ `acme.threat-scanner.${t.category}`,
1246
+ t.confidence,
1247
+ ])
1248
+ ),
1249
+ };
1250
+ },
1251
+ };
1252
+ }
1253
+ ```
1254
+
1255
+ Config:
1256
+
1257
+ ```yaml
1258
+ memory.sessions.sanitization.plugins:
1259
+ - module: "./plugins/external-scanner/index.js"
1260
+ phase: "post"
1261
+ timeoutMs: 3000 # Network call — needs more headroom
1262
+ maxQueueDepth: 5 # Don't queue too many for a slow service
1263
+ config:
1264
+ endpoint: "http://localhost:9090"
1265
+ apiKey: "${SCANNER_API_KEY}" # Env var substitution in config loader
1266
+ ```
1267
+
1268
+ **Why post-phase?** External network scanners add latency (50–500ms
1269
+ typical). Running them in the post phase means they inspect the semantic
1270
+ sub-agent's structured output rather than delaying the sub-agent call. They're also ideal
1271
+ for ML-based classifiers that benefit from seeing the sub-agent's
1272
+ judgment alongside the raw content.
1273
+
1274
+ ---
1275
+
1276
+ ## Example: HIPAA PII Redactor (Pre-Phase with Transform)
1277
+
1278
+ ```typescript
1279
+ // plugins/hipaa-redactor/index.ts (transpile to CJS before deploy)
1280
+ import type { SanitizerPlugin } from "@openclaw/sanitizer-plugin";
1281
+
1282
+ const SSN_PATTERN = /\b\d{3}-\d{2}-\d{4}\b/g;
1283
+ const MRN_PATTERN = /\bMRN[:\s]*\d{6,10}\b/gi;
1284
+
1285
+ export default function createPlugin(): SanitizerPlugin {
1286
+ return {
1287
+ id: "acme.hipaa-redactor",
1288
+ name: "ACME HIPAA PII Redactor",
1289
+ phase: "pre",
1290
+ ruleIdPrefix: "acme.hipaa-redactor",
1291
+
1292
+ async initialize() {},
1293
+ async shutdown() {},
1294
+
1295
+ async inspect(input) {
1296
+ const raw = JSON.stringify(input.content.raw);
1297
+ const findings: string[] = [];
1298
+ const ruleIds: string[] = [];
1299
+ let redacted = raw;
1300
+
1301
+ const ssnMatches = raw.match(SSN_PATTERN);
1302
+ if (ssnMatches) {
1303
+ findings.push(`${ssnMatches.length} SSN pattern(s) detected`);
1304
+ ruleIds.push("acme.hipaa-redactor.ssn-detected");
1305
+ redacted = redacted.replace(SSN_PATTERN, "[REDACTED-SSN]");
1306
+ }
1307
+
1308
+ const mrnMatches = raw.match(MRN_PATTERN);
1309
+ if (mrnMatches) {
1310
+ findings.push(`${mrnMatches.length} MRN pattern(s) detected`);
1311
+ ruleIds.push("acme.hipaa-redactor.mrn-detected");
1312
+ redacted = redacted.replace(MRN_PATTERN, "[REDACTED-MRN]");
1313
+ }
1314
+
1315
+ return {
1316
+ pluginId: "acme.hipaa-redactor",
1317
+ safe: true, // Redaction is remediation, not blocking
1318
+ ruleIds,
1319
+ flags: findings,
1320
+ confidence: 1.0, // Regex matches are deterministic
1321
+ transformed: findings.length > 0
1322
+ ? JSON.parse(redacted)
1323
+ : undefined,
1324
+ };
1325
+ },
1326
+ };
1327
+ }
1328
+ ```
1329
+
1330
+ Config:
1331
+
1332
+ ```yaml
1333
+ memory.sessions.sanitization.plugins:
1334
+ - module: "./plugins/hipaa-redactor/index.js"
1335
+ phase: "pre"
1336
+ allowTransform: true # Required for redaction to take effect
1337
+ frequencyWeight: 10 # PII findings weigh heavily
1338
+ ```
1339
+
1340
+ ---
1341
+
1342
+ ## Tests
1343
+
1344
+ **Plugin loading:**
1345
+
1346
+ - Plugin with valid module, phase, and factory loads successfully
1347
+ - Plugin with missing module path fails at startup with clear error
1348
+ - Plugin with no default export fails at startup
1349
+ - Plugin exporting ESM (no module.exports) fails with helpful error message
1350
+ - Plugin with mismatched phase (config says "pre", plugin says "post") fails at startup
1351
+ - Plugin that throws in initialize fails startup
1352
+ - Plugin count exceeding pluginLimits.maxTotal fails startup
1353
+ - Two plugins with the same id fail startup
1354
+ - Plugin with ruleIdPrefix not equal to id fails startup
1355
+ - Plugin with ruleIdPrefix colliding with built-in prefix fails startup
1356
+ - Plugin with ruleIdPrefix colliding with other plugin fails startup
1357
+ - Plugin path resolving outside config dir rejected (tested with "..", absolute
1358
+ paths, symlinks, Windows-style paths where applicable)
1359
+ - Plugin path to sibling directory (e.g. /config2 vs /config) rejected by
1360
+ trailing path.sep containment check
1361
+ - Plugin with remote URL in module path rejected at startup
1362
+ - Plugin as single-file CJS bundle loads and resolves without node_modules
1363
+ - Plugin with local node_modules resolves dependencies via NODE_PATH
1364
+ - Plugin loaded via module.exports = fn (direct CJS) works
1365
+ - Plugin loaded via exports.default = fn (transpiled CJS) works
1366
+ - Loader resolves via mod.default || mod correctly
1367
+ - Profile referencing unloaded plugin prefix produces warning, not failure
1368
+
1369
+ **Runtime isolation:**
1370
+
1371
+ - Plugin inspect runs in worker_thread, not main event loop
1372
+ - Synchronous infinite loop in plugin is terminated by worker.terminate()
1373
+ - Worker respawns after termination and re-initializes plugin
1374
+ - Worker that fails re-initialization is permanently marked failed
1375
+ - Process restart clears permanently-failed state
1376
+ - Permanently failed plugin produces plugin_error on subsequent calls
1377
+ - PluginInput serializes correctly across structured clone boundary
1378
+ - PluginResult deserializes correctly across structured clone boundary
1379
+
1380
+ **Worker queuing:**
1381
+
1382
+ - Second request while worker is busy enters FIFO queue
1383
+ - Timeout timer starts at post-to-worker, not at enqueue
1384
+ - Queue depth exceeding maxQueueDepth produces plugin_error "queue_full"
1385
+ - Queued requests are processed in order after current request completes
1386
+
1387
+ **Pre-plugin block semantics:**
1388
+
1389
+ - Pre-plugin safe:false + ruleId in hardBlockRules + no frequency tier → Stage 2 skipped, final block
1390
+ - Pre-plugin safe:false + ruleId in hardBlockRules + frequency tier1+ → Stage 2 forced, final block
1391
+ - Pre-plugin safe:false + ruleId NOT in hardBlockRules → Stage 2 runs with hints, final block
1392
+ - Pre-plugin safe:false does NOT get rescued by Stage 2 safe:true
1393
+ - Pre-plugin safe:true → normal flow, no block contribution
1394
+
1395
+ **Plugin execution — pre phase:**
1396
+
1397
+ - Pre-plugin receives Stage 1 results in priorResults (syntactic and schema non-optional)
1398
+ - Pre-plugin flags appear in frequency scoring
1399
+ - Pre-plugin block triggers two-pass skip when rule in hardBlockRules
1400
+ - Pre-plugin flags injected as hints into semantic sub-agent prompt
1401
+ - Multiple pre-plugins execute in declaration order
1402
+ - Pre-plugin results from earlier plugins appear in priorPlugins as PluginResultMeta
1403
+ - Pre-plugin with transform: later plugins in same phase receive transformed content
1404
+
1405
+ **Plugin execution — post phase:**
1406
+
1407
+ - Post-plugin receives Stage 1 + Stage 2 results in priorResults
1408
+ - Post-plugin block overrides Stage 2 safe: true (ANY block wins)
1409
+ - Post-plugin flags appear in final merged result
1410
+ - Multiple post-plugins execute in declaration order
1411
+ - Post-plugin priorResults.semantic is defined (not undefined)
1412
+
1413
+ **Plugin timeout:**
1414
+
1415
+ - Plugin exceeding timeoutMs produces plugin_error event with reason "timeout"
1416
+ - Timed-out plugin treated as block
1417
+ - Subsequent plugins still execute after timeout
1418
+ - Sync-blocking plugin (tight loop) is terminated by worker.terminate()
1419
+
1420
+ **Plugin error handling — recoverable:**
1421
+
1422
+ - Confidence > 1.0 clamped to 1.0 with warning log, results still applied
1423
+ - Confidence < 0.0 clamped to 0.0 with warning log, results still applied
1424
+ - findingConfidence entry out of range clamped per entry with warning
1425
+ - ruleId not starting with ruleIdPrefix stripped from result with warning
1426
+ - Other ruleIds and plugin findings preserved after strip
1427
+ - Plugin result with clamped/stripped values is not treated as a block
1428
+
1429
+ **Plugin error handling — unrecoverable:**
1430
+
1431
+ - Plugin throwing in inspect produces plugin_error with reason "exception"
1432
+ - Plugin returning structurally invalid result (missing pluginId, safe, ruleIds,
1433
+ flags, or confidence) produces plugin_error "invalid_result"
1434
+ - Content blocked after unrecoverable error
1435
+ - Subsequent plugins still execute after error
1436
+ - Plugin transform failing schema validation produces plugin_error "transform_schema_fail"
1437
+ - Failed transform does not block — pipeline continues with original content
1438
+ - Plugin's safe/ruleIds/flags still applied after transform rejection
1439
+
1440
+ **Content transformation:**
1441
+
1442
+ - Plugin with allowTransform: false — transformed field ignored
1443
+ - Plugin with allowTransform: true — transformed content passed to downstream stages
1444
+ - Transformed output that fails Stage 1B schema validation is rejected
1445
+ - MCP transform validation uses same tool schema/discriminant as original validation
1446
+ - Transcript transform validation uses same transcript schema as original
1447
+ - Transform audit event includes pre and post content hashes (canonical JSON)
1448
+ - Canonical JSON hashing produces same hash regardless of original key order
1449
+ - Raw mirror contains original (pre-transform) content
1450
+ - Two plugins with allowTransform: true in same phase fails startup
1451
+ - Transform applied only when plugin returns non-undefined transformed field
1452
+ - Later plugins in same phase receive transformed content in content.raw
1453
+ - priorPlugins for later plugins includes PluginResultMeta with transformApplied: true
1454
+ - priorPlugins for errored plugins includes PluginResultMeta with errored: true
1455
+
1456
+ **Context profile interaction:**
1457
+
1458
+ - Plugin disabled in active profile is not invoked
1459
+ - Plugin frequency weight overridden by profile config (plugin id level)
1460
+ - Plugin frequency weight overridden by profile config (individual ruleId level)
1461
+ - Wildcard prefix override (e.g. "acme.scanner.*": 10) applies to all matching rules
1462
+ - Precedence: full ruleId > prefix wildcard > plugin id
1463
+ - Wildcard key cannot set enabled (only frequencyWeight)
1464
+ - Full ruleId key cannot set enabled (only frequencyWeight)
1465
+ - Plugin enabled in profile but disabled globally (enabled: false) is not loaded
1466
+
1467
+ **Trust tier interaction:**
1468
+
1469
+ - Trusted MCP server: Stage 1 runs, trust routing bypasses plugins, trusted_pass emitted
1470
+ - Untrusted MCP server results invoke plugins
1471
+ - Transcript content invokes plugins
1472
+
1473
+ **Audit integration:**
1474
+
1475
+ - plugin_block event at minimal verbosity, payload matches schema
1476
+ - plugin_pass event at standard verbosity for clean pass (no flags)
1477
+ - plugin_flags event at standard verbosity for safe:true with flags (not both pass and flags)
1478
+ - plugin_error event at minimal verbosity, payload includes reason and detail
1479
+ - plugin_transform event at high verbosity with canonical JSON hashes and hashMethod
1480
+ - plugin_config_loaded writes to agent-level alert log, not session audit JSONL
1481
+ - plugin_config_loaded event at minimal verbosity with full resolved config, no sessionId
1482
+ - Plugin rule IDs appear in rule_triggered events at high verbosity
1483
+
1484
+ **Alerting integration:**
1485
+
1486
+ - Single plugin_error does not trigger alert (below aggregation threshold)
1487
+ - 3 plugin_errors within 5 minutes (same agent) triggers pluginErrorSpike alert
1488
+ - pluginErrorSpike scoped to same agent (errors from different agents don't aggregate)
1489
+ - pluginErrorSpike alert includes pluginId breakdown and reason counts
1490
+ - pluginErrorSpike configurable: custom count and window thresholds
1491
+
1492
+ **Frequency scoring integration:**
1493
+
1494
+ - Plugin finding with confidence 1.0 contributes full weight
1495
+ - Plugin finding with confidence 0.5 contributes half weight
1496
+ - Plugin with findingConfidence override: keyed ruleId uses specific confidence
1497
+ - Plugin with findingConfidence override: unkeyed ruleId falls back to default
1498
+ - Clamped confidence values produce correct effective weights (tested via
1499
+ recoverable validation — see error handling tests)
1500
+ - Clean pass (empty ruleIds, safe:true) does not contribute to frequency score
1501
+ - Plugin findings accumulate with built-in findings in session score
1502
+ - Plugin-driven frequency escalation triggers same tier thresholds
1503
+
1504
+ ---
1505
+
1506
+ ## Residual Risks (Accepted)
1507
+
1508
+ | Risk | Status |
1509
+ | ---- | ------ |
1510
+ | Malicious plugin module executes arbitrary code at startup | Accepted. Plugins are operator-installed local modules. Same trust model as npm dependencies. Operator is responsible for vetting plugin code. |
1511
+ | Plugin transform masks content from semantic sub-agent | Mitigated by allowTransform default false, single-transform-per-phase limit, Stage 1B schema validation of transforms (with full MCP context), raw mirror preservation, and canonical JSON audit hashing. Residual risk accepted. |
1512
+ | Slow plugins add latency to MCP critical path | Mitigated by per-plugin timeout (default 1s, max 10s) enforced via `worker.terminate()`. Operator can tune or disable. Plugins that consistently time out should be removed. |
1513
+ | Plugin id namespace pollution | Mitigated by requiring org-prefixed ids and rejecting collisions with built-in prefixes. Residual risk: two third-party plugins could collide with each other. Accepted — operator-resolvable at config time. |
1514
+ | Plugin error flood obscures real threats in audit log | Mitigated by `pluginErrorSpike` aggregation alerting and by pipeline continuation (real threats from built-in stages still surface). Operators should fix or remove failing plugins promptly. |
1515
+ | Plugin with network calls introduces new egress surface | Accepted. External plugins (like the external threat scanner example) make network calls; local-library plugins such as the ClawMoat adapter do not. Operator is responsible for network policy. OpenClaw does not sandbox plugin network access (deferred). |
1516
+ | Worker thread serialization overhead on large payloads | Mitigated by Tier 1 structural size check (STRUCT-002, default 512KB max). For typical payloads (< 100KB), structured clone is sub-millisecond. Accepted. |
1517
+ | Plugin symlink or path traversal escapes config directory | Mitigated by platform-agnostic containment check: `path.resolve()` + `fs.realpath()` + trailing `path.sep` prefix comparison. |
1518
+ | Plugin ruleIds registered dynamically may not be present in profile frequency weight references | Mitigated by warning-on-missing (not failure). Profiles remain valid when plugins are added or removed. |
1519
+
1520
+ ---
1521
+
1522
+ ## Required Companion Changes
1523
+
1524
+ ### Context-Aware Sanitization Spec v2.2
1525
+
1526
+ The custom profile schema must be extended to support a `plugins` map:
1527
+
1528
+ ```yaml
1529
+ # Addition to custom profile schema
1530
+ plugins:
1531
+ type: object
1532
+ description: Per-plugin and per-rule overrides for this profile.
1533
+ additionalProperties:
1534
+ type: object
1535
+ properties:
1536
+ enabled:
1537
+ type: boolean
1538
+ description: Override plugin enabled state for this profile.
1539
+ frequencyWeight:
1540
+ type: number
1541
+ description: Override frequency weight for this plugin or rule.
1542
+ ```
1543
+
1544
+ Keys in the `plugins` map use one of three forms:
1545
+
1546
+ 1. **Plugin id** (e.g. `"acme.hipaa-redactor"`) — applies `enabled` and
1547
+ `frequencyWeight` to all rules under this plugin.
1548
+ 2. **Prefix wildcard** (e.g. `"acme.hipaa-redactor.*"`) — applies
1549
+ `frequencyWeight` only (not `enabled`) to all rules matching the prefix.
1550
+ Functionally equivalent to the plugin id form for frequency weight, but
1551
+ explicit about intent. Cannot set `enabled` (enable/disable is per-plugin,
1552
+ not per-rule).
1553
+ 3. **Full ruleId** (e.g. `"acme.hipaa-redactor.ssn-detected"`) — applies
1554
+ `frequencyWeight` only (not `enabled`) to a single rule.
1555
+
1556
+ **Precedence:** Full ruleId overrides prefix wildcard overrides plugin id.
1557
+ This parallels the existing `frequencyWeightOverrides` pattern for built-in
1558
+ rules in the context-aware sanitization spec.
1559
+
1560
+ Validation: keys referencing unloaded plugin prefixes produce a warning log
1561
+ at startup, not a failure. This prevents profiles from becoming invalid when
1562
+ plugins are added or removed.
1563
+
1564
+ ### Audit Alerting Spec v2.4
1565
+
1566
+ Add the `pluginErrorSpike` alert rule definition and its config keys to the
1567
+ alerting spec's rule inventory and config reference.
1568
+
1569
+ ---
1570
+
1571
+ ## Out of Scope
1572
+
1573
+ - Plugin sandboxing (filesystem/network isolation for plugin execution)
1574
+ - Remote plugin loading (plugins fetched from URLs or registries)
1575
+ - Plugin marketplace or discovery mechanism
1576
+ - Hot-reload of plugins without restart
1577
+ - Plugin-to-plugin communication (plugins are isolated; shared state is
1578
+ via priorPlugins in PluginInput only — full PluginResultMeta, not filtered)
1579
+ - ML model hosting or management (ML plugins bring their own runtime)
1580
+ - Plugin-specific alert rules beyond pluginErrorSpike (deferred until alerting
1581
+ supports custom rule definitions)
1582
+ - Plugin versioning or compatibility checking beyond the interface contract
1583
+ - Automatic plugin rule promotion to built-in rules (related to alerting
1584
+ spec's Rule 4 future work on syntactic rule auto-generation)
1585
+ - ESM plugin support (CJS only in v1; ESM support deferred)
1586
+ - Per-plugin alerting granularity (pluginErrorSpike aggregates across all plugins)
1587
+
1588
+ ---
1589
+
1590
+ ## Implementation Sequencing
1591
+
1592
+ This feature builds on all existing specs. Recommended order:
1593
+
1594
+ 1. **Interface and loading** — Define the TypeScript interface (including
1595
+ `ruleIdPrefix`, `PluginResultMeta`), factory pattern, CJS module loading,
1596
+ config schema (with separated `pluginLimits`), platform-agnostic path
1597
+ validation, and startup validation.
1598
+
1599
+ 2. **Worker isolation** — Spawn workers, message passing, timeout enforcement
1600
+ via `worker.terminate()`, FIFO queuing, respawn-on-failure, permanent
1601
+ failure tracking.
1602
+
1603
+ 3. **Pre-phase integration** — Wire pre-plugin execution between trust-tier
1604
+ routing and frequency scoring. Flag passthrough to sub-agent. Pre-plugin
1605
+ block semantics (truth table). Audit events with full payload schemas.
1606
+
1607
+ 4. **Post-phase integration** — Wire post-plugin execution after Stage 2.
1608
+ Final merge logic. Audit events.
1609
+
1610
+ 5. **Transform support** — Implement allowTransform, schema validation with
1611
+ MCP context, canonical JSON hash audit, raw mirror preservation.
1612
+
1613
+ 6. **Alerting integration** — Implement `pluginErrorSpike` rule. Wire
1614
+ plugin_error events to alerting layer.
1615
+
1616
+ 7. **Example plugins** — Ship 1-2 reference plugins (a simple regex pattern
1617
+ plugin and a stub external scanner adapter) as documentation and
1618
+ integration test fixtures.
1619
+
1620
+ ---
1621
+
1622
+ ## Relationship to Existing Extension Points
1623
+
1624
+ The Tier 1 pattern library already describes a `customPatterns` config
1625
+ extension (currently documented but not implemented). The pluggable
1626
+ sanitizer interface supersedes that design:
1627
+
1628
+ - `customPatterns` was limited to regex patterns with block/flag actions.
1629
+ - The plugin interface supports arbitrary inspection logic (regex, ML,
1630
+ external services) with a richer result type.
1631
+ - A simple "custom regex patterns" plugin can be shipped as a built-in
1632
+ reference plugin, providing the same functionality `customPatterns` was
1633
+ designed for, but through the standard plugin interface.
1634
+
1635
+ The `customPatterns` config key should not be implemented separately. It
1636
+ should be retired in favor of the plugin interface once this spec ships.
1637
+ The tier1-pattern-library spec should be updated to reference this spec
1638
+ for operator-defined patterns.
1639
+
1640
+ ---
1641
+
1642
+ _Version: 1.2.1_
1643
+ _Date: March 2026_
1644
+ _Status: Draft — all implementation-blocking ambiguities resolved, delta review applied_