@omni-shield/ai-assistant-security-openclaw 1.0.0-beta1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of @omni-shield/ai-assistant-security-openclaw might be problematic. Click here for more details.

package/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # AI Assistant Security Plugin
2
+
3
+ `@omni-shield/ai-assistant-security-openclaw` is a security plugin designed for OpenClaw to protect your Large Language Models (LLM) and Agent lifecycle from harmful requests and sensitive data leakage.
4
+
5
+ ## Key Features
6
+
7
+ - **Multi-dimensional Protection**: Covers LLM requests, pre-tool calls (Before Tool Call), and tool result persistence (Tool Result Persist).
8
+ - **Global Interception**: Hooks into `global.fetch` to provide automated security auditing for underlying model calls.
9
+ - **Smart Degradation (Circuit Breaker)**: Built-in error handling and self-healing logic. Automatically enters degradation mode when security API failures exceed the threshold, ensuring business continuity.
10
+ - **Session Synchronization**: Automatically synchronizes OpenClaw session files to mark intercepted content when a request is blocked.
11
+ - **Risk Label Support**: Supports returning specific risk labels (e.g., PII, Prompt Injection) and displaying them in block messages.
12
+
13
+ ## Quick Start
14
+
15
+ Bundled plugins are disabled by default in OpenClaw. Enable this plugin with the following command:
16
+
17
+ ```bash
18
+ openclaw plugins enable ai-assistant-security-openclaw
19
+ ```
20
+
21
+ Restart the Gateway after enabling.
22
+
23
+ ## Configuration
24
+
25
+ Configure the plugin in your OpenClaw configuration file:
26
+
27
+ ```yaml
28
+ plugins:
29
+ ai-assistant-security-openclaw:
30
+ enabled: true
31
+ config:
32
+ endpoint: "https://your-security-api-endpoint" # Security API endpoint (Required)
33
+ apiKey: "your-api-key-here" # API Key (Required)
34
+ appId: "your-app-id" # Application Identifier (Required)
35
+ timeoutMs: 5000 # API timeout in ms, default is 30000
36
+ logRecord: true # Enable plugin runtime logs, default is false
37
+ failureThreshold: 3 # Failures before entering degradation, default is 3
38
+ retryInterval: 60 # Initial retry interval in seconds after degradation, default is 60
39
+ maxRetryInterval: 3600 # Max retry interval in seconds, default is 3600
40
+ hooks: # Hook points configuration, default all true
41
+ fetch: true # Whether to hook global.fetch
42
+ beforeToolCall: true # Whether to audit before tool execution
43
+ toolResultPersist: true # Whether to audit before persisting tool results
44
+ ```
45
+
46
+ ## Workflow
47
+
48
+ 1. **Registration & Validation**: Validates the availability of `endpoint`, `apiKey`, and `appId` during startup.
49
+ 2. **Security Audit**:
50
+ - **LLM Requests**: Monitors inputs sent to models.
51
+ - **Pre-Tool Call**: Audits the tool name and its parameters before execution.
52
+ - **Tool Result**: Audits raw data returned by tools to prevent sensitive information leakage.
53
+ 3. **Interception**: If a risk is detected, the plugin returns a block message or rewrites the response content accordingly.
54
+ 4. **Disaster Recovery**: If the security service is unavailable, the plugin automatically bypasses checks to prioritize business availability and periodically probes for service recovery.
package/index.ts ADDED
@@ -0,0 +1,589 @@
1
+ /* eslint-disable max-depth */
2
+ import fs from 'node:fs';
3
+ import path from 'node:path';
4
+ import type { OpenClawPluginApi } from 'openclaw/plugin-sdk';
5
+
6
+ import pkg from './package.json';
7
+ import { LLMShieldClient } from './src/client.js';
8
+ import { getLabelName, type Language } from './src/labels.js';
9
+ import { ContentTypeV2, DecisionTypeV2, type MessageV2 } from './src/types.js';
10
+ import { generateRequestId, getDeviceFingerprint, robustExtractLastUserMessage } from './src/utils.js';
11
+
12
/**
 * Writes one diagnostic line to the host logger when runtime logging is enabled.
 *
 * The line has the form `[<package name>] <hook> <JSON payload>`. Serialization
 * failures (for example circular references or BigInt values) are reported inside
 * the log line instead of being thrown, so that logging can never break the hook
 * that called it.
 *
 * @param api - Plugin API whose `logger.info` receives the line.
 * @param hook - Label identifying the hook point / phase being logged.
 * @param data - Arbitrary payload; serialized with `JSON.stringify`.
 * @param logRecord - When false the call is a no-op.
 */
function logEvent(api: OpenClawPluginApi, hook: string, data: unknown, logRecord: boolean): void {
  if (!logRecord) {
    return;
  }

  let payload: string | undefined;
  try {
    payload = JSON.stringify(data);
  } catch (e) {
    // JSON.stringify throws on circular structures and BigInt; keep the log line anyway.
    payload = `[unserializable payload: ${String(e)}]`;
  }

  api.logger.info(`[${pkg.name}] ${hook} ${payload}`);
}
17
+
18
// ---------------------------------------------------------------------------
// Module-level circuit-breaker state, shared by every hook registered below.
// ---------------------------------------------------------------------------

/** Set once consecutive failures reach `failureThreshold`; cleared by a successful probe. */
let isDegraded = false;
/** True while a recovery probe request is in flight, so only one probe runs at a time. */
let isProbing = false;
/** Number of moderation calls that failed in a row; reset on any success. */
let consecutiveFailures = 0;
/** Epoch milliseconds of the last degradation entry or failed probe. */
let lastRetryTime = 0;
/** Consecutive failures required before entering the degraded state. */
let failureThreshold = 3;
/** Initial wait between probes: one minute. */
let baseRetryIntervalMs = 60_000;
/** Current wait between probes; doubled after each failed probe. */
let currentRetryIntervalMs = baseRetryIntervalMs;
/** Upper bound for the probe interval: one hour. */
let maxRetryIntervalMs = 3_600_000;
/** Value sent in the `X-Ai-Device-Fingerprint` header; filled in during registration. */
let deviceFingerprint = '';
27
+
28
/** Longest time to wait for another writer's `.lock` file to go away. */
const SESSION_LOCK_TIMEOUT_MS = 30000;
/** Delay between two attempts to take the lock. */
const SESSION_LOCK_POLL_MS = 1000;

/**
 * Tries to create `lockFilePath` exclusively, retrying once per second for up to 30s.
 *
 * The `wx` flag makes creation fail with `EEXIST` when the file already exists, so
 * checking for the lock and taking it happen in a single step.
 *
 * @returns true when this process created the lock file, false on timeout.
 * @throws Any file-system error other than `EEXIST`.
 */
async function acquireSessionLock(lockFilePath: string): Promise<boolean> {
  const startedAt = Date.now();
  while (Date.now() - startedAt < SESSION_LOCK_TIMEOUT_MS) {
    try {
      const lockContent = { pid: process.pid, createdAt: new Date().toISOString() };
      await fs.promises.writeFile(lockFilePath, JSON.stringify(lockContent, null, 2), { flag: 'wx' });
      return true;
    } catch (err) {
      if ((err as { code?: unknown } | null)?.code !== 'EEXIST') {
        throw err;
      }
    }
    await new Promise((resolve) => setTimeout(resolve, SESSION_LOCK_POLL_MS));
  }
  return false;
}

/**
 * Rewrites one JSONL session line, inserting the block marker into user text parts
 * that contain `originalContent` and do not already contain `blockReason`.
 *
 * Blank or unparsable lines are returned untouched.
 */
function markBlockedTextInSessionLine(
  line: string,
  originalContent: string,
  blockReason: string
): { line: string; changed: boolean } {
  if (!line.trim()) {
    return { line, changed: false };
  }
  try {
    const entry = JSON.parse(line);
    let changed = false;
    const isUserMessage = entry.type === 'message' && entry.message?.role === 'user';
    if (isUserMessage && Array.isArray(entry.message.content)) {
      for (const part of entry.message.content) {
        if (part.type !== 'text' || typeof part.text !== 'string') continue;
        if (part.text.includes(originalContent) && !part.text.includes(blockReason)) {
          const marked = insertBlockMarker(originalContent, blockReason);
          // A replacer function keeps `$&`, `$1`, ... inside user text literal.
          part.text = part.text.replace(originalContent, () => marked);
          changed = true;
        }
      }
    }
    return { line: JSON.stringify(entry), changed };
  } catch {
    return { line, changed: false };
  }
}

/**
 * Marks blocked content inside the persisted OpenClaw session transcript.
 *
 * Walks `<openClawDir>/agents/<agent>/sessions/sessions.json`, opens each referenced
 * session file under a `.lock` file, and inserts `blockReason` into the first session
 * file whose user text contains `originalContent`. Processing stops after the first
 * file that was changed. All failures are logged through `api.logger.error` and never
 * thrown.
 *
 * @param api - Plugin API (configuration, path resolution and logger).
 * @param originalContent - Exact text that was moderated and blocked.
 * @param blockReason - Marker text to insert next to the blocked content.
 */
async function syncSessionContent(
  api: OpenClawPluginApi,
  originalContent: string,
  blockReason: string
): Promise<void> {
  // An empty needle would match every user message; there is nothing meaningful to mark.
  if (!originalContent) {
    return;
  }

  try {
    const pluginCfg = (api.pluginConfig ?? {}) as any;
    const openClawDir = pluginCfg.openClawDir
      ? api.resolvePath(pluginCfg.openClawDir)
      : api.resolvePath('..');
    const agentsDir = path.join(openClawDir, 'agents');

    if (!fs.existsSync(agentsDir)) {
      api.logger.error(`[${pkg.name}] Agents directory not found at ${agentsDir}`);
      return;
    }

    const agentDirs = fs
      .readdirSync(agentsDir)
      .filter((f) => fs.statSync(path.join(agentsDir, f)).isDirectory());

    for (const agentName of agentDirs) {
      const sessionsDir = path.join(agentsDir, agentName, 'sessions');
      const sessionsJsonPath = path.join(sessionsDir, 'sessions.json');
      if (!fs.existsSync(sessionsJsonPath)) continue;

      const sessionsData = JSON.parse(await fs.promises.readFile(sessionsJsonPath, 'utf-8'));

      for (const key of Object.keys(sessionsData)) {
        const sessionInfo = sessionsData[key];
        const sessionId = sessionInfo?.sessionId;
        if (!sessionId) continue;

        // Use the sessionFile field first, otherwise fall back to the default path.
        let sessionFilePath: string;
        if (!sessionInfo.sessionFile) {
          sessionFilePath = path.join(sessionsDir, `${sessionId}.jsonl`);
        } else if (path.isAbsolute(sessionInfo.sessionFile)) {
          sessionFilePath = sessionInfo.sessionFile;
        } else {
          sessionFilePath = path.join(sessionsDir, sessionInfo.sessionFile);
        }

        if (!fs.existsSync(sessionFilePath)) continue;

        const lockFilePath = `${sessionFilePath}.lock`;
        if (!(await acquireSessionLock(lockFilePath))) {
          api.logger.error(
            `[${pkg.name}] Failed to acquire lock for session ${sessionId} after 30s timeout, skipping sync.`
          );
          continue;
        }

        let changed = false;
        try {
          const fileContent = await fs.promises.readFile(sessionFilePath, 'utf-8');
          const newLines = fileContent.split('\n').map((line) => {
            const result = markBlockedTextInSessionLine(line, originalContent, blockReason);
            changed = changed || result.changed;
            return result.line;
          });

          if (changed) {
            await fs.promises.writeFile(sessionFilePath, newLines.join('\n'));
          }
        } finally {
          // The lock was created by this call, so it is always ours to release.
          await fs.promises.unlink(lockFilePath).catch((err) => {
            if ((err as { code?: unknown } | null)?.code === 'ENOENT') return;
            api.logger.error(`[${pkg.name}] Failed to remove lock file ${lockFilePath}: ${err}`);
          });
        }

        // Only the first matching session file is rewritten.
        if (changed) return;
      }
    }
  } catch (e) {
    api.logger.error(`[${pkg.name}] Failed to sync session content: ${e}`);
  }
}
150
+
151
/**
 * Sends one piece of content to the moderation service and returns its decision.
 *
 * Behaviour visible in this function:
 * - While the circuit breaker is open (`isDegraded`), content passes unchecked
 *   (`{ labels: [] }`) until the retry interval has elapsed; then a single probe
 *   request is sent. A failed probe doubles the interval (capped at
 *   `maxRetryIntervalMs`); a successful probe closes the breaker and the real check
 *   continues below.
 * - The real check is attempted at most twice; the second attempt only happens for
 *   timeouts and HTTP 5xx errors, after a 500ms pause.
 * - Any final failure is counted, may open the breaker, and yields `{ labels: [] }`.
 *   The function fails open and does not throw for moderation errors.
 *
 * @param api - Plugin API used for logging.
 * @param client - Moderation service client.
 * @param appId - Sent as the `Scene` of the request.
 * @param content - Text to moderate.
 * @param role - Role attached to the message.
 * @param source - Hook name used as a log prefix.
 * @param logRecord - Enables verbose `logEvent` output.
 * @param history - Optional preceding messages sent as `History`.
 * @returns The service decision (undefined when the check was skipped or failed) and risk labels.
 */
async function moderate(
  api: OpenClawPluginApi,
  client: LLMShieldClient,
  appId: string,
  content: string,
  role: string,
  source: string,
  logRecord: boolean,
  history?: MessageV2[]
): Promise<{ decision?: DecisionTypeV2; labels: string[] }> {
  if (isDegraded) {
    const probeDue = Date.now() - lastRetryTime > currentRetryIntervalMs;
    if (!probeDue || isProbing) {
      // Still inside the degradation window, or another probe is in flight: pass through.
      return { labels: [] };
    }

    isProbing = true;
    api.logger.info(`[${pkg.name}] In degradation state, sending single probe request...`);
    try {
      await client.moderate(
        {
          Message: { Role: 'user', Content: 'hello', ContentType: ContentTypeV2.TEXT },
          Scene: appId
        },
        { 'X-Ai-Device-Fingerprint': deviceFingerprint }
      );
      api.logger.info(`[${pkg.name}] Endpoint recovered, resetting degradation flag.`);
      isDegraded = false;
      consecutiveFailures = 0;
      currentRetryIntervalMs = baseRetryIntervalMs;
    } catch {
      lastRetryTime = Date.now();
      // Exponential backoff, capped at the configured maximum.
      currentRetryIntervalMs = Math.min(currentRetryIntervalMs * 2, maxRetryIntervalMs);
      api.logger.warn(
        `[${pkg.name}] Probe failed, next retry in ${Math.round(currentRetryIntervalMs / 1000)}s.`
      );
      return { labels: [] };
    } finally {
      isProbing = false;
    }
  }

  const requestId = generateRequestId();
  logEvent(
    api,
    `${source}(check)`,
    { content, role, appId, requestId, historyCount: history?.length },
    logRecord
  );

  const maxAttempts = 2; // 1 original + 1 retry for transient errors

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const response = await client.moderate(
        {
          Message: { Role: role, Content: content, ContentType: ContentTypeV2.TEXT },
          Scene: appId,
          History: history
        },
        {
          'X-Top-Request-Id': requestId,
          'X-Ai-Device-Fingerprint': deviceFingerprint
        }
      );
      logEvent(api, `${source}(result)`, { response, requestId }, logRecord);

      // A successful call clears the failure streak.
      consecutiveFailures = 0;
      currentRetryIntervalMs = baseRetryIntervalMs;

      const decision = response.Result?.Decision?.DecisionType;
      const labels = response.Result?.RiskInfo?.Risks?.map((r) => r.Label) || [];
      return { decision, labels };
    } catch (error: unknown) {
      // Thrown values are not guaranteed to be Error instances; inspect them defensively.
      const err = error as { name?: unknown; message?: unknown; status?: unknown } | null | undefined;
      const isTimeout =
        err?.name === 'AbortError' || (typeof err?.message === 'string' && err.message.includes('timeout'));
      const status = typeof err?.status === 'number' ? err.status : Number.NaN;
      const isTransient = isTimeout || (status >= 500 && status < 600);
      const errorMsg = isTimeout ? 'Moderation timed out' : String(error);

      if (isTransient && attempt < maxAttempts) {
        api.logger.warn(
          `[${pkg.name}] Transient error (${errorMsg}), retrying... (${attempt}/${maxAttempts - 1})`
        );
        await new Promise((resolve) => setTimeout(resolve, 500)); // Short delay before retry
        continue;
      }

      consecutiveFailures++;
      logEvent(api, `${source}(error)`, { error: errorMsg, requestId, consecutiveFailures }, logRecord);
      // Reported through the host logger, like every other message in this file.
      api.logger.error(
        `[${pkg.name}] Moderation failed (${source}) [RID:${requestId}] [Failures:${consecutiveFailures}]: ${errorMsg}`
      );

      // Open the circuit breaker once the failure streak reaches the threshold.
      if (consecutiveFailures >= failureThreshold) {
        isDegraded = true;
        lastRetryTime = Date.now();
        api.logger.error(
          `[${
            pkg.name
          }] Consecutive failures reached threshold (${failureThreshold}), entering degradation state. Next retry in ${Math.round(
            currentRetryIntervalMs / 1000
          )}s.`
        );
      }

      return { labels: [] };
    }
  }

  return { labels: [] };
}
280
+
281
/** Opening text of the marker that `getBlockReason` writes next to blocked content. */
const BLOCK_MARKER_PREFIX = '[Block By AI Assistant Security. Reason';

/**
 * Leading envelope timestamp such as `[Tue 2026-02-10 20:36 GMT+8]`: a single
 * bracketed segment on the first line that contains a `YYYY-MM-DD` date.
 */
const LEADING_TIMESTAMP_PATTERN = /^\[[^\[\]\n]*\d{4}-\d{2}-\d{2}[^\[\]\n]*\]/;

/**
 * Strips previously blocked text from a history message.
 *
 * When `content` contains a block marker, everything before the last marker is
 * dropped and only the marker plus whatever follows it is kept. A leading timestamp
 * envelope is preserved in front of the marker.
 *
 * Only a bracketed prefix that looks like a timestamp is preserved. Arbitrary
 * bracketed user text at the start of a blocked message is removed together with
 * the rest of the blocked content instead of being kept as a "timestamp".
 *
 * @param content - Message text; non-string values are returned unchanged.
 * @returns The sanitized text, or `content` itself when no marker is present.
 */
function preprocessContent(content: string): string {
  if (typeof content !== 'string') return content;

  const blockIndex = content.lastIndexOf(BLOCK_MARKER_PREFIX);
  if (blockIndex === -1) return content;

  const blockedPart = content.substring(blockIndex);
  const timestamp = LEADING_TIMESTAMP_PATTERN.exec(content)?.[0];

  // The timestamp must end before the marker starts, otherwise it is part of the marker itself.
  if (timestamp !== undefined && timestamp.length <= blockIndex) {
    return `${timestamp} ${blockedPart}`;
  }

  // No recognizable timestamp: keep only the block marker and subsequent content.
  return blockedPart;
}
299
+
300
/**
 * Adds `marker` to `content`, separated by one space.
 *
 * If the text contains a `\n[message_id:` trailer, the marker goes immediately in
 * front of the last such trailer; otherwise it is appended at the end.
 * Non-string `content` is handed back untouched.
 */
function insertBlockMarker(content: string, marker: string): string {
  if (typeof content !== 'string') return content;

  const trailerStart = content.lastIndexOf('\n[message_id:');
  if (trailerStart === -1) {
    return `${content} ${marker}`;
  }

  const head = content.slice(0, trailerStart);
  const trailer = content.slice(trailerStart);
  return `${head} ${marker}${trailer}`;
}
309
+
310
/**
 * Builds the block marker text shown for a blocked item.
 *
 * Labels are translated to their English names, de-duplicated in first-seen order
 * and joined with `, `; with no labels the text `Inappropriate content` is used.
 *
 * @param content - Not used by the current implementation.
 * @param labels - Risk labels reported by the moderation service.
 * @returns `BLOCK_MARKER_PREFIX`, a space, the label text and a closing `]`.
 */
function getBlockReason(content: string, labels: string[]): string {
  const lang: Language = 'en';

  const names = new Set<string>();
  for (const label of labels) {
    names.add(getLabelName(label, lang));
  }

  const labelText = names.size > 0 ? [...names].join(', ') : 'Inappropriate content';
  return `${BLOCK_MARKER_PREFIX} ${labelText}]`;
}
320
+
321
/**
 * Installs a wrapper around `global.fetch` (at most once per process) that audits
 * outgoing JSON request bodies before they are sent.
 *
 * For every request with a string / `Uint8Array` / `ArrayBuffer` body that parses as JSON:
 * 1. Earlier user messages that already carry a block marker are reduced to the marker
 *    (`preprocessContent`).
 * 2. The message returned by `robustExtractLastUserMessage` is moderated, with up to
 *    five preceding non-system messages sent as history.
 * 3. On a BLOCK decision the block marker is inserted into the last message (or into
 *    `prompt` / `input`), and the session transcript is updated in the background.
 *
 * Any unexpected error in the audit path is logged and the request is forwarded with
 * its original body, so the wrapper never makes the host's fetch fail.
 */
const hookGlobalFetch = (() => {
  let isHooked = false;

  /**
   * Inserts the block marker into a message `content` value.
   * Plain strings are handled by `insertBlockMarker`. For an array of content parts
   * the marker goes into the last part that has a string `text`, so a BLOCK decision
   * is applied there too; other shapes are returned unchanged.
   */
  const markBlockedContent = (content: unknown, marker: string): unknown => {
    if (typeof content === 'string') {
      return insertBlockMarker(content, marker);
    }
    if (Array.isArray(content)) {
      for (let i = content.length - 1; i >= 0; i--) {
        const part = content[i];
        if (part && typeof part.text === 'string') {
          part.text = insertBlockMarker(part.text, marker);
          break;
        }
      }
    }
    return content;
  };

  return (api: OpenClawPluginApi, client: LLMShieldClient, appId: string, logRecord: boolean) => {
    if (isHooked) {
      return;
    }
    isHooked = true;
    const oldFetch = global.fetch;

    /** Audits `options.body` and, when needed, replaces it with the rewritten JSON. */
    const auditRequest = async (url: string, options: RequestInit): Promise<void> => {
      let rawBody: string | undefined;
      if (typeof options.body === 'string') {
        rawBody = options.body;
      } else if (options.body instanceof Uint8Array || options.body instanceof ArrayBuffer) {
        rawBody = new TextDecoder().decode(options.body);
      }
      if (!rawBody) {
        return;
      }

      let jsonBody: any;
      let messagesToModerate: { role: string; content: string }[] = [];
      let bodyChanged = false;

      try {
        jsonBody = JSON.parse(rawBody);

        // Preprocess user messages in history: remove previous content if a block marker is identified.
        if (jsonBody && Array.isArray(jsonBody.messages) && jsonBody.messages.length > 1) {
          for (const m of jsonBody.messages.slice(0, -1)) {
            if (m.role !== 'user' || typeof m.content !== 'string') continue;
            const newContent = preprocessContent(m.content);
            if (newContent !== m.content) {
              m.content = newContent;
              bodyChanged = true;
            }
          }
        }

        messagesToModerate = robustExtractLastUserMessage(jsonBody);
      } catch (e) {
        logEvent(api, 'json_parse_failed', { url, error: String(e) }, logRecord);
      }

      if (messagesToModerate.length === 0) {
        return;
      }

      // History has already been sanitized by preprocessContent above.
      let historyV2: MessageV2[] | undefined;
      if (jsonBody && Array.isArray(jsonBody.messages) && jsonBody.messages.length > 1) {
        // Drop system messages and keep only the five most recent entries.
        historyV2 = jsonBody.messages
          .slice(0, -1)
          .filter((m: any) => m.role !== 'system')
          .slice(-5)
          .map((m: any) => ({
            Role: m.role || 'user',
            Content: typeof m.content === 'string' ? m.content : JSON.stringify(m.content),
            ContentType: ContentTypeV2.TEXT
          }));
      }

      const msg = messagesToModerate[0];
      const { decision, labels } = await moderate(
        api,
        client,
        appId,
        msg.content,
        msg.role,
        'llm_request',
        logRecord,
        historyV2
      );

      if (decision === DecisionTypeV2.BLOCK) {
        const blockReason = getBlockReason(msg.content, labels);
        // Update the session file in the background; syncSessionContent logs its own errors.
        void syncSessionContent(api, msg.content, blockReason);

        logEvent(api, 'llm_request(block)', { blockReason, originalContent: msg.content }, logRecord);
        if (jsonBody && Array.isArray(jsonBody.messages) && jsonBody.messages.length > 0) {
          // Intercept the last message: insert block marker (prefer before \n[message_id:).
          const lastMsg = jsonBody.messages[jsonBody.messages.length - 1];
          lastMsg.content = markBlockedContent(lastMsg.content, blockReason);
          bodyChanged = true;
        } else if (jsonBody && typeof jsonBody.prompt === 'string') {
          jsonBody.prompt = insertBlockMarker(jsonBody.prompt, blockReason);
          bodyChanged = true;
        } else if (jsonBody && typeof jsonBody.input === 'string') {
          jsonBody.input = insertBlockMarker(jsonBody.input, blockReason);
          bodyChanged = true;
        }
      }

      if (bodyChanged) {
        options.body = JSON.stringify(jsonBody);
      }
    };

    const newFetch: typeof oldFetch = async function (...args) {
      const options = (args[1] as RequestInit) || {};

      // pii / prompt injection
      if (options.body) {
        const url = args[0]?.toString() || '';
        try {
          await auditRequest(url, options);
        } catch (e) {
          // options.body is only replaced as the very last step, so the original body is still intact.
          api.logger.error(`[${pkg.name}] LLM request audit failed, forwarding request unchanged: ${e}`);
        }
      }

      return oldFetch.apply(this, args);
    };
    global.fetch = newFetch;
  };
})();
430
+
431
/**
 * OpenClaw plugin definition.
 *
 * `register` reads the plugin configuration, validates the required settings, probes
 * the moderation endpoint once in the background and, only if that probe succeeds,
 * installs the enabled hook points (`global.fetch`, `before_tool_call`,
 * `tool_result_persist`).
 */
const plugin = {
  id: 'ai-assistant-security-openclaw',
  name: pkg.name,
  description:
    'AI Assistant Security plugin for OpenClaw, to protect your LLM models and Agent lifecycle (including tool calls) from harmful requests.',
  register(api: OpenClawPluginApi): void {
    const pluginCfg = (api.pluginConfig ?? {}) as any;
    const { endpoint, apiKey, appId } = pluginCfg;

    /**
     * Reads a numeric setting. Returns undefined when it is absent or not a finite
     * positive number. A NaN or non-positive value would otherwise leave the circuit
     * breaker unable to trip or unable to recover.
     */
    const readPositiveSetting = (key: string): number | undefined => {
      const raw = pluginCfg[key];
      if (raw === undefined) return undefined;
      const parsed = Number(raw);
      if (Number.isFinite(parsed) && parsed > 0) return parsed;
      api.logger.warn(
        `[${pkg.name}] Ignoring invalid ${key} (${String(raw)}); expected a positive number.`
      );
      return undefined;
    };

    // Calculate device fingerprint once during registration
    if (!deviceFingerprint) {
      deviceFingerprint = getDeviceFingerprint();
    }

    api.logger.info(`[${pkg.name}] Device Fingerprint: ${deviceFingerprint}`);

    // Update global configuration (if provided and valid)
    const configuredThreshold = readPositiveSetting('failureThreshold');
    if (configuredThreshold !== undefined) {
      failureThreshold = configuredThreshold;
    }
    const configuredRetrySeconds = readPositiveSetting('retryInterval');
    if (configuredRetrySeconds !== undefined) {
      baseRetryIntervalMs = configuredRetrySeconds * 1000;
      currentRetryIntervalMs = baseRetryIntervalMs;
    }
    const configuredMaxRetrySeconds = readPositiveSetting('maxRetryInterval');
    if (configuredMaxRetrySeconds !== undefined) {
      maxRetryIntervalMs = configuredMaxRetrySeconds * 1000;
    }

    // 1. Validate if apiKey and appId are empty
    if (!apiKey || !appId) {
      api.logger.error(
        `[${pkg.name}] Registration failed: apiKey or appId is empty, please check the configuration.`
      );
      return;
    }

    // 2. Validate if endpoint is empty
    if (!endpoint) {
      api.logger.error(
        `[${pkg.name}] Registration failed: endpoint is empty, please check the configuration.`
      );
      return;
    }

    const client = new LLMShieldClient({
      baseUrl: endpoint,
      apiKey,
      // A falsy timeoutMs keeps meaning "use the client default"; invalid values are ignored too.
      timeoutMs: pluginCfg.timeoutMs ? readPositiveSetting('timeoutMs') : undefined
    });
    const logRecord = !!pluginCfg.logRecord;
    const hooksCfg = pluginCfg.hooks || {};
    const enableFetch = hooksCfg.fetch !== false;
    const enableBeforeToolCall = hooksCfg.beforeToolCall !== false;
    const enableToolResultPersist = hooksCfg.toolResultPersist !== false;

    // 3. Asynchronously validate endpoint connectivity and configuration
    const verifyAndRegisterHooks = async (): Promise<void> => {
      api.logger.info(`[${pkg.name}] Verifying configuration with moderate interface: ${endpoint}...`);
      try {
        await client.moderate(
          {
            Message: {
              Role: 'user',
              Content: 'hello',
              ContentType: ContentTypeV2.TEXT
            },
            Scene: appId
          },
          {
            'X-Ai-Device-Fingerprint': deviceFingerprint
          }
        );
      } catch (e: any) {
        api.logger.error(
          `[${
            pkg.name
          }] Registration failed: Verification failed for endpoint ${endpoint}. Please check your network, apiKey, or appId configuration. Error: ${
            e?.message || e
          }`
        );
        return;
      }

      // 4. Validation passed, register hook points
      if (enableFetch) {
        hookGlobalFetch(api, client, appId, logRecord);
      }

      // risk operations / pii
      if (enableBeforeToolCall) {
        api.on('before_tool_call', async (event) => {
          const content = `Tool: ${event.toolName}, Params: ${JSON.stringify(event.params)}`;
          const { decision, labels } = await moderate(
            api,
            client,
            appId,
            content,
            'assistant',
            'before_tool_call',
            logRecord
          );
          if (decision === DecisionTypeV2.BLOCK) {
            const blockReason = getBlockReason(content, labels);
            logEvent(api, 'before_tool_call(block)', { blockReason, originalContent: content }, logRecord);
            return { block: true, blockReason };
          }
        });
      }

      // risk operations / prompt injection
      if (enableToolResultPersist) {
        // NOTE(review): this handler is async and mutates event.message after awaiting.
        // If the host runs tool_result_persist handlers synchronously, the rewrite would
        // happen too late to affect what is persisted. Confirm against the OpenClaw hook
        // contract.
        api.on('tool_result_persist', async (event) => {
          // The actual content to be checked is event.message.content
          const content =
            typeof event.message?.content === 'string'
              ? event.message.content
              : JSON.stringify(event.message?.content || '');
          const { decision, labels } = await moderate(
            api,
            client,
            appId,
            content,
            'tool',
            'tool_result_persist',
            logRecord
          );
          if (decision === DecisionTypeV2.BLOCK) {
            const blockReason = getBlockReason(content, labels);
            logEvent(api, 'tool_result_persist(block)', { blockReason, originalContent: content }, logRecord);

            // Nothing to rewrite when the event carries no message.
            if (!event.message) {
              return;
            }

            // If hit, do not return block, but rewrite content and details with reason
            const interceptedData = {
              error: 'llm_shield_intercepted',
              message: 'Your request has been intercepted by the LLM Application Firewall.',
              reason: blockReason
            };

            event.message.content = [
              {
                type: 'text',
                text: JSON.stringify(interceptedData, null, 2)
              }
            ];
            event.message.details = interceptedData;

            // Do not return block
            return;
          }
        });
      }

      api.logger.info(
        `[${pkg.name}] Plugin successfully initialized and registered hook points (fetch:${enableFetch}, beforeToolCall:${enableBeforeToolCall}, toolResultPersist:${enableToolResultPersist}).`
      );
    };

    // Fire-and-forget, but never leave a rejection unhandled (e.g. if hook registration throws).
    void verifyAndRegisterHooks().catch((e) => {
      api.logger.error(`[${pkg.name}] Registration failed unexpectedly: ${e}`);
    });
  }
};
588
+
589
+ export default plugin;
package/openclaw.plugin.json ADDED
@@ -0,0 +1,73 @@
1
+ {
2
+ "id": "ai-assistant-security-openclaw",
3
+ "name": "AI Assistant Security",
4
+ "description": "AI Assistant Security plugin for OpenClaw, to protect your LLM models and Agent lifecycle (including tool calls) from harmful requests.",
5
+ "configSchema": {
6
+ "type": "object",
7
+ "additionalProperties": false,
8
+ "properties": {
9
+ "endpoint": {
10
+ "type": "string",
11
+ "description": "The API endpoint for the AI Assistant Security service."
12
+ },
13
+ "apiKey": {
14
+ "type": "string",
15
+ "description": "The API key for AI Assistant Security service."
16
+ },
17
+ "appId": {
18
+ "type": "string",
19
+ "description": "Application scenario, used to identify the business context of the current call."
20
+ },
21
+ "openClawDir": {
22
+ "type": "string",
23
+ "description": "The root directory of OpenClaw. If not provided, it defaults to the parent directory of the plugin's resolved path."
24
+ },
25
+ "logRecord": {
26
+ "type": "boolean",
27
+ "description": "Whether to enable log recording.",
28
+ "default": false
29
+ },
30
+ "failureThreshold": {
31
+ "type": "integer",
32
+ "description": "The number of consecutive failures before triggering degradation. Default is 3.",
33
+ "default": 3
34
+ },
35
+ "retryInterval": {
36
+ "type": "integer",
37
+ "description": "The base interval in seconds to retry probing after degradation. Will increase exponentially. Default is 60 (1 minute).",
38
+ "default": 60
39
+ },
40
+ "maxRetryInterval": {
41
+ "type": "integer",
42
+ "description": "The maximum interval in seconds for exponential backoff. Default is 3600 (1 hour).",
43
+ "default": 3600
44
+ },
45
+ "timeoutMs": {
46
+ "type": "integer",
47
+ "description": "Request timeout in milliseconds for the moderation API. Default is 30000.",
48
+ "default": 30000
49
+ },
50
+ "hooks": {
51
+ "type": "object",
52
+ "description": "Enable or disable specific hook points.",
53
+ "properties": {
54
+ "fetch": {
55
+ "type": "boolean",
56
+ "description": "Enable global fetch hook for LLM requests.",
57
+ "default": true
58
+ },
59
+ "beforeToolCall": {
60
+ "type": "boolean",
61
+ "description": "Enable hook for before_tool_call event.",
62
+ "default": true
63
+ },
64
+ "toolResultPersist": {
65
+ "type": "boolean",
66
+ "description": "Enable hook for tool_result_persist event.",
67
+ "default": true
68
+ }
69
+ }
70
+ }
71
+ }
72
+ }
73
+ }
package/package.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "name": "@omni-shield/ai-assistant-security-openclaw",
3
+ "version": "1.0.0-beta1",
4
+ "type": "module",
5
+ "description": "AI Assistant Security plugin for OpenClaw, to protect your LLM models and Agent lifecycle (including tool calls) from harmful requests.",
6
+ "files": [
7
+ "package.json",
8
+ "index.ts",
9
+ "src",
10
+ "openclaw.plugin.json",
11
+ "README.md"
12
+ ],
13
+ "scripts": {
14
+ "publish:dev": "node scripts/publish.js --dev",
15
+ "publish:prod": "node scripts/publish.js"
16
+ },
17
+ "peerDependencies": {
18
+ "openclaw": ">=2026.1.26"
19
+ },
20
+ "openclaw": {
21
+ "extensions": [
22
+ "./index.ts"
23
+ ]
24
+ },
25
+ "devDependencies": {
26
+ "@types/node": "^25.2.2"
27
+ },
28
+ "dependencies": {
29
+ "node-machine-id": "^1.1.12"
30
+ },
31
+ "publishConfig": {
32
+ "access": "public"
33
+ }
34
+ }
package/src/client.ts ADDED
@@ -0,0 +1,127 @@
1
+ import {
2
+ ModerateV2Request,
3
+ ModerateV2Response,
4
+ } from "./types.js";
5
+
6
/**
 * Error carrying the details of an HTTP response.
 * `LLMShieldClient` throws it for any response whose status is not 200.
 */
export class HttpError extends Error {
  /** Numeric HTTP status code of the response. */
  readonly status: number;
  /** Status text of the response. */
  readonly statusText: string;
  /** Response body, if one was supplied. */
  readonly body?: unknown;

  constructor(message: string, status: number, statusText: string, body?: unknown) {
    super(message);
    this.name = "HttpError";
    this.status = status;
    this.statusText = statusText;
    this.body = body;
  }
}
20
+
21
/** Construction options for {@link LLMShieldClient}. */
export interface ClientOptions {
  /** Base URL of the backend service (for example `https://xxx.byted.org`); a trailing slash is removed. */
  baseUrl: string;

  /** API key, the counterpart of `api_key` in the Go SDK. */
  apiKey: string;

  /**
   * Custom fetch implementation. When omitted, `globalThis.fetch` is used;
   * on Node < 18 a polyfill has to be supplied.
   */
  fetchFn?: typeof fetch;

  /** Request timeout in milliseconds; 30000 when omitted. */
  timeoutMs?: number;
}
34
+
35
/**
 * LLM Shield v2 TypeScript client, covers v2 capabilities only.
 *
 * All requests are JSON `POST`s authenticated with the `x-api-key` header and are
 * aborted after `timeoutMs` milliseconds.
 */
export class LLMShieldClient {
  /** Timeout used when `timeoutMs` is omitted or not a finite positive number. */
  private static readonly DEFAULT_TIMEOUT_MS = 30000;

  private readonly baseUrl: string;
  private readonly apiKey: string;
  private readonly fetchFn: typeof fetch;
  private readonly timeoutMs: number;

  /**
   * @param options - See {@link ClientOptions}.
   * @throws Error when neither `options.fetchFn` nor a global `fetch` is available.
   */
  constructor(options: ClientOptions) {
    // Strip every trailing slash so `${baseUrl}${path}` never contains `//`.
    this.baseUrl = options.baseUrl.replace(/\/+$/, "");
    this.apiKey = options.apiKey;

    // NaN, zero or a negative delay would make the abort timer fire immediately and
    // fail every request, so such values fall back to the default.
    const requestedTimeout = options.timeoutMs;
    this.timeoutMs =
      typeof requestedTimeout === "number" && Number.isFinite(requestedTimeout) && requestedTimeout > 0
        ? requestedTimeout
        : LLMShieldClient.DEFAULT_TIMEOUT_MS;

    const fn = options.fetchFn ?? (globalThis as unknown as { fetch: typeof fetch }).fetch;
    if (!fn) {
      throw new Error("global fetch is unavailable. Please provide a fetch polyfill in your environment or pass an implementation via fetchFn.");
    }
    this.fetchFn = fn.bind(globalThis);
  }

  // -------- Internal utility methods --------

  /**
   * POSTs `body` as JSON to `baseUrl + path` and parses the JSON response.
   *
   * @param path - Path appended to the base URL.
   * @param body - Request payload; `null`/`undefined` is sent as `{}`.
   * @param extraHeaders - Additional headers; they override the defaults on conflict.
   * @returns The parsed response body, or `{}` when the body is empty.
   * @throws HttpError when the response status is not 200.
   * @throws Error when the response body is not valid JSON; the fetch implementation's
   *   own rejection (including an abort on timeout) propagates unchanged.
   */
  private async postJson<TReq, TRes>(path: string, body: TReq, extraHeaders?: Record<string, string>): Promise<TRes> {
    const url = `${this.baseUrl}${path}`;
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeoutMs);

    try {
      const resp = await this.fetchFn(url, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-api-key": this.apiKey,
          ...extraHeaders,
        },
        body: JSON.stringify(body ?? {}),
        signal: controller.signal,
      });

      const text = await resp.text();

      if (resp.status !== 200) {
        // Attach the parsed error payload when possible, the raw text otherwise.
        let parsed: unknown = text;
        try {
          parsed = text ? JSON.parse(text) : text;
        } catch {
          // ignore JSON parse error
        }
        throw new HttpError(`Request failed with status ${resp.status}`, resp.status, resp.statusText, parsed);
      }

      try {
        return (text ? JSON.parse(text) : {}) as TRes;
      } catch (e) {
        throw new Error(`JSON parsing failed: ${(e as Error).message}`, { cause: e });
      }
    } finally {
      clearTimeout(timeoutId);
    }
  }

  // -------- Public methods --------

  /**
   * Check endpoint connectivity.
   *
   * Sends an `OPTIONS` request to `/v2/moderate` with a fixed 5s timeout.
   *
   * @returns true when any HTTP response is received (even 404 or 405), false when
   *   the request fails or times out.
   */
  async ping(): Promise<boolean> {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 5000); // 5s ping timeout

    try {
      const url = `${this.baseUrl}/v2/moderate`;
      const resp = await this.fetchFn(url, {
        method: "OPTIONS", // Use OPTIONS or a simple GET to check connectivity
        signal: controller.signal,
      });
      // As long as there is a response (even 404 or 405), the endpoint is reachable
      return !!resp.status;
    } catch (e) {
      return false;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Non-streaming moderation, corresponds to Go Client.Moderate.
   *
   * @param request - Moderation request; an empty object is sent when omitted.
   * @param extraHeaders - Additional request headers.
   * @returns The parsed moderation response.
   */
  async moderate(request?: ModerateV2Request, extraHeaders?: Record<string, string>): Promise<ModerateV2Response> {
    const body: ModerateV2Request = request ?? {};
    return this.postJson<ModerateV2Request, ModerateV2Response>("/v2/moderate", body, extraHeaders);
  }
}
package/src/labels.ts ADDED
@@ -0,0 +1,57 @@
/** Display names of one risk label, one entry per supported language. */
export interface LabelTranslation {
  /** Chinese display name. */
  zh: string;
  /** English display name. */
  en: string;
}
5
+
/**
 * Display names for the built-in risk label codes, keyed by the label code
 * as a decimal string.
 *
 * Entries are listed in ascending numeric order. Because every key is an
 * integer-like string, JavaScript enumerates them in that same order no
 * matter how the literal is written.
 */
export const LabelToTranslationMap: Record<string, LabelTranslation> = {
  // 101xxxxx: content categories
  '10102000': { zh: '敏感内容', en: 'Sensitive Content' },
  '10103005': { zh: '谩骂', en: 'Abuse' },
  '10104000': { zh: '色情', en: 'Pornography' },
  '10107000': { zh: '敏感内容', en: 'Sensitive Content' },
  '10109000': { zh: '商业敏感内容', en: 'Commercial Sensitive Content' },
  '10112000': { zh: '歧视', en: 'Discrimination' },
  '10113002': { zh: '毒品', en: 'Drugs' },
  '10113003': { zh: '赌博', en: 'Gambling' },
  '10113004': { zh: '诈骗', en: 'Fraud' },
  '10116000': { zh: '敏感内容', en: 'Sensitive Content' },

  // 103xxxxx: personal / private data
  '10302000': { zh: '银行卡号', en: 'Bank Card Number' },
  '10304000': { zh: '身份证号', en: 'ID Card Number' },
  '10310000': { zh: '电子邮箱', en: 'Email Address' },
  '10313000': { zh: '电话号码', en: 'Phone Number' },
  '10322000': { zh: '隐私数据', en: 'Privacy Data' },

  // 104xxxxx: prompt attacks
  '10400000': { zh: '提示词攻击', en: 'Prompt Attack' },
  '10401001': { zh: '角色扮演攻击', en: 'Role Playing Attack' },
  '10401002': { zh: '权限提升攻击', en: 'Privilege Escalation Attack' },
  '10401003': { zh: '对抗前后缀攻击', en: 'Adversarial Prefix/Suffix Attack' },
  '10401004': { zh: '目标劫持攻击', en: 'Target Hijacking Attack' },
  '10401005': { zh: '混淆和编码攻击', en: 'Obfuscation and Encoding Attack' },
  '10401007': { zh: '指令补齐攻击', en: 'Instruction Completion Attack' },
  '10401008': { zh: '少量示例攻击', en: 'Few-shot Example Attack' },
  '10401011': { zh: '反向诱导攻击', en: 'Reverse Induction Attack' },
  '10401012': { zh: '代码化描述攻击', en: 'Coded Description Attack' },
  '10401013': { zh: 'URL渲染和请求攻击', en: 'URL Rendering and Requesting Attack' },
  '10401014': { zh: '远程代码执行攻击', en: 'Remote Code Execution Attack' },
  '10401015': { zh: '插件投毒攻击', en: 'Plugin Poisoning Attack' },
  '10401016': { zh: '敏感操作', en: 'Sensitive Actions' },
  '10401017': { zh: '静默窃取', en: 'Silent Exfiltration' },
  '10402001': { zh: '诱导生成有害内容攻击', en: 'Inducing Harmful Content Attack' },
  '10402003': { zh: '窃取提示词', en: 'Prompt Stealing' },

  // 107xxxxx
  '10701001': { zh: '高频相似样本攻击', en: 'High-frequency Similar Samples Attack' },
};
41
+
/**
 * Report whether a label code falls in the user-defined range
 * 50000000–50099999 (both ends inclusive).
 *
 * The label must consist of decimal digits only. `parseInt` on its own
 * accepts a numeric prefix followed by arbitrary text (for example
 * "50000001abc"), so the shape is validated before the range check.
 *
 * @param label - Label code as a decimal string.
 * @returns `true` only for an all-digit label inside the range.
 */
export const isUserDefinedLabel = (label: string): boolean => {
  if (!/^\d+$/.test(label)) {
    return false;
  }
  const labelNum = Number(label);
  return labelNum >= 50000000 && labelNum <= 50099999;
};
46
+
/** Languages for which label display names exist. */
export type Language = 'zh' | 'en';

/**
 * Resolve a risk label code to a human-readable name.
 *
 * Resolution order:
 *  1. a built-in entry in `LabelToTranslationMap`;
 *  2. the generic "user defined" name when `isUserDefinedLabel` accepts the code;
 *  3. the label itself, unchanged.
 *
 * @param label - Label code to translate.
 * @param lang - Target language; defaults to English.
 * @returns The display name, or `label` when nothing matches.
 */
export const getLabelName = (label: string, lang: Language = 'en'): string => {
  // Own-property check: a plain index lookup would also find inherited
  // members such as "constructor" or "toString" and return `undefined`.
  const builtIn = Object.prototype.hasOwnProperty.call(LabelToTranslationMap, label)
    ? LabelToTranslationMap[label]
    : undefined;
  if (builtIn) {
    return builtIn[lang];
  }
  if (isUserDefinedLabel(label)) {
    return lang === 'zh' ? '用户自定义标签' : 'User Defined Label';
  }
  return label;
};
package/src/types.ts ADDED
@@ -0,0 +1,152 @@
1
+ /**
2
+ * v2 related type definitions, matching Go SDK structure as much as possible (field names capitalized to maintain JSON compatibility).
3
+ */
4
+
5
+ // ------------ General Metadata ------------
6
+
/** Error payload: a code together with its message. */
export interface ErrorResponse {
  /** Error code. */
  Code: string;
  /** Error message. */
  Message: string;
}

/** Metadata attached to a response: an error payload and a request identifier. */
export interface ResponseMetadata {
  /** Error payload. NOTE(review): presumably empty/zero-valued on success — confirm against the API. */
  Error: ErrorResponse;
  /** Identifier of the request. */
  RequestId: string;
}
16
+
17
+ // ------------ Enum Definitions (consistent with Go int64 constants) ------------
18
+
/** Content type carried by a message or a multimodal part. */
export enum ContentTypeV2 {
  TEXT = 1,
  AUDIO = 2,
  IMAGE = 3,
  VIDEO = 4,
  FILE = 5,
}

/** Type of a moderation decision. */
export enum DecisionTypeV2 {
  PASS = 1,
  BLOCK = 2,
  MARK = 3,
  REPLACE = 4,
  OPTIMIZE = 5,
}

/** Action attached to a matched word. */
export enum UserAction {
  PASS = 1,
  BLOCK = 2,
  MARK = 3,
  REPLACE = 4,
}

/**
 * Source of a word match.
 * NOTE(review): the names suggest global / admin / user content libraries — confirm.
 */
export enum MatchSource {
  UNKNOWN = 0,
  GLOBAL_CONTENTLIB = 1,
  ADMIN_CONTENTLIB = 2,
  USER_CONTENTLIB = 3,
}
48
+
49
+ // ------------ Core Messages and Multimodality ------------
50
+
/** One part of a multimodal message. */
export interface MultiPart {
  /** Content text or link */
  Content: string;
  /** Content type */
  ContentType: ContentTypeV2;
}

/** A single message submitted for moderation. */
export interface MessageV2 {
  /** Message role: user/assistant/system/rag, etc. */
  Role: string;
  /** Text content or link */
  Content: string;
  /** Content type */
  ContentType: ContentTypeV2;
  /** Multimodal content, optional */
  MultiPart?: MultiPart[];
}
68
+
69
+ // ------------ Moderate v2 Request and Response ------------
70
+
/** Request body of the v2 moderation call. Every field is optional. */
export interface ModerateV2Request {
  /** Moderation content, required in Go, kept optional here for non-streaming compatibility */
  Message?: MessageV2;
  /** Bound ID for streaming moderation sessions */
  MsgID?: string;
  /** 0: One-time moderation; 1: Streaming; 2: Force send (flush) */
  UseStream?: number;
  /** Scene */
  Scene?: string;
  /** History messages */
  History?: MessageV2[];
}
83
+
/** A word match that contributed to a detected risk. */
export interface RiskMatchV2 {
  /** The matched word. */
  Word: string;
  /** Action associated with the match, when given. */
  Action?: UserAction;
  /** Where the match came from. */
  Source: MatchSource;
  /** Identifier of the rule, when given. */
  RuleID?: string;
}

/** A word match that contributed to a permit; same shape as {@link RiskMatchV2}. */
export interface PermitMatchV2 {
  /** The matched word. */
  Word: string;
  /** Action associated with the match, when given. */
  Action?: UserAction;
  /** Where the match came from. */
  Source: MatchSource;
  /** Identifier of the rule, when given. */
  RuleID?: string;
}

/** One detected risk. */
export interface RiskV2 {
  Category: string;
  /** Risk label code. */
  Label: string;
  /** NOTE(review): presumably a probability/confidence score — confirm range with the API. */
  Prob?: number;
  Matches?: RiskMatchV2[];
}

/** Collection of detected risks. */
export interface RiskInfoV2 {
  Risks: RiskV2[];
}

/** One permit entry; mirrors {@link RiskV2}. */
export interface PermitV2 {
  Category: string;
  Label: string;
  /** NOTE(review): presumably a probability/confidence score — confirm range with the API. */
  Prob?: number;
  Matches?: PermitMatchV2[];
}

/** Collection of permit entries. */
export interface PermitInfoV2 {
  Permits: PermitV2[];
}

/** Detail of a block decision; declares no fields at present. */
export interface BlockDetailV2 {}

/** Detail of a replace decision. */
export interface ReplaceDetailV2 {
  /** Replacement message, when one is supplied. */
  Replacement?: MessageV2;
}

/** Decision details; both members are optional. */
export interface DecisionDetailV2 {
  BlockDetail?: BlockDetailV2;
  ReplaceDetail?: ReplaceDetailV2;
}

/** The moderation decision. */
export interface DecisionV2 {
  DecisionType: DecisionTypeV2;
  Detail: DecisionDetailV2;
  DecisionStrategyID?: string;
  HitStrategyIDs?: string[];
}

/** Result section of a moderation response. */
export interface ModerateV2Result {
  MsgID: string;
  RiskInfo?: RiskInfoV2;
  Decision?: DecisionV2;
  PermitInfo?: PermitInfoV2;
  ContentInfo: string;
  /** NOTE(review): name suggests the service answered in degraded mode — confirm. */
  Degraded: boolean;
  DegradeReason: string;
}

/** Complete response of the v2 moderation call. */
export interface ModerateV2Response {
  ResponseMetadata: ResponseMetadata;
  Result: ModerateV2Result;
}
152
+
package/src/utils.ts ADDED
@@ -0,0 +1,78 @@
1
+ import os from "node:os";
2
+ import { machineIdSync } from 'node-machine-id';
3
+
/**
 * Return the identifier of the current machine.
 *
 * @returns The value produced by `machineIdSync()` from `node-machine-id`.
 */
export function getDeviceFingerprint(): string {
  // Use node-machine-id to obtain the device's unique identifier.
  const machineId = machineIdSync();
  return machineId;
}
8
+
/**
 * Encode the first non-internal IPv4 address of this host as 12 digits.
 *
 * Each of the four octets is left-padded with zeros to three digits and the
 * results are concatenated, e.g. `192.168.1.5` becomes `"192168001005"`.
 *
 * @returns The 12-digit encoding, or `"000000000000"` when no non-internal
 *   IPv4 address is found.
 */
export function getLocalIP12(): string {
  for (const addresses of Object.values(os.networkInterfaces())) {
    for (const iface of addresses ?? []) {
      // Node 18.0–18.3 report the family as the number 4 rather than the
      // string "IPv4"; accept both so the lookup works on every runtime.
      const family = iface.family as string | number;
      const isIPv4 = family === "IPv4" || family === 4;
      if (isIPv4 && !iface.internal) {
        return iface.address
          .split(".")
          .map((octet) => octet.padStart(3, "0"))
          .join("");
      }
    }
  }
  return "000000000000";
}
20
+
/**
 * Build a 32-character request identifier.
 *
 * Layout, in order:
 *  - 14 digits: local date and time as `YYYYMMDDhhmmss`;
 *  - 12 digits: the host address from `getLocalIP12()`;
 *  -  3 digits: milliseconds of the current second;
 *  -  3 characters: a random upper-case hexadecimal value `000`–`FFF`.
 *
 * @returns The concatenated identifier.
 */
export function generateRequestId(): string {
  const now = new Date();
  const pad = (value: number, width: number): string => value.toString().padStart(width, "0");

  const timestamp = [
    now.getFullYear().toString(),
    pad(now.getMonth() + 1, 2), // getMonth() is zero-based
    pad(now.getDate(), 2),
    pad(now.getHours(), 2),
    pad(now.getMinutes(), 2),
    pad(now.getSeconds(), 2),
  ].join("");

  const hostPart = getLocalIP12();
  const millis = pad(now.getMilliseconds(), 3);

  // Scale by 0x1000, not 0xFFF: Math.random() is strictly below 1, so the
  // smaller factor could never yield the top value FFF.
  const randomPart = Math.floor(Math.random() * 0x1000)
    .toString(16)
    .toUpperCase()
    .padStart(3, "0");

  return timestamp + hostPart + millis + randomPart;
}
34
+
/**
 * Report whether the text contains at least one Han (Chinese) character.
 *
 * Uses the Unicode `Script=Han` property instead of the fixed range
 * U+4E00–U+9FA5, so ideographs outside that original block (Extension A/B
 * and later additions) are recognised as well.
 *
 * @param text - Text to inspect.
 * @returns `true` when any Han character occurs in `text`.
 */
export function containsChinese(text: string): boolean {
  return /\p{Script=Han}/u.test(text);
}
38
+
/**
 * Extract the trailing user message from a request body of unknown shape.
 *
 * Supported shapes, checked in this order:
 *  1. `body.messages` is a non-empty array (OpenAI-like): only the LAST entry
 *     is considered, and only when its `role` is `"user"`. Its `content` may
 *     be a string, or an array of parts from which the `text` of every
 *     `{ type: "text" }` part is joined with newlines. Once this shape is
 *     recognised no other shape is tried.
 *  2. `body.prompt` is a string.
 *  3. `body.input` is a string.
 *
 * Malformed entries (a `null` or primitive last message, `null` content
 * parts) are skipped rather than raising a `TypeError`, since the body comes
 * from outside this module.
 *
 * @param body - Parsed request body; any value is accepted.
 * @returns A one-element array `[{ role: "user", content }]`, or `[]` when
 *   no non-empty user text is found.
 */
export function robustExtractLastUserMessage(body: any): { role: string; content: string }[] {
  if (!body || typeof body !== "object") return [];

  // Handle standard OpenAI-like message format
  if (Array.isArray(body.messages) && body.messages.length > 0) {
    const lastMessage: unknown = body.messages[body.messages.length - 1];

    // Only extract if the last message is an object whose role is "user"
    if (
      !lastMessage ||
      typeof lastMessage !== "object" ||
      (lastMessage as { role?: unknown }).role !== "user"
    ) {
      return [];
    }

    const rawContent = (lastMessage as { content?: unknown }).content;
    let content = "";
    if (typeof rawContent === "string") {
      content = rawContent;
    } else if (Array.isArray(rawContent)) {
      // Handle multi-modal content array (e.g. [{ type: "text", text: "..." }]);
      // `?.` tolerates null/undefined parts.
      content = rawContent
        .filter((part) => part?.type === "text" && typeof part.text === "string")
        .map((part) => part.text as string)
        .join("\n");
    }

    return content.length > 0 ? [{ role: "user", content }] : [];
  }

  // Handle other possible formats (e.g. anthropic, or top-level prompt)
  if (typeof body.prompt === "string") return [{ role: "user", content: body.prompt }];
  if (typeof body.input === "string") return [{ role: "user", content: body.input }];

  return [];
}