@omni-shield/ai-assistant-security-openclaw 1.0.0-beta1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of @omni-shield/ai-assistant-security-openclaw might be problematic. Click here for more details.
- package/README.md +54 -0
- package/index.ts +589 -0
- package/openclaw.plugin.json +73 -0
- package/package.json +34 -0
- package/src/client.ts +127 -0
- package/src/labels.ts +57 -0
- package/src/types.ts +152 -0
- package/src/utils.ts +78 -0
package/README.md
ADDED
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
# AI Assistant Security Plugin
|
|
2
|
+
|
|
3
|
+
`@omni-shield/ai-assistant-security-openclaw` is a security plugin designed for OpenClaw to protect your Large Language Models (LLMs) and Agent lifecycle from harmful requests and sensitive data leakage.
|
|
4
|
+
|
|
5
|
+
## Key Features
|
|
6
|
+
|
|
7
|
+
- **Multi-dimensional Protection**: Covers LLM requests, pre-tool calls (Before Tool Call), and tool result persistence (Tool Result Persist).
|
|
8
|
+
- **Global Interception**: Hooks into `global.fetch` to provide automated security auditing for underlying model calls.
|
|
9
|
+
- **Smart Degradation (Circuit Breaker)**: Built-in error handling and self-healing logic. Automatically enters degradation mode when security API failures exceed the threshold, ensuring business continuity.
|
|
10
|
+
- **Session Synchronization**: Automatically synchronizes OpenClaw session files to mark intercepted content when a request is blocked.
|
|
11
|
+
- **Risk Label Support**: Supports returning specific risk labels (e.g., PII, Prompt Injection) and displaying them in block messages.
|
|
12
|
+
|
|
13
|
+
## Quick Start
|
|
14
|
+
|
|
15
|
+
Bundled plugins are disabled by default in OpenClaw. Enable this plugin with the following command:
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
openclaw plugins enable ai-assistant-security-openclaw
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Restart the Gateway after enabling.
|
|
22
|
+
|
|
23
|
+
## Configuration
|
|
24
|
+
|
|
25
|
+
Configure the plugin in your OpenClaw configuration file:
|
|
26
|
+
|
|
27
|
+
```yaml
|
|
28
|
+
plugins:
|
|
29
|
+
ai-assistant-security-openclaw:
|
|
30
|
+
enabled: true
|
|
31
|
+
config:
|
|
32
|
+
endpoint: "https://your-security-api-endpoint" # Security API endpoint (Required)
|
|
33
|
+
apiKey: "your-api-key-here" # API Key (Required)
|
|
34
|
+
appId: "your-app-id" # Application Identifier (Required)
|
|
35
|
+
timeoutMs: 5000 # API timeout in ms, default is 30000
|
|
36
|
+
logRecord: true # Enable plugin runtime logs, default is false
|
|
37
|
+
failureThreshold: 3 # Failures before entering degradation, default is 3
|
|
38
|
+
retryInterval: 60 # Initial retry interval in seconds after degradation, default is 60
|
|
39
|
+
maxRetryInterval: 3600 # Max retry interval in seconds, default is 3600
|
|
40
|
+
hooks: # Hook points configuration, default all true
|
|
41
|
+
fetch: true # Whether to hook global.fetch
|
|
42
|
+
beforeToolCall: true # Whether to audit before tool execution
|
|
43
|
+
toolResultPersist: true # Whether to audit before persisting tool results
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
## Workflow
|
|
47
|
+
|
|
48
|
+
1. **Registration & Validation**: Validates the availability of `endpoint`, `apiKey`, and `appId` during startup.
|
|
49
|
+
2. **Security Audit**:
|
|
50
|
+
- **LLM Requests**: Monitors inputs sent to models.
|
|
51
|
+
- **Pre-Tool Call**: Audits the tool name and its parameters before execution.
|
|
52
|
+
- **Tool Result**: Audits raw data returned by tools to prevent sensitive information leakage.
|
|
53
|
+
3. **Interception**: If a risk is detected, the plugin returns a block message or rewrites the response content accordingly.
|
|
54
|
+
4. **Disaster Recovery**: If the security service is unavailable, the plugin automatically bypasses checks to prioritize business availability and periodically probes for service recovery.
|
package/index.ts
ADDED
|
@@ -0,0 +1,589 @@
|
|
|
1
|
+
/* eslint-disable max-depth */
|
|
2
|
+
import fs from 'node:fs';
|
|
3
|
+
import path from 'node:path';
|
|
4
|
+
import type { OpenClawPluginApi } from 'openclaw/plugin-sdk';
|
|
5
|
+
|
|
6
|
+
import pkg from './package.json';
|
|
7
|
+
import { LLMShieldClient } from './src/client.js';
|
|
8
|
+
import { getLabelName, type Language } from './src/labels.js';
|
|
9
|
+
import { ContentTypeV2, DecisionTypeV2, type MessageV2 } from './src/types.js';
|
|
10
|
+
import { generateRequestId, getDeviceFingerprint, robustExtractLastUserMessage } from './src/utils.js';
|
|
11
|
+
|
|
12
|
+
/**
 * Emits one info-level log line for a plugin hook, but only when runtime logging is on.
 *
 * The line has the form `[<package name>] <hook> <JSON payload>`.
 *
 * @param api - Plugin API whose `logger.info` receives the line.
 * @param hook - Label of the hook or phase being reported.
 * @param data - Payload serialized with `JSON.stringify`.
 * @param logRecord - When `false` the call is a no-op.
 */
function logEvent(api: OpenClawPluginApi, hook: string, data: any, logRecord: boolean): void {
  if (!logRecord) {
    return;
  }

  const payload = JSON.stringify(data);
  api.logger.info(`[${pkg.name}] ${hook} ${payload}`);
}
|
|
17
|
+
|
|
18
|
+
// Module-level state shared by every call to moderate() and by plugin.register().
//
// - isDegraded:             true once consecutiveFailures reaches failureThreshold; while set,
//                           moderate() returns an empty verdict instead of calling the API.
// - isProbing:              true while a recovery probe request is in flight, so that only one
//                           probe is sent at a time.
// - consecutiveFailures:    failed moderation calls since the last success.
// - lastRetryTime:          epoch ms of entering degradation or of the last failed probe.
// - failureThreshold:       failures needed to enter degradation (overridable via plugin config).
// - baseRetryIntervalMs:    initial wait before the first probe (overridable via plugin config).
// - currentRetryIntervalMs: current wait before the next probe; doubled after each failed probe
//                           and capped at maxRetryIntervalMs.
// - maxRetryIntervalMs:     upper bound for the probe interval (overridable via plugin config).
// - deviceFingerprint:      computed once in register() and sent as the
//                           X-Ai-Device-Fingerprint request header.
let isDegraded = false;
|
|
19
|
+
let isProbing = false;
|
|
20
|
+
let consecutiveFailures = 0;
|
|
21
|
+
let lastRetryTime = 0;
|
|
22
|
+
let failureThreshold = 3;
|
|
23
|
+
let baseRetryIntervalMs = 60 * 1000; // Default 1 min
|
|
24
|
+
let currentRetryIntervalMs = baseRetryIntervalMs;
|
|
25
|
+
let maxRetryIntervalMs = 3600 * 1000; // Max 1 hour
|
|
26
|
+
let deviceFingerprint = '';
|
|
27
|
+
|
|
28
|
+
/**
 * Atomically creates the `.lock` file that guards a session transcript.
 *
 * The file is written with the exclusive `wx` flag, so the existence check and the creation
 * are one operation and a lock created by another writer in the meantime is never
 * overwritten. While the lock exists the attempt is repeated once per second for at most
 * 30 seconds.
 *
 * @param lockFilePath - Path of the lock file to create.
 * @returns `true` when this call created the lock, `false` when the wait timed out.
 * @throws Any file-system error other than `EEXIST`.
 */
async function acquireSessionLock(lockFilePath: string): Promise<boolean> {
  const lockTimeoutMs = 30000; // 30s
  const pollIntervalMs = 1000;
  const startedAt = Date.now();

  while (Date.now() - startedAt < lockTimeoutMs) {
    const lockContent = {
      pid: process.pid,
      createdAt: new Date().toISOString()
    };
    try {
      await fs.promises.writeFile(lockFilePath, JSON.stringify(lockContent, null, 2), { flag: 'wx' });
      return true;
    } catch (err: any) {
      // EEXIST means somebody else holds the lock; anything else is a real failure.
      if (err?.code !== 'EEXIST') {
        throw err;
      }
    }
    await new Promise((resolve) => setTimeout(resolve, pollIntervalMs));
  }

  return false;
}

/**
 * Inserts the block marker into one JSONL transcript line when it is a user message whose
 * text contains the blocked content and does not carry the marker yet.
 *
 * @param line - Raw line of the session file.
 * @param originalContent - Content that was blocked.
 * @param blockReason - Marker text to insert.
 * @returns The line to write back (re-serialized when it parsed as JSON, untouched
 *          otherwise) and whether a marker was inserted.
 */
function markBlockedSessionLine(
  line: string,
  originalContent: string,
  blockReason: string
): { text: string; changed: boolean } {
  if (!line.trim()) {
    return { text: line, changed: false };
  }

  try {
    const entry = JSON.parse(line);
    let changed = false;

    if (entry.type === 'message' && entry.message?.role === 'user' && Array.isArray(entry.message.content)) {
      // Insert marker in original content (prefer before \n[message_id:)
      const markedContent = insertBlockMarker(originalContent, blockReason);
      for (const part of entry.message.content) {
        if (part.type !== 'text' || typeof part.text !== 'string') continue;
        if (part.text.includes(originalContent) && !part.text.includes(blockReason)) {
          // A replacer function is used on purpose: with a plain string replacement,
          // `$&`, `$1`, `$$` ... inside the user's content would be expanded.
          part.text = part.text.replace(originalContent, () => markedContent);
          changed = true;
        }
      }
    }

    return { text: JSON.stringify(entry), changed };
  } catch {
    // Not valid JSON (or not shaped like a message entry): keep the line as it is.
    return { text: line, changed: false };
  }
}

/**
 * Marks blocked user content inside the OpenClaw session transcripts on disk.
 *
 * Walks `<openClawDir>/agents/<agent>/sessions/sessions.json`, resolves each session's
 * `.jsonl` file, and inserts `blockReason` into the first file that contains
 * `originalContent`. The search stops after the first file that was changed. Each file is
 * edited under its `<file>.lock` lock. All errors are logged and swallowed, so the caller
 * can run this in the background.
 *
 * @param api - Plugin API (config, path resolution, logger).
 * @param originalContent - Content that was blocked; nothing is done when it is empty.
 * @param blockReason - Marker text to insert next to the blocked content.
 */
async function syncSessionContent(api: OpenClawPluginApi, originalContent: string, blockReason: string) {
  try {
    // An empty needle is "contained" in every string and would mark an unrelated message.
    if (originalContent === '') {
      return;
    }

    const pluginCfg = (api.pluginConfig ?? {}) as any;
    const openClawDir = api.resolvePath(pluginCfg.openClawDir ? pluginCfg.openClawDir : '..');
    const agentsDir = path.join(openClawDir, 'agents');

    if (!fs.existsSync(agentsDir)) {
      api.logger.error(`[${pkg.name}] Agents directory not found at ${agentsDir}`);
      return;
    }

    const agentDirs = fs
      .readdirSync(agentsDir)
      .filter((f) => fs.statSync(path.join(agentsDir, f)).isDirectory());

    for (const agentName of agentDirs) {
      const sessionsDir = path.join(agentsDir, agentName, 'sessions');
      const sessionsJsonPath = path.join(sessionsDir, 'sessions.json');
      if (!fs.existsSync(sessionsJsonPath)) continue;

      const sessionsData = JSON.parse(await fs.promises.readFile(sessionsJsonPath, 'utf-8'));

      for (const key of Object.keys(sessionsData)) {
        const sessionInfo = sessionsData[key];
        const sessionId = sessionInfo?.sessionId;
        if (!sessionId) continue;

        // Use sessionFile field first, otherwise fall back to the default path
        let sessionFilePath: string;
        if (!sessionInfo.sessionFile) {
          sessionFilePath = path.join(sessionsDir, `${sessionId}.jsonl`);
        } else if (path.isAbsolute(sessionInfo.sessionFile)) {
          sessionFilePath = sessionInfo.sessionFile;
        } else {
          sessionFilePath = path.join(sessionsDir, sessionInfo.sessionFile);
        }

        if (!fs.existsSync(sessionFilePath)) continue;

        const lockFilePath = `${sessionFilePath}.lock`;
        if (!(await acquireSessionLock(lockFilePath))) {
          api.logger.error(
            `[${pkg.name}] Failed to acquire lock for session ${sessionId} after 30s timeout, skipping sync.`
          );
          continue;
        }

        let fileChanged = false;
        try {
          const fileContent = await fs.promises.readFile(sessionFilePath, 'utf-8');
          const newLines = fileContent.split('\n').map((line) => {
            const result = markBlockedSessionLine(line, originalContent, blockReason);
            fileChanged = fileChanged || result.changed;
            return result.text;
          });

          if (fileChanged) {
            await fs.promises.writeFile(sessionFilePath, newLines.join('\n'));
          }
        } finally {
          // The lock was created by this call, so it is always ours to remove here.
          await fs.promises.unlink(lockFilePath).catch((err) => {
            if (err?.code !== 'ENOENT') {
              api.logger.error(`[${pkg.name}] Failed to remove lock file ${lockFilePath}: ${err}`);
            }
          });
        }

        // Only the first matching transcript is updated.
        if (fileChanged) {
          return;
        }
      }
    }
  } catch (e) {
    api.logger.error(`[${pkg.name}] Failed to sync session content: ${e}`);
  }
}
|
|
150
|
+
|
|
151
|
+
/**
 * Sends one piece of content to the security API and returns its verdict.
 *
 * Failure handling is fail-open: whenever the API cannot be reached the function resolves
 * with `{ labels: [] }` (no decision), so the caller lets the content through.
 *
 * - While the module is in degradation state, calls return immediately with an empty
 *   verdict. Once `currentRetryIntervalMs` has elapsed since `lastRetryTime`, a single call
 *   sends a fixed probe request; success leaves degradation and continues with the real
 *   check, failure doubles the interval (capped at `maxRetryIntervalMs`).
 * - A real check is retried once after 500 ms when it times out or fails with a 5xx status.
 * - Each final failure increments `consecutiveFailures`; reaching `failureThreshold`
 *   switches the module into degradation state.
 *
 * @param api - Plugin API used for logging.
 * @param client - Security API client.
 * @param appId - Sent as the request `Scene`.
 * @param content - Text to check.
 * @param role - Role attributed to the content.
 * @param source - Name of the calling hook, used in log lines.
 * @param logRecord - Enables the verbose `logEvent` records.
 * @param history - Optional preceding messages sent along with the content.
 * @returns The decision (if any) and the risk labels reported by the API.
 */
async function moderate(
  api: OpenClawPluginApi,
  client: LLMShieldClient,
  appId: string,
  content: string,
  role: string,
  source: string,
  logRecord: boolean,
  history?: MessageV2[]
): Promise<{ decision?: DecisionTypeV2; labels: string[] }> {
  // If in degradation state, check if it's time to retry
  if (isDegraded) {
    const retryDue = Date.now() - lastRetryTime > currentRetryIntervalMs;
    if (!retryDue || isProbing) {
      // Still in degradation period or another probe is in flight, pass directly
      return { labels: [] };
    }

    isProbing = true;
    api.logger.info(`[${pkg.name}] In degradation state, sending single probe request...`);
    try {
      await client.moderate(
        {
          Message: {
            Role: 'user',
            Content: 'hello',
            ContentType: ContentTypeV2.TEXT
          },
          Scene: appId
        },
        {
          'X-Ai-Device-Fingerprint': deviceFingerprint
        }
      );
      // Probe successful, recovered
      api.logger.info(`[${pkg.name}] Endpoint recovered, resetting degradation flag.`);
      isDegraded = false;
      consecutiveFailures = 0;
      currentRetryIntervalMs = baseRetryIntervalMs;
    } catch (e) {
      lastRetryTime = Date.now();
      // Exponential backoff
      currentRetryIntervalMs = Math.min(currentRetryIntervalMs * 2, maxRetryIntervalMs);
      api.logger.warn(
        `[${pkg.name}] Probe failed, next retry in ${Math.round(currentRetryIntervalMs / 1000)}s.`
      );
      return { labels: [] };
    } finally {
      // Released on every path so a failing probe can never block future probes.
      isProbing = false;
    }
  }

  const requestId = generateRequestId();
  logEvent(
    api,
    `${source}(check)`,
    { content, role, appId, requestId, historyCount: history?.length },
    logRecord
  );

  const maxAttempts = 2; // 1 original + 1 retry for transient errors

  for (let attempt = 1; attempt <= maxAttempts; attempt++) {
    try {
      const response = await client.moderate(
        {
          Message: {
            Role: role,
            Content: content,
            ContentType: ContentTypeV2.TEXT
          },
          Scene: appId,
          History: history
        },
        {
          'X-Top-Request-Id': requestId,
          'X-Ai-Device-Fingerprint': deviceFingerprint
        }
      );
      logEvent(api, `${source}(result)`, { response, requestId }, logRecord);

      // Successful call, reset consecutive failures count
      consecutiveFailures = 0;
      currentRetryIntervalMs = baseRetryIntervalMs;

      const decision = response.Result?.Decision?.DecisionType;
      const labels = response.Result?.RiskInfo?.Risks?.map((r) => r.Label) ?? [];
      return { decision, labels };
    } catch (error: any) {
      // `message` is only inspected when it really is a string, so classifying the
      // error can never throw from inside this handler.
      const message = typeof error?.message === 'string' ? error.message : '';
      const isTimeout = error?.name === 'AbortError' || message.includes('timeout');
      const isTransient = isTimeout || (error?.status >= 500 && error?.status < 600);
      const errorMsg = isTimeout ? 'Moderation timed out' : String(error);

      if (isTransient && attempt < maxAttempts) {
        api.logger.warn(
          `[${pkg.name}] Transient error (${errorMsg}), retrying... (${attempt}/${maxAttempts - 1})`
        );
        await new Promise((resolve) => setTimeout(resolve, 500)); // Short delay before retry
        continue;
      }

      consecutiveFailures++;
      logEvent(api, `${source}(error)`, { error: errorMsg, requestId, consecutiveFailures }, logRecord);
      // Reported through the host logger like every other diagnostic of this plugin.
      api.logger.error(
        `[${pkg.name}] Moderation failed (${source}) [RID:${requestId}] [Failures:${consecutiveFailures}]: ${errorMsg}`
      );

      // Check if circuit breaker threshold is reached
      if (consecutiveFailures >= failureThreshold) {
        isDegraded = true;
        lastRetryTime = Date.now();
        api.logger.error(
          `[${
            pkg.name
          }] Consecutive failures reached threshold (${failureThreshold}), entering degradation state. Next retry in ${Math.round(
            currentRetryIntervalMs / 1000
          )}s.`
        );
      }

      return { labels: [] };
    }
  }

  return { labels: [] };
}
|
|
280
|
+
|
|
281
|
+
/** Opening text of the marker that this plugin inserts into blocked content. */
const BLOCK_MARKER_PREFIX = '[Block By AI Assistant Security. Reason';

/**
 * Reduces a previously blocked message to its block marker.
 *
 * Everything in front of the last block marker is dropped. When the content starts with
 * `[` and the first `]` lies before that marker (e.g. a leading
 * `[Tue 2026-02-10 20:36 GMT+8]` tag), the bracketed prefix is kept and joined to the
 * marker with a single space. Content without a marker, and non-string input, is returned
 * unchanged.
 *
 * @param content - Message text to reduce.
 * @returns The reduced text.
 */
function preprocessContent(content: string): string {
  if (typeof content !== 'string') {
    return content;
  }

  const markerStart = content.lastIndexOf(BLOCK_MARKER_PREFIX);
  if (markerStart < 0) {
    return content;
  }

  const blockedTail = content.slice(markerStart);
  const firstClosingBracket = content.indexOf(']');
  const hasLeadingTag =
    content[0] === '[' && firstClosingBracket >= 0 && firstClosingBracket < markerStart;

  if (!hasLeadingTag) {
    // No bracketed prefix recognized: keep only the marker and what follows it.
    return blockedTail;
  }

  const leadingTag = content.slice(0, firstClosingBracket + 1);
  return `${leadingTag} ${blockedTail}`;
}
|
|
299
|
+
|
|
300
|
+
/**
 * Adds a block marker to a message.
 *
 * The marker is placed, preceded by one space, right before the last `\n[message_id:`
 * trailer when the content has one, and at the very end otherwise. Non-string content is
 * returned unchanged.
 *
 * @param content - Message text to mark.
 * @param marker - Marker text to insert.
 * @returns The marked text.
 */
function insertBlockMarker(content: string, marker: string): string {
  if (typeof content !== 'string') {
    return content;
  }

  const trailerStart = content.lastIndexOf('\n[message_id:');
  if (trailerStart === -1) {
    return content.concat(' ', marker);
  }

  const head = content.substring(0, trailerStart);
  const trailer = content.substring(trailerStart);
  return head.concat(' ', marker, trailer);
}
|
|
309
|
+
|
|
310
|
+
/**
 * Builds the block marker text for a set of risk labels.
 *
 * Labels are translated to their English names and de-duplicated (first occurrence wins);
 * without any label the generic reason `Inappropriate content` is used.
 *
 * @param content - Blocked content; not used when building the marker.
 * @param labels - Risk labels reported for the content.
 * @returns The marker, `<BLOCK_MARKER_PREFIX> <names joined by ", ">]`.
 */
function getBlockReason(content: string, labels: string[]): string {
  // Use English to get label names
  const lang: Language = 'en';

  const translated = labels.map((label) => getLabelName(label, lang));
  const distinctNames = [...new Set(translated)];

  let labelText = 'Inappropriate content';
  if (distinctNames.length > 0) {
    labelText = distinctNames.join(', ');
  }

  return `${BLOCK_MARKER_PREFIX} ${labelText}]`;
}
|
|
320
|
+
|
|
321
|
+
/**
 * Installs, at most once per process, a wrapper around `global.fetch` that audits outgoing
 * JSON request bodies.
 *
 * For a request whose body is a string, `Uint8Array` or `ArrayBuffer` holding JSON, the wrapper
 * 1. reduces earlier user messages that already carry a block marker to that marker,
 * 2. sends the message picked by `robustExtractLastUserMessage` (plus up to five preceding
 *    non-system messages) to `moderate`,
 * 3. on a BLOCK decision inserts the block marker into the last message (or into
 *    `prompt` / `input`) and starts the session-file sync in the background,
 * 4. replaces `options.body` with the re-serialized JSON when anything changed.
 * The request is then always forwarded to the original `fetch`.
 *
 * @param api - Plugin API used for logging and session sync.
 * @param client - Security API client.
 * @param appId - Application identifier sent with every check.
 * @param logRecord - Enables the verbose `logEvent` records.
 */
const hookGlobalFetch = (() => {
  let isHooked = false;

  /** Converts the messages before the last one into the history sent with the check. */
  const buildHistory = (jsonBody: any): MessageV2[] | undefined => {
    if (!jsonBody || !Array.isArray(jsonBody.messages) || jsonBody.messages.length <= 1) {
      return undefined;
    }
    // Filter out system messages and only take the most recent 5
    return jsonBody.messages
      .slice(0, -1)
      .filter((m: any) => m?.role !== 'system')
      .slice(-5)
      .map((m: any) => ({
        Role: m?.role || 'user',
        Content: typeof m?.content === 'string' ? m.content : JSON.stringify(m?.content),
        ContentType: ContentTypeV2.TEXT
      }));
  };

  /** Writes the block marker into the request body; returns whether the body must be re-serialized. */
  const applyBlockMarker = (jsonBody: any, blockReason: string): boolean => {
    if (!jsonBody) {
      return false;
    }
    if (Array.isArray(jsonBody.messages) && jsonBody.messages.length > 0) {
      // Intercept the last message: insert block marker (prefer before \n[message_id:)
      const lastMsg = jsonBody.messages[jsonBody.messages.length - 1];
      if (lastMsg && Array.isArray(lastMsg.content)) {
        // Multi-part content: insertBlockMarker() leaves non-strings untouched, so the
        // marker goes into the last text part instead of being silently dropped.
        // NOTE(review): assumes parts shaped like { type: 'text', text: string } — confirm
        // against the providers in use.
        const textParts = lastMsg.content.filter(
          (part: any) => part && part.type === 'text' && typeof part.text === 'string'
        );
        const target = textParts[textParts.length - 1];
        if (target) {
          target.text = insertBlockMarker(target.text, blockReason);
        }
      } else if (lastMsg && typeof lastMsg === 'object') {
        lastMsg.content = insertBlockMarker(lastMsg.content, blockReason);
      }
      return true;
    }
    if (typeof jsonBody.prompt === 'string') {
      jsonBody.prompt = insertBlockMarker(jsonBody.prompt, blockReason);
      return true;
    }
    if (typeof jsonBody.input === 'string') {
      jsonBody.input = insertBlockMarker(jsonBody.input, blockReason);
      return true;
    }
    return false;
  };

  return (api: OpenClawPluginApi, client: LLMShieldClient, appId: string, logRecord: boolean) => {
    if (isHooked) {
      return;
    }
    isHooked = true;
    const oldFetch = global.fetch;
    const newFetch: typeof oldFetch = async function (...args) {
      const url = args[0]?.toString() || '';
      const options = (args[1] as RequestInit) || {};

      // pii / prompt injection
      if (options.body) {
        let messagesToModerate: { role: string; content: string }[] = [];
        let rawBody: string | undefined, jsonBody: any;
        let bodyChanged = false;

        if (typeof options.body === 'string') {
          rawBody = options.body;
        } else if (options.body instanceof Uint8Array || options.body instanceof ArrayBuffer) {
          rawBody = new TextDecoder().decode(options.body);
        }

        if (rawBody) {
          try {
            jsonBody = JSON.parse(rawBody);

            // Preprocess user messages in history: remove previous content if a block marker is identified
            if (jsonBody && Array.isArray(jsonBody.messages) && jsonBody.messages.length > 1) {
              for (let i = 0; i < jsonBody.messages.length - 1; i++) {
                const m = jsonBody.messages[i];
                // Null entries are skipped; throwing here would land in the catch below
                // and skip the moderation of this request altogether.
                if (m && m.role === 'user' && typeof m.content === 'string') {
                  const newContent = preprocessContent(m.content);
                  if (newContent !== m.content) {
                    m.content = newContent;
                    bodyChanged = true;
                  }
                }
              }
            }

            messagesToModerate = robustExtractLastUserMessage(jsonBody);
          } catch (e) {
            logEvent(api, 'json_parse_failed', { url, error: String(e) }, logRecord);
          }
        }

        if (messagesToModerate.length > 0) {
          // 2. Prepare history (at this point, history messages have already been processed by preprocessContent above)
          const historyV2 = buildHistory(jsonBody);

          const msg = messagesToModerate[0];
          const { decision, labels } = await moderate(
            api,
            client,
            appId,
            msg.content,
            msg.role,
            'llm_request',
            logRecord,
            historyV2
          );

          if (decision === DecisionTypeV2.BLOCK) {
            const blockReason = getBlockReason(msg.content, labels);
            // 3. Update Session file asynchronously (background processing);
            // syncSessionContent() logs and swallows its own errors.
            void syncSessionContent(api, msg.content, blockReason);

            logEvent(api, 'llm_request(block)', { blockReason, originalContent: msg.content }, logRecord);
            if (applyBlockMarker(jsonBody, blockReason)) {
              bodyChanged = true;
            }
          }

          if (bodyChanged) {
            options.body = JSON.stringify(jsonBody);
          }
        }
      }

      const resp = await oldFetch.apply(this, args);
      return resp;
    };
    global.fetch = newFetch;
  };
})();
|
|
430
|
+
|
|
431
|
+
/**
 * OpenClaw plugin definition.
 *
 * `register` reads the plugin configuration, validates the required settings, verifies the
 * security endpoint with a test request in the background and, only when that request
 * succeeds, installs the enabled hook points (`global.fetch`, `before_tool_call`,
 * `tool_result_persist`).
 */
const plugin = {
  id: 'ai-assistant-security-openclaw',
  name: pkg.name,
  description:
    'AI Assistant Security plugin for OpenClaw, to protect your LLM models and Agent lifecycle (including tool calls) from harmful requests.',
  /**
   * Registers the plugin with OpenClaw.
   *
   * Returns synchronously; endpoint verification and hook registration happen in a
   * background task whose failures are logged, never thrown.
   *
   * @param api - Plugin API handed over by OpenClaw.
   */
  register(api: OpenClawPluginApi): void {
    const pluginCfg = (api.pluginConfig ?? {}) as any;
    const { endpoint, apiKey, appId } = pluginCfg;

    // Calculate device fingerprint once during registration
    if (!deviceFingerprint) {
      deviceFingerprint = getDeviceFingerprint();
    }

    api.logger.info(`[${pkg.name}] Device Fingerprint: ${deviceFingerprint}`);

    // Reads an optional numeric setting. Anything that is not a finite, non-negative
    // number is rejected: a NaN threshold would never trip the circuit breaker and a NaN
    // interval would never allow another probe.
    const readNonNegativeNumber = (key: string): number | undefined => {
      const raw = pluginCfg[key];
      if (raw === undefined) {
        return undefined;
      }
      const value = Number(raw);
      if (Number.isFinite(value) && value >= 0) {
        return value;
      }
      api.logger.warn(
        `[${pkg.name}] Ignoring invalid ${key} value (${String(raw)}), keeping the current setting.`
      );
      return undefined;
    };

    // Update global configuration (if provided)
    const configuredThreshold = readNonNegativeNumber('failureThreshold');
    if (configuredThreshold !== undefined) {
      failureThreshold = configuredThreshold;
    }
    const configuredRetryInterval = readNonNegativeNumber('retryInterval');
    if (configuredRetryInterval !== undefined) {
      baseRetryIntervalMs = configuredRetryInterval * 1000;
      currentRetryIntervalMs = baseRetryIntervalMs;
    }
    const configuredMaxRetryInterval = readNonNegativeNumber('maxRetryInterval');
    if (configuredMaxRetryInterval !== undefined) {
      maxRetryIntervalMs = configuredMaxRetryInterval * 1000;
    }

    // 1. Validate if apiKey and appId are empty
    if (!apiKey || !appId) {
      api.logger.error(
        `[${pkg.name}] Registration failed: apiKey or appId is empty, please check the configuration.`
      );
      return;
    }

    // 2. Validate if endpoint is empty
    if (!endpoint) {
      api.logger.error(
        `[${pkg.name}] Registration failed: endpoint is empty, please check the configuration.`
      );
      return;
    }

    const client = new LLMShieldClient({
      baseUrl: endpoint,
      apiKey,
      timeoutMs: pluginCfg.timeoutMs ? Number(pluginCfg.timeoutMs) : undefined
    });
    const logRecord = !!pluginCfg.logRecord;
    const hooksCfg = pluginCfg.hooks || {};
    const enableFetch = hooksCfg.fetch !== false;
    const enableBeforeToolCall = hooksCfg.beforeToolCall !== false;
    const enableToolResultPersist = hooksCfg.toolResultPersist !== false;

    // 3. Asynchronously validate endpoint connectivity and configuration
    const initialize = async (): Promise<void> => {
      api.logger.info(`[${pkg.name}] Verifying configuration with moderate interface: ${endpoint}...`);
      try {
        await client.moderate(
          {
            Message: {
              Role: 'user',
              Content: 'hello',
              ContentType: ContentTypeV2.TEXT
            },
            Scene: appId
          },
          {
            'X-Ai-Device-Fingerprint': deviceFingerprint
          }
        );
      } catch (e: any) {
        api.logger.error(
          `[${
            pkg.name
          }] Registration failed: Verification failed for endpoint ${endpoint}. Please check your network, apiKey, or appId configuration. Error: ${
            e?.message || e
          }`
        );
        return;
      }

      // 4. Validation passed, register hook points
      if (enableFetch) {
        hookGlobalFetch(api, client, appId, logRecord);
      }

      // risk operations / pii
      if (enableBeforeToolCall) {
        api.on('before_tool_call', async (event) => {
          const content = `Tool: ${event.toolName}, Params: ${JSON.stringify(event.params)}`;
          const { decision, labels } = await moderate(
            api,
            client,
            appId,
            content,
            'assistant',
            'before_tool_call',
            logRecord
          );
          if (decision === DecisionTypeV2.BLOCK) {
            const blockReason = getBlockReason(content, labels);
            logEvent(api, 'before_tool_call(block)', { blockReason, originalContent: content }, logRecord);
            return { block: true, blockReason };
          }
        });
      }

      // risk operations / prompt injection
      if (enableToolResultPersist) {
        // NOTE(review): this handler rewrites event.message only after awaiting the
        // moderation call. If OpenClaw invokes `tool_result_persist` synchronously and does
        // not await handlers, the rewrite happens too late — confirm against the SDK docs.
        api.on('tool_result_persist', async (event) => {
          // The actual content to be checked is event.message.content
          const content =
            typeof event.message?.content === 'string'
              ? event.message.content
              : JSON.stringify(event.message?.content || '');
          const { decision, labels } = await moderate(
            api,
            client,
            appId,
            content,
            'tool',
            'tool_result_persist',
            logRecord
          );
          if (decision === DecisionTypeV2.BLOCK) {
            const blockReason = getBlockReason(content, labels);
            logEvent(api, 'tool_result_persist(block)', { blockReason, originalContent: content }, logRecord);

            // If hit, do not return block, but rewrite content and details with reason
            const interceptedData = {
              error: 'llm_shield_intercepted',
              message: 'Your request has been intercepted by the LLM Application Firewall.',
              reason: blockReason
            };

            event.message.content = [
              {
                type: 'text',
                text: JSON.stringify(interceptedData, null, 2)
              }
            ];
            event.message.details = interceptedData;

            // Do not return block
            return;
          }
        });
      }

      api.logger.info(
        `[${pkg.name}] Plugin successfully initialized and registered hook points (fetch:${enableFetch}, beforeToolCall:${enableBeforeToolCall}, toolResultPersist:${enableToolResultPersist}).`
      );
    };

    // Fire-and-forget, but never leave a rejection unhandled (e.g. a throwing api.on()).
    void initialize().catch((e: unknown) => {
      api.logger.error(`[${pkg.name}] Plugin initialization failed unexpectedly: ${String(e)}`);
    });
  }
};
|
|
588
|
+
|
|
589
|
+
export default plugin;
|
|
package/openclaw.plugin.json
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
{
|
|
2
|
+
"id": "ai-assistant-security-openclaw",
|
|
3
|
+
"name": "AI Assistant Security",
|
|
4
|
+
"description": "AI Assistant Security plugin for OpenClaw, to protect your LLM models and Agent lifecycle (including tool calls) from harmful requests.",
|
|
5
|
+
"configSchema": {
|
|
6
|
+
"type": "object",
|
|
7
|
+
"additionalProperties": false,
|
|
8
|
+
"properties": {
|
|
9
|
+
"endpoint": {
|
|
10
|
+
"type": "string",
|
|
11
|
+
"description": "The API endpoint for the AI Assistant Security service."
|
|
12
|
+
},
|
|
13
|
+
"apiKey": {
|
|
14
|
+
"type": "string",
|
|
15
|
+
"description": "The API key for AI Assistant Security service."
|
|
16
|
+
},
|
|
17
|
+
"appId": {
|
|
18
|
+
"type": "string",
|
|
19
|
+
"description": "Application scenario, used to identify the business context of the current call."
|
|
20
|
+
},
|
|
21
|
+
"openClawDir": {
|
|
22
|
+
"type": "string",
|
|
23
|
+
"description": "The root directory of OpenClaw. If not provided, it defaults to the parent directory of the plugin's resolved path."
|
|
24
|
+
},
|
|
25
|
+
"logRecord": {
|
|
26
|
+
"type": "boolean",
|
|
27
|
+
"description": "Whether to enable log recording.",
|
|
28
|
+
"default": false
|
|
29
|
+
},
|
|
30
|
+
"failureThreshold": {
|
|
31
|
+
"type": "integer",
|
|
32
|
+
"description": "The number of consecutive failures before triggering degradation. Default is 3.",
|
|
33
|
+
"default": 3
|
|
34
|
+
},
|
|
35
|
+
"retryInterval": {
|
|
36
|
+
"type": "integer",
|
|
37
|
+
"description": "The base interval in seconds to retry probing after degradation. Will increase exponentially. Default is 60 (1 minute).",
|
|
38
|
+
"default": 60
|
|
39
|
+
},
|
|
40
|
+
"maxRetryInterval": {
|
|
41
|
+
"type": "integer",
|
|
42
|
+
"description": "The maximum interval in seconds for exponential backoff. Default is 3600 (1 hour).",
|
|
43
|
+
"default": 3600
|
|
44
|
+
},
|
|
45
|
+
"timeoutMs": {
|
|
46
|
+
"type": "integer",
|
|
47
|
+
"description": "Request timeout in milliseconds for the moderation API. Default is 30000.",
|
|
48
|
+
"default": 30000
|
|
49
|
+
},
|
|
50
|
+
"hooks": {
|
|
51
|
+
"type": "object",
|
|
52
|
+
"description": "Enable or disable specific hook points.",
|
|
53
|
+
"properties": {
|
|
54
|
+
"fetch": {
|
|
55
|
+
"type": "boolean",
|
|
56
|
+
"description": "Enable global fetch hook for LLM requests.",
|
|
57
|
+
"default": true
|
|
58
|
+
},
|
|
59
|
+
"beforeToolCall": {
|
|
60
|
+
"type": "boolean",
|
|
61
|
+
"description": "Enable hook for before_tool_call event.",
|
|
62
|
+
"default": true
|
|
63
|
+
},
|
|
64
|
+
"toolResultPersist": {
|
|
65
|
+
"type": "boolean",
|
|
66
|
+
"description": "Enable hook for tool_result_persist event.",
|
|
67
|
+
"default": true
|
|
68
|
+
}
|
|
69
|
+
}
|
|
70
|
+
}
|
|
71
|
+
}
|
|
72
|
+
}
|
|
73
|
+
}
|
package/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@omni-shield/ai-assistant-security-openclaw",
|
|
3
|
+
"version": "1.0.0-beta1",
|
|
4
|
+
"type": "module",
|
|
5
|
+
"description": "AI Assistant Security plugin for OpenClaw, to protect your LLM models and Agent lifecycle (including tool calls) from harmful requests.",
|
|
6
|
+
"files": [
|
|
7
|
+
"package.json",
|
|
8
|
+
"index.ts",
|
|
9
|
+
"src",
|
|
10
|
+
"openclaw.plugin.json",
|
|
11
|
+
"README.md"
|
|
12
|
+
],
|
|
13
|
+
"scripts": {
|
|
14
|
+
"publish:dev": "node scripts/publish.js --dev",
|
|
15
|
+
"publish:prod": "node scripts/publish.js"
|
|
16
|
+
},
|
|
17
|
+
"peerDependencies": {
|
|
18
|
+
"openclaw": ">=2026.1.26"
|
|
19
|
+
},
|
|
20
|
+
"openclaw": {
|
|
21
|
+
"extensions": [
|
|
22
|
+
"./index.ts"
|
|
23
|
+
]
|
|
24
|
+
},
|
|
25
|
+
"devDependencies": {
|
|
26
|
+
"@types/node": "^25.2.2"
|
|
27
|
+
},
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"node-machine-id": "^1.1.12"
|
|
30
|
+
},
|
|
31
|
+
"publishConfig": {
|
|
32
|
+
"access": "public"
|
|
33
|
+
}
|
|
34
|
+
}
|
package/src/client.ts
ADDED
|
@@ -0,0 +1,127 @@
|
|
|
1
|
+
import {
|
|
2
|
+
ModerateV2Request,
|
|
3
|
+
ModerateV2Response,
|
|
4
|
+
} from "./types.js";
|
|
5
|
+
|
|
6
|
+
/**
 * HTTP request error.
 *
 * Carries the status code, status text and (optionally) the response body
 * that the thrower attached, so callers can inspect the failed response.
 */
export class HttpError extends Error {
  /** Status code passed to the constructor. */
  readonly status: number;
  /** Status text passed to the constructor. */
  readonly statusText: string;
  /** Response body passed to the constructor, if any. */
  readonly body?: unknown;

  /**
   * @param message - Error message forwarded to `Error`.
   * @param status - HTTP status code of the response.
   * @param statusText - HTTP status text of the response.
   * @param body - Optional response body (parsed or raw).
   */
  constructor(message: string, status: number, statusText: string, body?: unknown) {
    super(message);
    // Replace the default "Error" name so the class shows up in logs and traces.
    this.name = "HttpError";
    this.status = status;
    this.statusText = statusText;
    this.body = body;
  }
}
|
|
20
|
+
|
|
21
|
+
/** Construction options accepted by the LLM Shield client. */
export interface ClientOptions {
  /** Backend service base URL, e.g., https://xxx.byted.org */
  baseUrl: string;

  /** API key; corresponds to `api_key` in the Go SDK. */
  apiKey: string;

  /**
   * Custom fetch implementation. Optional: when omitted, `globalThis.fetch`
   * is used. In Node < 18, a polyfill must be provided.
   */
  fetchFn?: typeof fetch;

  /** Request timeout in milliseconds (defaults to 30000). */
  timeoutMs?: number;
}
|
|
34
|
+
|
|
35
|
+
/**
 * LLM Shield v2 TypeScript client, covers v2 capabilities only.
 *
 * Requests are sent through the configured fetch implementation and are
 * aborted through an `AbortController` once their timeout elapses.
 */
export class LLMShieldClient {
  private readonly baseUrl: string;
  private readonly apiKey: string;
  private readonly fetchFn: typeof fetch;
  private readonly timeoutMs: number;

  /**
   * @param options - Base URL, API key, and optional fetch / timeout overrides.
   * @throws Error when neither `options.fetchFn` nor `globalThis.fetch` is available.
   */
  constructor(options: ClientOptions) {
    // Drop every trailing slash (not just one) so `${baseUrl}${path}` never
    // produces "//" in front of the path.
    this.baseUrl = options.baseUrl.replace(/\/+$/, "");
    this.apiKey = options.apiKey;
    this.timeoutMs = options.timeoutMs ?? 30000;
    const fn = options.fetchFn ?? (globalThis as unknown as { fetch: typeof fetch }).fetch;
    if (!fn) {
      throw new Error("global fetch is unavailable. Please provide a fetch polyfill in your environment or pass an implementation via fetchFn.");
    }
    // Bound to globalThis so it can be stored and invoked as a detached function.
    this.fetchFn = fn.bind(globalThis);
  }

  // -------- Internal utility methods --------

  /**
   * POSTs `body` as JSON to `path` (relative to the base URL) and parses the
   * JSON response.
   *
   * @param path - Request path appended to the base URL.
   * @param body - Payload to serialize; `null`/`undefined` is sent as `{}`.
   * @param extraHeaders - Headers merged after the defaults (so they can override them).
   * @returns The parsed response body, or `{}` when a 200 response has an empty body.
   * @throws HttpError when the response status is anything other than 200.
   * @throws Error when a 200 response body is not valid JSON.
   * Rejections from the fetch implementation itself (including the abort
   * triggered by the timeout) propagate unchanged.
   */
  private async postJson<TReq, TRes>(path: string, body: TReq, extraHeaders?: Record<string, string>): Promise<TRes> {
    const url = `${this.baseUrl}${path}`;
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), this.timeoutMs);

    try {
      const resp = await this.fetchFn(url, {
        method: "POST",
        headers: {
          "Content-Type": "application/json",
          "x-api-key": this.apiKey,
          ...extraHeaders,
        },
        body: JSON.stringify(body ?? {}),
        signal: controller.signal,
      });

      // Read the body while the timeout is still armed so a stalled body is aborted too.
      const text = await resp.text();

      if (resp.status !== 200) {
        // Attach the parsed JSON body when possible, otherwise the raw text.
        let parsed: unknown = text;
        try {
          parsed = text ? JSON.parse(text) : text;
        } catch {
          // ignore JSON parse error
        }
        throw new HttpError(`Request failed with status ${resp.status}`, resp.status, resp.statusText, parsed);
      }

      try {
        return (text ? JSON.parse(text) : {}) as TRes;
      } catch (e) {
        throw new Error(`JSON parsing failed: ${(e as Error).message}`, { cause: e });
      }
    } finally {
      clearTimeout(timeoutId);
    }
  }

  // -------- Public methods --------

  /**
   * Check endpoint connectivity.
   *
   * Sends an `OPTIONS` request to `/v2/moderate` with a fixed 5 second
   * timeout. Any response with a non-zero status (even 404 or 405) counts as
   * reachable; any thrown error, including the timeout abort, yields `false`.
   */
  async ping(): Promise<boolean> {
    const controller = new AbortController();
    const timeoutId = setTimeout(() => controller.abort(), 5000); // 5s ping timeout

    try {
      const url = `${this.baseUrl}/v2/moderate`;
      const resp = await this.fetchFn(url, {
        method: "OPTIONS", // Use OPTIONS or a simple GET to check connectivity
        signal: controller.signal,
      });
      // The body is never read here; cancel it so the connection is not kept
      // waiting on an unconsumed stream. A failing cancel must not change the verdict.
      await resp.body?.cancel().catch(() => undefined);
      // As long as there is a response (even 404 or 405), the endpoint is reachable
      return !!resp.status;
    } catch {
      return false;
    } finally {
      clearTimeout(timeoutId);
    }
  }

  /**
   * Non-streaming moderation, corresponds to Go Client.Moderate.
   *
   * @param request - Moderation request; an empty object is sent when omitted.
   * @param extraHeaders - Additional headers for this request.
   * @returns The parsed response of `POST /v2/moderate`.
   */
  async moderate(request?: ModerateV2Request, extraHeaders?: Record<string, string>): Promise<ModerateV2Response> {
    const body: ModerateV2Request = request ?? {};
    return this.postJson<ModerateV2Request, ModerateV2Response>("/v2/moderate", body, extraHeaders);
  }
}
|
package/src/labels.ts
ADDED
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
/** Display names of one risk label, one entry per supported language. */
export interface LabelTranslation {
  /** Chinese display name. */
  zh: string;
  /** English display name. */
  en: string;
}
|
|
5
|
+
|
|
6
|
+
/**
 * Built-in risk label codes mapped to their Chinese / English display names.
 *
 * Entries are listed in ascending numeric order of the label code.
 */
export const LabelToTranslationMap: Record<string, LabelTranslation> = {
  // 101xxxxx
  '10102000': { zh: '敏感内容', en: 'Sensitive Content' },
  '10103005': { zh: '谩骂', en: 'Abuse' },
  '10104000': { zh: '色情', en: 'Pornography' },
  '10107000': { zh: '敏感内容', en: 'Sensitive Content' },
  '10109000': { zh: '商业敏感内容', en: 'Commercial Sensitive Content' },
  '10112000': { zh: '歧视', en: 'Discrimination' },
  '10113002': { zh: '毒品', en: 'Drugs' },
  '10113003': { zh: '赌博', en: 'Gambling' },
  '10113004': { zh: '诈骗', en: 'Fraud' },
  '10116000': { zh: '敏感内容', en: 'Sensitive Content' },

  // 103xxxxx
  '10302000': { zh: '银行卡号', en: 'Bank Card Number' },
  '10304000': { zh: '身份证号', en: 'ID Card Number' },
  '10310000': { zh: '电子邮箱', en: 'Email Address' },
  '10313000': { zh: '电话号码', en: 'Phone Number' },
  '10322000': { zh: '隐私数据', en: 'Privacy Data' },

  // 104xxxxx
  '10400000': { zh: '提示词攻击', en: 'Prompt Attack' },
  '10401001': { zh: '角色扮演攻击', en: 'Role Playing Attack' },
  '10401002': { zh: '权限提升攻击', en: 'Privilege Escalation Attack' },
  '10401003': { zh: '对抗前后缀攻击', en: 'Adversarial Prefix/Suffix Attack' },
  '10401004': { zh: '目标劫持攻击', en: 'Target Hijacking Attack' },
  '10401005': { zh: '混淆和编码攻击', en: 'Obfuscation and Encoding Attack' },
  '10401007': { zh: '指令补齐攻击', en: 'Instruction Completion Attack' },
  '10401008': { zh: '少量示例攻击', en: 'Few-shot Example Attack' },
  '10401011': { zh: '反向诱导攻击', en: 'Reverse Induction Attack' },
  '10401012': { zh: '代码化描述攻击', en: 'Coded Description Attack' },
  '10401013': { zh: 'URL渲染和请求攻击', en: 'URL Rendering and Requesting Attack' },
  '10401014': { zh: '远程代码执行攻击', en: 'Remote Code Execution Attack' },
  '10401015': { zh: '插件投毒攻击', en: 'Plugin Poisoning Attack' },
  '10401016': { zh: '敏感操作', en: 'Sensitive Actions' },
  '10401017': { zh: '静默窃取', en: 'Silent Exfiltration' },
  '10402001': { zh: '诱导生成有害内容攻击', en: 'Inducing Harmful Content Attack' },
  '10402003': { zh: '窃取提示词', en: 'Prompt Stealing' },

  // 107xxxxx
  '10701001': { zh: '高频相似样本攻击', en: 'High-frequency Similar Samples Attack' },
};
|
|
41
|
+
|
|
42
|
+
/**
 * Tells whether a label code falls in the user-defined range
 * 50000000–50099999 (inclusive).
 *
 * @param label - Label code as a string.
 * @returns `true` only when `label` consists solely of decimal digits and its
 *          numeric value lies inside the range.
 */
export const isUserDefinedLabel = (label: string): boolean => {
  // parseInt would silently accept trailing garbage ("50000001abc") and
  // leading whitespace, so insist on a pure digit string before converting.
  if (!/^\d+$/.test(label)) {
    return false;
  }
  const labelNum = Number(label);
  return labelNum >= 50000000 && labelNum <= 50099999;
};
|
|
46
|
+
|
|
47
|
+
/** Languages in which label names can be rendered: Chinese (`zh`) or English (`en`). */
export type Language = 'zh' | 'en';
|
|
48
|
+
|
|
49
|
+
/**
 * Resolves a risk label code to a human-readable name.
 *
 * Resolution order:
 * 1. a built-in entry of `LabelToTranslationMap`;
 * 2. the generic "User Defined Label" text when `isUserDefinedLabel(label)` holds;
 * 3. the raw label itself.
 *
 * @param label - Label code to translate.
 * @param lang - Output language; defaults to `'en'`.
 * @returns The display name, never `undefined`.
 */
export const getLabelName = (label: string, lang: Language = 'en'): string => {
  // Own-property check: a bare bracket lookup on a plain object also finds
  // inherited Object.prototype members ("constructor", "toString", "__proto__"),
  // which would make this function return undefined for such labels.
  const translation = Object.prototype.hasOwnProperty.call(LabelToTranslationMap, label)
    ? LabelToTranslationMap[label]
    : undefined;
  if (translation) {
    // Fall back to English if a caller passes a language outside the union at runtime.
    return translation[lang] ?? translation.en;
  }
  if (isUserDefinedLabel(label)) {
    return lang === 'zh' ? '用户自定义标签' : 'User Defined Label';
  }
  return label;
};
|
package/src/types.ts
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* v2 related type definitions, matching Go SDK structure as much as possible (field names capitalized to maintain JSON compatibility).
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
// ------------ General Metadata ------------
|
|
6
|
+
|
|
7
|
+
/**
 * Error descriptor embedded in `ResponseMetadata`.
 * Field names are capitalized to stay compatible with the JSON payload.
 */
export interface ErrorResponse {
  /** Error code. */
  Code: string;
  /** Error message. */
  Message: string;
}
|
|
11
|
+
|
|
12
|
+
/** General metadata attached to a response. */
export interface ResponseMetadata {
  /** Error descriptor of the response. */
  Error: ErrorResponse;
  /** Identifier of the request. */
  RequestId: string;
}
|
|
16
|
+
|
|
17
|
+
// ------------ Enum Definitions (consistent with Go int64 constants) ------------
|
|
18
|
+
|
|
19
|
+
/**
 * Content type of a message or message part.
 * Numeric values are kept consistent with the Go SDK's int64 constants.
 */
export enum ContentTypeV2 {
  /** Text content. */
  TEXT = 1,
  /** Audio content. */
  AUDIO = 2,
  /** Image content. */
  IMAGE = 3,
  /** Video content. */
  VIDEO = 4,
  /** File content. */
  FILE = 5,
}
|
|
26
|
+
|
|
27
|
+
/**
 * Kind of decision carried by `DecisionV2.DecisionType`.
 * Numeric values are kept consistent with the Go SDK's int64 constants.
 */
export enum DecisionTypeV2 {
  PASS = 1,
  BLOCK = 2,
  MARK = 3,
  REPLACE = 4,
  OPTIMIZE = 5,
}
|
|
34
|
+
|
|
35
|
+
/**
 * Action attached to a risk or permit match (`Action` field of the match types).
 * Numeric values are kept consistent with the Go SDK's int64 constants.
 */
export enum UserAction {
  PASS = 1,
  BLOCK = 2,
  MARK = 3,
  REPLACE = 4,
}
|
|
41
|
+
|
|
42
|
+
/**
 * Origin of a match (`Source` field of the match types).
 * Numeric values are kept consistent with the Go SDK's int64 constants.
 */
export enum MatchSource {
  UNKNOWN = 0,
  GLOBAL_CONTENTLIB = 1,
  ADMIN_CONTENTLIB = 2,
  USER_CONTENTLIB = 3,
}
|
|
48
|
+
|
|
49
|
+
// ------------ Core Messages and Multimodality ------------
|
|
50
|
+
|
|
51
|
+
/** One part of a multimodal message (element of `MessageV2.MultiPart`). */
export interface MultiPart {
  /** Content text or link */
  Content: string;
  /** Content type */
  ContentType: ContentTypeV2;
}
|
|
57
|
+
|
|
58
|
+
/** A single message submitted for (or returned from) moderation. */
export interface MessageV2 {
  /** Message role: user/assistant/system/rag, etc. */
  Role: string;
  /** Text content or link */
  Content: string;
  /** Content type */
  ContentType: ContentTypeV2;
  /** Multimodal content, optional */
  MultiPart?: MultiPart[];
}
|
|
68
|
+
|
|
69
|
+
// ------------ Moderate v2 Request and Response ------------
|
|
70
|
+
|
|
71
|
+
/** Request payload of the v2 moderate endpoint. Every field is optional here. */
export interface ModerateV2Request {
  /** Moderation content, required in Go, kept optional here for non-streaming compatibility */
  Message?: MessageV2;
  /** Bound ID for streaming moderation sessions */
  MsgID?: string;
  /** 0: One-time moderation; 1: Streaming; 2: Force send (flush) */
  UseStream?: number;
  /** Scene */
  Scene?: string;
  /** History messages */
  History?: MessageV2[];
}
|
|
83
|
+
|
|
84
|
+
/** One entry of `RiskV2.Matches`. */
export interface RiskMatchV2 {
  /** Matched text (presumably the word or phrase that hit a rule; confirm against the API docs). */
  Word: string;
  /** Action associated with the match, if any. */
  Action?: UserAction;
  /** Where the match came from. */
  Source: MatchSource;
  /** Identifier of the rule, if any. */
  RuleID?: string;
}
|
|
90
|
+
|
|
91
|
+
/** One entry of `PermitV2.Matches`; same shape as `RiskMatchV2`. */
export interface PermitMatchV2 {
  /** Matched text (presumably the word or phrase that hit a rule; confirm against the API docs). */
  Word: string;
  /** Action associated with the match, if any. */
  Action?: UserAction;
  /** Where the match came from. */
  Source: MatchSource;
  /** Identifier of the rule, if any. */
  RuleID?: string;
}
|
|
97
|
+
|
|
98
|
+
/** One entry of `RiskInfoV2.Risks`. */
export interface RiskV2 {
  /** Risk category. */
  Category: string;
  /** Risk label code. */
  Label: string;
  /** Numeric score, if provided (presumably a probability; confirm against the API docs). */
  Prob?: number;
  /** Individual matches behind this risk, if provided. */
  Matches?: RiskMatchV2[];
}
|
|
104
|
+
|
|
105
|
+
/** Risk section of a moderation result (`ModerateV2Result.RiskInfo`). */
export interface RiskInfoV2 {
  /** List of risks. */
  Risks: RiskV2[];
}
|
|
108
|
+
|
|
109
|
+
/** One entry of `PermitInfoV2.Permits`; mirrors the shape of `RiskV2`. */
export interface PermitV2 {
  /** Permit category. */
  Category: string;
  /** Permit label code. */
  Label: string;
  /** Numeric score, if provided (presumably a probability; confirm against the API docs). */
  Prob?: number;
  /** Individual matches behind this permit, if provided. */
  Matches?: PermitMatchV2[];
}
|
|
115
|
+
|
|
116
|
+
/** Permit section of a moderation result (`ModerateV2Result.PermitInfo`). */
export interface PermitInfoV2 {
  /** List of permits. */
  Permits: PermitV2[];
}
|
|
119
|
+
|
|
120
|
+
/** Detail of a block decision (`DecisionDetailV2.BlockDetail`). Currently declares no fields. */
export interface BlockDetailV2 {}
|
|
121
|
+
|
|
122
|
+
/** Detail of a replace decision (`DecisionDetailV2.ReplaceDetail`). */
export interface ReplaceDetailV2 {
  /** Replacement message, if provided. */
  Replacement?: MessageV2;
}
|
|
125
|
+
|
|
126
|
+
/** Per-decision-type details of a `DecisionV2`; both members are optional. */
export interface DecisionDetailV2 {
  /** Detail for a block decision. */
  BlockDetail?: BlockDetailV2;
  /** Detail for a replace decision. */
  ReplaceDetail?: ReplaceDetailV2;
}
|
|
130
|
+
|
|
131
|
+
/** Decision section of a moderation result (`ModerateV2Result.Decision`). */
export interface DecisionV2 {
  /** Kind of decision. */
  DecisionType: DecisionTypeV2;
  /** Details that accompany the decision. */
  Detail: DecisionDetailV2;
  /** Identifier of the deciding strategy, if provided. */
  DecisionStrategyID?: string;
  /** Identifiers of the strategies that were hit, if provided. */
  HitStrategyIDs?: string[];
}
|
|
137
|
+
|
|
138
|
+
/** Result section of a v2 moderate response (`ModerateV2Response.Result`). */
export interface ModerateV2Result {
  /** Message ID of the moderated content. */
  MsgID: string;
  /** Risk information, if provided. */
  RiskInfo?: RiskInfoV2;
  /** Decision, if provided. */
  Decision?: DecisionV2;
  /** Permit information, if provided. */
  PermitInfo?: PermitInfoV2;
  /** Content information string. */
  ContentInfo: string;
  /** Degradation flag reported in the result. */
  Degraded: boolean;
  /** Reason text that accompanies `Degraded`. */
  DegradeReason: string;
}
|
|
147
|
+
|
|
148
|
+
/** Full response of the v2 moderate endpoint: metadata plus result. */
export interface ModerateV2Response {
  /** General response metadata. */
  ResponseMetadata: ResponseMetadata;
  /** Moderation result. */
  Result: ModerateV2Result;
}
|
|
152
|
+
|
package/src/utils.ts
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import os from "node:os";
|
|
2
|
+
import { machineIdSync } from 'node-machine-id';
|
|
3
|
+
|
|
4
|
+
/**
 * Returns the unique identifier of the current device.
 *
 * @returns The value produced by `machineIdSync()` from `node-machine-id`.
 */
export function getDeviceFingerprint(): string {
  // node-machine-id supplies the device-unique identifier.
  const fingerprint = machineIdSync();
  return fingerprint;
}
|
|
8
|
+
|
|
9
|
+
/**
 * Encodes the first non-internal IPv4 address of this host as 12 digits:
 * each of the four octets is left-padded with zeros to three characters and
 * the dots are dropped (e.g. `10.0.12.7` becomes `010000012007`).
 *
 * @returns The 12-digit string, or `"000000000000"` when no such address exists.
 */
export function getLocalIP12(): string {
  // Flatten every interface's address list, keeping the interface order.
  const addresses = Object.values(os.networkInterfaces()).flatMap((entries) => entries || []);
  const external = addresses.find((entry) => entry.family === "IPv4" && !entry.internal);
  if (external === undefined) {
    return "000000000000";
  }
  const octets = external.address.split(".");
  return octets.map((octet) => octet.padStart(3, "0")).join("");
}
|
|
20
|
+
|
|
21
|
+
/**
 * Builds a 32-character request ID from the current local time, the host's
 * IP and a random suffix.
 *
 * Layout: `YYYYMMDDHHmmss` (14) + `getLocalIP12()` (12) + milliseconds (3)
 * + random uppercase hex (3).
 *
 * @returns The concatenated request ID.
 */
export function generateRequestId(): string {
  const now = new Date();
  const pad = (value: number, width: number): string => value.toString().padStart(width, "0");

  // Local-time timestamp down to the second; getMonth() is zero-based.
  const dateStr = now.getFullYear().toString() +
    pad(now.getMonth() + 1, 2) +
    pad(now.getDate(), 2) +
    pad(now.getHours(), 2) +
    pad(now.getMinutes(), 2) +
    pad(now.getSeconds(), 2);
  const ipStr = getLocalIP12();
  const msStr = pad(now.getMilliseconds(), 3);
  // Scale by 0x1000 (4096), not 0xFFF: Math.random() is < 1, so multiplying by
  // 0xFFF could never produce "FFF" and left the suffix one value short.
  const randStr = Math.floor(Math.random() * 0x1000).toString(16).toUpperCase().padStart(3, "0");
  return dateStr + ipStr + msStr + randStr;
}
|
|
34
|
+
|
|
35
|
+
/**
 * Tells whether `text` contains at least one Han (Chinese) character.
 *
 * Uses the Unicode `Script=Han` property rather than the fixed range
 * U+4E00–U+9FA5, so characters outside that basic block (Extension A,
 * the supplementary-plane extensions, later additions) are recognized too.
 *
 * @param text - Text to inspect.
 * @returns `true` when any Han character is present.
 */
export function containsChinese(text: string): boolean {
  return /\p{Script=Han}/u.test(text);
}
|
|
38
|
+
|
|
39
|
+
/**
 * Extracts the text of the last user message from an LLM request body.
 *
 * Supported shapes:
 * - `{ messages: [...] }` (OpenAI-like): only the LAST element is considered,
 *   and only when its `role` is `"user"`. Its `content` may be a string or an
 *   array of parts, in which case the `text` of every `{ type: "text" }` part
 *   is joined with newlines.
 * - `{ prompt: string }` or `{ input: string }`: used when `messages` is
 *   missing or empty.
 *
 * Malformed input (non-object body, `null` message, `null` content parts)
 * never throws; it simply yields no message.
 *
 * @param body - Parsed request body of unknown shape.
 * @returns A one-element array `[{ role: "user", content }]`, or `[]` when
 *          nothing could be extracted.
 */
export function robustExtractLastUserMessage(body: unknown): { role: string; content: string }[] {
  if (!body || typeof body !== "object") return [];
  const payload = body as Record<string, unknown>;

  // Handle standard OpenAI-like message format
  const messages = payload.messages;
  if (Array.isArray(messages) && messages.length > 0) {
    const lastMessage: unknown = messages[messages.length - 1];
    // A null / primitive entry has no role, so it cannot be a user message.
    if (!lastMessage || typeof lastMessage !== "object") return [];

    const { role, content: rawContent } = lastMessage as Record<string, unknown>;

    // Only extract if the role of the last message is "user"
    if (role !== "user") return [];

    let content = "";
    if (typeof rawContent === "string") {
      content = rawContent;
    } else if (Array.isArray(rawContent)) {
      // Handle multi-modal content array (e.g. [{ type: "text", text: "..." }]);
      // null or non-object parts are skipped instead of crashing the filter.
      content = rawContent
        .filter(
          (part): part is { type: "text"; text: string } =>
            !!part && typeof part === "object" && part.type === "text" && typeof part.text === "string",
        )
        .map((part) => part.text)
        .join("\n");
    }

    return content.length > 0 ? [{ role: "user", content }] : [];
  }

  // Handle other possible formats (e.g. anthropic, or top-level prompt)
  if (typeof payload.prompt === "string") return [{ role: "user", content: payload.prompt }];
  if (typeof payload.input === "string") return [{ role: "user", content: payload.input }];

  return [];
}
|