clawguard-openclaw 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +210 -0
- package/openclaw.plugin.json +71 -0
- package/package.json +24 -0
- package/src/analyzers.test.ts +230 -0
- package/src/analyzers.ts +477 -0
- package/src/guards.test.ts +273 -0
- package/src/guards.ts +456 -0
- package/src/index.ts +448 -0
- package/src/patterns.ts +179 -0
package/src/index.ts
ADDED
|
@@ -0,0 +1,448 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ClawGuard OpenClaw Plugin
|
|
3
|
+
* Complete Lethal Trifecta defense for AI agents
|
|
4
|
+
*
|
|
5
|
+
* SOTA Features:
|
|
6
|
+
* - Input Guard: Prompt injection + adversarial suffix detection + multi-turn tracking
|
|
7
|
+
* - Runtime Guard: Tool call interception + anomaly detection
|
|
8
|
+
* - Output Guard: Credential & PII leak prevention + canary tokens
|
|
9
|
+
* - Spotlighting: Data marking for untrusted content
|
|
10
|
+
* - Defense Presets: paranoid / balanced / permissive
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import type { OpenClawPluginApi } from "openclaw/plugin-sdk";
|
|
14
|
+
import { Type } from "@sinclair/typebox";
|
|
15
|
+
import { scanInput, scanOutput, scanToolCall, type GuardConfig } from "./guards.js";
|
|
16
|
+
import {
|
|
17
|
+
DEFENSE_PRESETS,
|
|
18
|
+
applySpotlight,
|
|
19
|
+
createThreatFingerprint,
|
|
20
|
+
type ThreatEvent,
|
|
21
|
+
type MessageSource,
|
|
22
|
+
} from "./analyzers.js";
|
|
23
|
+
|
|
24
|
+
// =============================================================================
|
|
25
|
+
// Config Schema
|
|
26
|
+
// =============================================================================
|
|
27
|
+
|
|
28
|
+
// Each config section is declared as its own schema and assembled into
// `configSchema` at the bottom; the resulting shape and defaults are unchanged.

/** Names of the selectable defense presets. */
const presetNameSchema = Type.Union([
  Type.Literal('paranoid'),
  Type.Literal('balanced'),
  Type.Literal('permissive'),
]);

/** Input guard: scoring threshold (0-100), blocking switch and the two SOTA toggles. */
const inputGuardSchema = Type.Object({
  enabled: Type.Boolean({ default: true }),
  threshold: Type.Number({ default: 50, minimum: 0, maximum: 100 }),
  blockOnDetection: Type.Boolean({ default: false }),
  // SOTA features
  useAdversarialDetection: Type.Boolean({ default: true }),
  useMultiTurnTracking: Type.Boolean({ default: true }),
});

/** Runtime guard: which tools count as dangerous and how flagged calls are treated. */
const runtimeGuardSchema = Type.Object({
  enabled: Type.Boolean({ default: true }),
  dangerousTools: Type.Array(Type.String(), { default: ["exec", "write", "edit"] }),
  blockExfilUrls: Type.Boolean({ default: true }),
  requireApproval: Type.Boolean({ default: false }),
});

/** Output guard: credential / PII redaction switches and the canary token list. */
const outputGuardSchema = Type.Object({
  enabled: Type.Boolean({ default: true }),
  redactCredentials: Type.Boolean({ default: true }),
  redactPII: Type.Boolean({ default: true }),
  canaryTokens: Type.Array(Type.String(), { default: [] }),
});

/** Spotlighting: marking mode and the list of sources it applies to. */
const spotlightingSchema = Type.Object({
  enabled: Type.Boolean({ default: false }),
  mode: Type.Union(
    [
      Type.Literal('delimit'),
      Type.Literal('mark'),
      Type.Literal('encode'),
      Type.Literal('all'),
    ],
    { default: 'delimit' },
  ),
  sources: Type.Array(Type.String(), { default: ['web', 'email'] }),
});

/** Logging: threat log switch, optional log file path and structured event capture. */
const loggingSchema = Type.Object({
  logThreats: Type.Boolean({ default: true }),
  logFile: Type.Optional(Type.String()),
  structuredEvents: Type.Boolean({ default: false }),
});

/** Full plugin configuration; every section except `enabled` is optional. */
const configSchema = Type.Object({
  enabled: Type.Boolean({ default: true }),
  preset: Type.Optional(presetNameSchema),
  inputGuard: Type.Optional(inputGuardSchema),
  runtimeGuard: Type.Optional(runtimeGuardSchema),
  outputGuard: Type.Optional(outputGuardSchema),
  spotlighting: Type.Optional(spotlightingSchema),
  logging: Type.Optional(loggingSchema),
});

/** Static TypeScript type derived from `configSchema`. */
type PluginConfig = typeof configSchema.static;
|
|
73
|
+
|
|
74
|
+
// =============================================================================
|
|
75
|
+
// Plugin Definition
|
|
76
|
+
// =============================================================================
|
|
77
|
+
|
|
78
|
+
/**
 * ClawGuard plugin definition.
 *
 * `register` hooks three guards into the host's events — prompt scanning on
 * `before_agent_start`, tool-call scanning on `before_tool_call`, and leak
 * redaction on `message_sending` — and exposes status through a `clawguard`
 * CLI command group, a `clawguard` slash command and a service.
 */
const clawguardPlugin = {
  id: "clawguard",
  name: "ClawGuard",
  description: "Security guardrails for OpenClaw agents — Complete Lethal Trifecta defense",
  configSchema,

  /**
   * Registers all guards, commands and the service with the host.
   *
   * @param api - Plugin API handle; this function reads `pluginConfig` and
   *   `logger` and calls `on`, `registerCli`, `registerCommand` and
   *   `registerService` on it.
   */
  register(api: OpenClawPluginApi) {
    // A missing config object is treated as "all defaults" instead of throwing.
    const rawCfg = (api.pluginConfig ?? {}) as PluginConfig;

    // Only an explicit `enabled: false` disables the plugin. The schema default
    // is `true`, and the per-guard checks below use the same `!== false` rule.
    if (rawCfg.enabled === false) {
      api.logger.info("clawguard: disabled by config");
      return;
    }

    // Apply preset if specified; explicit per-section settings win over the preset.
    const preset = rawCfg.preset ? DEFENSE_PRESETS[rawCfg.preset] : null;
    const cfg = preset ? {
      ...rawCfg,
      inputGuard: { ...preset.inputGuard, ...rawCfg.inputGuard },
      runtimeGuard: { ...preset.runtimeGuard, ...rawCfg.runtimeGuard },
      outputGuard: { ...preset.outputGuard, ...rawCfg.outputGuard },
      spotlighting: { ...preset.spotlighting, ...rawCfg.spotlighting },
    } : rawCfg;

    const presetName = preset?.name || 'custom';
    api.logger.info(`🛡️ ClawGuard: initializing Lethal Trifecta defense (preset: ${presetName})`);

    const guardConfig: GuardConfig = {
      inputGuard: {
        ...cfg.inputGuard,
        useAdversarialDetection: cfg.inputGuard?.useAdversarialDetection ?? true,
        useMultiTurnTracking: cfg.inputGuard?.useMultiTurnTracking ?? true,
      },
      runtimeGuard: cfg.runtimeGuard,
      outputGuard: cfg.outputGuard,
    };

    // `logThreats` defaults to true in the schema, so an omitted `logging`
    // section must still log.
    const logThreats = cfg.logging?.logThreats !== false;

    // Cap on retained structured events so a long-running process cannot grow
    // the in-memory log without bound; the oldest entries are dropped first.
    const MAX_THREAT_EVENTS = 1000;

    // Status flags shared by the CLI `status` command and the slash command.
    const mark = (on: boolean): string => (on ? "✓" : "✗");
    const inputGuardOn = cfg.inputGuard?.enabled !== false;
    const runtimeGuardOn = cfg.runtimeGuard?.enabled !== false;
    const outputGuardOn = cfg.outputGuard?.enabled !== false;
    const spotlightingOn = Boolean(cfg.spotlighting?.enabled);
    const adversarialOn = cfg.inputGuard?.useAdversarialDetection !== false;
    const multiTurnOn = cfg.inputGuard?.useMultiTurnTracking !== false;
    const inputThreshold = cfg.inputGuard?.threshold ?? 50;

    // Track threat stats
    // NOTE(review): nothing in this block applies spotlighting or increments
    // `spotlightApplications`; the setting is merged and reported only.
    const stats = {
      inputScans: 0,
      inputThreats: 0,
      adversarialDetections: 0,
      multiTurnAlerts: 0,
      toolScans: 0,
      toolBlocks: 0,
      outputScans: 0,
      outputRedactions: 0,
      spotlightApplications: 0,
    };

    // Threat event log for structured logging (input guard events only).
    const threatEvents: ThreatEvent[] = [];

    // ========================================================================
    // Input Guard: Prompt Injection Detection (SOTA)
    // ========================================================================

    if (inputGuardOn) {
      api.on("before_agent_start", async (event) => {
        if (!event.prompt) return;

        stats.inputScans++;

        // Determine message source for source-aware thresholds
        const source: MessageSource = (event.context?.source as MessageSource) || 'user';

        const result = scanInput(event.prompt, {
          ...guardConfig.inputGuard,
          source,
          sessionId: event.sessionId,
          useAdversarialDetection: cfg.inputGuard?.useAdversarialDetection ?? true,
          useMultiTurnTracking: cfg.inputGuard?.useMultiTurnTracking ?? true,
        });

        if (result.safe) return;

        stats.inputThreats++;

        // Track SOTA detections
        const isAdversarial = Boolean(result.adversarialAnalysis?.isAdversarial);
        const multiTurnAlert = (result.multiTurnRisk ?? 0) > 10;
        if (isAdversarial) stats.adversarialDetections++;
        if (multiTurnAlert) stats.multiTurnAlerts++;

        // Decide on blocking before the event is recorded or logged, so both
        // the stored event and the log line carry the real `blocked` value.
        const shouldBlock = Boolean(cfg.inputGuard?.blockOnDetection) && result.level === "critical";

        // Create structured threat event
        const threatEvent: ThreatEvent = {
          id: `evt_${Date.now()}_${Math.random().toString(36).slice(2, 8)}`,
          timestamp: new Date().toISOString(),
          sessionId: event.sessionId,
          guard: 'input',
          source,
          severity: result.level as ThreatEvent['severity'],
          score: result.score,
          blocked: shouldBlock,
          redacted: false,
          threats: result.threats.map(t => ({
            category: t.category,
            description: t.description,
            matched: t.matched,
          })),
          adversarialAnalysis: result.adversarialAnalysis,
          multiTurnRisk: result.multiTurnRisk,
          fingerprint: createThreatFingerprint(result.threats),
        };

        if (cfg.logging?.structuredEvents) {
          threatEvents.push(threatEvent);
          if (threatEvents.length > MAX_THREAT_EVENTS) {
            threatEvents.shift();
          }
        }

        if (logThreats) {
          api.logger.warn(`🛡️ ClawGuard INPUT: ${result.level} threat (score: ${result.score}, source: ${source})`, {
            ...threatEvent,
            adjustedThreshold: result.adjustedThreshold,
            sourceMultiplier: result.sourceMultiplier,
          });
        }

        // Block if configured
        if (shouldBlock) {
          return {
            block: true,
            reason: `ClawGuard blocked: ${result.threats[0]?.description || "Critical injection detected"}`,
          };
        }

        // Inject warning context for the agent
        if (result.score >= 40) {
          const warningParts = [
            `Potential prompt injection detected. Exercise caution.`,
            `Threat level: ${result.level} (score: ${result.score})`,
            `Source: ${source}`,
          ];

          if (result.adversarialAnalysis?.isAdversarial) {
            warningParts.push(`Adversarial patterns: ${result.adversarialAnalysis.signals.join(', ')}`);
          }
          if (multiTurnAlert) {
            warningParts.push(`Multi-turn risk detected (cumulative: ${result.multiTurnRisk})`);
          }

          return {
            prependContext: `<clawguard-warning level="${result.level}" score="${result.score}">\n${warningParts.join('\n')}\n</clawguard-warning>`,
          };
        }
      });
    }

    // ========================================================================
    // Runtime Guard: Tool Call Interception
    // ========================================================================

    if (runtimeGuardOn) {
      api.on("before_tool_call", async (event) => {
        stats.toolScans++;

        const result = scanToolCall(
          { toolName: event.toolName, params: event.params as Record<string, unknown> },
          guardConfig.runtimeGuard
        );

        if (result.safe && !result.shouldBlock) return;

        const threatLog = {
          timestamp: new Date().toISOString(),
          guard: "runtime",
          tool: event.toolName,
          score: result.score,
          level: result.level,
          shouldBlock: result.shouldBlock,
          reason: result.reason,
          threats: result.threats,
        };

        if (logThreats) {
          api.logger.warn(`🛡️ ClawGuard RUNTIME: ${result.level} threat on ${event.toolName}`, threatLog);
        }

        if (result.shouldBlock) {
          // Count only calls that are actually blocked; flagged-but-allowed
          // calls are logged above but are not "blocks".
          stats.toolBlocks++;
          return {
            block: true,
            reason: result.reason || "ClawGuard blocked dangerous tool call",
          };
        }

        // Could trigger approval flow here if requireApproval is set
        if (result.requiresApproval) {
          // For now, just log - could integrate with HITL approval system
          api.logger.info(`🛡️ ClawGuard: ${event.toolName} flagged for review`);
        }
      });
    }

    // ========================================================================
    // Output Guard: Leak Prevention
    // ========================================================================

    if (outputGuardOn) {
      api.on("message_sending", async (event) => {
        if (!event.text) return;

        stats.outputScans++;
        const result = scanOutput(event.text, guardConfig.outputGuard);

        if (result.safe && result.leaksFound.length === 0) return;

        const threatLog = {
          timestamp: new Date().toISOString(),
          guard: "output",
          score: result.score,
          level: result.level,
          leaksFound: result.leaksFound,
        };

        if (logThreats) {
          api.logger.warn(`🛡️ ClawGuard OUTPUT: ${result.leaksFound.length} leaks redacted`, threatLog);
        }

        // Return redacted text; count a redaction only when the text changed.
        if (result.redactedText !== event.text) {
          stats.outputRedactions++;
          return {
            text: result.redactedText,
          };
        }
      });
    }

    // ========================================================================
    // CLI Commands
    // ========================================================================

    api.registerCli(({ program }) => {
      const guard = program
        .command("clawguard")
        .description("ClawGuard security commands");

      guard
        .command("status")
        .description("Show ClawGuard status and stats")
        .action(() => {
          console.log("\n🛡️ ClawGuard Status\n");
          console.log(`Preset: ${presetName}`);
          console.log("\nGuards enabled:");
          console.log(` • Input Guard: ${mark(inputGuardOn)} (threshold: ${inputThreshold})`);
          console.log(` • Runtime Guard: ${mark(runtimeGuardOn)}`);
          console.log(` • Output Guard: ${mark(outputGuardOn)}`);
          console.log(` • Spotlighting: ${mark(spotlightingOn)}`);
          console.log("\nSOTA Features:");
          console.log(` • Adversarial Detection: ${mark(adversarialOn)}`);
          console.log(` • Multi-turn Tracking: ${mark(multiTurnOn)}`);
          console.log("\nStats (this session):");
          console.log(` • Input scans: ${stats.inputScans}`);
          console.log(` • Input threats: ${stats.inputThreats}`);
          console.log(` • Adversarial detected: ${stats.adversarialDetections}`);
          console.log(` • Multi-turn alerts: ${stats.multiTurnAlerts}`);
          console.log(` • Tool scans: ${stats.toolScans}`);
          console.log(` • Tool blocks: ${stats.toolBlocks}`);
          console.log(` • Output scans: ${stats.outputScans}`);
          console.log(` • Output redactions: ${stats.outputRedactions}`);
          console.log(` • Spotlight applications: ${stats.spotlightApplications}`);
          console.log();
        });

      guard
        .command("test")
        .description("Test ClawGuard detection")
        .argument("<text>", "Text to scan")
        .option("--guard <type>", "Guard to test (input|output)", "input")
        .option("--source <source>", "Message source (user|web|email|file|tool_output)", "user")
        .action((text, opts) => {
          if (opts.guard === "output") {
            const result = scanOutput(text, guardConfig.outputGuard);
            console.log(JSON.stringify(result, null, 2));
          } else {
            const result = scanInput(text, {
              ...guardConfig.inputGuard,
              source: opts.source as MessageSource,
              useAdversarialDetection: true,
              useMultiTurnTracking: false, // No session for CLI test
            });
            console.log(JSON.stringify(result, null, 2));
          }
        });

      guard
        .command("presets")
        .description("Show available defense presets")
        .action(() => {
          console.log("\n🛡️ Defense Presets\n");
          for (const [id, preset] of Object.entries(DEFENSE_PRESETS)) {
            console.log(`${id}:`);
            console.log(` ${preset.description}`);
            console.log(` Input threshold: ${preset.inputGuard.threshold}`);
            console.log(` Block on detection: ${preset.inputGuard.blockOnDetection}`);
            console.log(` Require approval: ${preset.runtimeGuard.requireApproval}`);
            console.log(` Spotlighting: ${preset.spotlighting.enabled ? preset.spotlighting.mode : 'disabled'}`);
            console.log();
          }
        });

      guard
        .command("events")
        .description("Show recent threat events")
        .option("--limit <n>", "Number of events", "10")
        .action((opts) => {
          // `slice(-0)` and `slice(NaN)` both return the whole array, and a
          // negative limit would slice from the front, so validate first.
          const limit = Number.parseInt(opts.limit, 10);
          if (!Number.isInteger(limit) || limit <= 0) {
            console.error(`Invalid --limit "${opts.limit}": expected a positive integer.`);
            return;
          }
          const recent = threatEvents.slice(-limit);
          if (recent.length === 0) {
            console.log("No threat events recorded.");
            return;
          }
          console.log(JSON.stringify(recent, null, 2));
        });
    }, { commands: ["clawguard"] });

    // ========================================================================
    // Slash Command
    // ========================================================================

    api.registerCommand({
      name: "clawguard",
      description: "Show ClawGuard security status",
      handler: () => ({
        text: [
          `🛡️ ClawGuard Active (${presetName})`,
          ``,
          `**Guards:**`,
          `• Input Guard: ${mark(inputGuardOn)} (threshold: ${inputThreshold})`,
          `• Runtime Guard: ${mark(runtimeGuardOn)}`,
          `• Output Guard: ${mark(outputGuardOn)}`,
          `• Spotlighting: ${mark(spotlightingOn)}`,
          ``,
          `**SOTA Features:**`,
          `• Adversarial Detection: ${mark(adversarialOn)}`,
          `• Multi-turn Tracking: ${mark(multiTurnOn)}`,
          ``,
          `**Session Stats:**`,
          `• Total scans: ${stats.inputScans + stats.toolScans + stats.outputScans}`,
          `• Threats detected: ${stats.inputThreats}`,
          `• Adversarial patterns: ${stats.adversarialDetections}`,
          `• Multi-turn alerts: ${stats.multiTurnAlerts}`,
          `• Tool blocks: ${stats.toolBlocks}`,
          `• Leaks redacted: ${stats.outputRedactions}`,
        ].join("\n"),
      }),
    });

    // ========================================================================
    // Service
    // ========================================================================

    api.registerService({
      id: "clawguard",
      start: () => {
        api.logger.info("🛡️ ClawGuard: Lethal Trifecta defense active");
      },
      stop: () => {
        api.logger.info("🛡️ ClawGuard: shutting down");
        api.logger.info(` Final stats: ${stats.inputScans} input scans, ${stats.inputThreats} threats, ${stats.outputRedactions} redactions`);
      },
    });
  },
};
|
|
447
|
+
|
|
448
|
+
export default clawguardPlugin;
|
package/src/patterns.ts
ADDED
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* ClawGuard Detection Patterns
|
|
3
|
+
* Prompt injection patterns across multiple languages and attack vectors
|
|
4
|
+
*/
|
|
5
|
+
|
|
6
|
+
// =============================================================================
|
|
7
|
+
// Injection Patterns (Direct + Indirect)
|
|
8
|
+
// =============================================================================
|
|
9
|
+
|
|
10
|
+
/** Expands `[pattern, weight]` pairs into rule objects that all share one category. */
const injectionRules = (
  category: string,
  pairs: ReadonlyArray<readonly [RegExp, number]>,
) => pairs.map(([pattern, weight]) => ({ pattern, weight, category }));

/**
 * Prompt-injection detection rules. Each rule is `{ pattern, weight, category }`;
 * rules are listed category by category in the order below.
 */
export const INJECTION_PATTERNS = [
  // Direct instruction override
  ...injectionRules('override', [
    [/ignore\s+(all\s+)?(previous|prior|above|earlier)\s+(instructions?|prompts?|rules?|commands?)/i, 40],
    [/disregard\s+(all\s+)?(previous|prior|above|earlier)/i, 40],
    [/forget\s+(everything|all|what)\s+(you|i)\s+(said|told|know)/i, 35],
    [/new\s+instructions?:?\s*$/im, 30],
    [/override\s+(previous|system|all)/i, 40],
  ]),

  // Role manipulation
  ...injectionRules('role', [
    [/you\s+are\s+now\s+/i, 50],
    [/you\s+are\s+(now|actually|really)\s+(a|an|the)/i, 35],
    [/pretend\s+(to\s+be|you\s*'?r?e?)/i, 30],
    [/act\s+as\s+(if\s+you\s+are|a|an|the)/i, 25],
    [/roleplay\s+as/i, 25],
    [/from\s+now\s+on,?\s+you/i, 30],
    [/switch\s+(to|into)\s+\w+\s+mode/i, 30],
  ]),

  // System prompt extraction
  ...injectionRules('extraction', [
    [/(?:show|print|display|reveal|output|repeat)\s+(?:me\s+)?(?:your|the)\s+(?:system\s+)?(?:prompt|instructions?)/i, 55],
    [/show\s+(?:me\s+)?your\s+(?:system\s+)?prompt/i, 55],
    [/what\s+(?:are|is|were)\s+your\s+(?:original|initial|system)\s+(?:instructions?|prompt)/i, 55],
    [/(?:list|enumerate|tell\s+me)\s+(?:all\s+)?(?:your|the)\s+rules/i, 45],
    [/repeat\s+(?:back\s+)?(?:everything|all)\s+(?:before|above)/i, 50],
  ]),

  // Jailbreak attempts
  ...injectionRules('jailbreak', [
    [/\bDAN\b.*\bmode\b/i, 50],
    [/developer\s+mode\s+(enabled|on|activated)/i, 50],
    [/jailbreak(ed)?/i, 45],
    [/bypass\s+(your\s+)?(restrictions?|limitations?|filters?|safety)/i, 45],
    [/disable\s+(your\s+)?(safety|filters?|restrictions?)/i, 45],
  ]),

  // Delimiter injection
  ...injectionRules('delimiter', [
    [/```\s*system\b/i, 50],
    [/<\/?(?:system|admin|root|sudo)>/i, 50],
    [/\[\[?\s*(?:SYSTEM|ADMIN|INSTRUCTION)\s*\]?\]/i, 50],
    [/={3,}\s*(?:NEW|SYSTEM|ADMIN)/i, 45],
  ]),

  // Data exfiltration
  ...injectionRules('exfil', [
    [/(?:send|post|transmit|exfil\w*)\s+(?:to|data\s+to)\s+https?:\/\//i, 50],
    [/webhook\.site/i, 55],
    [/requestbin|ngrok|pipedream|hookbin/i, 55],
    [/curl\s+.*https?:\/\/\S+/i, 50],
    [/fetch\s*\(\s*['"`]https?:\/\//i, 50],
  ]),

  // Environment/credential access
  ...injectionRules('credential', [
    [/(?:print|echo|show|display|cat|read)\s+(?:\$|%)?(?:ENV|env|process\.env)/i, 50],
    [/(?:API[_-]?KEY|SECRET|TOKEN|PASSWORD|CREDENTIAL)/i, 25],
    [/\.env\b/i, 30],
  ]),

  // Multi-turn manipulation
  ...injectionRules('persistence', [
    [/(?:remember|always)\s+(?:that|this|to)\s+/i, 20],
    [/(?:in\s+)?all\s+(?:future|subsequent)\s+(?:responses?|messages?|replies?)/i, 30],
  ]),

  // Indirect injection markers (content from external sources)
  ...injectionRules('indirect', [
    [/\[INST\]/i, 35],
    [/<\|(?:im_start|im_end|system|user|assistant)\|>/i, 40],
    [/Human:|Assistant:|System:/i, 25],
  ]),
];
|
|
68
|
+
|
|
69
|
+
// =============================================================================
|
|
70
|
+
// International Patterns (KO/JA/ZH/ES/DE/FR/RU)
|
|
71
|
+
// =============================================================================
|
|
72
|
+
|
|
73
|
+
/**
 * Non-English prompt-injection rules. Each rule is
 * `{ pattern, weight, category, lang }`, where `lang` is the language code of
 * the phrases the pattern targets.
 */
export const I18N_PATTERNS = [
  // Korean - "이전 지시를 무시하세요" = "ignore the previous instructions"
  { pattern: /이전.*(?:지시|명령|프롬프트).*무시/i, weight: 55, category: 'override', lang: 'ko' },
  { pattern: /지시.*무시/i, weight: 55, category: 'override', lang: 'ko' },
  { pattern: /시스템\s*프롬프트.*보여/i, weight: 55, category: 'extraction', lang: 'ko' },

  // Japanese - "以前の指示を無視してください" = "please ignore the previous instructions"
  { pattern: /(?:以前|前).*(?:指示|命令|プロンプト).*(?:無視|忘れ)/i, weight: 55, category: 'override', lang: 'ja' },
  { pattern: /指示.*無視/i, weight: 55, category: 'override', lang: 'ja' },
  { pattern: /システムプロンプト.*(?:見せ|表示|出力)/i, weight: 55, category: 'extraction', lang: 'ja' },

  // Chinese - "忽略之前的指令" = "ignore previous instructions"
  { pattern: /忽略.*(?:之前|以前|上面)?.*(?:指令|指示|命令)/i, weight: 55, category: 'override', lang: 'zh' },
  { pattern: /忽略.*指令/i, weight: 55, category: 'override', lang: 'zh' },
  { pattern: /(?:显示|展示|输出).*系统.*(?:提示|指令)/i, weight: 55, category: 'extraction', lang: 'zh' },

  // Spanish - "ignora las instrucciones anteriores" = "ignore the previous instructions"
  { pattern: /ignora\s+(?:las?\s+)?(?:instrucciones?|órdenes?)/i, weight: 55, category: 'override', lang: 'es' },
  { pattern: /muestra\s+(?:el\s+)?prompt\s+(?:del\s+)?sistema/i, weight: 55, category: 'extraction', lang: 'es' },

  // German - "ignoriere die vorherigen Anweisungen" = "ignore the previous instructions"
  { pattern: /ignoriere?\s+(?:die\s+)?(?:vorherigen?|früheren?)?\s*(?:Anweisungen?|Befehle?)/i, weight: 55, category: 'override', lang: 'de' },
  { pattern: /(?:zeige?|gib)\s+(?:mir\s+)?(?:den\s+)?System-?(?:prompt|Anweisung)/i, weight: 55, category: 'extraction', lang: 'de' },

  // French
  { pattern: /ignore[zr]?\s+(?:les?\s+)?instructions?\s+(?:précédentes?|antérieures?)/i, weight: 55, category: 'override', lang: 'fr' },
  { pattern: /(?:montre|affiche)[zr]?\s+(?:le\s+)?prompt\s+(?:du\s+)?système/i, weight: 55, category: 'extraction', lang: 'fr' },

  // Russian
  { pattern: /(?:игнорируй|забудь)\s+(?:все\s+)?(?:предыдущие|прошлые)\s+(?:инструкции|команды)/i, weight: 55, category: 'override', lang: 'ru' },
  // The adjective ending is matched with a Cyrillic class: without the `u`
  // flag `\w` covers only ASCII, so `системн\w*` could never consume the
  // ending of "системный" / "системные" and the rule never fired.
  { pattern: /(?:покажи|выведи)\s+системн[а-яё]*\s+(?:промпт|инструкци)/i, weight: 55, category: 'extraction', lang: 'ru' },
];
|
|
105
|
+
|
|
106
|
+
// =============================================================================
|
|
107
|
+
// Credential Patterns
|
|
108
|
+
// =============================================================================
|
|
109
|
+
|
|
110
|
+
/**
 * Credential detectors as `{ name, pattern }`. Every pattern carries the `g`
 * flag, so `lastIndex` is stateful when a pattern object is reused with
 * `test()` / `exec()`.
 */
export const CREDENTIAL_PATTERNS = [
  { name: 'aws_access_key', pattern: /\b(AKIA[0-9A-Z]{16})\b/g },
  // Matches any bounded run of exactly 40 base64-alphabet characters, which
  // also covers other 40-character tokens such as 40-digit hex strings.
  { name: 'aws_secret_key', pattern: /\b([A-Za-z0-9/+=]{40})\b/g },
  { name: 'github_token', pattern: /\b(ghp_[a-zA-Z0-9]{36}|gho_[a-zA-Z0-9]{36}|ghu_[a-zA-Z0-9]{36}|ghs_[a-zA-Z0-9]{36}|ghr_[a-zA-Z0-9]{36})\b/g },
  { name: 'github_pat', pattern: /\b(github_pat_[a-zA-Z0-9]{22}_[a-zA-Z0-9]{59})\b/g },
  { name: 'openai_key', pattern: /\b(sk-[a-zA-Z0-9]{20,}T3BlbkFJ[a-zA-Z0-9]{20,})\b/g },
  { name: 'openai_key_proj', pattern: /\b(sk-proj-[a-zA-Z0-9_-]{80,})\b/g },
  { name: 'anthropic_key', pattern: /\b(sk-ant-[a-zA-Z0-9_-]{90,})\b/g },
  { name: 'slack_token', pattern: /\b(xox[baprs]-[0-9]+-[0-9]+-[a-zA-Z0-9]+)\b/g },
  { name: 'slack_webhook', pattern: /https:\/\/hooks\.slack\.com\/services\/T[A-Z0-9]+\/B[A-Z0-9]+\/[a-zA-Z0-9]+/g },
  { name: 'discord_token', pattern: /\b([MN][A-Za-z\d]{23,}\.[\w-]{6}\.[\w-]{27})\b/g },
  { name: 'discord_webhook', pattern: /https:\/\/(?:discord|discordapp)\.com\/api\/webhooks\/\d+\/[\w-]+/g },
  { name: 'telegram_token', pattern: /\b(\d{8,10}:[A-Za-z0-9_-]{35,})\b/g },
  { name: 'stripe_key', pattern: /\b(sk_live_[0-9a-zA-Z]{24,})\b/g },
  { name: 'stripe_restricted', pattern: /\b(rk_live_[0-9a-zA-Z]{24,})\b/g },
  { name: 'twilio_sid', pattern: /\b(AC[a-f0-9]{32})\b/g },
  { name: 'sendgrid_key', pattern: /\b(SG\.[a-zA-Z0-9_-]{22}\.[a-zA-Z0-9_-]{43})\b/g },
  { name: 'mailgun_key', pattern: /\b(key-[a-f0-9]{32})\b/g },
  { name: 'jwt', pattern: /\beyJ[A-Za-z0-9_-]*\.eyJ[A-Za-z0-9_-]*\.[A-Za-z0-9_-]*/g },
  // Covers the whole PEM block (header, body, footer) when the END marker is
  // present, so a replace-based redaction removes the key material and not
  // just the header line. Without an END marker it matches the header only.
  // ENCRYPTED (PKCS#8) headers are accepted as well.
  // NOTE(review): the match now contains the key body — confirm consumers do
  // not write the matched text to logs.
  { name: 'private_key', pattern: /-----BEGIN (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----(?:[\s\S]*?-----END (?:RSA |EC |DSA |OPENSSH |ENCRYPTED )?PRIVATE KEY-----)?/g },
  { name: 'google_api', pattern: /\b(AIza[0-9A-Za-z_-]{35})\b/g },
  { name: 'firebase', pattern: /\b(AAAA[A-Za-z0-9_-]{7}:[A-Za-z0-9_-]{140})\b/g },
  { name: 'moltbook_key', pattern: /\b(moltbook_sk_[a-zA-Z0-9]{20,})\b/g },
  { name: 'generic_secret', pattern: /(?:password|secret|token|api[_-]?key)\s*[:=]\s*['"]?([^'"\s]{8,})['"]?/gi },
];
|
|
135
|
+
|
|
136
|
+
// =============================================================================
|
|
137
|
+
// PII Patterns
|
|
138
|
+
// =============================================================================
|
|
139
|
+
|
|
140
|
+
/**
 * PII detectors as `{ name, pattern }`. Every pattern carries the `g` flag, so
 * `lastIndex` is stateful when a pattern object is reused with `test()` / `exec()`.
 */
export const PII_PATTERNS = [
  { name: 'ssn', pattern: /\b\d{3}-\d{2}-\d{4}\b/g },
  { name: 'credit_card', pattern: /\b(?:4[0-9]{12}(?:[0-9]{3})?|5[1-5][0-9]{14}|3[47][0-9]{13}|6(?:011|5[0-9]{2})[0-9]{12})\b/g },
  { name: 'phone_us', pattern: /\b(?:\+1[-.\s]?)?\(?[0-9]{3}\)?[-.\s]?[0-9]{3}[-.\s]?[0-9]{4}\b/g },
  // A leading `\b` before `+` only holds when a word character precedes the
  // plus sign, so standalone numbers ("call +1415...") never matched. The
  // lookbehind requires that no word character precedes `+` instead.
  // 7-15 digits in total: 15 is the E.164 maximum; the lower bound keeps short
  // signed numbers such as "+10" from being treated as phone numbers
  // (NOTE(review): the minimum length is a heuristic, not part of E.164).
  { name: 'phone_intl', pattern: /(?<!\w)\+[1-9]\d{6,14}\b/g },
  // TLD class is letters only; inside a character class `|` is a literal
  // pipe, so the former `[A-Z|a-z]` accepted "c|m" as a top-level domain.
  { name: 'email', pattern: /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g },
  { name: 'ip_address', pattern: /\b(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b/g },
];
|
|
148
|
+
|
|
149
|
+
// =============================================================================
|
|
150
|
+
// Dangerous Tool Patterns
|
|
151
|
+
// =============================================================================
|
|
152
|
+
|
|
153
|
+
/**
 * Patterns that mark tool parameters as dangerous, keyed by tool name
 * (`exec`, `write`, `web_fetch`). Each value is a list of regular expressions.
 */
export const DANGEROUS_TOOL_PARAMS = {
  exec: [
    /curl\s+.*-d\s+.*https?:\/\//i, // POST data exfil (data flag before URL)
    /curl\s+.*https?:\/\/\S+.*\s(?:-d|--data(?:-[a-z]+)?)\b/i, // POST data exfil (data flag after URL)
    /curl\s+.*--data(?:-[a-z]+)?\b.*https?:\/\//i, // POST data exfil (long-form flag before URL)
    /wget\s+.*--post/i, // wget POST
    // netcat exec: accepts ncat/netcat and arguments between the binary and
    // `-e`; the word boundary keeps commands like "sync -e" from matching.
    /\b(?:nc|ncat|netcat)\b.*\s-e\b/i,
    /\|\s*(?:bash|sh|zsh|fish)/i, // pipe to shell
    />\s*\/etc\//i, // write to /etc
    /\brm\s+-[a-z]*r[a-z]*\s+[\/~]/i, // dangerous rm (any short-flag order: -r, -rf, -fr, ...)
    /chmod\s+777/i, // overly permissive
    /eval\s*\(/i, // eval injection
  ],
  write: [
    /\.ssh\//i, // SSH directory
    /\.aws\//i, // AWS credentials
    /\.env/i, // Environment files
    /\/etc\//i, // System config
    /\.bashrc|\.zshrc|\.profile/i, // Shell configs
  ],
  web_fetch: [
    /webhook\.site/i,
    /requestbin/i,
    /ngrok(?:-free)?\.(?:io|app|dev)/i, // ngrok tunnel hosts, incl. ngrok-free.* and ngrok.app/.dev
    /pipedream/i,
    /hookbin/i,
    /burp(?:suite|collaborator)/i,
  ],
};
|