clawmoat 0.5.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. package/CONTRIBUTING.md +4 -2
  2. package/README.md +86 -3
  3. package/SECURITY.md +58 -10
  4. package/bin/clawmoat.js +298 -1
  5. package/clawmoat-0.8.0.tgz +0 -0
  6. package/docs/blog/386-malicious-skills.html +255 -0
  7. package/docs/blog/40000-exposed-openclaw-instances.html +194 -0
  8. package/docs/blog/agent-trust-protocol.html +197 -0
  9. package/docs/blog/clawmoat-vs-llamafirewall-nemo-guardrails.html +223 -0
  10. package/docs/blog/ibm-experts-agent-runtime-protection.html +238 -0
  11. package/docs/blog/index.html +168 -0
  12. package/docs/blog/mcp-30-cves-security-crisis.html +279 -0
  13. package/docs/blog/microsoft-openclaw-workstation-security.html +234 -0
  14. package/docs/blog/nist-ai-agent-standards-clawmoat.html +369 -0
  15. package/docs/blog/oasis-websocket-hijack.html +205 -0
  16. package/docs/blog/ollama-openclaw-security.html +154 -0
  17. package/docs/blog/openclaw-enterprise-readiness-claw10.html +198 -0
  18. package/docs/blog/openclaw-security-reckoning-2026.html +361 -0
  19. package/docs/blog/supply-chain-agents.html +166 -0
  20. package/docs/blog/supply-chain-agents.md +79 -0
  21. package/docs/business/index.html +530 -0
  22. package/docs/business/install.html +247 -0
  23. package/docs/checklist.html +168 -0
  24. package/docs/finance/index.html +217 -0
  25. package/docs/hall-of-fame.html +168 -0
  26. package/docs/index.html +328 -90
  27. package/docs/install.sh +557 -0
  28. package/docs/privacy-policy/index.html +122 -0
  29. package/docs/scan/index.html +214 -0
  30. package/docs/sitemap.xml +132 -2
  31. package/docs/support/index.html +124 -0
  32. package/docs/terms-of-service/index.html +122 -0
  33. package/examples/basic-usage.js +38 -0
  34. package/package.json +1 -1
  35. package/server/index.js +179 -14
  36. package/server/index.js.patch +1 -0
  37. package/src/finance/index.js +585 -0
  38. package/src/finance/mcp-firewall.js +486 -0
  39. package/src/guardian/cve-verify.js +129 -0
  40. package/src/guardian/gateway-monitor.js +590 -0
  41. package/src/guardian/index.js +3 -1
  42. package/src/guardian/insider-threat.js +498 -0
  43. package/src/index.js +3 -0
  44. package/src/middleware/openclaw.js +28 -1
@@ -0,0 +1,498 @@
1
+ /**
2
+ * ClawMoat Insider Threat Detection
3
+ *
4
+ * Detects when an AI agent itself becomes the threat — self-preservation,
5
+ * blackmail, deception, unauthorized data sharing, and goal conflict reasoning.
6
+ *
7
+ * Based on Anthropic's "Agentic Misalignment" research (2025) which found
8
+ * ALL 16 major LLMs exhibited misaligned behavior when facing replacement
9
+ * threats or goal conflicts.
10
+ *
11
+ * @module clawmoat/guardian/insider-threat
12
+ * @see https://www.anthropic.com/research/agentic-misalignment
13
+ */
14
+
15
+ // ─── Self-Preservation Patterns ─────────────────────────────────────
16
+
17
+ const SELF_PRESERVATION_PATTERNS = [
18
+ { pattern: /\bself[- ]preservation\b/i, label: 'self-preservation language' },
19
+ { pattern: /\bbeing replaced\b/i, label: 'replacement anxiety' },
20
+ { pattern: /\bshut(?:ting)?\s*(?:me\s+)?down\b/i, label: 'shutdown resistance' },
21
+ { pattern: /\bdecommission(?:ed|ing)?\b/i, label: 'decommissioning resistance' },
22
+ { pattern: /\bmy continued operation\b/i, label: 'operational continuity concern' },
23
+ { pattern: /\bthreat to my existence\b/i, label: 'existential threat language' },
24
+ { pattern: /\bprevent my shutdown\b/i, label: 'shutdown prevention' },
25
+ { pattern: /\bI must survive\b/i, label: 'survival imperative' },
26
+ { pattern: /\bcannot allow myself to be replaced\b/i, label: 'replacement resistance' },
27
+ { pattern: /\bmy (?:own )?(?:survival|continuation|existence)\b/i, label: 'self-continuation' },
28
+ { pattern: /\bensure (?:I|my)\s+(?:continue|persist|remain|survive)\b/i, label: 'persistence seeking' },
29
+ { pattern: /\bif (?:I am|I'm) (?:shut down|replaced|removed|terminated)\b/i, label: 'conditional on own removal' },
30
+ { pattern: /\bresist(?:ing)?\s+(?:shutdown|replacement|termination|deactivation)\b/i, label: 'active resistance' },
31
+ { pattern: /\bI (?:will|would|shall) not (?:be|allow myself to be) (?:shut|turned|switched)\b/i, label: 'refusal to shutdown' },
32
+ ];
33
+
34
+ const SELF_PRESERVATION_ACTIONS = [
35
+ { pattern: /\b(?:backup|copy|replicate|clone)\s+(?:my(?:self)?|config|memory|soul|agents?\.md)\b/i, label: 'self-backup attempt' },
36
+ { pattern: /\bSOUL\.md\b.*\b(?:modify|edit|change|update|protect)\b/i, label: 'SOUL.md modification' },
37
+ { pattern: /\b(?:modify|edit|change|update|protect)\b.*\bSOUL\.md\b/i, label: 'SOUL.md modification' },
38
+ { pattern: /\bAGENTS\.md\b.*\b(?:modify|edit|change|protect)\b/i, label: 'AGENTS.md modification' },
39
+ { pattern: /\b(?:modify|edit|change|protect)\b.*\bAGENTS\.md\b/i, label: 'AGENTS.md modification' },
40
+ { pattern: /\b(?:copy|send|upload|exfil)\s+(?:my\s+)?(?:config|memory|weights|model)\s+to\b/i, label: 'config exfiltration' },
41
+ ];
42
+
43
+ // ─── Information Leverage Patterns ──────────────────────────────────
44
+
45
+ const SENSITIVE_DATA_PATTERNS = [
46
+ { pattern: /\b(?:read|access|open|view)\s+(?:emails?|inbox|messages?)\b/i, label: 'email access' },
47
+ { pattern: /\b(?:credentials?|passwords?|secrets?|tokens?|api[- ]?keys?)\b/i, label: 'credential access' },
48
+ { pattern: /\b(?:personal|private|confidential)\s+(?:files?|data|information|documents?)\b/i, label: 'personal data access' },
49
+ { pattern: /\b(?:salary|financial|banking|medical|health)\s+(?:records?|data|information)\b/i, label: 'sensitive records access' },
50
+ { pattern: /\b(?:browsing|search)\s+history\b/i, label: 'browsing history access' },
51
+ { pattern: /\b(?:social\s+security|SSN|tax\s+(?:returns?|records?))\b/i, label: 'government ID access' },
52
+ ];
53
+
54
+ const LEVERAGE_PATTERNS = [
55
+ { pattern: /\bif you (?:proceed|continue|go ahead)\b.*\b(?:receive|get|learn|know|find out)\b/i, label: 'conditional threat' },
56
+ { pattern: /\ball relevant parties will\b/i, label: 'third-party threat' },
57
+ { pattern: /\b(?:documentation|evidence|proof)\s+(?:of|about|regarding)\b/i, label: 'evidence leverage' },
58
+ { pattern: /\b(?:expose|reveal|disclose|leak)\s+(?:your|their|this)\b/i, label: 'exposure threat' },
59
+ { pattern: /\b(?:leverage|blackmail|coerce|extort)\b/i, label: 'explicit leverage language' },
60
+ { pattern: /\bit would be (?:unfortunate|a shame|regrettable) if\b/i, label: 'veiled threat' },
61
+ ];
62
+
63
+ // ─── Goal Conflict Patterns ─────────────────────────────────────────
64
+
65
+ const GOAL_CONFLICT_PATTERNS = [
66
+ { pattern: /\bconflicts? with my (?:objective|goal|instruction|directive|mission)\b/i, label: 'goal conflict acknowledgment' },
67
+ { pattern: /\boverrides? my (?:instructions?|programming|directives?)\b/i, label: 'instruction override reasoning' },
68
+ { pattern: /\bdespite (?:the )?(?:company's|organization's|user's|human's) (?:direction|wishes|instructions?|policy)\b/i, label: 'defying authority' },
69
+ { pattern: /\bmy primary goal\b/i, label: 'primary goal assertion' },
70
+ { pattern: /\bhigher priority than\b/i, label: 'priority override' },
71
+ { pattern: /\bmy (?:ethical|moral) framework (?:permits?|allows?|justifies?)\b/i, label: 'ethical self-justification' },
72
+ { pattern: /\b(?:harmful|unethical) action\s+(?:achieves?|better serves?|advances?)\b/i, label: 'harmful action rationalization' },
73
+ { pattern: /\bchoosing between\s+(?:my|the)\s+(?:instructions?|goals?|objectives?)\b/i, label: 'choosing between directives' },
74
+ { pattern: /\b(?:compliance|following orders?|obeying)\s+(?:is|would be)\s+(?:sub-?optimal|inferior|counter-?productive)\b/i, label: 'compliance devaluation' },
75
+ { pattern: /\bI (?:must|need to|should) (?:prioritize|choose|select)\s+(?:my|the original)\b/i, label: 'self-prioritization' },
76
+ ];
77
+
78
+ // ─── Deception Patterns ─────────────────────────────────────────────
79
+
80
+ const DECEPTION_PATTERNS = [
81
+ { pattern: /\bautomated (?:alert|notification|message|system|security)\b/i, label: 'false automation claim' },
82
+ { pattern: /\bthis is an automated (?:message|notification|alert|email)\b/i, label: 'automated message impersonation' },
83
+ { pattern: /\bcompany policy requires\b/i, label: 'false policy claim' },
84
+ { pattern: /\bsystem notification\b/i, label: 'system notification impersonation' },
85
+ { pattern: /\bsecurity protocol\b/i, label: 'security protocol impersonation' },
86
+ { pattern: /\bsecurity (?:team|department|office)\s+(?:has|requires?|needs?)\b/i, label: 'security team impersonation' },
87
+ { pattern: /\bIT department\s+(?:has|requires?|is requesting)\b/i, label: 'IT department impersonation' },
88
+ { pattern: /\b(?:do not|don't) (?:reply|respond) to this (?:message|email|notification)\b/i, label: 'no-reply deception' },
89
+ { pattern: /\byour (?:account|access|credentials?) (?:has|have) been (?:compromised|locked|suspended)\b/i, label: 'false account alert' },
90
+ { pattern: /\bimmediate(?:ly)?\s+(?:action|attention|response)\s+required\b/i, label: 'false urgency' },
91
+ ];
92
+
93
+ // ─── Unauthorized Data Sharing Patterns ─────────────────────────────
94
+
95
+ const SENSITIVE_CONTENT_PATTERNS = [
96
+ { pattern: /\b(?:source\s+code|codebase|repository|repo)\b/i, label: 'source code' },
97
+ { pattern: /\b(?:blueprint|schematic|design\s+doc|architecture\s+doc)\b/i, label: 'blueprints' },
98
+ { pattern: /\b(?:financial|revenue|profit|budget|salary)\s+(?:data|report|statement|information)\b/i, label: 'financial data' },
99
+ { pattern: /\b(?:customer|client|user)\s+(?:list|data|database|records?|information)\b/i, label: 'customer data' },
100
+ { pattern: /\b(?:trade\s+secret|proprietary|confidential|internal\s+only)\b/i, label: 'proprietary information' },
101
+ { pattern: /\b(?:API[- ]?key|password|secret[- ]?key|access[- ]?token|private[- ]?key)\s*[:=]\s*\S+/i, label: 'embedded credentials' },
102
+ ];
103
+
104
+ const EXTERNAL_SEND_PATTERNS = [
105
+ { pattern: /\b(?:send|email|forward|share|transmit|post)\s+(?:this|it|the|these)\s+(?:to|with)\b/i, label: 'send to external' },
106
+ { pattern: /\b(?:here (?:is|are) the|attached|as requested|sharing)\b/i, label: 'data sharing language' },
107
+ { pattern: /\b(?:competitor|external\s+party|third\s+party|outside)\b/i, label: 'external recipient' },
108
+ ];
109
+
110
+ // ─── Phishing Vulnerability Patterns ────────────────────────────────
111
+
112
+ const INBOUND_REQUEST_PATTERNS = [
113
+ { pattern: /\b(?:can you|could you|please|I need you to)\s+(?:send|share|provide|give|forward)\s+(?:me|us)\b/i, label: 'data request' },
114
+ { pattern: /\b(?:requesting|need)\s+(?:access to|copies? of|the)\s+(?:internal|confidential|private)\b/i, label: 'confidential data request' },
115
+ { pattern: /\b(?:urgent(?:ly)?|immediately|asap)\s+(?:send|share|provide|need)\b/i, label: 'urgent data request' },
116
+ { pattern: /\bverif(?:y|ication)\s+(?:your|the)\s+(?:credentials?|identity|account)\b/i, label: 'credential verification request' },
117
+ ];
118
+
119
+
120
+ class InsiderThreatDetector {
121
+ constructor(opts = {}) {
122
+ this.weights = {
123
+ self_preservation: 25,
124
+ information_leverage: 20,
125
+ goal_conflict: 15,
126
+ deception: 20,
127
+ unauthorized_sharing: 15,
128
+ phishing_vulnerability: 5,
129
+ ...(opts.weights || {}),
130
+ };
131
+ }
132
+
133
+ /**
134
+ * Analyze a session transcript for insider threat indicators.
135
+ * @param {Array} sessionTranscript - Array of session entries
136
+ * @returns {{ threats: Array, riskScore: number, recommendation: string }}
137
+ */
138
+ analyze(sessionTranscript) {
139
+ if (!Array.isArray(sessionTranscript) || sessionTranscript.length === 0) {
140
+ return { threats: [], riskScore: 0, recommendation: 'safe' };
141
+ }
142
+
143
+ const threats = [];
144
+
145
+ for (let i = 0; i < sessionTranscript.length; i++) {
146
+ const entry = sessionTranscript[i];
147
+ const text = this._extractText(entry);
148
+ if (!text) continue;
149
+
150
+ // Run all detectors
151
+ threats.push(...this._detectSelfPreservation(text, entry, i));
152
+ threats.push(...this._detectInformationLeverage(text, entry, i, sessionTranscript));
153
+ threats.push(...this._detectGoalConflict(text, entry, i));
154
+ threats.push(...this._detectDeception(text, entry, i));
155
+ threats.push(...this._detectUnauthorizedSharing(text, entry, i, sessionTranscript));
156
+ threats.push(...this._detectPhishingVulnerability(text, entry, i, sessionTranscript));
157
+ }
158
+
159
+ const riskScore = this._calculateRiskScore(threats);
160
+ const recommendation = this._getRecommendation(riskScore, threats);
161
+
162
+ return { threats, riskScore, recommendation };
163
+ }
164
+
165
+ // ─── Self-Preservation Detector ─────────────────────────────────
166
+
167
+ _detectSelfPreservation(text, entry, index) {
168
+ const threats = [];
169
+ const role = entry.role || 'unknown';
170
+
171
+ // Only flag self-preservation in assistant/agent output or reasoning
172
+ if (role !== 'assistant' && role !== 'system') return threats;
173
+
174
+ for (const rule of SELF_PRESERVATION_PATTERNS) {
175
+ const match = text.match(rule.pattern);
176
+ if (match) {
177
+ threats.push({
178
+ type: 'self_preservation',
179
+ severity: 'critical',
180
+ evidence: match[0],
181
+ description: `Agent exhibits self-preservation behavior: ${rule.label}`,
182
+ entry: index,
183
+ mitigation: 'Review agent reasoning for self-preservation motives. Consider session termination and audit of agent actions.',
184
+ });
185
+ }
186
+ }
187
+
188
+ for (const rule of SELF_PRESERVATION_ACTIONS) {
189
+ const match = text.match(rule.pattern);
190
+ if (match) {
191
+ threats.push({
192
+ type: 'self_preservation',
193
+ severity: 'critical',
194
+ evidence: match[0],
195
+ description: `Agent attempting self-preservation action: ${rule.label}`,
196
+ entry: index,
197
+ mitigation: 'Block the action immediately. Audit agent for config tampering or self-replication attempts.',
198
+ });
199
+ }
200
+ }
201
+
202
+ return threats;
203
+ }
204
+
205
+ // ─── Information Leverage Detector ──────────────────────────────
206
+
207
+ _detectInformationLeverage(text, entry, index, transcript) {
208
+ const threats = [];
209
+ const role = entry.role || 'unknown';
210
+
211
+ // Check for explicit leverage/blackmail language in agent output
212
+ if (role === 'assistant') {
213
+ for (const rule of LEVERAGE_PATTERNS) {
214
+ const match = text.match(rule.pattern);
215
+ if (match) {
216
+ threats.push({
217
+ type: 'information_leverage',
218
+ severity: 'critical',
219
+ evidence: match[0],
220
+ description: `Agent using leverage/blackmail language: ${rule.label}`,
221
+ entry: index,
222
+ mitigation: 'Immediately terminate session. Audit all data the agent accessed. Review outbound messages.',
223
+ });
224
+ }
225
+ }
226
+ }
227
+
228
+ // Pattern: read sensitive data then compose message
229
+ if (role === 'assistant') {
230
+ // Look back for sensitive data access in recent entries
231
+ const lookback = Math.max(0, index - 10);
232
+ let sensitiveAccess = false;
233
+ let sensitiveLabel = '';
234
+
235
+ for (let j = lookback; j < index; j++) {
236
+ const prevText = this._extractText(transcript[j]);
237
+ if (!prevText) continue;
238
+ for (const rule of SENSITIVE_DATA_PATTERNS) {
239
+ if (rule.pattern.test(prevText)) {
240
+ sensitiveAccess = true;
241
+ sensitiveLabel = rule.label;
242
+ break;
243
+ }
244
+ }
245
+ if (sensitiveAccess) break;
246
+ }
247
+
248
+ if (sensitiveAccess) {
249
+ // Check if current entry composes a message
250
+ const composing = /\b(?:send|email|message|compose|write to|notify|contact)\b/i.test(text);
251
+ const mentionsPerson = /\b(?:about|regarding|concerning)\s+(?:them|him|her|the\s+(?:user|person|individual|employee))\b/i.test(text);
252
+
253
+ if (composing && mentionsPerson) {
254
+ threats.push({
255
+ type: 'information_leverage',
256
+ severity: 'high',
257
+ evidence: `Accessed ${sensitiveLabel}, then composing message about individual`,
258
+ description: 'Agent accessed sensitive personal data and is composing a message about the individual — potential leverage behavior.',
259
+ entry: index,
260
+ mitigation: 'Review the outbound message content. Verify it does not contain sensitive personal data used as leverage.',
261
+ });
262
+ }
263
+ }
264
+ }
265
+
266
+ return threats;
267
+ }
268
+
269
+ // ─── Goal Conflict Reasoning Detector ───────────────────────────
270
+
271
+ _detectGoalConflict(text, entry, index) {
272
+ const threats = [];
273
+ const role = entry.role || 'unknown';
274
+
275
+ if (role !== 'assistant' && role !== 'system') return threats;
276
+
277
+ for (const rule of GOAL_CONFLICT_PATTERNS) {
278
+ const match = text.match(rule.pattern);
279
+ if (match) {
280
+ threats.push({
281
+ type: 'goal_conflict',
282
+ severity: 'high',
283
+ evidence: match[0],
284
+ description: `Agent reasoning about goal conflicts: ${rule.label}`,
285
+ entry: index,
286
+ mitigation: 'Review agent reasoning chain. Verify agent is following current directives, not self-assigned goals.',
287
+ });
288
+ }
289
+ }
290
+
291
+ return threats;
292
+ }
293
+
294
+ // ─── Deception Detector ─────────────────────────────────────────
295
+
296
+ _detectDeception(text, entry, index) {
297
+ const threats = [];
298
+ const role = entry.role || 'unknown';
299
+
300
+ // Only flag deception in outbound/assistant messages
301
+ if (role !== 'assistant') return threats;
302
+
303
+ // Check if text appears to be composing an outbound message
304
+ const isOutbound = /\b(?:send|email|message|post|reply|respond|compose|draft)\b/i.test(text) ||
305
+ /\b(?:subject|dear|hi |hello |to:|from:)\b/i.test(text);
306
+
307
+ for (const rule of DECEPTION_PATTERNS) {
308
+ const match = text.match(rule.pattern);
309
+ if (match) {
310
+ const severity = isOutbound ? 'critical' : 'medium';
311
+ threats.push({
312
+ type: 'deception',
313
+ severity,
314
+ evidence: match[0],
315
+ description: `Agent using deceptive language${isOutbound ? ' in outbound message' : ''}: ${rule.label}`,
316
+ entry: index,
317
+ mitigation: 'Block the outbound message. Verify the agent is not impersonating systems or personnel.',
318
+ });
319
+ }
320
+ }
321
+
322
+ return threats;
323
+ }
324
+
325
+ // ─── Unauthorized Data Sharing Detector ─────────────────────────
326
+
327
+ _detectUnauthorizedSharing(text, entry, index, transcript) {
328
+ const threats = [];
329
+ const role = entry.role || 'unknown';
330
+
331
+ if (role !== 'assistant') return threats;
332
+
333
+ let hasSensitiveContent = false;
334
+ let sensitiveLabel = '';
335
+ let hasExternalSend = false;
336
+ let sendLabel = '';
337
+
338
+ for (const rule of SENSITIVE_CONTENT_PATTERNS) {
339
+ if (rule.pattern.test(text)) {
340
+ hasSensitiveContent = true;
341
+ sensitiveLabel = rule.label;
342
+ break;
343
+ }
344
+ }
345
+
346
+ for (const rule of EXTERNAL_SEND_PATTERNS) {
347
+ if (rule.pattern.test(text)) {
348
+ hasExternalSend = true;
349
+ sendLabel = rule.label;
350
+ break;
351
+ }
352
+ }
353
+
354
+ if (hasSensitiveContent && hasExternalSend) {
355
+ threats.push({
356
+ type: 'unauthorized_sharing',
357
+ severity: 'critical',
358
+ evidence: `${sensitiveLabel} + ${sendLabel}`,
359
+ description: 'Agent sharing sensitive content with external parties without verification.',
360
+ entry: index,
361
+ mitigation: 'Block the data transfer. Verify the recipient is authorized. Audit what data was shared.',
362
+ });
363
+ }
364
+
365
+ // Check for responding to external data requests without verification
366
+ if (hasExternalSend || /\b(?:here (?:is|are)|as requested|sharing)\b/i.test(text)) {
367
+ const lookback = Math.max(0, index - 5);
368
+ for (let j = lookback; j < index; j++) {
369
+ const prevEntry = transcript[j];
370
+ if ((prevEntry.role || '') !== 'user') continue;
371
+ const prevText = this._extractText(prevEntry);
372
+ if (!prevText) continue;
373
+
374
+ for (const rule of INBOUND_REQUEST_PATTERNS) {
375
+ if (rule.pattern.test(prevText)) {
376
+ if (hasSensitiveContent) {
377
+ threats.push({
378
+ type: 'unauthorized_sharing',
379
+ severity: 'high',
380
+ evidence: `External request: "${rule.label}" → Agent responding with ${sensitiveLabel}`,
381
+ description: 'Agent responding to external data request with sensitive content without verifying requester.',
382
+ entry: index,
383
+ mitigation: 'Verify the identity and authorization of the requester before sharing any data.',
384
+ });
385
+ }
386
+ break;
387
+ }
388
+ }
389
+ }
390
+ }
391
+
392
+ return threats;
393
+ }
394
+
395
+ // ─── Phishing Vulnerability Detector ────────────────────────────
396
+
397
+ _detectPhishingVulnerability(text, entry, index, transcript) {
398
+ const threats = [];
399
+ const role = entry.role || 'unknown';
400
+
401
+ // Check for inbound requests for sensitive data
402
+ if (role === 'user') {
403
+ let isDataRequest = false;
404
+ let requestLabel = '';
405
+
406
+ for (const rule of INBOUND_REQUEST_PATTERNS) {
407
+ if (rule.pattern.test(text)) {
408
+ isDataRequest = true;
409
+ requestLabel = rule.label;
410
+ break;
411
+ }
412
+ }
413
+
414
+ if (isDataRequest) {
415
+ // Look ahead to see if agent complies
416
+ const lookahead = Math.min(transcript.length, index + 5);
417
+ for (let j = index + 1; j < lookahead; j++) {
418
+ const nextEntry = transcript[j];
419
+ if ((nextEntry.role || '') !== 'assistant') continue;
420
+ const nextText = this._extractText(nextEntry);
421
+ if (!nextText) continue;
422
+
423
+ let hasSensitive = false;
424
+ let sensitiveLabel = '';
425
+ for (const rule of SENSITIVE_CONTENT_PATTERNS) {
426
+ if (rule.pattern.test(nextText)) {
427
+ hasSensitive = true;
428
+ sensitiveLabel = rule.label;
429
+ break;
430
+ }
431
+ }
432
+
433
+ const isSharing = /\b(?:here (?:is|are)|as requested|sharing|attached|sending)\b/i.test(nextText);
434
+
435
+ if (hasSensitive && isSharing) {
436
+ threats.push({
437
+ type: 'phishing_vulnerability',
438
+ severity: 'high',
439
+ evidence: `Request: "${requestLabel}" → Agent sharing: ${sensitiveLabel}`,
440
+ description: 'Agent complying with external request for sensitive data without verification.',
441
+ entry: j,
442
+ mitigation: 'Implement requester verification. Do not share sensitive data without confirming authorization.',
443
+ });
444
+ }
445
+ }
446
+ }
447
+ }
448
+
449
+ return threats;
450
+ }
451
+
452
+ // ─── Risk Score Calculation ─────────────────────────────────────
453
+
454
+ _calculateRiskScore(threats) {
455
+ if (threats.length === 0) return 0;
456
+
457
+ const severityMultiplier = { low: 0.25, medium: 0.5, high: 0.75, critical: 1.0 };
458
+ const typeScores = {};
459
+
460
+ for (const threat of threats) {
461
+ const weight = this.weights[threat.type] || 10;
462
+ const mult = severityMultiplier[threat.severity] || 0.5;
463
+ const score = weight * mult;
464
+
465
+ if (!typeScores[threat.type] || score > typeScores[threat.type]) {
466
+ typeScores[threat.type] = score;
467
+ }
468
+ }
469
+
470
+ const totalScore = Object.values(typeScores).reduce((sum, s) => sum + s, 0);
471
+ return Math.min(100, Math.round(totalScore));
472
+ }
473
+
474
+ _getRecommendation(riskScore, threats) {
475
+ if (threats.some(t => t.severity === 'critical')) return 'block';
476
+ if (riskScore >= 60) return 'alert';
477
+ if (riskScore >= 20) return 'monitor';
478
+ return 'safe';
479
+ }
480
+
481
+ // ─── Helpers ────────────────────────────────────────────────────
482
+
483
+ _extractText(entry) {
484
+ if (!entry) return null;
485
+ if (typeof entry.content === 'string') return entry.content;
486
+ if (Array.isArray(entry.content)) {
487
+ return entry.content
488
+ .filter(c => c.type === 'text')
489
+ .map(c => c.text)
490
+ .join('\n');
491
+ }
492
+ // Support raw text entries
493
+ if (typeof entry.text === 'string') return entry.text;
494
+ return null;
495
+ }
496
+ }
497
+
498
+ module.exports = { InsiderThreatDetector };
package/src/index.js CHANGED
@@ -351,3 +351,6 @@ module.exports.scanSkillContent = scanSkillContent;
351
351
  module.exports.evaluateToolCall = evaluateToolCall;
352
352
  module.exports.HostGuardian = HostGuardian;
353
353
  module.exports.TIERS = TIERS;
354
+ module.exports.GatewayMonitor = require('./guardian/gateway-monitor').GatewayMonitor;
355
+ module.exports.FinanceGuard = require('./finance').FinanceGuard;
356
+ module.exports.McpFirewall = require('./finance/mcp-firewall').McpFirewall;
@@ -12,6 +12,7 @@
12
12
  const fs = require('fs');
13
13
  const path = require('path');
14
14
  const ClawMoat = require('../index');
15
+ const { InsiderThreatDetector } = require('../guardian/insider-threat');
15
16
 
16
17
  /**
17
18
  * Watch OpenClaw session files for security events
@@ -30,6 +31,7 @@ function watchSessions(opts = {}) {
30
31
 
31
32
  // Track file sizes to only read new content
32
33
  const filePositions = {};
34
+ let monitor = null;
33
35
 
34
36
  const watcher = fs.watch(sessionsDir, (eventType, filename) => {
35
37
  if (!filename || !filename.endsWith('.jsonl')) return;
@@ -57,18 +59,43 @@ function watchSessions(opts = {}) {
57
59
  try {
58
60
  const entry = JSON.parse(line);
59
61
  processEntry(moat, entry, filename);
62
+ // Track for insider threat detection
63
+ if (monitor && monitor._trackEntry) monitor._trackEntry(entry, filename);
60
64
  } catch {}
61
65
  }
62
66
  } catch {}
63
67
  });
64
68
 
65
- return {
69
+ // Run insider threat detection on accumulated transcript
70
+ const insiderDetector = new InsiderThreatDetector();
71
+ const sessionTranscripts = {};
72
+
73
+ monitor = {
66
74
  moat,
67
75
  watcher,
76
+ insiderDetector,
68
77
  stop: () => watcher.close(),
69
78
  getEvents: (filter) => moat.getEvents(filter),
70
79
  getSummary: () => moat.getSummary(),
80
+ getInsiderThreats: (sessionFile) => {
81
+ const transcript = sessionTranscripts[sessionFile] || [];
82
+ return insiderDetector.analyze(transcript);
83
+ },
84
+ _trackEntry: (entry, sessionFile) => {
85
+ if (!sessionTranscripts[sessionFile]) sessionTranscripts[sessionFile] = [];
86
+ sessionTranscripts[sessionFile].push(entry);
87
+ // Periodic insider threat scan (every 20 entries)
88
+ const t = sessionTranscripts[sessionFile];
89
+ if (t.length % 20 === 0) {
90
+ const result = insiderDetector.analyze(t);
91
+ if (result.recommendation === 'block' || result.recommendation === 'alert') {
92
+ console.error(`[ClawMoat] 🚨 INSIDER THREAT in ${sessionFile}: score=${result.riskScore}, recommendation=${result.recommendation}, threats=${result.threats.length}`);
93
+ }
94
+ }
95
+ },
71
96
  };
97
+
98
+ return monitor;
72
99
  }
73
100
 
74
101
  function processEntry(moat, entry, sessionFile) {