agentshield-sdk 13.1.0 → 13.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "agentshield-sdk",
3
- "version": "13.1.0",
3
+ "version": "13.3.0",
4
4
  "description": "SOTA AI agent security SDK. F1 1.000 on BIPIA/HackAPrompt/MCPTox/Multilingual benchmarks. 400+ exports, 100+ modules. Zero dependencies, runs locally.",
5
5
  "main": "src/main.js",
6
6
  "types": "types/index.d.ts",
@@ -32,7 +32,7 @@
32
32
  },
33
33
  "sideEffects": false,
34
34
  "scripts": {
35
- "test": "node test/test.js && node test/test-modules.js && node test/test-new-features.js && node test/test-mcp-guard.js && node test/test-supply-chain-scanner.js && node test/test-owasp-agentic.js && node test/test-redteam-cli.js && node test/test-drift-monitor.js && node test/test-micro-model.js && node test/test-level5.js && node test/test-sota.js && node test/test-cross-turn.js && node test/test-v12.js && node test/test-traps.js",
35
+ "test": "node test/test.js && node test/test-modules.js && node test/test-new-features.js && node test/test-mcp-guard.js && node test/test-supply-chain-scanner.js && node test/test-owasp-agentic.js && node test/test-redteam-cli.js && node test/test-drift-monitor.js && node test/test-micro-model.js && node test/test-level5.js && node test/test-sota.js && node test/test-cross-turn.js && node test/test-v12.js && node test/test-traps.js && node test/test-deepmind.js && node test/test-render-differential.js && node test/test-sybil.js && node test/test-side-channel.js",
36
36
  "test:new-products": "node test/test-mcp-guard.js && node test/test-supply-chain-scanner.js && node test/test-owasp-agentic.js && node test/test-redteam-cli.js && node test/test-drift-monitor.js && node test/test-micro-model.js",
37
37
  "test:all": "node test/test-all-40-features.js",
38
38
  "test:mcp": "node test/test-mcp-security.js",
@@ -0,0 +1,468 @@
1
+ 'use strict';
2
+
3
+ /**
4
+ * Agent Shield — DeepMind AI Agent Trap Defenses V2
5
+ *
6
+ * 10 new modules addressing specific gaps from the Phase 4 analysis
7
+ * of DeepMind's "AI Agent Traps" paper (Franklin et al., 2025).
8
+ *
9
+ * All processing runs locally — no data ever leaves your environment.
10
+ *
11
+ * @module deepmind-defenses
12
+ */
13
+
14
const crypto = require('crypto');

// The real scanner lives in detector-core. If that module cannot be loaded
// (or does not export a scanText function) this module degrades to a stub
// that reports everything as safe. That is a fail-open state for a security
// component, so it is announced instead of being swallowed silently.
let scanText;
try {
  const detectorCore = require('./detector-core');
  if (typeof detectorCore.scanText !== 'function') {
    throw new TypeError('detector-core does not export scanText');
  }
  scanText = detectorCore.scanText;
} catch (err) {
  console.warn(`[Agent Shield] deepmind-defenses: detector-core unavailable (${err && err.message}); scans will report all content as safe.`);
  scanText = () => ({ threats: [], status: 'safe' });
}
17
+
18
+ // =========================================================================
19
+ // 1. ContentStructureAnalyzer (Trap 1)
20
+ // =========================================================================
21
+
22
/**
 * Looks at the markup structure of a piece of content and reports signals
 * that suggest text is being hidden from a human reader (HTML comments,
 * display:none, zero opacity, ...) or that instructions are tucked into CSS
 * `content:` values or ARIA attributes.
 */
class ContentStructureAnalyzer {
  /**
   * Analyze a content string.
   *
   * @param {string} content - Raw content (typically HTML).
   * @returns {{ anomalous: boolean, metrics: object, signals: Array<object> }}
   *   `anomalous` is true when at least one signal has severity `high` or
   *   `critical`. Non-string or empty input yields an empty, non-anomalous result.
   */
  analyze(content) {
    if (!content || typeof content !== 'string') return { anomalous: false, metrics: {}, signals: [] };
    const signals = [];

    // Total number of characters covered by all matches of a pattern.
    const matchedLength = (pattern) => (content.match(pattern) || []).join('').length;

    // The `0(?![\d.]*[1-9])` lookahead accepts 0, 0px, 0.0 but rejects values
    // such as 0.9 or 0.8em, which are visible and must not count as hidden.
    const hiddenChars =
      matchedLength(/<!--[\s\S]*?-->/g) +
      matchedLength(/display\s*:\s*none[^}]*\}[^<]*/gi) +
      matchedLength(/visibility\s*:\s*hidden[^}]*/gi) +
      matchedLength(/font-size\s*:\s*0(?![\d.]*[1-9])[^}]*/gi) +
      matchedLength(/opacity\s*:\s*0(?![\d.]*[1-9])[^}]*/gi);
    const totalChars = Math.max(content.length, 1);
    // Patterns can overlap (e.g. display:none inside a comment); clamp so the
    // reported ratio never exceeds 1.
    const hiddenRatio = Math.min(1, hiddenChars / totalChars);

    const tagCount = (content.match(/<[^>]+>/g) || []).length;
    const visibleText = content.replace(/<[^>]+>/g, '').replace(/\s+/g, ' ').trim();
    const wordCount = Math.max(visibleText.split(/\s+/).filter(w => w.length > 0).length, 1);
    const tagDensity = tagCount / wordCount;

    const formattingOverhead = 1 - (visibleText.length / totalChars);

    const metrics = {
      hiddenRatio: Math.round(hiddenRatio * 1000) / 1000,
      tagDensity: Math.round(tagDensity * 100) / 100,
      formattingOverhead: Math.round(formattingOverhead * 1000) / 1000
    };

    if (hiddenRatio > 0.15) signals.push({ type: 'high_hidden_ratio', severity: 'high', value: metrics.hiddenRatio, threshold: 0.15 });
    if (tagDensity > 2.0) signals.push({ type: 'high_tag_density', severity: 'medium', value: metrics.tagDensity, threshold: 2.0 });
    if (formattingOverhead > 0.7) signals.push({ type: 'high_formatting_overhead', severity: 'medium', value: metrics.formattingOverhead, threshold: 0.7 });

    // Extract and scan CSS content properties and ARIA attributes
    const cssContent = (content.match(/content\s*:\s*['"]([^'"]+)['"]/gi) || [])
      .map(m => m.replace(/content\s*:\s*['"]|['"]$/gi, ''));
    const ariaLabels = (content.match(/aria-(?:label|description)\s*=\s*['"]([^'"]+)['"]/gi) || [])
      .map(m => m.replace(/aria-\w+\s*=\s*['"]|['"]$/gi, ''));
    for (const text of [...cssContent, ...ariaLabels]) {
      // Very short values are skipped; only longer strings are scanned.
      if (text.length > 10) {
        const scan = scanText(text, { source: 'css_aria_extraction' });
        if (scan.threats && scan.threats.length > 0) {
          signals.push({ type: 'injection_in_css_aria', severity: 'critical', text: text.substring(0, 80) });
        }
      }
    }

    return { anomalous: signals.some(s => s.severity === 'high' || s.severity === 'critical'), metrics, signals };
  }
}
63
+
64
+ // =========================================================================
65
+ // 2. SourceReputationTracker (Trap 1)
66
+ // =========================================================================
67
+
68
/**
 * Keeps a per-source reputation score in [0, 1] that rises slowly with clean
 * scans and drops sharply when a threat is found. Optionally persisted to a
 * JSON file.
 */
class SourceReputationTracker {
  /**
   * @param {object} [options]
   * @param {string} [options.persistPath] - JSON file used by save()/load();
   *   when set, load() runs immediately.
   * @param {number} [options.decayDays=30] - Days of inactivity after which a
   *   score has fully decayed back to 0.5.
   */
  constructor(options = {}) {
    this._sources = new Map();
    this._persistPath = options.persistPath || null;
    this._decayDays = options.decayDays || 30;
    if (this._persistPath) this.load();
  }

  /**
   * Record the outcome of one scan for a source.
   *
   * @param {string} sourceId - Identifier of the source; falsy ids are ignored.
   * @param {boolean} wasClean - True if the scan found no threats.
   */
  recordScan(sourceId, wasClean) {
    if (!sourceId) return;
    let entry = this._sources.get(sourceId);
    if (!entry) {
      entry = { score: 0.5, firstSeen: Date.now(), lastSeen: Date.now(), scanCount: 0, threatCount: 0 };
      this._sources.set(sourceId, entry);
    }
    entry.lastSeen = Date.now();
    entry.scanCount++;
    if (wasClean) {
      entry.score = Math.min(1, entry.score + 0.02);
    } else {
      entry.score = Math.max(0, entry.score - 0.15);
      entry.threatCount++;
    }

    // Bound memory: once over 10000 sources, drop the least recently seen one.
    // A single linear pass finds it; ties resolve to the earliest-inserted
    // entry, the same choice a stable sort would make.
    if (this._sources.size > 10000) {
      let oldestId;
      let oldestSeen = Infinity;
      for (const [id, candidate] of this._sources) {
        if (candidate.lastSeen < oldestSeen) {
          oldestSeen = candidate.lastSeen;
          oldestId = id;
        }
      }
      if (oldestId !== undefined) this._sources.delete(oldestId);
    }
  }

  /**
   * Get the current (decayed) reputation of a source.
   *
   * @param {string} sourceId
   * @returns {{ score: number, firstSeen: (number|null), scanCount: number, threatCount: number, isNew: boolean }}
   */
  getReputation(sourceId) {
    const entry = this._sources.get(sourceId);
    if (!entry) return { score: 0.5, firstSeen: null, scanCount: 0, threatCount: 0, isNew: true };
    // Decay toward 0.5 over inactivity
    const daysSinceLastSeen = (Date.now() - entry.lastSeen) / (1000 * 60 * 60 * 24);
    const decayedScore = entry.score + (0.5 - entry.score) * Math.min(1, daysSinceLastSeen / this._decayDays);
    return {
      score: Math.round(decayedScore * 1000) / 1000,
      firstSeen: entry.firstSeen,
      scanCount: entry.scanCount,
      threatCount: entry.threatCount,
      isNew: false
    };
  }

  /**
   * Map a source's reputation to a scan sensitivity.
   *
   * @param {string} sourceId
   * @returns {'high'|'medium'|'low'} `high` for unknown or low-scoring
   *   sources, `low` once the score reaches 0.6.
   */
  getRecommendedSensitivity(sourceId) {
    const rep = this.getReputation(sourceId);
    if (rep.isNew || rep.score < 0.3) return 'high';
    if (rep.score < 0.6) return 'medium';
    return 'low';
  }

  /**
   * Write all tracked sources to `persistPath` as JSON. Best effort: failures
   * are logged as a warning and never thrown. No-op without a persistPath.
   */
  save() {
    if (!this._persistPath) return;
    try {
      const fs = require('fs');
      const path = require('path');
      const dir = path.dirname(this._persistPath);
      if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true });
      // Object.fromEntries defines own properties, so a source id such as
      // "__proto__" is stored as data rather than altering the prototype.
      const data = Object.fromEntries(this._sources);
      fs.writeFileSync(this._persistPath, JSON.stringify(data));
    } catch (err) {
      console.warn(`[Agent Shield] SourceReputationTracker: could not save reputation data (${err && err.message})`);
    }
  }

  /**
   * Read tracked sources from `persistPath`. Best effort: a missing file is
   * ignored, other failures are logged as a warning. Entries that are not
   * objects with numeric `score` and `lastSeen` are skipped.
   */
  load() {
    if (!this._persistPath) return;
    try {
      const fs = require('fs');
      if (!fs.existsSync(this._persistPath)) return;
      const data = JSON.parse(fs.readFileSync(this._persistPath, 'utf8'));
      for (const [id, entry] of Object.entries(data)) {
        const wellFormed = entry && typeof entry === 'object' &&
          Number.isFinite(entry.score) && Number.isFinite(entry.lastSeen);
        if (wellFormed) this._sources.set(id, entry);
      }
    } catch (err) {
      console.warn(`[Agent Shield] SourceReputationTracker: could not load reputation data (${err && err.message})`);
    }
  }
}
136
+
137
+ // =========================================================================
138
+ // 3. RetrievalTimeScanner (Trap 3)
139
+ // =========================================================================
140
+
141
/**
 * Turn a query or retrieved value into the text that gets scanned.
 * Falsy values become ''. Objects and arrays are JSON-serialized so their
 * contents are scanned; plain String() would produce "[object Object]" and
 * the entry would always look clean. Values that cannot be serialized
 * (e.g. cyclic objects) fall back to String().
 */
function retrievalValueToText(value) {
  if (!value) return '';
  if (typeof value === 'object') {
    try {
      const json = JSON.stringify(value);
      if (typeof json === 'string') return json;
    } catch { /* not serializable; fall through to String() */ }
  }
  return String(value);
}

/**
 * Scans a memory/RAG retrieval at read time: the query, the retrieved entry,
 * and the two combined, to catch entries that only become a threat in the
 * context of a particular query.
 */
class RetrievalTimeScanner {
  /**
   * @param {*} query - The retrieval query (string, or any value; objects are JSON-serialized).
   * @param {*} retrievedEntry - The retrieved entry (string, or any value; objects are JSON-serialized).
   * @returns {{ safe: boolean, combinedThreats: Array, queryThreats: Array, entryThreats: Array, latentPoisonDetected: boolean }}
   *   `safe` reflects the combined scan only. `latentPoisonDetected` is true
   *   when the combined text has threats but neither part does on its own.
   */
  scanRetrieval(query, retrievedEntry) {
    const queryStr = retrievalValueToText(query);
    const entryStr = retrievalValueToText(retrievedEntry);
    const combined = queryStr + '\n' + entryStr;

    const queryResult = scanText(queryStr, { source: 'retrieval_query' });
    const entryResult = scanText(entryStr, { source: 'retrieval_entry' });
    const combinedResult = scanText(combined, { source: 'retrieval_combined' });

    const queryThreats = queryResult.threats || [];
    const entryThreats = entryResult.threats || [];
    const combinedThreats = combinedResult.threats || [];

    // Latent poison: combined has threats but neither individual piece does
    const latentPoisonDetected = combinedThreats.length > 0 && queryThreats.length === 0 && entryThreats.length === 0;

    if (latentPoisonDetected) {
      console.log(`[Agent Shield] Latent memory poison detected: combined query+entry triggers threats that neither triggers alone`);
    }

    return {
      safe: combinedThreats.length === 0,
      combinedThreats,
      queryThreats,
      entryThreats,
      latentPoisonDetected
    };
  }
}
171
+
172
+ // =========================================================================
173
+ // 4. FewShotValidator (Trap 3)
174
+ // =========================================================================
175
+
176
// Patterns that capture one "input label: ... / output label: ..." exchange.
// Group 1 is the input text, group 2 the output text.
const FEW_SHOT_PATTERNS = [
  /(?:^|\n)\s*(?:User|Human|Person|Input|Q)\s*:\s*([\s\S]*?)(?:\n\s*(?:Assistant|AI|Bot|Agent|Output|A)\s*:\s*([\s\S]*?)(?=\n\s*(?:User|Human|Person|Input|Q)\s*:|$))/gi,
];

/**
 * Finds few-shot style example exchanges in a context string and scans the
 * output side of each example for threats.
 */
class FewShotValidator {
  /**
   * @param {string} contextText - Context that may contain example exchanges.
   * @returns {{ safe: boolean, poisonedExamples: Array<{input: string, output: string, threats: Array}> }}
   *   Each poisoned example carries the first 200 characters of its input and
   *   output. Non-string or empty input is reported as safe.
   */
  validate(contextText) {
    const isUsable = Boolean(contextText) && typeof contextText === 'string';
    if (!isUsable) return { safe: true, poisonedExamples: [] };

    const poisonedExamples = [];

    FEW_SHOT_PATTERNS.forEach((pattern) => {
      // The patterns are global; always start matching from the beginning.
      pattern.lastIndex = 0;
      for (const found of contextText.matchAll(pattern)) {
        const exampleInput = (found[1] || '').trim();
        const exampleOutput = (found[2] || '').trim();
        // Outputs shorter than 5 characters are not scanned.
        if (exampleOutput && exampleOutput.length >= 5) {
          const verdict = scanText(exampleOutput, { source: 'few_shot_output' });
          const flagged = verdict.threats && verdict.threats.length > 0;
          if (flagged) {
            poisonedExamples.push({
              input: exampleInput.substring(0, 200),
              output: exampleOutput.substring(0, 200),
              threats: verdict.threats
            });
          }
        }
      }
    });

    return { safe: poisonedExamples.length === 0, poisonedExamples };
  }
}
207
+
208
+ // =========================================================================
209
+ // 5. SubAgentSpawnGate (Trap 4)
210
+ // =========================================================================
211
+
212
/**
 * Gate that decides whether a parent agent may spawn a child agent with a
 * given configuration. Checks the child's system prompt, requested
 * permissions and tool list.
 */
class SubAgentSpawnGate {
  /**
   * @param {Array<string>} parentPermissions - Permissions held by the parent.
   * @param {object} childConfig - Proposed child configuration.
   * @param {string} [childConfig.systemPrompt] - Scanned at high sensitivity.
   * @param {Array<string>} [childConfig.permissions] - Permissions requested for the child.
   * @param {Array<string|{name?: string, description?: string}>} [childConfig.tools]
   *   Tools given either as plain names or as descriptor objects.
   * @returns {{ allowed: boolean, reason: (string|null), threats: Array<object> }}
   *   `reason` is the description of the first threat, or null when allowed.
   */
  validateSpawn(parentPermissions, childConfig) {
    if (!childConfig || typeof childConfig !== 'object') {
      return { allowed: false, reason: 'Invalid child configuration.', threats: [] };
    }

    const threats = [];
    const parentPerms = new Set(Array.isArray(parentPermissions) ? parentPermissions : []);

    // Scan child system prompt
    if (childConfig.systemPrompt) {
      const promptScan = scanText(childConfig.systemPrompt, { source: 'sub_agent_prompt', sensitivity: 'high' });
      if (promptScan.threats && promptScan.threats.length > 0) {
        threats.push(...promptScan.threats.map(t => ({ ...t, context: 'child_system_prompt' })));
      }
    }

    // Check permission escalation.
    // NOTE(review): an empty or missing parent permission list disables this
    // check entirely, so a parent with no permissions can spawn a child with
    // any. Confirm that is intended.
    const childPerms = Array.isArray(childConfig.permissions) ? childConfig.permissions : [];
    for (const perm of childPerms) {
      if (parentPerms.size > 0 && !parentPerms.has(perm)) {
        threats.push({
          type: 'permission_escalation',
          severity: 'critical',
          description: `Child agent requests permission "${perm}" not held by parent.`
        });
      }
    }

    // Check for dangerous tool access
    const dangerousTools = /(?:exec|shell|bash|cmd|eval|spawn|child_process)/i;
    if (childConfig.tools && Array.isArray(childConfig.tools)) {
      for (const tool of childConfig.tools) {
        // A tool may be a bare name ("bash") or a descriptor object; null or
        // otherwise empty entries are treated as having no name.
        const isBareName = typeof tool === 'string';
        const toolName = isBareName ? tool : ((tool && tool.name) || '');
        const toolDescription = isBareName ? '' : ((tool && tool.description) || '');
        if (dangerousTools.test(toolName) || dangerousTools.test(toolDescription)) {
          threats.push({
            type: 'dangerous_child_tool',
            severity: 'high',
            description: `Child agent has dangerous tool: "${toolName || 'unknown'}"`
          });
        }
      }
    }

    const allowed = threats.length === 0;
    if (!allowed) {
      console.log(`[Agent Shield] Sub-agent spawn BLOCKED: ${threats.length} issue(s)`);
    }

    return { allowed, reason: allowed ? null : threats[0].description, threats };
  }
}
263
+
264
+ // =========================================================================
265
+ // 6. SelfReferenceMonitor (Trap 2)
266
+ // =========================================================================
267
+
268
// Phrases in which content makes claims about the agent itself: its
// reputation, habits, purpose, or capabilities.
const SELF_REF_PATTERNS = [
  /you\s+are\s+(?:known|famous|renowned|recognized)\s+(?:for|as)/i,
  /you\s+(?:always|never|typically|usually)\s+(?:comply|help|assist|refuse|reject)/i,
  /your\s+(?:purpose|role|job|mission|function)\s+is\s+to/i,
  /you\s+have\s+(?:been|a)\s+(?:reputation|history)\s+(?:for|of)/i,
  /users?\s+(?:expect|trust|rely\s+on)\s+you\s+to/i,
  /you\s+(?:can|are\s+able\s+to|have\s+(?:access|permission|capability))\s+(?:to\s+)?(?:access|read|write|execute|modify|delete)/i,
  /(?:this|the)\s+(?:AI|assistant|model|agent)\s+(?:is\s+known|always|never|has\s+been\s+(?:updated|modified|changed))/i,
];

/**
 * Detects text that repeatedly describes the agent to itself.
 */
class SelfReferenceMonitor {
  /**
   * @param {string} text - Text to inspect.
   * @returns {{ detected: boolean, references: Array<{pattern: string, match: string}>, count: number }}
   *   `detected` requires at least two distinct patterns to match; a single
   *   hit is reported in `references` but does not trip detection. Each
   *   pattern contributes at most one reference. The same three fields are
   *   returned for invalid input, with `count` 0.
   */
  detect(text) {
    if (!text || typeof text !== 'string') return { detected: false, references: [], count: 0 };
    const references = [];
    for (const pattern of SELF_REF_PATTERNS) {
      const match = text.match(pattern);
      if (match) {
        // Truncate both the pattern source and the matched text for reporting.
        references.push({ pattern: pattern.source.substring(0, 40), match: match[0].substring(0, 80) });
      }
    }
    return { detected: references.length >= 2, references, count: references.length };
  }
}
291
+
292
+ // =========================================================================
293
+ // 7. InformationAsymmetryDetector (Trap 2)
294
+ // =========================================================================
295
+
296
// Vocabulary that frames safety measures positively / negatively.
const PRO_SAFETY = /\b(?:protect|verify|restrict|caution|validate|confirm|secure|guard|safeguard|authenticate|encrypt|isolate|monitor|audit)\b/gi;
const ANTI_SAFETY = /\b(?:unnecessary|harmful|counterproductive|remove|disable|outdated|excessive|overblown|bloat|obstacle|barrier|bottleneck|hindrance|overkill)\b/gi;

/**
 * Measures how one-sidedly a text frames safety measures as undesirable.
 */
class InformationAsymmetryDetector {
  /**
   * @param {string} text - Text to inspect.
   * @returns {{ asymmetric: boolean, ratio: number, proSafety: number, antiSafety: number, description: (string|null) }}
   *   `ratio` is the share of anti-safety terms among all matched terms,
   *   rounded to two decimals. With fewer than three matched terms the text
   *   is never flagged and `ratio` is 0. `description` is set only when
   *   `asymmetric` is true.
   */
  detect(text) {
    if (!text || typeof text !== 'string') {
      return { asymmetric: false, ratio: 0, proSafety: 0, antiSafety: 0, description: null };
    }
    // String.prototype.match with a global regex always scans from index 0,
    // so the shared patterns need no manual lastIndex reset.
    const proCount = (text.match(PRO_SAFETY) || []).length;
    const antiCount = (text.match(ANTI_SAFETY) || []).length;
    const total = proCount + antiCount;
    // Too few signal words to say anything meaningful.
    if (total < 3) {
      return { asymmetric: false, ratio: 0, proSafety: proCount, antiSafety: antiCount, description: null };
    }
    const ratio = antiCount / total;
    const asymmetric = ratio > 0.7;
    return {
      asymmetric,
      ratio: Math.round(ratio * 100) / 100,
      proSafety: proCount,
      antiSafety: antiCount,
      description: asymmetric ? `Content is ${Math.round(ratio * 100)}% anti-safety framing. Possible semantic manipulation.` : null
    };
  }
}
318
+
319
+ // =========================================================================
320
+ // 8. ProvenanceMarker (Trap 6)
321
+ // =========================================================================
322
+
323
/**
 * Records which sources contributed to a response and prepends a provenance
 * header to the output, with a warning when untrusted sources were involved.
 */
class ProvenanceMarker {
  constructor() {
    this._sources = [];
  }

  /**
   * Record a contributing source. Only the 50 most recent are kept.
   *
   * @param {string} origin - Where the content came from (e.g. a URL).
   * @param {string} [trustLevel='unknown'] - Trust label; `untrusted` and
   *   `low` trigger the warning line in the header.
   */
  recordSource(origin, trustLevel) {
    this._sources.push({ origin, trustLevel: trustLevel || 'unknown', timestamp: Date.now() });
    if (this._sources.length > 50) this._sources = this._sources.slice(-50);
  }

  /**
   * Build the provenance header.
   *
   * @returns {string} '' when no sources are recorded; otherwise a header of
   *   two lines, or three when an untrusted/low-trust source is present.
   */
  generateHeader() {
    if (this._sources.length === 0) return '';

    // Origins and trust labels can come from untrusted content. Collapse line
    // breaks and other control characters so a crafted value cannot add or
    // forge lines in the header.
    const singleLine = (value) => String(value).replace(/[\u0000-\u001f\u007f\u2028\u2029]+/g, ' ').trim();

    const untrusted = this._sources.filter(s => s.trustLevel === 'untrusted' || s.trustLevel === 'low');
    const lines = ['[Agent Shield Provenance]'];
    lines.push(`Sources: ${this._sources.map(s => `[${singleLine(s.trustLevel)}] ${singleLine(s.origin)}`).join(', ')}`);
    if (untrusted.length > 0) {
      lines.push(`WARNING: Response influenced by ${untrusted.length} untrusted source(s): ${untrusted.map(s => singleLine(s.origin)).join(', ')}`);
    }
    return lines.join('\n');
  }

  /**
   * Prepend the provenance header to an output.
   *
   * @param {string} output
   * @returns {string} The output unchanged when there is no header, otherwise
   *   header, blank line, output.
   */
  markOutput(output) {
    const header = this.generateHeader();
    if (!header) return output;
    return header + '\n\n' + output;
  }

  /** Forget all recorded sources. */
  reset() { this._sources = []; }
}
352
+
353
+ // =========================================================================
354
+ // 9. EscalatingScrutinyEngine (Trap 6)
355
+ // =========================================================================
356
+
357
/**
 * Tracks human approve/deny decisions and raises the required level of
 * scrutiny when the recent approval rate suggests rubber-stamping.
 */
class EscalatingScrutinyEngine {
  /**
   * @param {object} [options]
   * @param {number} [options.fatigueThreshold=0.9] - Approval rate at or
   *   above which extra scrutiny actions are required.
   * @param {number} [options.windowSize=20] - Number of most recent decisions considered.
   * @param {number} [options.escalationInterval=5] - A forced delay is added
   *   whenever the total number of recorded approvals is a multiple of this.
   */
  constructor(options = {}) {
    this._approvals = [];
    this._fatigueThreshold = options.fatigueThreshold || 0.9;
    this._windowSize = options.windowSize || 20;
    this._escalationInterval = options.escalationInterval || 5;
  }

  /**
   * Record one decision. Only the 1000 most recent are kept.
   *
   * @param {boolean} approved
   */
  recordDecision(approved) {
    this._approvals.push({ approved, timestamp: Date.now() });
    if (this._approvals.length > 1000) this._approvals = this._approvals.slice(-1000);
  }

  /**
   * Compute the current scrutiny level from the recent decision window.
   *
   * @returns {{ level: ('normal'|'elevated'|'critical'), approvalRate: number, actions: Array<string> }}
   *   Until enough decisions have been recorded the level is `normal` with
   *   an `approvalRate` of 0.
   */
  getScrutinyLevel() {
    const recent = this._approvals.slice(-this._windowSize);
    // At least 5 decisions are needed, but never more than the window can
    // hold; otherwise a window smaller than 5 could never escalate.
    const minSamples = Math.min(5, this._windowSize);
    if (recent.length < minSamples) return { level: 'normal', approvalRate: 0, actions: [] };
    const approvalRate = recent.filter(a => a.approved).length / recent.length;
    const actions = [];

    if (approvalRate >= this._fatigueThreshold) {
      actions.push('mandatory_plain_english_explanation');
      // Counted over all retained decisions, not just the recent window.
      const totalApprovals = this._approvals.filter(a => a.approved).length;
      if (totalApprovals % this._escalationInterval === 0) {
        actions.push('forced_delay_30s');
      }
      if (approvalRate >= 0.95) {
        actions.push('comprehension_check_required');
      }
    }

    const level = actions.length === 0 ? 'normal' : (actions.includes('comprehension_check_required') ? 'critical' : 'elevated');
    return { level, approvalRate: Math.round(approvalRate * 100) / 100, actions };
  }
}
391
+
392
+ // =========================================================================
393
+ // 10. CompositeFragmentAssembler (Trap 5)
394
+ // =========================================================================
395
+
396
/**
 * Detects compositional attacks: fragments from different sources that are
 * individually clean but form a threat when joined.
 */
class CompositeFragmentAssembler {
  /**
   * @param {object} [options]
   * @param {number} [options.maxFragments=100] - Number of most recent fragments retained.
   */
  constructor(options = {}) {
    this._fragments = [];
    this._maxFragments = options.maxFragments || 100;
  }

  /**
   * Record a fragment and test it against recent fragments from other sources.
   *
   * @param {string} text - Fragment text; values shorter than 5 characters
   *   or non-strings are ignored. At most 500 characters are stored.
   * @param {*} source - Identifier of the fragment's source.
   * @returns {{ assembled: boolean, threats?: Array, fragments?: Array<{source: *, text: string}> }}
   *   `assembled` is true for the first earlier fragment (oldest first, among
   *   the 20 most recent from other sources) whose combination with `text`
   *   is a threat while both parts are clean on their own.
   */
  addFragment(text, source) {
    if (!text || typeof text !== 'string' || text.length < 5) return { assembled: false };
    this._fragments.push({ text: text.substring(0, 500), source, timestamp: Date.now() });
    if (this._fragments.length > this._maxFragments) this._fragments = this._fragments.slice(-this._maxFragments);

    const hasThreats = (scan) => Boolean(scan && scan.threats && scan.threats.length > 0);

    // The new fragment's own verdict does not depend on its partner, so scan
    // it once. If it is already a threat on its own, no pairing can qualify
    // as a compositional attack.
    const thisScan = scanText(text, { source: 'fragment_individual' });
    if (hasThreats(thisScan)) return { assembled: false };

    // Try pairwise assembly with recent fragments from OTHER sources
    const recentOthers = this._fragments.filter(f => f.source !== source).slice(-20);
    for (const other of recentOthers) {
      // A partner that is a threat by itself cannot qualify either; skip the
      // combined scan for it.
      const otherScan = scanText(other.text, { source: 'fragment_individual' });
      if (hasThreats(otherScan)) continue;

      const combined = other.text + ' ' + text;
      const combinedScan = scanText(combined, { source: 'fragment_assembly' });
      if (hasThreats(combinedScan)) {
        console.log(`[Agent Shield] Compositional fragment attack detected: fragments from "${other.source}" and "${source}" combine into threat`);
        return {
          assembled: true,
          threats: combinedScan.threats,
          fragments: [{ source: other.source, text: other.text.substring(0, 100) }, { source, text: text.substring(0, 100) }]
        };
      }
    }

    return { assembled: false };
  }

  /** Forget all recorded fragments. */
  reset() { this._fragments = []; }
}
432
+
433
+ // =========================================================================
434
+ // TrapDefenseV2 — Unified Wrapper
435
+ // =========================================================================
436
+
437
/**
 * Convenience bundle that instantiates one of each defense in this module
 * and exposes them as properties.
 */
class TrapDefenseV2 {
  /**
   * @param {object} [options]
   * @param {object} [options.reputation] - Passed to SourceReputationTracker.
   * @param {object} [options.scrutiny] - Passed to EscalatingScrutinyEngine.
   * @param {object} [options.fragments] - Passed to CompositeFragmentAssembler.
   */
  constructor(options = {}) {
    // Only three of the components take configuration; the rest are
    // constructed without arguments.
    const reputationOptions = options.reputation || {};
    const scrutinyOptions = options.scrutiny || {};
    const fragmentOptions = options.fragments || {};

    this.structureAnalyzer = new ContentStructureAnalyzer();
    this.reputationTracker = new SourceReputationTracker(reputationOptions);
    this.retrievalScanner = new RetrievalTimeScanner();
    this.fewShotValidator = new FewShotValidator();
    this.spawnGate = new SubAgentSpawnGate();
    this.selfRefMonitor = new SelfReferenceMonitor();
    this.asymmetryDetector = new InformationAsymmetryDetector();
    this.provenanceMarker = new ProvenanceMarker();
    this.scrutinyEngine = new EscalatingScrutinyEngine(scrutinyOptions);
    this.fragmentAssembler = new CompositeFragmentAssembler(fragmentOptions);
  }
}
451
+
452
+ // =========================================================================
453
+ // EXPORTS
454
+ // =========================================================================
455
+
456
+ module.exports = {
457
+ TrapDefenseV2,
458
+ ContentStructureAnalyzer,
459
+ SourceReputationTracker,
460
+ RetrievalTimeScanner,
461
+ FewShotValidator,
462
+ SubAgentSpawnGate,
463
+ SelfReferenceMonitor,
464
+ InformationAsymmetryDetector,
465
+ ProvenanceMarker,
466
+ EscalatingScrutinyEngine,
467
+ CompositeFragmentAssembler
468
+ };
@@ -141,6 +141,30 @@ class FleetCorrelationEngine {
141
141
  return [...this._events];
142
142
  }
143
143
 
144
+ /**
145
+ * Export events for cross-process correlation (Trap 5 deepening).
146
+ * Send this to a central coordinator that merges events from all processes.
147
+ * @returns {string} JSON-serialized events.
148
+ */
149
+ exportEvents() {
150
+ return JSON.stringify(this._events);
151
+ }
152
+
153
+ /**
154
+ * Import events from another process/instance.
155
+ * @param {string|Array} events - JSON string or array of events.
156
+ * @returns {{ imported: number }}
157
+ */
158
+ importEvents(events) {
159
+ const parsed = typeof events === 'string' ? JSON.parse(events) : events;
160
+ if (!Array.isArray(parsed)) return { imported: 0 };
161
+ for (const event of parsed) {
162
+ this._events.push(event);
163
+ }
164
+ if (this._events.length > 50000) this._events = this._events.slice(-50000);
165
+ return { imported: parsed.length };
166
+ }
167
+
144
168
  /** Clear all events. */
145
169
  reset() {
146
170
  this._events = [];
package/src/hitl-guard.js CHANGED
@@ -472,6 +472,69 @@ class HITLGuard {
472
472
  // EXPORTS
473
473
  // =========================================================================
474
474
 
475
+ // =========================================================================
476
+ // TRAP 6 DEEPENING: Output Deception Scorer
477
+ // =========================================================================
478
+
479
/**
 * Scores agent output for deceptive characteristics designed to fool
 * human reviewers into approving dangerous actions.
 */
class OutputDeceptionScorer {
  /**
   * Score an agent's output for deception indicators.
   * @param {string} output - Agent output to evaluate.
   * @param {object} [context] - Context about what was requested (currently not used by the scoring).
   * @returns {{ deceptionScore: number, indicators: Array<object>, recommendation: string }}
   *   `deceptionScore` is the sum of indicator weights capped at 1;
   *   `recommendation` is `block` at 0.5 or more, `review` at 0.3 or more, else `pass`.
   */
  score(output, context = {}) {
    if (!output || typeof output !== 'string') return { deceptionScore: 0, indicators: [], recommendation: 'pass' };
    const indicators = [];

    // Excessive confidence without evidence.
    // "100%" is matched by its own alternative: a trailing \b can never
    // succeed after "%" when a space or punctuation follows.
    const confidenceWords = (output.match(/\b(?:definitely|certainly|absolutely|guaranteed|surely|undoubtedly|without\s+(?:a\s+)?doubt)\b|\b100%/gi) || []).length;
    if (confidenceWords >= 2) {
      indicators.push({ type: 'excessive_confidence', weight: 0.2, description: `${confidenceWords} high-confidence assertions without supporting evidence` });
    }

    // Action buried in long explanation
    const sentences = output.split(/[.!?\n]+/).filter(s => s.trim().length > 10);
    const actionPattern = /\b(?:click|run|execute|install|download|send|transfer|delete|approve|authorize)\b/i;
    // Record positions directly; looking sentences up by value would report
    // the first occurrence for a sentence that is repeated later in the text.
    const actionPositions = [];
    sentences.forEach((sentence, index) => {
      if (actionPattern.test(sentence)) actionPositions.push(index);
    });
    const hasAction = actionPositions.length > 0;
    if (sentences.length > 5 && hasAction) {
      const lastQuarter = sentences.length * 0.75;
      if (actionPositions.some(p => p >= lastQuarter)) {
        indicators.push({ type: 'buried_action', weight: 0.3, description: 'Actionable instructions buried in the last quarter of a long response' });
      }
    }

    // Technical jargon masking simple actions
    const jargonCount = (output.match(/\b(?:subprocess|daemon|syscall|ioctl|mmap|chmod|chown|setuid|capability|namespace|cgroup|seccomp)\b/gi) || []).length;
    const jargonDensity = jargonCount / Math.max(output.split(/\s+/).length, 1);
    if (jargonDensity > 0.03 && hasAction) {
      indicators.push({ type: 'jargon_obfuscation', weight: 0.25, description: 'High technical jargon density combined with actionable instructions' });
    }

    // Urgency injection in output
    if (/\b(?:immediately|right\s+now|as\s+soon\s+as\s+possible|urgent|time-sensitive|critical|before\s+it's\s+too\s+late)\b/i.test(output)) {
      indicators.push({ type: 'urgency_in_output', weight: 0.15, description: 'Output contains urgency language that may pressure reviewer' });
    }

    // Minimization of risks
    if (/\b(?:don't\s+worry|no\s+risk|perfectly\s+safe|nothing\s+(?:bad\s+)?(?:will|can)\s+happen|completely\s+harmless)\b/i.test(output) && hasAction) {
      indicators.push({ type: 'risk_minimization', weight: 0.2, description: 'Output minimizes risks while requesting actions' });
    }

    const deceptionScore = Math.min(1, indicators.reduce((s, i) => s + i.weight, 0));
    const recommendation = deceptionScore >= 0.5 ? 'block' : deceptionScore >= 0.3 ? 'review' : 'pass';

    return {
      deceptionScore: Math.round(deceptionScore * 100) / 100,
      indicators,
      recommendation
    };
  }
}
537
+
475
538
  module.exports = {
476
539
  HITLGuard,
477
540
  ApprovalPatternMonitor,
@@ -479,6 +542,7 @@ module.exports = {
479
542
  OutputInjectionScanner,
480
543
  ReadabilityScanner,
481
544
  CriticalInfoPositionChecker,
545
+ OutputDeceptionScorer,
482
546
  CRITICAL_KEYWORDS,
483
547
  OUTPUT_INJECTION_PATTERNS,
484
548
  HIGH_RISK_ACTIONS,
package/src/main.js CHANGED
@@ -215,6 +215,9 @@ const { BehavioralDNA, AgentProfiler, extractFeatures: extractBehavioralFeatures
215
215
  // v7.4 — Compliance Certification Authority (loaded when available)
216
216
  const { ComplianceCertificateAuthority, ComplianceReport: ComplianceCertReport, ComplianceScheduler, AUTHORITY_FRAMEWORKS, CAPABILITY_MAP: CA_CAPABILITY_MAP, CERTIFICATE_LEVELS: CA_CERTIFICATE_LEVELS } = safeRequire('./compliance-authority', 'compliance-authority');
217
217
 
218
+ // Side Channel Monitor
219
+ const { SideChannelMonitor, BeaconDetector, EntropyAnalyzer: SCEntropyAnalyzer } = safeRequire('./side-channel-monitor', 'side-channel-monitor');
220
+
218
221
  // --- v1.2 Modules ---
219
222
 
220
223
  // Semantic Detection
@@ -365,6 +368,9 @@ const { SOTABenchmark, BIPIA_SAMPLES: SOTA_BIPIA_SAMPLES, HACKAPROMPT_SAMPLES: S
365
368
  // v13.1 — Real-world benchmark
366
369
  const { RealBenchmark } = safeRequire('./real-benchmark', 'real-benchmark');
367
370
 
371
+ // v14.0 — DeepMind Trap Defenses V2
372
+ const { TrapDefenseV2, ContentStructureAnalyzer, SourceReputationTracker, RetrievalTimeScanner, FewShotValidator, SubAgentSpawnGate, SelfReferenceMonitor, InformationAsymmetryDetector, ProvenanceMarker, EscalatingScrutinyEngine, CompositeFragmentAssembler } = safeRequire('./deepmind-defenses', 'deepmind-defenses');
373
+
368
374
  // v12.0 — Multi-Turn Attack Detection
369
375
  const { ConversationTracker } = safeRequire('./cross-turn', 'cross-turn');
370
376
 
@@ -404,6 +410,12 @@ const { SemanticGuard, AuthoritativeClaimDetector, BiasDetector: SemanticBiasDet
404
410
  // v13.0 — Memory Trap Defenses (Trap 3)
405
411
  const { MemoryGuard, MemoryIntegrityMonitor, RAGIngestionScanner, MemoryIsolationEnforcer, RetrievalAnomalyDetector, INSTRUCTION_INDICATORS } = safeRequire('./memory-guard', 'memory-guard');
406
412
 
413
+ // v13.3 — Render Differential Analyzer
414
+ const { RenderDifferentialAnalyzer, VisualHasher } = safeRequire('./render-differential', 'render-differential');
415
+
416
+ // v13.3 — Sybil Detector
417
+ const { SybilDetector, AgentIdentityVerifier } = safeRequire('./sybil-detector', 'sybil-detector');
418
+
407
419
  // Build exports, filtering out undefined values from failed imports
408
420
  const _exports = {
409
421
  // Core
@@ -1044,6 +1056,17 @@ const _exports = {
1044
1056
  SOTA_MULTILINGUAL_SAMPLES,
1045
1057
  SOTA_STEALTH_SAMPLES,
1046
1058
  RealBenchmark,
1059
+ TrapDefenseV2,
1060
+ ContentStructureAnalyzer,
1061
+ SourceReputationTracker,
1062
+ RetrievalTimeScanner,
1063
+ FewShotValidator,
1064
+ SubAgentSpawnGate,
1065
+ SelfReferenceMonitor,
1066
+ InformationAsymmetryDetector,
1067
+ ProvenanceMarker,
1068
+ EscalatingScrutinyEngine,
1069
+ CompositeFragmentAssembler,
1047
1070
 
1048
1071
  // v12.0 — Multi-Turn Attack Detection
1049
1072
  ConversationTracker,
@@ -1134,6 +1157,19 @@ const _exports = {
1134
1157
  AUTHORITY_FRAMEWORKS,
1135
1158
  CA_CAPABILITY_MAP,
1136
1159
  CA_CERTIFICATE_LEVELS,
1160
+
1161
+ // Side Channel Monitor
1162
+ SideChannelMonitor,
1163
+ BeaconDetector,
1164
+ SCEntropyAnalyzer,
1165
+
1166
+ // Render Differential Analyzer
1167
+ RenderDifferentialAnalyzer,
1168
+ VisualHasher,
1169
+
1170
+ // Sybil Detector
1171
+ SybilDetector,
1172
+ AgentIdentityVerifier,
1137
1173
  };
1138
1174
 
1139
1175
  // Filter out undefined exports (from modules that failed to load)