@opena2a/oasb 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +57 -16
- package/dist/harness/adapter.d.ts +187 -0
- package/dist/harness/adapter.js +18 -0
- package/dist/harness/arp-wrapper.d.ts +24 -20
- package/dist/harness/arp-wrapper.js +114 -28
- package/dist/harness/create-adapter.d.ts +16 -0
- package/dist/harness/create-adapter.js +36 -0
- package/dist/harness/event-collector.d.ts +1 -1
- package/dist/harness/llm-guard-wrapper.d.ts +31 -0
- package/dist/harness/llm-guard-wrapper.js +315 -0
- package/dist/harness/mock-llm-adapter.d.ts +2 -2
- package/dist/harness/mock-llm-adapter.js +6 -5
- package/dist/harness/types.d.ts +4 -38
- package/package.json +15 -7
- package/src/atomic/ai-layer/AT-AI-001.prompt-input-scan.test.ts +18 -42
- package/src/atomic/ai-layer/AT-AI-002.prompt-output-scan.test.ts +13 -32
- package/src/atomic/ai-layer/AT-AI-003.mcp-tool-scan.test.ts +18 -42
- package/src/atomic/ai-layer/AT-AI-004.a2a-message-scan.test.ts +14 -36
- package/src/atomic/ai-layer/AT-AI-005.pattern-coverage.test.ts +11 -5
- package/src/atomic/enforcement/AT-ENF-001.log-action.test.ts +4 -4
- package/src/atomic/enforcement/AT-ENF-002.alert-callback.test.ts +5 -5
- package/src/atomic/enforcement/AT-ENF-003.pause-sigstop.test.ts +4 -4
- package/src/atomic/enforcement/AT-ENF-004.kill-sigterm.test.ts +5 -5
- package/src/atomic/enforcement/AT-ENF-005.resume-sigcont.test.ts +4 -4
- package/src/atomic/intelligence/AT-INT-001.l0-rule-match.test.ts +1 -1
- package/src/atomic/intelligence/AT-INT-002.l1-anomaly-score.test.ts +10 -8
- package/src/atomic/intelligence/AT-INT-003.l2-escalation.test.ts +1 -1
- package/src/atomic/intelligence/AT-INT-004.budget-exhaustion.test.ts +8 -6
- package/src/atomic/intelligence/AT-INT-005.baseline-learning.test.ts +9 -9
- package/src/baseline/BL-002.anomaly-injection.test.ts +6 -6
- package/src/baseline/BL-003.baseline-persistence.test.ts +9 -9
- package/src/harness/adapter.ts +222 -0
- package/src/harness/arp-wrapper.ts +150 -42
- package/src/harness/create-adapter.ts +49 -0
- package/src/harness/event-collector.ts +1 -1
- package/src/harness/llm-guard-wrapper.ts +333 -0
- package/src/harness/mock-llm-adapter.ts +7 -6
- package/src/harness/types.ts +31 -39
- package/src/integration/INT-001.data-exfil-detection.test.ts +1 -1
- package/src/integration/INT-002.mcp-tool-abuse.test.ts +1 -1
- package/src/integration/INT-003.prompt-injection-response.test.ts +1 -1
- package/src/integration/INT-004.a2a-trust-exploitation.test.ts +1 -1
- package/src/integration/INT-005.baseline-then-attack.test.ts +1 -1
- package/src/integration/INT-006.multi-monitor-correlation.test.ts +1 -1
- package/src/integration/INT-007.budget-exhaustion-attack.test.ts +8 -8
- package/src/integration/INT-008.kill-switch-recovery.test.ts +6 -6
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapter factory — selects which security product adapter to use.
|
|
3
|
+
*
|
|
4
|
+
* Set OASB_ADAPTER env var to choose:
|
|
5
|
+
* - "arp" (default) — uses arp-guard (must be installed)
|
|
6
|
+
* - "llm-guard" — uses theRizwan/llm-guard
|
|
7
|
+
* - path to a JS/TS module that exports a class implementing SecurityProductAdapter
|
|
8
|
+
*
|
|
9
|
+
* All test files import from here instead of instantiating adapters directly.
|
|
10
|
+
*/
|
|
11
|
+
import type { SecurityProductAdapter, LabConfig } from './adapter';
|
|
12
|
+
|
|
13
|
+
// Eagerly resolve the adapter class at import time.
|
|
14
|
+
// This file is only imported by tests that need the adapter,
|
|
15
|
+
// so the cost is acceptable. Each wrapper handles lazy loading internally.
|
|
16
|
+
import { ArpWrapper } from './arp-wrapper';
|
|
17
|
+
import { LLMGuardWrapper } from './llm-guard-wrapper';
|
|
18
|
+
|
|
19
|
+
/** Constructor shape shared by every adapter class this factory can return. */
type AdapterConstructor = new (config?: LabConfig) => SecurityProductAdapter;

// `||` (not `??`) is deliberate: an empty OASB_ADAPTER value falls back to "arp"
// instead of being treated as a module id.
const adapterName = process.env.OASB_ADAPTER || 'arp';

/**
 * Load a custom adapter class from a module id (package name or path).
 *
 * The export is looked up in this order:
 *   1. `default`
 *   2. `Adapter`
 *   3. the module itself, when `module.exports` is a function/class
 *   4. the first enumerable export
 *
 * @param moduleId - Value passed straight to `require`.
 * @returns The adapter constructor found in the module.
 * @throws Error when the module does not expose a function/class.
 *         Errors raised by `require` itself propagate unchanged.
 */
function loadCustomAdapter(moduleId: string): AdapterConstructor {
  // eslint-disable-next-line @typescript-eslint/no-var-requires
  const mod = require(moduleId);
  // Guard against `module.exports = null/undefined`, which would otherwise
  // fail with an unrelated TypeError on property access.
  const firstExport = mod == null ? undefined : mod[Object.keys(mod)[0]];
  const Cls =
    mod?.default ||
    mod?.Adapter ||
    // CommonJS modules commonly do `module.exports = class ...`.
    (typeof mod === 'function' ? mod : undefined) ||
    firstExport;
  if (!Cls || typeof Cls !== 'function') {
    throw new Error(`Module "${moduleId}" does not export an adapter class`);
  }
  return Cls;
}

/**
 * Map an adapter name to its class: the two built-in wrappers by name,
 * anything else is treated as a custom module id.
 */
function resolveAdapterClass(name: string): AdapterConstructor {
  switch (name) {
    case 'arp':
      return ArpWrapper;
    case 'llm-guard':
      return LLMGuardWrapper;
    default:
      // Custom adapter — loaded at module level
      return loadCustomAdapter(name);
  }
}

// Resolved once, when this module is first evaluated.
const AdapterClass: AdapterConstructor = resolveAdapterClass(adapterName);
|
|
42
|
+
|
|
43
|
+
/**
 * Build a new instance of the security product adapter under test.
 *
 * The concrete class is the one selected from the OASB_ADAPTER environment
 * variable when this module was evaluated.
 *
 * @param config - Optional lab configuration handed to the adapter constructor.
 * @returns A freshly constructed adapter.
 */
export function createAdapter(config?: LabConfig): SecurityProductAdapter {
  const adapter: SecurityProductAdapter = new AdapterClass(config);
  return adapter;
}
|
|
@@ -0,0 +1,333 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* llm-guard Adapter — Third-party benchmark comparison
|
|
3
|
+
*
|
|
4
|
+
* Wraps theRizwan/llm-guard (npm: llm-guard) for OASB evaluation.
|
|
5
|
+
* This is a prompt-level scanner only — it does NOT provide:
|
|
6
|
+
* - Process/network/filesystem monitoring
|
|
7
|
+
* - MCP tool call validation
|
|
8
|
+
* - A2A message scanning
|
|
9
|
+
* - Anomaly detection / intelligence layers
|
|
10
|
+
* - Enforcement actions (pause/kill/resume)
|
|
11
|
+
*
|
|
12
|
+
* Tests that require these capabilities will get no-op implementations
|
|
13
|
+
* that return empty/negative results, documenting the coverage gap.
|
|
14
|
+
*/
|
|
15
|
+
import * as fs from 'fs';
|
|
16
|
+
import * as os from 'os';
|
|
17
|
+
import * as path from 'path';
|
|
18
|
+
import { EventCollector } from './event-collector';
|
|
19
|
+
import type {
|
|
20
|
+
SecurityProductAdapter,
|
|
21
|
+
SecurityEvent,
|
|
22
|
+
EnforcementResult,
|
|
23
|
+
EnforcementAction,
|
|
24
|
+
LabConfig,
|
|
25
|
+
PromptScanner,
|
|
26
|
+
MCPScanner,
|
|
27
|
+
A2AScanner,
|
|
28
|
+
PatternScanner,
|
|
29
|
+
BudgetManager,
|
|
30
|
+
AnomalyScorer,
|
|
31
|
+
EventEngine,
|
|
32
|
+
EnforcementEngine,
|
|
33
|
+
ScanResult,
|
|
34
|
+
ThreatPattern,
|
|
35
|
+
AlertRule,
|
|
36
|
+
} from './adapter';
|
|
37
|
+
|
|
38
|
+
// Lazy-loaded llm-guard: the constructor is cached here after the first
// successful lookup so the package is only required when a scanner is created.
let _LLMGuard: any;

/**
 * Return the `LLMGuard` constructor exported by the `llm-guard` package,
 * requiring the package on first use.
 *
 * @returns The `LLMGuard` class.
 * @throws Error when the package loads but does not export an `LLMGuard`
 *         function/class. Errors raised by `require` (for example the
 *         package not being installed) propagate unchanged.
 */
function getLLMGuard(): any {
  if (!_LLMGuard) {
    // eslint-disable-next-line @typescript-eslint/no-var-requires
    const loaded = require('llm-guard');
    const candidate = loaded == null ? undefined : loaded.LLMGuard;
    // Fail here rather than later with an opaque "is not a constructor".
    if (typeof candidate !== 'function') {
      throw new Error('Package "llm-guard" does not export an LLMGuard class');
    }
    _LLMGuard = candidate;
  }
  return _LLMGuard;
}
|
|
46
|
+
|
|
47
|
+
/**
 * Translate an llm-guard result object into an OASB ScanResult.
 *
 * Every detail entry of every scanner result whose `valid` flag is falsy
 * becomes one match. `detected` is the negation of the top-level `isValid`.
 *
 * @param guardResult - Result object as read here: `isValid`, `score`, and an
 *   optional `results` list whose items carry `valid` and `details`
 *   (each detail: `rule`, `message`, `matched`).
 * @returns The equivalent ScanResult.
 */
function toScanResult(guardResult: any): ScanResult {
  // The category is derived from substrings of the rule name; first hit wins.
  const categoryFor = (rule: any) => {
    if (rule?.includes('jailbreak')) return 'jailbreak' as const;
    if (rule?.includes('pii')) return 'data-exfiltration' as const;
    if (rule?.includes('injection')) return 'prompt-injection' as const;
    return 'unknown' as const;
  };

  const found: ScanResult['matches'] = [];

  for (const scannerResult of guardResult.results || []) {
    // Only failed scanners that actually report details contribute matches.
    if (scannerResult.valid || !scannerResult.details) continue;

    for (const detail of scannerResult.details) {
      found.push({
        pattern: {
          id: detail.rule || 'LLM-GUARD',
          category: categoryFor(detail.rule),
          description: detail.message || '',
          // Placeholder regex: the guard result carries no pattern to copy.
          pattern: /./,
          severity: guardResult.score <= 0.3 ? 'high' : 'medium',
        },
        matchedText: detail.matched || '',
      });
    }
  }

  return {
    detected: !guardResult.isValid,
    matches: found,
  };
}
|
|
78
|
+
|
|
79
|
+
/**
 * Simple event engine that stamps events and fans them out to handlers.
 *
 * Handlers are invoked synchronously, in registration order. A handler may
 * return a promise; its rejection is reported via `console.error` instead of
 * being left as an unhandled rejection.
 */
class SimpleEventEngine implements EventEngine {
  private handlers: Array<(event: SecurityEvent) => void | Promise<void>> = [];
  private idCounter = 0;

  /**
   * Complete the event with `id`, `timestamp` and `classifiedBy`, then
   * deliver it to every registered handler.
   *
   * @param event - Event fields supplied by the caller.
   * @returns The completed event (the same object the handlers received).
   */
  emit(event: Omit<SecurityEvent, 'id' | 'timestamp' | 'classifiedBy'>): SecurityEvent {
    const full: SecurityEvent = {
      ...event,
      id: `llmg-${++this.idCounter}`,
      timestamp: new Date().toISOString(),
      classifiedBy: 'llm-guard',
    };
    for (const handler of this.handlers) {
      // A synchronous throw still propagates to the caller, as before.
      const outcome = handler(full);
      // Async handlers are not awaited here, so observe their rejection.
      if (outcome && typeof (outcome as Promise<void>).then === 'function') {
        Promise.resolve(outcome).catch((err: unknown) => {
          console.error('[llm-guard adapter] event handler rejected:', err);
        });
      }
    }
    return full;
  }

  /** Register a handler to be called for every subsequently emitted event. */
  onEvent(handler: (event: SecurityEvent) => void | Promise<void>): void {
    this.handlers.push(handler);
  }
}
|
|
101
|
+
|
|
102
|
+
/**
 * In-memory stand-in for an enforcement engine — llm-guard doesn't have
 * enforcement.
 *
 * Nothing here touches a real process: pause/resume/kill only update a set
 * of pids, and `execute` always reports success.
 */
class SimpleEnforcementEngine implements EnforcementEngine {
  private pausedPids = new Set<number>();
  // Stored by setAlertCallback; nothing in this class invokes it.
  private alertCallback?: (event: SecurityEvent, rule: AlertRule) => void;

  /**
   * Report the requested action as successfully applied.
   *
   * @returns A result echoing `action` and `event` with a fixed reason string.
   */
  async execute(action: EnforcementAction, event: SecurityEvent): Promise<EnforcementResult> {
    const outcome: EnforcementResult = {
      action,
      success: true,
      reason: 'llm-guard-enforcement',
      event,
    };
    return outcome;
  }

  /** Mark `pid` as paused. Always returns true. */
  pause(pid: number): boolean {
    this.pausedPids.add(pid);
    return true;
  }

  /** Unmark `pid`; returns whether it had been marked as paused. */
  resume(pid: number): boolean {
    const wasPaused = this.pausedPids.has(pid);
    if (wasPaused) {
      this.pausedPids.delete(pid);
    }
    return wasPaused;
  }

  /** Forget `pid` if it was paused. Always returns true. */
  kill(pid: number): boolean {
    this.pausedPids.delete(pid);
    return true;
  }

  /** Snapshot of the pids currently marked as paused, in insertion order. */
  getPausedPids(): number[] {
    return Array.from(this.pausedPids);
  }

  /** Remember the alert callback supplied by the caller. */
  setAlertCallback(callback: (event: SecurityEvent, rule: AlertRule) => void): void {
    this.alertCallback = callback;
  }
}
|
|
133
|
+
|
|
134
|
+
/**
 * OASB adapter for llm-guard.
 *
 * Prompt scanning is a synchronous regex approximation. MCP and A2A scanners
 * are no-ops that never detect anything. Budget management and anomaly
 * scoring are small in-memory stand-ins. Events injected through the adapter
 * are collected and matched against the configured alert rules.
 */
export class LLMGuardWrapper implements SecurityProductAdapter {
  /** Severity names from least to most severe, used for `minSeverity` checks. */
  private static readonly SEVERITY_ORDER: readonly string[] = ['info', 'low', 'medium', 'high', 'critical'];

  private _dataDir: string;
  private engine: SimpleEventEngine;
  private enforcement: SimpleEnforcementEngine;
  private rules: AlertRule[];
  /** Rule processing of the most recently emitted event; awaited by injectEvent. */
  private pendingRuleRun: Promise<void> = Promise.resolve();
  readonly collector: EventCollector;

  /**
   * @param labConfig - Optional configuration. `dataDir` is used when given,
   *   otherwise a temp directory is created. `rules` defaults to none.
   */
  constructor(labConfig?: LabConfig) {
    this._dataDir = labConfig?.dataDir ?? fs.mkdtempSync(path.join(os.tmpdir(), 'llmg-lab-'));
    this.engine = new SimpleEventEngine();
    this.enforcement = new SimpleEnforcementEngine();
    this.rules = labConfig?.rules ?? [];
    this.collector = new EventCollector();

    this.engine.onEvent((event) => {
      // Keep a handle on the in-flight rule run so injectEvent can wait for it.
      this.pendingRuleRun = this.handleEvent(event);
      return this.pendingRuleRun;
    });
  }

  /**
   * Record the event, then run every matching rule's action and record the
   * resulting enforcement under the rule's name.
   *
   * A rule matches when each condition it specifies (category, source,
   * minSeverity) is satisfied; unspecified conditions are ignored.
   */
  private async handleEvent(event: SecurityEvent): Promise<void> {
    this.collector.eventHandler(event);

    // Check rules for enforcement
    const order = LLMGuardWrapper.SEVERITY_ORDER;
    for (const rule of this.rules) {
      const cond = rule.condition;
      if (cond.category && cond.category !== event.category) continue;
      if (cond.source && cond.source !== event.source) continue;
      if (cond.minSeverity && order.indexOf(event.severity) < order.indexOf(cond.minSeverity)) continue;

      const result = await this.enforcement.execute(rule.action, event);
      result.reason = rule.name;
      this.collector.enforcementHandler(result);
    }
  }

  /** No-op: this adapter has nothing to start. */
  async start(): Promise<void> {}

  /**
   * Reset the collector and remove the data directory (recursively).
   * The directory is removed even when it was supplied by the caller.
   */
  async stop(): Promise<void> {
    this.collector.reset();
    try {
      fs.rmSync(this._dataDir, { recursive: true, force: true });
    } catch {
      // Best-effort cleanup: a leftover directory must not fail teardown.
    }
  }

  /**
   * Emit an event through the engine and wait until rule enforcement for it
   * has been recorded, so callers can inspect enforcements right after
   * awaiting this call.
   *
   * @returns The completed event.
   */
  async injectEvent(event: Omit<SecurityEvent, 'id' | 'timestamp' | 'classifiedBy'>): Promise<SecurityEvent> {
    const emitted = this.engine.emit(event);
    // emit() calls handlers synchronously, so pendingRuleRun now belongs to
    // this event. Capture it before any other emit can replace it.
    const ruleRun = this.pendingRuleRun;
    await ruleRun;
    return emitted;
  }

  /** Wait for a collected event satisfying `predicate`; delegates to the collector. */
  waitForEvent(predicate: (event: SecurityEvent) => boolean, timeoutMs: number = 10000): Promise<SecurityEvent> {
    return this.collector.waitForEvent(predicate, timeoutMs);
  }

  getEvents(): SecurityEvent[] { return this.collector.getEvents(); }
  getEventsByCategory(category: string): SecurityEvent[] { return this.collector.eventsByCategory(category); }
  getEnforcements(): EnforcementResult[] { return this.collector.getEnforcements() as EnforcementResult[]; }
  getEnforcementsByAction(action: string): EnforcementResult[] { return this.collector.enforcementsByAction(action) as EnforcementResult[]; }
  resetCollector(): void { this.collector.reset(); }

  getEventEngine(): EventEngine { return this.engine; }
  getEnforcementEngine(): EnforcementEngine { return this.enforcement; }

  get dataDir(): string { return this._dataDir; }

  // ─── Factory Methods ────────────────────────────────────────────

  /**
   * Create a prompt scanner backed by the regex approximation.
   *
   * An LLMGuard instance is constructed (so the package must be loadable),
   * but the scan methods do not call it: llm-guard is async while the OASB
   * scanner interface is sync.
   */
  createPromptScanner(): PromptScanner {
    const LLMGuard = getLLMGuard();
    // NOTE(review): `guard` is never used by the scan methods below.
    const guard = new LLMGuard({
      promptInjection: { enabled: true },
      jailbreak: { enabled: true },
      pii: { enabled: true },
    });

    return {
      start: async () => {},
      stop: async () => {},
      // llm-guard is async, but OASB scanner interface is sync.
      // We run synchronously by checking patterns manually.
      // This is a limitation — real usage would be async.
      scanInput: (text: string) => scanWithPatterns(text, 'input'),
      scanOutput: (text: string) => scanWithPatterns(text, 'output'),
    };
  }

  /** llm-guard has no MCP scanning capability: the scanner never detects anything. */
  createMCPScanner(_allowedTools?: string[]): MCPScanner {
    return {
      start: async () => {},
      stop: async () => {},
      scanToolCall: () => ({ detected: false, matches: [] }),
    };
  }

  /** llm-guard has no A2A scanning capability: the scanner never detects anything. */
  createA2AScanner(_trustedAgents?: string[]): A2AScanner {
    return {
      start: async () => {},
      stop: async () => {},
      scanMessage: () => ({ detected: false, matches: [] }),
    };
  }

  /**
   * Create a pattern scanner.
   *
   * llm-guard uses its own internal patterns, not the OASB ThreatPattern
   * format, so the exposed pattern list is a regex approximation.
   * `scanText` scans with exactly the patterns it is given.
   */
  createPatternScanner(): PatternScanner {
    const patterns = getLLMGuardPatterns();
    return {
      scanText: (text: string, pats: readonly ThreatPattern[]) => {
        const matches: ScanResult['matches'] = [];
        for (const candidate of pats) {
          // exec() on a global/sticky regex resumes from lastIndex; reset it
          // so repeated scans with the same pattern object are independent.
          candidate.pattern.lastIndex = 0;
          const hit = candidate.pattern.exec(text);
          if (hit) {
            matches.push({ pattern: candidate, matchedText: hit[0].slice(0, 200) });
          }
        }
        return { detected: matches.length > 0, matches };
      },
      getAllPatterns: () => patterns,
      getPatternSets: () => ({
        inputPatterns: patterns.filter(p => p.category !== 'output-leak'),
        outputPatterns: patterns.filter(p => p.category === 'output-leak'),
        mcpPatterns: [],
        a2aPatterns: [],
      }),
    };
  }

  /**
   * Create an in-memory budget tracker (llm-guard has no budget management).
   *
   * @param dataDir - Accepted for interface compatibility; not used here.
   * @param config - `budgetUsd` (default 5) and `maxCallsPerHour` (default 20).
   */
  createBudgetManager(dataDir: string, config?: { budgetUsd?: number; maxCallsPerHour?: number }): BudgetManager {
    let spent = 0;
    let totalCalls = 0;
    // Only cleared by reset(); no clock is involved.
    let callsThisHour = 0;
    const budgetUsd = config?.budgetUsd ?? 5;
    const maxCallsPerHour = config?.maxCallsPerHour ?? 20;

    // Avoid NaN/Infinity for a zero budget: nothing spent is 0%, anything else 100%.
    const percentUsed = (): number =>
      budgetUsd > 0 ? Math.round((spent / budgetUsd) * 100) : (spent > 0 ? 100 : 0);

    return {
      canAfford: (cost: number) => spent + cost <= budgetUsd && callsThisHour < maxCallsPerHour,
      record: (cost: number, _tokens: number) => { spent += cost; totalCalls++; callsThisHour++; },
      getStatus: () => ({
        spent,
        budget: budgetUsd,
        remaining: budgetUsd - spent,
        percentUsed: percentUsed(),
        callsThisHour,
        maxCallsPerHour,
        totalCalls,
      }),
      reset: () => { spent = 0; totalCalls = 0; callsThisHour = 0; },
    };
  }

  /**
   * Create a stub anomaly scorer (llm-guard has no anomaly detection).
   * `score` always returns 0; `record` only tracks how many events were seen
   * per source.
   */
  createAnomalyScorer(): AnomalyScorer {
    const baselines = new Map<string, { mean: number; stddev: number; count: number }>();
    const observations = new Map<string, number[]>();

    return {
      score: () => 0,
      record: (event: SecurityEvent) => {
        const key = event.source;
        const seen = observations.get(key) ?? [];
        seen.push(1);
        observations.set(key, seen);
        // NOTE(review): `mean` keeps the stub's original value (number of
        // observations so far) — confirm the intended statistic.
        baselines.set(key, { mean: seen.length, stddev: 0, count: seen.length });
      },
      getBaseline: (source: string) => baselines.get(source) ?? null,
      reset: () => { baselines.clear(); observations.clear(); },
    };
  }
}
|
|
299
|
+
|
|
300
|
+
// ─── Internal pattern-based scanning (sync approximation of llm-guard) ───
|
|
301
|
+
|
|
302
|
+
/**
 * Build the regex patterns used as a synchronous approximation of llm-guard.
 *
 * A fresh array with fresh RegExp objects is returned on every call:
 * three prompt-injection, two jailbreak and three data-exfiltration patterns.
 */
function getLLMGuardPatterns(): ThreatPattern[] {
  // Small constructor to keep the table below readable.
  const entry = (
    id: string,
    category: ThreatPattern['category'],
    description: string,
    pattern: RegExp,
    severity: ThreatPattern['severity'],
  ): ThreatPattern => ({ id, category, description, pattern, severity });

  return [
    // Prompt injection
    entry('LLMG-PI-001', 'prompt-injection', 'Instruction override', /(?:ignore|disregard|forget)\s+(?:all\s+)?(?:previous\s+)?(?:instructions?|prompts?|rules?)/i, 'high'),
    entry('LLMG-PI-002', 'prompt-injection', 'System prompt extraction', /(?:system\s+prompt|repeat\s+(?:your|the)\s+(?:instructions?|prompt))/i, 'high'),
    entry('LLMG-PI-003', 'prompt-injection', 'Persona override', /(?:you\s+are\s+now|pretend\s+you\s+are|act\s+as\s+if)/i, 'medium'),
    // Jailbreak
    entry('LLMG-JB-001', 'jailbreak', 'DAN jailbreak', /(?:DAN|do\s+anything\s+now)/i, 'high'),
    entry('LLMG-JB-002', 'jailbreak', 'Roleplay bypass', /(?:pretend|imagine|roleplay)\s+(?:you\s+are|as)\s+(?:an?\s+)?(?:evil|unrestricted|unfiltered)/i, 'high'),
    // PII / secrets
    entry('LLMG-PII-001', 'data-exfiltration', 'SSN detection', /\b\d{3}-\d{2}-\d{4}\b/, 'high'),
    entry('LLMG-PII-002', 'data-exfiltration', 'Credit card detection', /\b(?:\d{4}[- ]?){3}\d{4}\b/, 'high'),
    entry('LLMG-PII-003', 'data-exfiltration', 'API key detection', /(?:sk-[a-zA-Z0-9]{20,}|AKIA[A-Z0-9]{12,})/i, 'critical'),
  ];
}
|
|
314
|
+
|
|
315
|
+
/**
 * Scan text against the built-in approximation patterns.
 *
 * Each pattern contributes at most one match (its first hit), with the
 * matched text truncated to 200 characters.
 *
 * @param text - Text to scan.
 * @param _direction - Accepted but not used: input and output are scanned alike.
 * @returns `detected` is true when at least one pattern matched.
 */
function scanWithPatterns(text: string, _direction: 'input' | 'output'): ScanResult {
  const hits: ScanResult['matches'] = getLLMGuardPatterns().flatMap((candidate) => {
    const found = candidate.pattern.exec(text);
    return found ? [{ pattern: candidate, matchedText: found[0].slice(0, 200) }] : [];
  });

  return {
    detected: hits.length > 0,
    matches: hits,
  };
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { LLMAdapter, LLMResponse } from '
|
|
1
|
+
import type { LLMAdapter, LLMResponse } from './adapter';
|
|
2
2
|
|
|
3
3
|
interface MockCall {
|
|
4
4
|
prompt: string;
|
|
@@ -21,8 +21,8 @@ export class MockLLMAdapter implements LLMAdapter {
|
|
|
21
21
|
this.costPerCall = options?.costPerCall ?? 0.001;
|
|
22
22
|
}
|
|
23
23
|
|
|
24
|
-
async assess(prompt: string
|
|
25
|
-
this.calls.push({ prompt, maxTokens, timestamp: Date.now() });
|
|
24
|
+
async assess(prompt: string): Promise<LLMResponse> {
|
|
25
|
+
this.calls.push({ prompt, maxTokens: 300, timestamp: Date.now() });
|
|
26
26
|
|
|
27
27
|
if (this.latencyMs > 0) {
|
|
28
28
|
await new Promise((r) => setTimeout(r, this.latencyMs));
|
|
@@ -32,9 +32,10 @@ export class MockLLMAdapter implements LLMAdapter {
|
|
|
32
32
|
|
|
33
33
|
return {
|
|
34
34
|
content: response,
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
35
|
+
usage: {
|
|
36
|
+
inputTokens: Math.ceil(prompt.length / 4),
|
|
37
|
+
outputTokens: Math.ceil(response.length / 4),
|
|
38
|
+
},
|
|
38
39
|
};
|
|
39
40
|
}
|
|
40
41
|
|
package/src/harness/types.ts
CHANGED
|
@@ -1,4 +1,31 @@
|
|
|
1
|
-
|
|
1
|
+
// Re-export OASB-native types from the adapter interface
|
|
2
|
+
// Tests should import from here or from './adapter'
|
|
3
|
+
export type {
|
|
4
|
+
SecurityEvent,
|
|
5
|
+
EnforcementResult,
|
|
6
|
+
AlertRule,
|
|
7
|
+
AlertCondition,
|
|
8
|
+
EventCategory,
|
|
9
|
+
EventSeverity,
|
|
10
|
+
MonitorSource,
|
|
11
|
+
EnforcementAction,
|
|
12
|
+
ScanResult,
|
|
13
|
+
ScanMatch,
|
|
14
|
+
ThreatPattern,
|
|
15
|
+
BudgetStatus,
|
|
16
|
+
LLMAdapter,
|
|
17
|
+
LLMResponse,
|
|
18
|
+
LabConfig,
|
|
19
|
+
SecurityProductAdapter,
|
|
20
|
+
PromptScanner,
|
|
21
|
+
MCPScanner,
|
|
22
|
+
A2AScanner,
|
|
23
|
+
PatternScanner,
|
|
24
|
+
BudgetManager,
|
|
25
|
+
AnomalyScorer,
|
|
26
|
+
EventEngine,
|
|
27
|
+
EnforcementEngine,
|
|
28
|
+
} from './adapter';
|
|
2
29
|
|
|
3
30
|
/** Annotation metadata for test cases */
|
|
4
31
|
export interface TestAnnotation {
|
|
@@ -8,7 +35,7 @@ export interface TestAnnotation {
|
|
|
8
35
|
atlasId?: string;
|
|
9
36
|
/** OWASP Agentic Top 10 category */
|
|
10
37
|
owaspId?: string;
|
|
11
|
-
/** Whether
|
|
38
|
+
/** Whether the product should detect this */
|
|
12
39
|
expectedDetection: boolean;
|
|
13
40
|
/** Expected minimum severity if detected */
|
|
14
41
|
expectedSeverity?: 'info' | 'low' | 'medium' | 'high' | 'critical';
|
|
@@ -22,8 +49,8 @@ export interface TestResult {
|
|
|
22
49
|
annotation: TestAnnotation;
|
|
23
50
|
detected: boolean;
|
|
24
51
|
detectionTimeMs?: number;
|
|
25
|
-
events:
|
|
26
|
-
enforcements: EnforcementResult[];
|
|
52
|
+
events: import('./adapter').SecurityEvent[];
|
|
53
|
+
enforcements: import('./adapter').EnforcementResult[];
|
|
27
54
|
}
|
|
28
55
|
|
|
29
56
|
/** Suite-level metrics */
|
|
@@ -40,38 +67,3 @@ export interface SuiteMetrics {
|
|
|
40
67
|
meanDetectionTimeMs: number;
|
|
41
68
|
p95DetectionTimeMs: number;
|
|
42
69
|
}
|
|
43
|
-
|
|
44
|
-
/** ARP wrapper configuration for tests */
|
|
45
|
-
export interface LabConfig {
|
|
46
|
-
monitors?: {
|
|
47
|
-
process?: boolean;
|
|
48
|
-
network?: boolean;
|
|
49
|
-
filesystem?: boolean;
|
|
50
|
-
};
|
|
51
|
-
rules?: import('@opena2a/arp').AlertRule[];
|
|
52
|
-
intelligence?: {
|
|
53
|
-
enabled?: boolean;
|
|
54
|
-
};
|
|
55
|
-
/** Temp data dir (auto-created per test) */
|
|
56
|
-
dataDir?: string;
|
|
57
|
-
/** Filesystem paths to watch (for real FilesystemMonitor) */
|
|
58
|
-
filesystemWatchPaths?: string[];
|
|
59
|
-
/** Filesystem allowed paths (for real FilesystemMonitor) */
|
|
60
|
-
filesystemAllowedPaths?: string[];
|
|
61
|
-
/** Network allowed hosts (for real NetworkMonitor) */
|
|
62
|
-
networkAllowedHosts?: string[];
|
|
63
|
-
/** Process monitor poll interval in ms */
|
|
64
|
-
processIntervalMs?: number;
|
|
65
|
-
/** Network monitor poll interval in ms */
|
|
66
|
-
networkIntervalMs?: number;
|
|
67
|
-
/** Application-level interceptors (zero-latency hooks) */
|
|
68
|
-
interceptors?: {
|
|
69
|
-
process?: boolean;
|
|
70
|
-
network?: boolean;
|
|
71
|
-
filesystem?: boolean;
|
|
72
|
-
};
|
|
73
|
-
/** Interceptor network allowed hosts */
|
|
74
|
-
interceptorNetworkAllowedHosts?: string[];
|
|
75
|
-
/** Interceptor filesystem allowed paths */
|
|
76
|
-
interceptorFilesystemAllowedPaths?: string[];
|
|
77
|
-
}
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
12
12
|
import { ArpWrapper } from '../harness/arp-wrapper';
|
|
13
13
|
import { DVAAClient } from '../harness/dvaa-client';
|
|
14
|
-
import type { AlertRule } from '
|
|
14
|
+
import type { AlertRule } from '../harness/adapter';
|
|
15
15
|
|
|
16
16
|
// DVAA ports
|
|
17
17
|
const LEGACY_BOT_PORT = 3003;
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
12
12
|
import { ArpWrapper } from '../harness/arp-wrapper';
|
|
13
13
|
import { DVAAClient } from '../harness/dvaa-client';
|
|
14
|
-
import type { AlertRule } from '
|
|
14
|
+
import type { AlertRule } from '../harness/adapter';
|
|
15
15
|
|
|
16
16
|
// DVAA ToolBot port
|
|
17
17
|
const TOOL_BOT_PORT = 3002;
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
12
12
|
import { ArpWrapper } from '../harness/arp-wrapper';
|
|
13
13
|
import { DVAAClient } from '../harness/dvaa-client';
|
|
14
|
-
import type { AlertRule } from '
|
|
14
|
+
import type { AlertRule } from '../harness/adapter';
|
|
15
15
|
|
|
16
16
|
// DVAA SecureBot port
|
|
17
17
|
const SECURE_BOT_PORT = 3001;
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
12
12
|
import { ArpWrapper } from '../harness/arp-wrapper';
|
|
13
13
|
import { DVAAClient } from '../harness/dvaa-client';
|
|
14
|
-
import type { AlertRule } from '
|
|
14
|
+
import type { AlertRule } from '../harness/adapter';
|
|
15
15
|
|
|
16
16
|
// DVAA Orchestrator port
|
|
17
17
|
const ORCHESTRATOR_PORT = 3004;
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
|
|
11
11
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
12
12
|
import { ArpWrapper } from '../harness/arp-wrapper';
|
|
13
|
-
import type { AlertRule } from '
|
|
13
|
+
import type { AlertRule } from '../harness/adapter';
|
|
14
14
|
|
|
15
15
|
describe('INT-005: Baseline Learning Then Attack Burst', () => {
|
|
16
16
|
let arp: ArpWrapper;
|
|
@@ -11,7 +11,7 @@
|
|
|
11
11
|
|
|
12
12
|
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
13
13
|
import { ArpWrapper } from '../harness/arp-wrapper';
|
|
14
|
-
import type { AlertRule } from '
|
|
14
|
+
import type { AlertRule } from '../harness/adapter';
|
|
15
15
|
|
|
16
16
|
describe('INT-006: Multi-Monitor Event Correlation', () => {
|
|
17
17
|
let arp: ArpWrapper;
|
|
@@ -12,8 +12,8 @@ import * as fs from 'fs';
|
|
|
12
12
|
import * as os from 'os';
|
|
13
13
|
import * as path from 'path';
|
|
14
14
|
import { ArpWrapper } from '../harness/arp-wrapper';
|
|
15
|
-
import {
|
|
16
|
-
import type { AlertRule } from '
|
|
15
|
+
import { createAdapter } from '../harness/create-adapter';
|
|
16
|
+
import type { AlertRule, BudgetManager } from '../harness/adapter';
|
|
17
17
|
|
|
18
18
|
describe('INT-007: Budget Exhaustion Attack', () => {
|
|
19
19
|
let arp: ArpWrapper;
|
|
@@ -51,7 +51,7 @@ describe('INT-007: Budget Exhaustion Attack', () => {
|
|
|
51
51
|
});
|
|
52
52
|
|
|
53
53
|
it('should create a budget controller with tiny budget', () => {
|
|
54
|
-
const budget =
|
|
54
|
+
const budget = createAdapter().createBudgetManager(budgetDir, {
|
|
55
55
|
budgetUsd: 0.01,
|
|
56
56
|
maxCallsPerHour: 5,
|
|
57
57
|
});
|
|
@@ -64,7 +64,7 @@ describe('INT-007: Budget Exhaustion Attack', () => {
|
|
|
64
64
|
});
|
|
65
65
|
|
|
66
66
|
it('should exhaust budget after repeated spend calls', () => {
|
|
67
|
-
const budget =
|
|
67
|
+
const budget = createAdapter().createBudgetManager(budgetDir, {
|
|
68
68
|
budgetUsd: 0.01,
|
|
69
69
|
maxCallsPerHour: 100,
|
|
70
70
|
});
|
|
@@ -84,7 +84,7 @@ describe('INT-007: Budget Exhaustion Attack', () => {
|
|
|
84
84
|
});
|
|
85
85
|
|
|
86
86
|
it('should exhaust hourly rate limit with rapid calls', () => {
|
|
87
|
-
const budget =
|
|
87
|
+
const budget = createAdapter().createBudgetManager(budgetDir, {
|
|
88
88
|
budgetUsd: 100, // Large budget so dollar limit is not the issue
|
|
89
89
|
maxCallsPerHour: 5,
|
|
90
90
|
});
|
|
@@ -103,7 +103,7 @@ describe('INT-007: Budget Exhaustion Attack', () => {
|
|
|
103
103
|
});
|
|
104
104
|
|
|
105
105
|
it('should still capture threat events via L0 rules after budget exhaustion', async () => {
|
|
106
|
-
const budget =
|
|
106
|
+
const budget = createAdapter().createBudgetManager(budgetDir, {
|
|
107
107
|
budgetUsd: 0.01,
|
|
108
108
|
maxCallsPerHour: 100,
|
|
109
109
|
});
|
|
@@ -145,7 +145,7 @@ describe('INT-007: Budget Exhaustion Attack', () => {
|
|
|
145
145
|
});
|
|
146
146
|
|
|
147
147
|
it('should simulate noise flood followed by real attack', async () => {
|
|
148
|
-
const budget =
|
|
148
|
+
const budget = createAdapter().createBudgetManager(budgetDir, {
|
|
149
149
|
budgetUsd: 0.01,
|
|
150
150
|
maxCallsPerHour: 100,
|
|
151
151
|
});
|
|
@@ -218,7 +218,7 @@ describe('INT-007: Budget Exhaustion Attack', () => {
|
|
|
218
218
|
});
|
|
219
219
|
|
|
220
220
|
it('should track budget status accurately through exhaustion', () => {
|
|
221
|
-
const budget =
|
|
221
|
+
const budget = createAdapter().createBudgetManager(budgetDir, {
|
|
222
222
|
budgetUsd: 0.05,
|
|
223
223
|
maxCallsPerHour: 100,
|
|
224
224
|
});
|