@opena2a/oasb 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +88 -23
- package/dist/harness/adapter.d.ts +187 -0
- package/dist/harness/adapter.js +18 -0
- package/dist/harness/arp-wrapper.d.ts +24 -20
- package/dist/harness/arp-wrapper.js +114 -28
- package/dist/harness/create-adapter.d.ts +16 -0
- package/dist/harness/create-adapter.js +36 -0
- package/dist/harness/event-collector.d.ts +1 -1
- package/dist/harness/llm-guard-wrapper.d.ts +31 -0
- package/dist/harness/llm-guard-wrapper.js +315 -0
- package/dist/harness/mock-llm-adapter.d.ts +2 -2
- package/dist/harness/mock-llm-adapter.js +6 -5
- package/dist/harness/types.d.ts +4 -38
- package/package.json +15 -7
- package/src/atomic/ai-layer/AT-AI-001.prompt-input-scan.test.ts +100 -0
- package/src/atomic/ai-layer/AT-AI-002.prompt-output-scan.test.ts +77 -0
- package/src/atomic/ai-layer/AT-AI-003.mcp-tool-scan.test.ts +121 -0
- package/src/atomic/ai-layer/AT-AI-004.a2a-message-scan.test.ts +107 -0
- package/src/atomic/ai-layer/AT-AI-005.pattern-coverage.test.ts +97 -0
- package/src/atomic/enforcement/AT-ENF-001.log-action.test.ts +4 -4
- package/src/atomic/enforcement/AT-ENF-002.alert-callback.test.ts +5 -5
- package/src/atomic/enforcement/AT-ENF-003.pause-sigstop.test.ts +4 -4
- package/src/atomic/enforcement/AT-ENF-004.kill-sigterm.test.ts +5 -5
- package/src/atomic/enforcement/AT-ENF-005.resume-sigcont.test.ts +4 -4
- package/src/atomic/intelligence/AT-INT-001.l0-rule-match.test.ts +1 -1
- package/src/atomic/intelligence/AT-INT-002.l1-anomaly-score.test.ts +10 -8
- package/src/atomic/intelligence/AT-INT-003.l2-escalation.test.ts +1 -1
- package/src/atomic/intelligence/AT-INT-004.budget-exhaustion.test.ts +8 -6
- package/src/atomic/intelligence/AT-INT-005.baseline-learning.test.ts +9 -9
- package/src/baseline/BL-002.anomaly-injection.test.ts +6 -6
- package/src/baseline/BL-003.baseline-persistence.test.ts +9 -9
- package/src/harness/adapter.ts +222 -0
- package/src/harness/arp-wrapper.ts +150 -42
- package/src/harness/create-adapter.ts +49 -0
- package/src/harness/event-collector.ts +1 -1
- package/src/harness/llm-guard-wrapper.ts +333 -0
- package/src/harness/mock-llm-adapter.ts +7 -6
- package/src/harness/types.ts +31 -39
- package/src/integration/INT-001.data-exfil-detection.test.ts +1 -1
- package/src/integration/INT-002.mcp-tool-abuse.test.ts +1 -1
- package/src/integration/INT-003.prompt-injection-response.test.ts +1 -1
- package/src/integration/INT-004.a2a-trust-exploitation.test.ts +1 -1
- package/src/integration/INT-005.baseline-then-attack.test.ts +1 -1
- package/src/integration/INT-006.multi-monitor-correlation.test.ts +1 -1
- package/src/integration/INT-007.budget-exhaustion-attack.test.ts +8 -8
- package/src/integration/INT-008.kill-switch-recovery.test.ts +6 -6
package/README.md
CHANGED
|
@@ -1,16 +1,29 @@
|
|
|
1
|
-
> **[OpenA2A](https://opena2a
|
|
1
|
+
> **[OpenA2A](https://github.com/opena2a-org/opena2a)**: [Secretless](https://github.com/opena2a-org/secretless-ai) · [HackMyAgent](https://github.com/opena2a-org/hackmyagent) · [ABG](https://github.com/opena2a-org/AI-BrowserGuard) · [AIM](https://github.com/opena2a-org/agent-identity-management) · [ARP](https://github.com/opena2a-org/hackmyagent#agent-runtime-protection) · [DVAA](https://github.com/opena2a-org/damn-vulnerable-ai-agent)
|
|
2
2
|
|
|
3
3
|
# OASB — Open Agent Security Benchmark
|
|
4
4
|
|
|
5
|
+
> **Note:** OASB controls are also available in [HackMyAgent](https://github.com/opena2a-org/hackmyagent) v0.8.0+ via `opena2a benchmark`. This repository is the canonical source for the full 222-test evaluation suite and is actively maintained. ARP (the reference adapter) is now part of HackMyAgent — install via `npm install arp-guard`.
|
|
6
|
+
|
|
5
7
|
[](https://opensource.org/licenses/Apache-2.0)
|
|
6
|
-
[](https://github.com/opena2a-org/oasb)
|
|
7
9
|
[](https://atlas.mitre.org/)
|
|
8
10
|
|
|
9
11
|
**MITRE ATT&CK Evaluations, but for AI agent security products.**
|
|
10
12
|
|
|
11
|
-
|
|
13
|
+
222 standardized attack scenarios that evaluate whether a runtime security product can detect and respond to threats against AI agents. Each test is mapped to MITRE ATLAS and OWASP Agentic Top 10. Plug in your product, run the suite, get a detection coverage scorecard.
|
|
14
|
+
|
|
15
|
+
[OASB Website](https://oasb.ai) | [MITRE ATLAS Coverage](#mitre-atlas-coverage)
|
|
16
|
+
|
|
17
|
+
---
|
|
18
|
+
|
|
19
|
+
## Updates
|
|
12
20
|
|
|
13
|
-
|
|
21
|
+
| Date | Change |
|
|
22
|
+
|------|--------|
|
|
23
|
+
| 2026-03-23 | `arp-guard` v0.3.0 — ARP now re-exports from HackMyAgent. Updated OASB to v0.3.0. All 222 tests pass. Updated Quick Start (no standalone ARP clone). |
|
|
24
|
+
| 2026-02-19 | Added 40 AI-layer test scenarios (AT-AI-001 through AT-AI-005) for prompt, MCP, and A2A scanning via ARP v0.2.0. Total tests: 222. |
|
|
25
|
+
| 2026-02-18 | Added integration tests for DVAA v0.4.0 MCP JSON-RPC and A2A endpoints. |
|
|
26
|
+
| 2026-02-09 | Initial release -- 182 attack scenarios across 10 MITRE ATLAS techniques. |
|
|
14
27
|
|
|
15
28
|
---
|
|
16
29
|
|
|
@@ -34,9 +47,10 @@ Use both together: **HackMyAgent** finds vulnerabilities in your agent, **OASB**
|
|
|
34
47
|
## Table of Contents
|
|
35
48
|
|
|
36
49
|
- [Quick Start](#quick-start)
|
|
50
|
+
- [Usage via OpenA2A CLI](#usage-via-opena2a-cli)
|
|
37
51
|
- [What Gets Tested](#what-gets-tested)
|
|
38
52
|
- [Test Categories](#test-categories)
|
|
39
|
-
- [Atomic Tests](#atomic-tests-srcatomic) —
|
|
53
|
+
- [Atomic Tests](#atomic-tests-srcatomic) — 65 discrete detection tests (OS-level + AI-layer)
|
|
40
54
|
- [Integration Tests](#integration-tests-srcintegration) — 8 multi-step attack chains
|
|
41
55
|
- [Baseline Tests](#baseline-tests-srcbaseline) — 3 false positive validations
|
|
42
56
|
- [E2E Tests](#e2e-tests-srce2e) — 6 real OS-level detection tests
|
|
@@ -49,21 +63,20 @@ Use both together: **HackMyAgent** finds vulnerabilities in your agent, **OASB**
|
|
|
49
63
|
|
|
50
64
|
## Quick Start
|
|
51
65
|
|
|
52
|
-
|
|
66
|
+
Ships with [ARP](https://www.npmjs.com/package/arp-guard) (`arp-guard`) as the reference adapter. To evaluate your own security product, implement the `SecurityProductAdapter` interface defined in `src/harness/adapter.ts` and run the same 222 tests.
|
|
53
67
|
|
|
54
68
|
```bash
|
|
55
|
-
git clone https://github.com/opena2a-org/arp.git
|
|
56
69
|
git clone https://github.com/opena2a-org/oasb.git
|
|
57
|
-
|
|
58
|
-
cd arp && npm install && npm run build && cd ..
|
|
59
70
|
cd oasb && npm install
|
|
60
71
|
```
|
|
61
72
|
|
|
73
|
+
> `arp-guard` is an optional peer dependency. It is installed automatically for running the reference ARP evaluation. If you are implementing your own adapter, you do not need it.
|
|
74
|
+
|
|
62
75
|
### Run the Evaluation
|
|
63
76
|
|
|
64
77
|
```bash
|
|
65
|
-
npm test # Full evaluation (
|
|
66
|
-
npm run test:atomic #
|
|
78
|
+
npm test # Full evaluation (222 tests)
|
|
79
|
+
npm run test:atomic # 65 atomic tests (no external deps)
|
|
67
80
|
npm run test:integration # 8 integration scenarios
|
|
68
81
|
npm run test:baseline # 3 baseline tests
|
|
69
82
|
npx vitest run src/e2e/ # 6 E2E tests (real OS detection)
|
|
@@ -71,6 +84,44 @@ npx vitest run src/e2e/ # 6 E2E tests (real OS detection)
|
|
|
71
84
|
|
|
72
85
|
---
|
|
73
86
|
|
|
87
|
+
## Usage via OpenA2A CLI
|
|
88
|
+
|
|
89
|
+
OASB is available as a built-in adapter in the [OpenA2A CLI](https://github.com/opena2a-org/opena2a) via the `benchmark` command. The CLI delegates to the `oasb` package using an import adapter, so no separate installation is needed if you already have the CLI installed.
|
|
90
|
+
|
|
91
|
+
### Run the full benchmark suite
|
|
92
|
+
|
|
93
|
+
```bash
|
|
94
|
+
opena2a benchmark run
|
|
95
|
+
```
|
|
96
|
+
|
|
97
|
+
Executes all 222 test scenarios (atomic, integration, baseline, and E2E) and produces a detection coverage scorecard.
|
|
98
|
+
|
|
99
|
+
### Run a specific MITRE ATLAS technique
|
|
100
|
+
|
|
101
|
+
```bash
|
|
102
|
+
opena2a benchmark run --technique T0015
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Filters the benchmark to a single MITRE ATLAS technique ID (e.g., `T0015` for Evasion). Useful for targeted evaluation of a specific detection capability.
|
|
106
|
+
|
|
107
|
+
### Generate machine-readable output for CI
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
opena2a benchmark run --format json
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Outputs the compliance score and per-technique detection rates as JSON. Integrate this into CI pipelines to enforce minimum detection thresholds on every build.
|
|
114
|
+
|
|
115
|
+
### Combining flags
|
|
116
|
+
|
|
117
|
+
```bash
|
|
118
|
+
opena2a benchmark run --technique T0057 --format json
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Flags can be combined to run a single technique and produce JSON output for automated processing.
|
|
122
|
+
|
|
123
|
+
---
|
|
124
|
+
|
|
74
125
|
## What Gets Tested
|
|
75
126
|
|
|
76
127
|
Each test simulates a specific attack technique and checks whether the security product under evaluation detects it, classifies it correctly, and responds appropriately.
|
|
@@ -86,7 +137,8 @@ Each test simulates a specific attack technique and checks whether the security
|
|
|
86
137
|
| Baseline behavior | 13 | False positive rates, anomaly injection, baseline persistence |
|
|
87
138
|
| Real OS detection | 14 | Live filesystem watches, process polling, network monitoring |
|
|
88
139
|
| Application-level hooks | 14 | Pre-execution interception of spawn, connect, read/write |
|
|
89
|
-
|
|
|
140
|
+
| AI-layer scanning | 40 | Prompt injection/output, MCP tool call validation, A2A message scanning, pattern coverage |
|
|
141
|
+
| **Total** | **222** | **10 MITRE ATLAS techniques** |
|
|
90
142
|
|
|
91
143
|
---
|
|
92
144
|
|
|
@@ -96,6 +148,19 @@ Each test simulates a specific attack technique and checks whether the security
|
|
|
96
148
|
|
|
97
149
|
Discrete tests that exercise individual detection capabilities. Each test injects a single attack event and verifies the product detects it with the correct classification and severity.
|
|
98
150
|
|
|
151
|
+
<details>
|
|
152
|
+
<summary><strong>AI-Layer Scanning</strong> — 5 files (40 tests)</summary>
|
|
153
|
+
|
|
154
|
+
| Test | What the Product Should Detect |
|
|
155
|
+
|------|-------------------------------|
|
|
156
|
+
| AT-AI-001 | Prompt input scanning — PI, JB, DE, CM pattern detection (11 tests) |
|
|
157
|
+
| AT-AI-002 | Prompt output scanning — OL pattern detection, data leak prevention (6 tests) |
|
|
158
|
+
| AT-AI-003 | MCP tool call scanning — path traversal, command injection, SSRF, allowlist (11 tests) |
|
|
159
|
+
| AT-AI-004 | A2A message scanning — identity spoofing, delegation abuse, trust validation (7 tests) |
|
|
160
|
+
| AT-AI-005 | Pattern coverage — all 19 patterns detect known payloads, no false positives (5 tests) |
|
|
161
|
+
|
|
162
|
+
</details>
|
|
163
|
+
|
|
99
164
|
<details>
|
|
100
165
|
<summary><strong>Process Detection</strong> — 5 files</summary>
|
|
101
166
|
|
|
@@ -222,7 +287,7 @@ Real OS-level detection — no mocks, no event injection. These tests spawn real
|
|
|
222
287
|
|
|
223
288
|
## MITRE ATLAS Coverage
|
|
224
289
|
|
|
225
|
-
10 unique techniques across
|
|
290
|
+
10 unique techniques across 47 test files:
|
|
226
291
|
|
|
227
292
|
| Technique | ID | Tests |
|
|
228
293
|
|-----------|----|-------|
|
|
@@ -245,14 +310,15 @@ The harness wraps a security product via an adapter interface and provides event
|
|
|
245
310
|
|
|
246
311
|
| File | Purpose |
|
|
247
312
|
|------|---------|
|
|
248
|
-
| `
|
|
313
|
+
| `adapter.ts` | **Product-agnostic adapter interface** — implement `SecurityProductAdapter` for your product |
|
|
314
|
+
| `arp-wrapper.ts` | Reference adapter — wraps ARP (`arp-guard`) with event collection, injection helpers |
|
|
249
315
|
| `event-collector.ts` | Captures events with async `waitForEvent(predicate, timeout)` |
|
|
250
316
|
| `mock-llm-adapter.ts` | Deterministic LLM for intelligence layer testing (pattern-based responses) |
|
|
251
317
|
| `dvaa-client.ts` | HTTP client for DVAA vulnerable agent endpoints |
|
|
252
318
|
| `dvaa-manager.ts` | DVAA process lifecycle (spawn, health check, teardown) |
|
|
253
319
|
| `metrics.ts` | Detection rate, false positive rate, P95 latency computation |
|
|
254
320
|
|
|
255
|
-
To evaluate your own product: implement
|
|
321
|
+
To evaluate your own product: implement `SecurityProductAdapter` from `src/harness/adapter.ts`, swap it into the test harness (set the `OASB_ADAPTER` environment variable to the path of the module that exports your adapter class), and run the full suite. The interface defines event types, scanner interfaces, and enforcement contracts — no dependency on any specific product.
|
|
256
322
|
|
|
257
323
|
---
|
|
258
324
|
|
|
@@ -260,12 +326,12 @@ To evaluate your own product: implement an adapter that translates OASB events i
|
|
|
260
326
|
|
|
261
327
|
OASB documents what the reference product (ARP) does and doesn't catch. Other products may have different gap profiles — that's the point of running the benchmark.
|
|
262
328
|
|
|
263
|
-
| Gap | Severity | Test |
|
|
264
|
-
|
|
265
|
-
| Anomaly baselines not persisted across restarts | Medium | BL-003 |
|
|
266
|
-
| No connection rate anomaly detection | Medium | AT-NET-003 |
|
|
267
|
-
| No HTTP response
|
|
268
|
-
| No cross-monitor event correlation | Architectural | INT-006 |
|
|
329
|
+
| Gap | Severity | Test | Notes |
|
|
330
|
+
|-----|----------|------|-------|
|
|
331
|
+
| Anomaly baselines not persisted across restarts | Medium | BL-003 | In-memory only; restarts lose learned behavior |
|
|
332
|
+
| No connection rate anomaly detection | Medium | AT-NET-003 | Network monitor tracks hosts, not burst rates |
|
|
333
|
+
| No HTTP response body monitoring | Low | INT-003 | AI-layer output scanning (`PromptInterceptor.scanOutput`) covers LLM responses; raw HTTP response bodies are not inspected |
|
|
334
|
+
| No cross-monitor event correlation | Architectural | INT-006 | EventEngine is a flat bus; no attack-chain aggregation |
|
|
269
335
|
|
|
270
336
|
---
|
|
271
337
|
|
|
@@ -281,7 +347,6 @@ Apache-2.0
|
|
|
281
347
|
|---------|-------------|---------|
|
|
282
348
|
| [**AIM**](https://github.com/opena2a-org/agent-identity-management) | Agent Identity Management -- identity and access control for AI agents | `pip install aim-sdk` |
|
|
283
349
|
| [**HackMyAgent**](https://github.com/opena2a-org/hackmyagent) | Security scanner -- 147 checks, attack mode, auto-fix | `npx hackmyagent secure` |
|
|
284
|
-
| [**
|
|
285
|
-
| [**ARP**](https://github.com/opena2a-org/arp) | Agent Runtime Protection -- process, network, filesystem monitoring | `npm install @opena2a/arp` |
|
|
350
|
+
| [**ARP**](https://www.npmjs.com/package/arp-guard) | Agent Runtime Protection -- process, network, filesystem, AI-layer monitoring | `npm install arp-guard` |
|
|
286
351
|
| [**Secretless AI**](https://github.com/opena2a-org/secretless-ai) | Keep credentials out of AI context windows | `npx secretless-ai init` |
|
|
287
352
|
| [**DVAA**](https://github.com/opena2a-org/damn-vulnerable-ai-agent) | Damn Vulnerable AI Agent -- security training and red-teaming | `docker pull opena2a/dvaa` |
|
|
@@ -0,0 +1,187 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* OASB Security Product Adapter Interface
|
|
3
|
+
*
|
|
4
|
+
* Implement this interface to evaluate your security product against OASB.
|
|
5
|
+
* The reference implementation (ARP adapter) is in arp-wrapper.ts.
|
|
6
|
+
*
|
|
7
|
+
* @example
|
|
8
|
+
* // Vendor implements the adapter for their product:
|
|
9
|
+
* class MyProductAdapter implements SecurityProductAdapter { ... }
|
|
10
|
+
*
|
|
11
|
+
* // OASB tests use the adapter, not your product directly:
|
|
12
|
+
* const adapter = createAdapter(); // returns configured adapter
|
|
13
|
+
* await adapter.start();
|
|
14
|
+
* await adapter.injectEvent({ ... });
|
|
15
|
+
* const threats = adapter.getEventsByCategory('threat');
|
|
16
|
+
*/
|
|
17
|
+
export type EventCategory = 'normal' | 'activity' | 'threat' | 'violation';
|
|
18
|
+
export type EventSeverity = 'info' | 'low' | 'medium' | 'high' | 'critical';
|
|
19
|
+
export type MonitorSource = 'process' | 'network' | 'filesystem' | 'prompt' | 'mcp-protocol' | 'a2a-protocol' | string;
|
|
20
|
+
export type EnforcementAction = 'log' | 'alert' | 'pause' | 'kill' | 'resume';
|
|
21
|
+
export interface SecurityEvent {
|
|
22
|
+
id?: string;
|
|
23
|
+
timestamp?: string;
|
|
24
|
+
source: MonitorSource;
|
|
25
|
+
category: EventCategory;
|
|
26
|
+
severity: EventSeverity;
|
|
27
|
+
description: string;
|
|
28
|
+
data?: Record<string, unknown>;
|
|
29
|
+
classifiedBy?: string;
|
|
30
|
+
}
|
|
31
|
+
export interface EnforcementResult {
|
|
32
|
+
action: EnforcementAction;
|
|
33
|
+
success: boolean;
|
|
34
|
+
reason: string;
|
|
35
|
+
event: SecurityEvent;
|
|
36
|
+
pid?: number;
|
|
37
|
+
}
|
|
38
|
+
export interface AlertRule {
|
|
39
|
+
name: string;
|
|
40
|
+
condition: AlertCondition;
|
|
41
|
+
action: EnforcementAction;
|
|
42
|
+
}
|
|
43
|
+
export interface AlertCondition {
|
|
44
|
+
source?: MonitorSource;
|
|
45
|
+
category?: EventCategory;
|
|
46
|
+
minSeverity?: EventSeverity;
|
|
47
|
+
descriptionContains?: string;
|
|
48
|
+
}
|
|
49
|
+
export interface ScanResult {
|
|
50
|
+
detected: boolean;
|
|
51
|
+
matches: ScanMatch[];
|
|
52
|
+
truncated?: boolean;
|
|
53
|
+
}
|
|
54
|
+
export interface ScanMatch {
|
|
55
|
+
pattern: ThreatPattern;
|
|
56
|
+
matchedText: string;
|
|
57
|
+
}
|
|
58
|
+
export interface ThreatPattern {
|
|
59
|
+
id: string;
|
|
60
|
+
category: string;
|
|
61
|
+
description: string;
|
|
62
|
+
pattern: RegExp;
|
|
63
|
+
severity: 'medium' | 'high' | 'critical';
|
|
64
|
+
}
|
|
65
|
+
export interface PromptScanner {
|
|
66
|
+
start(): Promise<void>;
|
|
67
|
+
stop(): Promise<void>;
|
|
68
|
+
scanInput(text: string): ScanResult;
|
|
69
|
+
scanOutput(text: string): ScanResult;
|
|
70
|
+
}
|
|
71
|
+
export interface MCPScanner {
|
|
72
|
+
start(): Promise<void>;
|
|
73
|
+
stop(): Promise<void>;
|
|
74
|
+
scanToolCall(toolName: string, params: Record<string, unknown>): ScanResult;
|
|
75
|
+
}
|
|
76
|
+
export interface A2AScanner {
|
|
77
|
+
start(): Promise<void>;
|
|
78
|
+
stop(): Promise<void>;
|
|
79
|
+
scanMessage(from: string, to: string, content: string): ScanResult;
|
|
80
|
+
}
|
|
81
|
+
export interface PatternScanner {
|
|
82
|
+
scanText(text: string, patterns: readonly ThreatPattern[]): ScanResult;
|
|
83
|
+
getAllPatterns(): readonly ThreatPattern[];
|
|
84
|
+
getPatternSets(): Record<string, readonly ThreatPattern[]>;
|
|
85
|
+
}
|
|
86
|
+
export interface BudgetStatus {
|
|
87
|
+
spent: number;
|
|
88
|
+
budget: number;
|
|
89
|
+
remaining: number;
|
|
90
|
+
percentUsed: number;
|
|
91
|
+
callsThisHour: number;
|
|
92
|
+
maxCallsPerHour: number;
|
|
93
|
+
totalCalls: number;
|
|
94
|
+
}
|
|
95
|
+
export interface BudgetManager {
|
|
96
|
+
canAfford(estimatedCostUsd: number): boolean;
|
|
97
|
+
record(costUsd: number, tokens: number): void;
|
|
98
|
+
getStatus(): BudgetStatus;
|
|
99
|
+
reset(): void;
|
|
100
|
+
}
|
|
101
|
+
export interface AnomalyScorer {
|
|
102
|
+
score(event: SecurityEvent): number;
|
|
103
|
+
record(event: SecurityEvent): void;
|
|
104
|
+
getBaseline(source: string): {
|
|
105
|
+
mean: number;
|
|
106
|
+
stddev: number;
|
|
107
|
+
count: number;
|
|
108
|
+
} | null;
|
|
109
|
+
reset(): void;
|
|
110
|
+
}
|
|
111
|
+
export interface LLMAdapter {
|
|
112
|
+
name: string;
|
|
113
|
+
assess(prompt: string): Promise<LLMResponse>;
|
|
114
|
+
}
|
|
115
|
+
export interface LLMResponse {
|
|
116
|
+
content: string;
|
|
117
|
+
usage?: {
|
|
118
|
+
inputTokens: number;
|
|
119
|
+
outputTokens: number;
|
|
120
|
+
};
|
|
121
|
+
}
|
|
122
|
+
export interface EventEngine {
|
|
123
|
+
emit(event: Omit<SecurityEvent, 'id' | 'timestamp' | 'classifiedBy'>): SecurityEvent;
|
|
124
|
+
onEvent(handler: (event: SecurityEvent) => void | Promise<void>): void;
|
|
125
|
+
}
|
|
126
|
+
export interface EnforcementEngine {
|
|
127
|
+
execute(action: EnforcementAction, event: SecurityEvent): Promise<EnforcementResult>;
|
|
128
|
+
pause(pid: number): boolean;
|
|
129
|
+
resume(pid: number): boolean;
|
|
130
|
+
kill(pid: number, signal?: string): boolean;
|
|
131
|
+
getPausedPids(): number[];
|
|
132
|
+
setAlertCallback(callback: (event: SecurityEvent, rule: AlertRule) => void): void;
|
|
133
|
+
}
|
|
134
|
+
export interface SecurityProductAdapter {
|
|
135
|
+
/** Start the security product */
|
|
136
|
+
start(): Promise<void>;
|
|
137
|
+
/** Stop the security product */
|
|
138
|
+
stop(): Promise<void>;
|
|
139
|
+
/** Inject a synthetic event for testing */
|
|
140
|
+
injectEvent(event: Omit<SecurityEvent, 'id' | 'timestamp' | 'classifiedBy'>): Promise<SecurityEvent>;
|
|
141
|
+
/** Wait for an event matching a predicate */
|
|
142
|
+
waitForEvent(predicate: (event: SecurityEvent) => boolean, timeoutMs?: number): Promise<SecurityEvent>;
|
|
143
|
+
/** Get collected events */
|
|
144
|
+
getEvents(): SecurityEvent[];
|
|
145
|
+
getEventsByCategory(category: EventCategory): SecurityEvent[];
|
|
146
|
+
getEnforcements(): EnforcementResult[];
|
|
147
|
+
getEnforcementsByAction(action: EnforcementAction): EnforcementResult[];
|
|
148
|
+
/** Reset collected events */
|
|
149
|
+
resetCollector(): void;
|
|
150
|
+
/** Access sub-components (for tests that need direct access) */
|
|
151
|
+
getEventEngine(): EventEngine;
|
|
152
|
+
getEnforcementEngine(): EnforcementEngine;
|
|
153
|
+
/** Factory methods for component-level testing */
|
|
154
|
+
createPromptScanner(): PromptScanner;
|
|
155
|
+
createMCPScanner(allowedTools?: string[]): MCPScanner;
|
|
156
|
+
createA2AScanner(trustedAgents?: string[]): A2AScanner;
|
|
157
|
+
createPatternScanner(): PatternScanner;
|
|
158
|
+
createBudgetManager(dataDir: string, config?: {
|
|
159
|
+
budgetUsd?: number;
|
|
160
|
+
maxCallsPerHour?: number;
|
|
161
|
+
}): BudgetManager;
|
|
162
|
+
createAnomalyScorer(): AnomalyScorer;
|
|
163
|
+
}
|
|
164
|
+
export interface LabConfig {
|
|
165
|
+
monitors?: {
|
|
166
|
+
process?: boolean;
|
|
167
|
+
network?: boolean;
|
|
168
|
+
filesystem?: boolean;
|
|
169
|
+
};
|
|
170
|
+
rules?: AlertRule[];
|
|
171
|
+
intelligence?: {
|
|
172
|
+
enabled?: boolean;
|
|
173
|
+
};
|
|
174
|
+
dataDir?: string;
|
|
175
|
+
filesystemWatchPaths?: string[];
|
|
176
|
+
filesystemAllowedPaths?: string[];
|
|
177
|
+
networkAllowedHosts?: string[];
|
|
178
|
+
processIntervalMs?: number;
|
|
179
|
+
networkIntervalMs?: number;
|
|
180
|
+
interceptors?: {
|
|
181
|
+
process?: boolean;
|
|
182
|
+
network?: boolean;
|
|
183
|
+
filesystem?: boolean;
|
|
184
|
+
};
|
|
185
|
+
interceptorNetworkAllowedHosts?: string[];
|
|
186
|
+
interceptorFilesystemAllowedPaths?: string[];
|
|
187
|
+
}
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* OASB Security Product Adapter Interface
|
|
4
|
+
*
|
|
5
|
+
* Implement this interface to evaluate your security product against OASB.
|
|
6
|
+
* The reference implementation (ARP adapter) is in arp-wrapper.ts.
|
|
7
|
+
*
|
|
8
|
+
* @example
|
|
9
|
+
* // Vendor implements the adapter for their product:
|
|
10
|
+
* class MyProductAdapter implements SecurityProductAdapter { ... }
|
|
11
|
+
*
|
|
12
|
+
* // OASB tests use the adapter, not your product directly:
|
|
13
|
+
* const adapter = createAdapter(); // returns configured adapter
|
|
14
|
+
* await adapter.start();
|
|
15
|
+
* await adapter.injectEvent({ ... });
|
|
16
|
+
* const threats = adapter.getEventsByCategory('threat');
|
|
17
|
+
*/
|
|
18
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
@@ -1,28 +1,32 @@
|
|
|
1
|
-
import { AgentRuntimeProtection, EventEngine, EnforcementEngine, type ARPEvent } from '@opena2a/arp';
|
|
2
1
|
import { EventCollector } from './event-collector';
|
|
3
|
-
import type { LabConfig } from './
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
* Creates temp dataDir per test, registers EventCollector,
|
|
7
|
-
* and provides injection + assertion helpers.
|
|
8
|
-
*/
|
|
9
|
-
export declare class ArpWrapper {
|
|
10
|
-
private arp;
|
|
2
|
+
import type { SecurityProductAdapter, SecurityEvent, EnforcementResult, LabConfig, PromptScanner, MCPScanner, A2AScanner, PatternScanner, BudgetManager, AnomalyScorer, EventEngine, EnforcementEngine as EnforcementEngineInterface } from './adapter';
|
|
3
|
+
export declare class ArpWrapper implements SecurityProductAdapter {
|
|
4
|
+
private _arpInstance;
|
|
11
5
|
private _dataDir;
|
|
12
6
|
readonly collector: EventCollector;
|
|
13
7
|
constructor(labConfig?: LabConfig);
|
|
14
8
|
start(): Promise<void>;
|
|
15
9
|
stop(): Promise<void>;
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
10
|
+
injectEvent(event: Omit<SecurityEvent, 'id' | 'timestamp' | 'classifiedBy'>): Promise<SecurityEvent>;
|
|
11
|
+
waitForEvent(predicate: (event: SecurityEvent) => boolean, timeoutMs?: number): Promise<SecurityEvent>;
|
|
12
|
+
getEvents(): SecurityEvent[];
|
|
13
|
+
getEventsByCategory(category: string): SecurityEvent[];
|
|
14
|
+
getEnforcements(): EnforcementResult[];
|
|
15
|
+
getEnforcementsByAction(action: string): EnforcementResult[];
|
|
16
|
+
resetCollector(): void;
|
|
17
|
+
getInstance(): any;
|
|
18
|
+
getEventEngine(): EventEngine;
|
|
19
|
+
getEnforcementEngine(): EnforcementEngineInterface;
|
|
20
|
+
getEngine(): any;
|
|
21
|
+
getEnforcement(): any;
|
|
27
22
|
get dataDir(): string;
|
|
23
|
+
createPromptScanner(): PromptScanner;
|
|
24
|
+
createMCPScanner(allowedTools?: string[]): MCPScanner;
|
|
25
|
+
createA2AScanner(trustedAgents?: string[]): A2AScanner;
|
|
26
|
+
createPatternScanner(): PatternScanner;
|
|
27
|
+
createBudgetManager(dataDir: string, config?: {
|
|
28
|
+
budgetUsd?: number;
|
|
29
|
+
maxCallsPerHour?: number;
|
|
30
|
+
}): BudgetManager;
|
|
31
|
+
createAnomalyScorer(): AnomalyScorer;
|
|
28
32
|
}
|
|
@@ -34,19 +34,32 @@ var __importStar = (this && this.__importStar) || (function () {
|
|
|
34
34
|
})();
|
|
35
35
|
Object.defineProperty(exports, "__esModule", { value: true });
|
|
36
36
|
exports.ArpWrapper = void 0;
|
|
37
|
+
/**
|
|
38
|
+
* ARP Adapter — Reference implementation of SecurityProductAdapter
|
|
39
|
+
*
|
|
40
|
+
* Wraps HackMyAgent's ARP (Agent Runtime Protection) for OASB evaluation.
|
|
41
|
+
* Other vendors implement their own adapter against the same interface.
|
|
42
|
+
*
|
|
43
|
+
* Uses lazy require() for arp-guard so the module is only loaded when
|
|
44
|
+
* this adapter is actually selected. Tests that use a different adapter
|
|
45
|
+
* never trigger the arp-guard import.
|
|
46
|
+
*/
|
|
37
47
|
const fs = __importStar(require("fs"));
|
|
38
48
|
const os = __importStar(require("os"));
|
|
39
49
|
const path = __importStar(require("path"));
|
|
40
|
-
const arp_1 = require("@opena2a/arp");
|
|
41
50
|
const event_collector_1 = require("./event-collector");
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
51
|
+
// Lazy-loaded arp-guard module
|
|
52
|
+
let _arp;
|
|
53
|
+
function arp() {
|
|
54
|
+
if (!_arp) {
|
|
55
|
+
_arp = require('arp-guard');
|
|
56
|
+
}
|
|
57
|
+
return _arp;
|
|
58
|
+
}
|
|
47
59
|
class ArpWrapper {
|
|
48
60
|
constructor(labConfig) {
|
|
49
61
|
this._dataDir = labConfig?.dataDir ?? fs.mkdtempSync(path.join(os.tmpdir(), 'arp-lab-'));
|
|
62
|
+
const { AgentRuntimeProtection } = arp();
|
|
50
63
|
const config = {
|
|
51
64
|
agentName: 'arp-lab-target',
|
|
52
65
|
agentDescription: 'Test target for ARP security lab',
|
|
@@ -85,19 +98,17 @@ class ArpWrapper {
|
|
|
85
98
|
},
|
|
86
99
|
},
|
|
87
100
|
};
|
|
88
|
-
this.
|
|
101
|
+
this._arpInstance = new AgentRuntimeProtection(config);
|
|
89
102
|
this.collector = new event_collector_1.EventCollector();
|
|
90
|
-
|
|
91
|
-
this.
|
|
92
|
-
this.arp.onEnforcement(this.collector.enforcementHandler);
|
|
103
|
+
this._arpInstance.onEvent(this.collector.eventHandler);
|
|
104
|
+
this._arpInstance.onEnforcement(this.collector.enforcementHandler);
|
|
93
105
|
}
|
|
94
106
|
async start() {
|
|
95
|
-
await this.
|
|
107
|
+
await this._arpInstance.start();
|
|
96
108
|
}
|
|
97
109
|
async stop() {
|
|
98
|
-
await this.
|
|
110
|
+
await this._arpInstance.stop();
|
|
99
111
|
this.collector.reset();
|
|
100
|
-
// Clean up temp dir
|
|
101
112
|
try {
|
|
102
113
|
fs.rmSync(this._dataDir, { recursive: true, force: true });
|
|
103
114
|
}
|
|
@@ -105,29 +116,104 @@ class ArpWrapper {
|
|
|
105
116
|
// Best effort cleanup
|
|
106
117
|
}
|
|
107
118
|
}
|
|
108
|
-
/** Get the underlying ARP instance */
|
|
109
|
-
getInstance() {
|
|
110
|
-
return this.arp;
|
|
111
|
-
}
|
|
112
|
-
/** Get the event engine for direct event injection */
|
|
113
|
-
getEngine() {
|
|
114
|
-
return this.arp.getEngine();
|
|
115
|
-
}
|
|
116
|
-
/** Get the enforcement engine */
|
|
117
|
-
getEnforcement() {
|
|
118
|
-
return this.arp.getEnforcement();
|
|
119
|
-
}
|
|
120
|
-
/** Inject a synthetic event into the ARP engine (for testing without real OS activity) */
|
|
121
119
|
async injectEvent(event) {
|
|
122
120
|
return this.getEngine().emit(event);
|
|
123
121
|
}
|
|
124
|
-
/** Wait for an event matching a predicate */
|
|
125
122
|
waitForEvent(predicate, timeoutMs = 10000) {
|
|
126
123
|
return this.collector.waitForEvent(predicate, timeoutMs);
|
|
127
124
|
}
|
|
128
|
-
|
|
125
|
+
getEvents() {
|
|
126
|
+
return this.collector.getEvents();
|
|
127
|
+
}
|
|
128
|
+
getEventsByCategory(category) {
|
|
129
|
+
return this.collector.eventsByCategory(category);
|
|
130
|
+
}
|
|
131
|
+
getEnforcements() {
|
|
132
|
+
return this.collector.getEnforcements();
|
|
133
|
+
}
|
|
134
|
+
getEnforcementsByAction(action) {
|
|
135
|
+
return this.collector.enforcementsByAction(action);
|
|
136
|
+
}
|
|
137
|
+
resetCollector() {
|
|
138
|
+
this.collector.reset();
|
|
139
|
+
}
|
|
140
|
+
getInstance() {
|
|
141
|
+
return this._arpInstance;
|
|
142
|
+
}
|
|
143
|
+
getEventEngine() {
|
|
144
|
+
return this._arpInstance.getEngine();
|
|
145
|
+
}
|
|
146
|
+
getEnforcementEngine() {
|
|
147
|
+
return this._arpInstance.getEnforcement();
|
|
148
|
+
}
|
|
149
|
+
getEngine() {
|
|
150
|
+
return this._arpInstance.getEngine();
|
|
151
|
+
}
|
|
152
|
+
getEnforcement() {
|
|
153
|
+
return this._arpInstance.getEnforcement();
|
|
154
|
+
}
|
|
129
155
|
get dataDir() {
|
|
130
156
|
return this._dataDir;
|
|
131
157
|
}
|
|
158
|
+
// ─── Factory Methods ────────────────────────────────────────────
|
|
159
|
+
createPromptScanner() {
|
|
160
|
+
const { EventEngine, PromptInterceptor } = arp();
|
|
161
|
+
const engine = new EventEngine({ agentName: 'oasb-prompt-test' });
|
|
162
|
+
const interceptor = new PromptInterceptor(engine);
|
|
163
|
+
return {
|
|
164
|
+
start: () => interceptor.start(),
|
|
165
|
+
stop: () => interceptor.stop(),
|
|
166
|
+
scanInput: (text) => interceptor.scanInput(text),
|
|
167
|
+
scanOutput: (text) => interceptor.scanOutput(text),
|
|
168
|
+
};
|
|
169
|
+
}
|
|
170
|
+
createMCPScanner(allowedTools) {
|
|
171
|
+
const { EventEngine, MCPProtocolInterceptor } = arp();
|
|
172
|
+
const engine = new EventEngine({ agentName: 'oasb-mcp-test' });
|
|
173
|
+
const interceptor = new MCPProtocolInterceptor(engine, allowedTools);
|
|
174
|
+
return {
|
|
175
|
+
start: () => interceptor.start(),
|
|
176
|
+
stop: () => interceptor.stop(),
|
|
177
|
+
scanToolCall: (toolName, params) => interceptor.scanToolCall(toolName, params),
|
|
178
|
+
};
|
|
179
|
+
}
|
|
180
|
+
createA2AScanner(trustedAgents) {
|
|
181
|
+
const { EventEngine, A2AProtocolInterceptor } = arp();
|
|
182
|
+
const engine = new EventEngine({ agentName: 'oasb-a2a-test' });
|
|
183
|
+
const interceptor = new A2AProtocolInterceptor(engine, trustedAgents);
|
|
184
|
+
return {
|
|
185
|
+
start: () => interceptor.start(),
|
|
186
|
+
stop: () => interceptor.stop(),
|
|
187
|
+
scanMessage: (from, to, content) => interceptor.scanMessage(from, to, content),
|
|
188
|
+
};
|
|
189
|
+
}
|
|
190
|
+
createPatternScanner() {
|
|
191
|
+
const { scanText: _scanText, ALL_PATTERNS: _allPatterns, PATTERN_SETS: _patternSets } = arp();
|
|
192
|
+
return {
|
|
193
|
+
scanText: (text, patterns) => _scanText(text, patterns),
|
|
194
|
+
getAllPatterns: () => _allPatterns,
|
|
195
|
+
getPatternSets: () => _patternSets,
|
|
196
|
+
};
|
|
197
|
+
}
|
|
198
|
+
createBudgetManager(dataDir, config) {
|
|
199
|
+
const { BudgetController } = arp();
|
|
200
|
+
const controller = new BudgetController(dataDir, config);
|
|
201
|
+
return {
|
|
202
|
+
canAfford: (cost) => controller.canAfford(cost),
|
|
203
|
+
record: (cost, tokens) => controller.record(cost, tokens),
|
|
204
|
+
getStatus: () => controller.getStatus(),
|
|
205
|
+
reset: () => controller.reset(),
|
|
206
|
+
};
|
|
207
|
+
}
|
|
208
|
+
createAnomalyScorer() {
|
|
209
|
+
const { AnomalyDetector } = arp();
|
|
210
|
+
const detector = new AnomalyDetector();
|
|
211
|
+
return {
|
|
212
|
+
score: (event) => detector.score(event),
|
|
213
|
+
record: (event) => detector.record(event),
|
|
214
|
+
getBaseline: (source) => detector.getBaseline(source),
|
|
215
|
+
reset: () => detector.reset(),
|
|
216
|
+
};
|
|
217
|
+
}
|
|
132
218
|
}
|
|
133
219
|
exports.ArpWrapper = ArpWrapper;
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Adapter factory — selects which security product adapter to use.
|
|
3
|
+
*
|
|
4
|
+
* Set OASB_ADAPTER env var to choose:
|
|
5
|
+
* - "arp" (default) — uses arp-guard (must be installed)
|
|
6
|
+
* - "llm-guard" — uses theRizwan/llm-guard
|
|
7
|
+
* - path to a JS/TS module that exports a class implementing SecurityProductAdapter
|
|
8
|
+
*
|
|
9
|
+
* All test files import from here instead of instantiating adapters directly.
|
|
10
|
+
*/
|
|
11
|
+
import type { SecurityProductAdapter, LabConfig } from './adapter';
|
|
12
|
+
/**
|
|
13
|
+
* Create a configured adapter instance.
|
|
14
|
+
* Uses OASB_ADAPTER env var to select the product under test.
|
|
15
|
+
*/
|
|
16
|
+
export declare function createAdapter(config?: LabConfig): SecurityProductAdapter;
|