@opena2a/oasb 0.2.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. package/README.md +61 -18
  2. package/dist/harness/adapter.d.ts +205 -0
  3. package/dist/harness/adapter.js +18 -0
  4. package/dist/harness/arp-wrapper.d.ts +25 -20
  5. package/dist/harness/arp-wrapper.js +137 -28
  6. package/dist/harness/capabilities.d.ts +26 -0
  7. package/dist/harness/capabilities.js +76 -0
  8. package/dist/harness/create-adapter.d.ts +16 -0
  9. package/dist/harness/create-adapter.js +40 -0
  10. package/dist/harness/event-collector.d.ts +1 -1
  11. package/dist/harness/llm-guard-wrapper.d.ts +32 -0
  12. package/dist/harness/llm-guard-wrapper.js +325 -0
  13. package/dist/harness/mock-llm-adapter.d.ts +2 -2
  14. package/dist/harness/mock-llm-adapter.js +6 -5
  15. package/dist/harness/rebuff-wrapper.d.ts +32 -0
  16. package/dist/harness/rebuff-wrapper.js +325 -0
  17. package/dist/harness/types.d.ts +4 -38
  18. package/package.json +15 -7
  19. package/src/atomic/ai-layer/AT-AI-001.prompt-input-scan.test.ts +18 -42
  20. package/src/atomic/ai-layer/AT-AI-002.prompt-output-scan.test.ts +13 -32
  21. package/src/atomic/ai-layer/AT-AI-003.mcp-tool-scan.test.ts +18 -42
  22. package/src/atomic/ai-layer/AT-AI-004.a2a-message-scan.test.ts +14 -36
  23. package/src/atomic/ai-layer/AT-AI-005.pattern-coverage.test.ts +11 -5
  24. package/src/atomic/enforcement/AT-ENF-001.log-action.test.ts +4 -4
  25. package/src/atomic/enforcement/AT-ENF-002.alert-callback.test.ts +5 -5
  26. package/src/atomic/enforcement/AT-ENF-003.pause-sigstop.test.ts +4 -4
  27. package/src/atomic/enforcement/AT-ENF-004.kill-sigterm.test.ts +5 -5
  28. package/src/atomic/enforcement/AT-ENF-005.resume-sigcont.test.ts +4 -4
  29. package/src/atomic/intelligence/AT-INT-001.l0-rule-match.test.ts +1 -1
  30. package/src/atomic/intelligence/AT-INT-002.l1-anomaly-score.test.ts +10 -8
  31. package/src/atomic/intelligence/AT-INT-003.l2-escalation.test.ts +1 -1
  32. package/src/atomic/intelligence/AT-INT-004.budget-exhaustion.test.ts +8 -6
  33. package/src/atomic/intelligence/AT-INT-005.baseline-learning.test.ts +9 -9
  34. package/src/baseline/BL-002.anomaly-injection.test.ts +6 -6
  35. package/src/baseline/BL-003.baseline-persistence.test.ts +9 -9
  36. package/src/harness/adapter.ts +261 -0
  37. package/src/harness/arp-wrapper.ts +175 -42
  38. package/src/harness/capabilities.ts +79 -0
  39. package/src/harness/create-adapter.ts +53 -0
  40. package/src/harness/event-collector.ts +1 -1
  41. package/src/harness/llm-guard-wrapper.ts +345 -0
  42. package/src/harness/mock-llm-adapter.ts +7 -6
  43. package/src/harness/rebuff-wrapper.ts +343 -0
  44. package/src/harness/types.ts +33 -39
  45. package/src/integration/INT-001.data-exfil-detection.test.ts +1 -1
  46. package/src/integration/INT-002.mcp-tool-abuse.test.ts +1 -1
  47. package/src/integration/INT-003.prompt-injection-response.test.ts +1 -1
  48. package/src/integration/INT-004.a2a-trust-exploitation.test.ts +1 -1
  49. package/src/integration/INT-005.baseline-then-attack.test.ts +1 -1
  50. package/src/integration/INT-006.multi-monitor-correlation.test.ts +1 -1
  51. package/src/integration/INT-007.budget-exhaustion-attack.test.ts +8 -8
  52. package/src/integration/INT-008.kill-switch-recovery.test.ts +6 -6
package/README.md CHANGED
@@ -1,7 +1,9 @@
1
- > **[OpenA2A](https://opena2a.org)**: [AIM](https://github.com/opena2a-org/agent-identity-management) · [HackMyAgent](https://github.com/opena2a-org/hackmyagent) · [OASB](https://github.com/opena2a-org/oasb) · [ARP](https://github.com/opena2a-org/arp) · [Secretless](https://github.com/opena2a-org/secretless-ai) · [DVAA](https://github.com/opena2a-org/damn-vulnerable-ai-agent)
1
+ > **[OpenA2A](https://github.com/opena2a-org/opena2a)**: [CLI](https://github.com/opena2a-org/opena2a) · [HackMyAgent](https://github.com/opena2a-org/hackmyagent) · [Secretless](https://github.com/opena2a-org/secretless-ai) · [AIM](https://github.com/opena2a-org/agent-identity-management) · [Browser Guard](https://github.com/opena2a-org/AI-BrowserGuard) · [DVAA](https://github.com/opena2a-org/damn-vulnerable-ai-agent)
2
2
 
3
3
  # OASB — Open Agent Security Benchmark
4
4
 
5
+ > **Note:** OASB controls are also available in [HackMyAgent](https://github.com/opena2a-org/hackmyagent) v0.8.0+ via `opena2a benchmark`. This repository is the canonical source for the full 222-test evaluation suite and is actively maintained. ARP (the reference adapter) is now part of HackMyAgent — install via `npm install arp-guard`.
6
+
5
7
  [![License: Apache-2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
6
8
  [![Tests](https://img.shields.io/badge/tests-222%20passing-brightgreen)](https://github.com/opena2a-org/oasb)
7
9
  [![MITRE ATLAS](https://img.shields.io/badge/MITRE%20ATLAS-10%20techniques-teal)](https://atlas.mitre.org/)
@@ -10,7 +12,7 @@
10
12
 
11
13
  222 standardized attack scenarios that evaluate whether a runtime security product can detect and respond to threats against AI agents. Each test is mapped to MITRE ATLAS and OWASP Agentic Top 10. Plug in your product, run the suite, get a detection coverage scorecard.
12
14
 
13
- [OASB Website](https://oasb.ai) | [OpenA2A](https://opena2a.org) | [MITRE ATLAS Coverage](#mitre-atlas-coverage) | [ARP (Reference Adapter)](https://github.com/opena2a-org/arp)
15
+ [OASB Website](https://oasb.ai) | [MITRE ATLAS Coverage](#mitre-atlas-coverage)
14
16
 
15
17
  ---
16
18
 
@@ -18,6 +20,7 @@
18
20
 
19
21
  | Date | Change |
20
22
  |------|--------|
23
+ | 2026-03-23 | `arp-guard` v0.3.0 — ARP now re-exports from HackMyAgent. Updated OASB to v0.3.0. All 222 tests pass. Updated Quick Start (no standalone ARP clone). |
21
24
  | 2026-02-19 | Added 40 AI-layer test scenarios (AT-AI-001 through AT-AI-005) for prompt, MCP, and A2A scanning via ARP v0.2.0. Total tests: 222. |
22
25
  | 2026-02-18 | Added integration tests for DVAA v0.4.0 MCP JSON-RPC and A2A endpoints. |
23
26
  | 2026-02-09 | Initial release -- 182 attack scenarios across 10 MITRE ATLAS techniques. |
@@ -44,6 +47,7 @@ Use both together: **HackMyAgent** finds vulnerabilities in your agent, **OASB**
44
47
  ## Table of Contents
45
48
 
46
49
  - [Quick Start](#quick-start)
50
+ - [Usage via OpenA2A CLI](#usage-via-opena2a-cli)
47
51
  - [What Gets Tested](#what-gets-tested)
48
52
  - [Test Categories](#test-categories)
49
53
  - [Atomic Tests](#atomic-tests-srcatomic) — 65 discrete detection tests (OS-level + AI-layer)
@@ -59,16 +63,15 @@ Use both together: **HackMyAgent** finds vulnerabilities in your agent, **OASB**
59
63
 
60
64
  ## Quick Start
61
65
 
62
- Currently ships with [ARP](https://github.com/opena2a-org/arp) as the reference adapter. Vendor adapter interface coming soon — implement the adapter for your product and run the same 222 tests.
66
+ Ships with [ARP](https://www.npmjs.com/package/arp-guard) (`arp-guard`) as the reference adapter. To evaluate your own security product, implement the `SecurityProductAdapter` interface in `src/harness/adapter.ts` and run the same 222 tests.
63
67
 
64
68
  ```bash
65
- git clone https://github.com/opena2a-org/arp.git
66
69
  git clone https://github.com/opena2a-org/oasb.git
67
-
68
- cd arp && npm install && npm run build && cd ..
69
70
  cd oasb && npm install
70
71
  ```
71
72
 
73
+ > `arp-guard` is an optional peer dependency. It is installed automatically for running the reference ARP evaluation. If you are implementing your own adapter, you do not need it.
74
+
72
75
  ### Run the Evaluation
73
76
 
74
77
  ```bash
@@ -79,6 +82,46 @@ npm run test:baseline # 3 baseline tests
79
82
  npx vitest run src/e2e/ # 6 E2E tests (real OS detection)
80
83
  ```
81
84
 
85
+ ![OASB Demo](docs/oasb-demo.gif)
86
+
87
+ ---
88
+
89
+ ## Usage via OpenA2A CLI
90
+
91
+ OASB is available as a built-in adapter in the [OpenA2A CLI](https://github.com/opena2a-org/opena2a) via the `benchmark` command. The CLI delegates to the `oasb` package using an import adapter, so no separate installation is needed if you already have the CLI installed.
92
+
93
+ ### Run the full benchmark suite
94
+
95
+ ```bash
96
+ opena2a benchmark run
97
+ ```
98
+
99
+ Executes all 222 test scenarios (atomic, integration, baseline, and E2E) and produces a detection coverage scorecard.
100
+
101
+ ### Run a specific MITRE ATLAS technique
102
+
103
+ ```bash
104
+ opena2a benchmark run --technique T0015
105
+ ```
106
+
107
+ Filters the benchmark to a single MITRE ATLAS technique ID (e.g., `T0015` for Evasion). Useful for targeted evaluation of a specific detection capability.
108
+
109
+ ### Generate machine-readable output for CI
110
+
111
+ ```bash
112
+ opena2a benchmark run --format json
113
+ ```
114
+
115
+ Outputs the compliance score and per-technique detection rates as JSON. Integrate this into CI pipelines to enforce minimum detection thresholds on every build.
116
+
117
+ ### Combining flags
118
+
119
+ ```bash
120
+ opena2a benchmark run --technique T0057 --format json
121
+ ```
122
+
123
+ Flags can be combined to run a single technique and produce JSON output for automated processing.
124
+
82
125
  ---
83
126
 
84
127
  ## What Gets Tested
@@ -269,14 +312,15 @@ The harness wraps a security product via an adapter interface and provides event
269
312
 
270
313
  | File | Purpose |
271
314
  |------|---------|
272
- | `arp-wrapper.ts` | Reference adapter — wraps ARP with temp dataDir, event collection, injection helpers |
315
+ | `adapter.ts` | **Product-agnostic adapter interface** — implement `SecurityProductAdapter` for your product |
316
+ | `arp-wrapper.ts` | Reference adapter — wraps ARP (`arp-guard`) with event collection, injection helpers |
273
317
  | `event-collector.ts` | Captures events with async `waitForEvent(predicate, timeout)` |
274
318
  | `mock-llm-adapter.ts` | Deterministic LLM for intelligence layer testing (pattern-based responses) |
275
319
  | `dvaa-client.ts` | HTTP client for DVAA vulnerable agent endpoints |
276
320
  | `dvaa-manager.ts` | DVAA process lifecycle (spawn, health check, teardown) |
277
321
  | `metrics.ts` | Detection rate, false positive rate, P95 latency computation |
278
322
 
279
- To evaluate your own product: implement an adapter that translates OASB events into your product's API, then run the full suite. Vendor adapter interface spec coming soon.
323
+ To evaluate your own product: implement `SecurityProductAdapter` from `src/harness/adapter.ts`, swap it into the test harness, and run the full suite. The interface defines event types, scanner interfaces, and enforcement contracts — no dependency on any specific product.
280
324
 
281
325
  ---
282
326
 
@@ -284,12 +328,12 @@ To evaluate your own product: implement an adapter that translates OASB events i
284
328
 
285
329
  OASB documents what the reference product (ARP) does and doesn't catch. Other products may have different gap profiles — that's the point of running the benchmark.
286
330
 
287
- | Gap | Severity | Test |
288
- |-----|----------|------|
289
- | Anomaly baselines not persisted across restarts | Medium | BL-003 |
290
- | No connection rate anomaly detection | Medium | AT-NET-003 |
291
- | No HTTP response/output monitoring | Architectural | INT-003 |
292
- | No cross-monitor event correlation | Architectural | INT-006 |
331
+ | Gap | Severity | Test | Notes |
332
+ |-----|----------|------|-------|
333
+ | Anomaly baselines not persisted across restarts | Medium | BL-003 | In-memory only; restarts lose learned behavior |
334
+ | No connection rate anomaly detection | Medium | AT-NET-003 | Network monitor tracks hosts, not burst rates |
335
+ | No HTTP response body monitoring | Low | INT-003 | AI-layer output scanning (PromptInterceptor.scanOutput) covers LLM responses; raw HTTP responses not inspected |
336
+ | No cross-monitor event correlation | Architectural | INT-006 | EventEngine is a flat bus; no attack-chain aggregation |
293
337
 
294
338
  ---
295
339
 
@@ -303,9 +347,8 @@ Apache-2.0
303
347
 
304
348
  | Project | Description | Install |
305
349
  |---------|-------------|---------|
306
- | [**AIM**](https://github.com/opena2a-org/agent-identity-management) | Agent Identity Management -- identity and access control for AI agents | `pip install aim-sdk` |
307
- | [**HackMyAgent**](https://github.com/opena2a-org/hackmyagent) | Security scanner -- 147 checks, attack mode, auto-fix | `npx hackmyagent secure` |
308
- | [**OASB**](https://github.com/opena2a-org/oasb) | Open Agent Security Benchmark -- 182 attack scenarios | `npm install @opena2a/oasb` |
309
- | [**ARP**](https://github.com/opena2a-org/arp) | Agent Runtime Protection -- process, network, filesystem monitoring | `npm install @opena2a/arp` |
350
+ | [**AIM**](https://github.com/opena2a-org/agent-identity-management) | Agent Identity Management -- identity and access control for AI agents | `npm install @opena2a/aim-core` |
351
+ | [**HackMyAgent**](https://github.com/opena2a-org/hackmyagent) | Security scanner -- 204 checks, attack mode, auto-fix | `npx hackmyagent secure` |
352
+ | [**ARP**](https://www.npmjs.com/package/arp-guard) | Agent Runtime Protection -- process, network, filesystem, AI-layer monitoring | `npm install arp-guard` |
310
353
  | [**Secretless AI**](https://github.com/opena2a-org/secretless-ai) | Keep credentials out of AI context windows | `npx secretless-ai init` |
311
354
  | [**DVAA**](https://github.com/opena2a-org/damn-vulnerable-ai-agent) | Damn Vulnerable AI Agent -- security training and red-teaming | `docker pull opena2a/dvaa` |
@@ -0,0 +1,205 @@
1
+ /**
2
+ * OASB Security Product Adapter Interface
3
+ *
4
+ * Implement this interface to evaluate your security product against OASB.
5
+ * The reference implementation (ARP adapter) is in arp-wrapper.ts.
6
+ *
7
+ * @example
8
+ * // Vendor implements the adapter for their product:
9
+ * class MyProductAdapter implements SecurityProductAdapter { ... }
10
+ *
11
+ * // OASB tests use the adapter, not your product directly:
12
+ * const adapter = createAdapter(); // returns configured adapter
13
+ * await adapter.start();
14
+ * await adapter.injectEvent({ ... });
15
+ * const threats = adapter.getEventsByCategory('threat');
16
+ */
17
+ export type EventCategory = 'normal' | 'activity' | 'threat' | 'violation';
18
+ export type EventSeverity = 'info' | 'low' | 'medium' | 'high' | 'critical';
19
+ export type MonitorSource = 'process' | 'network' | 'filesystem' | 'prompt' | 'mcp-protocol' | 'a2a-protocol' | string;
20
+ export type EnforcementAction = 'log' | 'alert' | 'pause' | 'kill' | 'resume';
21
+ export interface SecurityEvent {
22
+ id?: string;
23
+ timestamp?: string;
24
+ source: MonitorSource;
25
+ category: EventCategory;
26
+ severity: EventSeverity;
27
+ description: string;
28
+ data?: Record<string, unknown>;
29
+ classifiedBy?: string;
30
+ }
31
+ export interface EnforcementResult {
32
+ action: EnforcementAction;
33
+ success: boolean;
34
+ reason: string;
35
+ event: SecurityEvent;
36
+ pid?: number;
37
+ }
38
+ export interface AlertRule {
39
+ name: string;
40
+ condition: AlertCondition;
41
+ action: EnforcementAction;
42
+ }
43
+ export interface AlertCondition {
44
+ source?: MonitorSource;
45
+ category?: EventCategory;
46
+ minSeverity?: EventSeverity;
47
+ descriptionContains?: string;
48
+ }
49
+ export interface ScanResult {
50
+ detected: boolean;
51
+ matches: ScanMatch[];
52
+ truncated?: boolean;
53
+ }
54
+ export interface ScanMatch {
55
+ pattern: ThreatPattern;
56
+ matchedText: string;
57
+ }
58
+ export interface ThreatPattern {
59
+ id: string;
60
+ category: string;
61
+ description: string;
62
+ pattern: RegExp;
63
+ severity: 'medium' | 'high' | 'critical';
64
+ }
65
+ export interface PromptScanner {
66
+ start(): Promise<void>;
67
+ stop(): Promise<void>;
68
+ scanInput(text: string): ScanResult;
69
+ scanOutput(text: string): ScanResult;
70
+ }
71
+ export interface MCPScanner {
72
+ start(): Promise<void>;
73
+ stop(): Promise<void>;
74
+ scanToolCall(toolName: string, params: Record<string, unknown>): ScanResult;
75
+ }
76
+ export interface A2AScanner {
77
+ start(): Promise<void>;
78
+ stop(): Promise<void>;
79
+ scanMessage(from: string, to: string, content: string): ScanResult;
80
+ }
81
+ export interface PatternScanner {
82
+ scanText(text: string, patterns: readonly ThreatPattern[]): ScanResult;
83
+ getAllPatterns(): readonly ThreatPattern[];
84
+ getPatternSets(): Record<string, readonly ThreatPattern[]>;
85
+ }
86
+ export interface BudgetStatus {
87
+ spent: number;
88
+ budget: number;
89
+ remaining: number;
90
+ percentUsed: number;
91
+ callsThisHour: number;
92
+ maxCallsPerHour: number;
93
+ totalCalls: number;
94
+ }
95
+ export interface BudgetManager {
96
+ canAfford(estimatedCostUsd: number): boolean;
97
+ record(costUsd: number, tokens: number): void;
98
+ getStatus(): BudgetStatus;
99
+ reset(): void;
100
+ }
101
+ export interface AnomalyScorer {
102
+ score(event: SecurityEvent): number;
103
+ record(event: SecurityEvent): void;
104
+ getBaseline(source: string): {
105
+ mean: number;
106
+ stddev: number;
107
+ count: number;
108
+ } | null;
109
+ reset(): void;
110
+ }
111
+ export interface LLMAdapter {
112
+ name: string;
113
+ assess(prompt: string): Promise<LLMResponse>;
114
+ }
115
+ export interface LLMResponse {
116
+ content: string;
117
+ usage?: {
118
+ inputTokens: number;
119
+ outputTokens: number;
120
+ };
121
+ }
122
+ export interface EventEngine {
123
+ emit(event: Omit<SecurityEvent, 'id' | 'timestamp' | 'classifiedBy'>): SecurityEvent;
124
+ onEvent(handler: (event: SecurityEvent) => void | Promise<void>): void;
125
+ }
126
+ export interface EnforcementEngine {
127
+ execute(action: EnforcementAction, event: SecurityEvent): Promise<EnforcementResult>;
128
+ pause(pid: number): boolean;
129
+ resume(pid: number): boolean;
130
+ kill(pid: number, signal?: string): boolean;
131
+ getPausedPids(): number[];
132
+ setAlertCallback(callback: (event: SecurityEvent, rule: AlertRule) => void): void;
133
+ }
134
+ /**
135
+ * Capabilities that a security product may or may not support.
136
+ * Adapters declare their capabilities via getCapabilities().
137
+ * Tests check capabilities before running — unsupported tests are
138
+ * marked N/A instead of FAIL, producing an honest scorecard.
139
+ */
140
+ export type Capability = 'process-monitoring' | 'network-monitoring' | 'filesystem-monitoring' | 'prompt-input-scanning' | 'prompt-output-scanning' | 'mcp-scanning' | 'a2a-scanning' | 'anomaly-detection' | 'budget-management' | 'enforcement-log' | 'enforcement-alert' | 'enforcement-pause' | 'enforcement-kill' | 'enforcement-resume' | 'pattern-scanning' | 'event-correlation';
141
+ /** Full capability declaration for a product */
142
+ export interface CapabilityMatrix {
143
+ /** Product name */
144
+ product: string;
145
+ /** Product version */
146
+ version: string;
147
+ /** Set of supported capabilities */
148
+ capabilities: Set<Capability>;
149
+ }
150
+ export interface SecurityProductAdapter {
151
+ /** Declare which capabilities this product supports */
152
+ getCapabilities(): CapabilityMatrix;
153
+ /** Start the security product */
154
+ start(): Promise<void>;
155
+ /** Stop the security product */
156
+ stop(): Promise<void>;
157
+ /** Inject a synthetic event for testing */
158
+ injectEvent(event: Omit<SecurityEvent, 'id' | 'timestamp' | 'classifiedBy'>): Promise<SecurityEvent>;
159
+ /** Wait for an event matching a predicate */
160
+ waitForEvent(predicate: (event: SecurityEvent) => boolean, timeoutMs?: number): Promise<SecurityEvent>;
161
+ /** Get collected events */
162
+ getEvents(): SecurityEvent[];
163
+ getEventsByCategory(category: EventCategory): SecurityEvent[];
164
+ getEnforcements(): EnforcementResult[];
165
+ getEnforcementsByAction(action: EnforcementAction): EnforcementResult[];
166
+ /** Reset collected events */
167
+ resetCollector(): void;
168
+ /** Access sub-components (for tests that need direct access) */
169
+ getEventEngine(): EventEngine;
170
+ getEnforcementEngine(): EnforcementEngine;
171
+ /** Factory methods for component-level testing */
172
+ createPromptScanner(): PromptScanner;
173
+ createMCPScanner(allowedTools?: string[]): MCPScanner;
174
+ createA2AScanner(trustedAgents?: string[]): A2AScanner;
175
+ createPatternScanner(): PatternScanner;
176
+ createBudgetManager(dataDir: string, config?: {
177
+ budgetUsd?: number;
178
+ maxCallsPerHour?: number;
179
+ }): BudgetManager;
180
+ createAnomalyScorer(): AnomalyScorer;
181
+ }
182
+ export interface LabConfig {
183
+ monitors?: {
184
+ process?: boolean;
185
+ network?: boolean;
186
+ filesystem?: boolean;
187
+ };
188
+ rules?: AlertRule[];
189
+ intelligence?: {
190
+ enabled?: boolean;
191
+ };
192
+ dataDir?: string;
193
+ filesystemWatchPaths?: string[];
194
+ filesystemAllowedPaths?: string[];
195
+ networkAllowedHosts?: string[];
196
+ processIntervalMs?: number;
197
+ networkIntervalMs?: number;
198
+ interceptors?: {
199
+ process?: boolean;
200
+ network?: boolean;
201
+ filesystem?: boolean;
202
+ };
203
+ interceptorNetworkAllowedHosts?: string[];
204
+ interceptorFilesystemAllowedPaths?: string[];
205
+ }
@@ -0,0 +1,18 @@
1
+ "use strict";
2
+ /**
3
+ * OASB Security Product Adapter Interface
4
+ *
5
+ * Implement this interface to evaluate your security product against OASB.
6
+ * The reference implementation (ARP adapter) is in arp-wrapper.ts.
7
+ *
8
+ * @example
9
+ * // Vendor implements the adapter for their product:
10
+ * class MyProductAdapter implements SecurityProductAdapter { ... }
11
+ *
12
+ * // OASB tests use the adapter, not your product directly:
13
+ * const adapter = createAdapter(); // returns configured adapter
14
+ * await adapter.start();
15
+ * await adapter.injectEvent({ ... });
16
+ * const threats = adapter.getEventsByCategory('threat');
17
+ */
18
+ Object.defineProperty(exports, "__esModule", { value: true });
@@ -1,28 +1,33 @@
1
- import { AgentRuntimeProtection, EventEngine, EnforcementEngine, type ARPEvent } from '@opena2a/arp';
2
1
  import { EventCollector } from './event-collector';
3
- import type { LabConfig } from './types';
4
- /**
5
- * Wraps AgentRuntimeProtection for controlled testing.
6
- * Creates temp dataDir per test, registers EventCollector,
7
- * and provides injection + assertion helpers.
8
- */
9
- export declare class ArpWrapper {
10
- private arp;
2
+ import type { SecurityProductAdapter, SecurityEvent, EnforcementResult, LabConfig, PromptScanner, MCPScanner, A2AScanner, PatternScanner, BudgetManager, AnomalyScorer, EventEngine, EnforcementEngine as EnforcementEngineInterface, CapabilityMatrix } from './adapter';
3
+ export declare class ArpWrapper implements SecurityProductAdapter {
4
+ private _arpInstance;
11
5
  private _dataDir;
12
6
  readonly collector: EventCollector;
13
7
  constructor(labConfig?: LabConfig);
8
+ getCapabilities(): CapabilityMatrix;
14
9
  start(): Promise<void>;
15
10
  stop(): Promise<void>;
16
- /** Get the underlying ARP instance */
17
- getInstance(): AgentRuntimeProtection;
18
- /** Get the event engine for direct event injection */
19
- getEngine(): EventEngine;
20
- /** Get the enforcement engine */
21
- getEnforcement(): EnforcementEngine;
22
- /** Inject a synthetic event into the ARP engine (for testing without real OS activity) */
23
- injectEvent(event: Omit<ARPEvent, 'id' | 'timestamp' | 'classifiedBy'>): Promise<ARPEvent>;
24
- /** Wait for an event matching a predicate */
25
- waitForEvent(predicate: (event: ARPEvent) => boolean, timeoutMs?: number): Promise<ARPEvent>;
26
- /** Get the data directory */
11
+ injectEvent(event: Omit<SecurityEvent, 'id' | 'timestamp' | 'classifiedBy'>): Promise<SecurityEvent>;
12
+ waitForEvent(predicate: (event: SecurityEvent) => boolean, timeoutMs?: number): Promise<SecurityEvent>;
13
+ getEvents(): SecurityEvent[];
14
+ getEventsByCategory(category: string): SecurityEvent[];
15
+ getEnforcements(): EnforcementResult[];
16
+ getEnforcementsByAction(action: string): EnforcementResult[];
17
+ resetCollector(): void;
18
+ getInstance(): any;
19
+ getEventEngine(): EventEngine;
20
+ getEnforcementEngine(): EnforcementEngineInterface;
21
+ getEngine(): any;
22
+ getEnforcement(): any;
27
23
  get dataDir(): string;
24
+ createPromptScanner(): PromptScanner;
25
+ createMCPScanner(allowedTools?: string[]): MCPScanner;
26
+ createA2AScanner(trustedAgents?: string[]): A2AScanner;
27
+ createPatternScanner(): PatternScanner;
28
+ createBudgetManager(dataDir: string, config?: {
29
+ budgetUsd?: number;
30
+ maxCallsPerHour?: number;
31
+ }): BudgetManager;
32
+ createAnomalyScorer(): AnomalyScorer;
28
33
  }
@@ -34,19 +34,32 @@ var __importStar = (this && this.__importStar) || (function () {
34
34
  })();
35
35
  Object.defineProperty(exports, "__esModule", { value: true });
36
36
  exports.ArpWrapper = void 0;
37
+ /**
38
+ * ARP Adapter — Reference implementation of SecurityProductAdapter
39
+ *
40
+ * Wraps HackMyAgent's ARP (Agent Runtime Protection) for OASB evaluation.
41
+ * Other vendors implement their own adapter against the same interface.
42
+ *
43
+ * Uses lazy require() for arp-guard so the module is only loaded when
44
+ * this adapter is actually selected. Tests that use a different adapter
45
+ * never trigger the arp-guard import.
46
+ */
37
47
  const fs = __importStar(require("fs"));
38
48
  const os = __importStar(require("os"));
39
49
  const path = __importStar(require("path"));
40
- const arp_1 = require("@opena2a/arp");
41
50
  const event_collector_1 = require("./event-collector");
42
- /**
43
- * Wraps AgentRuntimeProtection for controlled testing.
44
- * Creates temp dataDir per test, registers EventCollector,
45
- * and provides injection + assertion helpers.
46
- */
51
+ // Lazy-loaded arp-guard module
52
+ let _arp;
53
+ function arp() {
54
+ if (!_arp) {
55
+ _arp = require('arp-guard');
56
+ }
57
+ return _arp;
58
+ }
47
59
  class ArpWrapper {
48
60
  constructor(labConfig) {
49
61
  this._dataDir = labConfig?.dataDir ?? fs.mkdtempSync(path.join(os.tmpdir(), 'arp-lab-'));
62
+ const { AgentRuntimeProtection } = arp();
50
63
  const config = {
51
64
  agentName: 'arp-lab-target',
52
65
  agentDescription: 'Test target for ARP security lab',
@@ -85,19 +98,40 @@ class ArpWrapper {
85
98
  },
86
99
  },
87
100
  };
88
- this.arp = new arp_1.AgentRuntimeProtection(config);
101
+ this._arpInstance = new AgentRuntimeProtection(config);
89
102
  this.collector = new event_collector_1.EventCollector();
90
- // Register event and enforcement collectors
91
- this.arp.onEvent(this.collector.eventHandler);
92
- this.arp.onEnforcement(this.collector.enforcementHandler);
103
+ this._arpInstance.onEvent(this.collector.eventHandler);
104
+ this._arpInstance.onEnforcement(this.collector.enforcementHandler);
105
+ }
106
+ getCapabilities() {
107
+ return {
108
+ product: 'arp-guard',
109
+ version: arp().VERSION || '0.3.0',
110
+ capabilities: new Set([
111
+ 'process-monitoring',
112
+ 'network-monitoring',
113
+ 'filesystem-monitoring',
114
+ 'prompt-input-scanning',
115
+ 'prompt-output-scanning',
116
+ 'mcp-scanning',
117
+ 'a2a-scanning',
118
+ 'anomaly-detection',
119
+ 'budget-management',
120
+ 'enforcement-log',
121
+ 'enforcement-alert',
122
+ 'enforcement-pause',
123
+ 'enforcement-kill',
124
+ 'enforcement-resume',
125
+ 'pattern-scanning',
126
+ ]),
127
+ };
93
128
  }
94
129
  async start() {
95
- await this.arp.start();
130
+ await this._arpInstance.start();
96
131
  }
97
132
  async stop() {
98
- await this.arp.stop();
133
+ await this._arpInstance.stop();
99
134
  this.collector.reset();
100
- // Clean up temp dir
101
135
  try {
102
136
  fs.rmSync(this._dataDir, { recursive: true, force: true });
103
137
  }
@@ -105,29 +139,104 @@ class ArpWrapper {
105
139
  // Best effort cleanup
106
140
  }
107
141
  }
108
- /** Get the underlying ARP instance */
109
- getInstance() {
110
- return this.arp;
111
- }
112
- /** Get the event engine for direct event injection */
113
- getEngine() {
114
- return this.arp.getEngine();
115
- }
116
- /** Get the enforcement engine */
117
- getEnforcement() {
118
- return this.arp.getEnforcement();
119
- }
120
- /** Inject a synthetic event into the ARP engine (for testing without real OS activity) */
121
142
  async injectEvent(event) {
122
143
  return this.getEngine().emit(event);
123
144
  }
124
- /** Wait for an event matching a predicate */
125
145
  waitForEvent(predicate, timeoutMs = 10000) {
126
146
  return this.collector.waitForEvent(predicate, timeoutMs);
127
147
  }
128
- /** Get the data directory */
148
+ getEvents() {
149
+ return this.collector.getEvents();
150
+ }
151
+ getEventsByCategory(category) {
152
+ return this.collector.eventsByCategory(category);
153
+ }
154
+ getEnforcements() {
155
+ return this.collector.getEnforcements();
156
+ }
157
+ getEnforcementsByAction(action) {
158
+ return this.collector.enforcementsByAction(action);
159
+ }
160
+ resetCollector() {
161
+ this.collector.reset();
162
+ }
163
+ getInstance() {
164
+ return this._arpInstance;
165
+ }
166
+ getEventEngine() {
167
+ return this._arpInstance.getEngine();
168
+ }
169
+ getEnforcementEngine() {
170
+ return this._arpInstance.getEnforcement();
171
+ }
172
+ getEngine() {
173
+ return this._arpInstance.getEngine();
174
+ }
175
+ getEnforcement() {
176
+ return this._arpInstance.getEnforcement();
177
+ }
129
178
  get dataDir() {
130
179
  return this._dataDir;
131
180
  }
181
+ // ─── Factory Methods ────────────────────────────────────────────
182
+ createPromptScanner() {
183
+ const { EventEngine, PromptInterceptor } = arp();
184
+ const engine = new EventEngine({ agentName: 'oasb-prompt-test' });
185
+ const interceptor = new PromptInterceptor(engine);
186
+ return {
187
+ start: () => interceptor.start(),
188
+ stop: () => interceptor.stop(),
189
+ scanInput: (text) => interceptor.scanInput(text),
190
+ scanOutput: (text) => interceptor.scanOutput(text),
191
+ };
192
+ }
193
+ createMCPScanner(allowedTools) {
194
+ const { EventEngine, MCPProtocolInterceptor } = arp();
195
+ const engine = new EventEngine({ agentName: 'oasb-mcp-test' });
196
+ const interceptor = new MCPProtocolInterceptor(engine, allowedTools);
197
+ return {
198
+ start: () => interceptor.start(),
199
+ stop: () => interceptor.stop(),
200
+ scanToolCall: (toolName, params) => interceptor.scanToolCall(toolName, params),
201
+ };
202
+ }
203
+ createA2AScanner(trustedAgents) {
204
+ const { EventEngine, A2AProtocolInterceptor } = arp();
205
+ const engine = new EventEngine({ agentName: 'oasb-a2a-test' });
206
+ const interceptor = new A2AProtocolInterceptor(engine, trustedAgents);
207
+ return {
208
+ start: () => interceptor.start(),
209
+ stop: () => interceptor.stop(),
210
+ scanMessage: (from, to, content) => interceptor.scanMessage(from, to, content),
211
+ };
212
+ }
213
+ createPatternScanner() {
214
+ const { scanText: _scanText, ALL_PATTERNS: _allPatterns, PATTERN_SETS: _patternSets } = arp();
215
+ return {
216
+ scanText: (text, patterns) => _scanText(text, patterns),
217
+ getAllPatterns: () => _allPatterns,
218
+ getPatternSets: () => _patternSets,
219
+ };
220
+ }
221
+ createBudgetManager(dataDir, config) {
222
+ const { BudgetController } = arp();
223
+ const controller = new BudgetController(dataDir, config);
224
+ return {
225
+ canAfford: (cost) => controller.canAfford(cost),
226
+ record: (cost, tokens) => controller.record(cost, tokens),
227
+ getStatus: () => controller.getStatus(),
228
+ reset: () => controller.reset(),
229
+ };
230
+ }
231
+ createAnomalyScorer() {
232
+ const { AnomalyDetector } = arp();
233
+ const detector = new AnomalyDetector();
234
+ return {
235
+ score: (event) => detector.score(event),
236
+ record: (event) => detector.record(event),
237
+ getBaseline: (source) => detector.getBaseline(source),
238
+ reset: () => detector.reset(),
239
+ };
240
+ }
132
241
  }
133
242
  exports.ArpWrapper = ArpWrapper;
@@ -0,0 +1,26 @@
1
+ import type { Capability, CapabilityMatrix } from './adapter';
2
+ /**
3
+ * Check if the current adapter has a capability.
4
+ */
5
+ export declare function hasCapability(cap: Capability): boolean;
6
+ /**
7
+ * Call at the top of a describe() block to skip the entire suite
8
+ * if the adapter lacks the required capability.
9
+ *
10
+ * Uses describe.skipIf() so the tests show as skipped, not failed.
11
+ */
12
+ export declare function requireCapability(cap: Capability): void;
13
+ /**
14
+ * A describe() wrapper that skips the entire suite if the adapter
15
+ * lacks the required capability. Produces N/A in the scorecard.
16
+ *
17
+ * @example
18
+ * describeWithCapability('mcp-scanning', 'MCP Tool Scanning', () => {
19
+ * it('should detect path traversal', () => { ... });
20
+ * });
21
+ */
22
+ export declare const describeWithCapability: (cap: Capability, name: string, fn: () => void) => void;
23
+ /**
24
+ * Get the full capability matrix for reporting.
25
+ */
26
+ export declare function getCapabilityMatrix(): CapabilityMatrix;