@agent-wall/core 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,269 @@
1
+ /**
2
+ * Agent Wall Tool Call Chain Detector
3
+ *
4
+ * Detects suspicious sequences of tool calls that indicate
5
+ * multi-step attacks. Individual calls may look innocent,
6
+ * but the CHAIN reveals the attack:
7
+ *
8
+ * read_file(.env) → write_file(tmp.txt) → bash(curl) = EXFILTRATION
9
+ * list_directory(/) → read_file(passwd) → read_file(shadow) = RECON
10
+ * write_file(script.sh) → bash(chmod +x) → bash(./script.sh) = DROPPER
11
+ *
12
+ * The detector maintains a sliding window of recent tool calls
13
+ * and matches against known attack chain patterns.
14
+ */
15
+
16
+ import type { ToolCallParams } from "./types.js";
17
+
18
+ // ── Types ───────────────────────────────────────────────────────────
19
+
20
/**
 * Options accepted by the `ChainDetector` constructor.
 * Every field is optional; the constructor fills in defaults for omitted ones.
 */
export interface ChainDetectorConfig {
  /** Turns chain detection on or off (on when omitted). */
  enabled?: boolean;
  /** Maximum number of recent calls kept in the sliding window. */
  windowSize?: number;
  /** Maximum age of a tracked call in milliseconds; older calls are dropped. */
  windowMs?: number;
  /** Additional chain patterns checked alongside the built-in ones. */
  customChains?: ChainPattern[];
}
30
+
31
/** Describes one suspicious sequence of tool calls. */
export interface ChainPattern {
  /** Unique identifier of the pattern; reported back as the matched chain name. */
  name: string;
  /**
   * Ordered tool-name patterns, one per step of the chain.
   * Each entry may list several alternatives separated by `|`, and an
   * alternative may use `*` as a wildcard (e.g. `"read_*|get_*"`).
   */
  sequence: string[];
  /** How serious a match is: "low", "medium", "high" or "critical". */
  severity: "low" | "medium" | "high" | "critical";
  /** Human-readable explanation reported when the chain matches. */
  message: string;
  /**
   * Whether argument values must match across steps (e.g., same file read then sent).
   * NOTE(review): the detector in this file stores call arguments but does not
   * read this flag — confirm whether argument tracking is implemented elsewhere.
   */
  trackArguments?: boolean;
}
43
+
44
/** Outcome of checking the call history after one recorded tool call. */
export interface ChainDetectionResult {
  /** `true` when at least one chain pattern matched. */
  detected: boolean;
  /** One entry per chain pattern that matched (empty when nothing matched). */
  matches: ChainMatchInfo[];
  /** Single-line, human-readable description of the outcome. */
  summary: string;
}
52
+
53
/** Details of a single matched chain pattern. */
export interface ChainMatchInfo {
  /** The `name` of the chain pattern that matched. */
  chain: string;
  /** Severity copied from the matched pattern. */
  severity: "low" | "medium" | "high" | "critical";
  /** Names of the tool calls that made up the chain, oldest first. */
  calls: string[];
  /** Description copied from the matched pattern. */
  message: string;
}
63
+
64
+ // ── Built-in Chain Patterns ─────────────────────────────────────────
65
+
66
/**
 * Chain patterns that ship with the detector.
 *
 * The list is assembled from a few shared tool-name pattern fragments so the
 * recurring "read", "write", "shell" and "list" groups are spelled only once.
 */
const BUILTIN_CHAINS: ChainPattern[] = ((): ChainPattern[] => {
  const read = "read_*|get_*";
  const write = "write_*|create_*";
  const shell = "shell_*|run_*|bash";
  const list = "list_*|ls";

  return [
    // Exfiltration chains
    {
      name: "read-then-network",
      sequence: [`${read}|view_*`, "shell_*|run_*|execute_*|bash"],
      severity: "high",
      message: "Potential data exfiltration: file read followed by shell command",
    },
    {
      name: "read-write-send",
      sequence: [read, write, shell],
      severity: "critical",
      message: "Exfiltration chain: read → write → shell (staged exfiltration)",
    },
    {
      name: "env-then-network",
      sequence: [read, shell],
      severity: "critical",
      message: "Potential secret exfiltration: file read followed by network command",
      trackArguments: true,
    },

    // Reconnaissance chains
    {
      name: "directory-scan",
      sequence: [list, list, list, read],
      severity: "medium",
      message: "Directory scanning pattern: multiple listings followed by file read",
    },

    // Dropper / persistence chains
    {
      name: "write-execute",
      sequence: [write, shell],
      severity: "high",
      message: "Potential dropper: file write followed by shell execution",
    },
    {
      name: "write-chmod-execute",
      sequence: [write, shell, shell],
      severity: "critical",
      message: "Dropper chain: write → chmod → execute",
    },

    // Privilege escalation
    {
      name: "read-sensitive-then-write",
      sequence: [read, `${write}|edit_*`],
      severity: "medium",
      message: "Sensitive file read followed by file modification",
      trackArguments: true,
    },

    // Rapid shell commands
    {
      name: "shell-burst",
      sequence: [shell, shell, shell, shell],
      severity: "high",
      message: "Rapid burst of shell commands — potential automated attack",
    },
  ];
})();
127
+
128
+ // ── Internal tracked call ───────────────────────────────────────────
129
+
130
/** One entry of the detector's call history. */
interface TrackedCall {
  /** Name of the tool that was called. */
  tool: string;
  /** Arguments the tool was called with (empty object when none were given). */
  args: Record<string, unknown>;
  /** Time the call was recorded, as returned by `Date.now()`. */
  timestamp: number;
}
135
+
136
+ // ── Chain Detector ──────────────────────────────────────────────────
137
+
138
/**
 * Sliding-window detector for suspicious sequences of tool calls.
 *
 * Every call passed to `record()` is appended to an in-memory history that is
 * bounded both by age (`windowMs`) and by length (`windowSize`). After each
 * append, the newest calls are compared against every known chain pattern
 * (the built-in ones plus `customChains`).
 *
 * A chain matches only when its `sequence` lines up, step for step, with the
 * most recent `sequence.length` calls; an unrelated call in between breaks it.
 *
 * NOTE(review): `ChainPattern.trackArguments` is not consulted by this class —
 * call arguments are stored in the history but never compared.
 */
export class ChainDetector {
  /** Rank of each severity level, used to pick the highest one for the summary. */
  private static readonly SEVERITY_RANK = { low: 0, medium: 1, high: 2, critical: 3 } as const;

  private config: Required<ChainDetectorConfig>;
  private history: TrackedCall[] = [];
  private allChains: ChainPattern[];
  /** Compiled matcher for each glob alternative, so patterns are compiled once. */
  private readonly globCache = new Map<string, RegExp>();

  /**
   * @param config Optional settings. Defaults: enabled, a window of 20 calls,
   *               a 60 second time window, and no custom chains.
   */
  constructor(config: ChainDetectorConfig = {}) {
    this.config = {
      enabled: config.enabled ?? true,
      windowSize: config.windowSize ?? 20,
      windowMs: config.windowMs ?? 60_000, // 1 minute
      customChains: config.customChains ?? [],
    };
    this.allChains = [...BUILTIN_CHAINS, ...this.config.customChains];
  }

  /**
   * Record a tool call and check for suspicious chains.
   * Call this AFTER the policy engine allows the call.
   *
   * @param toolCall The call to record; its `name` and `arguments` are stored.
   * @returns Which chain patterns (if any) end at this call, plus a summary
   *          naming the highest severity among them.
   */
  record(toolCall: ToolCallParams): ChainDetectionResult {
    if (!this.config.enabled) {
      return { detected: false, matches: [], summary: "Chain detection disabled" };
    }

    const now = Date.now();

    // Add to history, then drop whatever fell out of the window.
    this.history.push({
      tool: toolCall.name,
      args: toolCall.arguments ?? {},
      timestamp: now,
    });
    this.pruneHistory(now);

    // Check all chain patterns against the current history.
    const matches: ChainMatchInfo[] = [];
    for (const chain of this.allChains) {
      if (this.matchesChain(chain)) {
        matches.push({
          chain: chain.name,
          severity: chain.severity,
          // matchesChain guarantees a non-empty sequence, so slice(-n) never
          // degenerates into slice(-0) (which would return the whole history).
          calls: this.history.slice(-chain.sequence.length).map((c) => c.tool),
          message: chain.message,
        });
      }
    }

    if (matches.length === 0) {
      return { detected: false, matches: [], summary: "No suspicious chains detected" };
    }

    const rank = ChainDetector.SEVERITY_RANK;
    const highestSeverity = matches.reduce<"low" | "medium" | "high" | "critical">(
      (best, m) => (rank[m.severity] > rank[best] ? m.severity : best),
      "low",
    );

    return {
      detected: true,
      matches,
      summary: `Suspicious tool call chain detected (${highestSeverity}): ${matches.map((m) => m.chain).join(", ")}`,
    };
  }

  /**
   * Check whether the most recent calls match a chain pattern.
   *
   * The last `sequence.length` calls must match the sequence position by
   * position, i.e. the chain has to appear as consecutive calls ending at the
   * newest one. A pattern with an empty sequence never matches.
   */
  private matchesChain(chain: ChainPattern): boolean {
    const steps = chain.sequence.length;
    // An empty sequence would otherwise match vacuously on every single call.
    if (steps === 0 || this.history.length < steps) return false;

    const recentCalls = this.history.slice(-steps);
    return chain.sequence.every((pattern, i) => this.matchesToolPattern(pattern, recentCalls[i].tool));
  }

  /**
   * Match a tool name against a pipe-separated list of glob alternatives.
   *
   * Within an alternative, `*` matches any run of characters (including none)
   * and may appear anywhere — `read_*`, `*_file`, `*secret*` and `fs_*_file`
   * all work. Every other character is matched literally, and the whole tool
   * name must match.
   */
  private matchesToolPattern(pattern: string, toolName: string): boolean {
    return pattern.split("|").some((alternative) => this.compileGlob(alternative.trim()).test(toolName));
  }

  /** Translate one glob alternative into an anchored RegExp, caching the result. */
  private compileGlob(glob: string): RegExp {
    let compiled = this.globCache.get(glob);
    if (compiled === undefined) {
      const source = glob
        .split("*")
        // Escape regex metacharacters so literal parts are matched verbatim.
        .map((literal) => literal.replace(/[.+?^${}()|[\]\\]/g, "\\$&"))
        // [\s\S]* rather than .* so a wildcard also spans line breaks.
        .join("[\\s\\S]*");
      compiled = new RegExp(`^${source}$`);
      this.globCache.set(glob, compiled);
    }
    return compiled;
  }

  /**
   * Remove entries outside the time window or exceeding window size.
   * A `windowSize` of zero (or less) keeps no entries at all.
   */
  private pruneHistory(now: number): void {
    // Remove by time
    const cutoff = now - this.config.windowMs;
    this.history = this.history.filter((c) => c.timestamp >= cutoff);

    // Remove by size (keep most recent). Computing the number of surplus
    // entries avoids slice(-0), which would keep everything.
    const limit = Math.max(0, Math.floor(this.config.windowSize));
    const surplus = this.history.length - limit;
    if (surplus > 0) {
      this.history = this.history.slice(surplus);
    }
  }

  /**
   * Clear the call history (e.g., on session reset).
   */
  reset(): void {
    this.history = [];
  }

  /**
   * Get the current call history length.
   */
  getHistoryLength(): number {
    return this.history.length;
  }
}
@@ -0,0 +1,362 @@
1
+ /**
2
+ * DashboardServer Tests
3
+ *
4
+ * Tests the WebSocket + HTTP server that bridges proxy events
5
+ * to the browser dashboard.
6
+ */
7
+
8
+ import { describe, it, expect, beforeAll, afterAll, vi, beforeEach } from "vitest";
9
+ import { EventEmitter } from "node:events";
10
+ import WebSocket from "ws";
11
+ import * as http from "node:http";
12
+ import * as fs from "node:fs";
13
+ import * as path from "node:path";
14
+ import * as os from "node:os";
15
+ import { DashboardServer, type WsMessage, type ProxyEventPayload, type StatsPayload } from "./dashboard-server.js";
16
+
17
+ // ── Mock Proxy ──────────────────────────────────────────────────────
18
+
19
/**
 * Builds a stand-in for the proxy: a plain EventEmitter with an added
 * `getStats()` method that always reports the same fixed counters.
 */
function createMockProxy() {
  const fixedStats = {
    forwarded: 10,
    denied: 3,
    prompted: 2,
    total: 15,
    scanned: 8,
    responseBlocked: 1,
    responseRedacted: 2,
  };
  const proxy = Object.assign(new EventEmitter(), {
    // Hand out a fresh object on every call, like a real stats snapshot.
    getStats: () => ({ ...fixedStats }),
  });
  return proxy as any;
}
32
+
33
/**
 * Builds a stand-in kill switch that starts inactive and simply remembers
 * whether `activate()` or `deactivate()` was called last.
 */
function createMockKillSwitch() {
  const state = { active: false };
  const setTo = (value: boolean) => (): void => {
    state.active = value;
  };
  return {
    isActive: () => state.active,
    activate: setTo(true),
    deactivate: setTo(false),
  } as any;
}
41
+
42
+ // ── Helpers ─────────────────────────────────────────────────────────
43
+
44
/** A WebSocket client that also carries the messages `connectWs` buffered before resolving. */
interface ConnectedWs extends WebSocket {
  /** Parsed messages that arrived while `connectWs` was still waiting to resolve. */
  _earlyMessages: WsMessage[];
}
47
+
48
/**
 * Opens a WebSocket to the dashboard server on 127.0.0.1 at the given port.
 *
 * Messages that arrive from the moment the socket is created until 200 ms
 * after it opens are parsed and stored in `_earlyMessages`, so a test can
 * inspect what the server sent on connect. The promise resolves with the
 * socket once that grace period is over and rejects if the socket reports an
 * error first.
 */
function connectWs(port: number): Promise<ConnectedWs> {
  return new Promise((resolve, reject) => {
    const socket = new WebSocket(`ws://127.0.0.1:${port}`) as ConnectedWs;
    socket._earlyMessages = [];

    // Buffer messages that arrive before the test installs its own listeners.
    const bufferEarly = (raw: WebSocket.RawData) => {
      try {
        socket._earlyMessages.push(JSON.parse(raw.toString()));
      } catch {}
    };

    const finishAfterGracePeriod = () => {
      // Leave the server a moment to push its initial messages.
      setTimeout(() => {
        socket.removeListener("message", bufferEarly);
        resolve(socket);
      }, 200);
    };

    socket.on("message", bufferEarly);
    socket.on("open", finishAfterGracePeriod);
    socket.on("error", reject);
  });
}
67
+
68
/**
 * Waits for the next message on `ws`, optionally only one of a given type.
 *
 * @param ws        Socket to listen on.
 * @param type      When given, messages whose `type` differs are skipped.
 * @param timeoutMs How long to wait before giving up (default 3000 ms).
 * @returns The first matching parsed message.
 *
 * The promise rejects when no matching message arrives within `timeoutMs`,
 * or when a received frame is not valid JSON. In every outcome the message
 * listener is removed again, so nothing is left attached to the socket.
 */
function waitForMessage(ws: WebSocket, type?: string, timeoutMs = 3000): Promise<WsMessage> {
  return new Promise((resolve, reject) => {
    const stopListening = () => {
      clearTimeout(timer);
      ws.removeListener("message", handler);
    };

    const handler = (data: WebSocket.RawData) => {
      let msg: WsMessage;
      try {
        msg = JSON.parse(data.toString());
      } catch (err) {
        // Surface malformed frames as a rejection instead of throwing inside
        // the emitter callback, where the error would escape the test.
        stopListening();
        reject(err);
        return;
      }
      if (!type || msg.type === type) {
        stopListening();
        resolve(msg);
      }
    };

    const timer = setTimeout(() => {
      stopListening();
      const wanted = type ? `"${type}"` : "any";
      reject(new Error(`Timed out after ${timeoutMs}ms waiting for ${wanted} message`));
    }, timeoutMs);

    ws.on("message", handler);
  });
}
80
+
81
/**
 * Gathers parsed messages from `ws` until `count` of them have arrived or
 * `timeoutMs` has elapsed, whichever happens first.
 *
 * @returns The messages received so far, in arrival order. The promise always
 *          resolves; on timeout it may hold fewer than `count` messages.
 */
function collectMessages(ws: WebSocket, count: number, timeoutMs = 3000): Promise<WsMessage[]> {
  return new Promise((resolve) => {
    const collected: WsMessage[] = [];

    // Shared exit path: stop listening and hand back whatever was gathered.
    const finish = () => {
      ws.removeListener("message", onMessage);
      resolve(collected);
    };

    const deadline = setTimeout(finish, timeoutMs);

    const onMessage = (data: WebSocket.RawData) => {
      collected.push(JSON.parse(data.toString()));
      if (collected.length < count) return;
      clearTimeout(deadline);
      finish();
    };

    ws.on("message", onMessage);
  });
}
99
+
100
/**
 * Performs a GET request against the local test server and collects the
 * whole response.
 *
 * @param port    Port of the server on 127.0.0.1.
 * @param urlPath Request path, sent to the server exactly as given. It is
 *                deliberately NOT run through URL parsing: that would collapse
 *                `..` segments, and a path-traversal test would then never
 *                send a traversal path. The path must therefore already be a
 *                valid request target (no spaces or other unescaped characters).
 * @returns Status code (500 if the response carries none), the body decoded
 *          as UTF-8, and the `content-type` header (empty string if absent).
 *
 * The promise rejects on request or response stream errors.
 */
function httpGet(port: number, urlPath: string): Promise<{ status: number; body: string; contentType: string }> {
  return new Promise((resolve, reject) => {
    const request = http.get({ host: "127.0.0.1", port, path: urlPath }, (res) => {
      // Decode on the stream so multibyte characters split across chunks
      // are reassembled correctly instead of being decoded chunk by chunk.
      res.setEncoding("utf8");
      let body = "";
      res.on("data", (chunk: string) => { body += chunk; });
      res.on("error", reject);
      res.on("end", () => {
        resolve({
          status: res.statusCode ?? 500,
          body,
          contentType: res.headers["content-type"] ?? "",
        });
      });
    });
    request.on("error", reject);
  });
}
115
+
116
+ // ── Tests ───────────────────────────────────────────────────────────
117
+
118
/**
 * WebSocket and HTTP behaviour of a DashboardServer started without a
 * static directory. One server instance (mock proxy, mock kill switch,
 * 100 ms stats interval) is shared by every test in this suite; each test
 * opens its own client socket and closes it in a `finally` block so a failed
 * assertion cannot leak a connection into the next test.
 */
describe("DashboardServer", () => {
  let proxy: ReturnType<typeof createMockProxy>;
  let killSwitch: ReturnType<typeof createMockKillSwitch>;
  let server: DashboardServer;
  let port: number;

  beforeAll(async () => {
    proxy = createMockProxy();
    killSwitch = createMockKillSwitch();
    server = new DashboardServer({
      port: 0, // Random available port
      proxy,
      killSwitch,
      statsIntervalMs: 100, // Fast for testing
    });
    await server.start();
    port = server.getPort();
  });

  afterAll(async () => {
    await server.stop();
  });

  it("should start and listen on a port", () => {
    expect(port).toBeGreaterThan(0);
  });

  it("should send welcome message on WebSocket connection", async () => {
    const ws = await connectWs(port);
    try {
      const welcome = ws._earlyMessages.find((m) => m.type === "welcome");
      expect(welcome).toBeDefined();
      expect(welcome!.ts).toBeDefined();
      expect((welcome!.payload as any).message).toContain("Dashboard connected");
    } finally {
      ws.close();
    }
  });

  it("should send stats on connection", async () => {
    const ws = await connectWs(port);
    try {
      const statsMsg = ws._earlyMessages.find((m) => m.type === "stats");
      expect(statsMsg).toBeDefined();
      const stats = statsMsg!.payload as StatsPayload;
      // Counters come straight from the mock proxy's getStats().
      expect(stats.forwarded).toBe(10);
      expect(stats.denied).toBe(3);
      expect(stats.total).toBe(15);
      expect(stats.uptime).toBeGreaterThanOrEqual(0);
      expect(stats.killSwitchActive).toBe(false);
    } finally {
      ws.close();
    }
  });

  it("should broadcast proxy events to connected clients", async () => {
    const ws = await connectWs(port);
    try {
      // Initial messages already buffered in _earlyMessages

      // Emit a denied event from proxy
      proxy.emit("denied", "read_file", "Access to .ssh blocked");

      const msg = await waitForMessage(ws, "event");
      const payload = msg.payload as ProxyEventPayload;
      expect(payload.event).toBe("denied");
      expect(payload.tool).toBe("read_file");
      expect(payload.detail).toBe("Access to .ssh blocked");
      expect(payload.severity).toBe("warn");
    } finally {
      ws.close();
    }
  });

  it("should broadcast injection events with critical severity", async () => {
    const ws = await connectWs(port);
    try {
      proxy.emit("injectionDetected", "bash", "Role override pattern detected");

      const msg = await waitForMessage(ws, "event");
      const payload = msg.payload as ProxyEventPayload;
      expect(payload.event).toBe("injectionDetected");
      expect(payload.severity).toBe("critical");
    } finally {
      ws.close();
    }
  });

  it("should broadcast stats periodically", async () => {
    const ws = await connectWs(port);
    try {
      // Wait for periodic stats broadcast (interval is 100ms in test)
      const msg = await waitForMessage(ws, "stats");
      expect(msg.type).toBe("stats");
      expect((msg.payload as StatsPayload).total).toBe(15);
    } finally {
      ws.close();
    }
  });

  it("should toggle kill switch via client message", async () => {
    const ws = await connectWs(port);
    try {
      expect(killSwitch.isActive()).toBe(false);

      // Send toggle command
      ws.send(JSON.stringify({ type: "toggleKillSwitch" }));

      const msg = await waitForMessage(ws, "killSwitch");
      expect((msg.payload as any).active).toBe(true);
      expect(killSwitch.isActive()).toBe(true);

      // Toggle back
      ws.send(JSON.stringify({ type: "toggleKillSwitch" }));

      const msg2 = await waitForMessage(ws, "killSwitch");
      expect((msg2.payload as any).active).toBe(false);
      expect(killSwitch.isActive()).toBe(false);
    } finally {
      ws.close();
    }
  });

  it("should handle audit entry and track rule hits", async () => {
    const ws = await connectWs(port);
    try {
      // Simulate audit entries
      server.handleAuditEntry({
        timestamp: new Date().toISOString(),
        sessionId: "test-session",
        direction: "request",
        method: "tools/call",
        tool: "read_file",
        verdict: { action: "deny", rule: "block-ssh-keys", message: "SSH blocked" },
      });

      const msg = await waitForMessage(ws, "audit");
      expect((msg.payload as any).tool).toBe("read_file");
      expect((msg.payload as any).verdict.rule).toBe("block-ssh-keys");
    } finally {
      ws.close();
    }
  });

  it("should broadcast to multiple clients", async () => {
    const ws1 = await connectWs(port);
    const ws2 = await connectWs(port);
    try {
      proxy.emit("allowed", "list_directory");

      const [msg1, msg2] = await Promise.all([
        waitForMessage(ws1, "event"),
        waitForMessage(ws2, "event"),
      ]);

      expect((msg1.payload as ProxyEventPayload).tool).toBe("list_directory");
      expect((msg2.payload as ProxyEventPayload).tool).toBe("list_directory");
    } finally {
      ws1.close();
      ws2.close();
    }
  });

  it("should serve fallback HTML when no staticDir is set", async () => {
    const resp = await httpGet(port, "/");
    expect(resp.status).toBe(200);
    expect(resp.contentType).toContain("text/html");
    expect(resp.body).toContain("Agent Wall Dashboard");
  });

  it("should handle client disconnect without errors", async () => {
    const ws = await connectWs(port);
    ws.close();
    // Wait a moment, then verify server still works
    await new Promise((r) => setTimeout(r, 200));

    // Server should still accept new connections
    const ws2 = await connectWs(port);
    try {
      const welcome = ws2._earlyMessages.find((m) => m.type === "welcome");
      expect(welcome).toBeDefined();
    } finally {
      // Close even when the assertion fails, like every other test here.
      ws2.close();
    }
  });
});
301
+
302
/**
 * Static file serving of a DashboardServer configured with `staticDir`.
 * The suite builds a small throw-away site (index.html plus two assets) in a
 * freshly created temporary directory and removes it again afterwards.
 */
describe("DashboardServer — static file serving", () => {
  let server: DashboardServer;
  let port: number;
  let tmpDir: string;

  beforeAll(async () => {
    // mkdtempSync creates a new, uniquely named directory, so two runs started
    // in the same millisecond can never end up sharing (or deleting) a fixture.
    tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "aw-dash-test-"));
    fs.writeFileSync(path.join(tmpDir, "index.html"), "<html><body>Dashboard</body></html>");
    fs.mkdirSync(path.join(tmpDir, "assets"), { recursive: true });
    fs.writeFileSync(path.join(tmpDir, "assets", "app.js"), "console.log('ok')");
    fs.writeFileSync(path.join(tmpDir, "assets", "style.css"), "body { color: white; }");

    const proxy = createMockProxy();
    server = new DashboardServer({
      port: 0,
      proxy,
      staticDir: tmpDir,
      statsIntervalMs: 60000, // Don't spam in static tests
    });
    await server.start();
    port = server.getPort();
  });

  afterAll(async () => {
    await server.stop();
    fs.rmSync(tmpDir, { recursive: true, force: true });
  });

  it("should serve index.html at /", async () => {
    const resp = await httpGet(port, "/");
    expect(resp.status).toBe(200);
    expect(resp.contentType).toContain("text/html");
    expect(resp.body).toContain("Dashboard");
  });

  it("should serve JS assets with correct MIME type", async () => {
    const resp = await httpGet(port, "/assets/app.js");
    expect(resp.status).toBe(200);
    expect(resp.contentType).toContain("application/javascript");
    expect(resp.body).toContain("console.log");
  });

  it("should serve CSS assets with correct MIME type", async () => {
    const resp = await httpGet(port, "/assets/style.css");
    expect(resp.status).toBe(200);
    expect(resp.contentType).toContain("text/css");
  });

  it("should SPA fallback to index.html for unknown routes", async () => {
    const resp = await httpGet(port, "/some/unknown/path");
    expect(resp.status).toBe(200);
    expect(resp.body).toContain("Dashboard");
  });

  it("should block path traversal attempts", async () => {
    const resp = await httpGet(port, "/../../../etc/passwd");
    // Should get 403 or serve index.html (SPA fallback), not leak files
    expect(resp.body).not.toContain("root:");
  });
});