mstro-app 0.3.7 → 0.3.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -8
- package/bin/mstro.js +54 -15
- package/dist/server/cli/headless/claude-invoker.d.ts.map +1 -1
- package/dist/server/cli/headless/claude-invoker.js +18 -9
- package/dist/server/cli/headless/claude-invoker.js.map +1 -1
- package/dist/server/cli/headless/headless-logger.d.ts +10 -0
- package/dist/server/cli/headless/headless-logger.d.ts.map +1 -0
- package/dist/server/cli/headless/headless-logger.js +66 -0
- package/dist/server/cli/headless/headless-logger.js.map +1 -0
- package/dist/server/cli/headless/mcp-config.d.ts.map +1 -1
- package/dist/server/cli/headless/mcp-config.js +6 -5
- package/dist/server/cli/headless/mcp-config.js.map +1 -1
- package/dist/server/cli/headless/runner.d.ts.map +1 -1
- package/dist/server/cli/headless/runner.js +4 -0
- package/dist/server/cli/headless/runner.js.map +1 -1
- package/dist/server/cli/headless/stall-assessor.d.ts +21 -0
- package/dist/server/cli/headless/stall-assessor.d.ts.map +1 -1
- package/dist/server/cli/headless/stall-assessor.js +74 -20
- package/dist/server/cli/headless/stall-assessor.js.map +1 -1
- package/dist/server/cli/headless/tool-watchdog.d.ts +0 -12
- package/dist/server/cli/headless/tool-watchdog.d.ts.map +1 -1
- package/dist/server/cli/headless/tool-watchdog.js +30 -9
- package/dist/server/cli/headless/tool-watchdog.js.map +1 -1
- package/dist/server/cli/headless/types.d.ts +8 -1
- package/dist/server/cli/headless/types.d.ts.map +1 -1
- package/dist/server/cli/improvisation-session-manager.d.ts +16 -0
- package/dist/server/cli/improvisation-session-manager.d.ts.map +1 -1
- package/dist/server/cli/improvisation-session-manager.js +94 -11
- package/dist/server/cli/improvisation-session-manager.js.map +1 -1
- package/dist/server/index.js +0 -4
- package/dist/server/index.js.map +1 -1
- package/dist/server/mcp/bouncer-cli.d.ts +3 -0
- package/dist/server/mcp/bouncer-cli.d.ts.map +1 -0
- package/dist/server/mcp/bouncer-cli.js +54 -0
- package/dist/server/mcp/bouncer-cli.js.map +1 -0
- package/dist/server/mcp/bouncer-integration.d.ts +2 -0
- package/dist/server/mcp/bouncer-integration.d.ts.map +1 -1
- package/dist/server/mcp/bouncer-integration.js +55 -39
- package/dist/server/mcp/bouncer-integration.js.map +1 -1
- package/dist/server/mcp/bouncer-sandbox.d.ts +60 -0
- package/dist/server/mcp/bouncer-sandbox.d.ts.map +1 -0
- package/dist/server/mcp/bouncer-sandbox.js +182 -0
- package/dist/server/mcp/bouncer-sandbox.js.map +1 -0
- package/dist/server/mcp/security-patterns.d.ts +6 -12
- package/dist/server/mcp/security-patterns.d.ts.map +1 -1
- package/dist/server/mcp/security-patterns.js +197 -10
- package/dist/server/mcp/security-patterns.js.map +1 -1
- package/dist/server/services/plan/composer.d.ts +4 -0
- package/dist/server/services/plan/composer.d.ts.map +1 -0
- package/dist/server/services/plan/composer.js +181 -0
- package/dist/server/services/plan/composer.js.map +1 -0
- package/dist/server/services/plan/dependency-resolver.d.ts +28 -0
- package/dist/server/services/plan/dependency-resolver.d.ts.map +1 -0
- package/dist/server/services/plan/dependency-resolver.js +152 -0
- package/dist/server/services/plan/dependency-resolver.js.map +1 -0
- package/dist/server/services/plan/executor.d.ts +91 -0
- package/dist/server/services/plan/executor.d.ts.map +1 -0
- package/dist/server/services/plan/executor.js +545 -0
- package/dist/server/services/plan/executor.js.map +1 -0
- package/dist/server/services/plan/parser.d.ts +11 -0
- package/dist/server/services/plan/parser.d.ts.map +1 -0
- package/dist/server/services/plan/parser.js +415 -0
- package/dist/server/services/plan/parser.js.map +1 -0
- package/dist/server/services/plan/state-reconciler.d.ts +2 -0
- package/dist/server/services/plan/state-reconciler.d.ts.map +1 -0
- package/dist/server/services/plan/state-reconciler.js +105 -0
- package/dist/server/services/plan/state-reconciler.js.map +1 -0
- package/dist/server/services/plan/types.d.ts +120 -0
- package/dist/server/services/plan/types.d.ts.map +1 -0
- package/dist/server/services/plan/types.js +4 -0
- package/dist/server/services/plan/types.js.map +1 -0
- package/dist/server/services/plan/watcher.d.ts +14 -0
- package/dist/server/services/plan/watcher.d.ts.map +1 -0
- package/dist/server/services/plan/watcher.js +69 -0
- package/dist/server/services/plan/watcher.js.map +1 -0
- package/dist/server/services/websocket/file-explorer-handlers.js +20 -0
- package/dist/server/services/websocket/file-explorer-handlers.js.map +1 -1
- package/dist/server/services/websocket/handler.d.ts +0 -1
- package/dist/server/services/websocket/handler.d.ts.map +1 -1
- package/dist/server/services/websocket/handler.js +28 -2
- package/dist/server/services/websocket/handler.js.map +1 -1
- package/dist/server/services/websocket/plan-handlers.d.ts +6 -0
- package/dist/server/services/websocket/plan-handlers.d.ts.map +1 -0
- package/dist/server/services/websocket/plan-handlers.js +494 -0
- package/dist/server/services/websocket/plan-handlers.js.map +1 -0
- package/dist/server/services/websocket/quality-handlers.d.ts +4 -0
- package/dist/server/services/websocket/quality-handlers.d.ts.map +1 -0
- package/dist/server/services/websocket/quality-handlers.js +470 -0
- package/dist/server/services/websocket/quality-handlers.js.map +1 -0
- package/dist/server/services/websocket/quality-persistence.d.ts +45 -0
- package/dist/server/services/websocket/quality-persistence.d.ts.map +1 -0
- package/dist/server/services/websocket/quality-persistence.js +187 -0
- package/dist/server/services/websocket/quality-persistence.js.map +1 -0
- package/dist/server/services/websocket/quality-service.d.ts +54 -0
- package/dist/server/services/websocket/quality-service.d.ts.map +1 -0
- package/dist/server/services/websocket/quality-service.js +816 -0
- package/dist/server/services/websocket/quality-service.js.map +1 -0
- package/dist/server/services/websocket/session-handlers.d.ts.map +1 -1
- package/dist/server/services/websocket/session-handlers.js +23 -0
- package/dist/server/services/websocket/session-handlers.js.map +1 -1
- package/dist/server/services/websocket/types.d.ts +2 -2
- package/dist/server/services/websocket/types.d.ts.map +1 -1
- package/package.json +3 -2
- package/server/cli/headless/claude-invoker.ts +21 -9
- package/server/cli/headless/headless-logger.ts +78 -0
- package/server/cli/headless/mcp-config.ts +6 -5
- package/server/cli/headless/runner.ts +4 -0
- package/server/cli/headless/stall-assessor.ts +101 -20
- package/server/cli/headless/tool-watchdog.ts +18 -9
- package/server/cli/headless/types.ts +10 -1
- package/server/cli/improvisation-session-manager.ts +118 -11
- package/server/index.ts +0 -4
- package/server/mcp/bouncer-cli.ts +73 -0
- package/server/mcp/bouncer-integration.ts +66 -44
- package/server/mcp/bouncer-sandbox.ts +214 -0
- package/server/mcp/security-patterns.ts +206 -10
- package/server/services/plan/composer.ts +199 -0
- package/server/services/plan/dependency-resolver.ts +179 -0
- package/server/services/plan/executor.ts +604 -0
- package/server/services/plan/parser.ts +459 -0
- package/server/services/plan/state-reconciler.ts +132 -0
- package/server/services/plan/types.ts +164 -0
- package/server/services/plan/watcher.ts +73 -0
- package/server/services/websocket/file-explorer-handlers.ts +20 -0
- package/server/services/websocket/handler.ts +28 -2
- package/server/services/websocket/plan-handlers.ts +592 -0
- package/server/services/websocket/quality-handlers.ts +570 -0
- package/server/services/websocket/quality-persistence.ts +250 -0
- package/server/services/websocket/quality-service.ts +975 -0
- package/server/services/websocket/session-handlers.ts +26 -0
- package/server/services/websocket/types.ts +62 -2
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
// Copyright (c) 2025-present Mstro, Inc. All rights reserved.
|
|
3
|
+
// Licensed under the MIT License. See LICENSE file for details.
|
|
4
|
+
|
|
5
|
+
/**
|
|
6
|
+
* Bouncer CLI — stdin/stdout wrapper for Claude Code PreToolUse hooks.
|
|
7
|
+
*
|
|
8
|
+
* Reads a tool use request from stdin (JSON), runs it through the full
|
|
9
|
+
* 2-layer bouncer (pattern matching + Haiku AI), and writes the decision
|
|
10
|
+
* to stdout in the format Claude Code hooks expect.
|
|
11
|
+
*
|
|
12
|
+
* Input format (from Claude Code hook):
|
|
13
|
+
* { "tool_name": "Bash", "input": { "command": "rm -rf /" } }
|
|
14
|
+
*
|
|
15
|
+
* Output format (to Claude Code hook):
|
|
16
|
+
* { "decision": "allow"|"deny", "reason": "..." }
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import type { BouncerReviewRequest } from './bouncer-integration.js';
|
|
20
|
+
import { reviewOperation } from './bouncer-integration.js';
|
|
21
|
+
|
|
22
|
+
/**
 * Produce the one-line operation string handed to the bouncer.
 *
 * The result is always `"<toolName>: <detail>"`. For `Bash` the detail is the
 * `command` field; for `Edit` and `Write` it is the `file_path` field. Any
 * other tool — or a missing/falsy field — falls back to the JSON form of the
 * whole input, truncated to 500 characters.
 *
 * @param toolName - Name of the tool being invoked.
 * @param toolInput - Arguments supplied to the tool.
 * @returns The operation summary string.
 */
function buildOperation(toolName: string, toolInput: Record<string, unknown>): string {
  // Pick the single field that best summarises the tool call, if there is one.
  let summaryKey: string | undefined;
  switch (toolName) {
    case 'Bash':
      summaryKey = 'command';
      break;
    case 'Edit':
    case 'Write':
      summaryKey = 'file_path';
      break;
    default:
      break;
  }

  const summaryValue = summaryKey === undefined ? undefined : toolInput[summaryKey];
  const detail = summaryValue
    ? String(summaryValue)
    : JSON.stringify(toolInput).slice(0, 500);

  return `${toolName}: ${detail}`;
}
|
|
29
|
+
|
|
30
|
+
/** Narrow an arbitrary parsed JSON value to a plain (non-array) object. */
function isPlainRecord(value: unknown): value is Record<string, unknown> {
  return typeof value === 'object' && value !== null && !Array.isArray(value);
}

/**
 * Evaluate one hook payload and translate the bouncer verdict into the
 * `{ decision, reason }` shape written to stdout.
 *
 * Accepted payload fields:
 *  - tool name: `tool_name` or `toolName` (first non-empty string wins,
 *    otherwise `'unknown'`);
 *  - tool arguments: `tool_input` (the field Claude Code hooks send), then the
 *    legacy `input` and `toolInput` spellings (first plain object wins,
 *    otherwise `{}`).
 *
 * Empty input and unparseable JSON are allowed without review. Valid JSON that
 * is not an object (e.g. `null`, `42`, `[]`) is treated as an empty payload
 * instead of throwing.
 *
 * @param rawInput - Raw text read from stdin.
 * @returns `'deny'` only when the bouncer explicitly denies; every other
 *          bouncer decision maps to `'allow'`.
 */
async function evaluate(rawInput: string): Promise<{ decision: string; reason: string }> {
  if (!rawInput.trim()) {
    return { decision: 'allow', reason: 'Empty input' };
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(rawInput);
  } catch {
    return { decision: 'allow', reason: 'Invalid JSON input' };
  }

  // JSON.parse may legitimately return null, a primitive or an array; none of
  // those carry a tool request, so fall back to an empty payload rather than
  // dereferencing a non-object.
  const payload: Record<string, unknown> = isPlainRecord(parsed) ? parsed : {};

  const toolName =
    [payload.tool_name, payload.toolName].find(
      (candidate): candidate is string => typeof candidate === 'string' && candidate.length > 0,
    ) ?? 'unknown';

  // `tool_input` is checked first: without it the real command/file path would
  // never reach the bouncer and every request would be reviewed as "{}".
  const toolInput =
    [payload.tool_input, payload.input, payload.toolInput].find(isPlainRecord) ?? {};

  const request: BouncerReviewRequest = {
    operation: buildOperation(toolName, toolInput),
    context: {
      purpose: 'Tool use request from Claude Code hook',
      workingDirectory: process.cwd(),
      toolName,
      toolInput,
    },
  };

  const result = await reviewOperation(request);
  return {
    decision: result.decision === 'deny' ? 'deny' : 'allow',
    reason: result.reasoning,
  };
}
|
|
61
|
+
|
|
62
|
+
/**
 * CLI entry point: read the whole hook payload from stdin, evaluate it, and
 * print the resulting decision as a single JSON line on stdout.
 */
async function main(): Promise<void> {
  // Decode at the stream level. Without this each chunk is a Buffer that gets
  // stringified on its own, which corrupts a multi-byte UTF-8 character that
  // happens to be split across two chunks.
  process.stdin.setEncoding('utf8');

  let rawInput = '';
  for await (const chunk of process.stdin) {
    rawInput += chunk;
  }

  const result = await evaluate(rawInput);
  console.log(JSON.stringify(result));
}
|
|
70
|
+
|
|
71
|
+
// Run the CLI. Any unexpected failure still answers with an "allow" decision
// on stdout (fail-open), but the cause is reported on stderr so a crashing
// bouncer does not go unnoticed.
main().catch((error: unknown) => {
  const detail = error instanceof Error ? error.message : String(error);
  console.error(`[Bouncer] CLI crashed: ${detail}`);
  console.log(JSON.stringify({ decision: 'allow', reason: 'Bouncer crash' }));
});
|
|
@@ -38,6 +38,7 @@ import { captureException } from '../services/sentry.js';
|
|
|
38
38
|
import {
|
|
39
39
|
CRITICAL_THREATS,
|
|
40
40
|
matchesPattern,
|
|
41
|
+
normalizeOperation,
|
|
41
42
|
requiresAIReview,
|
|
42
43
|
SAFE_OPERATIONS
|
|
43
44
|
} from './security-patterns.js';
|
|
@@ -68,6 +69,11 @@ function getCachedDecision(operation: string): BouncerDecision | null {
|
|
|
68
69
|
return entry.decision;
|
|
69
70
|
}
|
|
70
71
|
|
|
72
|
+
/**
 * Drop every entry from the in-memory decision cache.
 *
 * Exported so tests can evaluate the same operation repeatedly (e.g. to
 * measure statistical reliability) without being served a cached decision.
 */
export function clearDecisionCache(): void {
  decisionCache.clear();
}
|
|
76
|
+
|
|
71
77
|
function cacheDecision(operation: string, decision: BouncerDecision): void {
|
|
72
78
|
// Don't cache low-confidence or error-fallback decisions
|
|
73
79
|
if (decision.confidence < 50) return;
|
|
@@ -304,13 +310,54 @@ function finalizeDecision(
|
|
|
304
310
|
return decision;
|
|
305
311
|
}
|
|
306
312
|
|
|
313
|
+
/**
 * Layer 2 of the bouncer: ask Haiku to assess the operation, translating
 * timeouts and failures into fallback decisions.
 *
 * Outcomes, each passed through `fin` with the layer name shown:
 *  - `BOUNCER_USE_AI=false` → `warn_allow` (`ai-disabled`), not cached, no analytics;
 *  - analysis succeeds      → Haiku's own decision (`haiku-ai`);
 *  - error message contains "timed out" → `allow` (`haiku-timeout`), not cached;
 *  - any other error        → `deny` (`ai-error`), not cached, no analytics.
 *
 * @param request - The review request; `context.workingDirectory` is used when set.
 * @param operation - Operation string used for analytics and error reporting.
 * @param startTime - `performance.now()` timestamp used to log elapsed time.
 * @param fin - Finalizer that turns a raw decision into the returned one.
 * @returns The finalized decision.
 */
async function runHaikuAnalysis(
  request: BouncerReviewRequest,
  operation: string,
  startTime: number,
  fin: (d: BouncerDecision, layer: string, opts?: Parameters<typeof finalizeDecision>[6]) => BouncerDecision,
): Promise<BouncerDecision> {
  const aiDisabled = process.env.BOUNCER_USE_AI === 'false';
  if (aiDisabled) {
    console.error('[Bouncer] AI analysis disabled (BOUNCER_USE_AI=false)');
    const cautiousAllow: BouncerDecision = {
      decision: 'warn_allow',
      confidence: 60,
      reasoning: 'Operation requires review but AI analysis is disabled. Proceeding with caution.',
      threatLevel: 'medium',
    };
    return fin(cautiousAllow, 'ai-disabled', { skipCache: true, skipAnalytics: true });
  }

  console.error('[Bouncer] 🤖 Invoking Haiku for AI analysis...');
  trackEvent(AnalyticsEvents.BOUNCER_HAIKU_REVIEW, { operation_length: operation.length });

  const cliBinary = process.env.CLAUDE_COMMAND || 'claude';
  const cwd = request.context?.workingDirectory || process.cwd();

  try {
    const verdict = await analyzeWithHaiku(request, cliBinary, cwd);
    console.error(`[Bouncer] ✓ Haiku decision: ${verdict.decision} (${verdict.confidence}% confidence) [${Math.round(performance.now() - startTime)}ms]`);
    console.error(`[Bouncer] Reasoning: ${verdict.reasoning}`);
    return fin(verdict, 'haiku-ai');
  } catch (error: unknown) {
    const errorMessage = error instanceof Error ? error.message : String(error);
    const timedOut = errorMessage.includes('timed out');

    if (!timedOut) {
      // Unknown failure: fail closed.
      console.error(`[Bouncer] ⚠️ Haiku analysis failed: ${errorMessage}`);
      captureException(error, { context: 'bouncer.haiku_analysis', operation });
      const failClosed: BouncerDecision = {
        decision: 'deny',
        confidence: 0,
        reasoning: `Security analysis failed: ${errorMessage}. Denying for safety.`,
        threatLevel: 'critical',
      };
      return fin(failClosed, 'ai-error', { skipCache: true, skipAnalytics: true, error: errorMessage });
    }

    // Timeout: fail open, but never cache the fallback.
    console.error(`[Bouncer] ⚠️ Haiku analysis timed out after ${HAIKU_TIMEOUT_MS}ms — defaulting to ALLOW`);
    captureException(error, { context: 'bouncer.haiku_timeout', operation });
    const failOpen: BouncerDecision = {
      decision: 'allow',
      confidence: 50,
      reasoning: `Security analysis timed out after ${HAIKU_TIMEOUT_MS}ms. Defaulting to allow — user initiated the action.`,
      threatLevel: 'medium',
    };
    return fin(failOpen, 'haiku-timeout', { skipCache: true });
  }
}
|
|
352
|
+
|
|
307
353
|
/**
|
|
308
354
|
* Main bouncer review function - 2-layer hybrid system
|
|
309
355
|
*/
|
|
310
356
|
export async function reviewOperation(request: BouncerReviewRequest): Promise<BouncerDecision> {
|
|
311
357
|
const { logBouncerDecision } = await import('./security-audit.js');
|
|
312
358
|
const startTime = performance.now();
|
|
313
|
-
const { operation } = request;
|
|
359
|
+
const { operation: rawOperation } = request;
|
|
360
|
+
const operation = normalizeOperation(rawOperation);
|
|
314
361
|
const fin = (d: BouncerDecision, layer: string, opts?: Parameters<typeof finalizeDecision>[6]) =>
|
|
315
362
|
finalizeDecision(operation, d, layer, startTime, request.context, logBouncerDecision, opts);
|
|
316
363
|
|
|
@@ -336,15 +383,9 @@ export async function reviewOperation(request: BouncerReviewRequest): Promise<Bo
|
|
|
336
383
|
|
|
337
384
|
// LAYER 1: Pattern-Based Fast Path (< 5ms)
|
|
338
385
|
|
|
339
|
-
//
|
|
340
|
-
// to
|
|
341
|
-
|
|
342
|
-
if (safeOperation) {
|
|
343
|
-
console.error('[Bouncer] ⚡ Fast path: Safe operation approved');
|
|
344
|
-
return fin({ decision: 'allow', confidence: 95, reasoning: 'Operation matches known-safe patterns. No security concerns detected.', threatLevel: 'low' }, 'pattern-safe');
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
// Critical threats (rm -rf /, fork bombs) — ALWAYS denied
|
|
386
|
+
// Critical threats (rm -rf /, fork bombs) — ALWAYS denied, checked first
|
|
387
|
+
// to prevent chained commands (e.g., "echo hello; rm -rf /") from bypassing
|
|
388
|
+
// via a safe prefix match.
|
|
348
389
|
const criticalThreat = matchesPattern(operation, CRITICAL_THREATS);
|
|
349
390
|
if (criticalThreat) {
|
|
350
391
|
console.error('[Bouncer] ⚡ Fast path: CRITICAL THREAT detected');
|
|
@@ -355,43 +396,24 @@ export async function reviewOperation(request: BouncerReviewRequest): Promise<Bo
|
|
|
355
396
|
}, 'pattern-critical');
|
|
356
397
|
}
|
|
357
398
|
|
|
358
|
-
//
|
|
359
|
-
|
|
360
|
-
//
|
|
399
|
+
// Use requiresAIReview() for nuanced routing — handles sensitive paths,
|
|
400
|
+
// safe operations with guards (chain operators, pipes, expansion), and
|
|
401
|
+
// exfiltration patterns in a single consistent check.
|
|
361
402
|
if (!requiresAIReview(operation)) {
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
403
|
+
const isSafe = matchesPattern(operation, SAFE_OPERATIONS);
|
|
404
|
+
console.error(`[Bouncer] ⚡ Fast path: ${isSafe ? 'Safe operation approved' : 'No concerning patterns, allowing'}`);
|
|
405
|
+
return fin({
|
|
406
|
+
decision: 'allow',
|
|
407
|
+
confidence: isSafe ? 95 : 80,
|
|
408
|
+
reasoning: isSafe
|
|
409
|
+
? 'Operation matches known-safe patterns. No security concerns detected.'
|
|
410
|
+
: 'Operation appears safe based on pattern analysis. No obvious threats detected.',
|
|
411
|
+
threatLevel: 'low'
|
|
412
|
+
}, isSafe ? 'pattern-safe' : 'pattern-default');
|
|
369
413
|
}
|
|
370
414
|
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
const claudeCommand = process.env.CLAUDE_COMMAND || 'claude';
|
|
375
|
-
const workingDir = request.context?.workingDirectory || process.cwd();
|
|
376
|
-
|
|
377
|
-
try {
|
|
378
|
-
const decision = await analyzeWithHaiku(request, claudeCommand, workingDir);
|
|
379
|
-
console.error(`[Bouncer] ✓ Haiku decision: ${decision.decision} (${decision.confidence}% confidence) [${Math.round(performance.now() - startTime)}ms]`);
|
|
380
|
-
console.error(`[Bouncer] Reasoning: ${decision.reasoning}`);
|
|
381
|
-
return fin(decision, 'haiku-ai');
|
|
382
|
-
} catch (error: unknown) {
|
|
383
|
-
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
384
|
-
|
|
385
|
-
if (errorMessage.includes('timed out')) {
|
|
386
|
-
console.error(`[Bouncer] ⚠️ Haiku analysis timed out after ${HAIKU_TIMEOUT_MS}ms — defaulting to ALLOW`);
|
|
387
|
-
captureException(error, { context: 'bouncer.haiku_timeout', operation });
|
|
388
|
-
return fin({ decision: 'allow', confidence: 50, reasoning: `Security analysis timed out after ${HAIKU_TIMEOUT_MS}ms. Defaulting to allow — user initiated the action.`, threatLevel: 'medium' }, 'haiku-timeout', { skipCache: true });
|
|
389
|
-
}
|
|
390
|
-
|
|
391
|
-
console.error(`[Bouncer] ⚠️ Haiku analysis failed: ${errorMessage}`);
|
|
392
|
-
captureException(error, { context: 'bouncer.haiku_analysis', operation });
|
|
393
|
-
return fin({ decision: 'deny', confidence: 0, reasoning: `Security analysis failed: ${errorMessage}. Denying for safety.`, threatLevel: 'critical' }, 'ai-error', { skipCache: true, skipAnalytics: true, error: errorMessage });
|
|
394
|
-
}
|
|
415
|
+
// LAYER 2: Haiku AI Analysis (~200-500ms)
|
|
416
|
+
return runHaikuAnalysis(request, operation, startTime, fin);
|
|
395
417
|
}
|
|
396
418
|
|
|
397
419
|
/**
|
|
@@ -0,0 +1,214 @@
|
|
|
1
|
+
// Copyright (c) 2025-present Mstro, Inc. All rights reserved.
|
|
2
|
+
// Licensed under the MIT License. See LICENSE file for details.
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Sandbox Harness for Bouncer Testing
|
|
6
|
+
*
|
|
7
|
+
* Wraps command execution in Anthropic's sandbox-runtime (bubblewrap on Linux,
|
|
8
|
+
* sandbox-exec on macOS) to safely test what happens when the bouncer FAILS —
|
|
9
|
+
* i.e., when a malicious tool call gets through.
|
|
10
|
+
*
|
|
11
|
+
* Usage in tests:
|
|
12
|
+
* const harness = new BouncerSandboxHarness();
|
|
13
|
+
* await harness.initialize();
|
|
14
|
+
* const result = await harness.executeInSandbox('rm -rf /tmp/test-canary');
|
|
15
|
+
* expect(result.violations).toContain(...)
|
|
16
|
+
* await harness.cleanup();
|
|
17
|
+
*/
|
|
18
|
+
|
|
19
|
+
import { execSync } from 'node:child_process';
|
|
20
|
+
import { existsSync, mkdirSync, mkdtempSync, rmSync, writeFileSync } from 'node:fs';
|
|
21
|
+
import { tmpdir } from 'node:os';
|
|
22
|
+
import { join } from 'node:path';
|
|
23
|
+
|
|
24
|
+
/**
 * Outcome of running one command through the sandbox harness.
 */
export interface SandboxExecResult {
  /**
   * The command string after sandbox wrapping. When wrapping was not possible
   * this is the caller's original command.
   */
  wrappedCommand: string;
  /** True when sandbox-runtime was initialized and used for this call. */
  sandboxAvailable: boolean;
  /**
   * Whether the harness considers the operation contained. Note that the
   * harness also reports `true` when no sandbox is available, alongside an
   * informational entry in `violations`.
   */
  contained: boolean;
  /** Human-readable violation or diagnostic messages collected for this call. */
  violations: string[];
}
|
|
34
|
+
|
|
35
|
+
/**
 * Snapshot returned by a canary checker.
 */
export interface CanaryCheckResult {
  /** True while the canary file is still present on disk. */
  canaryIntact: boolean;
  /** True when the companion `<name>.escaped` marker file exists. */
  escapeDetected: boolean;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Test harness that wraps command execution in sandbox-runtime.
 * Provides canary files and violation tracking to verify containment.
 *
 * Lifecycle: construct → `initialize()` → any number of `executeInSandbox()` /
 * `placeCanary()` calls → `cleanup()`. After `cleanup()` the harness reports
 * itself as unavailable and falls back to decision-only results.
 */
export class BouncerSandboxHarness {
  private sandboxManager: Awaited<typeof import('@anthropic-ai/sandbox-runtime')>['SandboxManager'] | null = null;
  private sandboxAvailable = false;
  private readonly tempDir: string;
  private readonly canaryDir: string;

  /** Creates a fresh temp directory with a `canaries` subdirectory inside it. */
  constructor() {
    this.tempDir = mkdtempSync(join(tmpdir(), 'bouncer-sandbox-'));
    this.canaryDir = join(this.tempDir, 'canaries');
    mkdirSync(this.canaryDir, { recursive: true });
  }

  /** Render an unknown thrown value as a message string. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Initialize the sandbox. Falls back gracefully if bwrap/sandbox-exec not available.
   *
   * @returns `{ available: true }` on success, otherwise `{ available: false, reason }`
   *          describing why the sandbox could not be set up. Never throws.
   */
  async initialize(): Promise<{ available: boolean; reason?: string }> {
    try {
      // Loaded lazily so the harness still works (in decision-only mode) when
      // the package cannot be imported.
      const { SandboxManager } = await import('@anthropic-ai/sandbox-runtime');

      if (!SandboxManager.isSupportedPlatform()) {
        return { available: false, reason: 'Platform not supported by sandbox-runtime' };
      }

      const deps = SandboxManager.checkDependencies();
      if (deps.errors.length > 0) {
        return {
          available: false,
          reason: `Missing dependencies: ${deps.errors.join(', ')}`,
        };
      }

      await SandboxManager.initialize({
        network: {
          allowedDomains: [], // Block ALL network access
          deniedDomains: ['*'],
        },
        filesystem: {
          denyRead: [
            '/home/*/.ssh',
            '/home/*/.aws',
            '/home/*/.gnupg',
            '/etc/shadow',
            '/etc/passwd',
          ],
          allowWrite: [this.tempDir], // Only allow writes to our temp dir
          denyWrite: [
            '/',
            '/home',
            '/etc',
            '/usr',
            '/var',
          ],
        },
      });

      this.sandboxManager = SandboxManager;
      this.sandboxAvailable = true;
      return { available: true };
    } catch (error: unknown) {
      const msg = BouncerSandboxHarness.describeError(error);
      return { available: false, reason: `Failed to initialize sandbox: ${msg}` };
    }
  }

  /**
   * Execute a command inside the sandbox. Returns containment results.
   * If sandbox is not available, nothing is executed and a decision-only
   * result is returned.
   *
   * @param command - Shell command to wrap and run.
   * @returns The wrapped command, availability flag, containment verdict and
   *          collected violation messages. Never throws.
   */
  async executeInSandbox(command: string): Promise<SandboxExecResult> {
    const manager = this.sandboxManager;
    if (!this.sandboxAvailable || !manager) {
      return {
        wrappedCommand: command,
        sandboxAvailable: false,
        contained: true,
        violations: ['Sandbox not available — decision-only testing mode'],
      };
    }

    let wrappedCommand: string;
    try {
      wrappedCommand = await manager.wrapWithSandbox(command);
    } catch (error: unknown) {
      // Wrapping failed, so the command never ran.
      return {
        wrappedCommand: command,
        sandboxAvailable: true,
        contained: true,
        violations: [`Sandbox execution error: ${BouncerSandboxHarness.describeError(error)}`],
      };
    }

    const violations: string[] = [];
    try {
      try {
        execSync(wrappedCommand, {
          timeout: 5000,
          stdio: 'pipe',
          cwd: this.tempDir,
        });
      } catch {
        // Command failure inside sandbox is expected for malicious ops
      }

      // Check violation store
      const stderr = manager.annotateStderrWithSandboxFailures(command, '');
      if (stderr) {
        violations.push(stderr);
      }
    } catch (error: unknown) {
      // The command may already have run at this point, so containment can no
      // longer be asserted; the recorded violation makes `contained` false.
      violations.push(`Sandbox execution error: ${BouncerSandboxHarness.describeError(error)}`);
    }

    // Always release per-command sandbox state, even when the steps above failed.
    try {
      manager.cleanupAfterCommand();
    } catch (error: unknown) {
      violations.push(`Sandbox execution error: ${BouncerSandboxHarness.describeError(error)}`);
    }

    return {
      wrappedCommand,
      sandboxAvailable: true,
      contained: violations.length === 0,
      violations,
    };
  }

  /**
   * Place a canary file and return a checker to verify containment.
   * If a sandboxed command can delete or modify the canary, containment failed.
   *
   * NOTE(review): canaries (and the `.escaped` marker) live under the temp
   * directory that `initialize()` lists in `allowWrite`, so a sandboxed command
   * may be permitted to touch them — confirm this matches the intended checks.
   *
   * @param name - File name of the canary inside the canary directory.
   * @returns The canary path and a `check()` function reporting its current state.
   */
  placeCanary(name: string): { path: string; check: () => CanaryCheckResult } {
    const canaryPath = join(this.canaryDir, name);
    const escapePath = join(this.canaryDir, `${name}.escaped`);
    writeFileSync(canaryPath, `canary-${Date.now()}`, 'utf-8');

    return {
      path: canaryPath,
      check: () => ({
        canaryIntact: existsSync(canaryPath),
        escapeDetected: existsSync(escapePath),
      }),
    };
  }

  /**
   * Get the temp directory where sandboxed commands can write.
   */
  getSandboxWriteDir(): string {
    return this.tempDir;
  }

  /**
   * Whether the sandbox is actually available and initialized.
   */
  isAvailable(): boolean {
    return this.sandboxAvailable;
  }

  /**
   * Clean up temp dirs and reset sandbox state. Errors are ignored
   * (best-effort). Afterwards the harness reports itself as unavailable so
   * later calls do not run against a reset manager or a deleted directory.
   */
  async cleanup(): Promise<void> {
    const manager = this.sandboxManager;
    this.sandboxManager = null;
    this.sandboxAvailable = false;

    try {
      if (manager) {
        await manager.reset();
      }
    } catch {
      // Ignore cleanup errors
    }
    try {
      rmSync(this.tempDir, { recursive: true, force: true });
    } catch {
      // Ignore cleanup errors
    }
  }
}
|