@vibecheckai/cli 3.4.0 → 3.5.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/bin/registry.js +154 -338
- package/bin/runners/context/generators/mcp.js +13 -15
- package/bin/runners/context/proof-context.js +1 -248
- package/bin/runners/lib/analysis-core.js +180 -198
- package/bin/runners/lib/analyzers.js +223 -1669
- package/bin/runners/lib/cli-output.js +210 -242
- package/bin/runners/lib/detectors-v2.js +785 -547
- package/bin/runners/lib/entitlements-v2.js +458 -96
- package/bin/runners/lib/error-handler.js +9 -16
- package/bin/runners/lib/global-flags.js +0 -37
- package/bin/runners/lib/route-truth.js +322 -1167
- package/bin/runners/lib/scan-output.js +469 -448
- package/bin/runners/lib/ship-output.js +27 -280
- package/bin/runners/lib/terminal-ui.js +733 -231
- package/bin/runners/lib/truth.js +321 -1004
- package/bin/runners/lib/unified-output.js +158 -162
- package/bin/runners/lib/upsell.js +204 -104
- package/bin/runners/runAllowlist.js +324 -0
- package/bin/runners/runAuth.js +95 -324
- package/bin/runners/runCheckpoint.js +21 -39
- package/bin/runners/runContext.js +24 -136
- package/bin/runners/runDoctor.js +67 -115
- package/bin/runners/runEvidencePack.js +219 -0
- package/bin/runners/runFix.js +5 -6
- package/bin/runners/runGuard.js +118 -212
- package/bin/runners/runInit.js +2 -14
- package/bin/runners/runInstall.js +281 -0
- package/bin/runners/runLabs.js +341 -0
- package/bin/runners/runMcp.js +52 -130
- package/bin/runners/runPolish.js +20 -43
- package/bin/runners/runProve.js +3 -13
- package/bin/runners/runReality.js +0 -14
- package/bin/runners/runReport.js +2 -3
- package/bin/runners/runScan.js +44 -511
- package/bin/runners/runShip.js +14 -28
- package/bin/runners/runValidate.js +2 -19
- package/bin/runners/runWatch.js +54 -118
- package/bin/vibecheck.js +41 -148
- package/mcp-server/ARCHITECTURE.md +339 -0
- package/mcp-server/__tests__/cache.test.ts +313 -0
- package/mcp-server/__tests__/executor.test.ts +239 -0
- package/mcp-server/__tests__/fixtures/exclusion-test/.cache/webpack/cache.pack +1 -0
- package/mcp-server/__tests__/fixtures/exclusion-test/.next/server/chunk.js +3 -0
- package/mcp-server/__tests__/fixtures/exclusion-test/.turbo/cache.json +3 -0
- package/mcp-server/__tests__/fixtures/exclusion-test/.venv/lib/env.py +3 -0
- package/mcp-server/__tests__/fixtures/exclusion-test/dist/bundle.js +3 -0
- package/mcp-server/__tests__/fixtures/exclusion-test/package.json +5 -0
- package/mcp-server/__tests__/fixtures/exclusion-test/src/app.ts +5 -0
- package/mcp-server/__tests__/fixtures/exclusion-test/venv/lib/config.py +4 -0
- package/mcp-server/__tests__/ids.test.ts +345 -0
- package/mcp-server/__tests__/integration/tools.test.ts +410 -0
- package/mcp-server/__tests__/registry.test.ts +365 -0
- package/mcp-server/__tests__/sandbox.test.ts +323 -0
- package/mcp-server/__tests__/schemas.test.ts +372 -0
- package/mcp-server/benchmarks/run-benchmarks.ts +304 -0
- package/mcp-server/examples/doctor.request.json +14 -0
- package/mcp-server/examples/doctor.response.json +53 -0
- package/mcp-server/examples/error.response.json +15 -0
- package/mcp-server/examples/scan.request.json +14 -0
- package/mcp-server/examples/scan.response.json +108 -0
- package/mcp-server/handlers/tool-handler.ts +671 -0
- package/mcp-server/index-v3.ts +293 -0
- package/mcp-server/index.js +1072 -1573
- package/mcp-server/index.old.js +4137 -0
- package/mcp-server/lib/cache.ts +341 -0
- package/mcp-server/lib/errors.ts +346 -0
- package/mcp-server/lib/executor.ts +792 -0
- package/mcp-server/lib/ids.ts +238 -0
- package/mcp-server/lib/logger.ts +368 -0
- package/mcp-server/lib/metrics.ts +365 -0
- package/mcp-server/lib/sandbox.ts +337 -0
- package/mcp-server/lib/validator.ts +229 -0
- package/mcp-server/package-lock.json +165 -0
- package/mcp-server/package.json +32 -7
- package/mcp-server/premium-tools.js +2 -2
- package/mcp-server/registry/tools.json +476 -0
- package/mcp-server/schemas/error-envelope.schema.json +125 -0
- package/mcp-server/schemas/finding.schema.json +167 -0
- package/mcp-server/schemas/report-artifact.schema.json +88 -0
- package/mcp-server/schemas/run-request.schema.json +75 -0
- package/mcp-server/schemas/verdict.schema.json +168 -0
- package/mcp-server/tier-auth.d.ts +71 -0
- package/mcp-server/tier-auth.js +371 -183
- package/mcp-server/truth-context.js +90 -131
- package/mcp-server/truth-firewall-tools.js +1000 -1611
- package/mcp-server/tsconfig.json +34 -0
- package/mcp-server/vibecheck-tools.js +2 -2
- package/mcp-server/vitest.config.ts +16 -0
- package/package.json +3 -4
- package/bin/runners/lib/agent-firewall/ai/false-positive-analyzer.js +0 -474
- package/bin/runners/lib/agent-firewall/change-packet/builder.js +0 -488
- package/bin/runners/lib/agent-firewall/change-packet/schema.json +0 -228
- package/bin/runners/lib/agent-firewall/change-packet/store.js +0 -200
- package/bin/runners/lib/agent-firewall/claims/claim-types.js +0 -21
- package/bin/runners/lib/agent-firewall/claims/extractor.js +0 -303
- package/bin/runners/lib/agent-firewall/claims/patterns.js +0 -24
- package/bin/runners/lib/agent-firewall/critic/index.js +0 -151
- package/bin/runners/lib/agent-firewall/critic/judge.js +0 -432
- package/bin/runners/lib/agent-firewall/critic/prompts.js +0 -305
- package/bin/runners/lib/agent-firewall/evidence/auth-evidence.js +0 -88
- package/bin/runners/lib/agent-firewall/evidence/contract-evidence.js +0 -75
- package/bin/runners/lib/agent-firewall/evidence/env-evidence.js +0 -127
- package/bin/runners/lib/agent-firewall/evidence/resolver.js +0 -102
- package/bin/runners/lib/agent-firewall/evidence/route-evidence.js +0 -213
- package/bin/runners/lib/agent-firewall/evidence/side-effect-evidence.js +0 -145
- package/bin/runners/lib/agent-firewall/fs-hook/daemon.js +0 -19
- package/bin/runners/lib/agent-firewall/fs-hook/installer.js +0 -87
- package/bin/runners/lib/agent-firewall/fs-hook/watcher.js +0 -184
- package/bin/runners/lib/agent-firewall/git-hook/pre-commit.js +0 -163
- package/bin/runners/lib/agent-firewall/ide-extension/cursor.js +0 -107
- package/bin/runners/lib/agent-firewall/ide-extension/vscode.js +0 -68
- package/bin/runners/lib/agent-firewall/ide-extension/windsurf.js +0 -66
- package/bin/runners/lib/agent-firewall/interceptor/base.js +0 -304
- package/bin/runners/lib/agent-firewall/interceptor/cursor.js +0 -35
- package/bin/runners/lib/agent-firewall/interceptor/vscode.js +0 -35
- package/bin/runners/lib/agent-firewall/interceptor/windsurf.js +0 -34
- package/bin/runners/lib/agent-firewall/lawbook/distributor.js +0 -465
- package/bin/runners/lib/agent-firewall/lawbook/evaluator.js +0 -604
- package/bin/runners/lib/agent-firewall/lawbook/index.js +0 -304
- package/bin/runners/lib/agent-firewall/lawbook/registry.js +0 -514
- package/bin/runners/lib/agent-firewall/lawbook/schema.js +0 -420
- package/bin/runners/lib/agent-firewall/logger.js +0 -141
- package/bin/runners/lib/agent-firewall/policy/default-policy.json +0 -90
- package/bin/runners/lib/agent-firewall/policy/engine.js +0 -103
- package/bin/runners/lib/agent-firewall/policy/loader.js +0 -451
- package/bin/runners/lib/agent-firewall/policy/rules/auth-drift.js +0 -50
- package/bin/runners/lib/agent-firewall/policy/rules/contract-drift.js +0 -50
- package/bin/runners/lib/agent-firewall/policy/rules/fake-success.js +0 -86
- package/bin/runners/lib/agent-firewall/policy/rules/ghost-env.js +0 -162
- package/bin/runners/lib/agent-firewall/policy/rules/ghost-route.js +0 -189
- package/bin/runners/lib/agent-firewall/policy/rules/scope.js +0 -93
- package/bin/runners/lib/agent-firewall/policy/rules/unsafe-side-effect.js +0 -57
- package/bin/runners/lib/agent-firewall/policy/schema.json +0 -183
- package/bin/runners/lib/agent-firewall/policy/verdict.js +0 -54
- package/bin/runners/lib/agent-firewall/proposal/extractor.js +0 -394
- package/bin/runners/lib/agent-firewall/proposal/index.js +0 -212
- package/bin/runners/lib/agent-firewall/proposal/schema.js +0 -251
- package/bin/runners/lib/agent-firewall/proposal/validator.js +0 -386
- package/bin/runners/lib/agent-firewall/reality/index.js +0 -332
- package/bin/runners/lib/agent-firewall/reality/state.js +0 -625
- package/bin/runners/lib/agent-firewall/reality/watcher.js +0 -322
- package/bin/runners/lib/agent-firewall/risk/index.js +0 -173
- package/bin/runners/lib/agent-firewall/risk/scorer.js +0 -328
- package/bin/runners/lib/agent-firewall/risk/thresholds.js +0 -321
- package/bin/runners/lib/agent-firewall/risk/vectors.js +0 -421
- package/bin/runners/lib/agent-firewall/simulator/diff-simulator.js +0 -472
- package/bin/runners/lib/agent-firewall/simulator/import-resolver.js +0 -346
- package/bin/runners/lib/agent-firewall/simulator/index.js +0 -181
- package/bin/runners/lib/agent-firewall/simulator/route-validator.js +0 -380
- package/bin/runners/lib/agent-firewall/time-machine/incident-correlator.js +0 -661
- package/bin/runners/lib/agent-firewall/time-machine/index.js +0 -267
- package/bin/runners/lib/agent-firewall/time-machine/replay-engine.js +0 -436
- package/bin/runners/lib/agent-firewall/time-machine/state-reconstructor.js +0 -490
- package/bin/runners/lib/agent-firewall/time-machine/timeline-builder.js +0 -530
- package/bin/runners/lib/agent-firewall/truthpack/index.js +0 -67
- package/bin/runners/lib/agent-firewall/truthpack/loader.js +0 -137
- package/bin/runners/lib/agent-firewall/unblock/planner.js +0 -337
- package/bin/runners/lib/agent-firewall/utils/ignore-checker.js +0 -118
- package/bin/runners/lib/api-client.js +0 -269
- package/bin/runners/lib/authority-badge.js +0 -425
- package/bin/runners/lib/engines/accessibility-engine.js +0 -190
- package/bin/runners/lib/engines/api-consistency-engine.js +0 -162
- package/bin/runners/lib/engines/ast-cache.js +0 -99
- package/bin/runners/lib/engines/code-quality-engine.js +0 -255
- package/bin/runners/lib/engines/console-logs-engine.js +0 -115
- package/bin/runners/lib/engines/cross-file-analysis-engine.js +0 -268
- package/bin/runners/lib/engines/dead-code-engine.js +0 -198
- package/bin/runners/lib/engines/deprecated-api-engine.js +0 -226
- package/bin/runners/lib/engines/empty-catch-engine.js +0 -150
- package/bin/runners/lib/engines/file-filter.js +0 -131
- package/bin/runners/lib/engines/hardcoded-secrets-engine.js +0 -251
- package/bin/runners/lib/engines/mock-data-engine.js +0 -272
- package/bin/runners/lib/engines/parallel-processor.js +0 -71
- package/bin/runners/lib/engines/performance-issues-engine.js +0 -265
- package/bin/runners/lib/engines/security-vulnerabilities-engine.js +0 -243
- package/bin/runners/lib/engines/todo-fixme-engine.js +0 -115
- package/bin/runners/lib/engines/type-aware-engine.js +0 -152
- package/bin/runners/lib/engines/unsafe-regex-engine.js +0 -225
- package/bin/runners/lib/engines/vibecheck-engines/README.md +0 -53
- package/bin/runners/lib/engines/vibecheck-engines/index.js +0 -15
- package/bin/runners/lib/engines/vibecheck-engines/lib/ast-cache.js +0 -164
- package/bin/runners/lib/engines/vibecheck-engines/lib/code-quality-engine.js +0 -291
- package/bin/runners/lib/engines/vibecheck-engines/lib/console-logs-engine.js +0 -83
- package/bin/runners/lib/engines/vibecheck-engines/lib/dead-code-engine.js +0 -198
- package/bin/runners/lib/engines/vibecheck-engines/lib/deprecated-api-engine.js +0 -275
- package/bin/runners/lib/engines/vibecheck-engines/lib/empty-catch-engine.js +0 -167
- package/bin/runners/lib/engines/vibecheck-engines/lib/file-filter.js +0 -217
- package/bin/runners/lib/engines/vibecheck-engines/lib/hardcoded-secrets-engine.js +0 -139
- package/bin/runners/lib/engines/vibecheck-engines/lib/mock-data-engine.js +0 -140
- package/bin/runners/lib/engines/vibecheck-engines/lib/parallel-processor.js +0 -164
- package/bin/runners/lib/engines/vibecheck-engines/lib/performance-issues-engine.js +0 -234
- package/bin/runners/lib/engines/vibecheck-engines/lib/type-aware-engine.js +0 -217
- package/bin/runners/lib/engines/vibecheck-engines/lib/unsafe-regex-engine.js +0 -78
- package/bin/runners/lib/engines/vibecheck-engines/package.json +0 -13
- package/bin/runners/lib/exit-codes.js +0 -275
- package/bin/runners/lib/fingerprint.js +0 -377
- package/bin/runners/lib/help-formatter.js +0 -413
- package/bin/runners/lib/logger.js +0 -38
- package/bin/runners/lib/ship-output-enterprise.js +0 -239
- package/bin/runners/lib/unified-cli-output.js +0 -604
- package/bin/runners/runAgent.d.ts +0 -5
- package/bin/runners/runAgent.js +0 -161
- package/bin/runners/runApprove.js +0 -1200
- package/bin/runners/runClassify.js +0 -859
- package/bin/runners/runContext.d.ts +0 -4
- package/bin/runners/runFirewall.d.ts +0 -5
- package/bin/runners/runFirewall.js +0 -134
- package/bin/runners/runFirewallHook.d.ts +0 -5
- package/bin/runners/runFirewallHook.js +0 -56
- package/bin/runners/runPolish.d.ts +0 -4
- package/bin/runners/runProof.zip +0 -0
- package/bin/runners/runTruth.d.ts +0 -5
- package/bin/runners/runTruth.js +0 -101
- package/mcp-server/HARDENING_SUMMARY.md +0 -299
- package/mcp-server/agent-firewall-interceptor.js +0 -500
- package/mcp-server/authority-tools.js +0 -569
- package/mcp-server/conductor/conflict-resolver.js +0 -588
- package/mcp-server/conductor/execution-planner.js +0 -544
- package/mcp-server/conductor/index.js +0 -377
- package/mcp-server/conductor/lock-manager.js +0 -615
- package/mcp-server/conductor/request-queue.js +0 -550
- package/mcp-server/conductor/session-manager.js +0 -500
- package/mcp-server/conductor/tools.js +0 -510
- package/mcp-server/lib/api-client.cjs +0 -13
- package/mcp-server/lib/logger.cjs +0 -30
- package/mcp-server/logger.js +0 -173
- package/mcp-server/tools-v3.js +0 -706
- package/mcp-server/vibecheck-mcp-server-3.2.0.tgz +0 -0
|
@@ -0,0 +1,372 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Schema Validation Tests
|
|
3
|
+
*
|
|
4
|
+
* Ensures all canonical schemas are valid and work correctly.
|
|
5
|
+
*/
|
|
6
|
+
|
|
7
|
+
import { describe, it, expect, beforeAll } from 'vitest';
|
|
8
|
+
import {
|
|
9
|
+
validateRunRequest,
|
|
10
|
+
validateFinding,
|
|
11
|
+
validateVerdict,
|
|
12
|
+
validateErrorEnvelope,
|
|
13
|
+
validateToolInput,
|
|
14
|
+
normalizeFindingId,
|
|
15
|
+
validateProjectPath,
|
|
16
|
+
ValidationError,
|
|
17
|
+
} from '../lib/validator.js';
|
|
18
|
+
|
|
19
|
+
describe('Schema Validation', () => {
|
|
20
|
+
describe('RunRequest Schema', () => {
  // Builds a fresh scan request each time so tests never share object identity.
  const scanRequest = (extra: Record<string, unknown> = {}) => ({
    tool: 'vibecheck.scan',
    projectPath: '/home/user/project',
    ...extra,
  });

  it('should validate a valid run request', () => {
    const { valid, errors } = validateRunRequest(
      scanRequest({
        timeout: 60000,
        cache: { mode: 'auto', maxAge: 300 },
        options: { profile: 'quick' },
      })
    );

    expect(valid).toBe(true);
    expect(errors).toBeUndefined();
  });

  it('should reject invalid tool name format', () => {
    // Tool names are expected to look like vibecheck.xxx
    const { valid, errors } = validateRunRequest({
      tool: 'invalid-tool',
      projectPath: '/home/user/project',
    });

    expect(valid).toBe(false);
    expect(errors).toBeDefined();

    const mentionsTool = errors?.some((err: ValidationError) => err.path.includes('tool'));
    expect(mentionsTool).toBe(true);
  });

  it('should require projectPath', () => {
    const outcome = validateRunRequest({ tool: 'vibecheck.scan' });

    expect(outcome.valid).toBe(false);
  });

  it('should validate timeout bounds', () => {
    // 500 is below the minimum (1000); 700000 is above the maximum (600000).
    const outOfRangeTimeouts = [500, 700000];

    outOfRangeTimeouts.forEach((timeout) => {
      expect(validateRunRequest(scanRequest({ timeout })).valid).toBe(false);
    });
  });

  it('should validate cache mode enum', () => {
    const withCacheMode = (mode: string) => scanRequest({ cache: { mode } });

    expect(validateRunRequest(withCacheMode('force')).valid).toBe(true);
    expect(validateRunRequest(withCacheMode('invalid')).valid).toBe(false);
  });
});
|
|
90
|
+
|
|
91
|
+
describe('Finding Schema', () => {
  // Smallest finding the tests treat as valid; individual fields can be overridden.
  // A new object (and evidence array) is produced on every call.
  const minimalFinding = (overrides: Record<string, unknown> = {}) => ({
    id: 'test-12345678',
    category: 'secrets',
    severity: 'WARN',
    title: 'Test',
    evidence: [{ file: 'test.ts' }],
    ...overrides,
  });

  it('should validate a complete finding', () => {
    const { valid } = validateFinding({
      id: 'auth_gap-a1b2c3d4',
      category: 'auth_gap',
      severity: 'BLOCK',
      title: 'Unprotected API endpoint',
      description: 'The /api/admin endpoint has no authentication',
      evidence: [
        {
          file: 'src/routes/admin.ts',
          line: 42,
          snippet: 'router.get("/admin", handler)',
          reason: 'No auth middleware',
        },
      ],
      fixHints: ['Add withAuth middleware'],
      confidence: 0.95,
      source: 'static',
    });

    expect(valid).toBe(true);
  });

  it('should require evidence array with at least one item', () => {
    // An empty evidence array must be rejected.
    const { valid } = validateFinding({
      id: 'auth_gap-a1b2c3d4',
      category: 'auth_gap',
      severity: 'BLOCK',
      title: 'Missing auth',
      evidence: [],
    });

    expect(valid).toBe(false);
  });

  it('should validate severity enum', () => {
    const accepted = minimalFinding();
    // 'CRITICAL' is not one of the enum values.
    const rejected = minimalFinding({ severity: 'CRITICAL' });

    expect(validateFinding(accepted).valid).toBe(true);
    expect(validateFinding(rejected).valid).toBe(false);
  });

  it('should validate category enum', () => {
    const validCategories = ['secrets', 'auth_gap', 'billing_bypass', 'dead_ui', 'fake_success'];

    validCategories.forEach((category) => {
      const candidate = minimalFinding({ id: `${category}-12345678`, category });
      expect(validateFinding(candidate).valid).toBe(true);
    });
  });

  it('should validate finding ID format', () => {
    const validIds = ['auth_gap-a1b2c3d4', 'secrets-12345678', 'dead_ui-abcdef12'];
    const invalidIds = ['invalid', 'AUTH_GAP-123', 'auth_gap_123'];

    validIds.forEach((id) => {
      expect(validateFinding(minimalFinding({ id })).valid).toBe(true);
    });

    invalidIds.forEach((id) => {
      expect(validateFinding(minimalFinding({ id })).valid).toBe(false);
    });
  });
});
|
|
189
|
+
|
|
190
|
+
describe('Verdict Schema', () => {
  // Fields shared by the enum and score-bound tests; rebuilt on every call.
  const commonFields = () => ({
    summary: { block: 0, warn: 0, info: 0, total: 0 },
    findings: [],
    meta: {
      version: '3.0.0',
      runId: 'run_123',
      timestamp: '2024-01-15T10:30:00Z',
      durationMs: 1000,
    },
  });

  it('should validate a complete verdict', () => {
    const { valid } = validateVerdict({
      verdict: 'SHIP',
      score: 95,
      grade: 'A',
      summary: {
        block: 0,
        warn: 2,
        info: 5,
        total: 7,
      },
      findings: [],
      meta: {
        version: '3.0.0',
        runId: 'run_abc123',
        timestamp: '2024-01-15T10:30:00Z',
        durationMs: 5000,
      },
    });

    expect(valid).toBe(true);
  });

  it('should validate verdict enum', () => {
    const expectations: Array<[string, boolean]> = [
      ['SHIP', true],
      ['WARN', true],
      ['BLOCK', true],
      ['MAYBE', false],
    ];

    for (const [verdict, shouldPass] of expectations) {
      const candidate = { score: 50, ...commonFields(), verdict };
      expect(validateVerdict(candidate).valid).toBe(shouldPass);
    }
  });

  it('should validate score bounds', () => {
    const expectations: Array<[number, boolean]> = [
      [0, true],
      [100, true],
      [-1, false],
      [101, false],
    ];

    for (const [score, shouldPass] of expectations) {
      const candidate = { verdict: 'SHIP', ...commonFields(), score };
      expect(validateVerdict(candidate).valid).toBe(shouldPass);
    }
  });
});
|
|
253
|
+
|
|
254
|
+
describe('ErrorEnvelope Schema', () => {
  it('should validate a complete error envelope', () => {
    const { valid } = validateErrorEnvelope({
      ok: false,
      error: {
        code: 'TIMEOUT',
        message: 'Operation timed out after 60000ms',
        retryable: true,
        retryAfterMs: 5000,
        userAction: 'Try again with a smaller scope',
      },
      requestId: 'req_abc123',
      timestamp: '2024-01-15T10:30:00Z',
    });

    expect(valid).toBe(true);
  });

  it('should require ok to be false', () => {
    // An error envelope carrying ok: true must be rejected.
    const { valid } = validateErrorEnvelope({
      ok: true,
      error: {
        code: 'TIMEOUT',
        message: 'Test',
      },
      requestId: 'req_123',
      timestamp: '2024-01-15T10:30:00Z',
    });

    expect(valid).toBe(false);
  });

  it('should validate error code enum', () => {
    const validCodes = ['INVALID_INPUT', 'TIMEOUT', 'PATH_NOT_FOUND', 'TIER_REQUIRED'];

    validCodes.forEach((code) => {
      const outcome = validateErrorEnvelope({
        ok: false,
        error: { code, message: 'Test' },
        requestId: 'req_123',
        timestamp: '2024-01-15T10:30:00Z',
      });

      expect(outcome.valid).toBe(true);
    });
  });
});
|
|
302
|
+
|
|
303
|
+
describe('Tool Input Validation', () => {
  it('should validate vibecheck.scan input', () => {
    const accepted = validateToolInput('vibecheck.scan', { profile: 'quick' });
    const rejected = validateToolInput('vibecheck.scan', { profile: 'invalid_profile' });

    expect(accepted.valid).toBe(true);
    expect(rejected.valid).toBe(false);
  });

  it('should validate vibecheck.reality required fields', () => {
    const accepted = validateToolInput('vibecheck.reality', { url: 'http://localhost:3000' });
    // An empty input is missing the required url.
    const rejected = validateToolInput('vibecheck.reality', {});

    expect(accepted.valid).toBe(true);
    expect(rejected.valid).toBe(false);
  });

  it('should return error for unknown tool', () => {
    const { valid, errors } = validateToolInput('vibecheck.unknown_tool', {});

    expect(valid).toBe(false);

    const reportsUnknownTool = errors?.some((err: ValidationError) =>
      err.message.includes('Unknown tool')
    );
    expect(reportsUnknownTool).toBe(true);
  });
});
|
|
326
|
+
|
|
327
|
+
describe('Finding ID Normalization', () => {
  it('should create deterministic IDs', () => {
    const evidence = { file: 'src/api.ts', line: 42 };

    // Same category and same evidence, computed twice.
    const [first, second] = [0, 1].map(() => normalizeFindingId('auth_gap', evidence));

    expect(first).toBe(second);
    expect(first).toMatch(/^auth_gap-[a-f0-9]{8}$/);
  });

  it('should produce different IDs for different evidence', () => {
    const evidenceVariants = [
      { file: 'a.ts', line: 1 },
      { file: 'b.ts', line: 1 },
      { file: 'a.ts', line: 2 },
    ];

    const [baseId, otherFileId, otherLineId] = evidenceVariants.map((evidence) =>
      normalizeFindingId('auth_gap', evidence)
    );

    expect(baseId).not.toBe(otherFileId);
    expect(baseId).not.toBe(otherLineId);
    expect(otherFileId).not.toBe(otherLineId);
  });
});
|
|
348
|
+
|
|
349
|
+
describe('Project Path Validation', () => {
  const WORKSPACE = '/home/user';

  it('should accept valid paths within workspace', () => {
    const { valid } = validateProjectPath('/home/user/project', WORKSPACE);

    expect(valid).toBe(true);
  });

  it('should reject path traversal', () => {
    const { valid, error } = validateProjectPath('/home/user/../etc/passwd', WORKSPACE);

    expect(valid).toBe(false);
    expect(error).toContain('traversal');
  });

  it('should reject paths outside workspace', () => {
    const { valid } = validateProjectPath('/etc/passwd', WORKSPACE);

    expect(valid).toBe(false);
  });

  it('should normalize paths', () => {
    // No workspace argument here: only the separator clean-up is checked.
    const { valid, normalized } = validateProjectPath('project//src///file.ts');

    expect(valid).toBe(true);
    expect(normalized).toBe('project/src/file.ts');
  });
});
|
|
372
|
+
});
|
|
@@ -0,0 +1,304 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* MCP Server Benchmarks
|
|
3
|
+
*
|
|
4
|
+
* Measures performance for core tools.
|
|
5
|
+
* Shows cache benefit and correctness.
|
|
6
|
+
*
|
|
7
|
+
* Usage:
|
|
8
|
+
* npx tsx benchmarks/run-benchmarks.ts [projectPath]
|
|
9
|
+
*
|
|
10
|
+
* TARGETS:
|
|
11
|
+
* - Warm runs 3-10x faster than cold
|
|
12
|
+
* - doctor warm <500ms (realistic <1.5s on Windows)
|
|
13
|
+
* - scan warm <3000ms
|
|
14
|
+
* - ship warm <2000ms
|
|
15
|
+
* - status warm <100ms
|
|
16
|
+
*/
|
|
17
|
+
|
|
18
|
+
import { performance } from 'perf_hooks';
|
|
19
|
+
import { existsSync, mkdirSync, writeFileSync, rmSync } from 'fs';
|
|
20
|
+
import { join } from 'path';
|
|
21
|
+
import { tmpdir } from 'os';
|
|
22
|
+
import { handleTool, ToolRequest } from '../handlers/tool-handler.js';
|
|
23
|
+
import { getGlobalCache, initGlobalCache } from '../lib/cache.js';
|
|
24
|
+
import { initLogger, LogLevel } from '../lib/logger.js';
|
|
25
|
+
import { getMetricsCollector, initMetricsCollector } from '../lib/metrics.js';
|
|
26
|
+
|
|
27
|
+
/** Outcome of benchmarking one tool: a cold run followed by a warm run. */
interface BenchmarkResult {
  /** Short tool name as given to `runBenchmark` (without the `vibecheck.` prefix). */
  tool: string;
  /** Cold-run duration in whole milliseconds, or -1 when no timing was recorded. */
  coldRun: number;
  /** Warm-run duration in whole milliseconds, or -1 when no timing was recorded. */
  warmRun: number;
  /** True when the warm result succeeded and reported `meta.cached === true`. */
  cacheHit: boolean;
  /** Cold time divided by warm time, rounded to one decimal; 0 when unavailable. */
  speedup: number;
  /** True when the cold run succeeded or failed with a code other than `INTERNAL_ERROR`. */
  correct: boolean;
  /** Message of the error that aborted the benchmark, when one was thrown. */
  error?: string;
}
|
|
36
|
+
|
|
37
|
+
/** Time budgets for one tool, in milliseconds. */
interface BenchmarkTargets {
  /** Budget for a warm run. */
  warmTarget: number;
  /** Budget for a cold run. */
  coldTarget: number;
}
|
|
41
|
+
|
|
42
|
+
/**
 * Per-tool time budgets in milliseconds, keyed by short tool name.
 * The warm values mirror the TARGETS list in the file header.
 */
const TARGETS: Record<string, BenchmarkTargets> = {
  doctor: {
    warmTarget: 500,
    coldTarget: 3000,
  },
  scan: {
    warmTarget: 3000,
    coldTarget: 15000,
  },
  ship: {
    warmTarget: 2000,
    coldTarget: 10000,
  },
  status: {
    warmTarget: 100,
    coldTarget: 1000,
  },
};
|
|
48
|
+
|
|
49
|
+
/**
 * Build a throwaway project on disk for benchmarking.
 *
 * The fixture is created under the OS temp directory in a folder named
 * `vibecheck-bench-<timestamp>` and contains a `package.json`, a small
 * Express entry point at `src/index.ts`, and an empty `.vibecheck` directory.
 *
 * @returns Path of the created fixture directory.
 */
function createTestFixture(): string {
  const root = join(tmpdir(), `vibecheck-bench-${Date.now()}`);

  const manifest = {
    name: 'benchmark-fixture',
    version: '1.0.0',
    dependencies: { express: '^4.18.0' },
  };

  // Leading and trailing empty entries reproduce the newlines that wrap the source text.
  const entrySource = [
    '',
    "import express from 'express';",
    'const app = express();',
    "app.get('/api/test', (req, res) => res.json({ ok: true }));",
    'export default app;',
    '',
  ].join('\n');

  mkdirSync(root, { recursive: true });

  // Minimal project structure
  writeFileSync(join(root, 'package.json'), JSON.stringify(manifest));

  const srcDir = join(root, 'src');
  mkdirSync(srcDir);
  writeFileSync(join(srcDir, 'index.ts'), entrySource);

  mkdirSync(join(root, '.vibecheck'), { recursive: true });

  return root;
}
|
|
75
|
+
|
|
76
|
+
/**
 * Run a single benchmark: one cold run against a cleared cache, followed by
 * one warm run with identical arguments.
 *
 * A run that throws never rejects this function. The failure is reported in
 * the returned result (`-1` timing for the failed run, `speedup: 0`,
 * `correct: false`, and the error text in `error`) so the remaining tools can
 * still be benchmarked and printed.
 *
 * @param tool - Short tool name; it is prefixed with `vibecheck.` before dispatch.
 * @param projectPath - Project directory passed through to the tool handler.
 * @param options - Optional tool options forwarded unchanged to both runs.
 * @returns Rounded timings in milliseconds, the cache-hit flag, the speedup
 *          rounded to one decimal, and the cold-run correctness flag.
 */
async function runBenchmark(
  tool: string,
  projectPath: string,
  options?: Record<string, unknown>
): Promise<BenchmarkResult> {
  // Both runs must issue the exact same request, so build it in one place.
  const invoke = () =>
    handleTool({
      tool: `vibecheck.${tool}`,
      projectPath,
      options,
      cache: { mode: 'auto' },
    });

  // Thrown values are not guaranteed to be Error instances.
  const describeError = (thrown: unknown): string =>
    thrown instanceof Error ? thrown.message : String(thrown);

  const cache = getGlobalCache();

  // Clear cache for cold run
  cache.clear();

  // Cold run
  const coldStart = performance.now();
  let coldResult: Awaited<ReturnType<typeof handleTool>>;
  try {
    coldResult = await invoke();
  } catch (e) {
    return {
      tool,
      coldRun: -1,
      warmRun: -1,
      cacheHit: false,
      speedup: 0,
      correct: false,
      error: describeError(e),
    };
  }
  const coldTime = performance.now() - coldStart;

  // Verify cold run correctness: any outcome other than INTERNAL_ERROR counts.
  const coldCorrect = coldResult.ok ||
    (coldResult as { error?: { code: string } }).error?.code !== 'INTERNAL_ERROR';

  // Warm run (should hit cache). Guarded like the cold run so a throw here
  // produces an ERROR row instead of aborting the whole benchmark.
  const warmStart = performance.now();
  let warmResult: Awaited<ReturnType<typeof handleTool>>;
  try {
    warmResult = await invoke();
  } catch (e) {
    return {
      tool,
      coldRun: Math.round(coldTime),
      warmRun: -1,
      cacheHit: false,
      speedup: 0,
      correct: false,
      error: describeError(e),
    };
  }
  const warmTime = performance.now() - warmStart;

  const cacheHit = warmResult.ok &&
    (warmResult as { meta?: { cached?: boolean } }).meta?.cached === true;

  // A zero warm time would make the ratio Infinity; report 0 (shown as N/A) instead.
  const speedup = warmTime > 0 ? coldTime / warmTime : 0;

  return {
    tool,
    coldRun: Math.round(coldTime),
    warmRun: Math.round(warmTime),
    cacheHit,
    speedup: Math.round(speedup * 10) / 10,
    correct: coldCorrect,
  };
}
|
|
139
|
+
|
|
140
|
+
/**
 * Print the benchmark results as a box-drawn table on stdout, one row per
 * benchmark entry.
 *
 * Negative timings are rendered as an error marker, a non-positive speedup is
 * rendered as not available, and the cache/correct flags become check or
 * cross marks.
 *
 * @param benchmarks - Results to render, in display order.
 */
function printResults(benchmarks: BenchmarkResult[]): void {
  const headerLines = [
    '',
    '╔══════════════════════════════════════════════════════════════════════════╗',
    '║ RESULTS ║',
    '╠═══════════════════════╦═════════╦═════════╦═════════╦═════════╦══════════╣',
    '║ Tool ║ Cold ║ Warm ║ Speedup ║ Cache ║ Correct ║',
    '╠═══════════════════════╬═════════╬═════════╬═════════╬═════════╬══════════╣',
  ];
  headerLines.forEach((line) => {
    console.log(line);
  });

  // Timing cell: right-aligned "<n>ms", or the error marker for failed runs.
  const timingCell = (ms: number): string => {
    if (ms >= 0) {
      return `${ms}ms`.padStart(7);
    }
    return 'ERROR ';
  };

  // Flag cell: check mark for true, cross for false.
  const flagCell = (flag: boolean): string => {
    if (flag) {
      return ' ✓ ';
    }
    return ' ✗ ';
  };

  benchmarks.forEach((entry) => {
    const nameCell = entry.tool.padEnd(19);
    const coldCell = timingCell(entry.coldRun);
    const warmCell = timingCell(entry.warmRun);

    let speedupCell = 'N/A ';
    if (entry.speedup > 0) {
      speedupCell = `${entry.speedup}x`.padStart(7);
    }

    const cacheCell = flagCell(entry.cacheHit);
    const correctCell = flagCell(entry.correct);

    console.log(`║ ${nameCell} ║ ${coldCell} ║ ${warmCell} ║ ${speedupCell} ║${cacheCell}║${correctCell}║`);
  });

  console.log('╚═══════════════════════╩═════════╩═════════╩═════════╩═════════╩══════════╝');
}
|
|
164
|
+
|
|
165
|
+
/**
 * Print target checks and report whether all of them passed.
 *
 * For every benchmark whose tool has an entry in `TARGETS`, the warm and cold
 * timings are compared against that entry's `warmTarget` / `coldTarget`
 * (a failed run, recorded as a negative timing, never passes). Afterwards the
 * average speedup over all benchmarks with a positive speedup is checked
 * against a minimum of 3x; with no such benchmark the average is 0.
 *
 * @param benchmarks - Benchmark results to check.
 * @returns `true` when every per-tool target and the average-speedup check
 *   passed, `false` otherwise. Callers that only want the printed report may
 *   ignore the return value.
 */
function printTargetChecks(benchmarks: BenchmarkResult[]): boolean {
  console.log('');
  console.log('Target Checks:');

  let allPassed = true;

  for (const result of benchmarks) {
    const targets = TARGETS[result.tool];
    // Tools without configured targets are not checked.
    if (!targets) continue;

    const warmPass = result.warmRun >= 0 && result.warmRun <= targets.warmTarget;
    const coldPass = result.coldRun >= 0 && result.coldRun <= targets.coldTarget;

    console.log(` ${result.tool}:`);
    console.log(` warm <${targets.warmTarget}ms: ${warmPass ? '✓ PASS' : '✗ FAIL'} (${result.warmRun}ms)`);
    console.log(` cold <${targets.coldTarget}ms: ${coldPass ? '✓ PASS' : '✗ FAIL'} (${result.coldRun}ms)`);

    if (!warmPass || !coldPass) allPassed = false;
  }

  // Speedup check: average only over runs that produced a usable speedup.
  const measured = benchmarks.filter(b => b.speedup > 0);
  const avgSpeedup = measured.length > 0
    ? measured.reduce((sum, b) => sum + b.speedup, 0) / measured.length
    : 0;
  const speedupPass = avgSpeedup >= 3;

  console.log(` Average speedup >=3x: ${speedupPass ? '✓ PASS' : '✗ FAIL'} (${avgSpeedup.toFixed(1)}x)`);

  if (!speedupPass) allPassed = false;

  // Previously this verdict was computed and then dropped by a bare `return;`.
  return allPassed;
}
|
|
199
|
+
|
|
200
|
+
/**
 * Main benchmark runner.
 *
 * Initializes logging, cache and metrics, resolves the project path from the
 * first CLI argument (creating a temporary test fixture when the argument is
 * missing or does not exist), benchmarks each tool in turn, prints the result
 * table, summary, target checks and metrics, and finally exits the process.
 *
 * Exit code is 0 only when every benchmark is correct, at least half of them
 * (rounded down) hit the cache, and the average speedup of the correct runs
 * is at least 2x; otherwise 1.
 *
 * A fixture created by this function is removed even when benchmarking or
 * reporting throws; the error then propagates to the caller.
 */
async function main() {
  // Initialize logging (quiet for benchmarks)
  initLogger({
    level: LogLevel.ERROR,
    enableConsole: false,
    enableFile: false
  });

  // Initialize cache and metrics
  initGlobalCache({ maxMemoryEntries: 100, defaultTtl: 300 });
  initMetricsCollector({ maxEntries: 1000 });

  // Get or create project path
  let projectPath = process.argv[2];
  let createdFixture = false;

  if (!projectPath || !existsSync(projectPath)) {
    console.log('No valid project path provided. Creating test fixture...');
    projectPath = createTestFixture();
    createdFixture = true;
  }

  let allPassed = false;

  try {
    console.log('╔══════════════════════════════════════════════════════════════╗');
    console.log('║ Vibecheck MCP Server Benchmarks ║');
    console.log('╠══════════════════════════════════════════════════════════════╣');
    console.log(`║ Project: ${projectPath.slice(0, 50).padEnd(50)} ║`);
    console.log('╚══════════════════════════════════════════════════════════════╝');
    console.log('');

    const benchmarks: BenchmarkResult[] = [];
    const toolsToTest = ['doctor', 'scan', 'ship', 'status'];

    for (const tool of toolsToTest) {
      console.log(`Running vibecheck.${tool}...`);
      try {
        const options = tool === 'scan' ? { profile: 'quick' } : undefined;
        const result = await runBenchmark(tool, projectPath, options);
        benchmarks.push(result);

        if (result.error) {
          console.log(` Warning: ${result.error}`);
        }
      } catch (e) {
        console.log(` Error: ${e}`);
        benchmarks.push({
          tool,
          coldRun: -1,
          warmRun: -1,
          cacheHit: false,
          speedup: 0,
          correct: false,
          // Anything can be thrown; only Error instances carry `.message`.
          error: e instanceof Error ? e.message : String(e),
        });
      }
    }

    // Print results
    printResults(benchmarks);

    // Summary
    console.log('');
    console.log('Summary:');

    const successful = benchmarks.filter(b => b.correct);
    const withCache = benchmarks.filter(b => b.cacheHit);
    const measured = successful.filter(b => b.speedup > 0);
    const avgSpeedup = measured.length > 0
      ? measured.reduce((sum, b) => sum + b.speedup, 0) / measured.length
      : 0;

    console.log(` Tools tested: ${benchmarks.length}`);
    console.log(` Successful: ${successful.length}/${benchmarks.length}`);
    console.log(` Cache hits: ${withCache.length}/${benchmarks.length}`);
    console.log(` Avg speedup: ${avgSpeedup.toFixed(1)}x`);

    // Target checks
    printTargetChecks(benchmarks);

    // Metrics summary
    const metrics = getMetricsCollector();
    const summary = metrics.getSummary();
    console.log('');
    console.log('Metrics:');
    console.log(` Cache hit rate: ${(summary.cacheHitRate * 100).toFixed(1)}%`);

    // Exit code based on results
    allPassed = successful.length === benchmarks.length &&
      withCache.length >= Math.floor(benchmarks.length / 2) &&
      avgSpeedup >= 2;
  } finally {
    // Cleanup fixture if we created it. Done in `finally` so a failure while
    // benchmarking or reporting does not leave the temp directory behind.
    if (createdFixture) {
      console.log('');
      console.log('Cleaning up test fixture...');
      rmSync(projectPath, { recursive: true, force: true });
    }
  }

  // Must stay outside the try block: process.exit() terminates immediately
  // and would skip the `finally` cleanup above.
  process.exit(allPassed ? 0 : 1);
}
|
|
300
|
+
|
|
301
|
+
// Script entry point: run the benchmarks; any rejection escaping main() is
// logged to stderr and the process exits with status 1.
const reportFatal = (err: unknown): never => {
  console.error('Benchmark failed:', err);
  process.exit(1);
};

main().catch(reportFatal);
|