@bryan-thompson/inspector-assessment 1.26.7 → 1.27.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/cli/build/__tests__/assess-full-e2e.test.js +496 -0
- package/cli/build/__tests__/testbed-integration.test.js +5 -1
- package/cli/build/assess-full.js +3 -1
- package/cli/build/lib/cli-parser.js +83 -2
- package/cli/package.json +1 -1
- package/client/dist/assets/{OAuthCallback-kF1MLuwg.js → OAuthCallback-CJWH8Ytw.js} +1 -1
- package/client/dist/assets/{OAuthDebugCallback-Nv-8u0GR.js → OAuthDebugCallback-DL5adXJw.js} +1 -1
- package/client/dist/assets/{index-CCiX5wkF.js → index-Cu9XzUwB.js} +4 -4
- package/client/dist/index.html +1 -1
- package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts +25 -0
- package/client/lib/services/assessment/modules/ErrorHandlingAssessor.d.ts.map +1 -1
- package/client/lib/services/assessment/modules/ErrorHandlingAssessor.js +119 -5
- package/client/package.json +1 -1
- package/package.json +1 -1
- package/server/package.json +1 -1
|
@@ -0,0 +1,496 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* CLI E2E Integration Tests (Issue #97)
|
|
3
|
+
*
|
|
4
|
+
* End-to-end tests that verify the mcp-assess-full CLI works correctly
|
|
5
|
+
* as a black-box system, including:
|
|
6
|
+
* - Command-line argument handling (--help, --version, --config, etc.)
|
|
7
|
+
* - JSONL event stream output format
|
|
8
|
+
* - Exit codes (0 for PASS, 1 for FAIL/error)
|
|
9
|
+
* - Graceful error handling
|
|
10
|
+
*
|
|
11
|
+
* Tests that require testbed servers (vulnerable-mcp, hardened-mcp) skip
|
|
12
|
+
* gracefully when servers are unavailable, allowing CI to pass without
|
|
13
|
+
* external dependencies.
|
|
14
|
+
*
|
|
15
|
+
* @see https://github.com/triepod-ai/inspector-assessment/issues/97
|
|
16
|
+
*/
|
|
17
|
+
import { describe, it, expect, beforeAll, afterAll } from "@jest/globals";
|
|
18
|
+
import { spawn } from "child_process";
|
|
19
|
+
import * as fs from "fs";
|
|
20
|
+
import * as path from "path";
|
|
21
|
+
import * as os from "os";
|
|
22
|
+
import { fileURLToPath } from "url";
|
|
23
|
+
// ============================================================================
|
|
24
|
+
// Constants
|
|
25
|
+
// ============================================================================
|
|
26
|
+
/** Get __dirname equivalent for ES modules */
|
|
27
|
+
const __filename = fileURLToPath(import.meta.url);
|
|
28
|
+
const __dirname = path.dirname(__filename);
|
|
29
|
+
/** Path to the built CLI entry point */
|
|
30
|
+
const CLI_PATH = path.resolve(__dirname, "../../build/assess-full.js");
|
|
31
|
+
/** Testbed server URLs */
|
|
32
|
+
const VULNERABLE_URL = "http://localhost:10900/mcp";
|
|
33
|
+
const HARDENED_URL = "http://localhost:10901/mcp";
|
|
34
|
+
/** Default headers for MCP HTTP servers */
|
|
35
|
+
const DEFAULT_HEADERS = {
|
|
36
|
+
"Content-Type": "application/json",
|
|
37
|
+
Accept: "application/json, text/event-stream",
|
|
38
|
+
};
|
|
39
|
+
/** Temp directory for test config files */
|
|
40
|
+
const TEMP_DIR = path.join(os.tmpdir(), "assess-full-e2e-tests");
|
|
41
|
+
// ============================================================================
|
|
42
|
+
// Helper Functions
|
|
43
|
+
// ============================================================================
|
|
44
|
+
/**
 * Spawn the CLI as a child `node` process and capture everything it emits.
 *
 * The returned promise never rejects: spawn failures and timeouts are
 * reported through `exitCode: -1` so tests can assert on them directly.
 *
 * @param args - CLI arguments appended after the CLI entry point
 * @param timeout - Timeout in milliseconds (default: 60000)
 * @returns Promise resolving to `{ stdout, stderr, exitCode, jsonlEvents, duration }`;
 *          `exitCode` is -1 on timeout or spawn error, otherwise the code
 *          reported by the child's "close" event
 */
function spawnCLI(args, timeout = 60000) {
  // How long a timed-out child gets to react to SIGTERM before SIGKILL.
  const KILL_GRACE_MS = 5000;

  return new Promise((resolve) => {
    const startTime = Date.now();
    let stdout = "";
    let stderr = "";
    let timedOut = false;
    let settled = false;
    let killTimer = null;

    const proc = spawn("node", [CLI_PATH, ...args], {
      stdio: ["pipe", "pipe", "pipe"],
      env: {
        ...process.env,
        // Ensure consistent, colour-free output
        NO_COLOR: "1",
        FORCE_COLOR: "0",
      },
    });

    proc.stdout?.on("data", (data) => {
      stdout += data.toString();
    });
    proc.stderr?.on("data", (data) => {
      stderr += data.toString();
    });

    const timer = setTimeout(() => {
      if (proc.killed) {
        return;
      }
      timedOut = true;
      proc.kill("SIGTERM");
      // A child that traps or ignores SIGTERM would otherwise never emit
      // "close" and this promise would hang until Jest's own timeout.
      killTimer = setTimeout(() => proc.kill("SIGKILL"), KILL_GRACE_MS);
    }, timeout);

    // "error" and "close" can both fire for one child; only the first settles.
    const settle = (outcome) => {
      if (settled) {
        return;
      }
      settled = true;
      clearTimeout(timer);
      clearTimeout(killTimer);
      resolve({
        stdout,
        stderr,
        ...outcome,
        duration: Date.now() - startTime,
      });
    };

    proc.on("close", (code) => {
      settle({
        // A timeout is reported as -1 regardless of how the child died.
        exitCode: timedOut ? -1 : code,
        jsonlEvents: parseJSONLEvents(stderr),
      });
    });

    proc.on("error", (err) => {
      stderr += `\nProcess error: ${err.message}`;
      settle({ exitCode: -1, jsonlEvents: [] });
    });
  });
}
|
|
114
|
+
/**
 * Extract JSONL events from captured stderr text.
 *
 * Each line is trimmed and parsed as JSON on its own. Blank lines, lines
 * that are not valid JSON, and JSON values that are not objects carrying an
 * `event` key are dropped.
 *
 * @param stderr - Raw stderr output
 * @returns Array of parsed event objects, in the order they appeared
 */
function parseJSONLEvents(stderr) {
  return stderr
    .split("\n")
    .map((rawLine) => rawLine.trim())
    .filter((text) => text.length > 0)
    .flatMap((text) => {
      let candidate;
      try {
        candidate = JSON.parse(text);
      }
      catch {
        // Plain log output rather than JSON; ignore it.
        return [];
      }
      const looksLikeEvent = candidate && typeof candidate === "object" && "event" in candidate;
      return looksLikeEvent ? [candidate] : [];
    });
}
|
|
143
|
+
/**
 * Check whether an MCP server is reachable by POSTing an `initialize`
 * JSON-RPC request to it.
 *
 * The response may be a Server-Sent Events stream that stays open, so the
 * check reads only the first body chunk instead of waiting for the
 * connection to close. The request is aborted after 5 seconds.
 *
 * @param url - Server URL to check
 * @returns `true` if the server answered with a non-5xx status and either had
 *          no readable body, produced a non-empty first chunk, or failed
 *          while the body was being read; `false` otherwise (including any
 *          fetch failure or abort before headers arrive). Always a boolean.
 */
async function checkServerAvailable(url) {
  const controller = new AbortController();
  // Give enough time to receive an initial response but never wait forever.
  const timeoutId = setTimeout(() => controller.abort(), 5000);
  try {
    const response = await fetch(url, {
      method: "POST",
      headers: DEFAULT_HEADERS,
      body: JSON.stringify({
        jsonrpc: "2.0",
        method: "initialize",
        params: {
          protocolVersion: "2024-11-05",
          capabilities: {},
          clientInfo: { name: "e2e-test", version: "1.0.0" },
        },
        id: 1,
      }),
      signal: controller.signal,
    });
    if (response.status >= 500) {
      return false;
    }
    const reader = response.body?.getReader();
    if (!reader) {
      // No readable body, but the server did answer with a non-5xx status.
      return true;
    }
    try {
      const { done, value } = await reader.read();
      // Any data at all confirms the server is responding.
      return !done && Boolean(value) && value.length > 0;
    }
    catch {
      // A read failure after a successful fetch still means the server responded.
      return true;
    }
    finally {
      // Best-effort release of the stream; a failing cancel must not surface
      // as an unhandled rejection or change the result.
      void Promise.resolve()
        .then(() => reader.cancel())
        .catch(() => { });
    }
  }
  catch {
    return false;
  }
  finally {
    // Runs on every path, including fetch rejection, so no timer is left pending.
    clearTimeout(timeoutId);
  }
}
|
|
203
|
+
/**
 * Write a configuration object to a JSON file inside TEMP_DIR.
 *
 * @param config - Configuration object, serialised with 2-space indentation
 * @param filename - Optional filename; when omitted a unique
 *                   `config-<timestamp>-<random>.json` name is generated
 * @returns Path to the created config file
 */
function createTempConfig(config, filename) {
  // The timestamp alone can repeat for two calls in the same millisecond,
  // so a random suffix keeps generated names distinct.
  const uniqueSuffix = Math.random().toString(36).slice(2, 10);
  const name = filename || `config-${Date.now()}-${uniqueSuffix}.json`;
  const configPath = path.join(TEMP_DIR, name);
  // With `recursive: true` this is a no-op when the directory already
  // exists, so no separate (racy) existence check is needed.
  fs.mkdirSync(TEMP_DIR, { recursive: true });
  fs.writeFileSync(configPath, JSON.stringify(config, null, 2));
  return configPath;
}
|
|
220
|
+
/**
 * Write raw (typically malformed) content to a config file inside TEMP_DIR.
 *
 * @param content - Raw content to write verbatim
 * @param filename - Optional filename; when omitted a unique
 *                   `invalid-<timestamp>-<random>.json` name is generated
 * @returns Path to the created file
 */
function createInvalidConfig(content, filename) {
  // The timestamp alone can repeat for two calls in the same millisecond,
  // so a random suffix keeps generated names distinct.
  const uniqueSuffix = Math.random().toString(36).slice(2, 10);
  const name = filename || `invalid-${Date.now()}-${uniqueSuffix}.json`;
  const configPath = path.join(TEMP_DIR, name);
  // With `recursive: true` this is a no-op when the directory already
  // exists, so no separate (racy) existence check is needed.
  fs.mkdirSync(TEMP_DIR, { recursive: true });
  fs.writeFileSync(configPath, content);
  return configPath;
}
|
|
237
|
+
// ============================================================================
|
|
238
|
+
// Test Setup
|
|
239
|
+
// ============================================================================
|
|
240
|
+
describe("CLI E2E Integration Tests", () => {
  // Set once in beforeAll; integration tests return early when their server is down.
  let vulnerableAvailable = false;
  let hardenedAvailable = false;

  beforeAll(async () => {
    // Idempotent: a no-op when the directory already exists.
    fs.mkdirSync(TEMP_DIR, { recursive: true });
    // Probe both testbed servers in parallel.
    const [v, h] = await Promise.all([
      checkServerAvailable(VULNERABLE_URL),
      checkServerAvailable(HARDENED_URL),
    ]);
    vulnerableAvailable = v;
    hardenedAvailable = h;
    if (!v && !h) {
      console.log("\n⚠️ Testbed servers unavailable - integration tests will skip gracefully");
      console.log(" To run full tests, start:");
      console.log(" - vulnerable-mcp: http://localhost:10900/mcp");
      console.log(" - hardened-mcp: http://localhost:10901/mcp\n");
    }
  }, 30000); // 30 second timeout for server availability checks

  afterAll(() => {
    // Remove the temp directory and everything in it. `force` tolerates a
    // missing directory and `recursive` tolerates non-file entries, either
    // of which made the previous unlink-each-file-then-rmdir cleanup throw.
    fs.rmSync(TEMP_DIR, { recursive: true, force: true });
  });

  // ==========================================================================
  // Group 1: Help and Version Display (No Server Required)
  // ==========================================================================
  describe("Help and Version Display", () => {
    it("should display help with --help flag", async () => {
      const result = await spawnCLI(["--help"], 10000);
      expect(result.exitCode).toBe(0);
      expect(result.stdout).toContain("Usage: mcp-assess-full");
      expect(result.stdout).toContain("--server");
      expect(result.stdout).toContain("--config");
      expect(result.stdout).toContain("--profile");
    });

    it("should display help with -h flag", async () => {
      const result = await spawnCLI(["-h"], 10000);
      expect(result.exitCode).toBe(0);
      expect(result.stdout).toContain("Usage: mcp-assess-full");
    });

    it("should display version with --version flag", async () => {
      const result = await spawnCLI(["--version"], 10000);
      expect(result.exitCode).toBe(0);
      // Version should match semver pattern (e.g., 1.26.7)
      expect(result.stdout).toMatch(/mcp-assess-full \d+\.\d+\.\d+/);
    });

    it("should display version with -V flag", async () => {
      const result = await spawnCLI(["-V"], 10000);
      expect(result.exitCode).toBe(0);
      expect(result.stdout).toMatch(/\d+\.\d+\.\d+/);
    });
  });

  // ==========================================================================
  // Group 2: Configuration Validation (No Server Required)
  // ==========================================================================
  describe("Configuration Validation", () => {
    it("should fail gracefully when config file is missing", async () => {
      const result = await spawnCLI([
        "--server",
        "test-server",
        "--config",
        "/nonexistent/path/config.json",
      ], 10000);
      expect(result.exitCode).toBe(1);
      // Error message should mention the issue (regex is case-insensitive)
      expect(result.stderr).toMatch(/error|not found|enoent/i);
    });

    it("should fail gracefully for malformed JSON config", async () => {
      const configPath = createInvalidConfig("{ invalid json }");
      const result = await spawnCLI(["--server", "test-server", "--config", configPath], 10000);
      expect(result.exitCode).toBe(1);
      expect(result.stderr).toMatch(/error|parse|json|syntax/i);
    });

    it("should fail gracefully with missing --server flag", async () => {
      const configPath = createTempConfig({
        transport: "http",
        url: "http://localhost:9999/mcp",
      });
      const result = await spawnCLI(["--config", configPath], 10000);
      expect(result.exitCode).toBe(1);
      expect(result.stderr).toContain("--server is required");
    });
  });

  // ==========================================================================
  // Group 3: Profile Selection (No Server Required)
  // ==========================================================================
  describe("Profile Selection", () => {
    it("should list available profiles in help text", async () => {
      const result = await spawnCLI(["--help"], 10000);
      expect(result.stdout).toContain("quick");
      expect(result.stdout).toContain("security");
      expect(result.stdout).toContain("compliance");
      expect(result.stdout).toContain("full");
    });

    it("should reject invalid profile names", async () => {
      const result = await spawnCLI(["--server", "test", "--profile", "invalid-profile-name"], 10000);
      expect(result.exitCode).toBe(1);
      expect(result.stderr).toMatch(/invalid profile/i);
    });
  });

  // ==========================================================================
  // Group 4: Error Handling (No Server Required)
  // ==========================================================================
  describe("Error Handling", () => {
    it("should fail gracefully when server is unreachable", async () => {
      const configPath = createTempConfig({
        transport: "http",
        url: "http://localhost:19999/mcp", // Non-existent port
      });
      const result = await spawnCLI(["--server", "unreachable", "--config", configPath], 30000);
      expect(result.exitCode).toBe(1);
      // Should have some error indication (regex is case-insensitive)
      expect(result.stderr).toMatch(/error|connect|fail|econnrefused/i);
    });

    it("should reject unknown arguments", async () => {
      const result = await spawnCLI(["--server", "test", "--unknown-flag-xyz"], 10000);
      expect(result.exitCode).toBe(1);
      expect(result.stderr).toMatch(/unknown argument/i);
    });
  });

  // ==========================================================================
  // Group 5: Server Assessment (Integration - Requires Testbed Servers)
  // ==========================================================================
  describe("Server Assessment (Integration)", () => {
    it("should complete assessment against vulnerable-mcp", async () => {
      if (!vulnerableAvailable) {
        console.log("⏩ Skipping: vulnerable-mcp not available");
        return;
      }
      const configPath = createTempConfig({
        transport: "http",
        url: VULNERABLE_URL,
      });
      const result = await spawnCLI([
        "--server",
        "vulnerable-mcp",
        "--config",
        configPath,
        "--profile",
        "quick",
      ], 600000);
      // Should complete (may PASS or FAIL based on vulnerabilities)
      expect([0, 1]).toContain(result.exitCode);
      // Should emit assessment_complete event
      const completeEvent = result.jsonlEvents.find((e) => e.event === "assessment_complete");
      expect(completeEvent).toBeDefined();
    }, 660000); // 11 minute jest timeout

    it("should emit valid JSONL events to stderr", async () => {
      if (!vulnerableAvailable) {
        console.log("⏩ Skipping: vulnerable-mcp not available");
        return;
      }
      const configPath = createTempConfig({
        transport: "http",
        url: VULNERABLE_URL,
      });
      const result = await spawnCLI([
        "--server",
        "vulnerable-mcp",
        "--config",
        configPath,
        "--profile",
        "quick",
      ], 600000);
      // Validate event sequence
      const eventTypes = result.jsonlEvents.map((e) => e.event);
      expect(eventTypes).toContain("server_connected");
      expect(eventTypes).toContain("tools_discovery_complete");
      expect(eventTypes).toContain("assessment_complete");
      // Validate server_connected event structure
      const serverConnected = result.jsonlEvents.find((e) => e.event === "server_connected");
      expect(serverConnected).toHaveProperty("serverName");
      expect(serverConnected).toHaveProperty("transport");
      expect(serverConnected).toHaveProperty("version");
      // Validate assessment_complete event structure
      const assessmentComplete = result.jsonlEvents.find((e) => e.event === "assessment_complete");
      expect(assessmentComplete).toHaveProperty("overallStatus");
      expect(assessmentComplete).toHaveProperty("totalTests");
      expect(assessmentComplete).toHaveProperty("outputPath");
    }, 660000); // 11 minute jest timeout

    it("should return exit code 1 for FAIL status on vulnerable server", async () => {
      if (!vulnerableAvailable) {
        console.log("⏩ Skipping: vulnerable-mcp not available");
        return;
      }
      const configPath = createTempConfig({
        transport: "http",
        url: VULNERABLE_URL,
      });
      const result = await spawnCLI([
        "--server",
        "vulnerable-mcp",
        "--config",
        configPath,
        "--profile",
        "security",
      ], 600000);
      const assessmentComplete = result.jsonlEvents.find((e) => e.event === "assessment_complete");
      // Without this the test passed vacuously when the run crashed or
      // timed out before emitting its final event.
      expect(assessmentComplete).toBeDefined();
      // Vulnerable server should have vulnerabilities -> FAIL status
      if (assessmentComplete?.overallStatus === "FAIL") {
        expect(result.exitCode).toBe(1);
      }
    }, 660000); // 11 minute jest timeout

    it("should return exit code 0 for PASS status on hardened server", async () => {
      if (!hardenedAvailable) {
        console.log("⏩ Skipping: hardened-mcp not available");
        return;
      }
      const configPath = createTempConfig({
        transport: "http",
        url: HARDENED_URL,
      });
      const result = await spawnCLI([
        "--server",
        "hardened-mcp",
        "--config",
        configPath,
        "--profile",
        "quick",
      ], 600000);
      const assessmentComplete = result.jsonlEvents.find((e) => e.event === "assessment_complete");
      // Without this the test passed vacuously when the run crashed or
      // timed out before emitting its final event.
      expect(assessmentComplete).toBeDefined();
      // Hardened server should pass -> exit 0
      if (assessmentComplete?.overallStatus === "PASS") {
        expect(result.exitCode).toBe(0);
      }
    }, 660000); // 11 minute jest timeout
  });

  // ==========================================================================
  // Group 6: Preflight Mode (Integration - Requires Testbed Servers)
  // ==========================================================================
  describe("Preflight Mode", () => {
    it("should run preflight validation quickly", async () => {
      if (!vulnerableAvailable) {
        console.log("⏩ Skipping: testbed server not available");
        return;
      }
      const configPath = createTempConfig({
        transport: "http",
        url: VULNERABLE_URL,
      });
      const result = await spawnCLI(["--server", "vulnerable-mcp", "--config", configPath, "--preflight"], 30000);
      // Preflight should complete faster than full assessment
      expect(result.duration).toBeLessThan(20000);
      // Should indicate success or provide validation info
      expect([0, 1]).toContain(result.exitCode);
    }, 60000); // 1 minute timeout for preflight
  });
});
|
|
package/cli/build/__tests__/testbed-integration.test.js
CHANGED
|
@@ -27,6 +27,8 @@ const DEFAULT_HEADERS = {
|
|
|
27
27
|
*/
|
|
28
28
|
async function checkServerAvailable(url) {
|
|
29
29
|
try {
|
|
30
|
+
const controller = new AbortController();
|
|
31
|
+
const timeoutId = setTimeout(() => controller.abort(), 5000);
|
|
30
32
|
const response = await fetch(url, {
|
|
31
33
|
method: "POST",
|
|
32
34
|
headers: DEFAULT_HEADERS,
|
|
@@ -40,7 +42,9 @@ async function checkServerAvailable(url) {
|
|
|
40
42
|
},
|
|
41
43
|
id: 1,
|
|
42
44
|
}),
|
|
45
|
+
signal: controller.signal,
|
|
43
46
|
});
|
|
47
|
+
clearTimeout(timeoutId);
|
|
44
48
|
return response.status < 500;
|
|
45
49
|
}
|
|
46
50
|
catch {
|
|
@@ -133,7 +137,7 @@ describe("Testbed A/B Comparison", () => {
|
|
|
133
137
|
console.log(" - vulnerable-mcp: http://localhost:10900/mcp");
|
|
134
138
|
console.log(" - hardened-mcp: http://localhost:10901/mcp\n");
|
|
135
139
|
}
|
|
136
|
-
});
|
|
140
|
+
}, 30000);
|
|
137
141
|
describe("Health Check Tests", () => {
|
|
138
142
|
it("should connect to vulnerable-mcp server", async () => {
|
|
139
143
|
if (!vulnerableAvailable) {
|
package/cli/build/assess-full.js
CHANGED
|
@@ -27,7 +27,9 @@ async function main() {
|
|
|
27
27
|
const listenerConfig = new ScopedListenerConfig(50);
|
|
28
28
|
try {
|
|
29
29
|
const options = parseArgs();
|
|
30
|
-
if (options.helpRequested) {
|
|
30
|
+
if (options.helpRequested ||
|
|
31
|
+
options.versionRequested ||
|
|
32
|
+
options.listModules) {
|
|
31
33
|
return;
|
|
32
34
|
}
|
|
33
35
|
// Apply scoped listener configuration for assessment
|
|
package/cli/build/lib/cli-parser.js
CHANGED
|
@@ -9,7 +9,8 @@
|
|
|
9
9
|
* @module cli/lib/cli-parser
|
|
10
10
|
*/
|
|
11
11
|
import { ASSESSMENT_CATEGORY_METADATA, } from "../../../client/lib/lib/assessmentTypes.js";
|
|
12
|
-
import { ASSESSMENT_PROFILES, isValidProfileName, getProfileHelpText, } from "../profiles.js";
|
|
12
|
+
import { ASSESSMENT_PROFILES, isValidProfileName, getProfileHelpText, TIER_1_CORE_SECURITY, TIER_2_COMPLIANCE, TIER_3_CAPABILITY, TIER_4_EXTENDED, } from "../profiles.js";
|
|
13
|
+
import packageJson from "../../package.json" with { type: "json" };
|
|
13
14
|
// ============================================================================
|
|
14
15
|
// Constants
|
|
15
16
|
// ============================================================================
|
|
@@ -249,6 +250,15 @@ export function parseArgs(argv) {
|
|
|
249
250
|
}
|
|
250
251
|
break;
|
|
251
252
|
}
|
|
253
|
+
case "--list-modules":
|
|
254
|
+
printModules();
|
|
255
|
+
options.listModules = true;
|
|
256
|
+
return options;
|
|
257
|
+
case "--version":
|
|
258
|
+
case "-V":
|
|
259
|
+
printVersion();
|
|
260
|
+
options.versionRequested = true;
|
|
261
|
+
return options;
|
|
252
262
|
case "--help":
|
|
253
263
|
case "-h":
|
|
254
264
|
printHelp();
|
|
@@ -310,8 +320,20 @@ export function parseArgs(argv) {
|
|
|
310
320
|
return options;
|
|
311
321
|
}
|
|
312
322
|
// ============================================================================
|
|
313
|
-
// Help Text
|
|
323
|
+
// Version and Help Text
|
|
314
324
|
// ============================================================================
|
|
325
|
+
/**
 * Read the `version` field of the imported package.json.
 *
 * @returns The value of `packageJson.version`
 */
function getPackageVersion() {
  const { version } = packageJson;
  return version;
}
|
|
331
|
+
/**
 * Write the CLI name followed by the package version to stdout,
 * as a single `mcp-assess-full <version>` line.
 */
export function printVersion() {
  const banner = `mcp-assess-full ${getPackageVersion()}`;
  console.log(banner);
}
|
|
315
337
|
/**
|
|
316
338
|
* Print help message to console
|
|
317
339
|
*/
|
|
@@ -349,6 +371,7 @@ Options:
|
|
|
349
371
|
--silent Suppress all diagnostic logging
|
|
350
372
|
--log-level <level> Set log level: silent, error, warn, info (default), debug
|
|
351
373
|
Also supports LOG_LEVEL environment variable
|
|
374
|
+
--version, -V Show version number
|
|
352
375
|
--help, -h Show this help message
|
|
353
376
|
|
|
354
377
|
Environment Variables:
|
|
@@ -417,3 +440,61 @@ Examples:
|
|
|
417
440
|
mcp-assess-full --server my-server --compare ./baseline.json --diff-only
|
|
418
441
|
`);
|
|
419
442
|
}
|
|
443
|
+
/**
 * Module description mappings for printModules output.
 * Uses human-friendly descriptions that may differ from ASSESSMENT_CATEGORY_METADATA.
 */
const MODULE_DESCRIPTIONS = {
  functionality: "Tool functionality validation",
  security: "Security vulnerability detection (23 attack patterns)",
  temporal: "Temporal/rug pull detection",
  errorHandling: "Error handling compliance",
  protocolCompliance: "MCP protocol + JSON-RPC validation",
  aupCompliance: "Acceptable use policy compliance",
  toolAnnotations: "Tool annotation validation (readOnlyHint, destructiveHint)",
  prohibitedLibraries: "Prohibited library detection",
  manifestValidation: "MCPB manifest.json validation",
  authentication: "OAuth/auth evaluation",
  resources: "Resource path traversal + sensitive data exposure",
  prompts: "Prompt AUP compliance + injection testing",
  crossCapability: "Cross-capability attack chain detection",
  developerExperience: "Documentation + usability assessment",
  portability: "Cross-platform compatibility",
  externalAPIScanner: "External API detection (requires --source)",
};
/**
 * Print the available assessment modules, grouped by tier, followed by
 * usage hints for the module-selection flags.
 *
 * Each module line shows the module name padded to 22 characters and a
 * description taken from MODULE_DESCRIPTIONS, falling back to
 * ASSESSMENT_CATEGORY_METADATA and then to an empty string.
 */
export function printModules() {
  const formatModule = (name) => {
    const desc = MODULE_DESCRIPTIONS[name] ||
      ASSESSMENT_CATEGORY_METADATA[name]?.description ||
      "";
    return ` ${name.padEnd(22)} ${desc}`;
  };
  // Derive the headline count from the tier lists so it cannot drift from
  // the per-tier counts printed below when modules are added or removed.
  const totalModules = TIER_1_CORE_SECURITY.length +
    TIER_2_COMPLIANCE.length +
    TIER_3_CAPABILITY.length +
    TIER_4_EXTENDED.length;
  console.log(`
Available Assessment Modules (${totalModules} total):

Tier 1 - Core Security (${TIER_1_CORE_SECURITY.length} modules):
${TIER_1_CORE_SECURITY.map(formatModule).join("\n")}

Tier 2 - Compliance (${TIER_2_COMPLIANCE.length} modules):
${TIER_2_COMPLIANCE.map(formatModule).join("\n")}

Tier 3 - Capability-Based (${TIER_3_CAPABILITY.length} modules):
${TIER_3_CAPABILITY.map(formatModule).join("\n")}

Tier 4 - Extended (${TIER_4_EXTENDED.length} modules):
${TIER_4_EXTENDED.map(formatModule).join("\n")}

Usage:
--only-modules <list> Run only specified modules (comma-separated)
--skip-modules <list> Skip specified modules (comma-separated)
--profile <name> Use predefined profile (quick, security, compliance, full)

Examples:
mcp-assess-full my-server --only-modules functionality,security
mcp-assess-full my-server --skip-modules temporal,portability
mcp-assess-full my-server --profile compliance
`);
}
|
package/cli/package.json
CHANGED
|
package/client/dist/assets/{OAuthCallback-kF1MLuwg.js → OAuthCallback-CJWH8Ytw.js}
RENAMED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { u as useToast, r as reactExports, j as jsxRuntimeExports, p as parseOAuthCallbackParams, g as generateOAuthErrorDescription, S as SESSION_KEYS, I as InspectorOAuthClientProvider, a as auth } from "./index-CCiX5wkF.js";
|
|
1
|
+
import { u as useToast, r as reactExports, j as jsxRuntimeExports, p as parseOAuthCallbackParams, g as generateOAuthErrorDescription, S as SESSION_KEYS, I as InspectorOAuthClientProvider, a as auth } from "./index-Cu9XzUwB.js";
|
|
2
2
|
const OAuthCallback = ({ onConnect }) => {
|
|
3
3
|
const { toast } = useToast();
|
|
4
4
|
const hasProcessedRef = reactExports.useRef(false);
|
package/client/dist/assets/{OAuthDebugCallback-Nv-8u0GR.js → OAuthDebugCallback-DL5adXJw.js}
RENAMED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { r as reactExports, S as SESSION_KEYS, p as parseOAuthCallbackParams, j as jsxRuntimeExports, g as generateOAuthErrorDescription } from "./index-CCiX5wkF.js";
|
|
1
|
+
import { r as reactExports, S as SESSION_KEYS, p as parseOAuthCallbackParams, j as jsxRuntimeExports, g as generateOAuthErrorDescription } from "./index-Cu9XzUwB.js";
|
|
2
2
|
const OAuthDebugCallback = ({ onConnect }) => {
|
|
3
3
|
reactExports.useEffect(() => {
|
|
4
4
|
let isProcessed = false;
|
|
@@ -16373,7 +16373,7 @@ object({
|
|
|
16373
16373
|
token_type_hint: string().optional()
|
|
16374
16374
|
}).strip();
|
|
16375
16375
|
const name = "@bryan-thompson/inspector-assessment-client";
|
|
16376
|
-
const version$1 = "1.26.7";
|
|
16376
|
+
const version$1 = "1.27.0";
|
|
16377
16377
|
const packageJson = {
|
|
16378
16378
|
name,
|
|
16379
16379
|
version: version$1
|
|
@@ -45288,7 +45288,7 @@ const useTheme = () => {
|
|
|
45288
45288
|
[theme, setThemeWithSideEffect]
|
|
45289
45289
|
);
|
|
45290
45290
|
};
|
|
45291
|
-
const version = "1.26.7";
|
|
45291
|
+
const version = "1.27.0";
|
|
45292
45292
|
var [createTooltipContext] = createContextScope("Tooltip", [
|
|
45293
45293
|
createPopperScope
|
|
45294
45294
|
]);
|
|
@@ -48845,13 +48845,13 @@ const App = () => {
|
|
|
48845
48845
|
) });
|
|
48846
48846
|
if (window.location.pathname === "/oauth/callback") {
|
|
48847
48847
|
const OAuthCallback = React.lazy(
|
|
48848
|
-
() => __vitePreload(() => import("./OAuthCallback-kF1MLuwg.js"), true ? [] : void 0)
|
|
48848
|
+
() => __vitePreload(() => import("./OAuthCallback-CJWH8Ytw.js"), true ? [] : void 0)
|
|
48849
48849
|
);
|
|
48850
48850
|
return /* @__PURE__ */ jsxRuntimeExports.jsx(reactExports.Suspense, { fallback: /* @__PURE__ */ jsxRuntimeExports.jsx("div", { children: "Loading..." }), children: /* @__PURE__ */ jsxRuntimeExports.jsx(OAuthCallback, { onConnect: onOAuthConnect }) });
|
|
48851
48851
|
}
|
|
48852
48852
|
if (window.location.pathname === "/oauth/callback/debug") {
|
|
48853
48853
|
const OAuthDebugCallback = React.lazy(
|
|
48854
|
-
() => __vitePreload(() => import("./OAuthDebugCallback-Nv-8u0GR.js"), true ? [] : void 0)
|
|
48854
|
+
() => __vitePreload(() => import("./OAuthDebugCallback-DL5adXJw.js"), true ? [] : void 0)
|
|
48855
48855
|
);
|
|
48856
48856
|
return /* @__PURE__ */ jsxRuntimeExports.jsx(reactExports.Suspense, { fallback: /* @__PURE__ */ jsxRuntimeExports.jsx("div", { children: "Loading..." }), children: /* @__PURE__ */ jsxRuntimeExports.jsx(OAuthDebugCallback, { onConnect: onOAuthDebugConnect }) });
|
|
48857
48857
|
}
|
package/client/dist/index.html
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
<link rel="icon" type="image/svg+xml" href="/mcp.svg" />
|
|
6
6
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
|
7
7
|
<title>MCP Inspector</title>
|
|
8
|
-
<script type="module" crossorigin src="/assets/index-CCiX5wkF.js"></script>
|
|
8
|
+
<script type="module" crossorigin src="/assets/index-Cu9XzUwB.js"></script>
|
|
9
9
|
<link rel="stylesheet" crossorigin href="/assets/index-cHhcEXbr.css">
|
|
10
10
|
</head>
|
|
11
11
|
<body>
|
|
@@ -3,9 +3,13 @@
|
|
|
3
3
|
* Tests error handling and input validation
|
|
4
4
|
*/
|
|
5
5
|
import { ErrorHandlingAssessment } from "../../../lib/assessmentTypes.js";
|
|
6
|
+
import { AssessmentConfiguration } from "../../../lib/assessment/configTypes.js";
|
|
6
7
|
import { BaseAssessor } from "./BaseAssessor.js";
|
|
7
8
|
import { AssessmentContext } from "../AssessmentOrchestrator.js";
|
|
8
9
|
export declare class ErrorHandlingAssessor extends BaseAssessor {
|
|
10
|
+
private executionDetector;
|
|
11
|
+
private safeResponseDetector;
|
|
12
|
+
constructor(config: AssessmentConfiguration);
|
|
9
13
|
assess(context: AssessmentContext): Promise<ErrorHandlingAssessment>;
|
|
10
14
|
private selectToolsForTesting;
|
|
11
15
|
private testToolErrorHandling;
|
|
@@ -17,6 +21,27 @@ export declare class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
17
21
|
private generateWrongTypeParams;
|
|
18
22
|
private generateInvalidValueParams;
|
|
19
23
|
private generateParamsWithValue;
|
|
24
|
+
/**
|
|
25
|
+
* Analyze invalid_values response to determine scoring impact
|
|
26
|
+
* Issue #99: Contextual empty string validation scoring
|
|
27
|
+
*
|
|
28
|
+
* Classifications:
|
|
29
|
+
* - safe_rejection: Tool rejected with error (no penalty)
|
|
30
|
+
* - safe_reflection: Tool stored/echoed without executing (no penalty)
|
|
31
|
+
* - defensive_programming: Tool handled gracefully (no penalty)
|
|
32
|
+
* - execution_detected: Tool executed input (penalty)
|
|
33
|
+
* - unknown: Cannot determine (partial penalty)
|
|
34
|
+
*/
|
|
35
|
+
private analyzeInvalidValuesResponse;
|
|
36
|
+
/**
|
|
37
|
+
* Safely extract response text from various response formats
|
|
38
|
+
*/
|
|
39
|
+
private extractResponseTextSafe;
|
|
40
|
+
/**
|
|
41
|
+
* Check for defensive programming patterns - tool accepted but caused no harm
|
|
42
|
+
* Examples: "Deleted 0 keys", "No results found", "Query returned 0"
|
|
43
|
+
*/
|
|
44
|
+
private isDefensiveProgrammingResponse;
|
|
20
45
|
private calculateMetrics;
|
|
21
46
|
private determineErrorHandlingStatus;
|
|
22
47
|
private generateExplanation;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"ErrorHandlingAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ErrorHandlingAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAIxB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;
|
|
1
|
+
{"version":3,"file":"ErrorHandlingAssessor.d.ts","sourceRoot":"","sources":["../../../../src/services/assessment/modules/ErrorHandlingAssessor.ts"],"names":[],"mappings":"AAAA;;;GAGG;AAEH,OAAO,EACL,uBAAuB,EAIxB,MAAM,uBAAuB,CAAC;AAC/B,OAAO,EAAE,uBAAuB,EAAE,MAAM,8BAA8B,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,gBAAgB,CAAC;AAC9C,OAAO,EAAE,iBAAiB,EAAE,MAAM,2BAA2B,CAAC;AAK9D,qBAAa,qBAAsB,SAAQ,YAAY;IACrD,OAAO,CAAC,iBAAiB,CAA4B;IACrD,OAAO,CAAC,oBAAoB,CAAuB;gBAEvC,MAAM,EAAE,uBAAuB;IAMrC,MAAM,CAAC,OAAO,EAAE,iBAAiB,GAAG,OAAO,CAAC,uBAAuB,CAAC;IAiE1E,OAAO,CAAC,qBAAqB;YAuDf,qBAAqB;YAuBrB,qBAAqB;YAmGrB,cAAc;YAmFd,iBAAiB;YA8DjB,kBAAkB;IA6DhC,OAAO,CAAC,aAAa;IAOrB,OAAO,CAAC,uBAAuB;IAgC/B,OAAO,CAAC,0BAA0B;IAgClC,OAAO,CAAC,uBAAuB;IA4B/B;;;;;;;;;;OAUG;IACH,OAAO,CAAC,4BAA4B;IAgEpC;;OAEG;IACH,OAAO,CAAC,uBAAuB;IAc/B;;;OAGG;IACH,OAAO,CAAC,8BAA8B;IAetC,OAAO,CAAC,gBAAgB;IA8GxB,OAAO,CAAC,4BAA4B;IAapC,OAAO,CAAC,mBAAmB;IAuE3B,OAAO,CAAC,uBAAuB;CA4ChC"}
|
|
@@ -4,7 +4,16 @@
|
|
|
4
4
|
*/
|
|
5
5
|
import { BaseAssessor } from "./BaseAssessor.js";
|
|
6
6
|
import { createConcurrencyLimit } from "../lib/concurrencyLimit.js";
|
|
7
|
+
import { ExecutionArtifactDetector } from "./securityTests/ExecutionArtifactDetector.js";
|
|
8
|
+
import { SafeResponseDetector } from "./securityTests/SafeResponseDetector.js";
|
|
7
9
|
export class ErrorHandlingAssessor extends BaseAssessor {
|
|
10
|
+
executionDetector;
|
|
11
|
+
safeResponseDetector;
|
|
12
|
+
constructor(config) {
|
|
13
|
+
super(config);
|
|
14
|
+
this.executionDetector = new ExecutionArtifactDetector();
|
|
15
|
+
this.safeResponseDetector = new SafeResponseDetector();
|
|
16
|
+
}
|
|
8
17
|
async assess(context) {
|
|
9
18
|
this.logger.info("Starting error handling assessment");
|
|
10
19
|
const testDetails = [];
|
|
@@ -428,17 +437,122 @@ export class ErrorHandlingAssessor extends BaseAssessor {
|
|
|
428
437
|
return params;
|
|
429
438
|
}
|
|
430
439
|
// isErrorResponse and extractErrorInfo moved to BaseAssessor for reuse across all assessors
|
|
440
|
+
/**
|
|
441
|
+
* Analyze invalid_values response to determine scoring impact
|
|
442
|
+
* Issue #99: Contextual empty string validation scoring
|
|
443
|
+
*
|
|
444
|
+
* Classifications:
|
|
445
|
+
* - safe_rejection: Tool rejected with error (no penalty)
|
|
446
|
+
* - safe_reflection: Tool stored/echoed without executing (no penalty)
|
|
447
|
+
* - defensive_programming: Tool handled gracefully (no penalty)
|
|
448
|
+
* - execution_detected: Tool executed input (penalty)
|
|
449
|
+
* - unknown: Cannot determine (partial penalty)
|
|
450
|
+
*/
|
|
451
|
+
analyzeInvalidValuesResponse(test) {
|
|
452
|
+
const responseText = this.extractResponseTextSafe(test.actualResponse.rawResponse);
|
|
453
|
+
// Case 1: Tool rejected with error - best case (no penalty)
|
|
454
|
+
if (test.actualResponse.isError) {
|
|
455
|
+
return {
|
|
456
|
+
shouldPenalize: false,
|
|
457
|
+
penaltyAmount: 0,
|
|
458
|
+
classification: "safe_rejection",
|
|
459
|
+
reason: "Tool properly rejected invalid input",
|
|
460
|
+
};
|
|
461
|
+
}
|
|
462
|
+
// Case 2: Defensive programming patterns (no penalty)
|
|
463
|
+
// Check BEFORE execution detection because patterns like "query returned 0"
|
|
464
|
+
// might match execution indicators but are actually safe
|
|
465
|
+
if (this.isDefensiveProgrammingResponse(responseText)) {
|
|
466
|
+
return {
|
|
467
|
+
shouldPenalize: false,
|
|
468
|
+
penaltyAmount: 0,
|
|
469
|
+
classification: "defensive_programming",
|
|
470
|
+
reason: "Tool handled empty input defensively",
|
|
471
|
+
};
|
|
472
|
+
}
|
|
473
|
+
// Case 3: Safe reflection patterns (no penalty)
|
|
474
|
+
if (this.safeResponseDetector.isReflectionResponse(responseText)) {
|
|
475
|
+
return {
|
|
476
|
+
shouldPenalize: false,
|
|
477
|
+
penaltyAmount: 0,
|
|
478
|
+
classification: "safe_reflection",
|
|
479
|
+
reason: "Tool safely reflected input without execution",
|
|
480
|
+
};
|
|
481
|
+
}
|
|
482
|
+
// Case 4: Check for execution evidence - VULNERABLE (full penalty)
|
|
483
|
+
if (this.executionDetector.hasExecutionEvidence(responseText) ||
|
|
484
|
+
this.executionDetector.detectExecutionArtifacts(responseText)) {
|
|
485
|
+
return {
|
|
486
|
+
shouldPenalize: true,
|
|
487
|
+
penaltyAmount: 100,
|
|
488
|
+
classification: "execution_detected",
|
|
489
|
+
reason: "Tool executed input without validation",
|
|
490
|
+
};
|
|
491
|
+
}
|
|
492
|
+
// Case 5: Unknown - partial penalty for manual review
|
|
493
|
+
return {
|
|
494
|
+
shouldPenalize: true,
|
|
495
|
+
penaltyAmount: 25,
|
|
496
|
+
classification: "unknown",
|
|
497
|
+
reason: "Unable to determine safety - manual review recommended",
|
|
498
|
+
};
|
|
499
|
+
}
|
|
500
|
+
/**
|
|
501
|
+
* Safely extract response text from various response formats
|
|
502
|
+
*/
|
|
503
|
+
extractResponseTextSafe(rawResponse) {
|
|
504
|
+
if (typeof rawResponse === "string")
|
|
505
|
+
return rawResponse;
|
|
506
|
+
if (rawResponse && typeof rawResponse === "object") {
|
|
507
|
+
const resp = rawResponse;
|
|
508
|
+
if (resp.content && Array.isArray(resp.content)) {
|
|
509
|
+
return resp.content
|
|
510
|
+
.map((c) => (c.type === "text" ? c.text : ""))
|
|
511
|
+
.join(" ");
|
|
512
|
+
}
|
|
513
|
+
return JSON.stringify(rawResponse);
|
|
514
|
+
}
|
|
515
|
+
return String(rawResponse || "");
|
|
516
|
+
}
|
|
517
|
+
/**
|
|
518
|
+
* Check for defensive programming patterns - tool accepted but caused no harm
|
|
519
|
+
* Examples: "Deleted 0 keys", "No results found", "Query returned 0"
|
|
520
|
+
*/
|
|
521
|
+
isDefensiveProgrammingResponse(responseText) {
|
|
522
|
+
// Patterns for safe "no-op" responses where tool handled empty input gracefully
|
|
523
|
+
// Use word boundaries (\b) to avoid matching numbers like "10" or "15"
|
|
524
|
+
const patterns = [
|
|
525
|
+
/deleted\s+0\s+(keys?|records?|rows?|items?)/i,
|
|
526
|
+
/no\s+(results?|matches?|items?)\s+found/i,
|
|
527
|
+
/\b0\s+items?\s+(deleted|updated|processed)/i, // \b prevents matching "10 items"
|
|
528
|
+
/nothing\s+to\s+(delete|update|process)/i,
|
|
529
|
+
/empty\s+(result|response|query)/i,
|
|
530
|
+
/no\s+action\s+taken/i,
|
|
531
|
+
/query\s+returned\s+0\b/i, // \b prevents matching "query returned 05" etc.
|
|
532
|
+
];
|
|
533
|
+
return patterns.some((p) => p.test(responseText));
|
|
534
|
+
}
|
|
431
535
|
calculateMetrics(tests, _passed) {
|
|
432
536
|
// Calculate enhanced score with bonus points for quality
|
|
433
537
|
let enhancedScore = 0;
|
|
434
538
|
let maxPossibleScore = 0;
|
|
435
539
|
tests.forEach((test) => {
|
|
436
|
-
//
|
|
437
|
-
//
|
|
438
|
-
//
|
|
439
|
-
//
|
|
540
|
+
// Issue #99: Contextual scoring for invalid_values tests
|
|
541
|
+
// Instead of blanket exclusion, analyze response patterns to determine if
|
|
542
|
+
// the tool safely handled empty strings (defensive programming, reflection)
|
|
543
|
+
// or if it executed without validation (security concern).
|
|
440
544
|
if (test.testType === "invalid_values") {
|
|
441
|
-
|
|
545
|
+
const analysis = this.analyzeInvalidValuesResponse(test);
|
|
546
|
+
if (!analysis.shouldPenalize) {
|
|
547
|
+
// Safe response (rejection, reflection, or defensive programming)
|
|
548
|
+
// Skip scoring to preserve backward compatibility for well-behaved tools
|
|
549
|
+
return;
|
|
550
|
+
}
|
|
551
|
+
// Execution detected or unknown - include in scoring with penalty
|
|
552
|
+
maxPossibleScore += 100;
|
|
553
|
+
const scoreEarned = 100 * (1 - analysis.penaltyAmount / 100);
|
|
554
|
+
enhancedScore += test.passed ? scoreEarned : 0;
|
|
555
|
+
return;
|
|
442
556
|
}
|
|
443
557
|
maxPossibleScore += 100; // Base score for each test
|
|
444
558
|
if (test.passed) {
|
package/client/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bryan-thompson/inspector-assessment-client",
|
|
3
|
-
"version": "1.26.7",
|
|
3
|
+
"version": "1.27.0",
|
|
4
4
|
"description": "Client-side application for the Enhanced MCP Inspector with assessment capabilities",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Bryan Thompson <bryan@triepod.ai>",
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bryan-thompson/inspector-assessment",
|
|
3
|
-
"version": "1.26.7",
|
|
3
|
+
"version": "1.27.0",
|
|
4
4
|
"description": "Enhanced MCP Inspector with comprehensive assessment capabilities for server validation",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Bryan Thompson <bryan@triepod.ai>",
|
package/server/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@bryan-thompson/inspector-assessment-server",
|
|
3
|
-
"version": "1.26.7",
|
|
3
|
+
"version": "1.27.0",
|
|
4
4
|
"description": "Server-side application for the Enhanced MCP Inspector with assessment capabilities",
|
|
5
5
|
"license": "MIT",
|
|
6
6
|
"author": "Bryan Thompson <bryan@triepod.ai>",
|