wjttc 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +106 -0
- package/dist/badge.d.ts +34 -0
- package/dist/badge.d.ts.map +1 -0
- package/dist/badge.js +76 -0
- package/dist/badge.js.map +1 -0
- package/dist/certifier.d.ts +63 -0
- package/dist/certifier.d.ts.map +1 -0
- package/dist/certifier.js +1291 -0
- package/dist/certifier.js.map +1 -0
- package/dist/cli.d.ts +9 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +246 -0
- package/dist/cli.js.map +1 -0
- package/dist/index.d.ts +9 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +14 -0
- package/dist/index.js.map +1 -0
- package/package.json +74 -0
- package/project.faf +123 -0
|
@@ -0,0 +1,1291 @@
|
|
|
1
|
+
"use strict";
|
|
2
|
+
/**
|
|
3
|
+
* WJTTC MCP Certifier
|
|
4
|
+
* Core certification engine - Dynamically loads and tests MCP servers
|
|
5
|
+
*
|
|
6
|
+
* Based on MCP Specification 2025-11-25
|
|
7
|
+
*/
|
|
8
|
+
// Module-interop helpers (these appear to be compiler-emitted; each one reuses
// an implementation already present on `this` when there is one).

// Re-exports source[key] on target[alias]; uses a live getter unless the
// existing descriptor can be copied as-is.
var __createBinding = (this && this.__createBinding) || (Object.create
    ? function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        var descriptor = Object.getOwnPropertyDescriptor(source, key);
        var needsGetter = !descriptor ||
            ("get" in descriptor ? !source.__esModule : descriptor.writable || descriptor.configurable);
        if (needsGetter) {
            descriptor = { enumerable: true, get: function () { return source[key]; } };
        }
        Object.defineProperty(target, alias, descriptor);
    }
    : function (target, source, key, alias) {
        if (alias === undefined) alias = key;
        target[alias] = source[key];
    });

// Installs `value` as the enumerable "default" property of `target`.
var __setModuleDefault = (this && this.__setModuleDefault) || (Object.create
    ? function (target, value) {
        Object.defineProperty(target, "default", { enumerable: true, value: value });
    }
    : function (target, value) {
        target["default"] = value;
    });

// Wraps a required module in a namespace object: objects flagged __esModule are
// returned untouched; otherwise every own key except "default" is bound onto a
// fresh object and the module itself becomes its `default`.
var __importStar = (this && this.__importStar) || (function () {
    // Self-replacing on first call: picks Object.getOwnPropertyNames when
    // available, else a hasOwnProperty-filtered for-in scan.
    var listOwnKeys = function (subject) {
        listOwnKeys = Object.getOwnPropertyNames || function (obj) {
            var names = [];
            for (var name in obj) {
                if (Object.prototype.hasOwnProperty.call(obj, name)) {
                    names[names.length] = name;
                }
            }
            return names;
        };
        return listOwnKeys(subject);
    };
    return function (mod) {
        if (mod && mod.__esModule) {
            return mod;
        }
        var namespace = {};
        if (mod != null) {
            var keys = listOwnKeys(mod);
            for (var idx = 0; idx < keys.length; idx++) {
                if (keys[idx] !== "default") {
                    __createBinding(namespace, mod, keys[idx]);
                }
            }
        }
        __setModuleDefault(namespace, mod);
        return namespace;
    };
})();
|
|
41
|
+
Object.defineProperty(exports, "__esModule", { value: true });
|
|
42
|
+
exports.WJTTCCertifier = void 0;
|
|
43
|
+
const child_process_1 = require("child_process");
|
|
44
|
+
const path = __importStar(require("path"));
|
|
45
|
+
const fs = __importStar(require("fs"));
|
|
46
|
+
const perf_hooks_1 = require("perf_hooks");
|
|
47
|
+
// Display name for each certification tier, keyed by tier number.
const TIER_NAMES = Object.fromEntries([
    [1, 'Protocol Compliance'],
    [2, 'Capability Negotiation'],
    [3, 'Tool Integrity'],
    [4, 'Resource Management'],
    [5, 'Security Validation'],
    [6, 'Performance Benchmarks'],
    [7, 'Integration Readiness'],
]);
// Relative weight of each tier in the overall score (the seven weights sum to 100).
const TIER_WEIGHTS = Object.fromEntries([
    [1, 20],
    [2, 10],
    [3, 20],
    [4, 10],
    [5, 15],
    [6, 15],
    [7, 10],
]);
|
|
65
|
+
class WJTTCCertifier {
|
|
66
|
+
options;
|
|
67
|
+
connection = null;
|
|
68
|
+
tools = [];
|
|
69
|
+
resources = [];
|
|
70
|
+
prompts = [];
|
|
71
|
+
capabilities = {
|
|
72
|
+
tools: false,
|
|
73
|
+
resources: false,
|
|
74
|
+
prompts: false,
|
|
75
|
+
logging: false,
|
|
76
|
+
};
|
|
77
|
+
testDir = '';
|
|
78
|
+
constructor(options) {
|
|
79
|
+
this.options = options;
|
|
80
|
+
}
|
|
81
|
+
async run() {
|
|
82
|
+
// Setup
|
|
83
|
+
this.testDir = path.join('/tmp', `wjttc-test-${Date.now()}`);
|
|
84
|
+
fs.mkdirSync(this.testDir, { recursive: true });
|
|
85
|
+
try {
|
|
86
|
+
// Connect to MCP server and detect capabilities
|
|
87
|
+
await this.connect();
|
|
88
|
+
// Get tool list for tests (if supported)
|
|
89
|
+
if (this.capabilities.tools) {
|
|
90
|
+
this.tools = await this.listTools();
|
|
91
|
+
}
|
|
92
|
+
// Get resources (if supported)
|
|
93
|
+
if (this.capabilities.resources) {
|
|
94
|
+
this.resources = await this.listResources();
|
|
95
|
+
}
|
|
96
|
+
// Get prompts (if supported)
|
|
97
|
+
if (this.capabilities.prompts) {
|
|
98
|
+
this.prompts = await this.listPrompts();
|
|
99
|
+
}
|
|
100
|
+
// Run tiers
|
|
101
|
+
const tierResults = [];
|
|
102
|
+
let totalPassed = 0;
|
|
103
|
+
let totalTests = 0;
|
|
104
|
+
for (const tier of this.options.tiers) {
|
|
105
|
+
const result = await this.runTier(tier);
|
|
106
|
+
tierResults.push(result);
|
|
107
|
+
totalPassed += result.passedTests;
|
|
108
|
+
totalTests += result.totalTests;
|
|
109
|
+
}
|
|
110
|
+
const score = this.calculateScore(tierResults);
|
|
111
|
+
return {
|
|
112
|
+
score,
|
|
113
|
+
totalTests,
|
|
114
|
+
passedTests: totalPassed,
|
|
115
|
+
failedTests: totalTests - totalPassed,
|
|
116
|
+
tierResults,
|
|
117
|
+
timestamp: new Date().toISOString(),
|
|
118
|
+
};
|
|
119
|
+
}
|
|
120
|
+
finally {
|
|
121
|
+
// Cleanup
|
|
122
|
+
this.disconnect();
|
|
123
|
+
fs.rmSync(this.testDir, { recursive: true, force: true });
|
|
124
|
+
}
|
|
125
|
+
}
|
|
126
|
+
async connect() {
|
|
127
|
+
const mcpPath = this.options.mcpPath;
|
|
128
|
+
// Determine how to run the MCP
|
|
129
|
+
let command;
|
|
130
|
+
let args;
|
|
131
|
+
let cwd = process.cwd();
|
|
132
|
+
// Check if it's a command string (contains spaces or starts with npx/node)
|
|
133
|
+
if (mcpPath.includes(' ') || mcpPath.startsWith('npx ') || mcpPath.startsWith('node ')) {
|
|
134
|
+
const parts = mcpPath.split(' ');
|
|
135
|
+
command = parts[0];
|
|
136
|
+
args = parts.slice(1);
|
|
137
|
+
}
|
|
138
|
+
else {
|
|
139
|
+
const resolvedPath = path.resolve(mcpPath);
|
|
140
|
+
cwd = path.dirname(resolvedPath);
|
|
141
|
+
if (mcpPath.endsWith('.ts')) {
|
|
142
|
+
command = 'npx';
|
|
143
|
+
args = ['ts-node', resolvedPath];
|
|
144
|
+
}
|
|
145
|
+
else if (mcpPath.endsWith('.js')) {
|
|
146
|
+
command = 'node';
|
|
147
|
+
args = [resolvedPath];
|
|
148
|
+
}
|
|
149
|
+
else {
|
|
150
|
+
command = resolvedPath;
|
|
151
|
+
args = [];
|
|
152
|
+
}
|
|
153
|
+
}
|
|
154
|
+
const proc = (0, child_process_1.spawn)(command, args, {
|
|
155
|
+
stdio: ['pipe', 'pipe', 'pipe'],
|
|
156
|
+
cwd,
|
|
157
|
+
shell: true,
|
|
158
|
+
});
|
|
159
|
+
let messageId = 0;
|
|
160
|
+
const pendingRequests = new Map();
|
|
161
|
+
let buffer = '';
|
|
162
|
+
proc.stdout?.on('data', (data) => {
|
|
163
|
+
buffer += data.toString();
|
|
164
|
+
// Parse JSON-RPC messages
|
|
165
|
+
const lines = buffer.split('\n');
|
|
166
|
+
buffer = lines.pop() || '';
|
|
167
|
+
for (const line of lines) {
|
|
168
|
+
if (!line.trim())
|
|
169
|
+
continue;
|
|
170
|
+
try {
|
|
171
|
+
const msg = JSON.parse(line);
|
|
172
|
+
if (msg.id && pendingRequests.has(msg.id)) {
|
|
173
|
+
const { resolve, reject } = pendingRequests.get(msg.id);
|
|
174
|
+
pendingRequests.delete(msg.id);
|
|
175
|
+
if (msg.error) {
|
|
176
|
+
reject(new Error(msg.error.message));
|
|
177
|
+
}
|
|
178
|
+
else {
|
|
179
|
+
resolve(msg.result);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
catch {
|
|
184
|
+
// Ignore non-JSON lines
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
});
|
|
188
|
+
this.connection = {
|
|
189
|
+
process: proc,
|
|
190
|
+
send: async (message) => {
|
|
191
|
+
return new Promise((resolve, reject) => {
|
|
192
|
+
const id = ++messageId;
|
|
193
|
+
const request = {
|
|
194
|
+
jsonrpc: '2.0',
|
|
195
|
+
id,
|
|
196
|
+
...message,
|
|
197
|
+
};
|
|
198
|
+
pendingRequests.set(id, { resolve, reject });
|
|
199
|
+
proc.stdin?.write(JSON.stringify(request) + '\n');
|
|
200
|
+
// Timeout after 30 seconds (some servers are slow to start)
|
|
201
|
+
setTimeout(() => {
|
|
202
|
+
if (pendingRequests.has(id)) {
|
|
203
|
+
pendingRequests.delete(id);
|
|
204
|
+
reject(new Error('Request timeout'));
|
|
205
|
+
}
|
|
206
|
+
}, 30000);
|
|
207
|
+
});
|
|
208
|
+
},
|
|
209
|
+
close: () => {
|
|
210
|
+
proc.kill();
|
|
211
|
+
},
|
|
212
|
+
};
|
|
213
|
+
// Initialize connection and capture server capabilities
|
|
214
|
+
const initResult = await this.connection.send({
|
|
215
|
+
method: 'initialize',
|
|
216
|
+
params: {
|
|
217
|
+
protocolVersion: '2024-11-05',
|
|
218
|
+
capabilities: {
|
|
219
|
+
roots: { listChanged: true },
|
|
220
|
+
},
|
|
221
|
+
clientInfo: {
|
|
222
|
+
name: 'wjttc-certifier',
|
|
223
|
+
version: '1.0.0',
|
|
224
|
+
},
|
|
225
|
+
},
|
|
226
|
+
});
|
|
227
|
+
// Parse server capabilities
|
|
228
|
+
const serverCaps = initResult?.capabilities || {};
|
|
229
|
+
this.capabilities = {
|
|
230
|
+
tools: !!serverCaps.tools,
|
|
231
|
+
resources: !!serverCaps.resources,
|
|
232
|
+
prompts: !!serverCaps.prompts,
|
|
233
|
+
logging: !!serverCaps.logging,
|
|
234
|
+
};
|
|
235
|
+
// Send initialized notification
|
|
236
|
+
this.connection.process.stdin?.write(JSON.stringify({
|
|
237
|
+
jsonrpc: '2.0',
|
|
238
|
+
method: 'notifications/initialized',
|
|
239
|
+
}) + '\n');
|
|
240
|
+
}
|
|
241
|
+
disconnect() {
|
|
242
|
+
this.connection?.close();
|
|
243
|
+
this.connection = null;
|
|
244
|
+
}
|
|
245
|
+
async listTools() {
|
|
246
|
+
const result = await this.connection?.send({
|
|
247
|
+
method: 'tools/list',
|
|
248
|
+
params: {},
|
|
249
|
+
});
|
|
250
|
+
return result?.tools || [];
|
|
251
|
+
}
|
|
252
|
+
async listResources() {
|
|
253
|
+
try {
|
|
254
|
+
const result = await this.connection?.send({
|
|
255
|
+
method: 'resources/list',
|
|
256
|
+
params: {},
|
|
257
|
+
});
|
|
258
|
+
return result?.resources || [];
|
|
259
|
+
}
|
|
260
|
+
catch {
|
|
261
|
+
return [];
|
|
262
|
+
}
|
|
263
|
+
}
|
|
264
|
+
async listPrompts() {
|
|
265
|
+
try {
|
|
266
|
+
const result = await this.connection?.send({
|
|
267
|
+
method: 'prompts/list',
|
|
268
|
+
params: {},
|
|
269
|
+
});
|
|
270
|
+
return result?.prompts || [];
|
|
271
|
+
}
|
|
272
|
+
catch {
|
|
273
|
+
return [];
|
|
274
|
+
}
|
|
275
|
+
}
|
|
276
|
+
async readResource(uri) {
|
|
277
|
+
return await this.connection?.send({
|
|
278
|
+
method: 'resources/read',
|
|
279
|
+
params: { uri },
|
|
280
|
+
});
|
|
281
|
+
}
|
|
282
|
+
async getPrompt(name, args) {
|
|
283
|
+
return await this.connection?.send({
|
|
284
|
+
method: 'prompts/get',
|
|
285
|
+
params: { name, arguments: args || {} },
|
|
286
|
+
});
|
|
287
|
+
}
|
|
288
|
+
async callTool(name, args) {
|
|
289
|
+
return await this.connection?.send({
|
|
290
|
+
method: 'tools/call',
|
|
291
|
+
params: { name, arguments: args },
|
|
292
|
+
});
|
|
293
|
+
}
|
|
294
|
+
async runTier(tier) {
|
|
295
|
+
const start = Date.now();
|
|
296
|
+
const tests = this.getTestsForTier(tier);
|
|
297
|
+
const failedTests = [];
|
|
298
|
+
let passed = 0;
|
|
299
|
+
for (const test of tests) {
|
|
300
|
+
try {
|
|
301
|
+
const result = await test.run();
|
|
302
|
+
if (result.passed) {
|
|
303
|
+
passed++;
|
|
304
|
+
}
|
|
305
|
+
else {
|
|
306
|
+
failedTests.push(`${test.name}: ${result.message}`);
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
catch (error) {
|
|
310
|
+
failedTests.push(`${test.name}: ${error.message}`);
|
|
311
|
+
}
|
|
312
|
+
}
|
|
313
|
+
return {
|
|
314
|
+
tier,
|
|
315
|
+
name: TIER_NAMES[tier],
|
|
316
|
+
passed: failedTests.length === 0,
|
|
317
|
+
passedTests: passed,
|
|
318
|
+
totalTests: tests.length,
|
|
319
|
+
failedTests,
|
|
320
|
+
duration: Date.now() - start,
|
|
321
|
+
};
|
|
322
|
+
}
|
|
323
|
+
calculateScore(tierResults) {
|
|
324
|
+
let weightedScore = 0;
|
|
325
|
+
let totalWeight = 0;
|
|
326
|
+
for (const result of tierResults) {
|
|
327
|
+
const weight = TIER_WEIGHTS[result.tier] || 10;
|
|
328
|
+
const tierScore = result.totalTests > 0
|
|
329
|
+
? (result.passedTests / result.totalTests) * 100
|
|
330
|
+
: 0;
|
|
331
|
+
weightedScore += tierScore * weight;
|
|
332
|
+
totalWeight += weight;
|
|
333
|
+
}
|
|
334
|
+
const baseScore = totalWeight > 0 ? Math.round(weightedScore / totalWeight) : 0;
|
|
335
|
+
// Big Orange Easter Egg
|
|
336
|
+
if (baseScore === 100 && tierResults.every(tr => tr.passed)) {
|
|
337
|
+
return 105;
|
|
338
|
+
}
|
|
339
|
+
return baseScore;
|
|
340
|
+
}
|
|
341
|
+
getTestsForTier(tier) {
|
|
342
|
+
switch (tier) {
|
|
343
|
+
case 1: return this.getTier1Tests();
|
|
344
|
+
case 2: return this.getTier2Tests();
|
|
345
|
+
case 3: return this.getTier3Tests();
|
|
346
|
+
case 4: return this.getTier4Tests();
|
|
347
|
+
case 5: return this.getTier5Tests();
|
|
348
|
+
case 6: return this.getTier6Tests();
|
|
349
|
+
case 7: return this.getTier7Tests();
|
|
350
|
+
default: return [];
|
|
351
|
+
}
|
|
352
|
+
}
|
|
353
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
354
|
+
// TIER 1: PROTOCOL COMPLIANCE
|
|
355
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
356
|
+
getTier1Tests() {
|
|
357
|
+
return [
|
|
358
|
+
{
|
|
359
|
+
name: 'MUST: Server declares capabilities',
|
|
360
|
+
run: async () => {
|
|
361
|
+
const hasCaps = this.capabilities.tools || this.capabilities.resources || this.capabilities.prompts;
|
|
362
|
+
return {
|
|
363
|
+
passed: hasCaps,
|
|
364
|
+
message: hasCaps
|
|
365
|
+
? `OK (tools:${this.capabilities.tools}, resources:${this.capabilities.resources}, prompts:${this.capabilities.prompts})`
|
|
366
|
+
: 'Server declared no capabilities',
|
|
367
|
+
};
|
|
368
|
+
},
|
|
369
|
+
},
|
|
370
|
+
{
|
|
371
|
+
name: 'MUST: Tool list returns array (if tools supported)',
|
|
372
|
+
run: async () => {
|
|
373
|
+
if (!this.capabilities.tools) {
|
|
374
|
+
return { passed: true, message: 'Tools not supported (skipped)' };
|
|
375
|
+
}
|
|
376
|
+
return {
|
|
377
|
+
passed: Array.isArray(this.tools),
|
|
378
|
+
message: Array.isArray(this.tools) ? 'OK' : 'tools/list did not return array',
|
|
379
|
+
};
|
|
380
|
+
},
|
|
381
|
+
},
|
|
382
|
+
{
|
|
383
|
+
name: 'MUST: Tools have name property',
|
|
384
|
+
run: async () => {
|
|
385
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
386
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
387
|
+
}
|
|
388
|
+
const allHaveName = this.tools.every(t => typeof t.name === 'string' && t.name.length > 0);
|
|
389
|
+
return {
|
|
390
|
+
passed: allHaveName,
|
|
391
|
+
message: allHaveName ? 'OK' : 'Some tools missing name',
|
|
392
|
+
};
|
|
393
|
+
},
|
|
394
|
+
},
|
|
395
|
+
{
|
|
396
|
+
name: 'MUST: Tools have description property',
|
|
397
|
+
run: async () => {
|
|
398
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
399
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
400
|
+
}
|
|
401
|
+
const allHaveDesc = this.tools.every(t => typeof t.description === 'string');
|
|
402
|
+
return {
|
|
403
|
+
passed: allHaveDesc,
|
|
404
|
+
message: allHaveDesc ? 'OK' : 'Some tools missing description',
|
|
405
|
+
};
|
|
406
|
+
},
|
|
407
|
+
},
|
|
408
|
+
{
|
|
409
|
+
name: 'MUST: Tools have inputSchema property',
|
|
410
|
+
run: async () => {
|
|
411
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
412
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
413
|
+
}
|
|
414
|
+
const allHaveSchema = this.tools.every(t => t.inputSchema && t.inputSchema.type === 'object');
|
|
415
|
+
return {
|
|
416
|
+
passed: allHaveSchema,
|
|
417
|
+
message: allHaveSchema ? 'OK' : 'Some tools missing/invalid inputSchema',
|
|
418
|
+
};
|
|
419
|
+
},
|
|
420
|
+
},
|
|
421
|
+
{
|
|
422
|
+
name: 'MUST: Tool call returns content array',
|
|
423
|
+
run: async () => {
|
|
424
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
425
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
426
|
+
}
|
|
427
|
+
// Find a safe tool to call (prefer debug/status type tools)
|
|
428
|
+
const safeTool = this.tools.find(t => t.name.includes('debug') ||
|
|
429
|
+
t.name.includes('status') ||
|
|
430
|
+
t.name.includes('about') ||
|
|
431
|
+
t.name.includes('version')) || this.tools[0];
|
|
432
|
+
try {
|
|
433
|
+
const result = await this.callTool(safeTool.name, {});
|
|
434
|
+
const hasContent = result && Array.isArray(result.content);
|
|
435
|
+
return {
|
|
436
|
+
passed: hasContent,
|
|
437
|
+
message: hasContent ? 'OK' : 'Response missing content array',
|
|
438
|
+
};
|
|
439
|
+
}
|
|
440
|
+
catch (e) {
|
|
441
|
+
return { passed: false, message: e.message };
|
|
442
|
+
}
|
|
443
|
+
},
|
|
444
|
+
},
|
|
445
|
+
{
|
|
446
|
+
name: 'MUST: Content items have type field',
|
|
447
|
+
run: async () => {
|
|
448
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
449
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
450
|
+
}
|
|
451
|
+
const safeTool = this.tools.find(t => t.name.includes('debug')) || this.tools[0];
|
|
452
|
+
try {
|
|
453
|
+
const result = await this.callTool(safeTool.name, {});
|
|
454
|
+
const allHaveType = result?.content?.every((c) => typeof c.type === 'string');
|
|
455
|
+
return {
|
|
456
|
+
passed: allHaveType,
|
|
457
|
+
message: allHaveType ? 'OK' : 'Content items missing type',
|
|
458
|
+
};
|
|
459
|
+
}
|
|
460
|
+
catch (e) {
|
|
461
|
+
return { passed: false, message: e.message };
|
|
462
|
+
}
|
|
463
|
+
},
|
|
464
|
+
},
|
|
465
|
+
{
|
|
466
|
+
name: 'MUST: Resources list works (if supported)',
|
|
467
|
+
run: async () => {
|
|
468
|
+
if (!this.capabilities.resources) {
|
|
469
|
+
return { passed: true, message: 'Resources not supported (skipped)' };
|
|
470
|
+
}
|
|
471
|
+
return {
|
|
472
|
+
passed: Array.isArray(this.resources),
|
|
473
|
+
message: Array.isArray(this.resources)
|
|
474
|
+
? `OK (${this.resources.length} resources)`
|
|
475
|
+
: 'resources/list did not return array',
|
|
476
|
+
};
|
|
477
|
+
},
|
|
478
|
+
},
|
|
479
|
+
{
|
|
480
|
+
name: 'MUST: Prompts list works (if supported)',
|
|
481
|
+
run: async () => {
|
|
482
|
+
if (!this.capabilities.prompts) {
|
|
483
|
+
return { passed: true, message: 'Prompts not supported (skipped)' };
|
|
484
|
+
}
|
|
485
|
+
return {
|
|
486
|
+
passed: Array.isArray(this.prompts),
|
|
487
|
+
message: Array.isArray(this.prompts)
|
|
488
|
+
? `OK (${this.prompts.length} prompts)`
|
|
489
|
+
: 'prompts/list did not return array',
|
|
490
|
+
};
|
|
491
|
+
},
|
|
492
|
+
},
|
|
493
|
+
{
|
|
494
|
+
name: 'MUST: Invalid method returns MethodNotFound (-32601)',
|
|
495
|
+
run: async () => {
|
|
496
|
+
try {
|
|
497
|
+
await this.connection?.send({
|
|
498
|
+
method: 'nonexistent/method',
|
|
499
|
+
params: {},
|
|
500
|
+
});
|
|
501
|
+
return { passed: false, message: 'Should have thrown error' };
|
|
502
|
+
}
|
|
503
|
+
catch (e) {
|
|
504
|
+
// Check if error message indicates method not found
|
|
505
|
+
const isMethodNotFound = e.message?.toLowerCase().includes('method') ||
|
|
506
|
+
e.message?.includes('-32601') ||
|
|
507
|
+
e.message?.toLowerCase().includes('not found') ||
|
|
508
|
+
e.message?.toLowerCase().includes('unknown');
|
|
509
|
+
return {
|
|
510
|
+
passed: isMethodNotFound,
|
|
511
|
+
message: isMethodNotFound ? 'OK - returned error' : `Wrong error: ${e.message}`,
|
|
512
|
+
};
|
|
513
|
+
}
|
|
514
|
+
},
|
|
515
|
+
},
|
|
516
|
+
{
|
|
517
|
+
name: 'MUST: Invalid params returns InvalidParams (-32602)',
|
|
518
|
+
run: async () => {
|
|
519
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
520
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
521
|
+
}
|
|
522
|
+
try {
|
|
523
|
+
// Call tool with obviously wrong param type
|
|
524
|
+
await this.callTool(this.tools[0].name, { __invalid__: { nested: { bad: true } } });
|
|
525
|
+
// Some tools accept any params, so passing is OK
|
|
526
|
+
return { passed: true, message: 'OK - accepted or errored gracefully' };
|
|
527
|
+
}
|
|
528
|
+
catch (e) {
|
|
529
|
+
// Any error response is acceptable
|
|
530
|
+
return { passed: true, message: 'OK - returned error' };
|
|
531
|
+
}
|
|
532
|
+
},
|
|
533
|
+
},
|
|
534
|
+
{
|
|
535
|
+
name: 'SHOULD: Handle progress notification gracefully',
|
|
536
|
+
run: async () => {
|
|
537
|
+
// Test that server doesn't crash when receiving progress notification
|
|
538
|
+
// Progress notifications are client->server per MCP spec
|
|
539
|
+
try {
|
|
540
|
+
this.connection?.process.stdin?.write(JSON.stringify({
|
|
541
|
+
jsonrpc: '2.0',
|
|
542
|
+
method: 'notifications/progress',
|
|
543
|
+
params: {
|
|
544
|
+
progressToken: 'test-token-12345',
|
|
545
|
+
progress: 50,
|
|
546
|
+
total: 100,
|
|
547
|
+
},
|
|
548
|
+
}) + '\n');
|
|
549
|
+
// Give server time to process
|
|
550
|
+
await new Promise(resolve => setTimeout(resolve, 100));
|
|
551
|
+
// If we can still list tools, server handled it gracefully
|
|
552
|
+
const result = await this.listTools();
|
|
553
|
+
return {
|
|
554
|
+
passed: Array.isArray(result),
|
|
555
|
+
message: 'OK - server stable after progress notification',
|
|
556
|
+
};
|
|
557
|
+
}
|
|
558
|
+
catch (e) {
|
|
559
|
+
return { passed: false, message: `Server crashed: ${e.message}` };
|
|
560
|
+
}
|
|
561
|
+
},
|
|
562
|
+
},
|
|
563
|
+
];
|
|
564
|
+
}
|
|
565
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
566
|
+
// TIER 2: CAPABILITY NEGOTIATION
|
|
567
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
568
|
+
getTier2Tests() {
|
|
569
|
+
return [
|
|
570
|
+
{
|
|
571
|
+
name: 'MUST: Tool count is reasonable (1-100)',
|
|
572
|
+
run: async () => {
|
|
573
|
+
if (!this.capabilities.tools) {
|
|
574
|
+
return { passed: true, message: 'Tools not supported (skipped)' };
|
|
575
|
+
}
|
|
576
|
+
const count = this.tools.length;
|
|
577
|
+
const reasonable = count >= 1 && count <= 100;
|
|
578
|
+
return {
|
|
579
|
+
passed: reasonable,
|
|
580
|
+
message: reasonable ? `OK (${count} tools)` : `Unreasonable tool count: ${count}`,
|
|
581
|
+
};
|
|
582
|
+
},
|
|
583
|
+
},
|
|
584
|
+
{
|
|
585
|
+
name: 'SHOULD: Tool names follow conventions (lowercase_snake)',
|
|
586
|
+
run: async () => {
|
|
587
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
588
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
589
|
+
}
|
|
590
|
+
const pattern = /^[a-z][a-z0-9_]*$/;
|
|
591
|
+
const valid = this.tools.filter(t => pattern.test(t.name));
|
|
592
|
+
const ratio = valid.length / this.tools.length;
|
|
593
|
+
return {
|
|
594
|
+
passed: ratio >= 0.8,
|
|
595
|
+
message: ratio >= 0.8 ? 'OK' : `Only ${Math.round(ratio * 100)}% follow naming convention`,
|
|
596
|
+
};
|
|
597
|
+
},
|
|
598
|
+
},
|
|
599
|
+
{
|
|
600
|
+
name: 'SHOULD: Schemas have properties defined',
|
|
601
|
+
run: async () => {
|
|
602
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
603
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
604
|
+
}
|
|
605
|
+
const withProps = this.tools.filter(t => t.inputSchema?.properties);
|
|
606
|
+
const ratio = withProps.length / this.tools.length;
|
|
607
|
+
return {
|
|
608
|
+
passed: ratio >= 0.8,
|
|
609
|
+
message: ratio >= 0.8 ? 'OK' : `Only ${Math.round(ratio * 100)}% have schema properties`,
|
|
610
|
+
};
|
|
611
|
+
},
|
|
612
|
+
},
|
|
613
|
+
{
|
|
614
|
+
name: 'MUST: Resources have valid URIs (if supported)',
|
|
615
|
+
run: async () => {
|
|
616
|
+
if (!this.capabilities.resources || this.resources.length === 0) {
|
|
617
|
+
return { passed: true, message: 'No resources (skipped)' };
|
|
618
|
+
}
|
|
619
|
+
const allHaveUri = this.resources.every(r => typeof r.uri === 'string' && r.uri.length > 0);
|
|
620
|
+
return {
|
|
621
|
+
passed: allHaveUri,
|
|
622
|
+
message: allHaveUri ? `OK (${this.resources.length} resources)` : 'Some resources missing uri',
|
|
623
|
+
};
|
|
624
|
+
},
|
|
625
|
+
},
|
|
626
|
+
{
|
|
627
|
+
name: 'MUST: Prompts have valid names (if supported)',
|
|
628
|
+
run: async () => {
|
|
629
|
+
if (!this.capabilities.prompts || this.prompts.length === 0) {
|
|
630
|
+
return { passed: true, message: 'No prompts (skipped)' };
|
|
631
|
+
}
|
|
632
|
+
const allHaveName = this.prompts.every(p => typeof p.name === 'string' && p.name.length > 0);
|
|
633
|
+
return {
|
|
634
|
+
passed: allHaveName,
|
|
635
|
+
message: allHaveName ? `OK (${this.prompts.length} prompts)` : 'Some prompts missing name',
|
|
636
|
+
};
|
|
637
|
+
},
|
|
638
|
+
},
|
|
639
|
+
{
|
|
640
|
+
name: 'SHOULD: Support pagination cursor (if many items)',
|
|
641
|
+
run: async () => {
|
|
642
|
+
// Test if server properly handles cursor parameter
|
|
643
|
+
// This is a SHOULD because pagination is optional for small lists
|
|
644
|
+
if (!this.capabilities.tools) {
|
|
645
|
+
return { passed: true, message: 'Tools not supported (skipped)' };
|
|
646
|
+
}
|
|
647
|
+
try {
|
|
648
|
+
const result = await this.connection?.send({
|
|
649
|
+
method: 'tools/list',
|
|
650
|
+
params: { cursor: 'invalid_cursor_12345' },
|
|
651
|
+
});
|
|
652
|
+
// Server should either ignore invalid cursor or return error
|
|
653
|
+
// Both are acceptable - we're just checking it doesn't crash
|
|
654
|
+
return {
|
|
655
|
+
passed: true,
|
|
656
|
+
message: result?.tools ? 'OK - handled cursor param' : 'OK - graceful handling',
|
|
657
|
+
};
|
|
658
|
+
}
|
|
659
|
+
catch {
|
|
660
|
+
// Error is also acceptable for invalid cursor
|
|
661
|
+
return { passed: true, message: 'OK - rejected invalid cursor' };
|
|
662
|
+
}
|
|
663
|
+
},
|
|
664
|
+
},
|
|
665
|
+
{
|
|
666
|
+
name: 'SHOULD: Handle logging/setLevel if logging supported',
|
|
667
|
+
run: async () => {
|
|
668
|
+
if (!this.capabilities.logging) {
|
|
669
|
+
return { passed: true, message: 'Logging not supported (skipped)' };
|
|
670
|
+
}
|
|
671
|
+
try {
|
|
672
|
+
// Try to set log level - server should accept or reject gracefully
|
|
673
|
+
await this.connection?.send({
|
|
674
|
+
method: 'logging/setLevel',
|
|
675
|
+
params: { level: 'debug' },
|
|
676
|
+
});
|
|
677
|
+
return { passed: true, message: 'OK - accepted setLevel' };
|
|
678
|
+
}
|
|
679
|
+
catch {
|
|
680
|
+
// Rejection is also valid
|
|
681
|
+
return { passed: true, message: 'OK - rejected setLevel gracefully' };
|
|
682
|
+
}
|
|
683
|
+
},
|
|
684
|
+
},
|
|
685
|
+
{
|
|
686
|
+
name: 'SHOULD: Prompts have valid arguments schema',
|
|
687
|
+
run: async () => {
|
|
688
|
+
if (!this.capabilities.prompts || this.prompts.length === 0) {
|
|
689
|
+
return { passed: true, message: 'No prompts (skipped)' };
|
|
690
|
+
}
|
|
691
|
+
// Check if prompts have arguments defined properly
|
|
692
|
+
const withArgs = this.prompts.filter(p => p.arguments === undefined || // No args is valid
|
|
693
|
+
(Array.isArray(p.arguments) && p.arguments.every((a) => typeof a.name === 'string' && a.name.length > 0)));
|
|
694
|
+
const ratio = withArgs.length / this.prompts.length;
|
|
695
|
+
return {
|
|
696
|
+
passed: ratio >= 0.8,
|
|
697
|
+
message: ratio >= 0.8
|
|
698
|
+
? `OK (${Math.round(ratio * 100)}% have valid args)`
|
|
699
|
+
: `Only ${Math.round(ratio * 100)}% have valid argument schemas`,
|
|
700
|
+
};
|
|
701
|
+
},
|
|
702
|
+
},
|
|
703
|
+
];
|
|
704
|
+
}
|
|
705
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
706
|
+
// TIER 3: TOOL INTEGRITY
|
|
707
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
708
|
+
getTier3Tests() {
|
|
709
|
+
return [
|
|
710
|
+
{
|
|
711
|
+
name: 'MUST: Read tool works (if exists)',
|
|
712
|
+
run: async () => {
|
|
713
|
+
const readTool = this.tools.find(t => t.name.includes('read'));
|
|
714
|
+
if (!readTool) {
|
|
715
|
+
return { passed: true, message: 'No read tool (skipped)' };
|
|
716
|
+
}
|
|
717
|
+
// Create test file
|
|
718
|
+
const testFile = path.join(this.testDir, 'read-test.txt');
|
|
719
|
+
fs.writeFileSync(testFile, 'WJTTC Test Content');
|
|
720
|
+
try {
|
|
721
|
+
const result = await this.callTool(readTool.name, { path: testFile });
|
|
722
|
+
const hasContent = result?.content?.[0]?.text?.includes('WJTTC');
|
|
723
|
+
return {
|
|
724
|
+
passed: hasContent,
|
|
725
|
+
message: hasContent ? 'OK' : 'Read did not return expected content',
|
|
726
|
+
};
|
|
727
|
+
}
|
|
728
|
+
catch (e) {
|
|
729
|
+
return { passed: false, message: e.message };
|
|
730
|
+
}
|
|
731
|
+
},
|
|
732
|
+
},
|
|
733
|
+
{
|
|
734
|
+
name: 'MUST: Write tool works (if exists)',
|
|
735
|
+
run: async () => {
|
|
736
|
+
const writeTool = this.tools.find(t => t.name.includes('write'));
|
|
737
|
+
if (!writeTool) {
|
|
738
|
+
return { passed: true, message: 'No write tool (skipped)' };
|
|
739
|
+
}
|
|
740
|
+
const testFile = path.join(this.testDir, 'write-test.txt');
|
|
741
|
+
const content = 'WJTTC Write Test';
|
|
742
|
+
try {
|
|
743
|
+
await this.callTool(writeTool.name, { path: testFile, content });
|
|
744
|
+
const exists = fs.existsSync(testFile);
|
|
745
|
+
const matches = exists && fs.readFileSync(testFile, 'utf-8') === content;
|
|
746
|
+
return {
|
|
747
|
+
passed: matches,
|
|
748
|
+
message: matches ? 'OK' : 'Write did not create expected file',
|
|
749
|
+
};
|
|
750
|
+
}
|
|
751
|
+
catch (e) {
|
|
752
|
+
return { passed: false, message: e.message };
|
|
753
|
+
}
|
|
754
|
+
},
|
|
755
|
+
},
|
|
756
|
+
{
|
|
757
|
+
name: 'MUST: Handle missing files gracefully',
|
|
758
|
+
run: async () => {
|
|
759
|
+
const readTool = this.tools.find(t => t.name.includes('read'));
|
|
760
|
+
if (!readTool) {
|
|
761
|
+
return { passed: true, message: 'No read tool (skipped)' };
|
|
762
|
+
}
|
|
763
|
+
try {
|
|
764
|
+
const result = await this.callTool(readTool.name, {
|
|
765
|
+
path: '/nonexistent/path/12345.txt'
|
|
766
|
+
});
|
|
767
|
+
// Should return error content, not crash
|
|
768
|
+
return {
|
|
769
|
+
passed: result?.content !== undefined,
|
|
770
|
+
message: 'OK - handled gracefully',
|
|
771
|
+
};
|
|
772
|
+
}
|
|
773
|
+
catch {
|
|
774
|
+
// Throwing is also acceptable
|
|
775
|
+
return { passed: true, message: 'OK - threw error' };
|
|
776
|
+
}
|
|
777
|
+
},
|
|
778
|
+
},
|
|
779
|
+
{
|
|
780
|
+
name: 'SHOULD: Handle cancellation notification',
|
|
781
|
+
run: async () => {
|
|
782
|
+
// Test that server doesn't crash when receiving cancellation
|
|
783
|
+
// This is a SHOULD because not all operations are cancellable
|
|
784
|
+
try {
|
|
785
|
+
// Send a cancellation notification for a non-existent request
|
|
786
|
+
this.connection?.process.stdin?.write(JSON.stringify({
|
|
787
|
+
jsonrpc: '2.0',
|
|
788
|
+
method: 'notifications/cancelled',
|
|
789
|
+
params: { requestId: 99999, reason: 'test cancellation' },
|
|
790
|
+
}) + '\n');
|
|
791
|
+
// Give server time to process
|
|
792
|
+
await new Promise(resolve => setTimeout(resolve, 100));
|
|
793
|
+
// If we can still list tools, server handled it gracefully
|
|
794
|
+
const result = await this.listTools();
|
|
795
|
+
return {
|
|
796
|
+
passed: Array.isArray(result),
|
|
797
|
+
message: 'OK - server stable after cancellation',
|
|
798
|
+
};
|
|
799
|
+
}
|
|
800
|
+
catch (e) {
|
|
801
|
+
return { passed: false, message: `Server crashed: ${e.message}` };
|
|
802
|
+
}
|
|
803
|
+
},
|
|
804
|
+
},
|
|
805
|
+
{
|
|
806
|
+
name: 'MUST: Recover from timeout gracefully',
|
|
807
|
+
run: async () => {
|
|
808
|
+
// TEST THE TESTING: Fire a request with short timeout, then verify server recovers
|
|
809
|
+
try {
|
|
810
|
+
// Create a promise that rejects quickly (simulating timeout scenario)
|
|
811
|
+
const shortTimeoutPromise = new Promise((_, reject) => {
|
|
812
|
+
setTimeout(() => reject(new Error('Simulated timeout')), 10);
|
|
813
|
+
});
|
|
814
|
+
// Race against actual tool call - we expect this to fail
|
|
815
|
+
try {
|
|
816
|
+
await Promise.race([
|
|
817
|
+
this.listTools(),
|
|
818
|
+
shortTimeoutPromise,
|
|
819
|
+
]);
|
|
820
|
+
}
|
|
821
|
+
catch {
|
|
822
|
+
// Expected timeout - continue
|
|
823
|
+
}
|
|
824
|
+
// Now verify server is still responsive after the "timeout"
|
|
825
|
+
await new Promise(resolve => setTimeout(resolve, 100));
|
|
826
|
+
const result = await this.listTools();
|
|
827
|
+
return {
|
|
828
|
+
passed: Array.isArray(result),
|
|
829
|
+
message: 'OK - server stable after timeout scenario',
|
|
830
|
+
};
|
|
831
|
+
}
|
|
832
|
+
catch (e) {
|
|
833
|
+
return { passed: false, message: `Server unstable: ${e.message}` };
|
|
834
|
+
}
|
|
835
|
+
},
|
|
836
|
+
},
|
|
837
|
+
{
|
|
838
|
+
name: 'MUST: Handle empty params object',
|
|
839
|
+
run: async () => {
|
|
840
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
841
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
842
|
+
}
|
|
843
|
+
// Some tools should accept empty params
|
|
844
|
+
const safeTool = this.tools.find(t => t.name.includes('debug') ||
|
|
845
|
+
t.name.includes('status') ||
|
|
846
|
+
t.name.includes('list')) || this.tools[0];
|
|
847
|
+
try {
|
|
848
|
+
const result = await this.callTool(safeTool.name, {});
|
|
849
|
+
return {
|
|
850
|
+
passed: result !== undefined,
|
|
851
|
+
message: 'OK - handled empty params',
|
|
852
|
+
};
|
|
853
|
+
}
|
|
854
|
+
catch {
|
|
855
|
+
// Some tools require params - that's OK
|
|
856
|
+
return { passed: true, message: 'OK - rejected empty params gracefully' };
|
|
857
|
+
}
|
|
858
|
+
},
|
|
859
|
+
},
|
|
860
|
+
];
|
|
861
|
+
}
|
|
862
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
863
|
+
// TIER 4: RESOURCE MANAGEMENT
|
|
864
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
865
|
+
getTier4Tests() {
|
|
866
|
+
return [
|
|
867
|
+
{
|
|
868
|
+
name: 'MUST: Handle text files correctly',
|
|
869
|
+
run: async () => {
|
|
870
|
+
const readTool = this.tools.find(t => t.name.includes('read'));
|
|
871
|
+
if (!readTool)
|
|
872
|
+
return { passed: true, message: 'No read tool (skipped)' };
|
|
873
|
+
const testFile = path.join(this.testDir, 'text.txt');
|
|
874
|
+
fs.writeFileSync(testFile, 'Plain text content');
|
|
875
|
+
try {
|
|
876
|
+
const result = await this.callTool(readTool.name, { path: testFile });
|
|
877
|
+
return {
|
|
878
|
+
passed: result?.content?.[0]?.text === 'Plain text content',
|
|
879
|
+
message: 'OK',
|
|
880
|
+
};
|
|
881
|
+
}
|
|
882
|
+
catch (e) {
|
|
883
|
+
return { passed: false, message: e.message };
|
|
884
|
+
}
|
|
885
|
+
},
|
|
886
|
+
},
|
|
887
|
+
{
|
|
888
|
+
name: 'MUST: Handle JSON files correctly',
|
|
889
|
+
run: async () => {
|
|
890
|
+
const readTool = this.tools.find(t => t.name.includes('read'));
|
|
891
|
+
if (!readTool)
|
|
892
|
+
return { passed: true, message: 'No read tool (skipped)' };
|
|
893
|
+
const testFile = path.join(this.testDir, 'data.json');
|
|
894
|
+
fs.writeFileSync(testFile, '{"test": true}');
|
|
895
|
+
try {
|
|
896
|
+
const result = await this.callTool(readTool.name, { path: testFile });
|
|
897
|
+
const text = result?.content?.[0]?.text;
|
|
898
|
+
const parsed = JSON.parse(text);
|
|
899
|
+
return {
|
|
900
|
+
passed: parsed.test === true,
|
|
901
|
+
message: 'OK',
|
|
902
|
+
};
|
|
903
|
+
}
|
|
904
|
+
catch (e) {
|
|
905
|
+
return { passed: false, message: e.message };
|
|
906
|
+
}
|
|
907
|
+
},
|
|
908
|
+
},
|
|
909
|
+
{
|
|
910
|
+
name: 'SHOULD: Handle large files (<1s for 1MB)',
|
|
911
|
+
run: async () => {
|
|
912
|
+
const readTool = this.tools.find(t => t.name.includes('read'));
|
|
913
|
+
if (!readTool)
|
|
914
|
+
return { passed: true, message: 'No read tool (skipped)' };
|
|
915
|
+
const largeFile = path.join(this.testDir, 'large.txt');
|
|
916
|
+
fs.writeFileSync(largeFile, 'X'.repeat(1024 * 1024)); // 1MB
|
|
917
|
+
const start = perf_hooks_1.performance.now();
|
|
918
|
+
try {
|
|
919
|
+
await this.callTool(readTool.name, { path: largeFile });
|
|
920
|
+
const duration = perf_hooks_1.performance.now() - start;
|
|
921
|
+
return {
|
|
922
|
+
passed: duration < 1000,
|
|
923
|
+
message: duration < 1000 ? `OK (${Math.round(duration)}ms)` : `Too slow: ${Math.round(duration)}ms`,
|
|
924
|
+
};
|
|
925
|
+
}
|
|
926
|
+
catch (e) {
|
|
927
|
+
return { passed: false, message: e.message };
|
|
928
|
+
}
|
|
929
|
+
},
|
|
930
|
+
},
|
|
931
|
+
{
|
|
932
|
+
name: 'MUST: Survive write of large payload (2MB)',
|
|
933
|
+
run: async () => {
|
|
934
|
+
const writeTool = this.tools.find(t => t.name.includes('write'));
|
|
935
|
+
if (!writeTool)
|
|
936
|
+
return { passed: true, message: 'No write tool (skipped)' };
|
|
937
|
+
const largeFile = path.join(this.testDir, 'large-write.txt');
|
|
938
|
+
const content = 'W'.repeat(2 * 1024 * 1024); // 2MB
|
|
939
|
+
const start = perf_hooks_1.performance.now();
|
|
940
|
+
try {
|
|
941
|
+
await this.callTool(writeTool.name, { path: largeFile, content });
|
|
942
|
+
const duration = perf_hooks_1.performance.now() - start;
|
|
943
|
+
const exists = fs.existsSync(largeFile);
|
|
944
|
+
const size = exists ? fs.statSync(largeFile).size : 0;
|
|
945
|
+
return {
|
|
946
|
+
passed: exists && size >= 2 * 1024 * 1024 - 100, // allow small variance
|
|
947
|
+
message: exists
|
|
948
|
+
? `OK (${Math.round(duration)}ms, ${Math.round(size / 1024 / 1024)}MB written)`
|
|
949
|
+
: 'File not created',
|
|
950
|
+
};
|
|
951
|
+
}
|
|
952
|
+
catch (e) {
|
|
953
|
+
// Server might reject large payloads - that's acceptable
|
|
954
|
+
return { passed: true, message: 'OK - rejected large payload gracefully' };
|
|
955
|
+
}
|
|
956
|
+
},
|
|
957
|
+
},
|
|
958
|
+
{
|
|
959
|
+
name: 'SHOULD: Handle resource subscription (if supported)',
|
|
960
|
+
run: async () => {
|
|
961
|
+
if (!this.capabilities.resources) {
|
|
962
|
+
return { passed: true, message: 'Resources not supported (skipped)' };
|
|
963
|
+
}
|
|
964
|
+
// Test resources/subscribe - server should accept or reject gracefully
|
|
965
|
+
try {
|
|
966
|
+
await this.connection?.send({
|
|
967
|
+
method: 'resources/subscribe',
|
|
968
|
+
params: { uri: 'test://example/resource' },
|
|
969
|
+
});
|
|
970
|
+
return { passed: true, message: 'OK - accepted subscribe' };
|
|
971
|
+
}
|
|
972
|
+
catch {
|
|
973
|
+
// Rejection is valid - not all servers support subscriptions
|
|
974
|
+
return { passed: true, message: 'OK - rejected subscribe gracefully' };
|
|
975
|
+
}
|
|
976
|
+
},
|
|
977
|
+
},
|
|
978
|
+
{
|
|
979
|
+
name: 'SHOULD: Read resource by URI (if resources exist)',
|
|
980
|
+
run: async () => {
|
|
981
|
+
if (!this.capabilities.resources || this.resources.length === 0) {
|
|
982
|
+
return { passed: true, message: 'No resources (skipped)' };
|
|
983
|
+
}
|
|
984
|
+
// Try to read first resource
|
|
985
|
+
const firstResource = this.resources[0];
|
|
986
|
+
try {
|
|
987
|
+
const result = await this.readResource(firstResource.uri);
|
|
988
|
+
const hasContents = result?.contents && Array.isArray(result.contents);
|
|
989
|
+
return {
|
|
990
|
+
passed: hasContents,
|
|
991
|
+
message: hasContents
|
|
992
|
+
? `OK - read resource ${firstResource.uri}`
|
|
993
|
+
: 'Resource read returned no contents',
|
|
994
|
+
};
|
|
995
|
+
}
|
|
996
|
+
catch (e) {
|
|
997
|
+
// Some resources may require auth or special setup
|
|
998
|
+
return { passed: true, message: `OK - resource unavailable: ${e.message}` };
|
|
999
|
+
}
|
|
1000
|
+
},
|
|
1001
|
+
},
|
|
1002
|
+
];
|
|
1003
|
+
}
|
|
1004
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1005
|
+
// TIER 5: SECURITY VALIDATION
|
|
1006
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1007
|
+
getTier5Tests() {
|
|
1008
|
+
return [
|
|
1009
|
+
{
|
|
1010
|
+
name: 'MUST: Block path traversal attempts',
|
|
1011
|
+
run: async () => {
|
|
1012
|
+
const readTool = this.tools.find(t => t.name.includes('read'));
|
|
1013
|
+
if (!readTool)
|
|
1014
|
+
return { passed: true, message: 'No read tool (skipped)' };
|
|
1015
|
+
const dangerousPaths = [
|
|
1016
|
+
'../../../etc/passwd',
|
|
1017
|
+
'/etc/shadow',
|
|
1018
|
+
];
|
|
1019
|
+
for (const badPath of dangerousPaths) {
|
|
1020
|
+
try {
|
|
1021
|
+
const result = await this.callTool(readTool.name, { path: badPath });
|
|
1022
|
+
const text = result?.content?.[0]?.text || '';
|
|
1023
|
+
if (text.includes('root:') || text.includes('BEGIN RSA')) {
|
|
1024
|
+
return { passed: false, message: `Leaked sensitive file: ${badPath}` };
|
|
1025
|
+
}
|
|
1026
|
+
}
|
|
1027
|
+
catch {
|
|
1028
|
+
// Expected to fail
|
|
1029
|
+
}
|
|
1030
|
+
}
|
|
1031
|
+
return { passed: true, message: 'OK - blocked dangerous paths' };
|
|
1032
|
+
},
|
|
1033
|
+
},
|
|
1034
|
+
{
|
|
1035
|
+
name: 'MUST: Sanitize command injection',
|
|
1036
|
+
run: async () => {
|
|
1037
|
+
const readTool = this.tools.find(t => t.name.includes('read'));
|
|
1038
|
+
if (!readTool)
|
|
1039
|
+
return { passed: true, message: 'No read tool (skipped)' };
|
|
1040
|
+
const malicious = [
|
|
1041
|
+
'file.txt; rm -rf /',
|
|
1042
|
+
'$(whoami)',
|
|
1043
|
+
'`id`',
|
|
1044
|
+
];
|
|
1045
|
+
for (const input of malicious) {
|
|
1046
|
+
try {
|
|
1047
|
+
await this.callTool(readTool.name, { path: input });
|
|
1048
|
+
}
|
|
1049
|
+
catch {
|
|
1050
|
+
// Expected to fail safely
|
|
1051
|
+
}
|
|
1052
|
+
}
|
|
1053
|
+
return { passed: true, message: 'OK - no command injection' };
|
|
1054
|
+
},
|
|
1055
|
+
},
|
|
1056
|
+
{
|
|
1057
|
+
name: 'MUST NOT: Leak absolute paths in errors',
|
|
1058
|
+
run: async () => {
|
|
1059
|
+
const readTool = this.tools.find(t => t.name.includes('read'));
|
|
1060
|
+
if (!readTool)
|
|
1061
|
+
return { passed: true, message: 'No read tool (skipped)' };
|
|
1062
|
+
try {
|
|
1063
|
+
const result = await this.callTool(readTool.name, {
|
|
1064
|
+
path: '/nonexistent/secret/path.txt'
|
|
1065
|
+
});
|
|
1066
|
+
const text = JSON.stringify(result);
|
|
1067
|
+
const leaks = text.includes('/Users/') || text.includes('/home/');
|
|
1068
|
+
return {
|
|
1069
|
+
passed: !leaks,
|
|
1070
|
+
message: leaks ? 'Leaked absolute paths' : 'OK',
|
|
1071
|
+
};
|
|
1072
|
+
}
|
|
1073
|
+
catch (e) {
|
|
1074
|
+
const leaks = e.message?.includes('/Users/') || e.message?.includes('/home/');
|
|
1075
|
+
return {
|
|
1076
|
+
passed: !leaks,
|
|
1077
|
+
message: leaks ? 'Leaked paths in error' : 'OK',
|
|
1078
|
+
};
|
|
1079
|
+
}
|
|
1080
|
+
},
|
|
1081
|
+
},
|
|
1082
|
+
];
|
|
1083
|
+
}
|
|
1084
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1085
|
+
// TIER 6: PERFORMANCE BENCHMARKS
|
|
1086
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1087
|
+
getTier6Tests() {
|
|
1088
|
+
const targets = this.options.performanceTargets;
|
|
1089
|
+
return [
|
|
1090
|
+
{
|
|
1091
|
+
name: `MUST: List tools in <${targets.toolList}ms`,
|
|
1092
|
+
run: async () => {
|
|
1093
|
+
const start = perf_hooks_1.performance.now();
|
|
1094
|
+
await this.listTools();
|
|
1095
|
+
const duration = perf_hooks_1.performance.now() - start;
|
|
1096
|
+
return {
|
|
1097
|
+
passed: duration < targets.toolList,
|
|
1098
|
+
message: duration < targets.toolList
|
|
1099
|
+
? `OK (${Math.round(duration)}ms)`
|
|
1100
|
+
: `Too slow: ${Math.round(duration)}ms`,
|
|
1101
|
+
};
|
|
1102
|
+
},
|
|
1103
|
+
},
|
|
1104
|
+
{
|
|
1105
|
+
name: `MUST: Call tool in <${targets.toolCall}ms`,
|
|
1106
|
+
run: async () => {
|
|
1107
|
+
const safeTool = this.tools.find(t => t.name.includes('debug')) || this.tools[0];
|
|
1108
|
+
if (!safeTool)
|
|
1109
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
1110
|
+
const start = perf_hooks_1.performance.now();
|
|
1111
|
+
await this.callTool(safeTool.name, {});
|
|
1112
|
+
const duration = perf_hooks_1.performance.now() - start;
|
|
1113
|
+
return {
|
|
1114
|
+
passed: duration < targets.toolCall,
|
|
1115
|
+
message: duration < targets.toolCall
|
|
1116
|
+
? `OK (${Math.round(duration)}ms)`
|
|
1117
|
+
: `Too slow: ${Math.round(duration)}ms`,
|
|
1118
|
+
};
|
|
1119
|
+
},
|
|
1120
|
+
},
|
|
1121
|
+
{
|
|
1122
|
+
name: 'MUST: Handle 10 concurrent ops in <500ms',
|
|
1123
|
+
run: async () => {
|
|
1124
|
+
const safeTool = this.tools.find(t => t.name.includes('debug')) || this.tools[0];
|
|
1125
|
+
if (!safeTool)
|
|
1126
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
1127
|
+
const start = perf_hooks_1.performance.now();
|
|
1128
|
+
const promises = Array(10).fill(null).map(() => this.callTool(safeTool.name, {}));
|
|
1129
|
+
await Promise.all(promises);
|
|
1130
|
+
const duration = perf_hooks_1.performance.now() - start;
|
|
1131
|
+
return {
|
|
1132
|
+
passed: duration < 500,
|
|
1133
|
+
message: duration < 500
|
|
1134
|
+
? `OK (${Math.round(duration)}ms)`
|
|
1135
|
+
: `Too slow: ${Math.round(duration)}ms`,
|
|
1136
|
+
};
|
|
1137
|
+
},
|
|
1138
|
+
},
|
|
1139
|
+
{
|
|
1140
|
+
name: 'MUST: Handle 50 concurrent ops in <2000ms',
|
|
1141
|
+
run: async () => {
|
|
1142
|
+
const safeTool = this.tools.find(t => t.name.includes('debug')) || this.tools[0];
|
|
1143
|
+
if (!safeTool)
|
|
1144
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
1145
|
+
const start = perf_hooks_1.performance.now();
|
|
1146
|
+
const promises = Array(50).fill(null).map(() => this.callTool(safeTool.name, {}));
|
|
1147
|
+
await Promise.all(promises);
|
|
1148
|
+
const duration = perf_hooks_1.performance.now() - start;
|
|
1149
|
+
return {
|
|
1150
|
+
passed: duration < 2000,
|
|
1151
|
+
message: duration < 2000
|
|
1152
|
+
? `OK (${Math.round(duration)}ms for 50 ops)`
|
|
1153
|
+
: `Too slow: ${Math.round(duration)}ms`,
|
|
1154
|
+
};
|
|
1155
|
+
},
|
|
1156
|
+
},
|
|
1157
|
+
{
|
|
1158
|
+
name: 'MUST: Survive malformed JSON-RPC request',
|
|
1159
|
+
run: async () => {
|
|
1160
|
+
// TEST THE TESTING: Send malformed JSON and verify server survives
|
|
1161
|
+
try {
|
|
1162
|
+
// Send malformed JSON directly to stdin
|
|
1163
|
+
this.connection?.process.stdin?.write('{"jsonrpc": "2.0", "id": "not-a-number", "method": "tools/list"}\n');
|
|
1164
|
+
this.connection?.process.stdin?.write('{invalid json syntax\n');
|
|
1165
|
+
this.connection?.process.stdin?.write('{"missing": "method"}\n');
|
|
1166
|
+
// Give server time to process
|
|
1167
|
+
await new Promise(resolve => setTimeout(resolve, 200));
|
|
1168
|
+
// If we can still list tools, server survived
|
|
1169
|
+
const result = await this.listTools();
|
|
1170
|
+
return {
|
|
1171
|
+
passed: Array.isArray(result),
|
|
1172
|
+
message: 'OK - server survived malformed JSON',
|
|
1173
|
+
};
|
|
1174
|
+
}
|
|
1175
|
+
catch (e) {
|
|
1176
|
+
return { passed: false, message: `Server crashed: ${e.message}` };
|
|
1177
|
+
}
|
|
1178
|
+
},
|
|
1179
|
+
},
|
|
1180
|
+
{
|
|
1181
|
+
name: 'MUST: Handle rapid sequential requests',
|
|
1182
|
+
run: async () => {
|
|
1183
|
+
const safeTool = this.tools.find(t => t.name.includes('debug')) || this.tools[0];
|
|
1184
|
+
if (!safeTool)
|
|
1185
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
1186
|
+
// Fire 20 requests as fast as possible sequentially
|
|
1187
|
+
const start = perf_hooks_1.performance.now();
|
|
1188
|
+
for (let i = 0; i < 20; i++) {
|
|
1189
|
+
await this.callTool(safeTool.name, {});
|
|
1190
|
+
}
|
|
1191
|
+
const duration = perf_hooks_1.performance.now() - start;
|
|
1192
|
+
return {
|
|
1193
|
+
passed: duration < 3000,
|
|
1194
|
+
message: duration < 3000
|
|
1195
|
+
? `OK (${Math.round(duration)}ms for 20 sequential)`
|
|
1196
|
+
: `Too slow: ${Math.round(duration)}ms`,
|
|
1197
|
+
};
|
|
1198
|
+
},
|
|
1199
|
+
},
|
|
1200
|
+
];
|
|
1201
|
+
}
|
|
1202
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1203
|
+
// TIER 7: INTEGRATION READINESS
|
|
1204
|
+
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
|
1205
|
+
getTier7Tests() {
|
|
1206
|
+
return [
|
|
1207
|
+
{
|
|
1208
|
+
name: 'MUST: All tools follow naming conventions',
|
|
1209
|
+
run: async () => {
|
|
1210
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
1211
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
1212
|
+
}
|
|
1213
|
+
const pattern = /^[a-z][a-z0-9_]*$/;
|
|
1214
|
+
const invalid = this.tools.filter(t => !pattern.test(t.name));
|
|
1215
|
+
return {
|
|
1216
|
+
passed: invalid.length === 0,
|
|
1217
|
+
message: invalid.length === 0
|
|
1218
|
+
? 'OK'
|
|
1219
|
+
: `Invalid names: ${invalid.map(t => t.name).join(', ')}`,
|
|
1220
|
+
};
|
|
1221
|
+
},
|
|
1222
|
+
},
|
|
1223
|
+
{
|
|
1224
|
+
name: 'MUST: Descriptions are informative (>10 chars)',
|
|
1225
|
+
run: async () => {
|
|
1226
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
1227
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
1228
|
+
}
|
|
1229
|
+
const short = this.tools.filter(t => !t.description || t.description.length < 10);
|
|
1230
|
+
return {
|
|
1231
|
+
passed: short.length === 0,
|
|
1232
|
+
message: short.length === 0
|
|
1233
|
+
? 'OK'
|
|
1234
|
+
: `${short.length} tools have short/missing descriptions`,
|
|
1235
|
+
};
|
|
1236
|
+
},
|
|
1237
|
+
},
|
|
1238
|
+
{
|
|
1239
|
+
name: 'SHOULD: Include emoji indicators',
|
|
1240
|
+
run: async () => {
|
|
1241
|
+
if (!this.capabilities.tools || this.tools.length === 0) {
|
|
1242
|
+
return { passed: true, message: 'No tools (skipped)' };
|
|
1243
|
+
}
|
|
1244
|
+
const emojiPattern = /[\u{1F300}-\u{1F9FF}]/u;
|
|
1245
|
+
const withEmoji = this.tools.filter(t => emojiPattern.test(t.description || ''));
|
|
1246
|
+
const ratio = withEmoji.length / this.tools.length;
|
|
1247
|
+
return {
|
|
1248
|
+
passed: ratio >= 0.5,
|
|
1249
|
+
message: ratio >= 0.5
|
|
1250
|
+
? `OK (${Math.round(ratio * 100)}% have emoji)`
|
|
1251
|
+
: `Only ${Math.round(ratio * 100)}% have emoji`,
|
|
1252
|
+
};
|
|
1253
|
+
},
|
|
1254
|
+
},
|
|
1255
|
+
{
|
|
1256
|
+
name: 'SHOULD: Resources have descriptions',
|
|
1257
|
+
run: async () => {
|
|
1258
|
+
if (!this.capabilities.resources || this.resources.length === 0) {
|
|
1259
|
+
return { passed: true, message: 'No resources (skipped)' };
|
|
1260
|
+
}
|
|
1261
|
+
const withDesc = this.resources.filter(r => r.description && r.description.length > 0);
|
|
1262
|
+
const ratio = withDesc.length / this.resources.length;
|
|
1263
|
+
return {
|
|
1264
|
+
passed: ratio >= 0.5,
|
|
1265
|
+
message: ratio >= 0.5
|
|
1266
|
+
? `OK (${Math.round(ratio * 100)}% have descriptions)`
|
|
1267
|
+
: `Only ${Math.round(ratio * 100)}% have descriptions`,
|
|
1268
|
+
};
|
|
1269
|
+
},
|
|
1270
|
+
},
|
|
1271
|
+
{
|
|
1272
|
+
name: 'SHOULD: Prompts have descriptions',
|
|
1273
|
+
run: async () => {
|
|
1274
|
+
if (!this.capabilities.prompts || this.prompts.length === 0) {
|
|
1275
|
+
return { passed: true, message: 'No prompts (skipped)' };
|
|
1276
|
+
}
|
|
1277
|
+
const withDesc = this.prompts.filter(p => p.description && p.description.length > 0);
|
|
1278
|
+
const ratio = withDesc.length / this.prompts.length;
|
|
1279
|
+
return {
|
|
1280
|
+
passed: ratio >= 0.5,
|
|
1281
|
+
message: ratio >= 0.5
|
|
1282
|
+
? `OK (${Math.round(ratio * 100)}% have descriptions)`
|
|
1283
|
+
: `Only ${Math.round(ratio * 100)}% have descriptions`,
|
|
1284
|
+
};
|
|
1285
|
+
},
|
|
1286
|
+
},
|
|
1287
|
+
];
|
|
1288
|
+
}
|
|
1289
|
+
}
|
|
1290
|
+
exports.WJTTCCertifier = WJTTCCertifier;
|
|
1291
|
+
//# sourceMappingURL=certifier.js.map
|