proctor-mcp-server 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +247 -0
- package/build/index.integration-with-mock.js +143 -0
- package/build/index.js +57 -0
- package/package.json +43 -0
- package/shared/index.d.ts +7 -0
- package/shared/index.js +4 -0
- package/shared/logging.d.ts +20 -0
- package/shared/logging.js +34 -0
- package/shared/proctor-client/lib/cancel-exam.d.ts +6 -0
- package/shared/proctor-client/lib/cancel-exam.js +36 -0
- package/shared/proctor-client/lib/destroy-machine.d.ts +7 -0
- package/shared/proctor-client/lib/destroy-machine.js +31 -0
- package/shared/proctor-client/lib/get-machines.d.ts +6 -0
- package/shared/proctor-client/lib/get-machines.js +27 -0
- package/shared/proctor-client/lib/get-metadata.d.ts +6 -0
- package/shared/proctor-client/lib/get-metadata.js +23 -0
- package/shared/proctor-client/lib/get-prior-result.d.ts +6 -0
- package/shared/proctor-client/lib/get-prior-result.js +35 -0
- package/shared/proctor-client/lib/run-exam.d.ts +7 -0
- package/shared/proctor-client/lib/run-exam.js +90 -0
- package/shared/proctor-client/lib/save-result.d.ts +6 -0
- package/shared/proctor-client/lib/save-result.js +42 -0
- package/shared/server.d.ts +66 -0
- package/shared/server.js +65 -0
- package/shared/tools/cancel-exam.d.ts +34 -0
- package/shared/tools/cancel-exam.js +99 -0
- package/shared/tools/destroy-machine.d.ts +30 -0
- package/shared/tools/destroy-machine.js +75 -0
- package/shared/tools/get-machines.d.ts +25 -0
- package/shared/tools/get-machines.js +83 -0
- package/shared/tools/get-metadata.d.ts +25 -0
- package/shared/tools/get-metadata.js +63 -0
- package/shared/tools/get-prior-result.d.ts +38 -0
- package/shared/tools/get-prior-result.js +106 -0
- package/shared/tools/run-exam.d.ts +58 -0
- package/shared/tools/run-exam.js +189 -0
- package/shared/tools/save-result.d.ts +52 -0
- package/shared/tools/save-result.js +122 -0
- package/shared/tools.d.ts +44 -0
- package/shared/tools.js +128 -0
- package/shared/types.d.ts +151 -0
- package/shared/types.js +4 -0
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
/**
 * Builds the `get_machines` MCP tool definition.
 *
 * The tool takes no arguments. Its handler asks the Proctor client for the
 * machine list and renders it as Markdown text.
 *
 * @param _server - MCP server instance; not referenced by this factory.
 * @param clientFactory - Zero-argument function returning a client that exposes
 *   an async `getMachines()` resolving to `{ machines: [...] }`.
 * @returns Tool definition with `name`, `description`, `inputSchema` and `handler`.
 */
export function getMachines(_server, clientFactory) {
    // Properties that get their own labelled bullet, in display order.
    const labelledProps = [
        ['name', 'Name'],
        ['state', 'State'],
        ['region', 'Region'],
        ['created_at', 'Created'],
    ];
    // Keys already rendered explicitly; everything else is dumped as JSON.
    const renderedKeys = new Set(['id', 'name', 'state', 'region', 'created_at']);
    const textItem = (text) => ({ type: 'text', text });
    // Renders one machine as a Markdown section (no trailing newline).
    const renderMachine = (machine) => {
        const lines = [`### Machine: ${machine.id}`];
        for (const [prop, label] of labelledProps) {
            // Falsy values (missing, empty string, 0) are skipped on purpose.
            if (machine[prop]) {
                lines.push(`- **${label}:** ${machine[prop]}`);
            }
        }
        for (const [key, value] of Object.entries(machine)) {
            if (!renderedKeys.has(key)) {
                lines.push(`- **${key}:** ${JSON.stringify(value)}`);
            }
        }
        return lines.join('\n');
    };
    const description = `List active Fly.io machines used for Proctor exams.

Returns information about currently running or recently active Fly machines
that are being used for exam execution.

**Returns:**
- machines: Array of machine objects with id, state, region, and other metadata

**Use cases:**
- Monitor active exam execution infrastructure
- Find machines to clean up or cancel
- Debug issues with running exams
- Check resource utilization

**Note:**
- Machines may be in various states (running, stopped, etc.)
- Use destroy_machine to remove machines that are no longer needed
- Use cancel_exam to stop a running exam on a specific machine`;
    return {
        name: 'get_machines',
        description,
        inputSchema: {
            type: 'object',
            properties: {},
            required: [],
        },
        handler: async () => {
            // Created outside the try block: a throwing factory rejects the handler.
            const client = clientFactory();
            try {
                const response = await client.getMachines();
                if (response.machines.length === 0) {
                    return { content: [textItem('No active Fly.io machines found.')] };
                }
                const sections = [];
                for (const machine of response.machines) {
                    sections.push(renderMachine(machine));
                }
                const heading = `## Active Machines (${response.machines.length})\n\n`;
                // Sections are separated by a blank line; surrounding whitespace is trimmed.
                return { content: [textItem((heading + sections.join('\n\n')).trim())] };
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : String(error);
                return {
                    content: [textItem(`Error getting machines: ${reason}`)],
                    isError: true,
                };
            }
        },
    };
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
2
|
+
import type { ClientFactory } from '../server.js';
|
|
3
|
+
/**
 * Factory for the Proctor metadata tool definition.
 *
 * @param _server - MCP server instance handed to every tool factory.
 * @param clientFactory - Supplies the Proctor client used by the handler.
 * @returns The tool definition: a name, a description, an input schema with no
 *   properties, and an argument-less async handler. The handler resolves to
 *   text content; only the failure shape carries a boolean `isError`.
 */
export declare function getMetadata(_server: Server, clientFactory: ClientFactory): {
    name: string;
    description: string;
    inputSchema: {
        type: string;
        properties: {};
        required: Array<never>;
    };
    handler: () => Promise<{
        content: Array<{
            type: string;
            text: string;
        }>;
        isError?: undefined;
    } | {
        content: Array<{
            type: string;
            text: string;
        }>;
        isError: boolean;
    }>;
};
|
|
25
|
+
//# sourceMappingURL=get-metadata.d.ts.map
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
/**
 * Builds the `get_proctor_metadata` MCP tool definition.
 *
 * The tool takes no arguments. Its handler fetches the metadata from the
 * Proctor client and renders the runtimes and exams as two Markdown lists.
 *
 * @param _server - MCP server instance; not referenced by this factory.
 * @param clientFactory - Zero-argument function returning a client that exposes
 *   an async `getMetadata()` resolving to `{ runtimes, exams }`.
 * @returns Tool definition with `name`, `description`, `inputSchema` and `handler`.
 */
export function getMetadata(_server, clientFactory) {
    const textItem = (text) => ({ type: 'text', text });
    const description = `Get available runtimes and exams for Proctor testing.

Returns the list of available runtime environments (Docker images) and exam types
that can be used with the run_exam tool.

**Returns:**
- runtimes: Array of runtime configurations with id, name, and Docker image
- exams: Array of exam types with id, name, and description

**Use cases:**
- Discover available runtime environments before running an exam
- Find the correct exam ID for a specific test type
- Check which runtime versions are available
- Plan which exam to run against an MCP server

**Note:** Use the runtime_id and exam_id values from this response when calling run_exam.`;
    return {
        name: 'get_proctor_metadata',
        description,
        inputSchema: {
            type: 'object',
            properties: {},
            required: [],
        },
        handler: async () => {
            // Created outside the try block: a throwing factory rejects the handler.
            const client = clientFactory();
            try {
                const response = await client.getMetadata();
                // Pieces are concatenated verbatim; each one carries its own newline.
                const pieces = ['## Available Proctor Runtimes\n\n'];
                for (const runtime of response.runtimes) {
                    pieces.push(`- **${runtime.name}** (id: \`${runtime.id}\`)\n`);
                    pieces.push(` Image: \`${runtime.image}\`\n`);
                }
                pieces.push('\n## Available Exams\n\n');
                for (const exam of response.exams) {
                    pieces.push(`- **${exam.name}** (id: \`${exam.id}\`)\n`);
                    // The description line is optional and skipped when falsy.
                    if (exam.description) {
                        pieces.push(` ${exam.description}\n`);
                    }
                }
                return { content: [textItem(pieces.join('').trim())] };
            }
            catch (error) {
                const reason = error instanceof Error ? error.message : String(error);
                return {
                    content: [textItem(`Error getting Proctor metadata: ${reason}`)],
                    isError: true,
                };
            }
        },
    };
}
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
2
|
+
import type { ClientFactory } from '../server.js';
|
|
3
|
+
/**
 * Factory for the prior-result lookup tool definition.
 *
 * @param _server - MCP server instance handed to every tool factory.
 * @param clientFactory - Supplies the Proctor client used by the handler.
 * @returns The tool definition. Its input schema describes `mirror_id`,
 *   `exam_id` and `input_json`; the handler receives the raw (unvalidated)
 *   arguments and resolves to text content, with a boolean `isError` present
 *   only on the failure shape.
 */
export declare function getPriorResult(_server: Server, clientFactory: ClientFactory): {
    name: string;
    description: string;
    inputSchema: {
        type: string;
        properties: {
            mirror_id: {
                type: string;
                description: "ID of the unofficial mirror to get prior results for.";
            };
            exam_id: {
                type: string;
                description: "Exam ID to filter results by.";
            };
            input_json: {
                type: string;
                description: "Optional JSON string of the current mcp.json for matching. If provided, returns the most recent result with matching config.";
            };
        };
        required: Array<string>;
    };
    handler: (args: unknown) => Promise<{
        content: Array<{
            type: string;
            text: string;
        }>;
        isError?: undefined;
    } | {
        content: Array<{
            type: string;
            text: string;
        }>;
        isError: boolean;
    }>;
};
|
|
38
|
+
//# sourceMappingURL=get-prior-result.d.ts.map
|
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
// Parameter descriptions - single source of truth
|
|
3
|
+
/**
 * Human-readable descriptions for the `get_prior_result` parameters.
 * Referenced by both the Zod schema and the JSON input schema so the two
 * cannot drift apart.
 */
const PARAM_DESCRIPTIONS = {
    // Required: which mirror's history to search.
    mirror_id: "ID of the unofficial mirror to get prior results for.",
    // Required: which exam's results to return.
    exam_id: "Exam ID to filter results by.",
    // Optional: current configuration used to pick the closest prior result.
    input_json: "Optional JSON string of the current mcp.json for matching. If provided, returns the most recent result with matching config.",
};
|
|
8
|
+
/**
 * Runtime validator for the `get_prior_result` arguments:
 * a numeric `mirror_id`, a non-empty `exam_id` and an optional `input_json` string.
 */
const GetPriorResultSchema = (() => {
    // Attaches the shared description text for the given parameter name.
    const withDescription = (schema, param) => schema.describe(PARAM_DESCRIPTIONS[param]);
    return z.object({
        mirror_id: withDescription(z.number(), 'mirror_id'),
        exam_id: withDescription(z.string().min(1), 'exam_id'),
        input_json: withDescription(z.string().optional(), 'input_json'),
    });
})();
|
|
13
|
+
/**
 * Builds the `get_prior_result` MCP tool definition.
 *
 * The handler validates its arguments with `GetPriorResultSchema`, asks the
 * Proctor client for the prior result and renders it as Markdown with the
 * results embedded as pretty-printed JSON.
 *
 * @param _server - MCP server instance; not referenced by this factory.
 * @param clientFactory - Zero-argument function returning a client that exposes
 *   an async `getPriorResult({ mirror_id, exam_id, input_json })`.
 * @returns Tool definition with `name`, `description`, `inputSchema` and `handler`.
 *   The handler rejects (it does not return an error result) when argument
 *   validation fails, because parsing happens before the try block.
 */
export function getPriorResult(_server, clientFactory) {
    const textItem = (text) => ({ type: 'text', text });
    const description = `Retrieve a previous exam result for comparison.

Finds the most recent prior result for the specified mirror and exam, optionally
matching against the current input configuration.

**Returns:**
- id: Result record ID
- datetime_performed: When the exam was run (ISO 8601)
- results: The full exam results
- runtime_image: Docker image used for the exam
- match_type: "exact" if mcp.json matches exactly, "entry_key" if only entry key matches

**Use cases:**
- Compare current test results with previous runs
- Detect regressions in MCP server functionality
- Review historical test outcomes
- Validate that changes haven't broken existing behavior

**Note:**
- Returns 404 if no prior result exists
- The match_type indicates how closely the prior result matches your input_json`;
    const inputSchema = {
        type: 'object',
        properties: {
            mirror_id: {
                type: 'number',
                description: PARAM_DESCRIPTIONS.mirror_id,
            },
            exam_id: {
                type: 'string',
                description: PARAM_DESCRIPTIONS.exam_id,
            },
            input_json: {
                type: 'string',
                description: PARAM_DESCRIPTIONS.input_json,
            },
        },
        required: ['mirror_id', 'exam_id'],
    };
    return {
        name: 'get_prior_result',
        description,
        inputSchema,
        handler: async (args) => {
            // Validation and client creation sit outside the try block, so their
            // failures propagate to the caller as rejections.
            const validatedArgs = GetPriorResultSchema.parse(args);
            const client = clientFactory();
            try {
                const response = await client.getPriorResult({
                    mirror_id: validatedArgs.mirror_id,
                    exam_id: validatedArgs.exam_id,
                    input_json: validatedArgs.input_json,
                });
                const lines = [
                    '## Prior Result',
                    '',
                    `**Result ID:** ${response.id}`,
                    `**Date Performed:** ${response.datetime_performed}`,
                    `**Runtime Image:** ${response.runtime_image}`,
                    `**Match Type:** ${response.match_type}`,
                    '',
                    '### Results',
                    '',
                    '```json',
                    // Template literal keeps the "undefined" text when results is missing.
                    `${JSON.stringify(response.results, null, 2)}`,
                    '```',
                ];
                return { content: [textItem(lines.join('\n').trim())] };
            }
            catch (error) {
                const message = error instanceof Error ? error.message : String(error);
                // A missing prior result is an expected outcome, not a tool failure.
                if (message.includes('No prior result found')) {
                    return {
                        content: [textItem('No prior result found for this mirror and exam combination.')],
                    };
                }
                return {
                    content: [textItem(`Error getting prior result: ${message}`)],
                    isError: true,
                };
            }
        },
    };
}
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
2
|
+
import type { ClientFactory } from '../server.js';
|
|
3
|
+
/**
 * Factory for the exam-execution tool definition.
 *
 * @param _server - MCP server instance handed to every tool factory.
 * @param clientFactory - Supplies the Proctor client used by the handler.
 * @returns The tool definition. Its input schema describes the runtime, exam,
 *   MCP configuration and the optional enrichment/retry/credential parameters;
 *   the handler receives the raw (unvalidated) arguments and resolves to text
 *   content, with a boolean `isError` present only on the failure shape.
 */
export declare function runExam(_server: Server, clientFactory: ClientFactory): {
    name: string;
    description: string;
    inputSchema: {
        type: string;
        properties: {
            runtime_id: {
                type: string;
                description: "Runtime ID from get_proctor_metadata, or \"__custom__\" for a custom Docker image. Example: \"v0.0.37\"";
            };
            exam_id: {
                type: string;
                description: "Exam ID from get_proctor_metadata. Example: \"proctor-mcp-client-init-tools-list\" or \"proctor-mcp-client-auth-check\"";
            };
            mcp_config: {
                type: string;
                description: "JSON string of the mcp.json configuration for the MCP server. Must be a valid JSON object with server configurations.";
            };
            server_json: {
                type: string;
                description: "Optional JSON string of server.json for result enrichment. Provides additional context about the server being tested.";
            };
            custom_runtime_image: {
                type: string;
                description: "Required if runtime_id is \"__custom__\". Docker image URL in format: registry/image:tag";
            };
            max_retries: {
                type: string;
                description: "Maximum number of retry attempts (0-10). Default is 0.";
            };
            mcp_server_slug: {
                type: string;
                description: "Optional MCP server slug for auto-injection of proctor files and OAuth credentials.";
            };
            mcp_json_id: {
                type: string;
                description: "Optional McpJson ID for preloaded OAuth credentials.";
            };
        };
        required: Array<string>;
    };
    handler: (args: unknown) => Promise<{
        content: Array<{
            type: string;
            text: string;
        }>;
        isError: boolean;
    } | {
        content: Array<{
            type: string;
            text: string;
        }>;
        isError?: undefined;
    }>;
};
|
|
58
|
+
//# sourceMappingURL=run-exam.d.ts.map
|
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
// Parameter descriptions - single source of truth
|
|
3
|
+
/**
 * Human-readable descriptions for the `run_exam` parameters.
 * Referenced by both the Zod schema and the JSON input schema so the two
 * cannot drift apart.
 */
const PARAM_DESCRIPTIONS = {
    // Required parameters.
    runtime_id: "Runtime ID from get_proctor_metadata, or \"__custom__\" for a custom Docker image. Example: \"v0.0.37\"",
    exam_id: "Exam ID from get_proctor_metadata. Example: \"proctor-mcp-client-init-tools-list\" or \"proctor-mcp-client-auth-check\"",
    mcp_config: "JSON string of the mcp.json configuration for the MCP server. Must be a valid JSON object with server configurations.",
    // Optional parameters.
    server_json: "Optional JSON string of server.json for result enrichment. Provides additional context about the server being tested.",
    custom_runtime_image: "Required if runtime_id is \"__custom__\". Docker image URL in format: registry/image:tag",
    max_retries: "Maximum number of retry attempts (0-10). Default is 0.",
    mcp_server_slug: "Optional MCP server slug for auto-injection of proctor files and OAuth credentials.",
    mcp_json_id: "Optional McpJson ID for preloaded OAuth credentials.",
};
|
|
13
|
+
/**
 * Runtime validator for the `run_exam` arguments.
 *
 * `runtime_id`, `exam_id` and `mcp_config` are required non-empty strings; the
 * remaining fields are optional. `max_retries` is a count of retry attempts, so
 * it must be a whole number between 0 and 10 inclusive.
 */
const RunExamSchema = z.object({
    runtime_id: z.string().min(1).describe(PARAM_DESCRIPTIONS.runtime_id),
    exam_id: z.string().min(1).describe(PARAM_DESCRIPTIONS.exam_id),
    mcp_config: z.string().min(1).describe(PARAM_DESCRIPTIONS.mcp_config),
    server_json: z.string().optional().describe(PARAM_DESCRIPTIONS.server_json),
    custom_runtime_image: z.string().optional().describe(PARAM_DESCRIPTIONS.custom_runtime_image),
    // `.int()` rejects fractional values such as 2.5, which are not a valid attempt count.
    max_retries: z.number().int().min(0).max(10).optional().describe(PARAM_DESCRIPTIONS.max_retries),
    mcp_server_slug: z.string().optional().describe(PARAM_DESCRIPTIONS.mcp_server_slug),
    mcp_json_id: z.number().optional().describe(PARAM_DESCRIPTIONS.mcp_json_id),
});
|
|
23
|
+
/**
 * Builds the `run_exam` MCP tool definition.
 *
 * The handler validates its arguments, consumes the client's streamed exam
 * entries (`log`, `result`, `error`) and renders them as a single Markdown
 * report containing the collected logs and either the error or the result.
 *
 * @param _server - MCP server instance; not referenced by this factory.
 * @param clientFactory - Zero-argument function returning a client whose
 *   `runExam(params)` yields an async iterable of `{ type, data }` entries.
 * @returns Tool definition with `name`, `description`, `inputSchema` and `handler`.
 *   The handler rejects (it does not return an error result) when schema
 *   validation fails, because parsing happens before any try block. All other
 *   failures are returned as a text result with `isError: true`.
 */
export function runExam(_server, clientFactory) {
    // Wraps a message in the tool's failure result shape.
    const failure = (text) => ({
        content: [{ type: 'text', text }],
        isError: true,
    });
    return {
        name: 'run_exam',
        description: `Execute a Proctor exam against an MCP server.

Runs the specified exam using the provided runtime and MCP configuration. The exam
tests the MCP server's functionality and returns detailed results.

**Returns:**
- Streaming logs showing exam progress
- Final result with status and detailed test outcomes

**Use cases:**
- Test an MCP server's initialization and tool listing
- Verify authentication mechanisms work correctly
- Run comprehensive functionality tests
- Validate MCP protocol compliance
- Test before publishing a new MCP server version

**Note:**
- Use get_proctor_metadata first to discover available runtimes and exams
- The mcp_config must be a valid JSON string representing the mcp.json format
- Custom runtime images require the "__custom__" runtime_id and custom_runtime_image parameter
- Results can be saved using save_result for future comparison`,
        inputSchema: {
            type: 'object',
            properties: {
                runtime_id: {
                    type: 'string',
                    description: PARAM_DESCRIPTIONS.runtime_id,
                },
                exam_id: {
                    type: 'string',
                    description: PARAM_DESCRIPTIONS.exam_id,
                },
                mcp_config: {
                    type: 'string',
                    description: PARAM_DESCRIPTIONS.mcp_config,
                },
                server_json: {
                    type: 'string',
                    description: PARAM_DESCRIPTIONS.server_json,
                },
                custom_runtime_image: {
                    type: 'string',
                    description: PARAM_DESCRIPTIONS.custom_runtime_image,
                },
                max_retries: {
                    type: 'number',
                    description: PARAM_DESCRIPTIONS.max_retries,
                },
                mcp_server_slug: {
                    type: 'string',
                    description: PARAM_DESCRIPTIONS.mcp_server_slug,
                },
                mcp_json_id: {
                    type: 'number',
                    description: PARAM_DESCRIPTIONS.mcp_json_id,
                },
            },
            required: ['runtime_id', 'exam_id', 'mcp_config'],
        },
        handler: async (args) => {
            const validatedArgs = RunExamSchema.parse(args);
            // Validate mcp_config is valid JSON (the parsed value itself is not used;
            // the original string is what gets sent to the client).
            try {
                JSON.parse(validatedArgs.mcp_config);
            }
            catch {
                return failure('Error: mcp_config must be a valid JSON string');
            }
            // Validate custom runtime requirements
            if (validatedArgs.runtime_id === '__custom__' && !validatedArgs.custom_runtime_image) {
                return failure('Error: custom_runtime_image is required when runtime_id is "__custom__"');
            }
            const client = clientFactory();
            try {
                const logs = [];
                let finalResult = null;
                // Stays null until an `error` entry is seen; once set it is always a
                // non-empty string, so an error can never be mistaken for success.
                let errorMessage = null;
                // Consume the streaming response
                for await (const entry of client.runExam({
                    runtime_id: validatedArgs.runtime_id,
                    exam_id: validatedArgs.exam_id,
                    mcp_config: validatedArgs.mcp_config,
                    server_json: validatedArgs.server_json,
                    custom_runtime_image: validatedArgs.custom_runtime_image,
                    max_retries: validatedArgs.max_retries,
                    mcp_server_slug: validatedArgs.mcp_server_slug,
                    mcp_json_id: validatedArgs.mcp_json_id,
                })) {
                    if (entry.type === 'log') {
                        const logData = entry.data;
                        if (logData.message) {
                            logs.push(`[${logData.time || 'LOG'}] ${logData.message}`);
                        }
                        else {
                            // No message field: keep the raw payload so nothing is lost.
                            logs.push(`[LOG] ${JSON.stringify(logData)}`);
                        }
                    }
                    else if (entry.type === 'result') {
                        finalResult = entry.data;
                    }
                    else if (entry.type === 'error') {
                        // Previously an error entry with an empty or missing `error`
                        // field left errorMessage falsy and the run was reported as a
                        // success. Fall back to the raw payload in that case.
                        const reported = entry.data === null || entry.data === undefined
                            ? undefined
                            : entry.data.error;
                        errorMessage = typeof reported === 'string' && reported.length > 0
                            ? reported
                            : `Exam reported an error without a message: ${JSON.stringify(entry.data)}`;
                    }
                }
                // Build the response
                let content = '## Exam Execution\n\n';
                content += `**Runtime:** ${validatedArgs.runtime_id}\n`;
                content += `**Exam:** ${validatedArgs.exam_id}\n\n`;
                if (logs.length > 0) {
                    content += '### Logs\n\n```\n';
                    content += logs.join('\n');
                    content += '\n```\n\n';
                }
                // An error takes precedence over any result received on the stream.
                if (errorMessage !== null) {
                    content += `### Error\n\n${errorMessage}\n`;
                    return failure(content.trim());
                }
                if (finalResult) {
                    content += '### Result\n\n```json\n';
                    content += JSON.stringify(finalResult, null, 2);
                    content += '\n```\n';
                }
                return {
                    content: [
                        {
                            type: 'text',
                            text: content.trim(),
                        },
                    ],
                };
            }
            catch (error) {
                return failure(`Error running exam: ${error instanceof Error ? error.message : String(error)}`);
            }
        },
    };
}
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
import { Server } from '@modelcontextprotocol/sdk/server/index.js';
|
|
2
|
+
import type { ClientFactory } from '../server.js';
|
|
3
|
+
/**
 * Factory for the result-saving tool definition.
 *
 * @param _server - MCP server instance handed to every tool factory.
 * @param clientFactory - Supplies the Proctor client; presumably used by the
 *   handler as in the sibling tools (implementation not visible here — confirm).
 * @returns The tool definition. Its input schema describes the runtime, exam,
 *   server slug, mirror, results (declared via `oneOf`) and optional custom
 *   runtime image; the handler receives the raw (unvalidated) arguments and
 *   resolves to text content, with a boolean `isError` present only on the
 *   failure shape.
 */
export declare function saveResult(_server: Server, clientFactory: ClientFactory): {
    name: string;
    description: string;
    inputSchema: {
        type: string;
        properties: {
            runtime_id: {
                type: string;
                description: "Runtime ID used for the exam, or \"__custom__\" if a custom Docker image was used.";
            };
            exam_id: {
                type: string;
                description: "Exam ID that was executed.";
            };
            mcp_server_slug: {
                type: string;
                description: "Slug of the MCP server that was tested.";
            };
            mirror_id: {
                type: string;
                description: "ID of the unofficial mirror associated with this test.";
            };
            results: {
                oneOf: Array<{
                    type: string;
                }>;
                description: "Exam results as a JSON string or object. This is the full result from run_exam.";
            };
            custom_runtime_image: {
                type: string;
                description: "Required if runtime_id is \"__custom__\". The Docker image URL that was used.";
            };
        };
        required: Array<string>;
    };
    handler: (args: unknown) => Promise<{
        content: Array<{
            type: string;
            text: string;
        }>;
        isError: boolean;
    } | {
        content: Array<{
            type: string;
            text: string;
        }>;
        isError?: undefined;
    }>;
};
|
|
52
|
+
//# sourceMappingURL=save-result.d.ts.map
|