@mcpjam/sdk 0.1.4 → 0.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +326 -78
- package/dist/index.d.mts +1942 -0
- package/dist/index.d.ts +1942 -9
- package/dist/index.js +2514 -472
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +2838 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +62 -35
- package/dist/index.cjs +0 -836
- package/dist/index.cjs.map +0 -1
- package/dist/index.d.cts +0 -9
- package/dist/mcp-client-manager/index.cjs +0 -834
- package/dist/mcp-client-manager/index.cjs.map +0 -1
- package/dist/mcp-client-manager/index.d.cts +0 -1627
- package/dist/mcp-client-manager/index.d.ts +0 -1627
- package/dist/mcp-client-manager/index.js +0 -824
- package/dist/mcp-client-manager/index.js.map +0 -1
package/README.md
CHANGED
|
@@ -1,124 +1,372 @@
|
|
|
1
1
|
# @mcpjam/sdk
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Use the MCPJam SDK to write unit tests and evals for your MCP server.
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
[](https://opensource.org/licenses/Apache-2.0)
|
|
7
|
-
|
|
8
|
-
# Installation
|
|
5
|
+
## Installation
|
|
9
6
|
|
|
10
7
|
```bash
|
|
11
8
|
npm install @mcpjam/sdk
|
|
12
9
|
```
|
|
13
10
|
|
|
14
|
-
|
|
11
|
+
Compatible with your favorite testing frameworks, such as [Jest](https://jestjs.io/) and [Vitest](https://vitest.dev/).
|
|
15
12
|
|
|
16
|
-
##
|
|
13
|
+
## Quick Start
|
|
17
14
|
|
|
18
|
-
|
|
15
|
+
### Unit Test
|
|
19
16
|
|
|
20
|
-
|
|
21
|
-
- **All transports** - STDIO, HTTP/SSE, and Streamable HTTP support
|
|
22
|
-
- **Lifecycle management** - Automatic connection handling and cleanup
|
|
23
|
-
- **Tools, resources, prompts** - Full MCP protocol support including elicitation
|
|
24
|
-
- **Agent framework integration** - Built-in adapters for Vercel AI SDK and other popular libraries
|
|
25
|
-
- **OAuth & authentication** - Bearer token and custom header support
|
|
17
|
+
Test the individual parts and the request/response flow of your MCP server. MCP unit tests are deterministic.
|
|
26
18
|
|
|
27
|
-
|
|
19
|
+
```ts
|
|
20
|
+
import { MCPClientManager } from "@mcpjam/sdk";
|
|
28
21
|
|
|
29
|
-
|
|
22
|
+
describe("Everything MCP example", () => {
|
|
23
|
+
let manager: MCPClientManager;
|
|
24
|
+
|
|
25
|
+
beforeAll(async () => {
|
|
26
|
+
manager = new MCPClientManager();
|
|
27
|
+
await manager.connectToServer("everything", {
|
|
28
|
+
command: "npx",
|
|
29
|
+
args: ["-y", "@modelcontextprotocol/server-everything"],
|
|
30
|
+
});
|
|
31
|
+
});
|
|
32
|
+
|
|
33
|
+
afterAll(async () => {
|
|
34
|
+
await manager.disconnectServer("everything");
|
|
35
|
+
});
|
|
36
|
+
|
|
37
|
+
test("server has expected tools", async () => {
|
|
38
|
+
const tools = await manager.listTools("everything");
|
|
39
|
+
expect(tools.tools.map((t) => t.name)).toContain("get-sum");
|
|
40
|
+
});
|
|
41
|
+
|
|
42
|
+
test("get-sum tool returns correct result", async () => {
|
|
43
|
+
const result = await manager.executeTool("everything", "get-sum", { a: 2, b: 3 });
|
|
44
|
+
expect(result.content[0].text).toBe("5");
|
|
45
|
+
});
|
|
46
|
+
});
|
|
47
|
+
```
|
|
30
48
|
|
|
31
|
-
|
|
32
|
-
- **Creating MCP clients** - Build custom clients with full protocol support
|
|
33
|
-
- **Testing MCP servers** - Write unit tests and E2E tests for your servers
|
|
34
|
-
- **LLM applications** - Add MCP support to chat applications and AI workflows
|
|
49
|
+
### MCP evals
|
|
35
50
|
|
|
36
|
-
|
|
51
|
+
Test that an LLM correctly understands how to use your MCP server. Evals are non-deterministic, so multiple runs are needed.
|
|
37
52
|
|
|
38
53
|
```ts
|
|
39
|
-
import { MCPClientManager } from "@mcpjam/sdk";
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
requestInit: {
|
|
52
|
-
headers: {
|
|
53
|
-
Authorization: "Bearer YOUR_TOKEN",
|
|
54
|
+
import { MCPClientManager, TestAgent, EvalTest } from "@mcpjam/sdk";
|
|
55
|
+
|
|
56
|
+
describe("Asana MCP Evals", () => {
|
|
57
|
+
let manager: MCPClientManager;
|
|
58
|
+
let agent: TestAgent;
|
|
59
|
+
|
|
60
|
+
beforeAll(async () => {
|
|
61
|
+
manager = new MCPClientManager();
|
|
62
|
+
await manager.connectToServer("asana", {
|
|
63
|
+
url: "https://mcp.asana.com/sse",
|
|
64
|
+
requestInit: {
|
|
65
|
+
headers: { Authorization: `Bearer ${process.env.ASANA_TOKEN}` },
|
|
54
66
|
},
|
|
55
|
-
}
|
|
56
|
-
|
|
57
|
-
|
|
67
|
+
});
|
|
68
|
+
|
|
69
|
+
agent = new TestAgent({
|
|
70
|
+
tools: await manager.getToolsForAiSdk(["asana"]),
|
|
71
|
+
model: "openai/gpt-4o",
|
|
72
|
+
apiKey: process.env.OPENAI_API_KEY!,
|
|
73
|
+
});
|
|
74
|
+
});
|
|
75
|
+
|
|
76
|
+
afterAll(async () => {
|
|
77
|
+
await manager.disconnectServer("asana");
|
|
78
|
+
});
|
|
79
|
+
|
|
80
|
+
// Single-turn eval
|
|
81
|
+
test("list workspaces > 80% accuracy", async () => {
|
|
82
|
+
const evalTest = new EvalTest({
|
|
83
|
+
name: "list-workspaces",
|
|
84
|
+
test: async (agent) => {
|
|
85
|
+
const result = await agent.prompt("Show me all my Asana workspaces");
|
|
86
|
+
return result.hasToolCall("asana_list_workspaces");
|
|
87
|
+
},
|
|
88
|
+
});
|
|
89
|
+
|
|
90
|
+
await evalTest.run(agent, {
|
|
91
|
+
iterations: 10,
|
|
92
|
+
onFailure: (report) => console.error(report), // Print the report when a test iteration fails.
|
|
93
|
+
});
|
|
94
|
+
|
|
95
|
+
expect(evalTest.accuracy()).toBeGreaterThan(0.8); // Pass threshold
|
|
96
|
+
});
|
|
97
|
+
|
|
98
|
+
// Multi-turn eval
|
|
99
|
+
test("get user then list projects > 80% accuracy", async () => {
|
|
100
|
+
const evalTest = new EvalTest({
|
|
101
|
+
name: "user-then-projects",
|
|
102
|
+
test: async (agent) => {
|
|
103
|
+
const r1 = await agent.prompt("Who am I in Asana?");
|
|
104
|
+
if (!r1.hasToolCall("asana_get_user")) return false;
|
|
105
|
+
|
|
106
|
+
const r2 = await agent.prompt("Now list my projects", { context: [r1] }); // Continue the conversation from the previous prompt
|
|
107
|
+
return r2.hasToolCall("asana_get_projects");
|
|
108
|
+
},
|
|
109
|
+
});
|
|
110
|
+
|
|
111
|
+
await evalTest.run(agent, {
|
|
112
|
+
iterations: 5,
|
|
113
|
+
onFailure: (report) => console.error(report),
|
|
114
|
+
});
|
|
115
|
+
|
|
116
|
+
expect(evalTest.accuracy()).toBeGreaterThan(0.8);
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
// Validating tool arguments
|
|
120
|
+
test("search tasks passes correct workspace_gid", async () => {
|
|
121
|
+
const evalTest = new EvalTest({
|
|
122
|
+
name: "search-args",
|
|
123
|
+
test: async (agent) => {
|
|
124
|
+
const result = await agent.prompt("Search for tasks containing 'bug' in my workspace");
|
|
125
|
+
const args = result.getToolArguments("asana_search_tasks");
|
|
126
|
+
return result.hasToolCall("asana_search_tasks") && typeof args?.workspace_gid === "string";
|
|
127
|
+
},
|
|
128
|
+
});
|
|
129
|
+
|
|
130
|
+
await evalTest.run(agent, {
|
|
131
|
+
iterations: 5,
|
|
132
|
+
onFailure: (report) => console.error(report),
|
|
133
|
+
});
|
|
58
134
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
const result = await manager.executeTool("filesystem", "read_file", {
|
|
62
|
-
path: "/tmp/example.txt",
|
|
135
|
+
expect(evalTest.accuracy()).toBeGreaterThan(0.8);
|
|
136
|
+
});
|
|
63
137
|
});
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
## API Reference
|
|
143
|
+
|
|
144
|
+
<details>
|
|
145
|
+
<summary><strong>MCPClientManager</strong></summary>
|
|
146
|
+
|
|
147
|
+
Manages connections to one or more MCP servers.
|
|
148
|
+
|
|
149
|
+
```ts
|
|
150
|
+
const manager = new MCPClientManager();
|
|
64
151
|
|
|
65
|
-
//
|
|
66
|
-
|
|
67
|
-
|
|
152
|
+
// Connect to STDIO server
|
|
153
|
+
await manager.connectToServer("everything", {
|
|
154
|
+
command: "npx",
|
|
155
|
+
args: ["-y", "@modelcontextprotocol/server-everything"],
|
|
156
|
+
});
|
|
68
157
|
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
158
|
+
// Connect to HTTP/SSE server
|
|
159
|
+
await manager.connectToServer("asana", {
|
|
160
|
+
url: "https://mcp.asana.com/sse",
|
|
161
|
+
requestInit: {
|
|
162
|
+
headers: { Authorization: "Bearer TOKEN" },
|
|
163
|
+
},
|
|
73
164
|
});
|
|
165
|
+
|
|
166
|
+
// Get tools for AI SDK integration
|
|
167
|
+
const tools = await manager.getToolsForAiSdk(["everything", "asana"]);
|
|
168
|
+
|
|
169
|
+
// Direct MCP operations
|
|
170
|
+
await manager.listTools("everything");
|
|
171
|
+
await manager.executeTool("everything", "add", { a: 1, b: 2 });
|
|
172
|
+
await manager.listResources("everything");
|
|
173
|
+
await manager.readResource("everything", { uri: "file:///tmp/test.txt" });
|
|
174
|
+
await manager.listPrompts("everything");
|
|
175
|
+
await manager.getPrompt("everything", { name: "greeting" });
|
|
176
|
+
await manager.pingServer("everything");
|
|
177
|
+
|
|
178
|
+
// Disconnect
|
|
179
|
+
await manager.disconnectServer("everything");
|
|
74
180
|
```
|
|
75
181
|
|
|
76
|
-
|
|
182
|
+
</details>
|
|
183
|
+
|
|
184
|
+
<details>
|
|
185
|
+
<summary><strong>TestAgent</strong></summary>
|
|
77
186
|
|
|
78
|
-
|
|
187
|
+
Runs LLM prompts with MCP tool access.
|
|
79
188
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
-
|
|
84
|
-
|
|
189
|
+
```ts
|
|
190
|
+
const agent = new TestAgent({
|
|
191
|
+
tools: await manager.getToolsForAiSdk(),
|
|
192
|
+
model: "openai/gpt-4o", // provider/model format
|
|
193
|
+
apiKey: process.env.OPENAI_API_KEY!,
|
|
194
|
+
systemPrompt: "You are a helpful assistant.", // optional
|
|
195
|
+
temperature: 0.7, // optional, omit for reasoning models
|
|
196
|
+
maxSteps: 10, // optional, max tool call loops
|
|
197
|
+
});
|
|
85
198
|
|
|
86
|
-
|
|
199
|
+
// Run a prompt
|
|
200
|
+
const result = await agent.prompt("Add 2 and 3");
|
|
87
201
|
|
|
88
|
-
|
|
202
|
+
// Multi-turn with context
|
|
203
|
+
const r1 = await agent.prompt("Who am I?");
|
|
204
|
+
const r2 = await agent.prompt("List my projects", { context: [r1] });
|
|
205
|
+
```
|
|
89
206
|
|
|
90
|
-
|
|
207
|
+
**Supported providers:** `openai`, `anthropic`, `azure`, `google`, `mistral`, `deepseek`, `ollama`, `openrouter`, `xai`
|
|
91
208
|
|
|
92
|
-
|
|
209
|
+
</details>
|
|
93
210
|
|
|
94
|
-
|
|
95
|
-
|
|
211
|
+
<details>
|
|
212
|
+
<summary><strong>PromptResult</strong></summary>
|
|
213
|
+
|
|
214
|
+
Returned by `agent.prompt()`. Contains the LLM response and tool calls.
|
|
215
|
+
|
|
216
|
+
```ts
|
|
217
|
+
const result = await agent.prompt("Add 2 and 3");
|
|
218
|
+
|
|
219
|
+
// Tool calls
|
|
220
|
+
result.hasToolCall("add"); // boolean
|
|
221
|
+
result.toolsCalled(); // ["add"]
|
|
222
|
+
result.getToolCalls(); // [{ toolName: "add", arguments: { a: 2, b: 3 } }]
|
|
223
|
+
result.getToolArguments("add"); // { a: 2, b: 3 }
|
|
224
|
+
|
|
225
|
+
// Response
|
|
226
|
+
result.text; // "The result is 5"
|
|
227
|
+
|
|
228
|
+
// Messages (full conversation)
|
|
229
|
+
result.getMessages(); // CoreMessage[]
|
|
230
|
+
result.getUserMessages(); // user messages only
|
|
231
|
+
result.getAssistantMessages(); // assistant messages only
|
|
232
|
+
result.getToolMessages(); // tool result messages only
|
|
233
|
+
|
|
234
|
+
// Latency
|
|
235
|
+
result.e2eLatencyMs(); // total wall-clock time
|
|
236
|
+
result.llmLatencyMs(); // LLM API time
|
|
237
|
+
result.mcpLatencyMs(); // MCP tool execution time
|
|
238
|
+
|
|
239
|
+
// Tokens
|
|
240
|
+
result.totalTokens();
|
|
241
|
+
result.inputTokens();
|
|
242
|
+
result.outputTokens();
|
|
243
|
+
|
|
244
|
+
// Errors
|
|
245
|
+
result.hasError();
|
|
246
|
+
result.getError();
|
|
247
|
+
|
|
248
|
+
// Debug trace (JSON dump of messages)
|
|
249
|
+
result.formatTrace();
|
|
96
250
|
```
|
|
97
251
|
|
|
98
|
-
|
|
252
|
+
</details>
|
|
99
253
|
|
|
100
|
-
|
|
254
|
+
<details>
|
|
255
|
+
<summary><strong>EvalTest</strong></summary>
|
|
101
256
|
|
|
102
|
-
|
|
257
|
+
Runs a single test scenario with multiple iterations.
|
|
103
258
|
|
|
104
|
-
```
|
|
105
|
-
|
|
259
|
+
```ts
|
|
260
|
+
const test = new EvalTest({
|
|
261
|
+
name: "addition",
|
|
262
|
+
test: async (agent) => {
|
|
263
|
+
const result = await agent.prompt("Add 2 and 3");
|
|
264
|
+
return result.hasToolCall("add");
|
|
265
|
+
},
|
|
266
|
+
});
|
|
267
|
+
|
|
268
|
+
await test.run(agent, {
|
|
269
|
+
iterations: 30,
|
|
270
|
+
concurrency: 5, // parallel iterations (default: 5)
|
|
271
|
+
retries: 2, // retry failed iterations (default: 0)
|
|
272
|
+
timeoutMs: 30000, // timeout per iteration (default: 30000)
|
|
273
|
+
onProgress: (completed, total) => console.log(`${completed}/${total}`),
|
|
274
|
+
onFailure: (report) => console.error(report), // called if any iteration fails
|
|
275
|
+
});
|
|
276
|
+
|
|
277
|
+
// Metrics
|
|
278
|
+
test.accuracy(); // success rate (0-1)
|
|
279
|
+
test.averageTokenUse(); // avg tokens per iteration
|
|
280
|
+
|
|
281
|
+
// Iteration details
|
|
282
|
+
test.getAllIterations(); // all iteration results
|
|
283
|
+
test.getFailedIterations(); // failed iterations only
|
|
284
|
+
test.getSuccessfulIterations(); // successful iterations only
|
|
285
|
+
test.getFailureReport(); // formatted string of failed traces
|
|
106
286
|
```
|
|
107
287
|
|
|
108
|
-
|
|
288
|
+
</details>
|
|
109
289
|
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
- **🔧 GitHub**: [MCPJam Inspector Repository](https://github.com/MCPJam/inspector)
|
|
290
|
+
<details>
|
|
291
|
+
<summary><strong>EvalSuite</strong></summary>
|
|
113
292
|
|
|
114
|
-
|
|
293
|
+
Groups multiple `EvalTest` instances for aggregate metrics.
|
|
115
294
|
|
|
116
|
-
|
|
295
|
+
```ts
|
|
296
|
+
const suite = new EvalSuite({ name: "Math Operations" });
|
|
117
297
|
|
|
118
|
-
|
|
298
|
+
suite.add(new EvalTest({
|
|
299
|
+
name: "addition",
|
|
300
|
+
test: async (agent) => {
|
|
301
|
+
const r = await agent.prompt("Add 2+3");
|
|
302
|
+
return r.hasToolCall("add");
|
|
303
|
+
},
|
|
304
|
+
}));
|
|
119
305
|
|
|
120
|
-
|
|
306
|
+
suite.add(new EvalTest({
|
|
307
|
+
name: "multiply",
|
|
308
|
+
test: async (agent) => {
|
|
309
|
+
const r = await agent.prompt("Multiply 4*5");
|
|
310
|
+
return r.hasToolCall("multiply");
|
|
311
|
+
},
|
|
312
|
+
}));
|
|
121
313
|
|
|
122
|
-
|
|
314
|
+
await suite.run(agent, { iterations: 30 });
|
|
315
|
+
|
|
316
|
+
// Aggregate metrics
|
|
317
|
+
suite.accuracy(); // overall accuracy
|
|
318
|
+
suite.averageTokenUse();
|
|
319
|
+
|
|
320
|
+
// Individual test access
|
|
321
|
+
suite.get("addition")?.accuracy();
|
|
322
|
+
suite.get("multiply")?.accuracy();
|
|
323
|
+
suite.getAll(); // all EvalTest instances
|
|
324
|
+
```
|
|
325
|
+
|
|
326
|
+
</details>
|
|
327
|
+
|
|
328
|
+
<details>
|
|
329
|
+
<summary><strong>Validators</strong></summary>
|
|
330
|
+
|
|
331
|
+
Helper functions for matching tool calls.
|
|
332
|
+
|
|
333
|
+
```ts
|
|
334
|
+
import {
|
|
335
|
+
matchToolCalls,
|
|
336
|
+
matchToolCallsSubset,
|
|
337
|
+
matchAnyToolCall,
|
|
338
|
+
matchToolCallCount,
|
|
339
|
+
matchNoToolCalls,
|
|
340
|
+
matchToolCallWithArgs,
|
|
341
|
+
matchToolCallWithPartialArgs,
|
|
342
|
+
matchToolArgument,
|
|
343
|
+
matchToolArgumentWith,
|
|
344
|
+
} from "@mcpjam/sdk";
|
|
345
|
+
|
|
346
|
+
const tools = result.toolsCalled(); // ["add", "multiply"]
|
|
347
|
+
const calls = result.getToolCalls(); // ToolCall[]
|
|
348
|
+
|
|
349
|
+
// Exact match (order matters)
|
|
350
|
+
matchToolCalls(["add", "multiply"], tools); // true
|
|
351
|
+
matchToolCalls(["multiply", "add"], tools); // false
|
|
352
|
+
|
|
353
|
+
// Subset match (order doesn't matter)
|
|
354
|
+
matchToolCallsSubset(["add"], tools); // true
|
|
355
|
+
|
|
356
|
+
// Any match (at least one)
|
|
357
|
+
matchAnyToolCall(["add", "subtract"], tools); // true
|
|
358
|
+
|
|
359
|
+
// Count match
|
|
360
|
+
matchToolCallCount("add", tools, 1); // true
|
|
361
|
+
|
|
362
|
+
// No tools called
|
|
363
|
+
matchNoToolCalls([]); // true
|
|
364
|
+
|
|
365
|
+
// Argument matching
|
|
366
|
+
matchToolCallWithArgs("add", { a: 2, b: 3 }, calls); // exact match
|
|
367
|
+
matchToolCallWithPartialArgs("add", { a: 2 }, calls); // partial match
|
|
368
|
+
matchToolArgument("add", "a", 2, calls); // single arg
|
|
369
|
+
matchToolArgumentWith("add", "a", (v) => v > 0, calls); // predicate
|
|
370
|
+
```
|
|
123
371
|
|
|
124
|
-
|
|
372
|
+
</details>
|