@mcpjam/sdk 0.1.4 → 0.8.0

package/README.md CHANGED
@@ -1,124 +1,372 @@
  # @mcpjam/sdk
 
- The official MCPJam SDK provides utilities for building, testing, and developing MCP clients and servers. Built on top of the [@modelcontextprotocol/sdk](https://github.com/modelcontextprotocol/sdk), it offers high-level abstractions and tools to accelerate MCP development.
+ Use the MCPJam SDK to write unit tests and evals for your MCP server.
 
- [![npm version](https://img.shields.io/npm/v/@mcpjam/sdk?style=for-the-badge&color=blue)](https://www.npmjs.com/package/@mcpjam/sdk)
- [![License: Apache 2.0](https://img.shields.io/badge/License-Apache%202.0-blue.svg?style=for-the-badge)](https://opensource.org/licenses/Apache-2.0)
-
- # Installation
+ ## Installation
 
  ```bash
  npm install @mcpjam/sdk
  ```
 
- # Key Features
+ Compatible with your favorite testing frameworks like [Jest](https://jestjs.io/) and [Vitest](https://vitest.dev/).
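+
+ For example, a minimal Vitest config (a sketch; the values are illustrative, not SDK requirements) enables the global `describe`/`test`/`expect` style used below and gives server startup and teardown enough time:
+
+ ```ts
+ // vitest.config.ts (a minimal sketch)
+ import { defineConfig } from "vitest/config";
+
+ export default defineConfig({
+   test: {
+     globals: true,       // use describe/test/expect without imports
+     testTimeout: 30_000, // spawning an MCP server over STDIO can take a few seconds
+     hookTimeout: 30_000, // beforeAll/afterAll also start and stop servers
+   },
+ });
+ ```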
 
- ## MCPClientManager
+ ## Quick Start
 
- The primary utility in the SDK is `MCPClientManager`, a powerful client manager for connecting to and interacting with MCP servers:
+ ### Unit Test
 
- - **Multi-server support** - Manage multiple MCP server connections simultaneously
- - **All transports** - STDIO, HTTP/SSE, and Streamable HTTP support
- - **Lifecycle management** - Automatic connection handling and cleanup
- - **Tools, resources, prompts** - Full MCP protocol support including elicitation
- - **Agent framework integration** - Built-in adapters for Vercel AI SDK and other popular libraries
- - **OAuth & authentication** - Bearer token and custom header support
+ Test the individual parts and request/response flow of your MCP server. MCP unit tests are deterministic.
 
- ### Use Cases
+ ```ts
+ import { MCPClientManager } from "@mcpjam/sdk";
 
- The SDK is designed for:
+ describe("Everything MCP example", () => {
+   let manager: MCPClientManager;
+
+   beforeAll(async () => {
+     manager = new MCPClientManager();
+     await manager.connectToServer("everything", {
+       command: "npx",
+       args: ["-y", "@modelcontextprotocol/server-everything"],
+     });
+   });
+
+   afterAll(async () => {
+     await manager.disconnectServer("everything");
+   });
+
+   test("server has expected tools", async () => {
+     const tools = await manager.listTools("everything");
+     expect(tools.tools.map((t) => t.name)).toContain("get-sum");
+   });
+
+   test("get-sum tool returns correct result", async () => {
+     const result = await manager.executeTool("everything", "get-sum", { a: 2, b: 3 });
+     expect(result.content[0].text).toBe("5");
+   });
+ });
+ ```
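+
+ The same pattern covers the other MCP primitives exposed by `MCPClientManager` (see the API reference below). A sketch, to be placed inside the same `describe` block; what a server actually exposes depends on the server under test:
+
+ ```ts
+ // Sketch: additional unit tests using the same connected manager
+ test("server exposes resources", async () => {
+   const resources = await manager.listResources("everything");
+   expect(resources.resources.length).toBeGreaterThan(0);
+ });
+
+ test("server exposes prompts", async () => {
+   const prompts = await manager.listPrompts("everything");
+   expect(prompts.prompts.length).toBeGreaterThan(0);
+ });
+ ```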
 
- - **Building AI agents** - Connect agents to MCP servers for tool access
- - **Creating MCP clients** - Build custom clients with full protocol support
- - **Testing MCP servers** - Write unit tests and E2E tests for your servers
- - **LLM applications** - Add MCP support to chat applications and AI workflows
+ ### MCP Evals
 
- ### Quick Start
+ Test that an LLM correctly understands how to use your MCP server. Evals are non-deterministic, so multiple runs are needed.
 
  ```ts
- import { MCPClientManager } from "@mcpjam/sdk";
-
- // Initialize with server configurations
- const manager = new MCPClientManager({
-   // STDIO server
-   filesystem: {
-     command: "npx",
-     args: ["-y", "@modelcontextprotocol/server-filesystem", "/tmp"],
-   },
-   // HTTP/SSE server with authentication
-   asana: {
-     url: new URL("https://mcp.asana.com/sse"),
-     requestInit: {
-       headers: {
-         Authorization: "Bearer YOUR_TOKEN",
+ import { MCPClientManager, TestAgent, EvalTest } from "@mcpjam/sdk";
+
+ describe("Asana MCP Evals", () => {
+   let manager: MCPClientManager;
+   let agent: TestAgent;
+
+   beforeAll(async () => {
+     manager = new MCPClientManager();
+     await manager.connectToServer("asana", {
+       url: "https://mcp.asana.com/sse",
+       requestInit: {
+         headers: { Authorization: `Bearer ${process.env.ASANA_TOKEN}` },
        },
-     },
-   },
- });
+     });
+
+     agent = new TestAgent({
+       tools: await manager.getToolsForAiSdk(["asana"]),
+       model: "openai/gpt-4o",
+       apiKey: process.env.OPENAI_API_KEY!,
+     });
+   });
+
+   afterAll(async () => {
+     await manager.disconnectServer("asana");
+   });
+
+   // Single-turn eval
+   test("list workspaces > 80% accuracy", async () => {
+     const evalTest = new EvalTest({
+       name: "list-workspaces",
+       test: async (agent) => {
+         const result = await agent.prompt("Show me all my Asana workspaces");
+         return result.hasToolCall("asana_list_workspaces");
+       },
+     });
+
+     await evalTest.run(agent, {
+       iterations: 10,
+       onFailure: (report) => console.error(report), // Print the report when a test iteration fails.
+     });
+
+     expect(evalTest.accuracy()).toBeGreaterThan(0.8); // Pass threshold
+   });
+
+   // Multi-turn eval
+   test("get user then list projects > 80% accuracy", async () => {
+     const evalTest = new EvalTest({
+       name: "user-then-projects",
+       test: async (agent) => {
+         const r1 = await agent.prompt("Who am I in Asana?");
+         if (!r1.hasToolCall("asana_get_user")) return false;
+
+         const r2 = await agent.prompt("Now list my projects", { context: [r1] }); // Continue the conversation from the previous prompt
+         return r2.hasToolCall("asana_get_projects");
+       },
+     });
+
+     await evalTest.run(agent, {
+       iterations: 5,
+       onFailure: (report) => console.error(report),
+     });
+
+     expect(evalTest.accuracy()).toBeGreaterThan(0.8);
+   });
+
+   // Validating tool arguments
+   test("search tasks passes correct workspace_gid", async () => {
+     const evalTest = new EvalTest({
+       name: "search-args",
+       test: async (agent) => {
+         const result = await agent.prompt("Search for tasks containing 'bug' in my workspace");
+         const args = result.getToolArguments("asana_search_tasks");
+         return result.hasToolCall("asana_search_tasks") && typeof args?.workspace_gid === "string";
+       },
+     });
+
+     await evalTest.run(agent, {
+       iterations: 5,
+       onFailure: (report) => console.error(report),
+     });
 
- // List and execute tools
- const tools = await manager.getTools(["filesystem"]);
- const result = await manager.executeTool("filesystem", "read_file", {
-   path: "/tmp/example.txt",
+     expect(evalTest.accuracy()).toBeGreaterThan(0.8);
+   });
  });
+ ```
+
+ ---
+
+ ## API Reference
+
+ <details>
+ <summary><strong>MCPClientManager</strong></summary>
+
+ Manages connections to one or more MCP servers.
+
+ ```ts
+ const manager = new MCPClientManager();
 
- // Integrate with Vercel AI SDK
- import { generateText } from "ai";
- import { openai } from "@ai-sdk/openai";
+ // Connect to STDIO server
+ await manager.connectToServer("everything", {
+   command: "npx",
+   args: ["-y", "@modelcontextprotocol/server-everything"],
+ });
 
- const response = await generateText({
-   model: openai("gpt-4o-mini"),
-   tools: manager.getToolsForAiSdk(),
-   messages: [{ role: "user", content: "List files in /tmp" }],
+ // Connect to HTTP/SSE server
+ await manager.connectToServer("asana", {
+   url: "https://mcp.asana.com/sse",
+   requestInit: {
+     headers: { Authorization: "Bearer TOKEN" },
+   },
  });
+
+ // Get tools for AI SDK integration
+ const tools = await manager.getToolsForAiSdk(["everything", "asana"]);
+
+ // Direct MCP operations
+ await manager.listTools("everything");
+ await manager.executeTool("everything", "add", { a: 1, b: 2 });
+ await manager.listResources("everything");
+ await manager.readResource("everything", { uri: "file:///tmp/test.txt" });
+ await manager.listPrompts("everything");
+ await manager.getPrompt("everything", { name: "greeting" });
+ await manager.pingServer("everything");
+
+ // Disconnect
+ await manager.disconnectServer("everything");
  ```
 
- ## Documentation
+ </details>
+
+ <details>
+ <summary><strong>TestAgent</strong></summary>
 
- For detailed documentation on `MCPClientManager` including:
+ Runs LLM prompts with MCP tool access.
 
- - Connection configuration (STDIO, HTTP/SSE)
- - Tool execution and resource management
- - Elicitation handling
- - Agent framework integrations
- - API reference
+ ```ts
+ const agent = new TestAgent({
+   tools: await manager.getToolsForAiSdk(),
+   model: "openai/gpt-4o", // provider/model format
+   apiKey: process.env.OPENAI_API_KEY!,
+   systemPrompt: "You are a helpful assistant.", // optional
+   temperature: 0.7, // optional, omit for reasoning models
+   maxSteps: 10, // optional, max tool call loops
+ });
 
- See the [MCPClientManager README](./mcp-client-manager/README.md).
+ // Run a prompt
+ const result = await agent.prompt("Add 2 and 3");
 
- ## Development
+ // Multi-turn with context
+ const r1 = await agent.prompt("Who am I?");
+ const r2 = await agent.prompt("List my projects", { context: [r1] });
+ ```
 
- ### Building Locally
+ **Supported providers:** `openai`, `anthropic`, `azure`, `google`, `mistral`, `deepseek`, `ollama`, `openrouter`, `xai`
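+
+ Because the `model` string uses the `provider/model` format, switching providers should only require a different model id and API key. A sketch (the Anthropic model id below is a placeholder, not an SDK default):
+
+ ```ts
+ const claudeAgent = new TestAgent({
+   tools: await manager.getToolsForAiSdk(),
+   model: "anthropic/claude-sonnet-4-5", // placeholder id; use whichever model your provider offers
+   apiKey: process.env.ANTHROPIC_API_KEY!,
+ });
+ ```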
 
- Build the entire SDK workspace:
+ </details>
 
- ```bash
- npm run build
+ <details>
+ <summary><strong>PromptResult</strong></summary>
+
+ Returned by `agent.prompt()`. Contains the LLM response and tool calls.
+
+ ```ts
+ const result = await agent.prompt("Add 2 and 3");
+
+ // Tool calls
+ result.hasToolCall("add"); // boolean
+ result.toolsCalled(); // ["add"]
+ result.getToolCalls(); // [{ toolName: "add", arguments: { a: 2, b: 3 } }]
+ result.getToolArguments("add"); // { a: 2, b: 3 }
+
+ // Response
+ result.text; // "The result is 5"
+
+ // Messages (full conversation)
+ result.getMessages(); // CoreMessage[]
+ result.getUserMessages(); // user messages only
+ result.getAssistantMessages(); // assistant messages only
+ result.getToolMessages(); // tool result messages only
+
+ // Latency
+ result.e2eLatencyMs(); // total wall-clock time
+ result.llmLatencyMs(); // LLM API time
+ result.mcpLatencyMs(); // MCP tool execution time
+
+ // Tokens
+ result.totalTokens();
+ result.inputTokens();
+ result.outputTokens();
+
+ // Errors
+ result.hasError();
+ result.getError();
+
+ // Debug trace (JSON dump of messages)
+ result.formatTrace();
  ```
 
- This compiles all sub-packages including `mcp-client-manager` and generates distributable bundles.
+ </details>
 
- ### Development Mode
+ <details>
+ <summary><strong>EvalTest</strong></summary>
 
- Watch for changes and rebuild automatically:
+ Runs a single test scenario with multiple iterations.
 
- ```bash
- npm run dev
+ ```ts
+ const test = new EvalTest({
+   name: "addition",
+   test: async (agent) => {
+     const result = await agent.prompt("Add 2 and 3");
+     return result.hasToolCall("add");
+   },
+ });
+
+ await test.run(agent, {
+   iterations: 30,
+   concurrency: 5, // parallel iterations (default: 5)
+   retries: 2, // retry failed iterations (default: 0)
+   timeoutMs: 30000, // timeout per iteration (default: 30000)
+   onProgress: (completed, total) => console.log(`${completed}/${total}`),
+   onFailure: (report) => console.error(report), // called if any iteration fails
+ });
+
+ // Metrics
+ test.accuracy(); // success rate (0-1)
+ test.averageTokenUse(); // avg tokens per iteration
+
+ // Iteration details
+ test.getAllIterations(); // all iteration results
+ test.getFailedIterations(); // failed iterations only
+ test.getSuccessfulIterations(); // successful iterations only
+ test.getFailureReport(); // formatted string of failed traces
  ```
 
- ## Resources
+ </details>
 
- - **💬 Discord**: [Join the MCPJam Community](https://discord.gg/JEnDtz8X6z)
- - **📖 MCP Protocol**: [Model Context Protocol Documentation](https://modelcontextprotocol.io/)
- - **🔧 GitHub**: [MCPJam Inspector Repository](https://github.com/MCPJam/inspector)
+ <details>
+ <summary><strong>EvalSuite</strong></summary>
 
- ## Contributing
+ Groups multiple `EvalTest` instances for aggregate metrics.
 
- We welcome contributions! The SDK is part of the [MCPJam Inspector monorepo](https://github.com/MCPJam/inspector). Please see our [Contributing Guide](https://docs.mcpjam.com/CONTRIBUTING) for guidelines.
+ ```ts
+ const suite = new EvalSuite({ name: "Math Operations" });
 
- ## License
+ suite.add(new EvalTest({
+   name: "addition",
+   test: async (agent) => {
+     const r = await agent.prompt("Add 2+3");
+     return r.hasToolCall("add");
+   },
+ }));
 
- Apache License 2.0 - see the [LICENSE](../LICENSE) file for details.
+ suite.add(new EvalTest({
+   name: "multiply",
+   test: async (agent) => {
+     const r = await agent.prompt("Multiply 4*5");
+     return r.hasToolCall("multiply");
+   },
+ }));
 
- ---
+ await suite.run(agent, { iterations: 30 });
+
+ // Aggregate metrics
+ suite.accuracy(); // overall accuracy
+ suite.averageTokenUse();
+
+ // Individual test access
+ suite.get("addition")?.accuracy();
+ suite.get("multiply")?.accuracy();
+ suite.getAll(); // all EvalTest instances
+ ```
+
+ </details>
+
+ <details>
+ <summary><strong>Validators</strong></summary>
+
+ Helper functions for matching tool calls.
+
+ ```ts
+ import {
+   matchToolCalls,
+   matchToolCallsSubset,
+   matchAnyToolCall,
+   matchToolCallCount,
+   matchNoToolCalls,
+   matchToolCallWithArgs,
+   matchToolCallWithPartialArgs,
+   matchToolArgument,
+   matchToolArgumentWith,
+ } from "@mcpjam/sdk";
+
+ const tools = result.toolsCalled(); // ["add", "multiply"]
+ const calls = result.getToolCalls(); // ToolCall[]
+
+ // Exact match (order matters)
+ matchToolCalls(["add", "multiply"], tools); // true
+ matchToolCalls(["multiply", "add"], tools); // false
+
+ // Subset match (order doesn't matter)
+ matchToolCallsSubset(["add"], tools); // true
+
+ // Any match (at least one)
+ matchAnyToolCall(["add", "subtract"], tools); // true
+
+ // Count match
+ matchToolCallCount("add", tools, 1); // true
+
+ // No tools called
+ matchNoToolCalls([]); // true
+
+ // Argument matching
+ matchToolCallWithArgs("add", { a: 2, b: 3 }, calls); // exact match
+ matchToolCallWithPartialArgs("add", { a: 2 }, calls); // partial match
+ matchToolArgument("add", "a", 2, calls); // single arg
+ matchToolArgumentWith("add", "a", (v) => v > 0, calls); // predicate
+ ```
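+
+ The validators also compose with `EvalTest`. A sketch built only from the APIs documented above:
+
+ ```ts
+ const evalTest = new EvalTest({
+   name: "add-with-correct-args",
+   test: async (agent) => {
+     const result = await agent.prompt("Add 2 and 3");
+     // Pass only if the add tool was called with a = 2 (other arguments are ignored)
+     return matchToolCallWithPartialArgs("add", { a: 2 }, result.getToolCalls());
+   },
+ });
+ ```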
 
- **Built with ❤️ for the MCP community** • [🌐 MCPJam.com](https://mcpjam.com)
+ </details>