@mcpjam/sdk 0.1.3 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +326 -78
- package/dist/index.d.mts +2070 -0
- package/dist/index.d.ts +2070 -9
- package/dist/index.js +2841 -7
- package/dist/index.js.map +1 -1
- package/dist/index.mjs +2796 -0
- package/dist/index.mjs.map +1 -0
- package/package.json +62 -37
- package/dist/chunk-6XEFXCUG.js +0 -836
- package/dist/chunk-6XEFXCUG.js.map +0 -1
- package/dist/index.cjs +0 -842
- package/dist/index.cjs.map +0 -1
- package/dist/index.d.cts +0 -9
- package/dist/mcp-client-manager/index.cjs +0 -834
- package/dist/mcp-client-manager/index.cjs.map +0 -1
- package/dist/mcp-client-manager/index.d.cts +0 -1582
- package/dist/mcp-client-manager/index.d.ts +0 -1582
- package/dist/mcp-client-manager/index.js +0 -7
- package/dist/mcp-client-manager/index.js.map +0 -1
package/dist/index.d.mts
ADDED
|
@@ -0,0 +1,2070 @@
|
|
|
1
|
+
import { ClientOptions, Client } from '@modelcontextprotocol/sdk/client/index.js';
|
|
2
|
+
import { StreamableHTTPClientTransportOptions } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
|
|
3
|
+
import { SSEClientTransportOptions } from '@modelcontextprotocol/sdk/client/sse.js';
|
|
4
|
+
import { RequestOptions } from '@modelcontextprotocol/sdk/shared/protocol.js';
|
|
5
|
+
import { ElicitResult, ElicitRequest, Tool as Tool$1, CallToolResult, ListToolsResult as ListToolsResult$1, ServerCapabilities, LoggingLevel } from '@modelcontextprotocol/sdk/types.js';
|
|
6
|
+
export { ElicitResult, PromptListChangedNotificationSchema, ResourceListChangedNotificationSchema, ResourceUpdatedNotificationSchema } from '@modelcontextprotocol/sdk/types.js';
|
|
7
|
+
import { ToolSet, ToolCallOptions, dynamicTool, Tool as Tool$2, CoreMessage, CoreUserMessage, CoreAssistantMessage, CoreToolMessage, GenerateTextResult } from 'ai';
|
|
8
|
+
export { CoreAssistantMessage, CoreMessage, CoreToolMessage, CoreUserMessage } from 'ai';
|
|
9
|
+
import { JSONSchema7 } from 'json-schema';
|
|
10
|
+
import { createOpenAI } from '@ai-sdk/openai';
|
|
11
|
+
|
|
12
|
+
/**
|
|
13
|
+
* TypeScript types and interfaces for MCPClientManager
|
|
14
|
+
*/
|
|
15
|
+
|
|
16
|
+
/**
 * Client capability options extracted from MCP SDK ClientOptions.
 *
 * This is the `capabilities` member of `ClientOptions` with `null` and
 * `undefined` removed via `NonNullable`.
 */
type ClientCapabilityOptions = NonNullable<ClientOptions["capabilities"]>;
|
|
20
|
+
/**
 * Base configuration shared by all server types.
 *
 * Every field is optional; the transport-specific configs intersect this
 * type with their own required fields.
 */
type BaseServerConfig = {
    /** Client capabilities to advertise to this server */
    capabilities?: ClientCapabilityOptions;
    /** Request timeout in milliseconds */
    timeout?: number;
    /** Client version to report */
    version?: string;
    /** Error handler for this server */
    onError?: (error: unknown) => void;
    /** Enable simple console logging of JSON-RPC traffic */
    logJsonRpc?: boolean;
    /** Custom logger for JSON-RPC traffic (overrides logJsonRpc) */
    rpcLogger?: RpcLogger;
};
|
|
37
|
+
/**
 * Configuration for stdio-based MCP servers (subprocess).
 *
 * The HTTP-only fields are typed `never`, so a stdio config cannot supply
 * values for them. This keeps the two members of the
 * `StdioServerConfig | HttpServerConfig` union distinguishable.
 */
type StdioServerConfig = BaseServerConfig & {
    /** Command to execute */
    command: string;
    /** Command arguments */
    args?: string[];
    /** Environment variables */
    env?: Record<string, string>;
    // HTTP-only fields: not allowed on a stdio config.
    url?: never;
    accessToken?: never;
    requestInit?: never;
    eventSourceInit?: never;
    authProvider?: never;
    reconnectionOptions?: never;
    sessionId?: never;
    preferSSE?: never;
};
|
|
56
|
+
/**
 * Configuration for HTTP-based MCP servers (SSE or Streamable HTTP).
 *
 * The stdio-only fields (`command`, `args`, `env`) are typed `never`, so an
 * HTTP config cannot supply values for them.
 */
type HttpServerConfig = BaseServerConfig & {
    /** Server URL */
    url: string;
    /**
     * Access token for Bearer authentication.
     * If provided, adds `Authorization: Bearer <accessToken>` header to requests.
     */
    accessToken?: string;
    /** Additional request initialization options */
    requestInit?: StreamableHTTPClientTransportOptions["requestInit"];
    /** SSE-specific event source options */
    eventSourceInit?: SSEClientTransportOptions["eventSourceInit"];
    /** OAuth auth provider */
    authProvider?: StreamableHTTPClientTransportOptions["authProvider"];
    /** Reconnection options for Streamable HTTP */
    reconnectionOptions?: StreamableHTTPClientTransportOptions["reconnectionOptions"];
    /** Session ID for Streamable HTTP */
    sessionId?: StreamableHTTPClientTransportOptions["sessionId"];
    /** Prefer SSE transport over Streamable HTTP */
    preferSSE?: boolean;
    // stdio-only fields: not allowed on an HTTP config.
    command?: never;
    args?: never;
    env?: never;
};
|
|
83
|
+
/**
 * Union type for all server configurations: either a stdio (subprocess)
 * config or an HTTP config.
 */
type MCPServerConfig = StdioServerConfig | HttpServerConfig;
|
|
87
|
+
/**
 * Configuration map for multiple servers (serverId -> config).
 */
type MCPClientManagerConfig = Record<string, MCPServerConfig>;
|
|
91
|
+
/**
 * Connection status for a server.
 */
type MCPConnectionStatus = "connected" | "connecting" | "disconnected";
|
|
95
|
+
/**
 * Summary information for a server.
 */
type ServerSummary = {
    /** Server identifier */
    id: string;
    /** Current connection status */
    status: MCPConnectionStatus;
    /** Server configuration; optional in this summary */
    config?: MCPServerConfig;
};
|
|
103
|
+
/**
 * Event passed to RPC loggers.
 */
type RpcLogEvent = {
    /** Whether the message was sent or received */
    direction: "send" | "receive";
    /** The logged message; untyped, so consumers must narrow it before use */
    message: unknown;
    /** Identifier of the server the message belongs to */
    serverId: string;
};
|
|
111
|
+
/**
 * Function type for JSON-RPC logging. Receives one `RpcLogEvent` per call
 * and returns nothing.
 */
type RpcLogger = (event: RpcLogEvent) => void;
|
|
115
|
+
/**
 * Progress event from server operations.
 */
type ProgressEvent = {
    /** Identifier of the server reporting progress */
    serverId: string;
    /** Token identifying the operation the progress refers to */
    progressToken: string | number;
    /** Progress value reported so far */
    progress: number;
    /** Total expected value, when provided */
    total?: number;
    /** Optional message accompanying the event */
    message?: string;
};
|
|
125
|
+
/**
 * Function type for progress handling. Receives one `ProgressEvent` per call
 * and returns nothing.
 */
type ProgressHandler = (event: ProgressEvent) => void;
|
|
129
|
+
/**
 * Options for MCPClientManager constructor. All fields are optional.
 */
interface MCPClientManagerOptions {
    /** Default client name to report to servers */
    defaultClientName?: string;
    /** Default client version to report */
    defaultClientVersion?: string;
    /** Default capabilities to advertise */
    defaultCapabilities?: ClientCapabilityOptions;
    /** Default request timeout in milliseconds */
    defaultTimeout?: number;
    /** Enable JSON-RPC logging for all servers by default */
    defaultLogJsonRpc?: boolean;
    /** Global JSON-RPC logger */
    rpcLogger?: RpcLogger;
    /** Global progress handler */
    progressHandler?: ProgressHandler;
}
|
|
148
|
+
/**
 * Arguments passed to tool execution: a string-keyed record of untyped values.
 */
type ExecuteToolArguments = Record<string, unknown>;
|
|
152
|
+
/**
 * Options for task-augmented tool calls.
 */
type TaskOptions = {
    /** Time-to-live for the task in milliseconds */
    ttl?: number;
};
|
|
159
|
+
/**
 * Handler for server-specific elicitation requests.
 *
 * Receives the `params` of an MCP `ElicitRequest` and produces an
 * `ElicitResult`, either directly or as a promise.
 */
type ElicitationHandler = (params: ElicitRequest["params"]) => Promise<ElicitResult> | ElicitResult;
|
|
163
|
+
/**
 * Request passed to global elicitation callback.
 */
type ElicitationCallbackRequest = {
    /** Identifier of this elicitation request */
    requestId: string;
    /** Message text of the request */
    message: string;
    /** Schema attached to the request; untyped, so consumers must validate it */
    schema: unknown;
    /** Task ID if this elicitation is related to a task (MCP Tasks spec 2025-11-25) */
    relatedTaskId?: string;
};
|
|
173
|
+
/**
 * Global callback for handling elicitation requests.
 *
 * Receives an `ElicitationCallbackRequest` and produces an `ElicitResult`,
 * either directly or as a promise.
 */
type ElicitationCallback = (request: ElicitationCallbackRequest) => Promise<ElicitResult> | ElicitResult;
|
|
177
|
+
/**
 * Task status values.
 */
type MCPTaskStatus = "working" | "input_required" | "completed" | "failed" | "cancelled";
|
|
181
|
+
/**
 * MCP Task object.
 */
type MCPTask = {
    /** Task identifier */
    taskId: string;
    /** Current task status */
    status: MCPTaskStatus;
    /** Optional message accompanying the status */
    statusMessage?: string;
    /** Creation timestamp, as a string (format not specified here) */
    createdAt: string;
    /** Last-update timestamp, as a string (format not specified here) */
    lastUpdatedAt: string;
    /** Time-to-live, or `null`; presumably milliseconds like `TaskOptions.ttl` (confirm) */
    ttl: number | null;
    /** Optional polling interval; unit not specified here */
    pollInterval?: number;
};
|
|
193
|
+
/**
 * Result from listing tasks.
 */
type MCPListTasksResult = {
    /** Tasks returned by this call */
    tasks: MCPTask[];
    /** Cursor for a following page, when present */
    nextCursor?: string;
};
|
|
200
|
+
/** Per-request options; alias of the MCP SDK `RequestOptions`. */
type ClientRequestOptions = RequestOptions;

// Parameter types: the first argument of the corresponding MCP SDK `Client` method.
type ListResourcesParams = Parameters<Client["listResources"]>[0];
type ListResourceTemplatesParams = Parameters<Client["listResourceTemplates"]>[0];
type ReadResourceParams = Parameters<Client["readResource"]>[0];
type SubscribeResourceParams = Parameters<Client["subscribeResource"]>[0];
type UnsubscribeResourceParams = Parameters<Client["unsubscribeResource"]>[0];
type ListPromptsParams = Parameters<Client["listPrompts"]>[0];
type GetPromptParams = Parameters<Client["getPrompt"]>[0];

// Result types: the awaited return value of the corresponding `Client` method,
// plus the element type of each list result.
type ListToolsResult = Awaited<ReturnType<Client["listTools"]>>;
type MCPPromptListResult = Awaited<ReturnType<Client["listPrompts"]>>;
type MCPPrompt = MCPPromptListResult["prompts"][number];
type MCPGetPromptResult = Awaited<ReturnType<Client["getPrompt"]>>;
type MCPResourceListResult = Awaited<ReturnType<Client["listResources"]>>;
type MCPResource = MCPResourceListResult["resources"][number];
type MCPReadResourceResult = Awaited<ReturnType<Client["readResource"]>>;
type MCPResourceTemplateListResult = Awaited<ReturnType<Client["listResourceTemplates"]>>;
type MCPResourceTemplate = MCPResourceTemplateListResult["resourceTemplates"][number];

/** Alias of `ServerSummary`. */
type MCPServerSummary = ServerSummary;
|
|
218
|
+
/** Options for tool execution */
interface ToolExecuteOptions {
    /** Abort signal for cancellation */
    signal?: AbortSignal;
}
/**
 * An MCP tool with an execute function pre-wired to call the manager.
 * Extends the official MCP SDK Tool type.
 * Returned by MCPClientManager.getTools().
 */
interface Tool extends Tool$1 {
    /**
     * Execute the tool with the given arguments.
     *
     * @param args - Tool arguments as a string-keyed record
     * @param options - Optional execution options (abort signal)
     * @returns The MCP `CallToolResult` for the call
     */
    execute: (args: Record<string, unknown>, options?: ToolExecuteOptions) => Promise<CallToolResult>;
    /**
     * Optional metadata. When present it must carry `_serverId`
     * (presumably the id of the server that owns the tool; confirm against
     * the implementation) and may carry any other keys.
     */
    _meta?: {
        _serverId: string;
        [key: string]: unknown;
    };
}
|
|
236
|
+
|
|
237
|
+
/**
 * AI SDK compatible tool set (Record<string, CoreTool>).
 * Returned by MCPClientManager.getToolsForAiSdk().
 * Can be passed directly to AI SDK's generateText().
 *
 * Alias of the `ToolSet` type imported from `ai`.
 */
type AiSdkTool = ToolSet;
|
|
243
|
+
|
|
244
|
+
/**
|
|
245
|
+
* Notification handler management for MCPClientManager
|
|
246
|
+
*/
|
|
247
|
+
|
|
248
|
+
/** Schema argument type: the first parameter of `Client.setNotificationHandler`. */
type NotificationSchema = Parameters<Client["setNotificationHandler"]>[0];
/** Handler argument type: the second parameter of `Client.setNotificationHandler`. */
type NotificationHandler = Parameters<Client["setNotificationHandler"]>[1];
|
|
250
|
+
|
|
251
|
+
/**
|
|
252
|
+
* Tool conversion utilities for integrating MCP tools with Vercel AI SDK
|
|
253
|
+
*/
|
|
254
|
+
|
|
255
|
+
/**
 * Normalizes a schema to a valid JSON Schema object.
 * Many MCP tools omit the top-level type; Anthropic requires an object schema.
 *
 * @param schema - The input schema (may be incomplete)
 * @returns A normalized JSONSchema7 object
 */
declare function ensureJsonSchemaObject(schema: unknown): JSONSchema7;
|
|
263
|
+
/**
 * Function type for executing tool calls.
 *
 * Takes a single object holding the tool name, its untyped arguments and the
 * AI SDK `ToolCallOptions`, and resolves to an MCP `CallToolResult`.
 */
type CallToolExecutor = (params: {
    name: string;
    args: unknown;
    options: ToolCallOptions;
}) => Promise<CallToolResult>;
|
|
271
|
+
/**
 * Input schema type for tool definitions.
 *
 * Derived from the `inputSchema` property of the first parameter accepted by
 * the AI SDK's `dynamicTool`.
 */
type ToolInputSchema = Parameters<typeof dynamicTool>[0]["inputSchema"];
|
|
275
|
+
/**
 * Schema overrides for specific tools.
 * Maps tool name to custom input schema definition.
 */
type ToolSchemaOverrides = Record<string, {
    inputSchema: ToolInputSchema;
}>;
|
|
282
|
+
/**
 * Result type for converted tools.
 * When explicit schemas are provided, returns a typed object with one AI SDK
 * tool per key of the overrides.
 * When "automatic", returns a generic record.
 */
type ConvertedToolSet<SCHEMAS extends ToolSchemaOverrides | "automatic"> = SCHEMAS extends ToolSchemaOverrides ? {
    [K in keyof SCHEMAS]: Tool$2;
} : Record<string, Tool$2>;
|
|
290
|
+
/**
 * Options for tool conversion.
 */
interface ConvertOptions<TOOL_SCHEMAS extends ToolSchemaOverrides | "automatic"> {
    /** Schema overrides or "automatic" for dynamic conversion */
    schemas?: TOOL_SCHEMAS;
    /** Function to execute tool calls */
    callTool: CallToolExecutor;
}
|
|
299
|
+
/**
 * Converts MCP tools to Vercel AI SDK format.
 *
 * @param listToolsResult - The result from listTools()
 * @param options - Conversion options including callTool executor
 * @returns A ToolSet compatible with Vercel AI SDK
 *
 * @example
 * ```typescript
 * const tools = await convertMCPToolsToVercelTools(listToolsResult, {
 *   callTool: async ({ name, args, options }) => {
 *     return await mcpClient.callTool({ name, arguments: args });
 *   },
 * });
 *
 * // Use with Vercel AI SDK
 * const result = await generateText({
 *   model: openai("gpt-4"),
 *   tools,
 *   messages: [{ role: "user", content: "..." }],
 * });
 * ```
 */
declare function convertMCPToolsToVercelTools(listToolsResult: ListToolsResult$1, { schemas, callTool, }: ConvertOptions<ToolSchemaOverrides | "automatic">): Promise<ToolSet>;
|
|
323
|
+
|
|
324
|
+
/**
|
|
325
|
+
* Manages multiple MCP server connections with support for tools, resources,
|
|
326
|
+
* prompts, notifications, elicitation, and tasks.
|
|
327
|
+
*
|
|
328
|
+
* @example
|
|
329
|
+
* ```typescript
|
|
330
|
+
* const manager = new MCPClientManager({
|
|
331
|
+
* everything: {
|
|
332
|
+
* command: "npx",
|
|
333
|
+
* args: ["-y", "@modelcontextprotocol/server-everything"],
|
|
334
|
+
* },
|
|
335
|
+
* myServer: {
|
|
336
|
+
* url: "https://my-server.com/mcp",
|
|
337
|
+
* accessToken: "my-token",
|
|
338
|
+
* },
|
|
339
|
+
* });
|
|
340
|
+
*
|
|
341
|
+
* const tools = await manager.listTools("everything");
|
|
342
|
+
* const result = await manager.executeTool("everything", "add", { a: 1, b: 2 });
|
|
343
|
+
* ```
|
|
344
|
+
*/
|
|
345
|
+
declare class MCPClientManager {
|
|
346
|
+
// Internal state. These members are declared `private`, so this declaration
// file carries their names only, with no type information.
private readonly clientStates;
private readonly toolsMetadataCache;
private readonly notificationManager;
private readonly elicitationManager;
private readonly defaultClientName;
private readonly defaultClientVersion;
private readonly defaultCapabilities;
private readonly defaultTimeout;
private readonly defaultLogJsonRpc;
private readonly defaultRpcLogger?;
private readonly defaultProgressHandler?;
private progressTokenCounter;
|
|
358
|
+
/**
 * Creates a new MCPClientManager.
 *
 * Both arguments are optional.
 *
 * @param servers - Configuration map of server IDs to server configs
 * @param options - Global options for the manager
 */
constructor(servers?: MCPClientManagerConfig, options?: MCPClientManagerOptions);
|
|
365
|
+
/**
 * Lists all registered server IDs.
 *
 * @returns The server IDs as an array of strings
 */
listServers(): string[];
|
|
369
|
+
/**
 * Checks if a server is registered.
 *
 * @param serverId - The server ID to look up
 * @returns Whether the server is registered
 */
hasServer(serverId: string): boolean;
|
|
373
|
+
/**
 * Gets summaries for all registered servers.
 *
 * @returns One `ServerSummary` (id, status, optional config) per server
 */
getServerSummaries(): ServerSummary[];
|
|
377
|
+
/**
 * Gets the connection status for a server.
 *
 * @param serverId - The server ID
 * @returns "connected", "connecting" or "disconnected"
 */
getConnectionStatus(serverId: string): MCPConnectionStatus;
|
|
381
|
+
/**
 * Gets the configuration for a server.
 *
 * @param serverId - The server ID
 * @returns The server's config, or `undefined` (presumably when the ID is
 *   not registered; confirm against the implementation)
 */
getServerConfig(serverId: string): MCPServerConfig | undefined;
|
|
385
|
+
/**
 * Gets the capabilities reported by a server.
 *
 * @param serverId - The server ID
 * @returns The MCP `ServerCapabilities`, or `undefined` when none are available
 */
getServerCapabilities(serverId: string): ServerCapabilities | undefined;
|
|
389
|
+
/**
 * Gets the underlying MCP Client for a server.
 *
 * @param serverId - The server ID
 * @returns The MCP SDK `Client`, or `undefined` when none is available
 */
getClient(serverId: string): Client | undefined;
|
|
393
|
+
/**
 * Gets initialization information for a connected server.
 *
 * @param serverId - The server ID
 * @returns Protocol version, transport name, server capabilities, server
 *   version info, instructions and client capabilities; or `undefined`
 *   when no information is available
 */
getInitializationInfo(serverId: string): {
    protocolVersion: string | undefined;
    transport: string;
    serverCapabilities: {
        experimental?: {
            [x: string]: object;
        } | undefined;
        logging?: object | undefined;
        completions?: object | undefined;
        prompts?: {
            listChanged?: boolean | undefined;
        } | undefined;
        resources?: {
            subscribe?: boolean | undefined;
            listChanged?: boolean | undefined;
        } | undefined;
        tools?: {
            listChanged?: boolean | undefined;
        } | undefined;
        tasks?: {
            [x: string]: unknown;
            list?: object | undefined;
            cancel?: object | undefined;
            requests?: {
                [x: string]: unknown;
                tools?: {
                    [x: string]: unknown;
                    call?: object | undefined;
                } | undefined;
            } | undefined;
        } | undefined;
    } | undefined;
    serverVersion: {
        version: string;
        name: string;
        websiteUrl?: string | undefined;
        description?: string | undefined;
        icons?: {
            src: string;
            mimeType?: string | undefined;
            sizes?: string[] | undefined;
            theme?: "light" | "dark" | undefined;
        }[] | undefined;
        title?: string | undefined;
    } | undefined;
    instructions: string | undefined;
    clientCapabilities: {
        experimental?: {
            [x: string]: object;
        } | undefined;
        sampling?: {
            context?: object | undefined;
            tools?: object | undefined;
        } | undefined;
        elicitation?: {
            [x: string]: unknown;
            form?: {
                [x: string]: unknown;
                applyDefaults?: boolean | undefined;
            } | undefined;
            url?: object | undefined;
        } | undefined;
        roots?: {
            listChanged?: boolean | undefined;
        } | undefined;
        tasks?: {
            [x: string]: unknown;
            list?: object | undefined;
            cancel?: object | undefined;
            requests?: {
                [x: string]: unknown;
                sampling?: {
                    [x: string]: unknown;
                    createMessage?: object | undefined;
                } | undefined;
                elicitation?: {
                    [x: string]: unknown;
                    create?: object | undefined;
                } | undefined;
            } | undefined;
        } | undefined;
    };
} | undefined;
|
|
479
|
+
/**
 * Connects to an MCP server.
 *
 * @param serverId - Unique identifier for the server
 * @param config - Server configuration
 * @returns The connected MCP Client
 */
connectToServer(serverId: string, config: MCPServerConfig): Promise<Client>;
|
|
487
|
+
/**
 * Disconnects from a server.
 *
 * @param serverId - The server ID
 * @returns A promise that resolves with no value
 */
disconnectServer(serverId: string): Promise<void>;
|
|
491
|
+
/**
 * Removes a server from the manager entirely.
 *
 * @param serverId - The server ID
 * @returns A promise that resolves with no value
 */
removeServer(serverId: string): Promise<void>;
|
|
495
|
+
/**
 * Disconnects from all servers.
 *
 * @returns A promise that resolves with no value
 */
disconnectAllServers(): Promise<void>;
|
|
499
|
+
/**
 * Lists tools available from a server.
 *
 * @param serverId - The server ID
 * @param params - Optional first argument of the MCP SDK `Client.listTools`
 * @param options - Optional request options
 * @returns The list-tools result of the MCP SDK client
 */
listTools(serverId: string, params?: Parameters<Client["listTools"]>[0], options?: ClientRequestOptions): Promise<ListToolsResult>;
|
|
503
|
+
/**
 * Gets tools from multiple servers (or all servers if none specified).
 * Returns tools with execute functions pre-wired to call this manager.
 *
 * @param serverIds - Server IDs to get tools from (or all if omitted)
 * @returns Array of executable tools
 *
 * @example
 * ```typescript
 * const tools = await manager.getTools(["asana"]);
 * const agent = new TestAgent({ tools, model: "openai/gpt-4o", apiKey });
 * ```
 */
getTools(serverIds?: string[]): Promise<Tool[]>;
|
|
517
|
+
/**
 * Gets cached tool metadata for a server.
 *
 * @param serverId - The server ID
 * @returns A record of metadata records; presumably keyed by tool name
 *   (confirm against the implementation). Values are typed `any`.
 */
getAllToolsMetadata(serverId: string): Record<string, Record<string, any>>;
|
|
521
|
+
/**
 * Gets tools formatted for Vercel AI SDK.
 *
 * @param serverIds - Server IDs to get tools from (or all if omitted);
 *   accepts either a single ID or an array of IDs
 * @param options - Schema options
 * @returns AiSdkTool compatible with Vercel AI SDK's generateText()
 */
getToolsForAiSdk(serverIds?: string[] | string, options?: {
    schemas?: ToolSchemaOverrides | "automatic";
}): Promise<AiSdkTool>;
|
|
531
|
+
/**
 * Executes a tool on a server.
 *
 * The promise resolves to one of three shapes: a result with a `content`
 * array, a result with a `toolResult` field, or an object with a `task`
 * description.
 *
 * @param serverId - The server ID
 * @param toolName - The tool name
 * @param args - Tool arguments
 * @param options - Request options
 * @param taskOptions - Task options for async execution
 */
executeTool(serverId: string, toolName: string, args?: ExecuteToolArguments, options?: ClientRequestOptions, taskOptions?: TaskOptions): Promise<{
    [x: string]: unknown;
    content: ({
        type: "text";
        text: string;
        annotations?: {
            audience?: ("user" | "assistant")[] | undefined;
            priority?: number | undefined;
            lastModified?: string | undefined;
        } | undefined;
        _meta?: Record<string, unknown> | undefined;
    } | {
        type: "image";
        data: string;
        mimeType: string;
        annotations?: {
            audience?: ("user" | "assistant")[] | undefined;
            priority?: number | undefined;
            lastModified?: string | undefined;
        } | undefined;
        _meta?: Record<string, unknown> | undefined;
    } | {
        type: "audio";
        data: string;
        mimeType: string;
        annotations?: {
            audience?: ("user" | "assistant")[] | undefined;
            priority?: number | undefined;
            lastModified?: string | undefined;
        } | undefined;
        _meta?: Record<string, unknown> | undefined;
    } | {
        type: "resource";
        resource: {
            uri: string;
            text: string;
            mimeType?: string | undefined;
            _meta?: Record<string, unknown> | undefined;
        } | {
            uri: string;
            blob: string;
            mimeType?: string | undefined;
            _meta?: Record<string, unknown> | undefined;
        };
        annotations?: {
            audience?: ("user" | "assistant")[] | undefined;
            priority?: number | undefined;
            lastModified?: string | undefined;
        } | undefined;
        _meta?: Record<string, unknown> | undefined;
    } | {
        uri: string;
        name: string;
        type: "resource_link";
        description?: string | undefined;
        mimeType?: string | undefined;
        annotations?: {
            audience?: ("user" | "assistant")[] | undefined;
            priority?: number | undefined;
            lastModified?: string | undefined;
        } | undefined;
        _meta?: {
            [x: string]: unknown;
        } | undefined;
        icons?: {
            src: string;
            mimeType?: string | undefined;
            sizes?: string[] | undefined;
            theme?: "light" | "dark" | undefined;
        }[] | undefined;
        title?: string | undefined;
    })[];
    _meta?: {
        [x: string]: unknown;
        progressToken?: string | number | undefined;
        "io.modelcontextprotocol/related-task"?: {
            taskId: string;
        } | undefined;
    } | undefined;
    structuredContent?: Record<string, unknown> | undefined;
    isError?: boolean | undefined;
} | {
    [x: string]: unknown;
    toolResult: unknown;
    _meta?: {
        [x: string]: unknown;
        progressToken?: string | number | undefined;
        "io.modelcontextprotocol/related-task"?: {
            taskId: string;
        } | undefined;
    } | undefined;
} | {
    task: {
        taskId: string;
        status: "working" | "input_required" | "completed" | "failed" | "cancelled";
        ttl: number | null;
        createdAt: string;
        lastUpdatedAt: string;
        pollInterval?: number | undefined;
        statusMessage?: string | undefined;
    };
    _meta: {
        "io.modelcontextprotocol/model-immediate-response": string;
    };
}>;
|
|
651
|
+
/**
 * Lists resources available from a server.
 *
 * @param serverId - The server ID
 * @param params - Optional list parameters
 * @param options - Optional request options
 * @returns The resources plus an optional `nextCursor`
 */
listResources(serverId: string, params?: ListResourcesParams, options?: ClientRequestOptions): Promise<{
    [x: string]: unknown;
    resources: {
        uri: string;
        name: string;
        description?: string | undefined;
        mimeType?: string | undefined;
        annotations?: {
            audience?: ("user" | "assistant")[] | undefined;
            priority?: number | undefined;
            lastModified?: string | undefined;
        } | undefined;
        _meta?: {
            [x: string]: unknown;
        } | undefined;
        icons?: {
            src: string;
            mimeType?: string | undefined;
            sizes?: string[] | undefined;
            theme?: "light" | "dark" | undefined;
        }[] | undefined;
        title?: string | undefined;
    }[];
    _meta?: {
        [x: string]: unknown;
        progressToken?: string | number | undefined;
        "io.modelcontextprotocol/related-task"?: {
            taskId: string;
        } | undefined;
    } | undefined;
    nextCursor?: string | undefined;
}>;
|
|
686
|
+
/**
 * Reads a resource from a server.
 *
 * @param serverId - The server ID
 * @param params - Read parameters (required)
 * @param options - Optional request options
 * @returns The resource contents; each entry carries either `text` or `blob`
 */
readResource(serverId: string, params: ReadResourceParams, options?: ClientRequestOptions): Promise<{
    [x: string]: unknown;
    contents: ({
        uri: string;
        text: string;
        mimeType?: string | undefined;
        _meta?: Record<string, unknown> | undefined;
    } | {
        uri: string;
        blob: string;
        mimeType?: string | undefined;
        _meta?: Record<string, unknown> | undefined;
    })[];
    _meta?: {
        [x: string]: unknown;
        progressToken?: string | number | undefined;
        "io.modelcontextprotocol/related-task"?: {
            taskId: string;
        } | undefined;
    } | undefined;
}>;
|
|
714
|
+
/**
 * Subscribes to resource updates.
 *
 * @param serverId - The server ID
 * @param params - Subscribe parameters (required)
 * @param options - Optional request options
 * @returns A result object carrying only optional `_meta`
 */
subscribeResource(serverId: string, params: SubscribeResourceParams, options?: ClientRequestOptions): Promise<{
    _meta?: {
        [x: string]: unknown;
        progressToken?: string | number | undefined;
        "io.modelcontextprotocol/related-task"?: {
            taskId: string;
        } | undefined;
    } | undefined;
}>;
|
|
726
|
+
/**
 * Unsubscribes from resource updates.
 *
 * @param serverId - The server ID
 * @param params - Unsubscribe parameters (required)
 * @param options - Optional request options
 * @returns A result object carrying only optional `_meta`
 */
unsubscribeResource(serverId: string, params: UnsubscribeResourceParams, options?: ClientRequestOptions): Promise<{
    _meta?: {
        [x: string]: unknown;
        progressToken?: string | number | undefined;
        "io.modelcontextprotocol/related-task"?: {
            taskId: string;
        } | undefined;
    } | undefined;
}>;
|
|
738
|
+
/**
 * Lists resource templates from a server.
 *
 * @param serverId - The server ID
 * @param params - Optional list parameters
 * @param options - Optional request options
 * @returns The resource templates plus an optional `nextCursor`
 */
listResourceTemplates(serverId: string, params?: ListResourceTemplatesParams, options?: ClientRequestOptions): Promise<{
    [x: string]: unknown;
    resourceTemplates: {
        uriTemplate: string;
        name: string;
        description?: string | undefined;
        mimeType?: string | undefined;
        annotations?: {
            audience?: ("user" | "assistant")[] | undefined;
            priority?: number | undefined;
            lastModified?: string | undefined;
        } | undefined;
        _meta?: {
            [x: string]: unknown;
        } | undefined;
        icons?: {
            src: string;
            mimeType?: string | undefined;
            sizes?: string[] | undefined;
            theme?: "light" | "dark" | undefined;
        }[] | undefined;
        title?: string | undefined;
    }[];
    _meta?: {
        [x: string]: unknown;
        progressToken?: string | number | undefined;
        "io.modelcontextprotocol/related-task"?: {
            taskId: string;
        } | undefined;
    } | undefined;
    nextCursor?: string | undefined;
}>;
|
|
773
|
+
/**
 * Lists prompts available from a server.
 *
 * @param serverId - Identifier of the server to query
 * @param params - Optional list-prompts request parameters
 * @param options - Optional per-request client options
 * @returns A promise for the listing: a `prompts` array plus optional `_meta`
 *          and `nextCursor` fields
 */
listPrompts(serverId: string, params?: ListPromptsParams, options?: ClientRequestOptions): Promise<{
    [x: string]: unknown;
    prompts: {
        name: string;
        description?: string | undefined;
        arguments?: {
            name: string;
            description?: string | undefined;
            required?: boolean | undefined;
        }[] | undefined;
        _meta?: {
            [x: string]: unknown;
        } | undefined;
        /** Optional icon descriptors attached to the prompt entry. */
        icons?: {
            src: string;
            mimeType?: string | undefined;
            sizes?: string[] | undefined;
            theme?: "light" | "dark" | undefined;
        }[] | undefined;
        title?: string | undefined;
    }[];
    _meta?: {
        [x: string]: unknown;
        progressToken?: string | number | undefined;
        "io.modelcontextprotocol/related-task"?: {
            taskId: string;
        } | undefined;
    } | undefined;
    nextCursor?: string | undefined;
}>;
|
|
812
|
+
/**
 * Fetches a single prompt from a server.
 *
 * @param serverId - Identifier of the server to query
 * @param params - Get-prompt request parameters
 * @param options - Optional per-request client options
 * @returns A promise for the prompt: its `messages` (each with a `role` and one
 *          `content` variant discriminated by `type`), plus optional `_meta`
 *          and `description`
 */
getPrompt(
    serverId: string,
    params: GetPromptParams,
    options?: ClientRequestOptions,
): Promise<{
    [x: string]: unknown;
    messages: Array<{
        role: "user" | "assistant";
        content:
            | {
                  type: "text";
                  text: string;
                  annotations?: {
                      audience?: Array<"user" | "assistant"> | undefined;
                      priority?: number | undefined;
                      lastModified?: string | undefined;
                  } | undefined;
                  _meta?: Record<string, unknown> | undefined;
              }
            | {
                  type: "image";
                  data: string;
                  mimeType: string;
                  annotations?: {
                      audience?: Array<"user" | "assistant"> | undefined;
                      priority?: number | undefined;
                      lastModified?: string | undefined;
                  } | undefined;
                  _meta?: Record<string, unknown> | undefined;
              }
            | {
                  type: "audio";
                  data: string;
                  mimeType: string;
                  annotations?: {
                      audience?: Array<"user" | "assistant"> | undefined;
                      priority?: number | undefined;
                      lastModified?: string | undefined;
                  } | undefined;
                  _meta?: Record<string, unknown> | undefined;
              }
            | {
                  type: "resource";
                  // Embedded resource: either textual (`text`) or binary (`blob`) contents.
                  resource:
                      | {
                            uri: string;
                            text: string;
                            mimeType?: string | undefined;
                            _meta?: Record<string, unknown> | undefined;
                        }
                      | {
                            uri: string;
                            blob: string;
                            mimeType?: string | undefined;
                            _meta?: Record<string, unknown> | undefined;
                        };
                  annotations?: {
                      audience?: Array<"user" | "assistant"> | undefined;
                      priority?: number | undefined;
                      lastModified?: string | undefined;
                  } | undefined;
                  _meta?: Record<string, unknown> | undefined;
              }
            | {
                  uri: string;
                  name: string;
                  type: "resource_link";
                  description?: string | undefined;
                  mimeType?: string | undefined;
                  annotations?: {
                      audience?: Array<"user" | "assistant"> | undefined;
                      priority?: number | undefined;
                      lastModified?: string | undefined;
                  } | undefined;
                  _meta?: { [x: string]: unknown } | undefined;
                  icons?: Array<{
                      src: string;
                      mimeType?: string | undefined;
                      sizes?: Array<string> | undefined;
                      theme?: "light" | "dark" | undefined;
                  }> | undefined;
                  title?: string | undefined;
              };
    }>;
    _meta?: {
        [x: string]: unknown;
        progressToken?: string | number | undefined;
        "io.modelcontextprotocol/related-task"?: { taskId: string } | undefined;
    } | undefined;
    description?: string | undefined;
}>;
|
|
899
|
+
/**
 * Sends a ping to a server as a connectivity check.
 *
 * The declared return type is `void`, so no result is surfaced to the caller
 * through the return value.
 *
 * @param serverId - Identifier of the server to ping
 * @param options - Optional request options
 */
pingServer(
    serverId: string,
    options?: RequestOptions,
): void;
|
|
903
|
+
/**
 * Configures the logging level for a server.
 *
 * @param serverId - Identifier of the server to configure
 * @param level - Logging level to apply (optional in the signature)
 * @returns A promise with no value
 */
setLoggingLevel(
    serverId: string,
    level?: LoggingLevel,
): Promise<void>;
|
|
907
|
+
/**
 * Looks up the session ID of a Streamable HTTP server.
 *
 * @param serverId - Identifier of the server
 * @returns The session ID string, or `undefined`
 */
getSessionIdByServer(
    serverId: string,
): string | undefined;
|
|
911
|
+
/**
 * Registers a notification handler on a server.
 *
 * @param serverId - Identifier of the server to attach the handler to
 * @param schema - Notification schema the handler is registered for
 * @param handler - Handler to register
 */
addNotificationHandler(
    serverId: string,
    schema: NotificationSchema,
    handler: NotificationHandler,
): void;
|
|
915
|
+
/**
 * Attaches a handler for resource-list-changed notifications from a server.
 *
 * @param serverId - Identifier of the server to listen to
 * @param handler - Handler to register
 */
onResourceListChanged(
    serverId: string,
    handler: NotificationHandler,
): void;
|
|
919
|
+
/**
 * Attaches a handler for resource-updated notifications from a server.
 *
 * @param serverId - Identifier of the server to listen to
 * @param handler - Handler to register
 */
onResourceUpdated(
    serverId: string,
    handler: NotificationHandler,
): void;
|
|
923
|
+
/**
 * Attaches a handler for prompt-list-changed notifications from a server.
 *
 * @param serverId - Identifier of the server to listen to
 * @param handler - Handler to register
 */
onPromptListChanged(
    serverId: string,
    handler: NotificationHandler,
): void;
|
|
927
|
+
/**
 * Attaches a handler for task-status-changed notifications from a server.
 *
 * @param serverId - Identifier of the server to listen to
 * @param handler - Handler to register
 */
onTaskStatusChanged(
    serverId: string,
    handler: NotificationHandler,
): void;
|
|
931
|
+
/**
 * Installs an elicitation handler that applies to one specific server.
 *
 * @param serverId - Identifier of the server the handler applies to
 * @param handler - Elicitation handler to install
 */
setElicitationHandler(
    serverId: string,
    handler: ElicitationHandler,
): void;
|
|
935
|
+
/**
 * Removes the server-specific elicitation handler.
 *
 * @param serverId - Identifier of the server whose handler is cleared
 */
clearElicitationHandler(
    serverId: string,
): void;
|
|
939
|
+
/**
 * Installs a global elicitation callback that applies to all servers.
 *
 * @param callback - Elicitation callback to install
 */
setElicitationCallback(
    callback: ElicitationCallback,
): void;
|
|
943
|
+
/**
 * Removes the global elicitation callback.
 */
clearElicitationCallback(
): void;
|
|
947
|
+
/**
 * Exposes the map of pending elicitations for use by external resolvers.
 *
 * @returns A string-keyed map whose entries hold the `resolve` and `reject`
 *          callbacks of each pending elicitation
 */
getPendingElicitations(): Map<
    string,
    {
        resolve: (value: ElicitResult) => void;
        reject: (error: unknown) => void;
    }
>;
|
|
954
|
+
/**
 * Supplies the response for a pending elicitation.
 *
 * @param requestId - Identifier of the pending elicitation request
 * @param response - Elicitation result to deliver
 * @returns A boolean; its meaning is not stated in this declaration
 *          (presumably whether a matching pending request was found — confirm
 *          against the implementation)
 */
respondToElicitation(
    requestId: string,
    response: ElicitResult,
): boolean;
|
|
958
|
+
/**
 * Retrieves the list of tasks from a server.
 *
 * @param serverId - Identifier of the server to query
 * @param cursor - Optional cursor string
 * @param options - Optional per-request client options
 * @returns A promise for the task listing
 */
listTasks(
    serverId: string,
    cursor?: string,
    options?: ClientRequestOptions,
): Promise<MCPListTasksResult>;
|
|
962
|
+
/**
 * Fetches a task by its ID.
 *
 * @param serverId - Identifier of the server to query
 * @param taskId - ID of the task to fetch
 * @param options - Optional per-request client options
 * @returns A promise for the task
 */
getTask(
    serverId: string,
    taskId: string,
    options?: ClientRequestOptions,
): Promise<MCPTask>;
|
|
966
|
+
/**
 * Fetches the result of a completed task.
 *
 * @param serverId - Identifier of the server to query
 * @param taskId - ID of the task whose result is requested
 * @param options - Optional per-request client options
 * @returns A promise for the result, typed `unknown` — callers must narrow it
 */
getTaskResult(
    serverId: string,
    taskId: string,
    options?: ClientRequestOptions,
): Promise<unknown>;
|
|
970
|
+
/**
 * Requests cancellation of a task.
 *
 * @param serverId - Identifier of the server that owns the task
 * @param taskId - ID of the task to cancel
 * @param options - Optional per-request client options
 * @returns A promise for an `MCPTask`
 */
cancelTask(
    serverId: string,
    taskId: string,
    options?: ClientRequestOptions,
): Promise<MCPTask>;
|
|
974
|
+
/**
 * Reports whether a server supports task-augmented tool calls.
 *
 * @param serverId - Identifier of the server to check
 * @returns `true` when task-augmented tool calls are supported
 */
supportsTasksForToolCalls(
    serverId: string,
): boolean;
|
|
978
|
+
/**
 * Reports whether a server supports listing tasks.
 *
 * @param serverId - Identifier of the server to check
 * @returns `true` when listing tasks is supported
 */
supportsTasksList(
    serverId: string,
): boolean;
|
|
982
|
+
/**
 * Reports whether a server supports canceling tasks.
 *
 * @param serverId - Identifier of the server to check
 * @returns `true` when canceling tasks is supported
 */
supportsTasksCancel(
    serverId: string,
): boolean;
|
|
986
|
+
// Private members. The declaration lists only their names; their parameter and
// return types are not part of the published type surface.

// Connection handling
private performConnection;
private connectViaStdio;
private connectViaHttp;
private safeCloseTransport;
private ensureConnected;
private getClientOrThrow;
private resetState;

// Other internals
private withTimeout;
private withProgressHandler;
private buildCapabilities;
private resolveRpcLogger;
private cacheToolsMetadata;
private isStdioConfig;
|
|
999
|
+
}
|
|
1000
|
+
|
|
1001
|
+
/**
|
|
1002
|
+
* Transport utilities for MCPClientManager
|
|
1003
|
+
*/
|
|
1004
|
+
|
|
1005
|
+
/**
 * Produces the `requestInit` object for a transport, merging the access token
 * into the `Authorization` header when a token is supplied.
 *
 * @param accessToken - Access token for Bearer auth, or `undefined` for none
 * @param requestInit - Existing `requestInit` configuration, if any
 * @returns The merged `requestInit`, including an `Authorization` header when
 *          `accessToken` was provided
 */
declare function buildRequestInit(
    accessToken: string | undefined,
    requestInit: StreamableHTTPClientTransportOptions["requestInit"],
): StreamableHTTPClientTransportOptions["requestInit"];
|
|
1013
|
+
|
|
1014
|
+
/**
|
|
1015
|
+
* Error handling utilities for MCPClientManager
|
|
1016
|
+
*/
|
|
1017
|
+
/**
 * Tells whether an error means the server does not provide or implement a method.
 * Intended for graceful degradation when a server lacks certain MCP features.
 *
 * @param error - Error value to inspect
 * @param method - MCP method name, e.g. "tools/list" or "resources/list"
 * @returns `true` if the error indicates the method is unavailable
 */
declare function isMethodUnavailableError(
    error: unknown,
    method: string,
): boolean;
|
|
1026
|
+
/**
 * Renders an error as text suitable for inclusion in error messages.
 *
 * @param error - Error value to render
 * @returns The error's string representation
 */
declare function formatError(
    error: unknown,
): string;
|
|
1033
|
+
|
|
1034
|
+
/**
|
|
1035
|
+
* Custom error classes for MCP SDK
|
|
1036
|
+
*/
|
|
1037
|
+
/**
 * Root error class from which all MCP SDK errors derive.
 */
declare class MCPError extends Error {
    /** String error code supplied at construction. */
    readonly code: string;

    /**
     * @param message - Error message
     * @param code - String error code
     * @param options - Optional settings; `cause` carries an underlying error value
     */
    constructor(message: string, code: string, options?: { cause?: unknown });
}
|
|
1046
|
+
/**
 * Error for authentication failures such as a 401 response, an expired token,
 * or an invalid token.
 */
declare class MCPAuthError extends MCPError {
    /** HTTP status code associated with the failure, when one is known. */
    readonly statusCode?: number | undefined;

    /**
     * @param message - Error message
     * @param statusCode - Optional HTTP status code
     * @param options - Optional settings; `cause` carries an underlying error value
     */
    constructor(message: string, statusCode?: number | undefined, options?: { cause?: unknown });
}
|
|
1055
|
+
/**
 * Type guard that narrows an unknown value to `MCPAuthError`.
 *
 * @param error - Value to test
 * @returns `true` when `error` is an `MCPAuthError`
 */
declare function isMCPAuthError(
    error: unknown,
): error is MCPAuthError;
|
|
1059
|
+
/**
 * Classifies an error as authentication-related or not.
 *
 * Detection relies on:
 * 1. The error class name (`UnauthorizedError` from the MCP SDK)
 * 2. HTTP status codes (401, 403) reported by transport errors
 * 3. Common auth-related patterns in the error message (case-insensitive)
 *
 * @param error - Error value to inspect
 * @returns An object with the `isAuth` verdict and an optional `statusCode`
 */
declare function isAuthError(error: unknown): { isAuth: boolean; statusCode?: number };
|
|
1070
|
+
|
|
1071
|
+
/**
|
|
1072
|
+
* MCP Tasks support (experimental feature - spec 2025-11-25)
|
|
1073
|
+
*/
|
|
1074
|
+
|
|
1075
|
+
/**
 * Determines whether a server supports task-augmented tool calls.
 * Both the top-level `tasks` namespace and `experimental.tasks` are consulted.
 *
 * @param capabilities - Server capabilities, or `undefined` if unknown
 * @returns `true` if task-augmented tool calls are supported
 */
declare function supportsTasksForToolCalls(
    capabilities: ServerCapabilities | undefined,
): boolean;
|
|
1083
|
+
/**
 * Determines whether a server supports the `tasks/list` operation.
 *
 * @param capabilities - Server capabilities, or `undefined` if unknown
 * @returns `true` if listing tasks is supported
 */
declare function supportsTasksList(
    capabilities: ServerCapabilities | undefined,
): boolean;
|
|
1090
|
+
/**
 * Determines whether a server supports the `tasks/cancel` operation.
 *
 * @param capabilities - Server capabilities, or `undefined` if unknown
 * @returns `true` if canceling tasks is supported
 */
declare function supportsTasksCancel(
    capabilities: ServerCapabilities | undefined,
): boolean;
|
|
1097
|
+
|
|
1098
|
+
/**
|
|
1099
|
+
* Core types for SDK evals functionality
|
|
1100
|
+
*/
|
|
1101
|
+
|
|
1102
|
+
/**
 * Names of the built-in LLM providers that have native SDK support.
 */
type LLMProvider =
    | "anthropic"
    | "openai"
    | "azure"
    | "deepseek"
    | "google"
    | "ollama"
    | "mistral"
    | "openrouter"
    | "xai";
|
|
1106
|
+
/**
 * API protocols a custom provider can declare compatibility with.
 */
type CompatibleProtocol =
    | "openai-compatible"
    | "anthropic-compatible";
|
|
1110
|
+
/**
 * Describes a user-defined (custom) LLM provider.
 */
interface CustomProvider {
    /** Unique provider name; it is the prefix in model strings such as "groq/llama-3". */
    name: string;

    /** Which API protocol the provider speaks. */
    protocol: CompatibleProtocol;

    /** Base URL of the provider's API endpoint. */
    baseUrl: string;

    /** IDs of the models this provider offers. */
    modelIds: string[];

    /** API key, if known up front; it may instead be supplied at runtime. */
    apiKey?: string;

    /** Name of an environment variable to read the API key from, used as a fallback. */
    apiKeyEnvVar?: string;

    /**
     * Selects the Chat Completions API (`.chat()`) rather than the default.
     * Some OpenAI-compatible providers, such as LiteLLM, require this.
     * Has effect only for the "openai-compatible" protocol.
     */
    useChatCompletions?: boolean;
}
|
|
1133
|
+
/**
 * Settings that identify an LLM and authenticate to it.
 */
interface LLMConfig {
    /** One of the built-in providers. */
    provider: LLMProvider;
    /** Model name. */
    model: string;
    /** API key. */
    apiKey: string;
}
|
|
1141
|
+
/**
 * A single tool invocation issued by the LLM.
 */
interface ToolCall {
    /** Name of the tool that was called. */
    toolName: string;
    /** Arguments of the call; values are untyped (`any`), so validate before use. */
    arguments: Record<any, any>;
}
|
|
1148
|
+
/**
 * Token counts for a prompt.
 */
interface TokenUsage {
    /** Input token count. */
    inputTokens: number;
    /** Output token count. */
    outputTokens: number;
    /** Total token count. */
    totalTokens: number;
}
|
|
1156
|
+
/**
 * Timing figures for one prompt execution, split by where the time went.
 */
interface LatencyBreakdown {
    /** Total wall-clock time, in milliseconds. */
    e2eMs: number;

    /** Time attributed to the LLM API, in milliseconds. */
    llmMs: number;

    /** Time attributed to MCP tool execution, in milliseconds. */
    mcpMs: number;
}
|
|
1167
|
+
/**
 * Plain data behind a prompt result (used internally).
 */
interface PromptResultData {
    /** Prompt/query text as originally sent. */
    prompt: string;

    /** Complete conversation history: user, assistant and tool messages. */
    messages: Array<CoreMessage>;

    /** Response text. */
    text: string;

    /** Tool calls recorded for the prompt. */
    toolCalls: Array<ToolCall>;

    /** Token usage figures. */
    usage: TokenUsage;

    /** Latency figures. */
    latency: LatencyBreakdown;

    /** Error message, when present. */
    error?: string;
}
|
|
1181
|
+
|
|
1182
|
+
/**
|
|
1183
|
+
* PromptResult class - wraps the result of a TestAgent prompt
|
|
1184
|
+
*/
|
|
1185
|
+
|
|
1186
|
+
/**
 * Outcome of a TestAgent prompt.
 *
 * Offers accessors for the conversation, tool calls, token usage, latency and
 * error state of a single prompt run.
 */
declare class PromptResult {
    /** Prompt/query text as originally sent. */
    readonly prompt: string;
    /** Text of the LLM's response. */
    readonly text: string;

    /** Full conversation history. */
    private readonly _messages;
    /** Latency figures (e2e, llm, mcp). */
    private readonly _latency;
    /** Tool calls issued while handling the prompt. */
    private readonly _toolCalls;
    /** Token usage figures. */
    private readonly _usage;
    /** Error message, set when the prompt failed. */
    private readonly _error?;

    /**
     * Builds a PromptResult.
     *
     * @param data - Raw prompt result data
     */
    constructor(data: PromptResultData);

    /**
     * Returns the query/prompt that was originally sent.
     *
     * @returns The original prompt string
     */
    getPrompt(): string;

    /**
     * Returns the whole conversation (user, assistant and tool messages).
     * A copy is returned so callers cannot modify the stored history.
     *
     * @returns Array of CoreMessage objects
     */
    getMessages(): Array<CoreMessage>;

    /**
     * Returns just the user messages of the conversation.
     *
     * @returns Array of CoreUserMessage objects
     */
    getUserMessages(): Array<CoreUserMessage>;

    /**
     * Returns just the assistant messages of the conversation.
     *
     * @returns Array of CoreAssistantMessage objects
     */
    getAssistantMessages(): Array<CoreAssistantMessage>;

    /**
     * Returns just the tool result messages of the conversation.
     *
     * @returns Array of CoreToolMessage objects
     */
    getToolMessages(): Array<CoreToolMessage>;

    /**
     * End-to-end latency: the total wall-clock time of the prompt.
     *
     * @returns End-to-end latency in milliseconds
     */
    e2eLatencyMs(): number;

    /**
     * LLM API latency: time spent waiting on LLM responses, not counting
     * tool execution.
     *
     * @returns LLM latency in milliseconds
     */
    llmLatencyMs(): number;

    /**
     * MCP tool latency: time spent executing MCP tools.
     *
     * @returns MCP tool latency in milliseconds
     */
    mcpLatencyMs(): number;

    /**
     * Returns all latency figures together.
     *
     * @returns LatencyBreakdown object with e2eMs, llmMs, and mcpMs
     */
    getLatency(): LatencyBreakdown;

    /**
     * Names of every tool called during this prompt, as a plain `string[]`
     * that works with `.includes()`.
     *
     * @returns Array of tool names
     */
    toolsCalled(): Array<string>;

    /**
     * Tells whether a given tool was called during this prompt.
     * The comparison is a case-sensitive exact match.
     *
     * @param toolName - Tool name to look for
     * @returns true if the tool was called
     */
    hasToolCall(toolName: string): boolean;

    /**
     * Returns every tool call together with its arguments.
     *
     * @returns Array of ToolCall objects
     */
    getToolCalls(): Array<ToolCall>;

    /**
     * Returns the arguments of a call to the named tool.
     * Yields undefined when the tool was not called; when it was called more
     * than once, the first call's arguments are returned.
     *
     * @param toolName - Tool name
     * @returns The arguments object or undefined
     */
    getToolArguments(toolName: string): Record<string, unknown> | undefined;

    /**
     * Total tokens used.
     *
     * @returns Total tokens (input + output)
     */
    totalTokens(): number;

    /**
     * Input tokens used.
     *
     * @returns Input token count
     */
    inputTokens(): number;

    /**
     * Output tokens used.
     *
     * @returns Output token count
     */
    outputTokens(): number;

    /**
     * Returns all token usage figures together.
     *
     * @returns TokenUsage object
     */
    getUsage(): TokenUsage;

    /**
     * Tells whether the prompt ended in an error.
     *
     * @returns true if there was an error
     */
    hasError(): boolean;

    /**
     * Returns the error message of a failed prompt.
     *
     * @returns The error message or undefined
     */
    getError(): string | undefined;

    /**
     * Convenience factory: builds a PromptResult from raw data.
     *
     * @param data - Raw prompt result data
     * @returns A new PromptResult instance
     */
    static from(data: PromptResultData): PromptResult;

    /**
     * Factory for failure cases: builds a PromptResult in error state.
     *
     * @param error - Error message
     * @param latency - Latency breakdown, or the e2e time in milliseconds
     * @param prompt - Optional prompt text (presumably the prompt that failed;
     *                 the declaration does not describe it)
     * @returns A new PromptResult instance with error state
     */
    static error(error: string, latency?: LatencyBreakdown | number, prompt?: string): PromptResult;

    /**
     * Serializes the conversation trace to a JSON string, which helps when
     * debugging failed evaluations.
     *
     * @returns A JSON string of the conversation messages
     */
    formatTrace(): string;
}
|
|
1359
|
+
|
|
1360
|
+
/**
|
|
1361
|
+
* TestAgent - Runs LLM prompts with tool calling for evals
|
|
1362
|
+
*/
|
|
1363
|
+
|
|
1364
|
+
/**
 * Options accepted when constructing a TestAgent.
 */
interface TestAgentConfig {
    /**
     * Tools offered to the LLM: either `Tool[]` from `manager.getTools()` or an
     * `AiSdkTool` from `manager.getToolsForAiSdk()`.
     */
    tools: Array<Tool> | AiSdkTool;

    /** Provider and model in one string, e.g. "openai/gpt-4o" or "anthropic/claude-3-5-sonnet-20241022". */
    model: string;

    /** API key for the LLM provider. */
    apiKey: string;

    /** System prompt given to the LLM (default: "You are a helpful assistant."). */
    systemPrompt?: string;

    /**
     * Sampling temperature (0-2). When undefined the model's default applies.
     * Some models (e.g., reasoning models) don't support temperature.
     */
    temperature?: number;

    /** Upper bound on agentic steps/tool calls (default: 10). */
    maxSteps?: number;

    /** Registry of custom providers for non-standard LLM providers. */
    customProviders?: Map<string, CustomProvider> | Record<string, CustomProvider>;
}
|
|
1383
|
+
/**
 * Per-call options accepted by the `prompt()` method.
 */
interface PromptOptions {
    /**
     * One or more earlier PromptResults to supply as conversation context,
     * enabling multi-turn conversations.
     */
    context?: PromptResult | Array<PromptResult>;
}
|
|
1390
|
+
/**
|
|
1391
|
+
* Agent for running LLM prompts with tool calling.
|
|
1392
|
+
* Wraps the AI SDK generateText function with proper tool integration.
|
|
1393
|
+
*
|
|
1394
|
+
* @example
|
|
1395
|
+
* ```typescript
|
|
1396
|
+
* const manager = new MCPClientManager({
|
|
1397
|
+
* everything: { command: "npx", args: ["-y", "@modelcontextprotocol/server-everything"] },
|
|
1398
|
+
* });
|
|
1399
|
+
* await manager.connectToServer("everything");
|
|
1400
|
+
*
|
|
1401
|
+
* const agent = new TestAgent({
|
|
1402
|
+
* tools: await manager.getToolsForAiSdk(["everything"]),
|
|
1403
|
+
* model: "openai/gpt-4o",
|
|
1404
|
+
* apiKey: process.env.OPENAI_API_KEY!,
|
|
1405
|
+
* });
|
|
1406
|
+
*
|
|
1407
|
+
* const result = await agent.prompt("Add 2 and 3");
|
|
1408
|
+
* console.log(result.toolsCalled()); // ["add"]
|
|
1409
|
+
* console.log(result.text); // "The result of adding 2 and 3 is 5."
|
|
1410
|
+
* ```
|
|
1411
|
+
*/
|
|
1412
|
+
declare class TestAgent {
|
|
1413
|
+
private readonly tools;
|
|
1414
|
+
private readonly model;
|
|
1415
|
+
private readonly apiKey;
|
|
1416
|
+
private systemPrompt;
|
|
1417
|
+
private temperature;
|
|
1418
|
+
private readonly maxSteps;
|
|
1419
|
+
private readonly customProviders?;
|
|
1420
|
+
/** The result of the last prompt (for toolsCalled() convenience method) */
|
|
1421
|
+
private lastResult;
|
|
1422
|
+
/** History of all prompt results during a test execution */
|
|
1423
|
+
private promptHistory;
|
|
1424
|
+
/**
|
|
1425
|
+
* Create a new TestAgent
|
|
1426
|
+
* @param config - Agent configuration
|
|
1427
|
+
*/
|
|
1428
|
+
constructor(config: TestAgentConfig);
|
|
1429
|
+
/**
|
|
1430
|
+
* Create instrumented tools that track execution latency.
|
|
1431
|
+
* @param onLatency - Callback to report latency for each tool execution
|
|
1432
|
+
* @returns ToolSet with instrumented execute functions
|
|
1433
|
+
*/
|
|
1434
|
+
private createInstrumentedTools;
|
|
1435
|
+
/**
|
|
1436
|
+
* Build an array of CoreMessages from previous PromptResult(s) for multi-turn context.
|
|
1437
|
+
* @param context - Single PromptResult or array of PromptResults to include as context
|
|
1438
|
+
* @returns Array of CoreMessages representing the conversation history
|
|
1439
|
+
*/
|
|
1440
|
+
private buildContextMessages;
|
|
1441
|
+
/**
|
|
1442
|
+
* Run a prompt with the LLM, allowing tool calls.
|
|
1443
|
+
* Never throws - errors are returned in the PromptResult.
|
|
1444
|
+
*
|
|
1445
|
+
* @param message - The user message to send to the LLM
|
|
1446
|
+
* @param options - Optional settings including context for multi-turn conversations
|
|
1447
|
+
* @returns PromptResult with text response, tool calls, token usage, and latency breakdown
|
|
1448
|
+
*
|
|
1449
|
+
* @example
|
|
1450
|
+
* // Single-turn (default)
|
|
1451
|
+
* const result = await agent.prompt("Show me workspaces");
|
|
1452
|
+
*
|
|
1453
|
+
* @example
|
|
1454
|
+
* // Multi-turn with context
|
|
1455
|
+
* const r1 = await agent.prompt("Show me workspaces");
|
|
1456
|
+
* const r2 = await agent.prompt("Now show tasks", { context: r1 });
|
|
1457
|
+
*
|
|
1458
|
+
* @example
|
|
1459
|
+
* // Multi-turn with multiple context results
|
|
1460
|
+
* const r1 = await agent.prompt("Show workspaces");
|
|
1461
|
+
* const r2 = await agent.prompt("Pick the first", { context: r1 });
|
|
1462
|
+
* const r3 = await agent.prompt("Show tasks", { context: [r1, r2] });
|
|
1463
|
+
*/
|
|
1464
|
+
prompt(message: string, options?: PromptOptions): Promise<PromptResult>;
|
|
1465
|
+
/**
|
|
1466
|
+
* Get the names of tools called in the last prompt.
|
|
1467
|
+
* Convenience method for quick checks in eval functions.
|
|
1468
|
+
*
|
|
1469
|
+
* @returns Array of tool names from the last prompt, or empty array if no prompt has been run
|
|
1470
|
+
*/
|
|
1471
|
+
toolsCalled(): string[];
|
|
1472
|
+
/**
|
|
1473
|
+
* Create a new TestAgent with modified options.
|
|
1474
|
+
* Useful for creating variants for different test scenarios.
|
|
1475
|
+
*
|
|
1476
|
+
* @param options - Partial config to override
|
|
1477
|
+
* @returns A new TestAgent instance with the merged configuration
|
|
1478
|
+
*/
|
|
1479
|
+
withOptions(options: Partial<TestAgentConfig>): TestAgent;
|
|
1480
|
+
/**
|
|
1481
|
+
* Get the configured tools
|
|
1482
|
+
*/
|
|
1483
|
+
getTools(): ToolSet;
|
|
1484
|
+
/**
|
|
1485
|
+
* Get the LLM provider/model string
|
|
1486
|
+
*/
|
|
1487
|
+
getModel(): string;
|
|
1488
|
+
/**
|
|
1489
|
+
* Get the API key
|
|
1490
|
+
*/
|
|
1491
|
+
getApiKey(): string;
|
|
1492
|
+
/**
|
|
1493
|
+
* Get the current system prompt
|
|
1494
|
+
*/
|
|
1495
|
+
getSystemPrompt(): string;
|
|
1496
|
+
/**
|
|
1497
|
+
* Set a new system prompt
|
|
1498
|
+
*/
|
|
1499
|
+
setSystemPrompt(prompt: string): void;
|
|
1500
|
+
/**
|
|
1501
|
+
* Get the current temperature (undefined means model default)
|
|
1502
|
+
*/
|
|
1503
|
+
getTemperature(): number | undefined;
|
|
1504
|
+
/**
|
|
1505
|
+
* Set the temperature (must be between 0 and 2)
|
|
1506
|
+
*/
|
|
1507
|
+
setTemperature(temperature: number): void;
|
|
1508
|
+
/**
|
|
1509
|
+
* Get the max steps configuration
|
|
1510
|
+
*/
|
|
1511
|
+
getMaxSteps(): number;
|
|
1512
|
+
/**
|
|
1513
|
+
* Get the result of the last prompt, or undefined if no prompt has been run
|
|
1514
|
+
*/
|
|
1515
|
+
getLastResult(): PromptResult | undefined;
|
|
1516
|
+
/**
|
|
1517
|
+
* Reset the prompt history.
|
|
1518
|
+
* Call this before each test iteration to clear previous results.
|
|
1519
|
+
*/
|
|
1520
|
+
resetPromptHistory(): void;
|
|
1521
|
+
/**
|
|
1522
|
+
* Get the history of all prompt results since the last reset.
|
|
1523
|
+
* Returns a copy of the array to prevent external modification.
|
|
1524
|
+
*/
|
|
1525
|
+
getPromptHistory(): PromptResult[];
|
|
1526
|
+
}
|
|
1527
|
+
|
|
1528
|
+
/**
|
|
1529
|
+
* Tool extraction utilities for AI SDK generateText results
|
|
1530
|
+
*/
|
|
1531
|
+
|
|
1532
|
+
/**
|
|
1533
|
+
* Extract all tool calls from an AI SDK generateText result.
|
|
1534
|
+
* Collects tool calls from all steps in the agentic loop.
|
|
1535
|
+
*
|
|
1536
|
+
* @param result - The result from AI SDK's generateText
|
|
1537
|
+
* @returns Array of ToolCall objects with toolName and arguments
|
|
1538
|
+
*/
|
|
1539
|
+
declare function extractToolCalls(result: GenerateTextResult<ToolSet, never>): ToolCall[];
|
|
1540
|
+
/**
|
|
1541
|
+
* Extract tool names from an AI SDK generateText result.
|
|
1542
|
+
* Convenience function that returns just the tool names.
|
|
1543
|
+
*
|
|
1544
|
+
* @param result - The result from AI SDK's generateText
|
|
1545
|
+
* @returns Array of tool names that were called
|
|
1546
|
+
*/
|
|
1547
|
+
declare function extractToolNames(result: GenerateTextResult<ToolSet, never>): string[];
|
|
1548
|
+
|
|
1549
|
+
/**
|
|
1550
|
+
* Validators for matching tool calls in eval tests
|
|
1551
|
+
*
|
|
1552
|
+
* All matching is case-sensitive and uses exact strings only (no wildcards).
|
|
1553
|
+
*/
|
|
1554
|
+
|
|
1555
|
+
/**
|
|
1556
|
+
* Exact match - all expected tools must be present in exact order.
|
|
1557
|
+
* Case-sensitive exact string comparison.
|
|
1558
|
+
*
|
|
1559
|
+
* @param expected - The expected tool names in order
|
|
1560
|
+
* @param actual - The actual tool names that were called
|
|
1561
|
+
* @returns true if actual matches expected exactly
|
|
1562
|
+
*
|
|
1563
|
+
* @example
|
|
1564
|
+
* matchToolCalls(['add', 'multiply'], ['add', 'multiply']) // true
|
|
1565
|
+
* matchToolCalls(['add', 'multiply'], ['multiply', 'add']) // false (wrong order)
|
|
1566
|
+
* matchToolCalls(['add'], ['add', 'multiply']) // false (extra tool)
|
|
1567
|
+
*/
|
|
1568
|
+
declare function matchToolCalls(expected: string[], actual: string[]): boolean;
|
|
1569
|
+
/**
|
|
1570
|
+
* Subset match - all expected tools must be present, order doesn't matter.
|
|
1571
|
+
* Case-sensitive exact string comparison.
|
|
1572
|
+
*
|
|
1573
|
+
* @param expected - The expected tool names (any order)
|
|
1574
|
+
* @param actual - The actual tool names that were called
|
|
1575
|
+
* @returns true if all expected tools are present in actual
|
|
1576
|
+
*
|
|
1577
|
+
* @example
|
|
1578
|
+
* matchToolCallsSubset(['add', 'multiply'], ['multiply', 'add']) // true
|
|
1579
|
+
* matchToolCallsSubset(['add'], ['add', 'multiply']) // true
|
|
1580
|
+
* matchToolCallsSubset(['add', 'subtract'], ['add', 'multiply']) // false (missing subtract)
|
|
1581
|
+
*/
|
|
1582
|
+
declare function matchToolCallsSubset(expected: string[], actual: string[]): boolean;
|
|
1583
|
+
/**
|
|
1584
|
+
* Any match - at least one expected tool must be present.
|
|
1585
|
+
* Case-sensitive exact string comparison.
|
|
1586
|
+
*
|
|
1587
|
+
* @param expected - The expected tool names (at least one must match)
|
|
1588
|
+
* @param actual - The actual tool names that were called
|
|
1589
|
+
* @returns true if at least one expected tool is present in actual
|
|
1590
|
+
*
|
|
1591
|
+
* @example
|
|
1592
|
+
* matchAnyToolCall(['add', 'subtract'], ['multiply', 'add']) // true
|
|
1593
|
+
* matchAnyToolCall(['add', 'subtract'], ['multiply', 'divide']) // false
|
|
1594
|
+
* matchAnyToolCall([], ['add']) // false (empty expected)
|
|
1595
|
+
*/
|
|
1596
|
+
declare function matchAnyToolCall(expected: string[], actual: string[]): boolean;
|
|
1597
|
+
/**
|
|
1598
|
+
* Count match - check if a specific tool was called exactly N times.
|
|
1599
|
+
* Case-sensitive exact string comparison.
|
|
1600
|
+
*
|
|
1601
|
+
* @param toolName - The tool name to count
|
|
1602
|
+
* @param actual - The actual tool names that were called
|
|
1603
|
+
* @param count - The expected number of times the tool should be called
|
|
1604
|
+
* @returns true if the tool was called exactly count times
|
|
1605
|
+
*
|
|
1606
|
+
* @example
|
|
1607
|
+
* matchToolCallCount('add', ['add', 'add', 'multiply'], 2) // true
|
|
1608
|
+
* matchToolCallCount('add', ['add', 'multiply'], 2) // false
|
|
1609
|
+
*/
|
|
1610
|
+
declare function matchToolCallCount(toolName: string, actual: string[], count: number): boolean;
|
|
1611
|
+
/**
|
|
1612
|
+
* No tools match - check that no tools were called.
|
|
1613
|
+
*
|
|
1614
|
+
* @param actual - The actual tool names that were called
|
|
1615
|
+
* @returns true if no tools were called
|
|
1616
|
+
*
|
|
1617
|
+
* @example
|
|
1618
|
+
* matchNoToolCalls([]) // true
|
|
1619
|
+
* matchNoToolCalls(['add']) // false
|
|
1620
|
+
*/
|
|
1621
|
+
declare function matchNoToolCalls(actual: string[]): boolean;
|
|
1622
|
+
/**
|
|
1623
|
+
* Check if tool was called with exact arguments (deep equality).
|
|
1624
|
+
* Returns true if any call to the tool has exactly matching arguments.
|
|
1625
|
+
* Case-sensitive for tool names.
|
|
1626
|
+
*
|
|
1627
|
+
* @param toolName - The tool name to match
|
|
1628
|
+
* @param expectedArgs - The expected arguments (exact match)
|
|
1629
|
+
* @param toolCalls - The actual tool calls made
|
|
1630
|
+
* @returns true if any call to the tool has exactly matching arguments
|
|
1631
|
+
*
|
|
1632
|
+
* @example
|
|
1633
|
+
* matchToolCallWithArgs('add', {a: 2, b: 3}, toolCalls) // true if add({a:2, b:3}) was called
|
|
1634
|
+
* matchToolCallWithArgs('add', {a: 2}, [{toolName:'add', arguments:{a:2, b:3}}]) // false (extra arg)
|
|
1635
|
+
*/
|
|
1636
|
+
declare function matchToolCallWithArgs(toolName: string, expectedArgs: Record<string, unknown>, toolCalls: ToolCall[]): boolean;
|
|
1637
|
+
/**
|
|
1638
|
+
* Check if tool was called with at least these arguments (partial match).
|
|
1639
|
+
* Allows extra arguments in the actual call.
|
|
1640
|
+
* Case-sensitive for tool names.
|
|
1641
|
+
*
|
|
1642
|
+
* @param toolName - The tool name to match
|
|
1643
|
+
* @param expectedArgs - The expected arguments (partial match)
|
|
1644
|
+
* @param toolCalls - The actual tool calls made
|
|
1645
|
+
* @returns true if any call to the tool contains all expected arguments
|
|
1646
|
+
*
|
|
1647
|
+
* @example
|
|
1648
|
+
* matchToolCallWithPartialArgs('add', {a: 2}, [{toolName:'add', arguments:{a:2, b:3}}]) // true
|
|
1649
|
+
* matchToolCallWithPartialArgs('add', {a: 2, c: 5}, [{toolName:'add', arguments:{a:2, b:3}}]) // false
|
|
1650
|
+
*/
|
|
1651
|
+
declare function matchToolCallWithPartialArgs(toolName: string, expectedArgs: Record<string, unknown>, toolCalls: ToolCall[]): boolean;
|
|
1652
|
+
/**
|
|
1653
|
+
* Check if a specific argument has a specific value in any call to the tool.
|
|
1654
|
+
* Case-sensitive for tool names.
|
|
1655
|
+
*
|
|
1656
|
+
* @param toolName - The tool name to match
|
|
1657
|
+
* @param argKey - The argument key to check
|
|
1658
|
+
* @param expectedValue - The expected value for the argument
|
|
1659
|
+
* @param toolCalls - The actual tool calls made
|
|
1660
|
+
* @returns true if any call to the tool has the specified argument value
|
|
1661
|
+
*
|
|
1662
|
+
* @example
|
|
1663
|
+
* matchToolArgument('add', 'a', 2, toolCalls) // true if any add() call had a=2
|
|
1664
|
+
*/
|
|
1665
|
+
declare function matchToolArgument(toolName: string, argKey: string, expectedValue: unknown, toolCalls: ToolCall[]): boolean;
|
|
1666
|
+
/**
|
|
1667
|
+
* Check if argument value matches a predicate function.
|
|
1668
|
+
* Useful for partial matches, type checks, or range validation.
|
|
1669
|
+
* Case-sensitive for tool names.
|
|
1670
|
+
*
|
|
1671
|
+
* @param toolName - The tool name to match
|
|
1672
|
+
* @param argKey - The argument key to check
|
|
1673
|
+
* @param predicate - Function that tests the argument value
|
|
1674
|
+
* @param toolCalls - The actual tool calls made
|
|
1675
|
+
* @returns true if any call to the tool has an argument value that passes the predicate
|
|
1676
|
+
*
|
|
1677
|
+
* @example
|
|
1678
|
+
* matchToolArgumentWith('echo', 'message', (v) => typeof v === 'string' && v.includes('hello'), toolCalls)
|
|
1679
|
+
* matchToolArgumentWith('add', 'a', (v) => typeof v === 'number' && v > 0, toolCalls)
|
|
1680
|
+
*/
|
|
1681
|
+
declare function matchToolArgumentWith(toolName: string, argKey: string, predicate: (value: unknown) => boolean, toolCalls: ToolCall[]): boolean;
|
|
1682
|
+
|
|
1683
|
+
/**
|
|
1684
|
+
* Percentile calculation utilities for latency statistics
|
|
1685
|
+
*/
|
|
1686
|
+
/**
|
|
1687
|
+
* Calculate a specific percentile from sorted values.
|
|
1688
|
+
*
|
|
1689
|
+
* @param sortedValues - Array of numbers sorted in ascending order
|
|
1690
|
+
* @param percentile - The percentile to calculate (0-100)
|
|
1691
|
+
* @returns The percentile value
|
|
1692
|
+
* @throws Error if array is empty or percentile is out of range
|
|
1693
|
+
*/
|
|
1694
|
+
declare function calculatePercentile(sortedValues: number[], percentile: number): number;
|
|
1695
|
+
/**
|
|
1696
|
+
* Statistics for latency values
|
|
1697
|
+
*/
|
|
1698
|
+
interface LatencyStats {
|
|
1699
|
+
/** Minimum value */
|
|
1700
|
+
min: number;
|
|
1701
|
+
/** Maximum value */
|
|
1702
|
+
max: number;
|
|
1703
|
+
/** Mean (average) value */
|
|
1704
|
+
mean: number;
|
|
1705
|
+
/** 50th percentile (median) */
|
|
1706
|
+
p50: number;
|
|
1707
|
+
/** 95th percentile */
|
|
1708
|
+
p95: number;
|
|
1709
|
+
/** Number of values */
|
|
1710
|
+
count: number;
|
|
1711
|
+
}
|
|
1712
|
+
/**
|
|
1713
|
+
* Calculate comprehensive latency statistics for a set of values.
|
|
1714
|
+
*
|
|
1715
|
+
* @param values - Array of latency values (milliseconds)
|
|
1716
|
+
* @returns LatencyStats object with min, max, mean, p50, p95, and count
|
|
1717
|
+
* @throws Error if array is empty
|
|
1718
|
+
*/
|
|
1719
|
+
declare function calculateLatencyStats(values: number[]): LatencyStats;
|
|
1720
|
+
|
|
1721
|
+
/**
|
|
1722
|
+
* Configuration for an EvalTest
|
|
1723
|
+
*
|
|
1724
|
+
* All tests use the multi-turn pattern with a test function that receives a TestAgent.
|
|
1725
|
+
*/
|
|
1726
|
+
interface EvalTestConfig {
|
|
1727
|
+
/** Name of this test (exposed through EvalTest.getName()). */
name: string;
|
|
1728
|
+
/**
 * Test function invoked with a TestAgent; may be synchronous or asynchronous.
 * NOTE(review): presumably a `true` result marks the iteration as passed - confirm against the implementation.
 */
test: (agent: TestAgent) => boolean | Promise<boolean>;
|
|
1729
|
+
}
|
|
1730
|
+
/**
|
|
1731
|
+
* Options for running an EvalTest
|
|
1732
|
+
*/
|
|
1733
|
+
interface EvalTestRunOptions {
|
|
1734
|
+
/** Number of iterations to run (required), e.g. `{ iterations: 30 }`. */
iterations: number;
|
|
1735
|
+
/** NOTE(review): presumably the maximum number of iterations executed in parallel; the default is not visible here - confirm. */
concurrency?: number;
|
|
1736
|
+
/** NOTE(review): presumably how many times a failing iteration is retried (see IterationResult.retryCount) - confirm. */
retries?: number;
|
|
1737
|
+
/** Timeout in milliseconds. NOTE(review): presumably applied per iteration - confirm. */
timeoutMs?: number;
|
|
1738
|
+
/** Progress callback receiving the completed count and the total. NOTE(review): presumably counted in iterations - confirm. */
onProgress?: (completed: number, total: number) => void;
|
|
1739
|
+
/** Called with a failure report if any iterations fail */
|
|
1740
|
+
onFailure?: (report: string) => void;
|
|
1741
|
+
}
|
|
1742
|
+
/**
|
|
1743
|
+
* Result details for a single iteration
|
|
1744
|
+
*/
|
|
1745
|
+
interface IterationResult {
|
|
1746
|
+
/** Whether this iteration passed. */
passed: boolean;
|
|
1747
|
+
/** Latency breakdowns recorded for this iteration. NOTE(review): presumably one entry per prompt - confirm. */
latencies: LatencyBreakdown[];
|
|
1748
|
+
/** Token counts for this iteration (total, input, output). */
tokens: {
|
|
1749
|
+
total: number;
|
|
1750
|
+
input: number;
|
|
1751
|
+
output: number;
|
|
1752
|
+
};
|
|
1753
|
+
/** Error message, when present. NOTE(review): presumably set only when the iteration failed with an error - confirm. */
error?: string;
|
|
1754
|
+
/** NOTE(review): presumably the number of retries consumed by this iteration - confirm. */
retryCount?: number;
|
|
1755
|
+
/** The prompt results from this iteration */
|
|
1756
|
+
prompts?: PromptResult[];
|
|
1757
|
+
}
|
|
1758
|
+
/**
|
|
1759
|
+
* Result of running an EvalTest
|
|
1760
|
+
*/
|
|
1761
|
+
interface EvalRunResult {
|
|
1762
|
+
/** Number of iterations in this run. */
iterations: number;
|
|
1763
|
+
/** NOTE(review): presumably the count of iterations whose `passed` is true - confirm. */
successes: number;
|
|
1764
|
+
/** NOTE(review): presumably the count of iterations whose `passed` is false - confirm. */
failures: number;
|
|
1765
|
+
/** Pass/fail outcome per iteration. NOTE(review): presumably in iteration order - confirm. */
results: boolean[];
|
|
1766
|
+
/** Detailed result for each iteration. */
iterationDetails: IterationResult[];
|
|
1767
|
+
/** Token counts for the whole run (total, input, output) plus a per-iteration breakdown. */
tokenUsage: {
|
|
1768
|
+
total: number;
|
|
1769
|
+
input: number;
|
|
1770
|
+
output: number;
|
|
1771
|
+
perIteration: {
|
|
1772
|
+
total: number;
|
|
1773
|
+
input: number;
|
|
1774
|
+
output: number;
|
|
1775
|
+
}[];
|
|
1776
|
+
};
|
|
1777
|
+
/**
 * Latency statistics for the run, split into `e2e`, `llm` and `mcp`, plus per-iteration breakdowns.
 * NOTE(review): `e2e` presumably means end-to-end and values are presumably milliseconds - confirm.
 */
latency: {
|
|
1778
|
+
e2e: LatencyStats;
|
|
1779
|
+
llm: LatencyStats;
|
|
1780
|
+
mcp: LatencyStats;
|
|
1781
|
+
perIteration: LatencyBreakdown[];
|
|
1782
|
+
};
|
|
1783
|
+
}
|
|
1784
|
+
/**
|
|
1785
|
+
* EvalTest - Runs a single test scenario with iterations
|
|
1786
|
+
*
|
|
1787
|
+
* Can be run standalone or as part of an EvalSuite.
|
|
1788
|
+
*
|
|
1789
|
+
* @example
|
|
1790
|
+
* ```ts
|
|
1791
|
+
* const test = new EvalTest({
|
|
1792
|
+
* name: "addition",
|
|
1793
|
+
* test: async (agent) => {
|
|
1794
|
+
* const result = await agent.prompt("Add 2+3");
|
|
1795
|
+
* return result.hasToolCall("add");
|
|
1796
|
+
* },
|
|
1797
|
+
* });
|
|
1798
|
+
* await test.run(agent, { iterations: 30 });
|
|
1799
|
+
* console.log(test.accuracy()); // 0.97
|
|
1800
|
+
* ```
|
|
1801
|
+
*/
|
|
1802
|
+
declare class EvalTest {
|
|
1803
|
+
private config;
|
|
1804
|
+
private lastRunResult;
|
|
1805
|
+
constructor(config: EvalTestConfig);
|
|
1806
|
+
/**
|
|
1807
|
+
* Run this test with the given agent and options
|
|
1808
|
+
*/
|
|
1809
|
+
run(agent: TestAgent, options: EvalTestRunOptions): Promise<EvalRunResult>;
|
|
1810
|
+
private aggregateResults;
|
|
1811
|
+
/**
|
|
1812
|
+
* Get the accuracy of the last run (success rate)
|
|
1813
|
+
*/
|
|
1814
|
+
accuracy(): number;
|
|
1815
|
+
/**
|
|
1816
|
+
* Get the recall (true positive rate) of the last run
|
|
1817
|
+
*/
|
|
1818
|
+
recall(): number;
|
|
1819
|
+
/**
|
|
1820
|
+
* Get the precision of the last run
|
|
1821
|
+
*/
|
|
1822
|
+
precision(): number;
|
|
1823
|
+
/**
|
|
1824
|
+
* Get the true positive rate (same as recall)
|
|
1825
|
+
*/
|
|
1826
|
+
truePositiveRate(): number;
|
|
1827
|
+
/**
|
|
1828
|
+
* Get the false positive rate
|
|
1829
|
+
*/
|
|
1830
|
+
falsePositiveRate(): number;
|
|
1831
|
+
/**
|
|
1832
|
+
* Get the average token use per iteration
|
|
1833
|
+
*/
|
|
1834
|
+
averageTokenUse(): number;
|
|
1835
|
+
/**
|
|
1836
|
+
* Get the full results of the last run, or null if there is no last run
|
|
1837
|
+
*/
|
|
1838
|
+
getResults(): EvalRunResult | null;
|
|
1839
|
+
/**
|
|
1840
|
+
* Get the name of this test
|
|
1841
|
+
*/
|
|
1842
|
+
getName(): string;
|
|
1843
|
+
/**
|
|
1844
|
+
* Get the configuration of this test
|
|
1845
|
+
*/
|
|
1846
|
+
getConfig(): EvalTestConfig;
|
|
1847
|
+
/**
|
|
1848
|
+
* Get all iteration details from the last run
|
|
1849
|
+
*/
|
|
1850
|
+
getAllIterations(): IterationResult[];
|
|
1851
|
+
/**
|
|
1852
|
+
* Get only the failed iterations from the last run
|
|
1853
|
+
*/
|
|
1854
|
+
getFailedIterations(): IterationResult[];
|
|
1855
|
+
/**
|
|
1856
|
+
* Get only the successful iterations from the last run
|
|
1857
|
+
*/
|
|
1858
|
+
getSuccessfulIterations(): IterationResult[];
|
|
1859
|
+
/**
|
|
1860
|
+
* Get a failure report with traces from all failed iterations.
|
|
1861
|
+
* Useful for debugging why evaluations failed.
|
|
1862
|
+
*
|
|
1863
|
+
* @returns A formatted string with failure details
|
|
1864
|
+
*/
|
|
1865
|
+
getFailureReport(): string;
|
|
1866
|
+
}
|
|
1867
|
+
|
|
1868
|
+
/**
|
|
1869
|
+
* Configuration for an EvalSuite
|
|
1870
|
+
*/
|
|
1871
|
+
interface EvalSuiteConfig {
|
|
1872
|
+
/** Optional name of the suite. NOTE(review): the default used when omitted is not visible here - confirm. */
name?: string;
|
|
1873
|
+
}
|
|
1874
|
+
/**
|
|
1875
|
+
* Result for a single test within the suite
|
|
1876
|
+
*/
|
|
1877
|
+
interface TestResult {
|
|
1878
|
+
name: string;
|
|
1879
|
+
result: EvalRunResult;
|
|
1880
|
+
}
|
|
1881
|
+
/**
|
|
1882
|
+
* Result of running an EvalSuite
|
|
1883
|
+
*/
|
|
1884
|
+
interface EvalSuiteResult {
|
|
1885
|
+
tests: Map<string, EvalRunResult>;
|
|
1886
|
+
aggregate: {
|
|
1887
|
+
iterations: number;
|
|
1888
|
+
successes: number;
|
|
1889
|
+
failures: number;
|
|
1890
|
+
accuracy: number;
|
|
1891
|
+
tokenUsage: {
|
|
1892
|
+
total: number;
|
|
1893
|
+
perTest: number[];
|
|
1894
|
+
};
|
|
1895
|
+
latency: {
|
|
1896
|
+
e2e: LatencyStats;
|
|
1897
|
+
llm: LatencyStats;
|
|
1898
|
+
mcp: LatencyStats;
|
|
1899
|
+
};
|
|
1900
|
+
};
|
|
1901
|
+
}
|
|
1902
|
+
/**
|
|
1903
|
+
* EvalSuite - Groups multiple EvalTests and provides aggregate metrics
|
|
1904
|
+
*
|
|
1905
|
+
* @example
|
|
1906
|
+
* ```ts
|
|
1907
|
+
* const suite = new EvalSuite({ name: "Math" });
|
|
1908
|
+
* suite.add(new EvalTest({
|
|
1909
|
+
* name: "addition",
|
|
1910
|
+
* test: async (agent) => {
|
|
1911
|
+
* const r = await agent.prompt("Add 2+3");
|
|
1912
|
+
* return r.hasToolCall("add");
|
|
1913
|
+
* },
|
|
1914
|
+
* }));
|
|
1915
|
+
* suite.add(new EvalTest({
|
|
1916
|
+
* name: "multiply",
|
|
1917
|
+
* test: async (agent) => {
|
|
1918
|
+
* const r = await agent.prompt("Multiply 4*5");
|
|
1919
|
+
* return r.hasToolCall("multiply");
|
|
1920
|
+
* },
|
|
1921
|
+
* }));
|
|
1922
|
+
*
|
|
1923
|
+
* await suite.run(agent, { iterations: 30 });
|
|
1924
|
+
* console.log(suite.accuracy()); // Aggregate: 0.95
|
|
1925
|
+
* console.log(suite.get("addition")?.accuracy()); // Individual: 0.97 (get() may return undefined)
|
|
1926
|
+
* ```
|
|
1927
|
+
*/
|
|
1928
|
+
declare class EvalSuite {
|
|
1929
|
+
private name;
|
|
1930
|
+
private tests;
|
|
1931
|
+
private lastRunResult;
|
|
1932
|
+
constructor(config?: EvalSuiteConfig);
|
|
1933
|
+
/**
|
|
1934
|
+
* Add a test to the suite
|
|
1935
|
+
*/
|
|
1936
|
+
add(test: EvalTest): void;
|
|
1937
|
+
/**
|
|
1938
|
+
* Get a test by name; returns undefined if the suite has no test with that name
|
|
1939
|
+
*/
|
|
1940
|
+
get(name: string): EvalTest | undefined;
|
|
1941
|
+
/**
|
|
1942
|
+
* Get all tests in the suite
|
|
1943
|
+
*/
|
|
1944
|
+
getAll(): EvalTest[];
|
|
1945
|
+
/**
|
|
1946
|
+
* Run all tests in the suite with the given agent and options
|
|
1947
|
+
*/
|
|
1948
|
+
run(agent: TestAgent, options: EvalTestRunOptions): Promise<EvalSuiteResult>;
|
|
1949
|
+
private aggregateResults;
|
|
1950
|
+
/**
|
|
1951
|
+
* Get the aggregate accuracy across all tests
|
|
1952
|
+
*/
|
|
1953
|
+
accuracy(): number;
|
|
1954
|
+
/**
|
|
1955
|
+
* Get the aggregate recall (same as accuracy in basic context)
|
|
1956
|
+
*/
|
|
1957
|
+
recall(): number;
|
|
1958
|
+
/**
|
|
1959
|
+
* Get the aggregate precision (same as accuracy in basic context)
|
|
1960
|
+
*/
|
|
1961
|
+
precision(): number;
|
|
1962
|
+
/**
|
|
1963
|
+
* Get the aggregate true positive rate (same as recall)
|
|
1964
|
+
*/
|
|
1965
|
+
truePositiveRate(): number;
|
|
1966
|
+
/**
|
|
1967
|
+
* Get the aggregate false positive rate
|
|
1968
|
+
*/
|
|
1969
|
+
falsePositiveRate(): number;
|
|
1970
|
+
/**
|
|
1971
|
+
* Get the average token use per iteration across all tests
|
|
1972
|
+
*/
|
|
1973
|
+
averageTokenUse(): number;
|
|
1974
|
+
/**
|
|
1975
|
+
* Get the full suite results from the last run, or null if there is no last run
|
|
1976
|
+
*/
|
|
1977
|
+
getResults(): EvalSuiteResult | null;
|
|
1978
|
+
/**
|
|
1979
|
+
* Get the name of the suite
|
|
1980
|
+
*/
|
|
1981
|
+
getName(): string;
|
|
1982
|
+
/**
|
|
1983
|
+
* Get the number of tests in the suite
|
|
1984
|
+
*/
|
|
1985
|
+
size(): number;
|
|
1986
|
+
}
|
|
1987
|
+
|
|
1988
|
+
/**
|
|
1989
|
+
* Model factory for creating AI SDK language models from provider/model strings.
|
|
1990
|
+
* Supports both built-in providers and user-defined custom providers.
|
|
1991
|
+
*/
|
|
1992
|
+
|
|
1993
|
+
/**
|
|
1994
|
+
* Custom base URLs for built-in providers that support them.
|
|
1995
|
+
*/
|
|
1996
|
+
interface BaseUrls {
|
|
1997
|
+
ollama?: string;
|
|
1998
|
+
azure?: string;
|
|
1999
|
+
anthropic?: string;
|
|
2000
|
+
openai?: string;
|
|
2001
|
+
}
|
|
2002
|
+
/**
|
|
2003
|
+
* Options for creating a model.
|
|
2004
|
+
*/
|
|
2005
|
+
interface CreateModelOptions {
|
|
2006
|
+
/** API key used when creating the model (required). */
apiKey: string;
|
|
2007
|
+
/** Optional custom base URLs for built-in providers (see BaseUrls). */
baseUrls?: BaseUrls;
|
|
2008
|
+
/** Custom providers registry (name -> config) */
|
|
2009
|
+
customProviders?: Map<string, CustomProvider> | Record<string, CustomProvider>;
|
|
2010
|
+
}
|
|
2011
|
+
/**
|
|
2012
|
+
* Result of parsing an LLM string
|
|
2013
|
+
*/
|
|
2014
|
+
type ParsedLLMString = {
|
|
2015
|
+
type: "builtin";
|
|
2016
|
+
provider: LLMProvider;
|
|
2017
|
+
model: string;
|
|
2018
|
+
} | {
|
|
2019
|
+
type: "custom";
|
|
2020
|
+
providerName: string;
|
|
2021
|
+
model: string;
|
|
2022
|
+
};
|
|
2023
|
+
/**
|
|
2024
|
+
* Parse an LLM string into provider and model components.
|
|
2025
|
+
* Supports both built-in providers and custom provider names.
|
|
2026
|
+
*
|
|
2027
|
+
* @param llmString - String in format "provider/model" (e.g., "openai/gpt-4o" or "my-litellm/gpt-4")
|
|
2028
|
+
* @param customProviderNames - Optional set of registered custom provider names for validation
|
|
2029
|
+
* @returns Parsed result with type discriminator
|
|
2030
|
+
*/
|
|
2031
|
+
declare function parseLLMString(llmString: string, customProviderNames?: Set<string>): ParsedLLMString;
|
|
2032
|
+
/**
|
|
2033
|
+
* Model type returned by provider factories.
|
|
2034
|
+
*/
|
|
2035
|
+
type ProviderLanguageModel = ReturnType<ReturnType<typeof createOpenAI>>;
|
|
2036
|
+
/**
|
|
2037
|
+
* Create a language model from an LLM string.
|
|
2038
|
+
* @param llmString - String in format "provider/model" (e.g., "openai/gpt-4o" or "my-provider/model")
|
|
2039
|
+
* @param options - API key, optional base URLs, and custom providers registry
|
|
2040
|
+
* @returns AI SDK language model instance
|
|
2041
|
+
*/
|
|
2042
|
+
declare function createModelFromString(llmString: string, options: CreateModelOptions): ProviderLanguageModel;
|
|
2043
|
+
/**
|
|
2044
|
+
* Parse a comma-separated string of model IDs into an array.
|
|
2045
|
+
* Handles whitespace and empty entries.
|
|
2046
|
+
*/
|
|
2047
|
+
declare function parseModelIds(modelIdsString: string): string[];
|
|
2048
|
+
/**
|
|
2049
|
+
* Create a CustomProvider configuration from user input.
|
|
2050
|
+
* This is a helper for building the configuration from form inputs.
|
|
2051
|
+
*/
|
|
2052
|
+
declare function createCustomProvider(config: {
|
|
2053
|
+
name: string;
|
|
2054
|
+
protocol: "openai-compatible" | "anthropic-compatible";
|
|
2055
|
+
baseUrl: string;
|
|
2056
|
+
modelIds: string | string[];
|
|
2057
|
+
apiKey?: string;
|
|
2058
|
+
apiKeyEnvVar?: string;
|
|
2059
|
+
useChatCompletions?: boolean;
|
|
2060
|
+
}): CustomProvider;
|
|
2061
|
+
/**
|
|
2062
|
+
* Preset configurations for common OpenAI-compatible providers.
|
|
2063
|
+
* Users can use these as starting points and customize as needed.
|
|
2064
|
+
*/
|
|
2065
|
+
declare const PROVIDER_PRESETS: {
|
|
2066
|
+
/** LiteLLM proxy - requires useChatCompletions */
|
|
2067
|
+
readonly litellm: (baseUrl: string | undefined, modelIds: string[]) => CustomProvider;
|
|
2068
|
+
};
|
|
2069
|
+
|
|
2070
|
+
export { type AiSdkTool, type BaseServerConfig, type BaseUrls, type CallToolExecutor, type ClientCapabilityOptions, type CompatibleProtocol, type ConvertedToolSet, type CreateModelOptions, type CustomProvider, type ElicitationCallback, type ElicitationCallbackRequest, type ElicitationHandler, type EvalRunResult, EvalSuite, type EvalSuiteConfig, type EvalSuiteResult, EvalTest, type EvalTestConfig, type EvalTestRunOptions, type ExecuteToolArguments, type HttpServerConfig, type IterationResult, type LLMConfig, type LLMProvider, type LatencyBreakdown, type LatencyStats, type ListToolsResult, MCPAuthError, MCPClientManager, type MCPClientManagerConfig, type MCPClientManagerOptions, type MCPConnectionStatus, MCPError, type MCPGetPromptResult, type MCPListTasksResult, type MCPPrompt, type MCPPromptListResult, type MCPReadResourceResult, type MCPResource, type MCPResourceListResult, type MCPResourceTemplate, type MCPResourceTemplateListResult, type MCPServerConfig, type MCPServerSummary, type MCPTask, type MCPTaskStatus, PROVIDER_PRESETS, type ParsedLLMString, type ProgressEvent, type ProgressHandler, type PromptOptions, PromptResult, type PromptResultData, type ProviderLanguageModel, type RpcLogEvent, type RpcLogger, type ServerSummary, type StdioServerConfig, type TaskOptions, TestAgent, type TestAgentConfig, type TestResult, type TokenUsage, type Tool, type ToolCall, type ToolExecuteOptions, type ToolSchemaOverrides, buildRequestInit, calculateLatencyStats, calculatePercentile, convertMCPToolsToVercelTools, createCustomProvider, createModelFromString, ensureJsonSchemaObject, extractToolCalls, extractToolNames, formatError, isAuthError, isMCPAuthError, isMethodUnavailableError, matchAnyToolCall, matchNoToolCalls, matchToolArgument, matchToolArgumentWith, matchToolCallCount, matchToolCallWithArgs, matchToolCallWithPartialArgs, matchToolCalls, matchToolCallsSubset, parseLLMString, parseModelIds, supportsTasksCancel, supportsTasksForToolCalls, supportsTasksList };
|