@mcpjam/sdk 0.1.3 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,2070 @@
1
+ import { ClientOptions, Client } from '@modelcontextprotocol/sdk/client/index.js';
2
+ import { StreamableHTTPClientTransportOptions } from '@modelcontextprotocol/sdk/client/streamableHttp.js';
3
+ import { SSEClientTransportOptions } from '@modelcontextprotocol/sdk/client/sse.js';
4
+ import { RequestOptions } from '@modelcontextprotocol/sdk/shared/protocol.js';
5
+ import { ElicitResult, ElicitRequest, Tool as Tool$1, CallToolResult, ListToolsResult as ListToolsResult$1, ServerCapabilities, LoggingLevel } from '@modelcontextprotocol/sdk/types.js';
6
+ export { ElicitResult, PromptListChangedNotificationSchema, ResourceListChangedNotificationSchema, ResourceUpdatedNotificationSchema } from '@modelcontextprotocol/sdk/types.js';
7
+ import { ToolSet, ToolCallOptions, dynamicTool, Tool as Tool$2, CoreMessage, CoreUserMessage, CoreAssistantMessage, CoreToolMessage, GenerateTextResult } from 'ai';
8
+ export { CoreAssistantMessage, CoreMessage, CoreToolMessage, CoreUserMessage } from 'ai';
9
+ import { JSONSchema7 } from 'json-schema';
10
+ import { createOpenAI } from '@ai-sdk/openai';
11
+
12
+ /**
13
+ * TypeScript types and interfaces for MCPClientManager
14
+ */
15
+
16
+ /**
17
+ * Client capability options extracted from MCP SDK ClientOptions
18
+ */
19
+ type ClientCapabilityOptions = NonNullable<ClientOptions["capabilities"]>;
20
+ /**
21
+ * Base configuration shared by all server types
22
+ */
23
+ type BaseServerConfig = {
24
+ /** Client capabilities to advertise to this server */
25
+ capabilities?: ClientCapabilityOptions;
26
+ /** Request timeout in milliseconds */
27
+ timeout?: number;
28
+ /** Client version to report */
29
+ version?: string;
30
+ /** Error handler for this server */
31
+ onError?: (error: unknown) => void;
32
+ /** Enable simple console logging of JSON-RPC traffic */
33
+ logJsonRpc?: boolean;
34
+ /** Custom logger for JSON-RPC traffic (overrides logJsonRpc) */
35
+ rpcLogger?: RpcLogger;
36
+ };
37
+ /**
38
+ * Configuration for stdio-based MCP servers (subprocess)
39
+ */
40
+ type StdioServerConfig = BaseServerConfig & {
41
+ /** Command to execute */
42
+ command: string;
43
+ /** Command arguments */
44
+ args?: string[];
45
+ /** Environment variables */
46
+ env?: Record<string, string>;
47
+ url?: never;
48
+ accessToken?: never;
49
+ requestInit?: never;
50
+ eventSourceInit?: never;
51
+ authProvider?: never;
52
+ reconnectionOptions?: never;
53
+ sessionId?: never;
54
+ preferSSE?: never;
55
+ };
56
+ /**
57
+ * Configuration for HTTP-based MCP servers (SSE or Streamable HTTP)
58
+ */
59
+ type HttpServerConfig = BaseServerConfig & {
60
+ /** Server URL */
61
+ url: string;
62
+ /**
63
+ * Access token for Bearer authentication.
64
+ * If provided, adds `Authorization: Bearer <accessToken>` header to requests.
65
+ */
66
+ accessToken?: string;
67
+ /** Additional request initialization options */
68
+ requestInit?: StreamableHTTPClientTransportOptions["requestInit"];
69
+ /** SSE-specific event source options */
70
+ eventSourceInit?: SSEClientTransportOptions["eventSourceInit"];
71
+ /** OAuth auth provider */
72
+ authProvider?: StreamableHTTPClientTransportOptions["authProvider"];
73
+ /** Reconnection options for Streamable HTTP */
74
+ reconnectionOptions?: StreamableHTTPClientTransportOptions["reconnectionOptions"];
75
+ /** Session ID for Streamable HTTP */
76
+ sessionId?: StreamableHTTPClientTransportOptions["sessionId"];
77
+ /** Prefer SSE transport over Streamable HTTP */
78
+ preferSSE?: boolean;
79
+ command?: never;
80
+ args?: never;
81
+ env?: never;
82
+ };
83
+ /**
84
+ * Union type for all server configurations
85
+ */
86
+ type MCPServerConfig = StdioServerConfig | HttpServerConfig;
87
+ /**
88
+ * Configuration map for multiple servers (serverId -> config)
89
+ */
90
+ type MCPClientManagerConfig = Record<string, MCPServerConfig>;
91
+ /**
92
+ * Connection status for a server
93
+ */
94
+ type MCPConnectionStatus = "connected" | "connecting" | "disconnected";
95
+ /**
96
+ * Summary information for a server
97
+ */
98
+ type ServerSummary = {
99
+ id: string;
100
+ status: MCPConnectionStatus;
101
+ config?: MCPServerConfig;
102
+ };
103
+ /**
104
+ * Event passed to RPC loggers
105
+ */
106
+ type RpcLogEvent = {
107
+ direction: "send" | "receive";
108
+ message: unknown;
109
+ serverId: string;
110
+ };
111
+ /**
112
+ * Function type for JSON-RPC logging
113
+ */
114
+ type RpcLogger = (event: RpcLogEvent) => void;
115
+ /**
116
+ * Progress event from server operations
117
+ */
118
+ type ProgressEvent = {
119
+ serverId: string;
120
+ progressToken: string | number;
121
+ progress: number;
122
+ total?: number;
123
+ message?: string;
124
+ };
125
+ /**
126
+ * Function type for progress handling
127
+ */
128
+ type ProgressHandler = (event: ProgressEvent) => void;
129
+ /**
130
+ * Options for MCPClientManager constructor
131
+ */
132
+ interface MCPClientManagerOptions {
133
+ /** Default client name to report to servers */
134
+ defaultClientName?: string;
135
+ /** Default client version to report */
136
+ defaultClientVersion?: string;
137
+ /** Default capabilities to advertise */
138
+ defaultCapabilities?: ClientCapabilityOptions;
139
+ /** Default request timeout in milliseconds */
140
+ defaultTimeout?: number;
141
+ /** Enable JSON-RPC logging for all servers by default */
142
+ defaultLogJsonRpc?: boolean;
143
+ /** Global JSON-RPC logger */
144
+ rpcLogger?: RpcLogger;
145
+ /** Global progress handler */
146
+ progressHandler?: ProgressHandler;
147
+ }
148
+ /**
149
+ * Arguments passed to tool execution
150
+ */
151
+ type ExecuteToolArguments = Record<string, unknown>;
152
+ /**
153
+ * Options for task-augmented tool calls
154
+ */
155
+ type TaskOptions = {
156
+ /** Time-to-live for the task in milliseconds */
157
+ ttl?: number;
158
+ };
159
+ /**
160
+ * Handler for server-specific elicitation requests
161
+ */
162
+ type ElicitationHandler = (params: ElicitRequest["params"]) => Promise<ElicitResult> | ElicitResult;
163
+ /**
164
+ * Request passed to global elicitation callback
165
+ */
166
+ type ElicitationCallbackRequest = {
167
+ requestId: string;
168
+ message: string;
169
+ schema: unknown;
170
+ /** Task ID if this elicitation is related to a task (MCP Tasks spec 2025-11-25) */
171
+ relatedTaskId?: string;
172
+ };
173
+ /**
174
+ * Global callback for handling elicitation requests
175
+ */
176
+ type ElicitationCallback = (request: ElicitationCallbackRequest) => Promise<ElicitResult> | ElicitResult;
177
+ /**
178
+ * Task status values
179
+ */
180
+ type MCPTaskStatus = "working" | "input_required" | "completed" | "failed" | "cancelled";
181
+ /**
182
+ * MCP Task object
183
+ */
184
+ type MCPTask = {
185
+ taskId: string;
186
+ status: MCPTaskStatus;
187
+ statusMessage?: string;
188
+ createdAt: string;
189
+ lastUpdatedAt: string;
190
+ ttl: number | null;
191
+ pollInterval?: number;
192
+ };
193
+ /**
194
+ * Result from listing tasks
195
+ */
196
+ type MCPListTasksResult = {
197
+ tasks: MCPTask[];
198
+ nextCursor?: string;
199
+ };
200
+ type ClientRequestOptions = RequestOptions;
201
+ type ListResourcesParams = Parameters<Client["listResources"]>[0];
202
+ type ListResourceTemplatesParams = Parameters<Client["listResourceTemplates"]>[0];
203
+ type ReadResourceParams = Parameters<Client["readResource"]>[0];
204
+ type SubscribeResourceParams = Parameters<Client["subscribeResource"]>[0];
205
+ type UnsubscribeResourceParams = Parameters<Client["unsubscribeResource"]>[0];
206
+ type ListPromptsParams = Parameters<Client["listPrompts"]>[0];
207
+ type GetPromptParams = Parameters<Client["getPrompt"]>[0];
208
+ type ListToolsResult = Awaited<ReturnType<Client["listTools"]>>;
209
+ type MCPPromptListResult = Awaited<ReturnType<Client["listPrompts"]>>;
210
+ type MCPPrompt = MCPPromptListResult["prompts"][number];
211
+ type MCPGetPromptResult = Awaited<ReturnType<Client["getPrompt"]>>;
212
+ type MCPResourceListResult = Awaited<ReturnType<Client["listResources"]>>;
213
+ type MCPResource = MCPResourceListResult["resources"][number];
214
+ type MCPReadResourceResult = Awaited<ReturnType<Client["readResource"]>>;
215
+ type MCPResourceTemplateListResult = Awaited<ReturnType<Client["listResourceTemplates"]>>;
216
+ type MCPResourceTemplate = MCPResourceTemplateListResult["resourceTemplates"][number];
217
+ type MCPServerSummary = ServerSummary;
218
+ /**
219
+ * An MCP tool with an execute function pre-wired to call the manager.
220
+ * Extends the official MCP SDK Tool type.
221
+ * Returned by MCPClientManager.getTools().
222
+ */
223
+ /** Options for tool execution */
224
+ interface ToolExecuteOptions {
225
+ /** Abort signal for cancellation */
226
+ signal?: AbortSignal;
227
+ }
228
+ interface Tool extends Tool$1 {
229
+ /** Execute the tool with the given arguments */
230
+ execute: (args: Record<string, unknown>, options?: ToolExecuteOptions) => Promise<CallToolResult>;
231
+ _meta?: {
232
+ _serverId: string;
233
+ [key: string]: unknown;
234
+ };
235
+ }
236
+
237
+ /**
238
+ * AI SDK compatible tool set (Record<string, CoreTool>).
239
+ * Returned by MCPClientManager.getToolsForAiSdk().
240
+ * Can be passed directly to AI SDK's generateText().
241
+ */
242
+ type AiSdkTool = ToolSet;
243
+
244
+ /**
245
+ * Notification handler management for MCPClientManager
246
+ */
247
+
248
+ type NotificationSchema = Parameters<Client["setNotificationHandler"]>[0];
249
+ type NotificationHandler = Parameters<Client["setNotificationHandler"]>[1];
250
+
251
+ /**
252
+ * Tool conversion utilities for integrating MCP tools with Vercel AI SDK
253
+ */
254
+
255
+ /**
256
+ * Normalizes a schema to a valid JSON Schema object.
257
+ * Many MCP tools omit the top-level type; Anthropic requires an object schema.
258
+ *
259
+ * @param schema - The input schema (may be incomplete)
260
+ * @returns A normalized JSONSchema7 object
261
+ */
262
+ declare function ensureJsonSchemaObject(schema: unknown): JSONSchema7;
263
+ /**
264
+ * Function type for executing tool calls
265
+ */
266
+ type CallToolExecutor = (params: {
267
+ name: string;
268
+ args: unknown;
269
+ options: ToolCallOptions;
270
+ }) => Promise<CallToolResult>;
271
+ /**
272
+ * Input schema type for tool definitions
273
+ */
274
+ type ToolInputSchema = Parameters<typeof dynamicTool>[0]["inputSchema"];
275
+ /**
276
+ * Schema overrides for specific tools
277
+ * Maps tool name to custom input schema definition
278
+ */
279
+ type ToolSchemaOverrides = Record<string, {
280
+ inputSchema: ToolInputSchema;
281
+ }>;
282
+ /**
283
+ * Result type for converted tools
284
+ * When explicit schemas are provided, returns typed object
285
+ * When "automatic", returns generic record
286
+ */
287
+ type ConvertedToolSet<SCHEMAS extends ToolSchemaOverrides | "automatic"> = SCHEMAS extends ToolSchemaOverrides ? {
288
+ [K in keyof SCHEMAS]: Tool$2;
289
+ } : Record<string, Tool$2>;
290
+ /**
291
+ * Options for tool conversion
292
+ */
293
+ interface ConvertOptions<TOOL_SCHEMAS extends ToolSchemaOverrides | "automatic"> {
294
+ /** Schema overrides or "automatic" for dynamic conversion */
295
+ schemas?: TOOL_SCHEMAS;
296
+ /** Function to execute tool calls */
297
+ callTool: CallToolExecutor;
298
+ }
299
+ /**
300
+ * Converts MCP tools to Vercel AI SDK format.
301
+ *
302
+ * @param listToolsResult - The result from listTools()
303
+ * @param options - Conversion options including callTool executor
304
+ * @returns A ToolSet compatible with Vercel AI SDK
305
+ *
306
+ * @example
307
+ * ```typescript
308
+ * const tools = await convertMCPToolsToVercelTools(listToolsResult, {
309
+ * callTool: async ({ name, args, options }) => {
310
+ * return await mcpClient.callTool({ name, arguments: args });
311
+ * },
312
+ * });
313
+ *
314
+ * // Use with Vercel AI SDK
315
+ * const result = await generateText({
316
+ * model: openai("gpt-4"),
317
+ * tools,
318
+ * messages: [{ role: "user", content: "..." }],
319
+ * });
320
+ * ```
321
+ */
322
+ declare function convertMCPToolsToVercelTools(listToolsResult: ListToolsResult$1, { schemas, callTool, }: ConvertOptions<ToolSchemaOverrides | "automatic">): Promise<ToolSet>;
323
+
324
+ /**
325
+ * Manages multiple MCP server connections with support for tools, resources,
326
+ * prompts, notifications, elicitation, and tasks.
327
+ *
328
+ * @example
329
+ * ```typescript
330
+ * const manager = new MCPClientManager({
331
+ * everything: {
332
+ * command: "npx",
333
+ * args: ["-y", "@modelcontextprotocol/server-everything"],
334
+ * },
335
+ * myServer: {
336
+ * url: "https://my-server.com/mcp",
337
+ * accessToken: "my-token",
338
+ * },
339
+ * });
340
+ *
341
+ * const tools = await manager.listTools("everything");
342
+ * const result = await manager.executeTool("everything", "add", { a: 1, b: 2 });
343
+ * ```
344
+ */
345
+ declare class MCPClientManager {
346
+ private readonly clientStates;
347
+ private readonly toolsMetadataCache;
348
+ private readonly notificationManager;
349
+ private readonly elicitationManager;
350
+ private readonly defaultClientName;
351
+ private readonly defaultClientVersion;
352
+ private readonly defaultCapabilities;
353
+ private readonly defaultTimeout;
354
+ private readonly defaultLogJsonRpc;
355
+ private readonly defaultRpcLogger?;
356
+ private readonly defaultProgressHandler?;
357
+ private progressTokenCounter;
358
+ /**
359
+ * Creates a new MCPClientManager.
360
+ *
361
+ * @param servers - Configuration map of server IDs to server configs
362
+ * @param options - Global options for the manager
363
+ */
364
+ constructor(servers?: MCPClientManagerConfig, options?: MCPClientManagerOptions);
365
+ /**
366
+ * Lists all registered server IDs.
367
+ */
368
+ listServers(): string[];
369
+ /**
370
+ * Checks if a server is registered.
371
+ */
372
+ hasServer(serverId: string): boolean;
373
+ /**
374
+ * Gets summaries for all registered servers.
375
+ */
376
+ getServerSummaries(): ServerSummary[];
377
+ /**
378
+ * Gets the connection status for a server.
379
+ */
380
+ getConnectionStatus(serverId: string): MCPConnectionStatus;
381
+ /**
382
+ * Gets the configuration for a server.
383
+ */
384
+ getServerConfig(serverId: string): MCPServerConfig | undefined;
385
+ /**
386
+ * Gets the capabilities reported by a server.
387
+ */
388
+ getServerCapabilities(serverId: string): ServerCapabilities | undefined;
389
+ /**
390
+ * Gets the underlying MCP Client for a server.
391
+ */
392
+ getClient(serverId: string): Client | undefined;
393
+ /**
394
+ * Gets initialization information for a connected server.
395
+ */
396
+ getInitializationInfo(serverId: string): {
397
+ protocolVersion: string | undefined;
398
+ transport: string;
399
+ serverCapabilities: {
400
+ experimental?: {
401
+ [x: string]: object;
402
+ } | undefined;
403
+ logging?: object | undefined;
404
+ completions?: object | undefined;
405
+ prompts?: {
406
+ listChanged?: boolean | undefined;
407
+ } | undefined;
408
+ resources?: {
409
+ subscribe?: boolean | undefined;
410
+ listChanged?: boolean | undefined;
411
+ } | undefined;
412
+ tools?: {
413
+ listChanged?: boolean | undefined;
414
+ } | undefined;
415
+ tasks?: {
416
+ [x: string]: unknown;
417
+ list?: object | undefined;
418
+ cancel?: object | undefined;
419
+ requests?: {
420
+ [x: string]: unknown;
421
+ tools?: {
422
+ [x: string]: unknown;
423
+ call?: object | undefined;
424
+ } | undefined;
425
+ } | undefined;
426
+ } | undefined;
427
+ } | undefined;
428
+ serverVersion: {
429
+ version: string;
430
+ name: string;
431
+ websiteUrl?: string | undefined;
432
+ description?: string | undefined;
433
+ icons?: {
434
+ src: string;
435
+ mimeType?: string | undefined;
436
+ sizes?: string[] | undefined;
437
+ theme?: "light" | "dark" | undefined;
438
+ }[] | undefined;
439
+ title?: string | undefined;
440
+ } | undefined;
441
+ instructions: string | undefined;
442
+ clientCapabilities: {
443
+ experimental?: {
444
+ [x: string]: object;
445
+ } | undefined;
446
+ sampling?: {
447
+ context?: object | undefined;
448
+ tools?: object | undefined;
449
+ } | undefined;
450
+ elicitation?: {
451
+ [x: string]: unknown;
452
+ form?: {
453
+ [x: string]: unknown;
454
+ applyDefaults?: boolean | undefined;
455
+ } | undefined;
456
+ url?: object | undefined;
457
+ } | undefined;
458
+ roots?: {
459
+ listChanged?: boolean | undefined;
460
+ } | undefined;
461
+ tasks?: {
462
+ [x: string]: unknown;
463
+ list?: object | undefined;
464
+ cancel?: object | undefined;
465
+ requests?: {
466
+ [x: string]: unknown;
467
+ sampling?: {
468
+ [x: string]: unknown;
469
+ createMessage?: object | undefined;
470
+ } | undefined;
471
+ elicitation?: {
472
+ [x: string]: unknown;
473
+ create?: object | undefined;
474
+ } | undefined;
475
+ } | undefined;
476
+ } | undefined;
477
+ };
478
+ } | undefined;
479
+ /**
480
+ * Connects to an MCP server.
481
+ *
482
+ * @param serverId - Unique identifier for the server
483
+ * @param config - Server configuration
484
+ * @returns The connected MCP Client
485
+ */
486
+ connectToServer(serverId: string, config: MCPServerConfig): Promise<Client>;
487
+ /**
488
+ * Disconnects from a server.
489
+ */
490
+ disconnectServer(serverId: string): Promise<void>;
491
+ /**
492
+ * Removes a server from the manager entirely.
493
+ */
494
+ removeServer(serverId: string): Promise<void>;
495
+ /**
496
+ * Disconnects from all servers.
497
+ */
498
+ disconnectAllServers(): Promise<void>;
499
+ /**
500
+ * Lists tools available from a server.
501
+ */
502
+ listTools(serverId: string, params?: Parameters<Client["listTools"]>[0], options?: ClientRequestOptions): Promise<ListToolsResult>;
503
+ /**
504
+ * Gets tools from multiple servers (or all servers if none specified).
505
+ * Returns tools with execute functions pre-wired to call this manager.
506
+ *
507
+ * @param serverIds - Server IDs to get tools from (or all if omitted)
508
+ * @returns Array of executable tools
509
+ *
510
+ * @example
511
+ * ```typescript
512
+ * const tools = await manager.getTools(["asana"]);
513
+ * const agent = new TestAgent({ tools, model: "openai/gpt-4o", apiKey });
514
+ * ```
515
+ */
516
+ getTools(serverIds?: string[]): Promise<Tool[]>;
517
+ /**
518
+ * Gets cached tool metadata for a server.
519
+ */
520
+ getAllToolsMetadata(serverId: string): Record<string, Record<string, any>>;
521
+ /**
522
+ * Gets tools formatted for Vercel AI SDK.
523
+ *
524
+ * @param serverIds - Server IDs to get tools from (or all if omitted)
525
+ * @param options - Schema options
526
+ * @returns AiSdkTool compatible with Vercel AI SDK's generateText()
527
+ */
528
+ getToolsForAiSdk(serverIds?: string[] | string, options?: {
529
+ schemas?: ToolSchemaOverrides | "automatic";
530
+ }): Promise<AiSdkTool>;
531
+ /**
532
+ * Executes a tool on a server.
533
+ *
534
+ * @param serverId - The server ID
535
+ * @param toolName - The tool name
536
+ * @param args - Tool arguments
537
+ * @param options - Request options
538
+ * @param taskOptions - Task options for async execution
539
+ */
540
+ executeTool(serverId: string, toolName: string, args?: ExecuteToolArguments, options?: ClientRequestOptions, taskOptions?: TaskOptions): Promise<{
541
+ [x: string]: unknown;
542
+ content: ({
543
+ type: "text";
544
+ text: string;
545
+ annotations?: {
546
+ audience?: ("user" | "assistant")[] | undefined;
547
+ priority?: number | undefined;
548
+ lastModified?: string | undefined;
549
+ } | undefined;
550
+ _meta?: Record<string, unknown> | undefined;
551
+ } | {
552
+ type: "image";
553
+ data: string;
554
+ mimeType: string;
555
+ annotations?: {
556
+ audience?: ("user" | "assistant")[] | undefined;
557
+ priority?: number | undefined;
558
+ lastModified?: string | undefined;
559
+ } | undefined;
560
+ _meta?: Record<string, unknown> | undefined;
561
+ } | {
562
+ type: "audio";
563
+ data: string;
564
+ mimeType: string;
565
+ annotations
566
+ /**
567
+ * Lists prompts available from a server.
568
+ */
569
+ ?: {
570
+ audience?: ("user" | "assistant")[] | undefined;
571
+ priority? /**
572
+ * Lists prompts available from a server.
573
+ */: number | undefined;
574
+ lastModified?: string | undefined;
575
+ } | undefined;
576
+ _meta?: Record<string, unknown> | undefined;
577
+ } | {
578
+ type: "resource";
579
+ resource: {
580
+ uri: string;
581
+ text: string;
582
+ mimeType?: string | undefined;
583
+ _meta?: Record<string, unknown> | undefined;
584
+ } | {
585
+ uri: string;
586
+ blob: string;
587
+ mimeType?: string | undefined;
588
+ _meta?: Record<string, unknown> | undefined;
589
+ };
590
+ annotations?: {
591
+ audience?: ("user" | "assistant")[] | undefined;
592
+ priority?: number | undefined;
593
+ lastModified?: string | undefined;
594
+ } | undefined;
595
+ _meta?: Record<string, unknown> | undefined;
596
+ } | {
597
+ uri: string;
598
+ name: string;
599
+ type: "resource_link";
600
+ description?: string | undefined;
601
+ mimeType?: string | undefined;
602
+ annotations?: {
603
+ audience?: ("user" | "assistant")[] | undefined;
604
+ priority?: number | undefined;
605
+ lastModified?: string | undefined;
606
+ } | undefined;
607
+ _meta?: {
608
+ [x: string]: unknown;
609
+ } | undefined;
610
+ icons?: {
611
+ src: string;
612
+ mimeType?: string | undefined;
613
+ sizes?: string[] | undefined;
614
+ theme?: "light" | "dark" | undefined;
615
+ }[] | undefined;
616
+ title?: string | undefined;
617
+ })[];
618
+ _meta?: {
619
+ [x: string]: unknown;
620
+ progressToken?: string | number | undefined;
621
+ "io.modelcontextprotocol/related-task"?: {
622
+ taskId: string;
623
+ } | undefined;
624
+ } | undefined;
625
+ structuredContent?: Record<string, unknown> | undefined;
626
+ isError?: boolean | undefined;
627
+ } | {
628
+ [x: string]: unknown;
629
+ toolResult: unknown;
630
+ _meta?: {
631
+ [x: string]: unknown;
632
+ progressToken?: string | number | undefined;
633
+ "io.modelcontextprotocol/related-task"?: {
634
+ taskId: string;
635
+ } | undefined;
636
+ } | undefined;
637
+ } | {
638
+ task: {
639
+ taskId: string;
640
+ status: "working" | "input_required" | "completed" | "failed" | "cancelled";
641
+ ttl: number | null;
642
+ createdAt: string;
643
+ lastUpdatedAt: string;
644
+ pollInterval?: number | undefined;
645
+ statusMessage?: string | undefined;
646
+ };
647
+ _meta: {
648
+ "io.modelcontextprotocol/model-immediate-response": string;
649
+ };
650
+ }>;
651
+ /**
652
+ * Lists resources available from a server.
653
+ */
654
+ listResources(serverId: string, params?: ListResourcesParams, options?: ClientRequestOptions): Promise<{
655
+ [x: string]: unknown;
656
+ resources: {
657
+ uri: string;
658
+ name: string;
659
+ description?: string | undefined;
660
+ mimeType?: string | undefined;
661
+ annotations?: {
662
+ audience?: ("user" | "assistant")[] | undefined;
663
+ priority?: number | undefined;
664
+ lastModified?: string | undefined;
665
+ } | undefined;
666
+ _meta?: {
667
+ [x: string]: unknown;
668
+ } | undefined;
669
+ icons?: {
670
+ src: string;
671
+ mimeType?: string | undefined;
672
+ sizes?: string[] | undefined;
673
+ theme?: "light" | "dark" | undefined;
674
+ }[] | undefined;
675
+ title?: string | undefined;
676
+ }[];
677
+ _meta?: {
678
+ [x: string]: unknown;
679
+ progressToken?: string | number | undefined;
680
+ "io.modelcontextprotocol/related-task"?: {
681
+ taskId: string;
682
+ } | undefined;
683
+ } | undefined;
684
+ nextCursor?: string | undefined;
685
+ }>;
686
+ /**
687
+ * Reads a resource from a server.
688
+ */
689
+ readResource(serverId: string, params: ReadResourceParams, options?: ClientRequestOptions): Promise<{
690
+ [x: string]: unknown;
691
+ contents: ({
692
+ uri: string;
693
+ text: string;
694
+ mimeType?: string | undefined;
695
+ _meta?: Record<string, unknown> | undefined;
696
+ } | {
697
+ uri: string;
698
+ blob: string;
699
+ mimeType?: string | undefined;
700
+ _meta
701
+ /**
702
+ * Lists resources available from a server.
703
+ */
704
+ ?: Record<string, unknown> | undefined;
705
+ })[];
706
+ _meta?: {
707
+ [x: string]: unknown;
708
+ progressToken?: string | number | undefined;
709
+ "io.modelcontextprotocol/related-task"?: {
710
+ taskId: string;
711
+ } | undefined;
712
+ } | undefined;
713
+ }>;
714
+ /**
715
+ * Subscribes to resource updates.
716
+ */
717
+ subscribeResource(serverId: string, params: SubscribeResourceParams, options?: ClientRequestOptions): Promise<{
718
+ _meta?: {
719
+ [x: string]: unknown;
720
+ progressToken?: string | number | undefined;
721
+ "io.modelcontextprotocol/related-task"?: {
722
+ taskId: string;
723
+ } | undefined;
724
+ } | undefined;
725
+ }>;
726
+ /**
727
+ * Unsubscribes from resource updates.
728
+ */
729
+ unsubscribeResource(serverId: string, params: UnsubscribeResourceParams, options?: ClientRequestOptions): Promise<{
730
+ _meta?: {
731
+ [x: string]: unknown;
732
+ progressToken?: string | number | undefined;
733
+ "io.modelcontextprotocol/related-task"?: {
734
+ taskId: string;
735
+ } | undefined;
736
+ } | undefined;
737
+ }>;
738
+ /**
739
+ * Lists resource templates from a server.
740
+ */
741
+ listResourceTemplates(serverId: string, params?: ListResourceTemplatesParams, options?: ClientRequestOptions): Promise<{
742
+ [x: string]: unknown;
743
+ resourceTemplates: {
744
+ uriTemplate: string;
745
+ name: string;
746
+ description?: string | undefined;
747
+ mimeType?: string | undefined;
748
+ annotations?: {
749
+ audience?: ("user" | "assistant")[] | undefined;
750
+ priority?: number | undefined;
751
+ lastModified?: string | undefined;
752
+ } | undefined;
753
+ _meta?: {
754
+ [x: string]: unknown;
755
+ } | undefined;
756
+ icons?: {
757
+ src: string;
758
+ mimeType?: string | undefined;
759
+ sizes?: string[] | undefined;
760
+ theme?: "light" | "dark" | undefined;
761
+ }[] | undefined;
762
+ title?: string | undefined;
763
+ }[];
764
+ _meta?: {
765
+ [x: string]: unknown;
766
+ progressToken?: string | number | undefined;
767
+ "io.modelcontextprotocol/related-task"?: {
768
+ taskId: string;
769
+ } | undefined;
770
+ } | undefined;
771
+ nextCursor?: string | undefined;
772
+ }>;
773
+ /**
774
+ * Lists prompts available from a server.
775
+ */
776
+ listPrompts(serverId: string, params?: ListPromptsParams, options?: ClientRequestOptions): Promise<{
777
+ [x: string]: unknown;
778
+ prompts: {
779
+ name: string;
780
+ description?: string | undefined;
781
+ arguments?: {
782
+ name: string;
783
+ description?: string | undefined;
784
+ required?: boolean | undefined;
785
+ }[] | undefined;
786
+ _meta?: {
787
+ [x: string]: unknown;
788
+ } | undefined;
789
+ icons? /**
790
+ * Gets tools formatted for Vercel AI SDK.
791
+ *
792
+ * @param serverIds - Server IDs to get tools from (or all if omitted)
793
+ * @param options - Schema options
794
+ * @returns AiSdkTool compatible with Vercel AI SDK's generateText()
795
+ */: {
796
+ src: string;
797
+ mimeType?: string | undefined;
798
+ sizes?: string[] | undefined;
799
+ theme?: "light" | "dark" | undefined;
800
+ }[] | undefined;
801
+ title?: string | undefined;
802
+ }[];
803
+ _meta?: {
804
+ [x: string]: unknown;
805
+ progressToken?: string | number | undefined;
806
+ "io.modelcontextprotocol/related-task"?: {
807
+ taskId: string;
808
+ } | undefined;
809
+ } | undefined;
810
+ nextCursor?: string | undefined;
811
+ }>;
812
+ /**
813
+ * Gets a prompt from a server.
814
+ */
815
+ getPrompt(serverId: string, params: GetPromptParams, options?: ClientRequestOptions): Promise<{
816
+ [x: string]: unknown;
817
+ messages: {
818
+ role: "user" | "assistant";
819
+ content: {
820
+ type: "text";
821
+ text: string;
822
+ annotations?: {
823
+ audience?: ("user" | "assistant")[] | undefined;
824
+ priority?: number | undefined;
825
+ lastModified?: string | undefined;
826
+ } | undefined;
827
+ _meta?: Record<string, unknown> | undefined;
828
+ } | {
829
+ type: "image";
830
+ data: string;
831
+ mimeType: string;
832
+ annotations?: {
833
+ audience?: ("user" | "assistant")[] | undefined;
834
+ priority?: number | undefined;
835
+ lastModified?: string | undefined;
836
+ } | undefined;
837
+ _meta?: Record<string, unknown> | undefined;
838
+ } | {
839
+ type: "audio";
840
+ data: string;
841
+ mimeType: string;
842
+ annotations?: {
843
+ audience?: ("user" | "assistant")[] | undefined;
844
+ priority?: number | undefined;
845
+ lastModified?: string | undefined;
846
+ } | undefined;
847
+ _meta?: Record<string, unknown> | undefined;
848
+ } | {
849
+ type: "resource";
850
+ resource: {
851
+ uri: string;
852
+ text: string;
853
+ mimeType?: string | undefined;
854
+ _meta?: Record<string, unknown> | undefined;
855
+ } | {
856
+ uri: string;
857
+ blob: string;
858
+ mimeType?: string | undefined;
859
+ _meta?: Record<string, unknown> | undefined;
860
+ };
861
+ annotations?: {
862
+ audience?: ("user" | "assistant")[] | undefined;
863
+ priority?: number | undefined;
864
+ lastModified?: string | undefined;
865
+ } | undefined;
866
+ _meta?: Record<string, unknown> | undefined;
867
+ } | {
868
+ uri: string;
869
+ name: string;
870
+ type: "resource_link";
871
+ description?: string | undefined;
872
+ mimeType?: string | undefined;
873
+ annotations?: {
874
+ audience?: ("user" | "assistant")[] | undefined;
875
+ priority?: number | undefined;
876
+ lastModified?: string | undefined;
877
+ } | undefined;
878
+ _meta?: {
879
+ [x: string]: unknown;
880
+ } | undefined;
881
+ icons?: {
882
+ src: string;
883
+ mimeType?: string | undefined;
884
+ sizes?: string[] | undefined;
885
+ theme?: "light" | "dark" | undefined;
886
+ }[] | undefined;
887
+ title?: string | undefined;
888
+ };
889
+ }[];
890
+ _meta?: {
891
+ [x: string]: unknown;
892
+ progressToken?: string | number | undefined;
893
+ "io.modelcontextprotocol/related-task"?: {
894
+ taskId: string;
895
+ } | undefined;
896
+ } | undefined;
897
+ description?: string | undefined;
898
+ }>;
899
+ /**
900
+ * Pings a server to check connectivity.
901
+ */
902
+ pingServer(serverId: string, options?: RequestOptions): void;
903
+ /**
904
+ * Sets the logging level for a server.
905
+ */
906
+ setLoggingLevel(serverId: string, level?: LoggingLevel): Promise<void>;
907
+ /**
908
+ * Gets the session ID for a Streamable HTTP server.
909
+ */
910
+ getSessionIdByServer(serverId: string): string | undefined;
911
+ /**
912
+ * Adds a notification handler for a server.
913
+ */
914
+ addNotificationHandler(serverId: string, schema: NotificationSchema, handler: NotificationHandler): void;
915
+ /**
916
+ * Registers a handler for resource list changes.
917
+ */
918
+ onResourceListChanged(serverId: string, handler: NotificationHandler): void;
919
+ /**
920
+ * Registers a handler for resource updates.
921
+ */
922
+ onResourceUpdated(serverId: string, handler: NotificationHandler): void;
923
+ /**
924
+ * Registers a handler for prompt list changes.
925
+ */
926
+ onPromptListChanged(serverId: string, handler: NotificationHandler): void;
927
+ /**
928
+ * Registers a handler for task status changes.
929
+ */
930
+ onTaskStatusChanged(serverId: string, handler: NotificationHandler): void;
931
+ /**
932
+ * Sets a server-specific elicitation handler.
933
+ */
934
+ setElicitationHandler(serverId: string, handler: ElicitationHandler): void;
935
+ /**
936
+ * Clears a server-specific elicitation handler.
937
+ */
938
+ clearElicitationHandler(serverId: string): void;
939
+ /**
940
+ * Sets a global elicitation callback for all servers.
941
+ */
942
+ setElicitationCallback(callback: ElicitationCallback): void;
943
+ /**
944
+ * Clears the global elicitation callback.
945
+ */
946
+ clearElicitationCallback(): void;
947
+ /**
948
+ * Gets the pending elicitations map for external resolvers.
949
+ */
950
+ getPendingElicitations(): Map<string, {
951
+ resolve: (value: ElicitResult) => void;
952
+ reject: (error: unknown) => void;
953
+ }>;
954
+ /**
955
+ * Responds to a pending elicitation.
956
+ */
957
+ respondToElicitation(requestId: string, response: ElicitResult): boolean;
958
+ /**
959
+ * Lists tasks from a server.
960
+ */
961
+ listTasks(serverId: string, cursor?: string, options?: ClientRequestOptions): Promise<MCPListTasksResult>;
962
+ /**
963
+ * Gets a task by ID.
964
+ */
965
+ getTask(serverId: string, taskId: string, options?: ClientRequestOptions): Promise<MCPTask>;
966
+ /**
967
+ * Gets the result of a completed task.
968
+ */
969
+ getTaskResult(serverId: string, taskId: string, options?: ClientRequestOptions): Promise<unknown>;
970
+ /**
971
+ * Cancels a task.
972
+ */
973
+ cancelTask(serverId: string, taskId: string, options?: ClientRequestOptions): Promise<MCPTask>;
974
+ /**
975
+ * Checks if server supports task-augmented tool calls.
976
+ */
977
+ supportsTasksForToolCalls(serverId: string): boolean;
978
+ /**
979
+ * Checks if server supports listing tasks.
980
+ */
981
+ supportsTasksList(serverId: string): boolean;
982
+ /**
983
+ * Checks if server supports canceling tasks.
984
+ */
985
+ supportsTasksCancel(serverId: string): boolean;
986
+ private performConnection;
987
+ private connectViaStdio;
988
+ private connectViaHttp;
989
+ private safeCloseTransport;
990
+ private ensureConnected;
991
+ private getClientOrThrow;
992
+ private resetState;
993
+ private withTimeout;
994
+ private withProgressHandler;
995
+ private buildCapabilities;
996
+ private resolveRpcLogger;
997
+ private cacheToolsMetadata;
998
+ private isStdioConfig;
999
+ }
1000
+
1001
+ /**
1002
+ * Transport utilities for MCPClientManager
1003
+ */
1004
+
1005
+ /**
1006
+ * Builds the requestInit object, merging accessToken into Authorization header if provided.
1007
+ *
1008
+ * @param accessToken - Optional access token for Bearer auth
1009
+ * @param requestInit - Optional existing requestInit config
1010
+ * @returns Merged requestInit with Authorization header if accessToken provided
1011
+ */
1012
+ declare function buildRequestInit(accessToken: string | undefined, requestInit: StreamableHTTPClientTransportOptions["requestInit"]): StreamableHTTPClientTransportOptions["requestInit"];
1013
+
1014
+ /**
1015
+ * Error handling utilities for MCPClientManager
1016
+ */
1017
+ /**
1018
+ * Checks if an error indicates that a method is not available/implemented by the server.
1019
+ * Used for graceful degradation when servers don't support certain MCP features.
1020
+ *
1021
+ * @param error - The error to check
1022
+ * @param method - The MCP method name (e.g., "tools/list", "resources/list")
1023
+ * @returns True if the error indicates the method is unavailable
1024
+ */
1025
+ declare function isMethodUnavailableError(error: unknown, method: string): boolean;
1026
+ /**
1027
+ * Formats an error for display in error messages.
1028
+ *
1029
+ * @param error - The error to format
1030
+ * @returns A string representation of the error
1031
+ */
1032
+ declare function formatError(error: unknown): string;
1033
+
1034
+ /**
1035
+ * Custom error classes for MCP SDK
1036
+ */
1037
+ /**
1038
+ * Base error class for all MCP SDK errors
1039
+ */
1040
+ declare class MCPError extends Error {
1041
+ readonly code: string;
1042
+ constructor(message: string, code: string, options?: {
1043
+ cause?: unknown;
1044
+ });
1045
+ }
1046
+ /**
1047
+ * Authentication error - thrown for 401, token expired, invalid token, etc.
1048
+ */
1049
+ declare class MCPAuthError extends MCPError {
1050
+ readonly statusCode?: number | undefined;
1051
+ constructor(message: string, statusCode?: number | undefined, options?: {
1052
+ cause?: unknown;
1053
+ });
1054
+ }
1055
+ /**
1056
+ * Type guard to check if an error is an MCPAuthError
1057
+ */
1058
+ declare function isMCPAuthError(error: unknown): error is MCPAuthError;
1059
+ /**
1060
+ * Checks if an error is an authentication-related error.
1061
+ * Detects auth errors by:
1062
+ * 1. Error class name (UnauthorizedError from MCP SDK)
1063
+ * 2. HTTP status codes (401, 403) from transport errors
1064
+ * 3. Common auth-related patterns in error messages (case-insensitive)
1065
+ */
1066
+ declare function isAuthError(error: unknown): {
1067
+ isAuth: boolean;
1068
+ statusCode?: number;
1069
+ };
1070
+
1071
+ /**
1072
+ * MCP Tasks support (experimental feature - spec 2025-11-25)
1073
+ */
1074
+
1075
+ /**
1076
+ * Checks if server supports task-augmented tool calls.
1077
+ * Checks both top-level tasks and experimental.tasks namespaces.
1078
+ *
1079
+ * @param capabilities - The server capabilities
1080
+ * @returns True if server supports task-augmented tool calls
1081
+ */
1082
+ declare function supportsTasksForToolCalls(capabilities: ServerCapabilities | undefined): boolean;
1083
+ /**
1084
+ * Checks if server supports tasks/list operation.
1085
+ *
1086
+ * @param capabilities - The server capabilities
1087
+ * @returns True if server supports listing tasks
1088
+ */
1089
+ declare function supportsTasksList(capabilities: ServerCapabilities | undefined): boolean;
1090
+ /**
1091
+ * Checks if server supports tasks/cancel operation.
1092
+ *
1093
+ * @param capabilities - The server capabilities
1094
+ * @returns True if server supports canceling tasks
1095
+ */
1096
+ declare function supportsTasksCancel(capabilities: ServerCapabilities | undefined): boolean;
1097
+
1098
+ /**
1099
+ * Core types for SDK evals functionality
1100
+ */
1101
+
1102
+ /**
1103
+ * Built-in LLM providers with native SDK support
1104
+ */
1105
+ type LLMProvider = "anthropic" | "openai" | "azure" | "deepseek" | "google" | "ollama" | "mistral" | "openrouter" | "xai";
1106
+ /**
1107
+ * Compatible API protocols for custom providers
1108
+ */
1109
+ type CompatibleProtocol = "openai-compatible" | "anthropic-compatible";
1110
+ /**
1111
+ * Configuration for a custom provider (user-defined)
1112
+ */
1113
+ interface CustomProvider {
1114
+ /** Unique name for this provider (used in model strings, e.g., "groq/llama-3") */
1115
+ name: string;
1116
+ /** API protocol this provider is compatible with */
1117
+ protocol: CompatibleProtocol;
1118
+ /** Base URL for the API endpoint */
1119
+ baseUrl: string;
1120
+ /** List of available model IDs */
1121
+ modelIds: string[];
1122
+ /** Optional API key (can also be provided at runtime) */
1123
+ apiKey?: string;
1124
+ /** Environment variable name to read API key from (fallback) */
1125
+ apiKeyEnvVar?: string;
1126
+ /**
1127
+ * Use Chat Completions API (.chat()) instead of default.
1128
+ * Required for some OpenAI-compatible providers like LiteLLM.
1129
+ * Only applies to openai-compatible protocol.
1130
+ */
1131
+ useChatCompletions?: boolean;
1132
+ }
1133
+ /**
1134
+ * Configuration for an LLM
1135
+ */
1136
+ interface LLMConfig {
1137
+ provider: LLMProvider;
1138
+ model: string;
1139
+ apiKey: string;
1140
+ }
1141
+ /**
1142
+ * Represents a tool call made by the LLM
1143
+ */
1144
+ interface ToolCall {
1145
+ toolName: string;
1146
+ arguments: Record<any, any>;
1147
+ }
1148
+ /**
1149
+ * Token usage statistics
1150
+ */
1151
+ interface TokenUsage {
1152
+ inputTokens: number;
1153
+ outputTokens: number;
1154
+ totalTokens: number;
1155
+ }
1156
+ /**
1157
+ * Latency breakdown for prompt execution
1158
+ */
1159
+ interface LatencyBreakdown {
1160
+ /** Total wall-clock time in milliseconds */
1161
+ e2eMs: number;
1162
+ /** LLM API time in milliseconds */
1163
+ llmMs: number;
1164
+ /** MCP tool execution time in milliseconds */
1165
+ mcpMs: number;
1166
+ }
1167
+ /**
1168
+ * Raw prompt result data (used internally)
1169
+ */
1170
+ interface PromptResultData {
1171
+ /** The original prompt/query that was sent */
1172
+ prompt: string;
1173
+ /** The full conversation history (user, assistant, tool messages) */
1174
+ messages: CoreMessage[];
1175
+ text: string;
1176
+ toolCalls: ToolCall[];
1177
+ usage: TokenUsage;
1178
+ latency: LatencyBreakdown;
1179
+ error?: string;
1180
+ }
1181
+
1182
+ /**
1183
+ * PromptResult class - wraps the result of a TestAgent prompt
1184
+ */
1185
+
1186
+ /**
1187
+ * Represents the result of a TestAgent prompt.
1188
+ * Provides convenient methods to inspect tool calls, token usage, and errors.
1189
+ */
1190
+ declare class PromptResult {
1191
+ /** The original prompt/query that was sent */
1192
+ readonly prompt: string;
1193
+ /** The text response from the LLM */
1194
+ readonly text: string;
1195
+ /** The full conversation history */
1196
+ private readonly _messages;
1197
+ /** Latency breakdown (e2e, llm, mcp) */
1198
+ private readonly _latency;
1199
+ /** Tool calls made during the prompt */
1200
+ private readonly _toolCalls;
1201
+ /** Token usage statistics */
1202
+ private readonly _usage;
1203
+ /** Error message if the prompt failed */
1204
+ private readonly _error?;
1205
+ /**
1206
+ * Create a new PromptResult
1207
+ * @param data - The raw prompt result data
1208
+ */
1209
+ constructor(data: PromptResultData);
1210
+ /**
1211
+ * Get the original query/prompt that was sent.
1212
+ *
1213
+ * @returns The original prompt string
1214
+ */
1215
+ getPrompt(): string;
1216
+ /**
1217
+ * Get the full conversation history (user, assistant, tool messages).
1218
+ * Returns a copy to prevent external modification.
1219
+ *
1220
+ * @returns Array of CoreMessage objects
1221
+ */
1222
+ getMessages(): CoreMessage[];
1223
+ /**
1224
+ * Get only user messages from the conversation.
1225
+ *
1226
+ * @returns Array of CoreUserMessage objects
1227
+ */
1228
+ getUserMessages(): CoreUserMessage[];
1229
+ /**
1230
+ * Get only assistant messages from the conversation.
1231
+ *
1232
+ * @returns Array of CoreAssistantMessage objects
1233
+ */
1234
+ getAssistantMessages(): CoreAssistantMessage[];
1235
+ /**
1236
+ * Get only tool result messages from the conversation.
1237
+ *
1238
+ * @returns Array of CoreToolMessage objects
1239
+ */
1240
+ getToolMessages(): CoreToolMessage[];
1241
+ /**
1242
+ * Get the end-to-end latency in milliseconds.
1243
+ * This is the total wall-clock time for the prompt.
1244
+ *
1245
+ * @returns End-to-end latency in milliseconds
1246
+ */
1247
+ e2eLatencyMs(): number;
1248
+ /**
1249
+ * Get the LLM API latency in milliseconds.
1250
+ * This is the time spent waiting for LLM responses (excluding tool execution).
1251
+ *
1252
+ * @returns LLM latency in milliseconds
1253
+ */
1254
+ llmLatencyMs(): number;
1255
+ /**
1256
+ * Get the MCP tool execution latency in milliseconds.
1257
+ * This is the time spent executing MCP tools.
1258
+ *
1259
+ * @returns MCP tool latency in milliseconds
1260
+ */
1261
+ mcpLatencyMs(): number;
1262
+ /**
1263
+ * Get the full latency breakdown.
1264
+ *
1265
+ * @returns LatencyBreakdown object with e2eMs, llmMs, and mcpMs
1266
+ */
1267
+ getLatency(): LatencyBreakdown;
1268
+ /**
1269
+ * Get the names of all tools that were called during this prompt.
1270
+ * Returns a standard string[] that can be used with .includes().
1271
+ *
1272
+ * @returns Array of tool names
1273
+ */
1274
+ toolsCalled(): string[];
1275
+ /**
1276
+ * Check if a specific tool was called during this prompt.
1277
+ * Case-sensitive exact match.
1278
+ *
1279
+ * @param toolName - The name of the tool to check for
1280
+ * @returns true if the tool was called
1281
+ */
1282
+ hasToolCall(toolName: string): boolean;
1283
+ /**
1284
+ * Get all tool calls with their arguments.
1285
+ *
1286
+ * @returns Array of ToolCall objects
1287
+ */
1288
+ getToolCalls(): ToolCall[];
1289
+ /**
1290
+ * Get the arguments passed to a specific tool call.
1291
+ * Returns undefined if the tool was not called.
1292
+ * If the tool was called multiple times, returns the first call's arguments.
1293
+ *
1294
+ * @param toolName - The name of the tool
1295
+ * @returns The arguments object or undefined
1296
+ */
1297
+ getToolArguments(toolName: string): Record<string, unknown> | undefined;
1298
+ /**
1299
+ * Get the total number of tokens used.
1300
+ *
1301
+ * @returns Total tokens (input + output)
1302
+ */
1303
+ totalTokens(): number;
1304
+ /**
1305
+ * Get the number of input tokens used.
1306
+ *
1307
+ * @returns Input token count
1308
+ */
1309
+ inputTokens(): number;
1310
+ /**
1311
+ * Get the number of output tokens used.
1312
+ *
1313
+ * @returns Output token count
1314
+ */
1315
+ outputTokens(): number;
1316
+ /**
1317
+ * Get the full token usage statistics.
1318
+ *
1319
+ * @returns TokenUsage object
1320
+ */
1321
+ getUsage(): TokenUsage;
1322
+ /**
1323
+ * Check if this prompt resulted in an error.
1324
+ *
1325
+ * @returns true if there was an error
1326
+ */
1327
+ hasError(): boolean;
1328
+ /**
1329
+ * Get the error message if the prompt failed.
1330
+ *
1331
+ * @returns The error message or undefined
1332
+ */
1333
+ getError(): string | undefined;
1334
+ /**
1335
+ * Create a PromptResult from raw data.
1336
+ * Factory method for convenience.
1337
+ *
1338
+ * @param data - The raw prompt result data
1339
+ * @returns A new PromptResult instance
1340
+ */
1341
+ static from(data: PromptResultData): PromptResult;
1342
+ /**
1343
+ * Create an error PromptResult.
1344
+ * Factory method for error cases.
1345
+ *
1346
+ * @param error - The error message
1347
+ * @param latency - The latency breakdown or e2e time in milliseconds
1348
+ * @returns A new PromptResult instance with error state
1349
+ */
1350
+ static error(error: string, latency?: LatencyBreakdown | number, prompt?: string): PromptResult;
1351
+ /**
1352
+ * Format the conversation trace as a JSON string.
1353
+ * Useful for debugging failed evaluations.
1354
+ *
1355
+ * @returns A JSON string of the conversation messages
1356
+ */
1357
+ formatTrace(): string;
1358
+ }
1359
+
1360
+ /**
1361
+ * TestAgent - Runs LLM prompts with tool calling for evals
1362
+ */
1363
+
1364
+ /**
1365
+ * Configuration for creating a TestAgent
1366
+ */
1367
+ interface TestAgentConfig {
1368
+ /** Tools to provide to the LLM (Tool[] from manager.getTools() or AiSdkTool from manager.getToolsForAiSdk()) */
1369
+ tools: Tool[] | AiSdkTool;
1370
+ /** LLM provider and model string (e.g., "openai/gpt-4o", "anthropic/claude-3-5-sonnet-20241022") */
1371
+ model: string;
1372
+ /** API key for the LLM provider */
1373
+ apiKey: string;
1374
+ /** System prompt for the LLM (default: "You are a helpful assistant.") */
1375
+ systemPrompt?: string;
1376
+ /** Temperature for LLM responses (0-2). If undefined, uses model default. Some models (e.g., reasoning models) don't support temperature. */
1377
+ temperature?: number;
1378
+ /** Maximum number of agentic steps/tool calls (default: 10) */
1379
+ maxSteps?: number;
1380
+ /** Custom providers registry for non-standard LLM providers */
1381
+ customProviders?: Map<string, CustomProvider> | Record<string, CustomProvider>;
1382
+ }
1383
+ /**
1384
+ * Options for the prompt() method
1385
+ */
1386
+ interface PromptOptions {
1387
+ /** Previous PromptResult(s) to include as conversation context for multi-turn conversations */
1388
+ context?: PromptResult | PromptResult[];
1389
+ }
1390
+ /**
1391
+ * Agent for running LLM prompts with tool calling.
1392
+ * Wraps the AI SDK generateText function with proper tool integration.
1393
+ *
1394
+ * @example
1395
+ * ```typescript
1396
+ * const manager = new MCPClientManager({
1397
+ * everything: { command: "npx", args: ["-y", "@modelcontextprotocol/server-everything"] },
1398
+ * });
1399
+ * await manager.connectToServer("everything");
1400
+ *
1401
+ * const agent = new TestAgent({
1402
+ * tools: await manager.getToolsForAiSdk(["everything"]),
1403
+ * model: "openai/gpt-4o",
1404
+ * apiKey: process.env.OPENAI_API_KEY!,
1405
+ * });
1406
+ *
1407
+ * const result = await agent.prompt("Add 2 and 3");
1408
+ * console.log(result.toolsCalled()); // ["add"]
1409
+ * console.log(result.text); // "The result of adding 2 and 3 is 5."
1410
+ * ```
1411
+ */
1412
+ declare class TestAgent {
1413
+ private readonly tools;
1414
+ private readonly model;
1415
+ private readonly apiKey;
1416
+ private systemPrompt;
1417
+ private temperature;
1418
+ private readonly maxSteps;
1419
+ private readonly customProviders?;
1420
+ /** The result of the last prompt (for toolsCalled() convenience method) */
1421
+ private lastResult;
1422
+ /** History of all prompt results during a test execution */
1423
+ private promptHistory;
1424
+ /**
1425
+ * Create a new TestAgent
1426
+ * @param config - Agent configuration
1427
+ */
1428
+ constructor(config: TestAgentConfig);
1429
+ /**
1430
+ * Create instrumented tools that track execution latency.
1431
+ * @param onLatency - Callback to report latency for each tool execution
1432
+ * @returns ToolSet with instrumented execute functions
1433
+ */
1434
+ private createInstrumentedTools;
1435
+ /**
1436
+ * Build an array of CoreMessages from previous PromptResult(s) for multi-turn context.
1437
+ * @param context - Single PromptResult or array of PromptResults to include as context
1438
+ * @returns Array of CoreMessages representing the conversation history
1439
+ */
1440
+ private buildContextMessages;
1441
+ /**
1442
+ * Run a prompt with the LLM, allowing tool calls.
1443
+ * Never throws - errors are returned in the PromptResult.
1444
+ *
1445
+ * @param message - The user message to send to the LLM
1446
+ * @param options - Optional settings including context for multi-turn conversations
1447
+ * @returns PromptResult with text response, tool calls, token usage, and latency breakdown
1448
+ *
1449
+ * @example
1450
+ * // Single-turn (default)
1451
+ * const result = await agent.prompt("Show me workspaces");
1452
+ *
1453
+ * @example
1454
+ * // Multi-turn with context
1455
+ * const r1 = await agent.prompt("Show me workspaces");
1456
+ * const r2 = await agent.prompt("Now show tasks", { context: r1 });
1457
+ *
1458
+ * @example
1459
+ * // Multi-turn with multiple context results
1460
+ * const r1 = await agent.prompt("Show workspaces");
1461
+ * const r2 = await agent.prompt("Pick the first", { context: r1 });
1462
+ * const r3 = await agent.prompt("Show tasks", { context: [r1, r2] });
1463
+ */
1464
+ prompt(message: string, options?: PromptOptions): Promise<PromptResult>;
1465
+ /**
1466
+ * Get the names of tools called in the last prompt.
1467
+ * Convenience method for quick checks in eval functions.
1468
+ *
1469
+ * @returns Array of tool names from the last prompt, or empty array if no prompt has been run
1470
+ */
1471
+ toolsCalled(): string[];
1472
+ /**
1473
+ * Create a new TestAgent with modified options.
1474
+ * Useful for creating variants for different test scenarios.
1475
+ *
1476
+ * @param options - Partial config to override
1477
+ * @returns A new TestAgent instance with the merged configuration
1478
+ */
1479
+ withOptions(options: Partial<TestAgentConfig>): TestAgent;
1480
+ /**
1481
+ * Get the configured tools
1482
+ */
1483
+ getTools(): ToolSet;
1484
+ /**
1485
+ * Get the LLM provider/model string
1486
+ */
1487
+ getModel(): string;
1488
+ /**
1489
+ * Get the API key
1490
+ */
1491
+ getApiKey(): string;
1492
+ /**
1493
+ * Get the current system prompt
1494
+ */
1495
+ getSystemPrompt(): string;
1496
+ /**
1497
+ * Set a new system prompt
1498
+ */
1499
+ setSystemPrompt(prompt: string): void;
1500
+ /**
1501
+ * Get the current temperature (undefined means model default)
1502
+ */
1503
+ getTemperature(): number | undefined;
1504
+ /**
1505
+ * Set the temperature (must be between 0 and 2)
1506
+ */
1507
+ setTemperature(temperature: number): void;
1508
+ /**
1509
+ * Get the max steps configuration
1510
+ */
1511
+ getMaxSteps(): number;
1512
+ /**
1513
+ * Get the result of the last prompt
1514
+ */
1515
+ getLastResult(): PromptResult | undefined;
1516
+ /**
1517
+ * Reset the prompt history.
1518
+ * Call this before each test iteration to clear previous results.
1519
+ */
1520
+ resetPromptHistory(): void;
1521
+ /**
1522
+ * Get the history of all prompt results since the last reset.
1523
+ * Returns a copy of the array to prevent external modification.
1524
+ */
1525
+ getPromptHistory(): PromptResult[];
1526
+ }
1527
+
1528
+ /**
1529
+ * Tool extraction utilities for AI SDK generateText results
1530
+ */
1531
+
1532
+ /**
1533
+ * Extract all tool calls from an AI SDK generateText result.
1534
+ * Collects tool calls from all steps in the agentic loop.
1535
+ *
1536
+ * @param result - The result from AI SDK's generateText
1537
+ * @returns Array of ToolCall objects with toolName and arguments
1538
+ */
1539
+ declare function extractToolCalls(result: GenerateTextResult<ToolSet, never>): ToolCall[];
1540
+ /**
1541
+ * Extract tool names from an AI SDK generateText result.
1542
+ * Convenience function that returns just the tool names.
1543
+ *
1544
+ * @param result - The result from AI SDK's generateText
1545
+ * @returns Array of tool names that were called
1546
+ */
1547
+ declare function extractToolNames(result: GenerateTextResult<ToolSet, never>): string[];
1548
+
1549
+ /**
1550
+ * Validators for matching tool calls in eval tests
1551
+ *
1552
+ * All matching is case-sensitive and uses exact strings only (no wildcards).
1553
+ */
1554
+
1555
+ /**
1556
+ * Exact match - all expected tools must be present in exact order.
1557
+ * Case-sensitive exact string comparison.
1558
+ *
1559
+ * @param expected - The expected tool names in order
1560
+ * @param actual - The actual tool names that were called
1561
+ * @returns true if actual matches expected exactly
1562
+ *
1563
+ * @example
1564
+ * matchToolCalls(['add', 'multiply'], ['add', 'multiply']) // true
1565
+ * matchToolCalls(['add', 'multiply'], ['multiply', 'add']) // false (wrong order)
1566
+ * matchToolCalls(['add'], ['add', 'multiply']) // false (extra tool)
1567
+ */
1568
+ declare function matchToolCalls(expected: string[], actual: string[]): boolean;
1569
+ /**
1570
+ * Subset match - all expected tools must be present, order doesn't matter.
1571
+ * Case-sensitive exact string comparison.
1572
+ *
1573
+ * @param expected - The expected tool names (any order)
1574
+ * @param actual - The actual tool names that were called
1575
+ * @returns true if all expected tools are present in actual
1576
+ *
1577
+ * @example
1578
+ * matchToolCallsSubset(['add', 'multiply'], ['multiply', 'add']) // true
1579
+ * matchToolCallsSubset(['add'], ['add', 'multiply']) // true
1580
+ * matchToolCallsSubset(['add', 'subtract'], ['add', 'multiply']) // false (missing subtract)
1581
+ */
1582
+ declare function matchToolCallsSubset(expected: string[], actual: string[]): boolean;
1583
+ /**
1584
+ * Any match - at least one expected tool must be present.
1585
+ * Case-sensitive exact string comparison.
1586
+ *
1587
+ * @param expected - The expected tool names (at least one must match)
1588
+ * @param actual - The actual tool names that were called
1589
+ * @returns true if at least one expected tool is present in actual
1590
+ *
1591
+ * @example
1592
+ * matchAnyToolCall(['add', 'subtract'], ['multiply', 'add']) // true
1593
+ * matchAnyToolCall(['add', 'subtract'], ['multiply', 'divide']) // false
1594
+ * matchAnyToolCall([], ['add']) // false (empty expected)
1595
+ */
1596
+ declare function matchAnyToolCall(expected: string[], actual: string[]): boolean;
1597
+ /**
1598
+ * Count match - check if a specific tool was called exactly N times.
1599
+ * Case-sensitive exact string comparison.
1600
+ *
1601
+ * @param toolName - The tool name to count
1602
+ * @param actual - The actual tool names that were called
1603
+ * @param count - The expected number of times the tool should be called
1604
+ * @returns true if the tool was called exactly count times
1605
+ *
1606
+ * @example
1607
+ * matchToolCallCount('add', ['add', 'add', 'multiply'], 2) // true
1608
+ * matchToolCallCount('add', ['add', 'multiply'], 2) // false
1609
+ */
1610
+ declare function matchToolCallCount(toolName: string, actual: string[], count: number): boolean;
1611
+ /**
1612
+ * No tools match - check that no tools were called.
1613
+ *
1614
+ * @param actual - The actual tool names that were called
1615
+ * @returns true if no tools were called
1616
+ *
1617
+ * @example
1618
+ * matchNoToolCalls([]) // true
1619
+ * matchNoToolCalls(['add']) // false
1620
+ */
1621
+ declare function matchNoToolCalls(actual: string[]): boolean;
1622
+ /**
1623
+ * Check if tool was called with exact arguments (deep equality).
1624
+ * Returns true if any call to the tool has exactly matching arguments.
1625
+ * Case-sensitive for tool names.
1626
+ *
1627
+ * @param toolName - The tool name to match
1628
+ * @param expectedArgs - The expected arguments (exact match)
1629
+ * @param toolCalls - The actual tool calls made
1630
+ * @returns true if any call to the tool has exactly matching arguments
1631
+ *
1632
+ * @example
1633
+ * matchToolCallWithArgs('add', {a: 2, b: 3}, toolCalls) // true if add({a:2, b:3}) was called
1634
+ * matchToolCallWithArgs('add', {a: 2}, [{toolName:'add', arguments:{a:2, b:3}}]) // false (extra arg)
1635
+ */
1636
+ declare function matchToolCallWithArgs(toolName: string, expectedArgs: Record<string, unknown>, toolCalls: ToolCall[]): boolean;
1637
+ /**
1638
+ * Check if tool was called with at least these arguments (partial match).
1639
+ * Allows extra arguments in the actual call.
1640
+ * Case-sensitive for tool names.
1641
+ *
1642
+ * @param toolName - The tool name to match
1643
+ * @param expectedArgs - The expected arguments (partial match)
1644
+ * @param toolCalls - The actual tool calls made
1645
+ * @returns true if any call to the tool contains all expected arguments
1646
+ *
1647
+ * @example
1648
+ * matchToolCallWithPartialArgs('add', {a: 2}, [{toolName:'add', arguments:{a:2, b:3}}]) // true
1649
+ * matchToolCallWithPartialArgs('add', {a: 2, c: 5}, [{toolName:'add', arguments:{a:2, b:3}}]) // false
1650
+ */
1651
+ declare function matchToolCallWithPartialArgs(toolName: string, expectedArgs: Record<string, unknown>, toolCalls: ToolCall[]): boolean;
1652
+ /**
1653
+ * Check if a specific argument has a specific value in any call to the tool.
1654
+ * Case-sensitive for tool names.
1655
+ *
1656
+ * @param toolName - The tool name to match
1657
+ * @param argKey - The argument key to check
1658
+ * @param expectedValue - The expected value for the argument
1659
+ * @param toolCalls - The actual tool calls made
1660
+ * @returns true if any call to the tool has the specified argument value
1661
+ *
1662
+ * @example
1663
+ * matchToolArgument('add', 'a', 2, toolCalls) // true if any add() call had a=2
1664
+ */
1665
+ declare function matchToolArgument(toolName: string, argKey: string, expectedValue: unknown, toolCalls: ToolCall[]): boolean;
1666
+ /**
1667
+ * Check if argument value matches a predicate function.
1668
+ * Useful for partial matches, type checks, or range validation.
1669
+ * Case-sensitive for tool names.
1670
+ *
1671
+ * @param toolName - The tool name to match
1672
+ * @param argKey - The argument key to check
1673
+ * @param predicate - Function that tests the argument value
1674
+ * @param toolCalls - The actual tool calls made
1675
+ * @returns true if any call to the tool has an argument value that passes the predicate
1676
+ *
1677
+ * @example
1678
+ * matchToolArgumentWith('echo', 'message', (v) => typeof v === 'string' && v.includes('hello'), toolCalls)
1679
+ * matchToolArgumentWith('add', 'a', (v) => typeof v === 'number' && v > 0, toolCalls)
1680
+ */
1681
+ declare function matchToolArgumentWith(toolName: string, argKey: string, predicate: (value: unknown) => boolean, toolCalls: ToolCall[]): boolean;
1682
+
1683
+ /**
1684
+ * Percentile calculation utilities for latency statistics
1685
+ */
1686
+ /**
1687
+ * Calculate a specific percentile from sorted values.
1688
+ *
1689
+ * @param sortedValues - Array of numbers sorted in ascending order
1690
+ * @param percentile - The percentile to calculate (0-100)
1691
+ * @returns The percentile value
1692
+ * @throws Error if array is empty or percentile is out of range
1693
+ */
1694
+ declare function calculatePercentile(sortedValues: number[], percentile: number): number;
1695
+ /**
1696
+ * Statistics for latency values
1697
+ */
1698
+ interface LatencyStats {
1699
+ /** Minimum value */
1700
+ min: number;
1701
+ /** Maximum value */
1702
+ max: number;
1703
+ /** Mean (average) value */
1704
+ mean: number;
1705
+ /** 50th percentile (median) */
1706
+ p50: number;
1707
+ /** 95th percentile */
1708
+ p95: number;
1709
+ /** Number of values */
1710
+ count: number;
1711
+ }
1712
+ /**
1713
+ * Calculate comprehensive latency statistics for a set of values.
1714
+ *
1715
+ * @param values - Array of latency values (milliseconds)
1716
+ * @returns LatencyStats object with min, max, mean, p50, p95, and count
1717
+ * @throws Error if array is empty
1718
+ */
1719
+ declare function calculateLatencyStats(values: number[]): LatencyStats;
1720
+
1721
+ /**
1722
+ * Configuration for an EvalTest
1723
+ *
1724
+ * All tests use the multi-turn pattern with a test function that receives a TestAgent.
1725
+ */
1726
+ interface EvalTestConfig {
1727
+ name: string;
1728
+ test: (agent: TestAgent) => boolean | Promise<boolean>;
1729
+ }
1730
+ /**
1731
+ * Options for running an EvalTest
1732
+ */
1733
+ interface EvalTestRunOptions {
1734
+ iterations: number;
1735
+ concurrency?: number;
1736
+ retries?: number;
1737
+ timeoutMs?: number;
1738
+ onProgress?: (completed: number, total: number) => void;
1739
+ /** Called with a failure report if any iterations fail */
1740
+ onFailure?: (report: string) => void;
1741
+ }
1742
+ /**
1743
+ * Result details for a single iteration
1744
+ */
1745
+ interface IterationResult {
1746
+ passed: boolean;
1747
+ latencies: LatencyBreakdown[];
1748
+ tokens: {
1749
+ total: number;
1750
+ input: number;
1751
+ output: number;
1752
+ };
1753
+ error?: string;
1754
+ retryCount?: number;
1755
+ /** The prompt results from this iteration */
1756
+ prompts?: PromptResult[];
1757
+ }
1758
+ /**
1759
+ * Result of running an EvalTest
1760
+ */
1761
+ interface EvalRunResult {
1762
+ iterations: number;
1763
+ successes: number;
1764
+ failures: number;
1765
+ results: boolean[];
1766
+ iterationDetails: IterationResult[];
1767
+ tokenUsage: {
1768
+ total: number;
1769
+ input: number;
1770
+ output: number;
1771
+ perIteration: {
1772
+ total: number;
1773
+ input: number;
1774
+ output: number;
1775
+ }[];
1776
+ };
1777
+ latency: {
1778
+ e2e: LatencyStats;
1779
+ llm: LatencyStats;
1780
+ mcp: LatencyStats;
1781
+ perIteration: LatencyBreakdown[];
1782
+ };
1783
+ }
1784
/**
 * EvalTest - Runs a single test scenario with iterations
 *
 * Can be run standalone or as part of an EvalSuite.
 *
 * @example
 * ```ts
 * const test = new EvalTest({
 *   name: "addition",
 *   test: async (agent) => {
 *     const result = await agent.prompt("Add 2+3");
 *     return result.hasToolCall("add");
 *   },
 * });
 * await test.run(agent, { iterations: 30 });
 * console.log(test.accuracy()); // 0.97
 * ```
 */
declare class EvalTest {
    /** Configuration this test was constructed with. */
    private config;
    /** Result of the most recent run(); null until run() completes (see getResults). */
    private lastRunResult;
    constructor(config: EvalTestConfig);
    /**
     * Run this test with the given agent and options
     */
    run(agent: TestAgent, options: EvalTestRunOptions): Promise<EvalRunResult>;
    /** Aggregates per-iteration results into an EvalRunResult. */
    private aggregateResults;
    /**
     * Get the accuracy of the last run (success rate)
     */
    accuracy(): number;
    /**
     * Get the recall (true positive rate) of the last run
     */
    recall(): number;
    /**
     * Get the precision of the last run
     */
    precision(): number;
    /**
     * Get the true positive rate (same as recall)
     */
    truePositiveRate(): number;
    /**
     * Get the false positive rate
     */
    falsePositiveRate(): number;
    /**
     * Get the average token use per iteration
     */
    averageTokenUse(): number;
    /**
     * Get the full results of the last run
     */
    getResults(): EvalRunResult | null;
    /**
     * Get the name of this test
     */
    getName(): string;
    /**
     * Get the configuration of this test
     */
    getConfig(): EvalTestConfig;
    /**
     * Get all iteration details from the last run
     */
    getAllIterations(): IterationResult[];
    /**
     * Get only the failed iterations from the last run
     */
    getFailedIterations(): IterationResult[];
    /**
     * Get only the successful iterations from the last run
     */
    getSuccessfulIterations(): IterationResult[];
    /**
     * Get a failure report with traces from all failed iterations.
     * Useful for debugging why evaluations failed.
     *
     * @returns A formatted string with failure details
     */
    getFailureReport(): string;
}
1867
+
1868
/**
 * Configuration for an EvalSuite
 */
interface EvalSuiteConfig {
    /** Optional human-readable name for the suite. */
    name?: string;
}
1874
/**
 * Result for a single test within the suite
 */
interface TestResult {
    /** Name of the test. */
    name: string;
    /** The test's run result. */
    result: EvalRunResult;
}
1881
/**
 * Result of running an EvalSuite
 */
interface EvalSuiteResult {
    /** Per-test results, keyed by test name. */
    tests: Map<string, EvalRunResult>;
    /** Metrics aggregated across all tests in the suite. */
    aggregate: {
        /** Total iterations across all tests. */
        iterations: number;
        /** Total passing iterations across all tests. */
        successes: number;
        /** Total failing iterations across all tests. */
        failures: number;
        /** Overall success rate across all tests. */
        accuracy: number;
        /** Token usage aggregated across the suite. */
        tokenUsage: {
            total: number;
            /** Token totals broken out per test. */
            perTest: number[];
        };
        /** Latency statistics aggregated across the suite. */
        latency: {
            /** End-to-end latency statistics. */
            e2e: LatencyStats;
            /** LLM call latency statistics. */
            llm: LatencyStats;
            /** MCP request latency statistics. */
            mcp: LatencyStats;
        };
    };
}
1902
/**
 * EvalSuite - Groups multiple EvalTests and provides aggregate metrics
 *
 * @example
 * ```ts
 * const suite = new EvalSuite({ name: "Math" });
 * suite.add(new EvalTest({
 *   name: "addition",
 *   test: async (agent) => {
 *     const r = await agent.prompt("Add 2+3");
 *     return r.hasToolCall("add");
 *   },
 * }));
 * suite.add(new EvalTest({
 *   name: "multiply",
 *   test: async (agent) => {
 *     const r = await agent.prompt("Multiply 4*5");
 *     return r.hasToolCall("multiply");
 *   },
 * }));
 *
 * await suite.run(agent, { iterations: 30 });
 * console.log(suite.accuracy()); // Aggregate: 0.95
 * console.log(suite.get("addition").accuracy()); // Individual: 0.97
 * ```
 */
declare class EvalSuite {
    /** Optional suite name from config. */
    private name;
    /** Tests registered via add(), retrievable by name. */
    private tests;
    /** Result of the most recent run(); null until run() completes (see getResults). */
    private lastRunResult;
    constructor(config?: EvalSuiteConfig);
    /**
     * Add a test to the suite
     */
    add(test: EvalTest): void;
    /**
     * Get a test by name
     */
    get(name: string): EvalTest | undefined;
    /**
     * Get all tests in the suite
     */
    getAll(): EvalTest[];
    /**
     * Run all tests in the suite with the given agent and options
     */
    run(agent: TestAgent, options: EvalTestRunOptions): Promise<EvalSuiteResult>;
    /** Aggregates individual test results into the suite-level result. */
    private aggregateResults;
    /**
     * Get the aggregate accuracy across all tests
     */
    accuracy(): number;
    /**
     * Get the aggregate recall (same as accuracy in basic context)
     */
    recall(): number;
    /**
     * Get the aggregate precision (same as accuracy in basic context)
     */
    precision(): number;
    /**
     * Get the aggregate true positive rate (same as recall)
     */
    truePositiveRate(): number;
    /**
     * Get the aggregate false positive rate
     */
    falsePositiveRate(): number;
    /**
     * Get the average token use per iteration across all tests
     */
    averageTokenUse(): number;
    /**
     * Get the full suite results
     */
    getResults(): EvalSuiteResult | null;
    /**
     * Get the name of the suite
     */
    getName(): string;
    /**
     * Get the number of tests in the suite
     */
    size(): number;
}
1987
+
1988
+ /**
1989
+ * Model factory for creating AI SDK language models from provider/model strings.
1990
+ * Supports both built-in providers and user-defined custom providers.
1991
+ */
1992
+
1993
/**
 * Custom base URLs for built-in providers that support them.
 */
interface BaseUrls {
    /** Override base URL for the Ollama provider. */
    ollama?: string;
    /** Override base URL for the Azure provider. */
    azure?: string;
    /** Override base URL for the Anthropic provider. */
    anthropic?: string;
    /** Override base URL for the OpenAI provider. */
    openai?: string;
}
2002
/**
 * Options for creating a model.
 */
interface CreateModelOptions {
    /** API key for the target provider. */
    apiKey: string;
    /** Optional base-URL overrides for built-in providers. */
    baseUrls?: BaseUrls;
    /** Custom providers registry (name -> config) */
    customProviders?: Map<string, CustomProvider> | Record<string, CustomProvider>;
}
2011
/**
 * Result of parsing an LLM string.
 * Discriminated on `type`: "builtin" for known providers, "custom" for
 * user-registered provider names.
 */
type ParsedLLMString = {
    type: "builtin";
    /** One of the built-in providers. */
    provider: LLMProvider;
    /** Model identifier (the part after the slash). */
    model: string;
} | {
    type: "custom";
    /** Name of a user-registered custom provider. */
    providerName: string;
    /** Model identifier (the part after the slash). */
    model: string;
};
2023
/**
 * Parse an LLM string into provider and model components.
 * Supports both built-in providers and custom provider names.
 *
 * @param llmString - String in format "provider/model" (e.g., "openai/gpt-4o" or "my-litellm/gpt-4")
 * @param customProviderNames - Optional set of registered custom provider names for validation
 * @returns Parsed result with type discriminator ("builtin" | "custom")
 */
declare function parseLLMString(llmString: string, customProviderNames?: Set<string>): ParsedLLMString;
2032
/**
 * Model type returned by provider factories.
 * Derived from the OpenAI factory's return type (the AI SDK language-model shape).
 */
type ProviderLanguageModel = ReturnType<ReturnType<typeof createOpenAI>>;
2036
/**
 * Create a language model from an LLM string.
 * @param llmString - String in format "provider/model" (e.g., "openai/gpt-4o" or "my-provider/model")
 * @param options - API key, optional base URLs, and custom providers registry
 * @returns AI SDK language model instance
 */
declare function createModelFromString(llmString: string, options: CreateModelOptions): ProviderLanguageModel;
2043
/**
 * Parse a comma-separated string of model IDs into an array.
 * Handles whitespace and empty entries.
 *
 * @param modelIdsString - Comma-separated model IDs (e.g., "gpt-4o, gpt-4o-mini")
 * @returns Array of model IDs with whitespace and empty entries removed
 */
declare function parseModelIds(modelIdsString: string): string[];
2048
/**
 * Create a CustomProvider configuration from user input.
 * This is a helper for building the configuration from form inputs.
 */
declare function createCustomProvider(config: {
    /** Provider name, used as the prefix in "name/model" strings. */
    name: string;
    /** Wire protocol the provider speaks. */
    protocol: "openai-compatible" | "anthropic-compatible";
    /** Base URL of the provider's API endpoint. */
    baseUrl: string;
    /** Model IDs, either as an array or a single string. */
    modelIds: string | string[];
    /** API key supplied directly. */
    apiKey?: string;
    /** Name of an environment variable holding the API key. */
    apiKeyEnvVar?: string;
    /** Use the chat-completions endpoint (required by some proxies, e.g. LiteLLM). */
    useChatCompletions?: boolean;
}): CustomProvider;
2061
/**
 * Preset configurations for common OpenAI-compatible providers.
 * Users can use these as starting points and customize as needed.
 */
declare const PROVIDER_PRESETS: {
    /** LiteLLM proxy - requires useChatCompletions */
    readonly litellm: (baseUrl: string | undefined, modelIds: string[]) => CustomProvider;
};
2069
+
2070
+ export { type AiSdkTool, type BaseServerConfig, type BaseUrls, type CallToolExecutor, type ClientCapabilityOptions, type CompatibleProtocol, type ConvertedToolSet, type CreateModelOptions, type CustomProvider, type ElicitationCallback, type ElicitationCallbackRequest, type ElicitationHandler, type EvalRunResult, EvalSuite, type EvalSuiteConfig, type EvalSuiteResult, EvalTest, type EvalTestConfig, type EvalTestRunOptions, type ExecuteToolArguments, type HttpServerConfig, type IterationResult, type LLMConfig, type LLMProvider, type LatencyBreakdown, type LatencyStats, type ListToolsResult, MCPAuthError, MCPClientManager, type MCPClientManagerConfig, type MCPClientManagerOptions, type MCPConnectionStatus, MCPError, type MCPGetPromptResult, type MCPListTasksResult, type MCPPrompt, type MCPPromptListResult, type MCPReadResourceResult, type MCPResource, type MCPResourceListResult, type MCPResourceTemplate, type MCPResourceTemplateListResult, type MCPServerConfig, type MCPServerSummary, type MCPTask, type MCPTaskStatus, PROVIDER_PRESETS, type ParsedLLMString, type ProgressEvent, type ProgressHandler, type PromptOptions, PromptResult, type PromptResultData, type ProviderLanguageModel, type RpcLogEvent, type RpcLogger, type ServerSummary, type StdioServerConfig, type TaskOptions, TestAgent, type TestAgentConfig, type TestResult, type TokenUsage, type Tool, type ToolCall, type ToolExecuteOptions, type ToolSchemaOverrides, buildRequestInit, calculateLatencyStats, calculatePercentile, convertMCPToolsToVercelTools, createCustomProvider, createModelFromString, ensureJsonSchemaObject, extractToolCalls, extractToolNames, formatError, isAuthError, isMCPAuthError, isMethodUnavailableError, matchAnyToolCall, matchNoToolCalls, matchToolArgument, matchToolArgumentWith, matchToolCallCount, matchToolCallWithArgs, matchToolCallWithPartialArgs, matchToolCalls, matchToolCallsSubset, parseLLMString, parseModelIds, supportsTasksCancel, supportsTasksForToolCalls, supportsTasksList };