@agentforge/testing 0.16.61 → 0.16.63
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.cts +800 -0
- package/dist/index.d.ts +800 -0
- package/package.json +3 -3
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,800 @@
|
|
|
1
|
+
import { BaseChatModel } from '@langchain/core/language_models/chat_models';
|
|
2
|
+
import { BaseMessage } from '@langchain/core/messages';
|
|
3
|
+
import { ChatResult } from '@langchain/core/outputs';
|
|
4
|
+
import { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
|
|
5
|
+
import { z } from 'zod';
|
|
6
|
+
import { Tool, ToolCategory, Logger } from '@agentforge/core';
|
|
7
|
+
|
|
8
|
+
/**
|
|
9
|
+
* Configuration for MockLLM
|
|
10
|
+
*/
|
|
11
|
+
interface MockLLMConfig {
|
|
12
|
+
/**
|
|
13
|
+
* Predefined responses to return
|
|
14
|
+
*/
|
|
15
|
+
responses?: string[];
|
|
16
|
+
/**
|
|
17
|
+
* Response generator function
|
|
18
|
+
*/
|
|
19
|
+
responseGenerator?: (messages: BaseMessage[]) => string;
|
|
20
|
+
/**
|
|
21
|
+
* Delay in milliseconds before responding
|
|
22
|
+
*/
|
|
23
|
+
delay?: number;
|
|
24
|
+
/**
|
|
25
|
+
* Whether to throw an error
|
|
26
|
+
*/
|
|
27
|
+
shouldError?: boolean;
|
|
28
|
+
/**
|
|
29
|
+
* Error message to throw
|
|
30
|
+
*/
|
|
31
|
+
errorMessage?: string;
|
|
32
|
+
/**
|
|
33
|
+
* Model name to report
|
|
34
|
+
*/
|
|
35
|
+
modelName?: string;
|
|
36
|
+
}
|
|
37
|
+
/**
|
|
38
|
+
* Mock LLM for testing
|
|
39
|
+
*
|
|
40
|
+
* @example
|
|
41
|
+
* ```typescript
|
|
42
|
+
* const llm = createMockLLM({
|
|
43
|
+
* responses: ['Hello!', 'How can I help?']
|
|
44
|
+
* });
|
|
45
|
+
*
|
|
46
|
+
* const result = await llm.invoke([new HumanMessage('Hi')]);
|
|
47
|
+
* console.log(result.content); // 'Hello!'
|
|
48
|
+
* ```
|
|
49
|
+
*/
|
|
50
|
+
declare class MockLLM extends BaseChatModel {
|
|
51
|
+
private responses;
|
|
52
|
+
private responseGenerator?;
|
|
53
|
+
private delay;
|
|
54
|
+
private shouldError;
|
|
55
|
+
private errorMessage;
|
|
56
|
+
private callCount;
|
|
57
|
+
_llmType(): string;
|
|
58
|
+
constructor(config?: MockLLMConfig);
|
|
59
|
+
_generate(messages: BaseMessage[], options?: this['ParsedCallOptions'], runManager?: CallbackManagerForLLMRun): Promise<ChatResult>;
|
|
60
|
+
/**
|
|
61
|
+
* Get the number of times the LLM has been called
|
|
62
|
+
*/
|
|
63
|
+
getCallCount(): number;
|
|
64
|
+
/**
|
|
65
|
+
* Reset the call count
|
|
66
|
+
*/
|
|
67
|
+
resetCallCount(): void;
|
|
68
|
+
}
|
|
69
|
+
/**
|
|
70
|
+
* Create a mock LLM for testing
|
|
71
|
+
*/
|
|
72
|
+
declare function createMockLLM(config?: MockLLMConfig): MockLLM;
|
|
73
|
+
/**
|
|
74
|
+
* Create a mock LLM that echoes the last message
|
|
75
|
+
*/
|
|
76
|
+
declare function createEchoLLM(): MockLLM;
|
|
77
|
+
/**
|
|
78
|
+
* Create a mock LLM that always errors
|
|
79
|
+
*/
|
|
80
|
+
declare function createErrorLLM(errorMessage?: string): MockLLM;
|
|
81
|
+
|
|
82
|
+
declare const defaultMockToolSchema: z.ZodObject<{
|
|
83
|
+
input: z.ZodString;
|
|
84
|
+
}, "strip", z.ZodTypeAny, {
|
|
85
|
+
input: string;
|
|
86
|
+
}, {
|
|
87
|
+
input: string;
|
|
88
|
+
}>;
|
|
89
|
+
type DefaultMockToolSchema = typeof defaultMockToolSchema;
|
|
90
|
+
type MockToolSchema = z.ZodTypeAny;
|
|
91
|
+
type MockToolInput<TSchema extends MockToolSchema> = z.infer<TSchema>;
|
|
92
|
+
type MockToolInstance<TSchema extends MockToolSchema> = Tool<MockToolInput<TSchema>, string>;
|
|
93
|
+
type MockToolConfigWithoutSchema = Omit<MockToolConfig<DefaultMockToolSchema>, 'schema'> & {
|
|
94
|
+
schema?: undefined;
|
|
95
|
+
};
|
|
96
|
+
type SchemaBackedMockToolConfig<TSchema extends MockToolSchema> = MockToolConfig<TSchema> & {
|
|
97
|
+
schema: TSchema;
|
|
98
|
+
};
|
|
99
|
+
/**
|
|
100
|
+
* Configuration for mock tool
|
|
101
|
+
*/
|
|
102
|
+
interface MockToolConfig<TSchema extends MockToolSchema = DefaultMockToolSchema> {
|
|
103
|
+
/**
|
|
104
|
+
* Tool name
|
|
105
|
+
*/
|
|
106
|
+
name?: string;
|
|
107
|
+
/**
|
|
108
|
+
* Tool description
|
|
109
|
+
*/
|
|
110
|
+
description?: string;
|
|
111
|
+
/**
|
|
112
|
+
* Tool category
|
|
113
|
+
*/
|
|
114
|
+
category?: ToolCategory;
|
|
115
|
+
/**
|
|
116
|
+
* Input schema
|
|
117
|
+
*/
|
|
118
|
+
schema?: TSchema;
|
|
119
|
+
/**
|
|
120
|
+
* Implementation function
|
|
121
|
+
*/
|
|
122
|
+
implementation?: (input: MockToolInput<TSchema>) => Promise<string> | string;
|
|
123
|
+
/**
|
|
124
|
+
* Whether to throw an error
|
|
125
|
+
*/
|
|
126
|
+
shouldError?: boolean;
|
|
127
|
+
/**
|
|
128
|
+
* Error message to throw
|
|
129
|
+
*/
|
|
130
|
+
errorMessage?: string;
|
|
131
|
+
/**
|
|
132
|
+
* Delay in milliseconds before responding
|
|
133
|
+
*/
|
|
134
|
+
delay?: number;
|
|
135
|
+
}
|
|
136
|
+
declare function createMockTool(): MockToolInstance<DefaultMockToolSchema>;
|
|
137
|
+
declare function createMockTool(config: MockToolConfigWithoutSchema): MockToolInstance<DefaultMockToolSchema>;
|
|
138
|
+
declare function createMockTool<TSchema extends MockToolSchema>(config: SchemaBackedMockToolConfig<TSchema>): MockToolInstance<TSchema>;
|
|
139
|
+
/**
|
|
140
|
+
* Create a mock tool that echoes its input
|
|
141
|
+
*/
|
|
142
|
+
declare function createEchoTool(name?: string): MockToolInstance<z.ZodObject<{
|
|
143
|
+
message: z.ZodString;
|
|
144
|
+
}, "strip", z.ZodTypeAny, {
|
|
145
|
+
message: string;
|
|
146
|
+
}, {
|
|
147
|
+
message: string;
|
|
148
|
+
}>>;
|
|
149
|
+
/**
|
|
150
|
+
* Create a mock tool that always errors
|
|
151
|
+
*/
|
|
152
|
+
declare function createErrorTool(name?: string, errorMessage?: string): MockToolInstance<z.ZodObject<{
|
|
153
|
+
input: z.ZodString;
|
|
154
|
+
}, "strip", z.ZodTypeAny, {
|
|
155
|
+
input: string;
|
|
156
|
+
}, {
|
|
157
|
+
input: string;
|
|
158
|
+
}>>;
|
|
159
|
+
/**
|
|
160
|
+
* Create a mock tool with a delay
|
|
161
|
+
*/
|
|
162
|
+
declare function createDelayedTool(name?: string, delay?: number): MockToolInstance<z.ZodObject<{
|
|
163
|
+
input: z.ZodString;
|
|
164
|
+
}, "strip", z.ZodTypeAny, {
|
|
165
|
+
input: string;
|
|
166
|
+
}, {
|
|
167
|
+
input: string;
|
|
168
|
+
}>>;
|
|
169
|
+
/**
|
|
170
|
+
* Create a calculator mock tool
|
|
171
|
+
*/
|
|
172
|
+
declare function createCalculatorTool(): MockToolInstance<z.ZodObject<{
|
|
173
|
+
operation: z.ZodEnum<["add", "subtract", "multiply", "divide"]>;
|
|
174
|
+
a: z.ZodNumber;
|
|
175
|
+
b: z.ZodNumber;
|
|
176
|
+
}, "strip", z.ZodTypeAny, {
|
|
177
|
+
operation: "add" | "subtract" | "multiply" | "divide";
|
|
178
|
+
a: number;
|
|
179
|
+
b: number;
|
|
180
|
+
}, {
|
|
181
|
+
operation: "add" | "subtract" | "multiply" | "divide";
|
|
182
|
+
a: number;
|
|
183
|
+
b: number;
|
|
184
|
+
}>>;
|
|
185
|
+
|
|
186
|
+
type StateBuilderFields = Record<string, unknown> & {
|
|
187
|
+
messages?: BaseMessage[];
|
|
188
|
+
};
|
|
189
|
+
type MessageState = {
|
|
190
|
+
messages: BaseMessage[];
|
|
191
|
+
};
|
|
192
|
+
type BuiltState<TState extends StateBuilderFields> = TState & Partial<MessageState>;
|
|
193
|
+
interface TestToolCall<TArgs = unknown> {
|
|
194
|
+
name: string;
|
|
195
|
+
args: TArgs;
|
|
196
|
+
}
|
|
197
|
+
interface TestToolResult<TResult = string> {
|
|
198
|
+
name: string;
|
|
199
|
+
result: TResult;
|
|
200
|
+
}
|
|
201
|
+
interface ReActTestState<TArgs = unknown, TResult = string> {
|
|
202
|
+
messages: BaseMessage[];
|
|
203
|
+
thoughts: string[];
|
|
204
|
+
toolCalls: Array<TestToolCall<TArgs>>;
|
|
205
|
+
toolResults: Array<TestToolResult<TResult>>;
|
|
206
|
+
scratchpad: string[];
|
|
207
|
+
iterations: number;
|
|
208
|
+
maxIterations: number;
|
|
209
|
+
}
|
|
210
|
+
interface PlanningStep<TStatus extends string = string> {
|
|
211
|
+
step: string;
|
|
212
|
+
status: TStatus;
|
|
213
|
+
}
|
|
214
|
+
interface PlanningTestState<TResultMap extends Record<string, unknown> = Record<string, unknown>, TStatus extends string = string> extends StateBuilderFields {
|
|
215
|
+
messages: BaseMessage[];
|
|
216
|
+
plan: Array<PlanningStep<TStatus>>;
|
|
217
|
+
currentStep: number;
|
|
218
|
+
results: Partial<TResultMap>;
|
|
219
|
+
}
|
|
220
|
+
/**
|
|
221
|
+
* Builder for creating test states
|
|
222
|
+
*/
|
|
223
|
+
declare class StateBuilder<TState extends StateBuilderFields = StateBuilderFields> {
|
|
224
|
+
private state;
|
|
225
|
+
/**
|
|
226
|
+
* Set a field in the state
|
|
227
|
+
*/
|
|
228
|
+
set<K extends keyof TState>(key: K, value: TState[K]): this;
|
|
229
|
+
/**
|
|
230
|
+
* Set multiple fields in the state
|
|
231
|
+
*/
|
|
232
|
+
setMany(fields: Partial<TState>): this;
|
|
233
|
+
private ensureMessages;
|
|
234
|
+
/**
|
|
235
|
+
* Add a message to the messages array
|
|
236
|
+
*/
|
|
237
|
+
addMessage(message: BaseMessage): this;
|
|
238
|
+
/**
|
|
239
|
+
* Add multiple messages
|
|
240
|
+
*/
|
|
241
|
+
addMessages(messages: ReadonlyArray<BaseMessage>): this;
|
|
242
|
+
/**
|
|
243
|
+
* Add a human message
|
|
244
|
+
*/
|
|
245
|
+
addHumanMessage(content: string): this;
|
|
246
|
+
/**
|
|
247
|
+
* Add an AI message
|
|
248
|
+
*/
|
|
249
|
+
addAIMessage(content: string): this;
|
|
250
|
+
/**
|
|
251
|
+
* Add a system message
|
|
252
|
+
*/
|
|
253
|
+
addSystemMessage(content: string): this;
|
|
254
|
+
/**
|
|
255
|
+
* Build the state
|
|
256
|
+
*/
|
|
257
|
+
build(): BuiltState<TState>;
|
|
258
|
+
/**
|
|
259
|
+
* Reset the builder
|
|
260
|
+
*/
|
|
261
|
+
reset(): this;
|
|
262
|
+
}
|
|
263
|
+
/**
|
|
264
|
+
* Create a state builder
|
|
265
|
+
*/
|
|
266
|
+
declare function createStateBuilder<TState extends StateBuilderFields = StateBuilderFields>(): StateBuilder<TState>;
|
|
267
|
+
/**
|
|
268
|
+
* Create a simple conversation state
|
|
269
|
+
*/
|
|
270
|
+
declare function createConversationState(messages: ReadonlyArray<string>): MessageState;
|
|
271
|
+
/**
|
|
272
|
+
* Create a ReAct agent state
|
|
273
|
+
*/
|
|
274
|
+
declare function createReActState<TArgs = unknown, TResult = string>(config?: Partial<ReActTestState<TArgs, TResult>>): ReActTestState<TArgs, TResult>;
|
|
275
|
+
/**
|
|
276
|
+
* Create a planning agent state
|
|
277
|
+
*/
|
|
278
|
+
declare function createPlanningState<TResultMap extends Record<string, unknown> = Record<string, unknown>, TStatus extends string = string>(config?: Partial<PlanningTestState<TResultMap, TStatus>>): PlanningTestState<TResultMap, TStatus>;
|
|
279
|
+
|
|
280
|
+
type ToolCall<TArgs = unknown> = {
|
|
281
|
+
name: string;
|
|
282
|
+
args: TArgs;
|
|
283
|
+
};
|
|
284
|
+
type MessageLike<TType extends string = string> = {
|
|
285
|
+
content: unknown;
|
|
286
|
+
_getType: () => TType;
|
|
287
|
+
};
|
|
288
|
+
type AssertedMessage<TType extends string = string> = BaseMessage | MessageLike<TType>;
|
|
289
|
+
/**
|
|
290
|
+
* Assert that a value is a message, optionally of a specific type ('human', 'ai', 'system' or 'tool')
|
|
291
|
+
*/
|
|
292
|
+
declare function assertIsMessage(value: unknown): asserts value is AssertedMessage;
|
|
293
|
+
declare function assertIsMessage(value: unknown, type: 'human'): asserts value is AssertedMessage<'human'>;
|
|
294
|
+
declare function assertIsMessage(value: unknown, type: 'ai'): asserts value is AssertedMessage<'ai'>;
|
|
295
|
+
declare function assertIsMessage(value: unknown, type: 'system'): asserts value is AssertedMessage<'system'>;
|
|
296
|
+
declare function assertIsMessage(value: unknown, type: 'tool'): asserts value is AssertedMessage<'tool'>;
|
|
297
|
+
/**
|
|
298
|
+
* Assert that the messages array contains a message with specific content
|
|
299
|
+
*/
|
|
300
|
+
declare function assertMessageContains(messages: BaseMessage[], content: string): void;
|
|
301
|
+
/**
|
|
302
|
+
* Assert that the last message contains specific content
|
|
303
|
+
*/
|
|
304
|
+
declare function assertLastMessageContains(messages: BaseMessage[], content: string): void;
|
|
305
|
+
/**
|
|
306
|
+
* Assert that state has the required fields
|
|
307
|
+
*/
|
|
308
|
+
declare function assertStateHasFields<TState extends object>(state: TState, fields: ReadonlyArray<keyof TState & (string | number)>): void;
|
|
309
|
+
/**
|
|
310
|
+
* Assert that a tool was called, optionally with specific arguments
|
|
311
|
+
*/
|
|
312
|
+
declare function assertToolCalled(toolCalls: ReadonlyArray<ToolCall>, toolName: string): void;
|
|
313
|
+
declare function assertToolCalled<TArgs extends Record<string, unknown>>(toolCalls: ReadonlyArray<ToolCall<TArgs>>, toolName: string, args: Partial<TArgs>): void;
|
|
314
|
+
/**
|
|
315
|
+
* Assert that execution completes within the time limit (maxMs)
|
|
316
|
+
*/
|
|
317
|
+
declare function assertCompletesWithin(fn: () => Promise<unknown>, maxMs: number): Promise<void>;
|
|
318
|
+
/**
|
|
319
|
+
* Assert that a promise-returning function throws an error with a specific message
|
|
320
|
+
*/
|
|
321
|
+
declare function assertThrowsWithMessage(fn: () => Promise<unknown>, message: string): Promise<void>;
|
|
322
|
+
/**
|
|
323
|
+
* Assert that state matches a snapshot
|
|
324
|
+
*/
|
|
325
|
+
declare function assertStateSnapshot<TState extends object>(state: TState, snapshot: Partial<TState>): void;
|
|
326
|
+
/**
|
|
327
|
+
* Assert that messages have alternating types (human, ai, human, ai, ...)
|
|
328
|
+
*/
|
|
329
|
+
declare function assertAlternatingMessages(messages: BaseMessage[]): void;
|
|
330
|
+
/**
|
|
331
|
+
* Assert that an array is not empty
|
|
332
|
+
*/
|
|
333
|
+
declare function assertNotEmpty<T>(array: readonly T[]): void;
|
|
334
|
+
/**
|
|
335
|
+
* Assert that a value is within a range
|
|
336
|
+
*/
|
|
337
|
+
declare function assertInRange(value: number, min: number, max: number): void;
|
|
338
|
+
/**
|
|
339
|
+
* Assert that agent iterations are within limit
|
|
340
|
+
*/
|
|
341
|
+
declare function assertIterationsWithinLimit(iterations: number, maxIterations: number): void;
|
|
342
|
+
/**
|
|
343
|
+
* Assert that an object contains the expected keys
|
|
344
|
+
*/
|
|
345
|
+
declare function assertHasKeys<TObject extends object>(obj: TObject, keys: ReadonlyArray<keyof TObject & string>): void;
|
|
346
|
+
|
|
347
|
+
/**
|
|
348
|
+
* Sample conversation: Simple greeting
|
|
349
|
+
*/
|
|
350
|
+
declare const simpleGreeting: BaseMessage[];
|
|
351
|
+
/**
|
|
352
|
+
* Sample conversation: Multi-turn conversation
|
|
353
|
+
*/
|
|
354
|
+
declare const multiTurnConversation: BaseMessage[];
|
|
355
|
+
/**
|
|
356
|
+
* Sample conversation: Tool usage
|
|
357
|
+
*/
|
|
358
|
+
declare const toolUsageConversation: BaseMessage[];
|
|
359
|
+
/**
|
|
360
|
+
* Sample conversation: Error handling
|
|
361
|
+
*/
|
|
362
|
+
declare const errorHandlingConversation: BaseMessage[];
|
|
363
|
+
/**
|
|
364
|
+
* Sample conversation: Complex reasoning
|
|
365
|
+
*/
|
|
366
|
+
declare const complexReasoningConversation: BaseMessage[];
|
|
367
|
+
/**
|
|
368
|
+
* Sample conversation: Long context
|
|
369
|
+
*/
|
|
370
|
+
declare const longContextConversation: BaseMessage[];
|
|
371
|
+
/**
|
|
372
|
+
* Create a custom conversation
|
|
373
|
+
*/
|
|
374
|
+
declare function createConversation(exchanges: Array<{
|
|
375
|
+
human: string;
|
|
376
|
+
ai: string;
|
|
377
|
+
}>): BaseMessage[];
|
|
378
|
+
/**
|
|
379
|
+
* Create a conversation with system message
|
|
380
|
+
*/
|
|
381
|
+
declare function createConversationWithSystem(systemPrompt: string, exchanges: Array<{
|
|
382
|
+
human: string;
|
|
383
|
+
ai: string;
|
|
384
|
+
}>): BaseMessage[];
|
|
385
|
+
/**
|
|
386
|
+
* Sample data for testing
|
|
387
|
+
*/
|
|
388
|
+
declare const sampleData: {
|
|
389
|
+
/**
|
|
390
|
+
* Sample user inputs
|
|
391
|
+
*/
|
|
392
|
+
userInputs: string[];
|
|
393
|
+
/**
|
|
394
|
+
* Sample AI responses
|
|
395
|
+
*/
|
|
396
|
+
aiResponses: string[];
|
|
397
|
+
/**
|
|
398
|
+
* Sample tool calls
|
|
399
|
+
*/
|
|
400
|
+
toolCalls: ({
|
|
401
|
+
name: string;
|
|
402
|
+
args: {
|
|
403
|
+
operation: string;
|
|
404
|
+
a: number;
|
|
405
|
+
b: number;
|
|
406
|
+
query?: undefined;
|
|
407
|
+
};
|
|
408
|
+
} | {
|
|
409
|
+
name: string;
|
|
410
|
+
args: {
|
|
411
|
+
query: string;
|
|
412
|
+
operation?: undefined;
|
|
413
|
+
a?: undefined;
|
|
414
|
+
b?: undefined;
|
|
415
|
+
};
|
|
416
|
+
} | {
|
|
417
|
+
name: string;
|
|
418
|
+
args: {
|
|
419
|
+
operation?: undefined;
|
|
420
|
+
a?: undefined;
|
|
421
|
+
b?: undefined;
|
|
422
|
+
query?: undefined;
|
|
423
|
+
};
|
|
424
|
+
})[];
|
|
425
|
+
/**
|
|
426
|
+
* Sample tool results
|
|
427
|
+
*/
|
|
428
|
+
toolResults: {
|
|
429
|
+
name: string;
|
|
430
|
+
result: string;
|
|
431
|
+
}[];
|
|
432
|
+
};
|
|
433
|
+
|
|
434
|
+
/**
|
|
435
|
+
* Sample calculator tool
|
|
436
|
+
*/
|
|
437
|
+
declare const calculatorTool: Tool<{
|
|
438
|
+
operation: "add" | "subtract" | "multiply" | "divide";
|
|
439
|
+
a: number;
|
|
440
|
+
b: number;
|
|
441
|
+
}, string>;
|
|
442
|
+
/**
|
|
443
|
+
* Sample search tool
|
|
444
|
+
*/
|
|
445
|
+
declare const searchTool: Tool<{
|
|
446
|
+
query: string;
|
|
447
|
+
limit?: number | undefined;
|
|
448
|
+
}, string>;
|
|
449
|
+
/**
|
|
450
|
+
* Sample time tool
|
|
451
|
+
*/
|
|
452
|
+
declare const timeTool: Tool<{
|
|
453
|
+
_dummy?: string | undefined;
|
|
454
|
+
}, string>;
|
|
455
|
+
/**
|
|
456
|
+
* Sample weather tool
|
|
457
|
+
*/
|
|
458
|
+
declare const weatherTool: Tool<{
|
|
459
|
+
location: string;
|
|
460
|
+
units?: "celsius" | "fahrenheit" | undefined;
|
|
461
|
+
}, string>;
|
|
462
|
+
/**
|
|
463
|
+
* Sample file reader tool
|
|
464
|
+
*/
|
|
465
|
+
declare const fileReaderTool: Tool<{
|
|
466
|
+
path: string;
|
|
467
|
+
}, string>;
|
|
468
|
+
/**
|
|
469
|
+
* Sample database query tool
|
|
470
|
+
*/
|
|
471
|
+
declare const databaseQueryTool: Tool<{
|
|
472
|
+
query: string;
|
|
473
|
+
}, string>;
|
|
474
|
+
/**
|
|
475
|
+
* All sample tools
|
|
476
|
+
*/
|
|
477
|
+
declare const sampleTools: (Tool<{
|
|
478
|
+
operation: "add" | "subtract" | "multiply" | "divide";
|
|
479
|
+
a: number;
|
|
480
|
+
b: number;
|
|
481
|
+
}, string> | Tool<{
|
|
482
|
+
query: string;
|
|
483
|
+
limit?: number | undefined;
|
|
484
|
+
}, string> | Tool<{
|
|
485
|
+
_dummy?: string | undefined;
|
|
486
|
+
}, string> | Tool<{
|
|
487
|
+
location: string;
|
|
488
|
+
units?: "celsius" | "fahrenheit" | undefined;
|
|
489
|
+
}, string> | Tool<{
|
|
490
|
+
path: string;
|
|
491
|
+
}, string> | Tool<{
|
|
492
|
+
query: string;
|
|
493
|
+
}, string>)[];
|
|
494
|
+
/**
|
|
495
|
+
* Get tools by category
|
|
496
|
+
*/
|
|
497
|
+
declare function getToolsByCategory(category: ToolCategory): (Tool<{
|
|
498
|
+
operation: "add" | "subtract" | "multiply" | "divide";
|
|
499
|
+
a: number;
|
|
500
|
+
b: number;
|
|
501
|
+
}, string> | Tool<{
|
|
502
|
+
query: string;
|
|
503
|
+
limit?: number | undefined;
|
|
504
|
+
}, string> | Tool<{
|
|
505
|
+
_dummy?: string | undefined;
|
|
506
|
+
}, string> | Tool<{
|
|
507
|
+
location: string;
|
|
508
|
+
units?: "celsius" | "fahrenheit" | undefined;
|
|
509
|
+
}, string> | Tool<{
|
|
510
|
+
path: string;
|
|
511
|
+
}, string> | Tool<{
|
|
512
|
+
query: string;
|
|
513
|
+
}, string>)[];
|
|
514
|
+
/**
|
|
515
|
+
* Get tool by name
|
|
516
|
+
*/
|
|
517
|
+
declare function getToolByName(name: string): Tool<{
|
|
518
|
+
operation: "add" | "subtract" | "multiply" | "divide";
|
|
519
|
+
a: number;
|
|
520
|
+
b: number;
|
|
521
|
+
}, string> | Tool<{
|
|
522
|
+
query: string;
|
|
523
|
+
limit?: number | undefined;
|
|
524
|
+
}, string> | Tool<{
|
|
525
|
+
_dummy?: string | undefined;
|
|
526
|
+
}, string> | Tool<{
|
|
527
|
+
location: string;
|
|
528
|
+
units?: "celsius" | "fahrenheit" | undefined;
|
|
529
|
+
}, string> | Tool<{
|
|
530
|
+
path: string;
|
|
531
|
+
}, string> | Tool<{
|
|
532
|
+
query: string;
|
|
533
|
+
}, string> | undefined;
|
|
534
|
+
|
|
535
|
+
/**
|
|
536
|
+
* Agent-like contract used by the test runner.
|
|
537
|
+
*/
|
|
538
|
+
interface AgentTestAgent<TInput = unknown, TState = unknown> {
|
|
539
|
+
invoke(input: TInput): TState | Promise<TState>;
|
|
540
|
+
}
|
|
541
|
+
/**
|
|
542
|
+
* Captured runner step. The current runner preserves an empty step list, but
|
|
543
|
+
* this contract gives future step capture a typed state boundary.
|
|
544
|
+
*/
|
|
545
|
+
interface AgentTestRunnerStep<TState = unknown> {
|
|
546
|
+
state: TState;
|
|
547
|
+
messages: BaseMessage[];
|
|
548
|
+
timestamp: number;
|
|
549
|
+
}
|
|
550
|
+
/**
|
|
551
|
+
* Configuration for agent test runner
|
|
552
|
+
*/
|
|
553
|
+
interface AgentTestConfig<TState = unknown> {
|
|
554
|
+
/**
|
|
555
|
+
* Maximum time to wait for agent response (ms)
|
|
556
|
+
*/
|
|
557
|
+
timeout?: number;
|
|
558
|
+
/**
|
|
559
|
+
* Whether to capture intermediate steps
|
|
560
|
+
*/
|
|
561
|
+
captureSteps?: boolean;
|
|
562
|
+
/**
|
|
563
|
+
* Whether to validate state after each step
|
|
564
|
+
*/
|
|
565
|
+
validateState?: boolean;
|
|
566
|
+
/**
|
|
567
|
+
* Custom state validator
|
|
568
|
+
*/
|
|
569
|
+
stateValidator?: (state: TState | undefined) => boolean | Promise<boolean>;
|
|
570
|
+
}
|
|
571
|
+
/**
|
|
572
|
+
* Result from agent test run
|
|
573
|
+
*/
|
|
574
|
+
interface AgentTestResult<TState = unknown, TStep = AgentTestRunnerStep<TState>> {
|
|
575
|
+
/**
|
|
576
|
+
* Final state after execution
|
|
577
|
+
*/
|
|
578
|
+
finalState: TState | undefined;
|
|
579
|
+
/**
|
|
580
|
+
* Messages exchanged
|
|
581
|
+
*/
|
|
582
|
+
messages: BaseMessage[];
|
|
583
|
+
/**
|
|
584
|
+
* Execution time in milliseconds
|
|
585
|
+
*/
|
|
586
|
+
executionTime: number;
|
|
587
|
+
/**
|
|
588
|
+
* Intermediate steps (if captured)
|
|
589
|
+
*/
|
|
590
|
+
steps?: TStep[];
|
|
591
|
+
/**
|
|
592
|
+
* Whether the test passed
|
|
593
|
+
*/
|
|
594
|
+
passed: boolean;
|
|
595
|
+
/**
|
|
596
|
+
* Error if test failed
|
|
597
|
+
*/
|
|
598
|
+
error?: Error;
|
|
599
|
+
}
|
|
600
|
+
/**
|
|
601
|
+
* Agent test runner for integration testing
|
|
602
|
+
*
|
|
603
|
+
* @example
|
|
604
|
+
* ```typescript
|
|
605
|
+
* const runner = new AgentTestRunner(agent, {
|
|
606
|
+
* timeout: 5000,
|
|
607
|
+
* captureSteps: true
|
|
608
|
+
* });
|
|
609
|
+
*
|
|
610
|
+
* const result = await runner.run({
|
|
611
|
+
* messages: [new HumanMessage('Hello')]
|
|
612
|
+
* });
|
|
613
|
+
*
|
|
614
|
+
* expect(result.passed).toBe(true);
|
|
615
|
+
* expect(result.messages.length).toBeGreaterThan(1);
|
|
616
|
+
* ```
|
|
617
|
+
*/
|
|
618
|
+
declare class AgentTestRunner<TInput = unknown, TState = unknown, TStep = AgentTestRunnerStep<TState>> {
|
|
619
|
+
private agent;
|
|
620
|
+
private config;
|
|
621
|
+
constructor(agent: AgentTestAgent<TInput, TState>, config?: AgentTestConfig<TState>);
|
|
622
|
+
/**
|
|
623
|
+
* Run the agent with given input
|
|
624
|
+
*/
|
|
625
|
+
run(input: TInput): Promise<AgentTestResult<TState, TStep>>;
|
|
626
|
+
/**
|
|
627
|
+
* Run multiple test cases
|
|
628
|
+
*/
|
|
629
|
+
runMany(inputs: TInput[]): Promise<AgentTestResult<TState, TStep>[]>;
|
|
630
|
+
}
|
|
631
|
+
/**
|
|
632
|
+
* Create an agent test runner
|
|
633
|
+
*/
|
|
634
|
+
declare function createAgentTestRunner<TInput = unknown, TState = unknown, TStep = AgentTestRunnerStep<TState>>(agent: AgentTestAgent<TInput, TState>, config?: AgentTestConfig<TState>): AgentTestRunner<TInput, TState, TStep>;
|
|
635
|
+
|
|
636
|
+
/**
|
|
637
|
+
* Configuration for conversation simulator
|
|
638
|
+
*/
|
|
639
|
+
interface ConversationSimulatorConfig {
|
|
640
|
+
/**
|
|
641
|
+
* Maximum number of turns
|
|
642
|
+
*/
|
|
643
|
+
maxTurns?: number;
|
|
644
|
+
/**
|
|
645
|
+
* Delay between turns (ms)
|
|
646
|
+
*/
|
|
647
|
+
turnDelay?: number;
|
|
648
|
+
/**
|
|
649
|
+
* Whether to log conversation
|
|
650
|
+
*/
|
|
651
|
+
verbose?: boolean;
|
|
652
|
+
/**
|
|
653
|
+
* Structured logger used for verbose conversation output
|
|
654
|
+
*/
|
|
655
|
+
logger?: Logger;
|
|
656
|
+
/**
|
|
657
|
+
* Stop condition: predicate over the conversation messages (see the 'stop_condition' stop reason)
|
|
658
|
+
*/
|
|
659
|
+
stopCondition?: (messages: BaseMessage[]) => boolean;
|
|
660
|
+
}
|
|
661
|
+
/**
|
|
662
|
+
* Result from conversation simulation
|
|
663
|
+
*/
|
|
664
|
+
interface ConversationResult {
|
|
665
|
+
/**
|
|
666
|
+
* All messages in the conversation
|
|
667
|
+
*/
|
|
668
|
+
messages: BaseMessage[];
|
|
669
|
+
/**
|
|
670
|
+
* Number of turns
|
|
671
|
+
*/
|
|
672
|
+
turns: number;
|
|
673
|
+
/**
|
|
674
|
+
* Total execution time (ms)
|
|
675
|
+
*/
|
|
676
|
+
totalTime: number;
|
|
677
|
+
/**
|
|
678
|
+
* Whether conversation completed successfully
|
|
679
|
+
*/
|
|
680
|
+
completed: boolean;
|
|
681
|
+
/**
|
|
682
|
+
* Reason for stopping
|
|
683
|
+
*/
|
|
684
|
+
stopReason: 'max_turns' | 'stop_condition' | 'error';
|
|
685
|
+
/**
|
|
686
|
+
* Error if any
|
|
687
|
+
*/
|
|
688
|
+
error?: Error;
|
|
689
|
+
}
|
|
690
|
+
interface ConversationSimulatorInput {
|
|
691
|
+
messages: BaseMessage[];
|
|
692
|
+
}
|
|
693
|
+
/**
|
|
694
|
+
* Conversation simulator for testing multi-turn interactions
|
|
695
|
+
*
|
|
696
|
+
* @example
|
|
697
|
+
* ```typescript
|
|
698
|
+
* const simulator = new ConversationSimulator(agent, {
|
|
699
|
+
* maxTurns: 5,
|
|
700
|
+
* verbose: true
|
|
701
|
+
* });
|
|
702
|
+
*
|
|
703
|
+
* const result = await simulator.simulate([
|
|
704
|
+
* 'Hello',
|
|
705
|
+
* 'What can you do?',
|
|
706
|
+
* 'Help me calculate 2 + 2'
|
|
707
|
+
* ]);
|
|
708
|
+
*
|
|
709
|
+
* expect(result.turns).toBe(3);
|
|
710
|
+
* expect(result.completed).toBe(true);
|
|
711
|
+
* ```
|
|
712
|
+
*/
|
|
713
|
+
declare class ConversationSimulator<TState = unknown> {
|
|
714
|
+
private agent;
|
|
715
|
+
private config;
|
|
716
|
+
constructor(agent: AgentTestAgent<ConversationSimulatorInput, TState>, config?: ConversationSimulatorConfig);
|
|
717
|
+
/**
|
|
718
|
+
* Simulate a conversation with predefined user inputs
|
|
719
|
+
*/
|
|
720
|
+
simulate(userInputs: string[]): Promise<ConversationResult>;
|
|
721
|
+
/**
|
|
722
|
+
* Simulate a conversation with dynamic user input generation
|
|
723
|
+
*/
|
|
724
|
+
simulateDynamic(inputGenerator: (messages: BaseMessage[]) => string | null, maxTurns?: number): Promise<ConversationResult>;
|
|
725
|
+
private logVerboseTurn;
|
|
726
|
+
}
|
|
727
|
+
/**
|
|
728
|
+
* Create a conversation simulator
|
|
729
|
+
*/
|
|
730
|
+
declare function createConversationSimulator<TState = unknown>(agent: AgentTestAgent<ConversationSimulatorInput, TState>, config?: ConversationSimulatorConfig): ConversationSimulator<TState>;
|
|
731
|
+
|
|
732
|
+
type SnapshotObject = Record<string, unknown>;
|
|
733
|
+
interface SnapshotDiff {
|
|
734
|
+
added: SnapshotObject;
|
|
735
|
+
removed: SnapshotObject;
|
|
736
|
+
changed: Record<string, {
|
|
737
|
+
from: unknown;
|
|
738
|
+
to: unknown;
|
|
739
|
+
}>;
|
|
740
|
+
}
|
|
741
|
+
interface MessageSnapshot {
|
|
742
|
+
type: string;
|
|
743
|
+
content: unknown;
|
|
744
|
+
}
|
|
745
|
+
declare const ROOT_SNAPSHOT_DIFF_KEY = "$root";
|
|
746
|
+
/**
|
|
747
|
+
* Snapshot configuration
|
|
748
|
+
*/
|
|
749
|
+
interface SnapshotConfig {
|
|
750
|
+
/**
|
|
751
|
+
* Fields to include in snapshot
|
|
752
|
+
*/
|
|
753
|
+
includeFields?: string[];
|
|
754
|
+
/**
|
|
755
|
+
* Fields to exclude from snapshot
|
|
756
|
+
*/
|
|
757
|
+
excludeFields?: string[];
|
|
758
|
+
/**
|
|
759
|
+
* Whether to normalize timestamps
|
|
760
|
+
*/
|
|
761
|
+
normalizeTimestamps?: boolean;
|
|
762
|
+
/**
|
|
763
|
+
* Whether to normalize IDs
|
|
764
|
+
*/
|
|
765
|
+
normalizeIds?: boolean;
|
|
766
|
+
/**
|
|
767
|
+
* Custom normalizer function
|
|
768
|
+
*/
|
|
769
|
+
normalizer?: (value: unknown) => unknown;
|
|
770
|
+
}
|
|
771
|
+
/**
|
|
772
|
+
* Create a snapshot of state
|
|
773
|
+
*/
|
|
774
|
+
declare function createSnapshot<TState = unknown>(state: TState, config?: SnapshotConfig): unknown;
|
|
775
|
+
/**
|
|
776
|
+
* Assert that state matches snapshot
|
|
777
|
+
*/
|
|
778
|
+
declare function assertMatchesSnapshot<TState = unknown>(state: TState, config?: SnapshotConfig): void;
|
|
779
|
+
/**
|
|
780
|
+
* Create a snapshot of messages
|
|
781
|
+
*/
|
|
782
|
+
declare function createMessageSnapshot(messages: BaseMessage[], config?: SnapshotConfig): MessageSnapshot[];
|
|
783
|
+
/**
|
|
784
|
+
* Assert that messages match snapshot
|
|
785
|
+
*/
|
|
786
|
+
declare function assertMessagesMatchSnapshot(messages: BaseMessage[], config?: SnapshotConfig): void;
|
|
787
|
+
/**
|
|
788
|
+
* Compare two states for equality
|
|
789
|
+
*/
|
|
790
|
+
declare function compareStates<TState1 = unknown, TState2 = unknown>(state1: TState1, state2: TState2, config?: SnapshotConfig): boolean;
|
|
791
|
+
/**
|
|
792
|
+
* Create a diff between two states
|
|
793
|
+
*/
|
|
794
|
+
declare function createStateDiff<TState1 = unknown, TState2 = unknown>(state1: TState1, state2: TState2, config?: SnapshotConfig): SnapshotDiff;
|
|
795
|
+
/**
|
|
796
|
+
* Assert that state has changed
|
|
797
|
+
*/
|
|
798
|
+
declare function assertStateChanged<TStateBefore = unknown, TStateAfter = unknown>(stateBefore: TStateBefore, stateAfter: TStateAfter, expectedChanges: string[], config?: SnapshotConfig): void;
|
|
799
|
+
|
|
800
|
+
export { type AgentTestAgent, type AgentTestConfig, type AgentTestResult, AgentTestRunner, type AgentTestRunnerStep, type AssertedMessage, type ConversationResult, ConversationSimulator, type ConversationSimulatorConfig, type MessageSnapshot, MockLLM, type MockLLMConfig, type MockToolConfig, type PlanningStep, type PlanningTestState, ROOT_SNAPSHOT_DIFF_KEY, type ReActTestState, type SnapshotConfig, type SnapshotDiff, type SnapshotObject, StateBuilder, type TestToolCall, type TestToolResult, assertAlternatingMessages, assertCompletesWithin, assertHasKeys, assertInRange, assertIsMessage, assertIterationsWithinLimit, assertLastMessageContains, assertMatchesSnapshot, assertMessageContains, assertMessagesMatchSnapshot, assertNotEmpty, assertStateChanged, assertStateHasFields, assertStateSnapshot, assertThrowsWithMessage, assertToolCalled, calculatorTool, compareStates, complexReasoningConversation, createAgentTestRunner, createCalculatorTool, createConversation, createConversationSimulator, createConversationState, createConversationWithSystem, createDelayedTool, createEchoLLM, createEchoTool, createErrorLLM, createErrorTool, createMessageSnapshot, createMockLLM, createMockTool, createPlanningState, createReActState, createSnapshot, createStateBuilder, createStateDiff, databaseQueryTool, errorHandlingConversation, fileReaderTool, getToolByName, getToolsByCategory, longContextConversation, multiTurnConversation, sampleData, sampleTools, searchTool, simpleGreeting, timeTool, toolUsageConversation, weatherTool };
|