@agentforge/testing 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,708 @@
1
+ import { BaseChatModel } from '@langchain/core/language_models/chat_models';
2
+ import { BaseMessage } from '@langchain/core/messages';
3
+ import { ChatResult } from '@langchain/core/outputs';
4
+ import { CallbackManagerForLLMRun } from '@langchain/core/callbacks/manager';
5
+ import * as _agentforge_core from '@agentforge/core';
6
+ import { ToolCategory, Tool } from '@agentforge/core';
7
+ import { z } from 'zod';
8
+
9
+ /**
10
+ * Options accepted by the MockLLM constructor and by createMockLLM; every field is optional.
11
+ */
12
+ interface MockLLMConfig {
13
+ /**
14
+ * Predefined reply strings for the mock to return (presumably consumed in order; confirm against the implementation).
15
+ */
16
+ responses?: string[];
17
+ /**
18
+ * Callback that receives the incoming messages and returns the reply text; its precedence over `responses` is not visible in this declaration.
19
+ */
20
+ responseGenerator?: (messages: BaseMessage[]) => string;
21
+ /**
22
+ * Delay in milliseconds before responding (the default is not visible in this declaration).
23
+ */
24
+ delay?: number;
25
+ /**
26
+ * Whether the mock should throw an error instead of responding.
27
+ */
28
+ shouldError?: boolean;
29
+ /**
30
+ * Error message to throw (presumably only used when `shouldError` is set; confirm).
31
+ */
32
+ errorMessage?: string;
33
+ /**
34
+ * Model name the mock reports.
35
+ */
36
+ modelName?: string;
37
+ }
38
+ /**
39
+ * Mock chat model for testing: extends LangChain's BaseChatModel, is configured through MockLLMConfig, and exposes a call counter.
40
+ *
41
+ * @example
42
+ * ```typescript
43
+ * const llm = createMockLLM({
44
+ * responses: ['Hello!', 'How can I help?']
45
+ * });
46
+ *
47
+ * const result = await llm.invoke([new HumanMessage('Hi')]);
48
+ * console.log(result.content); // 'Hello!'
49
+ * ```
50
+ */
51
+ declare class MockLLM extends BaseChatModel {
52
+ private responses;
53
+ private responseGenerator?;
54
+ private delay;
55
+ private shouldError;
56
+ private errorMessage;
57
+ private callCount;
58
+ _llmType(): string;
59
+ constructor(config?: MockLLMConfig);
60
+ _generate(messages: BaseMessage[], options?: this['ParsedCallOptions'], runManager?: CallbackManagerForLLMRun): Promise<ChatResult>;
61
+ /**
62
+ * Get the number of times the LLM has been called (presumably since construction or the last resetCallCount; confirm).
63
+ */
64
+ getCallCount(): number;
65
+ /**
66
+ * Reset the call count reported by getCallCount.
67
+ */
68
+ resetCallCount(): void;
69
+ }
70
+ /**
71
+ * Create a MockLLM for testing from an optional MockLLMConfig.
72
+ */
73
+ declare function createMockLLM(config?: MockLLMConfig): MockLLM;
74
+ /**
75
+ * Create a MockLLM that echoes the last message; takes no arguments.
76
+ */
77
+ declare function createEchoLLM(): MockLLM;
78
+ /**
79
+ * Create a MockLLM that always errors, optionally with a custom error message (the default message is not visible here).
80
+ */
81
+ declare function createErrorLLM(errorMessage?: string): MockLLM;
82
+
83
+ /**
84
+ * Options accepted by createMockTool; T is the Zod schema type describing the tool input, and every field is optional.
85
+ */
86
+ interface MockToolConfig<T extends z.ZodType = z.ZodType> {
87
+ /**
88
+ * Tool name (the default is not visible in this declaration).
89
+ */
90
+ name?: string;
91
+ /**
92
+ * Human-readable tool description.
93
+ */
94
+ description?: string;
95
+ /**
96
+ * Tool category, using ToolCategory from @agentforge/core.
97
+ */
98
+ category?: ToolCategory;
99
+ /**
100
+ * Zod schema for the tool input; it determines the input type passed to `implementation`.
101
+ */
102
+ schema?: T;
103
+ /**
104
+ * Implementation function: receives the schema-inferred input and returns a string, either directly or via a Promise.
105
+ */
106
+ implementation?: (input: z.infer<T>) => Promise<string> | string;
107
+ /**
108
+ * Whether the tool should throw an error.
109
+ */
110
+ shouldError?: boolean;
111
+ /**
112
+ * Error message to throw (presumably only used when `shouldError` is set; confirm).
113
+ */
114
+ errorMessage?: string;
115
+ /**
116
+ * Delay in milliseconds before responding.
117
+ */
118
+ delay?: number;
119
+ }
120
+ /**
121
+ * Create a mock tool for testing from an optional MockToolConfig; the result is typed as a core Tool with `any` input and string output.
122
+ *
123
+ * @example
124
+ * ```typescript
125
+ * const tool = createMockTool({
126
+ * name: 'test_tool',
127
+ * schema: z.object({ input: z.string().describe('Input') }),
128
+ * implementation: async ({ input }) => `Processed: ${input}`
129
+ * });
130
+ *
131
+ * const result = await tool.execute({ input: 'test' });
132
+ * console.log(result); // 'Processed: test'
133
+ * ```
134
+ */
135
+ declare function createMockTool<T extends z.ZodType = any>(config?: MockToolConfig<T>): _agentforge_core.Tool<any, string>;
136
+ /**
137
+ * Create a mock tool that echoes its input, optionally under a custom name.
138
+ */
139
+ declare function createEchoTool(name?: string): _agentforge_core.Tool<any, string>;
140
+ /**
141
+ * Create a mock tool that always errors, optionally with a custom name and error message.
142
+ */
143
+ declare function createErrorTool(name?: string, errorMessage?: string): _agentforge_core.Tool<any, string>;
144
+ /**
145
+ * Create a mock tool with a delay, optionally with a custom name (delay presumably in milliseconds, matching MockToolConfig.delay; confirm).
146
+ */
147
+ declare function createDelayedTool(name?: string, delay?: number): _agentforge_core.Tool<any, string>;
148
+ /**
149
+ * Create a calculator mock tool; takes no arguments and is typed as a core Tool with `any` input and string output.
150
+ */
151
+ declare function createCalculatorTool(): _agentforge_core.Tool<any, string>;
152
+
153
+ /**
154
+ * Fluent builder for test states of type T: every mutator returns `this` so calls can be chained, and build() yields the state.
155
+ */
156
+ declare class StateBuilder<T extends Record<string, any> = Record<string, any>> {
157
+ private state;
158
+ /**
159
+ * Set a single field in the state; the value is type-checked against T[K].
160
+ */
161
+ set<K extends keyof T>(key: K, value: T[K]): this;
162
+ /**
163
+ * Set multiple fields in the state from a partial state object.
164
+ */
165
+ setMany(fields: Partial<T>): this;
166
+ /**
167
+ * Add a single BaseMessage to the messages array.
168
+ */
169
+ addMessage(message: BaseMessage): this;
170
+ /**
171
+ * Add multiple BaseMessage instances to the messages array.
172
+ */
173
+ addMessages(messages: BaseMessage[]): this;
174
+ /**
175
+ * Add a human message with the given string content.
176
+ */
177
+ addHumanMessage(content: string): this;
178
+ /**
179
+ * Add an AI message with the given string content.
180
+ */
181
+ addAIMessage(content: string): this;
182
+ /**
183
+ * Add a system message with the given string content.
184
+ */
185
+ addSystemMessage(content: string): this;
186
+ /**
187
+ * Build and return the state as T (whether this is a copy or the internal object is not visible here).
188
+ */
189
+ build(): T;
190
+ /**
191
+ * Reset the builder and return it for further chaining.
192
+ */
193
+ reset(): this;
194
+ }
195
+ /**
196
+ * Create a StateBuilder for state type T; takes no arguments.
197
+ */
198
+ declare function createStateBuilder<T extends Record<string, any> = Record<string, any>>(): StateBuilder<T>;
199
+ /**
200
+ * Create a simple conversation state: takes plain strings and returns an object whose `messages` field holds BaseMessage instances (how strings map to message roles is not visible here).
201
+ */
202
+ declare function createConversationState(messages: string[]): {
203
+ messages: BaseMessage[];
204
+ };
205
+ /**
206
+ * Create a ReAct agent state from optional messages, thoughts, tool calls, tool results, scratchpad and iteration counters; the result is typed `any`.
207
+ */
208
+ declare function createReActState(config?: {
209
+ messages?: BaseMessage[];
210
+ thoughts?: string[];
211
+ toolCalls?: Array<{
212
+ name: string;
213
+ args: any;
214
+ }>;
215
+ toolResults?: Array<{
216
+ name: string;
217
+ result: string;
218
+ }>;
219
+ scratchpad?: string[];
220
+ iterations?: number;
221
+ maxIterations?: number;
222
+ }): any;
223
+ /**
224
+ * Create a planning agent state from optional messages, plan steps (each with a `step` and `status`), current step index and results; the result is typed `any`.
225
+ */
226
+ declare function createPlanningState(config?: {
227
+ messages?: BaseMessage[];
228
+ plan?: Array<{
229
+ step: string;
230
+ status: string;
231
+ }>;
232
+ currentStep?: number;
233
+ results?: Record<string, any>;
234
+ }): any;
235
+
236
+ /**
237
+ * Assert that a value is a message, optionally of a specific type ('human', 'ai' or 'system'); on success `value` is narrowed to BaseMessage.
238
+ */
239
+ declare function assertIsMessage(value: any, type?: 'human' | 'ai' | 'system'): asserts value is BaseMessage;
240
+ /**
241
+ * Assert that the messages array contains a message with the given content string.
242
+ */
243
+ declare function assertMessageContains(messages: BaseMessage[], content: string): void;
244
+ /**
245
+ * Assert that the last message in the array contains the given content string.
246
+ */
247
+ declare function assertLastMessageContains(messages: BaseMessage[], content: string): void;
248
+ /**
249
+ * Assert that state has the required fields; each entry of `fields` must be a key of T.
250
+ */
251
+ declare function assertStateHasFields<T extends Record<string, any>>(state: T, fields: (keyof T)[]): void;
252
+ /**
253
+ * Assert that `toolCalls` includes a call to `toolName`, optionally with specific arguments (how `args` is compared is not visible here).
254
+ */
255
+ declare function assertToolCalled(toolCalls: Array<{
256
+ name: string;
257
+ args: any;
258
+ }>, toolName: string, args?: Record<string, any>): void;
259
+ /**
260
+ * Assert that the async function completes within the time limit `maxMs` (presumably milliseconds, per the parameter name).
261
+ */
262
+ declare function assertCompletesWithin(fn: () => Promise<any>, maxMs: number): Promise<void>;
263
+ /**
264
+ * Assert that the async function throws an error with the given message (exact versus substring matching is not visible here).
265
+ */
266
+ declare function assertThrowsWithMessage(fn: () => Promise<any>, message: string): Promise<void>;
267
+ /**
268
+ * Assert that `state` matches the supplied `snapshot` value (the comparison strategy is not visible here).
269
+ */
270
+ declare function assertStateSnapshot(state: any, snapshot: any): void;
271
+ /**
272
+ * Assert that the messages alternate in type (human, ai, human, ai, ...).
273
+ */
274
+ declare function assertAlternatingMessages(messages: BaseMessage[]): void;
275
+ /**
276
+ * Assert that the given array is not empty.
277
+ */
278
+ declare function assertNotEmpty<T>(array: T[]): void;
279
+ /**
280
+ * Assert that `value` is within the range from `min` to `max` (whether the bounds are inclusive is not visible here; confirm).
281
+ */
282
+ declare function assertInRange(value: number, min: number, max: number): void;
283
+ /**
284
+ * Assert that the agent's `iterations` count is within `maxIterations`.
285
+ */
286
+ declare function assertIterationsWithinLimit(iterations: number, maxIterations: number): void;
287
+ /**
288
+ * Assert that `obj` contains the expected keys; keys are plain strings here rather than `keyof T`.
289
+ */
290
+ declare function assertHasKeys<T extends Record<string, any>>(obj: T, keys: string[]): void;
291
+
292
+ /**
293
+ * Sample conversation fixture: a simple greeting, as an array of BaseMessage.
294
+ */
295
+ declare const simpleGreeting: BaseMessage[];
296
+ /**
297
+ * Sample conversation fixture: a multi-turn conversation, as an array of BaseMessage.
298
+ */
299
+ declare const multiTurnConversation: BaseMessage[];
300
+ /**
301
+ * Sample conversation fixture: tool usage, as an array of BaseMessage.
302
+ */
303
+ declare const toolUsageConversation: BaseMessage[];
304
+ /**
305
+ * Sample conversation fixture: error handling, as an array of BaseMessage.
306
+ */
307
+ declare const errorHandlingConversation: BaseMessage[];
308
+ /**
309
+ * Sample conversation fixture: complex reasoning, as an array of BaseMessage.
310
+ */
311
+ declare const complexReasoningConversation: BaseMessage[];
312
+ /**
313
+ * Sample conversation fixture: long context, as an array of BaseMessage.
314
+ */
315
+ declare const longContextConversation: BaseMessage[];
316
+ /**
317
+ * Create a custom conversation from human/ai text exchanges, returned as a BaseMessage array (presumably each exchange yields a human then an AI message; confirm).
318
+ */
319
+ declare function createConversation(exchanges: Array<{
320
+ human: string;
321
+ ai: string;
322
+ }>): BaseMessage[];
323
+ /**
324
+ * Create a conversation with a system message: takes a system prompt plus human/ai text exchanges and returns a BaseMessage array.
325
+ */
326
+ declare function createConversationWithSystem(systemPrompt: string, exchanges: Array<{
327
+ human: string;
328
+ ai: string;
329
+ }>): BaseMessage[];
330
+ /**
331
+ * Sample data for testing: user inputs, AI responses, tool calls and tool results.
332
+ */
333
+ declare const sampleData: {
334
+ /**
335
+ * Sample user inputs, as plain strings.
336
+ */
337
+ userInputs: string[];
338
+ /**
339
+ * Sample AI responses, as plain strings.
340
+ */
341
+ aiResponses: string[];
342
+ /**
343
+ * Sample tool calls; the union covers three argument shapes: operation with a and b, a query, or no arguments.
344
+ */
345
+ toolCalls: ({
346
+ name: string;
347
+ args: {
348
+ operation: string;
349
+ a: number;
350
+ b: number;
351
+ query?: undefined;
352
+ };
353
+ } | {
354
+ name: string;
355
+ args: {
356
+ query: string;
357
+ operation?: undefined;
358
+ a?: undefined;
359
+ b?: undefined;
360
+ };
361
+ } | {
362
+ name: string;
363
+ args: {
364
+ operation?: undefined;
365
+ a?: undefined;
366
+ b?: undefined;
367
+ query?: undefined;
368
+ };
369
+ })[];
370
+ /**
371
+ * Sample tool results, each pairing a tool name with its string result.
372
+ */
373
+ toolResults: {
374
+ name: string;
375
+ result: string;
376
+ }[];
377
+ };
378
+
379
+ /**
380
+ * Sample calculator tool: takes an `operation` (add, subtract, multiply or divide) plus numeric operands `a` and `b`, and returns a string.
381
+ */
382
+ declare const calculatorTool: Tool<{
383
+ operation: "add" | "subtract" | "multiply" | "divide";
384
+ a: number;
385
+ b: number;
386
+ }, string>;
387
+ /**
388
+ * Sample search tool: takes a `query` string and an optional numeric `limit`, and returns a string.
389
+ */
390
+ declare const searchTool: Tool<{
391
+ query: string;
392
+ limit?: number | undefined;
393
+ }, string>;
394
+ /**
395
+ * Sample time tool: its only input field is an optional `_dummy` string, and it returns a string.
396
+ */
397
+ declare const timeTool: Tool<{
398
+ _dummy?: string | undefined;
399
+ }, string>;
400
+ /**
401
+ * Sample weather tool: takes a `location` string and optional `units` (celsius or fahrenheit), and returns a string.
402
+ */
403
+ declare const weatherTool: Tool<{
404
+ location: string;
405
+ units?: "celsius" | "fahrenheit" | undefined;
406
+ }, string>;
407
+ /**
408
+ * Sample file reader tool: takes a `path` string and returns a string.
409
+ */
410
+ declare const fileReaderTool: Tool<{
411
+ path: string;
412
+ }, string>;
413
+ /**
414
+ * Sample database query tool: takes a `query` string and returns a string.
415
+ */
416
+ declare const databaseQueryTool: Tool<{
417
+ query: string;
418
+ }, string>;
419
+ /**
420
+ * All sample tools, as one array typed over the six sample tool input shapes (calculator, search, time, weather, file reader, database query).
421
+ */
422
+ declare const sampleTools: (Tool<{
423
+ operation: "add" | "subtract" | "multiply" | "divide";
424
+ a: number;
425
+ b: number;
426
+ }, string> | Tool<{
427
+ query: string;
428
+ limit?: number | undefined;
429
+ }, string> | Tool<{
430
+ _dummy?: string | undefined;
431
+ }, string> | Tool<{
432
+ location: string;
433
+ units?: "celsius" | "fahrenheit" | undefined;
434
+ }, string> | Tool<{
435
+ path: string;
436
+ }, string> | Tool<{
437
+ query: string;
438
+ }, string>)[];
439
+ /**
440
+ * Get tools by category: takes a ToolCategory and returns an array typed over the same six sample tool shapes as sampleTools.
441
+ */
442
+ declare function getToolsByCategory(category: ToolCategory): (Tool<{
443
+ operation: "add" | "subtract" | "multiply" | "divide";
444
+ a: number;
445
+ b: number;
446
+ }, string> | Tool<{
447
+ query: string;
448
+ limit?: number | undefined;
449
+ }, string> | Tool<{
450
+ _dummy?: string | undefined;
451
+ }, string> | Tool<{
452
+ location: string;
453
+ units?: "celsius" | "fahrenheit" | undefined;
454
+ }, string> | Tool<{
455
+ path: string;
456
+ }, string> | Tool<{
457
+ query: string;
458
+ }, string>)[];
459
+ /**
460
+ * Get a tool by name; the return type includes `undefined`, so callers must handle the case where no tool is returned.
461
+ */
462
+ declare function getToolByName(name: string): Tool<{
463
+ operation: "add" | "subtract" | "multiply" | "divide";
464
+ a: number;
465
+ b: number;
466
+ }, string> | Tool<{
467
+ query: string;
468
+ limit?: number | undefined;
469
+ }, string> | Tool<{
470
+ _dummy?: string | undefined;
471
+ }, string> | Tool<{
472
+ location: string;
473
+ units?: "celsius" | "fahrenheit" | undefined;
474
+ }, string> | Tool<{
475
+ path: string;
476
+ }, string> | Tool<{
477
+ query: string;
478
+ }, string> | undefined;
479
+
480
+ /**
481
+ * Options accepted by the AgentTestRunner constructor and by createAgentTestRunner; every field is optional.
482
+ */
483
+ interface AgentTestConfig {
484
+ /**
485
+ * Maximum time to wait for the agent response, in milliseconds (the default is not visible here).
486
+ */
487
+ timeout?: number;
488
+ /**
489
+ * Whether to capture intermediate steps (reported via AgentTestResult.steps).
490
+ */
491
+ captureSteps?: boolean;
492
+ /**
493
+ * Whether to validate state after each step.
494
+ */
495
+ validateState?: boolean;
496
+ /**
497
+ * Custom state validator: receives a state and returns a boolean, either directly or via a Promise.
498
+ */
499
+ stateValidator?: (state: any) => boolean | Promise<boolean>;
500
+ }
501
+ /**
502
+ * Result of an agent test run, as resolved by AgentTestRunner.run and runMany.
503
+ */
504
+ interface AgentTestResult {
505
+ /**
506
+ * Final state after execution (untyped).
507
+ */
508
+ finalState: any;
509
+ /**
510
+ * Messages exchanged during the run.
511
+ */
512
+ messages: BaseMessage[];
513
+ /**
514
+ * Execution time in milliseconds.
515
+ */
516
+ executionTime: number;
517
+ /**
518
+ * Intermediate steps, present only if they were captured.
519
+ */
520
+ steps?: any[];
521
+ /**
522
+ * Whether the test passed.
523
+ */
524
+ passed: boolean;
525
+ /**
526
+ * Error describing the failure, if the test failed.
527
+ */
528
+ error?: Error;
529
+ }
530
+ /**
531
+ * Agent test runner for integration testing: wraps an agent plus an optional AgentTestConfig and reports each run as an AgentTestResult.
532
+ *
533
+ * @example
534
+ * ```typescript
535
+ * const runner = new AgentTestRunner(agent, {
536
+ * timeout: 5000,
537
+ * captureSteps: true
538
+ * });
539
+ *
540
+ * const result = await runner.run({
541
+ * messages: [new HumanMessage('Hello')]
542
+ * });
543
+ *
544
+ * expect(result.passed).toBe(true);
545
+ * expect(result.messages.length).toBeGreaterThan(1);
546
+ * ```
547
+ */
548
+ declare class AgentTestRunner {
549
+ private agent;
550
+ private config;
551
+ constructor(agent: any, config?: AgentTestConfig);
552
+ /**
553
+ * Run the agent with the given input and resolve with an AgentTestResult.
554
+ */
555
+ run(input: any): Promise<AgentTestResult>;
556
+ /**
557
+ * Run multiple test cases and resolve with an array of results (sequential versus parallel execution is not visible here).
558
+ */
559
+ runMany(inputs: any[]): Promise<AgentTestResult[]>;
560
+ }
561
+ /**
562
+ * Create an AgentTestRunner; takes the same arguments as the AgentTestRunner constructor.
563
+ */
564
+ declare function createAgentTestRunner(agent: any, config?: AgentTestConfig): AgentTestRunner;
565
+
566
+ /**
567
+ * Options accepted by the ConversationSimulator constructor and by createConversationSimulator; every field is optional.
568
+ */
569
+ interface ConversationSimulatorConfig {
570
+ /**
571
+ * Maximum number of turns (the default is not visible here).
572
+ */
573
+ maxTurns?: number;
574
+ /**
575
+ * Delay between turns, in milliseconds.
576
+ */
577
+ turnDelay?: number;
578
+ /**
579
+ * Whether to log the conversation.
580
+ */
581
+ verbose?: boolean;
582
+ /**
583
+ * Stop condition: a predicate over the messages (presumably a `true` result ends the conversation; confirm).
584
+ */
585
+ stopCondition?: (messages: BaseMessage[]) => boolean;
586
+ }
587
+ /**
588
+ * Result of a conversation simulation, as resolved by ConversationSimulator.simulate and simulateDynamic.
589
+ */
590
+ interface ConversationResult {
591
+ /**
592
+ * All messages in the conversation.
593
+ */
594
+ messages: BaseMessage[];
595
+ /**
596
+ * Number of turns taken.
597
+ */
598
+ turns: number;
599
+ /**
600
+ * Total execution time, in milliseconds.
601
+ */
602
+ totalTime: number;
603
+ /**
604
+ * Whether the conversation completed successfully.
605
+ */
606
+ completed: boolean;
607
+ /**
608
+ * Reason for stopping: one of 'max_turns', 'stop_condition' or 'error'.
609
+ */
610
+ stopReason: 'max_turns' | 'stop_condition' | 'error';
611
+ /**
612
+ * Error, if any occurred.
613
+ */
614
+ error?: Error;
615
+ }
616
+ /**
617
+ * Conversation simulator for testing multi-turn interactions: wraps an agent plus an optional ConversationSimulatorConfig and reports a ConversationResult.
618
+ *
619
+ * @example
620
+ * ```typescript
621
+ * const simulator = new ConversationSimulator(agent, {
622
+ * maxTurns: 5,
623
+ * verbose: true
624
+ * });
625
+ *
626
+ * const result = await simulator.simulate([
627
+ * 'Hello',
628
+ * 'What can you do?',
629
+ * 'Help me calculate 2 + 2'
630
+ * ]);
631
+ *
632
+ * expect(result.turns).toBe(3);
633
+ * expect(result.completed).toBe(true);
634
+ * ```
635
+ */
636
+ declare class ConversationSimulator {
637
+ private agent;
638
+ private config;
639
+ constructor(agent: any, config?: ConversationSimulatorConfig);
640
+ /**
641
+ * Simulate a conversation driven by a predefined list of user input strings.
642
+ */
643
+ simulate(userInputs: string[]): Promise<ConversationResult>;
644
+ /**
645
+ * Simulate a conversation whose user inputs come from a generator over the messages; the generator may return null (presumably to end the conversation; confirm).
646
+ */
647
+ simulateDynamic(inputGenerator: (messages: BaseMessage[]) => string | null, maxTurns?: number): Promise<ConversationResult>;
648
+ }
649
+ /**
650
+ * Create a ConversationSimulator; takes the same arguments as the ConversationSimulator constructor.
651
+ */
652
+ declare function createConversationSimulator(agent: any, config?: ConversationSimulatorConfig): ConversationSimulator;
653
+
654
+ /**
655
+ * Snapshot configuration shared by the snapshot, comparison and diff helpers in this module; every field is optional.
656
+ */
657
+ interface SnapshotConfig {
658
+ /**
659
+ * Names of fields to include in the snapshot.
660
+ */
661
+ includeFields?: string[];
662
+ /**
663
+ * Names of fields to exclude from the snapshot (precedence over `includeFields` is not visible here).
664
+ */
665
+ excludeFields?: string[];
666
+ /**
667
+ * Whether to normalize timestamps.
668
+ */
669
+ normalizeTimestamps?: boolean;
670
+ /**
671
+ * Whether to normalize IDs.
672
+ */
673
+ normalizeIds?: boolean;
674
+ /**
675
+ * Custom normalizer function: maps a value to its normalized form.
676
+ */
677
+ normalizer?: (value: any) => any;
678
+ }
679
+ /**
680
+ * Create a snapshot of a state, optionally shaped by a SnapshotConfig; the result is typed `any`.
681
+ */
682
+ declare function createSnapshot(state: any, config?: SnapshotConfig): any;
683
+ /**
684
+ * Assert that state matches a snapshot; NOTE(review): no reference snapshot is passed in, so it is presumably obtained elsewhere (confirm).
685
+ */
686
+ declare function assertMatchesSnapshot(state: any, config?: SnapshotConfig): void;
687
+ /**
688
+ * Create a snapshot of a BaseMessage array, optionally shaped by a SnapshotConfig; the result is typed `any`.
689
+ */
690
+ declare function createMessageSnapshot(messages: BaseMessage[], config?: SnapshotConfig): any;
691
+ /**
692
+ * Assert that messages match a snapshot; NOTE(review): no reference snapshot is passed in, so it is presumably obtained elsewhere (confirm).
693
+ */
694
+ declare function assertMessagesMatchSnapshot(messages: BaseMessage[], config?: SnapshotConfig): void;
695
+ /**
696
+ * Compare two states for equality, optionally under a SnapshotConfig; returns a boolean rather than throwing.
697
+ */
698
+ declare function compareStates(state1: any, state2: any, config?: SnapshotConfig): boolean;
699
+ /**
700
+ * Create a diff between two states, optionally under a SnapshotConfig; the diff is typed `any`, so its shape is not visible here.
701
+ */
702
+ declare function createStateDiff(state1: any, state2: any, config?: SnapshotConfig): any;
703
+ /**
704
+ * Assert that state has changed between `stateBefore` and `stateAfter`; `expectedChanges` is a list of strings (presumably the names of fields expected to differ; confirm).
705
+ */
706
+ declare function assertStateChanged(stateBefore: any, stateAfter: any, expectedChanges: string[], config?: SnapshotConfig): void;
707
+
708
+ export { type AgentTestConfig, type AgentTestResult, AgentTestRunner, type ConversationResult, ConversationSimulator, type ConversationSimulatorConfig, MockLLM, type MockLLMConfig, type MockToolConfig, type SnapshotConfig, StateBuilder, assertAlternatingMessages, assertCompletesWithin, assertHasKeys, assertInRange, assertIsMessage, assertIterationsWithinLimit, assertLastMessageContains, assertMatchesSnapshot, assertMessageContains, assertMessagesMatchSnapshot, assertNotEmpty, assertStateChanged, assertStateHasFields, assertStateSnapshot, assertThrowsWithMessage, assertToolCalled, calculatorTool, compareStates, complexReasoningConversation, createAgentTestRunner, createCalculatorTool, createConversation, createConversationSimulator, createConversationState, createConversationWithSystem, createDelayedTool, createEchoLLM, createEchoTool, createErrorLLM, createErrorTool, createMessageSnapshot, createMockLLM, createMockTool, createPlanningState, createReActState, createSnapshot, createStateBuilder, createStateDiff, databaseQueryTool, errorHandlingConversation, fileReaderTool, getToolByName, getToolsByCategory, longContextConversation, multiTurnConversation, sampleData, sampleTools, searchTool, simpleGreeting, timeTool, toolUsageConversation, weatherTool };