@agentforge/testing 0.16.20 → 0.16.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.cts CHANGED
@@ -499,10 +499,25 @@ declare function getToolByName(name: string): Tool<{
499
499
  query: string;
500
500
  }, string> | undefined;
501
501
 
502
+ /**
503
+ * Agent-like contract used by the test runner.
504
+ */
505
+ interface AgentTestAgent<TInput = unknown, TState = unknown> {
506
+ invoke(input: TInput): TState | Promise<TState>;
507
+ }
508
+ /**
509
+ * Captured runner step. The current runner preserves an empty step list, but
510
+ * this contract gives future step capture a typed state boundary.
511
+ */
512
+ interface AgentTestRunnerStep<TState = unknown> {
513
+ state: TState;
514
+ messages: BaseMessage[];
515
+ timestamp: number;
516
+ }
502
517
  /**
503
518
  * Configuration for agent test runner
504
519
  */
505
- interface AgentTestConfig {
520
+ interface AgentTestConfig<TState = unknown> {
506
521
  /**
507
522
  * Maximum time to wait for agent response (ms)
508
523
  */
@@ -518,16 +533,16 @@ interface AgentTestConfig {
518
533
  /**
519
534
  * Custom state validator
520
535
  */
521
- stateValidator?: (state: any) => boolean | Promise<boolean>;
536
+ stateValidator?: (state: TState | undefined) => boolean | Promise<boolean>;
522
537
  }
523
538
  /**
524
539
  * Result from agent test run
525
540
  */
526
- interface AgentTestResult {
541
+ interface AgentTestResult<TState = unknown, TStep = AgentTestRunnerStep<TState>> {
527
542
  /**
528
543
  * Final state after execution
529
544
  */
530
- finalState: any;
545
+ finalState: TState | undefined;
531
546
  /**
532
547
  * Messages exchanged
533
548
  */
@@ -539,7 +554,7 @@ interface AgentTestResult {
539
554
  /**
540
555
  * Intermediate steps (if captured)
541
556
  */
542
- steps?: any[];
557
+ steps?: TStep[];
543
558
  /**
544
559
  * Whether the test passed
545
560
  */
@@ -567,23 +582,23 @@ interface AgentTestResult {
567
582
  * expect(result.messages.length).toBeGreaterThan(1);
568
583
  * ```
569
584
  */
570
- declare class AgentTestRunner {
585
+ declare class AgentTestRunner<TInput = unknown, TState = unknown, TStep = AgentTestRunnerStep<TState>> {
571
586
  private agent;
572
587
  private config;
573
- constructor(agent: any, config?: AgentTestConfig);
588
+ constructor(agent: AgentTestAgent<TInput, TState>, config?: AgentTestConfig<TState>);
574
589
  /**
575
590
  * Run the agent with given input
576
591
  */
577
- run(input: any): Promise<AgentTestResult>;
592
+ run(input: TInput): Promise<AgentTestResult<TState, TStep>>;
578
593
  /**
579
594
  * Run multiple test cases
580
595
  */
581
- runMany(inputs: any[]): Promise<AgentTestResult[]>;
596
+ runMany(inputs: TInput[]): Promise<AgentTestResult<TState, TStep>[]>;
582
597
  }
583
598
  /**
584
599
  * Create an agent test runner
585
600
  */
586
- declare function createAgentTestRunner(agent: any, config?: AgentTestConfig): AgentTestRunner;
601
+ declare function createAgentTestRunner<TInput = unknown, TState = unknown, TStep = AgentTestRunnerStep<TState>>(agent: AgentTestAgent<TInput, TState>, config?: AgentTestConfig<TState>): AgentTestRunner<TInput, TState, TStep>;
587
602
 
588
603
  /**
589
604
  * Configuration for conversation simulator
@@ -741,4 +756,4 @@ declare function createStateDiff<TState1 = unknown, TState2 = unknown>(state1: T
741
756
  */
742
757
  declare function assertStateChanged<TStateBefore = unknown, TStateAfter = unknown>(stateBefore: TStateBefore, stateAfter: TStateAfter, expectedChanges: string[], config?: SnapshotConfig): void;
743
758
 
744
- export { type AgentTestConfig, type AgentTestResult, AgentTestRunner, type AssertedMessage, type ConversationResult, ConversationSimulator, type ConversationSimulatorConfig, type MessageSnapshot, MockLLM, type MockLLMConfig, type MockToolConfig, type PlanningStep, type PlanningTestState, ROOT_SNAPSHOT_DIFF_KEY, type ReActTestState, type SnapshotConfig, type SnapshotDiff, type SnapshotObject, StateBuilder, type TestToolCall, type TestToolResult, assertAlternatingMessages, assertCompletesWithin, assertHasKeys, assertInRange, assertIsMessage, assertIterationsWithinLimit, assertLastMessageContains, assertMatchesSnapshot, assertMessageContains, assertMessagesMatchSnapshot, assertNotEmpty, assertStateChanged, assertStateHasFields, assertStateSnapshot, assertThrowsWithMessage, assertToolCalled, calculatorTool, compareStates, complexReasoningConversation, createAgentTestRunner, createCalculatorTool, createConversation, createConversationSimulator, createConversationState, createConversationWithSystem, createDelayedTool, createEchoLLM, createEchoTool, createErrorLLM, createErrorTool, createMessageSnapshot, createMockLLM, createMockTool, createPlanningState, createReActState, createSnapshot, createStateBuilder, createStateDiff, databaseQueryTool, errorHandlingConversation, fileReaderTool, getToolByName, getToolsByCategory, longContextConversation, multiTurnConversation, sampleData, sampleTools, searchTool, simpleGreeting, timeTool, toolUsageConversation, weatherTool };
759
+ export { type AgentTestAgent, type AgentTestConfig, type AgentTestResult, AgentTestRunner, type AgentTestRunnerStep, type AssertedMessage, type ConversationResult, ConversationSimulator, type ConversationSimulatorConfig, type MessageSnapshot, MockLLM, type MockLLMConfig, type MockToolConfig, type PlanningStep, type PlanningTestState, ROOT_SNAPSHOT_DIFF_KEY, type ReActTestState, type SnapshotConfig, type SnapshotDiff, type SnapshotObject, StateBuilder, type TestToolCall, type TestToolResult, assertAlternatingMessages, assertCompletesWithin, assertHasKeys, assertInRange, assertIsMessage, assertIterationsWithinLimit, assertLastMessageContains, assertMatchesSnapshot, assertMessageContains, assertMessagesMatchSnapshot, assertNotEmpty, assertStateChanged, assertStateHasFields, assertStateSnapshot, assertThrowsWithMessage, assertToolCalled, calculatorTool, compareStates, complexReasoningConversation, createAgentTestRunner, createCalculatorTool, createConversation, createConversationSimulator, createConversationState, createConversationWithSystem, createDelayedTool, createEchoLLM, createEchoTool, createErrorLLM, createErrorTool, createMessageSnapshot, createMockLLM, createMockTool, createPlanningState, createReActState, createSnapshot, createStateBuilder, createStateDiff, databaseQueryTool, errorHandlingConversation, fileReaderTool, getToolByName, getToolsByCategory, longContextConversation, multiTurnConversation, sampleData, sampleTools, searchTool, simpleGreeting, timeTool, toolUsageConversation, weatherTool };
package/dist/index.d.ts CHANGED
@@ -499,10 +499,25 @@ declare function getToolByName(name: string): Tool<{
499
499
  query: string;
500
500
  }, string> | undefined;
501
501
 
502
+ /**
503
+ * Agent-like contract used by the test runner.
504
+ */
505
+ interface AgentTestAgent<TInput = unknown, TState = unknown> {
506
+ invoke(input: TInput): TState | Promise<TState>;
507
+ }
508
+ /**
509
+ * Captured runner step. The current runner preserves an empty step list, but
510
+ * this contract gives future step capture a typed state boundary.
511
+ */
512
+ interface AgentTestRunnerStep<TState = unknown> {
513
+ state: TState;
514
+ messages: BaseMessage[];
515
+ timestamp: number;
516
+ }
502
517
  /**
503
518
  * Configuration for agent test runner
504
519
  */
505
- interface AgentTestConfig {
520
+ interface AgentTestConfig<TState = unknown> {
506
521
  /**
507
522
  * Maximum time to wait for agent response (ms)
508
523
  */
@@ -518,16 +533,16 @@ interface AgentTestConfig {
518
533
  /**
519
534
  * Custom state validator
520
535
  */
521
- stateValidator?: (state: any) => boolean | Promise<boolean>;
536
+ stateValidator?: (state: TState | undefined) => boolean | Promise<boolean>;
522
537
  }
523
538
  /**
524
539
  * Result from agent test run
525
540
  */
526
- interface AgentTestResult {
541
+ interface AgentTestResult<TState = unknown, TStep = AgentTestRunnerStep<TState>> {
527
542
  /**
528
543
  * Final state after execution
529
544
  */
530
- finalState: any;
545
+ finalState: TState | undefined;
531
546
  /**
532
547
  * Messages exchanged
533
548
  */
@@ -539,7 +554,7 @@ interface AgentTestResult {
539
554
  /**
540
555
  * Intermediate steps (if captured)
541
556
  */
542
- steps?: any[];
557
+ steps?: TStep[];
543
558
  /**
544
559
  * Whether the test passed
545
560
  */
@@ -567,23 +582,23 @@ interface AgentTestResult {
567
582
  * expect(result.messages.length).toBeGreaterThan(1);
568
583
  * ```
569
584
  */
570
- declare class AgentTestRunner {
585
+ declare class AgentTestRunner<TInput = unknown, TState = unknown, TStep = AgentTestRunnerStep<TState>> {
571
586
  private agent;
572
587
  private config;
573
- constructor(agent: any, config?: AgentTestConfig);
588
+ constructor(agent: AgentTestAgent<TInput, TState>, config?: AgentTestConfig<TState>);
574
589
  /**
575
590
  * Run the agent with given input
576
591
  */
577
- run(input: any): Promise<AgentTestResult>;
592
+ run(input: TInput): Promise<AgentTestResult<TState, TStep>>;
578
593
  /**
579
594
  * Run multiple test cases
580
595
  */
581
- runMany(inputs: any[]): Promise<AgentTestResult[]>;
596
+ runMany(inputs: TInput[]): Promise<AgentTestResult<TState, TStep>[]>;
582
597
  }
583
598
  /**
584
599
  * Create an agent test runner
585
600
  */
586
- declare function createAgentTestRunner(agent: any, config?: AgentTestConfig): AgentTestRunner;
601
+ declare function createAgentTestRunner<TInput = unknown, TState = unknown, TStep = AgentTestRunnerStep<TState>>(agent: AgentTestAgent<TInput, TState>, config?: AgentTestConfig<TState>): AgentTestRunner<TInput, TState, TStep>;
587
602
 
588
603
  /**
589
604
  * Configuration for conversation simulator
@@ -741,4 +756,4 @@ declare function createStateDiff<TState1 = unknown, TState2 = unknown>(state1: T
741
756
  */
742
757
  declare function assertStateChanged<TStateBefore = unknown, TStateAfter = unknown>(stateBefore: TStateBefore, stateAfter: TStateAfter, expectedChanges: string[], config?: SnapshotConfig): void;
743
758
 
744
- export { type AgentTestConfig, type AgentTestResult, AgentTestRunner, type AssertedMessage, type ConversationResult, ConversationSimulator, type ConversationSimulatorConfig, type MessageSnapshot, MockLLM, type MockLLMConfig, type MockToolConfig, type PlanningStep, type PlanningTestState, ROOT_SNAPSHOT_DIFF_KEY, type ReActTestState, type SnapshotConfig, type SnapshotDiff, type SnapshotObject, StateBuilder, type TestToolCall, type TestToolResult, assertAlternatingMessages, assertCompletesWithin, assertHasKeys, assertInRange, assertIsMessage, assertIterationsWithinLimit, assertLastMessageContains, assertMatchesSnapshot, assertMessageContains, assertMessagesMatchSnapshot, assertNotEmpty, assertStateChanged, assertStateHasFields, assertStateSnapshot, assertThrowsWithMessage, assertToolCalled, calculatorTool, compareStates, complexReasoningConversation, createAgentTestRunner, createCalculatorTool, createConversation, createConversationSimulator, createConversationState, createConversationWithSystem, createDelayedTool, createEchoLLM, createEchoTool, createErrorLLM, createErrorTool, createMessageSnapshot, createMockLLM, createMockTool, createPlanningState, createReActState, createSnapshot, createStateBuilder, createStateDiff, databaseQueryTool, errorHandlingConversation, fileReaderTool, getToolByName, getToolsByCategory, longContextConversation, multiTurnConversation, sampleData, sampleTools, searchTool, simpleGreeting, timeTool, toolUsageConversation, weatherTool };
759
+ export { type AgentTestAgent, type AgentTestConfig, type AgentTestResult, AgentTestRunner, type AgentTestRunnerStep, type AssertedMessage, type ConversationResult, ConversationSimulator, type ConversationSimulatorConfig, type MessageSnapshot, MockLLM, type MockLLMConfig, type MockToolConfig, type PlanningStep, type PlanningTestState, ROOT_SNAPSHOT_DIFF_KEY, type ReActTestState, type SnapshotConfig, type SnapshotDiff, type SnapshotObject, StateBuilder, type TestToolCall, type TestToolResult, assertAlternatingMessages, assertCompletesWithin, assertHasKeys, assertInRange, assertIsMessage, assertIterationsWithinLimit, assertLastMessageContains, assertMatchesSnapshot, assertMessageContains, assertMessagesMatchSnapshot, assertNotEmpty, assertStateChanged, assertStateHasFields, assertStateSnapshot, assertThrowsWithMessage, assertToolCalled, calculatorTool, compareStates, complexReasoningConversation, createAgentTestRunner, createCalculatorTool, createConversation, createConversationSimulator, createConversationState, createConversationWithSystem, createDelayedTool, createEchoLLM, createEchoTool, createErrorLLM, createErrorTool, createMessageSnapshot, createMockLLM, createMockTool, createPlanningState, createReActState, createSnapshot, createStateBuilder, createStateDiff, databaseQueryTool, errorHandlingConversation, fileReaderTool, getToolByName, getToolsByCategory, longContextConversation, multiTurnConversation, sampleData, sampleTools, searchTool, simpleGreeting, timeTool, toolUsageConversation, weatherTool };
package/dist/index.js CHANGED
@@ -12806,14 +12806,14 @@ function withTimeout(fn2, timeout, isHook = false) {
12806
12806
  if (timeout <= 0 || timeout === Number.POSITIVE_INFINITY) {
12807
12807
  return fn2;
12808
12808
  }
12809
- const { setTimeout: setTimeout2, clearTimeout } = getSafeTimers();
12809
+ const { setTimeout: setTimeout2, clearTimeout: clearTimeout2 } = getSafeTimers();
12810
12810
  return function runWithTimeout(...args) {
12811
12811
  return Promise.race([
12812
12812
  fn2(...args),
12813
12813
  new Promise((resolve4, reject) => {
12814
12814
  var _a;
12815
12815
  const timer = setTimeout2(() => {
12816
- clearTimeout(timer);
12816
+ clearTimeout2(timer);
12817
12817
  reject(new Error(makeTimeoutMsg(isHook, timeout)));
12818
12818
  }, timeout);
12819
12819
  (_a = timer.unref) == null ? void 0 : _a.call(timer);
@@ -15671,9 +15671,9 @@ function createExpectPoll(expect2) {
15671
15671
  const promise = () => new Promise((resolve4, reject) => {
15672
15672
  let intervalId;
15673
15673
  let lastError;
15674
- const { setTimeout: setTimeout2, clearTimeout } = getSafeTimers();
15674
+ const { setTimeout: setTimeout2, clearTimeout: clearTimeout2 } = getSafeTimers();
15675
15675
  const timeoutId = setTimeout2(() => {
15676
- clearTimeout(intervalId);
15676
+ clearTimeout2(intervalId);
15677
15677
  reject(
15678
15678
  copyStackTrace$1(
15679
15679
  new Error(`Matcher did not succeed in ${timeout}ms`, {
@@ -15689,8 +15689,8 @@ function createExpectPoll(expect2) {
15689
15689
  const obj = await fn2();
15690
15690
  utils_exports.flag(assertion, "object", obj);
15691
15691
  resolve4(await assertionFunction.call(assertion, ...args));
15692
- clearTimeout(intervalId);
15693
- clearTimeout(timeoutId);
15692
+ clearTimeout2(intervalId);
15693
+ clearTimeout2(timeoutId);
15694
15694
  } catch (err) {
15695
15695
  lastError = err;
15696
15696
  intervalId = setTimeout2(check, interval);
@@ -17281,7 +17281,7 @@ To automatically clean-up native timers, use \`shouldClearNativeTimers\`.`
17281
17281
  });
17282
17282
  };
17283
17283
  }
17284
- clock.clearTimeout = function clearTimeout(timerId) {
17284
+ clock.clearTimeout = function clearTimeout2(timerId) {
17285
17285
  return clearTimer(clock, timerId, "Timeout");
17286
17286
  };
17287
17287
  clock.nextTick = function nextTick(func) {
@@ -17924,7 +17924,7 @@ function copyStackTrace(target, source) {
17924
17924
  return target;
17925
17925
  }
17926
17926
  function waitFor(callback, options = {}) {
17927
- const { setTimeout: setTimeout2, setInterval, clearTimeout, clearInterval } = getSafeTimers();
17927
+ const { setTimeout: setTimeout2, setInterval, clearTimeout: clearTimeout2, clearInterval } = getSafeTimers();
17928
17928
  const { interval = 50, timeout = 1e3 } = typeof options === "number" ? { timeout: options } : options;
17929
17929
  const STACK_TRACE_ERROR = new Error("STACK_TRACE_ERROR");
17930
17930
  return new Promise((resolve4, reject) => {
@@ -17934,7 +17934,7 @@ function waitFor(callback, options = {}) {
17934
17934
  let intervalId;
17935
17935
  const onResolve = (result) => {
17936
17936
  if (timeoutId) {
17937
- clearTimeout(timeoutId);
17937
+ clearTimeout2(timeoutId);
17938
17938
  }
17939
17939
  if (intervalId) {
17940
17940
  clearInterval(intervalId);
@@ -17992,7 +17992,7 @@ function waitFor(callback, options = {}) {
17992
17992
  });
17993
17993
  }
17994
17994
  function waitUntil(callback, options = {}) {
17995
- const { setTimeout: setTimeout2, setInterval, clearTimeout, clearInterval } = getSafeTimers();
17995
+ const { setTimeout: setTimeout2, setInterval, clearTimeout: clearTimeout2, clearInterval } = getSafeTimers();
17996
17996
  const { interval = 50, timeout = 1e3 } = typeof options === "number" ? { timeout: options } : options;
17997
17997
  const STACK_TRACE_ERROR = new Error("STACK_TRACE_ERROR");
17998
17998
  return new Promise((resolve4, reject) => {
@@ -18016,7 +18016,7 @@ function waitUntil(callback, options = {}) {
18016
18016
  return;
18017
18017
  }
18018
18018
  if (timeoutId) {
18019
- clearTimeout(timeoutId);
18019
+ clearTimeout2(timeoutId);
18020
18020
  }
18021
18021
  if (intervalId) {
18022
18022
  clearInterval(intervalId);
@@ -18602,20 +18602,21 @@ var AgentTestRunner = class {
18602
18602
  let messages = [];
18603
18603
  let passed = true;
18604
18604
  let error;
18605
+ let timeoutId;
18605
18606
  try {
18606
- const timeout = this.config.timeout || 3e4;
18607
+ const timeout = this.config.timeout ?? 3e4;
18607
18608
  const timeoutPromise = new Promise((_, reject) => {
18608
- setTimeout(() => reject(new Error("Agent test timeout")), timeout);
18609
+ timeoutId = setTimeout(() => reject(new Error("Agent test timeout")), timeout);
18609
18610
  });
18610
18611
  const runPromise = (async () => {
18611
18612
  if (this.config.captureSteps) {
18612
18613
  const result = await this.agent.invoke(input);
18613
18614
  finalState = result;
18614
- messages = result.messages || [];
18615
+ messages = extractMessages(result);
18615
18616
  } else {
18616
18617
  const result = await this.agent.invoke(input);
18617
18618
  finalState = result;
18618
- messages = result.messages || [];
18619
+ messages = extractMessages(result);
18619
18620
  }
18620
18621
  if (this.config.validateState && this.config.stateValidator) {
18621
18622
  const isValid = await this.config.stateValidator(finalState);
@@ -18624,7 +18625,13 @@ var AgentTestRunner = class {
18624
18625
  }
18625
18626
  }
18626
18627
  })();
18627
- await Promise.race([runPromise, timeoutPromise]);
18628
+ try {
18629
+ await Promise.race([runPromise, timeoutPromise]);
18630
+ } finally {
18631
+ if (timeoutId !== void 0) {
18632
+ clearTimeout(timeoutId);
18633
+ }
18634
+ }
18628
18635
  } catch (err) {
18629
18636
  passed = false;
18630
18637
  error = err;
@@ -18649,6 +18656,13 @@ var AgentTestRunner = class {
18649
18656
  function createAgentTestRunner(agent, config2) {
18650
18657
  return new AgentTestRunner(agent, config2);
18651
18658
  }
18659
+ function extractMessages(state) {
18660
+ if (typeof state !== "object" || state === null) {
18661
+ return [];
18662
+ }
18663
+ const { messages } = state;
18664
+ return Array.isArray(messages) ? messages : [];
18665
+ }
18652
18666
  var ConversationSimulator = class {
18653
18667
  constructor(agent, config2 = {}) {
18654
18668
  this.agent = agent;