@langwatch/scenario 0.2.13 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.mts CHANGED
@@ -1,7 +1,18 @@
1
1
  import * as ai from 'ai';
2
- import { CoreMessage, CoreUserMessage, CoreAssistantMessage, CoreToolMessage, LanguageModel } from 'ai';
3
- import { z } from 'zod';
2
+ import { CoreMessage, CoreUserMessage, CoreAssistantMessage, CoreToolMessage, LanguageModel, generateText, ModelMessage } from 'ai';
3
+ import { z } from 'zod/v4';
4
+ import { SpanProcessor, ReadableSpan } from '@opentelemetry/sdk-trace-base';
5
+ import { RealtimeSession } from '@openai/agents/realtime';
4
6
  import { Observable } from 'rxjs';
7
+ import { z as z$1 } from 'zod';
8
+
9
+ /**
10
+ * The possible return types from an agent's `call` method.
11
+ * - string | CoreMessage | CoreMessage[]: Agent generated response
12
+ * - JudgeResult: Judge made a final decision
13
+ * - null: Judge wants to continue observing (no decision yet)
14
+ */
15
+ type AgentReturnTypes = string | CoreMessage | CoreMessage[] | JudgeResult | null;
5
16
 
6
17
  declare enum AgentRole {
7
18
  USER = "User",
@@ -42,11 +53,6 @@ interface AgentInput {
42
53
  */
43
54
  scenarioConfig: ScenarioConfig;
44
55
  }
45
- /**
46
- * The possible return types from an agent's `call` method.
47
- * Can be a simple string, a single message, an array of messages, or a ScenarioResult.
48
- */
49
- type AgentReturnTypes = string | CoreMessage | CoreMessage[] | ScenarioResult;
50
56
  /**
51
57
  * Abstract base class for integrating custom agents with the Scenario framework.
52
58
  *
@@ -71,6 +77,7 @@ type AgentReturnTypes = string | CoreMessage | CoreMessage[] | ScenarioResult;
71
77
  * ```
72
78
  */
73
79
  declare abstract class AgentAdapter {
80
+ name?: string;
74
81
  role: AgentRole;
75
82
  /**
76
83
  * Process the input and generate a response.
@@ -88,33 +95,21 @@ declare abstract class AgentAdapter {
88
95
  * Abstract base class for user simulator agents.
89
96
  * User simulator agents are responsible for generating user messages to drive the conversation.
90
97
  */
91
- declare abstract class UserSimulatorAgentAdapter implements AgentAdapter {
98
+ declare abstract class UserSimulatorAgentAdapter extends AgentAdapter {
99
+ name: string;
92
100
  role: AgentRole;
93
- /**
94
- * Process the input and generate a user message.
95
- *
96
- * @param input AgentInput containing conversation history, thread context, and scenario state.
97
- * @returns The user's response.
98
- */
99
- abstract call(input: AgentInput): Promise<AgentReturnTypes>;
100
101
  }
101
102
  /**
102
103
  * Abstract base class for judge agents.
103
104
  * Judge agents are responsible for evaluating the conversation and determining success or failure.
104
105
  */
105
- declare abstract class JudgeAgentAdapter implements AgentAdapter {
106
+ declare abstract class JudgeAgentAdapter extends AgentAdapter {
107
+ name: string;
106
108
  role: AgentRole;
107
109
  /**
108
110
  * The criteria the judge will use to evaluate the conversation.
109
111
  */
110
112
  abstract criteria: string[];
111
- /**
112
- * Process the input and evaluate the conversation.
113
- *
114
- * @param input AgentInput containing conversation history, thread context, and scenario state.
115
- * @returns A ScenarioResult if the conversation should end, otherwise should continue.
116
- */
117
- abstract call(input: AgentInput): Promise<AgentReturnTypes>;
118
113
  }
119
114
 
120
115
  declare const DEFAULT_MAX_TURNS = 10;
@@ -249,7 +244,7 @@ interface ScenarioExecutionLike {
249
244
  * A step in a scenario script.
250
245
  * This is a function that takes the current state and an executor, and performs an action.
251
246
  */
252
- type ScriptStep = (state: ScenarioExecutionStateLike, executor: ScenarioExecutionLike) => Promise<void | ScenarioResult | null> | void | ScenarioResult | null;
247
+ type ScriptStep = (state: ScenarioExecutionStateLike, executor: ScenarioExecutionLike) => Promise<void> | void;
253
248
 
254
249
  /**
255
250
  * Represents the result of a scenario execution.
@@ -348,38 +343,14 @@ interface ScenarioExecutionStateLike {
348
343
  hasToolCall(toolName: string): boolean;
349
344
  }
350
345
 
351
- /** Default temperature for language model inference */
352
- declare const DEFAULT_TEMPERATURE = 0;
353
346
  declare const scenarioProjectConfigSchema: z.ZodObject<{
354
347
  defaultModel: z.ZodOptional<z.ZodObject<{
355
- model: z.ZodType<ai.LanguageModelV1, z.ZodTypeDef, ai.LanguageModelV1>;
348
+ model: z.ZodCustom<ai.LanguageModel, ai.LanguageModel>;
356
349
  temperature: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
357
350
  maxTokens: z.ZodOptional<z.ZodNumber>;
358
- }, "strip", z.ZodTypeAny, {
359
- model: ai.LanguageModelV1;
360
- temperature: number;
361
- maxTokens?: number | undefined;
362
- }, {
363
- model: ai.LanguageModelV1;
364
- temperature?: number | undefined;
365
- maxTokens?: number | undefined;
366
- }>>;
351
+ }, z.core.$strip>>;
367
352
  headless: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
368
- }, "strict", z.ZodTypeAny, {
369
- headless: boolean;
370
- defaultModel?: {
371
- model: ai.LanguageModelV1;
372
- temperature: number;
373
- maxTokens?: number | undefined;
374
- } | undefined;
375
- }, {
376
- defaultModel?: {
377
- model: ai.LanguageModelV1;
378
- temperature?: number | undefined;
379
- maxTokens?: number | undefined;
380
- } | undefined;
381
- headless?: boolean | undefined;
382
- }>;
353
+ }, z.core.$strict>;
383
354
  type ScenarioProjectConfig = z.infer<typeof scenarioProjectConfigSchema>;
384
355
  declare function defineConfig(config: ScenarioProjectConfig): ScenarioProjectConfig;
385
356
 
@@ -390,7 +361,6 @@ type domain_AgentReturnTypes = AgentReturnTypes;
390
361
  type domain_AgentRole = AgentRole;
391
362
  declare const domain_AgentRole: typeof AgentRole;
392
363
  declare const domain_DEFAULT_MAX_TURNS: typeof DEFAULT_MAX_TURNS;
393
- declare const domain_DEFAULT_TEMPERATURE: typeof DEFAULT_TEMPERATURE;
394
364
  declare const domain_DEFAULT_VERBOSE: typeof DEFAULT_VERBOSE;
395
365
  type domain_JudgeAgentAdapter = JudgeAgentAdapter;
396
366
  declare const domain_JudgeAgentAdapter: typeof JudgeAgentAdapter;
@@ -407,32 +377,33 @@ declare const domain_allAgentRoles: typeof allAgentRoles;
407
377
  declare const domain_defineConfig: typeof defineConfig;
408
378
  declare const domain_scenarioProjectConfigSchema: typeof scenarioProjectConfigSchema;
409
379
  declare namespace domain {
410
- export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_TEMPERATURE as DEFAULT_TEMPERATURE, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };
380
+ export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };
411
381
  }
412
382
 
413
383
  /**
414
- * Configuration for the inference parameters of a testing agent.
384
+ * Schema for a language model.
415
385
  */
416
- interface TestingAgentInferenceConfig {
417
- /**
418
- * The language model to use for generating responses.
419
- * If not provided, a default model will be used.
420
- */
421
- model?: LanguageModel;
422
- /**
423
- * The temperature for the language model.
424
- * Defaults to 0.
425
- */
426
- temperature?: number;
427
- /**
428
- * The maximum number of tokens to generate.
429
- */
430
- maxTokens?: number;
431
- }
386
+ declare const modelSchema: z.ZodObject<{
387
+ model: z.ZodCustom<LanguageModel, LanguageModel>;
388
+ temperature: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
389
+ maxTokens: z.ZodOptional<z.ZodNumber>;
390
+ }, z.core.$strip>;
391
+ type ModelConfig = z.infer<typeof modelSchema>;
392
+
393
+ /**
394
+ * Parameters for LLM invocation.
395
+ * Derived from generateText parameters for now.
396
+ */
397
+ type InvokeLLMParams = Parameters<typeof generateText>[0];
398
+ /**
399
+ * Result from LLM invocation.
400
+ * Derived from generateText return type for now.
401
+ */
402
+ type InvokeLLMResult = Pick<Awaited<ReturnType<typeof generateText>>, "text" | "content" | "toolCalls" | "toolResults">;
432
403
  /**
433
404
  * General configuration for a testing agent.
434
405
  */
435
- interface TestingAgentConfig extends TestingAgentInferenceConfig {
406
+ interface TestingAgentConfig extends Partial<ModelConfig> {
436
407
  /**
437
408
  * The name of the agent.
438
409
  */
@@ -464,6 +435,35 @@ interface FinishTestArgs {
464
435
  verdict: "success" | "failure" | "inconclusive";
465
436
  }
466
437
 
438
+ interface JudgeResult {
439
+ success: boolean;
440
+ reasoning: string;
441
+ metCriteria: string[];
442
+ unmetCriteria: string[];
443
+ }
444
+
445
+ /**
446
+ * Collects OpenTelemetry spans for judge evaluation.
447
+ * Implements SpanProcessor to intercept spans as they complete.
448
+ */
449
+ declare class JudgeSpanCollector implements SpanProcessor {
450
+ private spans;
451
+ onStart(): void;
452
+ onEnd(span: ReadableSpan): void;
453
+ forceFlush(): Promise<void>;
454
+ shutdown(): Promise<void>;
455
+ /**
456
+ * Retrieves all spans associated with a specific thread.
457
+ * @param threadId - The thread identifier to filter spans by
458
+ * @returns Array of spans for the given thread
459
+ */
460
+ getSpansForThread(threadId: string): ReadableSpan[];
461
+ }
462
+ /**
463
+ * Singleton instance of the judge span collector.
464
+ */
465
+ declare const judgeSpanCollector: JudgeSpanCollector;
466
+
467
467
  /**
468
468
  * Configuration for the judge agent.
469
469
  */
@@ -476,6 +476,10 @@ interface JudgeAgentConfig extends TestingAgentConfig {
476
476
  * The criteria that the judge will use to evaluate the conversation.
477
477
  */
478
478
  criteria: string[];
479
+ /**
480
+ * Optional span collector for telemetry. Defaults to global singleton.
481
+ */
482
+ spanCollector?: JudgeSpanCollector;
479
483
  }
480
484
  /**
481
485
  * Agent that evaluates conversations against success criteria.
@@ -489,17 +493,16 @@ interface JudgeAgentConfig extends TestingAgentConfig {
489
493
  declare class JudgeAgent extends JudgeAgentAdapter {
490
494
  private readonly cfg;
491
495
  private logger;
496
+ private readonly spanCollector;
492
497
  role: AgentRole;
493
498
  criteria: string[];
499
+ /**
500
+ * LLM invocation function. Can be overridden to customize LLM behavior.
501
+ */
502
+ invokeLLM: (params: InvokeLLMParams) => Promise<InvokeLLMResult>;
494
503
  constructor(cfg: JudgeAgentConfig);
495
- call(input: AgentInput): Promise<never[] | {
496
- success: boolean;
497
- messages: CoreMessage[];
498
- reasoning: string;
499
- metCriteria: string[];
500
- unmetCriteria: string[];
501
- }>;
502
- private generateText;
504
+ call(input: AgentInput): Promise<JudgeResult | null>;
505
+ private getOpenTelemetryTracesDigest;
503
506
  }
504
507
  /**
505
508
  * Factory function for creating JudgeAgent instances.
@@ -553,15 +556,54 @@ declare class JudgeAgent extends JudgeAgentAdapter {
553
556
  */
554
557
  declare const judgeAgent: (cfg: JudgeAgentConfig) => JudgeAgent;
555
558
 
559
+ /**
560
+ * Transforms OpenTelemetry spans into a complete plain-text digest for judge evaluation.
561
+ * Deduplicates repeated string content to reduce token usage.
562
+ */
563
+ declare class JudgeSpanDigestFormatter {
564
+ private readonly logger;
565
+ private readonly deduplicator;
566
+ /**
567
+ * Formats spans into a complete digest with full content and nesting.
568
+ * @param spans - All spans for a thread
569
+ * @returns Plain text digest
570
+ */
571
+ format(spans: ReadableSpan[]): string;
572
+ private sortByStartTime;
573
+ private buildHierarchy;
574
+ private renderNode;
575
+ private getTreePrefix;
576
+ private getAttrIndent;
577
+ private cleanAttributes;
578
+ private formatValue;
579
+ private transformValue;
580
+ private transformString;
581
+ private looksLikeJson;
582
+ private hrTimeToMs;
583
+ private calculateSpanDuration;
584
+ private calculateTotalDuration;
585
+ private formatDuration;
586
+ private formatTimestamp;
587
+ private getStatusIndicator;
588
+ private collectErrors;
589
+ }
590
+ /**
591
+ * Singleton instance for convenience.
592
+ */
593
+ declare const judgeSpanDigestFormatter: JudgeSpanDigestFormatter;
594
+
556
595
  declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
557
596
  private readonly cfg?;
558
597
  private logger;
598
+ /**
599
+ * LLM invocation function. Can be overridden to customize LLM behavior.
600
+ */
601
+ invokeLLM: (params: InvokeLLMParams) => Promise<InvokeLLMResult>;
559
602
  constructor(cfg?: TestingAgentConfig | undefined);
560
603
  call: (input: AgentInput) => Promise<{
561
604
  role: "user";
562
605
  content: string;
563
606
  }>;
564
- private generateText;
565
607
  }
566
608
  /**
567
609
  * Agent that simulates realistic user behavior in scenario conversations.
@@ -654,14 +696,169 @@ declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
654
696
  */
655
697
  declare const userSimulatorAgent: (config?: TestingAgentConfig) => UserSimulatorAgent;
656
698
 
699
+ /**
700
+ * Event emitted when an audio response is completed
701
+ */
702
+ interface AudioResponseEvent {
703
+ transcript: string;
704
+ audio: string;
705
+ }
706
+
707
+ /**
708
+ * Realtime Agent Adapter for Scenario Testing
709
+ *
710
+ * Adapts a connected RealtimeSession to the Scenario framework interface.
711
+ * The session must be created and connected before passing to this adapter.
712
+ *
713
+ * This ensures we test the REAL agent, not a mock, using the same session
714
+ * creation pattern as the browser client.
715
+ */
716
+
717
+ /**
718
+ * Configuration for RealtimeAgentAdapter
719
+ */
720
+ interface RealtimeAgentAdapterConfig {
721
+ /**
722
+ * The role of the agent
723
+ */
724
+ role: AgentRole;
725
+ /**
726
+ * A connected RealtimeSession instance
727
+ *
728
+ * The session should be created using your agent's session creator function
729
+ * and connected before passing to this adapter.
730
+ *
731
+ * @example
732
+ * ```typescript
733
+ * const session = createVegetarianRecipeSession();
734
+ * await session.connect({ apiKey: process.env.OPENAI_API_KEY });
735
+ * const adapter = new RealtimeAgentAdapter({
736
+ * session,
737
+ * role: AgentRole.AGENT,
738
+ * agentName: "Vegetarian Recipe Assistant"
739
+ * });
740
+ * ```
741
+ */
742
+ session: RealtimeSession;
743
+ /**
744
+ * Name of the agent (for logging/identification)
745
+ */
746
+ agentName: string;
747
+ /**
748
+ * Timeout for waiting for agent response (ms)
749
+ * @default 30000
750
+ */
751
+ responseTimeout?: number;
752
+ }
753
+ /**
754
+ * Adapter that connects Scenario testing framework to OpenAI Realtime API
755
+ *
756
+ * This adapter wraps a connected RealtimeSession to provide the Scenario
757
+ * framework interface. The session must be created and connected externally,
758
+ * ensuring the same session creation pattern is used in both browser and tests.
759
+ *
760
+ * @example
761
+ * ```typescript
762
+ * // In beforeAll
763
+ * const session = createVegetarianRecipeSession();
764
+ * await session.connect({ apiKey: process.env.OPENAI_API_KEY });
765
+ * const adapter = new RealtimeAgentAdapter({
766
+ * session,
767
+ * role: AgentRole.AGENT
768
+ * });
769
+ *
770
+ * // In test
771
+ * await scenario.run({
772
+ * agents: [adapter, scenario.userSimulatorAgent()],
773
+ * script: [scenario.user("quick recipe"), scenario.agent()]
774
+ * });
775
+ *
776
+ * // In afterAll
777
+ * session.close();
778
+ * ```
779
+ */
780
+ declare class RealtimeAgentAdapter extends AgentAdapter {
781
+ private config;
782
+ role: AgentRole;
783
+ name: string;
784
+ private session;
785
+ private eventHandler;
786
+ private messageProcessor;
787
+ private responseFormatter;
788
+ private audioEvents;
789
+ /**
790
+ * Creates a new RealtimeAgentAdapter instance
791
+ *
792
+ * The session can be either connected or unconnected.
793
+ * If unconnected, call connect() with an API key before use.
794
+ *
795
+ * @param config - Configuration for the realtime agent adapter
796
+ */
797
+ constructor(config: RealtimeAgentAdapterConfig);
798
+ /**
799
+ * Get the connect method from the session
800
+ */
801
+ connect(params?: Parameters<RealtimeSession["connect"]>[0] | undefined): Promise<void>;
802
+ /**
803
+ * Closes the session connection
804
+ */
805
+ disconnect(): Promise<void>;
806
+ /**
807
+ * Process input and generate response (implements AgentAdapter interface)
808
+ *
809
+ * This is called by Scenario framework for each agent turn.
810
+ * Handles both text and audio input, returns audio message with transcript.
811
+ *
812
+ * @param input - Scenario agent input with message history
813
+ * @returns Agent response as audio message or text
814
+ */
815
+ call(input: AgentInput): Promise<AgentReturnTypes>;
816
+ /**
817
+ * Handles the initial response when no user message exists
818
+ */
819
+ private handleInitialResponse;
820
+ /**
821
+ * Handles audio input from the user
822
+ */
823
+ private handleAudioInput;
824
+ /**
825
+ * Handles text input from the user
826
+ */
827
+ private handleTextInput;
828
+ /**
829
+ * Subscribe to audio response events
830
+ *
831
+ * @param callback - Function called when an audio response completes
832
+ */
833
+ onAudioResponse(callback: (event: AudioResponseEvent) => void): void;
834
+ /**
835
+ * Remove audio response listener
836
+ *
837
+ * @param callback - The callback function to remove
838
+ */
839
+ offAudioResponse(callback: (event: AudioResponseEvent) => void): void;
840
+ }
841
+
842
+ type agents_AudioResponseEvent = AudioResponseEvent;
657
843
  type agents_FinishTestArgs = FinishTestArgs;
844
+ type agents_InvokeLLMParams = InvokeLLMParams;
845
+ type agents_InvokeLLMResult = InvokeLLMResult;
658
846
  type agents_JudgeAgentConfig = JudgeAgentConfig;
847
+ type agents_JudgeResult = JudgeResult;
848
+ type agents_JudgeSpanCollector = JudgeSpanCollector;
849
+ declare const agents_JudgeSpanCollector: typeof JudgeSpanCollector;
850
+ type agents_JudgeSpanDigestFormatter = JudgeSpanDigestFormatter;
851
+ declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
852
+ type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
853
+ declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
854
+ type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
659
855
  type agents_TestingAgentConfig = TestingAgentConfig;
660
- type agents_TestingAgentInferenceConfig = TestingAgentInferenceConfig;
661
856
  declare const agents_judgeAgent: typeof judgeAgent;
857
+ declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
858
+ declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
662
859
  declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
663
860
  declare namespace agents {
664
- export { type agents_FinishTestArgs as FinishTestArgs, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_TestingAgentConfig as TestingAgentConfig, type agents_TestingAgentInferenceConfig as TestingAgentInferenceConfig, agents_judgeAgent as judgeAgent, agents_userSimulatorAgent as userSimulatorAgent };
861
+ export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
665
862
  }
666
863
 
667
864
  /**
@@ -690,26 +887,26 @@ declare enum ScenarioRunStatus {
690
887
  * Discriminated union of all possible scenario event types.
691
888
  * Enables type-safe handling of different event types based on the 'type' field.
692
889
  */
693
- declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<{
694
- timestamp: z.ZodNumber;
695
- rawEvent: z.ZodOptional<z.ZodAny>;
696
- batchRunId: z.ZodString;
697
- scenarioId: z.ZodString;
698
- scenarioRunId: z.ZodString;
699
- scenarioSetId: z.ZodDefault<z.ZodOptional<z.ZodString>>;
890
+ declare const scenarioEventSchema: z$1.ZodDiscriminatedUnion<"type", [z$1.ZodObject<{
891
+ timestamp: z$1.ZodNumber;
892
+ rawEvent: z$1.ZodOptional<z$1.ZodAny>;
893
+ batchRunId: z$1.ZodString;
894
+ scenarioId: z$1.ZodString;
895
+ scenarioRunId: z$1.ZodString;
896
+ scenarioSetId: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodString>>;
700
897
  } & {
701
- type: z.ZodLiteral<ScenarioEventType.RUN_STARTED>;
702
- metadata: z.ZodObject<{
703
- name: z.ZodOptional<z.ZodString>;
704
- description: z.ZodOptional<z.ZodString>;
705
- }, "strip", z.ZodTypeAny, {
706
- name?: string | undefined;
898
+ type: z$1.ZodLiteral<ScenarioEventType.RUN_STARTED>;
899
+ metadata: z$1.ZodObject<{
900
+ name: z$1.ZodOptional<z$1.ZodString>;
901
+ description: z$1.ZodOptional<z$1.ZodString>;
902
+ }, "strip", z$1.ZodTypeAny, {
707
903
  description?: string | undefined;
708
- }, {
709
904
  name?: string | undefined;
905
+ }, {
710
906
  description?: string | undefined;
907
+ name?: string | undefined;
711
908
  }>;
712
- }, "strip", z.ZodTypeAny, {
909
+ }, "strip", z$1.ZodTypeAny, {
713
910
  type: ScenarioEventType.RUN_STARTED;
714
911
  timestamp: number;
715
912
  batchRunId: string;
@@ -717,8 +914,8 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
717
914
  scenarioRunId: string;
718
915
  scenarioSetId: string;
719
916
  metadata: {
720
- name?: string | undefined;
721
917
  description?: string | undefined;
918
+ name?: string | undefined;
722
919
  };
723
920
  rawEvent?: any;
724
921
  }, {
@@ -728,41 +925,41 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
728
925
  scenarioId: string;
729
926
  scenarioRunId: string;
730
927
  metadata: {
731
- name?: string | undefined;
732
928
  description?: string | undefined;
929
+ name?: string | undefined;
733
930
  };
734
931
  rawEvent?: any;
735
932
  scenarioSetId?: string | undefined;
736
- }>, z.ZodObject<{
737
- timestamp: z.ZodNumber;
738
- rawEvent: z.ZodOptional<z.ZodAny>;
739
- batchRunId: z.ZodString;
740
- scenarioId: z.ZodString;
741
- scenarioRunId: z.ZodString;
742
- scenarioSetId: z.ZodDefault<z.ZodOptional<z.ZodString>>;
933
+ }>, z$1.ZodObject<{
934
+ timestamp: z$1.ZodNumber;
935
+ rawEvent: z$1.ZodOptional<z$1.ZodAny>;
936
+ batchRunId: z$1.ZodString;
937
+ scenarioId: z$1.ZodString;
938
+ scenarioRunId: z$1.ZodString;
939
+ scenarioSetId: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodString>>;
743
940
  } & {
744
- type: z.ZodLiteral<ScenarioEventType.RUN_FINISHED>;
745
- status: z.ZodNativeEnum<typeof ScenarioRunStatus>;
746
- results: z.ZodNullable<z.ZodOptional<z.ZodObject<{
747
- verdict: z.ZodNativeEnum<typeof Verdict>;
748
- reasoning: z.ZodOptional<z.ZodString>;
749
- metCriteria: z.ZodArray<z.ZodString, "many">;
750
- unmetCriteria: z.ZodArray<z.ZodString, "many">;
751
- error: z.ZodOptional<z.ZodString>;
752
- }, "strip", z.ZodTypeAny, {
941
+ type: z$1.ZodLiteral<ScenarioEventType.RUN_FINISHED>;
942
+ status: z$1.ZodNativeEnum<typeof ScenarioRunStatus>;
943
+ results: z$1.ZodNullable<z$1.ZodOptional<z$1.ZodObject<{
944
+ verdict: z$1.ZodNativeEnum<typeof Verdict>;
945
+ reasoning: z$1.ZodOptional<z$1.ZodString>;
946
+ metCriteria: z$1.ZodArray<z$1.ZodString, "many">;
947
+ unmetCriteria: z$1.ZodArray<z$1.ZodString, "many">;
948
+ error: z$1.ZodOptional<z$1.ZodString>;
949
+ }, "strip", z$1.ZodTypeAny, {
753
950
  verdict: Verdict;
754
951
  metCriteria: string[];
755
952
  unmetCriteria: string[];
756
- reasoning?: string | undefined;
757
953
  error?: string | undefined;
954
+ reasoning?: string | undefined;
758
955
  }, {
759
956
  verdict: Verdict;
760
957
  metCriteria: string[];
761
958
  unmetCriteria: string[];
762
- reasoning?: string | undefined;
763
959
  error?: string | undefined;
960
+ reasoning?: string | undefined;
764
961
  }>>>;
765
- }, "strip", z.ZodTypeAny, {
962
+ }, "strip", z$1.ZodTypeAny, {
766
963
  type: ScenarioEventType.RUN_FINISHED;
767
964
  status: ScenarioRunStatus;
768
965
  timestamp: number;
@@ -775,8 +972,8 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
775
972
  verdict: Verdict;
776
973
  metCriteria: string[];
777
974
  unmetCriteria: string[];
778
- reasoning?: string | undefined;
779
975
  error?: string | undefined;
976
+ reasoning?: string | undefined;
780
977
  } | null | undefined;
781
978
  }, {
782
979
  type: ScenarioEventType.RUN_FINISHED;
@@ -791,19 +988,19 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
791
988
  verdict: Verdict;
792
989
  metCriteria: string[];
793
990
  unmetCriteria: string[];
794
- reasoning?: string | undefined;
795
991
  error?: string | undefined;
992
+ reasoning?: string | undefined;
796
993
  } | null | undefined;
797
- }>, z.ZodObject<{
798
- messages: z.ZodArray<z.ZodDiscriminatedUnion<"role", [z.ZodObject<z.objectUtil.extendShape<{
799
- id: z.ZodString;
800
- role: z.ZodString;
801
- content: z.ZodOptional<z.ZodString>;
802
- name: z.ZodOptional<z.ZodString>;
994
+ }>, z$1.ZodObject<{
995
+ messages: z$1.ZodArray<z$1.ZodDiscriminatedUnion<"role", [z$1.ZodObject<z$1.objectUtil.extendShape<{
996
+ id: z$1.ZodString;
997
+ role: z$1.ZodString;
998
+ content: z$1.ZodOptional<z$1.ZodString>;
999
+ name: z$1.ZodOptional<z$1.ZodString>;
803
1000
  }, {
804
- role: z.ZodLiteral<"developer">;
805
- content: z.ZodString;
806
- }>, "strip", z.ZodTypeAny, {
1001
+ role: z$1.ZodLiteral<"developer">;
1002
+ content: z$1.ZodString;
1003
+ }>, "strip", z$1.ZodTypeAny, {
807
1004
  id: string;
808
1005
  role: "developer";
809
1006
  content: string;
@@ -813,15 +1010,15 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
813
1010
  role: "developer";
814
1011
  content: string;
815
1012
  name?: string | undefined;
816
- }>, z.ZodObject<z.objectUtil.extendShape<{
817
- id: z.ZodString;
818
- role: z.ZodString;
819
- content: z.ZodOptional<z.ZodString>;
820
- name: z.ZodOptional<z.ZodString>;
1013
+ }>, z$1.ZodObject<z$1.objectUtil.extendShape<{
1014
+ id: z$1.ZodString;
1015
+ role: z$1.ZodString;
1016
+ content: z$1.ZodOptional<z$1.ZodString>;
1017
+ name: z$1.ZodOptional<z$1.ZodString>;
821
1018
  }, {
822
- role: z.ZodLiteral<"system">;
823
- content: z.ZodString;
824
- }>, "strip", z.ZodTypeAny, {
1019
+ role: z$1.ZodLiteral<"system">;
1020
+ content: z$1.ZodString;
1021
+ }>, "strip", z$1.ZodTypeAny, {
825
1022
  id: string;
826
1023
  role: "system";
827
1024
  content: string;
@@ -831,28 +1028,28 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
831
1028
  role: "system";
832
1029
  content: string;
833
1030
  name?: string | undefined;
834
- }>, z.ZodObject<z.objectUtil.extendShape<{
835
- id: z.ZodString;
836
- role: z.ZodString;
837
- content: z.ZodOptional<z.ZodString>;
838
- name: z.ZodOptional<z.ZodString>;
1031
+ }>, z$1.ZodObject<z$1.objectUtil.extendShape<{
1032
+ id: z$1.ZodString;
1033
+ role: z$1.ZodString;
1034
+ content: z$1.ZodOptional<z$1.ZodString>;
1035
+ name: z$1.ZodOptional<z$1.ZodString>;
839
1036
  }, {
840
- role: z.ZodLiteral<"assistant">;
841
- content: z.ZodOptional<z.ZodString>;
842
- toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
843
- id: z.ZodString;
844
- type: z.ZodLiteral<"function">;
845
- function: z.ZodObject<{
846
- name: z.ZodString;
847
- arguments: z.ZodString;
848
- }, "strip", z.ZodTypeAny, {
1037
+ role: z$1.ZodLiteral<"assistant">;
1038
+ content: z$1.ZodOptional<z$1.ZodString>;
1039
+ toolCalls: z$1.ZodOptional<z$1.ZodArray<z$1.ZodObject<{
1040
+ id: z$1.ZodString;
1041
+ type: z$1.ZodLiteral<"function">;
1042
+ function: z$1.ZodObject<{
1043
+ name: z$1.ZodString;
1044
+ arguments: z$1.ZodString;
1045
+ }, "strip", z$1.ZodTypeAny, {
849
1046
  name: string;
850
1047
  arguments: string;
851
1048
  }, {
852
1049
  name: string;
853
1050
  arguments: string;
854
1051
  }>;
855
- }, "strip", z.ZodTypeAny, {
1052
+ }, "strip", z$1.ZodTypeAny, {
856
1053
  function: {
857
1054
  name: string;
858
1055
  arguments: string;
@@ -867,7 +1064,7 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
867
1064
  type: "function";
868
1065
  id: string;
869
1066
  }>, "many">>;
870
- }>, "strip", z.ZodTypeAny, {
1067
+ }>, "strip", z$1.ZodTypeAny, {
871
1068
  id: string;
872
1069
  role: "assistant";
873
1070
  name?: string | undefined;
@@ -893,15 +1090,15 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
893
1090
  type: "function";
894
1091
  id: string;
895
1092
  }[] | undefined;
896
- }>, z.ZodObject<z.objectUtil.extendShape<{
897
- id: z.ZodString;
898
- role: z.ZodString;
899
- content: z.ZodOptional<z.ZodString>;
900
- name: z.ZodOptional<z.ZodString>;
1093
+ }>, z$1.ZodObject<z$1.objectUtil.extendShape<{
1094
+ id: z$1.ZodString;
1095
+ role: z$1.ZodString;
1096
+ content: z$1.ZodOptional<z$1.ZodString>;
1097
+ name: z$1.ZodOptional<z$1.ZodString>;
901
1098
  }, {
902
- role: z.ZodLiteral<"user">;
903
- content: z.ZodString;
904
- }>, "strip", z.ZodTypeAny, {
1099
+ role: z$1.ZodLiteral<"user">;
1100
+ content: z$1.ZodString;
1101
+ }>, "strip", z$1.ZodTypeAny, {
905
1102
  id: string;
906
1103
  role: "user";
907
1104
  content: string;
@@ -911,12 +1108,12 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
911
1108
  role: "user";
912
1109
  content: string;
913
1110
  name?: string | undefined;
914
- }>, z.ZodObject<{
915
- id: z.ZodString;
916
- content: z.ZodString;
917
- role: z.ZodLiteral<"tool">;
918
- toolCallId: z.ZodString;
919
- }, "strip", z.ZodTypeAny, {
1111
+ }>, z$1.ZodObject<{
1112
+ id: z$1.ZodString;
1113
+ content: z$1.ZodString;
1114
+ role: z$1.ZodLiteral<"tool">;
1115
+ toolCallId: z$1.ZodString;
1116
+ }, "strip", z$1.ZodTypeAny, {
920
1117
  id: string;
921
1118
  role: "tool";
922
1119
  content: string;
@@ -928,14 +1125,14 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
928
1125
  toolCallId: string;
929
1126
  }>]>, "many">;
930
1127
  } & {
931
- timestamp: z.ZodNumber;
932
- rawEvent: z.ZodOptional<z.ZodAny>;
933
- batchRunId: z.ZodString;
934
- scenarioId: z.ZodString;
935
- scenarioRunId: z.ZodString;
936
- scenarioSetId: z.ZodDefault<z.ZodOptional<z.ZodString>>;
937
- type: z.ZodLiteral<ScenarioEventType.MESSAGE_SNAPSHOT>;
938
- }, "strip", z.ZodTypeAny, {
1128
+ timestamp: z$1.ZodNumber;
1129
+ rawEvent: z$1.ZodOptional<z$1.ZodAny>;
1130
+ batchRunId: z$1.ZodString;
1131
+ scenarioId: z$1.ZodString;
1132
+ scenarioRunId: z$1.ZodString;
1133
+ scenarioSetId: z$1.ZodDefault<z$1.ZodOptional<z$1.ZodString>>;
1134
+ type: z$1.ZodLiteral<ScenarioEventType.MESSAGE_SNAPSHOT>;
1135
+ }, "strip", z$1.ZodTypeAny, {
939
1136
  type: ScenarioEventType.MESSAGE_SNAPSHOT;
940
1137
  messages: ({
941
1138
  id: string;
@@ -1020,7 +1217,7 @@ declare const scenarioEventSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<
1020
1217
  rawEvent?: any;
1021
1218
  scenarioSetId?: string | undefined;
1022
1219
  }>]>;
1023
- type ScenarioEvent = z.infer<typeof scenarioEventSchema>;
1220
+ type ScenarioEvent = z$1.infer<typeof scenarioEventSchema>;
1024
1221
 
1025
1222
  /**
1026
1223
  * Manages the execution of a single scenario test.
@@ -1107,8 +1304,12 @@ type ScenarioEvent = z.infer<typeof scenarioEventSchema>;
1107
1304
  * ```
1108
1305
  */
1109
1306
  declare class ScenarioExecution implements ScenarioExecutionLike {
1307
+ /** LangWatch tracer for scenario execution */
1308
+ private tracer;
1110
1309
  /** The current state of the scenario execution */
1111
1310
  private state;
1311
+ /** The final result of the scenario execution, set when a conclusion is reached */
1312
+ private _result?;
1112
1313
  /** Logger for debugging and monitoring */
1113
1314
  private logger;
1114
1315
  /** Finalized configuration with all defaults applied */
@@ -1127,10 +1328,10 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1127
1328
  * Key: agent index, Value: array of pending messages for that agent
1128
1329
  */
1129
1330
  private pendingMessages;
1130
- /** Intermediate result set by agents that make final decisions */
1131
- private partialResult;
1132
1331
  /** Accumulated execution time for each agent (for performance tracking) */
1133
1332
  private agentTimes;
1333
+ /** Current turn span for trace context management */
1334
+ private currentTurnSpan?;
1134
1335
  /** Timestamp when execution started (for total time calculation) */
1135
1336
  private totalStartTime;
1136
1337
  /** Event stream for monitoring scenario progress */
@@ -1155,9 +1356,9 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1155
1356
  /**
1156
1357
  * Gets the complete conversation history as an array of messages.
1157
1358
  *
1158
- * @returns Array of CoreMessage objects representing the full conversation
1359
+ * @returns Array of ModelMessage objects representing the full conversation
1159
1360
  */
1160
- get messages(): CoreMessage[];
1361
+ get messages(): ModelMessage[];
1161
1362
  /**
1162
1363
  * Gets the unique identifier for the conversation thread.
1163
1364
  * This ID is used to maintain conversation context across multiple runs.
@@ -1165,6 +1366,20 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1165
1366
  * @returns The thread identifier string
1166
1367
  */
1167
1368
  get threadId(): string;
1369
+ /**
1370
+ * Gets the result of the scenario execution if it has been set.
1371
+ *
1372
+ * @returns The scenario result or undefined if not yet set
1373
+ */
1374
+ get result(): ScenarioResult | undefined;
1375
+ /**
1376
+ * Sets the result of the scenario execution.
1377
+ * This is called when the scenario reaches a conclusion (success or failure).
1378
+ * Automatically includes messages, totalTime, and agentTime from the current execution context.
1379
+ *
1380
+ * @param result - The final scenario result (without messages/timing, which will be added automatically)
1381
+ */
1382
+ private setResult;
1168
1383
  /**
1169
1384
  * The total elapsed time for the scenario execution.
1170
1385
  */
@@ -1207,30 +1422,25 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1207
1422
  * - Progress to the next turn if needed
1208
1423
  * - Find the next agent that should act
1209
1424
  * - Execute that agent's response
1210
- * - Return either new messages or a final scenario result
1425
+ * - Set the result if the scenario concludes
1211
1426
  *
1212
1427
  * Note: This method is primarily for debugging or custom execution flows. Most users
1213
1428
  * will use `execute()` to run the entire scenario automatically.
1214
1429
  *
1215
- * @returns A promise that resolves with either:
1216
- * - Array of new messages added during the agent interaction, or
1217
- * - A final ScenarioResult if the interaction concludes the scenario
1218
- * @throws Error if no result is returned from the step
1430
+ * After calling this method, check `this.result` to see if the scenario has concluded.
1219
1431
  *
1220
1432
  * @example
1221
1433
  * ```typescript
1222
1434
  * const execution = new ScenarioExecution(config, script);
1223
1435
  *
1224
1436
  * // Execute one agent interaction at a time
1225
- * const messages = await execution.step();
1226
- * if (Array.isArray(messages)) {
1227
- * console.log('New messages:', messages);
1228
- * } else {
1229
- * console.log('Scenario finished:', messages.success);
1437
+ * await execution.step();
1438
+ * if (execution.result) {
1439
+ * console.log('Scenario finished:', execution.result.success);
1230
1440
  * }
1231
1441
  * ```
1232
1442
  */
1233
- step(): Promise<CoreMessage[] | ScenarioResult>;
1443
+ step(): Promise<void>;
1234
1444
  private _step;
1235
1445
  /**
1236
1446
  * Calls a specific agent to generate a response or make a decision.
@@ -1249,15 +1459,12 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1249
1459
  * After the agent responds:
1250
1460
  * - Performance timing is recorded
1251
1461
  * - Pending messages for this agent are cleared (they've been processed)
1252
- * - If the agent returns a ScenarioResult, it's returned immediately
1462
+ * - If the agent returns a ScenarioResult, it's set on this.result
1253
1463
  * - Otherwise, the agent's messages are added to the conversation and broadcast
1254
1464
  *
1255
1465
  * @param idx - The index of the agent in the agents array
1256
1466
  * @param role - The role the agent is being asked to play (USER, AGENT, or JUDGE)
1257
1467
  * @param judgmentRequest - Whether this is a judgment request (for judge agents)
1258
- * @returns A promise that resolves with either:
1259
- * - Array of messages if the agent generated a response, or
1260
- * - ScenarioResult if the agent made a final decision
1261
1468
  * @throws Error if the agent call fails
1262
1469
  */
1263
1470
  private callAgent;
@@ -1270,7 +1477,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1270
1477
  * - "assistant" messages are routed to AGENT role agents
1271
1478
  * - Other message types are added directly to the conversation
1272
1479
  *
1273
- * @param message - The CoreMessage to add to the conversation
1480
+ * @param message - The ModelMessage to add to the conversation
1274
1481
  *
1275
1482
  * @example
1276
1483
  * ```typescript
@@ -1280,7 +1487,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1280
1487
  * });
1281
1488
  * ```
1282
1489
  */
1283
- message(message: CoreMessage): Promise<void>;
1490
+ message(message: ModelMessage): Promise<void>;
1284
1491
  /**
1285
1492
  * Executes a user turn in the conversation.
1286
1493
  *
@@ -1290,7 +1497,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1290
1497
  *
1291
1498
  * This method is part of the ScenarioExecutionLike interface used by script steps.
1292
1499
  *
1293
- * @param content - Optional content for the user's message. Can be a string or CoreMessage.
1500
+ * @param content - Optional content for the user's message. Can be a string or ModelMessage.
1294
1501
  * If not provided, the user simulator agent will generate the content.
1295
1502
  *
1296
1503
  * @example
@@ -1301,14 +1508,14 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1301
1508
  * // Let user simulator generate content
1302
1509
  * await execution.user();
1303
1510
  *
1304
- * // Use a CoreMessage object
1511
+ * // Use a ModelMessage object
1305
1512
  * await execution.user({
1306
1513
  * role: "user",
1307
1514
  * content: "Tell me a joke"
1308
1515
  * });
1309
1516
  * ```
1310
1517
  */
1311
- user(content?: string | CoreMessage): Promise<void>;
1518
+ user(content?: string | ModelMessage): Promise<void>;
1312
1519
  /**
1313
1520
  * Executes an agent turn in the conversation.
1314
1521
  *
@@ -1318,7 +1525,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1318
1525
  *
1319
1526
  * This method is part of the ScenarioExecutionLike interface used by script steps.
1320
1527
  *
1321
- * @param content - Optional content for the agent's response. Can be a string or CoreMessage.
1528
+ * @param content - Optional content for the agent's response. Can be a string or ModelMessage.
1322
1529
  * If not provided, the agent under test will generate the response.
1323
1530
  *
1324
1531
  * @example
@@ -1329,14 +1536,14 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1329
1536
  * // Use provided content
1330
1537
  * await execution.agent("The weather is sunny today!");
1331
1538
  *
1332
- * // Use a CoreMessage object
1539
+ * // Use a ModelMessage object
1333
1540
  * await execution.agent({
1334
1541
  * role: "assistant",
1335
1542
  * content: "I'm here to help you with weather information."
1336
1543
  * });
1337
1544
  * ```
1338
1545
  */
1339
- agent(content?: string | CoreMessage): Promise<void>;
1546
+ agent(content?: string | ModelMessage): Promise<void>;
1340
1547
  /**
1341
1548
  * Invokes the judge agent to evaluate the current state of the conversation.
1342
1549
  *
@@ -1364,7 +1571,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1364
1571
  * const result = await execution.judge("Please consider the user's satisfaction level");
1365
1572
  * ```
1366
1573
  */
1367
- judge(content?: string | CoreMessage): Promise<ScenarioResult | null>;
1574
+ judge(content?: string | ModelMessage): Promise<ScenarioResult | null>;
1368
1575
  /**
1369
1576
  * Lets the scenario proceed automatically for a specified number of turns.
1370
1577
  *
@@ -1472,49 +1679,6 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1472
1679
  * ```
1473
1680
  */
1474
1681
  addAgentTime(agentIdx: number, time: number): void;
1475
- /**
1476
- * Checks if a partial result has been set for the scenario.
1477
- *
1478
- * This method is used internally to determine if a scenario has already reached
1479
- * a conclusion (success or failure) but hasn't been finalized yet. Partial results
1480
- * are typically set by agents that make final decisions (like judge agents) and
1481
- * are later finalized with the complete message history.
1482
- *
1483
- * @returns True if a partial result exists, false otherwise
1484
- *
1485
- * @example
1486
- * ```typescript
1487
- * // This is typically used internally by the execution engine
1488
- * if (execution.hasResult()) {
1489
- * console.log('Scenario has reached a conclusion');
1490
- * }
1491
- * ```
1492
- */
1493
- hasResult(): boolean;
1494
- /**
1495
- * Sets a partial result for the scenario.
1496
- *
1497
- * This method is used internally to store intermediate results that may be
1498
- * finalized later with the complete message history. Partial results are typically
1499
- * created by agents that make final decisions (like judge agents) and contain
1500
- * the success/failure status, reasoning, and criteria evaluation, but not the
1501
- * complete message history.
1502
- *
1503
- * @param result - The partial result without the messages field. Should include
1504
- * success status, reasoning, and criteria evaluation.
1505
- *
1506
- * @example
1507
- * ```typescript
1508
- * // This is typically called internally by agents that make final decisions
1509
- * execution.setResult({
1510
- * success: true,
1511
- * reasoning: "Agent provided accurate weather information",
1512
- * metCriteria: ["Provides accurate weather data"],
1513
- * unmetCriteria: []
1514
- * });
1515
- * ```
1516
- */
1517
- setResult(result: Omit<ScenarioResult, "messages">): void;
1518
1682
  /**
1519
1683
  * Internal method to handle script step calls to agents.
1520
1684
  *
@@ -1527,7 +1691,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1527
1691
  * - Progress to a new turn if no agent is available
1528
1692
  * - Execute the agent with the provided content or let it generate content
1529
1693
  * - Handle judgment requests for judge agents
1530
- * - Return a final result if the agent makes a decision
1694
+ * - Set the result if the agent makes a decision
1531
1695
  *
1532
1696
  * @param role - The role of the agent to call (USER, AGENT, or JUDGE)
1533
1697
  * @param content - Optional content to use instead of letting the agent generate it
@@ -1551,6 +1715,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1551
1715
  * - Starts the first turn
1552
1716
  * - Records the start time for performance tracking
1553
1717
  * - Clears any pending messages
1718
+ * - Clears the result from any previous execution
1554
1719
  */
1555
1720
  private reset;
1556
1721
  private nextAgentForRole;
@@ -1575,7 +1740,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1575
1740
  *
1576
1741
  * This method is called when the scenario execution reaches the maximum number
1577
1742
  * of turns without reaching a conclusion. It creates a failure result with
1578
- * appropriate reasoning and includes performance metrics.
1743
+ * appropriate reasoning and includes performance metrics, then sets it on this.result.
1579
1744
  *
1580
1745
  * The result includes:
1581
1746
  * - All messages from the conversation
@@ -1585,7 +1750,6 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
1585
1750
  * - Total execution time and agent response times
1586
1751
  *
1587
1752
  * @param errorMessage - Optional custom error message to use instead of the default
1588
- * @returns A ScenarioResult indicating failure due to reaching max turns
1589
1753
  */
1590
1754
  private reachedMaxTurns;
1591
1755
  private getJudgeAgent;
@@ -1692,12 +1856,25 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
1692
1856
  * Adds a message to the conversation history.
1693
1857
  *
1694
1858
  * @param message - The message to add.
1859
+ * @param traceId - Optional trace ID to associate with the message.
1695
1860
  */
1696
- addMessage(message: CoreMessage): void;
1697
- lastMessage(): CoreMessage;
1698
- lastUserMessage(): CoreUserMessage;
1699
- lastAgentMessage(): CoreAssistantMessage;
1700
- lastToolCall(toolName: string): CoreToolMessage;
1861
+ addMessage(message: CoreMessage & {
1862
+ traceId?: string;
1863
+ }): void;
1864
+ lastMessage(): ai.ModelMessage & {
1865
+ id: string;
1866
+ traceId?: string;
1867
+ };
1868
+ lastUserMessage(): ai.UserModelMessage & {
1869
+ id: string;
1870
+ traceId?: string;
1871
+ };
1872
+ lastAgentMessage(): CoreAssistantMessage & {
1873
+ traceId?: string;
1874
+ };
1875
+ lastToolCall(toolName: string): CoreToolMessage & {
1876
+ traceId?: string;
1877
+ };
1701
1878
  hasToolCall(toolName: string): boolean;
1702
1879
  }
1703
1880
 
@@ -1871,4 +2048,4 @@ declare namespace script {
1871
2048
  type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
1872
2049
  declare const scenario: ScenarioApi;
1873
2050
 
1874
- export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, DEFAULT_MAX_TURNS, DEFAULT_TEMPERATURE, DEFAULT_VERBOSE, type FinishTestArgs, JudgeAgentAdapter, type JudgeAgentConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, type TestingAgentInferenceConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
2051
+ export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };