@langwatch/scenario 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/index.d.mts +328 -130
- package/dist/index.d.ts +328 -130
- package/dist/index.js +2144 -459
- package/dist/index.mjs +2576 -268
- package/dist/integrations/vitest/config.mjs +0 -2
- package/dist/integrations/vitest/reporter.js +22 -1
- package/dist/integrations/vitest/reporter.mjs +153 -6
- package/dist/integrations/vitest/setup-global.mjs +0 -2
- package/dist/integrations/vitest/setup.js +21 -9
- package/dist/integrations/vitest/setup.mjs +619 -18
- package/package.json +46 -31
- package/dist/chunk-3Z7E24UI.mjs +0 -548
- package/dist/chunk-7P6ASYW6.mjs +0 -9
- package/dist/chunk-RHTLQKEJ.mjs +0 -133
package/dist/index.d.mts
CHANGED
|
@@ -1,9 +1,19 @@
|
|
|
1
1
|
import * as ai from 'ai';
|
|
2
|
-
import { CoreMessage, CoreUserMessage, CoreAssistantMessage, CoreToolMessage, LanguageModel, ModelMessage } from 'ai';
|
|
2
|
+
import { CoreMessage, CoreUserMessage, CoreAssistantMessage, CoreToolMessage, LanguageModel, generateText, ModelMessage } from 'ai';
|
|
3
3
|
import { z } from 'zod/v4';
|
|
4
|
+
import { SpanProcessor, ReadableSpan } from '@opentelemetry/sdk-trace-base';
|
|
5
|
+
import { RealtimeSession } from '@openai/agents/realtime';
|
|
4
6
|
import { Observable } from 'rxjs';
|
|
5
7
|
import { z as z$1 } from 'zod';
|
|
6
8
|
|
|
9
|
+
/**
|
|
10
|
+
* The possible return types from an agent's `call` method.
|
|
11
|
+
* - string | CoreMessage | CoreMessage[]: Agent generated response
|
|
12
|
+
* - JudgeResult: Judge made a final decision
|
|
13
|
+
* - null: Judge wants to continue observing (no decision yet)
|
|
14
|
+
*/
|
|
15
|
+
type AgentReturnTypes = string | CoreMessage | CoreMessage[] | JudgeResult | null;
|
|
16
|
+
|
|
7
17
|
declare enum AgentRole {
|
|
8
18
|
USER = "User",
|
|
9
19
|
AGENT = "Agent",
|
|
@@ -43,11 +53,6 @@ interface AgentInput {
|
|
|
43
53
|
*/
|
|
44
54
|
scenarioConfig: ScenarioConfig;
|
|
45
55
|
}
|
|
46
|
-
/**
|
|
47
|
-
* The possible return types from an agent's `call` method.
|
|
48
|
-
* Can be a simple string, a single message, an array of messages, or a ScenarioResult.
|
|
49
|
-
*/
|
|
50
|
-
type AgentReturnTypes = string | CoreMessage | CoreMessage[] | ScenarioResult;
|
|
51
56
|
/**
|
|
52
57
|
* Abstract base class for integrating custom agents with the Scenario framework.
|
|
53
58
|
*
|
|
@@ -72,6 +77,7 @@ type AgentReturnTypes = string | CoreMessage | CoreMessage[] | ScenarioResult;
|
|
|
72
77
|
* ```
|
|
73
78
|
*/
|
|
74
79
|
declare abstract class AgentAdapter {
|
|
80
|
+
name?: string;
|
|
75
81
|
role: AgentRole;
|
|
76
82
|
/**
|
|
77
83
|
* Process the input and generate a response.
|
|
@@ -89,33 +95,21 @@ declare abstract class AgentAdapter {
|
|
|
89
95
|
* Abstract base class for user simulator agents.
|
|
90
96
|
* User simulator agents are responsible for generating user messages to drive the conversation.
|
|
91
97
|
*/
|
|
92
|
-
declare abstract class UserSimulatorAgentAdapter
|
|
98
|
+
declare abstract class UserSimulatorAgentAdapter extends AgentAdapter {
|
|
99
|
+
name: string;
|
|
93
100
|
role: AgentRole;
|
|
94
|
-
/**
|
|
95
|
-
* Process the input and generate a user message.
|
|
96
|
-
*
|
|
97
|
-
* @param input AgentInput containing conversation history, thread context, and scenario state.
|
|
98
|
-
* @returns The user's response.
|
|
99
|
-
*/
|
|
100
|
-
abstract call(input: AgentInput): Promise<AgentReturnTypes>;
|
|
101
101
|
}
|
|
102
102
|
/**
|
|
103
103
|
* Abstract base class for judge agents.
|
|
104
104
|
* Judge agents are responsible for evaluating the conversation and determining success or failure.
|
|
105
105
|
*/
|
|
106
|
-
declare abstract class JudgeAgentAdapter
|
|
106
|
+
declare abstract class JudgeAgentAdapter extends AgentAdapter {
|
|
107
|
+
name: string;
|
|
107
108
|
role: AgentRole;
|
|
108
109
|
/**
|
|
109
110
|
* The criteria the judge will use to evaluate the conversation.
|
|
110
111
|
*/
|
|
111
112
|
abstract criteria: string[];
|
|
112
|
-
/**
|
|
113
|
-
* Process the input and evaluate the conversation.
|
|
114
|
-
*
|
|
115
|
-
* @param input AgentInput containing conversation history, thread context, and scenario state.
|
|
116
|
-
* @returns A ScenarioResult if the conversation should end, otherwise should continue.
|
|
117
|
-
*/
|
|
118
|
-
abstract call(input: AgentInput): Promise<AgentReturnTypes>;
|
|
119
113
|
}
|
|
120
114
|
|
|
121
115
|
declare const DEFAULT_MAX_TURNS = 10;
|
|
@@ -250,7 +244,7 @@ interface ScenarioExecutionLike {
|
|
|
250
244
|
* A step in a scenario script.
|
|
251
245
|
* This is a function that takes the current state and an executor, and performs an action.
|
|
252
246
|
*/
|
|
253
|
-
type ScriptStep = (state: ScenarioExecutionStateLike, executor: ScenarioExecutionLike) => Promise<void
|
|
247
|
+
type ScriptStep = (state: ScenarioExecutionStateLike, executor: ScenarioExecutionLike) => Promise<void> | void;
|
|
254
248
|
|
|
255
249
|
/**
|
|
256
250
|
* Represents the result of a scenario execution.
|
|
@@ -349,11 +343,9 @@ interface ScenarioExecutionStateLike {
|
|
|
349
343
|
hasToolCall(toolName: string): boolean;
|
|
350
344
|
}
|
|
351
345
|
|
|
352
|
-
/** Default temperature for language model inference */
|
|
353
|
-
declare const DEFAULT_TEMPERATURE = 0;
|
|
354
346
|
declare const scenarioProjectConfigSchema: z.ZodObject<{
|
|
355
347
|
defaultModel: z.ZodOptional<z.ZodObject<{
|
|
356
|
-
model: z.ZodCustom<LanguageModel, LanguageModel>;
|
|
348
|
+
model: z.ZodCustom<ai.LanguageModel, ai.LanguageModel>;
|
|
357
349
|
temperature: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
358
350
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
359
351
|
}, z.core.$strip>>;
|
|
@@ -369,7 +361,6 @@ type domain_AgentReturnTypes = AgentReturnTypes;
|
|
|
369
361
|
type domain_AgentRole = AgentRole;
|
|
370
362
|
declare const domain_AgentRole: typeof AgentRole;
|
|
371
363
|
declare const domain_DEFAULT_MAX_TURNS: typeof DEFAULT_MAX_TURNS;
|
|
372
|
-
declare const domain_DEFAULT_TEMPERATURE: typeof DEFAULT_TEMPERATURE;
|
|
373
364
|
declare const domain_DEFAULT_VERBOSE: typeof DEFAULT_VERBOSE;
|
|
374
365
|
type domain_JudgeAgentAdapter = JudgeAgentAdapter;
|
|
375
366
|
declare const domain_JudgeAgentAdapter: typeof JudgeAgentAdapter;
|
|
@@ -386,32 +377,33 @@ declare const domain_allAgentRoles: typeof allAgentRoles;
|
|
|
386
377
|
declare const domain_defineConfig: typeof defineConfig;
|
|
387
378
|
declare const domain_scenarioProjectConfigSchema: typeof scenarioProjectConfigSchema;
|
|
388
379
|
declare namespace domain {
|
|
389
|
-
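export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_TEMPERATURE as DEFAULT_TEMPERATURE, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };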
|
|
380
|
+
export { domain_AgentAdapter as AgentAdapter, type domain_AgentInput as AgentInput, type domain_AgentReturnTypes as AgentReturnTypes, domain_AgentRole as AgentRole, domain_DEFAULT_MAX_TURNS as DEFAULT_MAX_TURNS, domain_DEFAULT_VERBOSE as DEFAULT_VERBOSE, domain_JudgeAgentAdapter as JudgeAgentAdapter, type domain_ScenarioConfig as ScenarioConfig, type domain_ScenarioConfigFinal as ScenarioConfigFinal, type domain_ScenarioExecutionLike as ScenarioExecutionLike, type domain_ScenarioExecutionStateLike as ScenarioExecutionStateLike, type domain_ScenarioProjectConfig as ScenarioProjectConfig, type domain_ScenarioResult as ScenarioResult, type domain_ScriptStep as ScriptStep, domain_UserSimulatorAgentAdapter as UserSimulatorAgentAdapter, domain_allAgentRoles as allAgentRoles, domain_defineConfig as defineConfig, domain_scenarioProjectConfigSchema as scenarioProjectConfigSchema };
|
|
390
381
|
}
|
|
391
382
|
|
|
392
383
|
/**
|
|
393
|
-
*
|
|
384
|
+
* Schema for a language model.
|
|
394
385
|
*/
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
|
|
407
|
-
|
|
408
|
-
|
|
409
|
-
|
|
410
|
-
|
|
386
|
+
declare const modelSchema: z.ZodObject<{
|
|
387
|
+
model: z.ZodCustom<LanguageModel, LanguageModel>;
|
|
388
|
+
temperature: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
|
|
389
|
+
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
390
|
+
}, z.core.$strip>;
|
|
391
|
+
type ModelConfig = z.infer<typeof modelSchema>;
|
|
392
|
+
|
|
393
|
+
/**
|
|
394
|
+
* Parameters for LLM invocation.
|
|
395
|
+
* Derived from generateText parameters for now.
|
|
396
|
+
*/
|
|
397
|
+
type InvokeLLMParams = Parameters<typeof generateText>[0];
|
|
398
|
+
/**
|
|
399
|
+
* Result from LLM invocation.
|
|
400
|
+
* Derived from generateText return type for now.
|
|
401
|
+
*/
|
|
402
|
+
type InvokeLLMResult = Pick<Awaited<ReturnType<typeof generateText>>, "text" | "content" | "toolCalls" | "toolResults">;
|
|
411
403
|
/**
|
|
412
404
|
* General configuration for a testing agent.
|
|
413
405
|
*/
|
|
414
|
-
interface TestingAgentConfig extends
|
|
406
|
+
interface TestingAgentConfig extends Partial<ModelConfig> {
|
|
415
407
|
/**
|
|
416
408
|
* The name of the agent.
|
|
417
409
|
*/
|
|
@@ -443,6 +435,35 @@ interface FinishTestArgs {
|
|
|
443
435
|
verdict: "success" | "failure" | "inconclusive";
|
|
444
436
|
}
|
|
445
437
|
|
|
438
|
+
interface JudgeResult {
|
|
439
|
+
success: boolean;
|
|
440
|
+
reasoning: string;
|
|
441
|
+
metCriteria: string[];
|
|
442
|
+
unmetCriteria: string[];
|
|
443
|
+
}
|
|
444
|
+
|
|
445
|
+
/**
|
|
446
|
+
* Collects OpenTelemetry spans for judge evaluation.
|
|
447
|
+
* Implements SpanProcessor to intercept spans as they complete.
|
|
448
|
+
*/
|
|
449
|
+
declare class JudgeSpanCollector implements SpanProcessor {
|
|
450
|
+
private spans;
|
|
451
|
+
onStart(): void;
|
|
452
|
+
onEnd(span: ReadableSpan): void;
|
|
453
|
+
forceFlush(): Promise<void>;
|
|
454
|
+
shutdown(): Promise<void>;
|
|
455
|
+
/**
|
|
456
|
+
* Retrieves all spans associated with a specific thread.
|
|
457
|
+
* @param threadId - The thread identifier to filter spans by
|
|
458
|
+
* @returns Array of spans for the given thread
|
|
459
|
+
*/
|
|
460
|
+
getSpansForThread(threadId: string): ReadableSpan[];
|
|
461
|
+
}
|
|
462
|
+
/**
|
|
463
|
+
* Singleton instance of the judge span collector.
|
|
464
|
+
*/
|
|
465
|
+
declare const judgeSpanCollector: JudgeSpanCollector;
|
|
466
|
+
|
|
446
467
|
/**
|
|
447
468
|
* Configuration for the judge agent.
|
|
448
469
|
*/
|
|
@@ -455,6 +476,10 @@ interface JudgeAgentConfig extends TestingAgentConfig {
|
|
|
455
476
|
* The criteria that the judge will use to evaluate the conversation.
|
|
456
477
|
*/
|
|
457
478
|
criteria: string[];
|
|
479
|
+
/**
|
|
480
|
+
* Optional span collector for telemetry. Defaults to global singleton.
|
|
481
|
+
*/
|
|
482
|
+
spanCollector?: JudgeSpanCollector;
|
|
458
483
|
}
|
|
459
484
|
/**
|
|
460
485
|
* Agent that evaluates conversations against success criteria.
|
|
@@ -468,17 +493,16 @@ interface JudgeAgentConfig extends TestingAgentConfig {
|
|
|
468
493
|
declare class JudgeAgent extends JudgeAgentAdapter {
|
|
469
494
|
private readonly cfg;
|
|
470
495
|
private logger;
|
|
496
|
+
private readonly spanCollector;
|
|
471
497
|
role: AgentRole;
|
|
472
498
|
criteria: string[];
|
|
499
|
+
/**
|
|
500
|
+
* LLM invocation function. Can be overridden to customize LLM behavior.
|
|
501
|
+
*/
|
|
502
|
+
invokeLLM: (params: InvokeLLMParams) => Promise<InvokeLLMResult>;
|
|
473
503
|
constructor(cfg: JudgeAgentConfig);
|
|
474
|
-
call(input: AgentInput): Promise<
|
|
475
|
-
|
|
476
|
-
messages: ai.ModelMessage[];
|
|
477
|
-
reasoning: string;
|
|
478
|
-
metCriteria: string[];
|
|
479
|
-
unmetCriteria: string[];
|
|
480
|
-
}>;
|
|
481
|
-
private generateText;
|
|
504
|
+
call(input: AgentInput): Promise<JudgeResult | null>;
|
|
505
|
+
private getOpenTelemetryTracesDigest;
|
|
482
506
|
}
|
|
483
507
|
/**
|
|
484
508
|
* Factory function for creating JudgeAgent instances.
|
|
@@ -532,15 +556,54 @@ declare class JudgeAgent extends JudgeAgentAdapter {
|
|
|
532
556
|
*/
|
|
533
557
|
declare const judgeAgent: (cfg: JudgeAgentConfig) => JudgeAgent;
|
|
534
558
|
|
|
559
|
+
/**
|
|
560
|
+
* Transforms OpenTelemetry spans into a complete plain-text digest for judge evaluation.
|
|
561
|
+
* Deduplicates repeated string content to reduce token usage.
|
|
562
|
+
*/
|
|
563
|
+
declare class JudgeSpanDigestFormatter {
|
|
564
|
+
private readonly logger;
|
|
565
|
+
private readonly deduplicator;
|
|
566
|
+
/**
|
|
567
|
+
* Formats spans into a complete digest with full content and nesting.
|
|
568
|
+
* @param spans - All spans for a thread
|
|
569
|
+
* @returns Plain text digest
|
|
570
|
+
*/
|
|
571
|
+
format(spans: ReadableSpan[]): string;
|
|
572
|
+
private sortByStartTime;
|
|
573
|
+
private buildHierarchy;
|
|
574
|
+
private renderNode;
|
|
575
|
+
private getTreePrefix;
|
|
576
|
+
private getAttrIndent;
|
|
577
|
+
private cleanAttributes;
|
|
578
|
+
private formatValue;
|
|
579
|
+
private transformValue;
|
|
580
|
+
private transformString;
|
|
581
|
+
private looksLikeJson;
|
|
582
|
+
private hrTimeToMs;
|
|
583
|
+
private calculateSpanDuration;
|
|
584
|
+
private calculateTotalDuration;
|
|
585
|
+
private formatDuration;
|
|
586
|
+
private formatTimestamp;
|
|
587
|
+
private getStatusIndicator;
|
|
588
|
+
private collectErrors;
|
|
589
|
+
}
|
|
590
|
+
/**
|
|
591
|
+
* Singleton instance for convenience.
|
|
592
|
+
*/
|
|
593
|
+
declare const judgeSpanDigestFormatter: JudgeSpanDigestFormatter;
|
|
594
|
+
|
|
535
595
|
declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
|
|
536
596
|
private readonly cfg?;
|
|
537
597
|
private logger;
|
|
598
|
+
/**
|
|
599
|
+
* LLM invocation function. Can be overridden to customize LLM behavior.
|
|
600
|
+
*/
|
|
601
|
+
invokeLLM: (params: InvokeLLMParams) => Promise<InvokeLLMResult>;
|
|
538
602
|
constructor(cfg?: TestingAgentConfig | undefined);
|
|
539
603
|
call: (input: AgentInput) => Promise<{
|
|
540
604
|
role: "user";
|
|
541
605
|
content: string;
|
|
542
606
|
}>;
|
|
543
|
-
private generateText;
|
|
544
607
|
}
|
|
545
608
|
/**
|
|
546
609
|
* Agent that simulates realistic user behavior in scenario conversations.
|
|
@@ -633,14 +696,169 @@ declare class UserSimulatorAgent extends UserSimulatorAgentAdapter {
|
|
|
633
696
|
*/
|
|
634
697
|
declare const userSimulatorAgent: (config?: TestingAgentConfig) => UserSimulatorAgent;
|
|
635
698
|
|
|
699
|
+
/**
|
|
700
|
+
* Event emitted when an audio response is completed
|
|
701
|
+
*/
|
|
702
|
+
interface AudioResponseEvent {
|
|
703
|
+
transcript: string;
|
|
704
|
+
audio: string;
|
|
705
|
+
}
|
|
706
|
+
|
|
707
|
+
/**
|
|
708
|
+
* Realtime Agent Adapter for Scenario Testing
|
|
709
|
+
*
|
|
710
|
+
* Adapts a connected RealtimeSession to the Scenario framework interface.
|
|
711
|
+
* The session must be created and connected before passing to this adapter.
|
|
712
|
+
*
|
|
713
|
+
* This ensures we test the REAL agent, not a mock, using the same session
|
|
714
|
+
* creation pattern as the browser client.
|
|
715
|
+
*/
|
|
716
|
+
|
|
717
|
+
/**
|
|
718
|
+
* Configuration for RealtimeAgentAdapter
|
|
719
|
+
*/
|
|
720
|
+
interface RealtimeAgentAdapterConfig {
|
|
721
|
+
/**
|
|
722
|
+
* The role of the agent
|
|
723
|
+
*/
|
|
724
|
+
role: AgentRole;
|
|
725
|
+
/**
|
|
726
|
+
* A connected RealtimeSession instance
|
|
727
|
+
*
|
|
728
|
+
* The session should be created using your agent's session creator function
|
|
729
|
+
* and connected before passing to this adapter.
|
|
730
|
+
*
|
|
731
|
+
* @example
|
|
732
|
+
* ```typescript
|
|
733
|
+
* const session = createVegetarianRecipeSession();
|
|
734
|
+
* await session.connect({ apiKey: process.env.OPENAI_API_KEY });
|
|
735
|
+
* const adapter = new RealtimeAgentAdapter({
|
|
736
|
+
* session,
|
|
737
|
+
* role: AgentRole.AGENT,
|
|
738
|
+
* agentName: "Vegetarian Recipe Assistant"
|
|
739
|
+
* });
|
|
740
|
+
* ```
|
|
741
|
+
*/
|
|
742
|
+
session: RealtimeSession;
|
|
743
|
+
/**
|
|
744
|
+
* Name of the agent (for logging/identification)
|
|
745
|
+
*/
|
|
746
|
+
agentName: string;
|
|
747
|
+
/**
|
|
748
|
+
* Timeout for waiting for agent response (ms)
|
|
749
|
+
* @default 30000
|
|
750
|
+
*/
|
|
751
|
+
responseTimeout?: number;
|
|
752
|
+
}
|
|
753
|
+
/**
|
|
754
|
+
* Adapter that connects Scenario testing framework to OpenAI Realtime API
|
|
755
|
+
*
|
|
756
|
+
* This adapter wraps a connected RealtimeSession to provide the Scenario
|
|
757
|
+
* framework interface. The session must be created and connected externally,
|
|
758
|
+
* ensuring the same session creation pattern is used in both browser and tests.
|
|
759
|
+
*
|
|
760
|
+
* @example
|
|
761
|
+
* ```typescript
|
|
762
|
+
* // In beforeAll
|
|
763
|
+
* const session = createVegetarianRecipeSession();
|
|
764
|
+
* await session.connect({ apiKey: process.env.OPENAI_API_KEY });
|
|
765
|
+
* const adapter = new RealtimeAgentAdapter({
|
|
766
|
+
* session,
|
|
767
|
+
* role: AgentRole.AGENT, agentName: "Vegetarian Recipe Assistant"
|
|
768
|
+
* });
|
|
769
|
+
*
|
|
770
|
+
* // In test
|
|
771
|
+
* await scenario.run({
|
|
772
|
+
* agents: [adapter, scenario.userSimulatorAgent()],
|
|
773
|
+
* script: [scenario.user("quick recipe"), scenario.agent()]
|
|
774
|
+
* });
|
|
775
|
+
*
|
|
776
|
+
* // In afterAll
|
|
777
|
+
* session.close();
|
|
778
|
+
* ```
|
|
779
|
+
*/
|
|
780
|
+
declare class RealtimeAgentAdapter extends AgentAdapter {
|
|
781
|
+
private config;
|
|
782
|
+
role: AgentRole;
|
|
783
|
+
name: string;
|
|
784
|
+
private session;
|
|
785
|
+
private eventHandler;
|
|
786
|
+
private messageProcessor;
|
|
787
|
+
private responseFormatter;
|
|
788
|
+
private audioEvents;
|
|
789
|
+
/**
|
|
790
|
+
* Creates a new RealtimeAgentAdapter instance
|
|
791
|
+
*
|
|
792
|
+
* The session can be either connected or unconnected.
|
|
793
|
+
* If unconnected, call connect() with an API key before use.
|
|
794
|
+
*
|
|
795
|
+
* @param config - Configuration for the realtime agent adapter
|
|
796
|
+
*/
|
|
797
|
+
constructor(config: RealtimeAgentAdapterConfig);
|
|
798
|
+
/**
|
|
799
|
+
* Connects the underlying session, forwarding `params` to the session's own `connect` method
|
|
800
|
+
*/
|
|
801
|
+
connect(params?: Parameters<RealtimeSession["connect"]>[0] | undefined): Promise<void>;
|
|
802
|
+
/**
|
|
803
|
+
* Closes the session connection
|
|
804
|
+
*/
|
|
805
|
+
disconnect(): Promise<void>;
|
|
806
|
+
/**
|
|
807
|
+
* Process input and generate response (implements AgentAdapter interface)
|
|
808
|
+
*
|
|
809
|
+
* This is called by Scenario framework for each agent turn.
|
|
810
|
+
* Handles both text and audio input, returns audio message with transcript.
|
|
811
|
+
*
|
|
812
|
+
* @param input - Scenario agent input with message history
|
|
813
|
+
* @returns Agent response as audio message or text
|
|
814
|
+
*/
|
|
815
|
+
call(input: AgentInput): Promise<AgentReturnTypes>;
|
|
816
|
+
/**
|
|
817
|
+
* Handles the initial response when no user message exists
|
|
818
|
+
*/
|
|
819
|
+
private handleInitialResponse;
|
|
820
|
+
/**
|
|
821
|
+
* Handles audio input from the user
|
|
822
|
+
*/
|
|
823
|
+
private handleAudioInput;
|
|
824
|
+
/**
|
|
825
|
+
* Handles text input from the user
|
|
826
|
+
*/
|
|
827
|
+
private handleTextInput;
|
|
828
|
+
/**
|
|
829
|
+
* Subscribe to audio response events
|
|
830
|
+
*
|
|
831
|
+
* @param callback - Function called when an audio response completes
|
|
832
|
+
*/
|
|
833
|
+
onAudioResponse(callback: (event: AudioResponseEvent) => void): void;
|
|
834
|
+
/**
|
|
835
|
+
* Remove audio response listener
|
|
836
|
+
*
|
|
837
|
+
* @param callback - The callback function to remove
|
|
838
|
+
*/
|
|
839
|
+
offAudioResponse(callback: (event: AudioResponseEvent) => void): void;
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
type agents_AudioResponseEvent = AudioResponseEvent;
|
|
636
843
|
type agents_FinishTestArgs = FinishTestArgs;
|
|
844
|
+
type agents_InvokeLLMParams = InvokeLLMParams;
|
|
845
|
+
type agents_InvokeLLMResult = InvokeLLMResult;
|
|
637
846
|
type agents_JudgeAgentConfig = JudgeAgentConfig;
|
|
847
|
+
type agents_JudgeResult = JudgeResult;
|
|
848
|
+
type agents_JudgeSpanCollector = JudgeSpanCollector;
|
|
849
|
+
declare const agents_JudgeSpanCollector: typeof JudgeSpanCollector;
|
|
850
|
+
type agents_JudgeSpanDigestFormatter = JudgeSpanDigestFormatter;
|
|
851
|
+
declare const agents_JudgeSpanDigestFormatter: typeof JudgeSpanDigestFormatter;
|
|
852
|
+
type agents_RealtimeAgentAdapter = RealtimeAgentAdapter;
|
|
853
|
+
declare const agents_RealtimeAgentAdapter: typeof RealtimeAgentAdapter;
|
|
854
|
+
type agents_RealtimeAgentAdapterConfig = RealtimeAgentAdapterConfig;
|
|
638
855
|
type agents_TestingAgentConfig = TestingAgentConfig;
|
|
639
|
-
type agents_TestingAgentInferenceConfig = TestingAgentInferenceConfig;
|
|
640
856
|
declare const agents_judgeAgent: typeof judgeAgent;
|
|
857
|
+
declare const agents_judgeSpanCollector: typeof judgeSpanCollector;
|
|
858
|
+
declare const agents_judgeSpanDigestFormatter: typeof judgeSpanDigestFormatter;
|
|
641
859
|
declare const agents_userSimulatorAgent: typeof userSimulatorAgent;
|
|
642
860
|
declare namespace agents {
|
|
643
|
-
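export { type agents_FinishTestArgs as FinishTestArgs, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_TestingAgentConfig as TestingAgentConfig, type agents_TestingAgentInferenceConfig as TestingAgentInferenceConfig, agents_judgeAgent as judgeAgent, agents_userSimulatorAgent as userSimulatorAgent };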
|
|
861
|
+
export { type agents_AudioResponseEvent as AudioResponseEvent, type agents_FinishTestArgs as FinishTestArgs, type agents_InvokeLLMParams as InvokeLLMParams, type agents_InvokeLLMResult as InvokeLLMResult, type agents_JudgeAgentConfig as JudgeAgentConfig, type agents_JudgeResult as JudgeResult, agents_JudgeSpanCollector as JudgeSpanCollector, agents_JudgeSpanDigestFormatter as JudgeSpanDigestFormatter, agents_RealtimeAgentAdapter as RealtimeAgentAdapter, type agents_RealtimeAgentAdapterConfig as RealtimeAgentAdapterConfig, type agents_TestingAgentConfig as TestingAgentConfig, agents_judgeAgent as judgeAgent, agents_judgeSpanCollector as judgeSpanCollector, agents_judgeSpanDigestFormatter as judgeSpanDigestFormatter, agents_userSimulatorAgent as userSimulatorAgent };
|
|
644
862
|
}
|
|
645
863
|
|
|
646
864
|
/**
|
|
@@ -682,11 +900,11 @@ declare const scenarioEventSchema: z$1.ZodDiscriminatedUnion<"type", [z$1.ZodObj
|
|
|
682
900
|
name: z$1.ZodOptional<z$1.ZodString>;
|
|
683
901
|
description: z$1.ZodOptional<z$1.ZodString>;
|
|
684
902
|
}, "strip", z$1.ZodTypeAny, {
|
|
685
|
-
name?: string | undefined;
|
|
686
903
|
description?: string | undefined;
|
|
687
|
-
}, {
|
|
688
904
|
name?: string | undefined;
|
|
905
|
+
}, {
|
|
689
906
|
description?: string | undefined;
|
|
907
|
+
name?: string | undefined;
|
|
690
908
|
}>;
|
|
691
909
|
}, "strip", z$1.ZodTypeAny, {
|
|
692
910
|
type: ScenarioEventType.RUN_STARTED;
|
|
@@ -696,8 +914,8 @@ declare const scenarioEventSchema: z$1.ZodDiscriminatedUnion<"type", [z$1.ZodObj
|
|
|
696
914
|
scenarioRunId: string;
|
|
697
915
|
scenarioSetId: string;
|
|
698
916
|
metadata: {
|
|
699
|
-
name?: string | undefined;
|
|
700
917
|
description?: string | undefined;
|
|
918
|
+
name?: string | undefined;
|
|
701
919
|
};
|
|
702
920
|
rawEvent?: any;
|
|
703
921
|
}, {
|
|
@@ -707,8 +925,8 @@ declare const scenarioEventSchema: z$1.ZodDiscriminatedUnion<"type", [z$1.ZodObj
|
|
|
707
925
|
scenarioId: string;
|
|
708
926
|
scenarioRunId: string;
|
|
709
927
|
metadata: {
|
|
710
|
-
name?: string | undefined;
|
|
711
928
|
description?: string | undefined;
|
|
929
|
+
name?: string | undefined;
|
|
712
930
|
};
|
|
713
931
|
rawEvent?: any;
|
|
714
932
|
scenarioSetId?: string | undefined;
|
|
@@ -1086,8 +1304,12 @@ type ScenarioEvent = z$1.infer<typeof scenarioEventSchema>;
|
|
|
1086
1304
|
* ```
|
|
1087
1305
|
*/
|
|
1088
1306
|
declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
1307
|
+
/** LangWatch tracer for scenario execution */
|
|
1308
|
+
private tracer;
|
|
1089
1309
|
/** The current state of the scenario execution */
|
|
1090
1310
|
private state;
|
|
1311
|
+
/** The final result of the scenario execution, set when a conclusion is reached */
|
|
1312
|
+
private _result?;
|
|
1091
1313
|
/** Logger for debugging and monitoring */
|
|
1092
1314
|
private logger;
|
|
1093
1315
|
/** Finalized configuration with all defaults applied */
|
|
@@ -1106,10 +1328,10 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1106
1328
|
* Key: agent index, Value: array of pending messages for that agent
|
|
1107
1329
|
*/
|
|
1108
1330
|
private pendingMessages;
|
|
1109
|
-
/** Intermediate result set by agents that make final decisions */
|
|
1110
|
-
private partialResult;
|
|
1111
1331
|
/** Accumulated execution time for each agent (for performance tracking) */
|
|
1112
1332
|
private agentTimes;
|
|
1333
|
+
/** Current turn span for trace context management */
|
|
1334
|
+
private currentTurnSpan?;
|
|
1113
1335
|
/** Timestamp when execution started (for total time calculation) */
|
|
1114
1336
|
private totalStartTime;
|
|
1115
1337
|
/** Event stream for monitoring scenario progress */
|
|
@@ -1144,6 +1366,20 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1144
1366
|
* @returns The thread identifier string
|
|
1145
1367
|
*/
|
|
1146
1368
|
get threadId(): string;
|
|
1369
|
+
/**
|
|
1370
|
+
* Gets the result of the scenario execution if it has been set.
|
|
1371
|
+
*
|
|
1372
|
+
* @returns The scenario result or undefined if not yet set
|
|
1373
|
+
*/
|
|
1374
|
+
get result(): ScenarioResult | undefined;
|
|
1375
|
+
/**
|
|
1376
|
+
* Sets the result of the scenario execution.
|
|
1377
|
+
* This is called when the scenario reaches a conclusion (success or failure).
|
|
1378
|
+
* Automatically includes messages, totalTime, and agentTime from the current execution context.
|
|
1379
|
+
*
|
|
1380
|
+
* @param result - The final scenario result (without messages/timing, which will be added automatically)
|
|
1381
|
+
*/
|
|
1382
|
+
private setResult;
|
|
1147
1383
|
/**
|
|
1148
1384
|
* The total elapsed time for the scenario execution.
|
|
1149
1385
|
*/
|
|
@@ -1186,30 +1422,25 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1186
1422
|
* - Progress to the next turn if needed
|
|
1187
1423
|
* - Find the next agent that should act
|
|
1188
1424
|
* - Execute that agent's response
|
|
1189
|
-
* -
|
|
1425
|
+
* - Set the result if the scenario concludes
|
|
1190
1426
|
*
|
|
1191
1427
|
* Note: This method is primarily for debugging or custom execution flows. Most users
|
|
1192
1428
|
* will use `execute()` to run the entire scenario automatically.
|
|
1193
1429
|
*
|
|
1194
|
-
*
|
|
1195
|
-
* - Array of new messages added during the agent interaction, or
|
|
1196
|
-
* - A final ScenarioResult if the interaction concludes the scenario
|
|
1197
|
-
* @throws Error if no result is returned from the step
|
|
1430
|
+
* After calling this method, check `this.result` to see if the scenario has concluded.
|
|
1198
1431
|
*
|
|
1199
1432
|
* @example
|
|
1200
1433
|
* ```typescript
|
|
1201
1434
|
* const execution = new ScenarioExecution(config, script);
|
|
1202
1435
|
*
|
|
1203
1436
|
* // Execute one agent interaction at a time
|
|
1204
|
-
*
|
|
1205
|
-
* if (
|
|
1206
|
-
* console.log('
|
|
1207
|
-
* } else {
|
|
1208
|
-
* console.log('Scenario finished:', messages.success);
|
|
1437
|
+
* await execution.step();
|
|
1438
|
+
* if (execution.result) {
|
|
1439
|
+
* console.log('Scenario finished:', execution.result.success);
|
|
1209
1440
|
* }
|
|
1210
1441
|
* ```
|
|
1211
1442
|
*/
|
|
1212
|
-
step(): Promise<
|
|
1443
|
+
step(): Promise<void>;
|
|
1213
1444
|
private _step;
|
|
1214
1445
|
/**
|
|
1215
1446
|
* Calls a specific agent to generate a response or make a decision.
|
|
@@ -1228,15 +1459,12 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1228
1459
|
* After the agent responds:
|
|
1229
1460
|
* - Performance timing is recorded
|
|
1230
1461
|
* - Pending messages for this agent are cleared (they've been processed)
|
|
1231
|
-
* - If the agent returns a ScenarioResult, it's
|
|
1462
|
+
* - If the agent returns a ScenarioResult, it's set on this.result
|
|
1232
1463
|
* - Otherwise, the agent's messages are added to the conversation and broadcast
|
|
1233
1464
|
*
|
|
1234
1465
|
* @param idx - The index of the agent in the agents array
|
|
1235
1466
|
* @param role - The role the agent is being asked to play (USER, AGENT, or JUDGE)
|
|
1236
1467
|
* @param judgmentRequest - Whether this is a judgment request (for judge agents)
|
|
1237
|
-
* @returns A promise that resolves with either:
|
|
1238
|
-
* - Array of messages if the agent generated a response, or
|
|
1239
|
-
* - ScenarioResult if the agent made a final decision
|
|
1240
1468
|
* @throws Error if the agent call fails
|
|
1241
1469
|
*/
|
|
1242
1470
|
private callAgent;
|
|
@@ -1451,49 +1679,6 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1451
1679
|
* ```
|
|
1452
1680
|
*/
|
|
1453
1681
|
addAgentTime(agentIdx: number, time: number): void;
|
|
1454
|
-
/**
|
|
1455
|
-
* Checks if a partial result has been set for the scenario.
|
|
1456
|
-
*
|
|
1457
|
-
* This method is used internally to determine if a scenario has already reached
|
|
1458
|
-
* a conclusion (success or failure) but hasn't been finalized yet. Partial results
|
|
1459
|
-
* are typically set by agents that make final decisions (like judge agents) and
|
|
1460
|
-
* are later finalized with the complete message history.
|
|
1461
|
-
*
|
|
1462
|
-
* @returns True if a partial result exists, false otherwise
|
|
1463
|
-
*
|
|
1464
|
-
* @example
|
|
1465
|
-
* ```typescript
|
|
1466
|
-
* // This is typically used internally by the execution engine
|
|
1467
|
-
* if (execution.hasResult()) {
|
|
1468
|
-
* console.log('Scenario has reached a conclusion');
|
|
1469
|
-
* }
|
|
1470
|
-
* ```
|
|
1471
|
-
*/
|
|
1472
|
-
hasResult(): boolean;
|
|
1473
|
-
/**
|
|
1474
|
-
* Sets a partial result for the scenario.
|
|
1475
|
-
*
|
|
1476
|
-
* This method is used internally to store intermediate results that may be
|
|
1477
|
-
* finalized later with the complete message history. Partial results are typically
|
|
1478
|
-
* created by agents that make final decisions (like judge agents) and contain
|
|
1479
|
-
* the success/failure status, reasoning, and criteria evaluation, but not the
|
|
1480
|
-
* complete message history.
|
|
1481
|
-
*
|
|
1482
|
-
* @param result - The partial result without the messages field. Should include
|
|
1483
|
-
* success status, reasoning, and criteria evaluation.
|
|
1484
|
-
*
|
|
1485
|
-
* @example
|
|
1486
|
-
* ```typescript
|
|
1487
|
-
* // This is typically called internally by agents that make final decisions
|
|
1488
|
-
* execution.setResult({
|
|
1489
|
-
* success: true,
|
|
1490
|
-
* reasoning: "Agent provided accurate weather information",
|
|
1491
|
-
* metCriteria: ["Provides accurate weather data"],
|
|
1492
|
-
* unmetCriteria: []
|
|
1493
|
-
* });
|
|
1494
|
-
* ```
|
|
1495
|
-
*/
|
|
1496
|
-
setResult(result: Omit<ScenarioResult, "messages">): void;
|
|
1497
1682
|
/**
|
|
1498
1683
|
* Internal method to handle script step calls to agents.
|
|
1499
1684
|
*
|
|
@@ -1506,7 +1691,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1506
1691
|
* - Progress to a new turn if no agent is available
|
|
1507
1692
|
* - Execute the agent with the provided content or let it generate content
|
|
1508
1693
|
* - Handle judgment requests for judge agents
|
|
1509
|
-
* -
|
|
1694
|
+
* - Set the result if the agent makes a decision
|
|
1510
1695
|
*
|
|
1511
1696
|
* @param role - The role of the agent to call (USER, AGENT, or JUDGE)
|
|
1512
1697
|
* @param content - Optional content to use instead of letting the agent generate it
|
|
@@ -1530,6 +1715,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1530
1715
|
* - Starts the first turn
|
|
1531
1716
|
* - Records the start time for performance tracking
|
|
1532
1717
|
* - Clears any pending messages
|
|
1718
|
+
* - Clears the result from any previous execution
|
|
1533
1719
|
*/
|
|
1534
1720
|
private reset;
|
|
1535
1721
|
private nextAgentForRole;
|
|
@@ -1554,7 +1740,7 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1554
1740
|
*
|
|
1555
1741
|
* This method is called when the scenario execution reaches the maximum number
|
|
1556
1742
|
* of turns without reaching a conclusion. It creates a failure result with
|
|
1557
|
-
* appropriate reasoning and includes performance metrics.
|
|
1743
|
+
* appropriate reasoning and includes performance metrics, then sets it on this.result.
|
|
1558
1744
|
*
|
|
1559
1745
|
* The result includes:
|
|
1560
1746
|
* - All messages from the conversation
|
|
@@ -1564,7 +1750,6 @@ declare class ScenarioExecution implements ScenarioExecutionLike {
|
|
|
1564
1750
|
* - Total execution time and agent response times
|
|
1565
1751
|
*
|
|
1566
1752
|
* @param errorMessage - Optional custom error message to use instead of the default
|
|
1567
|
-
* @returns A ScenarioResult indicating failure due to reaching max turns
|
|
1568
1753
|
*/
|
|
1569
1754
|
private reachedMaxTurns;
|
|
1570
1755
|
private getJudgeAgent;
|
|
@@ -1671,12 +1856,25 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1671
1856
|
* Adds a message to the conversation history.
|
|
1672
1857
|
*
|
|
1673
1858
|
* @param message - The message to add.
|
|
1859
|
+
* @param traceId - Optional trace ID to associate with the message.
|
|
1674
1860
|
*/
|
|
1675
|
-
addMessage(message: CoreMessage
|
|
1676
|
-
|
|
1677
|
-
|
|
1678
|
-
|
|
1679
|
-
|
|
1861
|
+
addMessage(message: CoreMessage & {
|
|
1862
|
+
traceId?: string;
|
|
1863
|
+
}): void;
|
|
1864
|
+
lastMessage(): ai.ModelMessage & {
|
|
1865
|
+
id: string;
|
|
1866
|
+
traceId?: string;
|
|
1867
|
+
};
|
|
1868
|
+
lastUserMessage(): ai.UserModelMessage & {
|
|
1869
|
+
id: string;
|
|
1870
|
+
traceId?: string;
|
|
1871
|
+
};
|
|
1872
|
+
lastAgentMessage(): CoreAssistantMessage & {
|
|
1873
|
+
traceId?: string;
|
|
1874
|
+
};
|
|
1875
|
+
lastToolCall(toolName: string): CoreToolMessage & {
|
|
1876
|
+
traceId?: string;
|
|
1877
|
+
};
|
|
1680
1878
|
hasToolCall(toolName: string): boolean;
|
|
1681
1879
|
}
|
|
1682
1880
|
|
|
@@ -1850,4 +2048,4 @@ declare namespace script {
|
|
|
1850
2048
|
type ScenarioApi = typeof agents & typeof domain & typeof execution & typeof runner & typeof script;
|
|
1851
2049
|
declare const scenario: ScenarioApi;
|
|
1852
2050
|
|
|
1853
|
-
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole,
|
|
2051
|
+
export { AgentAdapter, type AgentInput, type AgentReturnTypes, AgentRole, type AudioResponseEvent, DEFAULT_MAX_TURNS, DEFAULT_VERBOSE, type FinishTestArgs, type InvokeLLMParams, type InvokeLLMResult, JudgeAgentAdapter, type JudgeAgentConfig, type JudgeResult, JudgeSpanCollector, JudgeSpanDigestFormatter, RealtimeAgentAdapter, type RealtimeAgentAdapterConfig, type ScenarioConfig, type ScenarioConfigFinal, ScenarioExecution, type ScenarioExecutionLike, ScenarioExecutionState, type ScenarioExecutionStateLike, type ScenarioProjectConfig, type ScenarioResult, type ScriptStep, type StateChangeEvent, StateChangeEventType, type TestingAgentConfig, UserSimulatorAgentAdapter, agent, allAgentRoles, scenario as default, defineConfig, fail, judge, judgeAgent, judgeSpanCollector, judgeSpanDigestFormatter, message, proceed, run, scenario, scenarioProjectConfigSchema, succeed, user, userSimulatorAgent };
|