@langwatch/scenario 0.4.0 → 0.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.mts +27 -27
- package/dist/index.d.ts +27 -27
- package/dist/index.js +6 -5
- package/dist/index.mjs +6 -5
- package/dist/integrations/vitest/setup.js +1 -1
- package/dist/integrations/vitest/setup.mjs +1 -1
- package/package.json +3 -3
package/dist/index.d.mts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as ai from 'ai';
|
|
2
|
-
import {
|
|
2
|
+
import { ModelMessage, UserModelMessage, AssistantModelMessage, ToolModelMessage, LanguageModel, generateText } from 'ai';
|
|
3
3
|
import { z } from 'zod/v4';
|
|
4
4
|
import { SpanProcessor, ReadableSpan } from '@opentelemetry/sdk-trace-base';
|
|
5
5
|
import { RealtimeSession } from '@openai/agents/realtime';
|
|
@@ -8,11 +8,11 @@ import { z as z$1 } from 'zod';
|
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* The possible return types from an agent's `call` method.
|
|
11
|
-
* - string |
|
|
11
|
+
* - string | ModelMessage | ModelMessage[]: Agent generated response
|
|
12
12
|
* - JudgeResult: Judge made a final decision
|
|
13
13
|
* - null: Judge wants to continue observing (no decision yet)
|
|
14
14
|
*/
|
|
15
|
-
type AgentReturnTypes = string |
|
|
15
|
+
type AgentReturnTypes = string | ModelMessage | ModelMessage[] | JudgeResult | null;
|
|
16
16
|
|
|
17
17
|
declare enum AgentRole {
|
|
18
18
|
USER = "User",
|
|
@@ -31,11 +31,11 @@ interface AgentInput {
|
|
|
31
31
|
/**
|
|
32
32
|
* The full history of messages in the conversation.
|
|
33
33
|
*/
|
|
34
|
-
messages:
|
|
34
|
+
messages: ModelMessage[];
|
|
35
35
|
/**
|
|
36
36
|
* New messages added since the last time this agent was called.
|
|
37
37
|
*/
|
|
38
|
-
newMessages:
|
|
38
|
+
newMessages: ModelMessage[];
|
|
39
39
|
/**
|
|
40
40
|
* The role the agent is being asked to play in this turn.
|
|
41
41
|
*/
|
|
@@ -191,7 +191,7 @@ interface ScenarioExecutionLike {
|
|
|
191
191
|
/**
|
|
192
192
|
* The history of messages in the conversation.
|
|
193
193
|
*/
|
|
194
|
-
readonly messages:
|
|
194
|
+
readonly messages: ModelMessage[];
|
|
195
195
|
/**
|
|
196
196
|
* The ID of the conversation thread.
|
|
197
197
|
*/
|
|
@@ -200,25 +200,25 @@ interface ScenarioExecutionLike {
|
|
|
200
200
|
* Adds a message to the conversation.
|
|
201
201
|
* @param message The message to add.
|
|
202
202
|
*/
|
|
203
|
-
message(message:
|
|
203
|
+
message(message: ModelMessage): Promise<void>;
|
|
204
204
|
/**
|
|
205
205
|
* Adds a user message to the conversation.
|
|
206
206
|
* If no content is provided, the user simulator will generate a message.
|
|
207
207
|
* @param content The content of the user message.
|
|
208
208
|
*/
|
|
209
|
-
user(content?: string |
|
|
209
|
+
user(content?: string | ModelMessage): Promise<void>;
|
|
210
210
|
/**
|
|
211
211
|
* Adds an agent message to the conversation.
|
|
212
212
|
* If no content is provided, the agent under test will generate a message.
|
|
213
213
|
* @param content The content of the agent message.
|
|
214
214
|
*/
|
|
215
|
-
agent(content?: string |
|
|
215
|
+
agent(content?: string | ModelMessage): Promise<void>;
|
|
216
216
|
/**
|
|
217
217
|
* Invokes the judge agent to evaluate the current state.
|
|
218
218
|
* @param content Optional message to the judge.
|
|
219
219
|
* @returns The result of the scenario if the judge makes a final decision.
|
|
220
220
|
*/
|
|
221
|
-
judge(content?: string |
|
|
221
|
+
judge(content?: string | ModelMessage): Promise<ScenarioResult | null>;
|
|
222
222
|
/**
|
|
223
223
|
* Proceeds with the scenario automatically for a number of turns.
|
|
224
224
|
* @param turns The number of turns to proceed. Defaults to running until the scenario ends.
|
|
@@ -258,7 +258,7 @@ interface ScenarioResult {
|
|
|
258
258
|
/**
|
|
259
259
|
* The sequence of messages exchanged during the scenario.
|
|
260
260
|
*/
|
|
261
|
-
messages:
|
|
261
|
+
messages: ModelMessage[];
|
|
262
262
|
/**
|
|
263
263
|
* The reasoning behind the scenario's outcome.
|
|
264
264
|
*/
|
|
@@ -299,7 +299,7 @@ interface ScenarioExecutionStateLike {
|
|
|
299
299
|
/**
|
|
300
300
|
* The sequence of messages exchanged during the scenario.
|
|
301
301
|
*/
|
|
302
|
-
get messages():
|
|
302
|
+
get messages(): ModelMessage[];
|
|
303
303
|
/**
|
|
304
304
|
* The unique identifier for the execution thread.
|
|
305
305
|
*/
|
|
@@ -313,28 +313,28 @@ interface ScenarioExecutionStateLike {
|
|
|
313
313
|
*
|
|
314
314
|
* @param message - The core message to add.
|
|
315
315
|
*/
|
|
316
|
-
addMessage(message:
|
|
316
|
+
addMessage(message: ModelMessage): void;
|
|
317
317
|
/**
|
|
318
318
|
* Retrieves the last message from the execution state.
|
|
319
319
|
* @returns The last message.
|
|
320
320
|
*/
|
|
321
|
-
lastMessage():
|
|
321
|
+
lastMessage(): ModelMessage;
|
|
322
322
|
/**
|
|
323
323
|
* Retrieves the last user message from the execution state.
|
|
324
324
|
* @returns The last user message.
|
|
325
325
|
*/
|
|
326
|
-
lastUserMessage():
|
|
326
|
+
lastUserMessage(): UserModelMessage;
|
|
327
327
|
/**
|
|
328
328
|
* Retrieves the last agent message from the execution state.
|
|
329
329
|
* @returns The last agent message.
|
|
330
330
|
*/
|
|
331
|
-
lastAgentMessage():
|
|
331
|
+
lastAgentMessage(): AssistantModelMessage;
|
|
332
332
|
/**
|
|
333
333
|
* Retrieves the last tool call message for a specific tool.
|
|
334
334
|
* @param toolName - The name of the tool.
|
|
335
335
|
* @returns The last tool call message.
|
|
336
336
|
*/
|
|
337
|
-
lastToolCall(toolName: string):
|
|
337
|
+
lastToolCall(toolName: string): ToolModelMessage;
|
|
338
338
|
/**
|
|
339
339
|
* Checks if a tool call for a specific tool exists in the execution state.
|
|
340
340
|
* @param toolName - The name of the tool.
|
|
@@ -1847,7 +1847,7 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1847
1847
|
description: string;
|
|
1848
1848
|
config: ScenarioConfig;
|
|
1849
1849
|
constructor(config: ScenarioConfig);
|
|
1850
|
-
get messages():
|
|
1850
|
+
get messages(): ModelMessage[];
|
|
1851
1851
|
get currentTurn(): number;
|
|
1852
1852
|
set currentTurn(turn: number);
|
|
1853
1853
|
get threadId(): string;
|
|
@@ -1858,10 +1858,10 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1858
1858
|
* @param message - The message to add.
|
|
1859
1859
|
* @param traceId - Optional trace ID to associate with the message.
|
|
1860
1860
|
*/
|
|
1861
|
-
addMessage(message:
|
|
1861
|
+
addMessage(message: ModelMessage & {
|
|
1862
1862
|
traceId?: string;
|
|
1863
1863
|
}): void;
|
|
1864
|
-
lastMessage():
|
|
1864
|
+
lastMessage(): ModelMessage & {
|
|
1865
1865
|
id: string;
|
|
1866
1866
|
traceId?: string;
|
|
1867
1867
|
};
|
|
@@ -1869,10 +1869,10 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1869
1869
|
id: string;
|
|
1870
1870
|
traceId?: string;
|
|
1871
1871
|
};
|
|
1872
|
-
lastAgentMessage():
|
|
1872
|
+
lastAgentMessage(): AssistantModelMessage & {
|
|
1873
1873
|
traceId?: string;
|
|
1874
1874
|
};
|
|
1875
|
-
lastToolCall(toolName: string):
|
|
1875
|
+
lastToolCall(toolName: string): ToolModelMessage & {
|
|
1876
1876
|
traceId?: string;
|
|
1877
1877
|
};
|
|
1878
1878
|
hasToolCall(toolName: string): boolean;
|
|
@@ -1957,14 +1957,14 @@ declare namespace runner {
|
|
|
1957
1957
|
/**
|
|
1958
1958
|
* Add a specific message to the conversation.
|
|
1959
1959
|
*
|
|
1960
|
-
* This function allows you to inject any
|
|
1960
|
+
* This function allows you to inject any ModelMessage compatible message directly
|
|
1961
1961
|
* into the conversation at a specific point in the script. Useful for
|
|
1962
1962
|
* simulating tool responses, system messages, or specific conversational states.
|
|
1963
1963
|
*
|
|
1964
1964
|
* @param message The message to add to the conversation.
|
|
1965
1965
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
1966
1966
|
*/
|
|
1967
|
-
declare const message: (message: CoreMessage) => ScriptStep;
|
|
1967
|
+
declare const message: (message: ModelMessage) => ScriptStep;
|
|
1968
1968
|
/**
|
|
1969
1969
|
* Generate or specify an agent response in the conversation.
|
|
1970
1970
|
*
|
|
@@ -1976,7 +1976,7 @@ declare const message: (message: CoreMessage) => ScriptStep;
|
|
|
1976
1976
|
* If undefined, the agent under test will generate content automatically.
|
|
1977
1977
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
1978
1978
|
*/
|
|
1979
|
-
declare const agent: (content?: string | CoreMessage) => ScriptStep;
|
|
1979
|
+
declare const agent: (content?: string | ModelMessage) => ScriptStep;
|
|
1980
1980
|
/**
|
|
1981
1981
|
* Invoke the judge agent to evaluate the current conversation state.
|
|
1982
1982
|
*
|
|
@@ -1988,7 +1988,7 @@ declare const agent: (content?: string | CoreMessage) => ScriptStep;
|
|
|
1988
1988
|
* the judge evaluate based on its criteria.
|
|
1989
1989
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
1990
1990
|
*/
|
|
1991
|
-
declare const judge: (content?: string | CoreMessage) => ScriptStep;
|
|
1991
|
+
declare const judge: (content?: string | ModelMessage) => ScriptStep;
|
|
1992
1992
|
/**
|
|
1993
1993
|
* Generate or specify a user message in the conversation.
|
|
1994
1994
|
*
|
|
@@ -2000,7 +2000,7 @@ declare const judge: (content?: string | CoreMessage) => ScriptStep;
|
|
|
2000
2000
|
* If undefined, the user simulator will generate content automatically.
|
|
2001
2001
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
2002
2002
|
*/
|
|
2003
|
-
declare const user: (content?: string |
|
|
2003
|
+
declare const user: (content?: string | ModelMessage) => ScriptStep;
|
|
2004
2004
|
/**
|
|
2005
2005
|
* Let the scenario proceed automatically for a specified number of turns.
|
|
2006
2006
|
*
|
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import * as ai from 'ai';
|
|
2
|
-
import {
|
|
2
|
+
import { ModelMessage, UserModelMessage, AssistantModelMessage, ToolModelMessage, LanguageModel, generateText } from 'ai';
|
|
3
3
|
import { z } from 'zod/v4';
|
|
4
4
|
import { SpanProcessor, ReadableSpan } from '@opentelemetry/sdk-trace-base';
|
|
5
5
|
import { RealtimeSession } from '@openai/agents/realtime';
|
|
@@ -8,11 +8,11 @@ import { z as z$1 } from 'zod';
|
|
|
8
8
|
|
|
9
9
|
/**
|
|
10
10
|
* The possible return types from an agent's `call` method.
|
|
11
|
-
* - string |
|
|
11
|
+
* - string | ModelMessage | ModelMessage[]: Agent generated response
|
|
12
12
|
* - JudgeResult: Judge made a final decision
|
|
13
13
|
* - null: Judge wants to continue observing (no decision yet)
|
|
14
14
|
*/
|
|
15
|
-
type AgentReturnTypes = string |
|
|
15
|
+
type AgentReturnTypes = string | ModelMessage | ModelMessage[] | JudgeResult | null;
|
|
16
16
|
|
|
17
17
|
declare enum AgentRole {
|
|
18
18
|
USER = "User",
|
|
@@ -31,11 +31,11 @@ interface AgentInput {
|
|
|
31
31
|
/**
|
|
32
32
|
* The full history of messages in the conversation.
|
|
33
33
|
*/
|
|
34
|
-
messages:
|
|
34
|
+
messages: ModelMessage[];
|
|
35
35
|
/**
|
|
36
36
|
* New messages added since the last time this agent was called.
|
|
37
37
|
*/
|
|
38
|
-
newMessages:
|
|
38
|
+
newMessages: ModelMessage[];
|
|
39
39
|
/**
|
|
40
40
|
* The role the agent is being asked to play in this turn.
|
|
41
41
|
*/
|
|
@@ -191,7 +191,7 @@ interface ScenarioExecutionLike {
|
|
|
191
191
|
/**
|
|
192
192
|
* The history of messages in the conversation.
|
|
193
193
|
*/
|
|
194
|
-
readonly messages:
|
|
194
|
+
readonly messages: ModelMessage[];
|
|
195
195
|
/**
|
|
196
196
|
* The ID of the conversation thread.
|
|
197
197
|
*/
|
|
@@ -200,25 +200,25 @@ interface ScenarioExecutionLike {
|
|
|
200
200
|
* Adds a message to the conversation.
|
|
201
201
|
* @param message The message to add.
|
|
202
202
|
*/
|
|
203
|
-
message(message:
|
|
203
|
+
message(message: ModelMessage): Promise<void>;
|
|
204
204
|
/**
|
|
205
205
|
* Adds a user message to the conversation.
|
|
206
206
|
* If no content is provided, the user simulator will generate a message.
|
|
207
207
|
* @param content The content of the user message.
|
|
208
208
|
*/
|
|
209
|
-
user(content?: string |
|
|
209
|
+
user(content?: string | ModelMessage): Promise<void>;
|
|
210
210
|
/**
|
|
211
211
|
* Adds an agent message to the conversation.
|
|
212
212
|
* If no content is provided, the agent under test will generate a message.
|
|
213
213
|
* @param content The content of the agent message.
|
|
214
214
|
*/
|
|
215
|
-
agent(content?: string |
|
|
215
|
+
agent(content?: string | ModelMessage): Promise<void>;
|
|
216
216
|
/**
|
|
217
217
|
* Invokes the judge agent to evaluate the current state.
|
|
218
218
|
* @param content Optional message to the judge.
|
|
219
219
|
* @returns The result of the scenario if the judge makes a final decision.
|
|
220
220
|
*/
|
|
221
|
-
judge(content?: string |
|
|
221
|
+
judge(content?: string | ModelMessage): Promise<ScenarioResult | null>;
|
|
222
222
|
/**
|
|
223
223
|
* Proceeds with the scenario automatically for a number of turns.
|
|
224
224
|
* @param turns The number of turns to proceed. Defaults to running until the scenario ends.
|
|
@@ -258,7 +258,7 @@ interface ScenarioResult {
|
|
|
258
258
|
/**
|
|
259
259
|
* The sequence of messages exchanged during the scenario.
|
|
260
260
|
*/
|
|
261
|
-
messages:
|
|
261
|
+
messages: ModelMessage[];
|
|
262
262
|
/**
|
|
263
263
|
* The reasoning behind the scenario's outcome.
|
|
264
264
|
*/
|
|
@@ -299,7 +299,7 @@ interface ScenarioExecutionStateLike {
|
|
|
299
299
|
/**
|
|
300
300
|
* The sequence of messages exchanged during the scenario.
|
|
301
301
|
*/
|
|
302
|
-
get messages():
|
|
302
|
+
get messages(): ModelMessage[];
|
|
303
303
|
/**
|
|
304
304
|
* The unique identifier for the execution thread.
|
|
305
305
|
*/
|
|
@@ -313,28 +313,28 @@ interface ScenarioExecutionStateLike {
|
|
|
313
313
|
*
|
|
314
314
|
* @param message - The core message to add.
|
|
315
315
|
*/
|
|
316
|
-
addMessage(message:
|
|
316
|
+
addMessage(message: ModelMessage): void;
|
|
317
317
|
/**
|
|
318
318
|
* Retrieves the last message from the execution state.
|
|
319
319
|
* @returns The last message.
|
|
320
320
|
*/
|
|
321
|
-
lastMessage():
|
|
321
|
+
lastMessage(): ModelMessage;
|
|
322
322
|
/**
|
|
323
323
|
* Retrieves the last user message from the execution state.
|
|
324
324
|
* @returns The last user message.
|
|
325
325
|
*/
|
|
326
|
-
lastUserMessage():
|
|
326
|
+
lastUserMessage(): UserModelMessage;
|
|
327
327
|
/**
|
|
328
328
|
* Retrieves the last agent message from the execution state.
|
|
329
329
|
* @returns The last agent message.
|
|
330
330
|
*/
|
|
331
|
-
lastAgentMessage():
|
|
331
|
+
lastAgentMessage(): AssistantModelMessage;
|
|
332
332
|
/**
|
|
333
333
|
* Retrieves the last tool call message for a specific tool.
|
|
334
334
|
* @param toolName - The name of the tool.
|
|
335
335
|
* @returns The last tool call message.
|
|
336
336
|
*/
|
|
337
|
-
lastToolCall(toolName: string):
|
|
337
|
+
lastToolCall(toolName: string): ToolModelMessage;
|
|
338
338
|
/**
|
|
339
339
|
* Checks if a tool call for a specific tool exists in the execution state.
|
|
340
340
|
* @param toolName - The name of the tool.
|
|
@@ -1847,7 +1847,7 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1847
1847
|
description: string;
|
|
1848
1848
|
config: ScenarioConfig;
|
|
1849
1849
|
constructor(config: ScenarioConfig);
|
|
1850
|
-
get messages():
|
|
1850
|
+
get messages(): ModelMessage[];
|
|
1851
1851
|
get currentTurn(): number;
|
|
1852
1852
|
set currentTurn(turn: number);
|
|
1853
1853
|
get threadId(): string;
|
|
@@ -1858,10 +1858,10 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1858
1858
|
* @param message - The message to add.
|
|
1859
1859
|
* @param traceId - Optional trace ID to associate with the message.
|
|
1860
1860
|
*/
|
|
1861
|
-
addMessage(message:
|
|
1861
|
+
addMessage(message: ModelMessage & {
|
|
1862
1862
|
traceId?: string;
|
|
1863
1863
|
}): void;
|
|
1864
|
-
lastMessage():
|
|
1864
|
+
lastMessage(): ModelMessage & {
|
|
1865
1865
|
id: string;
|
|
1866
1866
|
traceId?: string;
|
|
1867
1867
|
};
|
|
@@ -1869,10 +1869,10 @@ declare class ScenarioExecutionState implements ScenarioExecutionStateLike {
|
|
|
1869
1869
|
id: string;
|
|
1870
1870
|
traceId?: string;
|
|
1871
1871
|
};
|
|
1872
|
-
lastAgentMessage():
|
|
1872
|
+
lastAgentMessage(): AssistantModelMessage & {
|
|
1873
1873
|
traceId?: string;
|
|
1874
1874
|
};
|
|
1875
|
-
lastToolCall(toolName: string):
|
|
1875
|
+
lastToolCall(toolName: string): ToolModelMessage & {
|
|
1876
1876
|
traceId?: string;
|
|
1877
1877
|
};
|
|
1878
1878
|
hasToolCall(toolName: string): boolean;
|
|
@@ -1957,14 +1957,14 @@ declare namespace runner {
|
|
|
1957
1957
|
/**
|
|
1958
1958
|
* Add a specific message to the conversation.
|
|
1959
1959
|
*
|
|
1960
|
-
* This function allows you to inject any
|
|
1960
|
+
* This function allows you to inject any ModelMessage compatible message directly
|
|
1961
1961
|
* into the conversation at a specific point in the script. Useful for
|
|
1962
1962
|
* simulating tool responses, system messages, or specific conversational states.
|
|
1963
1963
|
*
|
|
1964
1964
|
* @param message The message to add to the conversation.
|
|
1965
1965
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
1966
1966
|
*/
|
|
1967
|
-
declare const message: (message: CoreMessage) => ScriptStep;
|
|
1967
|
+
declare const message: (message: ModelMessage) => ScriptStep;
|
|
1968
1968
|
/**
|
|
1969
1969
|
* Generate or specify an agent response in the conversation.
|
|
1970
1970
|
*
|
|
@@ -1976,7 +1976,7 @@ declare const message: (message: CoreMessage) => ScriptStep;
|
|
|
1976
1976
|
* If undefined, the agent under test will generate content automatically.
|
|
1977
1977
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
1978
1978
|
*/
|
|
1979
|
-
declare const agent: (content?: string | CoreMessage) => ScriptStep;
|
|
1979
|
+
declare const agent: (content?: string | ModelMessage) => ScriptStep;
|
|
1980
1980
|
/**
|
|
1981
1981
|
* Invoke the judge agent to evaluate the current conversation state.
|
|
1982
1982
|
*
|
|
@@ -1988,7 +1988,7 @@ declare const agent: (content?: string | CoreMessage) => ScriptStep;
|
|
|
1988
1988
|
* the judge evaluate based on its criteria.
|
|
1989
1989
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
1990
1990
|
*/
|
|
1991
|
-
declare const judge: (content?: string | CoreMessage) => ScriptStep;
|
|
1991
|
+
declare const judge: (content?: string | ModelMessage) => ScriptStep;
|
|
1992
1992
|
/**
|
|
1993
1993
|
* Generate or specify a user message in the conversation.
|
|
1994
1994
|
*
|
|
@@ -2000,7 +2000,7 @@ declare const judge: (content?: string | CoreMessage) => ScriptStep;
|
|
|
2000
2000
|
* If undefined, the user simulator will generate content automatically.
|
|
2001
2001
|
* @returns A ScriptStep function that can be used in scenario scripts.
|
|
2002
2002
|
*/
|
|
2003
|
-
declare const user: (content?: string |
|
|
2003
|
+
declare const user: (content?: string | ModelMessage) => ScriptStep;
|
|
2004
2004
|
/**
|
|
2005
2005
|
* Let the scenario proceed automatically for a specified number of turns.
|
|
2006
2006
|
*
|
package/dist/index.js
CHANGED
|
@@ -189,7 +189,7 @@ var DEFAULT_TEMPERATURE = 0;
|
|
|
189
189
|
var modelSchema = import_v42.z.object({
|
|
190
190
|
model: import_v42.z.custom((val) => Boolean(val), {
|
|
191
191
|
message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
|
|
192
|
-
}).describe("
|
|
192
|
+
}).describe("Language model that is used by the AI SDK Core functions."),
|
|
193
193
|
temperature: import_v42.z.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
|
|
194
194
|
maxTokens: import_v42.z.number().optional().describe("The maximum number of tokens to generate.")
|
|
195
195
|
});
|
|
@@ -455,7 +455,7 @@ var JudgeUtils = {
|
|
|
455
455
|
/**
|
|
456
456
|
* Builds a minimal transcript from messages for judge evaluation.
|
|
457
457
|
* Truncates base64 media to reduce token usage.
|
|
458
|
-
* @param messages - Array of
|
|
458
|
+
* @param messages - Array of ModelMessage from conversation
|
|
459
459
|
* @returns Plain text transcript with one message per line
|
|
460
460
|
*/
|
|
461
461
|
buildTranscriptFromMessages(messages) {
|
|
@@ -2466,13 +2466,15 @@ function convertModelMessagesToAguiMessages(modelMessages) {
|
|
|
2466
2466
|
}
|
|
2467
2467
|
case msg.role === "tool":
|
|
2468
2468
|
msg.content.map((p, i) => {
|
|
2469
|
-
|
|
2469
|
+
if ("type" in p && p.type !== "tool-result") return;
|
|
2470
2470
|
aguiMessages.push({
|
|
2471
2471
|
trace_id: msg.traceId,
|
|
2472
2472
|
id: `${id}-${i}`,
|
|
2473
2473
|
role: "tool",
|
|
2474
2474
|
toolCallId: p.toolCallId,
|
|
2475
|
-
content: JSON.stringify(
|
|
2475
|
+
content: JSON.stringify(
|
|
2476
|
+
p.output && "value" in p.output ? p.output.value : p.output
|
|
2477
|
+
)
|
|
2476
2478
|
});
|
|
2477
2479
|
});
|
|
2478
2480
|
break;
|
|
@@ -3962,7 +3964,6 @@ function formatPart(part) {
|
|
|
3962
3964
|
case "reasoning":
|
|
3963
3965
|
return `(reasoning): ${part.text}`;
|
|
3964
3966
|
default:
|
|
3965
|
-
part;
|
|
3966
3967
|
return `Unknown content: ${JSON.stringify(part)}`;
|
|
3967
3968
|
}
|
|
3968
3969
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -131,7 +131,7 @@ var DEFAULT_TEMPERATURE = 0;
|
|
|
131
131
|
var modelSchema = z2.object({
|
|
132
132
|
model: z2.custom((val) => Boolean(val), {
|
|
133
133
|
message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
|
|
134
|
-
}).describe("
|
|
134
|
+
}).describe("Language model that is used by the AI SDK Core functions."),
|
|
135
135
|
temperature: z2.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
|
|
136
136
|
maxTokens: z2.number().optional().describe("The maximum number of tokens to generate.")
|
|
137
137
|
});
|
|
@@ -397,7 +397,7 @@ var JudgeUtils = {
|
|
|
397
397
|
/**
|
|
398
398
|
* Builds a minimal transcript from messages for judge evaluation.
|
|
399
399
|
* Truncates base64 media to reduce token usage.
|
|
400
|
-
* @param messages - Array of
|
|
400
|
+
* @param messages - Array of ModelMessage from conversation
|
|
401
401
|
* @returns Plain text transcript with one message per line
|
|
402
402
|
*/
|
|
403
403
|
buildTranscriptFromMessages(messages) {
|
|
@@ -2408,13 +2408,15 @@ function convertModelMessagesToAguiMessages(modelMessages) {
|
|
|
2408
2408
|
}
|
|
2409
2409
|
case msg.role === "tool":
|
|
2410
2410
|
msg.content.map((p, i) => {
|
|
2411
|
-
|
|
2411
|
+
if ("type" in p && p.type !== "tool-result") return;
|
|
2412
2412
|
aguiMessages.push({
|
|
2413
2413
|
trace_id: msg.traceId,
|
|
2414
2414
|
id: `${id}-${i}`,
|
|
2415
2415
|
role: "tool",
|
|
2416
2416
|
toolCallId: p.toolCallId,
|
|
2417
|
-
content: JSON.stringify(
|
|
2417
|
+
content: JSON.stringify(
|
|
2418
|
+
p.output && "value" in p.output ? p.output.value : p.output
|
|
2419
|
+
)
|
|
2418
2420
|
});
|
|
2419
2421
|
});
|
|
2420
2422
|
break;
|
|
@@ -3911,7 +3913,6 @@ function formatPart(part) {
|
|
|
3911
3913
|
case "reasoning":
|
|
3912
3914
|
return `(reasoning): ${part.text}`;
|
|
3913
3915
|
default:
|
|
3914
|
-
part;
|
|
3915
3916
|
return `Unknown content: ${JSON.stringify(part)}`;
|
|
3916
3917
|
}
|
|
3917
3918
|
}
|
package/dist/integrations/vitest/setup.js
CHANGED
|
@@ -104,7 +104,7 @@ var DEFAULT_TEMPERATURE = 0;
|
|
|
104
104
|
var modelSchema = import_v42.z.object({
|
|
105
105
|
model: import_v42.z.custom((val) => Boolean(val), {
|
|
106
106
|
message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
|
|
107
|
-
}).describe("
|
|
107
|
+
}).describe("Language model that is used by the AI SDK Core functions."),
|
|
108
108
|
temperature: import_v42.z.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
|
|
109
109
|
maxTokens: import_v42.z.number().optional().describe("The maximum number of tokens to generate.")
|
|
110
110
|
});
|
package/dist/integrations/vitest/setup.mjs
CHANGED
|
@@ -87,7 +87,7 @@ var DEFAULT_TEMPERATURE = 0;
|
|
|
87
87
|
var modelSchema = z2.object({
|
|
88
88
|
model: z2.custom((val) => Boolean(val), {
|
|
89
89
|
message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
|
|
90
|
-
}).describe("
|
|
90
|
+
}).describe("Language model that is used by the AI SDK Core functions."),
|
|
91
91
|
temperature: z2.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
|
|
92
92
|
maxTokens: z2.number().optional().describe("The maximum number of tokens to generate.")
|
|
93
93
|
});
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@langwatch/scenario",
|
|
3
|
-
"version": "0.4.0",
|
|
3
|
+
"version": "0.4.1",
|
|
4
4
|
"description": "A TypeScript library for testing AI agents using scenarios",
|
|
5
5
|
"main": "dist/index.js",
|
|
6
6
|
"module": "dist/index.mjs",
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
"@ag-ui/core": "^0.0.28",
|
|
32
32
|
"@ai-sdk/openai": "^2.0.74",
|
|
33
33
|
"@openai/agents": "^0.3.3",
|
|
34
|
-
"ai": "
|
|
34
|
+
"ai": "^6.0.0",
|
|
35
35
|
"chalk": "^5.6.2",
|
|
36
36
|
"langwatch": "0.9.0",
|
|
37
37
|
"open": "11.0.0",
|
|
@@ -88,7 +88,7 @@
|
|
|
88
88
|
}
|
|
89
89
|
},
|
|
90
90
|
"peerDependencies": {
|
|
91
|
-
"ai": ">=
|
|
91
|
+
"ai": ">=6.0.0",
|
|
92
92
|
"vitest": ">=3.2.4"
|
|
93
93
|
},
|
|
94
94
|
"scripts": {
|