@langwatch/scenario 0.2.12 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -8
- package/dist/{chunk-7HLDX5EL.mjs → chunk-3Z7E24UI.mjs} +52 -43
- package/dist/{chunk-OL4RFXV4.mjs → chunk-RHTLQKEJ.mjs} +1 -1
- package/dist/index.d.mts +110 -128
- package/dist/index.d.ts +110 -128
- package/dist/index.js +118 -111
- package/dist/index.mjs +23 -23
- package/dist/integrations/vitest/reporter.js +14 -10
- package/dist/integrations/vitest/reporter.mjs +7 -3
- package/dist/integrations/vitest/setup-global.js +1 -1
- package/dist/integrations/vitest/setup-global.mjs +1 -1
- package/dist/integrations/vitest/setup.js +155 -90
- package/dist/integrations/vitest/setup.mjs +2 -2
- package/package.json +8 -6
package/dist/index.mjs
CHANGED
|
@@ -17,11 +17,11 @@ import {
|
|
|
17
17
|
getBatchRunId,
|
|
18
18
|
getProjectConfig,
|
|
19
19
|
scenarioProjectConfigSchema
|
|
20
|
-
} from "./chunk-7HLDX5EL.mjs";
|
|
20
|
+
} from "./chunk-3Z7E24UI.mjs";
|
|
21
21
|
import {
|
|
22
22
|
Logger,
|
|
23
23
|
getEnv
|
|
24
|
-
} from "./chunk-OL4RFXV4.mjs";
|
|
24
|
+
} from "./chunk-RHTLQKEJ.mjs";
|
|
25
25
|
import {
|
|
26
26
|
__export
|
|
27
27
|
} from "./chunk-7P6ASYW6.mjs";
|
|
@@ -35,7 +35,7 @@ __export(agents_exports, {
|
|
|
35
35
|
|
|
36
36
|
// src/agents/judge-agent.ts
|
|
37
37
|
import { generateText, tool } from "ai";
|
|
38
|
-
import { z } from "zod";
|
|
38
|
+
import { z } from "zod/v4";
|
|
39
39
|
|
|
40
40
|
// src/agents/utils.ts
|
|
41
41
|
var toolMessageRole = "tool";
|
|
@@ -142,14 +142,14 @@ ${criteriaList}
|
|
|
142
142
|
function buildContinueTestTool() {
|
|
143
143
|
return tool({
|
|
144
144
|
description: "Continue the test with the next step",
|
|
145
|
-
|
|
145
|
+
inputSchema: z.object({})
|
|
146
146
|
});
|
|
147
147
|
}
|
|
148
148
|
function buildFinishTestTool(criteria) {
|
|
149
149
|
const criteriaNames = criteria.map(criterionToParamName);
|
|
150
150
|
return tool({
|
|
151
151
|
description: "Complete the test with a final verdict",
|
|
152
|
-
|
|
152
|
+
inputSchema: z.object({
|
|
153
153
|
criteria: z.object(
|
|
154
154
|
Object.fromEntries(
|
|
155
155
|
criteriaNames.map((name, idx) => [
|
|
@@ -207,7 +207,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
207
207
|
model: mergedConfig.model,
|
|
208
208
|
messages,
|
|
209
209
|
temperature: mergedConfig.temperature ?? 0,
|
|
210
|
-
|
|
210
|
+
maxOutputTokens: mergedConfig.maxTokens,
|
|
211
211
|
tools,
|
|
212
212
|
toolChoice
|
|
213
213
|
});
|
|
@@ -216,7 +216,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
216
216
|
const toolCall = completion.toolCalls[0];
|
|
217
217
|
switch (toolCall.toolName) {
|
|
218
218
|
case "finish_test": {
|
|
219
|
-
args = toolCall.
|
|
219
|
+
args = toolCall.input;
|
|
220
220
|
const verdict = args.verdict || "inconclusive";
|
|
221
221
|
const reasoning = args.reasoning || "No reasoning provided";
|
|
222
222
|
const criteria = args.criteria || {};
|
|
@@ -314,7 +314,7 @@ var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
|
|
|
314
314
|
model: mergedConfig.model,
|
|
315
315
|
messages: reversedMessages,
|
|
316
316
|
temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
|
|
317
|
-
|
|
317
|
+
maxOutputTokens: mergedConfig.maxTokens
|
|
318
318
|
});
|
|
319
319
|
const messageContent = completion.text;
|
|
320
320
|
if (!messageContent) {
|
|
@@ -441,9 +441,9 @@ var ScenarioExecutionState = class {
|
|
|
441
441
|
};
|
|
442
442
|
|
|
443
443
|
// src/utils/convert-core-messages-to-agui-messages.ts
|
|
444
|
-
function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
444
|
+
function convertModelMessagesToAguiMessages(modelMessages) {
|
|
445
445
|
const aguiMessages = [];
|
|
446
|
-
for (const msg of coreMessages) {
|
|
446
|
+
for (const msg of modelMessages) {
|
|
447
447
|
const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
|
|
448
448
|
switch (true) {
|
|
449
449
|
case msg.role === "system":
|
|
@@ -487,7 +487,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
487
487
|
type: "function",
|
|
488
488
|
function: {
|
|
489
489
|
name: c.toolName,
|
|
490
|
-
arguments: JSON.stringify(c.
|
|
490
|
+
arguments: JSON.stringify(c.input)
|
|
491
491
|
}
|
|
492
492
|
}))
|
|
493
493
|
});
|
|
@@ -495,11 +495,12 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
495
495
|
}
|
|
496
496
|
case msg.role === "tool":
|
|
497
497
|
msg.content.map((p, i) => {
|
|
498
|
+
var _a;
|
|
498
499
|
aguiMessages.push({
|
|
499
500
|
id: `${id}-${i}`,
|
|
500
501
|
role: "tool",
|
|
501
502
|
toolCallId: p.toolCallId,
|
|
502
|
-
content: JSON.stringify(p.
|
|
503
|
+
content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
|
|
503
504
|
});
|
|
504
505
|
});
|
|
505
506
|
break;
|
|
@@ -509,7 +510,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
509
510
|
}
|
|
510
511
|
return aguiMessages;
|
|
511
512
|
}
|
|
512
|
-
var convert_core_messages_to_agui_messages_default = convertCoreMessagesToAguiMessages;
|
|
513
|
+
var convert_core_messages_to_agui_messages_default = convertModelMessagesToAguiMessages;
|
|
513
514
|
|
|
514
515
|
// src/execution/scenario-execution.ts
|
|
515
516
|
var ScenarioExecution = class {
|
|
@@ -575,7 +576,7 @@ var ScenarioExecution = class {
|
|
|
575
576
|
/**
|
|
576
577
|
* Gets the complete conversation history as an array of messages.
|
|
577
578
|
*
|
|
578
|
-
* @returns Array of
|
|
579
|
+
* @returns Array of ModelMessage objects representing the full conversation
|
|
579
580
|
*/
|
|
580
581
|
get messages() {
|
|
581
582
|
return this.state.messages;
|
|
@@ -808,7 +809,7 @@ var ScenarioExecution = class {
|
|
|
808
809
|
* - "assistant" messages are routed to AGENT role agents
|
|
809
810
|
* - Other message types are added directly to the conversation
|
|
810
811
|
*
|
|
811
|
-
* @param message - The
|
|
812
|
+
* @param message - The ModelMessage to add to the conversation
|
|
812
813
|
*
|
|
813
814
|
* @example
|
|
814
815
|
* ```typescript
|
|
@@ -837,7 +838,7 @@ var ScenarioExecution = class {
|
|
|
837
838
|
*
|
|
838
839
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
839
840
|
*
|
|
840
|
-
* @param content - Optional content for the user's message. Can be a string or
|
|
841
|
+
* @param content - Optional content for the user's message. Can be a string or ModelMessage.
|
|
841
842
|
* If not provided, the user simulator agent will generate the content.
|
|
842
843
|
*
|
|
843
844
|
* @example
|
|
@@ -848,7 +849,7 @@ var ScenarioExecution = class {
|
|
|
848
849
|
* // Let user simulator generate content
|
|
849
850
|
* await execution.user();
|
|
850
851
|
*
|
|
851
|
-
* // Use a
|
|
852
|
+
* // Use a ModelMessage object
|
|
852
853
|
* await execution.user({
|
|
853
854
|
* role: "user",
|
|
854
855
|
* content: "Tell me a joke"
|
|
@@ -867,7 +868,7 @@ var ScenarioExecution = class {
|
|
|
867
868
|
*
|
|
868
869
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
869
870
|
*
|
|
870
|
-
* @param content - Optional content for the agent's response. Can be a string or
|
|
871
|
+
* @param content - Optional content for the agent's response. Can be a string or ModelMessage.
|
|
871
872
|
* If not provided, the agent under test will generate the response.
|
|
872
873
|
*
|
|
873
874
|
* @example
|
|
@@ -878,7 +879,7 @@ var ScenarioExecution = class {
|
|
|
878
879
|
* // Use provided content
|
|
879
880
|
* await execution.agent("The weather is sunny today!");
|
|
880
881
|
*
|
|
881
|
-
* // Use a
|
|
882
|
+
* // Use a ModelMessage object
|
|
882
883
|
* await execution.agent({
|
|
883
884
|
* role: "assistant",
|
|
884
885
|
* content: "I'm here to help you with weather information."
|
|
@@ -1600,14 +1601,13 @@ function formatPart(part) {
|
|
|
1600
1601
|
case "file":
|
|
1601
1602
|
return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
|
|
1602
1603
|
case "tool-call":
|
|
1603
|
-
return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.
|
|
1604
|
+
return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.input)})`;
|
|
1604
1605
|
case "tool-result":
|
|
1605
|
-
return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.
|
|
1606
|
+
return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.output)})`;
|
|
1606
1607
|
case "reasoning":
|
|
1607
1608
|
return `(reasoning): ${part.text}`;
|
|
1608
|
-
case "redacted-reasoning":
|
|
1609
|
-
return `(redacted reasoning): ${part.data}`;
|
|
1610
1609
|
default:
|
|
1610
|
+
part;
|
|
1611
1611
|
return `Unknown content: ${JSON.stringify(part)}`;
|
|
1612
1612
|
}
|
|
1613
1613
|
}
|
|
@@ -38,7 +38,7 @@ var import_path = __toESM(require("path"));
|
|
|
38
38
|
var import_chalk = __toESM(require("chalk"));
|
|
39
39
|
|
|
40
40
|
// src/config/env.ts
|
|
41
|
-
var
|
|
41
|
+
var import_v4 = require("zod/v4");
|
|
42
42
|
|
|
43
43
|
// src/config/log-levels.ts
|
|
44
44
|
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
@@ -51,37 +51,37 @@ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
|
51
51
|
var LOG_LEVELS = Object.values(LogLevel);
|
|
52
52
|
|
|
53
53
|
// src/config/env.ts
|
|
54
|
-
var envSchema =
|
|
54
|
+
var envSchema = import_v4.z.object({
|
|
55
55
|
/**
|
|
56
56
|
* LangWatch API key for event reporting.
|
|
57
57
|
* If not provided, events will not be sent to LangWatch.
|
|
58
58
|
*/
|
|
59
|
-
LANGWATCH_API_KEY:
|
|
59
|
+
LANGWATCH_API_KEY: import_v4.z.string().optional(),
|
|
60
60
|
/**
|
|
61
61
|
* LangWatch endpoint URL for event reporting.
|
|
62
62
|
* Defaults to the production LangWatch endpoint.
|
|
63
63
|
*/
|
|
64
|
-
LANGWATCH_ENDPOINT:
|
|
64
|
+
LANGWATCH_ENDPOINT: import_v4.z.string().url().optional().default("https://app.langwatch.ai"),
|
|
65
65
|
/**
|
|
66
66
|
* Disables simulation report info messages when set to any truthy value.
|
|
67
67
|
* Useful for CI/CD environments or when you want cleaner output.
|
|
68
68
|
*/
|
|
69
|
-
SCENARIO_DISABLE_SIMULATION_REPORT_INFO:
|
|
69
|
+
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v4.z.string().optional().transform((val) => Boolean(val)),
|
|
70
70
|
/**
|
|
71
71
|
* Node environment - affects logging and behavior.
|
|
72
72
|
* Defaults to 'development' if not specified.
|
|
73
73
|
*/
|
|
74
|
-
NODE_ENV:
|
|
74
|
+
NODE_ENV: import_v4.z.enum(["development", "production", "test"]).default("development"),
|
|
75
75
|
/**
|
|
76
76
|
* Case-insensitive log level for the scenario package.
|
|
77
77
|
* Defaults to 'info' if not specified.
|
|
78
78
|
*/
|
|
79
|
-
LOG_LEVEL:
|
|
79
|
+
LOG_LEVEL: import_v4.z.string().toUpperCase().pipe(import_v4.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
|
|
80
80
|
/**
|
|
81
81
|
* Scenario batch run ID.
|
|
82
82
|
* If not provided, a random ID will be generated.
|
|
83
83
|
*/
|
|
84
|
-
SCENARIO_BATCH_RUN_ID:
|
|
84
|
+
SCENARIO_BATCH_RUN_ID: import_v4.z.string().optional()
|
|
85
85
|
});
|
|
86
86
|
function getEnv() {
|
|
87
87
|
return envSchema.parse(process.env);
|
|
@@ -321,8 +321,12 @@ ${indent(parsedJson)}
|
|
|
321
321
|
console.log();
|
|
322
322
|
console.log(import_chalk.default.bold.cyan("=== Scenario Test Report ==="));
|
|
323
323
|
console.log(`Total Scenarios: ${total}`);
|
|
324
|
-
console.log(
|
|
325
|
-
|
|
324
|
+
console.log(
|
|
325
|
+
passed > 0 ? import_chalk.default.green(`Passed: ${passed}`) : `Passed: ${passed}`
|
|
326
|
+
);
|
|
327
|
+
console.log(
|
|
328
|
+
failed > 0 ? import_chalk.default.red(`Failed: ${failed}`) : `Failed: ${failed}`
|
|
329
|
+
);
|
|
326
330
|
console.log(`Success Rate: ${import_chalk.default.bold(`${successRate}%`)}`);
|
|
327
331
|
this.results.forEach((r, i) => {
|
|
328
332
|
const statusColor = r.status === "SUCCESS" ? import_chalk.default.green : import_chalk.default.red;
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
Logger
|
|
3
|
-
} from "../../chunk-OL4RFXV4.mjs";
|
|
3
|
+
} from "../../chunk-RHTLQKEJ.mjs";
|
|
4
4
|
import "../../chunk-7P6ASYW6.mjs";
|
|
5
5
|
|
|
6
6
|
// src/integrations/vitest/reporter.ts
|
|
@@ -161,8 +161,12 @@ ${indent(parsedJson)}
|
|
|
161
161
|
console.log();
|
|
162
162
|
console.log(chalk.bold.cyan("=== Scenario Test Report ==="));
|
|
163
163
|
console.log(`Total Scenarios: ${total}`);
|
|
164
|
-
console.log(
|
|
165
|
-
|
|
164
|
+
console.log(
|
|
165
|
+
passed > 0 ? chalk.green(`Passed: ${passed}`) : `Passed: ${passed}`
|
|
166
|
+
);
|
|
167
|
+
console.log(
|
|
168
|
+
failed > 0 ? chalk.red(`Failed: ${failed}`) : `Failed: ${failed}`
|
|
169
|
+
);
|
|
166
170
|
console.log(`Success Rate: ${chalk.bold(`${successRate}%`)}`);
|
|
167
171
|
this.results.forEach((r, i) => {
|
|
168
172
|
const statusColor = r.status === "SUCCESS" ? chalk.green : chalk.red;
|
|
@@ -25,6 +25,6 @@ __export(setup_global_exports, {
|
|
|
25
25
|
module.exports = __toCommonJS(setup_global_exports);
|
|
26
26
|
var import_xksuid = require("xksuid");
|
|
27
27
|
function setup() {
|
|
28
|
-
const scenarioBatchRunId = `
|
|
28
|
+
const scenarioBatchRunId = `scenariobatch_${(0, import_xksuid.generate)()}`;
|
|
29
29
|
process.env.SCENARIO_BATCH_RUN_ID = scenarioBatchRunId;
|
|
30
30
|
}
|
|
@@ -3,7 +3,7 @@ import "../../chunk-7P6ASYW6.mjs";
|
|
|
3
3
|
// src/integrations/vitest/setup-global.ts
|
|
4
4
|
import { generate } from "xksuid";
|
|
5
5
|
function setup() {
|
|
6
|
-
const scenarioBatchRunId = `
|
|
6
|
+
const scenarioBatchRunId = `scenariobatch_${generate()}`;
|
|
7
7
|
process.env.SCENARIO_BATCH_RUN_ID = scenarioBatchRunId;
|
|
8
8
|
}
|
|
9
9
|
export {
|