@langwatch/scenario 0.2.13 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +35 -8
- package/dist/{chunk-6SKQWXT7.mjs → chunk-3Z7E24UI.mjs} +26 -6
- package/dist/{chunk-OL4RFXV4.mjs → chunk-RHTLQKEJ.mjs} +1 -1
- package/dist/index.d.mts +109 -130
- package/dist/index.d.ts +109 -130
- package/dist/index.js +95 -75
- package/dist/index.mjs +23 -23
- package/dist/integrations/vitest/reporter.js +14 -10
- package/dist/integrations/vitest/reporter.mjs +7 -3
- package/dist/integrations/vitest/setup.js +70 -50
- package/dist/integrations/vitest/setup.mjs +2 -2
- package/package.json +6 -5
package/dist/index.js
CHANGED
|
@@ -67,7 +67,7 @@ __export(agents_exports, {
|
|
|
67
67
|
|
|
68
68
|
// src/agents/judge-agent.ts
|
|
69
69
|
var import_ai = require("ai");
|
|
70
|
-
var
|
|
70
|
+
var import_v43 = require("zod/v4");
|
|
71
71
|
|
|
72
72
|
// src/domain/index.ts
|
|
73
73
|
var domain_exports = {};
|
|
@@ -85,15 +85,15 @@ __export(domain_exports, {
|
|
|
85
85
|
});
|
|
86
86
|
|
|
87
87
|
// src/domain/core/config.ts
|
|
88
|
-
var
|
|
88
|
+
var import_v4 = require("zod/v4");
|
|
89
89
|
var DEFAULT_TEMPERATURE = 0;
|
|
90
|
-
var scenarioProjectConfigSchema =
|
|
91
|
-
defaultModel:
|
|
92
|
-
model:
|
|
93
|
-
temperature:
|
|
94
|
-
maxTokens:
|
|
90
|
+
var scenarioProjectConfigSchema = import_v4.z.object({
|
|
91
|
+
defaultModel: import_v4.z.object({
|
|
92
|
+
model: import_v4.z.custom(),
|
|
93
|
+
temperature: import_v4.z.number().min(0).max(1).optional().default(DEFAULT_TEMPERATURE),
|
|
94
|
+
maxTokens: import_v4.z.number().optional()
|
|
95
95
|
}).optional(),
|
|
96
|
-
headless:
|
|
96
|
+
headless: import_v4.z.boolean().optional().default(
|
|
97
97
|
typeof process !== "undefined" ? !["false", "0"].includes(process.env.SCENARIO_HEADLESS || "false") : false
|
|
98
98
|
)
|
|
99
99
|
}).strict();
|
|
@@ -183,7 +183,7 @@ var criterionToParamName = (criterion) => {
|
|
|
183
183
|
};
|
|
184
184
|
|
|
185
185
|
// src/config/env.ts
|
|
186
|
-
var
|
|
186
|
+
var import_v42 = require("zod/v4");
|
|
187
187
|
|
|
188
188
|
// src/config/log-levels.ts
|
|
189
189
|
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
@@ -196,37 +196,37 @@ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
|
196
196
|
var LOG_LEVELS = Object.values(LogLevel);
|
|
197
197
|
|
|
198
198
|
// src/config/env.ts
|
|
199
|
-
var envSchema =
|
|
199
|
+
var envSchema = import_v42.z.object({
|
|
200
200
|
/**
|
|
201
201
|
* LangWatch API key for event reporting.
|
|
202
202
|
* If not provided, events will not be sent to LangWatch.
|
|
203
203
|
*/
|
|
204
|
-
LANGWATCH_API_KEY:
|
|
204
|
+
LANGWATCH_API_KEY: import_v42.z.string().optional(),
|
|
205
205
|
/**
|
|
206
206
|
* LangWatch endpoint URL for event reporting.
|
|
207
207
|
* Defaults to the production LangWatch endpoint.
|
|
208
208
|
*/
|
|
209
|
-
LANGWATCH_ENDPOINT:
|
|
209
|
+
LANGWATCH_ENDPOINT: import_v42.z.string().url().optional().default("https://app.langwatch.ai"),
|
|
210
210
|
/**
|
|
211
211
|
* Disables simulation report info messages when set to any truthy value.
|
|
212
212
|
* Useful for CI/CD environments or when you want cleaner output.
|
|
213
213
|
*/
|
|
214
|
-
SCENARIO_DISABLE_SIMULATION_REPORT_INFO:
|
|
214
|
+
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v42.z.string().optional().transform((val) => Boolean(val)),
|
|
215
215
|
/**
|
|
216
216
|
* Node environment - affects logging and behavior.
|
|
217
217
|
* Defaults to 'development' if not specified.
|
|
218
218
|
*/
|
|
219
|
-
NODE_ENV:
|
|
219
|
+
NODE_ENV: import_v42.z.enum(["development", "production", "test"]).default("development"),
|
|
220
220
|
/**
|
|
221
221
|
* Case-insensitive log level for the scenario package.
|
|
222
222
|
* Defaults to 'info' if not specified.
|
|
223
223
|
*/
|
|
224
|
-
LOG_LEVEL:
|
|
224
|
+
LOG_LEVEL: import_v42.z.string().toUpperCase().pipe(import_v42.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
|
|
225
225
|
/**
|
|
226
226
|
* Scenario batch run ID.
|
|
227
227
|
* If not provided, a random ID will be generated.
|
|
228
228
|
*/
|
|
229
|
-
SCENARIO_BATCH_RUN_ID:
|
|
229
|
+
SCENARIO_BATCH_RUN_ID: import_v42.z.string().optional()
|
|
230
230
|
});
|
|
231
231
|
function getEnv() {
|
|
232
232
|
return envSchema.parse(process.env);
|
|
@@ -423,24 +423,24 @@ ${criteriaList}
|
|
|
423
423
|
function buildContinueTestTool() {
|
|
424
424
|
return (0, import_ai.tool)({
|
|
425
425
|
description: "Continue the test with the next step",
|
|
426
|
-
|
|
426
|
+
inputSchema: import_v43.z.object({})
|
|
427
427
|
});
|
|
428
428
|
}
|
|
429
429
|
function buildFinishTestTool(criteria) {
|
|
430
430
|
const criteriaNames = criteria.map(criterionToParamName);
|
|
431
431
|
return (0, import_ai.tool)({
|
|
432
432
|
description: "Complete the test with a final verdict",
|
|
433
|
-
|
|
434
|
-
criteria:
|
|
433
|
+
inputSchema: import_v43.z.object({
|
|
434
|
+
criteria: import_v43.z.object(
|
|
435
435
|
Object.fromEntries(
|
|
436
436
|
criteriaNames.map((name, idx) => [
|
|
437
437
|
name,
|
|
438
|
-
|
|
438
|
+
import_v43.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
|
|
439
439
|
])
|
|
440
440
|
)
|
|
441
441
|
).strict().describe("Strict verdict for each criterion"),
|
|
442
|
-
reasoning:
|
|
443
|
-
verdict:
|
|
442
|
+
reasoning: import_v43.z.string().describe("Explanation of what the final verdict should be"),
|
|
443
|
+
verdict: import_v43.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
|
|
444
444
|
})
|
|
445
445
|
});
|
|
446
446
|
}
|
|
@@ -488,7 +488,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
488
488
|
model: mergedConfig.model,
|
|
489
489
|
messages,
|
|
490
490
|
temperature: mergedConfig.temperature ?? 0,
|
|
491
|
-
|
|
491
|
+
maxOutputTokens: mergedConfig.maxTokens,
|
|
492
492
|
tools,
|
|
493
493
|
toolChoice
|
|
494
494
|
});
|
|
@@ -497,7 +497,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
497
497
|
const toolCall = completion.toolCalls[0];
|
|
498
498
|
switch (toolCall.toolName) {
|
|
499
499
|
case "finish_test": {
|
|
500
|
-
args = toolCall.
|
|
500
|
+
args = toolCall.input;
|
|
501
501
|
const verdict = args.verdict || "inconclusive";
|
|
502
502
|
const reasoning = args.reasoning || "No reasoning provided";
|
|
503
503
|
const criteria = args.criteria || {};
|
|
@@ -595,7 +595,7 @@ var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
|
|
|
595
595
|
model: mergedConfig.model,
|
|
596
596
|
messages: reversedMessages,
|
|
597
597
|
temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
|
|
598
|
-
|
|
598
|
+
maxOutputTokens: mergedConfig.maxTokens
|
|
599
599
|
});
|
|
600
600
|
const messageContent = completion.text;
|
|
601
601
|
if (!messageContent) {
|
|
@@ -769,7 +769,7 @@ var ScenarioExecutionState = class {
|
|
|
769
769
|
|
|
770
770
|
// src/events/schema.ts
|
|
771
771
|
var import_core = require("@ag-ui/core");
|
|
772
|
-
var
|
|
772
|
+
var import_zod = require("zod");
|
|
773
773
|
var Verdict = /* @__PURE__ */ ((Verdict2) => {
|
|
774
774
|
Verdict2["SUCCESS"] = "success";
|
|
775
775
|
Verdict2["FAILURE"] = "failure";
|
|
@@ -785,64 +785,64 @@ var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => {
|
|
|
785
785
|
ScenarioRunStatus2["FAILED"] = "FAILED";
|
|
786
786
|
return ScenarioRunStatus2;
|
|
787
787
|
})(ScenarioRunStatus || {});
|
|
788
|
-
var baseEventSchema =
|
|
789
|
-
type:
|
|
790
|
-
timestamp:
|
|
791
|
-
rawEvent:
|
|
788
|
+
var baseEventSchema = import_zod.z.object({
|
|
789
|
+
type: import_zod.z.nativeEnum(import_core.EventType),
|
|
790
|
+
timestamp: import_zod.z.number(),
|
|
791
|
+
rawEvent: import_zod.z.any().optional()
|
|
792
792
|
});
|
|
793
|
-
var batchRunIdSchema =
|
|
794
|
-
var scenarioRunIdSchema =
|
|
795
|
-
var scenarioIdSchema =
|
|
793
|
+
var batchRunIdSchema = import_zod.z.string();
|
|
794
|
+
var scenarioRunIdSchema = import_zod.z.string();
|
|
795
|
+
var scenarioIdSchema = import_zod.z.string();
|
|
796
796
|
var baseScenarioEventSchema = baseEventSchema.extend({
|
|
797
797
|
batchRunId: batchRunIdSchema,
|
|
798
798
|
scenarioId: scenarioIdSchema,
|
|
799
799
|
scenarioRunId: scenarioRunIdSchema,
|
|
800
|
-
scenarioSetId:
|
|
800
|
+
scenarioSetId: import_zod.z.string().optional().default("default")
|
|
801
801
|
});
|
|
802
802
|
var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
|
|
803
|
-
type:
|
|
804
|
-
metadata:
|
|
805
|
-
name:
|
|
806
|
-
description:
|
|
803
|
+
type: import_zod.z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
|
|
804
|
+
metadata: import_zod.z.object({
|
|
805
|
+
name: import_zod.z.string().optional(),
|
|
806
|
+
description: import_zod.z.string().optional()
|
|
807
807
|
})
|
|
808
808
|
});
|
|
809
|
-
var scenarioResultsSchema =
|
|
810
|
-
verdict:
|
|
811
|
-
reasoning:
|
|
812
|
-
metCriteria:
|
|
813
|
-
unmetCriteria:
|
|
814
|
-
error:
|
|
809
|
+
var scenarioResultsSchema = import_zod.z.object({
|
|
810
|
+
verdict: import_zod.z.nativeEnum(Verdict),
|
|
811
|
+
reasoning: import_zod.z.string().optional(),
|
|
812
|
+
metCriteria: import_zod.z.array(import_zod.z.string()),
|
|
813
|
+
unmetCriteria: import_zod.z.array(import_zod.z.string()),
|
|
814
|
+
error: import_zod.z.string().optional()
|
|
815
815
|
});
|
|
816
816
|
var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
|
|
817
|
-
type:
|
|
818
|
-
status:
|
|
817
|
+
type: import_zod.z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
|
|
818
|
+
status: import_zod.z.nativeEnum(ScenarioRunStatus),
|
|
819
819
|
results: scenarioResultsSchema.optional().nullable()
|
|
820
820
|
});
|
|
821
821
|
var scenarioMessageSnapshotSchema = import_core.MessagesSnapshotEventSchema.merge(
|
|
822
822
|
baseScenarioEventSchema.extend({
|
|
823
|
-
type:
|
|
823
|
+
type: import_zod.z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
|
|
824
824
|
})
|
|
825
825
|
);
|
|
826
|
-
var scenarioEventSchema =
|
|
826
|
+
var scenarioEventSchema = import_zod.z.discriminatedUnion("type", [
|
|
827
827
|
scenarioRunStartedSchema,
|
|
828
828
|
scenarioRunFinishedSchema,
|
|
829
829
|
scenarioMessageSnapshotSchema
|
|
830
830
|
]);
|
|
831
|
-
var successSchema =
|
|
832
|
-
var errorSchema =
|
|
833
|
-
var stateSchema =
|
|
834
|
-
state:
|
|
835
|
-
messages:
|
|
836
|
-
status:
|
|
831
|
+
var successSchema = import_zod.z.object({ success: import_zod.z.boolean() });
|
|
832
|
+
var errorSchema = import_zod.z.object({ error: import_zod.z.string() });
|
|
833
|
+
var stateSchema = import_zod.z.object({
|
|
834
|
+
state: import_zod.z.object({
|
|
835
|
+
messages: import_zod.z.array(import_zod.z.any()),
|
|
836
|
+
status: import_zod.z.string()
|
|
837
837
|
})
|
|
838
838
|
});
|
|
839
|
-
var runsSchema =
|
|
840
|
-
var eventsSchema =
|
|
839
|
+
var runsSchema = import_zod.z.object({ runs: import_zod.z.array(import_zod.z.string()) });
|
|
840
|
+
var eventsSchema = import_zod.z.object({ events: import_zod.z.array(scenarioEventSchema) });
|
|
841
841
|
|
|
842
842
|
// src/utils/convert-core-messages-to-agui-messages.ts
|
|
843
|
-
function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
843
|
+
function convertModelMessagesToAguiMessages(modelMessages) {
|
|
844
844
|
const aguiMessages = [];
|
|
845
|
-
for (const msg of coreMessages) {
|
|
845
|
+
for (const msg of modelMessages) {
|
|
846
846
|
const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
|
|
847
847
|
switch (true) {
|
|
848
848
|
case msg.role === "system":
|
|
@@ -886,7 +886,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
886
886
|
type: "function",
|
|
887
887
|
function: {
|
|
888
888
|
name: c.toolName,
|
|
889
|
-
arguments: JSON.stringify(c.
|
|
889
|
+
arguments: JSON.stringify(c.input)
|
|
890
890
|
}
|
|
891
891
|
}))
|
|
892
892
|
});
|
|
@@ -894,11 +894,12 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
894
894
|
}
|
|
895
895
|
case msg.role === "tool":
|
|
896
896
|
msg.content.map((p, i) => {
|
|
897
|
+
var _a;
|
|
897
898
|
aguiMessages.push({
|
|
898
899
|
id: `${id}-${i}`,
|
|
899
900
|
role: "tool",
|
|
900
901
|
toolCallId: p.toolCallId,
|
|
901
|
-
content: JSON.stringify(p.
|
|
902
|
+
content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
|
|
902
903
|
});
|
|
903
904
|
});
|
|
904
905
|
break;
|
|
@@ -908,7 +909,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
908
909
|
}
|
|
909
910
|
return aguiMessages;
|
|
910
911
|
}
|
|
911
|
-
var convert_core_messages_to_agui_messages_default = convertCoreMessagesToAguiMessages;
|
|
912
|
+
var convert_core_messages_to_agui_messages_default = convertModelMessagesToAguiMessages;
|
|
912
913
|
|
|
913
914
|
// src/execution/scenario-execution.ts
|
|
914
915
|
var ScenarioExecution = class {
|
|
@@ -974,7 +975,7 @@ var ScenarioExecution = class {
|
|
|
974
975
|
/**
|
|
975
976
|
* Gets the complete conversation history as an array of messages.
|
|
976
977
|
*
|
|
977
|
-
* @returns Array of
|
|
978
|
+
* @returns Array of ModelMessage objects representing the full conversation
|
|
978
979
|
*/
|
|
979
980
|
get messages() {
|
|
980
981
|
return this.state.messages;
|
|
@@ -1207,7 +1208,7 @@ var ScenarioExecution = class {
|
|
|
1207
1208
|
* - "assistant" messages are routed to AGENT role agents
|
|
1208
1209
|
* - Other message types are added directly to the conversation
|
|
1209
1210
|
*
|
|
1210
|
-
* @param message - The
|
|
1211
|
+
* @param message - The ModelMessage to add to the conversation
|
|
1211
1212
|
*
|
|
1212
1213
|
* @example
|
|
1213
1214
|
* ```typescript
|
|
@@ -1236,7 +1237,7 @@ var ScenarioExecution = class {
|
|
|
1236
1237
|
*
|
|
1237
1238
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1238
1239
|
*
|
|
1239
|
-
* @param content - Optional content for the user's message. Can be a string or
|
|
1240
|
+
* @param content - Optional content for the user's message. Can be a string or ModelMessage.
|
|
1240
1241
|
* If not provided, the user simulator agent will generate the content.
|
|
1241
1242
|
*
|
|
1242
1243
|
* @example
|
|
@@ -1247,7 +1248,7 @@ var ScenarioExecution = class {
|
|
|
1247
1248
|
* // Let user simulator generate content
|
|
1248
1249
|
* await execution.user();
|
|
1249
1250
|
*
|
|
1250
|
-
* // Use a
|
|
1251
|
+
* // Use a ModelMessage object
|
|
1251
1252
|
* await execution.user({
|
|
1252
1253
|
* role: "user",
|
|
1253
1254
|
* content: "Tell me a joke"
|
|
@@ -1266,7 +1267,7 @@ var ScenarioExecution = class {
|
|
|
1266
1267
|
*
|
|
1267
1268
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1268
1269
|
*
|
|
1269
|
-
* @param content - Optional content for the agent's response. Can be a string or
|
|
1270
|
+
* @param content - Optional content for the agent's response. Can be a string or ModelMessage.
|
|
1270
1271
|
* If not provided, the agent under test will generate the response.
|
|
1271
1272
|
*
|
|
1272
1273
|
* @example
|
|
@@ -1277,7 +1278,7 @@ var ScenarioExecution = class {
|
|
|
1277
1278
|
* // Use provided content
|
|
1278
1279
|
* await execution.agent("The weather is sunny today!");
|
|
1279
1280
|
*
|
|
1280
|
-
* // Use a
|
|
1281
|
+
* // Use a ModelMessage object
|
|
1281
1282
|
* await execution.agent({
|
|
1282
1283
|
* role: "assistant",
|
|
1283
1284
|
* content: "I'm here to help you with weather information."
|
|
@@ -1886,9 +1887,27 @@ __export(runner_exports, {
|
|
|
1886
1887
|
var import_rxjs3 = require("rxjs");
|
|
1887
1888
|
|
|
1888
1889
|
// src/events/event-alert-message-logger.ts
|
|
1890
|
+
var fs2 = __toESM(require("fs"));
|
|
1891
|
+
var os = __toESM(require("os"));
|
|
1892
|
+
var path2 = __toESM(require("path"));
|
|
1889
1893
|
var import_open = __toESM(require("open"));
|
|
1890
|
-
var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
1891
|
-
|
|
1894
|
+
var EventAlertMessageLogger = class {
|
|
1895
|
+
/**
|
|
1896
|
+
* Creates a coordination file to prevent duplicate messages across processes.
|
|
1897
|
+
* Returns true if this process should show the message (first one to create the file).
|
|
1898
|
+
*/
|
|
1899
|
+
createCoordinationFile(type) {
|
|
1900
|
+
try {
|
|
1901
|
+
const batchId = getBatchRunId();
|
|
1902
|
+
const tmpDir = os.tmpdir();
|
|
1903
|
+
const fileName = `scenario-${type}-${batchId}`;
|
|
1904
|
+
const filePath = path2.join(tmpDir, fileName);
|
|
1905
|
+
fs2.writeFileSync(filePath, process.pid.toString(), { flag: "wx" });
|
|
1906
|
+
return true;
|
|
1907
|
+
} catch {
|
|
1908
|
+
return false;
|
|
1909
|
+
}
|
|
1910
|
+
}
|
|
1892
1911
|
/**
|
|
1893
1912
|
* Shows a fancy greeting message about simulation reporting status.
|
|
1894
1913
|
* Only shows once per batch run to avoid spam.
|
|
@@ -1897,10 +1916,9 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
|
1897
1916
|
if (this.isGreetingDisabled()) {
|
|
1898
1917
|
return;
|
|
1899
1918
|
}
|
|
1900
|
-
if (
|
|
1919
|
+
if (!this.createCoordinationFile("greeting")) {
|
|
1901
1920
|
return;
|
|
1902
1921
|
}
|
|
1903
|
-
_EventAlertMessageLogger.shownBatchIds.add(getBatchRunId());
|
|
1904
1922
|
this.displayGreeting();
|
|
1905
1923
|
}
|
|
1906
1924
|
/**
|
|
@@ -1911,6 +1929,9 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
|
1911
1929
|
if (this.isGreetingDisabled()) {
|
|
1912
1930
|
return;
|
|
1913
1931
|
}
|
|
1932
|
+
if (!this.createCoordinationFile(`watch-${params.scenarioSetId}`)) {
|
|
1933
|
+
return;
|
|
1934
|
+
}
|
|
1914
1935
|
await this.displayWatchMessage(params);
|
|
1915
1936
|
}
|
|
1916
1937
|
isGreetingDisabled() {
|
|
@@ -2254,14 +2275,13 @@ function formatPart(part) {
|
|
|
2254
2275
|
case "file":
|
|
2255
2276
|
return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
|
|
2256
2277
|
case "tool-call":
|
|
2257
|
-
return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.
|
|
2278
|
+
return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.input)})`;
|
|
2258
2279
|
case "tool-result":
|
|
2259
|
-
return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.
|
|
2280
|
+
return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.output)})`;
|
|
2260
2281
|
case "reasoning":
|
|
2261
2282
|
return `(reasoning): ${part.text}`;
|
|
2262
|
-
case "redacted-reasoning":
|
|
2263
|
-
return `(redacted reasoning): ${part.data}`;
|
|
2264
2283
|
default:
|
|
2284
|
+
part;
|
|
2265
2285
|
return `Unknown content: ${JSON.stringify(part)}`;
|
|
2266
2286
|
}
|
|
2267
2287
|
}
|
package/dist/index.mjs
CHANGED
|
@@ -17,11 +17,11 @@ import {
|
|
|
17
17
|
getBatchRunId,
|
|
18
18
|
getProjectConfig,
|
|
19
19
|
scenarioProjectConfigSchema
|
|
20
|
-
} from "./chunk-6SKQWXT7.mjs";
|
|
20
|
+
} from "./chunk-3Z7E24UI.mjs";
|
|
21
21
|
import {
|
|
22
22
|
Logger,
|
|
23
23
|
getEnv
|
|
24
|
-
} from "./chunk-OL4RFXV4.mjs";
|
|
24
|
+
} from "./chunk-RHTLQKEJ.mjs";
|
|
25
25
|
import {
|
|
26
26
|
__export
|
|
27
27
|
} from "./chunk-7P6ASYW6.mjs";
|
|
@@ -35,7 +35,7 @@ __export(agents_exports, {
|
|
|
35
35
|
|
|
36
36
|
// src/agents/judge-agent.ts
|
|
37
37
|
import { generateText, tool } from "ai";
|
|
38
|
-
import { z } from "zod";
|
|
38
|
+
import { z } from "zod/v4";
|
|
39
39
|
|
|
40
40
|
// src/agents/utils.ts
|
|
41
41
|
var toolMessageRole = "tool";
|
|
@@ -142,14 +142,14 @@ ${criteriaList}
|
|
|
142
142
|
function buildContinueTestTool() {
|
|
143
143
|
return tool({
|
|
144
144
|
description: "Continue the test with the next step",
|
|
145
|
-
|
|
145
|
+
inputSchema: z.object({})
|
|
146
146
|
});
|
|
147
147
|
}
|
|
148
148
|
function buildFinishTestTool(criteria) {
|
|
149
149
|
const criteriaNames = criteria.map(criterionToParamName);
|
|
150
150
|
return tool({
|
|
151
151
|
description: "Complete the test with a final verdict",
|
|
152
|
-
|
|
152
|
+
inputSchema: z.object({
|
|
153
153
|
criteria: z.object(
|
|
154
154
|
Object.fromEntries(
|
|
155
155
|
criteriaNames.map((name, idx) => [
|
|
@@ -207,7 +207,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
207
207
|
model: mergedConfig.model,
|
|
208
208
|
messages,
|
|
209
209
|
temperature: mergedConfig.temperature ?? 0,
|
|
210
|
-
|
|
210
|
+
maxOutputTokens: mergedConfig.maxTokens,
|
|
211
211
|
tools,
|
|
212
212
|
toolChoice
|
|
213
213
|
});
|
|
@@ -216,7 +216,7 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
216
216
|
const toolCall = completion.toolCalls[0];
|
|
217
217
|
switch (toolCall.toolName) {
|
|
218
218
|
case "finish_test": {
|
|
219
|
-
args = toolCall.
|
|
219
|
+
args = toolCall.input;
|
|
220
220
|
const verdict = args.verdict || "inconclusive";
|
|
221
221
|
const reasoning = args.reasoning || "No reasoning provided";
|
|
222
222
|
const criteria = args.criteria || {};
|
|
@@ -314,7 +314,7 @@ var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
|
|
|
314
314
|
model: mergedConfig.model,
|
|
315
315
|
messages: reversedMessages,
|
|
316
316
|
temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
|
|
317
|
-
|
|
317
|
+
maxOutputTokens: mergedConfig.maxTokens
|
|
318
318
|
});
|
|
319
319
|
const messageContent = completion.text;
|
|
320
320
|
if (!messageContent) {
|
|
@@ -441,9 +441,9 @@ var ScenarioExecutionState = class {
|
|
|
441
441
|
};
|
|
442
442
|
|
|
443
443
|
// src/utils/convert-core-messages-to-agui-messages.ts
|
|
444
|
-
function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
444
|
+
function convertModelMessagesToAguiMessages(modelMessages) {
|
|
445
445
|
const aguiMessages = [];
|
|
446
|
-
for (const msg of coreMessages) {
|
|
446
|
+
for (const msg of modelMessages) {
|
|
447
447
|
const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
|
|
448
448
|
switch (true) {
|
|
449
449
|
case msg.role === "system":
|
|
@@ -487,7 +487,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
487
487
|
type: "function",
|
|
488
488
|
function: {
|
|
489
489
|
name: c.toolName,
|
|
490
|
-
arguments: JSON.stringify(c.
|
|
490
|
+
arguments: JSON.stringify(c.input)
|
|
491
491
|
}
|
|
492
492
|
}))
|
|
493
493
|
});
|
|
@@ -495,11 +495,12 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
495
495
|
}
|
|
496
496
|
case msg.role === "tool":
|
|
497
497
|
msg.content.map((p, i) => {
|
|
498
|
+
var _a;
|
|
498
499
|
aguiMessages.push({
|
|
499
500
|
id: `${id}-${i}`,
|
|
500
501
|
role: "tool",
|
|
501
502
|
toolCallId: p.toolCallId,
|
|
502
|
-
content: JSON.stringify(p.
|
|
503
|
+
content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
|
|
503
504
|
});
|
|
504
505
|
});
|
|
505
506
|
break;
|
|
@@ -509,7 +510,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
509
510
|
}
|
|
510
511
|
return aguiMessages;
|
|
511
512
|
}
|
|
512
|
-
var convert_core_messages_to_agui_messages_default = convertCoreMessagesToAguiMessages;
|
|
513
|
+
var convert_core_messages_to_agui_messages_default = convertModelMessagesToAguiMessages;
|
|
513
514
|
|
|
514
515
|
// src/execution/scenario-execution.ts
|
|
515
516
|
var ScenarioExecution = class {
|
|
@@ -575,7 +576,7 @@ var ScenarioExecution = class {
|
|
|
575
576
|
/**
|
|
576
577
|
* Gets the complete conversation history as an array of messages.
|
|
577
578
|
*
|
|
578
|
-
* @returns Array of
|
|
579
|
+
* @returns Array of ModelMessage objects representing the full conversation
|
|
579
580
|
*/
|
|
580
581
|
get messages() {
|
|
581
582
|
return this.state.messages;
|
|
@@ -808,7 +809,7 @@ var ScenarioExecution = class {
|
|
|
808
809
|
* - "assistant" messages are routed to AGENT role agents
|
|
809
810
|
* - Other message types are added directly to the conversation
|
|
810
811
|
*
|
|
811
|
-
* @param message - The
|
|
812
|
+
* @param message - The ModelMessage to add to the conversation
|
|
812
813
|
*
|
|
813
814
|
* @example
|
|
814
815
|
* ```typescript
|
|
@@ -837,7 +838,7 @@ var ScenarioExecution = class {
|
|
|
837
838
|
*
|
|
838
839
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
839
840
|
*
|
|
840
|
-
* @param content - Optional content for the user's message. Can be a string or
|
|
841
|
+
* @param content - Optional content for the user's message. Can be a string or ModelMessage.
|
|
841
842
|
* If not provided, the user simulator agent will generate the content.
|
|
842
843
|
*
|
|
843
844
|
* @example
|
|
@@ -848,7 +849,7 @@ var ScenarioExecution = class {
|
|
|
848
849
|
* // Let user simulator generate content
|
|
849
850
|
* await execution.user();
|
|
850
851
|
*
|
|
851
|
-
* // Use a
|
|
852
|
+
* // Use a ModelMessage object
|
|
852
853
|
* await execution.user({
|
|
853
854
|
* role: "user",
|
|
854
855
|
* content: "Tell me a joke"
|
|
@@ -867,7 +868,7 @@ var ScenarioExecution = class {
|
|
|
867
868
|
*
|
|
868
869
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
869
870
|
*
|
|
870
|
-
* @param content - Optional content for the agent's response. Can be a string or
|
|
871
|
+
* @param content - Optional content for the agent's response. Can be a string or ModelMessage.
|
|
871
872
|
* If not provided, the agent under test will generate the response.
|
|
872
873
|
*
|
|
873
874
|
* @example
|
|
@@ -878,7 +879,7 @@ var ScenarioExecution = class {
|
|
|
878
879
|
* // Use provided content
|
|
879
880
|
* await execution.agent("The weather is sunny today!");
|
|
880
881
|
*
|
|
881
|
-
* // Use a
|
|
882
|
+
* // Use a ModelMessage object
|
|
882
883
|
* await execution.agent({
|
|
883
884
|
* role: "assistant",
|
|
884
885
|
* content: "I'm here to help you with weather information."
|
|
@@ -1600,14 +1601,13 @@ function formatPart(part) {
|
|
|
1600
1601
|
case "file":
|
|
1601
1602
|
return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
|
|
1602
1603
|
case "tool-call":
|
|
1603
|
-
return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.
|
|
1604
|
+
return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.input)})`;
|
|
1604
1605
|
case "tool-result":
|
|
1605
|
-
return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.
|
|
1606
|
+
return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.output)})`;
|
|
1606
1607
|
case "reasoning":
|
|
1607
1608
|
return `(reasoning): ${part.text}`;
|
|
1608
|
-
case "redacted-reasoning":
|
|
1609
|
-
return `(redacted reasoning): ${part.data}`;
|
|
1610
1609
|
default:
|
|
1610
|
+
part;
|
|
1611
1611
|
return `Unknown content: ${JSON.stringify(part)}`;
|
|
1612
1612
|
}
|
|
1613
1613
|
}
|
|
package/dist/integrations/vitest/reporter.js
CHANGED
|
@@ -38,7 +38,7 @@ var import_path = __toESM(require("path"));
|
|
|
38
38
|
var import_chalk = __toESM(require("chalk"));
|
|
39
39
|
|
|
40
40
|
// src/config/env.ts
|
|
41
|
-
var
|
|
41
|
+
var import_v4 = require("zod/v4");
|
|
42
42
|
|
|
43
43
|
// src/config/log-levels.ts
|
|
44
44
|
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
@@ -51,37 +51,37 @@ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
|
51
51
|
var LOG_LEVELS = Object.values(LogLevel);
|
|
52
52
|
|
|
53
53
|
// src/config/env.ts
|
|
54
|
-
var envSchema =
|
|
54
|
+
var envSchema = import_v4.z.object({
|
|
55
55
|
/**
|
|
56
56
|
* LangWatch API key for event reporting.
|
|
57
57
|
* If not provided, events will not be sent to LangWatch.
|
|
58
58
|
*/
|
|
59
|
-
LANGWATCH_API_KEY:
|
|
59
|
+
LANGWATCH_API_KEY: import_v4.z.string().optional(),
|
|
60
60
|
/**
|
|
61
61
|
* LangWatch endpoint URL for event reporting.
|
|
62
62
|
* Defaults to the production LangWatch endpoint.
|
|
63
63
|
*/
|
|
64
|
-
LANGWATCH_ENDPOINT:
|
|
64
|
+
LANGWATCH_ENDPOINT: import_v4.z.string().url().optional().default("https://app.langwatch.ai"),
|
|
65
65
|
/**
|
|
66
66
|
* Disables simulation report info messages when set to any truthy value.
|
|
67
67
|
* Useful for CI/CD environments or when you want cleaner output.
|
|
68
68
|
*/
|
|
69
|
-
SCENARIO_DISABLE_SIMULATION_REPORT_INFO:
|
|
69
|
+
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v4.z.string().optional().transform((val) => Boolean(val)),
|
|
70
70
|
/**
|
|
71
71
|
* Node environment - affects logging and behavior.
|
|
72
72
|
* Defaults to 'development' if not specified.
|
|
73
73
|
*/
|
|
74
|
-
NODE_ENV:
|
|
74
|
+
NODE_ENV: import_v4.z.enum(["development", "production", "test"]).default("development"),
|
|
75
75
|
/**
|
|
76
76
|
* Case-insensitive log level for the scenario package.
|
|
77
77
|
* Defaults to 'info' if not specified.
|
|
78
78
|
*/
|
|
79
|
-
LOG_LEVEL:
|
|
79
|
+
LOG_LEVEL: import_v4.z.string().toUpperCase().pipe(import_v4.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
|
|
80
80
|
/**
|
|
81
81
|
* Scenario batch run ID.
|
|
82
82
|
* If not provided, a random ID will be generated.
|
|
83
83
|
*/
|
|
84
|
-
SCENARIO_BATCH_RUN_ID:
|
|
84
|
+
SCENARIO_BATCH_RUN_ID: import_v4.z.string().optional()
|
|
85
85
|
});
|
|
86
86
|
function getEnv() {
|
|
87
87
|
return envSchema.parse(process.env);
|
|
@@ -321,8 +321,12 @@ ${indent(parsedJson)}
|
|
|
321
321
|
console.log();
|
|
322
322
|
console.log(import_chalk.default.bold.cyan("=== Scenario Test Report ==="));
|
|
323
323
|
console.log(`Total Scenarios: ${total}`);
|
|
324
|
-
console.log(
|
|
325
|
-
|
|
324
|
+
console.log(
|
|
325
|
+
passed > 0 ? import_chalk.default.green(`Passed: ${passed}`) : `Passed: ${passed}`
|
|
326
|
+
);
|
|
327
|
+
console.log(
|
|
328
|
+
failed > 0 ? import_chalk.default.red(`Failed: ${failed}`) : `Failed: ${failed}`
|
|
329
|
+
);
|
|
326
330
|
console.log(`Success Rate: ${import_chalk.default.bold(`${successRate}%`)}`);
|
|
327
331
|
this.results.forEach((r, i) => {
|
|
328
332
|
const statusColor = r.status === "SUCCESS" ? import_chalk.default.green : import_chalk.default.red;
|
|
package/dist/integrations/vitest/reporter.mjs
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import {
|
|
2
2
|
Logger
|
|
3
|
-
} from "../../chunk-OL4RFXV4.mjs";
|
|
3
|
+
} from "../../chunk-RHTLQKEJ.mjs";
|
|
4
4
|
import "../../chunk-7P6ASYW6.mjs";
|
|
5
5
|
|
|
6
6
|
// src/integrations/vitest/reporter.ts
|
|
@@ -161,8 +161,12 @@ ${indent(parsedJson)}
|
|
|
161
161
|
console.log();
|
|
162
162
|
console.log(chalk.bold.cyan("=== Scenario Test Report ==="));
|
|
163
163
|
console.log(`Total Scenarios: ${total}`);
|
|
164
|
-
console.log(
|
|
165
|
-
|
|
164
|
+
console.log(
|
|
165
|
+
passed > 0 ? chalk.green(`Passed: ${passed}`) : `Passed: ${passed}`
|
|
166
|
+
);
|
|
167
|
+
console.log(
|
|
168
|
+
failed > 0 ? chalk.red(`Failed: ${failed}`) : `Failed: ${failed}`
|
|
169
|
+
);
|
|
166
170
|
console.log(`Success Rate: ${chalk.bold(`${successRate}%`)}`);
|
|
167
171
|
this.results.forEach((r, i) => {
|
|
168
172
|
const statusColor = r.status === "SUCCESS" ? chalk.green : chalk.red;
|