@langwatch/scenario 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-NUZZAQV2.mjs +622 -0
- package/dist/index.d.mts +65 -15
- package/dist/index.d.ts +65 -15
- package/dist/index.js +317 -97
- package/dist/index.mjs +122 -173
- package/dist/integrations/vitest/setup.js +282 -105
- package/dist/integrations/vitest/setup.mjs +1 -1
- package/package.json +4 -3
- package/dist/chunk-ORWSJC5F.mjs +0 -309
package/dist/index.mjs
CHANGED
|
@@ -1,7 +1,23 @@
|
|
|
1
1
|
import {
|
|
2
|
+
AgentAdapter,
|
|
3
|
+
AgentRole,
|
|
4
|
+
DEFAULT_TEMPERATURE,
|
|
2
5
|
EventBus,
|
|
3
|
-
|
|
4
|
-
|
|
6
|
+
JudgeAgentAdapter,
|
|
7
|
+
Logger,
|
|
8
|
+
UserSimulatorAgentAdapter,
|
|
9
|
+
allAgentRoles,
|
|
10
|
+
defineConfig,
|
|
11
|
+
domain_exports,
|
|
12
|
+
env,
|
|
13
|
+
generateMessageId,
|
|
14
|
+
generateScenarioId,
|
|
15
|
+
generateScenarioRunId,
|
|
16
|
+
generateThreadId,
|
|
17
|
+
getBatchRunId,
|
|
18
|
+
getProjectConfig,
|
|
19
|
+
scenarioProjectConfigSchema
|
|
20
|
+
} from "./chunk-NUZZAQV2.mjs";
|
|
5
21
|
import {
|
|
6
22
|
__export
|
|
7
23
|
} from "./chunk-7P6ASYW6.mjs";
|
|
@@ -15,61 +31,7 @@ __export(agents_exports, {
|
|
|
15
31
|
|
|
16
32
|
// src/agents/judge-agent.ts
|
|
17
33
|
import { generateText, tool } from "ai";
|
|
18
|
-
import { z as z2 } from "zod";
|
|
19
|
-
|
|
20
|
-
// src/domain/index.ts
|
|
21
|
-
var domain_exports = {};
|
|
22
|
-
__export(domain_exports, {
|
|
23
|
-
AgentAdapter: () => AgentAdapter,
|
|
24
|
-
AgentRole: () => AgentRole,
|
|
25
|
-
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
26
|
-
UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
|
|
27
|
-
allAgentRoles: () => allAgentRoles,
|
|
28
|
-
defineConfig: () => defineConfig,
|
|
29
|
-
scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
// src/domain/core/config.ts
|
|
33
34
|
import { z } from "zod";
|
|
34
|
-
var scenarioProjectConfigSchema = z.object({
|
|
35
|
-
defaultModel: z.object({
|
|
36
|
-
model: z.custom(),
|
|
37
|
-
temperature: z.number().min(0).max(1).optional().default(0),
|
|
38
|
-
maxTokens: z.number().optional()
|
|
39
|
-
}).optional(),
|
|
40
|
-
langwatchEndpoint: z.string().optional(),
|
|
41
|
-
langwatchApiKey: z.string().optional()
|
|
42
|
-
}).strict();
|
|
43
|
-
function defineConfig(config2) {
|
|
44
|
-
return config2;
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
// src/domain/agents/index.ts
|
|
48
|
-
var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
|
|
49
|
-
AgentRole2["USER"] = "User";
|
|
50
|
-
AgentRole2["AGENT"] = "Agent";
|
|
51
|
-
AgentRole2["JUDGE"] = "Judge";
|
|
52
|
-
return AgentRole2;
|
|
53
|
-
})(AgentRole || {});
|
|
54
|
-
var allAgentRoles = ["User" /* USER */, "Agent" /* AGENT */, "Judge" /* JUDGE */];
|
|
55
|
-
var AgentAdapter = class {
|
|
56
|
-
role = "Agent" /* AGENT */;
|
|
57
|
-
constructor(input) {
|
|
58
|
-
void input;
|
|
59
|
-
}
|
|
60
|
-
};
|
|
61
|
-
var UserSimulatorAgentAdapter = class {
|
|
62
|
-
role = "User" /* USER */;
|
|
63
|
-
constructor(input) {
|
|
64
|
-
void input;
|
|
65
|
-
}
|
|
66
|
-
};
|
|
67
|
-
var JudgeAgentAdapter = class {
|
|
68
|
-
role = "Judge" /* JUDGE */;
|
|
69
|
-
constructor(input) {
|
|
70
|
-
void input;
|
|
71
|
-
}
|
|
72
|
-
};
|
|
73
35
|
|
|
74
36
|
// src/agents/utils.ts
|
|
75
37
|
var toolMessageRole = "tool";
|
|
@@ -126,81 +88,19 @@ var criterionToParamName = (criterion) => {
|
|
|
126
88
|
return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
|
|
127
89
|
};
|
|
128
90
|
|
|
129
|
-
// src/config/load.ts
|
|
130
|
-
import fs from "node:fs/promises";
|
|
131
|
-
import path from "node:path";
|
|
132
|
-
import { pathToFileURL } from "node:url";
|
|
133
|
-
async function loadScenarioProjectConfig() {
|
|
134
|
-
const cwd = process.cwd();
|
|
135
|
-
const configNames = [
|
|
136
|
-
"scenario.config.js",
|
|
137
|
-
"scenario.config.mjs"
|
|
138
|
-
];
|
|
139
|
-
for (const name of configNames) {
|
|
140
|
-
const fullPath = path.join(cwd, name);
|
|
141
|
-
try {
|
|
142
|
-
await fs.access(fullPath);
|
|
143
|
-
const configModule = await import(pathToFileURL(fullPath).href);
|
|
144
|
-
const config2 = configModule.default || configModule;
|
|
145
|
-
const parsed = scenarioProjectConfigSchema.safeParse(config2);
|
|
146
|
-
if (!parsed.success) {
|
|
147
|
-
throw new Error(
|
|
148
|
-
`Invalid config file ${name}: ${JSON.stringify(parsed.error.format(), null, 2)}`
|
|
149
|
-
);
|
|
150
|
-
}
|
|
151
|
-
return parsed.data;
|
|
152
|
-
} catch (error) {
|
|
153
|
-
if (error instanceof Error && "code" in error && error.code === "ENOENT") {
|
|
154
|
-
continue;
|
|
155
|
-
}
|
|
156
|
-
throw error;
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
return await scenarioProjectConfigSchema.parseAsync({});
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
// src/config/index.ts
|
|
163
|
-
var logger = new Logger("scenario.config");
|
|
164
|
-
var configLoaded = false;
|
|
165
|
-
var config = null;
|
|
166
|
-
var configLoadPromise = null;
|
|
167
|
-
async function loadProjectConfig() {
|
|
168
|
-
if (configLoaded) {
|
|
169
|
-
return;
|
|
170
|
-
}
|
|
171
|
-
if (configLoadPromise) {
|
|
172
|
-
return configLoadPromise;
|
|
173
|
-
}
|
|
174
|
-
configLoadPromise = (async () => {
|
|
175
|
-
try {
|
|
176
|
-
config = await loadScenarioProjectConfig();
|
|
177
|
-
logger.info("loaded scenario project config", { config });
|
|
178
|
-
} catch (error) {
|
|
179
|
-
logger.error("error loading scenario project config", { error });
|
|
180
|
-
} finally {
|
|
181
|
-
configLoaded = true;
|
|
182
|
-
}
|
|
183
|
-
})();
|
|
184
|
-
return configLoadPromise;
|
|
185
|
-
}
|
|
186
|
-
async function getProjectConfig() {
|
|
187
|
-
await loadProjectConfig();
|
|
188
|
-
return config;
|
|
189
|
-
}
|
|
190
|
-
|
|
191
91
|
// src/utils/config.ts
|
|
192
|
-
function mergeConfig(
|
|
92
|
+
function mergeConfig(config, projectConfig) {
|
|
193
93
|
if (!projectConfig) {
|
|
194
|
-
return
|
|
94
|
+
return config;
|
|
195
95
|
}
|
|
196
96
|
return {
|
|
197
97
|
...projectConfig.defaultModel,
|
|
198
|
-
...
|
|
98
|
+
...config
|
|
199
99
|
};
|
|
200
100
|
}
|
|
201
|
-
function mergeAndValidateConfig(
|
|
101
|
+
function mergeAndValidateConfig(config, projectConfig) {
|
|
202
102
|
var _a;
|
|
203
|
-
const mergedConfig = mergeConfig(
|
|
103
|
+
const mergedConfig = mergeConfig(config, projectConfig);
|
|
204
104
|
mergedConfig.model = mergedConfig.model ?? ((_a = projectConfig == null ? void 0 : projectConfig.defaultModel) == null ? void 0 : _a.model);
|
|
205
105
|
if (!mergedConfig.model) {
|
|
206
106
|
throw new Error("Model is required");
|
|
@@ -238,24 +138,24 @@ ${criteriaList}
|
|
|
238
138
|
function buildContinueTestTool() {
|
|
239
139
|
return tool({
|
|
240
140
|
description: "Continue the test with the next step",
|
|
241
|
-
parameters:
|
|
141
|
+
parameters: z.object({})
|
|
242
142
|
});
|
|
243
143
|
}
|
|
244
144
|
function buildFinishTestTool(criteria) {
|
|
245
145
|
const criteriaNames = criteria.map(criterionToParamName);
|
|
246
146
|
return tool({
|
|
247
147
|
description: "Complete the test with a final verdict",
|
|
248
|
-
parameters:
|
|
249
|
-
criteria:
|
|
148
|
+
parameters: z.object({
|
|
149
|
+
criteria: z.object(
|
|
250
150
|
Object.fromEntries(
|
|
251
151
|
criteriaNames.map((name, idx) => [
|
|
252
152
|
name,
|
|
253
|
-
|
|
153
|
+
z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
|
|
254
154
|
])
|
|
255
155
|
)
|
|
256
156
|
).strict().describe("Strict verdict for each criterion"),
|
|
257
|
-
reasoning:
|
|
258
|
-
verdict:
|
|
157
|
+
reasoning: z.string().describe("Explanation of what the final verdict should be"),
|
|
158
|
+
verdict: z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
|
|
259
159
|
})
|
|
260
160
|
});
|
|
261
161
|
}
|
|
@@ -365,18 +265,18 @@ ${description}
|
|
|
365
265
|
</rules>
|
|
366
266
|
`.trim();
|
|
367
267
|
}
|
|
368
|
-
var userSimulatorAgent = (
|
|
268
|
+
var userSimulatorAgent = (config) => {
|
|
369
269
|
return {
|
|
370
270
|
role: "User" /* USER */,
|
|
371
271
|
call: async (input) => {
|
|
372
|
-
const systemPrompt = buildSystemPrompt2(input.scenarioConfig.description);
|
|
272
|
+
const systemPrompt = (config == null ? void 0 : config.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
|
|
373
273
|
const messages = [
|
|
374
274
|
{ role: "system", content: systemPrompt },
|
|
375
275
|
{ role: "assistant", content: "Hello, how can I help you today" },
|
|
376
276
|
...input.messages
|
|
377
277
|
];
|
|
378
278
|
const projectConfig = await getProjectConfig();
|
|
379
|
-
const mergedConfig = mergeAndValidateConfig(
|
|
279
|
+
const mergedConfig = mergeAndValidateConfig(config ?? {}, projectConfig);
|
|
380
280
|
if (!mergedConfig.model) {
|
|
381
281
|
throw new Error("Model is required for the user simulator agent");
|
|
382
282
|
}
|
|
@@ -384,7 +284,7 @@ var userSimulatorAgent = (config2) => {
|
|
|
384
284
|
const completion = await generateText2({
|
|
385
285
|
model: mergedConfig.model,
|
|
386
286
|
messages: reversedMessages,
|
|
387
|
-
temperature: mergedConfig.temperature ??
|
|
287
|
+
temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
|
|
388
288
|
maxTokens: mergedConfig.maxTokens
|
|
389
289
|
});
|
|
390
290
|
const messageContent = completion.text;
|
|
@@ -406,28 +306,6 @@ __export(execution_exports, {
|
|
|
406
306
|
// src/execution/scenario-execution.ts
|
|
407
307
|
import { Subject } from "rxjs";
|
|
408
308
|
|
|
409
|
-
// src/utils/ids.ts
|
|
410
|
-
import { generate, parse } from "xksuid";
|
|
411
|
-
var batchRunId = null;
|
|
412
|
-
function generateThreadId() {
|
|
413
|
-
return `thread_${generate()}`;
|
|
414
|
-
}
|
|
415
|
-
function generateScenarioRunId() {
|
|
416
|
-
return `scenariorun_${generate()}`;
|
|
417
|
-
}
|
|
418
|
-
function generateScenarioId() {
|
|
419
|
-
return `scenario_${generate()}`;
|
|
420
|
-
}
|
|
421
|
-
function getBatchRunId() {
|
|
422
|
-
if (!batchRunId) {
|
|
423
|
-
batchRunId = process.env.SCENARIO_BATCH_RUN_ID ?? `scenariobatchrun_${generate()}`;
|
|
424
|
-
}
|
|
425
|
-
return batchRunId;
|
|
426
|
-
}
|
|
427
|
-
function generateMessageId() {
|
|
428
|
-
return `scenariomsg_${generate()}`;
|
|
429
|
-
}
|
|
430
|
-
|
|
431
309
|
// src/execution/scenario-execution-state.ts
|
|
432
310
|
var ScenarioExecutionState = class {
|
|
433
311
|
_messages = [];
|
|
@@ -435,9 +313,9 @@ var ScenarioExecutionState = class {
|
|
|
435
313
|
_threadId = "";
|
|
436
314
|
description;
|
|
437
315
|
config;
|
|
438
|
-
constructor(
|
|
439
|
-
this.config =
|
|
440
|
-
this.description =
|
|
316
|
+
constructor(config) {
|
|
317
|
+
this.config = config;
|
|
318
|
+
this.description = config.description;
|
|
441
319
|
}
|
|
442
320
|
get messages() {
|
|
443
321
|
return this._messages;
|
|
@@ -499,8 +377,79 @@ var ScenarioExecutionState = class {
|
|
|
499
377
|
}
|
|
500
378
|
};
|
|
501
379
|
|
|
380
|
+
// src/utils/message-conversion.ts
|
|
381
|
+
function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
382
|
+
const aguiMessages = [];
|
|
383
|
+
for (const msg of coreMessages) {
|
|
384
|
+
const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
|
|
385
|
+
switch (true) {
|
|
386
|
+
case msg.role === "system":
|
|
387
|
+
aguiMessages.push({
|
|
388
|
+
id,
|
|
389
|
+
role: "system",
|
|
390
|
+
content: msg.content
|
|
391
|
+
});
|
|
392
|
+
break;
|
|
393
|
+
case (msg.role === "user" && typeof msg.content === "string"):
|
|
394
|
+
aguiMessages.push({
|
|
395
|
+
id,
|
|
396
|
+
role: "user",
|
|
397
|
+
content: msg.content
|
|
398
|
+
});
|
|
399
|
+
break;
|
|
400
|
+
// Handle any other user message content format
|
|
401
|
+
case (msg.role === "user" && Array.isArray(msg.content)):
|
|
402
|
+
aguiMessages.push({
|
|
403
|
+
id,
|
|
404
|
+
role: "user",
|
|
405
|
+
content: JSON.stringify(msg.content)
|
|
406
|
+
});
|
|
407
|
+
break;
|
|
408
|
+
case (msg.role === "assistant" && typeof msg.content === "string"):
|
|
409
|
+
aguiMessages.push({
|
|
410
|
+
id,
|
|
411
|
+
role: "assistant",
|
|
412
|
+
content: msg.content
|
|
413
|
+
});
|
|
414
|
+
break;
|
|
415
|
+
case (msg.role === "assistant" && Array.isArray(msg.content)): {
|
|
416
|
+
const toolCalls = msg.content.filter((p) => p.type === "tool-call");
|
|
417
|
+
const nonToolCalls = msg.content.filter((p) => p.type !== "tool-call");
|
|
418
|
+
aguiMessages.push({
|
|
419
|
+
id,
|
|
420
|
+
role: "assistant",
|
|
421
|
+
content: JSON.stringify(nonToolCalls),
|
|
422
|
+
toolCalls: toolCalls.map((c) => ({
|
|
423
|
+
id: c.toolCallId,
|
|
424
|
+
type: "function",
|
|
425
|
+
function: {
|
|
426
|
+
name: c.toolName,
|
|
427
|
+
arguments: JSON.stringify(c.args)
|
|
428
|
+
}
|
|
429
|
+
}))
|
|
430
|
+
});
|
|
431
|
+
break;
|
|
432
|
+
}
|
|
433
|
+
case msg.role === "tool":
|
|
434
|
+
msg.content.map((p, i) => {
|
|
435
|
+
aguiMessages.push({
|
|
436
|
+
id: `${id}-${i}`,
|
|
437
|
+
role: "tool",
|
|
438
|
+
toolCallId: p.toolCallId,
|
|
439
|
+
content: JSON.stringify(p.result)
|
|
440
|
+
});
|
|
441
|
+
});
|
|
442
|
+
break;
|
|
443
|
+
default:
|
|
444
|
+
throw new Error(`Unsupported message role: ${msg.role}`);
|
|
445
|
+
}
|
|
446
|
+
}
|
|
447
|
+
return aguiMessages;
|
|
448
|
+
}
|
|
449
|
+
var message_conversion_default = convertCoreMessagesToAguiMessages;
|
|
450
|
+
|
|
502
451
|
// src/execution/scenario-execution.ts
|
|
503
|
-
var
|
|
452
|
+
var batchRunId = getBatchRunId();
|
|
504
453
|
var ScenarioExecution = class {
|
|
505
454
|
state;
|
|
506
455
|
eventSubject = new Subject();
|
|
@@ -523,17 +472,17 @@ var ScenarioExecution = class {
|
|
|
523
472
|
* @param config The scenario configuration.
|
|
524
473
|
* @param script The script steps to execute.
|
|
525
474
|
*/
|
|
526
|
-
constructor(
|
|
475
|
+
constructor(config, script) {
|
|
527
476
|
this.config = {
|
|
528
|
-
id:
|
|
529
|
-
name:
|
|
530
|
-
description:
|
|
531
|
-
agents:
|
|
477
|
+
id: config.id ?? generateScenarioId(),
|
|
478
|
+
name: config.name,
|
|
479
|
+
description: config.description,
|
|
480
|
+
agents: config.agents,
|
|
532
481
|
script,
|
|
533
|
-
verbose:
|
|
534
|
-
maxTurns:
|
|
535
|
-
threadId:
|
|
536
|
-
setId:
|
|
482
|
+
verbose: config.verbose ?? false,
|
|
483
|
+
maxTurns: config.maxTurns ?? 10,
|
|
484
|
+
threadId: config.threadId ?? generateThreadId(),
|
|
485
|
+
setId: config.setId
|
|
537
486
|
};
|
|
538
487
|
this.state = new ScenarioExecutionState(this.config);
|
|
539
488
|
this.reset();
|
|
@@ -912,7 +861,7 @@ var ScenarioExecution = class {
|
|
|
912
861
|
type: "placeholder",
|
|
913
862
|
// This will be replaced by the specific event type
|
|
914
863
|
timestamp: Date.now(),
|
|
915
|
-
batchRunId
|
|
864
|
+
batchRunId,
|
|
916
865
|
scenarioId: this.config.id,
|
|
917
866
|
scenarioRunId,
|
|
918
867
|
scenarioSetId: this.config.setId
|
|
@@ -938,7 +887,7 @@ var ScenarioExecution = class {
|
|
|
938
887
|
this.emitEvent({
|
|
939
888
|
...this.makeBaseEvent({ scenarioRunId }),
|
|
940
889
|
type: "SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */,
|
|
941
|
-
messages: this.state.messages
|
|
890
|
+
messages: message_conversion_default(this.state.messages)
|
|
942
891
|
// Add any other required fields from MessagesSnapshotEventSchema
|
|
943
892
|
});
|
|
944
893
|
}
|
|
@@ -1061,10 +1010,9 @@ async function run(cfg) {
|
|
|
1061
1010
|
let eventBus = null;
|
|
1062
1011
|
let subscription = null;
|
|
1063
1012
|
try {
|
|
1064
|
-
const projectConfig = await loadScenarioProjectConfig();
|
|
1065
1013
|
eventBus = new EventBus({
|
|
1066
|
-
endpoint:
|
|
1067
|
-
apiKey:
|
|
1014
|
+
endpoint: env.LANGWATCH_ENDPOINT,
|
|
1015
|
+
apiKey: env.LANGWATCH_API_KEY
|
|
1068
1016
|
});
|
|
1069
1017
|
eventBus.listen();
|
|
1070
1018
|
subscription = eventBus.subscribeTo(execution.events$);
|
|
@@ -1139,6 +1087,7 @@ var index_default = scenario;
|
|
|
1139
1087
|
export {
|
|
1140
1088
|
AgentAdapter,
|
|
1141
1089
|
AgentRole,
|
|
1090
|
+
DEFAULT_TEMPERATURE,
|
|
1142
1091
|
JudgeAgentAdapter,
|
|
1143
1092
|
ScenarioExecution,
|
|
1144
1093
|
ScenarioExecutionState,
|