@langwatch/scenario 0.2.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-ZMHTHRDR.mjs +618 -0
- package/dist/index.js +232 -87
- package/dist/index.mjs +44 -167
- package/dist/integrations/vitest/setup.js +280 -105
- package/dist/integrations/vitest/setup.mjs +1 -1
- package/package.json +2 -2
- package/dist/chunk-ORWSJC5F.mjs +0 -309
package/dist/index.mjs
CHANGED
|
@@ -1,7 +1,22 @@
|
|
|
1
1
|
import {
|
|
2
|
+
AgentAdapter,
|
|
3
|
+
AgentRole,
|
|
2
4
|
EventBus,
|
|
3
|
-
|
|
4
|
-
|
|
5
|
+
JudgeAgentAdapter,
|
|
6
|
+
Logger,
|
|
7
|
+
UserSimulatorAgentAdapter,
|
|
8
|
+
allAgentRoles,
|
|
9
|
+
defineConfig,
|
|
10
|
+
domain_exports,
|
|
11
|
+
generateMessageId,
|
|
12
|
+
generateScenarioId,
|
|
13
|
+
generateScenarioRunId,
|
|
14
|
+
generateThreadId,
|
|
15
|
+
getBatchRunId,
|
|
16
|
+
getProjectConfig,
|
|
17
|
+
loadScenarioProjectConfig,
|
|
18
|
+
scenarioProjectConfigSchema
|
|
19
|
+
} from "./chunk-ZMHTHRDR.mjs";
|
|
5
20
|
import {
|
|
6
21
|
__export
|
|
7
22
|
} from "./chunk-7P6ASYW6.mjs";
|
|
@@ -15,61 +30,7 @@ __export(agents_exports, {
|
|
|
15
30
|
|
|
16
31
|
// src/agents/judge-agent.ts
|
|
17
32
|
import { generateText, tool } from "ai";
|
|
18
|
-
import { z as z2 } from "zod";
|
|
19
|
-
|
|
20
|
-
// src/domain/index.ts
|
|
21
|
-
var domain_exports = {};
|
|
22
|
-
__export(domain_exports, {
|
|
23
|
-
AgentAdapter: () => AgentAdapter,
|
|
24
|
-
AgentRole: () => AgentRole,
|
|
25
|
-
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
26
|
-
UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
|
|
27
|
-
allAgentRoles: () => allAgentRoles,
|
|
28
|
-
defineConfig: () => defineConfig,
|
|
29
|
-
scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
|
|
30
|
-
});
|
|
31
|
-
|
|
32
|
-
// src/domain/core/config.ts
|
|
33
33
|
import { z } from "zod";
|
|
34
|
-
var scenarioProjectConfigSchema = z.object({
|
|
35
|
-
defaultModel: z.object({
|
|
36
|
-
model: z.custom(),
|
|
37
|
-
temperature: z.number().min(0).max(1).optional().default(0),
|
|
38
|
-
maxTokens: z.number().optional()
|
|
39
|
-
}).optional(),
|
|
40
|
-
langwatchEndpoint: z.string().optional(),
|
|
41
|
-
langwatchApiKey: z.string().optional()
|
|
42
|
-
}).strict();
|
|
43
|
-
function defineConfig(config2) {
|
|
44
|
-
return config2;
|
|
45
|
-
}
|
|
46
|
-
|
|
47
|
-
// src/domain/agents/index.ts
|
|
48
|
-
var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
|
|
49
|
-
AgentRole2["USER"] = "User";
|
|
50
|
-
AgentRole2["AGENT"] = "Agent";
|
|
51
|
-
AgentRole2["JUDGE"] = "Judge";
|
|
52
|
-
return AgentRole2;
|
|
53
|
-
})(AgentRole || {});
|
|
54
|
-
var allAgentRoles = ["User" /* USER */, "Agent" /* AGENT */, "Judge" /* JUDGE */];
|
|
55
|
-
var AgentAdapter = class {
|
|
56
|
-
role = "Agent" /* AGENT */;
|
|
57
|
-
constructor(input) {
|
|
58
|
-
void input;
|
|
59
|
-
}
|
|
60
|
-
};
|
|
61
|
-
var UserSimulatorAgentAdapter = class {
|
|
62
|
-
role = "User" /* USER */;
|
|
63
|
-
constructor(input) {
|
|
64
|
-
void input;
|
|
65
|
-
}
|
|
66
|
-
};
|
|
67
|
-
var JudgeAgentAdapter = class {
|
|
68
|
-
role = "Judge" /* JUDGE */;
|
|
69
|
-
constructor(input) {
|
|
70
|
-
void input;
|
|
71
|
-
}
|
|
72
|
-
};
|
|
73
34
|
|
|
74
35
|
// src/agents/utils.ts
|
|
75
36
|
var toolMessageRole = "tool";
|
|
@@ -126,81 +87,19 @@ var criterionToParamName = (criterion) => {
|
|
|
126
87
|
return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
|
|
127
88
|
};
|
|
128
89
|
|
|
129
|
-
// src/config/load.ts
|
|
130
|
-
import fs from "node:fs/promises";
|
|
131
|
-
import path from "node:path";
|
|
132
|
-
import { pathToFileURL } from "node:url";
|
|
133
|
-
async function loadScenarioProjectConfig() {
|
|
134
|
-
const cwd = process.cwd();
|
|
135
|
-
const configNames = [
|
|
136
|
-
"scenario.config.js",
|
|
137
|
-
"scenario.config.mjs"
|
|
138
|
-
];
|
|
139
|
-
for (const name of configNames) {
|
|
140
|
-
const fullPath = path.join(cwd, name);
|
|
141
|
-
try {
|
|
142
|
-
await fs.access(fullPath);
|
|
143
|
-
const configModule = await import(pathToFileURL(fullPath).href);
|
|
144
|
-
const config2 = configModule.default || configModule;
|
|
145
|
-
const parsed = scenarioProjectConfigSchema.safeParse(config2);
|
|
146
|
-
if (!parsed.success) {
|
|
147
|
-
throw new Error(
|
|
148
|
-
`Invalid config file ${name}: ${JSON.stringify(parsed.error.format(), null, 2)}`
|
|
149
|
-
);
|
|
150
|
-
}
|
|
151
|
-
return parsed.data;
|
|
152
|
-
} catch (error) {
|
|
153
|
-
if (error instanceof Error && "code" in error && error.code === "ENOENT") {
|
|
154
|
-
continue;
|
|
155
|
-
}
|
|
156
|
-
throw error;
|
|
157
|
-
}
|
|
158
|
-
}
|
|
159
|
-
return await scenarioProjectConfigSchema.parseAsync({});
|
|
160
|
-
}
|
|
161
|
-
|
|
162
|
-
// src/config/index.ts
|
|
163
|
-
var logger = new Logger("scenario.config");
|
|
164
|
-
var configLoaded = false;
|
|
165
|
-
var config = null;
|
|
166
|
-
var configLoadPromise = null;
|
|
167
|
-
async function loadProjectConfig() {
|
|
168
|
-
if (configLoaded) {
|
|
169
|
-
return;
|
|
170
|
-
}
|
|
171
|
-
if (configLoadPromise) {
|
|
172
|
-
return configLoadPromise;
|
|
173
|
-
}
|
|
174
|
-
configLoadPromise = (async () => {
|
|
175
|
-
try {
|
|
176
|
-
config = await loadScenarioProjectConfig();
|
|
177
|
-
logger.info("loaded scenario project config", { config });
|
|
178
|
-
} catch (error) {
|
|
179
|
-
logger.error("error loading scenario project config", { error });
|
|
180
|
-
} finally {
|
|
181
|
-
configLoaded = true;
|
|
182
|
-
}
|
|
183
|
-
})();
|
|
184
|
-
return configLoadPromise;
|
|
185
|
-
}
|
|
186
|
-
async function getProjectConfig() {
|
|
187
|
-
await loadProjectConfig();
|
|
188
|
-
return config;
|
|
189
|
-
}
|
|
190
|
-
|
|
191
90
|
// src/utils/config.ts
|
|
192
|
-
function mergeConfig(
|
|
91
|
+
function mergeConfig(config, projectConfig) {
|
|
193
92
|
if (!projectConfig) {
|
|
194
|
-
return
|
|
93
|
+
return config;
|
|
195
94
|
}
|
|
196
95
|
return {
|
|
197
96
|
...projectConfig.defaultModel,
|
|
198
|
-
...
|
|
97
|
+
...config
|
|
199
98
|
};
|
|
200
99
|
}
|
|
201
|
-
function mergeAndValidateConfig(
|
|
100
|
+
function mergeAndValidateConfig(config, projectConfig) {
|
|
202
101
|
var _a;
|
|
203
|
-
const mergedConfig = mergeConfig(
|
|
102
|
+
const mergedConfig = mergeConfig(config, projectConfig);
|
|
204
103
|
mergedConfig.model = mergedConfig.model ?? ((_a = projectConfig == null ? void 0 : projectConfig.defaultModel) == null ? void 0 : _a.model);
|
|
205
104
|
if (!mergedConfig.model) {
|
|
206
105
|
throw new Error("Model is required");
|
|
@@ -238,24 +137,24 @@ ${criteriaList}
|
|
|
238
137
|
function buildContinueTestTool() {
|
|
239
138
|
return tool({
|
|
240
139
|
description: "Continue the test with the next step",
|
|
241
|
-
parameters:
|
|
140
|
+
parameters: z.object({})
|
|
242
141
|
});
|
|
243
142
|
}
|
|
244
143
|
function buildFinishTestTool(criteria) {
|
|
245
144
|
const criteriaNames = criteria.map(criterionToParamName);
|
|
246
145
|
return tool({
|
|
247
146
|
description: "Complete the test with a final verdict",
|
|
248
|
-
parameters:
|
|
249
|
-
criteria:
|
|
147
|
+
parameters: z.object({
|
|
148
|
+
criteria: z.object(
|
|
250
149
|
Object.fromEntries(
|
|
251
150
|
criteriaNames.map((name, idx) => [
|
|
252
151
|
name,
|
|
253
|
-
|
|
152
|
+
z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
|
|
254
153
|
])
|
|
255
154
|
)
|
|
256
155
|
).strict().describe("Strict verdict for each criterion"),
|
|
257
|
-
reasoning:
|
|
258
|
-
verdict:
|
|
156
|
+
reasoning: z.string().describe("Explanation of what the final verdict should be"),
|
|
157
|
+
verdict: z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
|
|
259
158
|
})
|
|
260
159
|
});
|
|
261
160
|
}
|
|
@@ -365,7 +264,7 @@ ${description}
|
|
|
365
264
|
</rules>
|
|
366
265
|
`.trim();
|
|
367
266
|
}
|
|
368
|
-
var userSimulatorAgent = (
|
|
267
|
+
var userSimulatorAgent = (config) => {
|
|
369
268
|
return {
|
|
370
269
|
role: "User" /* USER */,
|
|
371
270
|
call: async (input) => {
|
|
@@ -376,7 +275,7 @@ var userSimulatorAgent = (config2) => {
|
|
|
376
275
|
...input.messages
|
|
377
276
|
];
|
|
378
277
|
const projectConfig = await getProjectConfig();
|
|
379
|
-
const mergedConfig = mergeAndValidateConfig(
|
|
278
|
+
const mergedConfig = mergeAndValidateConfig(config ?? {}, projectConfig);
|
|
380
279
|
if (!mergedConfig.model) {
|
|
381
280
|
throw new Error("Model is required for the user simulator agent");
|
|
382
281
|
}
|
|
@@ -406,28 +305,6 @@ __export(execution_exports, {
|
|
|
406
305
|
// src/execution/scenario-execution.ts
|
|
407
306
|
import { Subject } from "rxjs";
|
|
408
307
|
|
|
409
|
-
// src/utils/ids.ts
|
|
410
|
-
import { generate, parse } from "xksuid";
|
|
411
|
-
var batchRunId = null;
|
|
412
|
-
function generateThreadId() {
|
|
413
|
-
return `thread_${generate()}`;
|
|
414
|
-
}
|
|
415
|
-
function generateScenarioRunId() {
|
|
416
|
-
return `scenariorun_${generate()}`;
|
|
417
|
-
}
|
|
418
|
-
function generateScenarioId() {
|
|
419
|
-
return `scenario_${generate()}`;
|
|
420
|
-
}
|
|
421
|
-
function getBatchRunId() {
|
|
422
|
-
if (!batchRunId) {
|
|
423
|
-
batchRunId = process.env.SCENARIO_BATCH_RUN_ID ?? `scenariobatchrun_${generate()}`;
|
|
424
|
-
}
|
|
425
|
-
return batchRunId;
|
|
426
|
-
}
|
|
427
|
-
function generateMessageId() {
|
|
428
|
-
return `scenariomsg_${generate()}`;
|
|
429
|
-
}
|
|
430
|
-
|
|
431
308
|
// src/execution/scenario-execution-state.ts
|
|
432
309
|
var ScenarioExecutionState = class {
|
|
433
310
|
_messages = [];
|
|
@@ -435,9 +312,9 @@ var ScenarioExecutionState = class {
|
|
|
435
312
|
_threadId = "";
|
|
436
313
|
description;
|
|
437
314
|
config;
|
|
438
|
-
constructor(
|
|
439
|
-
this.config =
|
|
440
|
-
this.description =
|
|
315
|
+
constructor(config) {
|
|
316
|
+
this.config = config;
|
|
317
|
+
this.description = config.description;
|
|
441
318
|
}
|
|
442
319
|
get messages() {
|
|
443
320
|
return this._messages;
|
|
@@ -500,7 +377,7 @@ var ScenarioExecutionState = class {
|
|
|
500
377
|
};
|
|
501
378
|
|
|
502
379
|
// src/execution/scenario-execution.ts
|
|
503
|
-
var
|
|
380
|
+
var batchRunId = getBatchRunId();
|
|
504
381
|
var ScenarioExecution = class {
|
|
505
382
|
state;
|
|
506
383
|
eventSubject = new Subject();
|
|
@@ -523,17 +400,17 @@ var ScenarioExecution = class {
|
|
|
523
400
|
* @param config The scenario configuration.
|
|
524
401
|
* @param script The script steps to execute.
|
|
525
402
|
*/
|
|
526
|
-
constructor(
|
|
403
|
+
constructor(config, script) {
|
|
527
404
|
this.config = {
|
|
528
|
-
id:
|
|
529
|
-
name:
|
|
530
|
-
description:
|
|
531
|
-
agents:
|
|
405
|
+
id: config.id ?? generateScenarioId(),
|
|
406
|
+
name: config.name,
|
|
407
|
+
description: config.description,
|
|
408
|
+
agents: config.agents,
|
|
532
409
|
script,
|
|
533
|
-
verbose:
|
|
534
|
-
maxTurns:
|
|
535
|
-
threadId:
|
|
536
|
-
setId:
|
|
410
|
+
verbose: config.verbose ?? false,
|
|
411
|
+
maxTurns: config.maxTurns ?? 10,
|
|
412
|
+
threadId: config.threadId ?? generateThreadId(),
|
|
413
|
+
setId: config.setId
|
|
537
414
|
};
|
|
538
415
|
this.state = new ScenarioExecutionState(this.config);
|
|
539
416
|
this.reset();
|
|
@@ -912,7 +789,7 @@ var ScenarioExecution = class {
|
|
|
912
789
|
type: "placeholder",
|
|
913
790
|
// This will be replaced by the specific event type
|
|
914
791
|
timestamp: Date.now(),
|
|
915
|
-
batchRunId
|
|
792
|
+
batchRunId,
|
|
916
793
|
scenarioId: this.config.id,
|
|
917
794
|
scenarioRunId,
|
|
918
795
|
scenarioSetId: this.config.setId
|