@langwatch/scenario 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/chunk-NUZZAQV2.mjs +622 -0
- package/dist/index.d.mts +65 -15
- package/dist/index.d.ts +65 -15
- package/dist/index.js +317 -97
- package/dist/index.mjs +122 -173
- package/dist/integrations/vitest/setup.js +282 -105
- package/dist/integrations/vitest/setup.mjs +1 -1
- package/package.json +4 -3
- package/dist/chunk-ORWSJC5F.mjs +0 -309
package/dist/index.js
CHANGED
|
@@ -32,6 +32,7 @@ var index_exports = {};
|
|
|
32
32
|
__export(index_exports, {
|
|
33
33
|
AgentAdapter: () => AgentAdapter,
|
|
34
34
|
AgentRole: () => AgentRole,
|
|
35
|
+
DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
|
|
35
36
|
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
36
37
|
ScenarioExecution: () => ScenarioExecution,
|
|
37
38
|
ScenarioExecutionState: () => ScenarioExecutionState,
|
|
@@ -63,13 +64,14 @@ __export(agents_exports, {
|
|
|
63
64
|
|
|
64
65
|
// src/agents/judge-agent.ts
|
|
65
66
|
var import_ai = require("ai");
|
|
66
|
-
var
|
|
67
|
+
var import_zod3 = require("zod");
|
|
67
68
|
|
|
68
69
|
// src/domain/index.ts
|
|
69
70
|
var domain_exports = {};
|
|
70
71
|
__export(domain_exports, {
|
|
71
72
|
AgentAdapter: () => AgentAdapter,
|
|
72
73
|
AgentRole: () => AgentRole,
|
|
74
|
+
DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
|
|
73
75
|
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
74
76
|
UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
|
|
75
77
|
allAgentRoles: () => allAgentRoles,
|
|
@@ -79,14 +81,13 @@ __export(domain_exports, {
|
|
|
79
81
|
|
|
80
82
|
// src/domain/core/config.ts
|
|
81
83
|
var import_zod = require("zod");
|
|
84
|
+
var DEFAULT_TEMPERATURE = 0;
|
|
82
85
|
var scenarioProjectConfigSchema = import_zod.z.object({
|
|
83
86
|
defaultModel: import_zod.z.object({
|
|
84
87
|
model: import_zod.z.custom(),
|
|
85
|
-
temperature: import_zod.z.number().min(0).max(1).optional().default(
|
|
88
|
+
temperature: import_zod.z.number().min(0).max(1).optional().default(DEFAULT_TEMPERATURE),
|
|
86
89
|
maxTokens: import_zod.z.number().optional()
|
|
87
|
-
}).optional()
|
|
88
|
-
langwatchEndpoint: import_zod.z.string().optional(),
|
|
89
|
-
langwatchApiKey: import_zod.z.string().optional()
|
|
90
|
+
}).optional()
|
|
90
91
|
}).strict();
|
|
91
92
|
function defineConfig(config2) {
|
|
92
93
|
return config2;
|
|
@@ -218,21 +219,25 @@ var Logger = class _Logger {
|
|
|
218
219
|
static create(context) {
|
|
219
220
|
return new _Logger(context);
|
|
220
221
|
}
|
|
222
|
+
getLogLevel() {
|
|
223
|
+
return env.SCENARIO_LOG_LEVEL ?? "INFO" /* INFO */;
|
|
224
|
+
}
|
|
225
|
+
getLogLevelIndex(level) {
|
|
226
|
+
return Object.values(LogLevel).indexOf(level);
|
|
227
|
+
}
|
|
221
228
|
/**
|
|
222
229
|
* Checks if logging should occur based on LOG_LEVEL env var
|
|
223
230
|
*/
|
|
224
231
|
shouldLog(level) {
|
|
225
|
-
const
|
|
226
|
-
const
|
|
227
|
-
const currentLevelIndex = levels.indexOf(logLevel);
|
|
228
|
-
const requestedLevelIndex = levels.indexOf(level);
|
|
232
|
+
const currentLevelIndex = this.getLogLevelIndex(this.getLogLevel());
|
|
233
|
+
const requestedLevelIndex = this.getLogLevelIndex(level);
|
|
229
234
|
return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
|
|
230
235
|
}
|
|
231
236
|
formatMessage(message2) {
|
|
232
237
|
return this.context ? `[${this.context}] ${message2}` : message2;
|
|
233
238
|
}
|
|
234
239
|
error(message2, data) {
|
|
235
|
-
if (this.shouldLog("
|
|
240
|
+
if (this.shouldLog("ERROR" /* ERROR */)) {
|
|
236
241
|
const formattedMessage = this.formatMessage(message2);
|
|
237
242
|
if (data) {
|
|
238
243
|
console.error(formattedMessage, data);
|
|
@@ -242,7 +247,7 @@ var Logger = class _Logger {
|
|
|
242
247
|
}
|
|
243
248
|
}
|
|
244
249
|
warn(message2, data) {
|
|
245
|
-
if (this.shouldLog("
|
|
250
|
+
if (this.shouldLog("WARN" /* WARN */)) {
|
|
246
251
|
const formattedMessage = this.formatMessage(message2);
|
|
247
252
|
if (data) {
|
|
248
253
|
console.warn(formattedMessage, data);
|
|
@@ -252,7 +257,7 @@ var Logger = class _Logger {
|
|
|
252
257
|
}
|
|
253
258
|
}
|
|
254
259
|
info(message2, data) {
|
|
255
|
-
if (this.shouldLog("
|
|
260
|
+
if (this.shouldLog("INFO" /* INFO */)) {
|
|
256
261
|
const formattedMessage = this.formatMessage(message2);
|
|
257
262
|
if (data) {
|
|
258
263
|
console.info(formattedMessage, data);
|
|
@@ -262,7 +267,7 @@ var Logger = class _Logger {
|
|
|
262
267
|
}
|
|
263
268
|
}
|
|
264
269
|
debug(message2, data) {
|
|
265
|
-
if (this.shouldLog("
|
|
270
|
+
if (this.shouldLog("DEBUG" /* DEBUG */)) {
|
|
266
271
|
const formattedMessage = this.formatMessage(message2);
|
|
267
272
|
if (data) {
|
|
268
273
|
console.log(formattedMessage, data);
|
|
@@ -273,6 +278,53 @@ var Logger = class _Logger {
|
|
|
273
278
|
}
|
|
274
279
|
};
|
|
275
280
|
|
|
281
|
+
// src/config/env.ts
|
|
282
|
+
var import_zod2 = require("zod");
|
|
283
|
+
|
|
284
|
+
// src/config/log-levels.ts
|
|
285
|
+
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
286
|
+
LogLevel2["ERROR"] = "ERROR";
|
|
287
|
+
LogLevel2["WARN"] = "WARN";
|
|
288
|
+
LogLevel2["INFO"] = "INFO";
|
|
289
|
+
LogLevel2["DEBUG"] = "DEBUG";
|
|
290
|
+
return LogLevel2;
|
|
291
|
+
})(LogLevel || {});
|
|
292
|
+
|
|
293
|
+
// src/config/env.ts
|
|
294
|
+
var envSchema = import_zod2.z.object({
|
|
295
|
+
/**
|
|
296
|
+
* LangWatch API key for event reporting.
|
|
297
|
+
* If not provided, events will not be sent to LangWatch.
|
|
298
|
+
*/
|
|
299
|
+
LANGWATCH_API_KEY: import_zod2.z.string().optional(),
|
|
300
|
+
/**
|
|
301
|
+
* LangWatch endpoint URL for event reporting.
|
|
302
|
+
* Defaults to the production LangWatch endpoint.
|
|
303
|
+
*/
|
|
304
|
+
LANGWATCH_ENDPOINT: import_zod2.z.string().url().default("https://app.langwatch.ai"),
|
|
305
|
+
/**
|
|
306
|
+
* Disables simulation report info messages when set to any truthy value.
|
|
307
|
+
* Useful for CI/CD environments or when you want cleaner output.
|
|
308
|
+
*/
|
|
309
|
+
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_zod2.z.string().optional().transform((val) => Boolean(val)),
|
|
310
|
+
/**
|
|
311
|
+
* Node environment - affects logging and behavior.
|
|
312
|
+
* Defaults to 'development' if not specified.
|
|
313
|
+
*/
|
|
314
|
+
NODE_ENV: import_zod2.z.enum(["development", "production", "test"]).default("development"),
|
|
315
|
+
/**
|
|
316
|
+
* Log level for the scenario package.
|
|
317
|
+
* Defaults to 'info' if not specified.
|
|
318
|
+
*/
|
|
319
|
+
SCENARIO_LOG_LEVEL: import_zod2.z.nativeEnum(LogLevel).optional(),
|
|
320
|
+
/**
|
|
321
|
+
* Scenario batch run ID.
|
|
322
|
+
* If not provided, a random ID will be generated.
|
|
323
|
+
*/
|
|
324
|
+
SCENARIO_BATCH_RUN_ID: import_zod2.z.string().optional()
|
|
325
|
+
});
|
|
326
|
+
var env = envSchema.parse(process.env);
|
|
327
|
+
|
|
276
328
|
// src/config/index.ts
|
|
277
329
|
var logger = new Logger("scenario.config");
|
|
278
330
|
var configLoaded = false;
|
|
@@ -352,24 +404,24 @@ ${criteriaList}
|
|
|
352
404
|
function buildContinueTestTool() {
|
|
353
405
|
return (0, import_ai.tool)({
|
|
354
406
|
description: "Continue the test with the next step",
|
|
355
|
-
parameters:
|
|
407
|
+
parameters: import_zod3.z.object({})
|
|
356
408
|
});
|
|
357
409
|
}
|
|
358
410
|
function buildFinishTestTool(criteria) {
|
|
359
411
|
const criteriaNames = criteria.map(criterionToParamName);
|
|
360
412
|
return (0, import_ai.tool)({
|
|
361
413
|
description: "Complete the test with a final verdict",
|
|
362
|
-
parameters:
|
|
363
|
-
criteria:
|
|
414
|
+
parameters: import_zod3.z.object({
|
|
415
|
+
criteria: import_zod3.z.object(
|
|
364
416
|
Object.fromEntries(
|
|
365
417
|
criteriaNames.map((name, idx) => [
|
|
366
418
|
name,
|
|
367
|
-
|
|
419
|
+
import_zod3.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
|
|
368
420
|
])
|
|
369
421
|
)
|
|
370
422
|
).strict().describe("Strict verdict for each criterion"),
|
|
371
|
-
reasoning:
|
|
372
|
-
verdict:
|
|
423
|
+
reasoning: import_zod3.z.string().describe("Explanation of what the final verdict should be"),
|
|
424
|
+
verdict: import_zod3.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
|
|
373
425
|
})
|
|
374
426
|
});
|
|
375
427
|
}
|
|
@@ -483,7 +535,7 @@ var userSimulatorAgent = (config2) => {
|
|
|
483
535
|
return {
|
|
484
536
|
role: "User" /* USER */,
|
|
485
537
|
call: async (input) => {
|
|
486
|
-
const systemPrompt = buildSystemPrompt2(input.scenarioConfig.description);
|
|
538
|
+
const systemPrompt = (config2 == null ? void 0 : config2.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
|
|
487
539
|
const messages = [
|
|
488
540
|
{ role: "system", content: systemPrompt },
|
|
489
541
|
{ role: "assistant", content: "Hello, how can I help you today" },
|
|
@@ -498,7 +550,7 @@ var userSimulatorAgent = (config2) => {
|
|
|
498
550
|
const completion = await (0, import_ai2.generateText)({
|
|
499
551
|
model: mergedConfig.model,
|
|
500
552
|
messages: reversedMessages,
|
|
501
|
-
temperature: mergedConfig.temperature ??
|
|
553
|
+
temperature: mergedConfig.temperature ?? DEFAULT_TEMPERATURE,
|
|
502
554
|
maxTokens: mergedConfig.maxTokens
|
|
503
555
|
});
|
|
504
556
|
const messageContent = completion.text;
|
|
@@ -522,7 +574,6 @@ var import_rxjs = require("rxjs");
|
|
|
522
574
|
|
|
523
575
|
// src/utils/ids.ts
|
|
524
576
|
var import_xksuid = require("xksuid");
|
|
525
|
-
var batchRunId = null;
|
|
526
577
|
function generateThreadId() {
|
|
527
578
|
return `thread_${(0, import_xksuid.generate)()}`;
|
|
528
579
|
}
|
|
@@ -533,10 +584,10 @@ function generateScenarioId() {
|
|
|
533
584
|
return `scenario_${(0, import_xksuid.generate)()}`;
|
|
534
585
|
}
|
|
535
586
|
function getBatchRunId() {
|
|
536
|
-
if (!
|
|
537
|
-
|
|
587
|
+
if (!env.SCENARIO_BATCH_RUN_ID) {
|
|
588
|
+
env.SCENARIO_BATCH_RUN_ID = `scenariobatchrun_${(0, import_xksuid.generate)()}`;
|
|
538
589
|
}
|
|
539
|
-
return
|
|
590
|
+
return env.SCENARIO_BATCH_RUN_ID;
|
|
540
591
|
}
|
|
541
592
|
function generateMessageId() {
|
|
542
593
|
return `scenariomsg_${(0, import_xksuid.generate)()}`;
|
|
@@ -615,7 +666,7 @@ var ScenarioExecutionState = class {
|
|
|
615
666
|
|
|
616
667
|
// src/events/schema.ts
|
|
617
668
|
var import_core = require("@ag-ui/core");
|
|
618
|
-
var
|
|
669
|
+
var import_zod4 = require("zod");
|
|
619
670
|
var Verdict = /* @__PURE__ */ ((Verdict2) => {
|
|
620
671
|
Verdict2["SUCCESS"] = "success";
|
|
621
672
|
Verdict2["FAILURE"] = "failure";
|
|
@@ -631,62 +682,133 @@ var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => {
|
|
|
631
682
|
ScenarioRunStatus2["FAILED"] = "FAILED";
|
|
632
683
|
return ScenarioRunStatus2;
|
|
633
684
|
})(ScenarioRunStatus || {});
|
|
634
|
-
var baseEventSchema =
|
|
635
|
-
type:
|
|
636
|
-
timestamp:
|
|
637
|
-
rawEvent:
|
|
685
|
+
var baseEventSchema = import_zod4.z.object({
|
|
686
|
+
type: import_zod4.z.nativeEnum(import_core.EventType),
|
|
687
|
+
timestamp: import_zod4.z.number(),
|
|
688
|
+
rawEvent: import_zod4.z.any().optional()
|
|
638
689
|
});
|
|
639
|
-
var batchRunIdSchema =
|
|
640
|
-
var scenarioRunIdSchema =
|
|
641
|
-
var scenarioIdSchema =
|
|
690
|
+
var batchRunIdSchema = import_zod4.z.string();
|
|
691
|
+
var scenarioRunIdSchema = import_zod4.z.string();
|
|
692
|
+
var scenarioIdSchema = import_zod4.z.string();
|
|
642
693
|
var baseScenarioEventSchema = baseEventSchema.extend({
|
|
643
694
|
batchRunId: batchRunIdSchema,
|
|
644
695
|
scenarioId: scenarioIdSchema,
|
|
645
696
|
scenarioRunId: scenarioRunIdSchema,
|
|
646
|
-
scenarioSetId:
|
|
697
|
+
scenarioSetId: import_zod4.z.string().optional().default("default")
|
|
647
698
|
});
|
|
648
699
|
var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
|
|
649
|
-
type:
|
|
650
|
-
metadata:
|
|
651
|
-
name:
|
|
652
|
-
description:
|
|
700
|
+
type: import_zod4.z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
|
|
701
|
+
metadata: import_zod4.z.object({
|
|
702
|
+
name: import_zod4.z.string().optional(),
|
|
703
|
+
description: import_zod4.z.string().optional()
|
|
653
704
|
})
|
|
654
705
|
});
|
|
655
|
-
var scenarioResultsSchema =
|
|
656
|
-
verdict:
|
|
657
|
-
reasoning:
|
|
658
|
-
metCriteria:
|
|
659
|
-
unmetCriteria:
|
|
660
|
-
error:
|
|
706
|
+
var scenarioResultsSchema = import_zod4.z.object({
|
|
707
|
+
verdict: import_zod4.z.nativeEnum(Verdict),
|
|
708
|
+
reasoning: import_zod4.z.string().optional(),
|
|
709
|
+
metCriteria: import_zod4.z.array(import_zod4.z.string()),
|
|
710
|
+
unmetCriteria: import_zod4.z.array(import_zod4.z.string()),
|
|
711
|
+
error: import_zod4.z.string().optional()
|
|
661
712
|
});
|
|
662
713
|
var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
|
|
663
|
-
type:
|
|
664
|
-
status:
|
|
714
|
+
type: import_zod4.z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
|
|
715
|
+
status: import_zod4.z.nativeEnum(ScenarioRunStatus),
|
|
665
716
|
results: scenarioResultsSchema.optional().nullable()
|
|
666
717
|
});
|
|
667
718
|
var scenarioMessageSnapshotSchema = import_core.MessagesSnapshotEventSchema.merge(
|
|
668
719
|
baseScenarioEventSchema.extend({
|
|
669
|
-
type:
|
|
720
|
+
type: import_zod4.z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
|
|
670
721
|
})
|
|
671
722
|
);
|
|
672
|
-
var scenarioEventSchema =
|
|
723
|
+
var scenarioEventSchema = import_zod4.z.discriminatedUnion("type", [
|
|
673
724
|
scenarioRunStartedSchema,
|
|
674
725
|
scenarioRunFinishedSchema,
|
|
675
726
|
scenarioMessageSnapshotSchema
|
|
676
727
|
]);
|
|
677
|
-
var successSchema =
|
|
678
|
-
var errorSchema =
|
|
679
|
-
var stateSchema =
|
|
680
|
-
state:
|
|
681
|
-
messages:
|
|
682
|
-
status:
|
|
728
|
+
var successSchema = import_zod4.z.object({ success: import_zod4.z.boolean() });
|
|
729
|
+
var errorSchema = import_zod4.z.object({ error: import_zod4.z.string() });
|
|
730
|
+
var stateSchema = import_zod4.z.object({
|
|
731
|
+
state: import_zod4.z.object({
|
|
732
|
+
messages: import_zod4.z.array(import_zod4.z.any()),
|
|
733
|
+
status: import_zod4.z.string()
|
|
683
734
|
})
|
|
684
735
|
});
|
|
685
|
-
var runsSchema =
|
|
686
|
-
var eventsSchema =
|
|
736
|
+
var runsSchema = import_zod4.z.object({ runs: import_zod4.z.array(import_zod4.z.string()) });
|
|
737
|
+
var eventsSchema = import_zod4.z.object({ events: import_zod4.z.array(scenarioEventSchema) });
|
|
738
|
+
|
|
739
|
+
// src/utils/message-conversion.ts
|
|
740
|
+
function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
741
|
+
const aguiMessages = [];
|
|
742
|
+
for (const msg of coreMessages) {
|
|
743
|
+
const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
|
|
744
|
+
switch (true) {
|
|
745
|
+
case msg.role === "system":
|
|
746
|
+
aguiMessages.push({
|
|
747
|
+
id,
|
|
748
|
+
role: "system",
|
|
749
|
+
content: msg.content
|
|
750
|
+
});
|
|
751
|
+
break;
|
|
752
|
+
case (msg.role === "user" && typeof msg.content === "string"):
|
|
753
|
+
aguiMessages.push({
|
|
754
|
+
id,
|
|
755
|
+
role: "user",
|
|
756
|
+
content: msg.content
|
|
757
|
+
});
|
|
758
|
+
break;
|
|
759
|
+
// Handle any other user message content format
|
|
760
|
+
case (msg.role === "user" && Array.isArray(msg.content)):
|
|
761
|
+
aguiMessages.push({
|
|
762
|
+
id,
|
|
763
|
+
role: "user",
|
|
764
|
+
content: JSON.stringify(msg.content)
|
|
765
|
+
});
|
|
766
|
+
break;
|
|
767
|
+
case (msg.role === "assistant" && typeof msg.content === "string"):
|
|
768
|
+
aguiMessages.push({
|
|
769
|
+
id,
|
|
770
|
+
role: "assistant",
|
|
771
|
+
content: msg.content
|
|
772
|
+
});
|
|
773
|
+
break;
|
|
774
|
+
case (msg.role === "assistant" && Array.isArray(msg.content)): {
|
|
775
|
+
const toolCalls = msg.content.filter((p) => p.type === "tool-call");
|
|
776
|
+
const nonToolCalls = msg.content.filter((p) => p.type !== "tool-call");
|
|
777
|
+
aguiMessages.push({
|
|
778
|
+
id,
|
|
779
|
+
role: "assistant",
|
|
780
|
+
content: JSON.stringify(nonToolCalls),
|
|
781
|
+
toolCalls: toolCalls.map((c) => ({
|
|
782
|
+
id: c.toolCallId,
|
|
783
|
+
type: "function",
|
|
784
|
+
function: {
|
|
785
|
+
name: c.toolName,
|
|
786
|
+
arguments: JSON.stringify(c.args)
|
|
787
|
+
}
|
|
788
|
+
}))
|
|
789
|
+
});
|
|
790
|
+
break;
|
|
791
|
+
}
|
|
792
|
+
case msg.role === "tool":
|
|
793
|
+
msg.content.map((p, i) => {
|
|
794
|
+
aguiMessages.push({
|
|
795
|
+
id: `${id}-${i}`,
|
|
796
|
+
role: "tool",
|
|
797
|
+
toolCallId: p.toolCallId,
|
|
798
|
+
content: JSON.stringify(p.result)
|
|
799
|
+
});
|
|
800
|
+
});
|
|
801
|
+
break;
|
|
802
|
+
default:
|
|
803
|
+
throw new Error(`Unsupported message role: ${msg.role}`);
|
|
804
|
+
}
|
|
805
|
+
}
|
|
806
|
+
return aguiMessages;
|
|
807
|
+
}
|
|
808
|
+
var message_conversion_default = convertCoreMessagesToAguiMessages;
|
|
687
809
|
|
|
688
810
|
// src/execution/scenario-execution.ts
|
|
689
|
-
var
|
|
811
|
+
var batchRunId = getBatchRunId();
|
|
690
812
|
var ScenarioExecution = class {
|
|
691
813
|
state;
|
|
692
814
|
eventSubject = new import_rxjs.Subject();
|
|
@@ -1098,7 +1220,7 @@ var ScenarioExecution = class {
|
|
|
1098
1220
|
type: "placeholder",
|
|
1099
1221
|
// This will be replaced by the specific event type
|
|
1100
1222
|
timestamp: Date.now(),
|
|
1101
|
-
batchRunId
|
|
1223
|
+
batchRunId,
|
|
1102
1224
|
scenarioId: this.config.id,
|
|
1103
1225
|
scenarioRunId,
|
|
1104
1226
|
scenarioSetId: this.config.setId
|
|
@@ -1124,7 +1246,7 @@ var ScenarioExecution = class {
|
|
|
1124
1246
|
this.emitEvent({
|
|
1125
1247
|
...this.makeBaseEvent({ scenarioRunId }),
|
|
1126
1248
|
type: "SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */,
|
|
1127
|
-
messages: this.state.messages
|
|
1249
|
+
messages: message_conversion_default(this.state.messages)
|
|
1128
1250
|
// Add any other required fields from MessagesSnapshotEventSchema
|
|
1129
1251
|
});
|
|
1130
1252
|
}
|
|
@@ -1187,45 +1309,114 @@ __export(runner_exports, {
|
|
|
1187
1309
|
// src/events/event-bus.ts
|
|
1188
1310
|
var import_rxjs2 = require("rxjs");
|
|
1189
1311
|
|
|
1312
|
+
// src/events/event-alert-message-logger.ts
|
|
1313
|
+
var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
1314
|
+
static shownBatchIds = /* @__PURE__ */ new Set();
|
|
1315
|
+
/**
|
|
1316
|
+
* Shows a fancy greeting message about simulation reporting status.
|
|
1317
|
+
* Only shows once per batch run to avoid spam.
|
|
1318
|
+
*/
|
|
1319
|
+
handleGreeting() {
|
|
1320
|
+
if (this.isGreetingDisabled()) {
|
|
1321
|
+
return;
|
|
1322
|
+
}
|
|
1323
|
+
const batchRunId2 = getBatchRunId();
|
|
1324
|
+
if (_EventAlertMessageLogger.shownBatchIds.has(batchRunId2)) {
|
|
1325
|
+
return;
|
|
1326
|
+
}
|
|
1327
|
+
_EventAlertMessageLogger.shownBatchIds.add(batchRunId2);
|
|
1328
|
+
this.displayGreeting(batchRunId2);
|
|
1329
|
+
}
|
|
1330
|
+
/**
|
|
1331
|
+
* Shows a fancy message about how to watch the simulation.
|
|
1332
|
+
* Called when a run started event is received with a session ID.
|
|
1333
|
+
*/
|
|
1334
|
+
handleWatchMessage(params) {
|
|
1335
|
+
if (this.isGreetingDisabled()) {
|
|
1336
|
+
return;
|
|
1337
|
+
}
|
|
1338
|
+
this.displayWatchMessage(params);
|
|
1339
|
+
}
|
|
1340
|
+
isGreetingDisabled() {
|
|
1341
|
+
return env.SCENARIO_DISABLE_SIMULATION_REPORT_INFO === true;
|
|
1342
|
+
}
|
|
1343
|
+
displayGreeting(batchRunId2) {
|
|
1344
|
+
const separator = "\u2500".repeat(60);
|
|
1345
|
+
if (!env.LANGWATCH_API_KEY) {
|
|
1346
|
+
console.log(`
|
|
1347
|
+
${separator}`);
|
|
1348
|
+
console.log("\u{1F680} LangWatch Simulation Reporting");
|
|
1349
|
+
console.log(`${separator}`);
|
|
1350
|
+
console.log("\u27A1\uFE0F API key not configured");
|
|
1351
|
+
console.log(" Simulations will only output final results");
|
|
1352
|
+
console.log("");
|
|
1353
|
+
console.log("\u{1F4A1} To visualize conversations in real time:");
|
|
1354
|
+
console.log(" \u2022 Set LANGWATCH_API_KEY environment variable");
|
|
1355
|
+
console.log(" \u2022 Or configure apiKey in scenario.config.js");
|
|
1356
|
+
console.log("");
|
|
1357
|
+
console.log(`\u{1F4E6} Batch Run ID: ${batchRunId2}`);
|
|
1358
|
+
console.log(`${separator}
|
|
1359
|
+
`);
|
|
1360
|
+
} else {
|
|
1361
|
+
console.log(`
|
|
1362
|
+
${separator}`);
|
|
1363
|
+
console.log("\u{1F680} LangWatch Simulation Reporting");
|
|
1364
|
+
console.log(`${separator}`);
|
|
1365
|
+
console.log("\u2705 Simulation reporting enabled");
|
|
1366
|
+
console.log(` Endpoint: ${env.LANGWATCH_ENDPOINT}`);
|
|
1367
|
+
console.log(
|
|
1368
|
+
` API Key: ${env.LANGWATCH_API_KEY.length > 0 ? "Configured" : "Not configured"}`
|
|
1369
|
+
);
|
|
1370
|
+
console.log("");
|
|
1371
|
+
console.log(`\u{1F4E6} Batch Run ID: ${batchRunId2}`);
|
|
1372
|
+
console.log(`${separator}
|
|
1373
|
+
`);
|
|
1374
|
+
}
|
|
1375
|
+
}
|
|
1376
|
+
displayWatchMessage(params) {
|
|
1377
|
+
const separator = "\u2500".repeat(60);
|
|
1378
|
+
const setUrl = params.setUrl;
|
|
1379
|
+
const batchUrl = `${setUrl}/${getBatchRunId()}`;
|
|
1380
|
+
console.log(`
|
|
1381
|
+
${separator}`);
|
|
1382
|
+
console.log("\u{1F440} Watch Your Simulation Live");
|
|
1383
|
+
console.log(`${separator}`);
|
|
1384
|
+
console.log("\u{1F310} Open in your browser:");
|
|
1385
|
+
console.log(` Scenario Set: ${setUrl}`);
|
|
1386
|
+
console.log(` Batch Run: ${batchUrl}`);
|
|
1387
|
+
console.log("");
|
|
1388
|
+
console.log(`${separator}
|
|
1389
|
+
`);
|
|
1390
|
+
}
|
|
1391
|
+
};
|
|
1392
|
+
|
|
1190
1393
|
// src/events/event-reporter.ts
|
|
1191
1394
|
var EventReporter = class {
|
|
1192
|
-
eventsEndpoint;
|
|
1193
1395
|
apiKey;
|
|
1396
|
+
eventsEndpoint;
|
|
1397
|
+
eventAlertMessageLogger;
|
|
1194
1398
|
logger = new Logger("scenario.events.EventReporter");
|
|
1399
|
+
isEnabled;
|
|
1195
1400
|
constructor(config2) {
|
|
1196
|
-
this.eventsEndpoint = new URL("/api/scenario-events", config2.endpoint);
|
|
1197
1401
|
this.apiKey = config2.apiKey ?? "";
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1202
|
-
);
|
|
1203
|
-
console.log(
|
|
1204
|
-
"To visualize the conversations in real time, configure your LangWatch API key (via LANGWATCH_API_KEY, or scenario.config.js)"
|
|
1205
|
-
);
|
|
1206
|
-
} else {
|
|
1207
|
-
console.log(`simulation reporting is enabled, endpoint:(${this.eventsEndpoint}) api_key_configured:(${this.apiKey.length > 0 ? "true" : "false"})`);
|
|
1208
|
-
}
|
|
1209
|
-
}
|
|
1402
|
+
this.eventsEndpoint = new URL("/api/scenario-events", config2.endpoint);
|
|
1403
|
+
this.eventAlertMessageLogger = new EventAlertMessageLogger();
|
|
1404
|
+
this.eventAlertMessageLogger.handleGreeting();
|
|
1405
|
+
this.isEnabled = this.apiKey.length > 0 && this.eventsEndpoint.href.length > 0;
|
|
1210
1406
|
}
|
|
1211
1407
|
/**
|
|
1212
1408
|
* Posts an event to the configured endpoint.
|
|
1213
1409
|
* Logs success/failure but doesn't throw - event posting shouldn't break scenario execution.
|
|
1214
1410
|
*/
|
|
1215
1411
|
async postEvent(event) {
|
|
1216
|
-
this.
|
|
1217
|
-
|
|
1218
|
-
});
|
|
1219
|
-
|
|
1220
|
-
this.logger.warn(
|
|
1221
|
-
"No LANGWATCH_ENDPOINT configured, skipping event posting"
|
|
1222
|
-
);
|
|
1223
|
-
return;
|
|
1224
|
-
}
|
|
1412
|
+
if (!this.isEnabled) return {};
|
|
1413
|
+
const result = {};
|
|
1414
|
+
this.logger.debug(`[${event.type}] Posting event`, { event });
|
|
1415
|
+
const processedEvent = this.processEventForApi(event);
|
|
1225
1416
|
try {
|
|
1226
1417
|
const response = await fetch(this.eventsEndpoint.href, {
|
|
1227
1418
|
method: "POST",
|
|
1228
|
-
body: JSON.stringify(
|
|
1419
|
+
body: JSON.stringify(processedEvent),
|
|
1229
1420
|
headers: {
|
|
1230
1421
|
"Content-Type": "application/json",
|
|
1231
1422
|
"X-Auth-Token": this.apiKey
|
|
@@ -1237,22 +1428,40 @@ var EventReporter = class {
|
|
|
1237
1428
|
if (response.ok) {
|
|
1238
1429
|
const data = await response.json();
|
|
1239
1430
|
this.logger.debug(`[${event.type}] Event POST response:`, data);
|
|
1431
|
+
result.setUrl = data.url;
|
|
1240
1432
|
} else {
|
|
1241
1433
|
const errorText = await response.text();
|
|
1242
1434
|
this.logger.error(`[${event.type}] Event POST failed:`, {
|
|
1243
1435
|
status: response.status,
|
|
1244
1436
|
statusText: response.statusText,
|
|
1245
1437
|
error: errorText,
|
|
1246
|
-
event
|
|
1438
|
+
event: JSON.stringify(processedEvent)
|
|
1247
1439
|
});
|
|
1248
1440
|
}
|
|
1249
1441
|
} catch (error) {
|
|
1250
1442
|
this.logger.error(`[${event.type}] Event POST error:`, {
|
|
1251
1443
|
error,
|
|
1252
|
-
event,
|
|
1253
|
-
endpoint: this.eventsEndpoint
|
|
1444
|
+
event: JSON.stringify(processedEvent),
|
|
1445
|
+
endpoint: this.eventsEndpoint.href
|
|
1254
1446
|
});
|
|
1255
1447
|
}
|
|
1448
|
+
return result;
|
|
1449
|
+
}
|
|
1450
|
+
/**
|
|
1451
|
+
* Processes event data to ensure API compatibility.
|
|
1452
|
+
* Converts message content objects to strings when needed.
|
|
1453
|
+
*/
|
|
1454
|
+
processEventForApi(event) {
|
|
1455
|
+
if (event.type === "SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */) {
|
|
1456
|
+
return {
|
|
1457
|
+
...event,
|
|
1458
|
+
messages: event.messages.map((message2) => ({
|
|
1459
|
+
...message2,
|
|
1460
|
+
content: typeof message2.content !== "string" ? JSON.stringify(message2.content) : message2.content
|
|
1461
|
+
}))
|
|
1462
|
+
};
|
|
1463
|
+
}
|
|
1464
|
+
return event;
|
|
1256
1465
|
}
|
|
1257
1466
|
};
|
|
1258
1467
|
|
|
@@ -1261,11 +1470,13 @@ var EventBus = class _EventBus {
|
|
|
1261
1470
|
static registry = /* @__PURE__ */ new Set();
|
|
1262
1471
|
events$ = new import_rxjs2.Subject();
|
|
1263
1472
|
eventReporter;
|
|
1473
|
+
eventAlertMessageLogger;
|
|
1264
1474
|
processingPromise = null;
|
|
1265
1475
|
logger = new Logger("scenario.events.EventBus");
|
|
1266
1476
|
static globalListeners = [];
|
|
1267
1477
|
constructor(config2) {
|
|
1268
1478
|
this.eventReporter = new EventReporter(config2);
|
|
1479
|
+
this.eventAlertMessageLogger = new EventAlertMessageLogger();
|
|
1269
1480
|
_EventBus.registry.add(this);
|
|
1270
1481
|
for (const listener of _EventBus.globalListeners) {
|
|
1271
1482
|
listener(this);
|
|
@@ -1297,22 +1508,31 @@ var EventBus = class _EventBus {
|
|
|
1297
1508
|
}
|
|
1298
1509
|
this.processingPromise = new Promise((resolve, reject) => {
|
|
1299
1510
|
this.events$.pipe(
|
|
1511
|
+
// Post events and get results
|
|
1300
1512
|
(0, import_rxjs2.concatMap)(async (event) => {
|
|
1301
|
-
this.logger.debug(`[${event.type}] Processing event`, {
|
|
1302
|
-
|
|
1303
|
-
}
|
|
1304
|
-
|
|
1305
|
-
|
|
1513
|
+
this.logger.debug(`[${event.type}] Processing event`, { event });
|
|
1514
|
+
const result = await this.eventReporter.postEvent(event);
|
|
1515
|
+
return { event, result };
|
|
1516
|
+
}),
|
|
1517
|
+
// Handle watch messages reactively
|
|
1518
|
+
(0, import_rxjs2.tap)(({ event, result }) => {
|
|
1519
|
+
if (event.type === "SCENARIO_RUN_STARTED" /* RUN_STARTED */ && result.setUrl) {
|
|
1520
|
+
this.eventAlertMessageLogger.handleWatchMessage({
|
|
1521
|
+
scenarioSetId: event.scenarioSetId,
|
|
1522
|
+
scenarioRunId: event.scenarioRunId,
|
|
1523
|
+
setUrl: result.setUrl
|
|
1524
|
+
});
|
|
1525
|
+
}
|
|
1306
1526
|
}),
|
|
1527
|
+
// Extract just the event for downstream processing
|
|
1528
|
+
(0, import_rxjs2.map)(({ event }) => event),
|
|
1307
1529
|
(0, import_rxjs2.catchError)((error) => {
|
|
1308
1530
|
this.logger.error("Error in event stream:", error);
|
|
1309
1531
|
return import_rxjs2.EMPTY;
|
|
1310
1532
|
})
|
|
1311
1533
|
).subscribe({
|
|
1312
1534
|
next: (event) => {
|
|
1313
|
-
this.logger.debug(`[${event.type}] Event processed`, {
|
|
1314
|
-
event
|
|
1315
|
-
});
|
|
1535
|
+
this.logger.debug(`[${event.type}] Event processed`, { event });
|
|
1316
1536
|
if (event.type === "SCENARIO_RUN_FINISHED" /* RUN_FINISHED */) {
|
|
1317
1537
|
resolve();
|
|
1318
1538
|
}
|
|
@@ -1414,10 +1634,9 @@ async function run(cfg) {
|
|
|
1414
1634
|
let eventBus = null;
|
|
1415
1635
|
let subscription = null;
|
|
1416
1636
|
try {
|
|
1417
|
-
const projectConfig = await loadScenarioProjectConfig();
|
|
1418
1637
|
eventBus = new EventBus({
|
|
1419
|
-
endpoint:
|
|
1420
|
-
apiKey:
|
|
1638
|
+
endpoint: env.LANGWATCH_ENDPOINT,
|
|
1639
|
+
apiKey: env.LANGWATCH_API_KEY
|
|
1421
1640
|
});
|
|
1422
1641
|
eventBus.listen();
|
|
1423
1642
|
subscription = eventBus.subscribeTo(execution.events$);
|
|
@@ -1493,6 +1712,7 @@ var index_default = scenario;
|
|
|
1493
1712
|
0 && (module.exports = {
|
|
1494
1713
|
AgentAdapter,
|
|
1495
1714
|
AgentRole,
|
|
1715
|
+
DEFAULT_TEMPERATURE,
|
|
1496
1716
|
JudgeAgentAdapter,
|
|
1497
1717
|
ScenarioExecution,
|
|
1498
1718
|
ScenarioExecutionState,
|