@langwatch/scenario 0.4.2 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.mjs CHANGED
@@ -1,341 +1,959 @@
1
1
  var __defProp = Object.defineProperty;
2
+ var __getOwnPropNames = Object.getOwnPropertyNames;
3
+ var __esm = (fn, res) => function __init() {
4
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
5
+ };
2
6
  var __export = (target, all) => {
3
7
  for (var name in all)
4
8
  __defProp(target, name, { get: all[name], enumerable: true });
5
9
  };
6
10
 
7
- // src/tracing/setup.ts
8
- import { setupObservability } from "langwatch/observability/node";
11
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
12
+ var _globalThis;
13
+ var init_globalThis = __esm({
14
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js"() {
15
+ "use strict";
16
+ _globalThis = typeof globalThis === "object" ? globalThis : global;
17
+ }
18
+ });
9
19
 
10
- // src/agents/judge/judge-span-collector.ts
11
- import { attributes } from "langwatch/observability";
12
- var JudgeSpanCollector = class {
13
- spans = [];
14
- onStart() {
20
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/index.js
21
+ var init_node = __esm({
22
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/index.js"() {
23
+ "use strict";
24
+ init_globalThis();
15
25
  }
16
- onEnd(span) {
17
- this.spans.push(span);
26
+ });
27
+
28
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/index.js
29
+ var init_platform = __esm({
30
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/index.js"() {
31
+ "use strict";
32
+ init_node();
18
33
  }
19
- forceFlush() {
20
- return Promise.resolve();
34
+ });
35
+
36
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/version.js
37
+ var VERSION;
38
+ var init_version = __esm({
39
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/version.js"() {
40
+ "use strict";
41
+ VERSION = "1.9.0";
21
42
  }
22
- shutdown() {
23
- this.spans = [];
24
- return Promise.resolve();
43
+ });
44
+
45
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/semver.js
46
+ function _makeCompatibilityCheck(ownVersion) {
47
+ var acceptedVersions = /* @__PURE__ */ new Set([ownVersion]);
48
+ var rejectedVersions = /* @__PURE__ */ new Set();
49
+ var myVersionMatch = ownVersion.match(re);
50
+ if (!myVersionMatch) {
51
+ return function() {
52
+ return false;
53
+ };
25
54
  }
26
- /**
27
- * Retrieves all spans associated with a specific thread.
28
- * @param threadId - The thread identifier to filter spans by
29
- * @returns Array of spans for the given thread
30
- */
31
- getSpansForThread(threadId) {
32
- const spanMap = /* @__PURE__ */ new Map();
33
- for (const span of this.spans) {
34
- spanMap.set(span.spanContext().spanId, span);
55
+ var ownVersionParsed = {
56
+ major: +myVersionMatch[1],
57
+ minor: +myVersionMatch[2],
58
+ patch: +myVersionMatch[3],
59
+ prerelease: myVersionMatch[4]
60
+ };
61
+ if (ownVersionParsed.prerelease != null) {
62
+ return function isExactmatch(globalVersion) {
63
+ return globalVersion === ownVersion;
64
+ };
65
+ }
66
+ function _reject(v) {
67
+ rejectedVersions.add(v);
68
+ return false;
69
+ }
70
+ function _accept(v) {
71
+ acceptedVersions.add(v);
72
+ return true;
73
+ }
74
+ return function isCompatible2(globalVersion) {
75
+ if (acceptedVersions.has(globalVersion)) {
76
+ return true;
35
77
  }
36
- const belongsToThread = (span) => {
37
- var _a;
38
- if (span.attributes[attributes.ATTR_LANGWATCH_THREAD_ID] === threadId) {
39
- return true;
40
- }
41
- const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
42
- if (parentId && spanMap.has(parentId)) {
43
- return belongsToThread(spanMap.get(parentId));
44
- }
78
+ if (rejectedVersions.has(globalVersion)) {
45
79
  return false;
80
+ }
81
+ var globalVersionMatch = globalVersion.match(re);
82
+ if (!globalVersionMatch) {
83
+ return _reject(globalVersion);
84
+ }
85
+ var globalVersionParsed = {
86
+ major: +globalVersionMatch[1],
87
+ minor: +globalVersionMatch[2],
88
+ patch: +globalVersionMatch[3],
89
+ prerelease: globalVersionMatch[4]
46
90
  };
47
- return this.spans.filter(belongsToThread);
91
+ if (globalVersionParsed.prerelease != null) {
92
+ return _reject(globalVersion);
93
+ }
94
+ if (ownVersionParsed.major !== globalVersionParsed.major) {
95
+ return _reject(globalVersion);
96
+ }
97
+ if (ownVersionParsed.major === 0) {
98
+ if (ownVersionParsed.minor === globalVersionParsed.minor && ownVersionParsed.patch <= globalVersionParsed.patch) {
99
+ return _accept(globalVersion);
100
+ }
101
+ return _reject(globalVersion);
102
+ }
103
+ if (ownVersionParsed.minor <= globalVersionParsed.minor) {
104
+ return _accept(globalVersion);
105
+ }
106
+ return _reject(globalVersion);
107
+ };
108
+ }
109
+ var re, isCompatible;
110
+ var init_semver = __esm({
111
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/semver.js"() {
112
+ "use strict";
113
+ init_version();
114
+ re = /^(\d+)\.(\d+)\.(\d+)(-(.+))?$/;
115
+ isCompatible = _makeCompatibilityCheck(VERSION);
48
116
  }
49
- };
50
- var judgeSpanCollector = new JudgeSpanCollector();
51
-
52
- // src/config/env.ts
53
- import { z } from "zod/v4";
54
-
55
- // src/config/log-levels.ts
56
- var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
57
- LogLevel2["ERROR"] = "ERROR";
58
- LogLevel2["WARN"] = "WARN";
59
- LogLevel2["INFO"] = "INFO";
60
- LogLevel2["DEBUG"] = "DEBUG";
61
- return LogLevel2;
62
- })(LogLevel || {});
63
- var LOG_LEVELS = Object.values(LogLevel);
64
-
65
- // src/config/env.ts
66
- var envSchema = z.object({
67
- /**
68
- * LangWatch API key for event reporting.
69
- * If not provided, events will not be sent to LangWatch.
70
- */
71
- LANGWATCH_API_KEY: z.string().optional(),
72
- /**
73
- * LangWatch endpoint URL for event reporting.
74
- * Defaults to the production LangWatch endpoint.
75
- */
76
- LANGWATCH_ENDPOINT: z.string().url().optional().default("https://app.langwatch.ai"),
77
- /**
78
- * Disables simulation report info messages when set to any truthy value.
79
- * Useful for CI/CD environments or when you want cleaner output.
80
- */
81
- SCENARIO_DISABLE_SIMULATION_REPORT_INFO: z.string().optional().transform((val) => Boolean(val)),
82
- /**
83
- * Node environment - affects logging and behavior.
84
- * Defaults to 'development' if not specified.
85
- */
86
- NODE_ENV: z.enum(["development", "production", "test"]).default("development"),
87
- /**
88
- * Case-insensitive log level for the scenario package.
89
- * Defaults to 'info' if not specified.
90
- */
91
- LOG_LEVEL: z.string().toUpperCase().pipe(z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
92
- /**
93
- * Scenario batch run ID.
94
- * If not provided, a random ID will be generated.
95
- */
96
- SCENARIO_BATCH_RUN_ID: z.string().optional()
97
117
  });
98
- function getEnv() {
99
- return envSchema.parse(process.env);
100
- }
101
-
102
- // src/config/load.ts
103
- import fs from "fs/promises";
104
- import path from "path";
105
- import { pathToFileURL } from "url";
106
118
 
107
- // src/domain/index.ts
108
- var domain_exports = {};
109
- __export(domain_exports, {
110
- AgentAdapter: () => AgentAdapter,
111
- AgentRole: () => AgentRole,
112
- DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
113
- DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
114
- JudgeAgentAdapter: () => JudgeAgentAdapter,
115
- UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
116
- allAgentRoles: () => allAgentRoles,
117
- defineConfig: () => defineConfig,
118
- scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
119
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/global-utils.js
120
+ function registerGlobal(type, instance, diag2, allowOverride) {
121
+ var _a;
122
+ if (allowOverride === void 0) {
123
+ allowOverride = false;
124
+ }
125
+ var api = _global[GLOBAL_OPENTELEMETRY_API_KEY] = (_a = _global[GLOBAL_OPENTELEMETRY_API_KEY]) !== null && _a !== void 0 ? _a : {
126
+ version: VERSION
127
+ };
128
+ if (!allowOverride && api[type]) {
129
+ var err = new Error("@opentelemetry/api: Attempted duplicate registration of API: " + type);
130
+ diag2.error(err.stack || err.message);
131
+ return false;
132
+ }
133
+ if (api.version !== VERSION) {
134
+ var err = new Error("@opentelemetry/api: Registration of version v" + api.version + " for " + type + " does not match previously registered API v" + VERSION);
135
+ diag2.error(err.stack || err.message);
136
+ return false;
137
+ }
138
+ api[type] = instance;
139
+ diag2.debug("@opentelemetry/api: Registered a global for " + type + " v" + VERSION + ".");
140
+ return true;
141
+ }
142
+ function getGlobal(type) {
143
+ var _a, _b;
144
+ var globalVersion = (_a = _global[GLOBAL_OPENTELEMETRY_API_KEY]) === null || _a === void 0 ? void 0 : _a.version;
145
+ if (!globalVersion || !isCompatible(globalVersion)) {
146
+ return;
147
+ }
148
+ return (_b = _global[GLOBAL_OPENTELEMETRY_API_KEY]) === null || _b === void 0 ? void 0 : _b[type];
149
+ }
150
+ function unregisterGlobal(type, diag2) {
151
+ diag2.debug("@opentelemetry/api: Unregistering a global for " + type + " v" + VERSION + ".");
152
+ var api = _global[GLOBAL_OPENTELEMETRY_API_KEY];
153
+ if (api) {
154
+ delete api[type];
155
+ }
156
+ }
157
+ var major, GLOBAL_OPENTELEMETRY_API_KEY, _global;
158
+ var init_global_utils = __esm({
159
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/global-utils.js"() {
160
+ "use strict";
161
+ init_platform();
162
+ init_version();
163
+ init_semver();
164
+ major = VERSION.split(".")[0];
165
+ GLOBAL_OPENTELEMETRY_API_KEY = Symbol.for("opentelemetry.js.api." + major);
166
+ _global = _globalThis;
167
+ }
119
168
  });
120
169
 
121
- // src/domain/core/config.ts
122
- import { z as z3 } from "zod/v4";
123
-
124
- // src/domain/core/schemas/model.schema.ts
125
- import { z as z2 } from "zod/v4";
126
-
127
- // src/domain/core/constants.ts
128
- var DEFAULT_TEMPERATURE = 0;
170
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/ComponentLogger.js
171
+ function logProxy(funcName, namespace, args) {
172
+ var logger2 = getGlobal("diag");
173
+ if (!logger2) {
174
+ return;
175
+ }
176
+ args.unshift(namespace);
177
+ return logger2[funcName].apply(logger2, __spreadArray([], __read(args), false));
178
+ }
179
+ var __read, __spreadArray, DiagComponentLogger;
180
+ var init_ComponentLogger = __esm({
181
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/ComponentLogger.js"() {
182
+ "use strict";
183
+ init_global_utils();
184
+ __read = function(o, n) {
185
+ var m = typeof Symbol === "function" && o[Symbol.iterator];
186
+ if (!m) return o;
187
+ var i = m.call(o), r, ar = [], e;
188
+ try {
189
+ while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
190
+ } catch (error) {
191
+ e = { error };
192
+ } finally {
193
+ try {
194
+ if (r && !r.done && (m = i["return"])) m.call(i);
195
+ } finally {
196
+ if (e) throw e.error;
197
+ }
198
+ }
199
+ return ar;
200
+ };
201
+ __spreadArray = function(to, from, pack) {
202
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
203
+ if (ar || !(i in from)) {
204
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
205
+ ar[i] = from[i];
206
+ }
207
+ }
208
+ return to.concat(ar || Array.prototype.slice.call(from));
209
+ };
210
+ DiagComponentLogger = /** @class */
211
+ (function() {
212
+ function DiagComponentLogger2(props) {
213
+ this._namespace = props.namespace || "DiagComponentLogger";
214
+ }
215
+ DiagComponentLogger2.prototype.debug = function() {
216
+ var args = [];
217
+ for (var _i = 0; _i < arguments.length; _i++) {
218
+ args[_i] = arguments[_i];
219
+ }
220
+ return logProxy("debug", this._namespace, args);
221
+ };
222
+ DiagComponentLogger2.prototype.error = function() {
223
+ var args = [];
224
+ for (var _i = 0; _i < arguments.length; _i++) {
225
+ args[_i] = arguments[_i];
226
+ }
227
+ return logProxy("error", this._namespace, args);
228
+ };
229
+ DiagComponentLogger2.prototype.info = function() {
230
+ var args = [];
231
+ for (var _i = 0; _i < arguments.length; _i++) {
232
+ args[_i] = arguments[_i];
233
+ }
234
+ return logProxy("info", this._namespace, args);
235
+ };
236
+ DiagComponentLogger2.prototype.warn = function() {
237
+ var args = [];
238
+ for (var _i = 0; _i < arguments.length; _i++) {
239
+ args[_i] = arguments[_i];
240
+ }
241
+ return logProxy("warn", this._namespace, args);
242
+ };
243
+ DiagComponentLogger2.prototype.verbose = function() {
244
+ var args = [];
245
+ for (var _i = 0; _i < arguments.length; _i++) {
246
+ args[_i] = arguments[_i];
247
+ }
248
+ return logProxy("verbose", this._namespace, args);
249
+ };
250
+ return DiagComponentLogger2;
251
+ })();
252
+ }
253
+ });
129
254
 
130
- // src/domain/core/schemas/model.schema.ts
131
- var modelSchema = z2.object({
132
- model: z2.custom((val) => Boolean(val), {
133
- message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
134
- }).describe("Language model that is used by the AI SDK Core functions."),
135
- temperature: z2.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
136
- maxTokens: z2.number().optional().describe("The maximum number of tokens to generate.")
255
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/types.js
256
+ var DiagLogLevel;
257
+ var init_types = __esm({
258
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/types.js"() {
259
+ "use strict";
260
+ (function(DiagLogLevel2) {
261
+ DiagLogLevel2[DiagLogLevel2["NONE"] = 0] = "NONE";
262
+ DiagLogLevel2[DiagLogLevel2["ERROR"] = 30] = "ERROR";
263
+ DiagLogLevel2[DiagLogLevel2["WARN"] = 50] = "WARN";
264
+ DiagLogLevel2[DiagLogLevel2["INFO"] = 60] = "INFO";
265
+ DiagLogLevel2[DiagLogLevel2["DEBUG"] = 70] = "DEBUG";
266
+ DiagLogLevel2[DiagLogLevel2["VERBOSE"] = 80] = "VERBOSE";
267
+ DiagLogLevel2[DiagLogLevel2["ALL"] = 9999] = "ALL";
268
+ })(DiagLogLevel || (DiagLogLevel = {}));
269
+ }
137
270
  });
138
271
 
139
- // src/domain/core/config.ts
140
- var headless = typeof process !== "undefined" ? process.env.SCENARIO_HEADLESS === "true" : false;
141
- var scenarioProjectConfigSchema = z3.object({
142
- defaultModel: modelSchema.optional(),
143
- headless: z3.boolean().optional().default(headless)
144
- }).strict();
145
- function defineConfig(config2) {
146
- return config2;
147
- }
148
-
149
- // src/domain/agents/index.ts
150
- var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
151
- AgentRole2["USER"] = "User";
152
- AgentRole2["AGENT"] = "Agent";
153
- AgentRole2["JUDGE"] = "Judge";
154
- return AgentRole2;
155
- })(AgentRole || {});
156
- var allAgentRoles = [
157
- "User" /* USER */,
158
- "Agent" /* AGENT */,
159
- "Judge" /* JUDGE */
160
- ];
161
- var AgentAdapter = class {
162
- name;
163
- role = "Agent" /* AGENT */;
164
- };
165
- var UserSimulatorAgentAdapter = class extends AgentAdapter {
166
- name = "UserSimulatorAgent";
167
- role = "User" /* USER */;
168
- };
169
- var JudgeAgentAdapter = class extends AgentAdapter {
170
- name = "JudgeAgent";
171
- role = "Judge" /* JUDGE */;
172
- };
173
-
174
- // src/domain/scenarios/index.ts
175
- var DEFAULT_MAX_TURNS = 10;
176
- var DEFAULT_VERBOSE = false;
177
-
178
- // src/config/load.ts
179
- async function loadScenarioProjectConfig() {
180
- const cwd = process.cwd();
181
- const configNames = [
182
- "scenario.config.js",
183
- "scenario.config.mjs"
184
- ];
185
- for (const name of configNames) {
186
- const fullPath = path.join(cwd, name);
187
- try {
188
- await fs.access(fullPath);
189
- const configModule = await import(pathToFileURL(fullPath).href);
190
- const config2 = configModule.default || configModule;
191
- const parsed = scenarioProjectConfigSchema.safeParse(config2);
192
- if (!parsed.success) {
193
- throw new Error(
194
- `Invalid config file ${name}: ${JSON.stringify(parsed.error.format(), null, 2)}`
195
- );
196
- }
197
- return parsed.data;
198
- } catch (error) {
199
- if (error instanceof Error && "code" in error && error.code === "ENOENT") {
200
- continue;
201
- }
202
- throw error;
272
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/internal/logLevelLogger.js
273
+ function createLogLevelDiagLogger(maxLevel, logger2) {
274
+ if (maxLevel < DiagLogLevel.NONE) {
275
+ maxLevel = DiagLogLevel.NONE;
276
+ } else if (maxLevel > DiagLogLevel.ALL) {
277
+ maxLevel = DiagLogLevel.ALL;
278
+ }
279
+ logger2 = logger2 || {};
280
+ function _filterFunc(funcName, theLevel) {
281
+ var theFunc = logger2[funcName];
282
+ if (typeof theFunc === "function" && maxLevel >= theLevel) {
283
+ return theFunc.bind(logger2);
203
284
  }
285
+ return function() {
286
+ };
204
287
  }
205
- return await scenarioProjectConfigSchema.parseAsync({});
288
+ return {
289
+ error: _filterFunc("error", DiagLogLevel.ERROR),
290
+ warn: _filterFunc("warn", DiagLogLevel.WARN),
291
+ info: _filterFunc("info", DiagLogLevel.INFO),
292
+ debug: _filterFunc("debug", DiagLogLevel.DEBUG),
293
+ verbose: _filterFunc("verbose", DiagLogLevel.VERBOSE)
294
+ };
206
295
  }
207
-
208
- // src/utils/logger.ts
209
- var Logger = class _Logger {
210
- constructor(context2) {
211
- this.context = context2;
212
- }
213
- /**
214
- * Creates a logger with context (e.g., class name)
215
- */
216
- static create(context2) {
217
- return new _Logger(context2);
218
- }
219
- /**
220
- * Returns the current log level from environment.
221
- * Uses a getter for clarity and idiomatic usage.
222
- */
223
- get LOG_LEVEL() {
224
- return getEnv().LOG_LEVEL;
225
- }
226
- /**
227
- * Returns the index of the given log level in the LOG_LEVELS array.
228
- * @param level - The log level to get the index for.
229
- * @returns The index of the log level in the LOG_LEVELS array.
230
- */
231
- getLogLevelIndexFor(level) {
232
- return LOG_LEVELS.indexOf(level);
233
- }
234
- /**
235
- * Checks if logging should occur based on LOG_LEVEL env var
236
- */
237
- shouldLog(level) {
238
- const currentLevelIndex = this.getLogLevelIndexFor(this.LOG_LEVEL);
239
- const requestedLevelIndex = this.getLogLevelIndexFor(level);
240
- return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
241
- }
242
- formatMessage(message2) {
243
- return this.context ? `[${this.context}] ${message2}` : message2;
296
+ var init_logLevelLogger = __esm({
297
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/internal/logLevelLogger.js"() {
298
+ "use strict";
299
+ init_types();
244
300
  }
245
- error(message2, data) {
246
- if (this.shouldLog("ERROR" /* ERROR */)) {
247
- const formattedMessage = this.formatMessage(message2);
248
- if (data) {
249
- console.error(formattedMessage, data);
250
- } else {
251
- console.error(formattedMessage);
301
+ });
302
+
303
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/diag.js
304
+ var __read2, __spreadArray2, API_NAME, DiagAPI;
305
+ var init_diag = __esm({
306
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/diag.js"() {
307
+ "use strict";
308
+ init_ComponentLogger();
309
+ init_logLevelLogger();
310
+ init_types();
311
+ init_global_utils();
312
+ __read2 = function(o, n) {
313
+ var m = typeof Symbol === "function" && o[Symbol.iterator];
314
+ if (!m) return o;
315
+ var i = m.call(o), r, ar = [], e;
316
+ try {
317
+ while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
318
+ } catch (error) {
319
+ e = { error };
320
+ } finally {
321
+ try {
322
+ if (r && !r.done && (m = i["return"])) m.call(i);
323
+ } finally {
324
+ if (e) throw e.error;
325
+ }
252
326
  }
253
- }
254
- }
255
- warn(message2, data) {
256
- if (this.shouldLog("WARN" /* WARN */)) {
257
- const formattedMessage = this.formatMessage(message2);
258
- if (data) {
259
- console.warn(formattedMessage, data);
260
- } else {
261
- console.warn(formattedMessage);
327
+ return ar;
328
+ };
329
+ __spreadArray2 = function(to, from, pack) {
330
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
331
+ if (ar || !(i in from)) {
332
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
333
+ ar[i] = from[i];
334
+ }
262
335
  }
263
- }
264
- }
265
- info(message2, data) {
266
- if (this.shouldLog("INFO" /* INFO */)) {
267
- const formattedMessage = this.formatMessage(message2);
268
- if (data) {
269
- console.info(formattedMessage, data);
270
- } else {
271
- console.info(formattedMessage);
336
+ return to.concat(ar || Array.prototype.slice.call(from));
337
+ };
338
+ API_NAME = "diag";
339
+ DiagAPI = /** @class */
340
+ (function() {
341
+ function DiagAPI2() {
342
+ function _logProxy(funcName) {
343
+ return function() {
344
+ var args = [];
345
+ for (var _i = 0; _i < arguments.length; _i++) {
346
+ args[_i] = arguments[_i];
347
+ }
348
+ var logger2 = getGlobal("diag");
349
+ if (!logger2)
350
+ return;
351
+ return logger2[funcName].apply(logger2, __spreadArray2([], __read2(args), false));
352
+ };
353
+ }
354
+ var self = this;
355
+ var setLogger = function(logger2, optionsOrLogLevel) {
356
+ var _a, _b, _c;
357
+ if (optionsOrLogLevel === void 0) {
358
+ optionsOrLogLevel = { logLevel: DiagLogLevel.INFO };
359
+ }
360
+ if (logger2 === self) {
361
+ var err = new Error("Cannot use diag as the logger for itself. Please use a DiagLogger implementation like ConsoleDiagLogger or a custom implementation");
362
+ self.error((_a = err.stack) !== null && _a !== void 0 ? _a : err.message);
363
+ return false;
364
+ }
365
+ if (typeof optionsOrLogLevel === "number") {
366
+ optionsOrLogLevel = {
367
+ logLevel: optionsOrLogLevel
368
+ };
369
+ }
370
+ var oldLogger = getGlobal("diag");
371
+ var newLogger = createLogLevelDiagLogger((_b = optionsOrLogLevel.logLevel) !== null && _b !== void 0 ? _b : DiagLogLevel.INFO, logger2);
372
+ if (oldLogger && !optionsOrLogLevel.suppressOverrideMessage) {
373
+ var stack = (_c = new Error().stack) !== null && _c !== void 0 ? _c : "<failed to generate stacktrace>";
374
+ oldLogger.warn("Current logger will be overwritten from " + stack);
375
+ newLogger.warn("Current logger will overwrite one already registered from " + stack);
376
+ }
377
+ return registerGlobal("diag", newLogger, self, true);
378
+ };
379
+ self.setLogger = setLogger;
380
+ self.disable = function() {
381
+ unregisterGlobal(API_NAME, self);
382
+ };
383
+ self.createComponentLogger = function(options) {
384
+ return new DiagComponentLogger(options);
385
+ };
386
+ self.verbose = _logProxy("verbose");
387
+ self.debug = _logProxy("debug");
388
+ self.info = _logProxy("info");
389
+ self.warn = _logProxy("warn");
390
+ self.error = _logProxy("error");
272
391
  }
273
- }
392
+ DiagAPI2.instance = function() {
393
+ if (!this._instance) {
394
+ this._instance = new DiagAPI2();
395
+ }
396
+ return this._instance;
397
+ };
398
+ return DiagAPI2;
399
+ })();
274
400
  }
275
- debug(message2, data) {
276
- if (this.shouldLog("DEBUG" /* DEBUG */)) {
277
- const formattedMessage = this.formatMessage(message2);
278
- if (data) {
279
- console.log(formattedMessage, data);
280
- } else {
281
- console.log(formattedMessage);
401
+ });
402
+
403
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/context.js
404
+ function createContextKey(description) {
405
+ return Symbol.for(description);
406
+ }
407
+ var BaseContext, ROOT_CONTEXT;
408
+ var init_context = __esm({
409
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/context.js"() {
410
+ "use strict";
411
+ BaseContext = /** @class */
412
+ /* @__PURE__ */ (function() {
413
+ function BaseContext2(parentContext) {
414
+ var self = this;
415
+ self._currentContext = parentContext ? new Map(parentContext) : /* @__PURE__ */ new Map();
416
+ self.getValue = function(key) {
417
+ return self._currentContext.get(key);
418
+ };
419
+ self.setValue = function(key, value) {
420
+ var context2 = new BaseContext2(self._currentContext);
421
+ context2._currentContext.set(key, value);
422
+ return context2;
423
+ };
424
+ self.deleteValue = function(key) {
425
+ var context2 = new BaseContext2(self._currentContext);
426
+ context2._currentContext.delete(key);
427
+ return context2;
428
+ };
282
429
  }
283
- }
430
+ return BaseContext2;
431
+ })();
432
+ ROOT_CONTEXT = new BaseContext();
284
433
  }
285
- };
434
+ });
286
435
 
287
- // src/config/get-project-config.ts
288
- var logger = new Logger("scenario.config");
289
- var configLoaded = false;
290
- var config = null;
291
- var configLoadPromise = null;
292
- async function loadProjectConfig() {
293
- if (configLoaded) {
294
- return;
295
- }
296
- if (configLoadPromise) {
297
- return configLoadPromise;
436
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/NoopContextManager.js
437
+ var __read3, __spreadArray3, NoopContextManager;
438
+ var init_NoopContextManager = __esm({
439
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/NoopContextManager.js"() {
440
+ "use strict";
441
+ init_context();
442
+ __read3 = function(o, n) {
443
+ var m = typeof Symbol === "function" && o[Symbol.iterator];
444
+ if (!m) return o;
445
+ var i = m.call(o), r, ar = [], e;
446
+ try {
447
+ while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
448
+ } catch (error) {
449
+ e = { error };
450
+ } finally {
451
+ try {
452
+ if (r && !r.done && (m = i["return"])) m.call(i);
453
+ } finally {
454
+ if (e) throw e.error;
455
+ }
456
+ }
457
+ return ar;
458
+ };
459
+ __spreadArray3 = function(to, from, pack) {
460
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
461
+ if (ar || !(i in from)) {
462
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
463
+ ar[i] = from[i];
464
+ }
465
+ }
466
+ return to.concat(ar || Array.prototype.slice.call(from));
467
+ };
468
+ NoopContextManager = /** @class */
469
+ (function() {
470
+ function NoopContextManager2() {
471
+ }
472
+ NoopContextManager2.prototype.active = function() {
473
+ return ROOT_CONTEXT;
474
+ };
475
+ NoopContextManager2.prototype.with = function(_context, fn, thisArg) {
476
+ var args = [];
477
+ for (var _i = 3; _i < arguments.length; _i++) {
478
+ args[_i - 3] = arguments[_i];
479
+ }
480
+ return fn.call.apply(fn, __spreadArray3([thisArg], __read3(args), false));
481
+ };
482
+ NoopContextManager2.prototype.bind = function(_context, target) {
483
+ return target;
484
+ };
485
+ NoopContextManager2.prototype.enable = function() {
486
+ return this;
487
+ };
488
+ NoopContextManager2.prototype.disable = function() {
489
+ return this;
490
+ };
491
+ return NoopContextManager2;
492
+ })();
298
493
  }
299
- configLoadPromise = (async () => {
300
- try {
301
- config = await loadScenarioProjectConfig();
302
- logger.debug("loaded scenario project config", { config });
303
- } catch (error) {
304
- logger.error("error loading scenario project config", { error });
305
- } finally {
306
- configLoaded = true;
307
- }
308
- })();
309
- return configLoadPromise;
310
- }
311
- async function getProjectConfig() {
312
- await loadProjectConfig();
313
- return config;
314
- }
494
+ });
315
495
 
316
- // src/tracing/setup.ts
317
- var envConfig = getEnv();
318
- var observabilityHandle = setupObservability({
319
- langwatch: {
320
- apiKey: envConfig.LANGWATCH_API_KEY,
321
- endpoint: envConfig.LANGWATCH_ENDPOINT
322
- },
323
- spanProcessors: [judgeSpanCollector]
496
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/context.js
497
+ var __read4, __spreadArray4, API_NAME2, NOOP_CONTEXT_MANAGER, ContextAPI;
498
+ var init_context2 = __esm({
499
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/context.js"() {
500
+ "use strict";
501
+ init_NoopContextManager();
502
+ init_global_utils();
503
+ init_diag();
504
+ __read4 = function(o, n) {
505
+ var m = typeof Symbol === "function" && o[Symbol.iterator];
506
+ if (!m) return o;
507
+ var i = m.call(o), r, ar = [], e;
508
+ try {
509
+ while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
510
+ } catch (error) {
511
+ e = { error };
512
+ } finally {
513
+ try {
514
+ if (r && !r.done && (m = i["return"])) m.call(i);
515
+ } finally {
516
+ if (e) throw e.error;
517
+ }
518
+ }
519
+ return ar;
520
+ };
521
+ __spreadArray4 = function(to, from, pack) {
522
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
523
+ if (ar || !(i in from)) {
524
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
525
+ ar[i] = from[i];
526
+ }
527
+ }
528
+ return to.concat(ar || Array.prototype.slice.call(from));
529
+ };
530
+ API_NAME2 = "context";
531
+ NOOP_CONTEXT_MANAGER = new NoopContextManager();
532
+ ContextAPI = /** @class */
533
+ (function() {
534
+ function ContextAPI2() {
535
+ }
536
+ ContextAPI2.getInstance = function() {
537
+ if (!this._instance) {
538
+ this._instance = new ContextAPI2();
539
+ }
540
+ return this._instance;
541
+ };
542
+ ContextAPI2.prototype.setGlobalContextManager = function(contextManager) {
543
+ return registerGlobal(API_NAME2, contextManager, DiagAPI.instance());
544
+ };
545
+ ContextAPI2.prototype.active = function() {
546
+ return this._getContextManager().active();
547
+ };
548
+ ContextAPI2.prototype.with = function(context2, fn, thisArg) {
549
+ var _a;
550
+ var args = [];
551
+ for (var _i = 3; _i < arguments.length; _i++) {
552
+ args[_i - 3] = arguments[_i];
553
+ }
554
+ return (_a = this._getContextManager()).with.apply(_a, __spreadArray4([context2, fn, thisArg], __read4(args), false));
555
+ };
556
+ ContextAPI2.prototype.bind = function(context2, target) {
557
+ return this._getContextManager().bind(context2, target);
558
+ };
559
+ ContextAPI2.prototype._getContextManager = function() {
560
+ return getGlobal(API_NAME2) || NOOP_CONTEXT_MANAGER;
561
+ };
562
+ ContextAPI2.prototype.disable = function() {
563
+ this._getContextManager().disable();
564
+ unregisterGlobal(API_NAME2, DiagAPI.instance());
565
+ };
566
+ return ContextAPI2;
567
+ })();
568
+ }
324
569
  });
325
570
 
326
- // src/agents/index.ts
327
- var agents_exports = {};
328
- __export(agents_exports, {
329
- JudgeSpanCollector: () => JudgeSpanCollector,
330
- JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
331
- RealtimeAgentAdapter: () => RealtimeAgentAdapter,
332
- judgeAgent: () => judgeAgent,
333
- judgeSpanCollector: () => judgeSpanCollector,
334
- judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
335
- userSimulatorAgent: () => userSimulatorAgent
571
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/trace_flags.js
572
+ var TraceFlags;
573
+ var init_trace_flags = __esm({
574
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/trace_flags.js"() {
575
+ "use strict";
576
+ (function(TraceFlags2) {
577
+ TraceFlags2[TraceFlags2["NONE"] = 0] = "NONE";
578
+ TraceFlags2[TraceFlags2["SAMPLED"] = 1] = "SAMPLED";
579
+ })(TraceFlags || (TraceFlags = {}));
580
+ }
336
581
  });
337
582
 
338
- // src/agents/judge/judge-agent.ts
583
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/invalid-span-constants.js
584
+ var INVALID_SPANID, INVALID_TRACEID, INVALID_SPAN_CONTEXT;
585
+ var init_invalid_span_constants = __esm({
586
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/invalid-span-constants.js"() {
587
+ "use strict";
588
+ init_trace_flags();
589
+ INVALID_SPANID = "0000000000000000";
590
+ INVALID_TRACEID = "00000000000000000000000000000000";
591
+ INVALID_SPAN_CONTEXT = {
592
+ traceId: INVALID_TRACEID,
593
+ spanId: INVALID_SPANID,
594
+ traceFlags: TraceFlags.NONE
595
+ };
596
+ }
597
+ });
598
+
599
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NonRecordingSpan.js
600
+ var NonRecordingSpan;
601
+ var init_NonRecordingSpan = __esm({
602
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NonRecordingSpan.js"() {
603
+ "use strict";
604
+ init_invalid_span_constants();
605
+ NonRecordingSpan = /** @class */
606
+ (function() {
607
+ function NonRecordingSpan2(_spanContext) {
608
+ if (_spanContext === void 0) {
609
+ _spanContext = INVALID_SPAN_CONTEXT;
610
+ }
611
+ this._spanContext = _spanContext;
612
+ }
613
+ NonRecordingSpan2.prototype.spanContext = function() {
614
+ return this._spanContext;
615
+ };
616
+ NonRecordingSpan2.prototype.setAttribute = function(_key, _value) {
617
+ return this;
618
+ };
619
+ NonRecordingSpan2.prototype.setAttributes = function(_attributes) {
620
+ return this;
621
+ };
622
+ NonRecordingSpan2.prototype.addEvent = function(_name, _attributes) {
623
+ return this;
624
+ };
625
+ NonRecordingSpan2.prototype.addLink = function(_link) {
626
+ return this;
627
+ };
628
+ NonRecordingSpan2.prototype.addLinks = function(_links) {
629
+ return this;
630
+ };
631
+ NonRecordingSpan2.prototype.setStatus = function(_status) {
632
+ return this;
633
+ };
634
+ NonRecordingSpan2.prototype.updateName = function(_name) {
635
+ return this;
636
+ };
637
+ NonRecordingSpan2.prototype.end = function(_endTime) {
638
+ };
639
+ NonRecordingSpan2.prototype.isRecording = function() {
640
+ return false;
641
+ };
642
+ NonRecordingSpan2.prototype.recordException = function(_exception, _time) {
643
+ };
644
+ return NonRecordingSpan2;
645
+ })();
646
+ }
647
+ });
648
+
649
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/context-utils.js
650
+ function getSpan(context2) {
651
+ return context2.getValue(SPAN_KEY) || void 0;
652
+ }
653
+ function getActiveSpan() {
654
+ return getSpan(ContextAPI.getInstance().active());
655
+ }
656
+ function setSpan(context2, span) {
657
+ return context2.setValue(SPAN_KEY, span);
658
+ }
659
+ function deleteSpan(context2) {
660
+ return context2.deleteValue(SPAN_KEY);
661
+ }
662
+ function setSpanContext(context2, spanContext) {
663
+ return setSpan(context2, new NonRecordingSpan(spanContext));
664
+ }
665
+ function getSpanContext(context2) {
666
+ var _a;
667
+ return (_a = getSpan(context2)) === null || _a === void 0 ? void 0 : _a.spanContext();
668
+ }
669
+ var SPAN_KEY;
670
+ var init_context_utils = __esm({
671
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/context-utils.js"() {
672
+ "use strict";
673
+ init_context();
674
+ init_NonRecordingSpan();
675
+ init_context2();
676
+ SPAN_KEY = createContextKey("OpenTelemetry Context Key SPAN");
677
+ }
678
+ });
679
+
680
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/spancontext-utils.js
681
+ function isValidTraceId(traceId) {
682
+ return VALID_TRACEID_REGEX.test(traceId) && traceId !== INVALID_TRACEID;
683
+ }
684
+ function isValidSpanId(spanId) {
685
+ return VALID_SPANID_REGEX.test(spanId) && spanId !== INVALID_SPANID;
686
+ }
687
+ function isSpanContextValid(spanContext) {
688
+ return isValidTraceId(spanContext.traceId) && isValidSpanId(spanContext.spanId);
689
+ }
690
+ function wrapSpanContext(spanContext) {
691
+ return new NonRecordingSpan(spanContext);
692
+ }
693
+ var VALID_TRACEID_REGEX, VALID_SPANID_REGEX;
694
+ var init_spancontext_utils = __esm({
695
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/spancontext-utils.js"() {
696
+ "use strict";
697
+ init_invalid_span_constants();
698
+ init_NonRecordingSpan();
699
+ VALID_TRACEID_REGEX = /^([0-9a-f]{32})$/i;
700
+ VALID_SPANID_REGEX = /^[0-9a-f]{16}$/i;
701
+ }
702
+ });
703
+
704
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracer.js
705
+ function isSpanContext(spanContext) {
706
+ return typeof spanContext === "object" && typeof spanContext["spanId"] === "string" && typeof spanContext["traceId"] === "string" && typeof spanContext["traceFlags"] === "number";
707
+ }
708
+ var contextApi, NoopTracer;
709
+ var init_NoopTracer = __esm({
710
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracer.js"() {
711
+ "use strict";
712
+ init_context2();
713
+ init_context_utils();
714
+ init_NonRecordingSpan();
715
+ init_spancontext_utils();
716
+ contextApi = ContextAPI.getInstance();
717
+ NoopTracer = /** @class */
718
+ (function() {
719
+ function NoopTracer2() {
720
+ }
721
+ NoopTracer2.prototype.startSpan = function(name, options, context2) {
722
+ if (context2 === void 0) {
723
+ context2 = contextApi.active();
724
+ }
725
+ var root = Boolean(options === null || options === void 0 ? void 0 : options.root);
726
+ if (root) {
727
+ return new NonRecordingSpan();
728
+ }
729
+ var parentFromContext = context2 && getSpanContext(context2);
730
+ if (isSpanContext(parentFromContext) && isSpanContextValid(parentFromContext)) {
731
+ return new NonRecordingSpan(parentFromContext);
732
+ } else {
733
+ return new NonRecordingSpan();
734
+ }
735
+ };
736
+ NoopTracer2.prototype.startActiveSpan = function(name, arg2, arg3, arg4) {
737
+ var opts;
738
+ var ctx;
739
+ var fn;
740
+ if (arguments.length < 2) {
741
+ return;
742
+ } else if (arguments.length === 2) {
743
+ fn = arg2;
744
+ } else if (arguments.length === 3) {
745
+ opts = arg2;
746
+ fn = arg3;
747
+ } else {
748
+ opts = arg2;
749
+ ctx = arg3;
750
+ fn = arg4;
751
+ }
752
+ var parentContext = ctx !== null && ctx !== void 0 ? ctx : contextApi.active();
753
+ var span = this.startSpan(name, opts, parentContext);
754
+ var contextWithSpanSet = setSpan(parentContext, span);
755
+ return contextApi.with(contextWithSpanSet, fn, void 0, span);
756
+ };
757
+ return NoopTracer2;
758
+ })();
759
+ }
760
+ });
761
+
762
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracer.js
763
+ var NOOP_TRACER, ProxyTracer;
764
+ var init_ProxyTracer = __esm({
765
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracer.js"() {
766
+ "use strict";
767
+ init_NoopTracer();
768
+ NOOP_TRACER = new NoopTracer();
769
+ ProxyTracer = /** @class */
770
+ (function() {
771
+ function ProxyTracer2(_provider, name, version, options) {
772
+ this._provider = _provider;
773
+ this.name = name;
774
+ this.version = version;
775
+ this.options = options;
776
+ }
777
+ ProxyTracer2.prototype.startSpan = function(name, options, context2) {
778
+ return this._getTracer().startSpan(name, options, context2);
779
+ };
780
+ ProxyTracer2.prototype.startActiveSpan = function(_name, _options, _context, _fn) {
781
+ var tracer = this._getTracer();
782
+ return Reflect.apply(tracer.startActiveSpan, tracer, arguments);
783
+ };
784
+ ProxyTracer2.prototype._getTracer = function() {
785
+ if (this._delegate) {
786
+ return this._delegate;
787
+ }
788
+ var tracer = this._provider.getDelegateTracer(this.name, this.version, this.options);
789
+ if (!tracer) {
790
+ return NOOP_TRACER;
791
+ }
792
+ this._delegate = tracer;
793
+ return this._delegate;
794
+ };
795
+ return ProxyTracer2;
796
+ })();
797
+ }
798
+ });
799
+
800
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracerProvider.js
801
+ var NoopTracerProvider;
802
+ var init_NoopTracerProvider = __esm({
803
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracerProvider.js"() {
804
+ "use strict";
805
+ init_NoopTracer();
806
+ NoopTracerProvider = /** @class */
807
+ (function() {
808
+ function NoopTracerProvider2() {
809
+ }
810
+ NoopTracerProvider2.prototype.getTracer = function(_name, _version, _options) {
811
+ return new NoopTracer();
812
+ };
813
+ return NoopTracerProvider2;
814
+ })();
815
+ }
816
+ });
817
+
818
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracerProvider.js
819
+ var NOOP_TRACER_PROVIDER, ProxyTracerProvider;
820
+ var init_ProxyTracerProvider = __esm({
821
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracerProvider.js"() {
822
+ "use strict";
823
+ init_ProxyTracer();
824
+ init_NoopTracerProvider();
825
+ NOOP_TRACER_PROVIDER = new NoopTracerProvider();
826
+ ProxyTracerProvider = /** @class */
827
+ (function() {
828
+ function ProxyTracerProvider2() {
829
+ }
830
+ ProxyTracerProvider2.prototype.getTracer = function(name, version, options) {
831
+ var _a;
832
+ return (_a = this.getDelegateTracer(name, version, options)) !== null && _a !== void 0 ? _a : new ProxyTracer(this, name, version, options);
833
+ };
834
+ ProxyTracerProvider2.prototype.getDelegate = function() {
835
+ var _a;
836
+ return (_a = this._delegate) !== null && _a !== void 0 ? _a : NOOP_TRACER_PROVIDER;
837
+ };
838
+ ProxyTracerProvider2.prototype.setDelegate = function(delegate) {
839
+ this._delegate = delegate;
840
+ };
841
+ ProxyTracerProvider2.prototype.getDelegateTracer = function(name, version, options) {
842
+ var _a;
843
+ return (_a = this._delegate) === null || _a === void 0 ? void 0 : _a.getTracer(name, version, options);
844
+ };
845
+ return ProxyTracerProvider2;
846
+ })();
847
+ }
848
+ });
849
+
850
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context-api.js
851
+ var context;
852
+ var init_context_api = __esm({
853
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context-api.js"() {
854
+ "use strict";
855
+ init_context2();
856
+ context = ContextAPI.getInstance();
857
+ }
858
+ });
859
+
860
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag-api.js
861
+ var diag;
862
+ var init_diag_api = __esm({
863
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag-api.js"() {
864
+ "use strict";
865
+ init_diag();
866
+ diag = DiagAPI.instance();
867
+ }
868
+ });
869
+
870
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/trace.js
871
+ var API_NAME3, TraceAPI;
872
+ var init_trace = __esm({
873
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/trace.js"() {
874
+ "use strict";
875
+ init_global_utils();
876
+ init_ProxyTracerProvider();
877
+ init_spancontext_utils();
878
+ init_context_utils();
879
+ init_diag();
880
+ API_NAME3 = "trace";
881
+ TraceAPI = /** @class */
882
+ (function() {
883
+ function TraceAPI2() {
884
+ this._proxyTracerProvider = new ProxyTracerProvider();
885
+ this.wrapSpanContext = wrapSpanContext;
886
+ this.isSpanContextValid = isSpanContextValid;
887
+ this.deleteSpan = deleteSpan;
888
+ this.getSpan = getSpan;
889
+ this.getActiveSpan = getActiveSpan;
890
+ this.getSpanContext = getSpanContext;
891
+ this.setSpan = setSpan;
892
+ this.setSpanContext = setSpanContext;
893
+ }
894
+ TraceAPI2.getInstance = function() {
895
+ if (!this._instance) {
896
+ this._instance = new TraceAPI2();
897
+ }
898
+ return this._instance;
899
+ };
900
+ TraceAPI2.prototype.setGlobalTracerProvider = function(provider) {
901
+ var success = registerGlobal(API_NAME3, this._proxyTracerProvider, DiagAPI.instance());
902
+ if (success) {
903
+ this._proxyTracerProvider.setDelegate(provider);
904
+ }
905
+ return success;
906
+ };
907
+ TraceAPI2.prototype.getTracerProvider = function() {
908
+ return getGlobal(API_NAME3) || this._proxyTracerProvider;
909
+ };
910
+ TraceAPI2.prototype.getTracer = function(name, version) {
911
+ return this.getTracerProvider().getTracer(name, version);
912
+ };
913
+ TraceAPI2.prototype.disable = function() {
914
+ unregisterGlobal(API_NAME3, DiagAPI.instance());
915
+ this._proxyTracerProvider = new ProxyTracerProvider();
916
+ };
917
+ return TraceAPI2;
918
+ })();
919
+ }
920
+ });
921
+
922
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace-api.js
923
+ var trace;
924
+ var init_trace_api = __esm({
925
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace-api.js"() {
926
+ "use strict";
927
+ init_trace();
928
+ trace = TraceAPI.getInstance();
929
+ }
930
+ });
931
+
932
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/index.js
933
+ var init_esm = __esm({
934
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/index.js"() {
935
+ "use strict";
936
+ init_context();
937
+ init_trace_flags();
938
+ init_context_api();
939
+ init_diag_api();
940
+ init_trace_api();
941
+ }
942
+ });
943
+
944
+ // src/agents/index.ts
945
+ var agents_exports = {};
946
+ __export(agents_exports, {
947
+ JudgeSpanCollector: () => JudgeSpanCollector,
948
+ JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
949
+ RealtimeAgentAdapter: () => RealtimeAgentAdapter,
950
+ judgeAgent: () => judgeAgent,
951
+ judgeSpanCollector: () => judgeSpanCollector,
952
+ judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
953
+ userSimulatorAgent: () => userSimulatorAgent
954
+ });
955
+
956
+ // src/agents/judge/judge-agent.ts
339
957
  import { tool } from "ai";
340
958
  import { z as z4 } from "zod/v4";
341
959
 
@@ -408,1743 +1026,1333 @@ var JudgeUtils = {
408
1026
  }
409
1027
  };
410
1028
 
411
- // src/agents/llm-invoker.factory.ts
412
- import { generateText } from "ai";
413
- var createLLMInvoker = (logger2) => {
414
- return async (params) => {
415
- try {
416
- return await generateText({
417
- ...params,
418
- experimental_telemetry: { isEnabled: true }
419
- });
420
- } catch (error) {
421
- logger2.error("Error generating text", { error });
422
- throw error;
423
- }
424
- };
425
- };
426
-
427
- // src/agents/utils.ts
428
- var toolMessageRole = "tool";
429
- var assistantMessageRole = "assistant";
430
- var userMessageRole = "user";
431
- var hasToolContent = (message2) => {
432
- if (message2.role === toolMessageRole) return true;
433
- if (!Array.isArray(message2.content)) return false;
434
- return message2.content.some((part) => {
435
- if (!part || typeof part !== "object") return false;
436
- const partType = "type" in part ? part.type : void 0;
437
- return partType === "tool-call" || partType === "tool-result";
438
- });
439
- };
440
- var stringifyValue = (value) => {
441
- if (typeof value === "string") return value;
442
- if (value === void 0) return "undefined";
443
- try {
444
- const serialized = JSON.stringify(value);
445
- return serialized === void 0 ? String(value) : serialized;
446
- } catch {
447
- return String(value);
448
- }
449
- };
450
- var summarizeToolMessage = (message2) => {
451
- if (message2.role === toolMessageRole && !Array.isArray(message2.content)) {
452
- return `[Tool message: ${stringifyValue(message2.content)}]`;
453
- }
454
- if (message2.role === toolMessageRole) {
455
- const toolResults = message2.content.filter((part) => part.type === "tool-result").map((part) => {
456
- const contentPart = part;
457
- const name = contentPart.toolName ?? "unknown tool";
458
- const output = contentPart.output;
459
- const value = output && typeof output === "object" && "value" in output && typeof output.value === "string" ? output.value : output ?? contentPart.result;
460
- return `[Tool result from ${name}: ${stringifyValue(value)}]`;
461
- });
462
- return toolResults.length > 0 ? toolResults.join("\n") : null;
463
- }
464
- if (!Array.isArray(message2.content)) return null;
465
- const toolCalls = message2.content.filter((part) => part.type === "tool-call").map((part) => {
466
- const contentPart = part;
467
- const name = contentPart.toolName ?? "unknown tool";
468
- return `[Called tool ${name} with: ${stringifyValue(contentPart.input)}]`;
469
- });
470
- return toolCalls.length > 0 ? toolCalls.join("\n") : null;
471
- };
472
- var messageRoleReversal = (messages) => {
473
- const roleMap = {
474
- [userMessageRole]: assistantMessageRole,
475
- [assistantMessageRole]: userMessageRole
476
- };
477
- return messages.map((message2) => {
478
- if (hasToolContent(message2)) {
479
- const summary = summarizeToolMessage(message2);
480
- if (!summary) return null;
481
- return {
482
- role: userMessageRole,
483
- content: summary
484
- };
485
- }
486
- const newRole = roleMap[message2.role];
487
- if (!newRole) return message2;
488
- return {
489
- ...message2,
490
- role: newRole
491
- };
492
- }).filter((message2) => message2 !== null);
493
- };
494
- var criterionToParamName = (criterion) => {
495
- return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
496
- };
497
-
498
- // src/agents/judge/judge-span-digest-formatter.ts
499
- import { attributes as attributes2 } from "langwatch/observability";
1029
+ // src/config/env.ts
1030
+ import { z } from "zod/v4";
500
1031
 
501
- // src/agents/judge/deep-transform.ts
502
- function deepTransform(value, fn) {
503
- const result = fn(value);
504
- if (result !== value) return result;
505
- if (Array.isArray(value)) {
506
- return value.map((v) => deepTransform(v, fn));
507
- }
508
- if (value !== null && typeof value === "object") {
509
- const out = {};
510
- for (const [k, v] of Object.entries(value)) {
511
- out[k] = deepTransform(v, fn);
512
- }
513
- return out;
514
- }
515
- return value;
516
- }
1032
+ // src/config/log-levels.ts
1033
+ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
1034
+ LogLevel2["ERROR"] = "ERROR";
1035
+ LogLevel2["WARN"] = "WARN";
1036
+ LogLevel2["INFO"] = "INFO";
1037
+ LogLevel2["DEBUG"] = "DEBUG";
1038
+ return LogLevel2;
1039
+ })(LogLevel || {});
1040
+ var LOG_LEVELS = Object.values(LogLevel);
517
1041
 
518
- // src/agents/judge/string-deduplicator.ts
519
- var StringDeduplicator = class {
520
- seen = /* @__PURE__ */ new Map();
521
- threshold;
522
- constructor(params) {
523
- this.threshold = params.threshold;
524
- }
1042
+ // src/config/env.ts
1043
+ var envSchema = z.object({
525
1044
  /**
526
- * Resets seen strings for a new digest.
1045
+ * LangWatch API key for event reporting.
1046
+ * If not provided, events will not be sent to LangWatch.
527
1047
  */
528
- reset() {
529
- this.seen.clear();
530
- }
1048
+ LANGWATCH_API_KEY: z.string().optional(),
531
1049
  /**
532
- * Processes a string, returning duplicate marker if seen before.
533
- * @param str - String to process
534
- * @returns Original string or duplicate marker
1050
+ * LangWatch endpoint URL for event reporting.
1051
+ * Defaults to the production LangWatch endpoint.
535
1052
  */
536
- process(str) {
537
- if (str.length < this.threshold) return str;
538
- const key = this.normalize(str);
539
- if (this.seen.has(key)) return "[DUPLICATE - SEE ABOVE]";
540
- this.seen.set(key, true);
541
- return str;
542
- }
1053
+ LANGWATCH_ENDPOINT: z.string().url().optional().default("https://app.langwatch.ai"),
543
1054
  /**
544
- * Normalizes string for comparison (whitespace, case).
1055
+ * Disables simulation report info messages when set to any truthy value.
1056
+ * Useful for CI/CD environments or when you want cleaner output.
545
1057
  */
546
- normalize(str) {
547
- return str.replace(/\\[nrt]/g, " ").replace(/[\n\r\t]/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
548
- }
549
- };
1058
+ SCENARIO_DISABLE_SIMULATION_REPORT_INFO: z.string().optional().transform((val) => Boolean(val)),
1059
+ /**
1060
+ * Node environment - affects logging and behavior.
1061
+ * Defaults to 'development' if not specified.
1062
+ */
1063
+ NODE_ENV: z.enum(["development", "production", "test"]).default("development"),
1064
+ /**
1065
+ * Case-insensitive log level for the scenario package.
1066
+ * Defaults to 'info' if not specified.
1067
+ */
1068
+ LOG_LEVEL: z.string().toUpperCase().pipe(z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
1069
+ /**
1070
+ * Scenario batch run ID.
1071
+ * If not provided, a random ID will be generated.
1072
+ */
1073
+ SCENARIO_BATCH_RUN_ID: z.string().optional()
1074
+ });
1075
+ function getEnv() {
1076
+ return envSchema.parse(process.env);
1077
+ }
550
1078
 
551
- // src/agents/judge/truncate-media.ts
552
- function truncateMediaUrl(str) {
553
- const match = str.match(
554
- /^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
555
- );
556
- if (!match) return str;
557
- const [, mimeType, category, data] = match;
558
- return `[${category.toUpperCase()}: ${mimeType}, ~${data.length} bytes]`;
1079
+ // src/config/load.ts
1080
+ import fs from "fs/promises";
1081
+ import path from "path";
1082
+ import { pathToFileURL } from "url";
1083
+
1084
+ // src/domain/index.ts
1085
+ var domain_exports = {};
1086
+ __export(domain_exports, {
1087
+ AgentAdapter: () => AgentAdapter,
1088
+ AgentRole: () => AgentRole,
1089
+ DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
1090
+ DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
1091
+ JudgeAgentAdapter: () => JudgeAgentAdapter,
1092
+ UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
1093
+ allAgentRoles: () => allAgentRoles,
1094
+ defineConfig: () => defineConfig,
1095
+ scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
1096
+ });
1097
+
1098
+ // src/domain/core/config.ts
1099
+ import { z as z3 } from "zod/v4";
1100
+
1101
+ // src/domain/core/schemas/model.schema.ts
1102
+ import { z as z2 } from "zod/v4";
1103
+
1104
+ // src/domain/core/constants.ts
1105
+ var DEFAULT_TEMPERATURE = 0;
1106
+
1107
+ // src/domain/core/schemas/model.schema.ts
1108
+ var modelSchema = z2.object({
1109
+ model: z2.custom((val) => Boolean(val), {
1110
+ message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
1111
+ }).describe("Language model that is used by the AI SDK Core functions."),
1112
+ temperature: z2.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
1113
+ maxTokens: z2.number().optional().describe("The maximum number of tokens to generate.")
1114
+ });
1115
+
1116
+ // src/domain/core/config.ts
1117
+ var headless = typeof process !== "undefined" ? process.env.SCENARIO_HEADLESS === "true" : false;
1118
+ var scenarioProjectConfigSchema = z3.object({
1119
+ defaultModel: modelSchema.optional(),
1120
+ headless: z3.boolean().optional().default(headless),
1121
+ observability: z3.custom((val) => {
1122
+ return val === void 0 || typeof val === "object" && val !== null && !Array.isArray(val);
1123
+ }).optional()
1124
+ }).strict();
1125
+ function defineConfig(config2) {
1126
+ return config2;
559
1127
  }
560
- function truncateMediaPart(v) {
561
- var _a;
562
- if (v === null || typeof v !== "object" || Array.isArray(v)) return null;
563
- const obj = v;
564
- if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
565
- const category = ((_a = obj.mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
566
- return {
567
- ...obj,
568
- data: `[${category}: ${obj.mediaType}, ~${obj.data.length} bytes]`
569
- };
570
- }
571
- if (obj.type === "image" && typeof obj.image === "string") {
572
- const imageData = obj.image;
573
- const dataUrlMatch = imageData.match(
574
- /^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
575
- );
576
- if (dataUrlMatch) {
577
- return {
578
- ...obj,
579
- image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
580
- };
581
- }
582
- if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
583
- return {
584
- ...obj,
585
- image: `[IMAGE: unknown, ~${imageData.length} bytes]`
586
- };
1128
+
1129
+ // src/domain/agents/index.ts
1130
+ var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
1131
+ AgentRole2["USER"] = "User";
1132
+ AgentRole2["AGENT"] = "Agent";
1133
+ AgentRole2["JUDGE"] = "Judge";
1134
+ return AgentRole2;
1135
+ })(AgentRole || {});
1136
+ var allAgentRoles = [
1137
+ "User" /* USER */,
1138
+ "Agent" /* AGENT */,
1139
+ "Judge" /* JUDGE */
1140
+ ];
1141
+ var AgentAdapter = class {
1142
+ name;
1143
+ role = "Agent" /* AGENT */;
1144
+ };
1145
+ var UserSimulatorAgentAdapter = class extends AgentAdapter {
1146
+ name = "UserSimulatorAgent";
1147
+ role = "User" /* USER */;
1148
+ };
1149
+ var JudgeAgentAdapter = class extends AgentAdapter {
1150
+ name = "JudgeAgent";
1151
+ role = "Judge" /* JUDGE */;
1152
+ };
1153
+
1154
+ // src/domain/scenarios/index.ts
1155
+ var DEFAULT_MAX_TURNS = 10;
1156
+ var DEFAULT_VERBOSE = false;
1157
+
1158
+ // src/config/load.ts
1159
+ async function loadScenarioProjectConfig() {
1160
+ const cwd = process.cwd();
1161
+ const configNames = [
1162
+ "scenario.config.js",
1163
+ "scenario.config.mjs"
1164
+ ];
1165
+ for (const name of configNames) {
1166
+ const fullPath = path.join(cwd, name);
1167
+ try {
1168
+ await fs.access(fullPath);
1169
+ const configModule = await import(pathToFileURL(fullPath).href);
1170
+ const config2 = configModule.default || configModule;
1171
+ const parsed = scenarioProjectConfigSchema.safeParse(config2);
1172
+ if (!parsed.success) {
1173
+ throw new Error(
1174
+ `Invalid config file ${name}: ${JSON.stringify(parsed.error.format(), null, 2)}`
1175
+ );
1176
+ }
1177
+ return parsed.data;
1178
+ } catch (error) {
1179
+ if (error instanceof Error && "code" in error && error.code === "ENOENT") {
1180
+ continue;
1181
+ }
1182
+ throw error;
587
1183
  }
588
1184
  }
589
- return null;
1185
+ return await scenarioProjectConfigSchema.parseAsync({});
590
1186
  }
591
1187
 
592
- // src/agents/judge/judge-span-digest-formatter.ts
593
- var JudgeSpanDigestFormatter = class {
594
- logger = new Logger("JudgeSpanDigestFormatter");
595
- deduplicator = new StringDeduplicator({ threshold: 50 });
1188
+ // src/utils/logger.ts
1189
+ var Logger = class _Logger {
1190
+ constructor(context2) {
1191
+ this.context = context2;
1192
+ }
596
1193
  /**
597
- * Formats spans into a complete digest with full content and nesting.
598
- * @param spans - All spans for a thread
599
- * @returns Plain text digest
1194
+ * Creates a logger with context (e.g., class name)
600
1195
  */
601
- format(spans) {
602
- this.deduplicator.reset();
603
- this.logger.debug("format() called", {
604
- spanCount: spans.length,
605
- spanNames: spans.map((s) => s.name)
606
- });
607
- if (spans.length === 0) {
608
- this.logger.debug("No spans to format");
609
- return "No spans recorded.";
610
- }
611
- const sortedSpans = this.sortByStartTime(spans);
612
- const tree = this.buildHierarchy(sortedSpans);
613
- const totalDuration = this.calculateTotalDuration(sortedSpans);
614
- this.logger.debug("Hierarchy built", {
615
- rootCount: tree.length,
616
- totalDuration
617
- });
618
- const lines = [
619
- `Spans: ${spans.length} | Total Duration: ${this.formatDuration(
620
- totalDuration
621
- )}`,
622
- ""
623
- ];
624
- let sequence = 1;
625
- const rootCount = tree.length;
626
- tree.forEach((node, idx) => {
627
- sequence = this.renderNode(
628
- node,
629
- lines,
630
- 0,
631
- sequence,
632
- idx === rootCount - 1
633
- );
634
- });
635
- const errors = this.collectErrors(spans);
636
- if (errors.length > 0) {
637
- lines.push("");
638
- lines.push("=== ERRORS ===");
639
- errors.forEach((e) => lines.push(e));
640
- }
641
- return lines.join("\n");
642
- }
643
- sortByStartTime(spans) {
644
- return [...spans].sort((a, b) => {
645
- const aTime = this.hrTimeToMs(a.startTime);
646
- const bTime = this.hrTimeToMs(b.startTime);
647
- return aTime - bTime;
648
- });
1196
+ static create(context2) {
1197
+ return new _Logger(context2);
649
1198
  }
650
- buildHierarchy(spans) {
651
- var _a;
652
- const spanMap = /* @__PURE__ */ new Map();
653
- const roots = [];
654
- for (const span of spans) {
655
- spanMap.set(span.spanContext().spanId, { span, children: [] });
656
- }
657
- for (const span of spans) {
658
- const node = spanMap.get(span.spanContext().spanId);
659
- const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
660
- if (parentId && spanMap.has(parentId)) {
661
- spanMap.get(parentId).children.push(node);
1199
+ /**
1200
+ * Returns the current log level from environment.
1201
+ * Uses a getter for clarity and idiomatic usage.
1202
+ */
1203
+ get LOG_LEVEL() {
1204
+ return getEnv().LOG_LEVEL;
1205
+ }
1206
+ /**
1207
+ * Returns the index of the given log level in the LOG_LEVELS array.
1208
+ * @param level - The log level to get the index for.
1209
+ * @returns The index of the log level in the LOG_LEVELS array.
1210
+ */
1211
+ getLogLevelIndexFor(level) {
1212
+ return LOG_LEVELS.indexOf(level);
1213
+ }
1214
+ /**
1215
+ * Checks if logging should occur based on LOG_LEVEL env var
1216
+ */
1217
+ shouldLog(level) {
1218
+ const currentLevelIndex = this.getLogLevelIndexFor(this.LOG_LEVEL);
1219
+ const requestedLevelIndex = this.getLogLevelIndexFor(level);
1220
+ return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
1221
+ }
1222
+ formatMessage(message2) {
1223
+ return this.context ? `[${this.context}] ${message2}` : message2;
1224
+ }
1225
+ error(message2, data) {
1226
+ if (this.shouldLog("ERROR" /* ERROR */)) {
1227
+ const formattedMessage = this.formatMessage(message2);
1228
+ if (data) {
1229
+ console.error(formattedMessage, data);
662
1230
  } else {
663
- roots.push(node);
1231
+ console.error(formattedMessage);
664
1232
  }
665
1233
  }
666
- return roots;
667
1234
  }
668
- renderNode(node, lines, depth, sequence, isLast = true) {
669
- const span = node.span;
670
- const duration = this.calculateSpanDuration(span);
671
- const timestamp = this.formatTimestamp(span.startTime);
672
- const status = this.getStatusIndicator(span);
673
- const prefix = this.getTreePrefix(depth, isLast);
674
- lines.push(
675
- `${prefix}[${sequence}] ${new Date(timestamp).toISOString()} ${span.name} (${this.formatDuration(duration)})${status}`
676
- );
677
- const attrIndent = this.getAttrIndent(depth, isLast);
678
- const attrs = this.cleanAttributes(span.attributes);
679
- if (Object.keys(attrs).length > 0) {
680
- for (const [key, value] of Object.entries(attrs)) {
681
- lines.push(`${attrIndent}${key}: ${this.formatValue(value)}`);
682
- }
683
- }
684
- if (span.events.length > 0) {
685
- for (const event of span.events) {
686
- lines.push(`${attrIndent}[event] ${event.name}`);
687
- if (event.attributes) {
688
- const eventAttrs = this.cleanAttributes(event.attributes);
689
- for (const [key, value] of Object.entries(eventAttrs)) {
690
- lines.push(`${attrIndent} ${key}: ${this.formatValue(value)}`);
691
- }
692
- }
1235
+ warn(message2, data) {
1236
+ if (this.shouldLog("WARN" /* WARN */)) {
1237
+ const formattedMessage = this.formatMessage(message2);
1238
+ if (data) {
1239
+ console.warn(formattedMessage, data);
1240
+ } else {
1241
+ console.warn(formattedMessage);
693
1242
  }
694
1243
  }
695
- lines.push("");
696
- let nextSeq = sequence + 1;
697
- const childCount = node.children.length;
698
- node.children.forEach((child, idx) => {
699
- nextSeq = this.renderNode(
700
- child,
701
- lines,
702
- depth + 1,
703
- nextSeq,
704
- idx === childCount - 1
705
- );
706
- });
707
- return nextSeq;
708
- }
709
- getTreePrefix(depth, isLast) {
710
- if (depth === 0) return "";
711
- const connector = isLast ? "\u2514\u2500\u2500 " : "\u251C\u2500\u2500 ";
712
- return "\u2502 ".repeat(depth - 1) + connector;
713
- }
714
- getAttrIndent(depth, isLast) {
715
- if (depth === 0) return " ";
716
- const continuation = isLast ? " " : "\u2502 ";
717
- return "\u2502 ".repeat(depth - 1) + continuation + " ";
718
1244
  }
719
- cleanAttributes(attrs) {
720
- const cleaned = {};
721
- const seen = /* @__PURE__ */ new Set();
722
- const excludedKeys = [
723
- attributes2.ATTR_LANGWATCH_THREAD_ID,
724
- "langwatch.scenario.id",
725
- "langwatch.scenario.name"
726
- ];
727
- for (const [key, value] of Object.entries(attrs)) {
728
- if (excludedKeys.includes(key)) {
729
- continue;
730
- }
731
- const cleanKey = key.replace(/^(langwatch)\./, "");
732
- if (!seen.has(cleanKey)) {
733
- seen.add(cleanKey);
734
- cleaned[cleanKey] = value;
1245
+ info(message2, data) {
1246
+ if (this.shouldLog("INFO" /* INFO */)) {
1247
+ const formattedMessage = this.formatMessage(message2);
1248
+ if (data) {
1249
+ console.info(formattedMessage, data);
1250
+ } else {
1251
+ console.info(formattedMessage);
735
1252
  }
736
1253
  }
737
- return cleaned;
738
- }
739
- formatValue(value) {
740
- const processed = this.transformValue(value);
741
- return typeof processed === "string" ? processed : JSON.stringify(processed);
742
- }
743
- transformValue(value) {
744
- return deepTransform(value, (v) => {
745
- const mediaPart = truncateMediaPart(v);
746
- if (mediaPart) return mediaPart;
747
- if (typeof v !== "string") return v;
748
- return this.transformString(v);
749
- });
750
1254
  }
751
- transformString(str) {
752
- if (this.looksLikeJson(str)) {
753
- try {
754
- const processed = this.transformValue(JSON.parse(str));
755
- return JSON.stringify(processed);
756
- } catch {
1255
+ debug(message2, data) {
1256
+ if (this.shouldLog("DEBUG" /* DEBUG */)) {
1257
+ const formattedMessage = this.formatMessage(message2);
1258
+ if (data) {
1259
+ console.log(formattedMessage, data);
1260
+ } else {
1261
+ console.log(formattedMessage);
757
1262
  }
758
1263
  }
759
- const truncated = truncateMediaUrl(str);
760
- if (truncated !== str) return truncated;
761
- return this.deduplicator.process(str);
762
- }
763
- looksLikeJson(str) {
764
- const t = str.trim();
765
- return t.startsWith("{") && t.endsWith("}") || t.startsWith("[") && t.endsWith("]");
766
- }
767
- hrTimeToMs(hrTime) {
768
- return hrTime[0] * 1e3 + hrTime[1] / 1e6;
769
- }
770
- calculateSpanDuration(span) {
771
- return this.hrTimeToMs(span.endTime) - this.hrTimeToMs(span.startTime);
772
- }
773
- calculateTotalDuration(spans) {
774
- if (spans.length === 0) return 0;
775
- const first = this.hrTimeToMs(spans[0].startTime);
776
- const last = Math.max(...spans.map((s) => this.hrTimeToMs(s.endTime)));
777
- return last - first;
778
1264
  }
779
- formatDuration(ms) {
780
- if (ms < 1e3) return `${Math.round(ms)}ms`;
781
- return `${(ms / 1e3).toFixed(2)}s`;
1265
+ };
1266
+
1267
+ // src/config/get-project-config.ts
1268
+ var logger = new Logger("scenario.config");
1269
+ var configLoaded = false;
1270
+ var config = null;
1271
+ var configLoadPromise = null;
1272
+ async function loadProjectConfig() {
1273
+ if (configLoaded) {
1274
+ return;
782
1275
  }
783
- formatTimestamp(hrTime) {
784
- const ms = this.hrTimeToMs(hrTime);
785
- return new Date(ms).toISOString();
1276
+ if (configLoadPromise) {
1277
+ return configLoadPromise;
786
1278
  }
787
- getStatusIndicator(span) {
788
- if (span.status.code === 2) {
789
- return ` \u26A0\uFE0F ERROR: ${span.status.message ?? "unknown"}`;
1279
+ configLoadPromise = (async () => {
1280
+ try {
1281
+ config = await loadScenarioProjectConfig();
1282
+ logger.debug("loaded scenario project config", { config });
1283
+ } catch (error) {
1284
+ logger.error("error loading scenario project config", { error });
1285
+ } finally {
1286
+ configLoaded = true;
790
1287
  }
791
- return "";
792
- }
793
- collectErrors(spans) {
794
- return spans.filter((s) => s.status.code === 2).map((s) => `- ${s.name}: ${s.status.message ?? "unknown error"}`);
795
- }
796
- };
797
- var judgeSpanDigestFormatter = new JudgeSpanDigestFormatter();
798
-
799
- // src/agents/judge/judge-agent.ts
800
- function buildSystemPrompt(criteria, description) {
801
- const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
802
- return `
803
- <role>
804
- You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
805
- </role>
806
-
807
- <goal>
808
- Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
809
- If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
810
- </goal>
811
-
812
- <scenario>
813
- ${description}
814
- </scenario>
815
-
816
- <criteria>
817
- ${criteriaList}
818
- </criteria>
819
-
820
- <rules>
821
- - Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.
822
- - DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
823
- </rules>
824
- `.trim();
1288
+ })();
1289
+ return configLoadPromise;
825
1290
  }
826
- function buildContinueTestTool() {
827
- return tool({
828
- description: "Continue the test with the next step",
829
- inputSchema: z4.object({})
830
- });
1291
+ async function getProjectConfig() {
1292
+ await loadProjectConfig();
1293
+ return config;
831
1294
  }
832
- function buildFinishTestTool(criteria) {
833
- const criteriaNames = criteria.map(criterionToParamName);
834
- return tool({
835
- description: "Complete the test with a final verdict",
836
- inputSchema: z4.object({
837
- criteria: z4.object(
838
- Object.fromEntries(
839
- criteriaNames.map((name, idx) => [
840
- name,
841
- z4.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
842
- ])
843
- )
844
- ).strict().describe("Strict verdict for each criterion"),
845
- reasoning: z4.string().describe("Explanation of what the final verdict should be"),
846
- verdict: z4.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
847
- })
1295
+
1296
+ // src/agents/llm-invoker.factory.ts
1297
+ import { generateText } from "ai";
1298
+ var createLLMInvoker = (logger2) => {
1299
+ return async (params) => {
1300
+ try {
1301
+ return await generateText({
1302
+ ...params,
1303
+ experimental_telemetry: { isEnabled: true }
1304
+ });
1305
+ } catch (error) {
1306
+ logger2.error("Error generating text", { error });
1307
+ throw error;
1308
+ }
1309
+ };
1310
+ };
1311
+
1312
+ // src/agents/utils.ts
1313
+ var toolMessageRole = "tool";
1314
+ var assistantMessageRole = "assistant";
1315
+ var userMessageRole = "user";
1316
+ var hasToolContent = (message2) => {
1317
+ if (message2.role === toolMessageRole) return true;
1318
+ if (!Array.isArray(message2.content)) return false;
1319
+ return message2.content.some((part) => {
1320
+ if (!part || typeof part !== "object") return false;
1321
+ const partType = "type" in part ? part.type : void 0;
1322
+ return partType === "tool-call" || partType === "tool-result";
848
1323
  });
849
- }
850
- var JudgeAgent = class extends JudgeAgentAdapter {
851
- constructor(cfg) {
852
- super();
853
- this.cfg = cfg;
854
- this.criteria = cfg.criteria ?? [];
855
- this.spanCollector = cfg.spanCollector ?? judgeSpanCollector;
1324
+ };
1325
+ var stringifyValue = (value) => {
1326
+ if (typeof value === "string") return value;
1327
+ if (value === void 0) return "undefined";
1328
+ try {
1329
+ const serialized = JSON.stringify(value);
1330
+ return serialized === void 0 ? String(value) : serialized;
1331
+ } catch {
1332
+ return String(value);
856
1333
  }
857
- logger = new Logger("JudgeAgent");
858
- spanCollector;
859
- role = "Judge" /* JUDGE */;
860
- criteria;
861
- /**
862
- * LLM invocation function. Can be overridden to customize LLM behavior.
863
- */
864
- invokeLLM = createLLMInvoker(this.logger);
865
- async call(input) {
866
- var _a, _b, _c, _d;
867
- const criteria = ((_a = input.judgmentRequest) == null ? void 0 : _a.criteria) ?? this.criteria;
868
- this.logger.debug("call() invoked", {
869
- threadId: input.threadId,
870
- currentTurn: input.scenarioState.currentTurn,
871
- maxTurns: input.scenarioConfig.maxTurns,
872
- judgmentRequest: input.judgmentRequest
873
- });
874
- const digest = this.getOpenTelemetryTracesDigest(input.threadId);
875
- this.logger.debug("OpenTelemetry traces built", { digest });
876
- const transcript = JudgeUtils.buildTranscriptFromMessages(input.messages);
877
- const contentForJudge = `
878
- <transcript>
879
- ${transcript}
880
- </transcript>
881
- <opentelemetry_traces>
882
- ${digest}
883
- </opentelemetry_traces>
884
- `;
885
- const cfg = this.cfg;
886
- const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(criteria, input.scenarioConfig.description);
887
- const messages = [
888
- { role: "system", content: systemPrompt },
889
- { role: "user", content: contentForJudge }
890
- ];
891
- const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
892
- const projectConfig = await getProjectConfig();
893
- const mergedConfig = modelSchema.parse({
894
- ...projectConfig == null ? void 0 : projectConfig.defaultModel,
895
- ...cfg
1334
+ };
1335
+ var summarizeToolMessage = (message2) => {
1336
+ if (message2.role === toolMessageRole && !Array.isArray(message2.content)) {
1337
+ return `[Tool message: ${stringifyValue(message2.content)}]`;
1338
+ }
1339
+ if (message2.role === toolMessageRole) {
1340
+ const toolResults = message2.content.filter((part) => part.type === "tool-result").map((part) => {
1341
+ const contentPart = part;
1342
+ const name = contentPart.toolName ?? "unknown tool";
1343
+ const output = contentPart.output;
1344
+ const value = output && typeof output === "object" && "value" in output && typeof output.value === "string" ? output.value : output ?? contentPart.result;
1345
+ return `[Tool result from ${name}: ${stringifyValue(value)}]`;
896
1346
  });
897
- const tools = {
898
- continue_test: buildContinueTestTool(),
899
- finish_test: buildFinishTestTool(criteria)
900
- };
901
- const enforceJudgement = input.judgmentRequest != null;
902
- const hasCriteria = criteria.length && criteria.length > 0;
903
- if (enforceJudgement && !hasCriteria) {
1347
+ return toolResults.length > 0 ? toolResults.join("\n") : null;
1348
+ }
1349
+ if (!Array.isArray(message2.content)) return null;
1350
+ const toolCalls = message2.content.filter((part) => part.type === "tool-call").map((part) => {
1351
+ const contentPart = part;
1352
+ const name = contentPart.toolName ?? "unknown tool";
1353
+ return `[Called tool ${name} with: ${stringifyValue(contentPart.input)}]`;
1354
+ });
1355
+ return toolCalls.length > 0 ? toolCalls.join("\n") : null;
1356
+ };
1357
+ var messageRoleReversal = (messages) => {
1358
+ const roleMap = {
1359
+ [userMessageRole]: assistantMessageRole,
1360
+ [assistantMessageRole]: userMessageRole
1361
+ };
1362
+ return messages.map((message2) => {
1363
+ if (hasToolContent(message2)) {
1364
+ const summary = summarizeToolMessage(message2);
1365
+ if (!summary) return null;
904
1366
  return {
905
- success: false,
906
- reasoning: "JudgeAgent: No criteria was provided to be judged against",
907
- metCriteria: [],
908
- unmetCriteria: []
1367
+ role: userMessageRole,
1368
+ content: summary
909
1369
  };
910
1370
  }
911
- const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
912
- this.logger.debug("Calling LLM", {
913
- model: mergedConfig.model,
914
- toolChoice,
915
- isLastMessage,
916
- enforceJudgement
917
- });
918
- const completion = await this.invokeLLM({
919
- model: mergedConfig.model,
920
- messages,
921
- temperature: mergedConfig.temperature ?? 0,
922
- maxOutputTokens: mergedConfig.maxTokens,
923
- tools,
924
- toolChoice
925
- });
926
- this.logger.debug("LLM response received", {
927
- toolCallCount: ((_b = completion.toolCalls) == null ? void 0 : _b.length) ?? 0,
928
- toolCalls: (_c = completion.toolCalls) == null ? void 0 : _c.map((tc) => ({
929
- toolName: tc.toolName,
930
- args: tc.input
931
- }))
932
- });
933
- let args;
934
- if ((_d = completion.toolCalls) == null ? void 0 : _d.length) {
935
- const toolCall = completion.toolCalls[0];
936
- switch (toolCall.toolName) {
937
- case "finish_test": {
938
- args = toolCall.input;
939
- const verdict = args.verdict || "inconclusive";
940
- const reasoning = args.reasoning || "No reasoning provided";
941
- const criteriaArgs = args.criteria || {};
942
- const criteriaValues = Object.values(criteriaArgs);
943
- const metCriteria = criteria.filter(
944
- (_, i) => criteriaValues[i] === "true"
945
- );
946
- const unmetCriteria = criteria.filter(
947
- (_, i) => criteriaValues[i] !== "true"
948
- );
949
- const result = {
950
- success: verdict === "success",
951
- reasoning,
952
- metCriteria,
953
- unmetCriteria
954
- };
955
- this.logger.debug("finish_test result", result);
956
- return result;
957
- }
958
- case "continue_test":
959
- this.logger.debug("continue_test - proceeding to next turn");
960
- return null;
961
- default:
962
- return {
963
- success: false,
964
- reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
965
- metCriteria: [],
966
- unmetCriteria: criteria
967
- };
968
- }
969
- }
1371
+ const newRole = roleMap[message2.role];
1372
+ if (!newRole) return message2;
970
1373
  return {
971
- success: false,
972
- reasoning: `JudgeAgent: No tool call found in LLM output`,
973
- metCriteria: [],
974
- unmetCriteria: criteria
1374
+ ...message2,
1375
+ role: newRole
975
1376
  };
976
- }
977
- getOpenTelemetryTracesDigest(threadId) {
978
- const spans = this.spanCollector.getSpansForThread(threadId);
979
- const digest = judgeSpanDigestFormatter.format(spans);
980
- return digest;
981
- }
1377
+ }).filter((message2) => message2 !== null);
982
1378
  };
983
- var judgeAgent = (cfg) => {
984
- return new JudgeAgent(cfg ?? {});
1379
+ var criterionToParamName = (criterion) => {
1380
+ return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
985
1381
  };
986
1382
 
987
- // src/agents/user-simulator-agent.ts
988
- function buildSystemPrompt2(description) {
989
- return `
990
- <role>
991
- You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.
992
- Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.
993
- </role>
994
-
995
- <goal>
996
- Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.
997
- </goal>
998
-
999
- <scenario>
1000
- ${description}
1001
- </scenario>
1002
-
1003
- <rules>
1004
- - DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
1005
- </rules>
1006
- `.trim();
1007
- }
1008
- var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
1009
- constructor(cfg) {
1010
- super();
1011
- this.cfg = cfg;
1383
+ // src/agents/judge/judge-span-collector.ts
1384
+ import { attributes } from "langwatch/observability";
1385
+ var JudgeSpanCollector = class {
1386
+ spans = [];
1387
+ onStart() {
1388
+ }
1389
+ onEnd(span) {
1390
+ this.spans.push(span);
1391
+ }
1392
+ forceFlush() {
1393
+ return Promise.resolve();
1394
+ }
1395
+ shutdown() {
1396
+ this.spans = [];
1397
+ return Promise.resolve();
1012
1398
  }
1013
- logger = new Logger(this.constructor.name);
1014
1399
  /**
1015
- * LLM invocation function. Can be overridden to customize LLM behavior.
1400
+ * Removes all spans associated with a specific thread.
1401
+ * Call this after a scenario run completes to prevent memory growth
1402
+ * in long-lived processes.
1403
+ * @param threadId - The thread identifier whose spans should be cleared
1016
1404
  */
1017
- invokeLLM = createLLMInvoker(this.logger);
1018
- call = async (input) => {
1019
- const config2 = this.cfg;
1020
- const systemPrompt = (config2 == null ? void 0 : config2.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
1021
- const messages = [
1022
- { role: "system", content: systemPrompt },
1023
- { role: "assistant", content: "Hello, how can I help you today" },
1024
- ...input.messages
1025
- ];
1026
- const projectConfig = await getProjectConfig();
1027
- const mergedConfig = modelSchema.parse({
1028
- ...projectConfig == null ? void 0 : projectConfig.defaultModel,
1029
- ...config2
1030
- });
1031
- const reversedMessages = messageRoleReversal(messages);
1032
- const completion = await this.invokeLLM({
1033
- model: mergedConfig.model,
1034
- messages: reversedMessages,
1035
- temperature: mergedConfig.temperature,
1036
- maxOutputTokens: mergedConfig.maxTokens
1037
- });
1038
- const messageContent = completion.text;
1039
- if (!messageContent) {
1040
- throw new Error("No response content from LLM");
1041
- }
1042
- return { role: "user", content: messageContent };
1043
- };
1044
- };
1045
- var userSimulatorAgent = (config2) => {
1046
- return new UserSimulatorAgent(config2);
1047
- };
1048
-
1049
- // src/agents/realtime/realtime-agent.adapter.ts
1050
- import { EventEmitter } from "events";
1051
-
1052
- // src/agents/realtime/message-processor.ts
1053
- var MessageProcessor = class {
1405
+ clearSpansForThread(threadId) {
1406
+ const threadSpanIds = new Set(
1407
+ this.getSpansForThread(threadId).map((s) => s.spanContext().spanId)
1408
+ );
1409
+ this.spans = this.spans.filter(
1410
+ (s) => !threadSpanIds.has(s.spanContext().spanId)
1411
+ );
1412
+ }
1054
1413
  /**
1055
- * Processes audio message content and extracts base64 audio data
1056
- *
1057
- * @param content - The message content to process
1058
- * @returns Base64 audio data string or null if no audio found
1059
- * @throws {Error} If audio data is invalid
1414
+ * Retrieves all spans associated with a specific thread.
1415
+ * @param threadId - The thread identifier to filter spans by
1416
+ * @returns Array of spans for the given thread
1060
1417
  */
1061
- processAudioMessage(content) {
1062
- if (!Array.isArray(content)) {
1063
- return null;
1418
+ getSpansForThread(threadId) {
1419
+ const spanMap = /* @__PURE__ */ new Map();
1420
+ for (const span of this.spans) {
1421
+ spanMap.set(span.spanContext().spanId, span);
1064
1422
  }
1065
- for (const part of content) {
1066
- if (typeof part === "object" && part !== null && "type" in part && part.type === "file" && "mediaType" in part && typeof part.mediaType === "string" && part.mediaType.startsWith("audio/")) {
1067
- if (!("data" in part) || typeof part.data !== "string") {
1068
- throw new Error(
1069
- `Audio data must be base64 string, got: ${typeof part.data}`
1070
- );
1071
- }
1072
- if (!part.data || part.data.length === 0) {
1073
- throw new Error(
1074
- `Audio message has no data. Part: ${JSON.stringify(part)}`
1075
- );
1076
- }
1077
- return part.data;
1423
+ const belongsToThread = (span, visited = /* @__PURE__ */ new Set()) => {
1424
+ const spanId = span.spanContext().spanId;
1425
+ if (visited.has(spanId)) return false;
1426
+ visited.add(spanId);
1427
+ if (span.attributes[attributes.ATTR_LANGWATCH_THREAD_ID] === threadId) {
1428
+ return true;
1078
1429
  }
1079
- }
1080
- return null;
1430
+ const parentId = getParentSpanId(span);
1431
+ if (parentId && spanMap.has(parentId)) {
1432
+ return belongsToThread(spanMap.get(parentId), visited);
1433
+ }
1434
+ return false;
1435
+ };
1436
+ return this.spans.filter((span) => belongsToThread(span));
1081
1437
  }
1082
- /**
1083
- * Extracts text content from message content
1084
- *
1085
- * @param content - The message content to process
1086
- * @returns Text string or empty string if no text found
1087
- */
1088
- extractTextMessage(content) {
1089
- return typeof content === "string" ? content : "";
1438
+ };
1439
+ function getParentSpanId(span) {
1440
+ if (span.parentSpanId) return span.parentSpanId;
1441
+ const legacy = span.parentSpanContext;
1442
+ return legacy == null ? void 0 : legacy.spanId;
1443
+ }
1444
+ var judgeSpanCollector = new JudgeSpanCollector();
1445
+
1446
+ // src/agents/judge/judge-span-digest-formatter.ts
1447
+ import { attributes as attributes2 } from "langwatch/observability";
1448
+
1449
+ // src/agents/judge/deep-transform.ts
1450
+ function deepTransform(value, fn) {
1451
+ const result = fn(value);
1452
+ if (result !== value) return result;
1453
+ if (Array.isArray(value)) {
1454
+ return value.map((v) => deepTransform(v, fn));
1090
1455
  }
1091
- /**
1092
- * Validates that a message has either text or audio content
1093
- *
1094
- * @param content - The message content to validate
1095
- * @returns True if the message has valid content
1096
- */
1097
- hasValidContent(content) {
1098
- const hasText = this.extractTextMessage(content).length > 0;
1099
- const hasAudio = this.processAudioMessage(content) !== null;
1100
- return hasText || hasAudio;
1456
+ if (value !== null && typeof value === "object") {
1457
+ const out = {};
1458
+ for (const [k, v] of Object.entries(value)) {
1459
+ out[k] = deepTransform(v, fn);
1460
+ }
1461
+ return out;
1101
1462
  }
1102
- };
1463
+ return value;
1464
+ }
1103
1465
 
1104
- // src/agents/realtime/realtime-event-handler.ts
1105
- var RealtimeEventHandler = class {
1466
+ // src/agents/judge/string-deduplicator.ts
1467
+ var StringDeduplicator = class {
1468
+ seen = /* @__PURE__ */ new Map();
1469
+ threshold;
1470
+ constructor(params) {
1471
+ this.threshold = params.threshold;
1472
+ }
1106
1473
  /**
1107
- * Creates a new RealtimeEventHandler instance
1108
- * @param session - The RealtimeSession to listen to events from
1474
+ * Resets seen strings for a new digest.
1109
1475
  */
1110
- constructor(session) {
1111
- this.session = session;
1112
- this.ensureEventListeners();
1476
+ reset() {
1477
+ this.seen.clear();
1113
1478
  }
1114
- currentResponse = "";
1115
- currentAudioChunks = [];
1116
- responseResolver = null;
1117
- errorRejecter = null;
1118
- listenersSetup = false;
1119
1479
  /**
1120
- * Gets the transport from the session
1480
+ * Processes a string, returning duplicate marker if seen before.
1481
+ * @param str - String to process
1482
+ * @returns Original string or duplicate marker
1121
1483
  */
1122
- getTransport() {
1123
- const sessionWithTransport = this.session;
1124
- return sessionWithTransport.transport ?? null;
1484
+ process(str) {
1485
+ if (str.length < this.threshold) return str;
1486
+ const key = this.normalize(str);
1487
+ if (this.seen.has(key)) return "[DUPLICATE - SEE ABOVE]";
1488
+ this.seen.set(key, true);
1489
+ return str;
1125
1490
  }
1126
1491
  /**
1127
- * Ensures event listeners are set up, retrying if transport not available
1492
+ * Normalizes string for comparison (whitespace, case).
1128
1493
  */
1129
- ensureEventListeners() {
1130
- if (this.listenersSetup) return;
1131
- const transport = this.getTransport();
1132
- if (!transport) {
1133
- setTimeout(() => this.ensureEventListeners(), 100);
1134
- return;
1494
+ normalize(str) {
1495
+ return str.replace(/\\[nrt]/g, " ").replace(/[\n\r\t]/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
1496
+ }
1497
+ };
1498
+
1499
+ // src/agents/judge/truncate-media.ts
1500
+ function truncateMediaUrl(str) {
1501
+ const match = str.match(
1502
+ /^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
1503
+ );
1504
+ if (!match) return str;
1505
+ const [, mimeType, category, data] = match;
1506
+ return `[${category.toUpperCase()}: ${mimeType}, ~${data.length} bytes]`;
1507
+ }
1508
+ function truncateMediaPart(v) {
1509
+ var _a;
1510
+ if (v === null || typeof v !== "object" || Array.isArray(v)) return null;
1511
+ const obj = v;
1512
+ if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
1513
+ const category = ((_a = obj.mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
1514
+ return {
1515
+ ...obj,
1516
+ data: `[${category}: ${obj.mediaType}, ~${obj.data.length} bytes]`
1517
+ };
1518
+ }
1519
+ if (obj.type === "image" && typeof obj.image === "string") {
1520
+ const imageData = obj.image;
1521
+ const dataUrlMatch = imageData.match(
1522
+ /^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
1523
+ );
1524
+ if (dataUrlMatch) {
1525
+ return {
1526
+ ...obj,
1527
+ image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
1528
+ };
1529
+ }
1530
+ if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
1531
+ return {
1532
+ ...obj,
1533
+ image: `[IMAGE: unknown, ~${imageData.length} bytes]`
1534
+ };
1135
1535
  }
1136
- this.setupEventListeners();
1137
1536
  }
1537
+ return null;
1538
+ }
1539
+
1540
+ // src/agents/judge/judge-span-digest-formatter.ts
1541
+ var JudgeSpanDigestFormatter = class {
1542
+ logger = new Logger("JudgeSpanDigestFormatter");
1543
+ deduplicator = new StringDeduplicator({ threshold: 50 });
1138
1544
  /**
1139
- * Sets up event listeners for the RealtimeSession transport layer
1545
+ * Formats spans into a complete digest with full content and nesting.
1546
+ * @param spans - All spans for a thread
1547
+ * @returns Plain text digest
1140
1548
  */
1141
- setupEventListeners() {
1142
- if (this.listenersSetup) return;
1143
- const transport = this.getTransport();
1144
- if (!transport) {
1145
- console.error("\u274C Transport not available on session");
1146
- return;
1549
+ format(spans) {
1550
+ this.deduplicator.reset();
1551
+ this.logger.debug("format() called", {
1552
+ spanCount: spans.length,
1553
+ spanNames: spans.map((s) => s.name)
1554
+ });
1555
+ if (spans.length === 0) {
1556
+ this.logger.debug("No spans to format");
1557
+ return "No spans recorded.";
1147
1558
  }
1148
- transport.on("response.output_audio_transcript.delta", (event) => {
1149
- const deltaEvent = event;
1150
- if (typeof deltaEvent.delta === "string") {
1151
- this.currentResponse += deltaEvent.delta;
1152
- }
1559
+ const sortedSpans = this.sortByStartTime(spans);
1560
+ const tree = this.buildHierarchy(sortedSpans);
1561
+ const totalDuration = this.calculateTotalDuration(sortedSpans);
1562
+ this.logger.debug("Hierarchy built", {
1563
+ rootCount: tree.length,
1564
+ totalDuration
1153
1565
  });
1154
- transport.on("response.output_audio.delta", (event) => {
1155
- const deltaEvent = event;
1156
- if (typeof deltaEvent.delta === "string") {
1157
- this.currentAudioChunks.push(deltaEvent.delta);
1158
- }
1566
+ const lines = [
1567
+ `Spans: ${spans.length} | Total Duration: ${this.formatDuration(
1568
+ totalDuration
1569
+ )}`,
1570
+ ""
1571
+ ];
1572
+ let sequence = 1;
1573
+ const rootCount = tree.length;
1574
+ tree.forEach((node, idx) => {
1575
+ sequence = this.renderNode(
1576
+ node,
1577
+ lines,
1578
+ 0,
1579
+ sequence,
1580
+ idx === rootCount - 1
1581
+ );
1159
1582
  });
1160
- transport.on("response.done", () => {
1161
- const fullAudio = this.currentAudioChunks.join("");
1162
- const audioResponse = {
1163
- transcript: this.currentResponse,
1164
- audio: fullAudio
1165
- };
1166
- if (this.responseResolver) {
1167
- this.responseResolver(audioResponse);
1168
- this.reset();
1169
- }
1583
+ const errors = this.collectErrors(spans);
1584
+ if (errors.length > 0) {
1585
+ lines.push("");
1586
+ lines.push("=== ERRORS ===");
1587
+ errors.forEach((e) => lines.push(e));
1588
+ }
1589
+ return lines.join("\n");
1590
+ }
1591
+ sortByStartTime(spans) {
1592
+ return [...spans].sort((a, b) => {
1593
+ const aTime = this.hrTimeToMs(a.startTime);
1594
+ const bTime = this.hrTimeToMs(b.startTime);
1595
+ return aTime - bTime;
1170
1596
  });
1171
- transport.on("error", (error) => {
1172
- console.error(`\u274C Transport error:`, error);
1173
- if (this.errorRejecter) {
1174
- const errorObj = error instanceof Error ? error : new Error(String(error));
1175
- this.errorRejecter(errorObj);
1176
- this.reset();
1597
+ }
1598
+ buildHierarchy(spans) {
1599
+ const spanMap = /* @__PURE__ */ new Map();
1600
+ const roots = [];
1601
+ for (const span of spans) {
1602
+ spanMap.set(span.spanContext().spanId, { span, children: [] });
1603
+ }
1604
+ for (const span of spans) {
1605
+ const node = spanMap.get(span.spanContext().spanId);
1606
+ const parentId = getParentSpanId2(span);
1607
+ if (parentId && spanMap.has(parentId)) {
1608
+ spanMap.get(parentId).children.push(node);
1609
+ } else {
1610
+ roots.push(node);
1177
1611
  }
1178
- });
1179
- this.listenersSetup = true;
1612
+ }
1613
+ return roots;
1180
1614
  }
1181
- /**
1182
- * Waits for the agent response with timeout
1183
- *
1184
- * @param timeout - Maximum time to wait in milliseconds
1185
- * @returns Promise that resolves with the audio response event
1186
- * @throws {Error} If timeout occurs or transport error happens
1187
- */
1188
- waitForResponse(timeout) {
1189
- return new Promise((resolve, reject) => {
1190
- this.responseResolver = resolve;
1191
- this.errorRejecter = reject;
1192
- const timeoutId = setTimeout(() => {
1193
- if (this.responseResolver) {
1194
- this.reset();
1195
- reject(new Error(`Agent response timeout after ${timeout}ms`));
1615
+ renderNode(node, lines, depth, sequence, isLast = true) {
1616
+ const span = node.span;
1617
+ const duration = this.calculateSpanDuration(span);
1618
+ const timestamp = this.formatTimestamp(span.startTime);
1619
+ const status = this.getStatusIndicator(span);
1620
+ const prefix = this.getTreePrefix(depth, isLast);
1621
+ lines.push(
1622
+ `${prefix}[${sequence}] ${new Date(timestamp).toISOString()} ${span.name} (${this.formatDuration(duration)})${status}`
1623
+ );
1624
+ const attrIndent = this.getAttrIndent(depth, isLast);
1625
+ const attrs = this.cleanAttributes(span.attributes);
1626
+ if (Object.keys(attrs).length > 0) {
1627
+ for (const [key, value] of Object.entries(attrs)) {
1628
+ lines.push(`${attrIndent}${key}: ${this.formatValue(value)}`);
1629
+ }
1630
+ }
1631
+ if (span.events.length > 0) {
1632
+ for (const event of span.events) {
1633
+ lines.push(`${attrIndent}[event] ${event.name}`);
1634
+ if (event.attributes) {
1635
+ const eventAttrs = this.cleanAttributes(event.attributes);
1636
+ for (const [key, value] of Object.entries(eventAttrs)) {
1637
+ lines.push(`${attrIndent} ${key}: ${this.formatValue(value)}`);
1638
+ }
1196
1639
  }
1197
- }, timeout);
1198
- const originalResolver = resolve;
1199
- this.responseResolver = (value) => {
1200
- clearTimeout(timeoutId);
1201
- originalResolver(value);
1202
- };
1640
+ }
1641
+ }
1642
+ lines.push("");
1643
+ let nextSeq = sequence + 1;
1644
+ const childCount = node.children.length;
1645
+ node.children.forEach((child, idx) => {
1646
+ nextSeq = this.renderNode(
1647
+ child,
1648
+ lines,
1649
+ depth + 1,
1650
+ nextSeq,
1651
+ idx === childCount - 1
1652
+ );
1203
1653
  });
1654
+ return nextSeq;
1204
1655
  }
1205
- /**
1206
- * Resets the internal state for the next response
1207
- */
1208
- reset() {
1209
- this.responseResolver = null;
1210
- this.errorRejecter = null;
1211
- this.currentResponse = "";
1212
- this.currentAudioChunks = [];
1213
- }
1214
- };
1215
-
1216
- // src/agents/realtime/response-formatter.ts
1217
- var ResponseFormatter = class {
1218
- /**
1219
- * Formats an audio response event into Scenario framework format
1220
- *
1221
- * @param audioEvent - The audio response event from the Realtime API
1222
- * @returns Formatted assistant message with audio and text content
1223
- */
1224
- formatAudioResponse(audioEvent) {
1225
- return {
1226
- role: "assistant",
1227
- content: [
1228
- { type: "text", text: audioEvent.transcript },
1229
- { type: "file", mediaType: "audio/pcm16", data: audioEvent.audio }
1230
- ]
1231
- };
1656
+ getTreePrefix(depth, isLast) {
1657
+ if (depth === 0) return "";
1658
+ const connector = isLast ? "\u2514\u2500\u2500 " : "\u251C\u2500\u2500 ";
1659
+ return "\u2502 ".repeat(depth - 1) + connector;
1232
1660
  }
1233
- /**
1234
- * Formats a text response for the Scenario framework
1235
- *
1236
- * @param text - The text response from the agent
1237
- * @returns Plain text response string
1238
- */
1239
- formatTextResponse(text) {
1240
- return text;
1661
+ getAttrIndent(depth, isLast) {
1662
+ if (depth === 0) return " ";
1663
+ const continuation = isLast ? " " : "\u2502 ";
1664
+ return "\u2502 ".repeat(depth - 1) + continuation + " ";
1241
1665
  }
1242
- /**
1243
- * Creates an initial response message for when no user message exists
1244
- *
1245
- * @param audioEvent - The audio response event from the Realtime API
1246
- * @returns Formatted assistant message for initial responses
1247
- */
1248
- formatInitialResponse(audioEvent) {
1249
- return this.formatAudioResponse(audioEvent);
1666
+ cleanAttributes(attrs) {
1667
+ const cleaned = {};
1668
+ const seen = /* @__PURE__ */ new Set();
1669
+ const excludedKeys = [
1670
+ attributes2.ATTR_LANGWATCH_THREAD_ID,
1671
+ "langwatch.scenario.id",
1672
+ "langwatch.scenario.name"
1673
+ ];
1674
+ for (const [key, value] of Object.entries(attrs)) {
1675
+ if (excludedKeys.includes(key)) {
1676
+ continue;
1677
+ }
1678
+ const cleanKey = key.replace(/^(langwatch)\./, "");
1679
+ if (!seen.has(cleanKey)) {
1680
+ seen.add(cleanKey);
1681
+ cleaned[cleanKey] = value;
1682
+ }
1683
+ }
1684
+ return cleaned;
1250
1685
  }
1251
- };
1252
-
1253
- // src/agents/realtime/realtime-agent.adapter.ts
1254
- var RealtimeAgentAdapter = class extends AgentAdapter {
1255
- /**
1256
- * Creates a new RealtimeAgentAdapter instance
1257
- *
1258
- * The session can be either connected or unconnected.
1259
- * If unconnected, call connect() with an API key before use.
1260
- *
1261
- * @param config - Configuration for the realtime agent adapter
1262
- */
1263
- constructor(config2) {
1264
- super();
1265
- this.config = config2;
1266
- this.role = this.config.role;
1267
- this.name = this.config.agentName;
1268
- this.session = config2.session;
1269
- this.eventHandler = new RealtimeEventHandler(this.session);
1686
+ formatValue(value) {
1687
+ const processed = this.transformValue(value);
1688
+ return typeof processed === "string" ? processed : JSON.stringify(processed);
1270
1689
  }
1271
- role;
1272
- name;
1273
- session;
1274
- eventHandler;
1275
- messageProcessor = new MessageProcessor();
1276
- responseFormatter = new ResponseFormatter();
1277
- audioEvents = new EventEmitter();
1278
- /**
1279
- * Get the connect method from the session
1280
- */
1281
- async connect(params) {
1282
- const { apiKey, ...rest } = params ?? {};
1283
- await this.session.connect({
1284
- apiKey: apiKey ?? process.env.OPENAI_API_KEY,
1285
- ...rest
1690
+ transformValue(value) {
1691
+ return deepTransform(value, (v) => {
1692
+ const mediaPart = truncateMediaPart(v);
1693
+ if (mediaPart) return mediaPart;
1694
+ if (typeof v !== "string") return v;
1695
+ return this.transformString(v);
1286
1696
  });
1287
1697
  }
1288
- /**
1289
- * Closes the session connection
1290
- */
1291
- async disconnect() {
1292
- this.session.close();
1293
- }
1294
- /**
1295
- * Process input and generate response (implements AgentAdapter interface)
1296
- *
1297
- * This is called by Scenario framework for each agent turn.
1298
- * Handles both text and audio input, returns audio message with transcript.
1299
- *
1300
- * @param input - Scenario agent input with message history
1301
- * @returns Agent response as audio message or text
1302
- */
1303
- async call(input) {
1304
- console.log(`\u{1F50A} [${this.name}] being called with role: ${this.role}`);
1305
- const latestMessage = input.newMessages[input.newMessages.length - 1];
1306
- if (!latestMessage) {
1307
- return this.handleInitialResponse();
1308
- }
1309
- const audioData = this.messageProcessor.processAudioMessage(
1310
- latestMessage.content
1311
- );
1312
- if (audioData) {
1313
- return this.handleAudioInput(audioData);
1314
- }
1315
- const text = this.messageProcessor.extractTextMessage(
1316
- latestMessage.content
1317
- );
1318
- if (!text) {
1319
- throw new Error("Message has no text or audio content");
1698
+ transformString(str) {
1699
+ if (this.looksLikeJson(str)) {
1700
+ try {
1701
+ const processed = this.transformValue(JSON.parse(str));
1702
+ return JSON.stringify(processed);
1703
+ } catch {
1704
+ }
1320
1705
  }
1321
- return this.handleTextInput(text);
1706
+ const truncated = truncateMediaUrl(str);
1707
+ if (truncated !== str) return truncated;
1708
+ return this.deduplicator.process(str);
1322
1709
  }
1323
- /**
1324
- * Handles the initial response when no user message exists
1325
- */
1326
- async handleInitialResponse() {
1327
- console.log(`[${this.name}] First message, creating response`);
1328
- const sessionWithTransport = this.session;
1329
- const transport = sessionWithTransport.transport;
1330
- if (!transport) {
1331
- throw new Error("Realtime transport not available");
1332
- }
1333
- transport.sendEvent({
1334
- type: "response.create"
1335
- });
1336
- const timeout = this.config.responseTimeout ?? 6e4;
1337
- const response = await this.eventHandler.waitForResponse(timeout);
1338
- this.audioEvents.emit("audioResponse", response);
1339
- return this.responseFormatter.formatInitialResponse(response);
1710
+ looksLikeJson(str) {
1711
+ const t = str.trim();
1712
+ return t.startsWith("{") && t.endsWith("}") || t.startsWith("[") && t.endsWith("]");
1340
1713
  }
1341
- /**
1342
- * Handles audio input from the user
1343
- */
1344
- async handleAudioInput(audioData) {
1345
- const sessionWithTransport = this.session;
1346
- const transport = sessionWithTransport.transport;
1347
- if (!transport) {
1348
- throw new Error("Realtime transport not available");
1349
- }
1350
- transport.sendEvent({
1351
- type: "input_audio_buffer.append",
1352
- audio: audioData
1353
- });
1354
- transport.sendEvent({
1355
- type: "input_audio_buffer.commit"
1356
- });
1357
- transport.sendEvent({
1358
- type: "response.create"
1359
- });
1360
- const timeout = this.config.responseTimeout ?? 6e4;
1361
- const response = await this.eventHandler.waitForResponse(timeout);
1362
- this.audioEvents.emit("audioResponse", response);
1363
- return this.responseFormatter.formatAudioResponse(response);
1714
+ hrTimeToMs(hrTime) {
1715
+ return hrTime[0] * 1e3 + hrTime[1] / 1e6;
1364
1716
  }
1365
- /**
1366
- * Handles text input from the user
1367
- */
1368
- async handleTextInput(text) {
1369
- this.session.sendMessage(text);
1370
- const timeout = this.config.responseTimeout ?? 3e4;
1371
- const response = await this.eventHandler.waitForResponse(timeout);
1372
- this.audioEvents.emit("audioResponse", response);
1373
- return this.responseFormatter.formatTextResponse(response.transcript);
1717
+ calculateSpanDuration(span) {
1718
+ return this.hrTimeToMs(span.endTime) - this.hrTimeToMs(span.startTime);
1374
1719
  }
1375
- /**
1376
- * Subscribe to audio response events
1377
- *
1378
- * @param callback - Function called when an audio response completes
1379
- */
1380
- onAudioResponse(callback) {
1381
- this.audioEvents.on("audioResponse", callback);
1720
+ calculateTotalDuration(spans) {
1721
+ if (spans.length === 0) return 0;
1722
+ const first = this.hrTimeToMs(spans[0].startTime);
1723
+ const last = Math.max(...spans.map((s) => this.hrTimeToMs(s.endTime)));
1724
+ return last - first;
1382
1725
  }
1383
- /**
1384
- * Remove audio response listener
1385
- *
1386
- * @param callback - The callback function to remove
1387
- */
1388
- offAudioResponse(callback) {
1389
- this.audioEvents.off("audioResponse", callback);
1726
+ formatDuration(ms) {
1727
+ if (ms < 1e3) return `${Math.round(ms)}ms`;
1728
+ return `${(ms / 1e3).toFixed(2)}s`;
1729
+ }
1730
+ formatTimestamp(hrTime) {
1731
+ const ms = this.hrTimeToMs(hrTime);
1732
+ return new Date(ms).toISOString();
1733
+ }
1734
+ getStatusIndicator(span) {
1735
+ if (span.status.code === 2) {
1736
+ return ` \u26A0\uFE0F ERROR: ${span.status.message ?? "unknown"}`;
1737
+ }
1738
+ return "";
1739
+ }
1740
+ collectErrors(spans) {
1741
+ return spans.filter((s) => s.status.code === 2).map((s) => `- ${s.name}: ${s.status.message ?? "unknown error"}`);
1390
1742
  }
1391
1743
  };
1744
+ function getParentSpanId2(span) {
1745
+ if (span.parentSpanId) return span.parentSpanId;
1746
+ const legacy = span.parentSpanContext;
1747
+ return legacy == null ? void 0 : legacy.spanId;
1748
+ }
1749
+ var judgeSpanDigestFormatter = new JudgeSpanDigestFormatter();
1392
1750
 
1393
- // src/execution/index.ts
1394
- var execution_exports = {};
1395
- __export(execution_exports, {
1396
- ScenarioExecution: () => ScenarioExecution,
1397
- ScenarioExecutionState: () => ScenarioExecutionState,
1398
- StateChangeEventType: () => StateChangeEventType
1399
- });
1751
+ // src/agents/judge/judge-agent.ts
1752
+ function buildSystemPrompt(criteria, description) {
1753
+ const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
1754
+ return `
1755
+ <role>
1756
+ You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
1757
+ </role>
1400
1758
 
1401
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
1402
- var _globalThis = typeof globalThis === "object" ? globalThis : global;
1759
+ <goal>
1760
+ Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
1761
+ If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
1762
+ </goal>
1403
1763
 
1404
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/version.js
1405
- var VERSION = "1.9.0";
1764
+ <scenario>
1765
+ ${description}
1766
+ </scenario>
1406
1767
 
1407
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/semver.js
1408
- var re = /^(\d+)\.(\d+)\.(\d+)(-(.+))?$/;
1409
- function _makeCompatibilityCheck(ownVersion) {
1410
- var acceptedVersions = /* @__PURE__ */ new Set([ownVersion]);
1411
- var rejectedVersions = /* @__PURE__ */ new Set();
1412
- var myVersionMatch = ownVersion.match(re);
1413
- if (!myVersionMatch) {
1414
- return function() {
1415
- return false;
1416
- };
1417
- }
1418
- var ownVersionParsed = {
1419
- major: +myVersionMatch[1],
1420
- minor: +myVersionMatch[2],
1421
- patch: +myVersionMatch[3],
1422
- prerelease: myVersionMatch[4]
1423
- };
1424
- if (ownVersionParsed.prerelease != null) {
1425
- return function isExactmatch(globalVersion) {
1426
- return globalVersion === ownVersion;
1427
- };
1428
- }
1429
- function _reject(v) {
1430
- rejectedVersions.add(v);
1431
- return false;
1432
- }
1433
- function _accept(v) {
1434
- acceptedVersions.add(v);
1435
- return true;
1436
- }
1437
- return function isCompatible2(globalVersion) {
1438
- if (acceptedVersions.has(globalVersion)) {
1439
- return true;
1440
- }
1441
- if (rejectedVersions.has(globalVersion)) {
1442
- return false;
1443
- }
1444
- var globalVersionMatch = globalVersion.match(re);
1445
- if (!globalVersionMatch) {
1446
- return _reject(globalVersion);
1447
- }
1448
- var globalVersionParsed = {
1449
- major: +globalVersionMatch[1],
1450
- minor: +globalVersionMatch[2],
1451
- patch: +globalVersionMatch[3],
1452
- prerelease: globalVersionMatch[4]
1453
- };
1454
- if (globalVersionParsed.prerelease != null) {
1455
- return _reject(globalVersion);
1456
- }
1457
- if (ownVersionParsed.major !== globalVersionParsed.major) {
1458
- return _reject(globalVersion);
1459
- }
1460
- if (ownVersionParsed.major === 0) {
1461
- if (ownVersionParsed.minor === globalVersionParsed.minor && ownVersionParsed.patch <= globalVersionParsed.patch) {
1462
- return _accept(globalVersion);
1463
- }
1464
- return _reject(globalVersion);
1465
- }
1466
- if (ownVersionParsed.minor <= globalVersionParsed.minor) {
1467
- return _accept(globalVersion);
1468
- }
1469
- return _reject(globalVersion);
1470
- };
1471
- }
1472
- var isCompatible = _makeCompatibilityCheck(VERSION);
1768
+ <criteria>
1769
+ ${criteriaList}
1770
+ </criteria>
1473
1771
 
1474
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/global-utils.js
1475
- var major = VERSION.split(".")[0];
1476
- var GLOBAL_OPENTELEMETRY_API_KEY = Symbol.for("opentelemetry.js.api." + major);
1477
- var _global = _globalThis;
1478
- function registerGlobal(type, instance, diag, allowOverride) {
1479
- var _a;
1480
- if (allowOverride === void 0) {
1481
- allowOverride = false;
1482
- }
1483
- var api = _global[GLOBAL_OPENTELEMETRY_API_KEY] = (_a = _global[GLOBAL_OPENTELEMETRY_API_KEY]) !== null && _a !== void 0 ? _a : {
1484
- version: VERSION
1485
- };
1486
- if (!allowOverride && api[type]) {
1487
- var err = new Error("@opentelemetry/api: Attempted duplicate registration of API: " + type);
1488
- diag.error(err.stack || err.message);
1489
- return false;
1490
- }
1491
- if (api.version !== VERSION) {
1492
- var err = new Error("@opentelemetry/api: Registration of version v" + api.version + " for " + type + " does not match previously registered API v" + VERSION);
1493
- diag.error(err.stack || err.message);
1494
- return false;
1495
- }
1496
- api[type] = instance;
1497
- diag.debug("@opentelemetry/api: Registered a global for " + type + " v" + VERSION + ".");
1498
- return true;
1772
+ <rules>
1773
+ - Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.
1774
+ - DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
1775
+ </rules>
1776
+ `.trim();
1499
1777
  }
1500
- function getGlobal(type) {
1501
- var _a, _b;
1502
- var globalVersion = (_a = _global[GLOBAL_OPENTELEMETRY_API_KEY]) === null || _a === void 0 ? void 0 : _a.version;
1503
- if (!globalVersion || !isCompatible(globalVersion)) {
1504
- return;
1505
- }
1506
- return (_b = _global[GLOBAL_OPENTELEMETRY_API_KEY]) === null || _b === void 0 ? void 0 : _b[type];
1778
+ function buildContinueTestTool() {
1779
+ return tool({
1780
+ description: "Continue the test with the next step",
1781
+ inputSchema: z4.object({})
1782
+ });
1507
1783
  }
1508
- function unregisterGlobal(type, diag) {
1509
- diag.debug("@opentelemetry/api: Unregistering a global for " + type + " v" + VERSION + ".");
1510
- var api = _global[GLOBAL_OPENTELEMETRY_API_KEY];
1511
- if (api) {
1512
- delete api[type];
1513
- }
1784
+ function buildFinishTestTool(criteria) {
1785
+ const criteriaNames = criteria.map(criterionToParamName);
1786
+ return tool({
1787
+ description: "Complete the test with a final verdict",
1788
+ inputSchema: z4.object({
1789
+ criteria: z4.object(
1790
+ Object.fromEntries(
1791
+ criteriaNames.map((name, idx) => [
1792
+ name,
1793
+ z4.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
1794
+ ])
1795
+ )
1796
+ ).strict().describe("Strict verdict for each criterion"),
1797
+ reasoning: z4.string().describe("Explanation of what the final verdict should be"),
1798
+ verdict: z4.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
1799
+ })
1800
+ });
1514
1801
  }
1515
-
1516
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/ComponentLogger.js
1517
- var __read = function(o, n) {
1518
- var m = typeof Symbol === "function" && o[Symbol.iterator];
1519
- if (!m) return o;
1520
- var i = m.call(o), r, ar = [], e;
1521
- try {
1522
- while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
1523
- } catch (error) {
1524
- e = { error };
1525
- } finally {
1526
- try {
1527
- if (r && !r.done && (m = i["return"])) m.call(i);
1528
- } finally {
1529
- if (e) throw e.error;
1530
- }
1531
- }
1532
- return ar;
1533
- };
1534
- var __spreadArray = function(to, from, pack) {
1535
- if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
1536
- if (ar || !(i in from)) {
1537
- if (!ar) ar = Array.prototype.slice.call(from, 0, i);
1538
- ar[i] = from[i];
1539
- }
1802
+ var JudgeAgent = class extends JudgeAgentAdapter {
1803
+ constructor(cfg) {
1804
+ super();
1805
+ this.cfg = cfg;
1806
+ this.criteria = cfg.criteria ?? [];
1807
+ this.spanCollector = cfg.spanCollector ?? judgeSpanCollector;
1540
1808
  }
1541
- return to.concat(ar || Array.prototype.slice.call(from));
1542
- };
1543
- var DiagComponentLogger = (
1544
- /** @class */
1545
- (function() {
1546
- function DiagComponentLogger2(props) {
1547
- this._namespace = props.namespace || "DiagComponentLogger";
1548
- }
1549
- DiagComponentLogger2.prototype.debug = function() {
1550
- var args = [];
1551
- for (var _i = 0; _i < arguments.length; _i++) {
1552
- args[_i] = arguments[_i];
1553
- }
1554
- return logProxy("debug", this._namespace, args);
1555
- };
1556
- DiagComponentLogger2.prototype.error = function() {
1557
- var args = [];
1558
- for (var _i = 0; _i < arguments.length; _i++) {
1559
- args[_i] = arguments[_i];
1560
- }
1561
- return logProxy("error", this._namespace, args);
1562
- };
1563
- DiagComponentLogger2.prototype.info = function() {
1564
- var args = [];
1565
- for (var _i = 0; _i < arguments.length; _i++) {
1566
- args[_i] = arguments[_i];
1567
- }
1568
- return logProxy("info", this._namespace, args);
1569
- };
1570
- DiagComponentLogger2.prototype.warn = function() {
1571
- var args = [];
1572
- for (var _i = 0; _i < arguments.length; _i++) {
1573
- args[_i] = arguments[_i];
1574
- }
1575
- return logProxy("warn", this._namespace, args);
1809
+ logger = new Logger("JudgeAgent");
1810
+ spanCollector;
1811
+ role = "Judge" /* JUDGE */;
1812
+ criteria;
1813
+ /**
1814
+ * LLM invocation function. Can be overridden to customize LLM behavior.
1815
+ */
1816
+ invokeLLM = createLLMInvoker(this.logger);
1817
+ async call(input) {
1818
+ var _a, _b, _c, _d;
1819
+ const criteria = ((_a = input.judgmentRequest) == null ? void 0 : _a.criteria) ?? this.criteria;
1820
+ this.logger.debug("call() invoked", {
1821
+ threadId: input.threadId,
1822
+ currentTurn: input.scenarioState.currentTurn,
1823
+ maxTurns: input.scenarioConfig.maxTurns,
1824
+ judgmentRequest: input.judgmentRequest
1825
+ });
1826
+ const digest = this.getOpenTelemetryTracesDigest(input.threadId);
1827
+ this.logger.debug("OpenTelemetry traces built", { digest });
1828
+ const transcript = JudgeUtils.buildTranscriptFromMessages(input.messages);
1829
+ const contentForJudge = `
1830
+ <transcript>
1831
+ ${transcript}
1832
+ </transcript>
1833
+ <opentelemetry_traces>
1834
+ ${digest}
1835
+ </opentelemetry_traces>
1836
+ `;
1837
+ const cfg = this.cfg;
1838
+ const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(criteria, input.scenarioConfig.description);
1839
+ const messages = [
1840
+ { role: "system", content: systemPrompt },
1841
+ { role: "user", content: contentForJudge }
1842
+ ];
1843
+ const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
1844
+ const projectConfig = await getProjectConfig();
1845
+ const mergedConfig = modelSchema.parse({
1846
+ ...projectConfig == null ? void 0 : projectConfig.defaultModel,
1847
+ ...cfg
1848
+ });
1849
+ const tools = {
1850
+ continue_test: buildContinueTestTool(),
1851
+ finish_test: buildFinishTestTool(criteria)
1576
1852
  };
1577
- DiagComponentLogger2.prototype.verbose = function() {
1578
- var args = [];
1579
- for (var _i = 0; _i < arguments.length; _i++) {
1580
- args[_i] = arguments[_i];
1853
+ const enforceJudgement = input.judgmentRequest != null;
1854
+ const hasCriteria = criteria.length && criteria.length > 0;
1855
+ if (enforceJudgement && !hasCriteria) {
1856
+ return {
1857
+ success: false,
1858
+ reasoning: "JudgeAgent: No criteria was provided to be judged against",
1859
+ metCriteria: [],
1860
+ unmetCriteria: []
1861
+ };
1862
+ }
1863
+ const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
1864
+ this.logger.debug("Calling LLM", {
1865
+ model: mergedConfig.model,
1866
+ toolChoice,
1867
+ isLastMessage,
1868
+ enforceJudgement
1869
+ });
1870
+ const completion = await this.invokeLLM({
1871
+ model: mergedConfig.model,
1872
+ messages,
1873
+ temperature: mergedConfig.temperature ?? 0,
1874
+ maxOutputTokens: mergedConfig.maxTokens,
1875
+ tools,
1876
+ toolChoice
1877
+ });
1878
+ this.logger.debug("LLM response received", {
1879
+ toolCallCount: ((_b = completion.toolCalls) == null ? void 0 : _b.length) ?? 0,
1880
+ toolCalls: (_c = completion.toolCalls) == null ? void 0 : _c.map((tc) => ({
1881
+ toolName: tc.toolName,
1882
+ args: tc.input
1883
+ }))
1884
+ });
1885
+ let args;
1886
+ if ((_d = completion.toolCalls) == null ? void 0 : _d.length) {
1887
+ const toolCall = completion.toolCalls[0];
1888
+ switch (toolCall.toolName) {
1889
+ case "finish_test": {
1890
+ args = toolCall.input;
1891
+ const verdict = args.verdict || "inconclusive";
1892
+ const reasoning = args.reasoning || "No reasoning provided";
1893
+ const criteriaArgs = args.criteria || {};
1894
+ const criteriaValues = Object.values(criteriaArgs);
1895
+ const metCriteria = criteria.filter(
1896
+ (_, i) => criteriaValues[i] === "true"
1897
+ );
1898
+ const unmetCriteria = criteria.filter(
1899
+ (_, i) => criteriaValues[i] !== "true"
1900
+ );
1901
+ const result = {
1902
+ success: verdict === "success",
1903
+ reasoning,
1904
+ metCriteria,
1905
+ unmetCriteria
1906
+ };
1907
+ this.logger.debug("finish_test result", result);
1908
+ return result;
1909
+ }
1910
+ case "continue_test":
1911
+ this.logger.debug("continue_test - proceeding to next turn");
1912
+ return null;
1913
+ default:
1914
+ return {
1915
+ success: false,
1916
+ reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
1917
+ metCriteria: [],
1918
+ unmetCriteria: criteria
1919
+ };
1581
1920
  }
1582
- return logProxy("verbose", this._namespace, args);
1583
- };
1584
- return DiagComponentLogger2;
1585
- })()
1586
- );
1587
- function logProxy(funcName, namespace, args) {
1588
- var logger2 = getGlobal("diag");
1589
- if (!logger2) {
1590
- return;
1591
- }
1592
- args.unshift(namespace);
1593
- return logger2[funcName].apply(logger2, __spreadArray([], __read(args), false));
1594
- }
1595
-
1596
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/types.js
1597
- var DiagLogLevel;
1598
- (function(DiagLogLevel2) {
1599
- DiagLogLevel2[DiagLogLevel2["NONE"] = 0] = "NONE";
1600
- DiagLogLevel2[DiagLogLevel2["ERROR"] = 30] = "ERROR";
1601
- DiagLogLevel2[DiagLogLevel2["WARN"] = 50] = "WARN";
1602
- DiagLogLevel2[DiagLogLevel2["INFO"] = 60] = "INFO";
1603
- DiagLogLevel2[DiagLogLevel2["DEBUG"] = 70] = "DEBUG";
1604
- DiagLogLevel2[DiagLogLevel2["VERBOSE"] = 80] = "VERBOSE";
1605
- DiagLogLevel2[DiagLogLevel2["ALL"] = 9999] = "ALL";
1606
- })(DiagLogLevel || (DiagLogLevel = {}));
1607
-
1608
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/internal/logLevelLogger.js
1609
- function createLogLevelDiagLogger(maxLevel, logger2) {
1610
- if (maxLevel < DiagLogLevel.NONE) {
1611
- maxLevel = DiagLogLevel.NONE;
1612
- } else if (maxLevel > DiagLogLevel.ALL) {
1613
- maxLevel = DiagLogLevel.ALL;
1614
- }
1615
- logger2 = logger2 || {};
1616
- function _filterFunc(funcName, theLevel) {
1617
- var theFunc = logger2[funcName];
1618
- if (typeof theFunc === "function" && maxLevel >= theLevel) {
1619
- return theFunc.bind(logger2);
1620
1921
  }
1621
- return function() {
1922
+ return {
1923
+ success: false,
1924
+ reasoning: `JudgeAgent: No tool call found in LLM output`,
1925
+ metCriteria: [],
1926
+ unmetCriteria: criteria
1622
1927
  };
1623
1928
  }
1624
- return {
1625
- error: _filterFunc("error", DiagLogLevel.ERROR),
1626
- warn: _filterFunc("warn", DiagLogLevel.WARN),
1627
- info: _filterFunc("info", DiagLogLevel.INFO),
1628
- debug: _filterFunc("debug", DiagLogLevel.DEBUG),
1629
- verbose: _filterFunc("verbose", DiagLogLevel.VERBOSE)
1630
- };
1631
- }
1632
-
1633
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/diag.js
1634
- var __read2 = function(o, n) {
1635
- var m = typeof Symbol === "function" && o[Symbol.iterator];
1636
- if (!m) return o;
1637
- var i = m.call(o), r, ar = [], e;
1638
- try {
1639
- while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
1640
- } catch (error) {
1641
- e = { error };
1642
- } finally {
1643
- try {
1644
- if (r && !r.done && (m = i["return"])) m.call(i);
1645
- } finally {
1646
- if (e) throw e.error;
1647
- }
1929
+ getOpenTelemetryTracesDigest(threadId) {
1930
+ const spans = this.spanCollector.getSpansForThread(threadId);
1931
+ const digest = judgeSpanDigestFormatter.format(spans);
1932
+ return digest;
1648
1933
  }
1649
- return ar;
1650
1934
  };
1651
- var __spreadArray2 = function(to, from, pack) {
1652
- if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
1653
- if (ar || !(i in from)) {
1654
- if (!ar) ar = Array.prototype.slice.call(from, 0, i);
1655
- ar[i] = from[i];
1656
- }
1657
- }
1658
- return to.concat(ar || Array.prototype.slice.call(from));
1935
+ var judgeAgent = (cfg) => {
1936
+ return new JudgeAgent(cfg ?? {});
1659
1937
  };
1660
- var API_NAME = "diag";
1661
- var DiagAPI = (
1662
- /** @class */
1663
- (function() {
1664
- function DiagAPI2() {
1665
- function _logProxy(funcName) {
1666
- return function() {
1667
- var args = [];
1668
- for (var _i = 0; _i < arguments.length; _i++) {
1669
- args[_i] = arguments[_i];
1670
- }
1671
- var logger2 = getGlobal("diag");
1672
- if (!logger2)
1673
- return;
1674
- return logger2[funcName].apply(logger2, __spreadArray2([], __read2(args), false));
1675
- };
1676
- }
1677
- var self = this;
1678
- var setLogger = function(logger2, optionsOrLogLevel) {
1679
- var _a, _b, _c;
1680
- if (optionsOrLogLevel === void 0) {
1681
- optionsOrLogLevel = { logLevel: DiagLogLevel.INFO };
1682
- }
1683
- if (logger2 === self) {
1684
- var err = new Error("Cannot use diag as the logger for itself. Please use a DiagLogger implementation like ConsoleDiagLogger or a custom implementation");
1685
- self.error((_a = err.stack) !== null && _a !== void 0 ? _a : err.message);
1686
- return false;
1687
- }
1688
- if (typeof optionsOrLogLevel === "number") {
1689
- optionsOrLogLevel = {
1690
- logLevel: optionsOrLogLevel
1691
- };
1692
- }
1693
- var oldLogger = getGlobal("diag");
1694
- var newLogger = createLogLevelDiagLogger((_b = optionsOrLogLevel.logLevel) !== null && _b !== void 0 ? _b : DiagLogLevel.INFO, logger2);
1695
- if (oldLogger && !optionsOrLogLevel.suppressOverrideMessage) {
1696
- var stack = (_c = new Error().stack) !== null && _c !== void 0 ? _c : "<failed to generate stacktrace>";
1697
- oldLogger.warn("Current logger will be overwritten from " + stack);
1698
- newLogger.warn("Current logger will overwrite one already registered from " + stack);
1699
- }
1700
- return registerGlobal("diag", newLogger, self, true);
1701
- };
1702
- self.setLogger = setLogger;
1703
- self.disable = function() {
1704
- unregisterGlobal(API_NAME, self);
1705
- };
1706
- self.createComponentLogger = function(options) {
1707
- return new DiagComponentLogger(options);
1708
- };
1709
- self.verbose = _logProxy("verbose");
1710
- self.debug = _logProxy("debug");
1711
- self.info = _logProxy("info");
1712
- self.warn = _logProxy("warn");
1713
- self.error = _logProxy("error");
1714
- }
1715
- DiagAPI2.instance = function() {
1716
- if (!this._instance) {
1717
- this._instance = new DiagAPI2();
1718
- }
1719
- return this._instance;
1720
- };
1721
- return DiagAPI2;
1722
- })()
1723
- );
1724
1938
 
1725
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/context.js
1726
- function createContextKey(description) {
1727
- return Symbol.for(description);
1728
- }
1729
- var BaseContext = (
1730
- /** @class */
1731
- /* @__PURE__ */ (function() {
1732
- function BaseContext2(parentContext) {
1733
- var self = this;
1734
- self._currentContext = parentContext ? new Map(parentContext) : /* @__PURE__ */ new Map();
1735
- self.getValue = function(key) {
1736
- return self._currentContext.get(key);
1737
- };
1738
- self.setValue = function(key, value) {
1739
- var context2 = new BaseContext2(self._currentContext);
1740
- context2._currentContext.set(key, value);
1741
- return context2;
1742
- };
1743
- self.deleteValue = function(key) {
1744
- var context2 = new BaseContext2(self._currentContext);
1745
- context2._currentContext.delete(key);
1746
- return context2;
1747
- };
1748
- }
1749
- return BaseContext2;
1750
- })()
1751
- );
1752
- var ROOT_CONTEXT = new BaseContext();
1939
+ // src/agents/user-simulator-agent.ts
1940
+ function buildSystemPrompt2(description) {
1941
+ return `
1942
+ <role>
1943
+ You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.
1944
+ Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.
1945
+ </role>
1753
1946
 
1754
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/NoopContextManager.js
1755
- var __read3 = function(o, n) {
1756
- var m = typeof Symbol === "function" && o[Symbol.iterator];
1757
- if (!m) return o;
1758
- var i = m.call(o), r, ar = [], e;
1759
- try {
1760
- while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
1761
- } catch (error) {
1762
- e = { error };
1763
- } finally {
1764
- try {
1765
- if (r && !r.done && (m = i["return"])) m.call(i);
1766
- } finally {
1767
- if (e) throw e.error;
1768
- }
1947
+ <goal>
1948
+ Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.
1949
+ </goal>
1950
+
1951
+ <scenario>
1952
+ ${description}
1953
+ </scenario>
1954
+
1955
+ <rules>
1956
+ - DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
1957
+ </rules>
1958
+ `.trim();
1959
+ }
1960
+ var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
1961
+ constructor(cfg) {
1962
+ super();
1963
+ this.cfg = cfg;
1769
1964
  }
1770
- return ar;
1771
- };
1772
- var __spreadArray3 = function(to, from, pack) {
1773
- if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
1774
- if (ar || !(i in from)) {
1775
- if (!ar) ar = Array.prototype.slice.call(from, 0, i);
1776
- ar[i] = from[i];
1965
+ logger = new Logger(this.constructor.name);
1966
+ /**
1967
+ * LLM invocation function. Can be overridden to customize LLM behavior.
1968
+ */
1969
+ invokeLLM = createLLMInvoker(this.logger);
1970
+ call = async (input) => {
1971
+ const config2 = this.cfg;
1972
+ const systemPrompt = (config2 == null ? void 0 : config2.systemPrompt) ?? buildSystemPrompt2(input.scenarioConfig.description);
1973
+ const messages = [
1974
+ { role: "system", content: systemPrompt },
1975
+ { role: "assistant", content: "Hello, how can I help you today" },
1976
+ ...input.messages
1977
+ ];
1978
+ const projectConfig = await getProjectConfig();
1979
+ const mergedConfig = modelSchema.parse({
1980
+ ...projectConfig == null ? void 0 : projectConfig.defaultModel,
1981
+ ...config2
1982
+ });
1983
+ const reversedMessages = messageRoleReversal(messages);
1984
+ const completion = await this.invokeLLM({
1985
+ model: mergedConfig.model,
1986
+ messages: reversedMessages,
1987
+ temperature: mergedConfig.temperature,
1988
+ maxOutputTokens: mergedConfig.maxTokens
1989
+ });
1990
+ const messageContent = completion.text;
1991
+ if (!messageContent) {
1992
+ throw new Error("No response content from LLM");
1777
1993
  }
1778
- }
1779
- return to.concat(ar || Array.prototype.slice.call(from));
1994
+ return { role: "user", content: messageContent };
1995
+ };
1780
1996
  };
1781
- var NoopContextManager = (
1782
- /** @class */
1783
- (function() {
1784
- function NoopContextManager2() {
1997
// Factory wrapper: builds a UserSimulatorAgent from the given config object.
var userSimulatorAgent = (config2) => new UserSimulatorAgent(config2);
2000
+
2001
+ // src/agents/realtime/realtime-agent.adapter.ts
2002
+ import { EventEmitter } from "events";
2003
+
2004
+ // src/agents/realtime/message-processor.ts
2005
+ var MessageProcessor = class {
2006
+ /**
2007
+ * Processes audio message content and extracts base64 audio data
2008
+ *
2009
+ * @param content - The message content to process
2010
+ * @returns Base64 audio data string or null if no audio found
2011
+ * @throws {Error} If audio data is invalid
2012
+ */
2013
+ processAudioMessage(content) {
2014
+ if (!Array.isArray(content)) {
2015
+ return null;
1785
2016
  }
1786
- NoopContextManager2.prototype.active = function() {
1787
- return ROOT_CONTEXT;
1788
- };
1789
- NoopContextManager2.prototype.with = function(_context, fn, thisArg) {
1790
- var args = [];
1791
- for (var _i = 3; _i < arguments.length; _i++) {
1792
- args[_i - 3] = arguments[_i];
2017
+ for (const part of content) {
2018
+ if (typeof part === "object" && part !== null && "type" in part && part.type === "file" && "mediaType" in part && typeof part.mediaType === "string" && part.mediaType.startsWith("audio/")) {
2019
+ if (!("data" in part) || typeof part.data !== "string") {
2020
+ throw new Error(
2021
+ `Audio data must be base64 string, got: ${typeof part.data}`
2022
+ );
2023
+ }
2024
+ if (!part.data || part.data.length === 0) {
2025
+ throw new Error(
2026
+ `Audio message has no data. Part: ${JSON.stringify(part)}`
2027
+ );
2028
+ }
2029
+ return part.data;
1793
2030
  }
1794
- return fn.call.apply(fn, __spreadArray3([thisArg], __read3(args), false));
1795
- };
1796
- NoopContextManager2.prototype.bind = function(_context, target) {
1797
- return target;
1798
- };
1799
- NoopContextManager2.prototype.enable = function() {
1800
- return this;
1801
- };
1802
- NoopContextManager2.prototype.disable = function() {
1803
- return this;
1804
- };
1805
- return NoopContextManager2;
1806
- })()
1807
- );
1808
-
1809
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/context.js
1810
- var __read4 = function(o, n) {
1811
- var m = typeof Symbol === "function" && o[Symbol.iterator];
1812
- if (!m) return o;
1813
- var i = m.call(o), r, ar = [], e;
1814
- try {
1815
- while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
1816
- } catch (error) {
1817
- e = { error };
1818
- } finally {
1819
- try {
1820
- if (r && !r.done && (m = i["return"])) m.call(i);
1821
- } finally {
1822
- if (e) throw e.error;
1823
2031
  }
2032
+ return null;
1824
2033
  }
1825
- return ar;
1826
- };
1827
- var __spreadArray4 = function(to, from, pack) {
1828
- if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
1829
- if (ar || !(i in from)) {
1830
- if (!ar) ar = Array.prototype.slice.call(from, 0, i);
1831
- ar[i] = from[i];
1832
- }
2034
+ /**
2035
+ * Extracts text content from message content
2036
+ *
2037
+ * @param content - The message content to process
2038
+ * @returns Text string or empty string if no text found
2039
+ */
2040
+ extractTextMessage(content) {
2041
+ return typeof content === "string" ? content : "";
2042
+ }
2043
+ /**
2044
+ * Validates that a message has either text or audio content
2045
+ *
2046
+ * @param content - The message content to validate
2047
+ * @returns True if the message has valid content
2048
+ */
2049
+ hasValidContent(content) {
2050
+ const hasText = this.extractTextMessage(content).length > 0;
2051
+ const hasAudio = this.processAudioMessage(content) !== null;
2052
+ return hasText || hasAudio;
1833
2053
  }
1834
- return to.concat(ar || Array.prototype.slice.call(from));
1835
- };
1836
- var API_NAME2 = "context";
1837
- var NOOP_CONTEXT_MANAGER = new NoopContextManager();
1838
- var ContextAPI = (
1839
- /** @class */
1840
- (function() {
1841
- function ContextAPI2() {
1842
- }
1843
- ContextAPI2.getInstance = function() {
1844
- if (!this._instance) {
1845
- this._instance = new ContextAPI2();
1846
- }
1847
- return this._instance;
1848
- };
1849
- ContextAPI2.prototype.setGlobalContextManager = function(contextManager) {
1850
- return registerGlobal(API_NAME2, contextManager, DiagAPI.instance());
1851
- };
1852
- ContextAPI2.prototype.active = function() {
1853
- return this._getContextManager().active();
1854
- };
1855
- ContextAPI2.prototype.with = function(context2, fn, thisArg) {
1856
- var _a;
1857
- var args = [];
1858
- for (var _i = 3; _i < arguments.length; _i++) {
1859
- args[_i - 3] = arguments[_i];
1860
- }
1861
- return (_a = this._getContextManager()).with.apply(_a, __spreadArray4([context2, fn, thisArg], __read4(args), false));
1862
- };
1863
- ContextAPI2.prototype.bind = function(context2, target) {
1864
- return this._getContextManager().bind(context2, target);
1865
- };
1866
- ContextAPI2.prototype._getContextManager = function() {
1867
- return getGlobal(API_NAME2) || NOOP_CONTEXT_MANAGER;
1868
- };
1869
- ContextAPI2.prototype.disable = function() {
1870
- this._getContextManager().disable();
1871
- unregisterGlobal(API_NAME2, DiagAPI.instance());
1872
- };
1873
- return ContextAPI2;
1874
- })()
1875
- );
1876
-
1877
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/trace_flags.js
1878
- var TraceFlags;
1879
- (function(TraceFlags2) {
1880
- TraceFlags2[TraceFlags2["NONE"] = 0] = "NONE";
1881
- TraceFlags2[TraceFlags2["SAMPLED"] = 1] = "SAMPLED";
1882
- })(TraceFlags || (TraceFlags = {}));
1883
-
1884
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/invalid-span-constants.js
1885
- var INVALID_SPANID = "0000000000000000";
1886
- var INVALID_TRACEID = "00000000000000000000000000000000";
1887
- var INVALID_SPAN_CONTEXT = {
1888
- traceId: INVALID_TRACEID,
1889
- spanId: INVALID_SPANID,
1890
- traceFlags: TraceFlags.NONE
1891
2054
  };
1892
2055
 
1893
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NonRecordingSpan.js
1894
- var NonRecordingSpan = (
1895
- /** @class */
1896
- (function() {
1897
- function NonRecordingSpan2(_spanContext) {
1898
- if (_spanContext === void 0) {
1899
- _spanContext = INVALID_SPAN_CONTEXT;
1900
- }
1901
- this._spanContext = _spanContext;
2056
+ // src/agents/realtime/realtime-event-handler.ts
2057
+ var RealtimeEventHandler = class {
2058
+ /**
2059
+ * Creates a new RealtimeEventHandler instance
2060
+ * @param session - The RealtimeSession to listen to events from
2061
+ */
2062
+ constructor(session) {
2063
+ this.session = session;
2064
+ this.ensureEventListeners();
2065
+ }
2066
+ currentResponse = "";
2067
+ currentAudioChunks = [];
2068
+ responseResolver = null;
2069
+ errorRejecter = null;
2070
+ listenersSetup = false;
2071
+ /**
2072
+ * Gets the transport from the session
2073
+ */
2074
+ getTransport() {
2075
+ const sessionWithTransport = this.session;
2076
+ return sessionWithTransport.transport ?? null;
2077
+ }
2078
+ /**
2079
+ * Ensures event listeners are set up, retrying if transport not available
2080
+ */
2081
+ ensureEventListeners() {
2082
+ if (this.listenersSetup) return;
2083
+ const transport = this.getTransport();
2084
+ if (!transport) {
2085
+ setTimeout(() => this.ensureEventListeners(), 100);
2086
+ return;
1902
2087
  }
1903
- NonRecordingSpan2.prototype.spanContext = function() {
1904
- return this._spanContext;
1905
- };
1906
- NonRecordingSpan2.prototype.setAttribute = function(_key, _value) {
1907
- return this;
1908
- };
1909
- NonRecordingSpan2.prototype.setAttributes = function(_attributes) {
1910
- return this;
1911
- };
1912
- NonRecordingSpan2.prototype.addEvent = function(_name, _attributes) {
1913
- return this;
1914
- };
1915
- NonRecordingSpan2.prototype.addLink = function(_link) {
1916
- return this;
1917
- };
1918
- NonRecordingSpan2.prototype.addLinks = function(_links) {
1919
- return this;
1920
- };
1921
- NonRecordingSpan2.prototype.setStatus = function(_status) {
1922
- return this;
1923
- };
1924
- NonRecordingSpan2.prototype.updateName = function(_name) {
1925
- return this;
1926
- };
1927
- NonRecordingSpan2.prototype.end = function(_endTime) {
1928
- };
1929
- NonRecordingSpan2.prototype.isRecording = function() {
1930
- return false;
1931
- };
1932
- NonRecordingSpan2.prototype.recordException = function(_exception, _time) {
1933
- };
1934
- return NonRecordingSpan2;
1935
- })()
1936
- );
1937
-
1938
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/context-utils.js
1939
- var SPAN_KEY = createContextKey("OpenTelemetry Context Key SPAN");
1940
- function getSpan(context2) {
1941
- return context2.getValue(SPAN_KEY) || void 0;
1942
- }
1943
- function getActiveSpan() {
1944
- return getSpan(ContextAPI.getInstance().active());
1945
- }
1946
- function setSpan(context2, span) {
1947
- return context2.setValue(SPAN_KEY, span);
1948
- }
1949
- function deleteSpan(context2) {
1950
- return context2.deleteValue(SPAN_KEY);
1951
- }
1952
- function setSpanContext(context2, spanContext) {
1953
- return setSpan(context2, new NonRecordingSpan(spanContext));
1954
- }
1955
- function getSpanContext(context2) {
1956
- var _a;
1957
- return (_a = getSpan(context2)) === null || _a === void 0 ? void 0 : _a.spanContext();
1958
- }
1959
-
1960
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/spancontext-utils.js
1961
- var VALID_TRACEID_REGEX = /^([0-9a-f]{32})$/i;
1962
- var VALID_SPANID_REGEX = /^[0-9a-f]{16}$/i;
1963
- function isValidTraceId(traceId) {
1964
- return VALID_TRACEID_REGEX.test(traceId) && traceId !== INVALID_TRACEID;
1965
- }
1966
- function isValidSpanId(spanId) {
1967
- return VALID_SPANID_REGEX.test(spanId) && spanId !== INVALID_SPANID;
1968
- }
1969
- function isSpanContextValid(spanContext) {
1970
- return isValidTraceId(spanContext.traceId) && isValidSpanId(spanContext.spanId);
1971
- }
1972
- function wrapSpanContext(spanContext) {
1973
- return new NonRecordingSpan(spanContext);
1974
- }
1975
-
1976
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracer.js
1977
- var contextApi = ContextAPI.getInstance();
1978
- var NoopTracer = (
1979
- /** @class */
1980
- (function() {
1981
- function NoopTracer2() {
2088
+ this.setupEventListeners();
2089
+ }
2090
+ /**
2091
+ * Sets up event listeners for the RealtimeSession transport layer
2092
+ */
2093
+ setupEventListeners() {
2094
+ if (this.listenersSetup) return;
2095
+ const transport = this.getTransport();
2096
+ if (!transport) {
2097
+ console.error("\u274C Transport not available on session");
2098
+ return;
1982
2099
  }
1983
- NoopTracer2.prototype.startSpan = function(name, options, context2) {
1984
- if (context2 === void 0) {
1985
- context2 = contextApi.active();
1986
- }
1987
- var root = Boolean(options === null || options === void 0 ? void 0 : options.root);
1988
- if (root) {
1989
- return new NonRecordingSpan();
2100
+ transport.on("response.output_audio_transcript.delta", (event) => {
2101
+ const deltaEvent = event;
2102
+ if (typeof deltaEvent.delta === "string") {
2103
+ this.currentResponse += deltaEvent.delta;
1990
2104
  }
1991
- var parentFromContext = context2 && getSpanContext(context2);
1992
- if (isSpanContext(parentFromContext) && isSpanContextValid(parentFromContext)) {
1993
- return new NonRecordingSpan(parentFromContext);
1994
- } else {
1995
- return new NonRecordingSpan();
2105
+ });
2106
+ transport.on("response.output_audio.delta", (event) => {
2107
+ const deltaEvent = event;
2108
+ if (typeof deltaEvent.delta === "string") {
2109
+ this.currentAudioChunks.push(deltaEvent.delta);
1996
2110
  }
1997
- };
1998
- NoopTracer2.prototype.startActiveSpan = function(name, arg2, arg3, arg4) {
1999
- var opts;
2000
- var ctx;
2001
- var fn;
2002
- if (arguments.length < 2) {
2003
- return;
2004
- } else if (arguments.length === 2) {
2005
- fn = arg2;
2006
- } else if (arguments.length === 3) {
2007
- opts = arg2;
2008
- fn = arg3;
2009
- } else {
2010
- opts = arg2;
2011
- ctx = arg3;
2012
- fn = arg4;
2013
- }
2014
- var parentContext = ctx !== null && ctx !== void 0 ? ctx : contextApi.active();
2015
- var span = this.startSpan(name, opts, parentContext);
2016
- var contextWithSpanSet = setSpan(parentContext, span);
2017
- return contextApi.with(contextWithSpanSet, fn, void 0, span);
2018
- };
2019
- return NoopTracer2;
2020
- })()
2021
- );
2022
- function isSpanContext(spanContext) {
2023
- return typeof spanContext === "object" && typeof spanContext["spanId"] === "string" && typeof spanContext["traceId"] === "string" && typeof spanContext["traceFlags"] === "number";
2024
- }
2025
-
2026
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracer.js
2027
- var NOOP_TRACER = new NoopTracer();
2028
- var ProxyTracer = (
2029
- /** @class */
2030
- (function() {
2031
- function ProxyTracer2(_provider, name, version, options) {
2032
- this._provider = _provider;
2033
- this.name = name;
2034
- this.version = version;
2035
- this.options = options;
2036
- }
2037
- ProxyTracer2.prototype.startSpan = function(name, options, context2) {
2038
- return this._getTracer().startSpan(name, options, context2);
2039
- };
2040
- ProxyTracer2.prototype.startActiveSpan = function(_name, _options, _context, _fn) {
2041
- var tracer = this._getTracer();
2042
- return Reflect.apply(tracer.startActiveSpan, tracer, arguments);
2043
- };
2044
- ProxyTracer2.prototype._getTracer = function() {
2045
- if (this._delegate) {
2046
- return this._delegate;
2111
+ });
2112
+ transport.on("response.done", () => {
2113
+ const fullAudio = this.currentAudioChunks.join("");
2114
+ const audioResponse = {
2115
+ transcript: this.currentResponse,
2116
+ audio: fullAudio
2117
+ };
2118
+ if (this.responseResolver) {
2119
+ this.responseResolver(audioResponse);
2120
+ this.reset();
2047
2121
  }
2048
- var tracer = this._provider.getDelegateTracer(this.name, this.version, this.options);
2049
- if (!tracer) {
2050
- return NOOP_TRACER;
2122
+ });
2123
+ transport.on("error", (error) => {
2124
+ console.error(`\u274C Transport error:`, error);
2125
+ if (this.errorRejecter) {
2126
+ const errorObj = error instanceof Error ? error : new Error(String(error));
2127
+ this.errorRejecter(errorObj);
2128
+ this.reset();
2051
2129
  }
2052
- this._delegate = tracer;
2053
- return this._delegate;
2054
- };
2055
- return ProxyTracer2;
2056
- })()
2057
- );
2130
+ });
2131
+ this.listenersSetup = true;
2132
+ }
2133
+ /**
2134
+ * Waits for the agent response with timeout
2135
+ *
2136
+ * @param timeout - Maximum time to wait in milliseconds
2137
+ * @returns Promise that resolves with the audio response event
2138
+ * @throws {Error} If timeout occurs or transport error happens
2139
+ */
2140
+ waitForResponse(timeout) {
2141
+ return new Promise((resolve, reject) => {
2142
+ this.responseResolver = resolve;
2143
+ this.errorRejecter = reject;
2144
+ const timeoutId = setTimeout(() => {
2145
+ if (this.responseResolver) {
2146
+ this.reset();
2147
+ reject(new Error(`Agent response timeout after ${timeout}ms`));
2148
+ }
2149
+ }, timeout);
2150
+ const originalResolver = resolve;
2151
+ this.responseResolver = (value) => {
2152
+ clearTimeout(timeoutId);
2153
+ originalResolver(value);
2154
+ };
2155
+ });
2156
+ }
2157
+ /**
2158
+ * Resets the internal state for the next response
2159
+ */
2160
+ reset() {
2161
+ this.responseResolver = null;
2162
+ this.errorRejecter = null;
2163
+ this.currentResponse = "";
2164
+ this.currentAudioChunks = [];
2165
+ }
2166
+ };
2058
2167
 
2059
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracerProvider.js
2060
- var NoopTracerProvider = (
2061
- /** @class */
2062
- (function() {
2063
- function NoopTracerProvider2() {
2064
- }
2065
- NoopTracerProvider2.prototype.getTracer = function(_name, _version, _options) {
2066
- return new NoopTracer();
2168
+ // src/agents/realtime/response-formatter.ts
2169
+ var ResponseFormatter = class {
2170
+ /**
2171
+ * Formats an audio response event into Scenario framework format
2172
+ *
2173
+ * @param audioEvent - The audio response event from the Realtime API
2174
+ * @returns Formatted assistant message with audio and text content
2175
+ */
2176
+ formatAudioResponse(audioEvent) {
2177
+ return {
2178
+ role: "assistant",
2179
+ content: [
2180
+ { type: "text", text: audioEvent.transcript },
2181
+ { type: "file", mediaType: "audio/pcm16", data: audioEvent.audio }
2182
+ ]
2067
2183
  };
2068
- return NoopTracerProvider2;
2069
- })()
2070
- );
2184
+ }
2185
+ /**
2186
+ * Formats a text response for the Scenario framework
2187
+ *
2188
+ * @param text - The text response from the agent
2189
+ * @returns Plain text response string
2190
+ */
2191
+ formatTextResponse(text) {
2192
+ return text;
2193
+ }
2194
+ /**
2195
+ * Creates an initial response message for when no user message exists
2196
+ *
2197
+ * @param audioEvent - The audio response event from the Realtime API
2198
+ * @returns Formatted assistant message for initial responses
2199
+ */
2200
+ formatInitialResponse(audioEvent) {
2201
+ return this.formatAudioResponse(audioEvent);
2202
+ }
2203
+ };
2071
2204
 
2072
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracerProvider.js
2073
- var NOOP_TRACER_PROVIDER = new NoopTracerProvider();
2074
- var ProxyTracerProvider = (
2075
- /** @class */
2076
- (function() {
2077
- function ProxyTracerProvider2() {
2205
+ // src/agents/realtime/realtime-agent.adapter.ts
2206
+ var RealtimeAgentAdapter = class extends AgentAdapter {
2207
+ /**
2208
+ * Creates a new RealtimeAgentAdapter instance
2209
+ *
2210
+ * The session can be either connected or unconnected.
2211
+ * If unconnected, call connect() with an API key before use.
2212
+ *
2213
+ * @param config - Configuration for the realtime agent adapter
2214
+ */
2215
+ constructor(config2) {
2216
+ super();
2217
+ this.config = config2;
2218
+ this.role = this.config.role;
2219
+ this.name = this.config.agentName;
2220
+ this.session = config2.session;
2221
+ this.eventHandler = new RealtimeEventHandler(this.session);
2222
+ }
2223
+ role;
2224
+ name;
2225
+ session;
2226
+ eventHandler;
2227
+ messageProcessor = new MessageProcessor();
2228
+ responseFormatter = new ResponseFormatter();
2229
+ audioEvents = new EventEmitter();
2230
+ /**
2231
+ * Connects the underlying session, using the OPENAI_API_KEY environment variable when no apiKey is passed
2232
+ */
2233
+ async connect(params) {
2234
+ const { apiKey, ...rest } = params ?? {};
2235
+ await this.session.connect({
2236
+ apiKey: apiKey ?? process.env.OPENAI_API_KEY,
2237
+ ...rest
2238
+ });
2239
+ }
2240
+ /**
2241
+ * Closes the session connection
2242
+ */
2243
+ async disconnect() {
2244
+ this.session.close();
2245
+ }
2246
+ /**
2247
+ * Process input and generate response (implements AgentAdapter interface)
2248
+ *
2249
+ * This is called by Scenario framework for each agent turn.
2250
+ * Handles both text and audio input, returns audio message with transcript.
2251
+ *
2252
+ * @param input - Scenario agent input with message history
2253
+ * @returns Agent response as audio message or text
2254
+ */
2255
+ async call(input) {
2256
+ console.log(`\u{1F50A} [${this.name}] being called with role: ${this.role}`);
2257
+ const latestMessage = input.newMessages[input.newMessages.length - 1];
2258
+ if (!latestMessage) {
2259
+ return this.handleInitialResponse();
2260
+ }
2261
+ const audioData = this.messageProcessor.processAudioMessage(
2262
+ latestMessage.content
2263
+ );
2264
+ if (audioData) {
2265
+ return this.handleAudioInput(audioData);
2266
+ }
2267
+ const text = this.messageProcessor.extractTextMessage(
2268
+ latestMessage.content
2269
+ );
2270
+ if (!text) {
2271
+ throw new Error("Message has no text or audio content");
2272
+ }
2273
+ return this.handleTextInput(text);
2274
+ }
2275
+ /**
2276
+ * Handles the initial response when no user message exists
2277
+ */
2278
+ async handleInitialResponse() {
2279
+ console.log(`[${this.name}] First message, creating response`);
2280
+ const sessionWithTransport = this.session;
2281
+ const transport = sessionWithTransport.transport;
2282
+ if (!transport) {
2283
+ throw new Error("Realtime transport not available");
2284
+ }
2285
+ transport.sendEvent({
2286
+ type: "response.create"
2287
+ });
2288
+ const timeout = this.config.responseTimeout ?? 6e4;
2289
+ const response = await this.eventHandler.waitForResponse(timeout);
2290
+ this.audioEvents.emit("audioResponse", response);
2291
+ return this.responseFormatter.formatInitialResponse(response);
2292
+ }
2293
+ /**
2294
+ * Handles audio input from the user
2295
+ */
2296
+ async handleAudioInput(audioData) {
2297
+ const sessionWithTransport = this.session;
2298
+ const transport = sessionWithTransport.transport;
2299
+ if (!transport) {
2300
+ throw new Error("Realtime transport not available");
2078
2301
  }
2079
- ProxyTracerProvider2.prototype.getTracer = function(name, version, options) {
2080
- var _a;
2081
- return (_a = this.getDelegateTracer(name, version, options)) !== null && _a !== void 0 ? _a : new ProxyTracer(this, name, version, options);
2082
- };
2083
- ProxyTracerProvider2.prototype.getDelegate = function() {
2084
- var _a;
2085
- return (_a = this._delegate) !== null && _a !== void 0 ? _a : NOOP_TRACER_PROVIDER;
2086
- };
2087
- ProxyTracerProvider2.prototype.setDelegate = function(delegate) {
2088
- this._delegate = delegate;
2089
- };
2090
- ProxyTracerProvider2.prototype.getDelegateTracer = function(name, version, options) {
2091
- var _a;
2092
- return (_a = this._delegate) === null || _a === void 0 ? void 0 : _a.getTracer(name, version, options);
2093
- };
2094
- return ProxyTracerProvider2;
2095
- })()
2096
- );
2097
-
2098
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context-api.js
2099
- var context = ContextAPI.getInstance();
2100
-
2101
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/trace.js
2102
- var API_NAME3 = "trace";
2103
- var TraceAPI = (
2104
- /** @class */
2105
- (function() {
2106
- function TraceAPI2() {
2107
- this._proxyTracerProvider = new ProxyTracerProvider();
2108
- this.wrapSpanContext = wrapSpanContext;
2109
- this.isSpanContextValid = isSpanContextValid;
2110
- this.deleteSpan = deleteSpan;
2111
- this.getSpan = getSpan;
2112
- this.getActiveSpan = getActiveSpan;
2113
- this.getSpanContext = getSpanContext;
2114
- this.setSpan = setSpan;
2115
- this.setSpanContext = setSpanContext;
2116
- }
2117
- TraceAPI2.getInstance = function() {
2118
- if (!this._instance) {
2119
- this._instance = new TraceAPI2();
2120
- }
2121
- return this._instance;
2122
- };
2123
- TraceAPI2.prototype.setGlobalTracerProvider = function(provider) {
2124
- var success = registerGlobal(API_NAME3, this._proxyTracerProvider, DiagAPI.instance());
2125
- if (success) {
2126
- this._proxyTracerProvider.setDelegate(provider);
2127
- }
2128
- return success;
2129
- };
2130
- TraceAPI2.prototype.getTracerProvider = function() {
2131
- return getGlobal(API_NAME3) || this._proxyTracerProvider;
2132
- };
2133
- TraceAPI2.prototype.getTracer = function(name, version) {
2134
- return this.getTracerProvider().getTracer(name, version);
2135
- };
2136
- TraceAPI2.prototype.disable = function() {
2137
- unregisterGlobal(API_NAME3, DiagAPI.instance());
2138
- this._proxyTracerProvider = new ProxyTracerProvider();
2139
- };
2140
- return TraceAPI2;
2141
- })()
2142
- );
2302
+ transport.sendEvent({
2303
+ type: "input_audio_buffer.append",
2304
+ audio: audioData
2305
+ });
2306
+ transport.sendEvent({
2307
+ type: "input_audio_buffer.commit"
2308
+ });
2309
+ transport.sendEvent({
2310
+ type: "response.create"
2311
+ });
2312
+ const timeout = this.config.responseTimeout ?? 6e4;
2313
+ const response = await this.eventHandler.waitForResponse(timeout);
2314
+ this.audioEvents.emit("audioResponse", response);
2315
+ return this.responseFormatter.formatAudioResponse(response);
2316
+ }
2317
+ /**
2318
+ * Handles text input from the user
2319
+ */
2320
+ async handleTextInput(text) {
2321
+ this.session.sendMessage(text);
2322
+ const timeout = this.config.responseTimeout ?? 3e4;
2323
+ const response = await this.eventHandler.waitForResponse(timeout);
2324
+ this.audioEvents.emit("audioResponse", response);
2325
+ return this.responseFormatter.formatTextResponse(response.transcript);
2326
+ }
2327
+ /**
2328
+ * Subscribe to audio response events
2329
+ *
2330
+ * @param callback - Function called when an audio response completes
2331
+ */
2332
+ onAudioResponse(callback) {
2333
+ this.audioEvents.on("audioResponse", callback);
2334
+ }
2335
+ /**
2336
+ * Remove audio response listener
2337
+ *
2338
+ * @param callback - The callback function to remove
2339
+ */
2340
+ offAudioResponse(callback) {
2341
+ this.audioEvents.off("audioResponse", callback);
2342
+ }
2343
+ };
2143
2344
 
2144
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace-api.js
2145
- var trace = TraceAPI.getInstance();
2345
+ // src/execution/index.ts
2346
+ var execution_exports = {};
2347
+ __export(execution_exports, {
2348
+ ScenarioExecution: () => ScenarioExecution,
2349
+ ScenarioExecutionState: () => ScenarioExecutionState,
2350
+ StateChangeEventType: () => StateChangeEventType
2351
+ });
2146
2352
 
2147
2353
  // src/execution/scenario-execution.ts
2354
+ init_esm();
2355
+ init_esm();
2148
2356
  import { getLangWatchTracer } from "langwatch";
2149
2357
  import { attributes as attributes3 } from "langwatch/observability";
2150
2358
  import { filter, Subject as Subject2 } from "rxjs";
@@ -2330,7 +2538,7 @@ var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
2330
2538
  metadata: z5.object({
2331
2539
  name: z5.string().optional(),
2332
2540
  description: z5.string().optional()
2333
- })
2541
+ }).catchall(z5.unknown())
2334
2542
  });
2335
2543
  var scenarioResultsSchema = z5.object({
2336
2544
  verdict: z5.nativeEnum(Verdict),
@@ -2491,13 +2699,22 @@ var ScenarioExecution = class {
2491
2699
  * - RUN_FINISHED: When scenario execution completes (success/failure/error)
2492
2700
  */
2493
2701
  events$ = this.eventSubject.asObservable();
2702
+ /** Batch run ID for grouping scenario runs */
2703
+ batchRunId;
2704
+ /** The run ID for the current execution */
2705
+ scenarioRunId;
2494
2706
  /**
2495
2707
  * Creates a new ScenarioExecution instance.
2496
2708
  *
2497
2709
  * @param config - The scenario configuration containing agents, settings, and metadata
2498
2710
  * @param script - The ordered sequence of script steps that define the test flow
2711
+ * @param batchRunId - Batch run ID for grouping scenario runs
2499
2712
  */
2500
- constructor(config2, script) {
2713
+ constructor(config2, script, batchRunId2) {
2714
+ if (!batchRunId2) {
2715
+ throw new Error("batchRunId is required");
2716
+ }
2717
+ this.batchRunId = batchRunId2;
2501
2718
  this.config = {
2502
2719
  id: config2.id ?? generateScenarioId(),
2503
2720
  name: config2.name,
@@ -2507,7 +2724,8 @@ var ScenarioExecution = class {
2507
2724
  verbose: config2.verbose ?? DEFAULT_VERBOSE,
2508
2725
  maxTurns: config2.maxTurns ?? DEFAULT_MAX_TURNS,
2509
2726
  threadId: config2.threadId ?? generateThreadId(),
2510
- setId: config2.setId
2727
+ setId: config2.setId,
2728
+ metadata: config2.metadata
2511
2729
  };
2512
2730
  this.state = new ScenarioExecutionState(this.config);
2513
2731
  this.reset();
@@ -2545,18 +2763,21 @@ var ScenarioExecution = class {
2545
2763
  * @param result - The final scenario result (without messages/timing, which will be added automatically)
2546
2764
  */
2547
2765
  setResult(result) {
2766
+ if (!this.scenarioRunId) {
2767
+ throw new Error("Cannot set result: scenarioRunId has not been initialized. This is a bug in ScenarioExecution.");
2768
+ }
2548
2769
  const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
2549
2770
  const agentTimes = agentRoleAgentsIdx.map(
2550
2771
  (i) => this.agentTimes.get(i) || 0
2551
2772
  );
2552
2773
  const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
2553
2774
  this._result = {
2775
+ runId: this.scenarioRunId,
2554
2776
  ...result,
2555
2777
  messages: this.state.messages,
2556
2778
  totalTime: this.totalTime,
2557
2779
  agentTime: totalAgentTime
2558
2780
  };
2559
- return this._result;
2560
2781
  this.logger.debug(`[${this.config.id}] Result set`, {
2561
2782
  success: result.success,
2562
2783
  reasoning: result.reasoning,
@@ -2564,6 +2785,7 @@ var ScenarioExecution = class {
2564
2785
  agentTime: totalAgentTime,
2565
2786
  messageCount: this.state.messages.length
2566
2787
  });
2788
+ return this._result;
2567
2789
  }
2568
2790
  /**
2569
2791
  * The total elapsed time for the scenario execution.
@@ -2604,7 +2826,10 @@ var ScenarioExecution = class {
2604
2826
  scriptLength: this.config.script.length
2605
2827
  });
2606
2828
  this.reset();
2829
+ this.newTurn();
2830
+ this.state.currentTurn = 0;
2607
2831
  const scenarioRunId = generateScenarioRunId();
2832
+ this.scenarioRunId = scenarioRunId;
2608
2833
  this.logger.debug(`[${this.config.id}] Generated run ID: ${scenarioRunId}`);
2609
2834
  this.emitRunStarted({ scenarioRunId });
2610
2835
  const subscription = this.state.events$.pipe(
@@ -2668,6 +2893,10 @@ var ScenarioExecution = class {
2668
2893
  });
2669
2894
  throw error;
2670
2895
  } finally {
2896
+ if (this.currentTurnSpan) {
2897
+ this.currentTurnSpan.end();
2898
+ this.currentTurnSpan = void 0;
2899
+ }
2671
2900
  subscription.unsubscribe();
2672
2901
  }
2673
2902
  }
@@ -2792,69 +3021,72 @@ var ScenarioExecution = class {
2792
3021
  const agentContext = this.currentTurnSpan ? trace.setSpan(context.active(), this.currentTurnSpan) : context.active();
2793
3022
  const agentSpanName = `${agentName !== Object.prototype.constructor.name ? agent2.constructor.name : "Agent"}.call`;
2794
3023
  try {
2795
- await this.tracer.withActiveSpan(
2796
- agentSpanName,
2797
- {
2798
- attributes: {
2799
- [attributes3.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId
2800
- }
2801
- },
3024
+ await context.with(
2802
3025
  agentContext,
2803
- async (agentSpan) => {
2804
- agentSpan.setType("agent");
2805
- agentSpan.setInput("chat_messages", this.state.messages);
2806
- const agentResponse = await agent2.call(agentInput);
2807
- const endTime = Date.now();
2808
- const duration = endTime - startTime;
2809
- this.logger.debug(`[${this.config.id}] Agent responded`, {
2810
- agentIdx: idx,
2811
- duration,
2812
- responseType: typeof agentResponse,
2813
- isScenarioResult: agentResponse && typeof agentResponse === "object" && "success" in agentResponse
2814
- });
2815
- this.addAgentTime(idx, duration);
2816
- this.pendingMessages.delete(idx);
2817
- if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
2818
- this.logger.debug(
2819
- `[${this.config.id}] Agent returned ScenarioResult`,
2820
- {
2821
- success: agentResponse.success
2822
- }
2823
- );
2824
- this.setResult(agentResponse);
2825
- return;
2826
- }
2827
- const messages = convertAgentReturnTypesToMessages(
2828
- agentResponse,
2829
- role === "User" /* USER */ ? "user" : "assistant"
2830
- );
2831
- if (messages.length > 0) {
2832
- agentSpan.setOutput("chat_messages", messages);
2833
- }
2834
- const metrics = {
2835
- duration: endTime - startTime
2836
- };
2837
- if (agentResponse && typeof agentResponse === "object") {
2838
- const usage = agentResponse.usage;
2839
- if (usage) {
2840
- if (usage.prompt_tokens !== void 0)
2841
- metrics.promptTokens = usage.prompt_tokens;
2842
- if (usage.completion_tokens !== void 0)
2843
- metrics.completionTokens = usage.completion_tokens;
2844
- if (usage.total_tokens !== void 0)
2845
- metrics.totalTokens = usage.total_tokens;
3026
+ () => this.tracer.withActiveSpan(
3027
+ agentSpanName,
3028
+ {
3029
+ attributes: {
3030
+ [attributes3.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId
2846
3031
  }
2847
- }
2848
- agentSpan.setMetrics(metrics);
2849
- const traceId = agentSpan.spanContext().traceId.toString();
2850
- for (const message2 of messages) {
2851
- this.state.addMessage({
2852
- ...message2,
2853
- traceId
3032
+ },
3033
+ agentContext,
3034
+ async (agentSpan) => {
3035
+ agentSpan.setType("agent");
3036
+ agentSpan.setInput("chat_messages", this.state.messages);
3037
+ const agentResponse = await agent2.call(agentInput);
3038
+ const endTime = Date.now();
3039
+ const duration = endTime - startTime;
3040
+ this.logger.debug(`[${this.config.id}] Agent responded`, {
3041
+ agentIdx: idx,
3042
+ duration,
3043
+ responseType: typeof agentResponse,
3044
+ isScenarioResult: agentResponse && typeof agentResponse === "object" && "success" in agentResponse
2854
3045
  });
2855
- this.broadcastMessage(message2, idx);
3046
+ this.addAgentTime(idx, duration);
3047
+ this.pendingMessages.delete(idx);
3048
+ if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
3049
+ this.logger.debug(
3050
+ `[${this.config.id}] Agent returned ScenarioResult`,
3051
+ {
3052
+ success: agentResponse.success
3053
+ }
3054
+ );
3055
+ this.setResult(agentResponse);
3056
+ return;
3057
+ }
3058
+ const messages = convertAgentReturnTypesToMessages(
3059
+ agentResponse,
3060
+ role === "User" /* USER */ ? "user" : "assistant"
3061
+ );
3062
+ if (messages.length > 0) {
3063
+ agentSpan.setOutput("chat_messages", messages);
3064
+ }
3065
+ const metrics = {
3066
+ duration: endTime - startTime
3067
+ };
3068
+ if (agentResponse && typeof agentResponse === "object") {
3069
+ const usage = agentResponse.usage;
3070
+ if (usage) {
3071
+ if (usage.prompt_tokens !== void 0)
3072
+ metrics.promptTokens = usage.prompt_tokens;
3073
+ if (usage.completion_tokens !== void 0)
3074
+ metrics.completionTokens = usage.completion_tokens;
3075
+ if (usage.total_tokens !== void 0)
3076
+ metrics.totalTokens = usage.total_tokens;
3077
+ }
3078
+ }
3079
+ agentSpan.setMetrics(metrics);
3080
+ const traceId = agentSpan.spanContext().traceId.toString();
3081
+ for (const message2 of messages) {
3082
+ this.state.addMessage({
3083
+ ...message2,
3084
+ traceId
3085
+ });
3086
+ this.broadcastMessage(message2, idx);
3087
+ }
2856
3088
  }
2857
- }
3089
+ )
2858
3090
  );
2859
3091
  } catch (error) {
2860
3092
  throw new Error(`[${agentName}] ${error}`, { cause: error });
@@ -3225,7 +3457,7 @@ var ScenarioExecution = class {
3225
3457
  * - Creates a new ScenarioExecutionState with the current config
3226
3458
  * - Sets up the thread ID (generates new one if not provided)
3227
3459
  * - Initializes all agents
3228
- * - Starts the first turn
3460
+ * - Initializes turn state (pending agents/roles) without creating a trace span
3229
3461
  * - Records the start time for performance tracking
3230
3462
  * - Clears any pending messages
3231
3463
  * - Clears the result from any previous execution
@@ -3239,7 +3471,8 @@ var ScenarioExecution = class {
3239
3471
  this.state = new ScenarioExecutionState(this.config);
3240
3472
  this.state.threadId = this.config.threadId || generateThreadId();
3241
3473
  this.setAgents(this.config.agents);
3242
- this.newTurn();
3474
+ this.pendingAgentsOnTurn = new Set(this.agents);
3475
+ this.pendingRolesOnTurn = ["User" /* USER */, "Agent" /* AGENT */, "Judge" /* JUDGE */];
3243
3476
  this.state.currentTurn = 0;
3244
3477
  this.totalStartTime = Date.now();
3245
3478
  this.pendingMessages.clear();
@@ -3380,7 +3613,7 @@ var ScenarioExecution = class {
3380
3613
  type: "placeholder",
3381
3614
  // This will be replaced by the specific event type
3382
3615
  timestamp: Date.now(),
3383
- batchRunId: getBatchRunId(),
3616
+ batchRunId: this.batchRunId,
3384
3617
  scenarioId: this.config.id,
3385
3618
  scenarioRunId,
3386
3619
  scenarioSetId: this.config.setId
@@ -3394,6 +3627,7 @@ var ScenarioExecution = class {
3394
3627
  ...this.makeBaseEvent({ scenarioRunId }),
3395
3628
  type: "SCENARIO_RUN_STARTED" /* RUN_STARTED */,
3396
3629
  metadata: {
3630
+ ...this.config.metadata,
3397
3631
  name: this.config.name,
3398
3632
  description: this.config.description
3399
3633
  }
@@ -3888,8 +4122,429 @@ var fail = (reasoning) => {
3888
4122
  };
3889
4123
  };
3890
4124
 
4125
+ // src/tracing/setup.ts
4126
+ init_esm();
4127
+ import { setupObservability } from "langwatch/observability/node";
4128
+ import { LangWatchTraceExporter } from "langwatch/observability";
4129
+
4130
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/trace/suppress-tracing.js
4131
+ init_esm();
4132
+ var SUPPRESS_TRACING_KEY = createContextKey("OpenTelemetry SDK Context Key SUPPRESS_TRACING");
4133
/**
 * Derive a context that carries the "suppress tracing" flag.
 *
 * @param context2 Context object exposing `setValue(key, value)`.
 * @returns The value produced by `context2.setValue` for the suppression key set to `true`.
 */
function suppressTracing(context2) {
  const suppressedContext = context2.setValue(SUPPRESS_TRACING_KEY, true);
  return suppressedContext;
}
4136
+
4137
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/common/logging-error-handler.js
4138
+ init_esm();
4139
/**
 * Build an error handler that reports exceptions through `diag.error`.
 *
 * @returns A one-argument function that stringifies the exception with
 *   `stringifyException` and forwards the text to `diag.error`.
 */
function loggingErrorHandler() {
  function reportException(ex) {
    const text = stringifyException(ex);
    diag.error(text);
  }
  return reportException;
}
4144
/**
 * Turn an exception into a loggable string.
 *
 * @param ex The thrown value.
 * @returns `ex` itself when it is already a string; otherwise the JSON
 *   serialization of `flattenException(ex)`.
 */
function stringifyException(ex) {
  return typeof ex === "string" ? ex : JSON.stringify(flattenException(ex));
}
4151
/**
 * Collapse an exception and its prototype chain into a flat record of strings.
 *
 * Walks `ex` and then each prototype in turn. For every own property name it
 * keeps the first truthy value found (nearest object wins), converted with
 * `String(...)`. Falsy values are not recorded.
 *
 * @param ex The thrown value. `null` and `undefined` yield an empty record.
 * @returns A plain object mapping property names to string values.
 */
function flattenException(ex) {
  const result = {};
  // `!= null` stops on `undefined` as well as `null`: the previous
  // `!== null` test let `undefined` through and crashed inside
  // `Object.getOwnPropertyNames(undefined)`.
  for (let current = ex; current != null; current = Object.getPrototypeOf(current)) {
    for (const propertyName of Object.getOwnPropertyNames(current)) {
      // A value recorded from an object nearer the instance takes precedence.
      if (result[propertyName]) continue;
      const value = current[propertyName];
      if (value) {
        result[propertyName] = String(value);
      }
    }
  }
  return result;
}
4167
+
4168
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/common/global-error-handler.js
4169
// Handler that globalErrorHandler forwards every exception to.
var delegateHandler = loggingErrorHandler();

/**
 * Report an exception through the delegate handler.
 *
 * Anything the delegate itself throws is deliberately discarded, so calling
 * this function does not throw because of a faulty handler.
 *
 * @param ex The exception to report.
 */
function globalErrorHandler(ex) {
  try {
    delegateHandler(ex);
  } catch {
    // Intentionally ignored: reporting an error must not raise another one.
  }
}
4176
+
4177
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/ExportResult.js
4178
// Two-way lookup between export result names and their numeric codes:
// ExportResultCode.SUCCESS === 0 and ExportResultCode[0] === "SUCCESS".
var ExportResultCode;
ExportResultCode = ExportResultCode || {};
for (const [label, code] of [["SUCCESS", 0], ["FAILED", 1]]) {
  ExportResultCode[label] = code;
  ExportResultCode[code] = label;
}
4183
+
4184
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/utils/promise.js
4185
/**
 * A promise whose settlement is driven from outside: the constructor captures
 * the promise's resolve and reject callbacks on the instance, and the
 * `resolve` / `reject` methods invoke them.
 */
var Deferred = (
  /** @class */
  (function() {
    function Deferred2() {
      this._promise = new Promise((resolve, reject) => {
        this._resolve = resolve;
        this._reject = reject;
      });
    }
    // Read-only, non-enumerable view of the underlying promise.
    Object.defineProperty(Deferred2.prototype, "promise", {
      configurable: true,
      enumerable: false,
      get: function() {
        return this._promise;
      }
    });
    // Fulfil the underlying promise with `val`.
    Deferred2.prototype.resolve = function(val) {
      this._resolve(val);
    };
    // Reject the underlying promise with `err`.
    Deferred2.prototype.reject = function(err) {
      this._reject(err);
    };
    return Deferred2;
  })()
);
4211
+
4212
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/utils/callback.js
4213
/**
 * Read values out of an iterable into a new array.
 *
 * @param o Value to read. If it has no `Symbol.iterator` method it is returned unchanged.
 * @param n Optional maximum number of values to take; `undefined` reads to exhaustion.
 * @returns An array of the values read, or `o` itself when it is not iterable.
 */
var __read5 = function(o, n) {
  var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
  if (!iteratorFactory) return o;
  var iterator = iteratorFactory.call(o);
  var collected = [];
  var step;
  var failure;
  try {
    for (; ; ) {
      // The limit is decremented on every check, exactly once per attempted read.
      if (n !== void 0 && !(n-- > 0)) break;
      step = iterator.next();
      if (step.done) break;
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error };
  } finally {
    try {
      // Close the iterator when reading stopped before it reported completion.
      if (step && !step.done) {
        var close = iterator["return"];
        if (close) close.call(iterator);
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
4230
/**
 * Concatenate `from` onto `to`, returning a new array.
 *
 * When `pack` is truthy, or only two arguments were passed, holes in `from`
 * are materialized as own `undefined` entries before concatenation.
 *
 * @param to Array providing the leading elements.
 * @param from Array-like providing the trailing elements.
 * @param pack Whether to fill holes in `from`.
 * @returns The result of `to.concat(...)` with the (possibly packed) copy of `from`.
 */
var __spreadArray5 = function(to, from, pack) {
  var packed;
  if (pack || arguments.length === 2) {
    var total = from.length;
    for (var index = 0; index < total; index++) {
      // Start copying only once the first hole is found; from then on copy every slot.
      if (packed || !(index in from)) {
        if (!packed) packed = Array.prototype.slice.call(from, 0, index);
        packed[index] = from[index];
      }
    }
  }
  return to.concat(packed || Array.prototype.slice.call(from));
};
4239
/**
 * Wraps a callback so that it runs at most once with a fixed `this` value.
 * Every invocation of `call` returns the same promise, which settles with the
 * outcome of that single run.
 */
var BindOnceFuture = (
  /** @class */
  (function() {
    /**
     * @param _callback Function to run on the first `call`.
     * @param _that Value used as `this` when running `_callback`.
     */
    function BindOnceFuture2(_callback, _that) {
      this._callback = _callback;
      this._that = _that;
      this._isCalled = false;
      this._deferred = new Deferred();
    }
    Object.defineProperties(BindOnceFuture2.prototype, {
      // Whether `call` has been invoked at least once.
      isCalled: {
        get: function() {
          return this._isCalled;
        },
        enumerable: false,
        configurable: true
      },
      // The shared promise for the callback's outcome.
      promise: {
        get: function() {
          return this._deferred.promise;
        },
        enumerable: false,
        configurable: true
      }
    });
    /**
     * Run the callback with the given arguments on first use; later calls are
     * no-ops that return the same promise.
     */
    BindOnceFuture2.prototype.call = function(...args) {
      if (this._isCalled) {
        return this._deferred.promise;
      }
      this._isCalled = true;
      try {
        const outcome = this._callback.call(this._that, ...args);
        Promise.resolve(outcome).then(
          (val) => this._deferred.resolve(val),
          (err) => this._deferred.reject(err)
        );
      } catch (err) {
        // A synchronous throw from the callback rejects the shared promise.
        this._deferred.reject(err);
      }
      return this._deferred.promise;
    };
    return BindOnceFuture2;
  })()
);
4286
+
4287
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/internal/exporter.js
4288
+ init_esm();
4289
/**
 * Run an exporter inside a context derived via `suppressTracing` and expose
 * its callback result as a promise.
 *
 * @param exporter Object with an `export(arg, callback)` method.
 * @param arg Payload handed to `exporter.export`.
 * @returns A promise resolved with the value the exporter passes to its callback.
 */
function _export(exporter, arg) {
  return new Promise((resolve) => {
    const suppressedContext = suppressTracing(context.active());
    context.with(suppressedContext, () => {
      exporter.export(arg, (result) => {
        resolve(result);
      });
    });
  });
}
4298
+
4299
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/index.js
4300
// Namespace object through which SimpleSpanProcessor reaches the `_export` helper.
var internal = { _export: _export };
4303
+
4304
+ // node_modules/.pnpm/@opentelemetry+sdk-trace-base@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/sdk-trace-base/build/esm/export/SimpleSpanProcessor.js
4305
+ init_esm();
4306
/**
 * Drive a generator as an asynchronous function.
 *
 * Each yielded value is converted to a promise; its fulfilment value is fed
 * back through `next`, its rejection reason through `throw`. The returned
 * promise settles with the generator's return value or with the first
 * uncaught error.
 *
 * @param thisArg `this` value for the generator function.
 * @param _arguments Arguments for the generator function (treated as `[]` when falsy).
 * @param P Promise constructor to use; defaults to the global `Promise` when falsy.
 * @param generator Generator function to run.
 * @returns A promise for the generator's final result.
 */
var __awaiter = function(thisArg, _arguments, P, generator) {
  const PromiseCtor = P || Promise;
  const toPromise = (value) => {
    if (value instanceof PromiseCtor) return value;
    return new PromiseCtor((resolve) => {
      resolve(value);
    });
  };
  return new PromiseCtor((resolve, reject) => {
    const iterator = generator.apply(thisArg, _arguments || []);
    const settle = (result) => {
      if (result.done) {
        resolve(result.value);
      } else {
        toPromise(result.value).then(onFulfilled, onRejected);
      }
    };
    const onFulfilled = (value) => {
      try {
        settle(iterator.next(value));
      } catch (e) {
        reject(e);
      }
    };
    const onRejected = (reason) => {
      try {
        settle(iterator["throw"](reason));
      } catch (e) {
        reject(e);
      }
    };
    settle(iterator.next());
  });
};
4333
// __generator(thisArg, body): builds an iterator-like object on top of a
// state-machine callback. NOTE(review): this looks like the downlevel
// generator helper emitted by the TypeScript compiler (tslib) - confirm
// before modifying; the statement order below is load-bearing.
//
// - Returns `g`, an object with `next`, `throw` and `return` methods (built by
//   `verb(0)`, `verb(1)` and `verb(2)`), plus a `Symbol.iterator` method that
//   returns itself when `Symbol` is available.
// - Each method calls `step([n, v])`; `step` throws a TypeError when it is
//   re-entered while `f` is set.
// - `_` is the state passed to `body` on every iteration: `label`, `sent()`,
//   `trys` and `ops`. `sent()` throws `t[1]` when `t[0] & 1`, otherwise
//   returns it.
// - Op codes visible in the switch: 4 returns `{ value: op[1], done: false }`,
//   5 stores `op[1]` in `y` and restarts with op `[0]`, 7 pops `ops` and
//   `trys`. An exception thrown inside the loop becomes op `[6, e]`.
// - After the loop, `op[0] & 5` rethrows `op[1]`; otherwise the result is
//   `{ value: op[0] ? op[1] : void 0, done: true }`.
+ var __generator = function(thisArg, body) {
4334
+ var _ = { label: 0, sent: function() {
4335
+ if (t[0] & 1) throw t[1];
4336
+ return t[1];
4337
+ }, trys: [], ops: [] }, f, y, t, g;
4338
+ return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() {
4339
+ return this;
4340
+ }), g;
4341
+ function verb(n) {
4342
+ return function(v) {
4343
+ return step([n, v]);
4344
+ };
4345
+ }
4346
+ function step(op) {
4347
+ if (f) throw new TypeError("Generator is already executing.");
4348
+ while (_) try {
4349
+ if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
4350
+ if (y = 0, t) op = [op[0] & 2, t.value];
4351
+ switch (op[0]) {
4352
+ case 0:
4353
+ case 1:
4354
+ t = op;
4355
+ break;
4356
+ case 4:
4357
+ _.label++;
4358
+ return { value: op[1], done: false };
4359
+ case 5:
4360
+ _.label++;
4361
+ y = op[1];
4362
+ op = [0];
4363
+ continue;
4364
+ case 7:
4365
+ op = _.ops.pop();
4366
+ _.trys.pop();
4367
+ continue;
4368
+ default:
4369
+ if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) {
4370
+ _ = 0;
4371
+ continue;
4372
+ }
4373
+ if (op[0] === 3 && (!t || op[1] > t[0] && op[1] < t[3])) {
4374
+ _.label = op[1];
4375
+ break;
4376
+ }
4377
+ if (op[0] === 6 && _.label < t[1]) {
4378
+ _.label = t[1];
4379
+ t = op;
4380
+ break;
4381
+ }
4382
+ if (t && _.label < t[2]) {
4383
+ _.label = t[2];
4384
+ _.ops.push(op);
4385
+ break;
4386
+ }
4387
+ if (t[2]) _.ops.pop();
4388
+ _.trys.pop();
4389
+ continue;
4390
+ }
4391
+ op = body.call(thisArg, _);
4392
+ } catch (e) {
4393
+ op = [6, e];
4394
+ y = 0;
4395
+ } finally {
4396
+ f = t = 0;
4397
+ }
4398
+ if (op[0] & 5) throw op[1];
4399
+ return { value: op[0] ? op[1] : void 0, done: true };
4400
+ }
4401
+ };
4402
/**
 * Span processor that hands each ended, sampled span to the exporter
 * immediately, one span per export call.
 *
 * Changes from the previous implementation:
 * - Every in-flight export is tracked in `_unresolvedExports`, so `forceFlush`
 *   waits for exports started on the normal path as well, not only those
 *   that were waiting on async resource attributes.
 * - Tracked promises are removed once settled, including when
 *   `waitForAsyncAttributes` rejects, so the set cannot grow without bound.
 * - The fallback failure message reports `result.code` instead of
 *   interpolating the whole result object (which printed "[object Object]").
 * - A span whose resource reports pending async attributes but offers no
 *   `waitForAsyncAttributes` method is exported rather than silently dropped.
 */
var SimpleSpanProcessor = (
  /** @class */
  class SimpleSpanProcessor2 {
    /**
     * @param _exporter Exporter with `shutdown()` and optionally `forceFlush()`;
     *   it is invoked through `internal._export`.
     */
    constructor(_exporter) {
      this._exporter = _exporter;
      this._shutdownOnce = new BindOnceFuture(this._shutdown, this);
      this._unresolvedExports = /* @__PURE__ */ new Set();
    }

    /**
     * Wait for all in-flight exports, then flush the exporter if it supports it.
     *
     * @returns A promise that resolves once pending exports and the exporter flush are done.
     */
    async forceFlush() {
      await Promise.all(Array.from(this._unresolvedExports));
      if (this._exporter.forceFlush) {
        await this._exporter.forceFlush();
      }
    }

    /** No work is done when a span starts. */
    onStart(_span, _parentContext) {
    }

    /**
     * Export `span` unless the processor has been shut down or the span is
     * not sampled. Failures are routed to `globalErrorHandler`; this method
     * does not throw because of an export failure.
     *
     * @param span The span that just ended.
     */
    onEnd(span) {
      if (this._shutdownOnce.isCalled) {
        return;
      }
      if ((span.spanContext().traceFlags & TraceFlags.SAMPLED) === 0) {
        return;
      }
      const pendingExport = this._exportSpan(span).catch((error) => {
        globalErrorHandler(error);
      });
      // Track the export so forceFlush can await it; untrack once settled.
      this._unresolvedExports.add(pendingExport);
      void pendingExport.finally(() => {
        this._unresolvedExports.delete(pendingExport);
      });
    }

    /**
     * Shut the processor down. The underlying exporter shutdown runs once;
     * repeated calls return the same promise.
     */
    shutdown() {
      return this._shutdownOnce.call();
    }

    /** Delegate shutdown to the exporter. */
    _shutdown() {
      return this._exporter.shutdown();
    }

    /**
     * Export one span, first waiting for async resource attributes if the
     * resource reports them as pending.
     *
     * @param span The span to export.
     * @throws The exporter's reported error, or a generic Error carrying the
     *   result code, when the export does not succeed.
     */
    async _exportSpan(span) {
      if (span.resource.asyncAttributesPending) {
        await span.resource.waitForAsyncAttributes?.();
      }
      const result = await internal._export(this._exporter, [span]);
      if (result.code !== ExportResultCode.SUCCESS) {
        throw result.error ?? new Error(`SimpleSpanProcessor: span export failed (status ${result.code})`);
      }
    }
  }
);
4478
+
4479
+ // src/tracing/setup.ts
4480
// Set to true once scenario tracing has been wired up; guards against repeated setup.
var initialized = false;

/**
 * Find a tracer provider that span processors can be attached to.
 *
 * @param provider Candidate provider, possibly a wrapper around the real one.
 * @returns `provider` when it has an `addSpanProcessor` method; otherwise its
 *   delegate (from `getDelegate()` when that method exists, else `delegate`
 *   or `_delegate`) when the delegate has one; otherwise `undefined`.
 */
function getConcreteProvider(provider) {
  const canAttach = (candidate) => typeof candidate.addSpanProcessor === "function";
  if (!provider || typeof provider !== "object") return void 0;
  if (canAttach(provider)) return provider;
  const delegate = typeof provider.getDelegate === "function" ? provider.getDelegate() : provider.delegate ?? provider._delegate;
  const delegateIsObject = Boolean(delegate) && typeof delegate === "object";
  return delegateIsObject && canAttach(delegate) ? delegate : void 0;
}
4495
/**
 * Wire scenario tracing into the process, at most once.
 *
 * If the global tracer provider (or its delegate) accepts span processors,
 * they are attached to it; otherwise a full setup is performed. Later calls
 * are no-ops once the first call has completed.
 *
 * @param options Optional tracing options forwarded to the chosen setup path.
 */
function setupScenarioTracing(options) {
  if (initialized) return;
  const concrete = getConcreteProvider(trace.getTracerProvider());
  if (!concrete) {
    initializeFullSetup(options);
  } else {
    attachToExistingProvider(concrete, options);
  }
  initialized = true;
}
4506
/**
 * Set up scenario tracing if it has not been set up yet.
 *
 * @param options Optional tracing options passed through to `setupScenarioTracing`.
 */
function ensureTracingInitialized(options) {
  if (!initialized) {
    setupScenarioTracing(options);
  }
}
4510
/**
 * Register scenario span processors on an already-configured tracer provider.
 *
 * Processors are added in this order: the judge span collector, any
 * caller-supplied `spanProcessors`, a processor for the caller-supplied
 * `traceExporter`, and - when `LANGWATCH_API_KEY` is set in the environment
 * config - a processor exporting to LangWatch.
 *
 * @param provider Provider exposing `addSpanProcessor`.
 * @param options Optional object with `spanProcessors` and/or `traceExporter`.
 */
function attachToExistingProvider(provider, options) {
  provider.addSpanProcessor(judgeSpanCollector);

  const extraProcessors = options?.spanProcessors;
  if (extraProcessors) {
    for (const processor of extraProcessors) {
      provider.addSpanProcessor(processor);
    }
  }

  const customExporter = options?.traceExporter;
  if (customExporter) {
    provider.addSpanProcessor(new SimpleSpanProcessor(customExporter));
  }

  const envConfig = getEnv();
  const apiKey = envConfig.LANGWATCH_API_KEY;
  if (!apiKey) return;
  const langwatchExporter = new LangWatchTraceExporter({
    apiKey,
    endpoint: envConfig.LANGWATCH_ENDPOINT
  });
  provider.addSpanProcessor(new SimpleSpanProcessor(langwatchExporter));
}
4529
/**
 * Perform a complete observability setup when no usable provider exists yet.
 *
 * Calls `setupObservability` with the caller's options, overriding two keys:
 * `langwatch` falls back to the API key and endpoint from the environment
 * config when the caller gave none, and `spanProcessors` always starts with
 * the judge span collector followed by any caller-supplied processors.
 *
 * @param options Optional observability options.
 */
function initializeFullSetup(options) {
  const envConfig = getEnv();
  const extraProcessors = options?.spanProcessors;
  const spanProcessors = extraProcessors ? [judgeSpanCollector, ...extraProcessors] : [judgeSpanCollector];
  const langwatch = options?.langwatch ?? {
    apiKey: envConfig.LANGWATCH_API_KEY,
    endpoint: envConfig.LANGWATCH_ENDPOINT
  };
  setupObservability({ ...options, langwatch, spanProcessors });
}
4544
+
3891
4545
  // src/runner/run.ts
3892
- async function run(cfg) {
4546
+ async function run(cfg, options) {
4547
+ var _a, _b;
3893
4548
  if (!cfg.name) {
3894
4549
  throw new Error("Scenario name is required");
3895
4550
  }
@@ -3914,14 +4569,17 @@ async function run(cfg) {
3914
4569
  cfg.threadId = generateThreadId();
3915
4570
  }
3916
4571
  const steps = cfg.script || [proceed()];
3917
- const execution = new ScenarioExecution(cfg, steps);
4572
+ const batchRunId2 = (options == null ? void 0 : options.batchRunId) ?? getBatchRunId();
4573
+ const execution = new ScenarioExecution(cfg, steps, batchRunId2);
3918
4574
  let eventBus = null;
3919
4575
  let subscription = null;
3920
4576
  try {
3921
- const envConfig2 = getEnv();
4577
+ const projectConfig = await getProjectConfig();
4578
+ ensureTracingInitialized(projectConfig == null ? void 0 : projectConfig.observability);
4579
+ const envConfig = getEnv();
3922
4580
  eventBus = new EventBus({
3923
- endpoint: envConfig2.LANGWATCH_ENDPOINT,
3924
- apiKey: envConfig2.LANGWATCH_API_KEY
4581
+ endpoint: ((_a = options == null ? void 0 : options.langwatch) == null ? void 0 : _a.endpoint) ?? envConfig.LANGWATCH_ENDPOINT,
4582
+ apiKey: ((_b = options == null ? void 0 : options.langwatch) == null ? void 0 : _b.apiKey) ?? envConfig.LANGWATCH_API_KEY
3925
4583
  });
3926
4584
  eventBus.listen();
3927
4585
  subscription = eventBus.subscribeTo(execution.events$);
@@ -3938,6 +4596,9 @@ async function run(cfg) {
3938
4596
  } finally {
3939
4597
  await (eventBus == null ? void 0 : eventBus.drain());
3940
4598
  subscription == null ? void 0 : subscription.unsubscribe();
4599
+ if (cfg.threadId) {
4600
+ judgeSpanCollector.clearSpansForThread(cfg.threadId);
4601
+ }
3941
4602
  }
3942
4603
  }
3943
4604
  function formatMessage(m) {
@@ -3982,6 +4643,27 @@ function formatPart(part) {
3982
4643
  }
3983
4644
  }
3984
4645
 
4646
+ // src/tracing/filters.ts
4647
// Filter preset: a single rule including only spans whose instrumentation
// scope name equals "@langwatch/scenario".
var scenarioOnly = [
  { include: { instrumentationScopeName: [{ equals: "@langwatch/scenario" }] } }
];
4654
/**
 * Build a filter that includes the scenario instrumentation scope plus any
 * additional scopes supplied by the caller.
 *
 * @param scopes Extra instrumentation scope names to include.
 * @returns A one-rule filter array whose `include.instrumentationScopeName`
 *   lists "@langwatch/scenario" first, then each given scope, as `{ equals }` matchers.
 */
function withCustomScopes(...scopes) {
  const instrumentationScopeName = [{ equals: "@langwatch/scenario" }];
  for (const scope of scopes) {
    instrumentationScopeName.push({ equals: scope });
  }
  return [{ include: { instrumentationScopeName } }];
}
4666
+
3985
4667
  // src/index.ts
3986
4668
  var scenario = {
3987
4669
  ...agents_exports,
@@ -4017,8 +4699,11 @@ export {
4017
4699
  proceed,
4018
4700
  run,
4019
4701
  scenario,
4702
+ scenarioOnly,
4020
4703
  scenarioProjectConfigSchema,
4704
+ setupScenarioTracing,
4021
4705
  succeed,
4022
4706
  user,
4023
- userSimulatorAgent
4707
+ userSimulatorAgent,
4708
+ withCustomScopes
4024
4709
  };