@langwatch/scenario 0.4.1 → 0.4.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -5,6 +5,9 @@ var __getOwnPropDesc = Object.getOwnPropertyDescriptor;
5
5
  var __getOwnPropNames = Object.getOwnPropertyNames;
6
6
  var __getProtoOf = Object.getPrototypeOf;
7
7
  var __hasOwnProp = Object.prototype.hasOwnProperty;
8
+ var __esm = (fn, res) => function __init() {
9
+ return fn && (res = (0, fn[__getOwnPropNames(fn)[0]])(fn = 0)), res;
10
+ };
8
11
  var __export = (target, all) => {
9
12
  for (var name in all)
10
13
  __defProp(target, name, { get: all[name], enumerable: true });
@@ -27,359 +30,976 @@ var __toESM = (mod, isNodeMode, target) => (target = mod != null ? __create(__ge
27
30
  ));
28
31
  var __toCommonJS = (mod) => __copyProps(__defProp({}, "__esModule", { value: true }), mod);
29
32
 
30
- // src/index.ts
31
- var index_exports = {};
32
- __export(index_exports, {
33
- AgentAdapter: () => AgentAdapter,
34
- AgentRole: () => AgentRole,
35
- DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
36
- DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
37
- JudgeAgentAdapter: () => JudgeAgentAdapter,
38
- JudgeSpanCollector: () => JudgeSpanCollector,
39
- JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
40
- RealtimeAgentAdapter: () => RealtimeAgentAdapter,
41
- ScenarioExecution: () => ScenarioExecution,
42
- ScenarioExecutionState: () => ScenarioExecutionState,
43
- StateChangeEventType: () => StateChangeEventType,
44
- UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
45
- agent: () => agent,
46
- allAgentRoles: () => allAgentRoles,
47
- default: () => index_default,
48
- defineConfig: () => defineConfig,
49
- fail: () => fail,
50
- judge: () => judge,
51
- judgeAgent: () => judgeAgent,
52
- judgeSpanCollector: () => judgeSpanCollector,
53
- judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
54
- message: () => message,
55
- proceed: () => proceed,
56
- run: () => run,
57
- scenario: () => scenario,
58
- scenarioProjectConfigSchema: () => scenarioProjectConfigSchema,
59
- succeed: () => succeed,
60
- user: () => user,
61
- userSimulatorAgent: () => userSimulatorAgent
33
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
34
+ var _globalThis;
35
+ var init_globalThis = __esm({
36
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js"() {
37
+ "use strict";
38
+ _globalThis = typeof globalThis === "object" ? globalThis : global;
39
+ }
62
40
  });
63
- module.exports = __toCommonJS(index_exports);
64
41
 
65
- // src/tracing/setup.ts
66
- var import_node = require("langwatch/observability/node");
42
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/index.js
43
+ var init_node = __esm({
44
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/index.js"() {
45
+ "use strict";
46
+ init_globalThis();
47
+ }
48
+ });
67
49
 
68
- // src/agents/judge/judge-span-collector.ts
69
- var import_observability = require("langwatch/observability");
70
- var JudgeSpanCollector = class {
71
- spans = [];
72
- onStart() {
50
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/index.js
51
+ var init_platform = __esm({
52
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/index.js"() {
53
+ "use strict";
54
+ init_node();
73
55
  }
74
- onEnd(span) {
75
- this.spans.push(span);
56
+ });
57
+
58
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/version.js
59
+ var VERSION;
60
+ var init_version = __esm({
61
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/version.js"() {
62
+ "use strict";
63
+ VERSION = "1.9.0";
76
64
  }
77
- forceFlush() {
78
- return Promise.resolve();
65
+ });
66
+
67
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/semver.js
68
+ function _makeCompatibilityCheck(ownVersion) {
69
+ var acceptedVersions = /* @__PURE__ */ new Set([ownVersion]);
70
+ var rejectedVersions = /* @__PURE__ */ new Set();
71
+ var myVersionMatch = ownVersion.match(re);
72
+ if (!myVersionMatch) {
73
+ return function() {
74
+ return false;
75
+ };
79
76
  }
80
- shutdown() {
81
- this.spans = [];
82
- return Promise.resolve();
77
+ var ownVersionParsed = {
78
+ major: +myVersionMatch[1],
79
+ minor: +myVersionMatch[2],
80
+ patch: +myVersionMatch[3],
81
+ prerelease: myVersionMatch[4]
82
+ };
83
+ if (ownVersionParsed.prerelease != null) {
84
+ return function isExactmatch(globalVersion) {
85
+ return globalVersion === ownVersion;
86
+ };
83
87
  }
84
- /**
85
- * Retrieves all spans associated with a specific thread.
86
- * @param threadId - The thread identifier to filter spans by
87
- * @returns Array of spans for the given thread
88
- */
89
- getSpansForThread(threadId) {
90
- const spanMap = /* @__PURE__ */ new Map();
91
- for (const span of this.spans) {
92
- spanMap.set(span.spanContext().spanId, span);
88
+ function _reject(v) {
89
+ rejectedVersions.add(v);
90
+ return false;
91
+ }
92
+ function _accept(v) {
93
+ acceptedVersions.add(v);
94
+ return true;
95
+ }
96
+ return function isCompatible2(globalVersion) {
97
+ if (acceptedVersions.has(globalVersion)) {
98
+ return true;
93
99
  }
94
- const belongsToThread = (span) => {
95
- var _a;
96
- if (span.attributes[import_observability.attributes.ATTR_LANGWATCH_THREAD_ID] === threadId) {
97
- return true;
98
- }
99
- const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
100
- if (parentId && spanMap.has(parentId)) {
101
- return belongsToThread(spanMap.get(parentId));
102
- }
100
+ if (rejectedVersions.has(globalVersion)) {
103
101
  return false;
102
+ }
103
+ var globalVersionMatch = globalVersion.match(re);
104
+ if (!globalVersionMatch) {
105
+ return _reject(globalVersion);
106
+ }
107
+ var globalVersionParsed = {
108
+ major: +globalVersionMatch[1],
109
+ minor: +globalVersionMatch[2],
110
+ patch: +globalVersionMatch[3],
111
+ prerelease: globalVersionMatch[4]
104
112
  };
105
- return this.spans.filter(belongsToThread);
113
+ if (globalVersionParsed.prerelease != null) {
114
+ return _reject(globalVersion);
115
+ }
116
+ if (ownVersionParsed.major !== globalVersionParsed.major) {
117
+ return _reject(globalVersion);
118
+ }
119
+ if (ownVersionParsed.major === 0) {
120
+ if (ownVersionParsed.minor === globalVersionParsed.minor && ownVersionParsed.patch <= globalVersionParsed.patch) {
121
+ return _accept(globalVersion);
122
+ }
123
+ return _reject(globalVersion);
124
+ }
125
+ if (ownVersionParsed.minor <= globalVersionParsed.minor) {
126
+ return _accept(globalVersion);
127
+ }
128
+ return _reject(globalVersion);
129
+ };
130
+ }
131
+ var re, isCompatible;
132
+ var init_semver = __esm({
133
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/semver.js"() {
134
+ "use strict";
135
+ init_version();
136
+ re = /^(\d+)\.(\d+)\.(\d+)(-(.+))?$/;
137
+ isCompatible = _makeCompatibilityCheck(VERSION);
106
138
  }
107
- };
108
- var judgeSpanCollector = new JudgeSpanCollector();
109
-
110
- // src/config/env.ts
111
- var import_v4 = require("zod/v4");
112
-
113
- // src/config/log-levels.ts
114
- var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
115
- LogLevel2["ERROR"] = "ERROR";
116
- LogLevel2["WARN"] = "WARN";
117
- LogLevel2["INFO"] = "INFO";
118
- LogLevel2["DEBUG"] = "DEBUG";
119
- return LogLevel2;
120
- })(LogLevel || {});
121
- var LOG_LEVELS = Object.values(LogLevel);
139
+ });
122
140
 
123
- // src/config/env.ts
124
- var envSchema = import_v4.z.object({
125
- /**
126
- * LangWatch API key for event reporting.
127
- * If not provided, events will not be sent to LangWatch.
128
- */
129
- LANGWATCH_API_KEY: import_v4.z.string().optional(),
130
- /**
131
- * LangWatch endpoint URL for event reporting.
132
- * Defaults to the production LangWatch endpoint.
133
- */
134
- LANGWATCH_ENDPOINT: import_v4.z.string().url().optional().default("https://app.langwatch.ai"),
135
- /**
136
- * Disables simulation report info messages when set to any truthy value.
137
- * Useful for CI/CD environments or when you want cleaner output.
138
- */
139
- SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v4.z.string().optional().transform((val) => Boolean(val)),
140
- /**
141
- * Node environment - affects logging and behavior.
142
- * Defaults to 'development' if not specified.
143
- */
144
- NODE_ENV: import_v4.z.enum(["development", "production", "test"]).default("development"),
145
- /**
146
- * Case-insensitive log level for the scenario package.
147
- * Defaults to 'info' if not specified.
148
- */
149
- LOG_LEVEL: import_v4.z.string().toUpperCase().pipe(import_v4.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
150
- /**
151
- * Scenario batch run ID.
152
- * If not provided, a random ID will be generated.
153
- */
154
- SCENARIO_BATCH_RUN_ID: import_v4.z.string().optional()
141
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/global-utils.js
142
+ function registerGlobal(type, instance, diag2, allowOverride) {
143
+ var _a;
144
+ if (allowOverride === void 0) {
145
+ allowOverride = false;
146
+ }
147
+ var api = _global[GLOBAL_OPENTELEMETRY_API_KEY] = (_a = _global[GLOBAL_OPENTELEMETRY_API_KEY]) !== null && _a !== void 0 ? _a : {
148
+ version: VERSION
149
+ };
150
+ if (!allowOverride && api[type]) {
151
+ var err = new Error("@opentelemetry/api: Attempted duplicate registration of API: " + type);
152
+ diag2.error(err.stack || err.message);
153
+ return false;
154
+ }
155
+ if (api.version !== VERSION) {
156
+ var err = new Error("@opentelemetry/api: Registration of version v" + api.version + " for " + type + " does not match previously registered API v" + VERSION);
157
+ diag2.error(err.stack || err.message);
158
+ return false;
159
+ }
160
+ api[type] = instance;
161
+ diag2.debug("@opentelemetry/api: Registered a global for " + type + " v" + VERSION + ".");
162
+ return true;
163
+ }
164
+ function getGlobal(type) {
165
+ var _a, _b;
166
+ var globalVersion = (_a = _global[GLOBAL_OPENTELEMETRY_API_KEY]) === null || _a === void 0 ? void 0 : _a.version;
167
+ if (!globalVersion || !isCompatible(globalVersion)) {
168
+ return;
169
+ }
170
+ return (_b = _global[GLOBAL_OPENTELEMETRY_API_KEY]) === null || _b === void 0 ? void 0 : _b[type];
171
+ }
172
+ function unregisterGlobal(type, diag2) {
173
+ diag2.debug("@opentelemetry/api: Unregistering a global for " + type + " v" + VERSION + ".");
174
+ var api = _global[GLOBAL_OPENTELEMETRY_API_KEY];
175
+ if (api) {
176
+ delete api[type];
177
+ }
178
+ }
179
+ var major, GLOBAL_OPENTELEMETRY_API_KEY, _global;
180
+ var init_global_utils = __esm({
181
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/global-utils.js"() {
182
+ "use strict";
183
+ init_platform();
184
+ init_version();
185
+ init_semver();
186
+ major = VERSION.split(".")[0];
187
+ GLOBAL_OPENTELEMETRY_API_KEY = Symbol.for("opentelemetry.js.api." + major);
188
+ _global = _globalThis;
189
+ }
155
190
  });
156
- function getEnv() {
157
- return envSchema.parse(process.env);
191
+
192
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/ComponentLogger.js
193
+ function logProxy(funcName, namespace, args) {
194
+ var logger2 = getGlobal("diag");
195
+ if (!logger2) {
196
+ return;
197
+ }
198
+ args.unshift(namespace);
199
+ return logger2[funcName].apply(logger2, __spreadArray([], __read(args), false));
158
200
  }
201
+ var __read, __spreadArray, DiagComponentLogger;
202
+ var init_ComponentLogger = __esm({
203
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/ComponentLogger.js"() {
204
+ "use strict";
205
+ init_global_utils();
206
+ __read = function(o, n) {
207
+ var m = typeof Symbol === "function" && o[Symbol.iterator];
208
+ if (!m) return o;
209
+ var i = m.call(o), r, ar = [], e;
210
+ try {
211
+ while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
212
+ } catch (error) {
213
+ e = { error };
214
+ } finally {
215
+ try {
216
+ if (r && !r.done && (m = i["return"])) m.call(i);
217
+ } finally {
218
+ if (e) throw e.error;
219
+ }
220
+ }
221
+ return ar;
222
+ };
223
+ __spreadArray = function(to, from, pack) {
224
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
225
+ if (ar || !(i in from)) {
226
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
227
+ ar[i] = from[i];
228
+ }
229
+ }
230
+ return to.concat(ar || Array.prototype.slice.call(from));
231
+ };
232
+ DiagComponentLogger = /** @class */
233
+ (function() {
234
+ function DiagComponentLogger2(props) {
235
+ this._namespace = props.namespace || "DiagComponentLogger";
236
+ }
237
+ DiagComponentLogger2.prototype.debug = function() {
238
+ var args = [];
239
+ for (var _i = 0; _i < arguments.length; _i++) {
240
+ args[_i] = arguments[_i];
241
+ }
242
+ return logProxy("debug", this._namespace, args);
243
+ };
244
+ DiagComponentLogger2.prototype.error = function() {
245
+ var args = [];
246
+ for (var _i = 0; _i < arguments.length; _i++) {
247
+ args[_i] = arguments[_i];
248
+ }
249
+ return logProxy("error", this._namespace, args);
250
+ };
251
+ DiagComponentLogger2.prototype.info = function() {
252
+ var args = [];
253
+ for (var _i = 0; _i < arguments.length; _i++) {
254
+ args[_i] = arguments[_i];
255
+ }
256
+ return logProxy("info", this._namespace, args);
257
+ };
258
+ DiagComponentLogger2.prototype.warn = function() {
259
+ var args = [];
260
+ for (var _i = 0; _i < arguments.length; _i++) {
261
+ args[_i] = arguments[_i];
262
+ }
263
+ return logProxy("warn", this._namespace, args);
264
+ };
265
+ DiagComponentLogger2.prototype.verbose = function() {
266
+ var args = [];
267
+ for (var _i = 0; _i < arguments.length; _i++) {
268
+ args[_i] = arguments[_i];
269
+ }
270
+ return logProxy("verbose", this._namespace, args);
271
+ };
272
+ return DiagComponentLogger2;
273
+ })();
274
+ }
275
+ });
159
276
 
160
- // src/config/load.ts
161
- var import_promises = __toESM(require("fs/promises"));
162
- var import_node_path = __toESM(require("path"));
163
- var import_node_url = require("url");
277
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/types.js
278
+ var DiagLogLevel;
279
+ var init_types = __esm({
280
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/types.js"() {
281
+ "use strict";
282
+ (function(DiagLogLevel2) {
283
+ DiagLogLevel2[DiagLogLevel2["NONE"] = 0] = "NONE";
284
+ DiagLogLevel2[DiagLogLevel2["ERROR"] = 30] = "ERROR";
285
+ DiagLogLevel2[DiagLogLevel2["WARN"] = 50] = "WARN";
286
+ DiagLogLevel2[DiagLogLevel2["INFO"] = 60] = "INFO";
287
+ DiagLogLevel2[DiagLogLevel2["DEBUG"] = 70] = "DEBUG";
288
+ DiagLogLevel2[DiagLogLevel2["VERBOSE"] = 80] = "VERBOSE";
289
+ DiagLogLevel2[DiagLogLevel2["ALL"] = 9999] = "ALL";
290
+ })(DiagLogLevel || (DiagLogLevel = {}));
291
+ }
292
+ });
164
293
 
165
- // src/domain/index.ts
166
- var domain_exports = {};
167
- __export(domain_exports, {
168
- AgentAdapter: () => AgentAdapter,
169
- AgentRole: () => AgentRole,
170
- DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
171
- DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
172
- JudgeAgentAdapter: () => JudgeAgentAdapter,
173
- UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
174
- allAgentRoles: () => allAgentRoles,
175
- defineConfig: () => defineConfig,
176
- scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
294
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/internal/logLevelLogger.js
295
+ function createLogLevelDiagLogger(maxLevel, logger2) {
296
+ if (maxLevel < DiagLogLevel.NONE) {
297
+ maxLevel = DiagLogLevel.NONE;
298
+ } else if (maxLevel > DiagLogLevel.ALL) {
299
+ maxLevel = DiagLogLevel.ALL;
300
+ }
301
+ logger2 = logger2 || {};
302
+ function _filterFunc(funcName, theLevel) {
303
+ var theFunc = logger2[funcName];
304
+ if (typeof theFunc === "function" && maxLevel >= theLevel) {
305
+ return theFunc.bind(logger2);
306
+ }
307
+ return function() {
308
+ };
309
+ }
310
+ return {
311
+ error: _filterFunc("error", DiagLogLevel.ERROR),
312
+ warn: _filterFunc("warn", DiagLogLevel.WARN),
313
+ info: _filterFunc("info", DiagLogLevel.INFO),
314
+ debug: _filterFunc("debug", DiagLogLevel.DEBUG),
315
+ verbose: _filterFunc("verbose", DiagLogLevel.VERBOSE)
316
+ };
317
+ }
318
+ var init_logLevelLogger = __esm({
319
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/internal/logLevelLogger.js"() {
320
+ "use strict";
321
+ init_types();
322
+ }
177
323
  });
178
324
 
179
- // src/domain/core/config.ts
180
- var import_v43 = require("zod/v4");
181
-
182
- // src/domain/core/schemas/model.schema.ts
183
- var import_v42 = require("zod/v4");
184
-
185
- // src/domain/core/constants.ts
186
- var DEFAULT_TEMPERATURE = 0;
187
-
188
- // src/domain/core/schemas/model.schema.ts
189
- var modelSchema = import_v42.z.object({
190
- model: import_v42.z.custom((val) => Boolean(val), {
191
- message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
192
- }).describe("Language model that is used by the AI SDK Core functions."),
193
- temperature: import_v42.z.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
194
- maxTokens: import_v42.z.number().optional().describe("The maximum number of tokens to generate.")
325
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/diag.js
326
+ var __read2, __spreadArray2, API_NAME, DiagAPI;
327
+ var init_diag = __esm({
328
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/diag.js"() {
329
+ "use strict";
330
+ init_ComponentLogger();
331
+ init_logLevelLogger();
332
+ init_types();
333
+ init_global_utils();
334
+ __read2 = function(o, n) {
335
+ var m = typeof Symbol === "function" && o[Symbol.iterator];
336
+ if (!m) return o;
337
+ var i = m.call(o), r, ar = [], e;
338
+ try {
339
+ while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
340
+ } catch (error) {
341
+ e = { error };
342
+ } finally {
343
+ try {
344
+ if (r && !r.done && (m = i["return"])) m.call(i);
345
+ } finally {
346
+ if (e) throw e.error;
347
+ }
348
+ }
349
+ return ar;
350
+ };
351
+ __spreadArray2 = function(to, from, pack) {
352
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
353
+ if (ar || !(i in from)) {
354
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
355
+ ar[i] = from[i];
356
+ }
357
+ }
358
+ return to.concat(ar || Array.prototype.slice.call(from));
359
+ };
360
+ API_NAME = "diag";
361
+ DiagAPI = /** @class */
362
+ (function() {
363
+ function DiagAPI2() {
364
+ function _logProxy(funcName) {
365
+ return function() {
366
+ var args = [];
367
+ for (var _i = 0; _i < arguments.length; _i++) {
368
+ args[_i] = arguments[_i];
369
+ }
370
+ var logger2 = getGlobal("diag");
371
+ if (!logger2)
372
+ return;
373
+ return logger2[funcName].apply(logger2, __spreadArray2([], __read2(args), false));
374
+ };
375
+ }
376
+ var self = this;
377
+ var setLogger = function(logger2, optionsOrLogLevel) {
378
+ var _a, _b, _c;
379
+ if (optionsOrLogLevel === void 0) {
380
+ optionsOrLogLevel = { logLevel: DiagLogLevel.INFO };
381
+ }
382
+ if (logger2 === self) {
383
+ var err = new Error("Cannot use diag as the logger for itself. Please use a DiagLogger implementation like ConsoleDiagLogger or a custom implementation");
384
+ self.error((_a = err.stack) !== null && _a !== void 0 ? _a : err.message);
385
+ return false;
386
+ }
387
+ if (typeof optionsOrLogLevel === "number") {
388
+ optionsOrLogLevel = {
389
+ logLevel: optionsOrLogLevel
390
+ };
391
+ }
392
+ var oldLogger = getGlobal("diag");
393
+ var newLogger = createLogLevelDiagLogger((_b = optionsOrLogLevel.logLevel) !== null && _b !== void 0 ? _b : DiagLogLevel.INFO, logger2);
394
+ if (oldLogger && !optionsOrLogLevel.suppressOverrideMessage) {
395
+ var stack = (_c = new Error().stack) !== null && _c !== void 0 ? _c : "<failed to generate stacktrace>";
396
+ oldLogger.warn("Current logger will be overwritten from " + stack);
397
+ newLogger.warn("Current logger will overwrite one already registered from " + stack);
398
+ }
399
+ return registerGlobal("diag", newLogger, self, true);
400
+ };
401
+ self.setLogger = setLogger;
402
+ self.disable = function() {
403
+ unregisterGlobal(API_NAME, self);
404
+ };
405
+ self.createComponentLogger = function(options) {
406
+ return new DiagComponentLogger(options);
407
+ };
408
+ self.verbose = _logProxy("verbose");
409
+ self.debug = _logProxy("debug");
410
+ self.info = _logProxy("info");
411
+ self.warn = _logProxy("warn");
412
+ self.error = _logProxy("error");
413
+ }
414
+ DiagAPI2.instance = function() {
415
+ if (!this._instance) {
416
+ this._instance = new DiagAPI2();
417
+ }
418
+ return this._instance;
419
+ };
420
+ return DiagAPI2;
421
+ })();
422
+ }
195
423
  });
196
424
 
197
- // src/domain/core/config.ts
198
- var headless = typeof process !== "undefined" ? process.env.SCENARIO_HEADLESS === "true" : false;
199
- var scenarioProjectConfigSchema = import_v43.z.object({
200
- defaultModel: modelSchema.optional(),
201
- headless: import_v43.z.boolean().optional().default(headless)
202
- }).strict();
203
- function defineConfig(config2) {
204
- return config2;
425
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/context.js
426
+ function createContextKey(description) {
427
+ return Symbol.for(description);
205
428
  }
206
-
207
- // src/domain/agents/index.ts
208
- var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
209
- AgentRole2["USER"] = "User";
210
- AgentRole2["AGENT"] = "Agent";
211
- AgentRole2["JUDGE"] = "Judge";
212
- return AgentRole2;
213
- })(AgentRole || {});
214
- var allAgentRoles = [
215
- "User" /* USER */,
216
- "Agent" /* AGENT */,
217
- "Judge" /* JUDGE */
218
- ];
219
- var AgentAdapter = class {
220
- name;
221
- role = "Agent" /* AGENT */;
222
- };
223
- var UserSimulatorAgentAdapter = class extends AgentAdapter {
224
- name = "UserSimulatorAgent";
225
- role = "User" /* USER */;
226
- };
227
- var JudgeAgentAdapter = class extends AgentAdapter {
228
- name = "JudgeAgent";
229
- role = "Judge" /* JUDGE */;
230
- };
231
-
232
- // src/domain/scenarios/index.ts
233
- var DEFAULT_MAX_TURNS = 10;
234
- var DEFAULT_VERBOSE = false;
235
-
236
- // src/config/load.ts
237
- async function loadScenarioProjectConfig() {
238
- const cwd = process.cwd();
239
- const configNames = [
240
- "scenario.config.js",
241
- "scenario.config.mjs"
242
- ];
243
- for (const name of configNames) {
244
- const fullPath = import_node_path.default.join(cwd, name);
245
- try {
246
- await import_promises.default.access(fullPath);
247
- const configModule = await import((0, import_node_url.pathToFileURL)(fullPath).href);
248
- const config2 = configModule.default || configModule;
249
- const parsed = scenarioProjectConfigSchema.safeParse(config2);
250
- if (!parsed.success) {
251
- throw new Error(
252
- `Invalid config file ${name}: ${JSON.stringify(parsed.error.format(), null, 2)}`
253
- );
254
- }
255
- return parsed.data;
256
- } catch (error) {
257
- if (error instanceof Error && "code" in error && error.code === "ENOENT") {
258
- continue;
429
+ var BaseContext, ROOT_CONTEXT;
430
+ var init_context = __esm({
431
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/context.js"() {
432
+ "use strict";
433
+ BaseContext = /** @class */
434
+ /* @__PURE__ */ (function() {
435
+ function BaseContext2(parentContext) {
436
+ var self = this;
437
+ self._currentContext = parentContext ? new Map(parentContext) : /* @__PURE__ */ new Map();
438
+ self.getValue = function(key) {
439
+ return self._currentContext.get(key);
440
+ };
441
+ self.setValue = function(key, value) {
442
+ var context2 = new BaseContext2(self._currentContext);
443
+ context2._currentContext.set(key, value);
444
+ return context2;
445
+ };
446
+ self.deleteValue = function(key) {
447
+ var context2 = new BaseContext2(self._currentContext);
448
+ context2._currentContext.delete(key);
449
+ return context2;
450
+ };
259
451
  }
260
- throw error;
261
- }
452
+ return BaseContext2;
453
+ })();
454
+ ROOT_CONTEXT = new BaseContext();
262
455
  }
263
- return await scenarioProjectConfigSchema.parseAsync({});
264
- }
456
+ });
265
457
 
266
- // src/utils/logger.ts
267
- var Logger = class _Logger {
268
- constructor(context2) {
269
- this.context = context2;
270
- }
271
- /**
272
- * Creates a logger with context (e.g., class name)
273
- */
274
- static create(context2) {
275
- return new _Logger(context2);
276
- }
277
- /**
278
- * Returns the current log level from environment.
279
- * Uses a getter for clarity and idiomatic usage.
280
- */
281
- get LOG_LEVEL() {
282
- return getEnv().LOG_LEVEL;
283
- }
284
- /**
285
- * Returns the index of the given log level in the LOG_LEVELS array.
286
- * @param level - The log level to get the index for.
287
- * @returns The index of the log level in the LOG_LEVELS array.
288
- */
289
- getLogLevelIndexFor(level) {
290
- return LOG_LEVELS.indexOf(level);
291
- }
292
- /**
293
- * Checks if logging should occur based on LOG_LEVEL env var
294
- */
295
- shouldLog(level) {
296
- const currentLevelIndex = this.getLogLevelIndexFor(this.LOG_LEVEL);
297
- const requestedLevelIndex = this.getLogLevelIndexFor(level);
298
- return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
299
- }
300
- formatMessage(message2) {
301
- return this.context ? `[${this.context}] ${message2}` : message2;
302
- }
303
- error(message2, data) {
304
- if (this.shouldLog("ERROR" /* ERROR */)) {
305
- const formattedMessage = this.formatMessage(message2);
306
- if (data) {
307
- console.error(formattedMessage, data);
308
- } else {
309
- console.error(formattedMessage);
458
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/NoopContextManager.js
459
+ var __read3, __spreadArray3, NoopContextManager;
460
+ var init_NoopContextManager = __esm({
461
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/NoopContextManager.js"() {
462
+ "use strict";
463
+ init_context();
464
+ __read3 = function(o, n) {
465
+ var m = typeof Symbol === "function" && o[Symbol.iterator];
466
+ if (!m) return o;
467
+ var i = m.call(o), r, ar = [], e;
468
+ try {
469
+ while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
470
+ } catch (error) {
471
+ e = { error };
472
+ } finally {
473
+ try {
474
+ if (r && !r.done && (m = i["return"])) m.call(i);
475
+ } finally {
476
+ if (e) throw e.error;
477
+ }
310
478
  }
311
- }
312
- }
313
- warn(message2, data) {
314
- if (this.shouldLog("WARN" /* WARN */)) {
315
- const formattedMessage = this.formatMessage(message2);
316
- if (data) {
317
- console.warn(formattedMessage, data);
318
- } else {
319
- console.warn(formattedMessage);
479
+ return ar;
480
+ };
481
+ __spreadArray3 = function(to, from, pack) {
482
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
483
+ if (ar || !(i in from)) {
484
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
485
+ ar[i] = from[i];
486
+ }
320
487
  }
321
- }
322
- }
323
- info(message2, data) {
324
- if (this.shouldLog("INFO" /* INFO */)) {
325
- const formattedMessage = this.formatMessage(message2);
326
- if (data) {
327
- console.info(formattedMessage, data);
328
- } else {
329
- console.info(formattedMessage);
488
+ return to.concat(ar || Array.prototype.slice.call(from));
489
+ };
490
+ NoopContextManager = /** @class */
491
+ (function() {
492
+ function NoopContextManager2() {
330
493
  }
331
- }
494
+ NoopContextManager2.prototype.active = function() {
495
+ return ROOT_CONTEXT;
496
+ };
497
+ NoopContextManager2.prototype.with = function(_context, fn, thisArg) {
498
+ var args = [];
499
+ for (var _i = 3; _i < arguments.length; _i++) {
500
+ args[_i - 3] = arguments[_i];
501
+ }
502
+ return fn.call.apply(fn, __spreadArray3([thisArg], __read3(args), false));
503
+ };
504
+ NoopContextManager2.prototype.bind = function(_context, target) {
505
+ return target;
506
+ };
507
+ NoopContextManager2.prototype.enable = function() {
508
+ return this;
509
+ };
510
+ NoopContextManager2.prototype.disable = function() {
511
+ return this;
512
+ };
513
+ return NoopContextManager2;
514
+ })();
332
515
  }
333
- debug(message2, data) {
334
- if (this.shouldLog("DEBUG" /* DEBUG */)) {
335
- const formattedMessage = this.formatMessage(message2);
336
- if (data) {
337
- console.log(formattedMessage, data);
338
- } else {
339
- console.log(formattedMessage);
516
+ });
517
+
518
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/context.js
519
+ var __read4, __spreadArray4, API_NAME2, NOOP_CONTEXT_MANAGER, ContextAPI;
520
+ var init_context2 = __esm({
521
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/context.js"() {
522
+ "use strict";
523
+ init_NoopContextManager();
524
+ init_global_utils();
525
+ init_diag();
526
+ __read4 = function(o, n) {
527
+ var m = typeof Symbol === "function" && o[Symbol.iterator];
528
+ if (!m) return o;
529
+ var i = m.call(o), r, ar = [], e;
530
+ try {
531
+ while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
532
+ } catch (error) {
533
+ e = { error };
534
+ } finally {
535
+ try {
536
+ if (r && !r.done && (m = i["return"])) m.call(i);
537
+ } finally {
538
+ if (e) throw e.error;
539
+ }
340
540
  }
341
- }
541
+ return ar;
542
+ };
543
+ __spreadArray4 = function(to, from, pack) {
544
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
545
+ if (ar || !(i in from)) {
546
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
547
+ ar[i] = from[i];
548
+ }
549
+ }
550
+ return to.concat(ar || Array.prototype.slice.call(from));
551
+ };
552
+ API_NAME2 = "context";
553
+ NOOP_CONTEXT_MANAGER = new NoopContextManager();
554
+ ContextAPI = /** @class */
555
+ (function() {
556
+ function ContextAPI2() {
557
+ }
558
+ ContextAPI2.getInstance = function() {
559
+ if (!this._instance) {
560
+ this._instance = new ContextAPI2();
561
+ }
562
+ return this._instance;
563
+ };
564
+ ContextAPI2.prototype.setGlobalContextManager = function(contextManager) {
565
+ return registerGlobal(API_NAME2, contextManager, DiagAPI.instance());
566
+ };
567
+ ContextAPI2.prototype.active = function() {
568
+ return this._getContextManager().active();
569
+ };
570
+ ContextAPI2.prototype.with = function(context2, fn, thisArg) {
571
+ var _a;
572
+ var args = [];
573
+ for (var _i = 3; _i < arguments.length; _i++) {
574
+ args[_i - 3] = arguments[_i];
575
+ }
576
+ return (_a = this._getContextManager()).with.apply(_a, __spreadArray4([context2, fn, thisArg], __read4(args), false));
577
+ };
578
+ ContextAPI2.prototype.bind = function(context2, target) {
579
+ return this._getContextManager().bind(context2, target);
580
+ };
581
+ ContextAPI2.prototype._getContextManager = function() {
582
+ return getGlobal(API_NAME2) || NOOP_CONTEXT_MANAGER;
583
+ };
584
+ ContextAPI2.prototype.disable = function() {
585
+ this._getContextManager().disable();
586
+ unregisterGlobal(API_NAME2, DiagAPI.instance());
587
+ };
588
+ return ContextAPI2;
589
+ })();
342
590
  }
343
- };
591
+ });
344
592
 
345
- // src/config/get-project-config.ts
346
- var logger = new Logger("scenario.config");
347
- var configLoaded = false;
348
- var config = null;
349
- var configLoadPromise = null;
350
- async function loadProjectConfig() {
351
- if (configLoaded) {
352
- return;
593
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/trace_flags.js
594
+ var TraceFlags;
595
+ var init_trace_flags = __esm({
596
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/trace_flags.js"() {
597
+ "use strict";
598
+ (function(TraceFlags2) {
599
+ TraceFlags2[TraceFlags2["NONE"] = 0] = "NONE";
600
+ TraceFlags2[TraceFlags2["SAMPLED"] = 1] = "SAMPLED";
601
+ })(TraceFlags || (TraceFlags = {}));
353
602
  }
354
- if (configLoadPromise) {
355
- return configLoadPromise;
603
+ });
604
+
605
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/invalid-span-constants.js
606
+ var INVALID_SPANID, INVALID_TRACEID, INVALID_SPAN_CONTEXT;
607
+ var init_invalid_span_constants = __esm({
608
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/invalid-span-constants.js"() {
609
+ "use strict";
610
+ init_trace_flags();
611
+ INVALID_SPANID = "0000000000000000";
612
+ INVALID_TRACEID = "00000000000000000000000000000000";
613
+ INVALID_SPAN_CONTEXT = {
614
+ traceId: INVALID_TRACEID,
615
+ spanId: INVALID_SPANID,
616
+ traceFlags: TraceFlags.NONE
617
+ };
356
618
  }
357
- configLoadPromise = (async () => {
358
- try {
359
- config = await loadScenarioProjectConfig();
360
- logger.debug("loaded scenario project config", { config });
361
- } catch (error) {
362
- logger.error("error loading scenario project config", { error });
363
- } finally {
364
- configLoaded = true;
365
- }
366
- })();
367
- return configLoadPromise;
619
+ });
620
+
621
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NonRecordingSpan.js
622
+ var NonRecordingSpan;
623
+ var init_NonRecordingSpan = __esm({
624
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NonRecordingSpan.js"() {
625
+ "use strict";
626
+ init_invalid_span_constants();
627
+ NonRecordingSpan = /** @class */
628
+ (function() {
629
+ function NonRecordingSpan2(_spanContext) {
630
+ if (_spanContext === void 0) {
631
+ _spanContext = INVALID_SPAN_CONTEXT;
632
+ }
633
+ this._spanContext = _spanContext;
634
+ }
635
+ NonRecordingSpan2.prototype.spanContext = function() {
636
+ return this._spanContext;
637
+ };
638
+ NonRecordingSpan2.prototype.setAttribute = function(_key, _value) {
639
+ return this;
640
+ };
641
+ NonRecordingSpan2.prototype.setAttributes = function(_attributes) {
642
+ return this;
643
+ };
644
+ NonRecordingSpan2.prototype.addEvent = function(_name, _attributes) {
645
+ return this;
646
+ };
647
+ NonRecordingSpan2.prototype.addLink = function(_link) {
648
+ return this;
649
+ };
650
+ NonRecordingSpan2.prototype.addLinks = function(_links) {
651
+ return this;
652
+ };
653
+ NonRecordingSpan2.prototype.setStatus = function(_status) {
654
+ return this;
655
+ };
656
+ NonRecordingSpan2.prototype.updateName = function(_name) {
657
+ return this;
658
+ };
659
+ NonRecordingSpan2.prototype.end = function(_endTime) {
660
+ };
661
+ NonRecordingSpan2.prototype.isRecording = function() {
662
+ return false;
663
+ };
664
+ NonRecordingSpan2.prototype.recordException = function(_exception, _time) {
665
+ };
666
+ return NonRecordingSpan2;
667
+ })();
668
+ }
669
+ });
670
+
671
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/context-utils.js
672
+ function getSpan(context2) {
673
+ return context2.getValue(SPAN_KEY) || void 0;
368
674
  }
369
- async function getProjectConfig() {
370
- await loadProjectConfig();
371
- return config;
675
+ function getActiveSpan() {
676
+ return getSpan(ContextAPI.getInstance().active());
677
+ }
678
+ function setSpan(context2, span) {
679
+ return context2.setValue(SPAN_KEY, span);
680
+ }
681
+ function deleteSpan(context2) {
682
+ return context2.deleteValue(SPAN_KEY);
683
+ }
684
+ function setSpanContext(context2, spanContext) {
685
+ return setSpan(context2, new NonRecordingSpan(spanContext));
686
+ }
687
+ function getSpanContext(context2) {
688
+ var _a;
689
+ return (_a = getSpan(context2)) === null || _a === void 0 ? void 0 : _a.spanContext();
372
690
  }
691
+ var SPAN_KEY;
692
+ var init_context_utils = __esm({
693
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/context-utils.js"() {
694
+ "use strict";
695
+ init_context();
696
+ init_NonRecordingSpan();
697
+ init_context2();
698
+ SPAN_KEY = createContextKey("OpenTelemetry Context Key SPAN");
699
+ }
700
+ });
373
701
 
374
- // src/tracing/setup.ts
375
- var envConfig = getEnv();
376
- var observabilityHandle = (0, import_node.setupObservability)({
377
- langwatch: {
378
- apiKey: envConfig.LANGWATCH_API_KEY,
379
- endpoint: envConfig.LANGWATCH_ENDPOINT
380
- },
381
- spanProcessors: [judgeSpanCollector]
702
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/spancontext-utils.js
703
+ function isValidTraceId(traceId) {
704
+ return VALID_TRACEID_REGEX.test(traceId) && traceId !== INVALID_TRACEID;
705
+ }
706
+ function isValidSpanId(spanId) {
707
+ return VALID_SPANID_REGEX.test(spanId) && spanId !== INVALID_SPANID;
708
+ }
709
+ function isSpanContextValid(spanContext) {
710
+ return isValidTraceId(spanContext.traceId) && isValidSpanId(spanContext.spanId);
711
+ }
712
+ function wrapSpanContext(spanContext) {
713
+ return new NonRecordingSpan(spanContext);
714
+ }
715
+ var VALID_TRACEID_REGEX, VALID_SPANID_REGEX;
716
+ var init_spancontext_utils = __esm({
717
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/spancontext-utils.js"() {
718
+ "use strict";
719
+ init_invalid_span_constants();
720
+ init_NonRecordingSpan();
721
+ VALID_TRACEID_REGEX = /^([0-9a-f]{32})$/i;
722
+ VALID_SPANID_REGEX = /^[0-9a-f]{16}$/i;
723
+ }
724
+ });
725
+
726
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracer.js
727
+ function isSpanContext(spanContext) {
728
+ return typeof spanContext === "object" && typeof spanContext["spanId"] === "string" && typeof spanContext["traceId"] === "string" && typeof spanContext["traceFlags"] === "number";
729
+ }
730
+ var contextApi, NoopTracer;
731
+ var init_NoopTracer = __esm({
732
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracer.js"() {
733
+ "use strict";
734
+ init_context2();
735
+ init_context_utils();
736
+ init_NonRecordingSpan();
737
+ init_spancontext_utils();
738
+ contextApi = ContextAPI.getInstance();
739
+ NoopTracer = /** @class */
740
+ (function() {
741
+ function NoopTracer2() {
742
+ }
743
+ NoopTracer2.prototype.startSpan = function(name, options, context2) {
744
+ if (context2 === void 0) {
745
+ context2 = contextApi.active();
746
+ }
747
+ var root = Boolean(options === null || options === void 0 ? void 0 : options.root);
748
+ if (root) {
749
+ return new NonRecordingSpan();
750
+ }
751
+ var parentFromContext = context2 && getSpanContext(context2);
752
+ if (isSpanContext(parentFromContext) && isSpanContextValid(parentFromContext)) {
753
+ return new NonRecordingSpan(parentFromContext);
754
+ } else {
755
+ return new NonRecordingSpan();
756
+ }
757
+ };
758
+ NoopTracer2.prototype.startActiveSpan = function(name, arg2, arg3, arg4) {
759
+ var opts;
760
+ var ctx;
761
+ var fn;
762
+ if (arguments.length < 2) {
763
+ return;
764
+ } else if (arguments.length === 2) {
765
+ fn = arg2;
766
+ } else if (arguments.length === 3) {
767
+ opts = arg2;
768
+ fn = arg3;
769
+ } else {
770
+ opts = arg2;
771
+ ctx = arg3;
772
+ fn = arg4;
773
+ }
774
+ var parentContext = ctx !== null && ctx !== void 0 ? ctx : contextApi.active();
775
+ var span = this.startSpan(name, opts, parentContext);
776
+ var contextWithSpanSet = setSpan(parentContext, span);
777
+ return contextApi.with(contextWithSpanSet, fn, void 0, span);
778
+ };
779
+ return NoopTracer2;
780
+ })();
781
+ }
782
+ });
783
+
784
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracer.js
785
+ var NOOP_TRACER, ProxyTracer;
786
+ var init_ProxyTracer = __esm({
787
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracer.js"() {
788
+ "use strict";
789
+ init_NoopTracer();
790
+ NOOP_TRACER = new NoopTracer();
791
+ ProxyTracer = /** @class */
792
+ (function() {
793
+ function ProxyTracer2(_provider, name, version, options) {
794
+ this._provider = _provider;
795
+ this.name = name;
796
+ this.version = version;
797
+ this.options = options;
798
+ }
799
+ ProxyTracer2.prototype.startSpan = function(name, options, context2) {
800
+ return this._getTracer().startSpan(name, options, context2);
801
+ };
802
+ ProxyTracer2.prototype.startActiveSpan = function(_name, _options, _context, _fn) {
803
+ var tracer = this._getTracer();
804
+ return Reflect.apply(tracer.startActiveSpan, tracer, arguments);
805
+ };
806
+ ProxyTracer2.prototype._getTracer = function() {
807
+ if (this._delegate) {
808
+ return this._delegate;
809
+ }
810
+ var tracer = this._provider.getDelegateTracer(this.name, this.version, this.options);
811
+ if (!tracer) {
812
+ return NOOP_TRACER;
813
+ }
814
+ this._delegate = tracer;
815
+ return this._delegate;
816
+ };
817
+ return ProxyTracer2;
818
+ })();
819
+ }
820
+ });
821
+
822
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracerProvider.js
823
+ var NoopTracerProvider;
824
+ var init_NoopTracerProvider = __esm({
825
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracerProvider.js"() {
826
+ "use strict";
827
+ init_NoopTracer();
828
+ NoopTracerProvider = /** @class */
829
+ (function() {
830
+ function NoopTracerProvider2() {
831
+ }
832
+ NoopTracerProvider2.prototype.getTracer = function(_name, _version, _options) {
833
+ return new NoopTracer();
834
+ };
835
+ return NoopTracerProvider2;
836
+ })();
837
+ }
838
+ });
839
+
840
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracerProvider.js
841
+ var NOOP_TRACER_PROVIDER, ProxyTracerProvider;
842
+ var init_ProxyTracerProvider = __esm({
843
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracerProvider.js"() {
844
+ "use strict";
845
+ init_ProxyTracer();
846
+ init_NoopTracerProvider();
847
+ NOOP_TRACER_PROVIDER = new NoopTracerProvider();
848
+ ProxyTracerProvider = /** @class */
849
+ (function() {
850
+ function ProxyTracerProvider2() {
851
+ }
852
+ ProxyTracerProvider2.prototype.getTracer = function(name, version, options) {
853
+ var _a;
854
+ return (_a = this.getDelegateTracer(name, version, options)) !== null && _a !== void 0 ? _a : new ProxyTracer(this, name, version, options);
855
+ };
856
+ ProxyTracerProvider2.prototype.getDelegate = function() {
857
+ var _a;
858
+ return (_a = this._delegate) !== null && _a !== void 0 ? _a : NOOP_TRACER_PROVIDER;
859
+ };
860
+ ProxyTracerProvider2.prototype.setDelegate = function(delegate) {
861
+ this._delegate = delegate;
862
+ };
863
+ ProxyTracerProvider2.prototype.getDelegateTracer = function(name, version, options) {
864
+ var _a;
865
+ return (_a = this._delegate) === null || _a === void 0 ? void 0 : _a.getTracer(name, version, options);
866
+ };
867
+ return ProxyTracerProvider2;
868
+ })();
869
+ }
870
+ });
871
+
872
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context-api.js
873
+ var context;
874
+ var init_context_api = __esm({
875
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context-api.js"() {
876
+ "use strict";
877
+ init_context2();
878
+ context = ContextAPI.getInstance();
879
+ }
880
+ });
881
+
882
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag-api.js
883
+ var diag;
884
+ var init_diag_api = __esm({
885
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag-api.js"() {
886
+ "use strict";
887
+ init_diag();
888
+ diag = DiagAPI.instance();
889
+ }
890
+ });
891
+
892
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/trace.js
893
+ var API_NAME3, TraceAPI;
894
+ var init_trace = __esm({
895
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/trace.js"() {
896
+ "use strict";
897
+ init_global_utils();
898
+ init_ProxyTracerProvider();
899
+ init_spancontext_utils();
900
+ init_context_utils();
901
+ init_diag();
902
+ API_NAME3 = "trace";
903
+ TraceAPI = /** @class */
904
+ (function() {
905
+ function TraceAPI2() {
906
+ this._proxyTracerProvider = new ProxyTracerProvider();
907
+ this.wrapSpanContext = wrapSpanContext;
908
+ this.isSpanContextValid = isSpanContextValid;
909
+ this.deleteSpan = deleteSpan;
910
+ this.getSpan = getSpan;
911
+ this.getActiveSpan = getActiveSpan;
912
+ this.getSpanContext = getSpanContext;
913
+ this.setSpan = setSpan;
914
+ this.setSpanContext = setSpanContext;
915
+ }
916
+ TraceAPI2.getInstance = function() {
917
+ if (!this._instance) {
918
+ this._instance = new TraceAPI2();
919
+ }
920
+ return this._instance;
921
+ };
922
+ TraceAPI2.prototype.setGlobalTracerProvider = function(provider) {
923
+ var success = registerGlobal(API_NAME3, this._proxyTracerProvider, DiagAPI.instance());
924
+ if (success) {
925
+ this._proxyTracerProvider.setDelegate(provider);
926
+ }
927
+ return success;
928
+ };
929
+ TraceAPI2.prototype.getTracerProvider = function() {
930
+ return getGlobal(API_NAME3) || this._proxyTracerProvider;
931
+ };
932
+ TraceAPI2.prototype.getTracer = function(name, version) {
933
+ return this.getTracerProvider().getTracer(name, version);
934
+ };
935
+ TraceAPI2.prototype.disable = function() {
936
+ unregisterGlobal(API_NAME3, DiagAPI.instance());
937
+ this._proxyTracerProvider = new ProxyTracerProvider();
938
+ };
939
+ return TraceAPI2;
940
+ })();
941
+ }
942
+ });
943
+
944
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace-api.js
945
+ var trace;
946
+ var init_trace_api = __esm({
947
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace-api.js"() {
948
+ "use strict";
949
+ init_trace();
950
+ trace = TraceAPI.getInstance();
951
+ }
952
+ });
953
+
954
+ // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/index.js
955
+ var init_esm = __esm({
956
+ "node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/index.js"() {
957
+ "use strict";
958
+ init_context();
959
+ init_trace_flags();
960
+ init_context_api();
961
+ init_diag_api();
962
+ init_trace_api();
963
+ }
964
+ });
965
+
966
+ // src/index.ts
967
+ var index_exports = {};
968
+ __export(index_exports, {
969
+ AgentAdapter: () => AgentAdapter,
970
+ AgentRole: () => AgentRole,
971
+ DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
972
+ DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
973
+ JudgeAgentAdapter: () => JudgeAgentAdapter,
974
+ JudgeSpanCollector: () => JudgeSpanCollector,
975
+ JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
976
+ RealtimeAgentAdapter: () => RealtimeAgentAdapter,
977
+ ScenarioExecution: () => ScenarioExecution,
978
+ ScenarioExecutionState: () => ScenarioExecutionState,
979
+ StateChangeEventType: () => StateChangeEventType,
980
+ UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
981
+ agent: () => agent,
982
+ allAgentRoles: () => allAgentRoles,
983
+ default: () => index_default,
984
+ defineConfig: () => defineConfig,
985
+ fail: () => fail,
986
+ judge: () => judge,
987
+ judgeAgent: () => judgeAgent,
988
+ judgeSpanCollector: () => judgeSpanCollector,
989
+ judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
990
+ message: () => message,
991
+ proceed: () => proceed,
992
+ run: () => run,
993
+ scenario: () => scenario,
994
+ scenarioOnly: () => scenarioOnly,
995
+ scenarioProjectConfigSchema: () => scenarioProjectConfigSchema,
996
+ setupScenarioTracing: () => setupScenarioTracing,
997
+ succeed: () => succeed,
998
+ user: () => user,
999
+ userSimulatorAgent: () => userSimulatorAgent,
1000
+ withCustomScopes: () => withCustomScopes
382
1001
  });
1002
+ module.exports = __toCommonJS(index_exports);
383
1003
 
384
1004
  // src/agents/index.ts
385
1005
  var agents_exports = {};
@@ -466,592 +1086,943 @@ var JudgeUtils = {
466
1086
  }
467
1087
  };
468
1088
 
469
- // src/agents/llm-invoker.factory.ts
470
- var import_ai = require("ai");
471
- var createLLMInvoker = (logger2) => {
472
- return async (params) => {
473
- try {
474
- return await (0, import_ai.generateText)({
475
- ...params,
476
- experimental_telemetry: { isEnabled: true }
477
- });
478
- } catch (error) {
479
- logger2.error("Error generating text", { error });
480
- throw error;
481
- }
482
- };
483
- };
484
-
485
- // src/agents/utils.ts
486
- var toolMessageRole = "tool";
487
- var assistantMessageRole = "assistant";
488
- var userMessageRole = "user";
489
- var groupMessagesByToolBoundaries = (messages) => {
490
- const segments = [];
491
- let currentSegment = [];
492
- for (const message2 of messages) {
493
- currentSegment.push(message2);
494
- if (message2.role === toolMessageRole) {
495
- segments.push(currentSegment);
496
- currentSegment = [];
497
- }
498
- }
499
- if (currentSegment.length > 0) {
500
- segments.push(currentSegment);
501
- }
502
- return segments;
503
- };
504
- var segmentHasToolMessages = (segment) => {
505
- return segment.some((message2) => {
506
- if (message2.role === toolMessageRole) return true;
507
- if (message2.role === assistantMessageRole && Array.isArray(message2.content)) {
508
- return message2.content.some((part) => part.type === "tool-call");
509
- }
510
- return false;
511
- });
512
- };
513
- var reverseSegmentRoles = (segment) => {
514
- return segment.map((message2) => {
515
- const hasStringContent = typeof message2.content === "string";
516
- if (!hasStringContent) return message2;
517
- const roleMap = {
518
- [userMessageRole]: assistantMessageRole,
519
- [assistantMessageRole]: userMessageRole
520
- };
521
- const newRole = roleMap[message2.role];
522
- if (!newRole) return message2;
523
- return {
524
- role: newRole,
525
- content: message2.content
526
- };
527
- });
528
- };
529
- var messageRoleReversal = (messages) => {
530
- const segments = groupMessagesByToolBoundaries(messages);
531
- const processedSegments = segments.map(
532
- (segment) => segmentHasToolMessages(segment) ? segment : reverseSegmentRoles(segment)
533
- );
534
- return processedSegments.flat();
535
- };
536
- var criterionToParamName = (criterion) => {
537
- return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
538
- };
539
-
540
- // src/agents/judge/judge-span-digest-formatter.ts
541
- var import_observability2 = require("langwatch/observability");
1089
+ // src/config/env.ts
1090
+ var import_v4 = require("zod/v4");
542
1091
 
543
- // src/agents/judge/deep-transform.ts
544
- function deepTransform(value, fn) {
545
- const result = fn(value);
546
- if (result !== value) return result;
547
- if (Array.isArray(value)) {
548
- return value.map((v) => deepTransform(v, fn));
549
- }
550
- if (value !== null && typeof value === "object") {
551
- const out = {};
552
- for (const [k, v] of Object.entries(value)) {
553
- out[k] = deepTransform(v, fn);
554
- }
555
- return out;
556
- }
557
- return value;
558
- }
1092
+ // src/config/log-levels.ts
1093
+ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
1094
+ LogLevel2["ERROR"] = "ERROR";
1095
+ LogLevel2["WARN"] = "WARN";
1096
+ LogLevel2["INFO"] = "INFO";
1097
+ LogLevel2["DEBUG"] = "DEBUG";
1098
+ return LogLevel2;
1099
+ })(LogLevel || {});
1100
+ var LOG_LEVELS = Object.values(LogLevel);
559
1101
 
560
- // src/agents/judge/string-deduplicator.ts
561
- var StringDeduplicator = class {
562
- seen = /* @__PURE__ */ new Map();
563
- threshold;
564
- constructor(params) {
565
- this.threshold = params.threshold;
566
- }
1102
+ // src/config/env.ts
1103
+ var envSchema = import_v4.z.object({
567
1104
  /**
568
- * Resets seen strings for a new digest.
1105
+ * LangWatch API key for event reporting.
1106
+ * If not provided, events will not be sent to LangWatch.
569
1107
  */
570
- reset() {
571
- this.seen.clear();
572
- }
1108
+ LANGWATCH_API_KEY: import_v4.z.string().optional(),
573
1109
  /**
574
- * Processes a string, returning duplicate marker if seen before.
575
- * @param str - String to process
576
- * @returns Original string or duplicate marker
1110
+ * LangWatch endpoint URL for event reporting.
1111
+ * Defaults to the production LangWatch endpoint.
577
1112
  */
578
- process(str) {
579
- if (str.length < this.threshold) return str;
580
- const key = this.normalize(str);
581
- if (this.seen.has(key)) return "[DUPLICATE - SEE ABOVE]";
582
- this.seen.set(key, true);
583
- return str;
584
- }
1113
+ LANGWATCH_ENDPOINT: import_v4.z.string().url().optional().default("https://app.langwatch.ai"),
585
1114
  /**
586
- * Normalizes string for comparison (whitespace, case).
1115
+ * Disables simulation report info messages when set to any truthy value.
1116
+ * Useful for CI/CD environments or when you want cleaner output.
587
1117
  */
588
- normalize(str) {
589
- return str.replace(/\\[nrt]/g, " ").replace(/[\n\r\t]/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
590
- }
591
- };
1118
+ SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v4.z.string().optional().transform((val) => Boolean(val)),
1119
+ /**
1120
+ * Node environment - affects logging and behavior.
1121
+ * Defaults to 'development' if not specified.
1122
+ */
1123
+ NODE_ENV: import_v4.z.enum(["development", "production", "test"]).default("development"),
1124
+ /**
1125
+ * Case-insensitive log level for the scenario package.
1126
+ * Defaults to 'info' if not specified.
1127
+ */
1128
+ LOG_LEVEL: import_v4.z.string().toUpperCase().pipe(import_v4.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
1129
+ /**
1130
+ * Scenario batch run ID.
1131
+ * If not provided, a random ID will be generated.
1132
+ */
1133
+ SCENARIO_BATCH_RUN_ID: import_v4.z.string().optional()
1134
+ });
1135
+ function getEnv() {
1136
+ return envSchema.parse(process.env);
1137
+ }
592
1138
 
593
- // src/agents/judge/truncate-media.ts
594
- function truncateMediaUrl(str) {
595
- const match = str.match(
596
- /^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
597
- );
598
- if (!match) return str;
599
- const [, mimeType, category, data] = match;
600
- return `[${category.toUpperCase()}: ${mimeType}, ~${data.length} bytes]`;
1139
+ // src/config/load.ts
1140
+ var import_promises = __toESM(require("fs/promises"));
1141
+ var import_node_path = __toESM(require("path"));
1142
+ var import_node_url = require("url");
1143
+
1144
+ // src/domain/index.ts
1145
+ var domain_exports = {};
1146
+ __export(domain_exports, {
1147
+ AgentAdapter: () => AgentAdapter,
1148
+ AgentRole: () => AgentRole,
1149
+ DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
1150
+ DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
1151
+ JudgeAgentAdapter: () => JudgeAgentAdapter,
1152
+ UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
1153
+ allAgentRoles: () => allAgentRoles,
1154
+ defineConfig: () => defineConfig,
1155
+ scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
1156
+ });
1157
+
1158
+ // src/domain/core/config.ts
1159
+ var import_v43 = require("zod/v4");
1160
+
1161
+ // src/domain/core/schemas/model.schema.ts
1162
+ var import_v42 = require("zod/v4");
1163
+
1164
+ // src/domain/core/constants.ts
1165
+ var DEFAULT_TEMPERATURE = 0;
1166
+
1167
+ // src/domain/core/schemas/model.schema.ts
1168
+ var modelSchema = import_v42.z.object({
1169
+ model: import_v42.z.custom((val) => Boolean(val), {
1170
+ message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
1171
+ }).describe("Language model that is used by the AI SDK Core functions."),
1172
+ temperature: import_v42.z.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
1173
+ maxTokens: import_v42.z.number().optional().describe("The maximum number of tokens to generate.")
1174
+ });
1175
+
1176
+ // src/domain/core/config.ts
1177
+ var headless = typeof process !== "undefined" ? process.env.SCENARIO_HEADLESS === "true" : false;
1178
+ var scenarioProjectConfigSchema = import_v43.z.object({
1179
+ defaultModel: modelSchema.optional(),
1180
+ headless: import_v43.z.boolean().optional().default(headless),
1181
+ observability: import_v43.z.custom((val) => {
1182
+ return val === void 0 || typeof val === "object" && val !== null && !Array.isArray(val);
1183
+ }).optional()
1184
+ }).strict();
1185
+ function defineConfig(config2) {
1186
+ return config2;
601
1187
  }
602
- function truncateMediaPart(v) {
603
- var _a;
604
- if (v === null || typeof v !== "object" || Array.isArray(v)) return null;
605
- const obj = v;
606
- if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
607
- const category = ((_a = obj.mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
608
- return {
609
- ...obj,
610
- data: `[${category}: ${obj.mediaType}, ~${obj.data.length} bytes]`
611
- };
612
- }
613
- if (obj.type === "image" && typeof obj.image === "string") {
614
- const imageData = obj.image;
615
- const dataUrlMatch = imageData.match(
616
- /^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
617
- );
618
- if (dataUrlMatch) {
619
- return {
620
- ...obj,
621
- image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
622
- };
623
- }
624
- if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
625
- return {
626
- ...obj,
627
- image: `[IMAGE: unknown, ~${imageData.length} bytes]`
628
- };
1188
+
1189
+ // src/domain/agents/index.ts
1190
+ var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
1191
+ AgentRole2["USER"] = "User";
1192
+ AgentRole2["AGENT"] = "Agent";
1193
+ AgentRole2["JUDGE"] = "Judge";
1194
+ return AgentRole2;
1195
+ })(AgentRole || {});
1196
+ var allAgentRoles = [
1197
+ "User" /* USER */,
1198
+ "Agent" /* AGENT */,
1199
+ "Judge" /* JUDGE */
1200
+ ];
1201
+ var AgentAdapter = class {
1202
+ name;
1203
+ role = "Agent" /* AGENT */;
1204
+ };
1205
+ var UserSimulatorAgentAdapter = class extends AgentAdapter {
1206
+ name = "UserSimulatorAgent";
1207
+ role = "User" /* USER */;
1208
+ };
1209
+ var JudgeAgentAdapter = class extends AgentAdapter {
1210
+ name = "JudgeAgent";
1211
+ role = "Judge" /* JUDGE */;
1212
+ };
1213
+
1214
+ // src/domain/scenarios/index.ts
1215
+ var DEFAULT_MAX_TURNS = 10;
1216
+ var DEFAULT_VERBOSE = false;
1217
+
1218
+ // src/config/load.ts
1219
+ async function loadScenarioProjectConfig() {
1220
+ const cwd = process.cwd();
1221
+ const configNames = [
1222
+ "scenario.config.js",
1223
+ "scenario.config.mjs"
1224
+ ];
1225
+ for (const name of configNames) {
1226
+ const fullPath = import_node_path.default.join(cwd, name);
1227
+ try {
1228
+ await import_promises.default.access(fullPath);
1229
+ const configModule = await import((0, import_node_url.pathToFileURL)(fullPath).href);
1230
+ const config2 = configModule.default || configModule;
1231
+ const parsed = scenarioProjectConfigSchema.safeParse(config2);
1232
+ if (!parsed.success) {
1233
+ throw new Error(
1234
+ `Invalid config file ${name}: ${JSON.stringify(parsed.error.format(), null, 2)}`
1235
+ );
1236
+ }
1237
+ return parsed.data;
1238
+ } catch (error) {
1239
+ if (error instanceof Error && "code" in error && error.code === "ENOENT") {
1240
+ continue;
1241
+ }
1242
+ throw error;
629
1243
  }
630
1244
  }
631
- return null;
1245
+ return await scenarioProjectConfigSchema.parseAsync({});
632
1246
  }
633
1247
 
634
- // src/agents/judge/judge-span-digest-formatter.ts
635
- var JudgeSpanDigestFormatter = class {
636
- logger = new Logger("JudgeSpanDigestFormatter");
637
- deduplicator = new StringDeduplicator({ threshold: 50 });
1248
+ // src/utils/logger.ts
1249
+ var Logger = class _Logger {
1250
+ constructor(context2) {
1251
+ this.context = context2;
1252
+ }
638
1253
  /**
639
- * Formats spans into a complete digest with full content and nesting.
640
- * @param spans - All spans for a thread
641
- * @returns Plain text digest
1254
+ * Creates a logger with context (e.g., class name)
642
1255
  */
643
- format(spans) {
644
- this.deduplicator.reset();
645
- this.logger.debug("format() called", {
646
- spanCount: spans.length,
647
- spanNames: spans.map((s) => s.name)
648
- });
649
- if (spans.length === 0) {
650
- this.logger.debug("No spans to format");
651
- return "No spans recorded.";
652
- }
653
- const sortedSpans = this.sortByStartTime(spans);
654
- const tree = this.buildHierarchy(sortedSpans);
655
- const totalDuration = this.calculateTotalDuration(sortedSpans);
656
- this.logger.debug("Hierarchy built", {
657
- rootCount: tree.length,
658
- totalDuration
659
- });
660
- const lines = [
661
- `Spans: ${spans.length} | Total Duration: ${this.formatDuration(
662
- totalDuration
663
- )}`,
664
- ""
665
- ];
666
- let sequence = 1;
667
- const rootCount = tree.length;
668
- tree.forEach((node, idx) => {
669
- sequence = this.renderNode(
670
- node,
671
- lines,
672
- 0,
673
- sequence,
674
- idx === rootCount - 1
675
- );
676
- });
677
- const errors = this.collectErrors(spans);
678
- if (errors.length > 0) {
679
- lines.push("");
680
- lines.push("=== ERRORS ===");
681
- errors.forEach((e) => lines.push(e));
682
- }
683
- return lines.join("\n");
1256
+ static create(context2) {
1257
+ return new _Logger(context2);
684
1258
  }
685
- sortByStartTime(spans) {
686
- return [...spans].sort((a, b) => {
687
- const aTime = this.hrTimeToMs(a.startTime);
688
- const bTime = this.hrTimeToMs(b.startTime);
689
- return aTime - bTime;
690
- });
1259
+ /**
1260
+ * Returns the current log level from environment.
1261
+ * Uses a getter for clarity and idiomatic usage.
1262
+ */
1263
+ get LOG_LEVEL() {
1264
+ return getEnv().LOG_LEVEL;
691
1265
  }
692
- buildHierarchy(spans) {
693
- var _a;
694
- const spanMap = /* @__PURE__ */ new Map();
695
- const roots = [];
696
- for (const span of spans) {
697
- spanMap.set(span.spanContext().spanId, { span, children: [] });
698
- }
699
- for (const span of spans) {
700
- const node = spanMap.get(span.spanContext().spanId);
701
- const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
702
- if (parentId && spanMap.has(parentId)) {
703
- spanMap.get(parentId).children.push(node);
1266
+ /**
1267
+ * Returns the index of the given log level in the LOG_LEVELS array.
1268
+ * @param level - The log level to get the index for.
1269
+ * @returns The index of the log level in the LOG_LEVELS array.
1270
+ */
1271
+ getLogLevelIndexFor(level) {
1272
+ return LOG_LEVELS.indexOf(level);
1273
+ }
1274
+ /**
1275
+ * Checks if logging should occur based on LOG_LEVEL env var
1276
+ */
1277
+ shouldLog(level) {
1278
+ const currentLevelIndex = this.getLogLevelIndexFor(this.LOG_LEVEL);
1279
+ const requestedLevelIndex = this.getLogLevelIndexFor(level);
1280
+ return currentLevelIndex >= 0 && requestedLevelIndex <= currentLevelIndex;
1281
+ }
1282
+ formatMessage(message2) {
1283
+ return this.context ? `[${this.context}] ${message2}` : message2;
1284
+ }
1285
+ error(message2, data) {
1286
+ if (this.shouldLog("ERROR" /* ERROR */)) {
1287
+ const formattedMessage = this.formatMessage(message2);
1288
+ if (data) {
1289
+ console.error(formattedMessage, data);
704
1290
  } else {
705
- roots.push(node);
1291
+ console.error(formattedMessage);
706
1292
  }
707
1293
  }
708
- return roots;
709
1294
  }
710
- renderNode(node, lines, depth, sequence, isLast = true) {
711
- const span = node.span;
712
- const duration = this.calculateSpanDuration(span);
713
- const timestamp = this.formatTimestamp(span.startTime);
714
- const status = this.getStatusIndicator(span);
715
- const prefix = this.getTreePrefix(depth, isLast);
716
- lines.push(
717
- `${prefix}[${sequence}] ${new Date(timestamp).toISOString()} ${span.name} (${this.formatDuration(duration)})${status}`
718
- );
719
- const attrIndent = this.getAttrIndent(depth, isLast);
720
- const attrs = this.cleanAttributes(span.attributes);
721
- if (Object.keys(attrs).length > 0) {
722
- for (const [key, value] of Object.entries(attrs)) {
723
- lines.push(`${attrIndent}${key}: ${this.formatValue(value)}`);
1295
+ warn(message2, data) {
1296
+ if (this.shouldLog("WARN" /* WARN */)) {
1297
+ const formattedMessage = this.formatMessage(message2);
1298
+ if (data) {
1299
+ console.warn(formattedMessage, data);
1300
+ } else {
1301
+ console.warn(formattedMessage);
724
1302
  }
725
1303
  }
726
- if (span.events.length > 0) {
727
- for (const event of span.events) {
728
- lines.push(`${attrIndent}[event] ${event.name}`);
729
- if (event.attributes) {
730
- const eventAttrs = this.cleanAttributes(event.attributes);
731
- for (const [key, value] of Object.entries(eventAttrs)) {
732
- lines.push(`${attrIndent} ${key}: ${this.formatValue(value)}`);
733
- }
734
- }
1304
+ }
1305
+ info(message2, data) {
1306
+ if (this.shouldLog("INFO" /* INFO */)) {
1307
+ const formattedMessage = this.formatMessage(message2);
1308
+ if (data) {
1309
+ console.info(formattedMessage, data);
1310
+ } else {
1311
+ console.info(formattedMessage);
735
1312
  }
736
1313
  }
737
- lines.push("");
738
- let nextSeq = sequence + 1;
739
- const childCount = node.children.length;
740
- node.children.forEach((child, idx) => {
741
- nextSeq = this.renderNode(
742
- child,
743
- lines,
744
- depth + 1,
745
- nextSeq,
746
- idx === childCount - 1
747
- );
748
- });
749
- return nextSeq;
750
- }
751
- getTreePrefix(depth, isLast) {
752
- if (depth === 0) return "";
753
- const connector = isLast ? "\u2514\u2500\u2500 " : "\u251C\u2500\u2500 ";
754
- return "\u2502 ".repeat(depth - 1) + connector;
755
- }
756
- getAttrIndent(depth, isLast) {
757
- if (depth === 0) return " ";
758
- const continuation = isLast ? " " : "\u2502 ";
759
- return "\u2502 ".repeat(depth - 1) + continuation + " ";
760
1314
  }
761
- cleanAttributes(attrs) {
762
- const cleaned = {};
763
- const seen = /* @__PURE__ */ new Set();
764
- const excludedKeys = [
765
- import_observability2.attributes.ATTR_LANGWATCH_THREAD_ID,
766
- "langwatch.scenario.id",
767
- "langwatch.scenario.name"
768
- ];
769
- for (const [key, value] of Object.entries(attrs)) {
770
- if (excludedKeys.includes(key)) {
771
- continue;
772
- }
773
- const cleanKey = key.replace(/^(langwatch)\./, "");
774
- if (!seen.has(cleanKey)) {
775
- seen.add(cleanKey);
776
- cleaned[cleanKey] = value;
1315
+ debug(message2, data) {
1316
+ if (this.shouldLog("DEBUG" /* DEBUG */)) {
1317
+ const formattedMessage = this.formatMessage(message2);
1318
+ if (data) {
1319
+ console.log(formattedMessage, data);
1320
+ } else {
1321
+ console.log(formattedMessage);
777
1322
  }
778
1323
  }
779
- return cleaned;
780
1324
  }
781
- formatValue(value) {
782
- const processed = this.transformValue(value);
783
- return typeof processed === "string" ? processed : JSON.stringify(processed);
1325
+ };
1326
+
1327
+ // src/config/get-project-config.ts
1328
+ var logger = new Logger("scenario.config");
1329
+ var configLoaded = false;
1330
+ var config = null;
1331
+ var configLoadPromise = null;
1332
+ async function loadProjectConfig() {
1333
+ if (configLoaded) {
1334
+ return;
784
1335
  }
785
- transformValue(value) {
786
- return deepTransform(value, (v) => {
787
- const mediaPart = truncateMediaPart(v);
788
- if (mediaPart) return mediaPart;
789
- if (typeof v !== "string") return v;
790
- return this.transformString(v);
791
- });
1336
+ if (configLoadPromise) {
1337
+ return configLoadPromise;
792
1338
  }
793
- transformString(str) {
794
- if (this.looksLikeJson(str)) {
795
- try {
796
- const processed = this.transformValue(JSON.parse(str));
797
- return JSON.stringify(processed);
798
- } catch {
799
- }
1339
+ configLoadPromise = (async () => {
1340
+ try {
1341
+ config = await loadScenarioProjectConfig();
1342
+ logger.debug("loaded scenario project config", { config });
1343
+ } catch (error) {
1344
+ logger.error("error loading scenario project config", { error });
1345
+ } finally {
1346
+ configLoaded = true;
800
1347
  }
801
- const truncated = truncateMediaUrl(str);
802
- if (truncated !== str) return truncated;
803
- return this.deduplicator.process(str);
1348
+ })();
1349
+ return configLoadPromise;
1350
+ }
1351
+ async function getProjectConfig() {
1352
+ await loadProjectConfig();
1353
+ return config;
1354
+ }
1355
+
1356
+ // src/agents/llm-invoker.factory.ts
1357
+ var import_ai = require("ai");
1358
+ var createLLMInvoker = (logger2) => {
1359
+ return async (params) => {
1360
+ try {
1361
+ return await (0, import_ai.generateText)({
1362
+ ...params,
1363
+ experimental_telemetry: { isEnabled: true }
1364
+ });
1365
+ } catch (error) {
1366
+ logger2.error("Error generating text", { error });
1367
+ throw error;
1368
+ }
1369
+ };
1370
+ };
1371
+
1372
+ // src/agents/utils.ts
1373
+ var toolMessageRole = "tool";
1374
+ var assistantMessageRole = "assistant";
1375
+ var userMessageRole = "user";
1376
+ var hasToolContent = (message2) => {
1377
+ if (message2.role === toolMessageRole) return true;
1378
+ if (!Array.isArray(message2.content)) return false;
1379
+ return message2.content.some((part) => {
1380
+ if (!part || typeof part !== "object") return false;
1381
+ const partType = "type" in part ? part.type : void 0;
1382
+ return partType === "tool-call" || partType === "tool-result";
1383
+ });
1384
+ };
1385
+ var stringifyValue = (value) => {
1386
+ if (typeof value === "string") return value;
1387
+ if (value === void 0) return "undefined";
1388
+ try {
1389
+ const serialized = JSON.stringify(value);
1390
+ return serialized === void 0 ? String(value) : serialized;
1391
+ } catch {
1392
+ return String(value);
804
1393
  }
805
- looksLikeJson(str) {
806
- const t = str.trim();
807
- return t.startsWith("{") && t.endsWith("}") || t.startsWith("[") && t.endsWith("]");
1394
+ };
1395
+ var summarizeToolMessage = (message2) => {
1396
+ if (message2.role === toolMessageRole && !Array.isArray(message2.content)) {
1397
+ return `[Tool message: ${stringifyValue(message2.content)}]`;
1398
+ }
1399
+ if (message2.role === toolMessageRole) {
1400
+ const toolResults = message2.content.filter((part) => part.type === "tool-result").map((part) => {
1401
+ const contentPart = part;
1402
+ const name = contentPart.toolName ?? "unknown tool";
1403
+ const output = contentPart.output;
1404
+ const value = output && typeof output === "object" && "value" in output && typeof output.value === "string" ? output.value : output ?? contentPart.result;
1405
+ return `[Tool result from ${name}: ${stringifyValue(value)}]`;
1406
+ });
1407
+ return toolResults.length > 0 ? toolResults.join("\n") : null;
808
1408
  }
809
- hrTimeToMs(hrTime) {
810
- return hrTime[0] * 1e3 + hrTime[1] / 1e6;
1409
+ if (!Array.isArray(message2.content)) return null;
1410
+ const toolCalls = message2.content.filter((part) => part.type === "tool-call").map((part) => {
1411
+ const contentPart = part;
1412
+ const name = contentPart.toolName ?? "unknown tool";
1413
+ return `[Called tool ${name} with: ${stringifyValue(contentPart.input)}]`;
1414
+ });
1415
+ return toolCalls.length > 0 ? toolCalls.join("\n") : null;
1416
+ };
1417
+ var messageRoleReversal = (messages) => {
1418
+ const roleMap = {
1419
+ [userMessageRole]: assistantMessageRole,
1420
+ [assistantMessageRole]: userMessageRole
1421
+ };
1422
+ return messages.map((message2) => {
1423
+ if (hasToolContent(message2)) {
1424
+ const summary = summarizeToolMessage(message2);
1425
+ if (!summary) return null;
1426
+ return {
1427
+ role: userMessageRole,
1428
+ content: summary
1429
+ };
1430
+ }
1431
+ const newRole = roleMap[message2.role];
1432
+ if (!newRole) return message2;
1433
+ return {
1434
+ ...message2,
1435
+ role: newRole
1436
+ };
1437
+ }).filter((message2) => message2 !== null);
1438
+ };
1439
+ var criterionToParamName = (criterion) => {
1440
+ return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
1441
+ };
1442
+
1443
+ // src/agents/judge/judge-span-collector.ts
1444
+ var import_observability = require("langwatch/observability");
1445
+ var JudgeSpanCollector = class {
1446
+ spans = [];
1447
+ onStart() {
811
1448
  }
812
- calculateSpanDuration(span) {
813
- return this.hrTimeToMs(span.endTime) - this.hrTimeToMs(span.startTime);
1449
+ onEnd(span) {
1450
+ this.spans.push(span);
814
1451
  }
815
- calculateTotalDuration(spans) {
816
- if (spans.length === 0) return 0;
817
- const first = this.hrTimeToMs(spans[0].startTime);
818
- const last = Math.max(...spans.map((s) => this.hrTimeToMs(s.endTime)));
819
- return last - first;
1452
+ forceFlush() {
1453
+ return Promise.resolve();
820
1454
  }
821
- formatDuration(ms) {
822
- if (ms < 1e3) return `${Math.round(ms)}ms`;
823
- return `${(ms / 1e3).toFixed(2)}s`;
1455
+ shutdown() {
1456
+ this.spans = [];
1457
+ return Promise.resolve();
824
1458
  }
825
- formatTimestamp(hrTime) {
826
- const ms = this.hrTimeToMs(hrTime);
827
- return new Date(ms).toISOString();
1459
+ /**
1460
+ * Removes all spans associated with a specific thread.
1461
+ * Call this after a scenario run completes to prevent memory growth
1462
+ * in long-lived processes.
1463
+ * @param threadId - The thread identifier whose spans should be cleared
1464
+ */
1465
+ clearSpansForThread(threadId) {
1466
+ const threadSpanIds = new Set(
1467
+ this.getSpansForThread(threadId).map((s) => s.spanContext().spanId)
1468
+ );
1469
+ this.spans = this.spans.filter(
1470
+ (s) => !threadSpanIds.has(s.spanContext().spanId)
1471
+ );
828
1472
  }
829
- getStatusIndicator(span) {
830
- if (span.status.code === 2) {
831
- return ` \u26A0\uFE0F ERROR: ${span.status.message ?? "unknown"}`;
1473
+ /**
1474
+ * Retrieves all spans associated with a specific thread.
1475
+ * @param threadId - The thread identifier to filter spans by
1476
+ * @returns Array of spans for the given thread
1477
+ */
1478
+ getSpansForThread(threadId) {
1479
+ const spanMap = /* @__PURE__ */ new Map();
1480
+ for (const span of this.spans) {
1481
+ spanMap.set(span.spanContext().spanId, span);
832
1482
  }
833
- return "";
834
- }
835
- collectErrors(spans) {
836
- return spans.filter((s) => s.status.code === 2).map((s) => `- ${s.name}: ${s.status.message ?? "unknown error"}`);
837
- }
1483
+ const belongsToThread = (span, visited = /* @__PURE__ */ new Set()) => {
1484
+ const spanId = span.spanContext().spanId;
1485
+ if (visited.has(spanId)) return false;
1486
+ visited.add(spanId);
1487
+ if (span.attributes[import_observability.attributes.ATTR_LANGWATCH_THREAD_ID] === threadId) {
1488
+ return true;
1489
+ }
1490
+ const parentId = getParentSpanId(span);
1491
+ if (parentId && spanMap.has(parentId)) {
1492
+ return belongsToThread(spanMap.get(parentId), visited);
1493
+ }
1494
+ return false;
1495
+ };
1496
+ return this.spans.filter((span) => belongsToThread(span));
1497
+ }
838
1498
  };
839
- var judgeSpanDigestFormatter = new JudgeSpanDigestFormatter();
840
-
841
- // src/agents/judge/judge-agent.ts
842
- function buildSystemPrompt(criteria, description) {
843
- const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
844
- return `
845
- <role>
846
- You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
847
- </role>
848
-
849
- <goal>
850
- Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
851
- If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
852
- </goal>
853
-
854
- <scenario>
855
- ${description}
856
- </scenario>
1499
+ function getParentSpanId(span) {
1500
+ if (span.parentSpanId) return span.parentSpanId;
1501
+ const legacy = span.parentSpanContext;
1502
+ return legacy == null ? void 0 : legacy.spanId;
1503
+ }
1504
+ var judgeSpanCollector = new JudgeSpanCollector();
857
1505
 
858
- <criteria>
859
- ${criteriaList}
860
- </criteria>
1506
+ // src/agents/judge/judge-span-digest-formatter.ts
1507
+ var import_observability2 = require("langwatch/observability");
861
1508
 
862
- <rules>
863
- - Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.
864
- - DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
865
- </rules>
866
- `.trim();
867
- }
868
- function buildContinueTestTool() {
869
- return (0, import_ai2.tool)({
870
- description: "Continue the test with the next step",
871
- inputSchema: import_v44.z.object({})
872
- });
873
- }
874
- function buildFinishTestTool(criteria) {
875
- const criteriaNames = criteria.map(criterionToParamName);
876
- return (0, import_ai2.tool)({
877
- description: "Complete the test with a final verdict",
878
- inputSchema: import_v44.z.object({
879
- criteria: import_v44.z.object(
880
- Object.fromEntries(
881
- criteriaNames.map((name, idx) => [
882
- name,
883
- import_v44.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
884
- ])
885
- )
886
- ).strict().describe("Strict verdict for each criterion"),
887
- reasoning: import_v44.z.string().describe("Explanation of what the final verdict should be"),
888
- verdict: import_v44.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
889
- })
890
- });
1509
+ // src/agents/judge/deep-transform.ts
1510
+ function deepTransform(value, fn) {
1511
+ const result = fn(value);
1512
+ if (result !== value) return result;
1513
+ if (Array.isArray(value)) {
1514
+ return value.map((v) => deepTransform(v, fn));
1515
+ }
1516
+ if (value !== null && typeof value === "object") {
1517
+ const out = {};
1518
+ for (const [k, v] of Object.entries(value)) {
1519
+ out[k] = deepTransform(v, fn);
1520
+ }
1521
+ return out;
1522
+ }
1523
+ return value;
891
1524
  }
892
- var JudgeAgent = class extends JudgeAgentAdapter {
893
- constructor(cfg) {
894
- super();
895
- this.cfg = cfg;
896
- this.criteria = cfg.criteria;
897
- this.spanCollector = cfg.spanCollector ?? judgeSpanCollector;
1525
+
1526
+ // src/agents/judge/string-deduplicator.ts
1527
+ var StringDeduplicator = class {
1528
+ seen = /* @__PURE__ */ new Map();
1529
+ threshold;
1530
+ constructor(params) {
1531
+ this.threshold = params.threshold;
898
1532
  }
899
- logger = new Logger("JudgeAgent");
900
- spanCollector;
901
- role = "Judge" /* JUDGE */;
902
- criteria;
903
1533
  /**
904
- * LLM invocation function. Can be overridden to customize LLM behavior.
1534
+ * Resets seen strings for a new digest.
905
1535
  */
906
- invokeLLM = createLLMInvoker(this.logger);
907
- async call(input) {
908
- var _a, _b, _c;
909
- this.logger.debug("call() invoked", {
910
- threadId: input.threadId,
911
- currentTurn: input.scenarioState.currentTurn,
912
- maxTurns: input.scenarioConfig.maxTurns,
913
- judgmentRequest: input.judgmentRequest
914
- });
915
- const digest = this.getOpenTelemetryTracesDigest(input.threadId);
916
- this.logger.debug("OpenTelemetry traces built", { digest });
917
- const transcript = JudgeUtils.buildTranscriptFromMessages(input.messages);
918
- const contentForJudge = `
919
- <transcript>
920
- ${transcript}
921
- </transcript>
922
- <opentelemetry_traces>
923
- ${digest}
924
- </opentelemetry_traces>
925
- `;
926
- const cfg = this.cfg;
927
- const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
928
- const messages = [
929
- { role: "system", content: systemPrompt },
930
- { role: "user", content: contentForJudge }
931
- ];
932
- const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
933
- const projectConfig = await getProjectConfig();
934
- const mergedConfig = modelSchema.parse({
935
- ...projectConfig == null ? void 0 : projectConfig.defaultModel,
936
- ...cfg
937
- });
938
- const tools = {
939
- continue_test: buildContinueTestTool(),
940
- finish_test: buildFinishTestTool(cfg.criteria)
1536
+ reset() {
1537
+ this.seen.clear();
1538
+ }
1539
+ /**
1540
+ * Processes a string, returning duplicate marker if seen before.
1541
+ * @param str - String to process
1542
+ * @returns Original string or duplicate marker
1543
+ */
1544
+ process(str) {
1545
+ if (str.length < this.threshold) return str;
1546
+ const key = this.normalize(str);
1547
+ if (this.seen.has(key)) return "[DUPLICATE - SEE ABOVE]";
1548
+ this.seen.set(key, true);
1549
+ return str;
1550
+ }
1551
+ /**
1552
+ * Normalizes string for comparison (whitespace, case).
1553
+ */
1554
+ normalize(str) {
1555
+ return str.replace(/\\[nrt]/g, " ").replace(/[\n\r\t]/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
1556
+ }
1557
+ };
1558
+
1559
+ // src/agents/judge/truncate-media.ts
1560
+ function truncateMediaUrl(str) {
1561
+ const match = str.match(
1562
+ /^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
1563
+ );
1564
+ if (!match) return str;
1565
+ const [, mimeType, category, data] = match;
1566
+ return `[${category.toUpperCase()}: ${mimeType}, ~${data.length} bytes]`;
1567
+ }
1568
+ function truncateMediaPart(v) {
1569
+ var _a;
1570
+ if (v === null || typeof v !== "object" || Array.isArray(v)) return null;
1571
+ const obj = v;
1572
+ if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
1573
+ const category = ((_a = obj.mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
1574
+ return {
1575
+ ...obj,
1576
+ data: `[${category}: ${obj.mediaType}, ~${obj.data.length} bytes]`
941
1577
  };
942
- const enforceJudgement = input.judgmentRequest;
943
- const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
944
- if (enforceJudgement && !hasCriteria) {
1578
+ }
1579
+ if (obj.type === "image" && typeof obj.image === "string") {
1580
+ const imageData = obj.image;
1581
+ const dataUrlMatch = imageData.match(
1582
+ /^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
1583
+ );
1584
+ if (dataUrlMatch) {
945
1585
  return {
946
- success: false,
947
- reasoning: "JudgeAgent: No criteria was provided to be judged against",
948
- metCriteria: [],
949
- unmetCriteria: []
1586
+ ...obj,
1587
+ image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
950
1588
  };
951
1589
  }
952
- const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
953
- this.logger.debug("Calling LLM", {
954
- model: mergedConfig.model,
955
- toolChoice,
956
- isLastMessage,
957
- enforceJudgement
958
- });
959
- const completion = await this.invokeLLM({
960
- model: mergedConfig.model,
961
- messages,
962
- temperature: mergedConfig.temperature ?? 0,
963
- maxOutputTokens: mergedConfig.maxTokens,
964
- tools,
965
- toolChoice
966
- });
967
- this.logger.debug("LLM response received", {
968
- toolCallCount: ((_a = completion.toolCalls) == null ? void 0 : _a.length) ?? 0,
969
- toolCalls: (_b = completion.toolCalls) == null ? void 0 : _b.map((tc) => ({
970
- toolName: tc.toolName,
971
- args: tc.input
972
- }))
973
- });
974
- let args;
975
- if ((_c = completion.toolCalls) == null ? void 0 : _c.length) {
976
- const toolCall = completion.toolCalls[0];
977
- switch (toolCall.toolName) {
978
- case "finish_test": {
979
- args = toolCall.input;
980
- const verdict = args.verdict || "inconclusive";
981
- const reasoning = args.reasoning || "No reasoning provided";
982
- const criteria = args.criteria || {};
983
- const criteriaValues = Object.values(criteria);
984
- const metCriteria = cfg.criteria.filter(
985
- (_, i) => criteriaValues[i] === "true"
986
- );
987
- const unmetCriteria = cfg.criteria.filter(
988
- (_, i) => criteriaValues[i] !== "true"
989
- );
990
- const result = {
991
- success: verdict === "success",
992
- reasoning,
993
- metCriteria,
994
- unmetCriteria
995
- };
996
- this.logger.debug("finish_test result", result);
997
- return result;
998
- }
999
- case "continue_test":
1000
- this.logger.debug("continue_test - proceeding to next turn");
1001
- return null;
1002
- default:
1003
- return {
1004
- success: false,
1005
- reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
1006
- metCriteria: [],
1007
- unmetCriteria: cfg.criteria
1008
- };
1009
- }
1590
+ if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
1591
+ return {
1592
+ ...obj,
1593
+ image: `[IMAGE: unknown, ~${imageData.length} bytes]`
1594
+ };
1010
1595
  }
1011
- return {
1012
- success: false,
1013
- reasoning: `JudgeAgent: No tool call found in LLM output`,
1014
- metCriteria: [],
1015
- unmetCriteria: cfg.criteria
1016
- };
1017
- }
1018
- getOpenTelemetryTracesDigest(threadId) {
1019
- const spans = this.spanCollector.getSpansForThread(threadId);
1020
- const digest = judgeSpanDigestFormatter.format(spans);
1021
- return digest;
1022
1596
  }
1023
- };
1024
- var judgeAgent = (cfg) => {
1025
- return new JudgeAgent(cfg);
1026
- };
1597
+ return null;
1598
+ }
1027
1599
 
1028
- // src/agents/user-simulator-agent.ts
1029
- function buildSystemPrompt2(description) {
1030
- return `
1031
- <role>
1032
- You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.
1033
- Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.
1600
+ // src/agents/judge/judge-span-digest-formatter.ts
1601
+ var JudgeSpanDigestFormatter = class {
1602
+ logger = new Logger("JudgeSpanDigestFormatter");
1603
+ deduplicator = new StringDeduplicator({ threshold: 50 });
1604
+ /**
1605
+ * Formats spans into a complete digest with full content and nesting.
1606
+ * @param spans - All spans for a thread
1607
+ * @returns Plain text digest
1608
+ */
1609
+ format(spans) {
1610
+ this.deduplicator.reset();
1611
+ this.logger.debug("format() called", {
1612
+ spanCount: spans.length,
1613
+ spanNames: spans.map((s) => s.name)
1614
+ });
1615
+ if (spans.length === 0) {
1616
+ this.logger.debug("No spans to format");
1617
+ return "No spans recorded.";
1618
+ }
1619
+ const sortedSpans = this.sortByStartTime(spans);
1620
+ const tree = this.buildHierarchy(sortedSpans);
1621
+ const totalDuration = this.calculateTotalDuration(sortedSpans);
1622
+ this.logger.debug("Hierarchy built", {
1623
+ rootCount: tree.length,
1624
+ totalDuration
1625
+ });
1626
+ const lines = [
1627
+ `Spans: ${spans.length} | Total Duration: ${this.formatDuration(
1628
+ totalDuration
1629
+ )}`,
1630
+ ""
1631
+ ];
1632
+ let sequence = 1;
1633
+ const rootCount = tree.length;
1634
+ tree.forEach((node, idx) => {
1635
+ sequence = this.renderNode(
1636
+ node,
1637
+ lines,
1638
+ 0,
1639
+ sequence,
1640
+ idx === rootCount - 1
1641
+ );
1642
+ });
1643
+ const errors = this.collectErrors(spans);
1644
+ if (errors.length > 0) {
1645
+ lines.push("");
1646
+ lines.push("=== ERRORS ===");
1647
+ errors.forEach((e) => lines.push(e));
1648
+ }
1649
+ return lines.join("\n");
1650
+ }
1651
+ sortByStartTime(spans) {
1652
+ return [...spans].sort((a, b) => {
1653
+ const aTime = this.hrTimeToMs(a.startTime);
1654
+ const bTime = this.hrTimeToMs(b.startTime);
1655
+ return aTime - bTime;
1656
+ });
1657
+ }
1658
+ buildHierarchy(spans) {
1659
+ const spanMap = /* @__PURE__ */ new Map();
1660
+ const roots = [];
1661
+ for (const span of spans) {
1662
+ spanMap.set(span.spanContext().spanId, { span, children: [] });
1663
+ }
1664
+ for (const span of spans) {
1665
+ const node = spanMap.get(span.spanContext().spanId);
1666
+ const parentId = getParentSpanId2(span);
1667
+ if (parentId && spanMap.has(parentId)) {
1668
+ spanMap.get(parentId).children.push(node);
1669
+ } else {
1670
+ roots.push(node);
1671
+ }
1672
+ }
1673
+ return roots;
1674
+ }
1675
+ renderNode(node, lines, depth, sequence, isLast = true) {
1676
+ const span = node.span;
1677
+ const duration = this.calculateSpanDuration(span);
1678
+ const timestamp = this.formatTimestamp(span.startTime);
1679
+ const status = this.getStatusIndicator(span);
1680
+ const prefix = this.getTreePrefix(depth, isLast);
1681
+ lines.push(
1682
+ `${prefix}[${sequence}] ${new Date(timestamp).toISOString()} ${span.name} (${this.formatDuration(duration)})${status}`
1683
+ );
1684
+ const attrIndent = this.getAttrIndent(depth, isLast);
1685
+ const attrs = this.cleanAttributes(span.attributes);
1686
+ if (Object.keys(attrs).length > 0) {
1687
+ for (const [key, value] of Object.entries(attrs)) {
1688
+ lines.push(`${attrIndent}${key}: ${this.formatValue(value)}`);
1689
+ }
1690
+ }
1691
+ if (span.events.length > 0) {
1692
+ for (const event of span.events) {
1693
+ lines.push(`${attrIndent}[event] ${event.name}`);
1694
+ if (event.attributes) {
1695
+ const eventAttrs = this.cleanAttributes(event.attributes);
1696
+ for (const [key, value] of Object.entries(eventAttrs)) {
1697
+ lines.push(`${attrIndent} ${key}: ${this.formatValue(value)}`);
1698
+ }
1699
+ }
1700
+ }
1701
+ }
1702
+ lines.push("");
1703
+ let nextSeq = sequence + 1;
1704
+ const childCount = node.children.length;
1705
+ node.children.forEach((child, idx) => {
1706
+ nextSeq = this.renderNode(
1707
+ child,
1708
+ lines,
1709
+ depth + 1,
1710
+ nextSeq,
1711
+ idx === childCount - 1
1712
+ );
1713
+ });
1714
+ return nextSeq;
1715
+ }
1716
+ getTreePrefix(depth, isLast) {
1717
+ if (depth === 0) return "";
1718
+ const connector = isLast ? "\u2514\u2500\u2500 " : "\u251C\u2500\u2500 ";
1719
+ return "\u2502 ".repeat(depth - 1) + connector;
1720
+ }
1721
+ getAttrIndent(depth, isLast) {
1722
+ if (depth === 0) return " ";
1723
+ const continuation = isLast ? " " : "\u2502 ";
1724
+ return "\u2502 ".repeat(depth - 1) + continuation + " ";
1725
+ }
1726
+ cleanAttributes(attrs) {
1727
+ const cleaned = {};
1728
+ const seen = /* @__PURE__ */ new Set();
1729
+ const excludedKeys = [
1730
+ import_observability2.attributes.ATTR_LANGWATCH_THREAD_ID,
1731
+ "langwatch.scenario.id",
1732
+ "langwatch.scenario.name"
1733
+ ];
1734
+ for (const [key, value] of Object.entries(attrs)) {
1735
+ if (excludedKeys.includes(key)) {
1736
+ continue;
1737
+ }
1738
+ const cleanKey = key.replace(/^(langwatch)\./, "");
1739
+ if (!seen.has(cleanKey)) {
1740
+ seen.add(cleanKey);
1741
+ cleaned[cleanKey] = value;
1742
+ }
1743
+ }
1744
+ return cleaned;
1745
+ }
1746
+ formatValue(value) {
1747
+ const processed = this.transformValue(value);
1748
+ return typeof processed === "string" ? processed : JSON.stringify(processed);
1749
+ }
1750
+ transformValue(value) {
1751
+ return deepTransform(value, (v) => {
1752
+ const mediaPart = truncateMediaPart(v);
1753
+ if (mediaPart) return mediaPart;
1754
+ if (typeof v !== "string") return v;
1755
+ return this.transformString(v);
1756
+ });
1757
+ }
1758
+ transformString(str) {
1759
+ if (this.looksLikeJson(str)) {
1760
+ try {
1761
+ const processed = this.transformValue(JSON.parse(str));
1762
+ return JSON.stringify(processed);
1763
+ } catch {
1764
+ }
1765
+ }
1766
+ const truncated = truncateMediaUrl(str);
1767
+ if (truncated !== str) return truncated;
1768
+ return this.deduplicator.process(str);
1769
+ }
1770
+ looksLikeJson(str) {
1771
+ const t = str.trim();
1772
+ return t.startsWith("{") && t.endsWith("}") || t.startsWith("[") && t.endsWith("]");
1773
+ }
1774
+ hrTimeToMs(hrTime) {
1775
+ return hrTime[0] * 1e3 + hrTime[1] / 1e6;
1776
+ }
1777
+ calculateSpanDuration(span) {
1778
+ return this.hrTimeToMs(span.endTime) - this.hrTimeToMs(span.startTime);
1779
+ }
1780
+ calculateTotalDuration(spans) {
1781
+ if (spans.length === 0) return 0;
1782
+ const first = this.hrTimeToMs(spans[0].startTime);
1783
+ const last = Math.max(...spans.map((s) => this.hrTimeToMs(s.endTime)));
1784
+ return last - first;
1785
+ }
1786
+ formatDuration(ms) {
1787
+ if (ms < 1e3) return `${Math.round(ms)}ms`;
1788
+ return `${(ms / 1e3).toFixed(2)}s`;
1789
+ }
1790
+ formatTimestamp(hrTime) {
1791
+ const ms = this.hrTimeToMs(hrTime);
1792
+ return new Date(ms).toISOString();
1793
+ }
1794
+ getStatusIndicator(span) {
1795
+ if (span.status.code === 2) {
1796
+ return ` \u26A0\uFE0F ERROR: ${span.status.message ?? "unknown"}`;
1797
+ }
1798
+ return "";
1799
+ }
1800
+ collectErrors(spans) {
1801
+ return spans.filter((s) => s.status.code === 2).map((s) => `- ${s.name}: ${s.status.message ?? "unknown error"}`);
1802
+ }
1803
+ };
1804
+ function getParentSpanId2(span) {
1805
+ if (span.parentSpanId) return span.parentSpanId;
1806
+ const legacy = span.parentSpanContext;
1807
+ return legacy == null ? void 0 : legacy.spanId;
1808
+ }
1809
+ var judgeSpanDigestFormatter = new JudgeSpanDigestFormatter();
1810
+
1811
+ // src/agents/judge/judge-agent.ts
1812
+ function buildSystemPrompt(criteria, description) {
1813
+ const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
1814
+ return `
1815
+ <role>
1816
+ You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
1034
1817
  </role>
1035
1818
 
1036
1819
  <goal>
1037
- Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.
1820
+ Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
1821
+ If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
1038
1822
  </goal>
1039
1823
 
1040
1824
  <scenario>
1041
1825
  ${description}
1042
1826
  </scenario>
1043
1827
 
1828
+ <criteria>
1829
+ ${criteriaList}
1830
+ </criteria>
1831
+
1044
1832
  <rules>
1045
- - DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user
1833
+ - Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.
1834
+ - DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary
1046
1835
  </rules>
1047
1836
  `.trim();
1048
1837
  }
1049
- var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
1838
/**
 * Creates the `continue_test` tool definition: a tool with an empty object
 * as its input schema and the description "Continue the test with the next step".
 *
 * @returns The value returned by `import_ai2.tool` for that definition.
 */
function buildContinueTestTool() {
  const defineTool = import_ai2.tool;
  const noArguments = import_v44.z.object({});
  const definition = {
    description: "Continue the test with the next step",
    inputSchema: noArguments
  };
  return defineTool(definition);
}
1844
/**
 * Creates the `finish_test` tool definition used to report a final verdict.
 *
 * The input schema has three fields:
 *  - `criteria`: a strict object with one "true" | "false" | "inconclusive"
 *    enum per criterion, keyed by `criterionToParamName` of that criterion
 *    and described by the criterion text itself;
 *  - `reasoning`: a string;
 *  - `verdict`: "success" | "failure" | "inconclusive".
 *
 * @param {string[]} criteria - Criteria the verdict must cover.
 * @returns The value returned by `import_ai2.tool` for that definition.
 */
function buildFinishTestTool(criteria) {
  const criteriaNames = criteria.map(criterionToParamName);
  const defineTool = import_ai2.tool;
  const z = import_v44.z;

  // One [paramName, enum schema] pair per criterion, in criteria order.
  const verdictEntries = criteriaNames.map((name, idx) => {
    const perCriterionVerdict = z.enum(["true", "false", "inconclusive"]).describe(criteria[idx]);
    return [name, perCriterionVerdict];
  });
  const criteriaSchema = z
    .object(Object.fromEntries(verdictEntries))
    .strict()
    .describe("Strict verdict for each criterion");

  const inputSchema = z.object({
    criteria: criteriaSchema,
    reasoning: z.string().describe("Explanation of what the final verdict should be"),
    verdict: z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
  });

  return defineTool({
    description: "Complete the test with a final verdict",
    inputSchema
  });
}
1862
/**
 * Judge agent: asks an LLM, through the `continue_test` / `finish_test`
 * tools, whether the conversation can already be judged against a list of
 * criteria, and converts the tool call into a judgment result.
 *
 * Config fields read directly by this class: `criteria`, `spanCollector`
 * and `systemPrompt`. The whole config is additionally spread over the
 * project's `defaultModel` and parsed by `modelSchema`; `model`,
 * `temperature` and `maxTokens` are read from the parsed result.
 */
var JudgeAgent = class extends JudgeAgentAdapter {
  /**
   * @param cfg - Judge configuration. `criteria` defaults to an empty list
   *   and `spanCollector` defaults to the module-level `judgeSpanCollector`.
   */
  constructor(cfg) {
    super();
    this.cfg = cfg;
    this.criteria = cfg.criteria ?? [];
    this.spanCollector = cfg.spanCollector ?? judgeSpanCollector;
  }
  logger = new Logger("JudgeAgent");
  spanCollector;
  role = "Judge" /* JUDGE */;
  criteria;
  /**
   * LLM invocation function. Can be overridden to customize LLM behavior.
   */
  invokeLLM = createLLMInvoker(this.logger);
  /**
   * Runs one judging step.
   *
   * @param input - Reads `threadId`, `messages`, `judgmentRequest`,
   *   `scenarioState.currentTurn`, `scenarioConfig.maxTurns` and
   *   `scenarioConfig.description`. Criteria on `judgmentRequest` take
   *   precedence over the criteria configured on the agent.
   * @returns `null` when the LLM chose `continue_test`; otherwise an object
   *   `{ success, reasoning, metCriteria, unmetCriteria }`.
   */
  async call(input) {
    const criteria = input.judgmentRequest?.criteria ?? this.criteria;
    this.logger.debug("call() invoked", {
      threadId: input.threadId,
      currentTurn: input.scenarioState.currentTurn,
      maxTurns: input.scenarioConfig.maxTurns,
      judgmentRequest: input.judgmentRequest
    });
    const digest = this.getOpenTelemetryTracesDigest(input.threadId);
    this.logger.debug("OpenTelemetry traces built", { digest });
    const transcript = JudgeUtils.buildTranscriptFromMessages(input.messages);
    // Same text as a template literal that starts and ends with a newline.
    const contentForJudge = [
      "",
      "<transcript>",
      `${transcript}`,
      "</transcript>",
      "<opentelemetry_traces>",
      `${digest}`,
      "</opentelemetry_traces>",
      ""
    ].join("\n");
    const cfg = this.cfg;
    const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(criteria, input.scenarioConfig.description);
    const messages = [
      { role: "system", content: systemPrompt },
      { role: "user", content: contentForJudge }
    ];
    const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
    const projectConfig = await getProjectConfig();
    // Agent config wins over the project-level default model settings.
    const mergedConfig = modelSchema.parse({
      ...projectConfig?.defaultModel,
      ...cfg
    });
    const tools = {
      continue_test: buildContinueTestTool(),
      finish_test: buildFinishTestTool(criteria)
    };
    const enforceJudgement = input.judgmentRequest != null;
    const hasCriteria = criteria.length > 0;
    if (enforceJudgement && !hasCriteria) {
      // A verdict was explicitly requested but there is nothing to judge.
      return {
        success: false,
        reasoning: "JudgeAgent: No criteria was provided to be judged against",
        metCriteria: [],
        unmetCriteria: []
      };
    }
    // Force a verdict on the last turn or on an explicit judgment request;
    // otherwise the LLM must call one of the two tools.
    const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
    this.logger.debug("Calling LLM", {
      model: mergedConfig.model,
      toolChoice,
      isLastMessage,
      enforceJudgement
    });
    const completion = await this.invokeLLM({
      model: mergedConfig.model,
      messages,
      temperature: mergedConfig.temperature ?? 0,
      maxOutputTokens: mergedConfig.maxTokens,
      tools,
      toolChoice
    });
    this.logger.debug("LLM response received", {
      toolCallCount: completion.toolCalls?.length ?? 0,
      toolCalls: completion.toolCalls?.map((tc) => ({
        toolName: tc.toolName,
        args: tc.input
      }))
    });
    if (completion.toolCalls?.length) {
      // Only the first tool call is considered.
      const toolCall = completion.toolCalls[0];
      switch (toolCall.toolName) {
        case "finish_test": {
          // A missing input is treated as an empty verdict instead of throwing.
          const args = toolCall.input ?? {};
          const verdict = args.verdict || "inconclusive";
          const reasoning = args.reasoning || "No reasoning provided";
          const criteriaArgs = args.criteria || {};
          // Look verdicts up by the parameter name used as the schema key in
          // buildFinishTestTool. Object.values() order follows property
          // enumeration order (integer-like keys first, duplicate names
          // collapsed), which need not match the order of `criteria`; the
          // positional value is kept only as a fallback for unnamed results.
          const paramNames = criteria.map(criterionToParamName);
          const positionalValues = Object.values(criteriaArgs);
          const metCriteria = [];
          const unmetCriteria = [];
          criteria.forEach((criterion, i) => {
            const name = paramNames[i];
            const value = Object.prototype.hasOwnProperty.call(criteriaArgs, name) ? criteriaArgs[name] : positionalValues[i];
            if (value === "true") {
              metCriteria.push(criterion);
            } else {
              // "false", "inconclusive" and missing verdicts all count as unmet.
              unmetCriteria.push(criterion);
            }
          });
          const result = {
            success: verdict === "success",
            reasoning,
            metCriteria,
            unmetCriteria
          };
          this.logger.debug("finish_test result", result);
          return result;
        }
        case "continue_test":
          this.logger.debug("continue_test - proceeding to next turn");
          return null;
        default:
          return {
            success: false,
            reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
            metCriteria: [],
            unmetCriteria: criteria
          };
      }
    }
    return {
      success: false,
      reasoning: `JudgeAgent: No tool call found in LLM output`,
      metCriteria: [],
      unmetCriteria: criteria
    };
  }
  /**
   * Formats the spans collected for a thread with `judgeSpanDigestFormatter`.
   *
   * @param threadId - Thread whose spans are fetched from `this.spanCollector`.
   * @returns The formatter's output for those spans.
   */
  getOpenTelemetryTracesDigest(threadId) {
    const spans = this.spanCollector.getSpansForThread(threadId);
    return judgeSpanDigestFormatter.format(spans);
  }
};
1995
/**
 * Factory for a `JudgeAgent`.
 *
 * @param cfg - Optional judge configuration; a null or undefined value is
 *   replaced by an empty object.
 * @returns A new `JudgeAgent` instance.
 */
var judgeAgent = (cfg) => new JudgeAgent(cfg == null ? {} : cfg);
1998
+
1999
+ // src/agents/user-simulator-agent.ts
2000
/**
 * Builds the default system prompt for the user-simulator LLM.
 *
 * @param {string} description - Scenario description placed in the
 *   <scenario> section (interpolated as-is).
 * @returns {string} The prompt with surrounding whitespace trimmed.
 */
function buildSystemPrompt2(description) {
  const promptLines = [
    "<role>",
    "You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.",
    "Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.",
    "</role>",
    "",
    "<goal>",
    "Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.",
    "</goal>",
    "",
    "<scenario>",
    `${description}`,
    "</scenario>",
    "",
    "<rules>",
    "- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user",
    "</rules>"
  ];
  return promptLines.join("\n").trim();
}
2020
+ var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
2021
+ constructor(cfg) {
2022
+ super();
2023
+ this.cfg = cfg;
2024
+ }
2025
+ logger = new Logger(this.constructor.name);
1055
2026
  /**
1056
2027
  * LLM invocation function. Can be overridden to customize LLM behavior.
1057
2028
  */
@@ -1379,813 +2350,69 @@ var RealtimeAgentAdapter = class extends AgentAdapter {
1379
2350
  this.audioEvents.emit("audioResponse", response);
1380
2351
  return this.responseFormatter.formatInitialResponse(response);
1381
2352
  }
1382
- /**
1383
- * Handles audio input from the user
1384
- */
1385
- async handleAudioInput(audioData) {
1386
- const sessionWithTransport = this.session;
1387
- const transport = sessionWithTransport.transport;
1388
- if (!transport) {
1389
- throw new Error("Realtime transport not available");
1390
- }
1391
- transport.sendEvent({
1392
- type: "input_audio_buffer.append",
1393
- audio: audioData
1394
- });
1395
- transport.sendEvent({
1396
- type: "input_audio_buffer.commit"
1397
- });
1398
- transport.sendEvent({
1399
- type: "response.create"
1400
- });
1401
- const timeout = this.config.responseTimeout ?? 6e4;
1402
- const response = await this.eventHandler.waitForResponse(timeout);
1403
- this.audioEvents.emit("audioResponse", response);
1404
- return this.responseFormatter.formatAudioResponse(response);
1405
- }
1406
- /**
1407
- * Handles text input from the user
1408
- */
1409
- async handleTextInput(text) {
1410
- this.session.sendMessage(text);
1411
- const timeout = this.config.responseTimeout ?? 3e4;
1412
- const response = await this.eventHandler.waitForResponse(timeout);
1413
- this.audioEvents.emit("audioResponse", response);
1414
- return this.responseFormatter.formatTextResponse(response.transcript);
1415
- }
1416
- /**
1417
- * Subscribe to audio response events
1418
- *
1419
- * @param callback - Function called when an audio response completes
1420
- */
1421
- onAudioResponse(callback) {
1422
- this.audioEvents.on("audioResponse", callback);
1423
- }
1424
- /**
1425
- * Remove audio response listener
1426
- *
1427
- * @param callback - The callback function to remove
1428
- */
1429
- offAudioResponse(callback) {
1430
- this.audioEvents.off("audioResponse", callback);
1431
- }
1432
- };
1433
-
1434
- // src/execution/index.ts
1435
- var execution_exports = {};
1436
- __export(execution_exports, {
1437
- ScenarioExecution: () => ScenarioExecution,
1438
- ScenarioExecutionState: () => ScenarioExecutionState,
1439
- StateChangeEventType: () => StateChangeEventType
1440
- });
1441
-
1442
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
1443
- var _globalThis = typeof globalThis === "object" ? globalThis : global;
1444
-
1445
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/version.js
1446
- var VERSION = "1.9.0";
1447
-
1448
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/semver.js
1449
- var re = /^(\d+)\.(\d+)\.(\d+)(-(.+))?$/;
1450
- function _makeCompatibilityCheck(ownVersion) {
1451
- var acceptedVersions = /* @__PURE__ */ new Set([ownVersion]);
1452
- var rejectedVersions = /* @__PURE__ */ new Set();
1453
- var myVersionMatch = ownVersion.match(re);
1454
- if (!myVersionMatch) {
1455
- return function() {
1456
- return false;
1457
- };
1458
- }
1459
- var ownVersionParsed = {
1460
- major: +myVersionMatch[1],
1461
- minor: +myVersionMatch[2],
1462
- patch: +myVersionMatch[3],
1463
- prerelease: myVersionMatch[4]
1464
- };
1465
- if (ownVersionParsed.prerelease != null) {
1466
- return function isExactmatch(globalVersion) {
1467
- return globalVersion === ownVersion;
1468
- };
1469
- }
1470
- function _reject(v) {
1471
- rejectedVersions.add(v);
1472
- return false;
1473
- }
1474
- function _accept(v) {
1475
- acceptedVersions.add(v);
1476
- return true;
1477
- }
1478
- return function isCompatible2(globalVersion) {
1479
- if (acceptedVersions.has(globalVersion)) {
1480
- return true;
1481
- }
1482
- if (rejectedVersions.has(globalVersion)) {
1483
- return false;
1484
- }
1485
- var globalVersionMatch = globalVersion.match(re);
1486
- if (!globalVersionMatch) {
1487
- return _reject(globalVersion);
1488
- }
1489
- var globalVersionParsed = {
1490
- major: +globalVersionMatch[1],
1491
- minor: +globalVersionMatch[2],
1492
- patch: +globalVersionMatch[3],
1493
- prerelease: globalVersionMatch[4]
1494
- };
1495
- if (globalVersionParsed.prerelease != null) {
1496
- return _reject(globalVersion);
1497
- }
1498
- if (ownVersionParsed.major !== globalVersionParsed.major) {
1499
- return _reject(globalVersion);
1500
- }
1501
- if (ownVersionParsed.major === 0) {
1502
- if (ownVersionParsed.minor === globalVersionParsed.minor && ownVersionParsed.patch <= globalVersionParsed.patch) {
1503
- return _accept(globalVersion);
1504
- }
1505
- return _reject(globalVersion);
1506
- }
1507
- if (ownVersionParsed.minor <= globalVersionParsed.minor) {
1508
- return _accept(globalVersion);
1509
- }
1510
- return _reject(globalVersion);
1511
- };
1512
- }
1513
- var isCompatible = _makeCompatibilityCheck(VERSION);
1514
-
1515
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/global-utils.js
1516
- var major = VERSION.split(".")[0];
1517
- var GLOBAL_OPENTELEMETRY_API_KEY = Symbol.for("opentelemetry.js.api." + major);
1518
- var _global = _globalThis;
1519
- function registerGlobal(type, instance, diag, allowOverride) {
1520
- var _a;
1521
- if (allowOverride === void 0) {
1522
- allowOverride = false;
1523
- }
1524
- var api = _global[GLOBAL_OPENTELEMETRY_API_KEY] = (_a = _global[GLOBAL_OPENTELEMETRY_API_KEY]) !== null && _a !== void 0 ? _a : {
1525
- version: VERSION
1526
- };
1527
- if (!allowOverride && api[type]) {
1528
- var err = new Error("@opentelemetry/api: Attempted duplicate registration of API: " + type);
1529
- diag.error(err.stack || err.message);
1530
- return false;
1531
- }
1532
- if (api.version !== VERSION) {
1533
- var err = new Error("@opentelemetry/api: Registration of version v" + api.version + " for " + type + " does not match previously registered API v" + VERSION);
1534
- diag.error(err.stack || err.message);
1535
- return false;
1536
- }
1537
- api[type] = instance;
1538
- diag.debug("@opentelemetry/api: Registered a global for " + type + " v" + VERSION + ".");
1539
- return true;
1540
- }
1541
- function getGlobal(type) {
1542
- var _a, _b;
1543
- var globalVersion = (_a = _global[GLOBAL_OPENTELEMETRY_API_KEY]) === null || _a === void 0 ? void 0 : _a.version;
1544
- if (!globalVersion || !isCompatible(globalVersion)) {
1545
- return;
1546
- }
1547
- return (_b = _global[GLOBAL_OPENTELEMETRY_API_KEY]) === null || _b === void 0 ? void 0 : _b[type];
1548
- }
1549
- function unregisterGlobal(type, diag) {
1550
- diag.debug("@opentelemetry/api: Unregistering a global for " + type + " v" + VERSION + ".");
1551
- var api = _global[GLOBAL_OPENTELEMETRY_API_KEY];
1552
- if (api) {
1553
- delete api[type];
1554
- }
1555
- }
1556
-
1557
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/ComponentLogger.js
1558
- var __read = function(o, n) {
1559
- var m = typeof Symbol === "function" && o[Symbol.iterator];
1560
- if (!m) return o;
1561
- var i = m.call(o), r, ar = [], e;
1562
- try {
1563
- while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
1564
- } catch (error) {
1565
- e = { error };
1566
- } finally {
1567
- try {
1568
- if (r && !r.done && (m = i["return"])) m.call(i);
1569
- } finally {
1570
- if (e) throw e.error;
1571
- }
1572
- }
1573
- return ar;
1574
- };
1575
- var __spreadArray = function(to, from, pack) {
1576
- if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
1577
- if (ar || !(i in from)) {
1578
- if (!ar) ar = Array.prototype.slice.call(from, 0, i);
1579
- ar[i] = from[i];
1580
- }
1581
- }
1582
- return to.concat(ar || Array.prototype.slice.call(from));
1583
- };
1584
- var DiagComponentLogger = (
1585
- /** @class */
1586
- (function() {
1587
- function DiagComponentLogger2(props) {
1588
- this._namespace = props.namespace || "DiagComponentLogger";
1589
- }
1590
- DiagComponentLogger2.prototype.debug = function() {
1591
- var args = [];
1592
- for (var _i = 0; _i < arguments.length; _i++) {
1593
- args[_i] = arguments[_i];
1594
- }
1595
- return logProxy("debug", this._namespace, args);
1596
- };
1597
- DiagComponentLogger2.prototype.error = function() {
1598
- var args = [];
1599
- for (var _i = 0; _i < arguments.length; _i++) {
1600
- args[_i] = arguments[_i];
1601
- }
1602
- return logProxy("error", this._namespace, args);
1603
- };
1604
- DiagComponentLogger2.prototype.info = function() {
1605
- var args = [];
1606
- for (var _i = 0; _i < arguments.length; _i++) {
1607
- args[_i] = arguments[_i];
1608
- }
1609
- return logProxy("info", this._namespace, args);
1610
- };
1611
- DiagComponentLogger2.prototype.warn = function() {
1612
- var args = [];
1613
- for (var _i = 0; _i < arguments.length; _i++) {
1614
- args[_i] = arguments[_i];
1615
- }
1616
- return logProxy("warn", this._namespace, args);
1617
- };
1618
- DiagComponentLogger2.prototype.verbose = function() {
1619
- var args = [];
1620
- for (var _i = 0; _i < arguments.length; _i++) {
1621
- args[_i] = arguments[_i];
1622
- }
1623
- return logProxy("verbose", this._namespace, args);
1624
- };
1625
- return DiagComponentLogger2;
1626
- })()
1627
- );
1628
- function logProxy(funcName, namespace, args) {
1629
- var logger2 = getGlobal("diag");
1630
- if (!logger2) {
1631
- return;
1632
- }
1633
- args.unshift(namespace);
1634
- return logger2[funcName].apply(logger2, __spreadArray([], __read(args), false));
1635
- }
1636
-
1637
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/types.js
1638
- var DiagLogLevel;
1639
- (function(DiagLogLevel2) {
1640
- DiagLogLevel2[DiagLogLevel2["NONE"] = 0] = "NONE";
1641
- DiagLogLevel2[DiagLogLevel2["ERROR"] = 30] = "ERROR";
1642
- DiagLogLevel2[DiagLogLevel2["WARN"] = 50] = "WARN";
1643
- DiagLogLevel2[DiagLogLevel2["INFO"] = 60] = "INFO";
1644
- DiagLogLevel2[DiagLogLevel2["DEBUG"] = 70] = "DEBUG";
1645
- DiagLogLevel2[DiagLogLevel2["VERBOSE"] = 80] = "VERBOSE";
1646
- DiagLogLevel2[DiagLogLevel2["ALL"] = 9999] = "ALL";
1647
- })(DiagLogLevel || (DiagLogLevel = {}));
1648
-
1649
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/internal/logLevelLogger.js
1650
- function createLogLevelDiagLogger(maxLevel, logger2) {
1651
- if (maxLevel < DiagLogLevel.NONE) {
1652
- maxLevel = DiagLogLevel.NONE;
1653
- } else if (maxLevel > DiagLogLevel.ALL) {
1654
- maxLevel = DiagLogLevel.ALL;
1655
- }
1656
- logger2 = logger2 || {};
1657
- function _filterFunc(funcName, theLevel) {
1658
- var theFunc = logger2[funcName];
1659
- if (typeof theFunc === "function" && maxLevel >= theLevel) {
1660
- return theFunc.bind(logger2);
1661
- }
1662
- return function() {
1663
- };
1664
- }
1665
- return {
1666
- error: _filterFunc("error", DiagLogLevel.ERROR),
1667
- warn: _filterFunc("warn", DiagLogLevel.WARN),
1668
- info: _filterFunc("info", DiagLogLevel.INFO),
1669
- debug: _filterFunc("debug", DiagLogLevel.DEBUG),
1670
- verbose: _filterFunc("verbose", DiagLogLevel.VERBOSE)
1671
- };
1672
- }
1673
-
1674
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/diag.js
1675
- var __read2 = function(o, n) {
1676
- var m = typeof Symbol === "function" && o[Symbol.iterator];
1677
- if (!m) return o;
1678
- var i = m.call(o), r, ar = [], e;
1679
- try {
1680
- while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
1681
- } catch (error) {
1682
- e = { error };
1683
- } finally {
1684
- try {
1685
- if (r && !r.done && (m = i["return"])) m.call(i);
1686
- } finally {
1687
- if (e) throw e.error;
1688
- }
1689
- }
1690
- return ar;
1691
- };
1692
- var __spreadArray2 = function(to, from, pack) {
1693
- if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
1694
- if (ar || !(i in from)) {
1695
- if (!ar) ar = Array.prototype.slice.call(from, 0, i);
1696
- ar[i] = from[i];
1697
- }
1698
- }
1699
- return to.concat(ar || Array.prototype.slice.call(from));
1700
- };
1701
- var API_NAME = "diag";
1702
- var DiagAPI = (
1703
- /** @class */
1704
- (function() {
1705
- function DiagAPI2() {
1706
- function _logProxy(funcName) {
1707
- return function() {
1708
- var args = [];
1709
- for (var _i = 0; _i < arguments.length; _i++) {
1710
- args[_i] = arguments[_i];
1711
- }
1712
- var logger2 = getGlobal("diag");
1713
- if (!logger2)
1714
- return;
1715
- return logger2[funcName].apply(logger2, __spreadArray2([], __read2(args), false));
1716
- };
1717
- }
1718
- var self = this;
1719
- var setLogger = function(logger2, optionsOrLogLevel) {
1720
- var _a, _b, _c;
1721
- if (optionsOrLogLevel === void 0) {
1722
- optionsOrLogLevel = { logLevel: DiagLogLevel.INFO };
1723
- }
1724
- if (logger2 === self) {
1725
- var err = new Error("Cannot use diag as the logger for itself. Please use a DiagLogger implementation like ConsoleDiagLogger or a custom implementation");
1726
- self.error((_a = err.stack) !== null && _a !== void 0 ? _a : err.message);
1727
- return false;
1728
- }
1729
- if (typeof optionsOrLogLevel === "number") {
1730
- optionsOrLogLevel = {
1731
- logLevel: optionsOrLogLevel
1732
- };
1733
- }
1734
- var oldLogger = getGlobal("diag");
1735
- var newLogger = createLogLevelDiagLogger((_b = optionsOrLogLevel.logLevel) !== null && _b !== void 0 ? _b : DiagLogLevel.INFO, logger2);
1736
- if (oldLogger && !optionsOrLogLevel.suppressOverrideMessage) {
1737
- var stack = (_c = new Error().stack) !== null && _c !== void 0 ? _c : "<failed to generate stacktrace>";
1738
- oldLogger.warn("Current logger will be overwritten from " + stack);
1739
- newLogger.warn("Current logger will overwrite one already registered from " + stack);
1740
- }
1741
- return registerGlobal("diag", newLogger, self, true);
1742
- };
1743
- self.setLogger = setLogger;
1744
- self.disable = function() {
1745
- unregisterGlobal(API_NAME, self);
1746
- };
1747
- self.createComponentLogger = function(options) {
1748
- return new DiagComponentLogger(options);
1749
- };
1750
- self.verbose = _logProxy("verbose");
1751
- self.debug = _logProxy("debug");
1752
- self.info = _logProxy("info");
1753
- self.warn = _logProxy("warn");
1754
- self.error = _logProxy("error");
1755
- }
1756
- DiagAPI2.instance = function() {
1757
- if (!this._instance) {
1758
- this._instance = new DiagAPI2();
1759
- }
1760
- return this._instance;
1761
- };
1762
- return DiagAPI2;
1763
- })()
1764
- );
1765
-
1766
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/context.js
1767
- function createContextKey(description) {
1768
- return Symbol.for(description);
1769
- }
1770
- var BaseContext = (
1771
- /** @class */
1772
- /* @__PURE__ */ (function() {
1773
- function BaseContext2(parentContext) {
1774
- var self = this;
1775
- self._currentContext = parentContext ? new Map(parentContext) : /* @__PURE__ */ new Map();
1776
- self.getValue = function(key) {
1777
- return self._currentContext.get(key);
1778
- };
1779
- self.setValue = function(key, value) {
1780
- var context2 = new BaseContext2(self._currentContext);
1781
- context2._currentContext.set(key, value);
1782
- return context2;
1783
- };
1784
- self.deleteValue = function(key) {
1785
- var context2 = new BaseContext2(self._currentContext);
1786
- context2._currentContext.delete(key);
1787
- return context2;
1788
- };
1789
- }
1790
- return BaseContext2;
1791
- })()
1792
- );
1793
- var ROOT_CONTEXT = new BaseContext();
1794
-
1795
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/NoopContextManager.js
1796
- var __read3 = function(o, n) {
1797
- var m = typeof Symbol === "function" && o[Symbol.iterator];
1798
- if (!m) return o;
1799
- var i = m.call(o), r, ar = [], e;
1800
- try {
1801
- while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
1802
- } catch (error) {
1803
- e = { error };
1804
- } finally {
1805
- try {
1806
- if (r && !r.done && (m = i["return"])) m.call(i);
1807
- } finally {
1808
- if (e) throw e.error;
1809
- }
1810
- }
1811
- return ar;
1812
- };
1813
- var __spreadArray3 = function(to, from, pack) {
1814
- if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
1815
- if (ar || !(i in from)) {
1816
- if (!ar) ar = Array.prototype.slice.call(from, 0, i);
1817
- ar[i] = from[i];
1818
- }
1819
- }
1820
- return to.concat(ar || Array.prototype.slice.call(from));
1821
- };
1822
- var NoopContextManager = (
1823
- /** @class */
1824
- (function() {
1825
- function NoopContextManager2() {
1826
- }
1827
- NoopContextManager2.prototype.active = function() {
1828
- return ROOT_CONTEXT;
1829
- };
1830
- NoopContextManager2.prototype.with = function(_context, fn, thisArg) {
1831
- var args = [];
1832
- for (var _i = 3; _i < arguments.length; _i++) {
1833
- args[_i - 3] = arguments[_i];
1834
- }
1835
- return fn.call.apply(fn, __spreadArray3([thisArg], __read3(args), false));
1836
- };
1837
- NoopContextManager2.prototype.bind = function(_context, target) {
1838
- return target;
1839
- };
1840
- NoopContextManager2.prototype.enable = function() {
1841
- return this;
1842
- };
1843
- NoopContextManager2.prototype.disable = function() {
1844
- return this;
1845
- };
1846
- return NoopContextManager2;
1847
- })()
1848
- );
1849
-
1850
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/context.js
1851
- var __read4 = function(o, n) {
1852
- var m = typeof Symbol === "function" && o[Symbol.iterator];
1853
- if (!m) return o;
1854
- var i = m.call(o), r, ar = [], e;
1855
- try {
1856
- while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
1857
- } catch (error) {
1858
- e = { error };
1859
- } finally {
1860
- try {
1861
- if (r && !r.done && (m = i["return"])) m.call(i);
1862
- } finally {
1863
- if (e) throw e.error;
1864
- }
1865
- }
1866
- return ar;
1867
- };
1868
- var __spreadArray4 = function(to, from, pack) {
1869
- if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
1870
- if (ar || !(i in from)) {
1871
- if (!ar) ar = Array.prototype.slice.call(from, 0, i);
1872
- ar[i] = from[i];
1873
- }
1874
- }
1875
- return to.concat(ar || Array.prototype.slice.call(from));
1876
- };
1877
- var API_NAME2 = "context";
1878
- var NOOP_CONTEXT_MANAGER = new NoopContextManager();
1879
- var ContextAPI = (
1880
- /** @class */
1881
- (function() {
1882
- function ContextAPI2() {
1883
- }
1884
- ContextAPI2.getInstance = function() {
1885
- if (!this._instance) {
1886
- this._instance = new ContextAPI2();
1887
- }
1888
- return this._instance;
1889
- };
1890
- ContextAPI2.prototype.setGlobalContextManager = function(contextManager) {
1891
- return registerGlobal(API_NAME2, contextManager, DiagAPI.instance());
1892
- };
1893
- ContextAPI2.prototype.active = function() {
1894
- return this._getContextManager().active();
1895
- };
1896
- ContextAPI2.prototype.with = function(context2, fn, thisArg) {
1897
- var _a;
1898
- var args = [];
1899
- for (var _i = 3; _i < arguments.length; _i++) {
1900
- args[_i - 3] = arguments[_i];
1901
- }
1902
- return (_a = this._getContextManager()).with.apply(_a, __spreadArray4([context2, fn, thisArg], __read4(args), false));
1903
- };
1904
- ContextAPI2.prototype.bind = function(context2, target) {
1905
- return this._getContextManager().bind(context2, target);
1906
- };
1907
- ContextAPI2.prototype._getContextManager = function() {
1908
- return getGlobal(API_NAME2) || NOOP_CONTEXT_MANAGER;
1909
- };
1910
- ContextAPI2.prototype.disable = function() {
1911
- this._getContextManager().disable();
1912
- unregisterGlobal(API_NAME2, DiagAPI.instance());
1913
- };
1914
- return ContextAPI2;
1915
- })()
1916
- );
1917
-
1918
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/trace_flags.js
1919
- var TraceFlags;
1920
- (function(TraceFlags2) {
1921
- TraceFlags2[TraceFlags2["NONE"] = 0] = "NONE";
1922
- TraceFlags2[TraceFlags2["SAMPLED"] = 1] = "SAMPLED";
1923
- })(TraceFlags || (TraceFlags = {}));
1924
-
1925
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/invalid-span-constants.js
1926
- var INVALID_SPANID = "0000000000000000";
1927
- var INVALID_TRACEID = "00000000000000000000000000000000";
1928
- var INVALID_SPAN_CONTEXT = {
1929
- traceId: INVALID_TRACEID,
1930
- spanId: INVALID_SPANID,
1931
- traceFlags: TraceFlags.NONE
1932
- };
1933
-
1934
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NonRecordingSpan.js
1935
- var NonRecordingSpan = (
1936
- /** @class */
1937
- (function() {
1938
- function NonRecordingSpan2(_spanContext) {
1939
- if (_spanContext === void 0) {
1940
- _spanContext = INVALID_SPAN_CONTEXT;
1941
- }
1942
- this._spanContext = _spanContext;
1943
- }
1944
- NonRecordingSpan2.prototype.spanContext = function() {
1945
- return this._spanContext;
1946
- };
1947
- NonRecordingSpan2.prototype.setAttribute = function(_key, _value) {
1948
- return this;
1949
- };
1950
- NonRecordingSpan2.prototype.setAttributes = function(_attributes) {
1951
- return this;
1952
- };
1953
- NonRecordingSpan2.prototype.addEvent = function(_name, _attributes) {
1954
- return this;
1955
- };
1956
- NonRecordingSpan2.prototype.addLink = function(_link) {
1957
- return this;
1958
- };
1959
- NonRecordingSpan2.prototype.addLinks = function(_links) {
1960
- return this;
1961
- };
1962
- NonRecordingSpan2.prototype.setStatus = function(_status) {
1963
- return this;
1964
- };
1965
- NonRecordingSpan2.prototype.updateName = function(_name) {
1966
- return this;
1967
- };
1968
- NonRecordingSpan2.prototype.end = function(_endTime) {
1969
- };
1970
- NonRecordingSpan2.prototype.isRecording = function() {
1971
- return false;
1972
- };
1973
- NonRecordingSpan2.prototype.recordException = function(_exception, _time) {
1974
- };
1975
- return NonRecordingSpan2;
1976
- })()
1977
- );
1978
-
1979
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/context-utils.js
1980
- var SPAN_KEY = createContextKey("OpenTelemetry Context Key SPAN");
1981
- function getSpan(context2) {
1982
- return context2.getValue(SPAN_KEY) || void 0;
1983
- }
1984
- function getActiveSpan() {
1985
- return getSpan(ContextAPI.getInstance().active());
1986
- }
1987
- function setSpan(context2, span) {
1988
- return context2.setValue(SPAN_KEY, span);
1989
- }
1990
- function deleteSpan(context2) {
1991
- return context2.deleteValue(SPAN_KEY);
1992
- }
1993
- function setSpanContext(context2, spanContext) {
1994
- return setSpan(context2, new NonRecordingSpan(spanContext));
1995
- }
1996
- function getSpanContext(context2) {
1997
- var _a;
1998
- return (_a = getSpan(context2)) === null || _a === void 0 ? void 0 : _a.spanContext();
1999
- }
2000
-
2001
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/spancontext-utils.js
2002
- var VALID_TRACEID_REGEX = /^([0-9a-f]{32})$/i;
2003
- var VALID_SPANID_REGEX = /^[0-9a-f]{16}$/i;
2004
- function isValidTraceId(traceId) {
2005
- return VALID_TRACEID_REGEX.test(traceId) && traceId !== INVALID_TRACEID;
2006
- }
2007
- function isValidSpanId(spanId) {
2008
- return VALID_SPANID_REGEX.test(spanId) && spanId !== INVALID_SPANID;
2009
- }
2010
- function isSpanContextValid(spanContext) {
2011
- return isValidTraceId(spanContext.traceId) && isValidSpanId(spanContext.spanId);
2012
- }
2013
- function wrapSpanContext(spanContext) {
2014
- return new NonRecordingSpan(spanContext);
2015
- }
2016
-
2017
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracer.js
2018
- var contextApi = ContextAPI.getInstance();
2019
- var NoopTracer = (
2020
- /** @class */
2021
- (function() {
2022
- function NoopTracer2() {
2023
- }
2024
- NoopTracer2.prototype.startSpan = function(name, options, context2) {
2025
- if (context2 === void 0) {
2026
- context2 = contextApi.active();
2027
- }
2028
- var root = Boolean(options === null || options === void 0 ? void 0 : options.root);
2029
- if (root) {
2030
- return new NonRecordingSpan();
2031
- }
2032
- var parentFromContext = context2 && getSpanContext(context2);
2033
- if (isSpanContext(parentFromContext) && isSpanContextValid(parentFromContext)) {
2034
- return new NonRecordingSpan(parentFromContext);
2035
- } else {
2036
- return new NonRecordingSpan();
2037
- }
2038
- };
2039
- NoopTracer2.prototype.startActiveSpan = function(name, arg2, arg3, arg4) {
2040
- var opts;
2041
- var ctx;
2042
- var fn;
2043
- if (arguments.length < 2) {
2044
- return;
2045
- } else if (arguments.length === 2) {
2046
- fn = arg2;
2047
- } else if (arguments.length === 3) {
2048
- opts = arg2;
2049
- fn = arg3;
2050
- } else {
2051
- opts = arg2;
2052
- ctx = arg3;
2053
- fn = arg4;
2054
- }
2055
- var parentContext = ctx !== null && ctx !== void 0 ? ctx : contextApi.active();
2056
- var span = this.startSpan(name, opts, parentContext);
2057
- var contextWithSpanSet = setSpan(parentContext, span);
2058
- return contextApi.with(contextWithSpanSet, fn, void 0, span);
2059
- };
2060
- return NoopTracer2;
2061
- })()
2062
- );
2063
- function isSpanContext(spanContext) {
2064
- return typeof spanContext === "object" && typeof spanContext["spanId"] === "string" && typeof spanContext["traceId"] === "string" && typeof spanContext["traceFlags"] === "number";
2065
- }
2066
-
2067
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracer.js
2068
- var NOOP_TRACER = new NoopTracer();
2069
- var ProxyTracer = (
2070
- /** @class */
2071
- (function() {
2072
- function ProxyTracer2(_provider, name, version, options) {
2073
- this._provider = _provider;
2074
- this.name = name;
2075
- this.version = version;
2076
- this.options = options;
2077
- }
2078
- ProxyTracer2.prototype.startSpan = function(name, options, context2) {
2079
- return this._getTracer().startSpan(name, options, context2);
2080
- };
2081
- ProxyTracer2.prototype.startActiveSpan = function(_name, _options, _context, _fn) {
2082
- var tracer = this._getTracer();
2083
- return Reflect.apply(tracer.startActiveSpan, tracer, arguments);
2084
- };
2085
- ProxyTracer2.prototype._getTracer = function() {
2086
- if (this._delegate) {
2087
- return this._delegate;
2088
- }
2089
- var tracer = this._provider.getDelegateTracer(this.name, this.version, this.options);
2090
- if (!tracer) {
2091
- return NOOP_TRACER;
2092
- }
2093
- this._delegate = tracer;
2094
- return this._delegate;
2095
- };
2096
- return ProxyTracer2;
2097
- })()
2098
- );
2099
-
2100
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracerProvider.js
2101
- var NoopTracerProvider = (
2102
- /** @class */
2103
- (function() {
2104
- function NoopTracerProvider2() {
2105
- }
2106
- NoopTracerProvider2.prototype.getTracer = function(_name, _version, _options) {
2107
- return new NoopTracer();
2108
- };
2109
- return NoopTracerProvider2;
2110
- })()
2111
- );
2112
-
2113
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracerProvider.js
2114
- var NOOP_TRACER_PROVIDER = new NoopTracerProvider();
2115
- var ProxyTracerProvider = (
2116
- /** @class */
2117
- (function() {
2118
- function ProxyTracerProvider2() {
2119
- }
2120
- ProxyTracerProvider2.prototype.getTracer = function(name, version, options) {
2121
- var _a;
2122
- return (_a = this.getDelegateTracer(name, version, options)) !== null && _a !== void 0 ? _a : new ProxyTracer(this, name, version, options);
2123
- };
2124
- ProxyTracerProvider2.prototype.getDelegate = function() {
2125
- var _a;
2126
- return (_a = this._delegate) !== null && _a !== void 0 ? _a : NOOP_TRACER_PROVIDER;
2127
- };
2128
- ProxyTracerProvider2.prototype.setDelegate = function(delegate) {
2129
- this._delegate = delegate;
2130
- };
2131
- ProxyTracerProvider2.prototype.getDelegateTracer = function(name, version, options) {
2132
- var _a;
2133
- return (_a = this._delegate) === null || _a === void 0 ? void 0 : _a.getTracer(name, version, options);
2134
- };
2135
- return ProxyTracerProvider2;
2136
- })()
2137
- );
2138
-
2139
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context-api.js
2140
- var context = ContextAPI.getInstance();
2141
-
2142
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/trace.js
2143
- var API_NAME3 = "trace";
2144
- var TraceAPI = (
2145
- /** @class */
2146
- (function() {
2147
- function TraceAPI2() {
2148
- this._proxyTracerProvider = new ProxyTracerProvider();
2149
- this.wrapSpanContext = wrapSpanContext;
2150
- this.isSpanContextValid = isSpanContextValid;
2151
- this.deleteSpan = deleteSpan;
2152
- this.getSpan = getSpan;
2153
- this.getActiveSpan = getActiveSpan;
2154
- this.getSpanContext = getSpanContext;
2155
- this.setSpan = setSpan;
2156
- this.setSpanContext = setSpanContext;
2157
- }
2158
- TraceAPI2.getInstance = function() {
2159
- if (!this._instance) {
2160
- this._instance = new TraceAPI2();
2161
- }
2162
- return this._instance;
2163
- };
2164
- TraceAPI2.prototype.setGlobalTracerProvider = function(provider) {
2165
- var success = registerGlobal(API_NAME3, this._proxyTracerProvider, DiagAPI.instance());
2166
- if (success) {
2167
- this._proxyTracerProvider.setDelegate(provider);
2168
- }
2169
- return success;
2170
- };
2171
- TraceAPI2.prototype.getTracerProvider = function() {
2172
- return getGlobal(API_NAME3) || this._proxyTracerProvider;
2173
- };
2174
- TraceAPI2.prototype.getTracer = function(name, version) {
2175
- return this.getTracerProvider().getTracer(name, version);
2176
- };
2177
- TraceAPI2.prototype.disable = function() {
2178
- unregisterGlobal(API_NAME3, DiagAPI.instance());
2179
- this._proxyTracerProvider = new ProxyTracerProvider();
2180
- };
2181
- return TraceAPI2;
2182
- })()
2183
- );
2353
+ /**
2354
+ * Handles audio input from the user
2355
+ */
2356
+ async handleAudioInput(audioData) {
2357
+ const sessionWithTransport = this.session;
2358
+ const transport = sessionWithTransport.transport;
2359
+ if (!transport) {
2360
+ throw new Error("Realtime transport not available");
2361
+ }
2362
+ transport.sendEvent({
2363
+ type: "input_audio_buffer.append",
2364
+ audio: audioData
2365
+ });
2366
+ transport.sendEvent({
2367
+ type: "input_audio_buffer.commit"
2368
+ });
2369
+ transport.sendEvent({
2370
+ type: "response.create"
2371
+ });
2372
+ const timeout = this.config.responseTimeout ?? 6e4;
2373
+ const response = await this.eventHandler.waitForResponse(timeout);
2374
+ this.audioEvents.emit("audioResponse", response);
2375
+ return this.responseFormatter.formatAudioResponse(response);
2376
+ }
2377
+ /**
2378
+ * Handles text input from the user
2379
+ */
2380
+ async handleTextInput(text) {
2381
+ this.session.sendMessage(text);
2382
+ const timeout = this.config.responseTimeout ?? 3e4;
2383
+ const response = await this.eventHandler.waitForResponse(timeout);
2384
+ this.audioEvents.emit("audioResponse", response);
2385
+ return this.responseFormatter.formatTextResponse(response.transcript);
2386
+ }
2387
+ /**
2388
+ * Subscribe to audio response events
2389
+ *
2390
+ * @param callback - Function called when an audio response completes
2391
+ */
2392
+ onAudioResponse(callback) {
2393
+ this.audioEvents.on("audioResponse", callback);
2394
+ }
2395
+ /**
2396
+ * Remove audio response listener
2397
+ *
2398
+ * @param callback - The callback function to remove
2399
+ */
2400
+ offAudioResponse(callback) {
2401
+ this.audioEvents.off("audioResponse", callback);
2402
+ }
2403
+ };
2184
2404
 
2185
- // node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace-api.js
2186
- var trace = TraceAPI.getInstance();
2405
+ // src/execution/index.ts
2406
+ var execution_exports = {};
2407
+ __export(execution_exports, {
2408
+ ScenarioExecution: () => ScenarioExecution,
2409
+ ScenarioExecutionState: () => ScenarioExecutionState,
2410
+ StateChangeEventType: () => StateChangeEventType
2411
+ });
2187
2412
 
2188
2413
  // src/execution/scenario-execution.ts
2414
+ init_esm();
2415
+ init_esm();
2189
2416
  var import_langwatch = require("langwatch");
2190
2417
  var import_observability3 = require("langwatch/observability");
2191
2418
  var import_rxjs2 = require("rxjs");
@@ -2371,7 +2598,7 @@ var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
2371
2598
  metadata: import_zod.z.object({
2372
2599
  name: import_zod.z.string().optional(),
2373
2600
  description: import_zod.z.string().optional()
2374
- })
2601
+ }).catchall(import_zod.z.unknown())
2375
2602
  });
2376
2603
  var scenarioResultsSchema = import_zod.z.object({
2377
2604
  verdict: import_zod.z.nativeEnum(Verdict),
@@ -2518,6 +2745,8 @@ var ScenarioExecution = class {
2518
2745
  currentTurnSpan;
2519
2746
  /** Timestamp when execution started (for total time calculation) */
2520
2747
  totalStartTime = 0;
2748
+ /** Accumulated results from inline judge checkpoints */
2749
+ checkpointResults = [];
2521
2750
  /** Event stream for monitoring scenario progress */
2522
2751
  eventSubject = new import_rxjs2.Subject();
2523
2752
  /**
@@ -2530,13 +2759,22 @@ var ScenarioExecution = class {
2530
2759
  * - RUN_FINISHED: When scenario execution completes (success/failure/error)
2531
2760
  */
2532
2761
  events$ = this.eventSubject.asObservable();
2762
+ /** Batch run ID for grouping scenario runs */
2763
+ batchRunId;
2764
+ /** The run ID for the current execution */
2765
+ scenarioRunId;
2533
2766
  /**
2534
2767
  * Creates a new ScenarioExecution instance.
2535
2768
  *
2536
2769
  * @param config - The scenario configuration containing agents, settings, and metadata
2537
2770
  * @param script - The ordered sequence of script steps that define the test flow
2771
+ * @param batchRunId - Batch run ID for grouping scenario runs
2538
2772
  */
2539
- constructor(config2, script) {
2773
+ constructor(config2, script, batchRunId2) {
2774
+ if (!batchRunId2) {
2775
+ throw new Error("batchRunId is required");
2776
+ }
2777
+ this.batchRunId = batchRunId2;
2540
2778
  this.config = {
2541
2779
  id: config2.id ?? generateScenarioId(),
2542
2780
  name: config2.name,
@@ -2546,7 +2784,8 @@ var ScenarioExecution = class {
2546
2784
  verbose: config2.verbose ?? DEFAULT_VERBOSE,
2547
2785
  maxTurns: config2.maxTurns ?? DEFAULT_MAX_TURNS,
2548
2786
  threadId: config2.threadId ?? generateThreadId(),
2549
- setId: config2.setId
2787
+ setId: config2.setId,
2788
+ metadata: config2.metadata
2550
2789
  };
2551
2790
  this.state = new ScenarioExecutionState(this.config);
2552
2791
  this.reset();
@@ -2584,12 +2823,16 @@ var ScenarioExecution = class {
2584
2823
  * @param result - The final scenario result (without messages/timing, which will be added automatically)
2585
2824
  */
2586
2825
  setResult(result) {
2826
+ if (!this.scenarioRunId) {
2827
+ throw new Error("Cannot set result: scenarioRunId has not been initialized. This is a bug in ScenarioExecution.");
2828
+ }
2587
2829
  const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
2588
2830
  const agentTimes = agentRoleAgentsIdx.map(
2589
2831
  (i) => this.agentTimes.get(i) || 0
2590
2832
  );
2591
2833
  const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
2592
2834
  this._result = {
2835
+ runId: this.scenarioRunId,
2593
2836
  ...result,
2594
2837
  messages: this.state.messages,
2595
2838
  totalTime: this.totalTime,
@@ -2602,6 +2845,7 @@ var ScenarioExecution = class {
2602
2845
  agentTime: totalAgentTime,
2603
2846
  messageCount: this.state.messages.length
2604
2847
  });
2848
+ return this._result;
2605
2849
  }
2606
2850
  /**
2607
2851
  * The total elapsed time for the scenario execution.
@@ -2642,7 +2886,10 @@ var ScenarioExecution = class {
2642
2886
  scriptLength: this.config.script.length
2643
2887
  });
2644
2888
  this.reset();
2889
+ this.newTurn();
2890
+ this.state.currentTurn = 0;
2645
2891
  const scenarioRunId = generateScenarioRunId();
2892
+ this.scenarioRunId = scenarioRunId;
2646
2893
  this.logger.debug(`[${this.config.id}] Generated run ID: ${scenarioRunId}`);
2647
2894
  this.emitRunStarted({ scenarioRunId });
2648
2895
  const subscription = this.state.events$.pipe(
@@ -2655,6 +2902,8 @@ var ScenarioExecution = class {
2655
2902
  const scriptStep = this.config.script[i];
2656
2903
  await this.executeScriptStep(scriptStep, i);
2657
2904
  if (this.result) {
2905
+ const cp = this.compiledCheckpoints;
2906
+ this.result.metCriteria = [...cp.metCriteria, ...this.result.metCriteria];
2658
2907
  this.emitRunFinished({
2659
2908
  scenarioRunId,
2660
2909
  status: this.result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
@@ -2663,7 +2912,22 @@ var ScenarioExecution = class {
2663
2912
  return this.result;
2664
2913
  }
2665
2914
  }
2666
- this.reachedMaxTurns(
2915
+ if (this.checkpointResults.length > 0) {
2916
+ const cp = this.compiledCheckpoints;
2917
+ const result2 = this.setResult({
2918
+ success: cp.unmetCriteria.length === 0,
2919
+ reasoning: "All inline criteria checkpoints passed",
2920
+ metCriteria: cp.metCriteria,
2921
+ unmetCriteria: cp.unmetCriteria
2922
+ });
2923
+ this.emitRunFinished({
2924
+ scenarioRunId,
2925
+ status: result2.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
2926
+ result: result2
2927
+ });
2928
+ return result2;
2929
+ }
2930
+ const result = this.reachedMaxTurns(
2667
2931
  [
2668
2932
  "Reached end of script without conclusion, add one of the following to the end of the script:",
2669
2933
  "- `Scenario.proceed()` to let the simulation continue to play out",
@@ -2671,11 +2935,11 @@ var ScenarioExecution = class {
2671
2935
  "- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
2672
2936
  ].join("\n")
2673
2937
  );
2674
- this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
2675
- return this.result;
2938
+ this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */, result });
2939
+ return result;
2676
2940
  } catch (error) {
2677
2941
  const errorInfo = extractErrorInfo(error);
2678
- this.setResult({
2942
+ const result = this.setResult({
2679
2943
  success: false,
2680
2944
  reasoning: `Scenario failed with error: ${errorInfo.message}`,
2681
2945
  metCriteria: [],
@@ -2685,10 +2949,14 @@ var ScenarioExecution = class {
2685
2949
  this.emitRunFinished({
2686
2950
  scenarioRunId,
2687
2951
  status: "ERROR" /* ERROR */,
2688
- result: this.result
2952
+ result
2689
2953
  });
2690
2954
  throw error;
2691
2955
  } finally {
2956
+ if (this.currentTurnSpan) {
2957
+ this.currentTurnSpan.end();
2958
+ this.currentTurnSpan = void 0;
2959
+ }
2692
2960
  subscription.unsubscribe();
2693
2961
  }
2694
2962
  }
@@ -2789,7 +3057,7 @@ var ScenarioExecution = class {
2789
3057
  * @param judgmentRequest - Whether this is a judgment request (for judge agents)
2790
3058
  * @throws Error if the agent call fails
2791
3059
  */
2792
- async callAgent(idx, role, judgmentRequest = false) {
3060
+ async callAgent(idx, role, judgmentRequest) {
2793
3061
  var _a;
2794
3062
  const agent2 = this.agents[idx];
2795
3063
  const agentName = agent2.name ?? agent2.constructor.name;
@@ -2813,69 +3081,72 @@ var ScenarioExecution = class {
2813
3081
  const agentContext = this.currentTurnSpan ? trace.setSpan(context.active(), this.currentTurnSpan) : context.active();
2814
3082
  const agentSpanName = `${agentName !== Object.prototype.constructor.name ? agent2.constructor.name : "Agent"}.call`;
2815
3083
  try {
2816
- await this.tracer.withActiveSpan(
2817
- agentSpanName,
2818
- {
2819
- attributes: {
2820
- [import_observability3.attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId
2821
- }
2822
- },
3084
+ await context.with(
2823
3085
  agentContext,
2824
- async (agentSpan) => {
2825
- agentSpan.setType("agent");
2826
- agentSpan.setInput("chat_messages", this.state.messages);
2827
- const agentResponse = await agent2.call(agentInput);
2828
- const endTime = Date.now();
2829
- const duration = endTime - startTime;
2830
- this.logger.debug(`[${this.config.id}] Agent responded`, {
2831
- agentIdx: idx,
2832
- duration,
2833
- responseType: typeof agentResponse,
2834
- isScenarioResult: agentResponse && typeof agentResponse === "object" && "success" in agentResponse
2835
- });
2836
- this.addAgentTime(idx, duration);
2837
- this.pendingMessages.delete(idx);
2838
- if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
2839
- this.logger.debug(
2840
- `[${this.config.id}] Agent returned ScenarioResult`,
2841
- {
2842
- success: agentResponse.success
2843
- }
2844
- );
2845
- this.setResult(agentResponse);
2846
- return;
2847
- }
2848
- const messages = convertAgentReturnTypesToMessages(
2849
- agentResponse,
2850
- role === "User" /* USER */ ? "user" : "assistant"
2851
- );
2852
- if (messages.length > 0) {
2853
- agentSpan.setOutput("chat_messages", messages);
2854
- }
2855
- const metrics = {
2856
- duration: endTime - startTime
2857
- };
2858
- if (agentResponse && typeof agentResponse === "object") {
2859
- const usage = agentResponse.usage;
2860
- if (usage) {
2861
- if (usage.prompt_tokens !== void 0)
2862
- metrics.promptTokens = usage.prompt_tokens;
2863
- if (usage.completion_tokens !== void 0)
2864
- metrics.completionTokens = usage.completion_tokens;
2865
- if (usage.total_tokens !== void 0)
2866
- metrics.totalTokens = usage.total_tokens;
3086
+ () => this.tracer.withActiveSpan(
3087
+ agentSpanName,
3088
+ {
3089
+ attributes: {
3090
+ [import_observability3.attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId
2867
3091
  }
2868
- }
2869
- agentSpan.setMetrics(metrics);
2870
- const traceId = agentSpan.spanContext().traceId.toString();
2871
- for (const message2 of messages) {
2872
- this.state.addMessage({
2873
- ...message2,
2874
- traceId
3092
+ },
3093
+ agentContext,
3094
+ async (agentSpan) => {
3095
+ agentSpan.setType("agent");
3096
+ agentSpan.setInput("chat_messages", this.state.messages);
3097
+ const agentResponse = await agent2.call(agentInput);
3098
+ const endTime = Date.now();
3099
+ const duration = endTime - startTime;
3100
+ this.logger.debug(`[${this.config.id}] Agent responded`, {
3101
+ agentIdx: idx,
3102
+ duration,
3103
+ responseType: typeof agentResponse,
3104
+ isScenarioResult: agentResponse && typeof agentResponse === "object" && "success" in agentResponse
2875
3105
  });
2876
- this.broadcastMessage(message2, idx);
3106
+ this.addAgentTime(idx, duration);
3107
+ this.pendingMessages.delete(idx);
3108
+ if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
3109
+ this.logger.debug(
3110
+ `[${this.config.id}] Agent returned ScenarioResult`,
3111
+ {
3112
+ success: agentResponse.success
3113
+ }
3114
+ );
3115
+ this.setResult(agentResponse);
3116
+ return;
3117
+ }
3118
+ const messages = convertAgentReturnTypesToMessages(
3119
+ agentResponse,
3120
+ role === "User" /* USER */ ? "user" : "assistant"
3121
+ );
3122
+ if (messages.length > 0) {
3123
+ agentSpan.setOutput("chat_messages", messages);
3124
+ }
3125
+ const metrics = {
3126
+ duration: endTime - startTime
3127
+ };
3128
+ if (agentResponse && typeof agentResponse === "object") {
3129
+ const usage = agentResponse.usage;
3130
+ if (usage) {
3131
+ if (usage.prompt_tokens !== void 0)
3132
+ metrics.promptTokens = usage.prompt_tokens;
3133
+ if (usage.completion_tokens !== void 0)
3134
+ metrics.completionTokens = usage.completion_tokens;
3135
+ if (usage.total_tokens !== void 0)
3136
+ metrics.totalTokens = usage.total_tokens;
3137
+ }
3138
+ }
3139
+ agentSpan.setMetrics(metrics);
3140
+ const traceId = agentSpan.spanContext().traceId.toString();
3141
+ for (const message2 of messages) {
3142
+ this.state.addMessage({
3143
+ ...message2,
3144
+ traceId
3145
+ });
3146
+ this.broadcastMessage(message2, idx);
3147
+ }
2877
3148
  }
2878
- }
3149
+ )
2879
3150
  );
2880
3151
  } catch (error) {
2881
3152
  throw new Error(`[${agentName}] ${error}`, { cause: error });
@@ -2980,25 +3251,26 @@ var ScenarioExecution = class {
2980
3251
  *
2981
3252
  * This method is part of the ScenarioExecutionLike interface used by script steps.
2982
3253
  *
2983
- * @param content - Optional message to pass to the judge agent for additional context
3254
+ * @param options - Optional options with inline criteria to evaluate as a checkpoint.
2984
3255
  * @returns A promise that resolves with:
2985
3256
  * - ScenarioResult if the judge makes a final decision, or
2986
3257
  * - Null if the conversation should continue
2987
3258
  *
2988
3259
  * @example
2989
3260
  * ```typescript
2990
- * // Let judge evaluate current state
3261
+ * // Let judge evaluate with its configured criteria
2991
3262
  * const result = await execution.judge();
2992
- * if (result) {
2993
- * console.log(`Judge decided: ${result.success ? 'pass' : 'fail'}`);
2994
- * }
2995
3263
  *
2996
- * // Provide additional context to judge
2997
- * const result = await execution.judge("Please consider the user's satisfaction level");
3264
+ * // Evaluate inline criteria as a checkpoint
3265
+ * const result = await execution.judge({ criteria: ["Agent responded helpfully"] });
2998
3266
  * ```
2999
3267
  */
3000
- async judge(content) {
3001
- return await this.scriptCallAgent("Judge" /* JUDGE */, content, true);
3268
+ async judge(options) {
3269
+ return await this.scriptCallAgent(
3270
+ "Judge" /* JUDGE */,
3271
+ void 0,
3272
+ { criteria: options == null ? void 0 : options.criteria }
3273
+ );
3002
3274
  }
3003
3275
  /**
3004
3276
  * Lets the scenario proceed automatically for a specified number of turns.
@@ -3083,13 +3355,12 @@ var ScenarioExecution = class {
3083
3355
  * ```
3084
3356
  */
3085
3357
  async succeed(reasoning) {
3086
- this.setResult({
3358
+ return this.setResult({
3087
3359
  success: true,
3088
3360
  reasoning: reasoning || "Scenario marked as successful with Scenario.succeed()",
3089
3361
  metCriteria: [],
3090
3362
  unmetCriteria: []
3091
3363
  });
3092
- return this.result;
3093
3364
  }
3094
3365
  /**
3095
3366
  * Immediately ends the scenario with a failure verdict.
@@ -3115,13 +3386,12 @@ var ScenarioExecution = class {
3115
3386
  * ```
3116
3387
  */
3117
3388
  async fail(reasoning) {
3118
- this.setResult({
3389
+ return this.setResult({
3119
3390
  success: false,
3120
3391
  reasoning: reasoning || "Scenario marked as failed with Scenario.fail()",
3121
3392
  metCriteria: [],
3122
3393
  unmetCriteria: []
3123
3394
  });
3124
- return this.result;
3125
3395
  }
3126
3396
  /**
3127
3397
  * Adds execution time for a specific agent to the performance tracking.
@@ -3165,15 +3435,14 @@ var ScenarioExecution = class {
3165
3435
  * decision, or null if the conversation should continue
3166
3436
  * @throws Error if no agent is found for the specified role
3167
3437
  */
3168
- async scriptCallAgent(role, content, judgmentRequest = false) {
3438
+ async scriptCallAgent(role, content, judgmentRequest) {
3169
3439
  this.logger.debug(`[${this.config.id}] scriptCallAgent`, {
3170
3440
  role,
3171
3441
  hasContent: content !== void 0,
3172
- judgmentRequest
3442
+ judgmentRequest: judgmentRequest != null,
3443
+ hasInlineCriteria: (judgmentRequest == null ? void 0 : judgmentRequest.criteria) != null
3173
3444
  });
3174
3445
  this.consumeUntilRole(role);
3175
- let index = -1;
3176
- let agent2 = null;
3177
3446
  let nextAgent = this.getNextAgentForRole(role);
3178
3447
  if (!nextAgent) {
3179
3448
  this.newTurn();
@@ -3203,8 +3472,8 @@ var ScenarioExecution = class {
3203
3472
  `Cannot generate a message for role \`${role}\` because no agent with this role was found, please add ${roleClass} to the scenario \`agents\` list`
3204
3473
  );
3205
3474
  }
3206
- index = nextAgent.index;
3207
- agent2 = nextAgent.agent;
3475
+ const index = nextAgent.index;
3476
+ const agent2 = nextAgent.agent;
3208
3477
  this.removePendingAgent(agent2);
3209
3478
  if (content) {
3210
3479
  const message2 = typeof content === "string" ? {
@@ -3216,6 +3485,25 @@ var ScenarioExecution = class {
3216
3485
  return null;
3217
3486
  }
3218
3487
  await this.callAgent(index, role, judgmentRequest);
3488
+ if (this.result && (judgmentRequest == null ? void 0 : judgmentRequest.criteria) != null) {
3489
+ this.checkpointResults.push({
3490
+ metCriteria: this.result.metCriteria,
3491
+ unmetCriteria: this.result.unmetCriteria
3492
+ });
3493
+ if (this.result.success) {
3494
+ this._result = void 0;
3495
+ return null;
3496
+ } else {
3497
+ const cp = this.compiledCheckpoints;
3498
+ this.result.metCriteria = cp.metCriteria;
3499
+ this.result.unmetCriteria = cp.unmetCriteria;
3500
+ return this.result;
3501
+ }
3502
+ }
3503
+ if (this.result) {
3504
+ const cp = this.compiledCheckpoints;
3505
+ this.result.metCriteria = [...cp.metCriteria, ...this.result.metCriteria];
3506
+ }
3219
3507
  return this.result ?? null;
3220
3508
  }
3221
3509
  /**
@@ -3229,7 +3517,7 @@ var ScenarioExecution = class {
3229
3517
  * - Creates a new ScenarioExecutionState with the current config
3230
3518
  * - Sets up the thread ID (generates new one if not provided)
3231
3519
  * - Initializes all agents
3232
- * - Starts the first turn
3520
+ * - Initializes turn state (pending agents/roles) without creating a trace span
3233
3521
  * - Records the start time for performance tracking
3234
3522
  * - Clears any pending messages
3235
3523
  * - Clears the result from any previous execution
@@ -3243,16 +3531,28 @@ var ScenarioExecution = class {
3243
3531
  this.state = new ScenarioExecutionState(this.config);
3244
3532
  this.state.threadId = this.config.threadId || generateThreadId();
3245
3533
  this.setAgents(this.config.agents);
3246
- this.newTurn();
3534
+ this.pendingAgentsOnTurn = new Set(this.agents);
3535
+ this.pendingRolesOnTurn = ["User" /* USER */, "Agent" /* AGENT */, "Judge" /* JUDGE */];
3247
3536
  this.state.currentTurn = 0;
3248
3537
  this.totalStartTime = Date.now();
3249
3538
  this.pendingMessages.clear();
3250
3539
  this._result = void 0;
3540
+ this.checkpointResults = [];
3251
3541
  this.logger.debug(`[${this.config.id}] Reset complete`, {
3252
3542
  threadId: this.state.threadId,
3253
3543
  agentCount: this.agents.length
3254
3544
  });
3255
3545
  }
3546
+ /** Compiles all accumulated checkpoint results into aggregated met/unmet criteria. */
3547
+ get compiledCheckpoints() {
3548
+ const metCriteria = [];
3549
+ const unmetCriteria = [];
3550
+ for (const cp of this.checkpointResults) {
3551
+ metCriteria.push(...cp.metCriteria);
3552
+ unmetCriteria.push(...cp.unmetCriteria);
3553
+ }
3554
+ return { metCriteria, unmetCriteria };
3555
+ }
3256
3556
  nextAgentForRole(role) {
3257
3557
  for (const agent2 of this.agents) {
3258
3558
  if (agent2.role === role && this.pendingAgentsOnTurn.has(agent2) && this.pendingRolesOnTurn.includes(role)) {
@@ -3349,7 +3649,7 @@ var ScenarioExecution = class {
3349
3649
  */
3350
3650
  reachedMaxTurns(errorMessage) {
3351
3651
  var _a;
3352
- this.setResult({
3652
+ return this.setResult({
3353
3653
  success: false,
3354
3654
  reasoning: errorMessage || `Reached maximum turns (${this.config.maxTurns || 10}) without conclusion`,
3355
3655
  metCriteria: [],
@@ -3373,7 +3673,7 @@ var ScenarioExecution = class {
3373
3673
  type: "placeholder",
3374
3674
  // This will be replaced by the specific event type
3375
3675
  timestamp: Date.now(),
3376
- batchRunId: getBatchRunId(),
3676
+ batchRunId: this.batchRunId,
3377
3677
  scenarioId: this.config.id,
3378
3678
  scenarioRunId,
3379
3679
  scenarioSetId: this.config.setId
@@ -3387,6 +3687,7 @@ var ScenarioExecution = class {
3387
3687
  ...this.makeBaseEvent({ scenarioRunId }),
3388
3688
  type: "SCENARIO_RUN_STARTED" /* RUN_STARTED */,
3389
3689
  metadata: {
3690
+ ...this.config.metadata,
3390
3691
  name: this.config.name,
3391
3692
  description: this.config.description
3392
3693
  }
@@ -3850,9 +4151,9 @@ var message = (message2) => {
3850
4151
  var agent = (content) => {
3851
4152
  return (_state, executor) => executor.agent(content);
3852
4153
  };
3853
- var judge = (content) => {
4154
+ var judge = (options) => {
3854
4155
  return async (_state, executor) => {
3855
- await executor.judge(content);
4156
+ await executor.judge(options);
3856
4157
  };
3857
4158
  };
3858
4159
  var user = (content) => {
@@ -3874,8 +4175,429 @@ var fail = (reasoning) => {
3874
4175
  };
3875
4176
  };
3876
4177
 
4178
+ // src/tracing/setup.ts
4179
+ init_esm();
4180
+ var import_node = require("langwatch/observability/node");
4181
+ var import_observability4 = require("langwatch/observability");
4182
+
4183
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/trace/suppress-tracing.js
4184
+ init_esm();
4185
// Context key under which the "suppress tracing" flag is stored.
var SUPPRESS_TRACING_KEY = createContextKey("OpenTelemetry SDK Context Key SUPPRESS_TRACING");

/**
 * Marks a context as tracing-suppressed.
 *
 * @param {object} context2 - Object exposing `setValue(key, value)`.
 * @returns {*} Whatever `context2.setValue(SUPPRESS_TRACING_KEY, true)` returns.
 */
function suppressTracing(context2) {
  var suppressedContext = context2.setValue(SUPPRESS_TRACING_KEY, true);
  return suppressedContext;
}
4189
+
4190
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/common/logging-error-handler.js
4191
+ init_esm();
4192
/**
 * Builds an error handler that reports through `diag.error`.
 *
 * @returns {Function} Handler `(ex)` that logs `stringifyException(ex)` and returns nothing.
 */
function loggingErrorHandler() {
  var logException = function(ex) {
    var text = stringifyException(ex);
    diag.error(text);
  };
  return logException;
}
4197
/**
 * Turns an exception into a log-friendly string.
 *
 * @param {*} ex - The thrown value.
 * @returns {string} `ex` itself when it is a string, otherwise the JSON
 *   serialisation of `flattenException(ex)`.
 */
function stringifyException(ex) {
  var isPlainText = typeof ex === "string";
  return isPlainText ? ex : JSON.stringify(flattenException(ex));
}
4204
/**
 * Flattens an exception-like value into a plain object of strings.
 *
 * Walks `ex` and then each object on its prototype chain. For every own property
 * name whose value is truthy, the value is stored as `String(value)`; the first
 * truthy value found for a name wins, so properties closer to `ex` shadow
 * inherited ones.
 *
 * @param {*} ex - The thrown value. `null` and `undefined` yield an empty object.
 * @returns {Object<string, string>} Map of property name to stringified value.
 */
function flattenException(ex) {
  var result = {};
  // `!= null` ends the walk at the end of the prototype chain (null) and also
  // guards against `undefined`, for which Object.getOwnPropertyNames would throw.
  for (var current = ex; current != null; current = Object.getPrototypeOf(current)) {
    Object.getOwnPropertyNames(current).forEach(function(propertyName) {
      if (result[propertyName]) {
        return;
      }
      var value = current[propertyName];
      if (value) {
        result[propertyName] = String(value);
      }
    });
  }
  return result;
}
4220
+
4221
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/common/global-error-handler.js
4222
// Handler every reported error is delegated to; created once from loggingErrorHandler().
var delegateHandler = loggingErrorHandler();

/**
 * Reports an error through `delegateHandler` without ever throwing itself.
 *
 * @param {*} ex - The error (or any thrown value) to report.
 * @returns {void}
 */
function globalErrorHandler(ex) {
  try {
    delegateHandler(ex);
  } catch (_ignored) {
    // Deliberate best-effort: a failing delegate must not surface to the caller.
  }
}
4229
+
4230
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/ExportResult.js
4231
// Two-way lookup between export result names and their numeric codes:
// ExportResultCode.SUCCESS === 0 and ExportResultCode[0] === "SUCCESS" (likewise FAILED / 1).
var ExportResultCode = ExportResultCode || {};
["SUCCESS", "FAILED"].forEach(function(label, code) {
  ExportResultCode[label] = code;
  ExportResultCode[code] = label;
});
4236
+
4237
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/utils/promise.js
4238
/**
 * A promise whose settlement is controlled from outside.
 *
 * - `promise` (read-only accessor): the underlying promise.
 * - `resolve(val)`: fulfils the promise with `val`.
 * - `reject(err)`: rejects the promise with `err`.
 */
var Deferred = (
  /** @class */
  (function() {
    function Deferred2() {
      // The executor runs synchronously, so both settle functions are stored
      // on the instance before the constructor returns.
      this._promise = new Promise((resolve, reject) => {
        this._resolve = resolve;
        this._reject = reject;
      });
    }
    Object.defineProperty(Deferred2.prototype, "promise", {
      configurable: true,
      enumerable: false,
      get: function() {
        return this._promise;
      }
    });
    Deferred2.prototype.resolve = function(val) {
      this._resolve(val);
    };
    Deferred2.prototype.reject = function(err) {
      this._reject(err);
    };
    return Deferred2;
  })()
);
4264
+
4265
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/utils/callback.js
4266
// Collects values from the iterable `o` into a new array and returns it.
// - If `Symbol` is unavailable or `o` has no `Symbol.iterator` member, `o` itself is returned unchanged.
// - When `n` is undefined the iterator is drained; otherwise at most `n` values are pulled.
// - If the last result obtained was not `done` (early stop, or an error after at least one
//   successful step), the iterator's `return` method is called when it exists.
// - An error raised while iterating is captured in `e` and rethrown after that cleanup attempt.
// NOTE(review): has the shape of TypeScript's down-levelled `__read` helper — confirm origin.
+ var __read5 = function(o, n) {
4267
+ var m = typeof Symbol === "function" && o[Symbol.iterator];
4268
+ if (!m) return o;
4269
+ var i = m.call(o), r, ar = [], e;
4270
+ try {
4271
+ while ((n === void 0 || n-- > 0) && !(r = i.next()).done) ar.push(r.value);
4272
+ } catch (error) {
4273
+ e = { error };
4274
+ } finally {
4275
+ try {
4276
+ if (r && !r.done && (m = i["return"])) m.call(i);
4277
+ } finally {
4278
+ if (e) throw e.error;
4279
+ }
4280
+ }
4281
+ return ar;
4282
+ };
4283
// Returns a new array: `to` concatenated with the elements of `from`.
// When `pack` is truthy, or exactly two arguments were passed, `from` is scanned for holes
// (`!(i in from)`). From the first hole onward a copy `ar` is filled index by index, so holes
// become explicit `undefined` entries. If no copy was made, `Array.prototype.slice.call(from)`
// is concatenated instead.
// NOTE(review): has the shape of TypeScript's down-levelled `__spreadArray` helper — confirm origin.
+ var __spreadArray5 = function(to, from, pack) {
4284
+ if (pack || arguments.length === 2) for (var i = 0, l = from.length, ar; i < l; i++) {
4285
+ if (ar || !(i in from)) {
4286
+ if (!ar) ar = Array.prototype.slice.call(from, 0, i);
4287
+ ar[i] = from[i];
4288
+ }
4289
+ }
4290
+ return to.concat(ar || Array.prototype.slice.call(from));
4291
+ };
4292
/**
 * Wraps a callback so that it runs at most once, bound to a fixed `this`.
 *
 * - `isCalled` (read-only accessor): whether `call` has been invoked yet.
 * - `promise` (read-only accessor): promise settled with the callback's outcome.
 * - `call(...args)`: on the first invocation runs `_callback` with `_that` as `this`
 *   and the given arguments; every invocation returns the same promise.
 */
var BindOnceFuture = (
  /** @class */
  (function() {
    function BindOnceFuture2(_callback, _that) {
      this._callback = _callback;
      this._that = _that;
      this._isCalled = false;
      this._deferred = new Deferred();
    }
    Object.defineProperties(BindOnceFuture2.prototype, {
      isCalled: {
        configurable: true,
        enumerable: false,
        get: function() {
          return this._isCalled;
        }
      },
      promise: {
        configurable: true,
        enumerable: false,
        get: function() {
          return this._deferred.promise;
        }
      }
    });
    BindOnceFuture2.prototype.call = function(...args) {
      const deferred = this._deferred;
      if (this._isCalled) {
        return deferred.promise;
      }
      this._isCalled = true;
      try {
        // Promise.resolve adopts both plain return values and returned promises.
        Promise.resolve(this._callback.call(this._that, ...args)).then(
          (val) => deferred.resolve(val),
          (err) => deferred.reject(err)
        );
      } catch (err) {
        // The callback threw synchronously.
        deferred.reject(err);
      }
      return deferred.promise;
    };
    return BindOnceFuture2;
  })()
);
4339
+
4340
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/internal/exporter.js
4341
+ init_esm();
4342
/**
 * Promise adapter around the callback-style `exporter.export`.
 *
 * The export runs inside `context.with(...)` using the context returned by
 * `suppressTracing(context.active())`.
 *
 * @param {object} exporter - Object exposing `export(arg, callback)`.
 * @param {*} arg - Payload handed to `exporter.export`.
 * @returns {Promise<*>} Resolves with the value the exporter passes to its callback.
 */
function _export(exporter, arg) {
  return new Promise((resolve) => {
    const suppressedContext = suppressTracing(context.active());
    context.with(suppressedContext, () => {
      exporter.export(arg, (result) => {
        resolve(result);
      });
    });
  });
}
4351
+
4352
+ // node_modules/.pnpm/@opentelemetry+core@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/core/build/esm/index.js
4353
// Namespace object through which `_export` is reached (used as `internal._export(...)`).
var internal = { _export: _export };
4356
+
4357
+ // node_modules/.pnpm/@opentelemetry+sdk-trace-base@1.30.1_@opentelemetry+api@1.9.0/node_modules/@opentelemetry/sdk-trace-base/build/esm/export/SimpleSpanProcessor.js
4358
+ init_esm();
4359
// Drives the generator created by `generator.apply(thisArg, _arguments || [])` to completion and
// returns a promise for its final value. The promise is constructed with `P`, which defaults to
// the global `Promise` when falsy.
// - Each yielded value goes through `adopt`: reused as-is when it is already an instance of `P`,
//   otherwise wrapped in a new `P` that resolves with it.
// - Fulfilment resumes the generator with `next(value)`; rejection resumes it with `throw(value)`.
// - An exception escaping either resume step rejects the returned promise.
// NOTE(review): has the shape of TypeScript's down-levelled `__awaiter` helper — confirm origin.
+ var __awaiter = function(thisArg, _arguments, P, generator) {
4360
+ function adopt(value) {
4361
+ return value instanceof P ? value : new P(function(resolve) {
4362
+ resolve(value);
4363
+ });
4364
+ }
4365
+ return new (P || (P = Promise))(function(resolve, reject) {
4366
+ function fulfilled(value) {
4367
+ try {
4368
+ step(generator.next(value));
4369
+ } catch (e) {
4370
+ reject(e);
4371
+ }
4372
+ }
4373
+ function rejected(value) {
4374
+ try {
4375
+ step(generator["throw"](value));
4376
+ } catch (e) {
4377
+ reject(e);
4378
+ }
4379
+ }
4380
+ function step(result) {
4381
+ result.done ? resolve(result.value) : adopt(result.value).then(fulfilled, rejected);
4382
+ }
4383
+ step((generator = generator.apply(thisArg, _arguments || [])).next());
4384
+ });
4385
+ };
4386
// Hand-rolled generator object driven by the state-machine function `body`.
// NOTE(review): has the shape of TypeScript's down-levelled `__generator` helper — confirm origin.
//
// The returned object `g` exposes `next`, `throw` and `return` (created by `verb(0)`, `verb(1)`,
// `verb(2)`) and, when `Symbol` exists, a `[Symbol.iterator]` method returning itself.
//
// `step` repeatedly calls `body.call(thisArg, _)`, which returns an instruction array `op`
// (`op[0]` is an opcode, `op[1]` its operand). As handled by the switch below:
//   0 / 1 : stored in `t`, so that `_.sent()` returns `t[1]` (0) or throws it (1)
//   2     : with no active try region the loop ends and `{ value: op[1], done: true }` is returned
//   3     : sets `_.label = op[1]` when no try region is active or the target lies strictly
//           between `t[0]` and `t[3]` of the active one
//   4     : increments `_.label` and returns `{ value: op[1], done: false }`
//   5     : increments `_.label` and stores `op[1]` in `y`, which later steps forward to
//   6     : produced by the `catch` below for a thrown value; with no active try region the
//           loop ends and the value is rethrown
//   7     : pops the pending op from `_.ops` and the active region from `_.trys`
// `_.trys` holds the active try regions and `_.ops` the ops deferred while a region unwinds.
// The flag `f` rejects re-entrant calls with TypeError("Generator is already executing.").
+ var __generator = function(thisArg, body) {
4387
+ var _ = { label: 0, sent: function() {
4388
+ if (t[0] & 1) throw t[1];
4389
+ return t[1];
4390
+ }, trys: [], ops: [] }, f, y, t, g;
4391
+ return g = { next: verb(0), "throw": verb(1), "return": verb(2) }, typeof Symbol === "function" && (g[Symbol.iterator] = function() {
4392
+ return this;
4393
+ }), g;
4394
+ function verb(n) {
4395
+ return function(v) {
4396
+ return step([n, v]);
4397
+ };
4398
+ }
4399
+ function step(op) {
4400
+ if (f) throw new TypeError("Generator is already executing.");
4401
+ while (_) try {
4402
+ if (f = 1, y && (t = op[0] & 2 ? y["return"] : op[0] ? y["throw"] || ((t = y["return"]) && t.call(y), 0) : y.next) && !(t = t.call(y, op[1])).done) return t;
4403
+ if (y = 0, t) op = [op[0] & 2, t.value];
4404
+ switch (op[0]) {
4405
+ case 0:
4406
+ case 1:
4407
+ t = op;
4408
+ break;
4409
+ case 4:
4410
+ _.label++;
4411
+ return { value: op[1], done: false };
4412
+ case 5:
4413
+ _.label++;
4414
+ y = op[1];
4415
+ op = [0];
4416
+ continue;
4417
+ case 7:
4418
+ op = _.ops.pop();
4419
+ _.trys.pop();
4420
+ continue;
4421
+ default:
4422
+ if (!(t = _.trys, t = t.length > 0 && t[t.length - 1]) && (op[0] === 6 || op[0] === 2)) {
4423
+ _ = 0;
4424
+ continue;
4425
+ }
4426
+ if (op[0] === 3 && (!t || op[1] > t[0] && op[1] < t[3])) {
4427
+ _.label = op[1];
4428
+ break;
4429
+ }
4430
+ if (op[0] === 6 && _.label < t[1]) {
4431
+ _.label = t[1];
4432
+ t = op;
4433
+ break;
4434
+ }
4435
+ if (t && _.label < t[2]) {
4436
+ _.label = t[2];
4437
+ _.ops.push(op);
4438
+ break;
4439
+ }
4440
+ if (t[2]) _.ops.pop();
4441
+ _.trys.pop();
4442
+ continue;
4443
+ }
4444
+ op = body.call(thisArg, _);
4445
+ } catch (e) {
4446
+ op = [6, e];
4447
+ y = 0;
4448
+ } finally {
4449
+ f = t = 0;
4450
+ }
4451
+ if (op[0] & 5) throw op[1];
4452
+ return { value: op[0] ? op[1] : void 0, done: true };
4453
+ }
4454
+ };
4455
/**
 * Span processor that hands each ended span to the exporter immediately.
 *
 * - `forceFlush()`: waits for the exports still tracked in `_unresolvedExports`, then
 *   calls `exporter.forceFlush()` when the exporter provides it.
 * - `onStart()`: no-op.
 * - `onEnd(span)`: exports `span` unless shutdown has started or the span is unsampled.
 *   Export failures are reported through `globalErrorHandler`, never thrown.
 * - `shutdown()`: runs `exporter.shutdown()` at most once (via BindOnceFuture).
 */
var SimpleSpanProcessor = (
  /** @class */
  (function() {
    function SimpleSpanProcessor2(_exporter) {
      this._exporter = _exporter;
      this._shutdownOnce = new BindOnceFuture(this._shutdown, this);
      this._unresolvedExports = /* @__PURE__ */ new Set();
    }
    SimpleSpanProcessor2.prototype.forceFlush = async function() {
      await Promise.all(Array.from(this._unresolvedExports));
      if (this._exporter.forceFlush) {
        await this._exporter.forceFlush();
      }
    };
    SimpleSpanProcessor2.prototype.onStart = function(_span, _parentContext) {
    };
    SimpleSpanProcessor2.prototype.onEnd = function(span) {
      var _this = this;
      if (this._shutdownOnce.isCalled) {
        return;
      }
      if ((span.spanContext().traceFlags & TraceFlags.SAMPLED) === 0) {
        return;
      }
      var doExport = function() {
        return internal._export(_this._exporter, [span]).then(function(result) {
          if (result.code !== ExportResultCode.SUCCESS) {
            // Report the numeric code: concatenating `result` itself would print "[object Object]".
            globalErrorHandler(result.error ?? new Error("SimpleSpanProcessor: span export failed (status " + result.code + ")"));
          }
        }).catch(function(error) {
          globalErrorHandler(error);
        });
      };
      var resource = span.resource;
      if (!resource.asyncAttributesPending) {
        void doExport();
        return;
      }
      var waitForAttributes = resource.waitForAsyncAttributes;
      if (waitForAttributes === null || waitForAttributes === void 0) {
        return;
      }
      // Export only once the resource's asynchronous attributes have settled.
      var pendingExport = waitForAttributes.call(resource).then(function() {
        if (pendingExport != null) {
          _this._unresolvedExports.delete(pendingExport);
        }
        return doExport();
      }, function(err) {
        // Stop tracking on failure too, so settled promises do not pile up in the set.
        if (pendingExport != null) {
          _this._unresolvedExports.delete(pendingExport);
        }
        return globalErrorHandler(err);
      });
      if (pendingExport != null) {
        this._unresolvedExports.add(pendingExport);
      }
    };
    SimpleSpanProcessor2.prototype.shutdown = function() {
      return this._shutdownOnce.call();
    };
    SimpleSpanProcessor2.prototype._shutdown = function() {
      return this._exporter.shutdown();
    };
    return SimpleSpanProcessor2;
  })()
);
4531
+
4532
+ // src/tracing/setup.ts
4533
// Module-level flag recording whether scenario tracing has already been set up.
var initialized = false;

/**
 * Finds the object that span processors can actually be attached to.
 *
 * @param {*} provider - A tracer provider, or a wrapper around one.
 * @returns {object|undefined} `provider` itself when it has an `addSpanProcessor`
 *   function; otherwise its delegate (`getDelegate()` when that is a function, else
 *   `delegate ?? _delegate`) when the delegate has one; otherwise `undefined`.
 */
function getConcreteProvider(provider) {
  const isObject = (value) => Boolean(value) && typeof value === "object";
  const canAttach = (value) => typeof value.addSpanProcessor === "function";
  if (!isObject(provider)) {
    return void 0;
  }
  if (canAttach(provider)) {
    return provider;
  }
  const delegate = typeof provider.getDelegate === "function" ? provider.getDelegate() : provider.delegate ?? provider._delegate;
  return isObject(delegate) && canAttach(delegate) ? delegate : void 0;
}
4548
/**
 * Sets up scenario tracing once per process; later calls are no-ops.
 *
 * When the global tracer provider (or its delegate) accepts span processors, the
 * scenario processors are attached to it; otherwise a full setup is performed.
 *
 * @param {object} [options] - Forwarded to `attachToExistingProvider` or `initializeFullSetup`.
 * @returns {void}
 */
function setupScenarioTracing(options) {
  if (!initialized) {
    const concrete = getConcreteProvider(trace.getTracerProvider());
    if (concrete) {
      attachToExistingProvider(concrete, options);
    } else {
      initializeFullSetup(options);
    }
    initialized = true;
  }
}
4559
/**
 * Runs `setupScenarioTracing(options)` unless tracing has already been initialised.
 *
 * @param {object} [options] - Forwarded to `setupScenarioTracing`.
 * @returns {void}
 */
function ensureTracingInitialized(options) {
  if (!initialized) {
    setupScenarioTracing(options);
  }
}
4563
/**
 * Registers the scenario span processors on an already-configured provider.
 *
 * Processors are added in this order: `judgeSpanCollector`, every entry of
 * `options.spanProcessors`, a SimpleSpanProcessor around `options.traceExporter`,
 * and, when `LANGWATCH_API_KEY` is set in the environment config, a
 * SimpleSpanProcessor around a LangWatchTraceExporter built from that config.
 *
 * @param {object} provider - Object exposing `addSpanProcessor(processor)`.
 * @param {object} [options] - Optional `spanProcessors` and `traceExporter`.
 * @returns {void}
 */
function attachToExistingProvider(provider, options) {
  const hasOptions = options != null;
  provider.addSpanProcessor(judgeSpanCollector);
  if (hasOptions && options.spanProcessors) {
    for (const extraProcessor of options.spanProcessors) {
      provider.addSpanProcessor(extraProcessor);
    }
  }
  if (hasOptions && options.traceExporter) {
    provider.addSpanProcessor(new SimpleSpanProcessor(options.traceExporter));
  }
  const envConfig = getEnv();
  if (!envConfig.LANGWATCH_API_KEY) {
    return;
  }
  const langwatchExporter = new import_observability4.LangWatchTraceExporter({
    apiKey: envConfig.LANGWATCH_API_KEY,
    endpoint: envConfig.LANGWATCH_ENDPOINT
  });
  provider.addSpanProcessor(new SimpleSpanProcessor(langwatchExporter));
}
4582
/**
 * Performs a complete observability setup when no usable provider exists yet.
 *
 * Calls `setupObservability` with the caller's options, overriding two keys:
 * `langwatch` falls back to the API key and endpoint from the environment config when
 * the caller supplied none, and `spanProcessors` always starts with
 * `judgeSpanCollector`, followed by the caller's processors.
 *
 * @param {object} [options] - Options spread into the `setupObservability` call.
 * @returns {void}
 */
function initializeFullSetup(options) {
  const envConfig = getEnv();
  const hasOptions = options != null;
  const spanProcessors = hasOptions && options.spanProcessors ? [judgeSpanCollector, ...options.spanProcessors] : [judgeSpanCollector];
  const configuredLangwatch = hasOptions ? options.langwatch : void 0;
  const setupObservability = import_node.setupObservability;
  setupObservability({
    ...options,
    langwatch: configuredLangwatch ?? {
      apiKey: envConfig.LANGWATCH_API_KEY,
      endpoint: envConfig.LANGWATCH_ENDPOINT
    },
    spanProcessors
  });
}
4597
+
3877
4598
  // src/runner/run.ts
3878
- async function run(cfg) {
4599
+ async function run(cfg, options) {
4600
+ var _a, _b;
3879
4601
  if (!cfg.name) {
3880
4602
  throw new Error("Scenario name is required");
3881
4603
  }
@@ -3900,14 +4622,17 @@ async function run(cfg) {
3900
4622
  cfg.threadId = generateThreadId();
3901
4623
  }
3902
4624
  const steps = cfg.script || [proceed()];
3903
- const execution = new ScenarioExecution(cfg, steps);
4625
+ const batchRunId2 = (options == null ? void 0 : options.batchRunId) ?? getBatchRunId();
4626
+ const execution = new ScenarioExecution(cfg, steps, batchRunId2);
3904
4627
  let eventBus = null;
3905
4628
  let subscription = null;
3906
4629
  try {
3907
- const envConfig2 = getEnv();
4630
+ const projectConfig = await getProjectConfig();
4631
+ ensureTracingInitialized(projectConfig == null ? void 0 : projectConfig.observability);
4632
+ const envConfig = getEnv();
3908
4633
  eventBus = new EventBus({
3909
- endpoint: envConfig2.LANGWATCH_ENDPOINT,
3910
- apiKey: envConfig2.LANGWATCH_API_KEY
4634
+ endpoint: ((_a = options == null ? void 0 : options.langwatch) == null ? void 0 : _a.endpoint) ?? envConfig.LANGWATCH_ENDPOINT,
4635
+ apiKey: ((_b = options == null ? void 0 : options.langwatch) == null ? void 0 : _b.apiKey) ?? envConfig.LANGWATCH_API_KEY
3911
4636
  });
3912
4637
  eventBus.listen();
3913
4638
  subscription = eventBus.subscribeTo(execution.events$);
@@ -3924,6 +4649,9 @@ async function run(cfg) {
3924
4649
  } finally {
3925
4650
  await (eventBus == null ? void 0 : eventBus.drain());
3926
4651
  subscription == null ? void 0 : subscription.unsubscribe();
4652
+ if (cfg.threadId) {
4653
+ judgeSpanCollector.clearSpansForThread(cfg.threadId);
4654
+ }
3927
4655
  }
3928
4656
  }
3929
4657
  function formatMessage(m) {
@@ -3968,6 +4696,27 @@ function formatPart(part) {
3968
4696
  }
3969
4697
  }
3970
4698
 
4699
+ // src/tracing/filters.ts
4700
// Filter list with a single `include` rule matching the "@langwatch/scenario"
// instrumentation scope name exactly.
var scenarioOnly = (() => {
  const scenarioScope = { equals: "@langwatch/scenario" };
  const includeRule = { include: { instrumentationScopeName: [scenarioScope] } };
  return [includeRule];
})();
4707
/**
 * Builds a filter list that includes the scenario scope plus any extra scopes.
 *
 * @param {...*} scopes - Additional instrumentation scope names to match exactly.
 * @returns {Array<object>} One `include` rule whose `instrumentationScopeName` list
 *   starts with "@langwatch/scenario", followed by one `{ equals }` matcher per argument.
 */
function withCustomScopes(...scopes) {
  const instrumentationScopeName = [{ equals: "@langwatch/scenario" }];
  for (const scope of scopes) {
    instrumentationScopeName.push({ equals: scope });
  }
  return [{ include: { instrumentationScopeName } }];
}
4719
+
3971
4720
  // src/index.ts
3972
4721
  var scenario = {
3973
4722
  ...agents_exports,
@@ -4003,8 +4752,11 @@ var index_default = scenario;
4003
4752
  proceed,
4004
4753
  run,
4005
4754
  scenario,
4755
+ scenarioOnly,
4006
4756
  scenarioProjectConfigSchema,
4757
+ setupScenarioTracing,
4007
4758
  succeed,
4008
4759
  user,
4009
- userSimulatorAgent
4760
+ userSimulatorAgent,
4761
+ withCustomScopes
4010
4762
  });