@langwatch/scenario 0.2.13 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +36 -9
- package/dist/index.d.mts +433 -256
- package/dist/index.d.ts +433 -256
- package/dist/index.js +2221 -516
- package/dist/index.mjs +2611 -303
- package/dist/integrations/vitest/config.mjs +0 -2
- package/dist/integrations/vitest/reporter.js +36 -11
- package/dist/integrations/vitest/reporter.mjs +159 -8
- package/dist/integrations/vitest/setup-global.mjs +0 -2
- package/dist/integrations/vitest/setup.js +85 -53
- package/dist/integrations/vitest/setup.mjs +619 -18
- package/package.json +46 -30
- package/dist/chunk-6SKQWXT7.mjs +0 -528
- package/dist/chunk-7P6ASYW6.mjs +0 -9
- package/dist/chunk-OL4RFXV4.mjs +0 -133
package/dist/index.js
CHANGED
|
@@ -33,9 +33,11 @@ __export(index_exports, {
|
|
|
33
33
|
AgentAdapter: () => AgentAdapter,
|
|
34
34
|
AgentRole: () => AgentRole,
|
|
35
35
|
DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
|
|
36
|
-
DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
|
|
37
36
|
DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
|
|
38
37
|
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
38
|
+
JudgeSpanCollector: () => JudgeSpanCollector,
|
|
39
|
+
JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
|
|
40
|
+
RealtimeAgentAdapter: () => RealtimeAgentAdapter,
|
|
39
41
|
ScenarioExecution: () => ScenarioExecution,
|
|
40
42
|
ScenarioExecutionState: () => ScenarioExecutionState,
|
|
41
43
|
StateChangeEventType: () => StateChangeEventType,
|
|
@@ -47,6 +49,8 @@ __export(index_exports, {
|
|
|
47
49
|
fail: () => fail,
|
|
48
50
|
judge: () => judge,
|
|
49
51
|
judgeAgent: () => judgeAgent,
|
|
52
|
+
judgeSpanCollector: () => judgeSpanCollector,
|
|
53
|
+
judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
|
|
50
54
|
message: () => message,
|
|
51
55
|
proceed: () => proceed,
|
|
52
56
|
run: () => run,
|
|
@@ -58,132 +62,53 @@ __export(index_exports, {
|
|
|
58
62
|
});
|
|
59
63
|
module.exports = __toCommonJS(index_exports);
|
|
60
64
|
|
|
61
|
-
// src/
|
|
62
|
-
var
|
|
63
|
-
__export(agents_exports, {
|
|
64
|
-
judgeAgent: () => judgeAgent,
|
|
65
|
-
userSimulatorAgent: () => userSimulatorAgent
|
|
66
|
-
});
|
|
67
|
-
|
|
68
|
-
// src/agents/judge-agent.ts
|
|
69
|
-
var import_ai = require("ai");
|
|
70
|
-
var import_zod3 = require("zod");
|
|
71
|
-
|
|
72
|
-
// src/domain/index.ts
|
|
73
|
-
var domain_exports = {};
|
|
74
|
-
__export(domain_exports, {
|
|
75
|
-
AgentAdapter: () => AgentAdapter,
|
|
76
|
-
AgentRole: () => AgentRole,
|
|
77
|
-
DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
|
|
78
|
-
DEFAULT_TEMPERATURE: () => DEFAULT_TEMPERATURE,
|
|
79
|
-
DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
|
|
80
|
-
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
81
|
-
UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
|
|
82
|
-
allAgentRoles: () => allAgentRoles,
|
|
83
|
-
defineConfig: () => defineConfig,
|
|
84
|
-
scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
|
|
85
|
-
});
|
|
86
|
-
|
|
87
|
-
// src/domain/core/config.ts
|
|
88
|
-
var import_zod = require("zod");
|
|
89
|
-
var DEFAULT_TEMPERATURE = 0;
|
|
90
|
-
var scenarioProjectConfigSchema = import_zod.z.object({
|
|
91
|
-
defaultModel: import_zod.z.object({
|
|
92
|
-
model: import_zod.z.custom(),
|
|
93
|
-
temperature: import_zod.z.number().min(0).max(1).optional().default(DEFAULT_TEMPERATURE),
|
|
94
|
-
maxTokens: import_zod.z.number().optional()
|
|
95
|
-
}).optional(),
|
|
96
|
-
headless: import_zod.z.boolean().optional().default(
|
|
97
|
-
typeof process !== "undefined" ? !["false", "0"].includes(process.env.SCENARIO_HEADLESS || "false") : false
|
|
98
|
-
)
|
|
99
|
-
}).strict();
|
|
100
|
-
function defineConfig(config2) {
|
|
101
|
-
return config2;
|
|
102
|
-
}
|
|
103
|
-
|
|
104
|
-
// src/domain/agents/index.ts
|
|
105
|
-
var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
|
|
106
|
-
AgentRole2["USER"] = "User";
|
|
107
|
-
AgentRole2["AGENT"] = "Agent";
|
|
108
|
-
AgentRole2["JUDGE"] = "Judge";
|
|
109
|
-
return AgentRole2;
|
|
110
|
-
})(AgentRole || {});
|
|
111
|
-
var allAgentRoles = [
|
|
112
|
-
"User" /* USER */,
|
|
113
|
-
"Agent" /* AGENT */,
|
|
114
|
-
"Judge" /* JUDGE */
|
|
115
|
-
];
|
|
116
|
-
var AgentAdapter = class {
|
|
117
|
-
role = "Agent" /* AGENT */;
|
|
118
|
-
};
|
|
119
|
-
var UserSimulatorAgentAdapter = class {
|
|
120
|
-
role = "User" /* USER */;
|
|
121
|
-
};
|
|
122
|
-
var JudgeAgentAdapter = class {
|
|
123
|
-
role = "Judge" /* JUDGE */;
|
|
124
|
-
};
|
|
125
|
-
|
|
126
|
-
// src/domain/scenarios/index.ts
|
|
127
|
-
var DEFAULT_MAX_TURNS = 10;
|
|
128
|
-
var DEFAULT_VERBOSE = false;
|
|
65
|
+
// src/tracing/setup.ts
|
|
66
|
+
var import_node = require("langwatch/observability/node");
|
|
129
67
|
|
|
130
|
-
// src/agents/
|
|
131
|
-
var
|
|
132
|
-
var
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
const segments = [];
|
|
136
|
-
let currentSegment = [];
|
|
137
|
-
for (const message2 of messages) {
|
|
138
|
-
currentSegment.push(message2);
|
|
139
|
-
if (message2.role === toolMessageRole) {
|
|
140
|
-
segments.push(currentSegment);
|
|
141
|
-
currentSegment = [];
|
|
142
|
-
}
|
|
68
|
+
// src/agents/judge/judge-span-collector.ts
|
|
69
|
+
var import_observability = require("langwatch/observability");
|
|
70
|
+
var JudgeSpanCollector = class {
|
|
71
|
+
spans = [];
|
|
72
|
+
onStart() {
|
|
143
73
|
}
|
|
144
|
-
|
|
145
|
-
|
|
74
|
+
onEnd(span) {
|
|
75
|
+
this.spans.push(span);
|
|
146
76
|
}
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
77
|
+
forceFlush() {
|
|
78
|
+
return Promise.resolve();
|
|
79
|
+
}
|
|
80
|
+
shutdown() {
|
|
81
|
+
this.spans = [];
|
|
82
|
+
return Promise.resolve();
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Retrieves all spans associated with a specific thread.
|
|
86
|
+
* @param threadId - The thread identifier to filter spans by
|
|
87
|
+
* @returns Array of spans for the given thread
|
|
88
|
+
*/
|
|
89
|
+
getSpansForThread(threadId) {
|
|
90
|
+
const spanMap = /* @__PURE__ */ new Map();
|
|
91
|
+
for (const span of this.spans) {
|
|
92
|
+
spanMap.set(span.spanContext().spanId, span);
|
|
154
93
|
}
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
};
|
|
166
|
-
const newRole = roleMap[message2.role];
|
|
167
|
-
if (!newRole) return message2;
|
|
168
|
-
return {
|
|
169
|
-
role: newRole,
|
|
170
|
-
content: message2.content
|
|
94
|
+
const belongsToThread = (span) => {
|
|
95
|
+
var _a;
|
|
96
|
+
if (span.attributes[import_observability.attributes.ATTR_LANGWATCH_THREAD_ID] === threadId) {
|
|
97
|
+
return true;
|
|
98
|
+
}
|
|
99
|
+
const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
|
|
100
|
+
if (parentId && spanMap.has(parentId)) {
|
|
101
|
+
return belongsToThread(spanMap.get(parentId));
|
|
102
|
+
}
|
|
103
|
+
return false;
|
|
171
104
|
};
|
|
172
|
-
|
|
173
|
-
}
|
|
174
|
-
var messageRoleReversal = (messages) => {
|
|
175
|
-
const segments = groupMessagesByToolBoundaries(messages);
|
|
176
|
-
const processedSegments = segments.map(
|
|
177
|
-
(segment) => segmentHasToolMessages(segment) ? segment : reverseSegmentRoles(segment)
|
|
178
|
-
);
|
|
179
|
-
return processedSegments.flat();
|
|
180
|
-
};
|
|
181
|
-
var criterionToParamName = (criterion) => {
|
|
182
|
-
return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
|
|
105
|
+
return this.spans.filter(belongsToThread);
|
|
106
|
+
}
|
|
183
107
|
};
|
|
108
|
+
var judgeSpanCollector = new JudgeSpanCollector();
|
|
184
109
|
|
|
185
110
|
// src/config/env.ts
|
|
186
|
-
var
|
|
111
|
+
var import_v4 = require("zod/v4");
|
|
187
112
|
|
|
188
113
|
// src/config/log-levels.ts
|
|
189
114
|
var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
@@ -196,37 +121,37 @@ var LogLevel = /* @__PURE__ */ ((LogLevel2) => {
|
|
|
196
121
|
var LOG_LEVELS = Object.values(LogLevel);
|
|
197
122
|
|
|
198
123
|
// src/config/env.ts
|
|
199
|
-
var envSchema =
|
|
124
|
+
var envSchema = import_v4.z.object({
|
|
200
125
|
/**
|
|
201
126
|
* LangWatch API key for event reporting.
|
|
202
127
|
* If not provided, events will not be sent to LangWatch.
|
|
203
128
|
*/
|
|
204
|
-
LANGWATCH_API_KEY:
|
|
129
|
+
LANGWATCH_API_KEY: import_v4.z.string().optional(),
|
|
205
130
|
/**
|
|
206
131
|
* LangWatch endpoint URL for event reporting.
|
|
207
132
|
* Defaults to the production LangWatch endpoint.
|
|
208
133
|
*/
|
|
209
|
-
LANGWATCH_ENDPOINT:
|
|
134
|
+
LANGWATCH_ENDPOINT: import_v4.z.string().url().optional().default("https://app.langwatch.ai"),
|
|
210
135
|
/**
|
|
211
136
|
* Disables simulation report info messages when set to any truthy value.
|
|
212
137
|
* Useful for CI/CD environments or when you want cleaner output.
|
|
213
138
|
*/
|
|
214
|
-
SCENARIO_DISABLE_SIMULATION_REPORT_INFO:
|
|
139
|
+
SCENARIO_DISABLE_SIMULATION_REPORT_INFO: import_v4.z.string().optional().transform((val) => Boolean(val)),
|
|
215
140
|
/**
|
|
216
141
|
* Node environment - affects logging and behavior.
|
|
217
142
|
* Defaults to 'development' if not specified.
|
|
218
143
|
*/
|
|
219
|
-
NODE_ENV:
|
|
144
|
+
NODE_ENV: import_v4.z.enum(["development", "production", "test"]).default("development"),
|
|
220
145
|
/**
|
|
221
146
|
* Case-insensitive log level for the scenario package.
|
|
222
147
|
* Defaults to 'info' if not specified.
|
|
223
148
|
*/
|
|
224
|
-
LOG_LEVEL:
|
|
149
|
+
LOG_LEVEL: import_v4.z.string().toUpperCase().pipe(import_v4.z.nativeEnum(LogLevel)).optional().default("INFO" /* INFO */),
|
|
225
150
|
/**
|
|
226
151
|
* Scenario batch run ID.
|
|
227
152
|
* If not provided, a random ID will be generated.
|
|
228
153
|
*/
|
|
229
|
-
SCENARIO_BATCH_RUN_ID:
|
|
154
|
+
SCENARIO_BATCH_RUN_ID: import_v4.z.string().optional()
|
|
230
155
|
});
|
|
231
156
|
function getEnv() {
|
|
232
157
|
return envSchema.parse(process.env);
|
|
@@ -236,6 +161,79 @@ function getEnv() {
|
|
|
236
161
|
var import_promises = __toESM(require("fs/promises"));
|
|
237
162
|
var import_node_path = __toESM(require("path"));
|
|
238
163
|
var import_node_url = require("url");
|
|
164
|
+
|
|
165
|
+
// src/domain/index.ts
|
|
166
|
+
var domain_exports = {};
|
|
167
|
+
__export(domain_exports, {
|
|
168
|
+
AgentAdapter: () => AgentAdapter,
|
|
169
|
+
AgentRole: () => AgentRole,
|
|
170
|
+
DEFAULT_MAX_TURNS: () => DEFAULT_MAX_TURNS,
|
|
171
|
+
DEFAULT_VERBOSE: () => DEFAULT_VERBOSE,
|
|
172
|
+
JudgeAgentAdapter: () => JudgeAgentAdapter,
|
|
173
|
+
UserSimulatorAgentAdapter: () => UserSimulatorAgentAdapter,
|
|
174
|
+
allAgentRoles: () => allAgentRoles,
|
|
175
|
+
defineConfig: () => defineConfig,
|
|
176
|
+
scenarioProjectConfigSchema: () => scenarioProjectConfigSchema
|
|
177
|
+
});
|
|
178
|
+
|
|
179
|
+
// src/domain/core/config.ts
|
|
180
|
+
var import_v43 = require("zod/v4");
|
|
181
|
+
|
|
182
|
+
// src/domain/core/schemas/model.schema.ts
|
|
183
|
+
var import_v42 = require("zod/v4");
|
|
184
|
+
|
|
185
|
+
// src/domain/core/constants.ts
|
|
186
|
+
var DEFAULT_TEMPERATURE = 0;
|
|
187
|
+
|
|
188
|
+
// src/domain/core/schemas/model.schema.ts
|
|
189
|
+
var modelSchema = import_v42.z.object({
|
|
190
|
+
model: import_v42.z.custom((val) => Boolean(val), {
|
|
191
|
+
message: "A model is required. Configure it in scenario.config.js defaultModel or pass directly to the agent."
|
|
192
|
+
}).describe("The OpenAI Language Model to use for generating responses."),
|
|
193
|
+
temperature: import_v42.z.number().min(0).max(1).optional().describe("The temperature for the language model.").default(DEFAULT_TEMPERATURE),
|
|
194
|
+
maxTokens: import_v42.z.number().optional().describe("The maximum number of tokens to generate.")
|
|
195
|
+
});
|
|
196
|
+
|
|
197
|
+
// src/domain/core/config.ts
|
|
198
|
+
var headless = typeof process !== "undefined" ? process.env.SCENARIO_HEADLESS === "true" : false;
|
|
199
|
+
var scenarioProjectConfigSchema = import_v43.z.object({
|
|
200
|
+
defaultModel: modelSchema.optional(),
|
|
201
|
+
headless: import_v43.z.boolean().optional().default(headless)
|
|
202
|
+
}).strict();
|
|
203
|
+
function defineConfig(config2) {
|
|
204
|
+
return config2;
|
|
205
|
+
}
|
|
206
|
+
|
|
207
|
+
// src/domain/agents/index.ts
|
|
208
|
+
var AgentRole = /* @__PURE__ */ ((AgentRole2) => {
|
|
209
|
+
AgentRole2["USER"] = "User";
|
|
210
|
+
AgentRole2["AGENT"] = "Agent";
|
|
211
|
+
AgentRole2["JUDGE"] = "Judge";
|
|
212
|
+
return AgentRole2;
|
|
213
|
+
})(AgentRole || {});
|
|
214
|
+
var allAgentRoles = [
|
|
215
|
+
"User" /* USER */,
|
|
216
|
+
"Agent" /* AGENT */,
|
|
217
|
+
"Judge" /* JUDGE */
|
|
218
|
+
];
|
|
219
|
+
var AgentAdapter = class {
|
|
220
|
+
name;
|
|
221
|
+
role = "Agent" /* AGENT */;
|
|
222
|
+
};
|
|
223
|
+
var UserSimulatorAgentAdapter = class extends AgentAdapter {
|
|
224
|
+
name = "UserSimulatorAgent";
|
|
225
|
+
role = "User" /* USER */;
|
|
226
|
+
};
|
|
227
|
+
var JudgeAgentAdapter = class extends AgentAdapter {
|
|
228
|
+
name = "JudgeAgent";
|
|
229
|
+
role = "Judge" /* JUDGE */;
|
|
230
|
+
};
|
|
231
|
+
|
|
232
|
+
// src/domain/scenarios/index.ts
|
|
233
|
+
var DEFAULT_MAX_TURNS = 10;
|
|
234
|
+
var DEFAULT_VERBOSE = false;
|
|
235
|
+
|
|
236
|
+
// src/config/load.ts
|
|
239
237
|
async function loadScenarioProjectConfig() {
|
|
240
238
|
const cwd = process.cwd();
|
|
241
239
|
const configNames = [
|
|
@@ -267,14 +265,14 @@ async function loadScenarioProjectConfig() {
|
|
|
267
265
|
|
|
268
266
|
// src/utils/logger.ts
|
|
269
267
|
var Logger = class _Logger {
|
|
270
|
-
constructor(
|
|
271
|
-
this.context =
|
|
268
|
+
constructor(context2) {
|
|
269
|
+
this.context = context2;
|
|
272
270
|
}
|
|
273
271
|
/**
|
|
274
272
|
* Creates a logger with context (e.g., class name)
|
|
275
273
|
*/
|
|
276
|
-
static create(
|
|
277
|
-
return new _Logger(
|
|
274
|
+
static create(context2) {
|
|
275
|
+
return new _Logger(context2);
|
|
278
276
|
}
|
|
279
277
|
/**
|
|
280
278
|
* Returns the current log level from environment.
|
|
@@ -373,131 +371,612 @@ async function getProjectConfig() {
|
|
|
373
371
|
return config;
|
|
374
372
|
}
|
|
375
373
|
|
|
376
|
-
// src/
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
}
|
|
386
|
-
function mergeAndValidateConfig(config2, projectConfig) {
|
|
387
|
-
var _a;
|
|
388
|
-
const mergedConfig = mergeConfig(config2, projectConfig);
|
|
389
|
-
mergedConfig.model = mergedConfig.model ?? ((_a = projectConfig == null ? void 0 : projectConfig.defaultModel) == null ? void 0 : _a.model);
|
|
390
|
-
if (!mergedConfig.model) {
|
|
391
|
-
throw new Error("Model is required");
|
|
392
|
-
}
|
|
393
|
-
return mergedConfig;
|
|
394
|
-
}
|
|
395
|
-
|
|
396
|
-
// src/agents/judge-agent.ts
|
|
397
|
-
function buildSystemPrompt(criteria, description) {
|
|
398
|
-
const criteriaList = (criteria == null ? void 0 : criteria.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n")) || "No criteria provided";
|
|
399
|
-
return `
|
|
400
|
-
<role>
|
|
401
|
-
You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.
|
|
402
|
-
</role>
|
|
403
|
-
|
|
404
|
-
<goal>
|
|
405
|
-
Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.
|
|
406
|
-
If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.
|
|
407
|
-
</goal>
|
|
374
|
+
// src/tracing/setup.ts
|
|
375
|
+
var envConfig = getEnv();
|
|
376
|
+
var observabilityHandle = (0, import_node.setupObservability)({
|
|
377
|
+
langwatch: {
|
|
378
|
+
apiKey: envConfig.LANGWATCH_API_KEY,
|
|
379
|
+
endpoint: envConfig.LANGWATCH_ENDPOINT
|
|
380
|
+
},
|
|
381
|
+
spanProcessors: [judgeSpanCollector]
|
|
382
|
+
});
|
|
408
383
|
|
|
409
|
-
|
|
410
|
-
|
|
411
|
-
|
|
384
|
+
// src/agents/index.ts
|
|
385
|
+
var agents_exports = {};
|
|
386
|
+
__export(agents_exports, {
|
|
387
|
+
JudgeSpanCollector: () => JudgeSpanCollector,
|
|
388
|
+
JudgeSpanDigestFormatter: () => JudgeSpanDigestFormatter,
|
|
389
|
+
RealtimeAgentAdapter: () => RealtimeAgentAdapter,
|
|
390
|
+
judgeAgent: () => judgeAgent,
|
|
391
|
+
judgeSpanCollector: () => judgeSpanCollector,
|
|
392
|
+
judgeSpanDigestFormatter: () => judgeSpanDigestFormatter,
|
|
393
|
+
userSimulatorAgent: () => userSimulatorAgent
|
|
394
|
+
});
|
|
412
395
|
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
|
|
396
|
+
// src/agents/judge/judge-agent.ts
|
|
397
|
+
var import_ai2 = require("ai");
|
|
398
|
+
var import_v44 = require("zod/v4");
|
|
416
399
|
|
|
417
|
-
|
|
418
|
-
|
|
419
|
-
|
|
420
|
-
|
|
421
|
-
|
|
422
|
-
|
|
423
|
-
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
|
|
428
|
-
}
|
|
429
|
-
function buildFinishTestTool(criteria) {
|
|
430
|
-
const criteriaNames = criteria.map(criterionToParamName);
|
|
431
|
-
return (0, import_ai.tool)({
|
|
432
|
-
description: "Complete the test with a final verdict",
|
|
433
|
-
parameters: import_zod3.z.object({
|
|
434
|
-
criteria: import_zod3.z.object(
|
|
435
|
-
Object.fromEntries(
|
|
436
|
-
criteriaNames.map((name, idx) => [
|
|
437
|
-
name,
|
|
438
|
-
import_zod3.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
|
|
439
|
-
])
|
|
440
|
-
)
|
|
441
|
-
).strict().describe("Strict verdict for each criterion"),
|
|
442
|
-
reasoning: import_zod3.z.string().describe("Explanation of what the final verdict should be"),
|
|
443
|
-
verdict: import_zod3.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
|
|
444
|
-
})
|
|
445
|
-
});
|
|
446
|
-
}
|
|
447
|
-
var JudgeAgent = class extends JudgeAgentAdapter {
|
|
448
|
-
constructor(cfg) {
|
|
449
|
-
super();
|
|
450
|
-
this.cfg = cfg;
|
|
451
|
-
this.criteria = cfg.criteria;
|
|
452
|
-
this.role = "Judge" /* JUDGE */;
|
|
453
|
-
}
|
|
454
|
-
logger = new Logger("JudgeAgent");
|
|
455
|
-
role = "Judge" /* JUDGE */;
|
|
456
|
-
criteria;
|
|
457
|
-
async call(input) {
|
|
458
|
-
var _a;
|
|
459
|
-
const cfg = this.cfg;
|
|
460
|
-
const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
|
|
461
|
-
const messages = [
|
|
462
|
-
{ role: "system", content: systemPrompt },
|
|
463
|
-
...input.messages
|
|
464
|
-
];
|
|
465
|
-
const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
|
|
466
|
-
const projectConfig = await getProjectConfig();
|
|
467
|
-
const mergedConfig = mergeAndValidateConfig(cfg, projectConfig);
|
|
468
|
-
if (!mergedConfig.model) {
|
|
469
|
-
throw new Error("Model is required for the judge agent");
|
|
400
|
+
// src/agents/judge/judge-utils.ts
|
|
401
|
+
function truncateBase64Media(value) {
|
|
402
|
+
var _a;
|
|
403
|
+
if (typeof value === "string") {
|
|
404
|
+
const dataUrlMatch = value.match(
|
|
405
|
+
/^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
|
|
406
|
+
);
|
|
407
|
+
if (dataUrlMatch) {
|
|
408
|
+
const mimeType = dataUrlMatch[1];
|
|
409
|
+
const mediaType = dataUrlMatch[2].toUpperCase();
|
|
410
|
+
const size = dataUrlMatch[3].length;
|
|
411
|
+
return `[${mediaType}: ${mimeType}, ~${size} bytes]`;
|
|
470
412
|
}
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
413
|
+
return value;
|
|
414
|
+
}
|
|
415
|
+
if (Array.isArray(value)) {
|
|
416
|
+
return value.map(truncateBase64Media);
|
|
417
|
+
}
|
|
418
|
+
if (value && typeof value === "object") {
|
|
419
|
+
const obj = value;
|
|
420
|
+
if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
|
|
421
|
+
const mediaType = obj.mediaType;
|
|
422
|
+
const category = ((_a = mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
|
|
478
423
|
return {
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
reasoning: "JudgeAgent: No criteria was provided to be judged against",
|
|
482
|
-
metCriteria: [],
|
|
483
|
-
unmetCriteria: []
|
|
424
|
+
...obj,
|
|
425
|
+
data: `[${category}: ${mediaType}, ~${obj.data.length} bytes]`
|
|
484
426
|
};
|
|
485
427
|
}
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
428
|
+
if (obj.type === "image" && typeof obj.image === "string") {
|
|
429
|
+
const imageData = obj.image;
|
|
430
|
+
const dataUrlMatch = imageData.match(
|
|
431
|
+
/^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
|
|
432
|
+
);
|
|
433
|
+
if (dataUrlMatch) {
|
|
434
|
+
return {
|
|
435
|
+
...obj,
|
|
436
|
+
image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
|
|
437
|
+
};
|
|
438
|
+
}
|
|
439
|
+
if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
|
|
440
|
+
return {
|
|
441
|
+
...obj,
|
|
442
|
+
image: `[IMAGE: unknown, ~${imageData.length} bytes]`
|
|
443
|
+
};
|
|
444
|
+
}
|
|
445
|
+
}
|
|
446
|
+
const result = {};
|
|
447
|
+
for (const [key, val] of Object.entries(obj)) {
|
|
448
|
+
result[key] = truncateBase64Media(val);
|
|
449
|
+
}
|
|
450
|
+
return result;
|
|
451
|
+
}
|
|
452
|
+
return value;
|
|
453
|
+
}
|
|
454
|
+
var JudgeUtils = {
|
|
455
|
+
/**
|
|
456
|
+
* Builds a minimal transcript from messages for judge evaluation.
|
|
457
|
+
* Truncates base64 media to reduce token usage.
|
|
458
|
+
* @param messages - Array of CoreMessage from conversation
|
|
459
|
+
* @returns Plain text transcript with one message per line
|
|
460
|
+
*/
|
|
461
|
+
buildTranscriptFromMessages(messages) {
|
|
462
|
+
return messages.map((msg) => {
|
|
463
|
+
const truncatedContent = truncateBase64Media(msg.content);
|
|
464
|
+
return `${msg.role}: ${JSON.stringify(truncatedContent)}`;
|
|
465
|
+
}).join("\n");
|
|
466
|
+
}
|
|
467
|
+
};
|
|
468
|
+
|
|
469
|
+
// src/agents/llm-invoker.factory.ts
|
|
470
|
+
var import_ai = require("ai");
|
|
471
|
+
var createLLMInvoker = (logger2) => {
|
|
472
|
+
return async (params) => {
|
|
473
|
+
try {
|
|
474
|
+
return await (0, import_ai.generateText)({
|
|
475
|
+
...params,
|
|
476
|
+
experimental_telemetry: { isEnabled: true }
|
|
477
|
+
});
|
|
478
|
+
} catch (error) {
|
|
479
|
+
logger2.error("Error generating text", { error });
|
|
480
|
+
throw error;
|
|
481
|
+
}
|
|
482
|
+
};
|
|
483
|
+
};
|
|
484
|
+
|
|
485
|
+
// src/agents/utils.ts
|
|
486
|
+
var toolMessageRole = "tool";
|
|
487
|
+
var assistantMessageRole = "assistant";
|
|
488
|
+
var userMessageRole = "user";
|
|
489
|
+
var groupMessagesByToolBoundaries = (messages) => {
|
|
490
|
+
const segments = [];
|
|
491
|
+
let currentSegment = [];
|
|
492
|
+
for (const message2 of messages) {
|
|
493
|
+
currentSegment.push(message2);
|
|
494
|
+
if (message2.role === toolMessageRole) {
|
|
495
|
+
segments.push(currentSegment);
|
|
496
|
+
currentSegment = [];
|
|
497
|
+
}
|
|
498
|
+
}
|
|
499
|
+
if (currentSegment.length > 0) {
|
|
500
|
+
segments.push(currentSegment);
|
|
501
|
+
}
|
|
502
|
+
return segments;
|
|
503
|
+
};
|
|
504
|
+
var segmentHasToolMessages = (segment) => {
|
|
505
|
+
return segment.some((message2) => {
|
|
506
|
+
if (message2.role === toolMessageRole) return true;
|
|
507
|
+
if (message2.role === assistantMessageRole && Array.isArray(message2.content)) {
|
|
508
|
+
return message2.content.some((part) => part.type === "tool-call");
|
|
509
|
+
}
|
|
510
|
+
return false;
|
|
511
|
+
});
|
|
512
|
+
};
|
|
513
|
+
var reverseSegmentRoles = (segment) => {
|
|
514
|
+
return segment.map((message2) => {
|
|
515
|
+
const hasStringContent = typeof message2.content === "string";
|
|
516
|
+
if (!hasStringContent) return message2;
|
|
517
|
+
const roleMap = {
|
|
518
|
+
[userMessageRole]: assistantMessageRole,
|
|
519
|
+
[assistantMessageRole]: userMessageRole
|
|
520
|
+
};
|
|
521
|
+
const newRole = roleMap[message2.role];
|
|
522
|
+
if (!newRole) return message2;
|
|
523
|
+
return {
|
|
524
|
+
role: newRole,
|
|
525
|
+
content: message2.content
|
|
526
|
+
};
|
|
527
|
+
});
|
|
528
|
+
};
|
|
529
|
+
var messageRoleReversal = (messages) => {
|
|
530
|
+
const segments = groupMessagesByToolBoundaries(messages);
|
|
531
|
+
const processedSegments = segments.map(
|
|
532
|
+
(segment) => segmentHasToolMessages(segment) ? segment : reverseSegmentRoles(segment)
|
|
533
|
+
);
|
|
534
|
+
return processedSegments.flat();
|
|
535
|
+
};
|
|
536
|
+
var criterionToParamName = (criterion) => {
|
|
537
|
+
return criterion.replace(/"/g, "").replace(/[^a-zA-Z0-9]/g, "_").replace(/ /g, "_").toLowerCase().substring(0, 70);
|
|
538
|
+
};
|
|
539
|
+
|
|
540
|
+
// src/agents/judge/judge-span-digest-formatter.ts
|
|
541
|
+
var import_observability2 = require("langwatch/observability");
|
|
542
|
+
|
|
543
|
+
// src/agents/judge/deep-transform.ts
|
|
544
|
+
function deepTransform(value, fn) {
|
|
545
|
+
const result = fn(value);
|
|
546
|
+
if (result !== value) return result;
|
|
547
|
+
if (Array.isArray(value)) {
|
|
548
|
+
return value.map((v) => deepTransform(v, fn));
|
|
549
|
+
}
|
|
550
|
+
if (value !== null && typeof value === "object") {
|
|
551
|
+
const out = {};
|
|
552
|
+
for (const [k, v] of Object.entries(value)) {
|
|
553
|
+
out[k] = deepTransform(v, fn);
|
|
554
|
+
}
|
|
555
|
+
return out;
|
|
556
|
+
}
|
|
557
|
+
return value;
|
|
558
|
+
}
|
|
559
|
+
|
|
560
|
+
// src/agents/judge/string-deduplicator.ts
|
|
561
|
+
var StringDeduplicator = class {
|
|
562
|
+
seen = /* @__PURE__ */ new Map();
|
|
563
|
+
threshold;
|
|
564
|
+
constructor(params) {
|
|
565
|
+
this.threshold = params.threshold;
|
|
566
|
+
}
|
|
567
|
+
/**
|
|
568
|
+
* Resets seen strings for a new digest.
|
|
569
|
+
*/
|
|
570
|
+
reset() {
|
|
571
|
+
this.seen.clear();
|
|
572
|
+
}
|
|
573
|
+
/**
|
|
574
|
+
* Processes a string, returning duplicate marker if seen before.
|
|
575
|
+
* @param str - String to process
|
|
576
|
+
* @returns Original string or duplicate marker
|
|
577
|
+
*/
|
|
578
|
+
process(str) {
|
|
579
|
+
if (str.length < this.threshold) return str;
|
|
580
|
+
const key = this.normalize(str);
|
|
581
|
+
if (this.seen.has(key)) return "[DUPLICATE - SEE ABOVE]";
|
|
582
|
+
this.seen.set(key, true);
|
|
583
|
+
return str;
|
|
584
|
+
}
|
|
585
|
+
/**
|
|
586
|
+
* Normalizes string for comparison (whitespace, case).
|
|
587
|
+
*/
|
|
588
|
+
normalize(str) {
|
|
589
|
+
return str.replace(/\\[nrt]/g, " ").replace(/[\n\r\t]/g, " ").replace(/\s+/g, " ").trim().toLowerCase();
|
|
590
|
+
}
|
|
591
|
+
};
|
|
592
|
+
|
|
593
|
+
// src/agents/judge/truncate-media.ts
|
|
594
|
+
function truncateMediaUrl(str) {
|
|
595
|
+
const match = str.match(
|
|
596
|
+
/^data:((image|audio|video)\/[a-z0-9+.-]+);base64,(.+)$/i
|
|
597
|
+
);
|
|
598
|
+
if (!match) return str;
|
|
599
|
+
const [, mimeType, category, data] = match;
|
|
600
|
+
return `[${category.toUpperCase()}: ${mimeType}, ~${data.length} bytes]`;
|
|
601
|
+
}
|
|
602
|
+
function truncateMediaPart(v) {
|
|
603
|
+
var _a;
|
|
604
|
+
if (v === null || typeof v !== "object" || Array.isArray(v)) return null;
|
|
605
|
+
const obj = v;
|
|
606
|
+
if (obj.type === "file" && typeof obj.mediaType === "string" && typeof obj.data === "string") {
|
|
607
|
+
const category = ((_a = obj.mediaType.split("/")[0]) == null ? void 0 : _a.toUpperCase()) ?? "FILE";
|
|
608
|
+
return {
|
|
609
|
+
...obj,
|
|
610
|
+
data: `[${category}: ${obj.mediaType}, ~${obj.data.length} bytes]`
|
|
611
|
+
};
|
|
612
|
+
}
|
|
613
|
+
if (obj.type === "image" && typeof obj.image === "string") {
|
|
614
|
+
const imageData = obj.image;
|
|
615
|
+
const dataUrlMatch = imageData.match(
|
|
616
|
+
/^data:((image)\/[a-z0-9+.-]+);base64,(.+)$/i
|
|
617
|
+
);
|
|
618
|
+
if (dataUrlMatch) {
|
|
619
|
+
return {
|
|
620
|
+
...obj,
|
|
621
|
+
image: `[IMAGE: ${dataUrlMatch[1]}, ~${dataUrlMatch[3].length} bytes]`
|
|
622
|
+
};
|
|
623
|
+
}
|
|
624
|
+
if (imageData.length > 1e3 && /^[A-Za-z0-9+/=]+$/.test(imageData)) {
|
|
625
|
+
return {
|
|
626
|
+
...obj,
|
|
627
|
+
image: `[IMAGE: unknown, ~${imageData.length} bytes]`
|
|
628
|
+
};
|
|
629
|
+
}
|
|
630
|
+
}
|
|
631
|
+
return null;
|
|
632
|
+
}
|
|
633
|
+
|
|
634
|
+
// src/agents/judge/judge-span-digest-formatter.ts
|
|
635
|
+
var JudgeSpanDigestFormatter = class {
|
|
636
|
+
logger = new Logger("JudgeSpanDigestFormatter");
|
|
637
|
+
deduplicator = new StringDeduplicator({ threshold: 50 });
|
|
638
|
+
/**
|
|
639
|
+
* Formats spans into a complete digest with full content and nesting.
|
|
640
|
+
* @param spans - All spans for a thread
|
|
641
|
+
* @returns Plain text digest
|
|
642
|
+
*/
|
|
643
|
+
format(spans) {
|
|
644
|
+
this.deduplicator.reset();
|
|
645
|
+
this.logger.debug("format() called", {
|
|
646
|
+
spanCount: spans.length,
|
|
647
|
+
spanNames: spans.map((s) => s.name)
|
|
648
|
+
});
|
|
649
|
+
if (spans.length === 0) {
|
|
650
|
+
this.logger.debug("No spans to format");
|
|
651
|
+
return "No spans recorded.";
|
|
652
|
+
}
|
|
653
|
+
const sortedSpans = this.sortByStartTime(spans);
|
|
654
|
+
const tree = this.buildHierarchy(sortedSpans);
|
|
655
|
+
const totalDuration = this.calculateTotalDuration(sortedSpans);
|
|
656
|
+
this.logger.debug("Hierarchy built", {
|
|
657
|
+
rootCount: tree.length,
|
|
658
|
+
totalDuration
|
|
659
|
+
});
|
|
660
|
+
const lines = [
|
|
661
|
+
`Spans: ${spans.length} | Total Duration: ${this.formatDuration(
|
|
662
|
+
totalDuration
|
|
663
|
+
)}`,
|
|
664
|
+
""
|
|
665
|
+
];
|
|
666
|
+
let sequence = 1;
|
|
667
|
+
const rootCount = tree.length;
|
|
668
|
+
tree.forEach((node, idx) => {
|
|
669
|
+
sequence = this.renderNode(
|
|
670
|
+
node,
|
|
671
|
+
lines,
|
|
672
|
+
0,
|
|
673
|
+
sequence,
|
|
674
|
+
idx === rootCount - 1
|
|
675
|
+
);
|
|
676
|
+
});
|
|
677
|
+
const errors = this.collectErrors(spans);
|
|
678
|
+
if (errors.length > 0) {
|
|
679
|
+
lines.push("");
|
|
680
|
+
lines.push("=== ERRORS ===");
|
|
681
|
+
errors.forEach((e) => lines.push(e));
|
|
682
|
+
}
|
|
683
|
+
return lines.join("\n");
|
|
684
|
+
}
|
|
685
|
+
sortByStartTime(spans) {
|
|
686
|
+
return [...spans].sort((a, b) => {
|
|
687
|
+
const aTime = this.hrTimeToMs(a.startTime);
|
|
688
|
+
const bTime = this.hrTimeToMs(b.startTime);
|
|
689
|
+
return aTime - bTime;
|
|
690
|
+
});
|
|
691
|
+
}
|
|
692
|
+
buildHierarchy(spans) {
|
|
693
|
+
var _a;
|
|
694
|
+
const spanMap = /* @__PURE__ */ new Map();
|
|
695
|
+
const roots = [];
|
|
696
|
+
for (const span of spans) {
|
|
697
|
+
spanMap.set(span.spanContext().spanId, { span, children: [] });
|
|
698
|
+
}
|
|
699
|
+
for (const span of spans) {
|
|
700
|
+
const node = spanMap.get(span.spanContext().spanId);
|
|
701
|
+
const parentId = (_a = span.parentSpanContext) == null ? void 0 : _a.spanId;
|
|
702
|
+
if (parentId && spanMap.has(parentId)) {
|
|
703
|
+
spanMap.get(parentId).children.push(node);
|
|
704
|
+
} else {
|
|
705
|
+
roots.push(node);
|
|
706
|
+
}
|
|
707
|
+
}
|
|
708
|
+
return roots;
|
|
709
|
+
}
|
|
710
|
+
renderNode(node, lines, depth, sequence, isLast = true) {
|
|
711
|
+
const span = node.span;
|
|
712
|
+
const duration = this.calculateSpanDuration(span);
|
|
713
|
+
const timestamp = this.formatTimestamp(span.startTime);
|
|
714
|
+
const status = this.getStatusIndicator(span);
|
|
715
|
+
const prefix = this.getTreePrefix(depth, isLast);
|
|
716
|
+
lines.push(
|
|
717
|
+
`${prefix}[${sequence}] ${new Date(timestamp).toISOString()} ${span.name} (${this.formatDuration(duration)})${status}`
|
|
718
|
+
);
|
|
719
|
+
const attrIndent = this.getAttrIndent(depth, isLast);
|
|
720
|
+
const attrs = this.cleanAttributes(span.attributes);
|
|
721
|
+
if (Object.keys(attrs).length > 0) {
|
|
722
|
+
for (const [key, value] of Object.entries(attrs)) {
|
|
723
|
+
lines.push(`${attrIndent}${key}: ${this.formatValue(value)}`);
|
|
724
|
+
}
|
|
725
|
+
}
|
|
726
|
+
if (span.events.length > 0) {
|
|
727
|
+
for (const event of span.events) {
|
|
728
|
+
lines.push(`${attrIndent}[event] ${event.name}`);
|
|
729
|
+
if (event.attributes) {
|
|
730
|
+
const eventAttrs = this.cleanAttributes(event.attributes);
|
|
731
|
+
for (const [key, value] of Object.entries(eventAttrs)) {
|
|
732
|
+
lines.push(`${attrIndent} ${key}: ${this.formatValue(value)}`);
|
|
733
|
+
}
|
|
734
|
+
}
|
|
735
|
+
}
|
|
736
|
+
}
|
|
737
|
+
lines.push("");
|
|
738
|
+
let nextSeq = sequence + 1;
|
|
739
|
+
const childCount = node.children.length;
|
|
740
|
+
node.children.forEach((child, idx) => {
|
|
741
|
+
nextSeq = this.renderNode(
|
|
742
|
+
child,
|
|
743
|
+
lines,
|
|
744
|
+
depth + 1,
|
|
745
|
+
nextSeq,
|
|
746
|
+
idx === childCount - 1
|
|
747
|
+
);
|
|
748
|
+
});
|
|
749
|
+
return nextSeq;
|
|
750
|
+
}
|
|
751
|
+
getTreePrefix(depth, isLast) {
|
|
752
|
+
if (depth === 0) return "";
|
|
753
|
+
const connector = isLast ? "\u2514\u2500\u2500 " : "\u251C\u2500\u2500 ";
|
|
754
|
+
return "\u2502 ".repeat(depth - 1) + connector;
|
|
755
|
+
}
|
|
756
|
+
getAttrIndent(depth, isLast) {
|
|
757
|
+
if (depth === 0) return " ";
|
|
758
|
+
const continuation = isLast ? " " : "\u2502 ";
|
|
759
|
+
return "\u2502 ".repeat(depth - 1) + continuation + " ";
|
|
760
|
+
}
|
|
761
|
+
cleanAttributes(attrs) {
|
|
762
|
+
const cleaned = {};
|
|
763
|
+
const seen = /* @__PURE__ */ new Set();
|
|
764
|
+
const excludedKeys = [
|
|
765
|
+
import_observability2.attributes.ATTR_LANGWATCH_THREAD_ID,
|
|
766
|
+
"langwatch.scenario.id",
|
|
767
|
+
"langwatch.scenario.name"
|
|
768
|
+
];
|
|
769
|
+
for (const [key, value] of Object.entries(attrs)) {
|
|
770
|
+
if (excludedKeys.includes(key)) {
|
|
771
|
+
continue;
|
|
772
|
+
}
|
|
773
|
+
const cleanKey = key.replace(/^(langwatch)\./, "");
|
|
774
|
+
if (!seen.has(cleanKey)) {
|
|
775
|
+
seen.add(cleanKey);
|
|
776
|
+
cleaned[cleanKey] = value;
|
|
777
|
+
}
|
|
778
|
+
}
|
|
779
|
+
return cleaned;
|
|
780
|
+
}
|
|
781
|
+
formatValue(value) {
|
|
782
|
+
const processed = this.transformValue(value);
|
|
783
|
+
return typeof processed === "string" ? processed : JSON.stringify(processed);
|
|
784
|
+
}
|
|
785
|
+
transformValue(value) {
|
|
786
|
+
return deepTransform(value, (v) => {
|
|
787
|
+
const mediaPart = truncateMediaPart(v);
|
|
788
|
+
if (mediaPart) return mediaPart;
|
|
789
|
+
if (typeof v !== "string") return v;
|
|
790
|
+
return this.transformString(v);
|
|
791
|
+
});
|
|
792
|
+
}
|
|
793
|
+
transformString(str) {
|
|
794
|
+
if (this.looksLikeJson(str)) {
|
|
795
|
+
try {
|
|
796
|
+
const processed = this.transformValue(JSON.parse(str));
|
|
797
|
+
return JSON.stringify(processed);
|
|
798
|
+
} catch {
|
|
799
|
+
}
|
|
800
|
+
}
|
|
801
|
+
const truncated = truncateMediaUrl(str);
|
|
802
|
+
if (truncated !== str) return truncated;
|
|
803
|
+
return this.deduplicator.process(str);
|
|
804
|
+
}
|
|
805
|
+
looksLikeJson(str) {
|
|
806
|
+
const t = str.trim();
|
|
807
|
+
return t.startsWith("{") && t.endsWith("}") || t.startsWith("[") && t.endsWith("]");
|
|
808
|
+
}
|
|
809
|
+
hrTimeToMs(hrTime) {
|
|
810
|
+
return hrTime[0] * 1e3 + hrTime[1] / 1e6;
|
|
811
|
+
}
|
|
812
|
+
calculateSpanDuration(span) {
|
|
813
|
+
return this.hrTimeToMs(span.endTime) - this.hrTimeToMs(span.startTime);
|
|
814
|
+
}
|
|
815
|
+
calculateTotalDuration(spans) {
|
|
816
|
+
if (spans.length === 0) return 0;
|
|
817
|
+
const first = this.hrTimeToMs(spans[0].startTime);
|
|
818
|
+
const last = Math.max(...spans.map((s) => this.hrTimeToMs(s.endTime)));
|
|
819
|
+
return last - first;
|
|
820
|
+
}
|
|
821
|
+
formatDuration(ms) {
|
|
822
|
+
if (ms < 1e3) return `${Math.round(ms)}ms`;
|
|
823
|
+
return `${(ms / 1e3).toFixed(2)}s`;
|
|
824
|
+
}
|
|
825
|
+
formatTimestamp(hrTime) {
|
|
826
|
+
const ms = this.hrTimeToMs(hrTime);
|
|
827
|
+
return new Date(ms).toISOString();
|
|
828
|
+
}
|
|
829
|
+
getStatusIndicator(span) {
|
|
830
|
+
if (span.status.code === 2) {
|
|
831
|
+
return ` \u26A0\uFE0F ERROR: ${span.status.message ?? "unknown"}`;
|
|
832
|
+
}
|
|
833
|
+
return "";
|
|
834
|
+
}
|
|
835
|
+
collectErrors(spans) {
|
|
836
|
+
return spans.filter((s) => s.status.code === 2).map((s) => `- ${s.name}: ${s.status.message ?? "unknown error"}`);
|
|
837
|
+
}
|
|
838
|
+
};
|
|
839
|
+
// Shared module-level formatter instance; JudgeAgent uses it to turn collected spans into the trace digest.
var judgeSpanDigestFormatter = new JudgeSpanDigestFormatter();
|
|
840
|
+
|
|
841
|
+
// src/agents/judge/judge-agent.ts
|
|
842
|
+
/**
 * Builds the default system prompt for the judge.
 *
 * @param criteria - Criteria to list, numbered from 1; when missing or
 *   empty the prompt says "No criteria provided"
 * @param description - Scenario description interpolated into the prompt
 * @returns The prompt text without leading or trailing whitespace
 */
function buildSystemPrompt(criteria, description) {
  const numbered = criteria?.map((criterion, idx) => `${idx + 1}. ${criterion}`).join("\n");
  const criteriaList = numbered || "No criteria provided";
  const sections = [
    `<role>`,
    `You are an LLM as a judge watching a simulated conversation as it plays out live to determine if the agent under test meets the criteria or not.`,
    `</role>`,
    ``,
    `<goal>`,
    `Your goal is to determine if you already have enough information to make a verdict of the scenario below, or if the conversation should continue for longer.`,
    `If you do have enough information, use the finish_test tool to determine if all the criteria have been met, if not, use the continue_test tool to let the next step play out.`,
    `</goal>`,
    ``,
    `<scenario>`,
    `${description}`,
    `</scenario>`,
    ``,
    `<criteria>`,
    `${criteriaList}`,
    `</criteria>`,
    ``,
    `<rules>`,
    `- Be strict, do not let the conversation continue if the agent already broke one of the "do not" or "should not" criteria.`,
    `- DO NOT make any judgment calls that are not explicitly listed in the success or failure criteria, withhold judgement if necessary`,
    `</rules>`
  ];
  return sections.join("\n");
}
|
|
868
|
+
/**
 * Creates the `continue_test` tool definition: a tool with an empty input
 * object and a fixed description.
 *
 * @returns The value returned by the `tool` helper
 */
function buildContinueTestTool() {
  const defineTool = import_ai2.tool;
  const definition = {
    description: "Continue the test with the next step",
    inputSchema: import_v44.z.object({})
  };
  return defineTool(definition);
}
|
|
874
|
+
/**
 * Creates the `finish_test` tool definition.
 *
 * The input schema has three fields: `criteria` (a strict object with one
 * "true" / "false" / "inconclusive" enum per criterion, keyed by the
 * parameter name derived via `criterionToParamName` and described by the
 * criterion text), `reasoning` (string) and `verdict` ("success" /
 * "failure" / "inconclusive").
 *
 * @param criteria - Criterion texts
 * @returns The value returned by the `tool` helper
 */
function buildFinishTestTool(criteria) {
  const criteriaNames = criteria.map(criterionToParamName);
  const defineTool = import_ai2.tool;
  const perCriterionVerdicts = Object.fromEntries(
    criteriaNames.map((name, idx) => [
      name,
      import_v44.z.enum(["true", "false", "inconclusive"]).describe(criteria[idx])
    ])
  );
  const inputSchema = import_v44.z.object({
    criteria: import_v44.z.object(perCriterionVerdicts).strict().describe("Strict verdict for each criterion"),
    reasoning: import_v44.z.string().describe("Explanation of what the final verdict should be"),
    verdict: import_v44.z.enum(["success", "failure", "inconclusive"]).describe("The final verdict of the test")
  });
  return defineTool({
    description: "Complete the test with a final verdict",
    inputSchema
  });
}
|
|
892
|
+
var JudgeAgent = class extends JudgeAgentAdapter {
|
|
893
|
+
constructor(cfg) {
|
|
894
|
+
super();
|
|
895
|
+
this.cfg = cfg;
|
|
896
|
+
this.criteria = cfg.criteria;
|
|
897
|
+
this.spanCollector = cfg.spanCollector ?? judgeSpanCollector;
|
|
898
|
+
}
|
|
899
|
+
logger = new Logger("JudgeAgent");
|
|
900
|
+
spanCollector;
|
|
901
|
+
role = "Judge" /* JUDGE */;
|
|
902
|
+
criteria;
|
|
903
|
+
/**
|
|
904
|
+
* LLM invocation function. Can be overridden to customize LLM behavior.
|
|
905
|
+
*/
|
|
906
|
+
invokeLLM = createLLMInvoker(this.logger);
|
|
907
|
+
async call(input) {
|
|
908
|
+
var _a, _b, _c;
|
|
909
|
+
this.logger.debug("call() invoked", {
|
|
910
|
+
threadId: input.threadId,
|
|
911
|
+
currentTurn: input.scenarioState.currentTurn,
|
|
912
|
+
maxTurns: input.scenarioConfig.maxTurns,
|
|
913
|
+
judgmentRequest: input.judgmentRequest
|
|
914
|
+
});
|
|
915
|
+
const digest = this.getOpenTelemetryTracesDigest(input.threadId);
|
|
916
|
+
this.logger.debug("OpenTelemetry traces built", { digest });
|
|
917
|
+
const transcript = JudgeUtils.buildTranscriptFromMessages(input.messages);
|
|
918
|
+
const contentForJudge = `
|
|
919
|
+
<transcript>
|
|
920
|
+
${transcript}
|
|
921
|
+
</transcript>
|
|
922
|
+
<opentelemetry_traces>
|
|
923
|
+
${digest}
|
|
924
|
+
</opentelemetry_traces>
|
|
925
|
+
`;
|
|
926
|
+
const cfg = this.cfg;
|
|
927
|
+
const systemPrompt = cfg.systemPrompt ?? buildSystemPrompt(cfg.criteria, input.scenarioConfig.description);
|
|
928
|
+
const messages = [
|
|
929
|
+
{ role: "system", content: systemPrompt },
|
|
930
|
+
{ role: "user", content: contentForJudge }
|
|
931
|
+
];
|
|
932
|
+
const isLastMessage = input.scenarioState.currentTurn === input.scenarioConfig.maxTurns;
|
|
933
|
+
const projectConfig = await getProjectConfig();
|
|
934
|
+
const mergedConfig = modelSchema.parse({
|
|
935
|
+
...projectConfig == null ? void 0 : projectConfig.defaultModel,
|
|
936
|
+
...cfg
|
|
937
|
+
});
|
|
938
|
+
const tools = {
|
|
939
|
+
continue_test: buildContinueTestTool(),
|
|
940
|
+
finish_test: buildFinishTestTool(cfg.criteria)
|
|
941
|
+
};
|
|
942
|
+
const enforceJudgement = input.judgmentRequest;
|
|
943
|
+
const hasCriteria = cfg.criteria.length && cfg.criteria.length > 0;
|
|
944
|
+
if (enforceJudgement && !hasCriteria) {
|
|
945
|
+
return {
|
|
946
|
+
success: false,
|
|
947
|
+
reasoning: "JudgeAgent: No criteria was provided to be judged against",
|
|
948
|
+
metCriteria: [],
|
|
949
|
+
unmetCriteria: []
|
|
950
|
+
};
|
|
951
|
+
}
|
|
952
|
+
const toolChoice = (isLastMessage || enforceJudgement) && hasCriteria ? { type: "tool", toolName: "finish_test" } : "required";
|
|
953
|
+
this.logger.debug("Calling LLM", {
|
|
954
|
+
model: mergedConfig.model,
|
|
955
|
+
toolChoice,
|
|
956
|
+
isLastMessage,
|
|
957
|
+
enforceJudgement
|
|
958
|
+
});
|
|
959
|
+
const completion = await this.invokeLLM({
|
|
960
|
+
model: mergedConfig.model,
|
|
961
|
+
messages,
|
|
962
|
+
temperature: mergedConfig.temperature ?? 0,
|
|
963
|
+
maxOutputTokens: mergedConfig.maxTokens,
|
|
492
964
|
tools,
|
|
493
965
|
toolChoice
|
|
494
966
|
});
|
|
967
|
+
this.logger.debug("LLM response received", {
|
|
968
|
+
toolCallCount: ((_a = completion.toolCalls) == null ? void 0 : _a.length) ?? 0,
|
|
969
|
+
toolCalls: (_b = completion.toolCalls) == null ? void 0 : _b.map((tc) => ({
|
|
970
|
+
toolName: tc.toolName,
|
|
971
|
+
args: tc.input
|
|
972
|
+
}))
|
|
973
|
+
});
|
|
495
974
|
let args;
|
|
496
|
-
if ((
|
|
975
|
+
if ((_c = completion.toolCalls) == null ? void 0 : _c.length) {
|
|
497
976
|
const toolCall = completion.toolCalls[0];
|
|
498
977
|
switch (toolCall.toolName) {
|
|
499
978
|
case "finish_test": {
|
|
500
|
-
args = toolCall.
|
|
979
|
+
args = toolCall.input;
|
|
501
980
|
const verdict = args.verdict || "inconclusive";
|
|
502
981
|
const reasoning = args.reasoning || "No reasoning provided";
|
|
503
982
|
const criteria = args.criteria || {};
|
|
@@ -508,20 +987,21 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
508
987
|
const unmetCriteria = cfg.criteria.filter(
|
|
509
988
|
(_, i) => criteriaValues[i] !== "true"
|
|
510
989
|
);
|
|
511
|
-
|
|
990
|
+
const result = {
|
|
512
991
|
success: verdict === "success",
|
|
513
|
-
messages: input.messages,
|
|
514
992
|
reasoning,
|
|
515
993
|
metCriteria,
|
|
516
994
|
unmetCriteria
|
|
517
995
|
};
|
|
996
|
+
this.logger.debug("finish_test result", result);
|
|
997
|
+
return result;
|
|
518
998
|
}
|
|
519
999
|
case "continue_test":
|
|
520
|
-
|
|
1000
|
+
this.logger.debug("continue_test - proceeding to next turn");
|
|
1001
|
+
return null;
|
|
521
1002
|
default:
|
|
522
1003
|
return {
|
|
523
1004
|
success: false,
|
|
524
|
-
messages: input.messages,
|
|
525
1005
|
reasoning: `JudgeAgent: Unknown tool call: ${toolCall.toolName}`,
|
|
526
1006
|
metCriteria: [],
|
|
527
1007
|
unmetCriteria: cfg.criteria
|
|
@@ -530,101 +1010,1184 @@ var JudgeAgent = class extends JudgeAgentAdapter {
|
|
|
530
1010
|
}
|
|
531
1011
|
return {
|
|
532
1012
|
success: false,
|
|
533
|
-
messages: input.messages,
|
|
534
1013
|
reasoning: `JudgeAgent: No tool call found in LLM output`,
|
|
535
1014
|
metCriteria: [],
|
|
536
1015
|
unmetCriteria: cfg.criteria
|
|
537
1016
|
};
|
|
538
1017
|
}
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
|
|
543
|
-
this.logger.error("Error generating text", { error });
|
|
544
|
-
throw error;
|
|
545
|
-
}
|
|
1018
|
+
getOpenTelemetryTracesDigest(threadId) {
|
|
1019
|
+
const spans = this.spanCollector.getSpansForThread(threadId);
|
|
1020
|
+
const digest = judgeSpanDigestFormatter.format(spans);
|
|
1021
|
+
return digest;
|
|
546
1022
|
}
|
|
547
1023
|
};
|
|
548
1024
|
/**
 * Factory for a `JudgeAgent`.
 *
 * @param cfg - Configuration forwarded unchanged to the constructor
 * @returns A new JudgeAgent instance
 */
var judgeAgent = (cfg) => new JudgeAgent(cfg);
|
|
551
1027
|
|
|
552
|
-
// src/agents/user-simulator-agent.ts
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
|
|
556
|
-
|
|
557
|
-
|
|
558
|
-
|
|
559
|
-
|
|
1028
|
+
// src/agents/user-simulator-agent.ts
|
|
1029
|
+
/**
 * Builds the default system prompt for the user simulator.
 *
 * @param description - Scenario description interpolated into the prompt
 * @returns The prompt text without leading or trailing whitespace
 */
function buildSystemPrompt2(description) {
  const sections = [
    `<role>`,
    `You are pretending to be a user, you are testing an AI Agent (shown as the user role) based on a scenario.`,
    `Approach this naturally, as a human user would, with very short inputs, few words, all lowercase, imperative, not periods, like when they google or talk to chatgpt.`,
    `</role>`,
    ``,
    `<goal>`,
    `Your goal (assistant) is to interact with the Agent Under Test (user) as if you were a human user to see if it can complete the scenario successfully.`,
    `</goal>`,
    ``,
    `<scenario>`,
    `${description}`,
    `</scenario>`,
    ``,
    `<rules>`,
    `- DO NOT carry over any requests yourself, YOU ARE NOT the assistant today, you are the user`,
    `</rules>`
  ];
  return sections.join("\n");
}
|
|
1049
|
+
/**
 * Agent that plays the user side of a scenario by asking an LLM for the
 * next user message.
 */
var UserSimulatorAgent = class extends UserSimulatorAgentAdapter {
  logger = new Logger(this.constructor.name);
  /**
   * LLM invocation function. Can be overridden to customize LLM behavior.
   */
  invokeLLM = createLLMInvoker(this.logger);
  /**
   * Produces the next simulated user message.
   *
   * The conversation is prefixed with the system prompt and a canned
   * assistant greeting, passed through `messageRoleReversal`, and sent to
   * the LLM with the model settings merged from the project default and
   * this agent's own config (own config wins).
   *
   * @param input - Agent input; `messages` and `scenarioConfig.description` are read
   * @returns A `{ role: "user", content }` message
   * @throws {Error} If the LLM returns no text
   */
  call = async (input) => {
    const ownConfig = this.cfg;
    const systemPrompt = ownConfig?.systemPrompt ?? buildSystemPrompt2(input.scenarioConfig.description);
    const conversation = [
      { role: "system", content: systemPrompt },
      { role: "assistant", content: "Hello, how can I help you today" },
      ...input.messages
    ];
    const projectConfig = await getProjectConfig();
    const mergedConfig = modelSchema.parse({
      ...projectConfig?.defaultModel,
      ...ownConfig
    });
    const completion = await this.invokeLLM({
      model: mergedConfig.model,
      messages: messageRoleReversal(conversation),
      temperature: mergedConfig.temperature,
      maxOutputTokens: mergedConfig.maxTokens
    });
    const { text } = completion;
    if (text) {
      return { role: "user", content: text };
    }
    throw new Error("No response content from LLM");
  };
  /**
   * @param cfg - Optional agent configuration; may carry `systemPrompt` and model settings
   */
  constructor(cfg) {
    super();
    this.cfg = cfg;
  }
};
|
|
1086
|
+
/**
 * Factory for a `UserSimulatorAgent`.
 *
 * @param config2 - Configuration forwarded unchanged to the constructor
 * @returns A new UserSimulatorAgent instance
 */
var userSimulatorAgent = (config2) => new UserSimulatorAgent(config2);
|
|
1089
|
+
|
|
1090
|
+
// src/agents/realtime/realtime-agent.adapter.ts
|
|
1091
|
+
var import_events = require("events");
|
|
1092
|
+
|
|
1093
|
+
// src/agents/realtime/message-processor.ts
|
|
1094
|
+
/**
 * Extracts audio and text payloads from message content, which may be a
 * plain string or an array of content parts.
 */
var MessageProcessor = class {
  /**
   * Processes audio message content and extracts base64 audio data.
   * Only the first `file` part with an `audio/*` media type is considered.
   *
   * @param content - The message content to process
   * @returns Base64 audio data string or null if no audio found
   * @throws {Error} If the audio part's data is not a non-empty string
   */
  processAudioMessage(content) {
    if (!Array.isArray(content)) {
      return null;
    }
    const isAudioFilePart = (part) =>
      typeof part === "object" &&
      part !== null &&
      "type" in part &&
      part.type === "file" &&
      "mediaType" in part &&
      typeof part.mediaType === "string" &&
      part.mediaType.startsWith("audio/");
    const audioPart = content.find(isAudioFilePart);
    if (audioPart === undefined) {
      return null;
    }
    if (!("data" in audioPart) || typeof audioPart.data !== "string") {
      throw new Error(
        `Audio data must be base64 string, got: ${typeof audioPart.data}`
      );
    }
    if (audioPart.data.length === 0) {
      throw new Error(
        `Audio message has no data. Part: ${JSON.stringify(audioPart)}`
      );
    }
    return audioPart.data;
  }
  /**
   * Extracts text content from message content.
   *
   * String content is returned as-is. For part arrays, the `text` of every
   * `{ type: "text" }` part is concatenated in order, so messages written in
   * parts form are no longer reported as having no text.
   *
   * @param content - The message content to process
   * @returns Text string or empty string if no text found
   */
  extractTextMessage(content) {
    if (typeof content === "string") {
      return content;
    }
    if (!Array.isArray(content)) {
      return "";
    }
    return content
      .filter(
        (part) =>
          typeof part === "object" &&
          part !== null &&
          part.type === "text" &&
          typeof part.text === "string"
      )
      .map((part) => part.text)
      .join("");
  }
  /**
   * Validates that a message has either text or audio content.
   *
   * @param content - The message content to validate
   * @returns True if the message has valid content
   * @throws {Error} If an audio part is present but malformed (propagated
   *   from `processAudioMessage`)
   */
  hasValidContent(content) {
    const hasText = this.extractTextMessage(content).length > 0;
    const hasAudio = this.processAudioMessage(content) !== null;
    return hasText || hasAudio;
  }
};
|
|
1144
|
+
|
|
1145
|
+
// src/agents/realtime/realtime-event-handler.ts
|
|
1146
|
+
/**
 * Collects streamed transcript/audio deltas from a realtime session's
 * transport and hands the assembled response to whoever is waiting on
 * `waitForResponse`.
 */
var RealtimeEventHandler = class {
  /**
   * Creates a new RealtimeEventHandler instance
   * @param session - The RealtimeSession to listen to events from
   */
  constructor(session) {
    this.session = session;
    this.ensureEventListeners();
  }
  currentResponse = "";
  currentAudioChunks = [];
  responseResolver = null;
  errorRejecter = null;
  listenersSetup = false;
  /**
   * Gets the transport from the session
   *
   * @returns The session's `transport`, or null when it is not set
   */
  getTransport() {
    const sessionWithTransport = this.session;
    return sessionWithTransport.transport ?? null;
  }
  /**
   * Ensures event listeners are set up, retrying if transport not available.
   *
   * NOTE(review): the retry re-arms itself every 100ms with no upper bound
   * for as long as the session has no transport.
   */
  ensureEventListeners() {
    if (this.listenersSetup) return;
    const transport = this.getTransport();
    if (!transport) {
      setTimeout(() => this.ensureEventListeners(), 100);
      return;
    }
    this.setupEventListeners();
  }
  /**
   * Sets up event listeners for the RealtimeSession transport layer
   */
  setupEventListeners() {
    if (this.listenersSetup) return;
    const transport = this.getTransport();
    if (!transport) {
      console.error("\u274C Transport not available on session");
      return;
    }
    // Transcript text arrives in pieces; accumulate until the response is done.
    transport.on("response.output_audio_transcript.delta", (event) => {
      const deltaEvent = event;
      if (typeof deltaEvent.delta === "string") {
        this.currentResponse += deltaEvent.delta;
      }
    });
    // Audio arrives as string chunks that are joined once the response is done.
    transport.on("response.output_audio.delta", (event) => {
      const deltaEvent = event;
      if (typeof deltaEvent.delta === "string") {
        this.currentAudioChunks.push(deltaEvent.delta);
      }
    });
    transport.on("response.done", () => {
      const fullAudio = this.currentAudioChunks.join("");
      const audioResponse = {
        transcript: this.currentResponse,
        audio: fullAudio
      };
      if (this.responseResolver) {
        this.responseResolver(audioResponse);
        this.reset();
      }
    });
    transport.on("error", (error) => {
      console.error(`\u274C Transport error:`, error);
      if (this.errorRejecter) {
        const errorObj = error instanceof Error ? error : new Error(String(error));
        this.errorRejecter(errorObj);
        this.reset();
      }
    });
    this.listenersSetup = true;
  }
  /**
   * Waits for the agent response with timeout
   *
   * The timeout timer is cancelled as soon as the wait settles, whether by a
   * response or by a transport error. Previously the error path left the
   * timer armed; when it fired later it wiped the resolver and buffers of
   * the next wait, which then never settled.
   *
   * @param timeout - Maximum time to wait in milliseconds
   * @returns Promise that resolves with the audio response event
   * @throws {Error} If timeout occurs or transport error happens
   */
  waitForResponse(timeout) {
    return new Promise((resolve, reject) => {
      let timeoutId;
      const settleWithResponse = (value) => {
        clearTimeout(timeoutId);
        resolve(value);
      };
      const settleWithError = (error) => {
        clearTimeout(timeoutId);
        reject(error);
      };
      timeoutId = setTimeout(() => {
        // Only clear shared state if this wait is still the active one, so
        // an expired wait can never disturb a newer one.
        if (this.responseResolver === settleWithResponse) {
          this.reset();
        }
        reject(new Error(`Agent response timeout after ${timeout}ms`));
      }, timeout);
      this.responseResolver = settleWithResponse;
      this.errorRejecter = settleWithError;
    });
  }
  /**
   * Resets the internal state for the next response
   */
  reset() {
    this.responseResolver = null;
    this.errorRejecter = null;
    this.currentResponse = "";
    this.currentAudioChunks = [];
  }
};
|
|
1256
|
+
|
|
1257
|
+
// src/agents/realtime/response-formatter.ts
|
|
1258
|
+
/**
 * Converts realtime responses into the message shapes returned by the
 * realtime agent adapter.
 */
var ResponseFormatter = class {
  /**
   * Formats an audio response event as an assistant message with a text
   * part (the transcript) followed by an `audio/pcm16` file part.
   *
   * @param audioEvent - Object with `transcript` and `audio`
   * @returns Assistant message with text and audio parts
   */
  formatAudioResponse(audioEvent) {
    const textPart = { type: "text", text: audioEvent.transcript };
    const audioPart = { type: "file", mediaType: "audio/pcm16", data: audioEvent.audio };
    return { role: "assistant", content: [textPart, audioPart] };
  }
  /**
   * Formats a text response; the text is returned unchanged.
   *
   * @param text - The text response from the agent
   * @returns The same text
   */
  formatTextResponse(text) {
    return text;
  }
  /**
   * Formats the response produced when no user message exists yet.
   * Delegates to `formatAudioResponse`.
   *
   * @param audioEvent - Object with `transcript` and `audio`
   * @returns Assistant message with text and audio parts
   */
  formatInitialResponse(audioEvent) {
    return this.formatAudioResponse(audioEvent);
  }
};
|
|
1293
|
+
|
|
1294
|
+
// src/agents/realtime/realtime-agent.adapter.ts
|
|
1295
|
+
/**
 * Agent adapter that drives a realtime session: it forwards the latest
 * message (audio or text) to the session and returns the session's reply.
 */
var RealtimeAgentAdapter = class extends AgentAdapter {
  role;
  name;
  session;
  eventHandler;
  messageProcessor = new MessageProcessor();
  responseFormatter = new ResponseFormatter();
  audioEvents = new import_events.EventEmitter();
  /**
   * Creates a new RealtimeAgentAdapter instance
   *
   * The session can be either connected or unconnected.
   * If unconnected, call connect() with an API key before use.
   *
   * @param config2 - Configuration for the realtime agent adapter; `role`,
   *   `agentName` and `session` are read here
   */
  constructor(config2) {
    super();
    this.config = config2;
    this.role = this.config.role;
    this.name = this.config.agentName;
    this.session = config2.session;
    this.eventHandler = new RealtimeEventHandler(this.session);
  }
  /**
   * Connects the underlying session.
   *
   * @param params - Optional connect options; when `apiKey` is null or
   *   undefined, `process.env.OPENAI_API_KEY` is used instead
   */
  async connect(params) {
    const { apiKey, ...sessionOptions } = params ?? {};
    const credentials = { apiKey: apiKey ?? process.env.OPENAI_API_KEY };
    await this.session.connect({ ...credentials, ...sessionOptions });
  }
  /**
   * Closes the session connection
   */
  async disconnect() {
    this.session.close();
  }
  /**
   * Process input and generate response (implements AgentAdapter interface)
   *
   * Looks only at the last entry of `input.newMessages`: no message triggers
   * an initial response, audio content takes precedence over text.
   *
   * @param input - Scenario agent input with message history
   * @returns Agent response as audio message or text
   * @throws {Error} If the latest message has neither audio nor text
   */
  async call(input) {
    console.log(`\u{1F50A} [${this.name}] being called with role: ${this.role}`);
    const { newMessages } = input;
    const latestMessage = newMessages[newMessages.length - 1];
    if (!latestMessage) {
      return this.handleInitialResponse();
    }
    const audioData = this.messageProcessor.processAudioMessage(latestMessage.content);
    if (audioData) {
      return this.handleAudioInput(audioData);
    }
    const text = this.messageProcessor.extractTextMessage(latestMessage.content);
    if (text) {
      return this.handleTextInput(text);
    }
    throw new Error("Message has no text or audio content");
  }
  /**
   * Handles the initial response when no user message exists
   */
  async handleInitialResponse() {
    console.log(`[${this.name}] First message, creating response`);
    const { transport } = this.session;
    if (!transport) {
      throw new Error("Realtime transport not available");
    }
    transport.sendEvent({ type: "response.create" });
    const response = await this.eventHandler.waitForResponse(
      this.config.responseTimeout ?? 6e4
    );
    this.audioEvents.emit("audioResponse", response);
    return this.responseFormatter.formatInitialResponse(response);
  }
  /**
   * Handles audio input from the user: appends the audio to the input
   * buffer, commits it, then requests a response.
   *
   * @param audioData - Base64 audio string
   */
  async handleAudioInput(audioData) {
    const { transport } = this.session;
    if (!transport) {
      throw new Error("Realtime transport not available");
    }
    const outgoingEvents = [
      { type: "input_audio_buffer.append", audio: audioData },
      { type: "input_audio_buffer.commit" },
      { type: "response.create" }
    ];
    for (const event of outgoingEvents) {
      transport.sendEvent(event);
    }
    const response = await this.eventHandler.waitForResponse(
      this.config.responseTimeout ?? 6e4
    );
    this.audioEvents.emit("audioResponse", response);
    return this.responseFormatter.formatAudioResponse(response);
  }
  /**
   * Handles text input from the user. Returns only the transcript of the
   * response; the default timeout here is 30s rather than 60s.
   *
   * @param text - Text to send to the session
   */
  async handleTextInput(text) {
    this.session.sendMessage(text);
    const response = await this.eventHandler.waitForResponse(
      this.config.responseTimeout ?? 3e4
    );
    this.audioEvents.emit("audioResponse", response);
    return this.responseFormatter.formatTextResponse(response.transcript);
  }
  /**
   * Subscribe to audio response events
   *
   * @param callback - Function called when an audio response completes
   */
  onAudioResponse(callback) {
    this.audioEvents.on("audioResponse", callback);
  }
  /**
   * Remove audio response listener
   *
   * @param callback - The callback function to remove
   */
  offAudioResponse(callback) {
    this.audioEvents.off("audioResponse", callback);
  }
};
|
|
1433
|
+
|
|
1434
|
+
// src/execution/index.ts
|
|
1435
|
+
// Namespace object for src/execution; it is handed to __export below together with accessor thunks for each exported name.
var execution_exports = {};
|
|
1436
|
+
__export(execution_exports, {
|
|
1437
|
+
ScenarioExecution: () => ScenarioExecution,
|
|
1438
|
+
ScenarioExecutionState: () => ScenarioExecutionState,
|
|
1439
|
+
StateChangeEventType: () => StateChangeEventType
|
|
1440
|
+
});
|
|
1441
|
+
|
|
1442
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/platform/node/globalThis.js
|
|
1443
|
+
// Resolve the global object: use `globalThis` when it exists as an object, otherwise fall back to `global`.
var _globalThis = typeof globalThis === "object" ? globalThis : global;
|
|
1444
|
+
|
|
1445
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/version.js
|
|
1446
|
+
var VERSION = "1.9.0";
|
|
1447
|
+
|
|
1448
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/semver.js
|
|
1449
|
+
// Matches MAJOR.MINOR.PATCH with an optional "-prerelease" suffix; groups 1-3 are the numbers, group 4 is the suffix including the dash.
var re = /^(\d+)\.(\d+)\.(\d+)(-(.+))?$/;
|
|
1450
|
+
/**
 * Creates a predicate that decides whether another version string is
 * compatible with `ownVersion`.
 *
 * Rules implemented below:
 * - an unparsable own version is compatible with nothing;
 * - a prerelease own version only accepts the identical string;
 * - otherwise unparsable, prerelease or different-major versions are rejected;
 * - for major 0 the minor must match and the other patch must not be lower;
 * - for other majors the other minor must not be lower.
 * Verdicts are cached per version string.
 *
 * @param ownVersion - Version string of this API copy
 * @returns Function mapping a version string to a boolean
 */
function _makeCompatibilityCheck(ownVersion) {
  const parseVersion = (version) => {
    const match = version.match(re);
    if (!match) {
      return null;
    }
    return {
      major: +match[1],
      minor: +match[2],
      patch: +match[3],
      prerelease: match[4]
    };
  };

  const own = parseVersion(ownVersion);
  if (!own) {
    return function() {
      return false;
    };
  }
  if (own.prerelease != null) {
    return function isExactmatch(globalVersion) {
      return globalVersion === ownVersion;
    };
  }

  // version string -> cached verdict; the own version is accepted up front.
  const verdicts = new Map([[ownVersion, true]]);
  const remember = (version, compatible) => {
    verdicts.set(version, compatible);
    return compatible;
  };

  return function isCompatible2(globalVersion) {
    if (verdicts.has(globalVersion)) {
      return verdicts.get(globalVersion);
    }
    const other = parseVersion(globalVersion);
    if (!other || other.prerelease != null || other.major !== own.major) {
      return remember(globalVersion, false);
    }
    if (own.major === 0) {
      return remember(
        globalVersion,
        own.minor === other.minor && own.patch <= other.patch
      );
    }
    return remember(globalVersion, own.minor <= other.minor);
  };
}
|
|
1513
|
+
var isCompatible = _makeCompatibilityCheck(VERSION);
|
|
1514
|
+
|
|
1515
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/internal/global-utils.js
|
|
1516
|
+
var major = VERSION.split(".")[0];
|
|
1517
|
+
var GLOBAL_OPENTELEMETRY_API_KEY = Symbol.for("opentelemetry.js.api." + major);
|
|
1518
|
+
var _global = _globalThis;
|
|
1519
|
+
/**
 * Register an API instance under `type` on the shared global registry object.
 *
 * The registry is created on first use and stamped with this module's `VERSION`.
 * Registration is refused (and an error is reported through `diag.error`) when
 * an instance is already registered for `type` and `allowOverride` is falsy, or
 * when the existing registry was created by a different API version.
 *
 * @param type - Registry slot name.
 * @param instance - The API instance to store.
 * @param diag - Logger exposing `error` and `debug`.
 * @param allowOverride - When true, an existing registration for `type` may be replaced. Defaults to false.
 * @returns true when the instance was stored, false otherwise.
 */
function registerGlobal(type, instance, diag, allowOverride) {
  if (allowOverride === void 0) {
    allowOverride = false;
  }
  const existing = _global[GLOBAL_OPENTELEMETRY_API_KEY];
  const api = _global[GLOBAL_OPENTELEMETRY_API_KEY] = existing ?? {
    version: VERSION
  };
  if (!allowOverride && api[type]) {
    const duplicateError = new Error("@opentelemetry/api: Attempted duplicate registration of API: " + type);
    diag.error(duplicateError.stack || duplicateError.message);
    return false;
  }
  if (api.version !== VERSION) {
    // `VERSION` is the version attempting to register; `api.version` is the one
    // already stored on the registry. The message previously had them swapped.
    const mismatchError = new Error("@opentelemetry/api: Registration of version v" + VERSION + " for " + type + " does not match previously registered API v" + api.version);
    diag.error(mismatchError.stack || mismatchError.message);
    return false;
  }
  api[type] = instance;
  diag.debug("@opentelemetry/api: Registered a global for " + type + " v" + VERSION + ".");
  return true;
}
|
|
1541
|
+
/**
 * Read the instance registered under `type` from the shared global registry.
 *
 * @param type - Registry slot name.
 * @returns The registered instance, or undefined when there is no registry, the
 *   registry has no version, or its version fails the `isCompatible` check.
 */
function getGlobal(type) {
  const registeredVersion = _global[GLOBAL_OPENTELEMETRY_API_KEY]?.version;
  const usable = Boolean(registeredVersion) && isCompatible(registeredVersion);
  if (!usable) {
    return;
  }
  return _global[GLOBAL_OPENTELEMETRY_API_KEY]?.[type];
}
|
|
1549
|
+
/**
 * Remove the instance registered under `type` from the shared global registry.
 * A debug line is written through `diag.debug` before the removal is attempted;
 * nothing else happens when no registry object exists.
 *
 * @param type - Registry slot name.
 * @param diag - Logger exposing `debug`.
 */
function unregisterGlobal(type, diag) {
  diag.debug(`@opentelemetry/api: Unregistering a global for ${type} v${VERSION}.`);
  const registry = _global[GLOBAL_OPENTELEMETRY_API_KEY];
  if (!registry) {
    return;
  }
  delete registry[type];
}
|
|
1556
|
+
|
|
1557
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/ComponentLogger.js
|
|
1558
|
+
/**
 * Collect values from an iterable into an array.
 *
 * @param o - Value to read from. Returned unchanged when it has no `Symbol.iterator` method.
 * @param n - Optional maximum number of values to pull; all values are pulled when undefined.
 * @returns An array of the values read. If reading stopped before the iterator reported
 *   `done`, the iterator's `return` method (when present) is invoked; an error raised while
 *   iterating is rethrown after that cleanup.
 */
var __read = function(o, n) {
  var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
  if (!iteratorFactory) return o;
  var iterator = iteratorFactory.call(o);
  var collected = [];
  var step;
  var failure;
  try {
    for (;;) {
      // The counter is decremented on every check, exactly once per attempted pull.
      if (n !== void 0 && n-- <= 0) break;
      step = iterator.next();
      if (step.done) break;
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error };
  } finally {
    try {
      if (step && !step.done) {
        var closeIterator = iterator["return"];
        if (closeIterator) closeIterator.call(iterator);
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
|
|
1575
|
+
/**
 * Return `to` concatenated with the elements of the array-like `from`.
 *
 * When `pack` is truthy, or when only two arguments are passed, holes in `from`
 * are filled in as explicit entries (read by index) before concatenating;
 * otherwise `from` is copied with `Array.prototype.slice` as-is.
 */
var __spreadArray = function(to, from, pack) {
  var packed;
  if (pack || arguments.length === 2) {
    var total = from.length;
    for (var index = 0; index < total; index++) {
      // Nothing to do until the first hole is found.
      if (!packed && index in from) continue;
      if (!packed) packed = Array.prototype.slice.call(from, 0, index);
      packed[index] = from[index];
    }
  }
  return to.concat(packed || Array.prototype.slice.call(from));
};
|
|
1584
|
+
/**
 * Logger bound to a namespace. Each of its level methods (`debug`, `error`,
 * `info`, `warn`, `verbose`) hands the level name, the stored namespace and the
 * call arguments to `logProxy` and returns its result.
 */
var DiagComponentLogger = (
  /** @class */
  (function() {
    /**
     * @param props - Options; `props.namespace` is used when truthy,
     *   otherwise the namespace is "DiagComponentLogger".
     */
    function DiagComponentLogger2(props) {
      this._namespace = props.namespace || "DiagComponentLogger";
    }
    // Same definition order as a hand-written list of prototype assignments.
    ["debug", "error", "info", "warn", "verbose"].forEach(function(level) {
      DiagComponentLogger2.prototype[level] = function(...args) {
        return logProxy(level, this._namespace, args);
      };
    });
    return DiagComponentLogger2;
  })()
);
|
|
1628
|
+
/**
 * Forward a namespaced log call to the logger registered under "diag".
 *
 * @param funcName - Name of the logger method to invoke.
 * @param namespace - Namespace inserted as the first argument of the call.
 * @param args - Call arguments; this array is modified in place (namespace is unshifted).
 * @returns The logger method's result, or undefined when no logger is registered.
 */
function logProxy(funcName, namespace, args) {
  const target = getGlobal("diag");
  if (!target) {
    return;
  }
  args.unshift(namespace);
  return target[funcName](...args);
}
|
|
1636
|
+
|
|
1637
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/types.js
|
|
1638
|
+
/**
 * Two-way enum of diagnostic log levels: each name maps to its numeric value
 * and each numeric value maps back to its name.
 */
var DiagLogLevel;
(function(levels) {
  var table = [
    ["NONE", 0],
    ["ERROR", 30],
    ["WARN", 50],
    ["INFO", 60],
    ["DEBUG", 70],
    ["VERBOSE", 80],
    ["ALL", 9999]
  ];
  table.forEach(function(entry) {
    var label = entry[0];
    var value = entry[1];
    levels[label] = value;
    levels[value] = label;
  });
})(DiagLogLevel || (DiagLogLevel = {}));
|
|
1648
|
+
|
|
1649
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/diag/internal/logLevelLogger.js
|
|
1650
|
+
/**
 * Wrap a logger so that only methods at or below `maxLevel` reach it.
 *
 * @param maxLevel - Highest level to let through; values below `DiagLogLevel.NONE`
 *   or above `DiagLogLevel.ALL` are clamped to those bounds.
 * @param logger2 - Target logger; a missing logger is treated as an empty object.
 * @returns An object with `error`, `warn`, `info`, `debug` and `verbose`. Each is the
 *   target's method bound to the target when that method is a function and its level
 *   is allowed, otherwise a function that does nothing.
 */
function createLogLevelDiagLogger(maxLevel, logger2) {
  if (maxLevel < DiagLogLevel.NONE) {
    maxLevel = DiagLogLevel.NONE;
  } else if (maxLevel > DiagLogLevel.ALL) {
    maxLevel = DiagLogLevel.ALL;
  }
  const target = logger2 || {};
  const select = (method, threshold) => {
    const implementation = target[method];
    const enabled = typeof implementation === "function" && maxLevel >= threshold;
    return enabled ? implementation.bind(target) : function() {
    };
  };
  return {
    error: select("error", DiagLogLevel.ERROR),
    warn: select("warn", DiagLogLevel.WARN),
    info: select("info", DiagLogLevel.INFO),
    debug: select("debug", DiagLogLevel.DEBUG),
    verbose: select("verbose", DiagLogLevel.VERBOSE)
  };
}
|
|
1673
|
+
|
|
1674
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/diag.js
|
|
1675
|
+
/**
 * Collect values from an iterable into an array.
 *
 * @param o - Value to read from. Returned unchanged when it has no `Symbol.iterator` method.
 * @param n - Optional maximum number of values to pull; all values are pulled when undefined.
 * @returns An array of the values read. If reading stopped before the iterator reported
 *   `done`, the iterator's `return` method (when present) is invoked; an error raised while
 *   iterating is rethrown after that cleanup.
 */
var __read2 = function(o, n) {
  var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
  if (!iteratorFactory) return o;
  var iterator = iteratorFactory.call(o);
  var collected = [];
  var step;
  var failure;
  try {
    for (;;) {
      // The counter is decremented on every check, exactly once per attempted pull.
      if (n !== void 0 && n-- <= 0) break;
      step = iterator.next();
      if (step.done) break;
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error };
  } finally {
    try {
      if (step && !step.done) {
        var closeIterator = iterator["return"];
        if (closeIterator) closeIterator.call(iterator);
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
|
|
1692
|
+
/**
 * Return `to` concatenated with the elements of the array-like `from`.
 *
 * When `pack` is truthy, or when only two arguments are passed, holes in `from`
 * are filled in as explicit entries (read by index) before concatenating;
 * otherwise `from` is copied with `Array.prototype.slice` as-is.
 */
var __spreadArray2 = function(to, from, pack) {
  var packed;
  if (pack || arguments.length === 2) {
    var total = from.length;
    for (var index = 0; index < total; index++) {
      // Nothing to do until the first hole is found.
      if (!packed && index in from) continue;
      if (!packed) packed = Array.prototype.slice.call(from, 0, index);
      packed[index] = from[index];
    }
  }
  return to.concat(packed || Array.prototype.slice.call(from));
};
|
|
1701
|
+
var API_NAME = "diag";
|
|
1702
|
+
/**
 * Diagnostics API object, obtained through `DiagAPI.instance()`.
 *
 * Its level methods (`verbose`, `debug`, `info`, `warn`, `error`) forward to
 * whatever logger `getGlobal("diag")` returns at call time and do nothing when
 * none is available.
 */
var DiagAPI = (
  /** @class */
  (function() {
    function DiagAPI2() {
      var self = this;

      // Build a method that relays its arguments to the registered logger's `level` method.
      function forwardTo(level) {
        return function(...args) {
          var active = getGlobal("diag");
          if (!active) {
            return;
          }
          return active[level](...args);
        };
      }

      /**
       * Install `logger2` as the global diag logger.
       *
       * @param logger2 - Logger to install; passing this API object itself is rejected.
       * @param optionsOrLogLevel - Either a numeric log level or an options object
       *   (`logLevel`, `suppressOverrideMessage`). Defaults to `{ logLevel: DiagLogLevel.INFO }`.
       * @returns false when `logger2` is this API object, otherwise the result of `registerGlobal`.
       */
      self.setLogger = function(logger2, optionsOrLogLevel) {
        var options = optionsOrLogLevel === void 0 ? { logLevel: DiagLogLevel.INFO } : optionsOrLogLevel;
        if (logger2 === self) {
          var selfError = new Error("Cannot use diag as the logger for itself. Please use a DiagLogger implementation like ConsoleDiagLogger or a custom implementation");
          self.error(selfError.stack ?? selfError.message);
          return false;
        }
        if (typeof options === "number") {
          options = { logLevel: options };
        }
        var previous = getGlobal("diag");
        var replacement = createLogLevelDiagLogger(options.logLevel ?? DiagLogLevel.INFO, logger2);
        if (previous && !options.suppressOverrideMessage) {
          // Tell both the outgoing and the incoming logger where the swap came from.
          var origin = new Error().stack ?? "<failed to generate stacktrace>";
          previous.warn("Current logger will be overwritten from " + origin);
          replacement.warn("Current logger will overwrite one already registered from " + origin);
        }
        return registerGlobal("diag", replacement, self, true);
      };
      self.disable = function() {
        unregisterGlobal(API_NAME, self);
      };
      self.createComponentLogger = function(options) {
        return new DiagComponentLogger(options);
      };
      ["verbose", "debug", "info", "warn", "error"].forEach(function(level) {
        self[level] = forwardTo(level);
      });
    }
    /** Return the shared instance, creating it on first use. */
    DiagAPI2.instance = function() {
      if (!this._instance) {
        this._instance = new DiagAPI2();
      }
      return this._instance;
    };
    return DiagAPI2;
  })()
);
|
|
1765
|
+
|
|
1766
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/context.js
|
|
1767
|
+
/**
 * Obtain the context key for `description` from the global symbol registry,
 * so equal descriptions yield the same symbol.
 *
 * @param description - Text identifying the key.
 * @returns The symbol returned by `Symbol.for(description)`.
 */
function createContextKey(description) {
  const key = Symbol.for(description);
  return key;
}
|
|
1770
|
+
/**
 * Key/value context whose `setValue` and `deleteValue` return a new context
 * built from a copy of the current entries instead of modifying the receiver.
 */
var BaseContext = (
  /** @class */
  /* @__PURE__ */ (function() {
    /**
     * @param parentContext - Optional entries to copy into the new context's map.
     */
    function BaseContext2(parentContext) {
      var self = this;
      self._currentContext = parentContext ? new Map(parentContext) : /* @__PURE__ */ new Map();

      // Copy this context's entries into a fresh context and let `change` edit the copy.
      function derive(change) {
        var next = new BaseContext2(self._currentContext);
        change(next._currentContext);
        return next;
      }

      self.getValue = function(key) {
        return self._currentContext.get(key);
      };
      self.setValue = function(key, value) {
        return derive(function(entries) {
          entries.set(key, value);
        });
      };
      self.deleteValue = function(key) {
        return derive(function(entries) {
          entries.delete(key);
        });
      };
    }
    return BaseContext2;
  })()
);
|
|
1793
|
+
var ROOT_CONTEXT = new BaseContext();
|
|
1794
|
+
|
|
1795
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context/NoopContextManager.js
|
|
1796
|
+
/**
 * Collect values from an iterable into an array.
 *
 * @param o - Value to read from. Returned unchanged when it has no `Symbol.iterator` method.
 * @param n - Optional maximum number of values to pull; all values are pulled when undefined.
 * @returns An array of the values read. If reading stopped before the iterator reported
 *   `done`, the iterator's `return` method (when present) is invoked; an error raised while
 *   iterating is rethrown after that cleanup.
 */
var __read3 = function(o, n) {
  var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
  if (!iteratorFactory) return o;
  var iterator = iteratorFactory.call(o);
  var collected = [];
  var step;
  var failure;
  try {
    for (;;) {
      // The counter is decremented on every check, exactly once per attempted pull.
      if (n !== void 0 && n-- <= 0) break;
      step = iterator.next();
      if (step.done) break;
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error };
  } finally {
    try {
      if (step && !step.done) {
        var closeIterator = iterator["return"];
        if (closeIterator) closeIterator.call(iterator);
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
|
|
1813
|
+
/**
 * Return `to` concatenated with the elements of the array-like `from`.
 *
 * When `pack` is truthy, or when only two arguments are passed, holes in `from`
 * are filled in as explicit entries (read by index) before concatenating;
 * otherwise `from` is copied with `Array.prototype.slice` as-is.
 */
var __spreadArray3 = function(to, from, pack) {
  var packed;
  if (pack || arguments.length === 2) {
    var total = from.length;
    for (var index = 0; index < total; index++) {
      // Nothing to do until the first hole is found.
      if (!packed && index in from) continue;
      if (!packed) packed = Array.prototype.slice.call(from, 0, index);
      packed[index] = from[index];
    }
  }
  return to.concat(packed || Array.prototype.slice.call(from));
};
|
|
1822
|
+
/**
 * Context manager that tracks nothing: `active` always yields `ROOT_CONTEXT`,
 * `with` just invokes the callback, `bind` returns its target untouched and
 * `enable`/`disable` return the manager itself.
 */
var NoopContextManager = (
  /** @class */
  (function() {
    function NoopContextManager2() {
    }
    var proto = NoopContextManager2.prototype;
    proto.active = function() {
      return ROOT_CONTEXT;
    };
    /**
     * Call `fn` with `thisArg` as its receiver and any further arguments passed through.
     * The context argument is ignored.
     */
    proto.with = function(_context, fn, thisArg, ...args) {
      return fn.call(thisArg, ...args);
    };
    proto.bind = function(_context, target) {
      return target;
    };
    proto.enable = function() {
      return this;
    };
    proto.disable = function() {
      return this;
    };
    return NoopContextManager2;
  })()
);
|
|
1849
|
+
|
|
1850
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/context.js
|
|
1851
|
+
/**
 * Collect values from an iterable into an array.
 *
 * @param o - Value to read from. Returned unchanged when it has no `Symbol.iterator` method.
 * @param n - Optional maximum number of values to pull; all values are pulled when undefined.
 * @returns An array of the values read. If reading stopped before the iterator reported
 *   `done`, the iterator's `return` method (when present) is invoked; an error raised while
 *   iterating is rethrown after that cleanup.
 */
var __read4 = function(o, n) {
  var iteratorFactory = typeof Symbol === "function" && o[Symbol.iterator];
  if (!iteratorFactory) return o;
  var iterator = iteratorFactory.call(o);
  var collected = [];
  var step;
  var failure;
  try {
    for (;;) {
      // The counter is decremented on every check, exactly once per attempted pull.
      if (n !== void 0 && n-- <= 0) break;
      step = iterator.next();
      if (step.done) break;
      collected.push(step.value);
    }
  } catch (error) {
    failure = { error };
  } finally {
    try {
      if (step && !step.done) {
        var closeIterator = iterator["return"];
        if (closeIterator) closeIterator.call(iterator);
      }
    } finally {
      if (failure) throw failure.error;
    }
  }
  return collected;
};
|
|
1868
|
+
/**
 * Return `to` concatenated with the elements of the array-like `from`.
 *
 * When `pack` is truthy, or when only two arguments are passed, holes in `from`
 * are filled in as explicit entries (read by index) before concatenating;
 * otherwise `from` is copied with `Array.prototype.slice` as-is.
 */
var __spreadArray4 = function(to, from, pack) {
  var packed;
  if (pack || arguments.length === 2) {
    var total = from.length;
    for (var index = 0; index < total; index++) {
      // Nothing to do until the first hole is found.
      if (!packed && index in from) continue;
      if (!packed) packed = Array.prototype.slice.call(from, 0, index);
      packed[index] = from[index];
    }
  }
  return to.concat(packed || Array.prototype.slice.call(from));
};
|
|
1877
|
+
var API_NAME2 = "context";
|
|
1878
|
+
var NOOP_CONTEXT_MANAGER = new NoopContextManager();
|
|
1879
|
+
/**
 * Context API facade, obtained through `ContextAPI.getInstance()`.
 *
 * Every operation is delegated to the context manager returned by
 * `getGlobal(API_NAME2)`, falling back to `NOOP_CONTEXT_MANAGER` when none is available.
 */
var ContextAPI = (
  /** @class */
  (function() {
    function ContextAPI2() {
    }
    /** Return the shared instance, creating it on first use. */
    ContextAPI2.getInstance = function() {
      if (!this._instance) {
        this._instance = new ContextAPI2();
      }
      return this._instance;
    };
    var proto = ContextAPI2.prototype;
    /** Register `contextManager` globally; returns what `registerGlobal` returns. */
    proto.setGlobalContextManager = function(contextManager) {
      var diag = DiagAPI.instance();
      return registerGlobal(API_NAME2, contextManager, diag);
    };
    proto.active = function() {
      var manager = this._getContextManager();
      return manager.active();
    };
    /** Run `fn` through the current manager's `with`, passing extra arguments along. */
    proto.with = function(context2, fn, thisArg, ...args) {
      var manager = this._getContextManager();
      return manager.with(context2, fn, thisArg, ...args);
    };
    proto.bind = function(context2, target) {
      var manager = this._getContextManager();
      return manager.bind(context2, target);
    };
    proto._getContextManager = function() {
      return getGlobal(API_NAME2) || NOOP_CONTEXT_MANAGER;
    };
    /** Disable the current manager, then remove the global registration. */
    proto.disable = function() {
      this._getContextManager().disable();
      unregisterGlobal(API_NAME2, DiagAPI.instance());
    };
    return ContextAPI2;
  })()
);
|
|
1917
|
+
|
|
1918
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/trace_flags.js
|
|
1919
|
+
/**
 * Two-way enum of trace flags: `NONE` is 0 and `SAMPLED` is 1, and each
 * numeric value maps back to its name.
 */
var TraceFlags;
(function(flags) {
  ["NONE", "SAMPLED"].forEach(function(label, value) {
    flags[label] = value;
    flags[value] = label;
  });
})(TraceFlags || (TraceFlags = {}));
|
|
1924
|
+
|
|
1925
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/invalid-span-constants.js
|
|
1926
|
+
var INVALID_SPANID = "0000000000000000";
|
|
1927
|
+
var INVALID_TRACEID = "00000000000000000000000000000000";
|
|
1928
|
+
var INVALID_SPAN_CONTEXT = {
|
|
1929
|
+
traceId: INVALID_TRACEID,
|
|
1930
|
+
spanId: INVALID_SPANID,
|
|
1931
|
+
traceFlags: TraceFlags.NONE
|
|
1932
|
+
};
|
|
1933
|
+
|
|
1934
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NonRecordingSpan.js
|
|
1935
|
+
/**
 * Span that records nothing. It only carries a span context; every mutator
 * either returns the span itself or returns nothing, and `isRecording` is always false.
 */
var NonRecordingSpan = (
  /** @class */
  (function() {
    /**
     * @param _spanContext - Span context to expose; `INVALID_SPAN_CONTEXT` is used when undefined.
     */
    function NonRecordingSpan2(_spanContext) {
      this._spanContext = _spanContext === void 0 ? INVALID_SPAN_CONTEXT : _spanContext;
    }
    var proto = NonRecordingSpan2.prototype;
    proto.spanContext = function() {
      return this._spanContext;
    };
    // Mutators that ignore their arguments and return the span for chaining.
    [
      "setAttribute",
      "setAttributes",
      "addEvent",
      "addLink",
      "addLinks",
      "setStatus",
      "updateName"
    ].forEach(function(method) {
      proto[method] = function() {
        return this;
      };
    });
    proto.end = function(_endTime) {
    };
    proto.isRecording = function() {
      return false;
    };
    proto.recordException = function(_exception, _time) {
    };
    return NonRecordingSpan2;
  })()
);
|
|
560
1978
|
|
|
561
|
-
|
|
562
|
-
|
|
563
|
-
|
|
1979
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/context-utils.js
|
|
1980
|
+
var SPAN_KEY = createContextKey("OpenTelemetry Context Key SPAN");
|
|
1981
|
+
/**
 * Read the value stored under `SPAN_KEY` in the given context.
 *
 * @param context2 - Context exposing `getValue`.
 * @returns The stored value, or undefined when it is missing or falsy.
 */
function getSpan(context2) {
  const stored = context2.getValue(SPAN_KEY);
  return stored || void 0;
}
|
|
1984
|
+
/**
 * Look up the span stored in the currently active context.
 *
 * @returns Whatever `getSpan` returns for `ContextAPI.getInstance().active()`.
 */
function getActiveSpan() {
  const activeContext = ContextAPI.getInstance().active();
  return getSpan(activeContext);
}
|
|
1987
|
+
/**
 * Store `span` under `SPAN_KEY` via the context's `setValue`.
 *
 * @param context2 - Context exposing `setValue`.
 * @param span - Value to store.
 * @returns Whatever `context2.setValue` returns.
 */
function setSpan(context2, span) {
  const updated = context2.setValue(SPAN_KEY, span);
  return updated;
}
|
|
1990
|
+
/**
 * Remove the `SPAN_KEY` entry via the context's `deleteValue`.
 *
 * @param context2 - Context exposing `deleteValue`.
 * @returns Whatever `context2.deleteValue` returns.
 */
function deleteSpan(context2) {
  const updated = context2.deleteValue(SPAN_KEY);
  return updated;
}
|
|
1993
|
+
/**
 * Wrap `spanContext` in a new `NonRecordingSpan` and store it through `setSpan`.
 *
 * @param context2 - Context to store into.
 * @param spanContext - Span context to wrap.
 * @returns Whatever `setSpan` returns.
 */
function setSpanContext(context2, spanContext) {
  const carrier = new NonRecordingSpan(spanContext);
  return setSpan(context2, carrier);
}
|
|
1996
|
+
/**
 * Return the span context of the span found by `getSpan` in the given context.
 *
 * @param context2 - Context to inspect.
 * @returns The result of the span's `spanContext()`, or undefined when
 *   `getSpan` yields null or undefined.
 */
function getSpanContext(context2) {
  return getSpan(context2)?.spanContext();
}
|
|
564
2000
|
|
|
565
|
-
|
|
566
|
-
|
|
567
|
-
|
|
2001
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/spancontext-utils.js
|
|
2002
|
+
var VALID_TRACEID_REGEX = /^([0-9a-f]{32})$/i;
|
|
2003
|
+
var VALID_SPANID_REGEX = /^[0-9a-f]{16}$/i;
|
|
2004
|
+
/**
 * Check that `traceId` matches `VALID_TRACEID_REGEX` and is not the
 * `INVALID_TRACEID` sentinel.
 *
 * @param traceId - Candidate trace id.
 * @returns true when both conditions hold.
 */
function isValidTraceId(traceId) {
  if (!VALID_TRACEID_REGEX.test(traceId)) {
    return false;
  }
  return traceId !== INVALID_TRACEID;
}
|
|
2007
|
+
/**
 * Check that `spanId` matches `VALID_SPANID_REGEX` and is not the
 * `INVALID_SPANID` sentinel.
 *
 * @param spanId - Candidate span id.
 * @returns true when both conditions hold.
 */
function isValidSpanId(spanId) {
  if (!VALID_SPANID_REGEX.test(spanId)) {
    return false;
  }
  return spanId !== INVALID_SPANID;
}
|
|
2010
|
+
/**
 * Check that a span context carries both a valid trace id and a valid span id.
 * The span id is only examined when the trace id passes.
 *
 * @param spanContext - Object with `traceId` and `spanId`.
 * @returns true when `isValidTraceId` and `isValidSpanId` both accept their field.
 */
function isSpanContextValid(spanContext) {
  if (!isValidTraceId(spanContext.traceId)) {
    return false;
  }
  return isValidSpanId(spanContext.spanId);
}
|
|
2013
|
+
/**
 * Wrap a span context in a new `NonRecordingSpan`.
 *
 * @param spanContext - Span context to wrap.
 * @returns The newly created span.
 */
function wrapSpanContext(spanContext) {
  const wrapper = new NonRecordingSpan(spanContext);
  return wrapper;
}
|
|
568
2016
|
|
|
569
|
-
|
|
570
|
-
|
|
571
|
-
|
|
572
|
-
|
|
2017
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracer.js
|
|
2018
|
+
var contextApi = ContextAPI.getInstance();
|
|
2019
|
+
/**
 * Tracer that only ever produces `NonRecordingSpan` instances.
 */
var NoopTracer = (
  /** @class */
  (function() {
    function NoopTracer2() {
    }
    /**
     * Create a non-recording span.
     *
     * @param name - Span name (unused).
     * @param options - Optional settings; a truthy `root` yields a span with no parent context.
     * @param context2 - Context to take the parent from; the active context when undefined.
     * @returns A span carrying the parent's span context when the context holds a
     *   well-formed, valid one; otherwise a span created without a span context.
     */
    NoopTracer2.prototype.startSpan = function(name, options, context2) {
      var sourceContext = context2 === void 0 ? contextApi.active() : context2;
      if (Boolean(options?.root)) {
        return new NonRecordingSpan();
      }
      var parent = sourceContext && getSpanContext(sourceContext);
      var inheritable = isSpanContext(parent) && isSpanContextValid(parent);
      return inheritable ? new NonRecordingSpan(parent) : new NonRecordingSpan();
    };
    /**
     * Start a span and run a callback with that span set on the context.
     *
     * Accepted call shapes: `(name, fn)`, `(name, options, fn)` and
     * `(name, options, context, fn)`. With fewer than two arguments nothing happens.
     *
     * @returns The callback's result as returned by `contextApi.with`.
     */
    NoopTracer2.prototype.startActiveSpan = function(name, arg2, arg3, arg4) {
      var argCount = arguments.length;
      if (argCount < 2) {
        return;
      }
      var opts;
      var ctx;
      var fn;
      switch (argCount) {
        case 2:
          fn = arg2;
          break;
        case 3:
          opts = arg2;
          fn = arg3;
          break;
        default:
          opts = arg2;
          ctx = arg3;
          fn = arg4;
      }
      var parentContext = ctx ?? contextApi.active();
      var span = this.startSpan(name, opts, parentContext);
      var spanContextCarrier = setSpan(parentContext, span);
      return contextApi.with(spanContextCarrier, fn, void 0, span);
    };
    return NoopTracer2;
  })()
);
|
|
2063
|
+
/**
 * Structural check for a span context: a non-null object whose `spanId` and
 * `traceId` are strings and whose `traceFlags` is a number.
 *
 * `typeof null` is "object", so null is rejected explicitly; without that
 * guard the property reads below would throw a TypeError for a null argument.
 *
 * @param spanContext - Any value.
 * @returns true when the value has the expected shape, false otherwise.
 */
function isSpanContext(spanContext) {
  return spanContext !== null && typeof spanContext === "object" && typeof spanContext["spanId"] === "string" && typeof spanContext["traceId"] === "string" && typeof spanContext["traceFlags"] === "number";
}
|
|
574
|
-
|
|
575
|
-
|
|
576
|
-
|
|
577
|
-
|
|
578
|
-
|
|
579
|
-
|
|
580
|
-
|
|
581
|
-
|
|
582
|
-
|
|
583
|
-
|
|
584
|
-
|
|
585
|
-
{ role: "assistant", content: "Hello, how can I help you today" },
|
|
586
|
-
...input.messages
|
|
587
|
-
];
|
|
588
|
-
const projectConfig = await getProjectConfig();
|
|
589
|
-
const mergedConfig = mergeAndValidateConfig(config2 ?? {}, projectConfig);
|
|
590
|
-
if (!mergedConfig.model) {
|
|
591
|
-
throw new Error("Model is required for the user simulator agent");
|
|
2066
|
+
|
|
2067
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracer.js
|
|
2068
|
+
var NOOP_TRACER = new NoopTracer();
|
|
2069
|
+
/**
 * Tracer that defers to a delegate obtained from its provider.
 *
 * Until the provider can supply a delegate tracer, calls go to `NOOP_TRACER`.
 * Once a delegate has been obtained it is cached and used from then on.
 */
var ProxyTracer = (
  /** @class */
  (function() {
    /**
     * @param _provider - Object exposing `getDelegateTracer(name, version, options)`.
     * @param name - Tracer name passed to the provider.
     * @param version - Tracer version passed to the provider.
     * @param options - Tracer options passed to the provider.
     */
    function ProxyTracer2(_provider, name, version, options) {
      this._provider = _provider;
      this.name = name;
      this.version = version;
      this.options = options;
    }
    var proto = ProxyTracer2.prototype;
    proto.startSpan = function(name, options, context2) {
      var tracer = this._getTracer();
      return tracer.startSpan(name, options, context2);
    };
    // All received arguments are forwarded unchanged so the target sees the same argument count.
    proto.startActiveSpan = function(_name, _options, _context, _fn) {
      var tracer = this._getTracer();
      return tracer.startActiveSpan(...arguments);
    };
    proto._getTracer = function() {
      if (!this._delegate) {
        var resolved = this._provider.getDelegateTracer(this.name, this.version, this.options);
        if (!resolved) {
          return NOOP_TRACER;
        }
        this._delegate = resolved;
      }
      return this._delegate;
    };
    return ProxyTracer2;
  })()
);
|
|
2099
|
+
|
|
2100
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/NoopTracerProvider.js
|
|
2101
|
+
/**
 * Tracer provider whose `getTracer` ignores its arguments and returns a new
 * `NoopTracer` on every call.
 */
var NoopTracerProvider = (
  /** @class */
  (function() {
    function NoopTracerProvider2() {
    }
    var proto = NoopTracerProvider2.prototype;
    proto.getTracer = function(_name, _version, _options) {
      var tracer = new NoopTracer();
      return tracer;
    };
    return NoopTracerProvider2;
  })()
);
|
|
2112
|
+
|
|
2113
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace/ProxyTracerProvider.js
|
|
2114
|
+
var NOOP_TRACER_PROVIDER = new NoopTracerProvider();
|
|
2115
|
+
/**
 * Tracer provider that forwards to a delegate once one has been set.
 *
 * Before that, `getTracer` hands out `ProxyTracer` instances bound to this
 * provider and `getDelegate` reports `NOOP_TRACER_PROVIDER`.
 */
var ProxyTracerProvider = (
  /** @class */
  (function() {
    function ProxyTracerProvider2() {
    }
    var proto = ProxyTracerProvider2.prototype;
    /** Return the delegate's tracer when available, otherwise a new `ProxyTracer`. */
    proto.getTracer = function(name, version, options) {
      return this.getDelegateTracer(name, version, options) ?? new ProxyTracer(this, name, version, options);
    };
    /** Return the delegate, or `NOOP_TRACER_PROVIDER` when none is set. */
    proto.getDelegate = function() {
      return this._delegate ?? NOOP_TRACER_PROVIDER;
    };
    proto.setDelegate = function(delegate) {
      this._delegate = delegate;
    };
    /** Ask the delegate for a tracer; yields undefined when no delegate is set. */
    proto.getDelegateTracer = function(name, version, options) {
      return this._delegate?.getTracer(name, version, options);
    };
    return ProxyTracerProvider2;
  })()
);
|
|
618
2138
|
|
|
619
|
-
//
|
|
620
|
-
var
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
2139
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/context-api.js
|
|
2140
|
+
var context = ContextAPI.getInstance();
|
|
2141
|
+
|
|
2142
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/api/trace.js
|
|
2143
|
+
var API_NAME3 = "trace";
|
|
2144
|
+
/**
 * Trace API facade, obtained through `TraceAPI.getInstance()`.
 *
 * It owns a `ProxyTracerProvider` and re-exposes the span/context helper
 * functions as instance properties.
 */
var TraceAPI = (
  /** @class */
  (function() {
    function TraceAPI2() {
      Object.assign(this, {
        _proxyTracerProvider: new ProxyTracerProvider(),
        wrapSpanContext,
        isSpanContextValid,
        deleteSpan,
        getSpan,
        getActiveSpan,
        getSpanContext,
        setSpan,
        setSpanContext
      });
    }
    /** Return the shared instance, creating it on first use. */
    TraceAPI2.getInstance = function() {
      if (!this._instance) {
        this._instance = new TraceAPI2();
      }
      return this._instance;
    };
    var proto = TraceAPI2.prototype;
    /**
     * Register the internal proxy provider globally and, only if that succeeds,
     * point it at `provider`.
     *
     * @returns Whether the global registration succeeded.
     */
    proto.setGlobalTracerProvider = function(provider) {
      var registered = registerGlobal(API_NAME3, this._proxyTracerProvider, DiagAPI.instance());
      if (registered) {
        this._proxyTracerProvider.setDelegate(provider);
      }
      return registered;
    };
    /** Return the globally registered provider, or the internal proxy provider. */
    proto.getTracerProvider = function() {
      return getGlobal(API_NAME3) || this._proxyTracerProvider;
    };
    proto.getTracer = function(name, version) {
      var provider = this.getTracerProvider();
      return provider.getTracer(name, version);
    };
    /** Remove the global registration and start over with a fresh proxy provider. */
    proto.disable = function() {
      unregisterGlobal(API_NAME3, DiagAPI.instance());
      this._proxyTracerProvider = new ProxyTracerProvider();
    };
    return TraceAPI2;
  })()
);
|
|
2184
|
+
|
|
2185
|
+
// node_modules/.pnpm/@opentelemetry+api@1.9.0/node_modules/@opentelemetry/api/build/esm/trace-api.js
|
|
2186
|
+
var trace = TraceAPI.getInstance();
|
|
626
2187
|
|
|
627
2188
|
// src/execution/scenario-execution.ts
|
|
2189
|
+
var import_langwatch = require("langwatch");
|
|
2190
|
+
var import_observability3 = require("langwatch/observability");
|
|
628
2191
|
var import_rxjs2 = require("rxjs");
|
|
629
2192
|
|
|
630
2193
|
// src/execution/scenario-execution-state.ts
|
|
@@ -711,9 +2274,13 @@ var ScenarioExecutionState = class {
|
|
|
711
2274
|
* Adds a message to the conversation history.
|
|
712
2275
|
*
|
|
713
2276
|
* @param message - The message to add.
|
|
2277
|
+
* The message object itself may carry an optional `traceId` property that associates it with a trace; it is not a separate parameter.
|
|
714
2278
|
*/
|
|
715
2279
|
addMessage(message2) {
|
|
716
|
-
const messageWithId = {
|
|
2280
|
+
const messageWithId = {
|
|
2281
|
+
...message2,
|
|
2282
|
+
id: generateMessageId()
|
|
2283
|
+
};
|
|
717
2284
|
this._messages.push(messageWithId);
|
|
718
2285
|
this.eventSubject.next({ type: "MESSAGE_ADDED" /* MESSAGE_ADDED */ });
|
|
719
2286
|
}
|
|
@@ -769,7 +2336,7 @@ var ScenarioExecutionState = class {
|
|
|
769
2336
|
|
|
770
2337
|
// src/events/schema.ts
|
|
771
2338
|
var import_core = require("@ag-ui/core");
|
|
772
|
-
var
|
|
2339
|
+
var import_zod = require("zod");
|
|
773
2340
|
var Verdict = /* @__PURE__ */ ((Verdict2) => {
|
|
774
2341
|
Verdict2["SUCCESS"] = "success";
|
|
775
2342
|
Verdict2["FAILURE"] = "failure";
|
|
@@ -785,68 +2352,69 @@ var ScenarioRunStatus = /* @__PURE__ */ ((ScenarioRunStatus2) => {
|
|
|
785
2352
|
ScenarioRunStatus2["FAILED"] = "FAILED";
|
|
786
2353
|
return ScenarioRunStatus2;
|
|
787
2354
|
})(ScenarioRunStatus || {});
|
|
788
|
-
var baseEventSchema =
|
|
789
|
-
type:
|
|
790
|
-
timestamp:
|
|
791
|
-
rawEvent:
|
|
2355
|
+
// Fields common to every event: an event type from the @ag-ui/core EventType
// enum, a numeric timestamp, and an optional untyped raw payload.
var baseEventSchema = import_zod.z.object({
  type: import_zod.z.nativeEnum(import_core.EventType),
  timestamp: import_zod.z.number(),
  rawEvent: import_zod.z.any().optional()
});

// Identifier schemas: all plain strings.
var batchRunIdSchema = import_zod.z.string();
var scenarioRunIdSchema = import_zod.z.string();
var scenarioIdSchema = import_zod.z.string();

// Base for all scenario events; scenarioSetId falls back to "default" when omitted.
var baseScenarioEventSchema = baseEventSchema.extend({
  batchRunId: batchRunIdSchema,
  scenarioId: scenarioIdSchema,
  scenarioRunId: scenarioRunIdSchema,
  scenarioSetId: import_zod.z.string().optional().default("default")
});

// Event emitted when a scenario run starts; carries optional name/description metadata.
var scenarioRunStartedSchema = baseScenarioEventSchema.extend({
  type: import_zod.z.literal("SCENARIO_RUN_STARTED" /* RUN_STARTED */),
  metadata: import_zod.z.object({
    name: import_zod.z.string().optional(),
    description: import_zod.z.string().optional()
  })
});

// Outcome payload: verdict plus met/unmet criteria, with optional reasoning and error text.
var scenarioResultsSchema = import_zod.z.object({
  verdict: import_zod.z.nativeEnum(Verdict),
  reasoning: import_zod.z.string().optional(),
  metCriteria: import_zod.z.array(import_zod.z.string()),
  unmetCriteria: import_zod.z.array(import_zod.z.string()),
  error: import_zod.z.string().optional()
});

// Event emitted when a scenario run finishes; `results` may be absent or null.
var scenarioRunFinishedSchema = baseScenarioEventSchema.extend({
  type: import_zod.z.literal("SCENARIO_RUN_FINISHED" /* RUN_FINISHED */),
  status: import_zod.z.nativeEnum(ScenarioRunStatus),
  results: scenarioResultsSchema.optional().nullable()
});

// Message snapshot event: the @ag-ui/core snapshot schema merged with the
// scenario base fields and a scenario-specific type literal.
var scenarioMessageSnapshotSchema = import_core.MessagesSnapshotEventSchema.merge(
  baseScenarioEventSchema.extend({
    type: import_zod.z.literal("SCENARIO_MESSAGE_SNAPSHOT" /* MESSAGE_SNAPSHOT */)
  })
);

// Union of the three scenario events, discriminated on the `type` field.
var scenarioEventSchema = import_zod.z.discriminatedUnion("type", [
  scenarioRunStartedSchema,
  scenarioRunFinishedSchema,
  scenarioMessageSnapshotSchema
]);

// Small object schemas: success flag, error message, state, run ids, event list.
var successSchema = import_zod.z.object({
  success: import_zod.z.boolean()
});
var errorSchema = import_zod.z.object({
  error: import_zod.z.string()
});
var stateSchema = import_zod.z.object({
  state: import_zod.z.object({
    messages: import_zod.z.array(import_zod.z.any()),
    status: import_zod.z.string()
  })
});
var runsSchema = import_zod.z.object({
  runs: import_zod.z.array(import_zod.z.string())
});
var eventsSchema = import_zod.z.object({
  events: import_zod.z.array(scenarioEventSchema)
});
|
|
841
2408
|
|
|
842
2409
|
// src/utils/convert-core-messages-to-agui-messages.ts
|
|
843
|
-
function
|
|
2410
|
+
function convertModelMessagesToAguiMessages(modelMessages) {
|
|
844
2411
|
const aguiMessages = [];
|
|
845
|
-
for (const msg of
|
|
2412
|
+
for (const msg of modelMessages) {
|
|
846
2413
|
const id = "id" in msg && typeof msg.id === "string" ? msg.id : generateMessageId();
|
|
847
2414
|
switch (true) {
|
|
848
2415
|
case msg.role === "system":
|
|
849
2416
|
aguiMessages.push({
|
|
2417
|
+
trace_id: msg.traceId,
|
|
850
2418
|
id,
|
|
851
2419
|
role: "system",
|
|
852
2420
|
content: msg.content
|
|
@@ -854,6 +2422,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
854
2422
|
break;
|
|
855
2423
|
case (msg.role === "user" && typeof msg.content === "string"):
|
|
856
2424
|
aguiMessages.push({
|
|
2425
|
+
trace_id: msg.traceId,
|
|
857
2426
|
id,
|
|
858
2427
|
role: "user",
|
|
859
2428
|
content: msg.content
|
|
@@ -862,6 +2431,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
862
2431
|
// Handle any other user message content format
|
|
863
2432
|
case (msg.role === "user" && Array.isArray(msg.content)):
|
|
864
2433
|
aguiMessages.push({
|
|
2434
|
+
trace_id: msg.traceId,
|
|
865
2435
|
id,
|
|
866
2436
|
role: "user",
|
|
867
2437
|
content: JSON.stringify(msg.content)
|
|
@@ -869,6 +2439,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
869
2439
|
break;
|
|
870
2440
|
case (msg.role === "assistant" && typeof msg.content === "string"):
|
|
871
2441
|
aguiMessages.push({
|
|
2442
|
+
trace_id: msg.traceId,
|
|
872
2443
|
id,
|
|
873
2444
|
role: "assistant",
|
|
874
2445
|
content: msg.content
|
|
@@ -878,6 +2449,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
878
2449
|
const toolCalls = msg.content.filter((p) => p.type === "tool-call");
|
|
879
2450
|
const nonToolCalls = msg.content.filter((p) => p.type !== "tool-call");
|
|
880
2451
|
aguiMessages.push({
|
|
2452
|
+
trace_id: msg.traceId,
|
|
881
2453
|
id,
|
|
882
2454
|
role: "assistant",
|
|
883
2455
|
content: JSON.stringify(nonToolCalls),
|
|
@@ -886,7 +2458,7 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
886
2458
|
type: "function",
|
|
887
2459
|
function: {
|
|
888
2460
|
name: c.toolName,
|
|
889
|
-
arguments: JSON.stringify(c.
|
|
2461
|
+
arguments: JSON.stringify(c.input)
|
|
890
2462
|
}
|
|
891
2463
|
}))
|
|
892
2464
|
});
|
|
@@ -894,11 +2466,13 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
894
2466
|
}
|
|
895
2467
|
case msg.role === "tool":
|
|
896
2468
|
msg.content.map((p, i) => {
|
|
2469
|
+
var _a;
|
|
897
2470
|
aguiMessages.push({
|
|
2471
|
+
trace_id: msg.traceId,
|
|
898
2472
|
id: `${id}-${i}`,
|
|
899
2473
|
role: "tool",
|
|
900
2474
|
toolCallId: p.toolCallId,
|
|
901
|
-
content: JSON.stringify(p.
|
|
2475
|
+
content: JSON.stringify((_a = p.output) == null ? void 0 : _a.value)
|
|
902
2476
|
});
|
|
903
2477
|
});
|
|
904
2478
|
break;
|
|
@@ -908,12 +2482,16 @@ function convertCoreMessagesToAguiMessages(coreMessages) {
|
|
|
908
2482
|
}
|
|
909
2483
|
return aguiMessages;
|
|
910
2484
|
}
|
|
911
|
-
var convert_core_messages_to_agui_messages_default =
|
|
2485
|
+
var convert_core_messages_to_agui_messages_default = convertModelMessagesToAguiMessages;
|
|
912
2486
|
|
|
913
2487
|
// src/execution/scenario-execution.ts
|
|
914
2488
|
var ScenarioExecution = class {
|
|
2489
|
+
/** LangWatch tracer for scenario execution */
|
|
2490
|
+
tracer = (0, import_langwatch.getLangWatchTracer)("@langwatch/scenario");
|
|
915
2491
|
/** The current state of the scenario execution */
|
|
916
2492
|
state;
|
|
2493
|
+
/** The final result of the scenario execution, set when a conclusion is reached */
|
|
2494
|
+
_result;
|
|
917
2495
|
/** Logger for debugging and monitoring */
|
|
918
2496
|
logger = new Logger("scenario.execution.ScenarioExecution");
|
|
919
2497
|
/** Finalized configuration with all defaults applied */
|
|
@@ -932,10 +2510,10 @@ var ScenarioExecution = class {
|
|
|
932
2510
|
* Key: agent index, Value: array of pending messages for that agent
|
|
933
2511
|
*/
|
|
934
2512
|
pendingMessages = /* @__PURE__ */ new Map();
|
|
935
|
-
/** Intermediate result set by agents that make final decisions */
|
|
936
|
-
partialResult = null;
|
|
937
2513
|
/** Accumulated execution time for each agent (for performance tracking) */
|
|
938
2514
|
agentTimes = /* @__PURE__ */ new Map();
|
|
2515
|
+
/** Current turn span for trace context management */
|
|
2516
|
+
currentTurnSpan;
|
|
939
2517
|
/** Timestamp when execution started (for total time calculation) */
|
|
940
2518
|
totalStartTime = 0;
|
|
941
2519
|
/** Event stream for monitoring scenario progress */
|
|
@@ -974,7 +2552,7 @@ var ScenarioExecution = class {
|
|
|
974
2552
|
/**
|
|
975
2553
|
* Gets the complete conversation history as an array of messages.
|
|
976
2554
|
*
|
|
977
|
-
* @returns Array of
|
|
2555
|
+
* @returns Array of ModelMessage objects representing the full conversation
|
|
978
2556
|
*/
|
|
979
2557
|
get messages() {
|
|
980
2558
|
return this.state.messages;
|
|
@@ -988,6 +2566,41 @@ var ScenarioExecution = class {
|
|
|
988
2566
|
get threadId() {
|
|
989
2567
|
return this.state.threadId;
|
|
990
2568
|
}
|
|
2569
|
+
/**
|
|
2570
|
+
* Gets the result of the scenario execution if it has been set.
|
|
2571
|
+
*
|
|
2572
|
+
* @returns The scenario result or undefined if not yet set
|
|
2573
|
+
*/
|
|
2574
|
+
get result() {
|
|
2575
|
+
return this._result;
|
|
2576
|
+
}
|
|
2577
|
+
/**
|
|
2578
|
+
* Sets the result of the scenario execution.
|
|
2579
|
+
* This is called when the scenario reaches a conclusion (success or failure).
|
|
2580
|
+
* Automatically includes messages, totalTime, and agentTime from the current execution context.
|
|
2581
|
+
*
|
|
2582
|
+
* @param result - The final scenario result (without messages/timing, which will be added automatically)
|
|
2583
|
+
*/
|
|
2584
|
+
setResult(result) {
|
|
2585
|
+
const agentRoleAgentsIdx = this.agents.map((agent2, i) => ({ agent: agent2, idx: i })).filter(({ agent: agent2 }) => agent2.role === "Agent" /* AGENT */).map(({ idx }) => idx);
|
|
2586
|
+
const agentTimes = agentRoleAgentsIdx.map(
|
|
2587
|
+
(i) => this.agentTimes.get(i) || 0
|
|
2588
|
+
);
|
|
2589
|
+
const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
|
|
2590
|
+
this._result = {
|
|
2591
|
+
...result,
|
|
2592
|
+
messages: this.state.messages,
|
|
2593
|
+
totalTime: this.totalTime,
|
|
2594
|
+
agentTime: totalAgentTime
|
|
2595
|
+
};
|
|
2596
|
+
this.logger.debug(`[${this.config.id}] Result set`, {
|
|
2597
|
+
success: result.success,
|
|
2598
|
+
reasoning: result.reasoning,
|
|
2599
|
+
totalTime: this.totalTime,
|
|
2600
|
+
agentTime: totalAgentTime,
|
|
2601
|
+
messageCount: this.state.messages.length
|
|
2602
|
+
});
|
|
2603
|
+
}
|
|
991
2604
|
/**
|
|
992
2605
|
* The total elapsed time for the scenario execution.
|
|
993
2606
|
*/
|
|
@@ -1021,8 +2634,14 @@ var ScenarioExecution = class {
|
|
|
1021
2634
|
* ```
|
|
1022
2635
|
*/
|
|
1023
2636
|
async execute() {
|
|
2637
|
+
this.logger.debug(`[${this.config.id}] Starting scenario execution`, {
|
|
2638
|
+
name: this.config.name,
|
|
2639
|
+
maxTurns: this.config.maxTurns,
|
|
2640
|
+
scriptLength: this.config.script.length
|
|
2641
|
+
});
|
|
1024
2642
|
this.reset();
|
|
1025
2643
|
const scenarioRunId = generateScenarioRunId();
|
|
2644
|
+
this.logger.debug(`[${this.config.id}] Generated run ID: ${scenarioRunId}`);
|
|
1026
2645
|
this.emitRunStarted({ scenarioRunId });
|
|
1027
2646
|
const subscription = this.state.events$.pipe(
|
|
1028
2647
|
(0, import_rxjs2.filter)((event) => event.type === "MESSAGE_ADDED" /* MESSAGE_ADDED */)
|
|
@@ -1032,18 +2651,17 @@ var ScenarioExecution = class {
|
|
|
1032
2651
|
try {
|
|
1033
2652
|
for (let i = 0; i < this.config.script.length; i++) {
|
|
1034
2653
|
const scriptStep = this.config.script[i];
|
|
1035
|
-
|
|
1036
|
-
if (result
|
|
2654
|
+
await this.executeScriptStep(scriptStep, i);
|
|
2655
|
+
if (this.result) {
|
|
1037
2656
|
this.emitRunFinished({
|
|
1038
2657
|
scenarioRunId,
|
|
1039
|
-
status: result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
|
|
1040
|
-
result
|
|
2658
|
+
status: this.result.success ? "SUCCESS" /* SUCCESS */ : "FAILED" /* FAILED */,
|
|
2659
|
+
result: this.result
|
|
1041
2660
|
});
|
|
1042
|
-
return result;
|
|
2661
|
+
return this.result;
|
|
1043
2662
|
}
|
|
1044
2663
|
}
|
|
1045
|
-
this.
|
|
1046
|
-
return this.reachedMaxTurns(
|
|
2664
|
+
this.reachedMaxTurns(
|
|
1047
2665
|
[
|
|
1048
2666
|
"Reached end of script without conclusion, add one of the following to the end of the script:",
|
|
1049
2667
|
"- `Scenario.proceed()` to let the simulation continue to play out",
|
|
@@ -1051,20 +2669,21 @@ var ScenarioExecution = class {
|
|
|
1051
2669
|
"- `Scenario.succeed()` or `Scenario.fail()` to end the test with an explicit result"
|
|
1052
2670
|
].join("\n")
|
|
1053
2671
|
);
|
|
2672
|
+
this.emitRunFinished({ scenarioRunId, status: "FAILED" /* FAILED */ });
|
|
2673
|
+
return this.result;
|
|
1054
2674
|
} catch (error) {
|
|
1055
2675
|
const errorInfo = extractErrorInfo(error);
|
|
1056
|
-
|
|
2676
|
+
this.setResult({
|
|
1057
2677
|
success: false,
|
|
1058
|
-
messages: this.state.messages,
|
|
1059
2678
|
reasoning: `Scenario failed with error: ${errorInfo.message}`,
|
|
1060
2679
|
metCriteria: [],
|
|
1061
2680
|
unmetCriteria: [],
|
|
1062
2681
|
error: JSON.stringify(errorInfo)
|
|
1063
|
-
};
|
|
2682
|
+
});
|
|
1064
2683
|
this.emitRunFinished({
|
|
1065
2684
|
scenarioRunId,
|
|
1066
2685
|
status: "ERROR" /* ERROR */,
|
|
1067
|
-
result:
|
|
2686
|
+
result: this.result
|
|
1068
2687
|
});
|
|
1069
2688
|
throw error;
|
|
1070
2689
|
} finally {
|
|
@@ -1082,50 +2701,66 @@ var ScenarioExecution = class {
|
|
|
1082
2701
|
* - Progress to the next turn if needed
|
|
1083
2702
|
* - Find the next agent that should act
|
|
1084
2703
|
* - Execute that agent's response
|
|
1085
|
-
* -
|
|
2704
|
+
* - Set the result if the scenario concludes
|
|
1086
2705
|
*
|
|
1087
2706
|
* Note: This method is primarily for debugging or custom execution flows. Most users
|
|
1088
2707
|
* will use `execute()` to run the entire scenario automatically.
|
|
1089
2708
|
*
|
|
1090
|
-
*
|
|
1091
|
-
* - Array of new messages added during the agent interaction, or
|
|
1092
|
-
* - A final ScenarioResult if the interaction concludes the scenario
|
|
1093
|
-
* @throws Error if no result is returned from the step
|
|
2709
|
+
* After calling this method, check `this.result` to see if the scenario has concluded.
|
|
1094
2710
|
*
|
|
1095
2711
|
* @example
|
|
1096
2712
|
* ```typescript
|
|
1097
2713
|
* const execution = new ScenarioExecution(config, script);
|
|
1098
2714
|
*
|
|
1099
2715
|
* // Execute one agent interaction at a time
|
|
1100
|
-
*
|
|
1101
|
-
* if (
|
|
1102
|
-
* console.log('
|
|
1103
|
-
* } else {
|
|
1104
|
-
* console.log('Scenario finished:', messages.success);
|
|
2716
|
+
* await execution.step();
|
|
2717
|
+
* if (execution.result) {
|
|
2718
|
+
* console.log('Scenario finished:', execution.result.success);
|
|
1105
2719
|
* }
|
|
1106
2720
|
* ```
|
|
1107
2721
|
*/
|
|
1108
2722
|
async step() {
|
|
1109
|
-
|
|
1110
|
-
if (result === null) throw new Error("No result from step");
|
|
1111
|
-
return result;
|
|
2723
|
+
await this._step();
|
|
1112
2724
|
}
|
|
1113
2725
|
async _step(goToNextTurn = true, onTurn) {
|
|
2726
|
+
this.logger.debug(`[${this.config.id}] _step called`, {
|
|
2727
|
+
goToNextTurn,
|
|
2728
|
+
pendingRoles: this.pendingRolesOnTurn,
|
|
2729
|
+
currentTurn: this.state.currentTurn
|
|
2730
|
+
});
|
|
1114
2731
|
if (this.pendingRolesOnTurn.length === 0) {
|
|
1115
|
-
if (!goToNextTurn)
|
|
2732
|
+
if (!goToNextTurn) {
|
|
2733
|
+
this.logger.debug(
|
|
2734
|
+
`[${this.config.id}] No pending roles, not advancing turn`
|
|
2735
|
+
);
|
|
2736
|
+
return;
|
|
2737
|
+
}
|
|
1116
2738
|
this.newTurn();
|
|
1117
2739
|
if (onTurn) await onTurn(this.state);
|
|
1118
|
-
if (this.state.currentTurn >= this.config.maxTurns)
|
|
1119
|
-
|
|
2740
|
+
if (this.state.currentTurn >= this.config.maxTurns) {
|
|
2741
|
+
this.logger.debug(
|
|
2742
|
+
`[${this.config.id}] Reached max turns: ${this.state.currentTurn}`
|
|
2743
|
+
);
|
|
2744
|
+
this.reachedMaxTurns();
|
|
2745
|
+
return;
|
|
2746
|
+
}
|
|
1120
2747
|
}
|
|
1121
2748
|
const currentRole = this.pendingRolesOnTurn[0];
|
|
1122
2749
|
const { idx, agent: nextAgent } = this.nextAgentForRole(currentRole);
|
|
1123
2750
|
if (!nextAgent) {
|
|
2751
|
+
this.logger.debug(
|
|
2752
|
+
`[${this.config.id}] No agent for role ${currentRole}, removing role`
|
|
2753
|
+
);
|
|
1124
2754
|
this.removePendingRole(currentRole);
|
|
1125
2755
|
return this._step(goToNextTurn, onTurn);
|
|
1126
2756
|
}
|
|
2757
|
+
this.logger.debug(`[${this.config.id}] Calling agent`, {
|
|
2758
|
+
role: currentRole,
|
|
2759
|
+
agentIdx: idx,
|
|
2760
|
+
agentName: nextAgent.name ?? nextAgent.constructor.name
|
|
2761
|
+
});
|
|
1127
2762
|
this.removePendingAgent(nextAgent);
|
|
1128
|
-
|
|
2763
|
+
await this.callAgent(idx, currentRole);
|
|
1129
2764
|
}
|
|
1130
2765
|
/**
|
|
1131
2766
|
* Calls a specific agent to generate a response or make a decision.
|
|
@@ -1144,19 +2779,25 @@ var ScenarioExecution = class {
|
|
|
1144
2779
|
* After the agent responds:
|
|
1145
2780
|
* - Performance timing is recorded
|
|
1146
2781
|
* - Pending messages for this agent are cleared (they've been processed)
|
|
1147
|
-
* - If the agent returns a ScenarioResult, it's
|
|
2782
|
+
* - If the agent returns a ScenarioResult, it's set on this.result
|
|
1148
2783
|
* - Otherwise, the agent's messages are added to the conversation and broadcast
|
|
1149
2784
|
*
|
|
1150
2785
|
* @param idx - The index of the agent in the agents array
|
|
1151
2786
|
* @param role - The role the agent is being asked to play (USER, AGENT, or JUDGE)
|
|
1152
2787
|
* @param judgmentRequest - Whether this is a judgment request (for judge agents)
|
|
1153
|
-
* @returns A promise that resolves with either:
|
|
1154
|
-
* - Array of messages if the agent generated a response, or
|
|
1155
|
-
* - ScenarioResult if the agent made a final decision
|
|
1156
2788
|
* @throws Error if the agent call fails
|
|
1157
2789
|
*/
|
|
1158
2790
|
async callAgent(idx, role, judgmentRequest = false) {
|
|
2791
|
+
var _a;
|
|
1159
2792
|
const agent2 = this.agents[idx];
|
|
2793
|
+
const agentName = agent2.name ?? agent2.constructor.name;
|
|
2794
|
+
this.logger.debug(`[${this.config.id}] callAgent started`, {
|
|
2795
|
+
agentIdx: idx,
|
|
2796
|
+
role,
|
|
2797
|
+
judgmentRequest,
|
|
2798
|
+
agentName,
|
|
2799
|
+
pendingMessagesCount: ((_a = this.pendingMessages.get(idx)) == null ? void 0 : _a.length) ?? 0
|
|
2800
|
+
});
|
|
1160
2801
|
const startTime = Date.now();
|
|
1161
2802
|
const agentInput = {
|
|
1162
2803
|
threadId: this.state.threadId,
|
|
@@ -1167,35 +2808,75 @@ var ScenarioExecution = class {
|
|
|
1167
2808
|
scenarioState: this.state,
|
|
1168
2809
|
scenarioConfig: this.config
|
|
1169
2810
|
};
|
|
2811
|
+
const agentContext = this.currentTurnSpan ? trace.setSpan(context.active(), this.currentTurnSpan) : context.active();
|
|
2812
|
+
const agentSpanName = `${agentName !== Object.prototype.constructor.name ? agent2.constructor.name : "Agent"}.call`;
|
|
1170
2813
|
try {
|
|
1171
|
-
|
|
1172
|
-
|
|
1173
|
-
this.addAgentTime(idx, endTime - startTime);
|
|
1174
|
-
this.pendingMessages.delete(idx);
|
|
1175
|
-
if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
|
|
1176
|
-
return agentResponse;
|
|
1177
|
-
}
|
|
1178
|
-
const currentAgentTime = this.agentTimes.get(idx) ?? 0;
|
|
1179
|
-
this.agentTimes.set(idx, currentAgentTime + (Date.now() - startTime));
|
|
1180
|
-
const messages = convertAgentReturnTypesToMessages(
|
|
1181
|
-
agentResponse,
|
|
1182
|
-
role === "User" /* USER */ ? "user" : "assistant"
|
|
1183
|
-
);
|
|
1184
|
-
for (const message2 of messages) {
|
|
1185
|
-
this.state.addMessage(message2);
|
|
1186
|
-
this.broadcastMessage(message2, idx);
|
|
1187
|
-
}
|
|
1188
|
-
return messages;
|
|
1189
|
-
} catch (error) {
|
|
1190
|
-
this.logger.error(
|
|
1191
|
-
`[${this.config.id}] Error calling agent ${agent2.constructor.name}`,
|
|
2814
|
+
await this.tracer.withActiveSpan(
|
|
2815
|
+
agentSpanName,
|
|
1192
2816
|
{
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
2817
|
+
attributes: {
|
|
2818
|
+
[import_observability3.attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId
|
|
2819
|
+
}
|
|
2820
|
+
},
|
|
2821
|
+
agentContext,
|
|
2822
|
+
async (agentSpan) => {
|
|
2823
|
+
agentSpan.setType("agent");
|
|
2824
|
+
agentSpan.setInput("chat_messages", this.state.messages);
|
|
2825
|
+
const agentResponse = await agent2.call(agentInput);
|
|
2826
|
+
const endTime = Date.now();
|
|
2827
|
+
const duration = endTime - startTime;
|
|
2828
|
+
this.logger.debug(`[${this.config.id}] Agent responded`, {
|
|
2829
|
+
agentIdx: idx,
|
|
2830
|
+
duration,
|
|
2831
|
+
responseType: typeof agentResponse,
|
|
2832
|
+
isScenarioResult: agentResponse && typeof agentResponse === "object" && "success" in agentResponse
|
|
2833
|
+
});
|
|
2834
|
+
this.addAgentTime(idx, duration);
|
|
2835
|
+
this.pendingMessages.delete(idx);
|
|
2836
|
+
if (agentResponse && typeof agentResponse === "object" && "success" in agentResponse) {
|
|
2837
|
+
this.logger.debug(
|
|
2838
|
+
`[${this.config.id}] Agent returned ScenarioResult`,
|
|
2839
|
+
{
|
|
2840
|
+
success: agentResponse.success
|
|
2841
|
+
}
|
|
2842
|
+
);
|
|
2843
|
+
this.setResult(agentResponse);
|
|
2844
|
+
return;
|
|
2845
|
+
}
|
|
2846
|
+
const messages = convertAgentReturnTypesToMessages(
|
|
2847
|
+
agentResponse,
|
|
2848
|
+
role === "User" /* USER */ ? "user" : "assistant"
|
|
2849
|
+
);
|
|
2850
|
+
if (messages.length > 0) {
|
|
2851
|
+
agentSpan.setOutput("chat_messages", messages);
|
|
2852
|
+
}
|
|
2853
|
+
const metrics = {
|
|
2854
|
+
duration: endTime - startTime
|
|
2855
|
+
};
|
|
2856
|
+
if (agentResponse && typeof agentResponse === "object") {
|
|
2857
|
+
const usage = agentResponse.usage;
|
|
2858
|
+
if (usage) {
|
|
2859
|
+
if (usage.prompt_tokens !== void 0)
|
|
2860
|
+
metrics.promptTokens = usage.prompt_tokens;
|
|
2861
|
+
if (usage.completion_tokens !== void 0)
|
|
2862
|
+
metrics.completionTokens = usage.completion_tokens;
|
|
2863
|
+
if (usage.total_tokens !== void 0)
|
|
2864
|
+
metrics.totalTokens = usage.total_tokens;
|
|
2865
|
+
}
|
|
2866
|
+
}
|
|
2867
|
+
agentSpan.setMetrics(metrics);
|
|
2868
|
+
const traceId = agentSpan.spanContext().traceId.toString();
|
|
2869
|
+
for (const message2 of messages) {
|
|
2870
|
+
this.state.addMessage({
|
|
2871
|
+
...message2,
|
|
2872
|
+
traceId
|
|
2873
|
+
});
|
|
2874
|
+
this.broadcastMessage(message2, idx);
|
|
2875
|
+
}
|
|
1196
2876
|
}
|
|
1197
2877
|
);
|
|
1198
|
-
|
|
2878
|
+
} catch (error) {
|
|
2879
|
+
throw new Error(`[${agentName}] ${error}`, { cause: error });
|
|
1199
2880
|
}
|
|
1200
2881
|
}
|
|
1201
2882
|
/**
|
|
@@ -1207,7 +2888,7 @@ var ScenarioExecution = class {
|
|
|
1207
2888
|
* - "assistant" messages are routed to AGENT role agents
|
|
1208
2889
|
* - Other message types are added directly to the conversation
|
|
1209
2890
|
*
|
|
1210
|
-
* @param message - The
|
|
2891
|
+
* @param message - The ModelMessage to add to the conversation
|
|
1211
2892
|
*
|
|
1212
2893
|
* @example
|
|
1213
2894
|
* ```typescript
|
|
@@ -1236,7 +2917,7 @@ var ScenarioExecution = class {
|
|
|
1236
2917
|
*
|
|
1237
2918
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1238
2919
|
*
|
|
1239
|
-
* @param content - Optional content for the user's message. Can be a string or
|
|
2920
|
+
* @param content - Optional content for the user's message. Can be a string or ModelMessage.
|
|
1240
2921
|
* If not provided, the user simulator agent will generate the content.
|
|
1241
2922
|
*
|
|
1242
2923
|
* @example
|
|
@@ -1247,7 +2928,7 @@ var ScenarioExecution = class {
|
|
|
1247
2928
|
* // Let user simulator generate content
|
|
1248
2929
|
* await execution.user();
|
|
1249
2930
|
*
|
|
1250
|
-
* // Use a
|
|
2931
|
+
* // Use a ModelMessage object
|
|
1251
2932
|
* await execution.user({
|
|
1252
2933
|
* role: "user",
|
|
1253
2934
|
* content: "Tell me a joke"
|
|
@@ -1266,7 +2947,7 @@ var ScenarioExecution = class {
|
|
|
1266
2947
|
*
|
|
1267
2948
|
* This method is part of the ScenarioExecutionLike interface used by script steps.
|
|
1268
2949
|
*
|
|
1269
|
-
* @param content - Optional content for the agent's response. Can be a string or
|
|
2950
|
+
* @param content - Optional content for the agent's response. Can be a string or ModelMessage.
|
|
1270
2951
|
* If not provided, the agent under test will generate the response.
|
|
1271
2952
|
*
|
|
1272
2953
|
* @example
|
|
@@ -1277,7 +2958,7 @@ var ScenarioExecution = class {
|
|
|
1277
2958
|
* // Use provided content
|
|
1278
2959
|
* await execution.agent("The weather is sunny today!");
|
|
1279
2960
|
*
|
|
1280
|
-
* // Use a
|
|
2961
|
+
* // Use a ModelMessage object
|
|
1281
2962
|
* await execution.agent({
|
|
1282
2963
|
* role: "assistant",
|
|
1283
2964
|
* content: "I'm here to help you with weather information."
|
|
@@ -1358,17 +3039,22 @@ var ScenarioExecution = class {
|
|
|
1358
3039
|
* ```
|
|
1359
3040
|
*/
|
|
1360
3041
|
async proceed(turns, onTurn, onStep) {
|
|
3042
|
+
this.logger.debug(`[${this.config.id}] proceed called`, {
|
|
3043
|
+
turns,
|
|
3044
|
+
currentTurn: this.state.currentTurn
|
|
3045
|
+
});
|
|
1361
3046
|
let initialTurn = this.state.currentTurn;
|
|
1362
3047
|
while (true) {
|
|
1363
3048
|
const goToNextTurn = turns === void 0 || initialTurn === null || this.state.currentTurn != null && this.state.currentTurn + 1 < initialTurn + turns;
|
|
1364
|
-
|
|
3049
|
+
await this._step(goToNextTurn, onTurn);
|
|
1365
3050
|
if (initialTurn === null) initialTurn = this.state.currentTurn;
|
|
1366
|
-
if (
|
|
1367
|
-
return
|
|
3051
|
+
if (this.result) {
|
|
3052
|
+
return this.result;
|
|
1368
3053
|
}
|
|
1369
3054
|
if (onStep) await onStep(this.state);
|
|
1370
|
-
if (
|
|
1371
|
-
return
|
|
3055
|
+
if (!goToNextTurn) {
|
|
3056
|
+
return null;
|
|
3057
|
+
}
|
|
1372
3058
|
}
|
|
1373
3059
|
}
|
|
1374
3060
|
/**
|
|
@@ -1395,13 +3081,13 @@ var ScenarioExecution = class {
|
|
|
1395
3081
|
* ```
|
|
1396
3082
|
*/
|
|
1397
3083
|
async succeed(reasoning) {
|
|
1398
|
-
|
|
3084
|
+
this.setResult({
|
|
1399
3085
|
success: true,
|
|
1400
|
-
messages: this.state.messages,
|
|
1401
3086
|
reasoning: reasoning || "Scenario marked as successful with Scenario.succeed()",
|
|
1402
3087
|
metCriteria: [],
|
|
1403
3088
|
unmetCriteria: []
|
|
1404
|
-
};
|
|
3089
|
+
});
|
|
3090
|
+
return this.result;
|
|
1405
3091
|
}
|
|
1406
3092
|
/**
|
|
1407
3093
|
* Immediately ends the scenario with a failure verdict.
|
|
@@ -1427,13 +3113,13 @@ var ScenarioExecution = class {
|
|
|
1427
3113
|
* ```
|
|
1428
3114
|
*/
|
|
1429
3115
|
async fail(reasoning) {
|
|
1430
|
-
|
|
3116
|
+
this.setResult({
|
|
1431
3117
|
success: false,
|
|
1432
|
-
messages: this.state.messages,
|
|
1433
3118
|
reasoning: reasoning || "Scenario marked as failed with Scenario.fail()",
|
|
1434
3119
|
metCriteria: [],
|
|
1435
3120
|
unmetCriteria: []
|
|
1436
|
-
};
|
|
3121
|
+
});
|
|
3122
|
+
return this.result;
|
|
1437
3123
|
}
|
|
1438
3124
|
/**
|
|
1439
3125
|
* Adds execution time for a specific agent to the performance tracking.
|
|
@@ -1456,53 +3142,6 @@ var ScenarioExecution = class {
|
|
|
1456
3142
|
const currentTime = this.agentTimes.get(agentIdx) || 0;
|
|
1457
3143
|
this.agentTimes.set(agentIdx, currentTime + time);
|
|
1458
3144
|
}
|
|
1459
|
-
/**
|
|
1460
|
-
* Checks if a partial result has been set for the scenario.
|
|
1461
|
-
*
|
|
1462
|
-
* This method is used internally to determine if a scenario has already reached
|
|
1463
|
-
* a conclusion (success or failure) but hasn't been finalized yet. Partial results
|
|
1464
|
-
* are typically set by agents that make final decisions (like judge agents) and
|
|
1465
|
-
* are later finalized with the complete message history.
|
|
1466
|
-
*
|
|
1467
|
-
* @returns True if a partial result exists, false otherwise
|
|
1468
|
-
*
|
|
1469
|
-
* @example
|
|
1470
|
-
* ```typescript
|
|
1471
|
-
* // This is typically used internally by the execution engine
|
|
1472
|
-
* if (execution.hasResult()) {
|
|
1473
|
-
* console.log('Scenario has reached a conclusion');
|
|
1474
|
-
* }
|
|
1475
|
-
* ```
|
|
1476
|
-
*/
|
|
1477
|
-
hasResult() {
|
|
1478
|
-
return this.partialResult !== null;
|
|
1479
|
-
}
|
|
1480
|
-
/**
|
|
1481
|
-
* Sets a partial result for the scenario.
|
|
1482
|
-
*
|
|
1483
|
-
* This method is used internally to store intermediate results that may be
|
|
1484
|
-
* finalized later with the complete message history. Partial results are typically
|
|
1485
|
-
* created by agents that make final decisions (like judge agents) and contain
|
|
1486
|
-
* the success/failure status, reasoning, and criteria evaluation, but not the
|
|
1487
|
-
* complete message history.
|
|
1488
|
-
*
|
|
1489
|
-
* @param result - The partial result without the messages field. Should include
|
|
1490
|
-
* success status, reasoning, and criteria evaluation.
|
|
1491
|
-
*
|
|
1492
|
-
* @example
|
|
1493
|
-
* ```typescript
|
|
1494
|
-
* // This is typically called internally by agents that make final decisions
|
|
1495
|
-
* execution.setResult({
|
|
1496
|
-
* success: true,
|
|
1497
|
-
* reasoning: "Agent provided accurate weather information",
|
|
1498
|
-
* metCriteria: ["Provides accurate weather data"],
|
|
1499
|
-
* unmetCriteria: []
|
|
1500
|
-
* });
|
|
1501
|
-
* ```
|
|
1502
|
-
*/
|
|
1503
|
-
setResult(result) {
|
|
1504
|
-
this.partialResult = result;
|
|
1505
|
-
}
|
|
1506
3145
|
/**
|
|
1507
3146
|
* Internal method to handle script step calls to agents.
|
|
1508
3147
|
*
|
|
@@ -1515,7 +3154,7 @@ var ScenarioExecution = class {
|
|
|
1515
3154
|
* - Progress to a new turn if no agent is available
|
|
1516
3155
|
* - Execute the agent with the provided content or let it generate content
|
|
1517
3156
|
* - Handle judgment requests for judge agents
|
|
1518
|
-
* -
|
|
3157
|
+
* - Set the result if the agent makes a decision
|
|
1519
3158
|
*
|
|
1520
3159
|
* @param role - The role of the agent to call (USER, AGENT, or JUDGE)
|
|
1521
3160
|
* @param content - Optional content to use instead of letting the agent generate it
|
|
@@ -1525,6 +3164,11 @@ var ScenarioExecution = class {
|
|
|
1525
3164
|
* @throws Error if no agent is found for the specified role
|
|
1526
3165
|
*/
|
|
1527
3166
|
async scriptCallAgent(role, content, judgmentRequest = false) {
|
|
3167
|
+
this.logger.debug(`[${this.config.id}] scriptCallAgent`, {
|
|
3168
|
+
role,
|
|
3169
|
+
hasContent: content !== void 0,
|
|
3170
|
+
judgmentRequest
|
|
3171
|
+
});
|
|
1528
3172
|
this.consumeUntilRole(role);
|
|
1529
3173
|
let index = -1;
|
|
1530
3174
|
let agent2 = null;
|
|
@@ -1569,11 +3213,8 @@ var ScenarioExecution = class {
|
|
|
1569
3213
|
this.broadcastMessage(message2, index);
|
|
1570
3214
|
return null;
|
|
1571
3215
|
}
|
|
1572
|
-
|
|
1573
|
-
|
|
1574
|
-
return result;
|
|
1575
|
-
}
|
|
1576
|
-
return null;
|
|
3216
|
+
await this.callAgent(index, role, judgmentRequest);
|
|
3217
|
+
return this.result ?? null;
|
|
1577
3218
|
}
|
|
1578
3219
|
/**
|
|
1579
3220
|
* Resets the scenario execution to its initial state.
|
|
@@ -1589,8 +3230,14 @@ var ScenarioExecution = class {
|
|
|
1589
3230
|
* - Starts the first turn
|
|
1590
3231
|
* - Records the start time for performance tracking
|
|
1591
3232
|
* - Clears any pending messages
|
|
3233
|
+
* - Clears the result from any previous execution
|
|
1592
3234
|
*/
|
|
1593
3235
|
reset() {
|
|
3236
|
+
this.logger.debug(`[${this.config.id}] Resetting scenario execution`);
|
|
3237
|
+
if (this.currentTurnSpan) {
|
|
3238
|
+
this.currentTurnSpan.end();
|
|
3239
|
+
this.currentTurnSpan = void 0;
|
|
3240
|
+
}
|
|
1594
3241
|
this.state = new ScenarioExecutionState(this.config);
|
|
1595
3242
|
this.state.threadId = this.config.threadId || generateThreadId();
|
|
1596
3243
|
this.setAgents(this.config.agents);
|
|
@@ -1598,6 +3245,11 @@ var ScenarioExecution = class {
|
|
|
1598
3245
|
this.state.currentTurn = 0;
|
|
1599
3246
|
this.totalStartTime = Date.now();
|
|
1600
3247
|
this.pendingMessages.clear();
|
|
3248
|
+
this._result = void 0;
|
|
3249
|
+
this.logger.debug(`[${this.config.id}] Reset complete`, {
|
|
3250
|
+
threadId: this.state.threadId,
|
|
3251
|
+
agentCount: this.agents.length
|
|
3252
|
+
});
|
|
1601
3253
|
}
|
|
1602
3254
|
nextAgentForRole(role) {
|
|
1603
3255
|
for (const agent2 of this.agents) {
|
|
@@ -1618,6 +3270,11 @@ var ScenarioExecution = class {
|
|
|
1618
3270
|
* multiple agent interactions as agents respond to each other's messages.
|
|
1619
3271
|
*/
|
|
1620
3272
|
newTurn() {
|
|
3273
|
+
const previousTurn = this.state.currentTurn;
|
|
3274
|
+
if (this.currentTurnSpan) {
|
|
3275
|
+
this.currentTurnSpan.end();
|
|
3276
|
+
this.currentTurnSpan = void 0;
|
|
3277
|
+
}
|
|
1621
3278
|
this.pendingAgentsOnTurn = new Set(this.agents);
|
|
1622
3279
|
this.pendingRolesOnTurn = [
|
|
1623
3280
|
"User" /* USER */,
|
|
@@ -1629,6 +3286,19 @@ var ScenarioExecution = class {
|
|
|
1629
3286
|
} else {
|
|
1630
3287
|
this.state.currentTurn++;
|
|
1631
3288
|
}
|
|
3289
|
+
this.logger.debug(`[${this.config.id}] New turn started`, {
|
|
3290
|
+
previousTurn,
|
|
3291
|
+
currentTurn: this.state.currentTurn,
|
|
3292
|
+
agentCount: this.agents.length
|
|
3293
|
+
});
|
|
3294
|
+
this.currentTurnSpan = this.tracer.startSpan("Scenario Turn", {
|
|
3295
|
+
attributes: {
|
|
3296
|
+
"scenario.name": this.config.name,
|
|
3297
|
+
"scenario.id": this.config.id,
|
|
3298
|
+
[import_observability3.attributes.ATTR_LANGWATCH_THREAD_ID]: this.state.threadId,
|
|
3299
|
+
"scenario.turn": this.state.currentTurn
|
|
3300
|
+
}
|
|
3301
|
+
});
|
|
1632
3302
|
}
|
|
1633
3303
|
removePendingRole(role) {
|
|
1634
3304
|
const index = this.pendingRolesOnTurn.indexOf(role);
|
|
@@ -1664,7 +3334,7 @@ var ScenarioExecution = class {
|
|
|
1664
3334
|
*
|
|
1665
3335
|
* This method is called when the scenario execution reaches the maximum number
|
|
1666
3336
|
* of turns without reaching a conclusion. It creates a failure result with
|
|
1667
|
-
* appropriate reasoning and includes performance metrics.
|
|
3337
|
+
* appropriate reasoning and includes performance metrics, then sets it on this.result.
|
|
1668
3338
|
*
|
|
1669
3339
|
* The result includes:
|
|
1670
3340
|
* - All messages from the conversation
|
|
@@ -1674,24 +3344,15 @@ var ScenarioExecution = class {
|
|
|
1674
3344
|
* - Total execution time and agent response times
|
|
1675
3345
|
*
|
|
1676
3346
|
* @param errorMessage - Optional custom error message to use instead of the default
|
|
1677
|
-
* @returns A ScenarioResult indicating failure due to reaching max turns
|
|
1678
3347
|
*/
|
|
1679
3348
|
reachedMaxTurns(errorMessage) {
|
|
1680
3349
|
var _a;
|
|
1681
|
-
|
|
1682
|
-
const agentTimes = agentRoleAgentsIdx.map(
|
|
1683
|
-
(i) => this.agentTimes.get(i) || 0
|
|
1684
|
-
);
|
|
1685
|
-
const totalAgentTime = agentTimes.reduce((sum, time) => sum + time, 0);
|
|
1686
|
-
return {
|
|
3350
|
+
this.setResult({
|
|
1687
3351
|
success: false,
|
|
1688
|
-
messages: this.state.messages,
|
|
1689
3352
|
reasoning: errorMessage || `Reached maximum turns (${this.config.maxTurns || 10}) without conclusion`,
|
|
1690
3353
|
metCriteria: [],
|
|
1691
|
-
unmetCriteria: ((_a = this.getJudgeAgent()) == null ? void 0 : _a.criteria) ?? []
|
|
1692
|
-
|
|
1693
|
-
agentTime: totalAgentTime
|
|
1694
|
-
};
|
|
3354
|
+
unmetCriteria: ((_a = this.getJudgeAgent()) == null ? void 0 : _a.criteria) ?? []
|
|
3355
|
+
});
|
|
1695
3356
|
}
|
|
1696
3357
|
getJudgeAgent() {
|
|
1697
3358
|
return this.agents.find((agent2) => agent2 instanceof JudgeAgentAdapter) ?? null;
|
|
@@ -1763,6 +3424,10 @@ var ScenarioExecution = class {
|
|
|
1763
3424
|
};
|
|
1764
3425
|
this.emitEvent(event);
|
|
1765
3426
|
this.eventSubject.complete();
|
|
3427
|
+
if (this.currentTurnSpan) {
|
|
3428
|
+
this.currentTurnSpan.end();
|
|
3429
|
+
this.currentTurnSpan = void 0;
|
|
3430
|
+
}
|
|
1766
3431
|
}
|
|
1767
3432
|
/**
|
|
1768
3433
|
* Distributes a message to all other agents in the scenario.
|
|
@@ -1794,13 +3459,20 @@ var ScenarioExecution = class {
|
|
|
1794
3459
|
* ```
|
|
1795
3460
|
*/
|
|
1796
3461
|
broadcastMessage(message2, fromAgentIdx) {
|
|
3462
|
+
const recipients = [];
|
|
1797
3463
|
for (let idx = 0; idx < this.agents.length; idx++) {
|
|
1798
3464
|
if (idx === fromAgentIdx) continue;
|
|
1799
3465
|
if (!this.pendingMessages.has(idx)) {
|
|
1800
3466
|
this.pendingMessages.set(idx, []);
|
|
1801
3467
|
}
|
|
1802
3468
|
this.pendingMessages.get(idx).push(message2);
|
|
3469
|
+
recipients.push(idx);
|
|
1803
3470
|
}
|
|
3471
|
+
this.logger.debug(`[${this.config.id}] Broadcast message`, {
|
|
3472
|
+
role: message2.role,
|
|
3473
|
+
fromAgentIdx,
|
|
3474
|
+
recipients
|
|
3475
|
+
});
|
|
1804
3476
|
}
|
|
1805
3477
|
/**
|
|
1806
3478
|
* Executes a single script step with proper error handling and logging.
|
|
@@ -1859,7 +3531,8 @@ function convertAgentReturnTypesToMessages(response, role) {
|
|
|
1859
3531
|
if (typeof response === "string")
|
|
1860
3532
|
return [{ role, content: response }];
|
|
1861
3533
|
if (Array.isArray(response)) return response;
|
|
1862
|
-
if (typeof response === "object" && "role" in response)
|
|
3534
|
+
if (response && typeof response === "object" && "role" in response)
|
|
3535
|
+
return [response];
|
|
1863
3536
|
return [];
|
|
1864
3537
|
}
|
|
1865
3538
|
function extractErrorInfo(error) {
|
|
@@ -1886,9 +3559,27 @@ __export(runner_exports, {
|
|
|
1886
3559
|
var import_rxjs3 = require("rxjs");
|
|
1887
3560
|
|
|
1888
3561
|
// src/events/event-alert-message-logger.ts
|
|
3562
|
+
var fs2 = __toESM(require("fs"));
|
|
3563
|
+
var os = __toESM(require("os"));
|
|
3564
|
+
var path2 = __toESM(require("path"));
|
|
1889
3565
|
var import_open = __toESM(require("open"));
|
|
1890
|
-
var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
1891
|
-
|
|
3566
|
+
var EventAlertMessageLogger = class {
|
|
3567
|
+
/**
|
|
3568
|
+
* Creates a coordination file to prevent duplicate messages across processes.
|
|
3569
|
+
* Returns true if this process should show the message (first one to create the file).
|
|
3570
|
+
*/
|
|
3571
|
+
createCoordinationFile(type) {
|
|
3572
|
+
try {
|
|
3573
|
+
const batchId = getBatchRunId();
|
|
3574
|
+
const tmpDir = os.tmpdir();
|
|
3575
|
+
const fileName = `scenario-${type}-${batchId}`;
|
|
3576
|
+
const filePath = path2.join(tmpDir, fileName);
|
|
3577
|
+
fs2.writeFileSync(filePath, process.pid.toString(), { flag: "wx" });
|
|
3578
|
+
return true;
|
|
3579
|
+
} catch {
|
|
3580
|
+
return false;
|
|
3581
|
+
}
|
|
3582
|
+
}
|
|
1892
3583
|
/**
|
|
1893
3584
|
* Shows a fancy greeting message about simulation reporting status.
|
|
1894
3585
|
* Only shows once per batch run to avoid spam.
|
|
@@ -1897,10 +3588,9 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
|
1897
3588
|
if (this.isGreetingDisabled()) {
|
|
1898
3589
|
return;
|
|
1899
3590
|
}
|
|
1900
|
-
if (_EventAlertMessageLogger.shownBatchIds.has(getBatchRunId())) {
|
|
3591
|
+
if (!this.createCoordinationFile("greeting")) {
|
|
1901
3592
|
return;
|
|
1902
3593
|
}
|
|
1903
|
-
_EventAlertMessageLogger.shownBatchIds.add(getBatchRunId());
|
|
1904
3594
|
this.displayGreeting();
|
|
1905
3595
|
}
|
|
1906
3596
|
/**
|
|
@@ -1911,6 +3601,9 @@ var EventAlertMessageLogger = class _EventAlertMessageLogger {
|
|
|
1911
3601
|
if (this.isGreetingDisabled()) {
|
|
1912
3602
|
return;
|
|
1913
3603
|
}
|
|
3604
|
+
if (!this.createCoordinationFile(`watch-${params.scenarioSetId}`)) {
|
|
3605
|
+
return;
|
|
3606
|
+
}
|
|
1914
3607
|
await this.displayWatchMessage(params);
|
|
1915
3608
|
}
|
|
1916
3609
|
isGreetingDisabled() {
|
|
@@ -1998,6 +3691,7 @@ var EventReporter = class {
|
|
|
1998
3691
|
} else {
|
|
1999
3692
|
const errorText = await response.text();
|
|
2000
3693
|
this.logger.error(`[${event.type}] Event POST failed:`, {
|
|
3694
|
+
endpoint: this.eventsEndpoint.href,
|
|
2001
3695
|
status: response.status,
|
|
2002
3696
|
statusText: response.statusText,
|
|
2003
3697
|
error: errorText,
|
|
@@ -2155,19 +3849,27 @@ var agent = (content) => {
|
|
|
2155
3849
|
return (_state, executor) => executor.agent(content);
|
|
2156
3850
|
};
|
|
2157
3851
|
var judge = (content) => {
|
|
2158
|
-
return (_state, executor) => executor.judge(content);
|
|
3852
|
+
return async (_state, executor) => {
|
|
3853
|
+
await executor.judge(content);
|
|
3854
|
+
};
|
|
2159
3855
|
};
|
|
2160
3856
|
var user = (content) => {
|
|
2161
3857
|
return (_state, executor) => executor.user(content);
|
|
2162
3858
|
};
|
|
2163
3859
|
var proceed = (turns, onTurn, onStep) => {
|
|
2164
|
-
return (_state, executor) => executor.proceed(turns, onTurn, onStep);
|
|
3860
|
+
return async (_state, executor) => {
|
|
3861
|
+
await executor.proceed(turns, onTurn, onStep);
|
|
3862
|
+
};
|
|
2165
3863
|
};
|
|
2166
3864
|
var succeed = (reasoning) => {
|
|
2167
|
-
return (_state, executor) => executor.succeed(reasoning);
|
|
3865
|
+
return async (_state, executor) => {
|
|
3866
|
+
await executor.succeed(reasoning);
|
|
3867
|
+
};
|
|
2168
3868
|
};
|
|
2169
3869
|
var fail = (reasoning) => {
|
|
2170
|
-
return (_state, executor) => executor.fail(reasoning);
|
|
3870
|
+
return async (_state, executor) => {
|
|
3871
|
+
await executor.fail(reasoning);
|
|
3872
|
+
};
|
|
2171
3873
|
};
|
|
2172
3874
|
|
|
2173
3875
|
// src/runner/run.ts
|
|
@@ -2178,7 +3880,7 @@ async function run(cfg) {
|
|
|
2178
3880
|
if (!cfg.description) {
|
|
2179
3881
|
throw new Error("Scenario description is required");
|
|
2180
3882
|
}
|
|
2181
|
-
if (
|
|
3883
|
+
if (cfg.maxTurns && cfg.maxTurns < 1) {
|
|
2182
3884
|
throw new Error("Max turns must be at least 1");
|
|
2183
3885
|
}
|
|
2184
3886
|
if (cfg.agents.length === 0) {
|
|
@@ -2200,10 +3902,10 @@ async function run(cfg) {
|
|
|
2200
3902
|
let eventBus = null;
|
|
2201
3903
|
let subscription = null;
|
|
2202
3904
|
try {
|
|
2203
|
-
const
|
|
3905
|
+
const envConfig2 = getEnv();
|
|
2204
3906
|
eventBus = new EventBus({
|
|
2205
|
-
endpoint:
|
|
2206
|
-
apiKey:
|
|
3907
|
+
endpoint: envConfig2.LANGWATCH_ENDPOINT,
|
|
3908
|
+
apiKey: envConfig2.LANGWATCH_API_KEY
|
|
2207
3909
|
});
|
|
2208
3910
|
eventBus.listen();
|
|
2209
3911
|
subscription = eventBus.subscribeTo(execution.events$);
|
|
@@ -2254,14 +3956,13 @@ function formatPart(part) {
|
|
|
2254
3956
|
case "file":
|
|
2255
3957
|
return `(file): ${part.filename} ${typeof part.data === "string" ? `url:${part.data}` : "base64:omitted"}`;
|
|
2256
3958
|
case "tool-call":
|
|
2257
|
-
return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.args)})`;
|
|
3959
|
+
return `(tool call): ${part.toolName} id:${part.toolCallId} args:(${JSON.stringify(part.input)})`;
|
|
2258
3960
|
case "tool-result":
|
|
2259
|
-
return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.result)})`;
|
|
3961
|
+
return `(tool result): ${part.toolName} id:${part.toolCallId} result:(${JSON.stringify(part.output)})`;
|
|
2260
3962
|
case "reasoning":
|
|
2261
3963
|
return `(reasoning): ${part.text}`;
|
|
2262
|
-
case "redacted-reasoning":
|
|
2263
|
-
return `(redacted reasoning): ${part.data}`;
|
|
2264
3964
|
default:
|
|
3965
|
+
part;
|
|
2265
3966
|
return `Unknown content: ${JSON.stringify(part)}`;
|
|
2266
3967
|
}
|
|
2267
3968
|
}
|
|
@@ -2280,9 +3981,11 @@ var index_default = scenario;
|
|
|
2280
3981
|
AgentAdapter,
|
|
2281
3982
|
AgentRole,
|
|
2282
3983
|
DEFAULT_MAX_TURNS,
|
|
2283
|
-
DEFAULT_TEMPERATURE,
|
|
2284
3984
|
DEFAULT_VERBOSE,
|
|
2285
3985
|
JudgeAgentAdapter,
|
|
3986
|
+
JudgeSpanCollector,
|
|
3987
|
+
JudgeSpanDigestFormatter,
|
|
3988
|
+
RealtimeAgentAdapter,
|
|
2286
3989
|
ScenarioExecution,
|
|
2287
3990
|
ScenarioExecutionState,
|
|
2288
3991
|
StateChangeEventType,
|
|
@@ -2293,6 +3996,8 @@ var index_default = scenario;
|
|
|
2293
3996
|
fail,
|
|
2294
3997
|
judge,
|
|
2295
3998
|
judgeAgent,
|
|
3999
|
+
judgeSpanCollector,
|
|
4000
|
+
judgeSpanDigestFormatter,
|
|
2296
4001
|
message,
|
|
2297
4002
|
proceed,
|
|
2298
4003
|
run,
|