@elizaos/cli 1.4.4 → 1.4.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64):
  1. package/dist/BrowserWebSocketTransport-5YQPVDV7.js +7 -0
  2. package/dist/EnhancedEvaluationEngine-APOQ6INN.js +473 -0
  3. package/dist/EvaluationEngine-Y7ZQJBRC.js +9 -0
  4. package/dist/LocalEnvironmentProvider-JWFGG4IN.js +15 -0
  5. package/dist/NodeWebSocketTransport-PUO724EY.js +8 -0
  6. package/dist/ScreenRecorder-YK246DNJ.js +10 -0
  7. package/dist/agent-start-6QJQAMKA.js +13 -0
  8. package/dist/bidi-2SVNH6F7.js +15309 -0
  9. package/dist/{bun-exec-ULMPAIQC.js → bun-exec-NH4UCUY4.js} +1 -1
  10. package/dist/chunk-2ESYSVXG.js +48 -0
  11. package/dist/chunk-3AEYIKBZ.js +432 -0
  12. package/dist/chunk-5IWKEMEF.js +239 -0
  13. package/dist/chunk-5WZO2HMM.js +2644 -0
  14. package/dist/chunk-ABGBVB74.js +3501 -0
  15. package/dist/{chunk-NSNXXD3I.js → chunk-BCO32GR6.js} +2 -2
  16. package/dist/chunk-CGXTFHQP.js +25 -0
  17. package/dist/chunk-EXUFDTUD.js +3948 -0
  18. package/dist/chunk-FGGNHEXZ.js +211860 -0
  19. package/dist/chunk-FWYHSCLF.js +243 -0
  20. package/dist/chunk-I57T3WPO.js +165 -0
  21. package/dist/chunk-LBZLMFFF.js +221 -0
  22. package/dist/chunk-LG7YDBMV.js +401 -0
  23. package/dist/chunk-NHKLUXNE.js +166 -0
  24. package/dist/chunk-PUZHCSGF.js +828 -0
  25. package/dist/chunk-PWDR7CPA.js +7828 -0
  26. package/dist/{chunk-N5G5XSGP.js → chunk-Q6M2K53X.js} +3 -3
  27. package/dist/chunk-SVHCNBHM.js +289 -0
  28. package/dist/{chunk-HOC6B3QV.js → chunk-VFFOOPYS.js} +4 -238
  29. package/dist/chunk-WX37MM4G.js +292 -0
  30. package/dist/chunk-XFJIHUT3.js +6 -0
  31. package/dist/chunk-XPPESCCM.js +787 -0
  32. package/dist/chunk-YBDC5OZO.js +40 -0
  33. package/dist/commands/agent/actions/index.js +2 -2
  34. package/dist/commands/agent/index.js +2 -2
  35. package/dist/commands/create/actions/index.js +4 -3
  36. package/dist/commands/create/index.js +5 -4
  37. package/dist/commands/shared/index.js +1 -1
  38. package/dist/index.js +66796 -4986
  39. package/dist/js-yaml-KADNMPWR.js +35 -0
  40. package/dist/matrix-orchestrator-3WLRK7GG.js +1070 -0
  41. package/dist/matrix-runner-KDPETCKQ.js +160 -0
  42. package/dist/matrix-schema-PCO2KGJY.js +102 -0
  43. package/dist/parameter-override-ALOPPXCE.js +487 -0
  44. package/dist/{plugin-creator-TCUFII32.js → plugin-creator-J7GNPMPG.js} +1 -1
  45. package/dist/process-manager-IU2A3BTQ.js +9 -0
  46. package/dist/{registry-ELONUC44.js → registry-65KMEA7N.js} +2 -2
  47. package/dist/resource-monitor-EHZSH2P6.js +15 -0
  48. package/dist/run-isolation-PGLZ37Y7.js +29 -0
  49. package/dist/runtime-factory-Q4U5YBNV.js +22 -0
  50. package/dist/schema-C25LVPEK.js +17 -0
  51. package/dist/src/commands/report/src/assets/report_template.html +1704 -0
  52. package/dist/src-EJG4ILDC.js +5 -0
  53. package/dist/templates/plugin-quick-starter/package.json +2 -2
  54. package/dist/templates/plugin-starter/package.json +2 -2
  55. package/dist/templates/project-starter/package.json +4 -4
  56. package/dist/templates/project-tee-starter/package.json +4 -4
  57. package/dist/typescript-ZF3IK2DJ.js +5 -0
  58. package/dist/{utils-X6UXPLKD.js → utils-QFD2PW4X.js} +2 -2
  59. package/package.json +14 -8
  60. package/templates/plugin-quick-starter/package.json +2 -2
  61. package/templates/plugin-starter/package.json +2 -2
  62. package/templates/project-starter/package.json +4 -4
  63. package/templates/project-tee-starter/package.json +4 -4
  64. package/dist/chunk-3RG5ZIWI.js +0 -10
@@ -0,0 +1,401 @@
1
+ import {
2
+ askAgentViaApi
3
+ } from "./chunk-PUZHCSGF.js";
4
+ import {
5
+ bunExec
6
+ } from "./chunk-I4L4T7QX.js";
7
+
8
+ // src/commands/scenario/src/TrajectoryReconstructor.ts
9
/**
 * Rebuilds an agent "trajectory" (thought / action / observation steps) from the
 * `action_result` memories stored in the runtime's "messages" table.
 *
 * NOTE(review): `var` is kept exactly as in the bundled original so the
 * binding's hoisting behaviour is unchanged.
 */
var TrajectoryReconstructor = class {
  runtime;

  /**
   * @param {object} runtime - Object exposing `agentId` and an async
   *   `getMemories(query)` that resolves to an array of memory records.
   */
  constructor(runtime) {
    this.runtime = runtime;
  }

  /**
   * Reconstruct trajectory from memories (using same approach as TrajectoryContainsActionEvaluator)
   *
   * Fetches up to 100 memories for the agent, selects the memories of one room
   * and converts every `action_result` memory into an optional "thought" step,
   * an "action" step and an "observation" step, sorted by timestamp.
   *
   * Room selection: memories stored under the requested `roomId` are used when
   * any exist. Only when none match does the method fall back to the first
   * room id seen in the fetched memories (the original "mismatch" workaround).
   *
   * @param {string} roomId - Room whose trajectory is wanted.
   * @param {number} [timeWindowMs=30000] - Only used to compute the reported
   *   `startTime`; memories are NOT filtered by time in this method.
   * @returns {Promise<{steps: object[], runId: (string|undefined), startTime: number, endTime: number, totalSteps: number}>}
   */
  async reconstructTrajectory(roomId, timeWindowMs = 3e4) {
    const endTime = Date.now();
    const startTime = endTime - timeWindowMs;

    // Render a value as text so `.length` / `.substring` below can never throw.
    const toText = (value) => {
      if (typeof value === "string") {
        return value;
      }
      if (value !== null && typeof value === "object") {
        try {
          return JSON.stringify(value);
        } catch {
          return String(value);
        }
      }
      return String(value);
    };

    // ISO timestamp that falls back to `fallbackMs` instead of throwing a
    // RangeError when `value` is missing or not a valid date.
    const toIso = (value, fallbackMs) => {
      const parsed = new Date(value || fallbackMs);
      return Number.isNaN(parsed.getTime())
        ? new Date(fallbackMs).toISOString()
        : parsed.toISOString();
    };

    const fetched = await this.runtime.getMemories({
      tableName: "messages",
      agentId: this.runtime.agentId,
      count: 100,
      unique: false
    });
    // Drop null/undefined entries up front; a non-array result is treated as empty.
    const allMemories = Array.isArray(fetched) ? fetched.filter(Boolean) : [];

    console.log(`\n\u{1F50D} [TrajectoryReconstructor] ===== MEMORY ANALYSIS START =====`);
    console.log(
      `\u{1F50D} [TrajectoryReconstructor] Found ${allMemories.length} total memories for agent`
    );
    console.log(`\u{1F50D} [TrajectoryReconstructor] All roomIds found in memories:`);

    // One pass: room id -> memory count. Map keeps first-seen order.
    const roomCounts = new Map();
    for (const mem of allMemories) {
      if (mem.roomId) {
        roomCounts.set(mem.roomId, (roomCounts.get(mem.roomId) ?? 0) + 1);
      }
    }
    const uniqueRoomIds = [...roomCounts.keys()];
    uniqueRoomIds.forEach((rId, i) => {
      console.log(` ${i + 1}. ${rId} (${roomCounts.get(rId)} memories)`);
    });
    console.log(`\u{1F50D} [TrajectoryReconstructor] Original roomId: ${roomId}`);

    // Prefer the room the caller asked for; fall back to the first room seen
    // only when there are no memories for the requested room.
    const actualRoomId =
      roomCounts.has(roomId) || uniqueRoomIds.length === 0 ? roomId : uniqueRoomIds[0];
    if (actualRoomId !== roomId) {
      console.log(
        `\u{1F527} [TrajectoryReconstructor] ROOMID MISMATCH DETECTED - Using actual roomId: ${actualRoomId}`
      );
    }

    const memories = allMemories.filter((mem) => mem.roomId === actualRoomId);
    console.log(
      `\u{1F50D} [TrajectoryReconstructor] Found ${memories.length} memories using actual roomId`
    );

    // Verbose per-memory dump (debug output only, no effect on the result).
    memories.forEach((mem, index) => {
      console.log(`\n--- Memory ${index + 1}/${memories.length} ---`);
      console.log(`ID: ${mem.id}`);
      console.log(`CreatedAt: ${mem.createdAt} (${toIso(mem.createdAt, 0)})`);
      console.log(`Type: ${mem.type || "undefined"}`);
      console.log(`Content Type: ${typeof mem.content}`);
      if (mem.content && typeof mem.content === "object") {
        console.log(`Content.type: ${mem.content?.type}`);
        console.log(`Content keys:`, Object.keys(mem.content));
        if (mem.content?.type === "action_result") {
          console.log(
            `\u{1F3AF} FOUND ACTION_RESULT - FULL CONTENT:`,
            JSON.stringify(mem.content, null, 2)
          );
        } else if (mem.content?.type === "user" || mem.content?.type === "agent") {
          console.log(
            `\u{1F4AC} MESSAGE CONTENT:`,
            JSON.stringify(
              {
                type: mem.content.type,
                text: mem.content.text,
                content: mem.content.content
              },
              null,
              2
            )
          );
        } else {
          console.log(`\u{1F4CB} OTHER CONTENT:`, JSON.stringify(mem.content, null, 2));
        }
      } else {
        console.log(`Raw Content:`, mem.content);
      }
    });
    console.log(`\u{1F50D} [TrajectoryReconstructor] ===== MEMORY ANALYSIS END =====\n`);

    const actionMemories = memories.filter(
      (mem) => typeof mem.content === "object" && mem.content?.type === "action_result"
    );
    const steps = [];
    // NOTE(review): nothing in this method adds to `runIds`, so the returned
    // `runId` is always undefined. Kept as in the original; confirm which
    // memory field should feed it before populating.
    const runIds = /* @__PURE__ */ new Set();
    console.log(
      `\u{1F3AF} [TrajectoryReconstructor] Processing ${actionMemories.length} action memories...`
    );

    for (const memory of actionMemories) {
      const content = memory.content;
      console.log(`\n\u{1F504} Processing action memory ${memory.id}...`);
      console.log(` actionName: ${content?.actionName}`);
      console.log(` actionParams:`, content?.actionParams);
      console.log(` actionResult:`, content?.actionResult);
      console.log(` thought:`, content?.thought);
      console.log(` planThought:`, content?.planThought);
      console.log(` actionStatus:`, content?.actionStatus);

      const actionName = content?.actionName || "unknown";
      const actionParams = content?.actionParams || {};
      const actionResult = content?.actionResult || {};
      const thought = content?.thought || content?.planThought || "";

      // Pick the first populated output field, then the raw string, then the
      // JSON of the whole result object; always end up with a string.
      let observationContent = "";
      if (actionResult?.text) {
        observationContent = toText(actionResult.text);
      } else if (actionResult?.stdout) {
        observationContent = toText(actionResult.stdout);
      } else if (actionResult?.output) {
        observationContent = toText(actionResult.output);
      } else if (typeof actionResult === "string") {
        observationContent = actionResult;
      } else if (actionResult && typeof actionResult === "object") {
        observationContent = toText(actionResult);
      }
      console.log(
        ` \u{1F4CB} Extracted observation (${observationContent.length} chars):`,
        observationContent.substring(0, 200)
      );

      // All steps derived from one memory share that memory's timestamp.
      const timestamp = toIso(memory.createdAt, Date.now());

      // Only string thoughts with visible content become a step.
      if (typeof thought === "string" && thought.trim()) {
        const thoughtStep = {
          type: "thought",
          timestamp,
          content: thought
        };
        steps.push(thoughtStep);
        console.log(` \u{1F4AD} Created thought step:`, JSON.stringify(thoughtStep, null, 2));
      }

      const actionStep = {
        type: "action",
        timestamp,
        content: {
          name: actionName,
          parameters: actionParams
        }
      };
      steps.push(actionStep);
      console.log(` \u26A1 Created action step:`, JSON.stringify(actionStep, null, 2));

      const observationStep = {
        type: "observation",
        timestamp,
        content: observationContent
      };
      steps.push(observationStep);
      console.log(` \u{1F441}\uFE0F Created observation step:`, JSON.stringify(observationStep, null, 2));
    }

    // Order by timestamp; steps that share a timestamp are pushed in
    // thought -> action -> observation order above.
    steps.sort((a, b) => a.timestamp.localeCompare(b.timestamp));
    return {
      steps,
      runId: runIds.size === 1 ? Array.from(runIds)[0] : void 0,
      startTime,
      endTime,
      totalSteps: steps.length
    };
  }

  /**
   * Get latest trajectory for a room (convenience method) with retry logic
   *
   * Calls `reconstructTrajectory` up to 3 times, waiting 2000 ms between
   * attempts, and returns the steps of the first attempt that yields any.
   *
   * @param {string} roomId - Room whose trajectory is wanted.
   * @returns {Promise<object[]>} The trajectory steps, or `[]` if every attempt found none.
   */
  async getLatestTrajectory(roomId) {
    console.log(`\u{1F50D} [TrajectoryReconstructor] Starting reconstruction for room: ${roomId}`);
    const maxRetries = 3;
    const retryDelayMs = 2e3;
    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      console.log(`\n\u{1F504} [TrajectoryReconstructor] ===== ATTEMPT ${attempt}/${maxRetries} =====`);
      const trajectory = await this.reconstructTrajectory(roomId, 3e4);
      console.log(
        `\u{1F4CA} [TrajectoryReconstructor] Found ${trajectory.steps.length} trajectory steps on attempt ${attempt}`
      );
      console.log(
        `\u{1F4CA} [TrajectoryReconstructor] Time window: ${trajectory.startTime} - ${trajectory.endTime}`
      );
      if (trajectory.steps.length > 0) {
        console.log(
          `\u2705 [TrajectoryReconstructor] SUCCESS on attempt ${attempt}: Found ${trajectory.steps.length} trajectory steps`
        );
        console.log(
          `\u{1F4CA} [TrajectoryReconstructor] Actions found:`,
          trajectory.steps.filter((s) => s.type === "action").map((s) => s.content.name)
        );
        console.log(
          `\u{1F4CA} [TrajectoryReconstructor] First step sample:`,
          JSON.stringify(trajectory.steps[0], null, 2)
        );
        console.log(`\u{1F4CA} [TrajectoryReconstructor] ===== SUCCESS END =====\n`);
        return trajectory.steps;
      }
      console.log(
        `\u26A0\uFE0F [TrajectoryReconstructor] Attempt ${attempt} found 0 steps. ${attempt < maxRetries ? "Retrying..." : "Final attempt failed."}`
      );
      if (attempt < maxRetries) {
        console.log(`\u23F3 [TrajectoryReconstructor] Waiting ${retryDelayMs}ms before retry...`);
        await new Promise((resolve) => setTimeout(resolve, retryDelayMs));
      }
    }
    console.log(
      `\u274C [TrajectoryReconstructor] All ${maxRetries} attempts failed - returning empty trajectory`
    );
    console.log(`\u{1F4CA} [TrajectoryReconstructor] ===== FINAL FAILURE =====\n`);
    return [];
  }
};
209
+
210
+ // src/commands/scenario/src/LocalEnvironmentProvider.ts
211
+ import fs from "fs/promises";
212
+ import path from "path";
213
+ import os from "os";
214
/**
 * Runs scenario steps on the local machine inside a throw-away temp directory.
 *
 * Lifecycle: `setup()` creates the directory (and seeds it from
 * `scenario.setup.virtual_fs`), `run()` executes each step and records its
 * result, `teardown()` removes the directory.
 *
 * NOTE(review): `var` is kept exactly as in the bundled original so the
 * binding's hoisting behaviour is unchanged.
 */
var LocalEnvironmentProvider = class {
  tempDir = null;
  server = null;
  agentId = null;
  runtime = null;
  serverPort = null;
  trajectoryReconstructor = null;

  /**
   * @param {object} [server] - Pre-created server, forwarded to `askAgentViaApi`.
   * @param {string} [agentId] - Agent that receives natural-language inputs.
   * @param {object} [runtime] - When given, a `TrajectoryReconstructor` is built on it.
   * @param {number} [serverPort] - Forwarded to `askAgentViaApi`.
   */
  constructor(server, agentId, runtime, serverPort) {
    this.server = server ?? null;
    this.agentId = agentId ?? null;
    this.runtime = runtime ?? null;
    this.serverPort = serverPort ?? null;
    this.trajectoryReconstructor = runtime ? new TrajectoryReconstructor(runtime) : null;
    console.log(`\u{1F527} [DEBUG] LocalEnvironmentProvider CONSTRUCTOR:`);
    console.log(`\u{1F527} [DEBUG] - Server: ${server ? "present" : "null"}`);
    console.log(`\u{1F527} [DEBUG] - Agent ID: ${agentId}`);
    console.log(`\u{1F527} [DEBUG] - Runtime: ${runtime ? "present" : "null"}`);
    console.log(`\u{1F527} [DEBUG] - Server Port: ${serverPort}`);
  }

  /**
   * Creates a fresh temp directory and writes every `virtual_fs` entry into it.
   *
   * @param {object} scenario - Scenario; `scenario.setup.virtual_fs` is an
   *   optional map of relative file path -> file content.
   * @throws {Error} If a `virtual_fs` key resolves outside the temp directory
   *   (for example `../x`).
   */
  async setup(scenario) {
    const tempDirPrefix = path.join(os.tmpdir(), "eliza-scenario-run-");
    this.tempDir = await fs.mkdtemp(tempDirPrefix);
    const virtualFs = scenario.setup?.virtual_fs;
    if (!virtualFs) {
      return;
    }
    for (const [filePath, content] of Object.entries(virtualFs)) {
      const fullPath = path.join(this.tempDir, filePath);
      // `path.join` normalises `..` segments, so a crafted key could point
      // outside the sandbox; refuse it instead of writing there.
      const relativeToSandbox = path.relative(this.tempDir, fullPath);
      const escapesSandbox =
        relativeToSandbox === ".." ||
        relativeToSandbox.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativeToSandbox);
      if (escapesSandbox) {
        throw new Error(`virtual_fs path escapes the scenario sandbox: ${filePath}`);
      }
      await fs.mkdir(path.dirname(fullPath), { recursive: true });
      await fs.writeFile(fullPath, content);
    }
  }

  /**
   * Reads every regular file below the temp directory.
   *
   * @returns {Promise<Record<string, string>>} Map of path (relative to the
   *   temp directory) -> UTF-8 content. Returns `{}` when `setup()` has not
   *   run or when walking the tree fails; a file that cannot be read is
   *   recorded as the placeholder "[binary or unreadable]".
   */
  async captureFileSystem() {
    if (!this.tempDir) {
      return {};
    }
    const files = {};
    try {
      const readDirRecursive = async (dirPath, basePath = "") => {
        const entries = await fs.readdir(dirPath, { withFileTypes: true });
        for (const entry of entries) {
          const fullPath = path.join(dirPath, entry.name);
          const relativePath = path.join(basePath, entry.name);
          if (entry.isDirectory()) {
            await readDirRecursive(fullPath, relativePath);
          } else if (entry.isFile()) {
            try {
              files[relativePath] = await fs.readFile(fullPath, "utf-8");
            } catch {
              // Deliberate best effort: one bad file must not lose the snapshot.
              files[relativePath] = "[binary or unreadable]";
            }
          }
        }
      };
      await readDirRecursive(this.tempDir);
      return files;
    } catch (error) {
      console.warn("Failed to capture file system state:", error);
      return {};
    }
  }

  /**
   * Executes every step of `scenario.run` in order.
   *
   * - A step with `input` is sent to the agent through `askAgentViaApi`; the
   *   response becomes `stdout` and the reconstructed trajectory is attached.
   * - A step with `code` is executed through `bunExec` with the temp
   *   directory as working directory; a rejected execution is recorded as a
   *   result (exit code 1 unless the error carries its own) rather than rethrown.
   *
   * @param {object} scenario - Scenario whose `run` array holds the steps.
   * @returns {Promise<object[]>} One result per step with `exitCode`,
   *   `stdout`, `stderr`, `files`, `startedAtMs`, `endedAtMs`, `durationMs`
   *   and, for input steps, `trajectory`.
   * @throws {Error} If `setup()` was not called, if an input step is run
   *   without a server and agent id, or if a step has neither `input` nor `code`.
   */
  async run(scenario) {
    if (!this.tempDir) {
      throw new Error("Setup must be called before run.");
    }
    const results = [];
    for (const step of scenario.run) {
      const startedAtMs = Date.now();
      if (step.input) {
        if (!this.server || !this.agentId) {
          throw new Error(
            "LocalEnvironmentProvider requires a pre-created server and agent for NL input"
          );
        }
        const { response, roomId } = await askAgentViaApi(
          this.server,
          this.agentId,
          step.input,
          3e4,
          // timeout
          this.serverPort
          // Pass the actual server port
        );
        // Fixed 3 s pause before reading memories back.
        // NOTE(review): presumably lets the runtime persist action results — confirm.
        await new Promise((resolve) => setTimeout(resolve, 3e3));
        const trajectory =
          this.trajectoryReconstructor && roomId
            ? await this.trajectoryReconstructor.getLatestTrajectory(roomId)
            : [];
        console.log(`\u{1F50D} [Trajectory Debug] Room ID: ${roomId}, Steps found: ${trajectory.length}`);
        if (trajectory.length > 0) {
          console.log(`\u{1F4CA} [Trajectory Debug] First step:`, JSON.stringify(trajectory[0], null, 2));
        }
        const endedAtMs = Date.now();
        const durationMs = endedAtMs - startedAtMs;
        results.push({
          exitCode: 0,
          stdout: response,
          stderr: "",
          files: await this.captureFileSystem(),
          startedAtMs,
          endedAtMs,
          durationMs,
          trajectory
          // Add trajectory to execution result
        });
      } else if (step.code) {
        // Map the step language onto an interpreter invocation. Unknown
        // languages are passed through as the command name with `-c`.
        let execCommand;
        let execArgs;
        switch (step.lang) {
          case "bash":
          case "sh":
            execCommand = "sh";
            execArgs = ["-c", step.code];
            break;
          case "node":
          case "javascript":
            execCommand = "node";
            execArgs = ["-e", step.code];
            break;
          case "python":
          case "python3":
            execCommand = "python3";
            execArgs = ["-c", step.code];
            break;
          default:
            execCommand = step.lang;
            execArgs = ["-c", step.code];
            break;
        }
        try {
          const result = await bunExec(execCommand, execArgs, { cwd: this.tempDir });
          const { stdout, stderr } = result;
          const files = await this.captureFileSystem();
          const endedAtMs = Date.now();
          const durationMs = endedAtMs - startedAtMs;
          results.push({
            exitCode: result.exitCode || 0,
            stdout,
            stderr,
            files,
            startedAtMs,
            endedAtMs,
            durationMs
          });
        } catch (error) {
          const files = await this.captureFileSystem();
          const endedAtMs = Date.now();
          const durationMs = endedAtMs - startedAtMs;
          // Optional chaining: a thrown null/undefined must not mask the step result.
          const exitCode = error?.exitCode !== void 0 ? error.exitCode : 1;
          const stdout = error?.stdout ? error.stdout : "";
          let stderr = error?.stderr ? error.stderr : "";
          if (!stderr && error?.message) {
            stderr = error.message;
          }
          results.push({
            exitCode,
            stdout,
            stderr,
            files,
            startedAtMs,
            endedAtMs,
            durationMs
          });
        }
      } else {
        throw new Error("Step must have either input or code");
      }
    }
    return results;
  }

  /**
   * Removes the temp directory (if any) and forgets it.
   */
  async teardown() {
    if (this.tempDir) {
      await fs.rm(this.tempDir, { recursive: true, force: true });
      this.tempDir = null;
    }
  }
};
397
+
398
+ export {
399
+ TrajectoryReconstructor,
400
+ LocalEnvironmentProvider
401
+ };
@@ -0,0 +1,166 @@
1
+ // src/commands/scenario/src/schema.ts
2
+ import { z } from "zod";
3
// ---------------------------------------------------------------------------
// Evaluation results
// NOTE(review): `var` is kept for every binding, as in the bundled original,
// so hoisting behaviour is unchanged.
// ---------------------------------------------------------------------------

/** Outcome of one evaluator: which evaluator ran, pass/fail, summary, free-form details. */
var EnhancedEvaluationResultSchema = z.object({
  evaluator_type: z.string(),
  success: z.boolean(),
  summary: z.string(),
  details: z.record(z.any())
});

/** One entry of the LLM judge checklist. */
var CapabilityCheckSchema = z.object({
  capability: z.string(),
  achieved: z.boolean(),
  reasoning: z.string()
});

/** Structured LLM judge verdict: a summary plus a list of capability checks. */
var LLMJudgeResultSchema = z.object({
  qualitative_summary: z.string(),
  capability_checklist: z.array(CapabilityCheckSchema)
});

// ---------------------------------------------------------------------------
// Evaluation definitions (discriminated on `type`)
// ---------------------------------------------------------------------------

/** Common shape every evaluation definition extends. */
var BaseEvaluationSchema = z.object({ type: z.string() });

var StringContainsEvaluationSchema = BaseEvaluationSchema.extend({
  type: z.literal("string_contains"),
  value: z.string(),
  case_sensitive: z.boolean().optional()
});

var RegexMatchEvaluationSchema = BaseEvaluationSchema.extend({
  type: z.literal("regex_match"),
  pattern: z.string()
});

var FileExistsEvaluationSchema = BaseEvaluationSchema.extend({
  type: z.literal("file_exists"),
  path: z.string()
});

var TrajectoryContainsActionSchema = BaseEvaluationSchema.extend({
  type: z.literal("trajectory_contains_action"),
  action: z.string()
});

var LLMJudgeEvaluationSchema = BaseEvaluationSchema.extend({
  type: z.literal("llm_judge"),
  prompt: z.string(),
  expected: z.string(),
  model_type: z.string().optional(),
  // Accepted range is 0..2 inclusive.
  temperature: z.number().min(0).max(2).optional(),
  // JSON schema object for response validation
  json_schema: z.record(z.any()).optional(),
  // Custom capabilities for evaluation; when present the list needs >= 1 entry.
  capabilities: z.array(z.string()).min(1, "Capabilities array must not be empty").optional()
});

var ExecutionTimeEvaluationSchema = BaseEvaluationSchema.extend({
  type: z.literal("execution_time"),
  max_duration_ms: z.number(),
  min_duration_ms: z.number().optional(),
  target_duration_ms: z.number().optional()
});

/** Any supported evaluation, selected by its `type` literal. */
var EvaluationSchema = z.discriminatedUnion("type", [
  StringContainsEvaluationSchema,
  RegexMatchEvaluationSchema,
  FileExistsEvaluationSchema,
  TrajectoryContainsActionSchema,
  LLMJudgeEvaluationSchema,
  ExecutionTimeEvaluationSchema
]);

// ---------------------------------------------------------------------------
// Mocks
// ---------------------------------------------------------------------------

/** Matching strategies of a mock's `when` clause; every strategy is optional. */
var MockWhenSchema = z.object({
  // Exact argument matching (existing)
  args: z.array(z.any()).optional(),
  // Input parameter matching (extracted from args)
  input: z.record(z.any()).optional(),
  // Request context matching
  context: z.record(z.any()).optional(),
  // Custom JavaScript matcher function
  matcher: z.string().optional(),
  // Partial argument matching
  partialArgs: z.array(z.any()).optional()
});

/** Error simulation payload of a mock. */
var MockErrorSchema = z.object({
  code: z.string(),
  message: z.string(),
  status: z.number().optional()
});

/** Response metadata of a mock. */
var MockMetadataSchema = z.object({
  // Simulate network delay
  delay: z.number().optional(),
  // Random failure; accepted range is 0..1 inclusive.
  probability: z.number().min(0).max(1).optional()
});

var MockSchema = z.object({
  service: z.string().optional(),
  method: z.string(),
  // Enhanced 'when' clause with multiple matching strategies
  when: MockWhenSchema.optional(),
  // Static response (existing)
  response: z.any(),
  // Dynamic response generation
  responseFn: z.string().optional(),
  // Error simulation
  error: MockErrorSchema.optional(),
  // Response metadata
  metadata: MockMetadataSchema.optional()
});

// ---------------------------------------------------------------------------
// Plugins, setup, run steps
// ---------------------------------------------------------------------------

/** Full plugin configuration object; `enabled` defaults to true. */
var PluginConfigSchema = z.object({
  name: z.string(),
  version: z.string().optional(),
  config: z.record(z.any()).optional(),
  enabled: z.boolean().optional().default(true)
});

/** A plugin is referenced either by a simple string or by a full configuration object. */
var PluginReferenceSchema = z.union([z.string(), PluginConfigSchema]);

var SetupSchema = z.object({
  mocks: z.array(MockSchema).optional(),
  virtual_fs: z.record(z.string()).optional()
});

/** One scenario step. `code`, `lang` and `input` are all optional at schema level. */
var RunStepSchema = z.object({
  name: z.string().optional(),
  lang: z.string().optional(),
  code: z.string().optional(),
  // Natural language input to agent
  input: z.string().optional(),
  evaluations: z.array(EvaluationSchema)
});

var JudgmentSchema = z.object({
  strategy: z.enum(["all_pass", "any_pass"])
});

// ---------------------------------------------------------------------------
// Scenario and run result
// ---------------------------------------------------------------------------

/** Top-level scenario document. */
var ScenarioSchema = z.object({
  name: z.string(),
  description: z.string(),
  plugins: z.array(PluginReferenceSchema).optional(),
  environment: z.object({ type: z.enum(["e2b", "local"]) }),
  setup: SetupSchema.optional(),
  run: z.array(RunStepSchema),
  judgment: JudgmentSchema
});

/** Required metrics; `catchall` allows additional numeric metrics. */
var RunMetricsSchema = z
  .object({
    execution_time_seconds: z.number().min(0),
    llm_calls: z.number().int().min(0),
    total_tokens: z.number().int().min(0)
  })
  .catchall(z.number());

/** One trajectory step; the timestamp must be parseable by `Date.parse`. */
var TrajectoryStepSchema = z.object({
  type: z.enum(["thought", "action", "observation"]),
  timestamp: z.string().refine((val) => !Number.isNaN(Date.parse(val)), {
    message: "Timestamp must be a valid ISO string"
  }),
  content: z.any()
});

/** Result record of a single scenario run. */
var ScenarioRunResultSchema = z.object({
  run_id: z.string().min(1, "Run ID cannot be empty"),
  matrix_combination_id: z.string().min(1, "Matrix combination ID cannot be empty"),
  parameters: z.record(z.any()),
  metrics: RunMetricsSchema,
  final_agent_response: z.string().optional(),
  evaluations: z.array(EnhancedEvaluationResultSchema),
  trajectory: z.array(TrajectoryStepSchema),
  error: z.string().nullable()
});
158
+
159
+ export {
160
+ EnhancedEvaluationResultSchema,
161
+ CapabilityCheckSchema,
162
+ LLMJudgeResultSchema,
163
+ EvaluationSchema,
164
+ ScenarioSchema,
165
+ ScenarioRunResultSchema
166
+ };