@elizaos/cli 1.6.0-alpha.4 → 1.6.0-beta.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (29) hide show
  1. package/dist/commands/agent/utils/validation.d.ts +31 -3
  2. package/dist/commands/agent/utils/validation.d.ts.map +1 -1
  3. package/dist/commands/create/types.d.ts +8 -7
  4. package/dist/commands/create/types.d.ts.map +1 -1
  5. package/dist/commands/create/utils/validation.d.ts +2 -2
  6. package/dist/commands/create/utils/validation.d.ts.map +1 -1
  7. package/dist/commands/report/src/report-schema.d.ts +167 -11
  8. package/dist/commands/report/src/report-schema.d.ts.map +1 -1
  9. package/dist/commands/scenario/src/MockEngine.d.ts +11 -11
  10. package/dist/commands/scenario/src/matrix-schema.d.ts +70 -5
  11. package/dist/commands/scenario/src/matrix-schema.d.ts.map +1 -1
  12. package/dist/commands/scenario/src/schema.d.ts +2007 -175
  13. package/dist/commands/scenario/src/schema.d.ts.map +1 -1
  14. package/dist/index.js +15087 -23793
  15. package/dist/index.js.map +5 -73
  16. package/dist/templates/plugin-quick-starter/package.json +4 -4
  17. package/dist/templates/plugin-starter/package.json +4 -4
  18. package/dist/templates/project-starter/package.json +7 -7
  19. package/dist/templates/project-tee-starter/package.json +5 -5
  20. package/dist/utils/get-config.d.ts +194 -27
  21. package/dist/utils/get-config.d.ts.map +1 -1
  22. package/dist/version.d.ts +2 -2
  23. package/dist/version.d.ts.map +1 -1
  24. package/dist/version.js +2 -2
  25. package/package.json +8 -9
  26. package/templates/plugin-quick-starter/package.json +4 -4
  27. package/templates/plugin-starter/package.json +4 -4
  28. package/templates/project-starter/package.json +7 -7
  29. package/templates/project-tee-starter/package.json +5 -5
@@ -19,197 +19,612 @@ export declare const EnhancedEvaluationResultSchema: z.ZodObject<{
19
19
  success: z.ZodBoolean;
20
20
  summary: z.ZodString;
21
21
  details: z.ZodRecord<z.ZodString, z.ZodAny>;
22
- }, z.core.$strip>;
22
+ }, "strip", z.ZodTypeAny, {
23
+ success: boolean;
24
+ evaluator_type: string;
25
+ summary: string;
26
+ details: Record<string, any>;
27
+ }, {
28
+ success: boolean;
29
+ evaluator_type: string;
30
+ summary: string;
31
+ details: Record<string, any>;
32
+ }>;
23
33
  export declare const CapabilityCheckSchema: z.ZodObject<{
24
34
  capability: z.ZodString;
25
35
  achieved: z.ZodBoolean;
26
36
  reasoning: z.ZodString;
27
- }, z.core.$strip>;
37
+ }, "strip", z.ZodTypeAny, {
38
+ capability: string;
39
+ achieved: boolean;
40
+ reasoning: string;
41
+ }, {
42
+ capability: string;
43
+ achieved: boolean;
44
+ reasoning: string;
45
+ }>;
28
46
  export declare const LLMJudgeResultSchema: z.ZodObject<{
29
47
  qualitative_summary: z.ZodString;
30
48
  capability_checklist: z.ZodArray<z.ZodObject<{
31
49
  capability: z.ZodString;
32
50
  achieved: z.ZodBoolean;
33
51
  reasoning: z.ZodString;
34
- }, z.core.$strip>>;
35
- }, z.core.$strip>;
36
- export declare const EvaluationSchema: z.ZodDiscriminatedUnion<[z.ZodObject<{
52
+ }, "strip", z.ZodTypeAny, {
53
+ capability: string;
54
+ achieved: boolean;
55
+ reasoning: string;
56
+ }, {
57
+ capability: string;
58
+ achieved: boolean;
59
+ reasoning: string;
60
+ }>, "many">;
61
+ }, "strip", z.ZodTypeAny, {
62
+ qualitative_summary: string;
63
+ capability_checklist: {
64
+ capability: string;
65
+ achieved: boolean;
66
+ reasoning: string;
67
+ }[];
68
+ }, {
69
+ qualitative_summary: string;
70
+ capability_checklist: {
71
+ capability: string;
72
+ achieved: boolean;
73
+ reasoning: string;
74
+ }[];
75
+ }>;
76
+ export declare const EvaluationSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObject<z.objectUtil.extendShape<{
77
+ type: z.ZodString;
78
+ }, {
37
79
  type: z.ZodLiteral<"string_contains">;
38
80
  value: z.ZodString;
39
81
  case_sensitive: z.ZodOptional<z.ZodBoolean>;
40
- }, z.core.$strip>, z.ZodObject<{
82
+ }>, "strip", z.ZodTypeAny, {
83
+ value: string;
84
+ type: "string_contains";
85
+ case_sensitive?: boolean | undefined;
86
+ }, {
87
+ value: string;
88
+ type: "string_contains";
89
+ case_sensitive?: boolean | undefined;
90
+ }>, z.ZodObject<z.objectUtil.extendShape<{
91
+ type: z.ZodString;
92
+ }, {
41
93
  type: z.ZodLiteral<"regex_match">;
42
94
  pattern: z.ZodString;
43
- }, z.core.$strip>, z.ZodObject<{
95
+ }>, "strip", z.ZodTypeAny, {
96
+ type: "regex_match";
97
+ pattern: string;
98
+ }, {
99
+ type: "regex_match";
100
+ pattern: string;
101
+ }>, z.ZodObject<z.objectUtil.extendShape<{
102
+ type: z.ZodString;
103
+ }, {
44
104
  type: z.ZodLiteral<"file_exists">;
45
105
  path: z.ZodString;
46
- }, z.core.$strip>, z.ZodObject<{
106
+ }>, "strip", z.ZodTypeAny, {
107
+ path: string;
108
+ type: "file_exists";
109
+ }, {
110
+ path: string;
111
+ type: "file_exists";
112
+ }>, z.ZodObject<z.objectUtil.extendShape<{
113
+ type: z.ZodString;
114
+ }, {
47
115
  type: z.ZodLiteral<"trajectory_contains_action">;
48
116
  action: z.ZodString;
49
- }, z.core.$strip>, z.ZodObject<{
117
+ }>, "strip", z.ZodTypeAny, {
118
+ type: "trajectory_contains_action";
119
+ action: string;
120
+ }, {
121
+ type: "trajectory_contains_action";
122
+ action: string;
123
+ }>, z.ZodObject<z.objectUtil.extendShape<{
124
+ type: z.ZodString;
125
+ }, {
50
126
  type: z.ZodLiteral<"llm_judge">;
51
127
  prompt: z.ZodString;
52
128
  expected: z.ZodString;
53
129
  model_type: z.ZodOptional<z.ZodString>;
54
130
  temperature: z.ZodOptional<z.ZodNumber>;
55
131
  json_schema: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
56
- capabilities: z.ZodOptional<z.ZodArray<z.ZodString>>;
57
- }, z.core.$strip>, z.ZodObject<{
132
+ capabilities: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
133
+ }>, "strip", z.ZodTypeAny, {
134
+ type: "llm_judge";
135
+ expected: string;
136
+ prompt: string;
137
+ model_type?: string | undefined;
138
+ temperature?: number | undefined;
139
+ json_schema?: Record<string, any> | undefined;
140
+ capabilities?: string[] | undefined;
141
+ }, {
142
+ type: "llm_judge";
143
+ expected: string;
144
+ prompt: string;
145
+ model_type?: string | undefined;
146
+ temperature?: number | undefined;
147
+ json_schema?: Record<string, any> | undefined;
148
+ capabilities?: string[] | undefined;
149
+ }>, z.ZodObject<z.objectUtil.extendShape<{
150
+ type: z.ZodString;
151
+ }, {
58
152
  type: z.ZodLiteral<"execution_time">;
59
153
  max_duration_ms: z.ZodNumber;
60
154
  min_duration_ms: z.ZodOptional<z.ZodNumber>;
61
155
  target_duration_ms: z.ZodOptional<z.ZodNumber>;
62
- }, z.core.$strip>, z.ZodObject<{
156
+ }>, "strip", z.ZodTypeAny, {
157
+ type: "execution_time";
158
+ max_duration_ms: number;
159
+ min_duration_ms?: number | undefined;
160
+ target_duration_ms?: number | undefined;
161
+ }, {
162
+ type: "execution_time";
163
+ max_duration_ms: number;
164
+ min_duration_ms?: number | undefined;
165
+ target_duration_ms?: number | undefined;
166
+ }>, z.ZodObject<z.objectUtil.extendShape<{
167
+ type: z.ZodString;
168
+ }, {
63
169
  type: z.ZodLiteral<"conversation_length">;
64
170
  min_turns: z.ZodOptional<z.ZodNumber>;
65
171
  max_turns: z.ZodOptional<z.ZodNumber>;
66
172
  optimal_turns: z.ZodOptional<z.ZodNumber>;
67
- target_range: z.ZodOptional<z.ZodArray<z.ZodNumber>>;
68
- }, z.core.$strip>, z.ZodObject<{
173
+ target_range: z.ZodEffects<z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>, number[] | undefined, number[] | undefined>;
174
+ }>, "strip", z.ZodTypeAny, {
175
+ type: "conversation_length";
176
+ min_turns?: number | undefined;
177
+ max_turns?: number | undefined;
178
+ optimal_turns?: number | undefined;
179
+ target_range?: number[] | undefined;
180
+ }, {
181
+ type: "conversation_length";
182
+ min_turns?: number | undefined;
183
+ max_turns?: number | undefined;
184
+ optimal_turns?: number | undefined;
185
+ target_range?: number[] | undefined;
186
+ }>, z.ZodObject<z.objectUtil.extendShape<{
187
+ type: z.ZodString;
188
+ }, {
69
189
  type: z.ZodLiteral<"conversation_flow">;
70
- required_patterns: z.ZodArray<z.ZodEnum<{
71
- question_then_answer: "question_then_answer";
72
- problem_then_solution: "problem_then_solution";
73
- clarification_cycle: "clarification_cycle";
74
- empathy_then_solution: "empathy_then_solution";
75
- escalation_pattern: "escalation_pattern";
76
- }>>;
190
+ required_patterns: z.ZodArray<z.ZodEnum<["question_then_answer", "problem_then_solution", "clarification_cycle", "empathy_then_solution", "escalation_pattern"]>, "many">;
77
191
  flow_quality_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
78
- }, z.core.$strip>, z.ZodObject<{
192
+ }>, "strip", z.ZodTypeAny, {
193
+ type: "conversation_flow";
194
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
195
+ flow_quality_threshold: number;
196
+ }, {
197
+ type: "conversation_flow";
198
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
199
+ flow_quality_threshold?: number | undefined;
200
+ }>, z.ZodObject<z.objectUtil.extendShape<{
201
+ type: z.ZodString;
202
+ }, {
79
203
  type: z.ZodLiteral<"user_satisfaction">;
80
204
  satisfaction_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
81
205
  indicators: z.ZodOptional<z.ZodObject<{
82
- positive: z.ZodOptional<z.ZodArray<z.ZodString>>;
83
- negative: z.ZodOptional<z.ZodArray<z.ZodString>>;
84
- }, z.core.$strip>>;
85
- measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
86
- llm_judge: "llm_judge";
87
- sentiment_analysis: "sentiment_analysis";
88
- keyword_analysis: "keyword_analysis";
89
- }>>>;
90
- }, z.core.$strip>, z.ZodObject<{
206
+ positive: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
207
+ negative: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
208
+ }, "strip", z.ZodTypeAny, {
209
+ positive?: string[] | undefined;
210
+ negative?: string[] | undefined;
211
+ }, {
212
+ positive?: string[] | undefined;
213
+ negative?: string[] | undefined;
214
+ }>>;
215
+ measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<["sentiment_analysis", "keyword_analysis", "llm_judge"]>>>;
216
+ }>, "strip", z.ZodTypeAny, {
217
+ type: "user_satisfaction";
218
+ satisfaction_threshold: number;
219
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
220
+ indicators?: {
221
+ positive?: string[] | undefined;
222
+ negative?: string[] | undefined;
223
+ } | undefined;
224
+ }, {
225
+ type: "user_satisfaction";
226
+ satisfaction_threshold?: number | undefined;
227
+ indicators?: {
228
+ positive?: string[] | undefined;
229
+ negative?: string[] | undefined;
230
+ } | undefined;
231
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
232
+ }>, z.ZodObject<z.objectUtil.extendShape<{
233
+ type: z.ZodString;
234
+ }, {
91
235
  type: z.ZodLiteral<"context_retention">;
92
- test_memory_of: z.ZodArray<z.ZodString>;
236
+ test_memory_of: z.ZodArray<z.ZodString, "many">;
93
237
  retention_turns: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
94
238
  memory_accuracy_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
95
- }, z.core.$strip>], "type">;
239
+ }>, "strip", z.ZodTypeAny, {
240
+ type: "context_retention";
241
+ test_memory_of: string[];
242
+ retention_turns: number;
243
+ memory_accuracy_threshold: number;
244
+ }, {
245
+ type: "context_retention";
246
+ test_memory_of: string[];
247
+ retention_turns?: number | undefined;
248
+ memory_accuracy_threshold?: number | undefined;
249
+ }>]>;
96
250
  declare const PluginConfigSchema: z.ZodObject<{
97
251
  name: z.ZodString;
98
252
  version: z.ZodOptional<z.ZodString>;
99
253
  config: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
100
254
  enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
101
- }, z.core.$strip>;
102
- declare const PluginReferenceSchema: z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
255
+ }, "strip", z.ZodTypeAny, {
256
+ name: string;
257
+ enabled: boolean;
258
+ version?: string | undefined;
259
+ config?: Record<string, any> | undefined;
260
+ }, {
261
+ name: string;
262
+ enabled?: boolean | undefined;
263
+ version?: string | undefined;
264
+ config?: Record<string, any> | undefined;
265
+ }>;
266
+ declare const PluginReferenceSchema: z.ZodUnion<[z.ZodString, z.ZodObject<{
103
267
  name: z.ZodString;
104
268
  version: z.ZodOptional<z.ZodString>;
105
269
  config: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
106
270
  enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
107
- }, z.core.$strip>]>;
271
+ }, "strip", z.ZodTypeAny, {
272
+ name: string;
273
+ enabled: boolean;
274
+ version?: string | undefined;
275
+ config?: Record<string, any> | undefined;
276
+ }, {
277
+ name: string;
278
+ enabled?: boolean | undefined;
279
+ version?: string | undefined;
280
+ config?: Record<string, any> | undefined;
281
+ }>]>;
108
282
  export declare const ScenarioSchema: z.ZodObject<{
109
283
  name: z.ZodString;
110
284
  description: z.ZodString;
111
- plugins: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodString, z.ZodObject<{
285
+ plugins: z.ZodOptional<z.ZodArray<z.ZodUnion<[z.ZodString, z.ZodObject<{
112
286
  name: z.ZodString;
113
287
  version: z.ZodOptional<z.ZodString>;
114
288
  config: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
115
289
  enabled: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
116
- }, z.core.$strip>]>>>;
290
+ }, "strip", z.ZodTypeAny, {
291
+ name: string;
292
+ enabled: boolean;
293
+ version?: string | undefined;
294
+ config?: Record<string, any> | undefined;
295
+ }, {
296
+ name: string;
297
+ enabled?: boolean | undefined;
298
+ version?: string | undefined;
299
+ config?: Record<string, any> | undefined;
300
+ }>]>, "many">>;
117
301
  environment: z.ZodObject<{
118
- type: z.ZodEnum<{
119
- local: "local";
120
- e2b: "e2b";
121
- }>;
122
- }, z.core.$strip>;
302
+ type: z.ZodEnum<["e2b", "local"]>;
303
+ }, "strip", z.ZodTypeAny, {
304
+ type: "local" | "e2b";
305
+ }, {
306
+ type: "local" | "e2b";
307
+ }>;
123
308
  setup: z.ZodOptional<z.ZodObject<{
124
309
  mocks: z.ZodOptional<z.ZodArray<z.ZodObject<{
125
310
  service: z.ZodOptional<z.ZodString>;
126
311
  method: z.ZodString;
127
312
  when: z.ZodOptional<z.ZodObject<{
128
- args: z.ZodOptional<z.ZodArray<z.ZodAny>>;
313
+ args: z.ZodOptional<z.ZodArray<z.ZodAny, "many">>;
129
314
  input: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
130
315
  context: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
131
316
  matcher: z.ZodOptional<z.ZodString>;
132
- partialArgs: z.ZodOptional<z.ZodArray<z.ZodAny>>;
133
- }, z.core.$strip>>;
317
+ partialArgs: z.ZodOptional<z.ZodArray<z.ZodAny, "many">>;
318
+ }, "strip", z.ZodTypeAny, {
319
+ context?: Record<string, any> | undefined;
320
+ args?: any[] | undefined;
321
+ input?: Record<string, any> | undefined;
322
+ matcher?: string | undefined;
323
+ partialArgs?: any[] | undefined;
324
+ }, {
325
+ context?: Record<string, any> | undefined;
326
+ args?: any[] | undefined;
327
+ input?: Record<string, any> | undefined;
328
+ matcher?: string | undefined;
329
+ partialArgs?: any[] | undefined;
330
+ }>>;
134
331
  response: z.ZodAny;
135
332
  responseFn: z.ZodOptional<z.ZodString>;
136
333
  error: z.ZodOptional<z.ZodObject<{
137
334
  code: z.ZodString;
138
335
  message: z.ZodString;
139
336
  status: z.ZodOptional<z.ZodNumber>;
140
- }, z.core.$strip>>;
337
+ }, "strip", z.ZodTypeAny, {
338
+ message: string;
339
+ code: string;
340
+ status?: number | undefined;
341
+ }, {
342
+ message: string;
343
+ code: string;
344
+ status?: number | undefined;
345
+ }>>;
141
346
  metadata: z.ZodOptional<z.ZodObject<{
142
347
  delay: z.ZodOptional<z.ZodNumber>;
143
348
  probability: z.ZodOptional<z.ZodNumber>;
144
- }, z.core.$strip>>;
145
- }, z.core.$strip>>>;
349
+ }, "strip", z.ZodTypeAny, {
350
+ delay?: number | undefined;
351
+ probability?: number | undefined;
352
+ }, {
353
+ delay?: number | undefined;
354
+ probability?: number | undefined;
355
+ }>>;
356
+ }, "strip", z.ZodTypeAny, {
357
+ method: string;
358
+ error?: {
359
+ message: string;
360
+ code: string;
361
+ status?: number | undefined;
362
+ } | undefined;
363
+ metadata?: {
364
+ delay?: number | undefined;
365
+ probability?: number | undefined;
366
+ } | undefined;
367
+ service?: string | undefined;
368
+ when?: {
369
+ context?: Record<string, any> | undefined;
370
+ args?: any[] | undefined;
371
+ input?: Record<string, any> | undefined;
372
+ matcher?: string | undefined;
373
+ partialArgs?: any[] | undefined;
374
+ } | undefined;
375
+ response?: any;
376
+ responseFn?: string | undefined;
377
+ }, {
378
+ method: string;
379
+ error?: {
380
+ message: string;
381
+ code: string;
382
+ status?: number | undefined;
383
+ } | undefined;
384
+ metadata?: {
385
+ delay?: number | undefined;
386
+ probability?: number | undefined;
387
+ } | undefined;
388
+ service?: string | undefined;
389
+ when?: {
390
+ context?: Record<string, any> | undefined;
391
+ args?: any[] | undefined;
392
+ input?: Record<string, any> | undefined;
393
+ matcher?: string | undefined;
394
+ partialArgs?: any[] | undefined;
395
+ } | undefined;
396
+ response?: any;
397
+ responseFn?: string | undefined;
398
+ }>, "many">>;
146
399
  virtual_fs: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodString>>;
147
- }, z.core.$strip>>;
400
+ }, "strip", z.ZodTypeAny, {
401
+ mocks?: {
402
+ method: string;
403
+ error?: {
404
+ message: string;
405
+ code: string;
406
+ status?: number | undefined;
407
+ } | undefined;
408
+ metadata?: {
409
+ delay?: number | undefined;
410
+ probability?: number | undefined;
411
+ } | undefined;
412
+ service?: string | undefined;
413
+ when?: {
414
+ context?: Record<string, any> | undefined;
415
+ args?: any[] | undefined;
416
+ input?: Record<string, any> | undefined;
417
+ matcher?: string | undefined;
418
+ partialArgs?: any[] | undefined;
419
+ } | undefined;
420
+ response?: any;
421
+ responseFn?: string | undefined;
422
+ }[] | undefined;
423
+ virtual_fs?: Record<string, string> | undefined;
424
+ }, {
425
+ mocks?: {
426
+ method: string;
427
+ error?: {
428
+ message: string;
429
+ code: string;
430
+ status?: number | undefined;
431
+ } | undefined;
432
+ metadata?: {
433
+ delay?: number | undefined;
434
+ probability?: number | undefined;
435
+ } | undefined;
436
+ service?: string | undefined;
437
+ when?: {
438
+ context?: Record<string, any> | undefined;
439
+ args?: any[] | undefined;
440
+ input?: Record<string, any> | undefined;
441
+ matcher?: string | undefined;
442
+ partialArgs?: any[] | undefined;
443
+ } | undefined;
444
+ response?: any;
445
+ responseFn?: string | undefined;
446
+ }[] | undefined;
447
+ virtual_fs?: Record<string, string> | undefined;
448
+ }>>;
148
449
  run: z.ZodArray<z.ZodObject<{
149
450
  name: z.ZodOptional<z.ZodString>;
150
451
  lang: z.ZodOptional<z.ZodString>;
151
452
  code: z.ZodOptional<z.ZodString>;
152
453
  input: z.ZodOptional<z.ZodString>;
153
- evaluations: z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
454
+ evaluations: z.ZodArray<z.ZodDiscriminatedUnion<"type", [z.ZodObject<z.objectUtil.extendShape<{
455
+ type: z.ZodString;
456
+ }, {
154
457
  type: z.ZodLiteral<"string_contains">;
155
458
  value: z.ZodString;
156
459
  case_sensitive: z.ZodOptional<z.ZodBoolean>;
157
- }, z.core.$strip>, z.ZodObject<{
460
+ }>, "strip", z.ZodTypeAny, {
461
+ value: string;
462
+ type: "string_contains";
463
+ case_sensitive?: boolean | undefined;
464
+ }, {
465
+ value: string;
466
+ type: "string_contains";
467
+ case_sensitive?: boolean | undefined;
468
+ }>, z.ZodObject<z.objectUtil.extendShape<{
469
+ type: z.ZodString;
470
+ }, {
158
471
  type: z.ZodLiteral<"regex_match">;
159
472
  pattern: z.ZodString;
160
- }, z.core.$strip>, z.ZodObject<{
473
+ }>, "strip", z.ZodTypeAny, {
474
+ type: "regex_match";
475
+ pattern: string;
476
+ }, {
477
+ type: "regex_match";
478
+ pattern: string;
479
+ }>, z.ZodObject<z.objectUtil.extendShape<{
480
+ type: z.ZodString;
481
+ }, {
161
482
  type: z.ZodLiteral<"file_exists">;
162
483
  path: z.ZodString;
163
- }, z.core.$strip>, z.ZodObject<{
484
+ }>, "strip", z.ZodTypeAny, {
485
+ path: string;
486
+ type: "file_exists";
487
+ }, {
488
+ path: string;
489
+ type: "file_exists";
490
+ }>, z.ZodObject<z.objectUtil.extendShape<{
491
+ type: z.ZodString;
492
+ }, {
164
493
  type: z.ZodLiteral<"trajectory_contains_action">;
165
494
  action: z.ZodString;
166
- }, z.core.$strip>, z.ZodObject<{
495
+ }>, "strip", z.ZodTypeAny, {
496
+ type: "trajectory_contains_action";
497
+ action: string;
498
+ }, {
499
+ type: "trajectory_contains_action";
500
+ action: string;
501
+ }>, z.ZodObject<z.objectUtil.extendShape<{
502
+ type: z.ZodString;
503
+ }, {
167
504
  type: z.ZodLiteral<"llm_judge">;
168
505
  prompt: z.ZodString;
169
506
  expected: z.ZodString;
170
507
  model_type: z.ZodOptional<z.ZodString>;
171
508
  temperature: z.ZodOptional<z.ZodNumber>;
172
509
  json_schema: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
173
- capabilities: z.ZodOptional<z.ZodArray<z.ZodString>>;
174
- }, z.core.$strip>, z.ZodObject<{
510
+ capabilities: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
511
+ }>, "strip", z.ZodTypeAny, {
512
+ type: "llm_judge";
513
+ expected: string;
514
+ prompt: string;
515
+ model_type?: string | undefined;
516
+ temperature?: number | undefined;
517
+ json_schema?: Record<string, any> | undefined;
518
+ capabilities?: string[] | undefined;
519
+ }, {
520
+ type: "llm_judge";
521
+ expected: string;
522
+ prompt: string;
523
+ model_type?: string | undefined;
524
+ temperature?: number | undefined;
525
+ json_schema?: Record<string, any> | undefined;
526
+ capabilities?: string[] | undefined;
527
+ }>, z.ZodObject<z.objectUtil.extendShape<{
528
+ type: z.ZodString;
529
+ }, {
175
530
  type: z.ZodLiteral<"execution_time">;
176
531
  max_duration_ms: z.ZodNumber;
177
532
  min_duration_ms: z.ZodOptional<z.ZodNumber>;
178
533
  target_duration_ms: z.ZodOptional<z.ZodNumber>;
179
- }, z.core.$strip>, z.ZodObject<{
534
+ }>, "strip", z.ZodTypeAny, {
535
+ type: "execution_time";
536
+ max_duration_ms: number;
537
+ min_duration_ms?: number | undefined;
538
+ target_duration_ms?: number | undefined;
539
+ }, {
540
+ type: "execution_time";
541
+ max_duration_ms: number;
542
+ min_duration_ms?: number | undefined;
543
+ target_duration_ms?: number | undefined;
544
+ }>, z.ZodObject<z.objectUtil.extendShape<{
545
+ type: z.ZodString;
546
+ }, {
180
547
  type: z.ZodLiteral<"conversation_length">;
181
548
  min_turns: z.ZodOptional<z.ZodNumber>;
182
549
  max_turns: z.ZodOptional<z.ZodNumber>;
183
550
  optimal_turns: z.ZodOptional<z.ZodNumber>;
184
- target_range: z.ZodOptional<z.ZodArray<z.ZodNumber>>;
185
- }, z.core.$strip>, z.ZodObject<{
551
+ target_range: z.ZodEffects<z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>, number[] | undefined, number[] | undefined>;
552
+ }>, "strip", z.ZodTypeAny, {
553
+ type: "conversation_length";
554
+ min_turns?: number | undefined;
555
+ max_turns?: number | undefined;
556
+ optimal_turns?: number | undefined;
557
+ target_range?: number[] | undefined;
558
+ }, {
559
+ type: "conversation_length";
560
+ min_turns?: number | undefined;
561
+ max_turns?: number | undefined;
562
+ optimal_turns?: number | undefined;
563
+ target_range?: number[] | undefined;
564
+ }>, z.ZodObject<z.objectUtil.extendShape<{
565
+ type: z.ZodString;
566
+ }, {
186
567
  type: z.ZodLiteral<"conversation_flow">;
187
- required_patterns: z.ZodArray<z.ZodEnum<{
188
- question_then_answer: "question_then_answer";
189
- problem_then_solution: "problem_then_solution";
190
- clarification_cycle: "clarification_cycle";
191
- empathy_then_solution: "empathy_then_solution";
192
- escalation_pattern: "escalation_pattern";
193
- }>>;
568
+ required_patterns: z.ZodArray<z.ZodEnum<["question_then_answer", "problem_then_solution", "clarification_cycle", "empathy_then_solution", "escalation_pattern"]>, "many">;
194
569
  flow_quality_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
195
- }, z.core.$strip>, z.ZodObject<{
570
+ }>, "strip", z.ZodTypeAny, {
571
+ type: "conversation_flow";
572
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
573
+ flow_quality_threshold: number;
574
+ }, {
575
+ type: "conversation_flow";
576
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
577
+ flow_quality_threshold?: number | undefined;
578
+ }>, z.ZodObject<z.objectUtil.extendShape<{
579
+ type: z.ZodString;
580
+ }, {
196
581
  type: z.ZodLiteral<"user_satisfaction">;
197
582
  satisfaction_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
198
583
  indicators: z.ZodOptional<z.ZodObject<{
199
- positive: z.ZodOptional<z.ZodArray<z.ZodString>>;
200
- negative: z.ZodOptional<z.ZodArray<z.ZodString>>;
201
- }, z.core.$strip>>;
202
- measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
203
- llm_judge: "llm_judge";
204
- sentiment_analysis: "sentiment_analysis";
205
- keyword_analysis: "keyword_analysis";
206
- }>>>;
207
- }, z.core.$strip>, z.ZodObject<{
584
+ positive: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
585
+ negative: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
586
+ }, "strip", z.ZodTypeAny, {
587
+ positive?: string[] | undefined;
588
+ negative?: string[] | undefined;
589
+ }, {
590
+ positive?: string[] | undefined;
591
+ negative?: string[] | undefined;
592
+ }>>;
593
+ measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<["sentiment_analysis", "keyword_analysis", "llm_judge"]>>>;
594
+ }>, "strip", z.ZodTypeAny, {
595
+ type: "user_satisfaction";
596
+ satisfaction_threshold: number;
597
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
598
+ indicators?: {
599
+ positive?: string[] | undefined;
600
+ negative?: string[] | undefined;
601
+ } | undefined;
602
+ }, {
603
+ type: "user_satisfaction";
604
+ satisfaction_threshold?: number | undefined;
605
+ indicators?: {
606
+ positive?: string[] | undefined;
607
+ negative?: string[] | undefined;
608
+ } | undefined;
609
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
610
+ }>, z.ZodObject<z.objectUtil.extendShape<{
611
+ type: z.ZodString;
612
+ }, {
208
613
  type: z.ZodLiteral<"context_retention">;
209
- test_memory_of: z.ZodArray<z.ZodString>;
614
+ test_memory_of: z.ZodArray<z.ZodString, "many">;
210
615
  retention_turns: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
211
616
  memory_accuracy_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
212
- }, z.core.$strip>], "type">>;
617
+ }>, "strip", z.ZodTypeAny, {
618
+ type: "context_retention";
619
+ test_memory_of: string[];
620
+ retention_turns: number;
621
+ memory_accuracy_threshold: number;
622
+ }, {
623
+ type: "context_retention";
624
+ test_memory_of: string[];
625
+ retention_turns?: number | undefined;
626
+ memory_accuracy_threshold?: number | undefined;
627
+ }>]>, "many">;
213
628
  conversation: z.ZodOptional<z.ZodObject<{
214
629
  max_turns: z.ZodNumber;
215
630
  timeout_per_turn_ms: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
@@ -221,165 +636,1512 @@ export declare const ScenarioSchema: z.ZodObject<{
221
636
  persona: z.ZodString;
222
637
  objective: z.ZodString;
223
638
  style: z.ZodOptional<z.ZodString>;
224
- constraints: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString>>>;
639
+ constraints: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString, "many">>>;
225
640
  emotional_state: z.ZodOptional<z.ZodString>;
226
- knowledge_level: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
227
- beginner: "beginner";
228
- intermediate: "intermediate";
229
- expert: "expert";
230
- }>>>;
231
- }, z.core.$strip>;
641
+ knowledge_level: z.ZodDefault<z.ZodOptional<z.ZodEnum<["beginner", "intermediate", "expert"]>>>;
642
+ }, "strip", z.ZodTypeAny, {
643
+ model_type: string;
644
+ temperature: number;
645
+ max_tokens: number;
646
+ persona: string;
647
+ objective: string;
648
+ constraints: string[];
649
+ knowledge_level: "beginner" | "intermediate" | "expert";
650
+ style?: string | undefined;
651
+ emotional_state?: string | undefined;
652
+ }, {
653
+ persona: string;
654
+ objective: string;
655
+ style?: string | undefined;
656
+ model_type?: string | undefined;
657
+ temperature?: number | undefined;
658
+ max_tokens?: number | undefined;
659
+ constraints?: string[] | undefined;
660
+ emotional_state?: string | undefined;
661
+ knowledge_level?: "beginner" | "intermediate" | "expert" | undefined;
662
+ }>;
232
663
  termination_conditions: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
233
- type: z.ZodEnum<{
234
- max_turns_reached: "max_turns_reached";
235
- user_expresses_satisfaction: "user_expresses_satisfaction";
236
- agent_provides_solution: "agent_provides_solution";
237
- conversation_stuck: "conversation_stuck";
238
- escalation_needed: "escalation_needed";
239
- goal_achieved: "goal_achieved";
240
- custom_condition: "custom_condition";
241
- }>;
664
+ type: z.ZodEnum<["max_turns_reached", "user_expresses_satisfaction", "agent_provides_solution", "conversation_stuck", "escalation_needed", "goal_achieved", "custom_condition"]>;
242
665
  description: z.ZodOptional<z.ZodString>;
243
- keywords: z.ZodOptional<z.ZodArray<z.ZodString>>;
666
+ keywords: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
244
667
  llm_judge: z.ZodOptional<z.ZodObject<{
245
668
  prompt: z.ZodString;
246
669
  threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
247
- }, z.core.$strip>>;
248
- }, z.core.$strip>>>>;
249
- turn_evaluations: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
670
+ }, "strip", z.ZodTypeAny, {
671
+ prompt: string;
672
+ threshold: number;
673
+ }, {
674
+ prompt: string;
675
+ threshold?: number | undefined;
676
+ }>>;
677
+ }, "strip", z.ZodTypeAny, {
678
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
679
+ description?: string | undefined;
680
+ llm_judge?: {
681
+ prompt: string;
682
+ threshold: number;
683
+ } | undefined;
684
+ keywords?: string[] | undefined;
685
+ }, {
686
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
687
+ description?: string | undefined;
688
+ llm_judge?: {
689
+ prompt: string;
690
+ threshold?: number | undefined;
691
+ } | undefined;
692
+ keywords?: string[] | undefined;
693
+ }>, "many">>>;
694
+ turn_evaluations: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<"type", [z.ZodObject<z.objectUtil.extendShape<{
695
+ type: z.ZodString;
696
+ }, {
250
697
  type: z.ZodLiteral<"string_contains">;
251
698
  value: z.ZodString;
252
699
  case_sensitive: z.ZodOptional<z.ZodBoolean>;
253
- }, z.core.$strip>, z.ZodObject<{
700
+ }>, "strip", z.ZodTypeAny, {
701
+ value: string;
702
+ type: "string_contains";
703
+ case_sensitive?: boolean | undefined;
704
+ }, {
705
+ value: string;
706
+ type: "string_contains";
707
+ case_sensitive?: boolean | undefined;
708
+ }>, z.ZodObject<z.objectUtil.extendShape<{
709
+ type: z.ZodString;
710
+ }, {
254
711
  type: z.ZodLiteral<"regex_match">;
255
712
  pattern: z.ZodString;
256
- }, z.core.$strip>, z.ZodObject<{
713
+ }>, "strip", z.ZodTypeAny, {
714
+ type: "regex_match";
715
+ pattern: string;
716
+ }, {
717
+ type: "regex_match";
718
+ pattern: string;
719
+ }>, z.ZodObject<z.objectUtil.extendShape<{
720
+ type: z.ZodString;
721
+ }, {
257
722
  type: z.ZodLiteral<"file_exists">;
258
723
  path: z.ZodString;
259
- }, z.core.$strip>, z.ZodObject<{
724
+ }>, "strip", z.ZodTypeAny, {
725
+ path: string;
726
+ type: "file_exists";
727
+ }, {
728
+ path: string;
729
+ type: "file_exists";
730
+ }>, z.ZodObject<z.objectUtil.extendShape<{
731
+ type: z.ZodString;
732
+ }, {
260
733
  type: z.ZodLiteral<"trajectory_contains_action">;
261
734
  action: z.ZodString;
262
- }, z.core.$strip>, z.ZodObject<{
735
+ }>, "strip", z.ZodTypeAny, {
736
+ type: "trajectory_contains_action";
737
+ action: string;
738
+ }, {
739
+ type: "trajectory_contains_action";
740
+ action: string;
741
+ }>, z.ZodObject<z.objectUtil.extendShape<{
742
+ type: z.ZodString;
743
+ }, {
263
744
  type: z.ZodLiteral<"llm_judge">;
264
745
  prompt: z.ZodString;
265
746
  expected: z.ZodString;
266
747
  model_type: z.ZodOptional<z.ZodString>;
267
748
  temperature: z.ZodOptional<z.ZodNumber>;
268
749
  json_schema: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
269
- capabilities: z.ZodOptional<z.ZodArray<z.ZodString>>;
270
- }, z.core.$strip>, z.ZodObject<{
750
+ capabilities: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
751
+ }>, "strip", z.ZodTypeAny, {
752
+ type: "llm_judge";
753
+ expected: string;
754
+ prompt: string;
755
+ model_type?: string | undefined;
756
+ temperature?: number | undefined;
757
+ json_schema?: Record<string, any> | undefined;
758
+ capabilities?: string[] | undefined;
759
+ }, {
760
+ type: "llm_judge";
761
+ expected: string;
762
+ prompt: string;
763
+ model_type?: string | undefined;
764
+ temperature?: number | undefined;
765
+ json_schema?: Record<string, any> | undefined;
766
+ capabilities?: string[] | undefined;
767
+ }>, z.ZodObject<z.objectUtil.extendShape<{
768
+ type: z.ZodString;
769
+ }, {
271
770
  type: z.ZodLiteral<"execution_time">;
272
771
  max_duration_ms: z.ZodNumber;
273
772
  min_duration_ms: z.ZodOptional<z.ZodNumber>;
274
773
  target_duration_ms: z.ZodOptional<z.ZodNumber>;
275
- }, z.core.$strip>, z.ZodObject<{
774
+ }>, "strip", z.ZodTypeAny, {
775
+ type: "execution_time";
776
+ max_duration_ms: number;
777
+ min_duration_ms?: number | undefined;
778
+ target_duration_ms?: number | undefined;
779
+ }, {
780
+ type: "execution_time";
781
+ max_duration_ms: number;
782
+ min_duration_ms?: number | undefined;
783
+ target_duration_ms?: number | undefined;
784
+ }>, z.ZodObject<z.objectUtil.extendShape<{
785
+ type: z.ZodString;
786
+ }, {
276
787
  type: z.ZodLiteral<"conversation_length">;
277
788
  min_turns: z.ZodOptional<z.ZodNumber>;
278
789
  max_turns: z.ZodOptional<z.ZodNumber>;
279
790
  optimal_turns: z.ZodOptional<z.ZodNumber>;
280
- target_range: z.ZodOptional<z.ZodArray<z.ZodNumber>>;
281
- }, z.core.$strip>, z.ZodObject<{
791
+ target_range: z.ZodEffects<z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>, number[] | undefined, number[] | undefined>;
792
+ }>, "strip", z.ZodTypeAny, {
793
+ type: "conversation_length";
794
+ min_turns?: number | undefined;
795
+ max_turns?: number | undefined;
796
+ optimal_turns?: number | undefined;
797
+ target_range?: number[] | undefined;
798
+ }, {
799
+ type: "conversation_length";
800
+ min_turns?: number | undefined;
801
+ max_turns?: number | undefined;
802
+ optimal_turns?: number | undefined;
803
+ target_range?: number[] | undefined;
804
+ }>, z.ZodObject<z.objectUtil.extendShape<{
805
+ type: z.ZodString;
806
+ }, {
282
807
  type: z.ZodLiteral<"conversation_flow">;
283
- required_patterns: z.ZodArray<z.ZodEnum<{
284
- question_then_answer: "question_then_answer";
285
- problem_then_solution: "problem_then_solution";
286
- clarification_cycle: "clarification_cycle";
287
- empathy_then_solution: "empathy_then_solution";
288
- escalation_pattern: "escalation_pattern";
289
- }>>;
808
+ required_patterns: z.ZodArray<z.ZodEnum<["question_then_answer", "problem_then_solution", "clarification_cycle", "empathy_then_solution", "escalation_pattern"]>, "many">;
290
809
  flow_quality_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
291
- }, z.core.$strip>, z.ZodObject<{
810
+ }>, "strip", z.ZodTypeAny, {
811
+ type: "conversation_flow";
812
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
813
+ flow_quality_threshold: number;
814
+ }, {
815
+ type: "conversation_flow";
816
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
817
+ flow_quality_threshold?: number | undefined;
818
+ }>, z.ZodObject<z.objectUtil.extendShape<{
819
+ type: z.ZodString;
820
+ }, {
292
821
  type: z.ZodLiteral<"user_satisfaction">;
293
822
  satisfaction_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
294
823
  indicators: z.ZodOptional<z.ZodObject<{
295
- positive: z.ZodOptional<z.ZodArray<z.ZodString>>;
296
- negative: z.ZodOptional<z.ZodArray<z.ZodString>>;
297
- }, z.core.$strip>>;
298
- measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
299
- llm_judge: "llm_judge";
300
- sentiment_analysis: "sentiment_analysis";
301
- keyword_analysis: "keyword_analysis";
302
- }>>>;
303
- }, z.core.$strip>, z.ZodObject<{
824
+ positive: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
825
+ negative: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
826
+ }, "strip", z.ZodTypeAny, {
827
+ positive?: string[] | undefined;
828
+ negative?: string[] | undefined;
829
+ }, {
830
+ positive?: string[] | undefined;
831
+ negative?: string[] | undefined;
832
+ }>>;
833
+ measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<["sentiment_analysis", "keyword_analysis", "llm_judge"]>>>;
834
+ }>, "strip", z.ZodTypeAny, {
835
+ type: "user_satisfaction";
836
+ satisfaction_threshold: number;
837
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
838
+ indicators?: {
839
+ positive?: string[] | undefined;
840
+ negative?: string[] | undefined;
841
+ } | undefined;
842
+ }, {
843
+ type: "user_satisfaction";
844
+ satisfaction_threshold?: number | undefined;
845
+ indicators?: {
846
+ positive?: string[] | undefined;
847
+ negative?: string[] | undefined;
848
+ } | undefined;
849
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
850
+ }>, z.ZodObject<z.objectUtil.extendShape<{
851
+ type: z.ZodString;
852
+ }, {
304
853
  type: z.ZodLiteral<"context_retention">;
305
- test_memory_of: z.ZodArray<z.ZodString>;
854
+ test_memory_of: z.ZodArray<z.ZodString, "many">;
306
855
  retention_turns: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
307
856
  memory_accuracy_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
308
- }, z.core.$strip>], "type">>>>;
309
- final_evaluations: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<[z.ZodObject<{
857
+ }>, "strip", z.ZodTypeAny, {
858
+ type: "context_retention";
859
+ test_memory_of: string[];
860
+ retention_turns: number;
861
+ memory_accuracy_threshold: number;
862
+ }, {
863
+ type: "context_retention";
864
+ test_memory_of: string[];
865
+ retention_turns?: number | undefined;
866
+ memory_accuracy_threshold?: number | undefined;
867
+ }>]>, "many">>>;
868
+ final_evaluations: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<"type", [z.ZodObject<z.objectUtil.extendShape<{
869
+ type: z.ZodString;
870
+ }, {
310
871
  type: z.ZodLiteral<"string_contains">;
311
872
  value: z.ZodString;
312
873
  case_sensitive: z.ZodOptional<z.ZodBoolean>;
313
- }, z.core.$strip>, z.ZodObject<{
874
+ }>, "strip", z.ZodTypeAny, {
875
+ value: string;
876
+ type: "string_contains";
877
+ case_sensitive?: boolean | undefined;
878
+ }, {
879
+ value: string;
880
+ type: "string_contains";
881
+ case_sensitive?: boolean | undefined;
882
+ }>, z.ZodObject<z.objectUtil.extendShape<{
883
+ type: z.ZodString;
884
+ }, {
314
885
  type: z.ZodLiteral<"regex_match">;
315
886
  pattern: z.ZodString;
316
- }, z.core.$strip>, z.ZodObject<{
887
+ }>, "strip", z.ZodTypeAny, {
888
+ type: "regex_match";
889
+ pattern: string;
890
+ }, {
891
+ type: "regex_match";
892
+ pattern: string;
893
+ }>, z.ZodObject<z.objectUtil.extendShape<{
894
+ type: z.ZodString;
895
+ }, {
317
896
  type: z.ZodLiteral<"file_exists">;
318
897
  path: z.ZodString;
319
- }, z.core.$strip>, z.ZodObject<{
898
+ }>, "strip", z.ZodTypeAny, {
899
+ path: string;
900
+ type: "file_exists";
901
+ }, {
902
+ path: string;
903
+ type: "file_exists";
904
+ }>, z.ZodObject<z.objectUtil.extendShape<{
905
+ type: z.ZodString;
906
+ }, {
320
907
  type: z.ZodLiteral<"trajectory_contains_action">;
321
908
  action: z.ZodString;
322
- }, z.core.$strip>, z.ZodObject<{
909
+ }>, "strip", z.ZodTypeAny, {
910
+ type: "trajectory_contains_action";
911
+ action: string;
912
+ }, {
913
+ type: "trajectory_contains_action";
914
+ action: string;
915
+ }>, z.ZodObject<z.objectUtil.extendShape<{
916
+ type: z.ZodString;
917
+ }, {
323
918
  type: z.ZodLiteral<"llm_judge">;
324
919
  prompt: z.ZodString;
325
920
  expected: z.ZodString;
326
921
  model_type: z.ZodOptional<z.ZodString>;
327
922
  temperature: z.ZodOptional<z.ZodNumber>;
328
923
  json_schema: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
329
- capabilities: z.ZodOptional<z.ZodArray<z.ZodString>>;
330
- }, z.core.$strip>, z.ZodObject<{
924
+ capabilities: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
925
+ }>, "strip", z.ZodTypeAny, {
926
+ type: "llm_judge";
927
+ expected: string;
928
+ prompt: string;
929
+ model_type?: string | undefined;
930
+ temperature?: number | undefined;
931
+ json_schema?: Record<string, any> | undefined;
932
+ capabilities?: string[] | undefined;
933
+ }, {
934
+ type: "llm_judge";
935
+ expected: string;
936
+ prompt: string;
937
+ model_type?: string | undefined;
938
+ temperature?: number | undefined;
939
+ json_schema?: Record<string, any> | undefined;
940
+ capabilities?: string[] | undefined;
941
+ }>, z.ZodObject<z.objectUtil.extendShape<{
942
+ type: z.ZodString;
943
+ }, {
331
944
  type: z.ZodLiteral<"execution_time">;
332
945
  max_duration_ms: z.ZodNumber;
333
946
  min_duration_ms: z.ZodOptional<z.ZodNumber>;
334
947
  target_duration_ms: z.ZodOptional<z.ZodNumber>;
335
- }, z.core.$strip>, z.ZodObject<{
948
+ }>, "strip", z.ZodTypeAny, {
949
+ type: "execution_time";
950
+ max_duration_ms: number;
951
+ min_duration_ms?: number | undefined;
952
+ target_duration_ms?: number | undefined;
953
+ }, {
954
+ type: "execution_time";
955
+ max_duration_ms: number;
956
+ min_duration_ms?: number | undefined;
957
+ target_duration_ms?: number | undefined;
958
+ }>, z.ZodObject<z.objectUtil.extendShape<{
959
+ type: z.ZodString;
960
+ }, {
336
961
  type: z.ZodLiteral<"conversation_length">;
337
962
  min_turns: z.ZodOptional<z.ZodNumber>;
338
963
  max_turns: z.ZodOptional<z.ZodNumber>;
339
964
  optimal_turns: z.ZodOptional<z.ZodNumber>;
340
- target_range: z.ZodOptional<z.ZodArray<z.ZodNumber>>;
341
- }, z.core.$strip>, z.ZodObject<{
965
+ target_range: z.ZodEffects<z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>, number[] | undefined, number[] | undefined>;
966
+ }>, "strip", z.ZodTypeAny, {
967
+ type: "conversation_length";
968
+ min_turns?: number | undefined;
969
+ max_turns?: number | undefined;
970
+ optimal_turns?: number | undefined;
971
+ target_range?: number[] | undefined;
972
+ }, {
973
+ type: "conversation_length";
974
+ min_turns?: number | undefined;
975
+ max_turns?: number | undefined;
976
+ optimal_turns?: number | undefined;
977
+ target_range?: number[] | undefined;
978
+ }>, z.ZodObject<z.objectUtil.extendShape<{
979
+ type: z.ZodString;
980
+ }, {
342
981
  type: z.ZodLiteral<"conversation_flow">;
343
- required_patterns: z.ZodArray<z.ZodEnum<{
344
- question_then_answer: "question_then_answer";
345
- problem_then_solution: "problem_then_solution";
346
- clarification_cycle: "clarification_cycle";
347
- empathy_then_solution: "empathy_then_solution";
348
- escalation_pattern: "escalation_pattern";
349
- }>>;
982
+ required_patterns: z.ZodArray<z.ZodEnum<["question_then_answer", "problem_then_solution", "clarification_cycle", "empathy_then_solution", "escalation_pattern"]>, "many">;
350
983
  flow_quality_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
351
- }, z.core.$strip>, z.ZodObject<{
984
+ }>, "strip", z.ZodTypeAny, {
985
+ type: "conversation_flow";
986
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
987
+ flow_quality_threshold: number;
988
+ }, {
989
+ type: "conversation_flow";
990
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
991
+ flow_quality_threshold?: number | undefined;
992
+ }>, z.ZodObject<z.objectUtil.extendShape<{
993
+ type: z.ZodString;
994
+ }, {
352
995
  type: z.ZodLiteral<"user_satisfaction">;
353
996
  satisfaction_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
354
997
  indicators: z.ZodOptional<z.ZodObject<{
355
- positive: z.ZodOptional<z.ZodArray<z.ZodString>>;
356
- negative: z.ZodOptional<z.ZodArray<z.ZodString>>;
357
- }, z.core.$strip>>;
358
- measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<{
359
- llm_judge: "llm_judge";
360
- sentiment_analysis: "sentiment_analysis";
361
- keyword_analysis: "keyword_analysis";
362
- }>>>;
363
- }, z.core.$strip>, z.ZodObject<{
998
+ positive: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
999
+ negative: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
1000
+ }, "strip", z.ZodTypeAny, {
1001
+ positive?: string[] | undefined;
1002
+ negative?: string[] | undefined;
1003
+ }, {
1004
+ positive?: string[] | undefined;
1005
+ negative?: string[] | undefined;
1006
+ }>>;
1007
+ measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<["sentiment_analysis", "keyword_analysis", "llm_judge"]>>>;
1008
+ }>, "strip", z.ZodTypeAny, {
1009
+ type: "user_satisfaction";
1010
+ satisfaction_threshold: number;
1011
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1012
+ indicators?: {
1013
+ positive?: string[] | undefined;
1014
+ negative?: string[] | undefined;
1015
+ } | undefined;
1016
+ }, {
1017
+ type: "user_satisfaction";
1018
+ satisfaction_threshold?: number | undefined;
1019
+ indicators?: {
1020
+ positive?: string[] | undefined;
1021
+ negative?: string[] | undefined;
1022
+ } | undefined;
1023
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1024
+ }>, z.ZodObject<z.objectUtil.extendShape<{
1025
+ type: z.ZodString;
1026
+ }, {
364
1027
  type: z.ZodLiteral<"context_retention">;
365
- test_memory_of: z.ZodArray<z.ZodString>;
1028
+ test_memory_of: z.ZodArray<z.ZodString, "many">;
366
1029
  retention_turns: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
367
1030
  memory_accuracy_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
368
- }, z.core.$strip>], "type">>>>;
1031
+ }>, "strip", z.ZodTypeAny, {
1032
+ type: "context_retention";
1033
+ test_memory_of: string[];
1034
+ retention_turns: number;
1035
+ memory_accuracy_threshold: number;
1036
+ }, {
1037
+ type: "context_retention";
1038
+ test_memory_of: string[];
1039
+ retention_turns?: number | undefined;
1040
+ memory_accuracy_threshold?: number | undefined;
1041
+ }>]>, "many">>>;
369
1042
  debug_options: z.ZodDefault<z.ZodOptional<z.ZodObject<{
370
1043
  log_user_simulation: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
371
1044
  log_turn_decisions: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
372
1045
  export_full_transcript: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
373
- }, z.core.$strip>>>;
374
- }, z.core.$strip>>;
375
- }, z.core.$strip>>;
1046
+ }, "strip", z.ZodTypeAny, {
1047
+ log_user_simulation: boolean;
1048
+ log_turn_decisions: boolean;
1049
+ export_full_transcript: boolean;
1050
+ }, {
1051
+ log_user_simulation?: boolean | undefined;
1052
+ log_turn_decisions?: boolean | undefined;
1053
+ export_full_transcript?: boolean | undefined;
1054
+ }>>>;
1055
+ }, "strip", z.ZodTypeAny, {
1056
+ max_turns: number;
1057
+ timeout_per_turn_ms: number;
1058
+ total_timeout_ms: number;
1059
+ user_simulator: {
1060
+ model_type: string;
1061
+ temperature: number;
1062
+ max_tokens: number;
1063
+ persona: string;
1064
+ objective: string;
1065
+ constraints: string[];
1066
+ knowledge_level: "beginner" | "intermediate" | "expert";
1067
+ style?: string | undefined;
1068
+ emotional_state?: string | undefined;
1069
+ };
1070
+ termination_conditions: {
1071
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1072
+ description?: string | undefined;
1073
+ llm_judge?: {
1074
+ prompt: string;
1075
+ threshold: number;
1076
+ } | undefined;
1077
+ keywords?: string[] | undefined;
1078
+ }[];
1079
+ turn_evaluations: ({
1080
+ value: string;
1081
+ type: "string_contains";
1082
+ case_sensitive?: boolean | undefined;
1083
+ } | {
1084
+ type: "regex_match";
1085
+ pattern: string;
1086
+ } | {
1087
+ path: string;
1088
+ type: "file_exists";
1089
+ } | {
1090
+ type: "trajectory_contains_action";
1091
+ action: string;
1092
+ } | {
1093
+ type: "llm_judge";
1094
+ expected: string;
1095
+ prompt: string;
1096
+ model_type?: string | undefined;
1097
+ temperature?: number | undefined;
1098
+ json_schema?: Record<string, any> | undefined;
1099
+ capabilities?: string[] | undefined;
1100
+ } | {
1101
+ type: "execution_time";
1102
+ max_duration_ms: number;
1103
+ min_duration_ms?: number | undefined;
1104
+ target_duration_ms?: number | undefined;
1105
+ } | {
1106
+ type: "conversation_length";
1107
+ min_turns?: number | undefined;
1108
+ max_turns?: number | undefined;
1109
+ optimal_turns?: number | undefined;
1110
+ target_range?: number[] | undefined;
1111
+ } | {
1112
+ type: "conversation_flow";
1113
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1114
+ flow_quality_threshold: number;
1115
+ } | {
1116
+ type: "user_satisfaction";
1117
+ satisfaction_threshold: number;
1118
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1119
+ indicators?: {
1120
+ positive?: string[] | undefined;
1121
+ negative?: string[] | undefined;
1122
+ } | undefined;
1123
+ } | {
1124
+ type: "context_retention";
1125
+ test_memory_of: string[];
1126
+ retention_turns: number;
1127
+ memory_accuracy_threshold: number;
1128
+ })[];
1129
+ final_evaluations: ({
1130
+ value: string;
1131
+ type: "string_contains";
1132
+ case_sensitive?: boolean | undefined;
1133
+ } | {
1134
+ type: "regex_match";
1135
+ pattern: string;
1136
+ } | {
1137
+ path: string;
1138
+ type: "file_exists";
1139
+ } | {
1140
+ type: "trajectory_contains_action";
1141
+ action: string;
1142
+ } | {
1143
+ type: "llm_judge";
1144
+ expected: string;
1145
+ prompt: string;
1146
+ model_type?: string | undefined;
1147
+ temperature?: number | undefined;
1148
+ json_schema?: Record<string, any> | undefined;
1149
+ capabilities?: string[] | undefined;
1150
+ } | {
1151
+ type: "execution_time";
1152
+ max_duration_ms: number;
1153
+ min_duration_ms?: number | undefined;
1154
+ target_duration_ms?: number | undefined;
1155
+ } | {
1156
+ type: "conversation_length";
1157
+ min_turns?: number | undefined;
1158
+ max_turns?: number | undefined;
1159
+ optimal_turns?: number | undefined;
1160
+ target_range?: number[] | undefined;
1161
+ } | {
1162
+ type: "conversation_flow";
1163
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1164
+ flow_quality_threshold: number;
1165
+ } | {
1166
+ type: "user_satisfaction";
1167
+ satisfaction_threshold: number;
1168
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1169
+ indicators?: {
1170
+ positive?: string[] | undefined;
1171
+ negative?: string[] | undefined;
1172
+ } | undefined;
1173
+ } | {
1174
+ type: "context_retention";
1175
+ test_memory_of: string[];
1176
+ retention_turns: number;
1177
+ memory_accuracy_threshold: number;
1178
+ })[];
1179
+ debug_options: {
1180
+ log_user_simulation: boolean;
1181
+ log_turn_decisions: boolean;
1182
+ export_full_transcript: boolean;
1183
+ };
1184
+ }, {
1185
+ max_turns: number;
1186
+ user_simulator: {
1187
+ persona: string;
1188
+ objective: string;
1189
+ style?: string | undefined;
1190
+ model_type?: string | undefined;
1191
+ temperature?: number | undefined;
1192
+ max_tokens?: number | undefined;
1193
+ constraints?: string[] | undefined;
1194
+ emotional_state?: string | undefined;
1195
+ knowledge_level?: "beginner" | "intermediate" | "expert" | undefined;
1196
+ };
1197
+ timeout_per_turn_ms?: number | undefined;
1198
+ total_timeout_ms?: number | undefined;
1199
+ termination_conditions?: {
1200
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1201
+ description?: string | undefined;
1202
+ llm_judge?: {
1203
+ prompt: string;
1204
+ threshold?: number | undefined;
1205
+ } | undefined;
1206
+ keywords?: string[] | undefined;
1207
+ }[] | undefined;
1208
+ turn_evaluations?: ({
1209
+ value: string;
1210
+ type: "string_contains";
1211
+ case_sensitive?: boolean | undefined;
1212
+ } | {
1213
+ type: "regex_match";
1214
+ pattern: string;
1215
+ } | {
1216
+ path: string;
1217
+ type: "file_exists";
1218
+ } | {
1219
+ type: "trajectory_contains_action";
1220
+ action: string;
1221
+ } | {
1222
+ type: "llm_judge";
1223
+ expected: string;
1224
+ prompt: string;
1225
+ model_type?: string | undefined;
1226
+ temperature?: number | undefined;
1227
+ json_schema?: Record<string, any> | undefined;
1228
+ capabilities?: string[] | undefined;
1229
+ } | {
1230
+ type: "execution_time";
1231
+ max_duration_ms: number;
1232
+ min_duration_ms?: number | undefined;
1233
+ target_duration_ms?: number | undefined;
1234
+ } | {
1235
+ type: "conversation_length";
1236
+ min_turns?: number | undefined;
1237
+ max_turns?: number | undefined;
1238
+ optimal_turns?: number | undefined;
1239
+ target_range?: number[] | undefined;
1240
+ } | {
1241
+ type: "conversation_flow";
1242
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1243
+ flow_quality_threshold?: number | undefined;
1244
+ } | {
1245
+ type: "user_satisfaction";
1246
+ satisfaction_threshold?: number | undefined;
1247
+ indicators?: {
1248
+ positive?: string[] | undefined;
1249
+ negative?: string[] | undefined;
1250
+ } | undefined;
1251
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1252
+ } | {
1253
+ type: "context_retention";
1254
+ test_memory_of: string[];
1255
+ retention_turns?: number | undefined;
1256
+ memory_accuracy_threshold?: number | undefined;
1257
+ })[] | undefined;
1258
+ final_evaluations?: ({
1259
+ value: string;
1260
+ type: "string_contains";
1261
+ case_sensitive?: boolean | undefined;
1262
+ } | {
1263
+ type: "regex_match";
1264
+ pattern: string;
1265
+ } | {
1266
+ path: string;
1267
+ type: "file_exists";
1268
+ } | {
1269
+ type: "trajectory_contains_action";
1270
+ action: string;
1271
+ } | {
1272
+ type: "llm_judge";
1273
+ expected: string;
1274
+ prompt: string;
1275
+ model_type?: string | undefined;
1276
+ temperature?: number | undefined;
1277
+ json_schema?: Record<string, any> | undefined;
1278
+ capabilities?: string[] | undefined;
1279
+ } | {
1280
+ type: "execution_time";
1281
+ max_duration_ms: number;
1282
+ min_duration_ms?: number | undefined;
1283
+ target_duration_ms?: number | undefined;
1284
+ } | {
1285
+ type: "conversation_length";
1286
+ min_turns?: number | undefined;
1287
+ max_turns?: number | undefined;
1288
+ optimal_turns?: number | undefined;
1289
+ target_range?: number[] | undefined;
1290
+ } | {
1291
+ type: "conversation_flow";
1292
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1293
+ flow_quality_threshold?: number | undefined;
1294
+ } | {
1295
+ type: "user_satisfaction";
1296
+ satisfaction_threshold?: number | undefined;
1297
+ indicators?: {
1298
+ positive?: string[] | undefined;
1299
+ negative?: string[] | undefined;
1300
+ } | undefined;
1301
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1302
+ } | {
1303
+ type: "context_retention";
1304
+ test_memory_of: string[];
1305
+ retention_turns?: number | undefined;
1306
+ memory_accuracy_threshold?: number | undefined;
1307
+ })[] | undefined;
1308
+ debug_options?: {
1309
+ log_user_simulation?: boolean | undefined;
1310
+ log_turn_decisions?: boolean | undefined;
1311
+ export_full_transcript?: boolean | undefined;
1312
+ } | undefined;
1313
+ }>>;
1314
+ }, "strip", z.ZodTypeAny, {
1315
+ evaluations: ({
1316
+ value: string;
1317
+ type: "string_contains";
1318
+ case_sensitive?: boolean | undefined;
1319
+ } | {
1320
+ type: "regex_match";
1321
+ pattern: string;
1322
+ } | {
1323
+ path: string;
1324
+ type: "file_exists";
1325
+ } | {
1326
+ type: "trajectory_contains_action";
1327
+ action: string;
1328
+ } | {
1329
+ type: "llm_judge";
1330
+ expected: string;
1331
+ prompt: string;
1332
+ model_type?: string | undefined;
1333
+ temperature?: number | undefined;
1334
+ json_schema?: Record<string, any> | undefined;
1335
+ capabilities?: string[] | undefined;
1336
+ } | {
1337
+ type: "execution_time";
1338
+ max_duration_ms: number;
1339
+ min_duration_ms?: number | undefined;
1340
+ target_duration_ms?: number | undefined;
1341
+ } | {
1342
+ type: "conversation_length";
1343
+ min_turns?: number | undefined;
1344
+ max_turns?: number | undefined;
1345
+ optimal_turns?: number | undefined;
1346
+ target_range?: number[] | undefined;
1347
+ } | {
1348
+ type: "conversation_flow";
1349
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1350
+ flow_quality_threshold: number;
1351
+ } | {
1352
+ type: "user_satisfaction";
1353
+ satisfaction_threshold: number;
1354
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1355
+ indicators?: {
1356
+ positive?: string[] | undefined;
1357
+ negative?: string[] | undefined;
1358
+ } | undefined;
1359
+ } | {
1360
+ type: "context_retention";
1361
+ test_memory_of: string[];
1362
+ retention_turns: number;
1363
+ memory_accuracy_threshold: number;
1364
+ })[];
1365
+ name?: string | undefined;
1366
+ code?: string | undefined;
1367
+ input?: string | undefined;
1368
+ lang?: string | undefined;
1369
+ conversation?: {
1370
+ max_turns: number;
1371
+ timeout_per_turn_ms: number;
1372
+ total_timeout_ms: number;
1373
+ user_simulator: {
1374
+ model_type: string;
1375
+ temperature: number;
1376
+ max_tokens: number;
1377
+ persona: string;
1378
+ objective: string;
1379
+ constraints: string[];
1380
+ knowledge_level: "beginner" | "intermediate" | "expert";
1381
+ style?: string | undefined;
1382
+ emotional_state?: string | undefined;
1383
+ };
1384
+ termination_conditions: {
1385
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1386
+ description?: string | undefined;
1387
+ llm_judge?: {
1388
+ prompt: string;
1389
+ threshold: number;
1390
+ } | undefined;
1391
+ keywords?: string[] | undefined;
1392
+ }[];
1393
+ turn_evaluations: ({
1394
+ value: string;
1395
+ type: "string_contains";
1396
+ case_sensitive?: boolean | undefined;
1397
+ } | {
1398
+ type: "regex_match";
1399
+ pattern: string;
1400
+ } | {
1401
+ path: string;
1402
+ type: "file_exists";
1403
+ } | {
1404
+ type: "trajectory_contains_action";
1405
+ action: string;
1406
+ } | {
1407
+ type: "llm_judge";
1408
+ expected: string;
1409
+ prompt: string;
1410
+ model_type?: string | undefined;
1411
+ temperature?: number | undefined;
1412
+ json_schema?: Record<string, any> | undefined;
1413
+ capabilities?: string[] | undefined;
1414
+ } | {
1415
+ type: "execution_time";
1416
+ max_duration_ms: number;
1417
+ min_duration_ms?: number | undefined;
1418
+ target_duration_ms?: number | undefined;
1419
+ } | {
1420
+ type: "conversation_length";
1421
+ min_turns?: number | undefined;
1422
+ max_turns?: number | undefined;
1423
+ optimal_turns?: number | undefined;
1424
+ target_range?: number[] | undefined;
1425
+ } | {
1426
+ type: "conversation_flow";
1427
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1428
+ flow_quality_threshold: number;
1429
+ } | {
1430
+ type: "user_satisfaction";
1431
+ satisfaction_threshold: number;
1432
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1433
+ indicators?: {
1434
+ positive?: string[] | undefined;
1435
+ negative?: string[] | undefined;
1436
+ } | undefined;
1437
+ } | {
1438
+ type: "context_retention";
1439
+ test_memory_of: string[];
1440
+ retention_turns: number;
1441
+ memory_accuracy_threshold: number;
1442
+ })[];
1443
+ final_evaluations: ({
1444
+ value: string;
1445
+ type: "string_contains";
1446
+ case_sensitive?: boolean | undefined;
1447
+ } | {
1448
+ type: "regex_match";
1449
+ pattern: string;
1450
+ } | {
1451
+ path: string;
1452
+ type: "file_exists";
1453
+ } | {
1454
+ type: "trajectory_contains_action";
1455
+ action: string;
1456
+ } | {
1457
+ type: "llm_judge";
1458
+ expected: string;
1459
+ prompt: string;
1460
+ model_type?: string | undefined;
1461
+ temperature?: number | undefined;
1462
+ json_schema?: Record<string, any> | undefined;
1463
+ capabilities?: string[] | undefined;
1464
+ } | {
1465
+ type: "execution_time";
1466
+ max_duration_ms: number;
1467
+ min_duration_ms?: number | undefined;
1468
+ target_duration_ms?: number | undefined;
1469
+ } | {
1470
+ type: "conversation_length";
1471
+ min_turns?: number | undefined;
1472
+ max_turns?: number | undefined;
1473
+ optimal_turns?: number | undefined;
1474
+ target_range?: number[] | undefined;
1475
+ } | {
1476
+ type: "conversation_flow";
1477
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1478
+ flow_quality_threshold: number;
1479
+ } | {
1480
+ type: "user_satisfaction";
1481
+ satisfaction_threshold: number;
1482
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1483
+ indicators?: {
1484
+ positive?: string[] | undefined;
1485
+ negative?: string[] | undefined;
1486
+ } | undefined;
1487
+ } | {
1488
+ type: "context_retention";
1489
+ test_memory_of: string[];
1490
+ retention_turns: number;
1491
+ memory_accuracy_threshold: number;
1492
+ })[];
1493
+ debug_options: {
1494
+ log_user_simulation: boolean;
1495
+ log_turn_decisions: boolean;
1496
+ export_full_transcript: boolean;
1497
+ };
1498
+ } | undefined;
1499
+ }, {
1500
+ evaluations: ({
1501
+ value: string;
1502
+ type: "string_contains";
1503
+ case_sensitive?: boolean | undefined;
1504
+ } | {
1505
+ type: "regex_match";
1506
+ pattern: string;
1507
+ } | {
1508
+ path: string;
1509
+ type: "file_exists";
1510
+ } | {
1511
+ type: "trajectory_contains_action";
1512
+ action: string;
1513
+ } | {
1514
+ type: "llm_judge";
1515
+ expected: string;
1516
+ prompt: string;
1517
+ model_type?: string | undefined;
1518
+ temperature?: number | undefined;
1519
+ json_schema?: Record<string, any> | undefined;
1520
+ capabilities?: string[] | undefined;
1521
+ } | {
1522
+ type: "execution_time";
1523
+ max_duration_ms: number;
1524
+ min_duration_ms?: number | undefined;
1525
+ target_duration_ms?: number | undefined;
1526
+ } | {
1527
+ type: "conversation_length";
1528
+ min_turns?: number | undefined;
1529
+ max_turns?: number | undefined;
1530
+ optimal_turns?: number | undefined;
1531
+ target_range?: number[] | undefined;
1532
+ } | {
1533
+ type: "conversation_flow";
1534
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1535
+ flow_quality_threshold?: number | undefined;
1536
+ } | {
1537
+ type: "user_satisfaction";
1538
+ satisfaction_threshold?: number | undefined;
1539
+ indicators?: {
1540
+ positive?: string[] | undefined;
1541
+ negative?: string[] | undefined;
1542
+ } | undefined;
1543
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1544
+ } | {
1545
+ type: "context_retention";
1546
+ test_memory_of: string[];
1547
+ retention_turns?: number | undefined;
1548
+ memory_accuracy_threshold?: number | undefined;
1549
+ })[];
1550
+ name?: string | undefined;
1551
+ code?: string | undefined;
1552
+ input?: string | undefined;
1553
+ lang?: string | undefined;
1554
+ conversation?: {
1555
+ max_turns: number;
1556
+ user_simulator: {
1557
+ persona: string;
1558
+ objective: string;
1559
+ style?: string | undefined;
1560
+ model_type?: string | undefined;
1561
+ temperature?: number | undefined;
1562
+ max_tokens?: number | undefined;
1563
+ constraints?: string[] | undefined;
1564
+ emotional_state?: string | undefined;
1565
+ knowledge_level?: "beginner" | "intermediate" | "expert" | undefined;
1566
+ };
1567
+ timeout_per_turn_ms?: number | undefined;
1568
+ total_timeout_ms?: number | undefined;
1569
+ termination_conditions?: {
1570
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1571
+ description?: string | undefined;
1572
+ llm_judge?: {
1573
+ prompt: string;
1574
+ threshold?: number | undefined;
1575
+ } | undefined;
1576
+ keywords?: string[] | undefined;
1577
+ }[] | undefined;
1578
+ turn_evaluations?: ({
1579
+ value: string;
1580
+ type: "string_contains";
1581
+ case_sensitive?: boolean | undefined;
1582
+ } | {
1583
+ type: "regex_match";
1584
+ pattern: string;
1585
+ } | {
1586
+ path: string;
1587
+ type: "file_exists";
1588
+ } | {
1589
+ type: "trajectory_contains_action";
1590
+ action: string;
1591
+ } | {
1592
+ type: "llm_judge";
1593
+ expected: string;
1594
+ prompt: string;
1595
+ model_type?: string | undefined;
1596
+ temperature?: number | undefined;
1597
+ json_schema?: Record<string, any> | undefined;
1598
+ capabilities?: string[] | undefined;
1599
+ } | {
1600
+ type: "execution_time";
1601
+ max_duration_ms: number;
1602
+ min_duration_ms?: number | undefined;
1603
+ target_duration_ms?: number | undefined;
1604
+ } | {
1605
+ type: "conversation_length";
1606
+ min_turns?: number | undefined;
1607
+ max_turns?: number | undefined;
1608
+ optimal_turns?: number | undefined;
1609
+ target_range?: number[] | undefined;
1610
+ } | {
1611
+ type: "conversation_flow";
1612
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1613
+ flow_quality_threshold?: number | undefined;
1614
+ } | {
1615
+ type: "user_satisfaction";
1616
+ satisfaction_threshold?: number | undefined;
1617
+ indicators?: {
1618
+ positive?: string[] | undefined;
1619
+ negative?: string[] | undefined;
1620
+ } | undefined;
1621
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1622
+ } | {
1623
+ type: "context_retention";
1624
+ test_memory_of: string[];
1625
+ retention_turns?: number | undefined;
1626
+ memory_accuracy_threshold?: number | undefined;
1627
+ })[] | undefined;
1628
+ final_evaluations?: ({
1629
+ value: string;
1630
+ type: "string_contains";
1631
+ case_sensitive?: boolean | undefined;
1632
+ } | {
1633
+ type: "regex_match";
1634
+ pattern: string;
1635
+ } | {
1636
+ path: string;
1637
+ type: "file_exists";
1638
+ } | {
1639
+ type: "trajectory_contains_action";
1640
+ action: string;
1641
+ } | {
1642
+ type: "llm_judge";
1643
+ expected: string;
1644
+ prompt: string;
1645
+ model_type?: string | undefined;
1646
+ temperature?: number | undefined;
1647
+ json_schema?: Record<string, any> | undefined;
1648
+ capabilities?: string[] | undefined;
1649
+ } | {
1650
+ type: "execution_time";
1651
+ max_duration_ms: number;
1652
+ min_duration_ms?: number | undefined;
1653
+ target_duration_ms?: number | undefined;
1654
+ } | {
1655
+ type: "conversation_length";
1656
+ min_turns?: number | undefined;
1657
+ max_turns?: number | undefined;
1658
+ optimal_turns?: number | undefined;
1659
+ target_range?: number[] | undefined;
1660
+ } | {
1661
+ type: "conversation_flow";
1662
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1663
+ flow_quality_threshold?: number | undefined;
1664
+ } | {
1665
+ type: "user_satisfaction";
1666
+ satisfaction_threshold?: number | undefined;
1667
+ indicators?: {
1668
+ positive?: string[] | undefined;
1669
+ negative?: string[] | undefined;
1670
+ } | undefined;
1671
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1672
+ } | {
1673
+ type: "context_retention";
1674
+ test_memory_of: string[];
1675
+ retention_turns?: number | undefined;
1676
+ memory_accuracy_threshold?: number | undefined;
1677
+ })[] | undefined;
1678
+ debug_options?: {
1679
+ log_user_simulation?: boolean | undefined;
1680
+ log_turn_decisions?: boolean | undefined;
1681
+ export_full_transcript?: boolean | undefined;
1682
+ } | undefined;
1683
+ } | undefined;
1684
+ }>, "many">;
376
1685
  judgment: z.ZodObject<{
377
- strategy: z.ZodEnum<{
378
- all_pass: "all_pass";
379
- any_pass: "any_pass";
380
- }>;
381
- }, z.core.$strip>;
382
- }, z.core.$strip>;
1686
+ strategy: z.ZodEnum<["all_pass", "any_pass"]>;
1687
+ }, "strip", z.ZodTypeAny, {
1688
+ strategy: "all_pass" | "any_pass";
1689
+ }, {
1690
+ strategy: "all_pass" | "any_pass";
1691
+ }>;
1692
+ }, "strip", z.ZodTypeAny, {
1693
+ name: string;
1694
+ run: {
1695
+ evaluations: ({
1696
+ value: string;
1697
+ type: "string_contains";
1698
+ case_sensitive?: boolean | undefined;
1699
+ } | {
1700
+ type: "regex_match";
1701
+ pattern: string;
1702
+ } | {
1703
+ path: string;
1704
+ type: "file_exists";
1705
+ } | {
1706
+ type: "trajectory_contains_action";
1707
+ action: string;
1708
+ } | {
1709
+ type: "llm_judge";
1710
+ expected: string;
1711
+ prompt: string;
1712
+ model_type?: string | undefined;
1713
+ temperature?: number | undefined;
1714
+ json_schema?: Record<string, any> | undefined;
1715
+ capabilities?: string[] | undefined;
1716
+ } | {
1717
+ type: "execution_time";
1718
+ max_duration_ms: number;
1719
+ min_duration_ms?: number | undefined;
1720
+ target_duration_ms?: number | undefined;
1721
+ } | {
1722
+ type: "conversation_length";
1723
+ min_turns?: number | undefined;
1724
+ max_turns?: number | undefined;
1725
+ optimal_turns?: number | undefined;
1726
+ target_range?: number[] | undefined;
1727
+ } | {
1728
+ type: "conversation_flow";
1729
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1730
+ flow_quality_threshold: number;
1731
+ } | {
1732
+ type: "user_satisfaction";
1733
+ satisfaction_threshold: number;
1734
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1735
+ indicators?: {
1736
+ positive?: string[] | undefined;
1737
+ negative?: string[] | undefined;
1738
+ } | undefined;
1739
+ } | {
1740
+ type: "context_retention";
1741
+ test_memory_of: string[];
1742
+ retention_turns: number;
1743
+ memory_accuracy_threshold: number;
1744
+ })[];
1745
+ name?: string | undefined;
1746
+ code?: string | undefined;
1747
+ input?: string | undefined;
1748
+ lang?: string | undefined;
1749
+ conversation?: {
1750
+ max_turns: number;
1751
+ timeout_per_turn_ms: number;
1752
+ total_timeout_ms: number;
1753
+ user_simulator: {
1754
+ model_type: string;
1755
+ temperature: number;
1756
+ max_tokens: number;
1757
+ persona: string;
1758
+ objective: string;
1759
+ constraints: string[];
1760
+ knowledge_level: "beginner" | "intermediate" | "expert";
1761
+ style?: string | undefined;
1762
+ emotional_state?: string | undefined;
1763
+ };
1764
+ termination_conditions: {
1765
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1766
+ description?: string | undefined;
1767
+ llm_judge?: {
1768
+ prompt: string;
1769
+ threshold: number;
1770
+ } | undefined;
1771
+ keywords?: string[] | undefined;
1772
+ }[];
1773
+ turn_evaluations: ({
1774
+ value: string;
1775
+ type: "string_contains";
1776
+ case_sensitive?: boolean | undefined;
1777
+ } | {
1778
+ type: "regex_match";
1779
+ pattern: string;
1780
+ } | {
1781
+ path: string;
1782
+ type: "file_exists";
1783
+ } | {
1784
+ type: "trajectory_contains_action";
1785
+ action: string;
1786
+ } | {
1787
+ type: "llm_judge";
1788
+ expected: string;
1789
+ prompt: string;
1790
+ model_type?: string | undefined;
1791
+ temperature?: number | undefined;
1792
+ json_schema?: Record<string, any> | undefined;
1793
+ capabilities?: string[] | undefined;
1794
+ } | {
1795
+ type: "execution_time";
1796
+ max_duration_ms: number;
1797
+ min_duration_ms?: number | undefined;
1798
+ target_duration_ms?: number | undefined;
1799
+ } | {
1800
+ type: "conversation_length";
1801
+ min_turns?: number | undefined;
1802
+ max_turns?: number | undefined;
1803
+ optimal_turns?: number | undefined;
1804
+ target_range?: number[] | undefined;
1805
+ } | {
1806
+ type: "conversation_flow";
1807
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1808
+ flow_quality_threshold: number;
1809
+ } | {
1810
+ type: "user_satisfaction";
1811
+ satisfaction_threshold: number;
1812
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1813
+ indicators?: {
1814
+ positive?: string[] | undefined;
1815
+ negative?: string[] | undefined;
1816
+ } | undefined;
1817
+ } | {
1818
+ type: "context_retention";
1819
+ test_memory_of: string[];
1820
+ retention_turns: number;
1821
+ memory_accuracy_threshold: number;
1822
+ })[];
1823
+ final_evaluations: ({
1824
+ value: string;
1825
+ type: "string_contains";
1826
+ case_sensitive?: boolean | undefined;
1827
+ } | {
1828
+ type: "regex_match";
1829
+ pattern: string;
1830
+ } | {
1831
+ path: string;
1832
+ type: "file_exists";
1833
+ } | {
1834
+ type: "trajectory_contains_action";
1835
+ action: string;
1836
+ } | {
1837
+ type: "llm_judge";
1838
+ expected: string;
1839
+ prompt: string;
1840
+ model_type?: string | undefined;
1841
+ temperature?: number | undefined;
1842
+ json_schema?: Record<string, any> | undefined;
1843
+ capabilities?: string[] | undefined;
1844
+ } | {
1845
+ type: "execution_time";
1846
+ max_duration_ms: number;
1847
+ min_duration_ms?: number | undefined;
1848
+ target_duration_ms?: number | undefined;
1849
+ } | {
1850
+ type: "conversation_length";
1851
+ min_turns?: number | undefined;
1852
+ max_turns?: number | undefined;
1853
+ optimal_turns?: number | undefined;
1854
+ target_range?: number[] | undefined;
1855
+ } | {
1856
+ type: "conversation_flow";
1857
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1858
+ flow_quality_threshold: number;
1859
+ } | {
1860
+ type: "user_satisfaction";
1861
+ satisfaction_threshold: number;
1862
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1863
+ indicators?: {
1864
+ positive?: string[] | undefined;
1865
+ negative?: string[] | undefined;
1866
+ } | undefined;
1867
+ } | {
1868
+ type: "context_retention";
1869
+ test_memory_of: string[];
1870
+ retention_turns: number;
1871
+ memory_accuracy_threshold: number;
1872
+ })[];
1873
+ debug_options: {
1874
+ log_user_simulation: boolean;
1875
+ log_turn_decisions: boolean;
1876
+ export_full_transcript: boolean;
1877
+ };
1878
+ } | undefined;
1879
+ }[];
1880
+ description: string;
1881
+ environment: {
1882
+ type: "local" | "e2b";
1883
+ };
1884
+ judgment: {
1885
+ strategy: "all_pass" | "any_pass";
1886
+ };
1887
+ plugins?: (string | {
1888
+ name: string;
1889
+ enabled: boolean;
1890
+ version?: string | undefined;
1891
+ config?: Record<string, any> | undefined;
1892
+ })[] | undefined;
1893
+ setup?: {
1894
+ mocks?: {
1895
+ method: string;
1896
+ error?: {
1897
+ message: string;
1898
+ code: string;
1899
+ status?: number | undefined;
1900
+ } | undefined;
1901
+ metadata?: {
1902
+ delay?: number | undefined;
1903
+ probability?: number | undefined;
1904
+ } | undefined;
1905
+ service?: string | undefined;
1906
+ when?: {
1907
+ context?: Record<string, any> | undefined;
1908
+ args?: any[] | undefined;
1909
+ input?: Record<string, any> | undefined;
1910
+ matcher?: string | undefined;
1911
+ partialArgs?: any[] | undefined;
1912
+ } | undefined;
1913
+ response?: any;
1914
+ responseFn?: string | undefined;
1915
+ }[] | undefined;
1916
+ virtual_fs?: Record<string, string> | undefined;
1917
+ } | undefined;
1918
+ }, {
1919
+ name: string;
1920
+ run: {
1921
+ evaluations: ({
1922
+ value: string;
1923
+ type: "string_contains";
1924
+ case_sensitive?: boolean | undefined;
1925
+ } | {
1926
+ type: "regex_match";
1927
+ pattern: string;
1928
+ } | {
1929
+ path: string;
1930
+ type: "file_exists";
1931
+ } | {
1932
+ type: "trajectory_contains_action";
1933
+ action: string;
1934
+ } | {
1935
+ type: "llm_judge";
1936
+ expected: string;
1937
+ prompt: string;
1938
+ model_type?: string | undefined;
1939
+ temperature?: number | undefined;
1940
+ json_schema?: Record<string, any> | undefined;
1941
+ capabilities?: string[] | undefined;
1942
+ } | {
1943
+ type: "execution_time";
1944
+ max_duration_ms: number;
1945
+ min_duration_ms?: number | undefined;
1946
+ target_duration_ms?: number | undefined;
1947
+ } | {
1948
+ type: "conversation_length";
1949
+ min_turns?: number | undefined;
1950
+ max_turns?: number | undefined;
1951
+ optimal_turns?: number | undefined;
1952
+ target_range?: number[] | undefined;
1953
+ } | {
1954
+ type: "conversation_flow";
1955
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1956
+ flow_quality_threshold?: number | undefined;
1957
+ } | {
1958
+ type: "user_satisfaction";
1959
+ satisfaction_threshold?: number | undefined;
1960
+ indicators?: {
1961
+ positive?: string[] | undefined;
1962
+ negative?: string[] | undefined;
1963
+ } | undefined;
1964
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1965
+ } | {
1966
+ type: "context_retention";
1967
+ test_memory_of: string[];
1968
+ retention_turns?: number | undefined;
1969
+ memory_accuracy_threshold?: number | undefined;
1970
+ })[];
1971
+ name?: string | undefined;
1972
+ code?: string | undefined;
1973
+ input?: string | undefined;
1974
+ lang?: string | undefined;
1975
+ conversation?: {
1976
+ max_turns: number;
1977
+ user_simulator: {
1978
+ persona: string;
1979
+ objective: string;
1980
+ style?: string | undefined;
1981
+ model_type?: string | undefined;
1982
+ temperature?: number | undefined;
1983
+ max_tokens?: number | undefined;
1984
+ constraints?: string[] | undefined;
1985
+ emotional_state?: string | undefined;
1986
+ knowledge_level?: "beginner" | "intermediate" | "expert" | undefined;
1987
+ };
1988
+ timeout_per_turn_ms?: number | undefined;
1989
+ total_timeout_ms?: number | undefined;
1990
+ termination_conditions?: {
1991
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1992
+ description?: string | undefined;
1993
+ llm_judge?: {
1994
+ prompt: string;
1995
+ threshold?: number | undefined;
1996
+ } | undefined;
1997
+ keywords?: string[] | undefined;
1998
+ }[] | undefined;
1999
+ turn_evaluations?: ({
2000
+ value: string;
2001
+ type: "string_contains";
2002
+ case_sensitive?: boolean | undefined;
2003
+ } | {
2004
+ type: "regex_match";
2005
+ pattern: string;
2006
+ } | {
2007
+ path: string;
2008
+ type: "file_exists";
2009
+ } | {
2010
+ type: "trajectory_contains_action";
2011
+ action: string;
2012
+ } | {
2013
+ type: "llm_judge";
2014
+ expected: string;
2015
+ prompt: string;
2016
+ model_type?: string | undefined;
2017
+ temperature?: number | undefined;
2018
+ json_schema?: Record<string, any> | undefined;
2019
+ capabilities?: string[] | undefined;
2020
+ } | {
2021
+ type: "execution_time";
2022
+ max_duration_ms: number;
2023
+ min_duration_ms?: number | undefined;
2024
+ target_duration_ms?: number | undefined;
2025
+ } | {
2026
+ type: "conversation_length";
2027
+ min_turns?: number | undefined;
2028
+ max_turns?: number | undefined;
2029
+ optimal_turns?: number | undefined;
2030
+ target_range?: number[] | undefined;
2031
+ } | {
2032
+ type: "conversation_flow";
2033
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
2034
+ flow_quality_threshold?: number | undefined;
2035
+ } | {
2036
+ type: "user_satisfaction";
2037
+ satisfaction_threshold?: number | undefined;
2038
+ indicators?: {
2039
+ positive?: string[] | undefined;
2040
+ negative?: string[] | undefined;
2041
+ } | undefined;
2042
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
2043
+ } | {
2044
+ type: "context_retention";
2045
+ test_memory_of: string[];
2046
+ retention_turns?: number | undefined;
2047
+ memory_accuracy_threshold?: number | undefined;
2048
+ })[] | undefined;
2049
+ final_evaluations?: ({
2050
+ value: string;
2051
+ type: "string_contains";
2052
+ case_sensitive?: boolean | undefined;
2053
+ } | {
2054
+ type: "regex_match";
2055
+ pattern: string;
2056
+ } | {
2057
+ path: string;
2058
+ type: "file_exists";
2059
+ } | {
2060
+ type: "trajectory_contains_action";
2061
+ action: string;
2062
+ } | {
2063
+ type: "llm_judge";
2064
+ expected: string;
2065
+ prompt: string;
2066
+ model_type?: string | undefined;
2067
+ temperature?: number | undefined;
2068
+ json_schema?: Record<string, any> | undefined;
2069
+ capabilities?: string[] | undefined;
2070
+ } | {
2071
+ type: "execution_time";
2072
+ max_duration_ms: number;
2073
+ min_duration_ms?: number | undefined;
2074
+ target_duration_ms?: number | undefined;
2075
+ } | {
2076
+ type: "conversation_length";
2077
+ min_turns?: number | undefined;
2078
+ max_turns?: number | undefined;
2079
+ optimal_turns?: number | undefined;
2080
+ target_range?: number[] | undefined;
2081
+ } | {
2082
+ type: "conversation_flow";
2083
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
2084
+ flow_quality_threshold?: number | undefined;
2085
+ } | {
2086
+ type: "user_satisfaction";
2087
+ satisfaction_threshold?: number | undefined;
2088
+ indicators?: {
2089
+ positive?: string[] | undefined;
2090
+ negative?: string[] | undefined;
2091
+ } | undefined;
2092
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
2093
+ } | {
2094
+ type: "context_retention";
2095
+ test_memory_of: string[];
2096
+ retention_turns?: number | undefined;
2097
+ memory_accuracy_threshold?: number | undefined;
2098
+ })[] | undefined;
2099
+ debug_options?: {
2100
+ log_user_simulation?: boolean | undefined;
2101
+ log_turn_decisions?: boolean | undefined;
2102
+ export_full_transcript?: boolean | undefined;
2103
+ } | undefined;
2104
+ } | undefined;
2105
+ }[];
2106
+ description: string;
2107
+ environment: {
2108
+ type: "local" | "e2b";
2109
+ };
2110
+ judgment: {
2111
+ strategy: "all_pass" | "any_pass";
2112
+ };
2113
+ plugins?: (string | {
2114
+ name: string;
2115
+ enabled?: boolean | undefined;
2116
+ version?: string | undefined;
2117
+ config?: Record<string, any> | undefined;
2118
+ })[] | undefined;
2119
+ setup?: {
2120
+ mocks?: {
2121
+ method: string;
2122
+ error?: {
2123
+ message: string;
2124
+ code: string;
2125
+ status?: number | undefined;
2126
+ } | undefined;
2127
+ metadata?: {
2128
+ delay?: number | undefined;
2129
+ probability?: number | undefined;
2130
+ } | undefined;
2131
+ service?: string | undefined;
2132
+ when?: {
2133
+ context?: Record<string, any> | undefined;
2134
+ args?: any[] | undefined;
2135
+ input?: Record<string, any> | undefined;
2136
+ matcher?: string | undefined;
2137
+ partialArgs?: any[] | undefined;
2138
+ } | undefined;
2139
+ response?: any;
2140
+ responseFn?: string | undefined;
2141
+ }[] | undefined;
2142
+ virtual_fs?: Record<string, string> | undefined;
2143
+ } | undefined;
2144
+ }>;
383
2145
  export type Scenario = z.infer<typeof ScenarioSchema>;
384
2146
  export type Evaluation = z.infer<typeof EvaluationSchema>;
385
2147
  export type PluginConfig = z.infer<typeof PluginConfigSchema>;
@@ -442,24 +2204,94 @@ export declare const ScenarioRunResultSchema: z.ZodObject<{
442
2204
  execution_time_seconds: z.ZodNumber;
443
2205
  llm_calls: z.ZodNumber;
444
2206
  total_tokens: z.ZodNumber;
445
- }, z.core.$catchall<z.ZodNumber>>;
2207
+ }, "strip", z.ZodNumber, z.objectOutputType<{
2208
+ execution_time_seconds: z.ZodNumber;
2209
+ llm_calls: z.ZodNumber;
2210
+ total_tokens: z.ZodNumber;
2211
+ }, z.ZodNumber, "strip">, z.objectInputType<{
2212
+ execution_time_seconds: z.ZodNumber;
2213
+ llm_calls: z.ZodNumber;
2214
+ total_tokens: z.ZodNumber;
2215
+ }, z.ZodNumber, "strip">>;
446
2216
  final_agent_response: z.ZodOptional<z.ZodString>;
447
2217
  evaluations: z.ZodArray<z.ZodObject<{
448
2218
  evaluator_type: z.ZodString;
449
2219
  success: z.ZodBoolean;
450
2220
  summary: z.ZodString;
451
2221
  details: z.ZodRecord<z.ZodString, z.ZodAny>;
452
- }, z.core.$strip>>;
2222
+ }, "strip", z.ZodTypeAny, {
2223
+ success: boolean;
2224
+ evaluator_type: string;
2225
+ summary: string;
2226
+ details: Record<string, any>;
2227
+ }, {
2228
+ success: boolean;
2229
+ evaluator_type: string;
2230
+ summary: string;
2231
+ details: Record<string, any>;
2232
+ }>, "many">;
453
2233
  trajectory: z.ZodArray<z.ZodObject<{
454
- type: z.ZodEnum<{
455
- action: "action";
456
- thought: "thought";
457
- observation: "observation";
458
- }>;
459
- timestamp: z.ZodString;
2234
+ type: z.ZodEnum<["thought", "action", "observation"]>;
2235
+ timestamp: z.ZodEffects<z.ZodString, string, string>;
460
2236
  content: z.ZodAny;
461
- }, z.core.$strip>>;
2237
+ }, "strip", z.ZodTypeAny, {
2238
+ type: "action" | "thought" | "observation";
2239
+ timestamp: string;
2240
+ content?: any;
2241
+ }, {
2242
+ type: "action" | "thought" | "observation";
2243
+ timestamp: string;
2244
+ content?: any;
2245
+ }>, "many">;
462
2246
  error: z.ZodNullable<z.ZodString>;
463
- }, z.core.$strip>;
2247
+ }, "strip", z.ZodTypeAny, {
2248
+ error: string | null;
2249
+ evaluations: {
2250
+ success: boolean;
2251
+ evaluator_type: string;
2252
+ summary: string;
2253
+ details: Record<string, any>;
2254
+ }[];
2255
+ run_id: string;
2256
+ matrix_combination_id: string;
2257
+ parameters: Record<string, any>;
2258
+ metrics: {
2259
+ execution_time_seconds: number;
2260
+ llm_calls: number;
2261
+ total_tokens: number;
2262
+ } & {
2263
+ [k: string]: number;
2264
+ };
2265
+ trajectory: {
2266
+ type: "action" | "thought" | "observation";
2267
+ timestamp: string;
2268
+ content?: any;
2269
+ }[];
2270
+ final_agent_response?: string | undefined;
2271
+ }, {
2272
+ error: string | null;
2273
+ evaluations: {
2274
+ success: boolean;
2275
+ evaluator_type: string;
2276
+ summary: string;
2277
+ details: Record<string, any>;
2278
+ }[];
2279
+ run_id: string;
2280
+ matrix_combination_id: string;
2281
+ parameters: Record<string, any>;
2282
+ metrics: {
2283
+ execution_time_seconds: number;
2284
+ llm_calls: number;
2285
+ total_tokens: number;
2286
+ } & {
2287
+ [k: string]: number;
2288
+ };
2289
+ trajectory: {
2290
+ type: "action" | "thought" | "observation";
2291
+ timestamp: string;
2292
+ content?: any;
2293
+ }[];
2294
+ final_agent_response?: string | undefined;
2295
+ }>;
464
2296
  export {};
465
2297
  //# sourceMappingURL=schema.d.ts.map