@elizaos/cli 1.5.8-alpha.6 → 1.5.8-alpha.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. package/dist/commands/dev/actions/dev-server.d.ts.map +1 -1
  2. package/dist/commands/dev/types.d.ts +4 -0
  3. package/dist/commands/dev/types.d.ts.map +1 -1
  4. package/dist/commands/dev/utils/file-watcher.d.ts.map +1 -1
  5. package/dist/commands/scenario/index.d.ts.map +1 -1
  6. package/dist/commands/scenario/src/ConversationEvaluators.d.ts +36 -0
  7. package/dist/commands/scenario/src/ConversationEvaluators.d.ts.map +1 -0
  8. package/dist/commands/scenario/src/ConversationManager.d.ts +107 -0
  9. package/dist/commands/scenario/src/ConversationManager.d.ts.map +1 -0
  10. package/dist/commands/scenario/src/EnhancedEvaluationEngine.d.ts.map +1 -1
  11. package/dist/commands/scenario/src/EvaluationEngine.d.ts.map +1 -1
  12. package/dist/commands/scenario/src/LocalEnvironmentProvider.d.ts +2 -0
  13. package/dist/commands/scenario/src/LocalEnvironmentProvider.d.ts.map +1 -1
  14. package/dist/commands/scenario/src/TrajectoryReconstructor.d.ts.map +1 -1
  15. package/dist/commands/scenario/src/UserSimulator.d.ts +44 -0
  16. package/dist/commands/scenario/src/UserSimulator.d.ts.map +1 -0
  17. package/dist/commands/scenario/src/conversation-types.d.ts +138 -0
  18. package/dist/commands/scenario/src/conversation-types.d.ts.map +1 -0
  19. package/dist/commands/scenario/src/runtime-factory.d.ts +9 -3
  20. package/dist/commands/scenario/src/runtime-factory.d.ts.map +1 -1
  21. package/dist/commands/scenario/src/schema.d.ts +1464 -0
  22. package/dist/commands/scenario/src/schema.d.ts.map +1 -1
  23. package/dist/index.js +4253 -2979
  24. package/dist/index.js.map +20 -17
  25. package/dist/templates/plugin-quick-starter/build.ts +3 -3
  26. package/dist/templates/plugin-quick-starter/package.json +2 -2
  27. package/dist/templates/plugin-starter/build.ts +3 -3
  28. package/dist/templates/plugin-starter/package.json +2 -2
  29. package/dist/templates/project-starter/build.ts +3 -3
  30. package/dist/templates/project-starter/package.json +6 -6
  31. package/dist/templates/project-tee-starter/build.ts +3 -3
  32. package/dist/templates/project-tee-starter/package.json +4 -4
  33. package/dist/version.d.ts +2 -2
  34. package/dist/version.js +2 -2
  35. package/package.json +7 -7
  36. package/templates/plugin-quick-starter/build.ts +3 -3
  37. package/templates/plugin-quick-starter/package.json +2 -2
  38. package/templates/plugin-starter/build.ts +3 -3
  39. package/templates/plugin-starter/package.json +2 -2
  40. package/templates/project-starter/build.ts +3 -3
  41. package/templates/project-starter/package.json +6 -6
  42. package/templates/project-tee-starter/build.ts +3 -3
  43. package/templates/project-tee-starter/package.json +4 -4
@@ -163,6 +163,89 @@ export declare const EvaluationSchema: z.ZodDiscriminatedUnion<"type", [z.ZodObj
163
163
  max_duration_ms: number;
164
164
  min_duration_ms?: number | undefined;
165
165
  target_duration_ms?: number | undefined;
166
+ }>, z.ZodObject<z.objectUtil.extendShape<{
167
+ type: z.ZodString;
168
+ }, {
169
+ type: z.ZodLiteral<"conversation_length">;
170
+ min_turns: z.ZodOptional<z.ZodNumber>;
171
+ max_turns: z.ZodOptional<z.ZodNumber>;
172
+ optimal_turns: z.ZodOptional<z.ZodNumber>;
173
+ target_range: z.ZodEffects<z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>, number[] | undefined, number[] | undefined>;
174
+ }>, "strip", z.ZodTypeAny, {
175
+ type: "conversation_length";
176
+ min_turns?: number | undefined;
177
+ max_turns?: number | undefined;
178
+ optimal_turns?: number | undefined;
179
+ target_range?: number[] | undefined;
180
+ }, {
181
+ type: "conversation_length";
182
+ min_turns?: number | undefined;
183
+ max_turns?: number | undefined;
184
+ optimal_turns?: number | undefined;
185
+ target_range?: number[] | undefined;
186
+ }>, z.ZodObject<z.objectUtil.extendShape<{
187
+ type: z.ZodString;
188
+ }, {
189
+ type: z.ZodLiteral<"conversation_flow">;
190
+ required_patterns: z.ZodArray<z.ZodEnum<["question_then_answer", "problem_then_solution", "clarification_cycle", "empathy_then_solution", "escalation_pattern"]>, "many">;
191
+ flow_quality_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
192
+ }>, "strip", z.ZodTypeAny, {
193
+ type: "conversation_flow";
194
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
195
+ flow_quality_threshold: number;
196
+ }, {
197
+ type: "conversation_flow";
198
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
199
+ flow_quality_threshold?: number | undefined;
200
+ }>, z.ZodObject<z.objectUtil.extendShape<{
201
+ type: z.ZodString;
202
+ }, {
203
+ type: z.ZodLiteral<"user_satisfaction">;
204
+ satisfaction_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
205
+ indicators: z.ZodOptional<z.ZodObject<{
206
+ positive: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
207
+ negative: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
208
+ }, "strip", z.ZodTypeAny, {
209
+ positive?: string[] | undefined;
210
+ negative?: string[] | undefined;
211
+ }, {
212
+ positive?: string[] | undefined;
213
+ negative?: string[] | undefined;
214
+ }>>;
215
+ measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<["sentiment_analysis", "keyword_analysis", "llm_judge"]>>>;
216
+ }>, "strip", z.ZodTypeAny, {
217
+ type: "user_satisfaction";
218
+ satisfaction_threshold: number;
219
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
220
+ indicators?: {
221
+ positive?: string[] | undefined;
222
+ negative?: string[] | undefined;
223
+ } | undefined;
224
+ }, {
225
+ type: "user_satisfaction";
226
+ satisfaction_threshold?: number | undefined;
227
+ indicators?: {
228
+ positive?: string[] | undefined;
229
+ negative?: string[] | undefined;
230
+ } | undefined;
231
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
232
+ }>, z.ZodObject<z.objectUtil.extendShape<{
233
+ type: z.ZodString;
234
+ }, {
235
+ type: z.ZodLiteral<"context_retention">;
236
+ test_memory_of: z.ZodArray<z.ZodString, "many">;
237
+ retention_turns: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
238
+ memory_accuracy_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
239
+ }>, "strip", z.ZodTypeAny, {
240
+ type: "context_retention";
241
+ test_memory_of: string[];
242
+ retention_turns: number;
243
+ memory_accuracy_threshold: number;
244
+ }, {
245
+ type: "context_retention";
246
+ test_memory_of: string[];
247
+ retention_turns?: number | undefined;
248
+ memory_accuracy_threshold?: number | undefined;
166
249
  }>]>;
167
250
  declare const PluginConfigSchema: z.ZodObject<{
168
251
  name: z.ZodString;
@@ -458,7 +541,776 @@ export declare const ScenarioSchema: z.ZodObject<{
458
541
  max_duration_ms: number;
459
542
  min_duration_ms?: number | undefined;
460
543
  target_duration_ms?: number | undefined;
544
+ }>, z.ZodObject<z.objectUtil.extendShape<{
545
+ type: z.ZodString;
546
+ }, {
547
+ type: z.ZodLiteral<"conversation_length">;
548
+ min_turns: z.ZodOptional<z.ZodNumber>;
549
+ max_turns: z.ZodOptional<z.ZodNumber>;
550
+ optimal_turns: z.ZodOptional<z.ZodNumber>;
551
+ target_range: z.ZodEffects<z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>, number[] | undefined, number[] | undefined>;
552
+ }>, "strip", z.ZodTypeAny, {
553
+ type: "conversation_length";
554
+ min_turns?: number | undefined;
555
+ max_turns?: number | undefined;
556
+ optimal_turns?: number | undefined;
557
+ target_range?: number[] | undefined;
558
+ }, {
559
+ type: "conversation_length";
560
+ min_turns?: number | undefined;
561
+ max_turns?: number | undefined;
562
+ optimal_turns?: number | undefined;
563
+ target_range?: number[] | undefined;
564
+ }>, z.ZodObject<z.objectUtil.extendShape<{
565
+ type: z.ZodString;
566
+ }, {
567
+ type: z.ZodLiteral<"conversation_flow">;
568
+ required_patterns: z.ZodArray<z.ZodEnum<["question_then_answer", "problem_then_solution", "clarification_cycle", "empathy_then_solution", "escalation_pattern"]>, "many">;
569
+ flow_quality_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
570
+ }>, "strip", z.ZodTypeAny, {
571
+ type: "conversation_flow";
572
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
573
+ flow_quality_threshold: number;
574
+ }, {
575
+ type: "conversation_flow";
576
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
577
+ flow_quality_threshold?: number | undefined;
578
+ }>, z.ZodObject<z.objectUtil.extendShape<{
579
+ type: z.ZodString;
580
+ }, {
581
+ type: z.ZodLiteral<"user_satisfaction">;
582
+ satisfaction_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
583
+ indicators: z.ZodOptional<z.ZodObject<{
584
+ positive: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
585
+ negative: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
586
+ }, "strip", z.ZodTypeAny, {
587
+ positive?: string[] | undefined;
588
+ negative?: string[] | undefined;
589
+ }, {
590
+ positive?: string[] | undefined;
591
+ negative?: string[] | undefined;
592
+ }>>;
593
+ measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<["sentiment_analysis", "keyword_analysis", "llm_judge"]>>>;
594
+ }>, "strip", z.ZodTypeAny, {
595
+ type: "user_satisfaction";
596
+ satisfaction_threshold: number;
597
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
598
+ indicators?: {
599
+ positive?: string[] | undefined;
600
+ negative?: string[] | undefined;
601
+ } | undefined;
602
+ }, {
603
+ type: "user_satisfaction";
604
+ satisfaction_threshold?: number | undefined;
605
+ indicators?: {
606
+ positive?: string[] | undefined;
607
+ negative?: string[] | undefined;
608
+ } | undefined;
609
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
610
+ }>, z.ZodObject<z.objectUtil.extendShape<{
611
+ type: z.ZodString;
612
+ }, {
613
+ type: z.ZodLiteral<"context_retention">;
614
+ test_memory_of: z.ZodArray<z.ZodString, "many">;
615
+ retention_turns: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
616
+ memory_accuracy_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
617
+ }>, "strip", z.ZodTypeAny, {
618
+ type: "context_retention";
619
+ test_memory_of: string[];
620
+ retention_turns: number;
621
+ memory_accuracy_threshold: number;
622
+ }, {
623
+ type: "context_retention";
624
+ test_memory_of: string[];
625
+ retention_turns?: number | undefined;
626
+ memory_accuracy_threshold?: number | undefined;
461
627
  }>]>, "many">;
628
+ conversation: z.ZodOptional<z.ZodObject<{
629
+ max_turns: z.ZodNumber;
630
+ timeout_per_turn_ms: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
631
+ total_timeout_ms: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
632
+ user_simulator: z.ZodObject<{
633
+ model_type: z.ZodDefault<z.ZodOptional<z.ZodString>>;
634
+ temperature: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
635
+ max_tokens: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
636
+ persona: z.ZodString;
637
+ objective: z.ZodString;
638
+ style: z.ZodOptional<z.ZodString>;
639
+ constraints: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodString, "many">>>;
640
+ emotional_state: z.ZodOptional<z.ZodString>;
641
+ knowledge_level: z.ZodDefault<z.ZodOptional<z.ZodEnum<["beginner", "intermediate", "expert"]>>>;
642
+ }, "strip", z.ZodTypeAny, {
643
+ model_type: string;
644
+ temperature: number;
645
+ max_tokens: number;
646
+ persona: string;
647
+ objective: string;
648
+ constraints: string[];
649
+ knowledge_level: "beginner" | "intermediate" | "expert";
650
+ style?: string | undefined;
651
+ emotional_state?: string | undefined;
652
+ }, {
653
+ persona: string;
654
+ objective: string;
655
+ style?: string | undefined;
656
+ model_type?: string | undefined;
657
+ temperature?: number | undefined;
658
+ max_tokens?: number | undefined;
659
+ constraints?: string[] | undefined;
660
+ emotional_state?: string | undefined;
661
+ knowledge_level?: "beginner" | "intermediate" | "expert" | undefined;
662
+ }>;
663
+ termination_conditions: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodObject<{
664
+ type: z.ZodEnum<["max_turns_reached", "user_expresses_satisfaction", "agent_provides_solution", "conversation_stuck", "escalation_needed", "goal_achieved", "custom_condition"]>;
665
+ description: z.ZodOptional<z.ZodString>;
666
+ keywords: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
667
+ llm_judge: z.ZodOptional<z.ZodObject<{
668
+ prompt: z.ZodString;
669
+ threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
670
+ }, "strip", z.ZodTypeAny, {
671
+ prompt: string;
672
+ threshold: number;
673
+ }, {
674
+ prompt: string;
675
+ threshold?: number | undefined;
676
+ }>>;
677
+ }, "strip", z.ZodTypeAny, {
678
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
679
+ description?: string | undefined;
680
+ llm_judge?: {
681
+ prompt: string;
682
+ threshold: number;
683
+ } | undefined;
684
+ keywords?: string[] | undefined;
685
+ }, {
686
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
687
+ description?: string | undefined;
688
+ llm_judge?: {
689
+ prompt: string;
690
+ threshold?: number | undefined;
691
+ } | undefined;
692
+ keywords?: string[] | undefined;
693
+ }>, "many">>>;
694
+ turn_evaluations: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<"type", [z.ZodObject<z.objectUtil.extendShape<{
695
+ type: z.ZodString;
696
+ }, {
697
+ type: z.ZodLiteral<"string_contains">;
698
+ value: z.ZodString;
699
+ case_sensitive: z.ZodOptional<z.ZodBoolean>;
700
+ }>, "strip", z.ZodTypeAny, {
701
+ value: string;
702
+ type: "string_contains";
703
+ case_sensitive?: boolean | undefined;
704
+ }, {
705
+ value: string;
706
+ type: "string_contains";
707
+ case_sensitive?: boolean | undefined;
708
+ }>, z.ZodObject<z.objectUtil.extendShape<{
709
+ type: z.ZodString;
710
+ }, {
711
+ type: z.ZodLiteral<"regex_match">;
712
+ pattern: z.ZodString;
713
+ }>, "strip", z.ZodTypeAny, {
714
+ type: "regex_match";
715
+ pattern: string;
716
+ }, {
717
+ type: "regex_match";
718
+ pattern: string;
719
+ }>, z.ZodObject<z.objectUtil.extendShape<{
720
+ type: z.ZodString;
721
+ }, {
722
+ type: z.ZodLiteral<"file_exists">;
723
+ path: z.ZodString;
724
+ }>, "strip", z.ZodTypeAny, {
725
+ path: string;
726
+ type: "file_exists";
727
+ }, {
728
+ path: string;
729
+ type: "file_exists";
730
+ }>, z.ZodObject<z.objectUtil.extendShape<{
731
+ type: z.ZodString;
732
+ }, {
733
+ type: z.ZodLiteral<"trajectory_contains_action">;
734
+ action: z.ZodString;
735
+ }>, "strip", z.ZodTypeAny, {
736
+ type: "trajectory_contains_action";
737
+ action: string;
738
+ }, {
739
+ type: "trajectory_contains_action";
740
+ action: string;
741
+ }>, z.ZodObject<z.objectUtil.extendShape<{
742
+ type: z.ZodString;
743
+ }, {
744
+ type: z.ZodLiteral<"llm_judge">;
745
+ prompt: z.ZodString;
746
+ expected: z.ZodString;
747
+ model_type: z.ZodOptional<z.ZodString>;
748
+ temperature: z.ZodOptional<z.ZodNumber>;
749
+ json_schema: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
750
+ capabilities: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
751
+ }>, "strip", z.ZodTypeAny, {
752
+ type: "llm_judge";
753
+ expected: string;
754
+ prompt: string;
755
+ model_type?: string | undefined;
756
+ temperature?: number | undefined;
757
+ json_schema?: Record<string, any> | undefined;
758
+ capabilities?: string[] | undefined;
759
+ }, {
760
+ type: "llm_judge";
761
+ expected: string;
762
+ prompt: string;
763
+ model_type?: string | undefined;
764
+ temperature?: number | undefined;
765
+ json_schema?: Record<string, any> | undefined;
766
+ capabilities?: string[] | undefined;
767
+ }>, z.ZodObject<z.objectUtil.extendShape<{
768
+ type: z.ZodString;
769
+ }, {
770
+ type: z.ZodLiteral<"execution_time">;
771
+ max_duration_ms: z.ZodNumber;
772
+ min_duration_ms: z.ZodOptional<z.ZodNumber>;
773
+ target_duration_ms: z.ZodOptional<z.ZodNumber>;
774
+ }>, "strip", z.ZodTypeAny, {
775
+ type: "execution_time";
776
+ max_duration_ms: number;
777
+ min_duration_ms?: number | undefined;
778
+ target_duration_ms?: number | undefined;
779
+ }, {
780
+ type: "execution_time";
781
+ max_duration_ms: number;
782
+ min_duration_ms?: number | undefined;
783
+ target_duration_ms?: number | undefined;
784
+ }>, z.ZodObject<z.objectUtil.extendShape<{
785
+ type: z.ZodString;
786
+ }, {
787
+ type: z.ZodLiteral<"conversation_length">;
788
+ min_turns: z.ZodOptional<z.ZodNumber>;
789
+ max_turns: z.ZodOptional<z.ZodNumber>;
790
+ optimal_turns: z.ZodOptional<z.ZodNumber>;
791
+ target_range: z.ZodEffects<z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>, number[] | undefined, number[] | undefined>;
792
+ }>, "strip", z.ZodTypeAny, {
793
+ type: "conversation_length";
794
+ min_turns?: number | undefined;
795
+ max_turns?: number | undefined;
796
+ optimal_turns?: number | undefined;
797
+ target_range?: number[] | undefined;
798
+ }, {
799
+ type: "conversation_length";
800
+ min_turns?: number | undefined;
801
+ max_turns?: number | undefined;
802
+ optimal_turns?: number | undefined;
803
+ target_range?: number[] | undefined;
804
+ }>, z.ZodObject<z.objectUtil.extendShape<{
805
+ type: z.ZodString;
806
+ }, {
807
+ type: z.ZodLiteral<"conversation_flow">;
808
+ required_patterns: z.ZodArray<z.ZodEnum<["question_then_answer", "problem_then_solution", "clarification_cycle", "empathy_then_solution", "escalation_pattern"]>, "many">;
809
+ flow_quality_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
810
+ }>, "strip", z.ZodTypeAny, {
811
+ type: "conversation_flow";
812
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
813
+ flow_quality_threshold: number;
814
+ }, {
815
+ type: "conversation_flow";
816
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
817
+ flow_quality_threshold?: number | undefined;
818
+ }>, z.ZodObject<z.objectUtil.extendShape<{
819
+ type: z.ZodString;
820
+ }, {
821
+ type: z.ZodLiteral<"user_satisfaction">;
822
+ satisfaction_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
823
+ indicators: z.ZodOptional<z.ZodObject<{
824
+ positive: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
825
+ negative: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
826
+ }, "strip", z.ZodTypeAny, {
827
+ positive?: string[] | undefined;
828
+ negative?: string[] | undefined;
829
+ }, {
830
+ positive?: string[] | undefined;
831
+ negative?: string[] | undefined;
832
+ }>>;
833
+ measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<["sentiment_analysis", "keyword_analysis", "llm_judge"]>>>;
834
+ }>, "strip", z.ZodTypeAny, {
835
+ type: "user_satisfaction";
836
+ satisfaction_threshold: number;
837
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
838
+ indicators?: {
839
+ positive?: string[] | undefined;
840
+ negative?: string[] | undefined;
841
+ } | undefined;
842
+ }, {
843
+ type: "user_satisfaction";
844
+ satisfaction_threshold?: number | undefined;
845
+ indicators?: {
846
+ positive?: string[] | undefined;
847
+ negative?: string[] | undefined;
848
+ } | undefined;
849
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
850
+ }>, z.ZodObject<z.objectUtil.extendShape<{
851
+ type: z.ZodString;
852
+ }, {
853
+ type: z.ZodLiteral<"context_retention">;
854
+ test_memory_of: z.ZodArray<z.ZodString, "many">;
855
+ retention_turns: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
856
+ memory_accuracy_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
857
+ }>, "strip", z.ZodTypeAny, {
858
+ type: "context_retention";
859
+ test_memory_of: string[];
860
+ retention_turns: number;
861
+ memory_accuracy_threshold: number;
862
+ }, {
863
+ type: "context_retention";
864
+ test_memory_of: string[];
865
+ retention_turns?: number | undefined;
866
+ memory_accuracy_threshold?: number | undefined;
867
+ }>]>, "many">>>;
868
+ final_evaluations: z.ZodDefault<z.ZodOptional<z.ZodArray<z.ZodDiscriminatedUnion<"type", [z.ZodObject<z.objectUtil.extendShape<{
869
+ type: z.ZodString;
870
+ }, {
871
+ type: z.ZodLiteral<"string_contains">;
872
+ value: z.ZodString;
873
+ case_sensitive: z.ZodOptional<z.ZodBoolean>;
874
+ }>, "strip", z.ZodTypeAny, {
875
+ value: string;
876
+ type: "string_contains";
877
+ case_sensitive?: boolean | undefined;
878
+ }, {
879
+ value: string;
880
+ type: "string_contains";
881
+ case_sensitive?: boolean | undefined;
882
+ }>, z.ZodObject<z.objectUtil.extendShape<{
883
+ type: z.ZodString;
884
+ }, {
885
+ type: z.ZodLiteral<"regex_match">;
886
+ pattern: z.ZodString;
887
+ }>, "strip", z.ZodTypeAny, {
888
+ type: "regex_match";
889
+ pattern: string;
890
+ }, {
891
+ type: "regex_match";
892
+ pattern: string;
893
+ }>, z.ZodObject<z.objectUtil.extendShape<{
894
+ type: z.ZodString;
895
+ }, {
896
+ type: z.ZodLiteral<"file_exists">;
897
+ path: z.ZodString;
898
+ }>, "strip", z.ZodTypeAny, {
899
+ path: string;
900
+ type: "file_exists";
901
+ }, {
902
+ path: string;
903
+ type: "file_exists";
904
+ }>, z.ZodObject<z.objectUtil.extendShape<{
905
+ type: z.ZodString;
906
+ }, {
907
+ type: z.ZodLiteral<"trajectory_contains_action">;
908
+ action: z.ZodString;
909
+ }>, "strip", z.ZodTypeAny, {
910
+ type: "trajectory_contains_action";
911
+ action: string;
912
+ }, {
913
+ type: "trajectory_contains_action";
914
+ action: string;
915
+ }>, z.ZodObject<z.objectUtil.extendShape<{
916
+ type: z.ZodString;
917
+ }, {
918
+ type: z.ZodLiteral<"llm_judge">;
919
+ prompt: z.ZodString;
920
+ expected: z.ZodString;
921
+ model_type: z.ZodOptional<z.ZodString>;
922
+ temperature: z.ZodOptional<z.ZodNumber>;
923
+ json_schema: z.ZodOptional<z.ZodRecord<z.ZodString, z.ZodAny>>;
924
+ capabilities: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
925
+ }>, "strip", z.ZodTypeAny, {
926
+ type: "llm_judge";
927
+ expected: string;
928
+ prompt: string;
929
+ model_type?: string | undefined;
930
+ temperature?: number | undefined;
931
+ json_schema?: Record<string, any> | undefined;
932
+ capabilities?: string[] | undefined;
933
+ }, {
934
+ type: "llm_judge";
935
+ expected: string;
936
+ prompt: string;
937
+ model_type?: string | undefined;
938
+ temperature?: number | undefined;
939
+ json_schema?: Record<string, any> | undefined;
940
+ capabilities?: string[] | undefined;
941
+ }>, z.ZodObject<z.objectUtil.extendShape<{
942
+ type: z.ZodString;
943
+ }, {
944
+ type: z.ZodLiteral<"execution_time">;
945
+ max_duration_ms: z.ZodNumber;
946
+ min_duration_ms: z.ZodOptional<z.ZodNumber>;
947
+ target_duration_ms: z.ZodOptional<z.ZodNumber>;
948
+ }>, "strip", z.ZodTypeAny, {
949
+ type: "execution_time";
950
+ max_duration_ms: number;
951
+ min_duration_ms?: number | undefined;
952
+ target_duration_ms?: number | undefined;
953
+ }, {
954
+ type: "execution_time";
955
+ max_duration_ms: number;
956
+ min_duration_ms?: number | undefined;
957
+ target_duration_ms?: number | undefined;
958
+ }>, z.ZodObject<z.objectUtil.extendShape<{
959
+ type: z.ZodString;
960
+ }, {
961
+ type: z.ZodLiteral<"conversation_length">;
962
+ min_turns: z.ZodOptional<z.ZodNumber>;
963
+ max_turns: z.ZodOptional<z.ZodNumber>;
964
+ optimal_turns: z.ZodOptional<z.ZodNumber>;
965
+ target_range: z.ZodEffects<z.ZodOptional<z.ZodArray<z.ZodNumber, "many">>, number[] | undefined, number[] | undefined>;
966
+ }>, "strip", z.ZodTypeAny, {
967
+ type: "conversation_length";
968
+ min_turns?: number | undefined;
969
+ max_turns?: number | undefined;
970
+ optimal_turns?: number | undefined;
971
+ target_range?: number[] | undefined;
972
+ }, {
973
+ type: "conversation_length";
974
+ min_turns?: number | undefined;
975
+ max_turns?: number | undefined;
976
+ optimal_turns?: number | undefined;
977
+ target_range?: number[] | undefined;
978
+ }>, z.ZodObject<z.objectUtil.extendShape<{
979
+ type: z.ZodString;
980
+ }, {
981
+ type: z.ZodLiteral<"conversation_flow">;
982
+ required_patterns: z.ZodArray<z.ZodEnum<["question_then_answer", "problem_then_solution", "clarification_cycle", "empathy_then_solution", "escalation_pattern"]>, "many">;
983
+ flow_quality_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
984
+ }>, "strip", z.ZodTypeAny, {
985
+ type: "conversation_flow";
986
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
987
+ flow_quality_threshold: number;
988
+ }, {
989
+ type: "conversation_flow";
990
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
991
+ flow_quality_threshold?: number | undefined;
992
+ }>, z.ZodObject<z.objectUtil.extendShape<{
993
+ type: z.ZodString;
994
+ }, {
995
+ type: z.ZodLiteral<"user_satisfaction">;
996
+ satisfaction_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
997
+ indicators: z.ZodOptional<z.ZodObject<{
998
+ positive: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
999
+ negative: z.ZodOptional<z.ZodArray<z.ZodString, "many">>;
1000
+ }, "strip", z.ZodTypeAny, {
1001
+ positive?: string[] | undefined;
1002
+ negative?: string[] | undefined;
1003
+ }, {
1004
+ positive?: string[] | undefined;
1005
+ negative?: string[] | undefined;
1006
+ }>>;
1007
+ measurement_method: z.ZodDefault<z.ZodOptional<z.ZodEnum<["sentiment_analysis", "keyword_analysis", "llm_judge"]>>>;
1008
+ }>, "strip", z.ZodTypeAny, {
1009
+ type: "user_satisfaction";
1010
+ satisfaction_threshold: number;
1011
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1012
+ indicators?: {
1013
+ positive?: string[] | undefined;
1014
+ negative?: string[] | undefined;
1015
+ } | undefined;
1016
+ }, {
1017
+ type: "user_satisfaction";
1018
+ satisfaction_threshold?: number | undefined;
1019
+ indicators?: {
1020
+ positive?: string[] | undefined;
1021
+ negative?: string[] | undefined;
1022
+ } | undefined;
1023
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1024
+ }>, z.ZodObject<z.objectUtil.extendShape<{
1025
+ type: z.ZodString;
1026
+ }, {
1027
+ type: z.ZodLiteral<"context_retention">;
1028
+ test_memory_of: z.ZodArray<z.ZodString, "many">;
1029
+ retention_turns: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
1030
+ memory_accuracy_threshold: z.ZodDefault<z.ZodOptional<z.ZodNumber>>;
1031
+ }>, "strip", z.ZodTypeAny, {
1032
+ type: "context_retention";
1033
+ test_memory_of: string[];
1034
+ retention_turns: number;
1035
+ memory_accuracy_threshold: number;
1036
+ }, {
1037
+ type: "context_retention";
1038
+ test_memory_of: string[];
1039
+ retention_turns?: number | undefined;
1040
+ memory_accuracy_threshold?: number | undefined;
1041
+ }>]>, "many">>>;
1042
+ debug_options: z.ZodDefault<z.ZodOptional<z.ZodObject<{
1043
+ log_user_simulation: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1044
+ log_turn_decisions: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1045
+ export_full_transcript: z.ZodDefault<z.ZodOptional<z.ZodBoolean>>;
1046
+ }, "strip", z.ZodTypeAny, {
1047
+ log_user_simulation: boolean;
1048
+ log_turn_decisions: boolean;
1049
+ export_full_transcript: boolean;
1050
+ }, {
1051
+ log_user_simulation?: boolean | undefined;
1052
+ log_turn_decisions?: boolean | undefined;
1053
+ export_full_transcript?: boolean | undefined;
1054
+ }>>>;
1055
+ }, "strip", z.ZodTypeAny, {
1056
+ max_turns: number;
1057
+ timeout_per_turn_ms: number;
1058
+ total_timeout_ms: number;
1059
+ user_simulator: {
1060
+ model_type: string;
1061
+ temperature: number;
1062
+ max_tokens: number;
1063
+ persona: string;
1064
+ objective: string;
1065
+ constraints: string[];
1066
+ knowledge_level: "beginner" | "intermediate" | "expert";
1067
+ style?: string | undefined;
1068
+ emotional_state?: string | undefined;
1069
+ };
1070
+ termination_conditions: {
1071
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1072
+ description?: string | undefined;
1073
+ llm_judge?: {
1074
+ prompt: string;
1075
+ threshold: number;
1076
+ } | undefined;
1077
+ keywords?: string[] | undefined;
1078
+ }[];
1079
+ turn_evaluations: ({
1080
+ value: string;
1081
+ type: "string_contains";
1082
+ case_sensitive?: boolean | undefined;
1083
+ } | {
1084
+ type: "regex_match";
1085
+ pattern: string;
1086
+ } | {
1087
+ path: string;
1088
+ type: "file_exists";
1089
+ } | {
1090
+ type: "trajectory_contains_action";
1091
+ action: string;
1092
+ } | {
1093
+ type: "llm_judge";
1094
+ expected: string;
1095
+ prompt: string;
1096
+ model_type?: string | undefined;
1097
+ temperature?: number | undefined;
1098
+ json_schema?: Record<string, any> | undefined;
1099
+ capabilities?: string[] | undefined;
1100
+ } | {
1101
+ type: "execution_time";
1102
+ max_duration_ms: number;
1103
+ min_duration_ms?: number | undefined;
1104
+ target_duration_ms?: number | undefined;
1105
+ } | {
1106
+ type: "conversation_length";
1107
+ min_turns?: number | undefined;
1108
+ max_turns?: number | undefined;
1109
+ optimal_turns?: number | undefined;
1110
+ target_range?: number[] | undefined;
1111
+ } | {
1112
+ type: "conversation_flow";
1113
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1114
+ flow_quality_threshold: number;
1115
+ } | {
1116
+ type: "user_satisfaction";
1117
+ satisfaction_threshold: number;
1118
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1119
+ indicators?: {
1120
+ positive?: string[] | undefined;
1121
+ negative?: string[] | undefined;
1122
+ } | undefined;
1123
+ } | {
1124
+ type: "context_retention";
1125
+ test_memory_of: string[];
1126
+ retention_turns: number;
1127
+ memory_accuracy_threshold: number;
1128
+ })[];
1129
+ final_evaluations: ({
1130
+ value: string;
1131
+ type: "string_contains";
1132
+ case_sensitive?: boolean | undefined;
1133
+ } | {
1134
+ type: "regex_match";
1135
+ pattern: string;
1136
+ } | {
1137
+ path: string;
1138
+ type: "file_exists";
1139
+ } | {
1140
+ type: "trajectory_contains_action";
1141
+ action: string;
1142
+ } | {
1143
+ type: "llm_judge";
1144
+ expected: string;
1145
+ prompt: string;
1146
+ model_type?: string | undefined;
1147
+ temperature?: number | undefined;
1148
+ json_schema?: Record<string, any> | undefined;
1149
+ capabilities?: string[] | undefined;
1150
+ } | {
1151
+ type: "execution_time";
1152
+ max_duration_ms: number;
1153
+ min_duration_ms?: number | undefined;
1154
+ target_duration_ms?: number | undefined;
1155
+ } | {
1156
+ type: "conversation_length";
1157
+ min_turns?: number | undefined;
1158
+ max_turns?: number | undefined;
1159
+ optimal_turns?: number | undefined;
1160
+ target_range?: number[] | undefined;
1161
+ } | {
1162
+ type: "conversation_flow";
1163
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1164
+ flow_quality_threshold: number;
1165
+ } | {
1166
+ type: "user_satisfaction";
1167
+ satisfaction_threshold: number;
1168
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1169
+ indicators?: {
1170
+ positive?: string[] | undefined;
1171
+ negative?: string[] | undefined;
1172
+ } | undefined;
1173
+ } | {
1174
+ type: "context_retention";
1175
+ test_memory_of: string[];
1176
+ retention_turns: number;
1177
+ memory_accuracy_threshold: number;
1178
+ })[];
1179
+ debug_options: {
1180
+ log_user_simulation: boolean;
1181
+ log_turn_decisions: boolean;
1182
+ export_full_transcript: boolean;
1183
+ };
1184
+ }, {
1185
+ max_turns: number;
1186
+ user_simulator: {
1187
+ persona: string;
1188
+ objective: string;
1189
+ style?: string | undefined;
1190
+ model_type?: string | undefined;
1191
+ temperature?: number | undefined;
1192
+ max_tokens?: number | undefined;
1193
+ constraints?: string[] | undefined;
1194
+ emotional_state?: string | undefined;
1195
+ knowledge_level?: "beginner" | "intermediate" | "expert" | undefined;
1196
+ };
1197
+ timeout_per_turn_ms?: number | undefined;
1198
+ total_timeout_ms?: number | undefined;
1199
+ termination_conditions?: {
1200
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1201
+ description?: string | undefined;
1202
+ llm_judge?: {
1203
+ prompt: string;
1204
+ threshold?: number | undefined;
1205
+ } | undefined;
1206
+ keywords?: string[] | undefined;
1207
+ }[] | undefined;
1208
+ turn_evaluations?: ({
1209
+ value: string;
1210
+ type: "string_contains";
1211
+ case_sensitive?: boolean | undefined;
1212
+ } | {
1213
+ type: "regex_match";
1214
+ pattern: string;
1215
+ } | {
1216
+ path: string;
1217
+ type: "file_exists";
1218
+ } | {
1219
+ type: "trajectory_contains_action";
1220
+ action: string;
1221
+ } | {
1222
+ type: "llm_judge";
1223
+ expected: string;
1224
+ prompt: string;
1225
+ model_type?: string | undefined;
1226
+ temperature?: number | undefined;
1227
+ json_schema?: Record<string, any> | undefined;
1228
+ capabilities?: string[] | undefined;
1229
+ } | {
1230
+ type: "execution_time";
1231
+ max_duration_ms: number;
1232
+ min_duration_ms?: number | undefined;
1233
+ target_duration_ms?: number | undefined;
1234
+ } | {
1235
+ type: "conversation_length";
1236
+ min_turns?: number | undefined;
1237
+ max_turns?: number | undefined;
1238
+ optimal_turns?: number | undefined;
1239
+ target_range?: number[] | undefined;
1240
+ } | {
1241
+ type: "conversation_flow";
1242
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1243
+ flow_quality_threshold?: number | undefined;
1244
+ } | {
1245
+ type: "user_satisfaction";
1246
+ satisfaction_threshold?: number | undefined;
1247
+ indicators?: {
1248
+ positive?: string[] | undefined;
1249
+ negative?: string[] | undefined;
1250
+ } | undefined;
1251
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1252
+ } | {
1253
+ type: "context_retention";
1254
+ test_memory_of: string[];
1255
+ retention_turns?: number | undefined;
1256
+ memory_accuracy_threshold?: number | undefined;
1257
+ })[] | undefined;
1258
+ final_evaluations?: ({
1259
+ value: string;
1260
+ type: "string_contains";
1261
+ case_sensitive?: boolean | undefined;
1262
+ } | {
1263
+ type: "regex_match";
1264
+ pattern: string;
1265
+ } | {
1266
+ path: string;
1267
+ type: "file_exists";
1268
+ } | {
1269
+ type: "trajectory_contains_action";
1270
+ action: string;
1271
+ } | {
1272
+ type: "llm_judge";
1273
+ expected: string;
1274
+ prompt: string;
1275
+ model_type?: string | undefined;
1276
+ temperature?: number | undefined;
1277
+ json_schema?: Record<string, any> | undefined;
1278
+ capabilities?: string[] | undefined;
1279
+ } | {
1280
+ type: "execution_time";
1281
+ max_duration_ms: number;
1282
+ min_duration_ms?: number | undefined;
1283
+ target_duration_ms?: number | undefined;
1284
+ } | {
1285
+ type: "conversation_length";
1286
+ min_turns?: number | undefined;
1287
+ max_turns?: number | undefined;
1288
+ optimal_turns?: number | undefined;
1289
+ target_range?: number[] | undefined;
1290
+ } | {
1291
+ type: "conversation_flow";
1292
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1293
+ flow_quality_threshold?: number | undefined;
1294
+ } | {
1295
+ type: "user_satisfaction";
1296
+ satisfaction_threshold?: number | undefined;
1297
+ indicators?: {
1298
+ positive?: string[] | undefined;
1299
+ negative?: string[] | undefined;
1300
+ } | undefined;
1301
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1302
+ } | {
1303
+ type: "context_retention";
1304
+ test_memory_of: string[];
1305
+ retention_turns?: number | undefined;
1306
+ memory_accuracy_threshold?: number | undefined;
1307
+ })[] | undefined;
1308
+ debug_options?: {
1309
+ log_user_simulation?: boolean | undefined;
1310
+ log_turn_decisions?: boolean | undefined;
1311
+ export_full_transcript?: boolean | undefined;
1312
+ } | undefined;
1313
+ }>>;
462
1314
  }, "strip", z.ZodTypeAny, {
463
1315
  evaluations: ({
464
1316
  value: string;
@@ -486,11 +1338,164 @@ export declare const ScenarioSchema: z.ZodObject<{
486
1338
  max_duration_ms: number;
487
1339
  min_duration_ms?: number | undefined;
488
1340
  target_duration_ms?: number | undefined;
1341
+ } | {
1342
+ type: "conversation_length";
1343
+ min_turns?: number | undefined;
1344
+ max_turns?: number | undefined;
1345
+ optimal_turns?: number | undefined;
1346
+ target_range?: number[] | undefined;
1347
+ } | {
1348
+ type: "conversation_flow";
1349
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1350
+ flow_quality_threshold: number;
1351
+ } | {
1352
+ type: "user_satisfaction";
1353
+ satisfaction_threshold: number;
1354
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1355
+ indicators?: {
1356
+ positive?: string[] | undefined;
1357
+ negative?: string[] | undefined;
1358
+ } | undefined;
1359
+ } | {
1360
+ type: "context_retention";
1361
+ test_memory_of: string[];
1362
+ retention_turns: number;
1363
+ memory_accuracy_threshold: number;
489
1364
  })[];
490
1365
  name?: string | undefined;
491
1366
  code?: string | undefined;
492
1367
  input?: string | undefined;
493
1368
  lang?: string | undefined;
1369
+ conversation?: {
1370
+ max_turns: number;
1371
+ timeout_per_turn_ms: number;
1372
+ total_timeout_ms: number;
1373
+ user_simulator: {
1374
+ model_type: string;
1375
+ temperature: number;
1376
+ max_tokens: number;
1377
+ persona: string;
1378
+ objective: string;
1379
+ constraints: string[];
1380
+ knowledge_level: "beginner" | "intermediate" | "expert";
1381
+ style?: string | undefined;
1382
+ emotional_state?: string | undefined;
1383
+ };
1384
+ termination_conditions: {
1385
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1386
+ description?: string | undefined;
1387
+ llm_judge?: {
1388
+ prompt: string;
1389
+ threshold: number;
1390
+ } | undefined;
1391
+ keywords?: string[] | undefined;
1392
+ }[];
1393
+ turn_evaluations: ({
1394
+ value: string;
1395
+ type: "string_contains";
1396
+ case_sensitive?: boolean | undefined;
1397
+ } | {
1398
+ type: "regex_match";
1399
+ pattern: string;
1400
+ } | {
1401
+ path: string;
1402
+ type: "file_exists";
1403
+ } | {
1404
+ type: "trajectory_contains_action";
1405
+ action: string;
1406
+ } | {
1407
+ type: "llm_judge";
1408
+ expected: string;
1409
+ prompt: string;
1410
+ model_type?: string | undefined;
1411
+ temperature?: number | undefined;
1412
+ json_schema?: Record<string, any> | undefined;
1413
+ capabilities?: string[] | undefined;
1414
+ } | {
1415
+ type: "execution_time";
1416
+ max_duration_ms: number;
1417
+ min_duration_ms?: number | undefined;
1418
+ target_duration_ms?: number | undefined;
1419
+ } | {
1420
+ type: "conversation_length";
1421
+ min_turns?: number | undefined;
1422
+ max_turns?: number | undefined;
1423
+ optimal_turns?: number | undefined;
1424
+ target_range?: number[] | undefined;
1425
+ } | {
1426
+ type: "conversation_flow";
1427
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1428
+ flow_quality_threshold: number;
1429
+ } | {
1430
+ type: "user_satisfaction";
1431
+ satisfaction_threshold: number;
1432
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1433
+ indicators?: {
1434
+ positive?: string[] | undefined;
1435
+ negative?: string[] | undefined;
1436
+ } | undefined;
1437
+ } | {
1438
+ type: "context_retention";
1439
+ test_memory_of: string[];
1440
+ retention_turns: number;
1441
+ memory_accuracy_threshold: number;
1442
+ })[];
1443
+ final_evaluations: ({
1444
+ value: string;
1445
+ type: "string_contains";
1446
+ case_sensitive?: boolean | undefined;
1447
+ } | {
1448
+ type: "regex_match";
1449
+ pattern: string;
1450
+ } | {
1451
+ path: string;
1452
+ type: "file_exists";
1453
+ } | {
1454
+ type: "trajectory_contains_action";
1455
+ action: string;
1456
+ } | {
1457
+ type: "llm_judge";
1458
+ expected: string;
1459
+ prompt: string;
1460
+ model_type?: string | undefined;
1461
+ temperature?: number | undefined;
1462
+ json_schema?: Record<string, any> | undefined;
1463
+ capabilities?: string[] | undefined;
1464
+ } | {
1465
+ type: "execution_time";
1466
+ max_duration_ms: number;
1467
+ min_duration_ms?: number | undefined;
1468
+ target_duration_ms?: number | undefined;
1469
+ } | {
1470
+ type: "conversation_length";
1471
+ min_turns?: number | undefined;
1472
+ max_turns?: number | undefined;
1473
+ optimal_turns?: number | undefined;
1474
+ target_range?: number[] | undefined;
1475
+ } | {
1476
+ type: "conversation_flow";
1477
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1478
+ flow_quality_threshold: number;
1479
+ } | {
1480
+ type: "user_satisfaction";
1481
+ satisfaction_threshold: number;
1482
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1483
+ indicators?: {
1484
+ positive?: string[] | undefined;
1485
+ negative?: string[] | undefined;
1486
+ } | undefined;
1487
+ } | {
1488
+ type: "context_retention";
1489
+ test_memory_of: string[];
1490
+ retention_turns: number;
1491
+ memory_accuracy_threshold: number;
1492
+ })[];
1493
+ debug_options: {
1494
+ log_user_simulation: boolean;
1495
+ log_turn_decisions: boolean;
1496
+ export_full_transcript: boolean;
1497
+ };
1498
+ } | undefined;
494
1499
  }, {
495
1500
  evaluations: ({
496
1501
  value: string;
@@ -518,11 +1523,164 @@ export declare const ScenarioSchema: z.ZodObject<{
518
1523
  max_duration_ms: number;
519
1524
  min_duration_ms?: number | undefined;
520
1525
  target_duration_ms?: number | undefined;
1526
+ } | {
1527
+ type: "conversation_length";
1528
+ min_turns?: number | undefined;
1529
+ max_turns?: number | undefined;
1530
+ optimal_turns?: number | undefined;
1531
+ target_range?: number[] | undefined;
1532
+ } | {
1533
+ type: "conversation_flow";
1534
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1535
+ flow_quality_threshold?: number | undefined;
1536
+ } | {
1537
+ type: "user_satisfaction";
1538
+ satisfaction_threshold?: number | undefined;
1539
+ indicators?: {
1540
+ positive?: string[] | undefined;
1541
+ negative?: string[] | undefined;
1542
+ } | undefined;
1543
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1544
+ } | {
1545
+ type: "context_retention";
1546
+ test_memory_of: string[];
1547
+ retention_turns?: number | undefined;
1548
+ memory_accuracy_threshold?: number | undefined;
521
1549
  })[];
522
1550
  name?: string | undefined;
523
1551
  code?: string | undefined;
524
1552
  input?: string | undefined;
525
1553
  lang?: string | undefined;
1554
+ conversation?: {
1555
+ max_turns: number;
1556
+ user_simulator: {
1557
+ persona: string;
1558
+ objective: string;
1559
+ style?: string | undefined;
1560
+ model_type?: string | undefined;
1561
+ temperature?: number | undefined;
1562
+ max_tokens?: number | undefined;
1563
+ constraints?: string[] | undefined;
1564
+ emotional_state?: string | undefined;
1565
+ knowledge_level?: "beginner" | "intermediate" | "expert" | undefined;
1566
+ };
1567
+ timeout_per_turn_ms?: number | undefined;
1568
+ total_timeout_ms?: number | undefined;
1569
+ termination_conditions?: {
1570
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1571
+ description?: string | undefined;
1572
+ llm_judge?: {
1573
+ prompt: string;
1574
+ threshold?: number | undefined;
1575
+ } | undefined;
1576
+ keywords?: string[] | undefined;
1577
+ }[] | undefined;
1578
+ turn_evaluations?: ({
1579
+ value: string;
1580
+ type: "string_contains";
1581
+ case_sensitive?: boolean | undefined;
1582
+ } | {
1583
+ type: "regex_match";
1584
+ pattern: string;
1585
+ } | {
1586
+ path: string;
1587
+ type: "file_exists";
1588
+ } | {
1589
+ type: "trajectory_contains_action";
1590
+ action: string;
1591
+ } | {
1592
+ type: "llm_judge";
1593
+ expected: string;
1594
+ prompt: string;
1595
+ model_type?: string | undefined;
1596
+ temperature?: number | undefined;
1597
+ json_schema?: Record<string, any> | undefined;
1598
+ capabilities?: string[] | undefined;
1599
+ } | {
1600
+ type: "execution_time";
1601
+ max_duration_ms: number;
1602
+ min_duration_ms?: number | undefined;
1603
+ target_duration_ms?: number | undefined;
1604
+ } | {
1605
+ type: "conversation_length";
1606
+ min_turns?: number | undefined;
1607
+ max_turns?: number | undefined;
1608
+ optimal_turns?: number | undefined;
1609
+ target_range?: number[] | undefined;
1610
+ } | {
1611
+ type: "conversation_flow";
1612
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1613
+ flow_quality_threshold?: number | undefined;
1614
+ } | {
1615
+ type: "user_satisfaction";
1616
+ satisfaction_threshold?: number | undefined;
1617
+ indicators?: {
1618
+ positive?: string[] | undefined;
1619
+ negative?: string[] | undefined;
1620
+ } | undefined;
1621
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1622
+ } | {
1623
+ type: "context_retention";
1624
+ test_memory_of: string[];
1625
+ retention_turns?: number | undefined;
1626
+ memory_accuracy_threshold?: number | undefined;
1627
+ })[] | undefined;
1628
+ final_evaluations?: ({
1629
+ value: string;
1630
+ type: "string_contains";
1631
+ case_sensitive?: boolean | undefined;
1632
+ } | {
1633
+ type: "regex_match";
1634
+ pattern: string;
1635
+ } | {
1636
+ path: string;
1637
+ type: "file_exists";
1638
+ } | {
1639
+ type: "trajectory_contains_action";
1640
+ action: string;
1641
+ } | {
1642
+ type: "llm_judge";
1643
+ expected: string;
1644
+ prompt: string;
1645
+ model_type?: string | undefined;
1646
+ temperature?: number | undefined;
1647
+ json_schema?: Record<string, any> | undefined;
1648
+ capabilities?: string[] | undefined;
1649
+ } | {
1650
+ type: "execution_time";
1651
+ max_duration_ms: number;
1652
+ min_duration_ms?: number | undefined;
1653
+ target_duration_ms?: number | undefined;
1654
+ } | {
1655
+ type: "conversation_length";
1656
+ min_turns?: number | undefined;
1657
+ max_turns?: number | undefined;
1658
+ optimal_turns?: number | undefined;
1659
+ target_range?: number[] | undefined;
1660
+ } | {
1661
+ type: "conversation_flow";
1662
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1663
+ flow_quality_threshold?: number | undefined;
1664
+ } | {
1665
+ type: "user_satisfaction";
1666
+ satisfaction_threshold?: number | undefined;
1667
+ indicators?: {
1668
+ positive?: string[] | undefined;
1669
+ negative?: string[] | undefined;
1670
+ } | undefined;
1671
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1672
+ } | {
1673
+ type: "context_retention";
1674
+ test_memory_of: string[];
1675
+ retention_turns?: number | undefined;
1676
+ memory_accuracy_threshold?: number | undefined;
1677
+ })[] | undefined;
1678
+ debug_options?: {
1679
+ log_user_simulation?: boolean | undefined;
1680
+ log_turn_decisions?: boolean | undefined;
1681
+ export_full_transcript?: boolean | undefined;
1682
+ } | undefined;
1683
+ } | undefined;
526
1684
  }>, "many">;
527
1685
  judgment: z.ZodObject<{
528
1686
  strategy: z.ZodEnum<["all_pass", "any_pass"]>;
@@ -560,11 +1718,164 @@ export declare const ScenarioSchema: z.ZodObject<{
560
1718
  max_duration_ms: number;
561
1719
  min_duration_ms?: number | undefined;
562
1720
  target_duration_ms?: number | undefined;
1721
+ } | {
1722
+ type: "conversation_length";
1723
+ min_turns?: number | undefined;
1724
+ max_turns?: number | undefined;
1725
+ optimal_turns?: number | undefined;
1726
+ target_range?: number[] | undefined;
1727
+ } | {
1728
+ type: "conversation_flow";
1729
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1730
+ flow_quality_threshold: number;
1731
+ } | {
1732
+ type: "user_satisfaction";
1733
+ satisfaction_threshold: number;
1734
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1735
+ indicators?: {
1736
+ positive?: string[] | undefined;
1737
+ negative?: string[] | undefined;
1738
+ } | undefined;
1739
+ } | {
1740
+ type: "context_retention";
1741
+ test_memory_of: string[];
1742
+ retention_turns: number;
1743
+ memory_accuracy_threshold: number;
563
1744
  })[];
564
1745
  name?: string | undefined;
565
1746
  code?: string | undefined;
566
1747
  input?: string | undefined;
567
1748
  lang?: string | undefined;
1749
+ conversation?: {
1750
+ max_turns: number;
1751
+ timeout_per_turn_ms: number;
1752
+ total_timeout_ms: number;
1753
+ user_simulator: {
1754
+ model_type: string;
1755
+ temperature: number;
1756
+ max_tokens: number;
1757
+ persona: string;
1758
+ objective: string;
1759
+ constraints: string[];
1760
+ knowledge_level: "beginner" | "intermediate" | "expert";
1761
+ style?: string | undefined;
1762
+ emotional_state?: string | undefined;
1763
+ };
1764
+ termination_conditions: {
1765
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1766
+ description?: string | undefined;
1767
+ llm_judge?: {
1768
+ prompt: string;
1769
+ threshold: number;
1770
+ } | undefined;
1771
+ keywords?: string[] | undefined;
1772
+ }[];
1773
+ turn_evaluations: ({
1774
+ value: string;
1775
+ type: "string_contains";
1776
+ case_sensitive?: boolean | undefined;
1777
+ } | {
1778
+ type: "regex_match";
1779
+ pattern: string;
1780
+ } | {
1781
+ path: string;
1782
+ type: "file_exists";
1783
+ } | {
1784
+ type: "trajectory_contains_action";
1785
+ action: string;
1786
+ } | {
1787
+ type: "llm_judge";
1788
+ expected: string;
1789
+ prompt: string;
1790
+ model_type?: string | undefined;
1791
+ temperature?: number | undefined;
1792
+ json_schema?: Record<string, any> | undefined;
1793
+ capabilities?: string[] | undefined;
1794
+ } | {
1795
+ type: "execution_time";
1796
+ max_duration_ms: number;
1797
+ min_duration_ms?: number | undefined;
1798
+ target_duration_ms?: number | undefined;
1799
+ } | {
1800
+ type: "conversation_length";
1801
+ min_turns?: number | undefined;
1802
+ max_turns?: number | undefined;
1803
+ optimal_turns?: number | undefined;
1804
+ target_range?: number[] | undefined;
1805
+ } | {
1806
+ type: "conversation_flow";
1807
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1808
+ flow_quality_threshold: number;
1809
+ } | {
1810
+ type: "user_satisfaction";
1811
+ satisfaction_threshold: number;
1812
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1813
+ indicators?: {
1814
+ positive?: string[] | undefined;
1815
+ negative?: string[] | undefined;
1816
+ } | undefined;
1817
+ } | {
1818
+ type: "context_retention";
1819
+ test_memory_of: string[];
1820
+ retention_turns: number;
1821
+ memory_accuracy_threshold: number;
1822
+ })[];
1823
+ final_evaluations: ({
1824
+ value: string;
1825
+ type: "string_contains";
1826
+ case_sensitive?: boolean | undefined;
1827
+ } | {
1828
+ type: "regex_match";
1829
+ pattern: string;
1830
+ } | {
1831
+ path: string;
1832
+ type: "file_exists";
1833
+ } | {
1834
+ type: "trajectory_contains_action";
1835
+ action: string;
1836
+ } | {
1837
+ type: "llm_judge";
1838
+ expected: string;
1839
+ prompt: string;
1840
+ model_type?: string | undefined;
1841
+ temperature?: number | undefined;
1842
+ json_schema?: Record<string, any> | undefined;
1843
+ capabilities?: string[] | undefined;
1844
+ } | {
1845
+ type: "execution_time";
1846
+ max_duration_ms: number;
1847
+ min_duration_ms?: number | undefined;
1848
+ target_duration_ms?: number | undefined;
1849
+ } | {
1850
+ type: "conversation_length";
1851
+ min_turns?: number | undefined;
1852
+ max_turns?: number | undefined;
1853
+ optimal_turns?: number | undefined;
1854
+ target_range?: number[] | undefined;
1855
+ } | {
1856
+ type: "conversation_flow";
1857
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1858
+ flow_quality_threshold: number;
1859
+ } | {
1860
+ type: "user_satisfaction";
1861
+ satisfaction_threshold: number;
1862
+ measurement_method: "llm_judge" | "sentiment_analysis" | "keyword_analysis";
1863
+ indicators?: {
1864
+ positive?: string[] | undefined;
1865
+ negative?: string[] | undefined;
1866
+ } | undefined;
1867
+ } | {
1868
+ type: "context_retention";
1869
+ test_memory_of: string[];
1870
+ retention_turns: number;
1871
+ memory_accuracy_threshold: number;
1872
+ })[];
1873
+ debug_options: {
1874
+ log_user_simulation: boolean;
1875
+ log_turn_decisions: boolean;
1876
+ export_full_transcript: boolean;
1877
+ };
1878
+ } | undefined;
568
1879
  }[];
569
1880
  description: string;
570
1881
  environment: {
@@ -633,11 +1944,164 @@ export declare const ScenarioSchema: z.ZodObject<{
633
1944
  max_duration_ms: number;
634
1945
  min_duration_ms?: number | undefined;
635
1946
  target_duration_ms?: number | undefined;
1947
+ } | {
1948
+ type: "conversation_length";
1949
+ min_turns?: number | undefined;
1950
+ max_turns?: number | undefined;
1951
+ optimal_turns?: number | undefined;
1952
+ target_range?: number[] | undefined;
1953
+ } | {
1954
+ type: "conversation_flow";
1955
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
1956
+ flow_quality_threshold?: number | undefined;
1957
+ } | {
1958
+ type: "user_satisfaction";
1959
+ satisfaction_threshold?: number | undefined;
1960
+ indicators?: {
1961
+ positive?: string[] | undefined;
1962
+ negative?: string[] | undefined;
1963
+ } | undefined;
1964
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
1965
+ } | {
1966
+ type: "context_retention";
1967
+ test_memory_of: string[];
1968
+ retention_turns?: number | undefined;
1969
+ memory_accuracy_threshold?: number | undefined;
636
1970
  })[];
637
1971
  name?: string | undefined;
638
1972
  code?: string | undefined;
639
1973
  input?: string | undefined;
640
1974
  lang?: string | undefined;
1975
+ conversation?: {
1976
+ max_turns: number;
1977
+ user_simulator: {
1978
+ persona: string;
1979
+ objective: string;
1980
+ style?: string | undefined;
1981
+ model_type?: string | undefined;
1982
+ temperature?: number | undefined;
1983
+ max_tokens?: number | undefined;
1984
+ constraints?: string[] | undefined;
1985
+ emotional_state?: string | undefined;
1986
+ knowledge_level?: "beginner" | "intermediate" | "expert" | undefined;
1987
+ };
1988
+ timeout_per_turn_ms?: number | undefined;
1989
+ total_timeout_ms?: number | undefined;
1990
+ termination_conditions?: {
1991
+ type: "max_turns_reached" | "user_expresses_satisfaction" | "agent_provides_solution" | "conversation_stuck" | "escalation_needed" | "goal_achieved" | "custom_condition";
1992
+ description?: string | undefined;
1993
+ llm_judge?: {
1994
+ prompt: string;
1995
+ threshold?: number | undefined;
1996
+ } | undefined;
1997
+ keywords?: string[] | undefined;
1998
+ }[] | undefined;
1999
+ turn_evaluations?: ({
2000
+ value: string;
2001
+ type: "string_contains";
2002
+ case_sensitive?: boolean | undefined;
2003
+ } | {
2004
+ type: "regex_match";
2005
+ pattern: string;
2006
+ } | {
2007
+ path: string;
2008
+ type: "file_exists";
2009
+ } | {
2010
+ type: "trajectory_contains_action";
2011
+ action: string;
2012
+ } | {
2013
+ type: "llm_judge";
2014
+ expected: string;
2015
+ prompt: string;
2016
+ model_type?: string | undefined;
2017
+ temperature?: number | undefined;
2018
+ json_schema?: Record<string, any> | undefined;
2019
+ capabilities?: string[] | undefined;
2020
+ } | {
2021
+ type: "execution_time";
2022
+ max_duration_ms: number;
2023
+ min_duration_ms?: number | undefined;
2024
+ target_duration_ms?: number | undefined;
2025
+ } | {
2026
+ type: "conversation_length";
2027
+ min_turns?: number | undefined;
2028
+ max_turns?: number | undefined;
2029
+ optimal_turns?: number | undefined;
2030
+ target_range?: number[] | undefined;
2031
+ } | {
2032
+ type: "conversation_flow";
2033
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
2034
+ flow_quality_threshold?: number | undefined;
2035
+ } | {
2036
+ type: "user_satisfaction";
2037
+ satisfaction_threshold?: number | undefined;
2038
+ indicators?: {
2039
+ positive?: string[] | undefined;
2040
+ negative?: string[] | undefined;
2041
+ } | undefined;
2042
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
2043
+ } | {
2044
+ type: "context_retention";
2045
+ test_memory_of: string[];
2046
+ retention_turns?: number | undefined;
2047
+ memory_accuracy_threshold?: number | undefined;
2048
+ })[] | undefined;
2049
+ final_evaluations?: ({
2050
+ value: string;
2051
+ type: "string_contains";
2052
+ case_sensitive?: boolean | undefined;
2053
+ } | {
2054
+ type: "regex_match";
2055
+ pattern: string;
2056
+ } | {
2057
+ path: string;
2058
+ type: "file_exists";
2059
+ } | {
2060
+ type: "trajectory_contains_action";
2061
+ action: string;
2062
+ } | {
2063
+ type: "llm_judge";
2064
+ expected: string;
2065
+ prompt: string;
2066
+ model_type?: string | undefined;
2067
+ temperature?: number | undefined;
2068
+ json_schema?: Record<string, any> | undefined;
2069
+ capabilities?: string[] | undefined;
2070
+ } | {
2071
+ type: "execution_time";
2072
+ max_duration_ms: number;
2073
+ min_duration_ms?: number | undefined;
2074
+ target_duration_ms?: number | undefined;
2075
+ } | {
2076
+ type: "conversation_length";
2077
+ min_turns?: number | undefined;
2078
+ max_turns?: number | undefined;
2079
+ optimal_turns?: number | undefined;
2080
+ target_range?: number[] | undefined;
2081
+ } | {
2082
+ type: "conversation_flow";
2083
+ required_patterns: ("question_then_answer" | "problem_then_solution" | "clarification_cycle" | "empathy_then_solution" | "escalation_pattern")[];
2084
+ flow_quality_threshold?: number | undefined;
2085
+ } | {
2086
+ type: "user_satisfaction";
2087
+ satisfaction_threshold?: number | undefined;
2088
+ indicators?: {
2089
+ positive?: string[] | undefined;
2090
+ negative?: string[] | undefined;
2091
+ } | undefined;
2092
+ measurement_method?: "llm_judge" | "sentiment_analysis" | "keyword_analysis" | undefined;
2093
+ } | {
2094
+ type: "context_retention";
2095
+ test_memory_of: string[];
2096
+ retention_turns?: number | undefined;
2097
+ memory_accuracy_threshold?: number | undefined;
2098
+ })[] | undefined;
2099
+ debug_options?: {
2100
+ log_user_simulation?: boolean | undefined;
2101
+ log_turn_decisions?: boolean | undefined;
2102
+ export_full_transcript?: boolean | undefined;
2103
+ } | undefined;
2104
+ } | undefined;
641
2105
  }[];
642
2106
  description: string;
643
2107
  environment: {