@wix/evalforge-types 0.71.0 → 0.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.mjs CHANGED
@@ -584,11 +584,42 @@ var EnvironmentSchema = z21.object({
584
584
  });
585
585
 
586
586
  // src/scenario/test-scenario.ts
587
- import { z as z23 } from "zod";
587
+ import { z as z24 } from "zod";
588
588
 
589
589
  // src/assertion/assertion.ts
590
+ import { z as z23 } from "zod";
591
+
592
+ // src/assertion/build-passed-command.ts
590
593
  import { z as z22 } from "zod";
591
- var AssertionTypeSchema = z22.enum([
594
+ var ALLOWED_BUILD_COMMANDS = [
595
+ "yarn build",
596
+ "npm run build",
597
+ "pnpm run build",
598
+ "pnpm build"
599
+ ];
600
+ var DEFAULT_BUILD_PASSED_COMMAND = "yarn build";
601
+ var BUILD_COMMAND_ARGV = {
602
+ "yarn build": ["yarn", "build"],
603
+ "npm run build": ["npm", "run", "build"],
604
+ "pnpm run build": ["pnpm", "run", "build"],
605
+ "pnpm build": ["pnpm", "build"]
606
+ };
607
+ function isAllowedBuildCommandString(command) {
608
+ const trimmed = command.trim();
609
+ return ALLOWED_BUILD_COMMANDS.includes(trimmed);
610
+ }
611
+ function parseBuildCommandToArgv(command) {
612
+ const trimmed = command.trim();
613
+ if (!(trimmed in BUILD_COMMAND_ARGV)) {
614
+ return null;
615
+ }
616
+ return BUILD_COMMAND_ARGV[trimmed];
617
+ }
618
+ var enumTuple = ALLOWED_BUILD_COMMANDS;
619
+ var BuildPassedCommandStringSchema = z22.enum(enumTuple);
620
+
621
+ // src/assertion/assertion.ts
622
+ var AssertionTypeSchema = z23.enum([
592
623
  "skill_was_called",
593
624
  "tool_called_with_param",
594
625
  "build_passed",
@@ -597,61 +628,61 @@ var AssertionTypeSchema = z22.enum([
597
628
  "llm_judge",
598
629
  "api_call"
599
630
  ]);
600
- var AssertionParameterTypeSchema = z22.enum([
631
+ var AssertionParameterTypeSchema = z23.enum([
601
632
  "string",
602
633
  "number",
603
634
  "boolean"
604
635
  ]);
605
- var AssertionParameterSchema = z22.object({
636
+ var AssertionParameterSchema = z23.object({
606
637
  /** Parameter name (used as key in params object) */
607
- name: z22.string().min(1),
638
+ name: z23.string().min(1),
608
639
  /** Display label for the parameter */
609
- label: z22.string().min(1),
640
+ label: z23.string().min(1),
610
641
  /** Parameter type */
611
642
  type: AssertionParameterTypeSchema,
612
643
  /** Whether this parameter is required */
613
- required: z22.boolean(),
644
+ required: z23.boolean(),
614
645
  /** Default value (optional, used when not provided) */
615
- defaultValue: z22.union([z22.string(), z22.number(), z22.boolean()]).optional(),
646
+ defaultValue: z23.union([z23.string(), z23.number(), z23.boolean()]).optional(),
616
647
  /** If true, parameter is hidden by default behind "Show advanced options" */
617
- advanced: z22.boolean().optional()
648
+ advanced: z23.boolean().optional()
618
649
  });
619
- var ScenarioAssertionLinkSchema = z22.object({
650
+ var ScenarioAssertionLinkSchema = z23.object({
620
651
  /** ID of the system assertion (e.g., 'system:skill_was_called') */
621
- assertionId: z22.string(),
652
+ assertionId: z23.string(),
622
653
  /** Parameter values for this assertion in this scenario */
623
- params: z22.record(
624
- z22.string(),
625
- z22.union([z22.string(), z22.number(), z22.boolean(), z22.null()])
654
+ params: z23.record(
655
+ z23.string(),
656
+ z23.union([z23.string(), z23.number(), z23.boolean(), z23.null()])
626
657
  ).optional()
627
658
  });
628
- var SkillWasCalledConfigSchema = z22.object({
659
+ var SkillWasCalledConfigSchema = z23.object({
629
660
  /** Names of the skills that must have been called */
630
- skillNames: z22.array(z22.string().min(1)).min(1)
661
+ skillNames: z23.array(z23.string().min(1)).min(1)
631
662
  });
632
- var CostConfigSchema = z22.strictObject({
663
+ var CostConfigSchema = z23.strictObject({
633
664
  /** Maximum allowed cost in USD */
634
- maxCostUsd: z22.number().positive()
665
+ maxCostUsd: z23.number().positive()
635
666
  });
636
- var ToolCalledWithParamConfigSchema = z22.strictObject({
667
+ var ToolCalledWithParamConfigSchema = z23.strictObject({
637
668
  /** Name of the tool that must have been called */
638
- toolName: z22.string().min(1),
669
+ toolName: z23.string().min(1),
639
670
  /** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
640
- expectedParams: z22.string().min(1).optional(),
671
+ expectedParams: z23.string().min(1).optional(),
641
672
  /** If true, the matching tool call must also have succeeded (step.success === true) */
642
- requireSuccess: z22.boolean().optional()
673
+ requireSuccess: z23.boolean().optional()
643
674
  });
644
- var BuildPassedConfigSchema = z22.strictObject({
645
- /** Command to run (default: "yarn build") */
646
- command: z22.string().optional(),
675
+ var BuildPassedConfigSchema = z23.strictObject({
676
+ /** Allowlisted command only (default at runtime: "yarn build") */
677
+ command: BuildPassedCommandStringSchema.optional(),
647
678
  /** Expected exit code (default: 0) */
648
- expectedExitCode: z22.number().int().optional()
679
+ expectedExitCode: z23.number().int().optional()
649
680
  });
650
- var TimeConfigSchema = z22.strictObject({
681
+ var TimeConfigSchema = z23.strictObject({
651
682
  /** Maximum allowed duration in milliseconds */
652
- maxDurationMs: z22.number().int().positive()
683
+ maxDurationMs: z23.number().int().positive()
653
684
  });
654
- var LlmJudgeConfigSchema = z22.object({
685
+ var LlmJudgeConfigSchema = z23.object({
655
686
  /**
656
687
  * Prompt template with placeholders:
657
688
  * - {{output}}: agent's final output
@@ -662,65 +693,65 @@ var LlmJudgeConfigSchema = z22.object({
662
693
  * - {{trace}}: step-by-step trace of tool calls
663
694
  * - Custom parameters defined in the parameters array
664
695
  */
665
- prompt: z22.string().min(1),
696
+ prompt: z23.string().min(1),
666
697
  /** Minimum score to pass (0-10, default 7) */
667
- minScore: z22.number().int().min(0).max(10).optional(),
698
+ minScore: z23.number().int().min(0).max(10).optional(),
668
699
  /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
669
- model: z22.string().optional(),
700
+ model: z23.string().optional(),
670
701
  /** Max output tokens */
671
- maxTokens: z22.number().int().optional(),
702
+ maxTokens: z23.number().int().optional(),
672
703
  /** Temperature (0-1) */
673
- temperature: z22.number().min(0).max(1).optional(),
704
+ temperature: z23.number().min(0).max(1).optional(),
674
705
  /** User-defined parameters for this assertion */
675
- parameters: z22.array(AssertionParameterSchema).optional()
706
+ parameters: z23.array(AssertionParameterSchema).optional()
676
707
  });
677
- var ApiCallConfigSchema = z22.strictObject({
708
+ var ApiCallConfigSchema = z23.strictObject({
678
709
  /** URL to call */
679
- url: z22.string().min(1),
710
+ url: z23.string().min(1),
680
711
  /** HTTP method (default GET) */
681
- method: z22.enum(["GET", "POST"]).optional(),
712
+ method: z23.enum(["GET", "POST"]).optional(),
682
713
  /** Request body (JSON string, for POST requests) */
683
- requestBody: z22.string().optional(),
714
+ requestBody: z23.string().optional(),
684
715
  /** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
685
- expectedResponse: z22.string().min(1),
716
+ expectedResponse: z23.string().min(1),
686
717
  /** Request headers as JSON string of key-value pairs */
687
- requestHeaders: z22.string().optional(),
718
+ requestHeaders: z23.string().optional(),
688
719
  /** Request timeout in milliseconds (default 30000) */
689
- timeoutMs: z22.number().int().positive().optional()
720
+ timeoutMs: z23.number().int().positive().optional()
690
721
  });
691
722
  var AssertionBaseFields = {
692
723
  /** When true, the assertion's pass/fail logic is inverted (NOT operator). */
693
- negate: z22.boolean().optional()
724
+ negate: z23.boolean().optional()
694
725
  };
695
726
  var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
696
- type: z22.literal("skill_was_called"),
727
+ type: z23.literal("skill_was_called"),
697
728
  ...AssertionBaseFields
698
729
  });
699
730
  var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
700
- type: z22.literal("tool_called_with_param"),
731
+ type: z23.literal("tool_called_with_param"),
701
732
  ...AssertionBaseFields
702
733
  });
703
734
  var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
704
- type: z22.literal("build_passed"),
735
+ type: z23.literal("build_passed"),
705
736
  ...AssertionBaseFields
706
737
  });
707
738
  var CostAssertionSchema = CostConfigSchema.extend({
708
- type: z22.literal("cost"),
739
+ type: z23.literal("cost"),
709
740
  ...AssertionBaseFields
710
741
  });
711
742
  var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
712
- type: z22.literal("llm_judge"),
743
+ type: z23.literal("llm_judge"),
713
744
  ...AssertionBaseFields
714
745
  });
715
746
  var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
716
- type: z22.literal("api_call"),
747
+ type: z23.literal("api_call"),
717
748
  ...AssertionBaseFields
718
749
  });
719
750
  var TimeAssertionSchema = TimeConfigSchema.extend({
720
- type: z22.literal("time_limit"),
751
+ type: z23.literal("time_limit"),
721
752
  ...AssertionBaseFields
722
753
  });
723
- var AssertionSchema = z22.union([
754
+ var AssertionSchema = z23.union([
724
755
  SkillWasCalledAssertionSchema,
725
756
  ToolCalledWithParamAssertionSchema,
726
757
  BuildPassedAssertionSchema,
@@ -729,7 +760,7 @@ var AssertionSchema = z22.union([
729
760
  LlmJudgeAssertionSchema,
730
761
  ApiCallAssertionSchema
731
762
  ]);
732
- var AssertionConfigSchema = z22.union([
763
+ var AssertionConfigSchema = z23.union([
733
764
  LlmJudgeConfigSchema,
734
765
  // requires prompt - check first
735
766
  SkillWasCalledConfigSchema,
@@ -744,7 +775,7 @@ var AssertionConfigSchema = z22.union([
744
775
  // requires maxCostUsd, uses strictObject
745
776
  BuildPassedConfigSchema,
746
777
  // all optional, uses strictObject to reject unknown keys
747
- z22.object({})
778
+ z23.object({})
748
779
  // fallback empty config
749
780
  ]);
750
781
  function validateAssertionConfig(type, config) {
@@ -768,52 +799,322 @@ function validateAssertionConfig(type, config) {
768
799
  }
769
800
  }
770
801
 
802
+ // src/assertion/system-assertions.ts
803
+ var SYSTEM_ASSERTION_IDS = {
804
+ SKILL_WAS_CALLED: "system:skill_was_called",
805
+ TOOL_CALLED_WITH_PARAM: "system:tool_called_with_param",
806
+ BUILD_PASSED: "system:build_passed",
807
+ TIME_LIMIT: "system:time_limit",
808
+ COST: "system:cost",
809
+ LLM_JUDGE: "system:llm_judge",
810
+ API_CALL: "system:api_call"
811
+ };
812
+ function isSystemAssertionId(id) {
813
+ return id.startsWith("system:");
814
+ }
815
+ var SYSTEM_ASSERTIONS = {
816
+ [SYSTEM_ASSERTION_IDS.SKILL_WAS_CALLED]: {
817
+ id: SYSTEM_ASSERTION_IDS.SKILL_WAS_CALLED,
818
+ name: "Skill Was Called",
819
+ description: "Check that one or more skills were invoked during the agent run",
820
+ type: "skill_was_called",
821
+ parameters: [
822
+ {
823
+ name: "skillNames",
824
+ label: "Skills",
825
+ type: "string",
826
+ required: true
827
+ },
828
+ {
829
+ name: "negate",
830
+ label: "Negate (NOT operator)",
831
+ type: "boolean",
832
+ required: false,
833
+ defaultValue: false
834
+ }
835
+ ]
836
+ },
837
+ [SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM]: {
838
+ id: SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM,
839
+ name: "Tool Called With Param",
840
+ description: "Check that a tool was called with expected parameters (tool name is substring matched)",
841
+ type: "tool_called_with_param",
842
+ parameters: [
843
+ {
844
+ name: "toolName",
845
+ label: "Tool Name",
846
+ type: "string",
847
+ required: true
848
+ },
849
+ {
850
+ name: "expectedParams",
851
+ label: "Expected Parameters (JSON, substring match)",
852
+ type: "string",
853
+ required: false
854
+ },
855
+ {
856
+ name: "requireSuccess",
857
+ label: "Require Successful Call",
858
+ type: "boolean",
859
+ required: false,
860
+ defaultValue: false,
861
+ advanced: true
862
+ },
863
+ {
864
+ name: "negate",
865
+ label: "Negate (NOT operator)",
866
+ type: "boolean",
867
+ required: false,
868
+ defaultValue: false
869
+ }
870
+ ]
871
+ },
872
+ [SYSTEM_ASSERTION_IDS.BUILD_PASSED]: {
873
+ id: SYSTEM_ASSERTION_IDS.BUILD_PASSED,
874
+ name: "Build Passed",
875
+ description: "Run a build command and verify it exits with expected code",
876
+ type: "build_passed",
877
+ parameters: [
878
+ {
879
+ name: "command",
880
+ label: "Build Command",
881
+ type: "string",
882
+ required: false,
883
+ defaultValue: "yarn build"
884
+ },
885
+ {
886
+ name: "expectedExitCode",
887
+ label: "Expected Exit Code",
888
+ type: "number",
889
+ required: false,
890
+ defaultValue: 0
891
+ },
892
+ {
893
+ name: "maxBuildTime",
894
+ label: "Max Build Time (ms)",
895
+ type: "number",
896
+ required: false,
897
+ advanced: true
898
+ },
899
+ {
900
+ name: "maxMemory",
901
+ label: "Max Memory (MB)",
902
+ type: "number",
903
+ required: false,
904
+ advanced: true
905
+ }
906
+ ]
907
+ },
908
+ [SYSTEM_ASSERTION_IDS.TIME_LIMIT]: {
909
+ id: SYSTEM_ASSERTION_IDS.TIME_LIMIT,
910
+ name: "Time Limit",
911
+ description: "Check that the scenario completed within a maximum duration",
912
+ type: "time_limit",
913
+ parameters: [
914
+ {
915
+ name: "maxDurationMs",
916
+ label: "Max Duration (ms)",
917
+ type: "number",
918
+ required: true,
919
+ defaultValue: 3e5
920
+ }
921
+ ]
922
+ },
923
+ [SYSTEM_ASSERTION_IDS.COST]: {
924
+ id: SYSTEM_ASSERTION_IDS.COST,
925
+ name: "Cost",
926
+ description: "Check that the scenario LLM execution cost stays within a USD threshold",
927
+ type: "cost",
928
+ parameters: [
929
+ {
930
+ name: "maxCostUsd",
931
+ label: "Max Cost (USD)",
932
+ type: "number",
933
+ required: true,
934
+ defaultValue: 1
935
+ }
936
+ ]
937
+ },
938
+ [SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
939
+ id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
940
+ name: "LLM Judge",
941
+ description: "LLM evaluates the output and assigns a score (0-10)",
942
+ type: "llm_judge",
943
+ parameters: [
944
+ {
945
+ name: "prompt",
946
+ label: "Judge Prompt",
947
+ type: "string",
948
+ required: true,
949
+ defaultValue: "Verify the output meets the acceptance criteria."
950
+ },
951
+ {
952
+ name: "minScore",
953
+ label: "Minimum Score (0-10)",
954
+ type: "number",
955
+ required: false,
956
+ defaultValue: 7
957
+ },
958
+ {
959
+ name: "model",
960
+ label: "Model",
961
+ type: "string",
962
+ required: false
963
+ }
964
+ ]
965
+ },
966
+ [SYSTEM_ASSERTION_IDS.API_CALL]: {
967
+ id: SYSTEM_ASSERTION_IDS.API_CALL,
968
+ name: "API Call",
969
+ description: "Call an API endpoint and verify the response contains expected data",
970
+ type: "api_call",
971
+ parameters: [
972
+ {
973
+ name: "url",
974
+ label: "URL",
975
+ type: "string",
976
+ required: true
977
+ },
978
+ {
979
+ name: "method",
980
+ label: "HTTP Method",
981
+ type: "string",
982
+ required: false,
983
+ defaultValue: "GET"
984
+ },
985
+ {
986
+ name: "requestBody",
987
+ label: "Request Body (JSON)",
988
+ type: "string",
989
+ required: false
990
+ },
991
+ {
992
+ name: "expectedResponse",
993
+ label: "Expected Response (JSON)",
994
+ type: "string",
995
+ required: true
996
+ },
997
+ {
998
+ name: "requestHeaders",
999
+ label: "Headers (JSON)",
1000
+ type: "string",
1001
+ required: false,
1002
+ advanced: true
1003
+ },
1004
+ {
1005
+ name: "timeoutMs",
1006
+ label: "Timeout (ms)",
1007
+ type: "number",
1008
+ required: false,
1009
+ defaultValue: 3e4,
1010
+ advanced: true
1011
+ }
1012
+ ]
1013
+ }
1014
+ };
1015
+ function getSystemAssertions() {
1016
+ return Object.values(SYSTEM_ASSERTIONS);
1017
+ }
1018
+ function getSystemAssertion(id) {
1019
+ return SYSTEM_ASSERTIONS[id];
1020
+ }
1021
+
771
1022
  // src/scenario/test-scenario.ts
772
- var ExpectedFileSchema = z23.object({
1023
+ var MAX_IMAGE_BASE64_LENGTH = 4 * Math.ceil(2 * 1024 * 1024 / 3);
1024
+ var TriggerPromptImageSchema = z24.object({
1025
+ /** Base64-encoded image data (no data URL prefix) */
1026
+ base64: z24.string().max(MAX_IMAGE_BASE64_LENGTH, "Image exceeds 2 MB size limit"),
1027
+ /** MIME type of the image */
1028
+ mediaType: z24.enum(["image/jpeg", "image/png", "image/gif", "image/webp"]),
1029
+ /** Original filename of the image */
1030
+ name: z24.string()
1031
+ });
1032
+ var ExpectedFileSchema = z24.object({
773
1033
  /** Relative path where the file should be created */
774
- path: z23.string(),
1034
+ path: z24.string(),
775
1035
  /** Optional expected content */
776
- content: z23.string().optional()
1036
+ content: z24.string().optional()
777
1037
  });
778
1038
  var TestScenarioSchema = TenantEntitySchema.extend({
779
1039
  /** The prompt sent to the agent to trigger the task */
780
- triggerPrompt: z23.string().min(10),
1040
+ triggerPrompt: z24.string().min(10),
781
1041
  /** ID of the template to use for this scenario (null = no template) */
782
- templateId: z23.string().nullish(),
1042
+ templateId: z24.string().nullish(),
783
1043
  /** Inline assertions to evaluate for this scenario (legacy) */
784
- assertions: z23.array(AssertionSchema).optional(),
1044
+ assertions: z24.array(AssertionSchema).optional(),
785
1045
  /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
786
- assertionIds: z23.array(z23.string()).optional(),
1046
+ assertionIds: z24.array(z24.string()).optional(),
787
1047
  /** Linked assertions with per-scenario parameter values */
788
- assertionLinks: z23.array(ScenarioAssertionLinkSchema).optional(),
1048
+ assertionLinks: z24.array(ScenarioAssertionLinkSchema).optional(),
789
1049
  /** Tags for categorisation and filtering */
790
- tags: z23.array(z23.string()).optional()
791
- });
792
- var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
1050
+ tags: z24.array(z24.string()).optional(),
1051
+ /** Base64-encoded images attached to the trigger prompt (max 3) */
1052
+ triggerPromptImages: z24.array(TriggerPromptImageSchema).max(3).optional()
1053
+ });
1054
+ function validateBuildPassedParamsInAssertionLinks(links, ctx) {
1055
+ if (!links) return;
1056
+ for (let i = 0; i < links.length; i++) {
1057
+ const link = links[i];
1058
+ if (link.assertionId !== SYSTEM_ASSERTION_IDS.BUILD_PASSED) continue;
1059
+ const cmd = link.params?.command;
1060
+ if (cmd === void 0 || cmd === null) continue;
1061
+ if (typeof cmd !== "string") {
1062
+ ctx.addIssue({
1063
+ code: z24.ZodIssueCode.custom,
1064
+ message: "build_passed command must be a string",
1065
+ path: ["assertionLinks", i, "params", "command"]
1066
+ });
1067
+ continue;
1068
+ }
1069
+ if (!isAllowedBuildCommandString(cmd)) {
1070
+ ctx.addIssue({
1071
+ code: z24.ZodIssueCode.custom,
1072
+ message: "Invalid build_passed command. Allowed: yarn build, npm run build, pnpm run build, pnpm build",
1073
+ path: ["assertionLinks", i, "params", "command"]
1074
+ });
1075
+ }
1076
+ }
1077
+ }
1078
+ var TestScenarioCreateBaseSchema = TestScenarioSchema.omit({
793
1079
  id: true,
794
1080
  createdAt: true,
795
1081
  updatedAt: true,
796
1082
  deleted: true
797
1083
  });
798
- var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
1084
+ var CreateTestScenarioInputSchema = TestScenarioCreateBaseSchema.superRefine((data, ctx) => {
1085
+ validateBuildPassedParamsInAssertionLinks(data.assertionLinks, ctx);
1086
+ });
1087
+ var UpdateTestScenarioInputSchema = TestScenarioCreateBaseSchema.partial().superRefine((data, ctx) => {
1088
+ if (data.assertionLinks !== void 0) {
1089
+ validateBuildPassedParamsInAssertionLinks(data.assertionLinks, ctx);
1090
+ }
1091
+ });
799
1092
 
800
1093
  // src/scenario/batch-import.ts
801
- import { z as z24 } from "zod";
1094
+ import { z as z25 } from "zod";
802
1095
  var UUID_REGEX = /^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/i;
803
- var BatchAssertionLinkSchema = z24.union([
804
- z24.string().min(1),
1096
+ var BatchAssertionLinkSchema = z25.union([
1097
+ z25.string().min(1),
805
1098
  ScenarioAssertionLinkSchema
806
1099
  ]);
807
- var BatchScenarioEntrySchema = z24.object({
808
- name: z24.string().min(1, "name: Required"),
809
- description: z24.string().optional().default(""),
810
- triggerPrompt: z24.string().min(10, "triggerPrompt: Must be at least 10 characters"),
811
- templateId: z24.string().nullish(),
812
- tags: z24.array(z24.string()).optional(),
813
- assertionLinks: z24.array(BatchAssertionLinkSchema).optional()
1100
+ var BatchScenarioEntrySchema = z25.object({
1101
+ name: z25.string().min(1, "name: Required"),
1102
+ description: z25.string().optional().default(""),
1103
+ triggerPrompt: z25.string().min(10, "triggerPrompt: Must be at least 10 characters"),
1104
+ templateId: z25.string().nullish(),
1105
+ tags: z25.array(z25.string()).optional(),
1106
+ assertionLinks: z25.array(BatchAssertionLinkSchema).optional()
1107
+ }).superRefine((data, ctx) => {
1108
+ if (!data.assertionLinks) return;
1109
+ const objectLinks = data.assertionLinks.filter(
1110
+ (link) => typeof link !== "string"
1111
+ );
1112
+ if (objectLinks.length > 0) {
1113
+ validateBuildPassedParamsInAssertionLinks(objectLinks, ctx);
1114
+ }
814
1115
  });
815
- var BatchImportPayloadSchema = z24.object({
816
- scenarios: z24.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
1116
+ var BatchImportPayloadSchema = z25.object({
1117
+ scenarios: z25.array(BatchScenarioEntrySchema).min(1, "scenarios array must contain at least one entry").max(100, "Maximum 100 scenarios per upload")
817
1118
  });
818
1119
  var BATCH_IMPORT_LIMITS = {
819
1120
  MAX_SCENARIOS: 100,
@@ -835,29 +1136,29 @@ function normalizeBatchAssertionLink(link) {
835
1136
  }
836
1137
  return link;
837
1138
  }
838
- var BatchResultItemSchema = z24.object({
839
- index: z24.number(),
840
- name: z24.string(),
841
- status: z24.enum(["valid", "invalid"]),
842
- id: z24.string().nullable().optional(),
843
- errors: z24.array(z24.string()).optional()
844
- });
845
- var BatchSummarySchema = z24.object({
846
- total: z24.number(),
847
- valid: z24.number(),
848
- invalid: z24.number(),
849
- created: z24.number()
850
- });
851
- var BatchImportResponseSchema = z24.object({
1139
+ var BatchResultItemSchema = z25.object({
1140
+ index: z25.number(),
1141
+ name: z25.string(),
1142
+ status: z25.enum(["valid", "invalid"]),
1143
+ id: z25.string().nullable().optional(),
1144
+ errors: z25.array(z25.string()).optional()
1145
+ });
1146
+ var BatchSummarySchema = z25.object({
1147
+ total: z25.number(),
1148
+ valid: z25.number(),
1149
+ invalid: z25.number(),
1150
+ created: z25.number()
1151
+ });
1152
+ var BatchImportResponseSchema = z25.object({
852
1153
  summary: BatchSummarySchema,
853
- results: z24.array(BatchResultItemSchema)
1154
+ results: z25.array(BatchResultItemSchema)
854
1155
  });
855
1156
 
856
1157
  // src/suite/test-suite.ts
857
- import { z as z25 } from "zod";
1158
+ import { z as z26 } from "zod";
858
1159
  var TestSuiteSchema = TenantEntitySchema.extend({
859
1160
  /** IDs of test scenarios in this suite */
860
- scenarioIds: z25.array(z25.string())
1161
+ scenarioIds: z26.array(z26.string())
861
1162
  });
862
1163
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
863
1164
  id: true,
@@ -868,21 +1169,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
868
1169
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
869
1170
 
870
1171
  // src/evaluation/metrics.ts
871
- import { z as z26 } from "zod";
872
- var TokenUsageSchema = z26.object({
873
- prompt: z26.number(),
874
- completion: z26.number(),
875
- total: z26.number()
876
- });
877
- var EvalMetricsSchema = z26.object({
878
- totalAssertions: z26.number(),
879
- passed: z26.number(),
880
- failed: z26.number(),
881
- skipped: z26.number(),
882
- errors: z26.number(),
883
- passRate: z26.number(),
884
- avgDuration: z26.number(),
885
- totalDuration: z26.number()
1172
+ import { z as z27 } from "zod";
1173
+ var TokenUsageSchema = z27.object({
1174
+ prompt: z27.number(),
1175
+ completion: z27.number(),
1176
+ total: z27.number()
1177
+ });
1178
+ var EvalMetricsSchema = z27.object({
1179
+ totalAssertions: z27.number(),
1180
+ passed: z27.number(),
1181
+ failed: z27.number(),
1182
+ skipped: z27.number(),
1183
+ errors: z27.number(),
1184
+ passRate: z27.number(),
1185
+ avgDuration: z27.number(),
1186
+ totalDuration: z27.number()
886
1187
  });
887
1188
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
888
1189
  EvalStatus2["PENDING"] = "pending";
@@ -892,7 +1193,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
892
1193
  EvalStatus2["CANCELLED"] = "cancelled";
893
1194
  return EvalStatus2;
894
1195
  })(EvalStatus || {});
895
- var EvalStatusSchema = z26.enum(EvalStatus);
1196
+ var EvalStatusSchema = z27.enum(EvalStatus);
896
1197
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
897
1198
  LLMStepType2["COMPLETION"] = "completion";
898
1199
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -900,54 +1201,54 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
900
1201
  LLMStepType2["THINKING"] = "thinking";
901
1202
  return LLMStepType2;
902
1203
  })(LLMStepType || {});
903
- var LLMTraceStepSchema = z26.object({
904
- id: z26.string(),
905
- stepNumber: z26.number(),
906
- type: z26.enum(LLMStepType),
907
- model: z26.string(),
908
- provider: z26.string(),
909
- startedAt: z26.string(),
910
- durationMs: z26.number(),
1204
+ var LLMTraceStepSchema = z27.object({
1205
+ id: z27.string(),
1206
+ stepNumber: z27.number(),
1207
+ type: z27.enum(LLMStepType),
1208
+ model: z27.string(),
1209
+ provider: z27.string(),
1210
+ startedAt: z27.string(),
1211
+ durationMs: z27.number(),
911
1212
  tokenUsage: TokenUsageSchema,
912
- costUsd: z26.number(),
913
- toolName: z26.string().optional(),
914
- toolArguments: z26.string().optional(),
915
- inputPreview: z26.string().optional(),
916
- outputPreview: z26.string().optional(),
917
- success: z26.boolean(),
918
- error: z26.string().optional(),
919
- turnIndex: z26.number().optional()
920
- });
921
- var LLMBreakdownStatsSchema = z26.object({
922
- count: z26.number(),
923
- durationMs: z26.number(),
924
- tokens: z26.number(),
925
- costUsd: z26.number()
926
- });
927
- var LLMTraceSummarySchema = z26.object({
928
- totalSteps: z26.number(),
929
- totalTurns: z26.number().optional(),
930
- totalDurationMs: z26.number(),
1213
+ costUsd: z27.number(),
1214
+ toolName: z27.string().optional(),
1215
+ toolArguments: z27.string().optional(),
1216
+ inputPreview: z27.string().optional(),
1217
+ outputPreview: z27.string().optional(),
1218
+ success: z27.boolean(),
1219
+ error: z27.string().optional(),
1220
+ turnIndex: z27.number().optional()
1221
+ });
1222
+ var LLMBreakdownStatsSchema = z27.object({
1223
+ count: z27.number(),
1224
+ durationMs: z27.number(),
1225
+ tokens: z27.number(),
1226
+ costUsd: z27.number()
1227
+ });
1228
+ var LLMTraceSummarySchema = z27.object({
1229
+ totalSteps: z27.number(),
1230
+ totalTurns: z27.number().optional(),
1231
+ totalDurationMs: z27.number(),
931
1232
  totalTokens: TokenUsageSchema,
932
- totalCostUsd: z26.number(),
933
- stepTypeBreakdown: z26.record(z26.string(), LLMBreakdownStatsSchema).optional(),
934
- modelBreakdown: z26.record(z26.string(), LLMBreakdownStatsSchema),
935
- modelsUsed: z26.array(z26.string())
936
- });
937
- var LLMTraceSchema = z26.object({
938
- id: z26.string(),
939
- steps: z26.array(LLMTraceStepSchema),
1233
+ totalCostUsd: z27.number(),
1234
+ stepTypeBreakdown: z27.record(z27.string(), LLMBreakdownStatsSchema).optional(),
1235
+ modelBreakdown: z27.record(z27.string(), LLMBreakdownStatsSchema),
1236
+ modelsUsed: z27.array(z27.string())
1237
+ });
1238
+ var LLMTraceSchema = z27.object({
1239
+ id: z27.string(),
1240
+ steps: z27.array(LLMTraceStepSchema),
940
1241
  summary: LLMTraceSummarySchema
941
1242
  });
942
1243
 
943
1244
  // src/evaluation/eval-result.ts
944
- import { z as z30 } from "zod";
1245
+ import { z as z31 } from "zod";
945
1246
 
946
1247
  // src/evaluation/eval-run.ts
947
- import { z as z28 } from "zod";
1248
+ import { z as z29 } from "zod";
948
1249
 
949
1250
  // src/evaluation/live-trace.ts
950
- import { z as z27 } from "zod";
1251
+ import { z as z28 } from "zod";
951
1252
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
952
1253
  LiveTraceEventType2["THINKING"] = "thinking";
953
1254
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -961,37 +1262,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
961
1262
  LiveTraceEventType2["USER"] = "user";
962
1263
  return LiveTraceEventType2;
963
1264
  })(LiveTraceEventType || {});
964
- var LiveTraceEventSchema = z27.object({
1265
+ var LiveTraceEventSchema = z28.object({
965
1266
  /** The evaluation run ID */
966
- evalRunId: z27.string(),
1267
+ evalRunId: z28.string(),
967
1268
  /** The scenario ID being executed */
968
- scenarioId: z27.string(),
1269
+ scenarioId: z28.string(),
969
1270
  /** The scenario name for display */
970
- scenarioName: z27.string(),
1271
+ scenarioName: z28.string(),
971
1272
  /** The target ID (skill, agent, etc.) */
972
- targetId: z27.string(),
1273
+ targetId: z28.string(),
973
1274
  /** The target name for display */
974
- targetName: z27.string(),
1275
+ targetName: z28.string(),
975
1276
  /** Step number in the current scenario execution */
976
- stepNumber: z27.number(),
1277
+ stepNumber: z28.number(),
977
1278
  /** Type of trace event */
978
- type: z27.enum(LiveTraceEventType),
1279
+ type: z28.enum(LiveTraceEventType),
979
1280
  /** Tool name if this is a tool_use event */
980
- toolName: z27.string().optional(),
1281
+ toolName: z28.string().optional(),
981
1282
  /** Tool arguments preview (truncated JSON) */
982
- toolArgs: z27.string().optional(),
1283
+ toolArgs: z28.string().optional(),
983
1284
  /** Output preview (truncated text) */
984
- outputPreview: z27.string().optional(),
1285
+ outputPreview: z28.string().optional(),
985
1286
  /** File path for file operations */
986
- filePath: z27.string().optional(),
1287
+ filePath: z28.string().optional(),
987
1288
  /** Elapsed time in milliseconds for progress events */
988
- elapsedMs: z27.number().optional(),
1289
+ elapsedMs: z28.number().optional(),
989
1290
  /** Thinking/reasoning text from Claude */
990
- thinking: z27.string().optional(),
1291
+ thinking: z28.string().optional(),
991
1292
  /** Timestamp when this event occurred */
992
- timestamp: z27.string(),
1293
+ timestamp: z28.string(),
993
1294
  /** Whether this is the final event for this scenario */
994
- isComplete: z27.boolean()
1295
+ isComplete: z28.boolean()
995
1296
  });
996
1297
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
997
1298
  function parseTraceEventLine(line) {
@@ -1020,40 +1321,40 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
1020
1321
  TriggerType2["SCHEDULED"] = "SCHEDULED";
1021
1322
  return TriggerType2;
1022
1323
  })(TriggerType || {});
1023
- var TriggerMetadataSchema = z28.object({
1024
- version: z28.string().optional(),
1025
- resourceUpdated: z28.array(z28.string()).optional(),
1026
- scheduleId: z28.string().optional()
1324
+ var TriggerMetadataSchema = z29.object({
1325
+ version: z29.string().optional(),
1326
+ resourceUpdated: z29.array(z29.string()).optional(),
1327
+ scheduleId: z29.string().optional()
1027
1328
  });
1028
- var TriggerSchema = z28.object({
1029
- id: z28.string(),
1329
+ var TriggerSchema = z29.object({
1330
+ id: z29.string(),
1030
1331
  metadata: TriggerMetadataSchema.optional(),
1031
- type: z28.nativeEnum(TriggerType)
1332
+ type: z29.nativeEnum(TriggerType)
1032
1333
  });
1033
- var DiffLineTypeSchema = z28.enum(["added", "removed", "unchanged"]);
1034
- var DiffLineSchema = z28.object({
1334
+ var DiffLineTypeSchema = z29.enum(["added", "removed", "unchanged"]);
1335
+ var DiffLineSchema = z29.object({
1035
1336
  type: DiffLineTypeSchema,
1036
- content: z28.string(),
1037
- lineNumber: z28.number()
1038
- });
1039
- var DiffContentSchema = z28.object({
1040
- path: z28.string(),
1041
- expected: z28.string(),
1042
- actual: z28.string(),
1043
- diffLines: z28.array(DiffLineSchema),
1044
- renamedFrom: z28.string().optional(),
1337
+ content: z29.string(),
1338
+ lineNumber: z29.number()
1339
+ });
1340
+ var DiffContentSchema = z29.object({
1341
+ path: z29.string(),
1342
+ expected: z29.string(),
1343
+ actual: z29.string(),
1344
+ diffLines: z29.array(DiffLineSchema),
1345
+ renamedFrom: z29.string().optional(),
1045
1346
  /** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
1046
- isInfrastructure: z28.boolean().optional()
1347
+ isInfrastructure: z29.boolean().optional()
1047
1348
  });
1048
- var CommandExecutionSchema = z28.object({
1049
- command: z28.string(),
1050
- exitCode: z28.number(),
1051
- output: z28.string().optional(),
1052
- duration: z28.number()
1349
+ var CommandExecutionSchema = z29.object({
1350
+ command: z29.string(),
1351
+ exitCode: z29.number(),
1352
+ output: z29.string().optional(),
1353
+ duration: z29.number()
1053
1354
  });
1054
- var FileModificationSchema = z28.object({
1055
- path: z28.string(),
1056
- action: z28.enum(["created", "modified", "deleted"])
1355
+ var FileModificationSchema = z29.object({
1356
+ path: z29.string(),
1357
+ action: z29.enum(["created", "modified", "deleted"])
1057
1358
  });
1058
1359
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1059
1360
  TemplateFileStatus2["NEW"] = "new";
@@ -1061,62 +1362,62 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
1061
1362
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
1062
1363
  return TemplateFileStatus2;
1063
1364
  })(TemplateFileStatus || {});
1064
- var TemplateFileSchema = z28.object({
1365
+ var TemplateFileSchema = z29.object({
1065
1366
  /** Relative path within the template */
1066
- path: z28.string(),
1367
+ path: z29.string(),
1067
1368
  /** Full file content after execution */
1068
- content: z28.string(),
1369
+ content: z29.string(),
1069
1370
  /** File status (new, modified, unchanged) */
1070
- status: z28.enum(["new", "modified", "unchanged"]),
1371
+ status: z29.enum(["new", "modified", "unchanged"]),
1071
1372
  /** Whether this file is an infrastructure/config file (e.g. .claude/settings.json, .mcp.json) */
1072
- isInfrastructure: z28.boolean().optional()
1373
+ isInfrastructure: z29.boolean().optional()
1073
1374
  });
1074
- var ApiCallSchema = z28.object({
1075
- endpoint: z28.string(),
1076
- tokensUsed: z28.number(),
1077
- duration: z28.number()
1375
+ var ApiCallSchema = z29.object({
1376
+ endpoint: z29.string(),
1377
+ tokensUsed: z29.number(),
1378
+ duration: z29.number()
1078
1379
  });
1079
- var ExecutionTraceSchema = z28.object({
1080
- commands: z28.array(CommandExecutionSchema),
1081
- filesModified: z28.array(FileModificationSchema),
1082
- apiCalls: z28.array(ApiCallSchema),
1083
- totalDuration: z28.number()
1380
+ var ExecutionTraceSchema = z29.object({
1381
+ commands: z29.array(CommandExecutionSchema),
1382
+ filesModified: z29.array(FileModificationSchema),
1383
+ apiCalls: z29.array(ApiCallSchema),
1384
+ totalDuration: z29.number()
1084
1385
  });
1085
- var RunAnalysisFindingSchema = z28.object({
1086
- category: z28.enum([
1386
+ var RunAnalysisFindingSchema = z29.object({
1387
+ category: z29.enum([
1087
1388
  "failure_pattern",
1088
1389
  "cost_waste",
1089
1390
  "flakiness",
1090
1391
  "inefficiency",
1091
1392
  "positive"
1092
1393
  ]),
1093
- severity: z28.enum(["high", "medium", "low"]),
1094
- description: z28.string(),
1095
- affectedScenarios: z28.array(z28.string()),
1096
- recommendation: z28.string().optional()
1394
+ severity: z29.enum(["high", "medium", "low"]),
1395
+ description: z29.string(),
1396
+ affectedScenarios: z29.array(z29.string()),
1397
+ recommendation: z29.string().optional()
1097
1398
  });
1098
- var RunAnalysisSchema = z28.object({
1099
- generatedAt: z28.string(),
1100
- summary: z28.string(),
1101
- findings: z28.array(RunAnalysisFindingSchema)
1399
+ var RunAnalysisSchema = z29.object({
1400
+ generatedAt: z29.string(),
1401
+ summary: z29.string(),
1402
+ findings: z29.array(RunAnalysisFindingSchema)
1102
1403
  });
1103
1404
  var EvalRunSchema = TenantEntitySchema.extend({
1104
1405
  /** Agent ID for this run */
1105
- agentId: z28.string().optional(),
1406
+ agentId: z29.string().optional(),
1106
1407
  /** Preset ID that originated this run (optional) */
1107
- presetId: z28.string().optional(),
1408
+ presetId: z29.string().optional(),
1108
1409
  /** Skill IDs for this run */
1109
- skillIds: z28.array(z28.string()).optional(),
1410
+ skillIds: z29.array(z29.string()).optional(),
1110
1411
  /** Map of skillId to skillVersionId for this run */
1111
- skillVersions: z28.record(z28.string(), z28.string()).optional(),
1412
+ skillVersions: z29.record(z29.string(), z29.string()).optional(),
1112
1413
  /** Scenario IDs to run (always present — resolved server-side from tags when needed) */
1113
- scenarioIds: z28.array(z28.string()),
1414
+ scenarioIds: z29.array(z29.string()),
1114
1415
  /** Current status */
1115
1416
  status: EvalStatusSchema,
1116
1417
  /** Progress percentage (0-100) */
1117
- progress: z28.number(),
1418
+ progress: z29.number(),
1118
1419
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
1119
- results: z28.array(z28.lazy(() => EvalRunResultSchema)),
1420
+ results: z29.array(z29.lazy(() => EvalRunResultSchema)),
1120
1421
  /** Aggregated metrics across all results */
1121
1422
  aggregateMetrics: EvalMetricsSchema,
1122
1423
  /** Aggregated LLM trace summary */
@@ -1124,41 +1425,41 @@ var EvalRunSchema = TenantEntitySchema.extend({
1124
1425
  /** What triggered this run */
1125
1426
  trigger: TriggerSchema.optional(),
1126
1427
  /** When the run started (set when evaluation is triggered) */
1127
- startedAt: z28.string().optional(),
1428
+ startedAt: z29.string().optional(),
1128
1429
  /** When the run completed */
1129
- completedAt: z28.string().optional(),
1430
+ completedAt: z29.string().optional(),
1130
1431
  /** Live trace events captured during execution (for playback on results page) */
1131
- liveTraceEvents: z28.array(LiveTraceEventSchema).optional(),
1432
+ liveTraceEvents: z29.array(LiveTraceEventSchema).optional(),
1132
1433
  /** Remote job ID for tracking execution in Dev Machines */
1133
- jobId: z28.string().optional(),
1434
+ jobId: z29.string().optional(),
1134
1435
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
1135
- jobStatus: z28.string().optional(),
1436
+ jobStatus: z29.string().optional(),
1136
1437
  /** Remote job error message if the job failed */
1137
- jobError: z28.string().optional(),
1438
+ jobError: z29.string().optional(),
1138
1439
  /** Timestamp of the last job status check */
1139
- jobStatusCheckedAt: z28.string().optional(),
1440
+ jobStatusCheckedAt: z29.string().optional(),
1140
1441
  /** MCP server IDs to enable for this run (optional) */
1141
- mcpIds: z28.array(z28.string()).optional(),
1442
+ mcpIds: z29.array(z29.string()).optional(),
1142
1443
  /** Sub-agent IDs to enable for this run (optional) */
1143
- subAgentIds: z28.array(z28.string()).optional(),
1444
+ subAgentIds: z29.array(z29.string()).optional(),
1144
1445
  /** Rule IDs to enable for this run (optional) */
1145
- ruleIds: z28.array(z28.string()).optional(),
1446
+ ruleIds: z29.array(z29.string()).optional(),
1146
1447
  /** Tags used to select scenarios for this run (for traceability) */
1147
- tags: z28.array(z28.string()).optional(),
1448
+ tags: z29.array(z29.string()).optional(),
1148
1449
  /** How many times each scenario is executed within this eval run. Default: 1. Max: 20. */
1149
- runsPerScenario: z28.number().int().min(1).max(20).optional(),
1450
+ runsPerScenario: z29.number().int().min(1).max(20).optional(),
1150
1451
  /** Snapshot of agent configuration captured at run creation time */
1151
- agentSnapshot: z28.object({
1152
- name: z28.string().optional(),
1452
+ agentSnapshot: z29.object({
1453
+ name: z29.string().optional(),
1153
1454
  agentType: AgentTypeSchema.optional(),
1154
1455
  runCommand: AgentRunCommandSchema.optional(),
1155
- systemPrompt: z28.string().nullable().optional(),
1456
+ systemPrompt: z29.string().nullable().optional(),
1156
1457
  modelConfig: ModelConfigSchema.optional()
1157
1458
  }).optional(),
1158
1459
  /** UUID linking all runs in a comparison group */
1159
- comparisonGroupId: z28.string().optional(),
1460
+ comparisonGroupId: z29.string().optional(),
1160
1461
  /** Human-readable label for this variant (e.g., "MCP: Wix Stores") */
1161
- comparisonLabel: z28.string().optional(),
1462
+ comparisonLabel: z29.string().optional(),
1162
1463
  /** LLM-generated analysis of the completed run */
1163
1464
  runAnalysis: RunAnalysisSchema.optional()
1164
1465
  });
@@ -1176,60 +1477,60 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
1176
1477
  agentSnapshot: true
1177
1478
  }).extend({
1178
1479
  /** Optional on input — backend resolves from tags when not provided */
1179
- scenarioIds: z28.array(z28.string()).optional()
1480
+ scenarioIds: z29.array(z29.string()).optional()
1180
1481
  }).refine(
1181
1482
  (data) => data.scenarioIds && data.scenarioIds.length > 0 || data.tags && data.tags.length > 0,
1182
1483
  { message: "Either scenarioIds or tags must be provided" }
1183
1484
  );
1184
- var EvaluationProgressSchema = z28.object({
1185
- runId: z28.string(),
1186
- targetId: z28.string(),
1187
- totalScenarios: z28.number(),
1188
- completedScenarios: z28.number(),
1189
- scenarioProgress: z28.array(
1190
- z28.object({
1191
- scenarioId: z28.string(),
1192
- currentStep: z28.string(),
1193
- error: z28.string().optional()
1485
+ var EvaluationProgressSchema = z29.object({
1486
+ runId: z29.string(),
1487
+ targetId: z29.string(),
1488
+ totalScenarios: z29.number(),
1489
+ completedScenarios: z29.number(),
1490
+ scenarioProgress: z29.array(
1491
+ z29.object({
1492
+ scenarioId: z29.string(),
1493
+ currentStep: z29.string(),
1494
+ error: z29.string().optional()
1194
1495
  })
1195
1496
  ),
1196
- createdAt: z28.number()
1197
- });
1198
- var EvaluationLogSchema = z28.object({
1199
- runId: z28.string(),
1200
- scenarioId: z28.string(),
1201
- log: z28.object({
1202
- level: z28.enum(["info", "error", "debug"]),
1203
- message: z28.string().optional(),
1204
- args: z28.array(z28.any()).optional(),
1205
- error: z28.string().optional()
1497
+ createdAt: z29.number()
1498
+ });
1499
+ var EvaluationLogSchema = z29.object({
1500
+ runId: z29.string(),
1501
+ scenarioId: z29.string(),
1502
+ log: z29.object({
1503
+ level: z29.enum(["info", "error", "debug"]),
1504
+ message: z29.string().optional(),
1505
+ args: z29.array(z29.any()).optional(),
1506
+ error: z29.string().optional()
1206
1507
  })
1207
1508
  });
1208
1509
  var LLM_TIMEOUT = 12e4;
1209
1510
 
1210
1511
  // src/evaluation/conversation.ts
1211
- import { z as z29 } from "zod";
1212
- var TextBlockSchema = z29.object({
1213
- type: z29.literal("text"),
1214
- text: z29.string()
1215
- });
1216
- var ThinkingBlockSchema = z29.object({
1217
- type: z29.literal("thinking"),
1218
- thinking: z29.string()
1219
- });
1220
- var ToolUseBlockSchema = z29.object({
1221
- type: z29.literal("tool_use"),
1222
- toolName: z29.string(),
1223
- toolId: z29.string(),
1224
- input: z29.unknown()
1225
- });
1226
- var ToolResultBlockSchema = z29.object({
1227
- type: z29.literal("tool_result"),
1228
- toolUseId: z29.string(),
1229
- content: z29.string(),
1230
- isError: z29.boolean().optional()
1231
- });
1232
- var ConversationBlockSchema = z29.discriminatedUnion("type", [
1512
+ import { z as z30 } from "zod";
1513
+ var TextBlockSchema = z30.object({
1514
+ type: z30.literal("text"),
1515
+ text: z30.string()
1516
+ });
1517
+ var ThinkingBlockSchema = z30.object({
1518
+ type: z30.literal("thinking"),
1519
+ thinking: z30.string()
1520
+ });
1521
+ var ToolUseBlockSchema = z30.object({
1522
+ type: z30.literal("tool_use"),
1523
+ toolName: z30.string(),
1524
+ toolId: z30.string(),
1525
+ input: z30.unknown()
1526
+ });
1527
+ var ToolResultBlockSchema = z30.object({
1528
+ type: z30.literal("tool_result"),
1529
+ toolUseId: z30.string(),
1530
+ content: z30.string(),
1531
+ isError: z30.boolean().optional()
1532
+ });
1533
+ var ConversationBlockSchema = z30.discriminatedUnion("type", [
1233
1534
  TextBlockSchema,
1234
1535
  ThinkingBlockSchema,
1235
1536
  ToolUseBlockSchema,
@@ -1240,18 +1541,18 @@ var ConversationMessageRoles = [
1240
1541
  "user",
1241
1542
  "system"
1242
1543
  ];
1243
- var ConversationMessageSchema = z29.object({
1244
- role: z29.enum(ConversationMessageRoles),
1245
- content: z29.array(ConversationBlockSchema),
1246
- timestamp: z29.string()
1544
+ var ConversationMessageSchema = z30.object({
1545
+ role: z30.enum(ConversationMessageRoles),
1546
+ content: z30.array(ConversationBlockSchema),
1547
+ timestamp: z30.string()
1247
1548
  });
1248
- var ScenarioConversationSchema = z29.object({
1249
- id: z29.string(),
1250
- projectId: z29.string(),
1251
- evalRunId: z29.string(),
1252
- resultId: z29.string(),
1253
- messages: z29.array(ConversationMessageSchema),
1254
- createdAt: z29.string()
1549
+ var ScenarioConversationSchema = z30.object({
1550
+ id: z30.string(),
1551
+ projectId: z30.string(),
1552
+ evalRunId: z30.string(),
1553
+ resultId: z30.string(),
1554
+ messages: z30.array(ConversationMessageSchema),
1555
+ createdAt: z30.string()
1255
1556
  });
1256
1557
 
1257
1558
  // src/evaluation/eval-result.ts
@@ -1262,98 +1563,98 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
1262
1563
  AssertionResultStatus2["ERROR"] = "error";
1263
1564
  return AssertionResultStatus2;
1264
1565
  })(AssertionResultStatus || {});
1265
- var AssertionResultSchema = z30.object({
1266
- id: z30.string(),
1267
- assertionId: z30.string(),
1268
- assertionType: z30.string(),
1269
- assertionName: z30.string(),
1270
- status: z30.enum(AssertionResultStatus),
1271
- message: z30.string().optional(),
1272
- expected: z30.string().optional(),
1273
- actual: z30.string().optional(),
1274
- duration: z30.number().optional(),
1275
- details: z30.record(z30.string(), z30.unknown()).optional(),
1276
- llmTraceSteps: z30.array(LLMTraceStepSchema).optional()
1277
- });
1278
- var EvalRunResultSchema = z30.object({
1279
- id: z30.string(),
1280
- targetId: z30.string(),
1281
- targetName: z30.string().optional(),
1566
+ var AssertionResultSchema = z31.object({
1567
+ id: z31.string(),
1568
+ assertionId: z31.string(),
1569
+ assertionType: z31.string(),
1570
+ assertionName: z31.string(),
1571
+ status: z31.enum(AssertionResultStatus),
1572
+ message: z31.string().optional(),
1573
+ expected: z31.string().optional(),
1574
+ actual: z31.string().optional(),
1575
+ duration: z31.number().optional(),
1576
+ details: z31.record(z31.string(), z31.unknown()).optional(),
1577
+ llmTraceSteps: z31.array(LLMTraceStepSchema).optional()
1578
+ });
1579
+ var EvalRunResultSchema = z31.object({
1580
+ id: z31.string(),
1581
+ targetId: z31.string(),
1582
+ targetName: z31.string().optional(),
1282
1583
  /** SkillVersion ID used for this evaluation (for version tracking) */
1283
- skillVersionId: z30.string().optional(),
1584
+ skillVersionId: z31.string().optional(),
1284
1585
  /** SkillVersion semver string (e.g., "1.0.0", "1.2.3") for display */
1285
- skillVersion: z30.string().optional(),
1286
- scenarioId: z30.string(),
1287
- scenarioName: z30.string(),
1586
+ skillVersion: z31.string().optional(),
1587
+ scenarioId: z31.string(),
1588
+ scenarioName: z31.string(),
1288
1589
  /** Snapshot of the trigger prompt used during the run (prevents stale display after edits) */
1289
- triggerPrompt: z30.string().optional(),
1590
+ triggerPrompt: z31.string().optional(),
1290
1591
  modelConfig: ModelConfigSchema.optional(),
1291
- assertionResults: z30.array(AssertionResultSchema),
1592
+ assertionResults: z31.array(AssertionResultSchema),
1292
1593
  metrics: EvalMetricsSchema.optional(),
1293
- passed: z30.number(),
1294
- failed: z30.number(),
1295
- passRate: z30.number(),
1296
- duration: z30.number(),
1297
- outputText: z30.string().optional(),
1298
- files: z30.array(ExpectedFileSchema).optional(),
1299
- fileDiffs: z30.array(DiffContentSchema).optional(),
1594
+ passed: z31.number(),
1595
+ failed: z31.number(),
1596
+ passRate: z31.number(),
1597
+ duration: z31.number(),
1598
+ outputText: z31.string().optional(),
1599
+ files: z31.array(ExpectedFileSchema).optional(),
1600
+ fileDiffs: z31.array(DiffContentSchema).optional(),
1300
1601
  /** Full template files after execution with status indicators */
1301
- templateFiles: z30.array(TemplateFileSchema).optional(),
1302
- startedAt: z30.string().optional(),
1303
- completedAt: z30.string().optional(),
1602
+ templateFiles: z31.array(TemplateFileSchema).optional(),
1603
+ startedAt: z31.string().optional(),
1604
+ completedAt: z31.string().optional(),
1304
1605
  llmTrace: LLMTraceSchema.optional(),
1305
1606
  /** Full conversation messages (only present in transit; stripped before DB storage) */
1306
- conversation: z30.array(ConversationMessageSchema).optional(),
1607
+ conversation: z31.array(ConversationMessageSchema).optional(),
1307
1608
  /** 0-based iteration index when a scenario is run multiple times within a single eval run */
1308
- iterationIndex: z30.number().int().min(0).optional()
1309
- });
1310
- var PromptResultSchema = z30.object({
1311
- text: z30.string(),
1312
- files: z30.array(z30.unknown()).optional(),
1313
- finishReason: z30.string().optional(),
1314
- reasoning: z30.string().optional(),
1315
- reasoningDetails: z30.unknown().optional(),
1316
- toolCalls: z30.array(z30.unknown()).optional(),
1317
- toolResults: z30.array(z30.unknown()).optional(),
1318
- warnings: z30.array(z30.unknown()).optional(),
1319
- sources: z30.array(z30.unknown()).optional(),
1320
- steps: z30.array(z30.unknown()),
1321
- generationTimeMs: z30.number(),
1322
- prompt: z30.string(),
1323
- systemPrompt: z30.string(),
1324
- usage: z30.object({
1325
- totalTokens: z30.number().optional(),
1326
- totalMicrocentsSpent: z30.number().optional()
1609
+ iterationIndex: z31.number().int().min(0).optional()
1610
+ });
1611
+ var PromptResultSchema = z31.object({
1612
+ text: z31.string(),
1613
+ files: z31.array(z31.unknown()).optional(),
1614
+ finishReason: z31.string().optional(),
1615
+ reasoning: z31.string().optional(),
1616
+ reasoningDetails: z31.unknown().optional(),
1617
+ toolCalls: z31.array(z31.unknown()).optional(),
1618
+ toolResults: z31.array(z31.unknown()).optional(),
1619
+ warnings: z31.array(z31.unknown()).optional(),
1620
+ sources: z31.array(z31.unknown()).optional(),
1621
+ steps: z31.array(z31.unknown()),
1622
+ generationTimeMs: z31.number(),
1623
+ prompt: z31.string(),
1624
+ systemPrompt: z31.string(),
1625
+ usage: z31.object({
1626
+ totalTokens: z31.number().optional(),
1627
+ totalMicrocentsSpent: z31.number().optional()
1327
1628
  })
1328
1629
  });
1329
- var EvaluationResultSchema = z30.object({
1330
- id: z30.string(),
1331
- runId: z30.string(),
1332
- timestamp: z30.number(),
1630
+ var EvaluationResultSchema = z31.object({
1631
+ id: z31.string(),
1632
+ runId: z31.string(),
1633
+ timestamp: z31.number(),
1333
1634
  promptResult: PromptResultSchema,
1334
- testResults: z30.array(z30.unknown()),
1335
- tags: z30.array(z30.string()).optional(),
1336
- feedback: z30.string().optional(),
1337
- score: z30.number(),
1338
- suiteId: z30.string().optional()
1339
- });
1340
- var LeanEvaluationResultSchema = z30.object({
1341
- id: z30.string(),
1342
- runId: z30.string(),
1343
- timestamp: z30.number(),
1344
- tags: z30.array(z30.string()).optional(),
1345
- scenarioId: z30.string(),
1346
- scenarioVersion: z30.number().optional(),
1347
- targetId: z30.string(),
1348
- targetVersion: z30.number().optional(),
1349
- suiteId: z30.string().optional(),
1350
- score: z30.number(),
1351
- time: z30.number().optional(),
1352
- microcentsSpent: z30.number().optional()
1635
+ testResults: z31.array(z31.unknown()),
1636
+ tags: z31.array(z31.string()).optional(),
1637
+ feedback: z31.string().optional(),
1638
+ score: z31.number(),
1639
+ suiteId: z31.string().optional()
1640
+ });
1641
+ var LeanEvaluationResultSchema = z31.object({
1642
+ id: z31.string(),
1643
+ runId: z31.string(),
1644
+ timestamp: z31.number(),
1645
+ tags: z31.array(z31.string()).optional(),
1646
+ scenarioId: z31.string(),
1647
+ scenarioVersion: z31.number().optional(),
1648
+ targetId: z31.string(),
1649
+ targetVersion: z31.number().optional(),
1650
+ suiteId: z31.string().optional(),
1651
+ score: z31.number(),
1652
+ time: z31.number().optional(),
1653
+ microcentsSpent: z31.number().optional()
1353
1654
  });
1354
1655
 
1355
1656
  // src/evaluation/eval-run-folder.ts
1356
- import { z as z31 } from "zod";
1657
+ import { z as z32 } from "zod";
1357
1658
  var EvalRunFolderSchema = TenantEntitySchema.extend({});
1358
1659
  var CreateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
1359
1660
  id: true,
@@ -1367,26 +1668,26 @@ var UpdateEvalRunFolderInputSchema = EvalRunFolderSchema.omit({
1367
1668
  updatedAt: true,
1368
1669
  deleted: true
1369
1670
  }).partial();
1370
- var EvalRunFolderMembershipSchema = z31.object({
1371
- folderId: z31.string(),
1372
- evalRunId: z31.string(),
1373
- projectId: z31.string(),
1374
- createdAt: z31.string()
1671
+ var EvalRunFolderMembershipSchema = z32.object({
1672
+ folderId: z32.string(),
1673
+ evalRunId: z32.string(),
1674
+ projectId: z32.string(),
1675
+ createdAt: z32.string()
1375
1676
  });
1376
1677
 
1377
1678
  // src/project/project.ts
1378
- import { z as z32 } from "zod";
1679
+ import { z as z33 } from "zod";
1379
1680
  var ProjectSchema = BaseEntitySchema.extend({
1380
- appId: z32.string().optional().describe("The ID of the app in Dev Center"),
1381
- scenarioTags: z32.array(z32.string()).optional().describe("Project-level tag vocabulary for scenarios"),
1681
+ appId: z33.string().optional().describe("The ID of the app in Dev Center"),
1682
+ scenarioTags: z33.array(z33.string()).optional().describe("Project-level tag vocabulary for scenarios"),
1382
1683
  /** Per-project Wix auth token (write-only — never returned in GET responses). null = clear. */
1383
- wixAuthToken: z32.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
1684
+ wixAuthToken: z33.string().nullable().optional().describe("Wix auth token for CLI/MCP authentication (encrypted at rest)"),
1384
1685
  /** Per-project Base44 auth file content (write-only — never returned in GET responses). null = clear. */
1385
- base44AuthFile: z32.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
1686
+ base44AuthFile: z33.string().nullable().optional().describe("Base64-encoded Base44 auth file content (encrypted at rest)"),
1386
1687
  /** Resolved at runtime from the encrypted Wix auth token */
1387
- wixAuthEmail: z32.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
1688
+ wixAuthEmail: z33.string().optional().describe("Email associated with the Wix auth token (resolved at runtime)"),
1388
1689
  /** Resolved at runtime from the encrypted Base44 auth file */
1389
- base44AuthEmail: z32.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
1690
+ base44AuthEmail: z33.string().optional().describe("Email from the Base44 auth file (resolved at runtime)")
1390
1691
  });
1391
1692
  var CreateProjectInputSchema = ProjectSchema.omit({
1392
1693
  id: true,
@@ -1412,7 +1713,7 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
1412
1713
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
1413
1714
 
1414
1715
  // src/schedule/eval-schedule.ts
1415
- import { z as z33 } from "zod";
1716
+ import { z as z34 } from "zod";
1416
1717
  var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1417
1718
  FrequencyType2["DAILY"] = "daily";
1418
1719
  FrequencyType2["WEEKDAY"] = "weekday";
@@ -1422,29 +1723,29 @@ var FrequencyType = /* @__PURE__ */ ((FrequencyType2) => {
1422
1723
  })(FrequencyType || {});
1423
1724
  var EvalScheduleSchema = TenantEntitySchema.extend({
1424
1725
  /** Whether the schedule is active */
1425
- enabled: z33.boolean(),
1726
+ enabled: z34.boolean(),
1426
1727
  /** Test suite to run */
1427
- suiteId: z33.string(),
1728
+ suiteId: z34.string(),
1428
1729
  /** Preset that provides agent + entities for this schedule */
1429
- presetId: z33.string(),
1730
+ presetId: z34.string(),
1430
1731
  /** How often to run */
1431
- frequencyType: z33.nativeEnum(FrequencyType),
1732
+ frequencyType: z34.nativeEnum(FrequencyType),
1432
1733
  /** Time of day in 24h format (HH:MM), hours 00-23, minutes 00-59 */
1433
- timeOfDay: z33.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1734
+ timeOfDay: z34.string().regex(/^([01]\d|2[0-3]):[0-5]\d$/),
1434
1735
  /** Day of week (0=Sun, 6=Sat) for weekly schedules */
1435
- dayOfWeek: z33.number().min(0).max(6).optional(),
1736
+ dayOfWeek: z34.number().min(0).max(6).optional(),
1436
1737
  /** Day of month (1-31) for monthly schedules */
1437
- dayOfMonth: z33.number().min(1).max(31).optional(),
1738
+ dayOfMonth: z34.number().min(1).max(31).optional(),
1438
1739
  /** IANA timezone (e.g., 'America/New_York') */
1439
- timezone: z33.string(),
1740
+ timezone: z34.string(),
1440
1741
  /** ID of the last eval run created by this schedule */
1441
- lastRunId: z33.string().optional(),
1742
+ lastRunId: z34.string().optional(),
1442
1743
  /** Denormalized status of the last run */
1443
- lastRunStatus: z33.string().optional(),
1744
+ lastRunStatus: z34.string().optional(),
1444
1745
  /** ISO timestamp of the last run */
1445
- lastRunAt: z33.string().optional(),
1746
+ lastRunAt: z34.string().optional(),
1446
1747
  /** Next scheduled run time in UTC (pre-computed for efficient querying, set by backend) */
1447
- nextRunAt: z33.string().optional()
1748
+ nextRunAt: z34.string().optional()
1448
1749
  });
1449
1750
  function isValidTimezone(tz) {
1450
1751
  try {
@@ -1457,14 +1758,14 @@ function isValidTimezone(tz) {
1457
1758
  function validateScheduleFields(data, ctx, options) {
1458
1759
  if (data.frequencyType === "weekly" /* WEEKLY */ && data.dayOfWeek == null) {
1459
1760
  ctx.addIssue({
1460
- code: z33.ZodIssueCode.custom,
1761
+ code: z34.ZodIssueCode.custom,
1461
1762
  message: "dayOfWeek is required for weekly schedules",
1462
1763
  path: ["dayOfWeek"]
1463
1764
  });
1464
1765
  }
1465
1766
  if (data.frequencyType === "monthly" /* MONTHLY */ && data.dayOfMonth == null) {
1466
1767
  ctx.addIssue({
1467
- code: z33.ZodIssueCode.custom,
1768
+ code: z34.ZodIssueCode.custom,
1468
1769
  message: "dayOfMonth is required for monthly schedules",
1469
1770
  path: ["dayOfMonth"]
1470
1771
  });
@@ -1472,7 +1773,7 @@ function validateScheduleFields(data, ctx, options) {
1472
1773
  const shouldValidateTz = options.partial ? data.timezone !== void 0 : true;
1473
1774
  if (shouldValidateTz && !isValidTimezone(data.timezone)) {
1474
1775
  ctx.addIssue({
1475
- code: z33.ZodIssueCode.custom,
1776
+ code: z34.ZodIssueCode.custom,
1476
1777
  message: "Invalid IANA timezone",
1477
1778
  path: ["timezone"]
1478
1779
  });
@@ -1495,228 +1796,9 @@ var CreateEvalScheduleInputSchema = BaseCreateScheduleSchema.superRefine((data,
1495
1796
  var UpdateEvalScheduleInputSchema = BaseCreateScheduleSchema.partial().superRefine((data, ctx) => {
1496
1797
  validateScheduleFields(data, ctx, { partial: true });
1497
1798
  });
1498
-
1499
- // src/assertion/system-assertions.ts
1500
- var SYSTEM_ASSERTION_IDS = {
1501
- SKILL_WAS_CALLED: "system:skill_was_called",
1502
- TOOL_CALLED_WITH_PARAM: "system:tool_called_with_param",
1503
- BUILD_PASSED: "system:build_passed",
1504
- TIME_LIMIT: "system:time_limit",
1505
- COST: "system:cost",
1506
- LLM_JUDGE: "system:llm_judge",
1507
- API_CALL: "system:api_call"
1508
- };
1509
- function isSystemAssertionId(id) {
1510
- return id.startsWith("system:");
1511
- }
1512
- var SYSTEM_ASSERTIONS = {
1513
- [SYSTEM_ASSERTION_IDS.SKILL_WAS_CALLED]: {
1514
- id: SYSTEM_ASSERTION_IDS.SKILL_WAS_CALLED,
1515
- name: "Skill Was Called",
1516
- description: "Check that one or more skills were invoked during the agent run",
1517
- type: "skill_was_called",
1518
- parameters: [
1519
- {
1520
- name: "skillNames",
1521
- label: "Skills",
1522
- type: "string",
1523
- required: true
1524
- },
1525
- {
1526
- name: "negate",
1527
- label: "Negate (NOT operator)",
1528
- type: "boolean",
1529
- required: false,
1530
- defaultValue: false
1531
- }
1532
- ]
1533
- },
1534
- [SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM]: {
1535
- id: SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM,
1536
- name: "Tool Called With Param",
1537
- description: "Check that a tool was called with expected parameters (tool name is substring matched)",
1538
- type: "tool_called_with_param",
1539
- parameters: [
1540
- {
1541
- name: "toolName",
1542
- label: "Tool Name",
1543
- type: "string",
1544
- required: true
1545
- },
1546
- {
1547
- name: "expectedParams",
1548
- label: "Expected Parameters (JSON, substring match)",
1549
- type: "string",
1550
- required: false
1551
- },
1552
- {
1553
- name: "requireSuccess",
1554
- label: "Require Successful Call",
1555
- type: "boolean",
1556
- required: false,
1557
- defaultValue: false,
1558
- advanced: true
1559
- },
1560
- {
1561
- name: "negate",
1562
- label: "Negate (NOT operator)",
1563
- type: "boolean",
1564
- required: false,
1565
- defaultValue: false
1566
- }
1567
- ]
1568
- },
1569
- [SYSTEM_ASSERTION_IDS.BUILD_PASSED]: {
1570
- id: SYSTEM_ASSERTION_IDS.BUILD_PASSED,
1571
- name: "Build Passed",
1572
- description: "Run a build command and verify it exits with expected code",
1573
- type: "build_passed",
1574
- parameters: [
1575
- {
1576
- name: "command",
1577
- label: "Build Command",
1578
- type: "string",
1579
- required: false,
1580
- defaultValue: "yarn build"
1581
- },
1582
- {
1583
- name: "expectedExitCode",
1584
- label: "Expected Exit Code",
1585
- type: "number",
1586
- required: false,
1587
- defaultValue: 0
1588
- },
1589
- {
1590
- name: "maxBuildTime",
1591
- label: "Max Build Time (ms)",
1592
- type: "number",
1593
- required: false,
1594
- advanced: true
1595
- },
1596
- {
1597
- name: "maxMemory",
1598
- label: "Max Memory (MB)",
1599
- type: "number",
1600
- required: false,
1601
- advanced: true
1602
- }
1603
- ]
1604
- },
1605
- [SYSTEM_ASSERTION_IDS.TIME_LIMIT]: {
1606
- id: SYSTEM_ASSERTION_IDS.TIME_LIMIT,
1607
- name: "Time Limit",
1608
- description: "Check that the scenario completed within a maximum duration",
1609
- type: "time_limit",
1610
- parameters: [
1611
- {
1612
- name: "maxDurationMs",
1613
- label: "Max Duration (ms)",
1614
- type: "number",
1615
- required: true,
1616
- defaultValue: 3e5
1617
- }
1618
- ]
1619
- },
1620
- [SYSTEM_ASSERTION_IDS.COST]: {
1621
- id: SYSTEM_ASSERTION_IDS.COST,
1622
- name: "Cost",
1623
- description: "Check that the scenario LLM execution cost stays within a USD threshold",
1624
- type: "cost",
1625
- parameters: [
1626
- {
1627
- name: "maxCostUsd",
1628
- label: "Max Cost (USD)",
1629
- type: "number",
1630
- required: true,
1631
- defaultValue: 1
1632
- }
1633
- ]
1634
- },
1635
- [SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
1636
- id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
1637
- name: "LLM Judge",
1638
- description: "LLM evaluates the output and assigns a score (0-10)",
1639
- type: "llm_judge",
1640
- parameters: [
1641
- {
1642
- name: "prompt",
1643
- label: "Judge Prompt",
1644
- type: "string",
1645
- required: true,
1646
- defaultValue: "Verify the output meets the acceptance criteria."
1647
- },
1648
- {
1649
- name: "minScore",
1650
- label: "Minimum Score (0-10)",
1651
- type: "number",
1652
- required: false,
1653
- defaultValue: 7
1654
- },
1655
- {
1656
- name: "model",
1657
- label: "Model",
1658
- type: "string",
1659
- required: false
1660
- }
1661
- ]
1662
- },
1663
- [SYSTEM_ASSERTION_IDS.API_CALL]: {
1664
- id: SYSTEM_ASSERTION_IDS.API_CALL,
1665
- name: "API Call",
1666
- description: "Call an API endpoint and verify the response contains expected data",
1667
- type: "api_call",
1668
- parameters: [
1669
- {
1670
- name: "url",
1671
- label: "URL",
1672
- type: "string",
1673
- required: true
1674
- },
1675
- {
1676
- name: "method",
1677
- label: "HTTP Method",
1678
- type: "string",
1679
- required: false,
1680
- defaultValue: "GET"
1681
- },
1682
- {
1683
- name: "requestBody",
1684
- label: "Request Body (JSON)",
1685
- type: "string",
1686
- required: false
1687
- },
1688
- {
1689
- name: "expectedResponse",
1690
- label: "Expected Response (JSON)",
1691
- type: "string",
1692
- required: true
1693
- },
1694
- {
1695
- name: "requestHeaders",
1696
- label: "Headers (JSON)",
1697
- type: "string",
1698
- required: false,
1699
- advanced: true
1700
- },
1701
- {
1702
- name: "timeoutMs",
1703
- label: "Timeout (ms)",
1704
- type: "number",
1705
- required: false,
1706
- defaultValue: 3e4,
1707
- advanced: true
1708
- }
1709
- ]
1710
- }
1711
- };
1712
- function getSystemAssertions() {
1713
- return Object.values(SYSTEM_ASSERTIONS);
1714
- }
1715
- function getSystemAssertion(id) {
1716
- return SYSTEM_ASSERTIONS[id];
1717
- }
1718
1799
  export {
1719
1800
  AGENT_TYPE_LABELS,
1801
+ ALLOWED_BUILD_COMMANDS,
1720
1802
  ALL_AVAILABLE_MODEL_IDS,
1721
1803
  AVAILABLE_CLAUDE_MODEL_IDS,
1722
1804
  AVAILABLE_OPENAI_MODEL_IDS,
@@ -1750,6 +1832,7 @@ export {
1750
1832
  BatchSummarySchema,
1751
1833
  BuildCheckTestSchema,
1752
1834
  BuildPassedAssertionSchema,
1835
+ BuildPassedCommandStringSchema,
1753
1836
  BuildPassedConfigSchema,
1754
1837
  BulkImportResultItemSchema,
1755
1838
  BulkImportResultSchema,
@@ -1777,6 +1860,7 @@ export {
1777
1860
  CreateTemplateInputSchema,
1778
1861
  CreateTestScenarioInputSchema,
1779
1862
  CreateTestSuiteInputSchema,
1863
+ DEFAULT_BUILD_PASSED_COMMAND,
1780
1864
  DEFAULT_EVALUATOR_SYSTEM_PROMPT,
1781
1865
  DEFAULT_JUDGE_MODEL,
1782
1866
  DiffContentSchema,
@@ -1874,6 +1958,7 @@ export {
1874
1958
  ToolTestSchema,
1875
1959
  ToolUseBlockSchema,
1876
1960
  TriggerMetadataSchema,
1961
+ TriggerPromptImageSchema,
1877
1962
  TriggerSchema,
1878
1963
  TriggerType,
1879
1964
  UpdateAgentInputSchema,
@@ -1893,11 +1978,14 @@ export {
1893
1978
  formatTraceEventLine,
1894
1979
  getSystemAssertion,
1895
1980
  getSystemAssertions,
1981
+ isAllowedBuildCommandString,
1896
1982
  isSystemAssertionId,
1897
1983
  isValidSkillFolderName,
1898
1984
  normalizeBatchAssertionLink,
1899
1985
  normalizeModelId,
1986
+ parseBuildCommandToArgv,
1900
1987
  parseTraceEventLine,
1901
- validateAssertionConfig
1988
+ validateAssertionConfig,
1989
+ validateBuildPassedParamsInAssertionLinks
1902
1990
  };
1903
1991
  //# sourceMappingURL=index.mjs.map