@wix/evalforge-types 0.16.0 → 0.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -26,6 +26,8 @@ __export(index_exports, {
26
26
  AllowedCommands: () => AllowedCommands,
27
27
  ApiCallSchema: () => ApiCallSchema,
28
28
  AssertionConfigSchema: () => AssertionConfigSchema,
29
+ AssertionParameterSchema: () => AssertionParameterSchema,
30
+ AssertionParameterTypeSchema: () => AssertionParameterTypeSchema,
29
31
  AssertionResultSchema: () => AssertionResultSchema,
30
32
  AssertionResultStatus: () => AssertionResultStatus,
31
33
  AssertionSchema: () => AssertionSchema,
@@ -92,6 +94,9 @@ __export(index_exports, {
92
94
  ProjectSchema: () => ProjectSchema,
93
95
  PromptResultSchema: () => PromptResultSchema,
94
96
  SKILL_FOLDER_NAME_REGEX: () => SKILL_FOLDER_NAME_REGEX,
97
+ SYSTEM_ASSERTIONS: () => SYSTEM_ASSERTIONS,
98
+ SYSTEM_ASSERTION_IDS: () => SYSTEM_ASSERTION_IDS,
99
+ ScenarioAssertionLinkSchema: () => ScenarioAssertionLinkSchema,
95
100
  SiteConfigTestSchema: () => SiteConfigTestSchema,
96
101
  SkillMetadataSchema: () => SkillMetadataSchema,
97
102
  SkillSchema: () => SkillSchema,
@@ -130,6 +135,9 @@ __export(index_exports, {
130
135
  getBuildPassedConfig: () => getBuildPassedConfig,
131
136
  getLlmJudgeConfig: () => getLlmJudgeConfig,
132
137
  getSkillWasCalledConfig: () => getSkillWasCalledConfig,
138
+ getSystemAssertion: () => getSystemAssertion,
139
+ getSystemAssertions: () => getSystemAssertions,
140
+ isSystemAssertionId: () => isSystemAssertionId,
133
141
  isValidSkillFolderName: () => isValidSkillFolderName,
134
142
  parseTraceEventLine: () => parseTraceEventLine,
135
143
  validateAssertionConfig: () => validateAssertionConfig
@@ -592,22 +600,145 @@ var EnvironmentSchema = import_zod19.z.object({
592
600
  });
593
601
 
594
602
  // src/scenario/test-scenario.ts
603
+ var import_zod21 = require("zod");
604
+
605
+ // src/assertion/assertion.ts
595
606
  var import_zod20 = require("zod");
596
- var ExpectedFileSchema = import_zod20.z.object({
607
+ var AssertionTypeSchema = import_zod20.z.enum([
608
+ "skill_was_called",
609
+ "build_passed",
610
+ "llm_judge"
611
+ ]);
612
+ var AssertionParameterTypeSchema = import_zod20.z.enum([
613
+ "string",
614
+ "number",
615
+ "boolean"
616
+ ]);
617
+ var AssertionParameterSchema = import_zod20.z.object({
618
+ /** Parameter name (used as key in params object) */
619
+ name: import_zod20.z.string().min(1),
620
+ /** Display label for the parameter */
621
+ label: import_zod20.z.string().min(1),
622
+ /** Parameter type */
623
+ type: AssertionParameterTypeSchema,
624
+ /** Whether this parameter is required */
625
+ required: import_zod20.z.boolean(),
626
+ /** Default value (optional, used when not provided) */
627
+ defaultValue: import_zod20.z.union([import_zod20.z.string(), import_zod20.z.number(), import_zod20.z.boolean()]).optional()
628
+ });
629
+ var ScenarioAssertionLinkSchema = import_zod20.z.object({
630
+ /** ID of the assertion (can be system assertion like 'system:skill_was_called' or custom assertion UUID) */
631
+ assertionId: import_zod20.z.string(),
632
+ /** Parameter values for this assertion in this scenario */
633
+ params: import_zod20.z.record(
634
+ import_zod20.z.string(),
635
+ import_zod20.z.union([import_zod20.z.string(), import_zod20.z.number(), import_zod20.z.boolean(), import_zod20.z.null()])
636
+ ).optional()
637
+ });
638
+ var SkillWasCalledConfigSchema = import_zod20.z.object({
639
+ /** Name of the skill that must have been called */
640
+ skillName: import_zod20.z.string().min(1)
641
+ });
642
+ var BuildPassedConfigSchema = import_zod20.z.strictObject({
643
+ /** Command to run (default: "yarn build") */
644
+ command: import_zod20.z.string().optional(),
645
+ /** Expected exit code (default: 0) */
646
+ expectedExitCode: import_zod20.z.number().int().optional()
647
+ });
648
+ var LlmJudgeConfigSchema = import_zod20.z.object({
649
+ /**
650
+ * Prompt template with placeholders:
651
+ * - {{output}}: agent's final output
652
+ * - {{cwd}}: working directory
653
+ * - {{changedFiles}}: all files changed (new, modified)
654
+ * - {{modifiedFiles}}: only existing files that were modified
655
+ * - {{newFiles}}: only new files that were created
656
+ * - {{trace}}: step-by-step trace of tool calls
657
+ * - Custom parameters defined in the parameters array
658
+ */
659
+ prompt: import_zod20.z.string().min(1),
660
+ /** Optional system prompt for the judge */
661
+ systemPrompt: import_zod20.z.string().optional(),
662
+ /** Minimum score to pass (0-100, default 70) */
663
+ minScore: import_zod20.z.number().int().min(0).max(100).optional(),
664
+ /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
665
+ model: import_zod20.z.string().optional(),
666
+ /** Max output tokens */
667
+ maxTokens: import_zod20.z.number().int().optional(),
668
+ /** Temperature (0-1) */
669
+ temperature: import_zod20.z.number().min(0).max(1).optional(),
670
+ /** User-defined parameters for this assertion */
671
+ parameters: import_zod20.z.array(AssertionParameterSchema).optional()
672
+ });
673
+ var AssertionConfigSchema = import_zod20.z.union([
674
+ LlmJudgeConfigSchema,
675
+ // requires prompt - check first
676
+ SkillWasCalledConfigSchema,
677
+ // requires skillName
678
+ BuildPassedConfigSchema,
679
+ // all optional, uses strictObject to reject unknown keys
680
+ import_zod20.z.object({})
681
+ // fallback empty config
682
+ ]);
683
+ var CustomAssertionSchema = TenantEntitySchema.extend({
684
+ /** The assertion type */
685
+ type: AssertionTypeSchema,
686
+ /** Type-specific configuration */
687
+ config: AssertionConfigSchema
688
+ });
689
+ var CreateCustomAssertionInputSchema = CustomAssertionSchema.omit({
690
+ id: true,
691
+ createdAt: true,
692
+ updatedAt: true,
693
+ deleted: true
694
+ });
695
+ var UpdateCustomAssertionInputSchema = CreateCustomAssertionInputSchema.partial();
696
+ function validateAssertionConfig(type, config) {
697
+ switch (type) {
698
+ case "skill_was_called":
699
+ return SkillWasCalledConfigSchema.safeParse(config).success;
700
+ case "build_passed":
701
+ return BuildPassedConfigSchema.safeParse(config).success;
702
+ case "llm_judge":
703
+ return LlmJudgeConfigSchema.safeParse(config).success;
704
+ default:
705
+ return false;
706
+ }
707
+ }
708
+ function getSkillWasCalledConfig(assertion) {
709
+ if (assertion.type !== "skill_was_called") return null;
710
+ const result = SkillWasCalledConfigSchema.safeParse(assertion.config);
711
+ return result.success ? result.data : null;
712
+ }
713
+ function getBuildPassedConfig(assertion) {
714
+ if (assertion.type !== "build_passed") return null;
715
+ const result = BuildPassedConfigSchema.safeParse(assertion.config);
716
+ return result.success ? result.data : null;
717
+ }
718
+ function getLlmJudgeConfig(assertion) {
719
+ if (assertion.type !== "llm_judge") return null;
720
+ const result = LlmJudgeConfigSchema.safeParse(assertion.config);
721
+ return result.success ? result.data : null;
722
+ }
723
+
724
+ // src/scenario/test-scenario.ts
725
+ var ExpectedFileSchema = import_zod21.z.object({
597
726
  /** Relative path where the file should be created */
598
- path: import_zod20.z.string(),
727
+ path: import_zod21.z.string(),
599
728
  /** Optional expected content */
600
- content: import_zod20.z.string().optional()
729
+ content: import_zod21.z.string().optional()
601
730
  });
602
731
  var TestScenarioSchema = TenantEntitySchema.extend({
603
732
  /** The prompt sent to the agent to trigger the task */
604
- triggerPrompt: import_zod20.z.string().min(10),
733
+ triggerPrompt: import_zod21.z.string().min(10),
605
734
  /** ID of the template to use for this scenario (null = no template) */
606
- templateId: import_zod20.z.string().nullish(),
735
+ templateId: import_zod21.z.string().nullish(),
607
736
  /** Inline assertions to evaluate for this scenario (legacy) */
608
- assertions: import_zod20.z.array(AssertionSchema).optional(),
609
- /** IDs of saved assertions to evaluate (from assertions table) */
610
- assertionIds: import_zod20.z.array(import_zod20.z.string()).optional()
737
+ assertions: import_zod21.z.array(AssertionSchema).optional(),
738
+ /** IDs of saved assertions to evaluate (from assertions table) - legacy, use assertionLinks */
739
+ assertionIds: import_zod21.z.array(import_zod21.z.string()).optional(),
740
+ /** Linked assertions with per-scenario parameter values */
741
+ assertionLinks: import_zod21.z.array(ScenarioAssertionLinkSchema).optional()
611
742
  });
612
743
  var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
613
744
  id: true,
@@ -618,10 +749,10 @@ var CreateTestScenarioInputSchema = TestScenarioSchema.omit({
618
749
  var UpdateTestScenarioInputSchema = CreateTestScenarioInputSchema.partial();
619
750
 
620
751
  // src/suite/test-suite.ts
621
- var import_zod21 = require("zod");
752
+ var import_zod22 = require("zod");
622
753
  var TestSuiteSchema = TenantEntitySchema.extend({
623
754
  /** IDs of test scenarios in this suite */
624
- scenarioIds: import_zod21.z.array(import_zod21.z.string())
755
+ scenarioIds: import_zod22.z.array(import_zod22.z.string())
625
756
  });
626
757
  var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
627
758
  id: true,
@@ -632,21 +763,21 @@ var CreateTestSuiteInputSchema = TestSuiteSchema.omit({
632
763
  var UpdateTestSuiteInputSchema = CreateTestSuiteInputSchema.partial();
633
764
 
634
765
  // src/evaluation/metrics.ts
635
- var import_zod22 = require("zod");
636
- var TokenUsageSchema = import_zod22.z.object({
637
- prompt: import_zod22.z.number(),
638
- completion: import_zod22.z.number(),
639
- total: import_zod22.z.number()
640
- });
641
- var EvalMetricsSchema = import_zod22.z.object({
642
- totalAssertions: import_zod22.z.number(),
643
- passed: import_zod22.z.number(),
644
- failed: import_zod22.z.number(),
645
- skipped: import_zod22.z.number(),
646
- errors: import_zod22.z.number(),
647
- passRate: import_zod22.z.number(),
648
- avgDuration: import_zod22.z.number(),
649
- totalDuration: import_zod22.z.number()
766
+ var import_zod23 = require("zod");
767
+ var TokenUsageSchema = import_zod23.z.object({
768
+ prompt: import_zod23.z.number(),
769
+ completion: import_zod23.z.number(),
770
+ total: import_zod23.z.number()
771
+ });
772
+ var EvalMetricsSchema = import_zod23.z.object({
773
+ totalAssertions: import_zod23.z.number(),
774
+ passed: import_zod23.z.number(),
775
+ failed: import_zod23.z.number(),
776
+ skipped: import_zod23.z.number(),
777
+ errors: import_zod23.z.number(),
778
+ passRate: import_zod23.z.number(),
779
+ avgDuration: import_zod23.z.number(),
780
+ totalDuration: import_zod23.z.number()
650
781
  });
651
782
  var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
652
783
  EvalStatus2["PENDING"] = "pending";
@@ -656,7 +787,7 @@ var EvalStatus = /* @__PURE__ */ ((EvalStatus2) => {
656
787
  EvalStatus2["CANCELLED"] = "cancelled";
657
788
  return EvalStatus2;
658
789
  })(EvalStatus || {});
659
- var EvalStatusSchema = import_zod22.z.enum(EvalStatus);
790
+ var EvalStatusSchema = import_zod23.z.enum(EvalStatus);
660
791
  var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
661
792
  LLMStepType2["COMPLETION"] = "completion";
662
793
  LLMStepType2["TOOL_USE"] = "tool_use";
@@ -664,52 +795,52 @@ var LLMStepType = /* @__PURE__ */ ((LLMStepType2) => {
664
795
  LLMStepType2["THINKING"] = "thinking";
665
796
  return LLMStepType2;
666
797
  })(LLMStepType || {});
667
- var LLMTraceStepSchema = import_zod22.z.object({
668
- id: import_zod22.z.string(),
669
- stepNumber: import_zod22.z.number(),
670
- type: import_zod22.z.enum(LLMStepType),
671
- model: import_zod22.z.string(),
672
- provider: import_zod22.z.string(),
673
- startedAt: import_zod22.z.string(),
674
- durationMs: import_zod22.z.number(),
798
+ var LLMTraceStepSchema = import_zod23.z.object({
799
+ id: import_zod23.z.string(),
800
+ stepNumber: import_zod23.z.number(),
801
+ type: import_zod23.z.enum(LLMStepType),
802
+ model: import_zod23.z.string(),
803
+ provider: import_zod23.z.string(),
804
+ startedAt: import_zod23.z.string(),
805
+ durationMs: import_zod23.z.number(),
675
806
  tokenUsage: TokenUsageSchema,
676
- costUsd: import_zod22.z.number(),
677
- toolName: import_zod22.z.string().optional(),
678
- toolArguments: import_zod22.z.string().optional(),
679
- inputPreview: import_zod22.z.string().optional(),
680
- outputPreview: import_zod22.z.string().optional(),
681
- success: import_zod22.z.boolean(),
682
- error: import_zod22.z.string().optional()
683
- });
684
- var LLMBreakdownStatsSchema = import_zod22.z.object({
685
- count: import_zod22.z.number(),
686
- durationMs: import_zod22.z.number(),
687
- tokens: import_zod22.z.number(),
688
- costUsd: import_zod22.z.number()
689
- });
690
- var LLMTraceSummarySchema = import_zod22.z.object({
691
- totalSteps: import_zod22.z.number(),
692
- totalDurationMs: import_zod22.z.number(),
807
+ costUsd: import_zod23.z.number(),
808
+ toolName: import_zod23.z.string().optional(),
809
+ toolArguments: import_zod23.z.string().optional(),
810
+ inputPreview: import_zod23.z.string().optional(),
811
+ outputPreview: import_zod23.z.string().optional(),
812
+ success: import_zod23.z.boolean(),
813
+ error: import_zod23.z.string().optional()
814
+ });
815
+ var LLMBreakdownStatsSchema = import_zod23.z.object({
816
+ count: import_zod23.z.number(),
817
+ durationMs: import_zod23.z.number(),
818
+ tokens: import_zod23.z.number(),
819
+ costUsd: import_zod23.z.number()
820
+ });
821
+ var LLMTraceSummarySchema = import_zod23.z.object({
822
+ totalSteps: import_zod23.z.number(),
823
+ totalDurationMs: import_zod23.z.number(),
693
824
  totalTokens: TokenUsageSchema,
694
- totalCostUsd: import_zod22.z.number(),
695
- stepTypeBreakdown: import_zod22.z.record(import_zod22.z.string(), LLMBreakdownStatsSchema).optional(),
696
- modelBreakdown: import_zod22.z.record(import_zod22.z.string(), LLMBreakdownStatsSchema),
697
- modelsUsed: import_zod22.z.array(import_zod22.z.string())
698
- });
699
- var LLMTraceSchema = import_zod22.z.object({
700
- id: import_zod22.z.string(),
701
- steps: import_zod22.z.array(LLMTraceStepSchema),
825
+ totalCostUsd: import_zod23.z.number(),
826
+ stepTypeBreakdown: import_zod23.z.record(import_zod23.z.string(), LLMBreakdownStatsSchema).optional(),
827
+ modelBreakdown: import_zod23.z.record(import_zod23.z.string(), LLMBreakdownStatsSchema),
828
+ modelsUsed: import_zod23.z.array(import_zod23.z.string())
829
+ });
830
+ var LLMTraceSchema = import_zod23.z.object({
831
+ id: import_zod23.z.string(),
832
+ steps: import_zod23.z.array(LLMTraceStepSchema),
702
833
  summary: LLMTraceSummarySchema
703
834
  });
704
835
 
705
836
  // src/evaluation/eval-result.ts
706
- var import_zod25 = require("zod");
837
+ var import_zod26 = require("zod");
707
838
 
708
839
  // src/evaluation/eval-run.ts
709
- var import_zod24 = require("zod");
840
+ var import_zod25 = require("zod");
710
841
 
711
842
  // src/evaluation/live-trace.ts
712
- var import_zod23 = require("zod");
843
+ var import_zod24 = require("zod");
713
844
  var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
714
845
  LiveTraceEventType2["THINKING"] = "thinking";
715
846
  LiveTraceEventType2["TOOL_USE"] = "tool_use";
@@ -723,37 +854,37 @@ var LiveTraceEventType = /* @__PURE__ */ ((LiveTraceEventType2) => {
723
854
  LiveTraceEventType2["USER"] = "user";
724
855
  return LiveTraceEventType2;
725
856
  })(LiveTraceEventType || {});
726
- var LiveTraceEventSchema = import_zod23.z.object({
857
+ var LiveTraceEventSchema = import_zod24.z.object({
727
858
  /** The evaluation run ID */
728
- evalRunId: import_zod23.z.string(),
859
+ evalRunId: import_zod24.z.string(),
729
860
  /** The scenario ID being executed */
730
- scenarioId: import_zod23.z.string(),
861
+ scenarioId: import_zod24.z.string(),
731
862
  /** The scenario name for display */
732
- scenarioName: import_zod23.z.string(),
863
+ scenarioName: import_zod24.z.string(),
733
864
  /** The target ID (skill, agent, etc.) */
734
- targetId: import_zod23.z.string(),
865
+ targetId: import_zod24.z.string(),
735
866
  /** The target name for display */
736
- targetName: import_zod23.z.string(),
867
+ targetName: import_zod24.z.string(),
737
868
  /** Step number in the current scenario execution */
738
- stepNumber: import_zod23.z.number(),
869
+ stepNumber: import_zod24.z.number(),
739
870
  /** Type of trace event */
740
- type: import_zod23.z.enum(LiveTraceEventType),
871
+ type: import_zod24.z.enum(LiveTraceEventType),
741
872
  /** Tool name if this is a tool_use event */
742
- toolName: import_zod23.z.string().optional(),
873
+ toolName: import_zod24.z.string().optional(),
743
874
  /** Tool arguments preview (truncated JSON) */
744
- toolArgs: import_zod23.z.string().optional(),
875
+ toolArgs: import_zod24.z.string().optional(),
745
876
  /** Output preview (truncated text) */
746
- outputPreview: import_zod23.z.string().optional(),
877
+ outputPreview: import_zod24.z.string().optional(),
747
878
  /** File path for file operations */
748
- filePath: import_zod23.z.string().optional(),
879
+ filePath: import_zod24.z.string().optional(),
749
880
  /** Elapsed time in milliseconds for progress events */
750
- elapsedMs: import_zod23.z.number().optional(),
881
+ elapsedMs: import_zod24.z.number().optional(),
751
882
  /** Thinking/reasoning text from Claude */
752
- thinking: import_zod23.z.string().optional(),
883
+ thinking: import_zod24.z.string().optional(),
753
884
  /** Timestamp when this event occurred */
754
- timestamp: import_zod23.z.string(),
885
+ timestamp: import_zod24.z.string(),
755
886
  /** Whether this is the final event for this scenario */
756
- isComplete: import_zod23.z.boolean()
887
+ isComplete: import_zod24.z.boolean()
757
888
  });
758
889
  var TRACE_EVENT_PREFIX = "TRACE_EVENT:";
759
890
  function parseTraceEventLine(line) {
@@ -781,14 +912,14 @@ var TriggerType = /* @__PURE__ */ ((TriggerType2) => {
781
912
  TriggerType2["MANUAL"] = "MANUAL";
782
913
  return TriggerType2;
783
914
  })(TriggerType || {});
784
- var TriggerMetadataSchema = import_zod24.z.object({
785
- version: import_zod24.z.string().optional(),
786
- resourceUpdated: import_zod24.z.array(import_zod24.z.string()).optional()
915
+ var TriggerMetadataSchema = import_zod25.z.object({
916
+ version: import_zod25.z.string().optional(),
917
+ resourceUpdated: import_zod25.z.array(import_zod25.z.string()).optional()
787
918
  });
788
- var TriggerSchema = import_zod24.z.object({
789
- id: import_zod24.z.string(),
919
+ var TriggerSchema = import_zod25.z.object({
920
+ id: import_zod25.z.string(),
790
921
  metadata: TriggerMetadataSchema.optional(),
791
- type: import_zod24.z.enum(TriggerType)
922
+ type: import_zod25.z.enum(TriggerType)
792
923
  });
793
924
  var FailureCategory = /* @__PURE__ */ ((FailureCategory2) => {
794
925
  FailureCategory2["MISSING_FILE"] = "missing_file";
@@ -806,28 +937,28 @@ var FailureSeverity = /* @__PURE__ */ ((FailureSeverity2) => {
806
937
  FailureSeverity2["LOW"] = "low";
807
938
  return FailureSeverity2;
808
939
  })(FailureSeverity || {});
809
- var DiffLineTypeSchema = import_zod24.z.enum(["added", "removed", "unchanged"]);
810
- var DiffLineSchema = import_zod24.z.object({
940
+ var DiffLineTypeSchema = import_zod25.z.enum(["added", "removed", "unchanged"]);
941
+ var DiffLineSchema = import_zod25.z.object({
811
942
  type: DiffLineTypeSchema,
812
- content: import_zod24.z.string(),
813
- lineNumber: import_zod24.z.number()
814
- });
815
- var DiffContentSchema = import_zod24.z.object({
816
- path: import_zod24.z.string(),
817
- expected: import_zod24.z.string(),
818
- actual: import_zod24.z.string(),
819
- diffLines: import_zod24.z.array(DiffLineSchema),
820
- renamedFrom: import_zod24.z.string().optional()
821
- });
822
- var CommandExecutionSchema = import_zod24.z.object({
823
- command: import_zod24.z.string(),
824
- exitCode: import_zod24.z.number(),
825
- output: import_zod24.z.string().optional(),
826
- duration: import_zod24.z.number()
827
- });
828
- var FileModificationSchema = import_zod24.z.object({
829
- path: import_zod24.z.string(),
830
- action: import_zod24.z.enum(["created", "modified", "deleted"])
943
+ content: import_zod25.z.string(),
944
+ lineNumber: import_zod25.z.number()
945
+ });
946
+ var DiffContentSchema = import_zod25.z.object({
947
+ path: import_zod25.z.string(),
948
+ expected: import_zod25.z.string(),
949
+ actual: import_zod25.z.string(),
950
+ diffLines: import_zod25.z.array(DiffLineSchema),
951
+ renamedFrom: import_zod25.z.string().optional()
952
+ });
953
+ var CommandExecutionSchema = import_zod25.z.object({
954
+ command: import_zod25.z.string(),
955
+ exitCode: import_zod25.z.number(),
956
+ output: import_zod25.z.string().optional(),
957
+ duration: import_zod25.z.number()
958
+ });
959
+ var FileModificationSchema = import_zod25.z.object({
960
+ path: import_zod25.z.string(),
961
+ action: import_zod25.z.enum(["created", "modified", "deleted"])
831
962
  });
832
963
  var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
833
964
  TemplateFileStatus2["NEW"] = "new";
@@ -835,75 +966,75 @@ var TemplateFileStatus = /* @__PURE__ */ ((TemplateFileStatus2) => {
835
966
  TemplateFileStatus2["UNCHANGED"] = "unchanged";
836
967
  return TemplateFileStatus2;
837
968
  })(TemplateFileStatus || {});
838
- var TemplateFileSchema = import_zod24.z.object({
969
+ var TemplateFileSchema = import_zod25.z.object({
839
970
  /** Relative path within the template */
840
- path: import_zod24.z.string(),
971
+ path: import_zod25.z.string(),
841
972
  /** Full file content after execution */
842
- content: import_zod24.z.string(),
973
+ content: import_zod25.z.string(),
843
974
  /** File status (new, modified, unchanged) */
844
- status: import_zod24.z.enum(["new", "modified", "unchanged"])
845
- });
846
- var ApiCallSchema = import_zod24.z.object({
847
- endpoint: import_zod24.z.string(),
848
- tokensUsed: import_zod24.z.number(),
849
- duration: import_zod24.z.number()
850
- });
851
- var ExecutionTraceSchema = import_zod24.z.object({
852
- commands: import_zod24.z.array(CommandExecutionSchema),
853
- filesModified: import_zod24.z.array(FileModificationSchema),
854
- apiCalls: import_zod24.z.array(ApiCallSchema),
855
- totalDuration: import_zod24.z.number()
856
- });
857
- var FailureAnalysisSchema = import_zod24.z.object({
858
- category: import_zod24.z.enum(FailureCategory),
859
- severity: import_zod24.z.enum(FailureSeverity),
860
- summary: import_zod24.z.string(),
861
- details: import_zod24.z.string(),
862
- rootCause: import_zod24.z.string(),
863
- suggestedFix: import_zod24.z.string(),
864
- relatedAssertions: import_zod24.z.array(import_zod24.z.string()),
865
- codeSnippet: import_zod24.z.string().optional(),
866
- similarIssues: import_zod24.z.array(import_zod24.z.string()).optional(),
867
- patternId: import_zod24.z.string().optional(),
975
+ status: import_zod25.z.enum(["new", "modified", "unchanged"])
976
+ });
977
+ var ApiCallSchema = import_zod25.z.object({
978
+ endpoint: import_zod25.z.string(),
979
+ tokensUsed: import_zod25.z.number(),
980
+ duration: import_zod25.z.number()
981
+ });
982
+ var ExecutionTraceSchema = import_zod25.z.object({
983
+ commands: import_zod25.z.array(CommandExecutionSchema),
984
+ filesModified: import_zod25.z.array(FileModificationSchema),
985
+ apiCalls: import_zod25.z.array(ApiCallSchema),
986
+ totalDuration: import_zod25.z.number()
987
+ });
988
+ var FailureAnalysisSchema = import_zod25.z.object({
989
+ category: import_zod25.z.enum(FailureCategory),
990
+ severity: import_zod25.z.enum(FailureSeverity),
991
+ summary: import_zod25.z.string(),
992
+ details: import_zod25.z.string(),
993
+ rootCause: import_zod25.z.string(),
994
+ suggestedFix: import_zod25.z.string(),
995
+ relatedAssertions: import_zod25.z.array(import_zod25.z.string()),
996
+ codeSnippet: import_zod25.z.string().optional(),
997
+ similarIssues: import_zod25.z.array(import_zod25.z.string()).optional(),
998
+ patternId: import_zod25.z.string().optional(),
868
999
  // Extended fields for detailed debugging
869
1000
  diff: DiffContentSchema.optional(),
870
1001
  executionTrace: ExecutionTraceSchema.optional()
871
1002
  });
872
1003
  var EvalRunSchema = TenantEntitySchema.extend({
873
1004
  /** Agent ID for this run */
874
- agentId: import_zod24.z.string().optional(),
1005
+ agentId: import_zod25.z.string().optional(),
875
1006
  /** Skills group ID for this run */
876
- skillsGroupId: import_zod24.z.string().optional(),
1007
+ skillsGroupId: import_zod25.z.string().optional(),
877
1008
  /** Scenario IDs to run */
878
- scenarioIds: import_zod24.z.array(import_zod24.z.string()),
1009
+ scenarioIds: import_zod25.z.array(import_zod25.z.string()),
879
1010
  /** Current status */
880
1011
  status: EvalStatusSchema,
881
1012
  /** Progress percentage (0-100) */
882
- progress: import_zod24.z.number(),
1013
+ progress: import_zod25.z.number(),
883
1014
  /** Results for each scenario/target combination (lazy to break eval-result ↔ eval-run cycle) */
884
- results: import_zod24.z.array(import_zod24.z.lazy(() => EvalRunResultSchema)),
1015
+ results: import_zod25.z.array(import_zod25.z.lazy(() => EvalRunResultSchema)),
885
1016
  /** Aggregated metrics across all results */
886
1017
  aggregateMetrics: EvalMetricsSchema,
887
1018
  /** Failure analyses */
888
- failureAnalyses: import_zod24.z.array(FailureAnalysisSchema).optional(),
1019
+ failureAnalyses: import_zod25.z.array(FailureAnalysisSchema).optional(),
889
1020
  /** Aggregated LLM trace summary */
890
1021
  llmTraceSummary: LLMTraceSummarySchema.optional(),
891
1022
  /** What triggered this run */
892
1023
  trigger: TriggerSchema.optional(),
893
1024
  /** When the run started (set when evaluation is triggered) */
894
- startedAt: import_zod24.z.string().optional(),
1025
+ startedAt: import_zod25.z.string().optional(),
895
1026
  /** When the run completed */
896
- completedAt: import_zod24.z.string().optional(),
1027
+ completedAt: import_zod25.z.string().optional(),
897
1028
  /** Live trace events captured during execution (for playback on results page) */
898
- liveTraceEvents: import_zod24.z.array(LiveTraceEventSchema).optional(),
1029
+ liveTraceEvents: import_zod25.z.array(LiveTraceEventSchema).optional(),
899
1030
  /** Remote job ID for tracking execution in Dev Machines */
900
- jobId: import_zod24.z.string().optional(),
1031
+ jobId: import_zod25.z.string().optional(),
901
1032
  /** Remote job status from the Dev Machine API (PENDING, RUNNING, COMPLETED, FAILED, CANCELLED) */
902
- jobStatus: import_zod24.z.string().optional(),
1033
+ jobStatus: import_zod25.z.string().optional(),
903
1034
  /** Remote job error message if the job failed */
904
- jobError: import_zod24.z.string().optional(),
1035
+ jobError: import_zod25.z.string().optional(),
905
1036
  /** Timestamp of the last job status check */
906
- jobStatusCheckedAt: import_zod24.z.string().optional()
1037
+ jobStatusCheckedAt: import_zod25.z.string().optional()
907
1038
  });
908
1039
  var CreateEvalRunInputSchema = EvalRunSchema.omit({
909
1040
  id: true,
@@ -916,28 +1047,28 @@ var CreateEvalRunInputSchema = EvalRunSchema.omit({
916
1047
  startedAt: true,
917
1048
  completedAt: true
918
1049
  });
919
- var EvaluationProgressSchema = import_zod24.z.object({
920
- runId: import_zod24.z.string(),
921
- targetId: import_zod24.z.string(),
922
- totalScenarios: import_zod24.z.number(),
923
- completedScenarios: import_zod24.z.number(),
924
- scenarioProgress: import_zod24.z.array(
925
- import_zod24.z.object({
926
- scenarioId: import_zod24.z.string(),
927
- currentStep: import_zod24.z.string(),
928
- error: import_zod24.z.string().optional()
1050
+ var EvaluationProgressSchema = import_zod25.z.object({
1051
+ runId: import_zod25.z.string(),
1052
+ targetId: import_zod25.z.string(),
1053
+ totalScenarios: import_zod25.z.number(),
1054
+ completedScenarios: import_zod25.z.number(),
1055
+ scenarioProgress: import_zod25.z.array(
1056
+ import_zod25.z.object({
1057
+ scenarioId: import_zod25.z.string(),
1058
+ currentStep: import_zod25.z.string(),
1059
+ error: import_zod25.z.string().optional()
929
1060
  })
930
1061
  ),
931
- createdAt: import_zod24.z.number()
1062
+ createdAt: import_zod25.z.number()
932
1063
  });
933
- var EvaluationLogSchema = import_zod24.z.object({
934
- runId: import_zod24.z.string(),
935
- scenarioId: import_zod24.z.string(),
936
- log: import_zod24.z.object({
937
- level: import_zod24.z.enum(["info", "error", "debug"]),
938
- message: import_zod24.z.string().optional(),
939
- args: import_zod24.z.array(import_zod24.z.any()).optional(),
940
- error: import_zod24.z.string().optional()
1064
+ var EvaluationLogSchema = import_zod25.z.object({
1065
+ runId: import_zod25.z.string(),
1066
+ scenarioId: import_zod25.z.string(),
1067
+ log: import_zod25.z.object({
1068
+ level: import_zod25.z.enum(["info", "error", "debug"]),
1069
+ message: import_zod25.z.string().optional(),
1070
+ args: import_zod25.z.array(import_zod25.z.any()).optional(),
1071
+ error: import_zod25.z.string().optional()
941
1072
  })
942
1073
  });
943
1074
  var LLM_TIMEOUT = 12e4;
@@ -950,91 +1081,91 @@ var AssertionResultStatus = /* @__PURE__ */ ((AssertionResultStatus2) => {
950
1081
  AssertionResultStatus2["ERROR"] = "error";
951
1082
  return AssertionResultStatus2;
952
1083
  })(AssertionResultStatus || {});
953
- var AssertionResultSchema = import_zod25.z.object({
954
- id: import_zod25.z.string(),
955
- assertionId: import_zod25.z.string(),
956
- assertionType: import_zod25.z.string(),
957
- assertionName: import_zod25.z.string(),
958
- status: import_zod25.z.enum(AssertionResultStatus),
959
- message: import_zod25.z.string().optional(),
960
- expected: import_zod25.z.string().optional(),
961
- actual: import_zod25.z.string().optional(),
962
- duration: import_zod25.z.number().optional(),
963
- details: import_zod25.z.record(import_zod25.z.string(), import_zod25.z.unknown()).optional(),
964
- llmTraceSteps: import_zod25.z.array(LLMTraceStepSchema).optional()
965
- });
966
- var EvalRunResultSchema = import_zod25.z.object({
967
- id: import_zod25.z.string(),
968
- targetId: import_zod25.z.string(),
969
- targetName: import_zod25.z.string().optional(),
970
- scenarioId: import_zod25.z.string(),
971
- scenarioName: import_zod25.z.string(),
1084
+ var AssertionResultSchema = import_zod26.z.object({
1085
+ id: import_zod26.z.string(),
1086
+ assertionId: import_zod26.z.string(),
1087
+ assertionType: import_zod26.z.string(),
1088
+ assertionName: import_zod26.z.string(),
1089
+ status: import_zod26.z.enum(AssertionResultStatus),
1090
+ message: import_zod26.z.string().optional(),
1091
+ expected: import_zod26.z.string().optional(),
1092
+ actual: import_zod26.z.string().optional(),
1093
+ duration: import_zod26.z.number().optional(),
1094
+ details: import_zod26.z.record(import_zod26.z.string(), import_zod26.z.unknown()).optional(),
1095
+ llmTraceSteps: import_zod26.z.array(LLMTraceStepSchema).optional()
1096
+ });
1097
+ var EvalRunResultSchema = import_zod26.z.object({
1098
+ id: import_zod26.z.string(),
1099
+ targetId: import_zod26.z.string(),
1100
+ targetName: import_zod26.z.string().optional(),
1101
+ scenarioId: import_zod26.z.string(),
1102
+ scenarioName: import_zod26.z.string(),
972
1103
  modelConfig: ModelConfigSchema.optional(),
973
- assertionResults: import_zod25.z.array(AssertionResultSchema),
1104
+ assertionResults: import_zod26.z.array(AssertionResultSchema),
974
1105
  metrics: EvalMetricsSchema.optional(),
975
- passed: import_zod25.z.number(),
976
- failed: import_zod25.z.number(),
977
- passRate: import_zod25.z.number(),
978
- duration: import_zod25.z.number(),
979
- outputText: import_zod25.z.string().optional(),
980
- files: import_zod25.z.array(ExpectedFileSchema).optional(),
981
- fileDiffs: import_zod25.z.array(DiffContentSchema).optional(),
1106
+ passed: import_zod26.z.number(),
1107
+ failed: import_zod26.z.number(),
1108
+ passRate: import_zod26.z.number(),
1109
+ duration: import_zod26.z.number(),
1110
+ outputText: import_zod26.z.string().optional(),
1111
+ files: import_zod26.z.array(ExpectedFileSchema).optional(),
1112
+ fileDiffs: import_zod26.z.array(DiffContentSchema).optional(),
982
1113
  /** Full template files after execution with status indicators */
983
- templateFiles: import_zod25.z.array(TemplateFileSchema).optional(),
984
- startedAt: import_zod25.z.string().optional(),
985
- completedAt: import_zod25.z.string().optional(),
1114
+ templateFiles: import_zod26.z.array(TemplateFileSchema).optional(),
1115
+ startedAt: import_zod26.z.string().optional(),
1116
+ completedAt: import_zod26.z.string().optional(),
986
1117
  llmTrace: LLMTraceSchema.optional()
987
1118
  });
988
- var PromptResultSchema = import_zod25.z.object({
989
- text: import_zod25.z.string(),
990
- files: import_zod25.z.array(import_zod25.z.unknown()).optional(),
991
- finishReason: import_zod25.z.string().optional(),
992
- reasoning: import_zod25.z.string().optional(),
993
- reasoningDetails: import_zod25.z.unknown().optional(),
994
- toolCalls: import_zod25.z.array(import_zod25.z.unknown()).optional(),
995
- toolResults: import_zod25.z.array(import_zod25.z.unknown()).optional(),
996
- warnings: import_zod25.z.array(import_zod25.z.unknown()).optional(),
997
- sources: import_zod25.z.array(import_zod25.z.unknown()).optional(),
998
- steps: import_zod25.z.array(import_zod25.z.unknown()),
999
- generationTimeMs: import_zod25.z.number(),
1000
- prompt: import_zod25.z.string(),
1001
- systemPrompt: import_zod25.z.string(),
1002
- usage: import_zod25.z.object({
1003
- totalTokens: import_zod25.z.number().optional(),
1004
- totalMicrocentsSpent: import_zod25.z.number().optional()
1119
+ var PromptResultSchema = import_zod26.z.object({
1120
+ text: import_zod26.z.string(),
1121
+ files: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1122
+ finishReason: import_zod26.z.string().optional(),
1123
+ reasoning: import_zod26.z.string().optional(),
1124
+ reasoningDetails: import_zod26.z.unknown().optional(),
1125
+ toolCalls: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1126
+ toolResults: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1127
+ warnings: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1128
+ sources: import_zod26.z.array(import_zod26.z.unknown()).optional(),
1129
+ steps: import_zod26.z.array(import_zod26.z.unknown()),
1130
+ generationTimeMs: import_zod26.z.number(),
1131
+ prompt: import_zod26.z.string(),
1132
+ systemPrompt: import_zod26.z.string(),
1133
+ usage: import_zod26.z.object({
1134
+ totalTokens: import_zod26.z.number().optional(),
1135
+ totalMicrocentsSpent: import_zod26.z.number().optional()
1005
1136
  })
1006
1137
  });
1007
- var EvaluationResultSchema = import_zod25.z.object({
1008
- id: import_zod25.z.string(),
1009
- runId: import_zod25.z.string(),
1010
- timestamp: import_zod25.z.number(),
1138
+ var EvaluationResultSchema = import_zod26.z.object({
1139
+ id: import_zod26.z.string(),
1140
+ runId: import_zod26.z.string(),
1141
+ timestamp: import_zod26.z.number(),
1011
1142
  promptResult: PromptResultSchema,
1012
- testResults: import_zod25.z.array(import_zod25.z.unknown()),
1013
- tags: import_zod25.z.array(import_zod25.z.string()).optional(),
1014
- feedback: import_zod25.z.string().optional(),
1015
- score: import_zod25.z.number(),
1016
- suiteId: import_zod25.z.string().optional()
1017
- });
1018
- var LeanEvaluationResultSchema = import_zod25.z.object({
1019
- id: import_zod25.z.string(),
1020
- runId: import_zod25.z.string(),
1021
- timestamp: import_zod25.z.number(),
1022
- tags: import_zod25.z.array(import_zod25.z.string()).optional(),
1023
- scenarioId: import_zod25.z.string(),
1024
- scenarioVersion: import_zod25.z.number().optional(),
1025
- targetId: import_zod25.z.string(),
1026
- targetVersion: import_zod25.z.number().optional(),
1027
- suiteId: import_zod25.z.string().optional(),
1028
- score: import_zod25.z.number(),
1029
- time: import_zod25.z.number().optional(),
1030
- microcentsSpent: import_zod25.z.number().optional()
1143
+ testResults: import_zod26.z.array(import_zod26.z.unknown()),
1144
+ tags: import_zod26.z.array(import_zod26.z.string()).optional(),
1145
+ feedback: import_zod26.z.string().optional(),
1146
+ score: import_zod26.z.number(),
1147
+ suiteId: import_zod26.z.string().optional()
1148
+ });
1149
+ var LeanEvaluationResultSchema = import_zod26.z.object({
1150
+ id: import_zod26.z.string(),
1151
+ runId: import_zod26.z.string(),
1152
+ timestamp: import_zod26.z.number(),
1153
+ tags: import_zod26.z.array(import_zod26.z.string()).optional(),
1154
+ scenarioId: import_zod26.z.string(),
1155
+ scenarioVersion: import_zod26.z.number().optional(),
1156
+ targetId: import_zod26.z.string(),
1157
+ targetVersion: import_zod26.z.number().optional(),
1158
+ suiteId: import_zod26.z.string().optional(),
1159
+ score: import_zod26.z.number(),
1160
+ time: import_zod26.z.number().optional(),
1161
+ microcentsSpent: import_zod26.z.number().optional()
1031
1162
  });
1032
1163
 
1033
1164
  // src/project/project.ts
1034
- var import_zod26 = require("zod");
1165
+ var import_zod27 = require("zod");
1035
1166
  var ProjectSchema = BaseEntitySchema.extend({
1036
- appId: import_zod26.z.string().optional().describe("The ID of the app in Dev Center"),
1037
- appSecret: import_zod26.z.string().optional().describe("The secret of the app in Dev Center")
1167
+ appId: import_zod27.z.string().optional().describe("The ID of the app in Dev Center"),
1168
+ appSecret: import_zod27.z.string().optional().describe("The secret of the app in Dev Center")
1038
1169
  });
1039
1170
  var CreateProjectInputSchema = ProjectSchema.omit({
1040
1171
  id: true,
@@ -1045,10 +1176,10 @@ var CreateProjectInputSchema = ProjectSchema.omit({
1045
1176
  var UpdateProjectInputSchema = CreateProjectInputSchema.partial();
1046
1177
 
1047
1178
  // src/template/template.ts
1048
- var import_zod27 = require("zod");
1179
+ var import_zod28 = require("zod");
1049
1180
  var TemplateSchema = TenantEntitySchema.extend({
1050
1181
  /** URL to download the template from */
1051
- downloadUrl: import_zod27.z.url()
1182
+ downloadUrl: import_zod28.z.url()
1052
1183
  });
1053
1184
  var CreateTemplateInputSchema = TemplateSchema.omit({
1054
1185
  id: true,
@@ -1058,86 +1189,69 @@ var CreateTemplateInputSchema = TemplateSchema.omit({
1058
1189
  });
1059
1190
  var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
1060
1191
 
1061
- // src/assertion/assertion.ts
1062
- var import_zod28 = require("zod");
1063
- var AssertionTypeSchema = import_zod28.z.enum([
1064
- "skill_was_called",
1065
- "build_passed",
1066
- "llm_judge",
1067
- "custom"
1068
- ]);
1069
- var SkillWasCalledConfigSchema = import_zod28.z.object({
1070
- /** Name of the skill that must have been called */
1071
- skillName: import_zod28.z.string().min(1)
1072
- });
1073
- var BuildPassedConfigSchema = import_zod28.z.object({
1074
- /** Command to run (default: "yarn build") */
1075
- command: import_zod28.z.string().optional(),
1076
- /** Expected exit code (default: 0) */
1077
- expectedExitCode: import_zod28.z.number().int().optional()
1078
- });
1079
- var LlmJudgeConfigSchema = import_zod28.z.object({
1080
- /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
1081
- prompt: import_zod28.z.string().min(1),
1082
- /** Optional system prompt for the judge */
1083
- systemPrompt: import_zod28.z.string().optional(),
1084
- /** Minimum score to pass (0-100, default 70) */
1085
- minScore: import_zod28.z.number().int().min(0).max(100).optional(),
1086
- /** Model for the judge (e.g. claude-3-5-haiku-20241022) */
1087
- model: import_zod28.z.string().optional(),
1088
- /** Max output tokens */
1089
- maxTokens: import_zod28.z.number().int().optional(),
1090
- /** Temperature (0-1) */
1091
- temperature: import_zod28.z.number().min(0).max(1).optional()
1092
- });
1093
- var AssertionConfigSchema = import_zod28.z.union([
1094
- SkillWasCalledConfigSchema,
1095
- BuildPassedConfigSchema,
1096
- LlmJudgeConfigSchema,
1097
- import_zod28.z.object({})
1098
- // Empty config for cases where defaults are used
1099
- ]);
1100
- var CustomAssertionSchema = TenantEntitySchema.extend({
1101
- /** The assertion type */
1102
- type: AssertionTypeSchema,
1103
- /** Type-specific configuration */
1104
- config: AssertionConfigSchema
1105
- });
1106
- var CreateCustomAssertionInputSchema = CustomAssertionSchema.omit({
1107
- id: true,
1108
- createdAt: true,
1109
- updatedAt: true,
1110
- deleted: true
1111
- });
1112
- var UpdateCustomAssertionInputSchema = CreateCustomAssertionInputSchema.partial();
1113
- function validateAssertionConfig(type, config) {
1114
- switch (type) {
1115
- case "skill_was_called":
1116
- return SkillWasCalledConfigSchema.safeParse(config).success;
1117
- case "build_passed":
1118
- return BuildPassedConfigSchema.safeParse(config).success;
1119
- case "llm_judge":
1120
- case "custom":
1121
- return LlmJudgeConfigSchema.safeParse(config).success;
1122
- default:
1123
- return false;
1124
- }
1125
- }
1126
- function getSkillWasCalledConfig(assertion) {
1127
- if (assertion.type !== "skill_was_called") return null;
1128
- const result = SkillWasCalledConfigSchema.safeParse(assertion.config);
1129
- return result.success ? result.data : null;
1192
+ // src/assertion/system-assertions.ts
1193
+ var SYSTEM_ASSERTION_IDS = {
1194
+ SKILL_WAS_CALLED: "system:skill_was_called",
1195
+ BUILD_PASSED: "system:build_passed"
1196
+ };
1197
+ function isSystemAssertionId(id) {
1198
+ return id.startsWith("system:");
1130
1199
  }
1131
- function getBuildPassedConfig(assertion) {
1132
- if (assertion.type !== "build_passed") return null;
1133
- const result = BuildPassedConfigSchema.safeParse(assertion.config);
1134
- return result.success ? result.data : null;
1200
+ var SYSTEM_ASSERTIONS = {
1201
+ [SYSTEM_ASSERTION_IDS.SKILL_WAS_CALLED]: {
1202
+ id: SYSTEM_ASSERTION_IDS.SKILL_WAS_CALLED,
1203
+ name: "Skill Was Called",
1204
+ description: "Check if a specific skill was invoked during the agent run",
1205
+ type: "skill_was_called",
1206
+ parameters: [
1207
+ {
1208
+ name: "skillName",
1209
+ label: "Skill Name",
1210
+ type: "string",
1211
+ required: true
1212
+ }
1213
+ ]
1214
+ },
1215
+ [SYSTEM_ASSERTION_IDS.BUILD_PASSED]: {
1216
+ id: SYSTEM_ASSERTION_IDS.BUILD_PASSED,
1217
+ name: "Build Passed",
1218
+ description: "Run a build command and verify it exits with expected code",
1219
+ type: "build_passed",
1220
+ parameters: [
1221
+ {
1222
+ name: "command",
1223
+ label: "Build Command",
1224
+ type: "string",
1225
+ required: false,
1226
+ defaultValue: "yarn build"
1227
+ },
1228
+ {
1229
+ name: "expectedExitCode",
1230
+ label: "Expected Exit Code",
1231
+ type: "number",
1232
+ required: false,
1233
+ defaultValue: 0
1234
+ },
1235
+ {
1236
+ name: "maxBuildTime",
1237
+ label: "Max Build Time (ms)",
1238
+ type: "number",
1239
+ required: false
1240
+ },
1241
+ {
1242
+ name: "maxMemory",
1243
+ label: "Max Memory (MB)",
1244
+ type: "number",
1245
+ required: false
1246
+ }
1247
+ ]
1248
+ }
1249
+ };
1250
+ function getSystemAssertions() {
1251
+ return Object.values(SYSTEM_ASSERTIONS);
1135
1252
  }
1136
- function getLlmJudgeConfig(assertion) {
1137
- if (assertion.type !== "llm_judge" && assertion.type !== "custom")
1138
- return null;
1139
- const result = LlmJudgeConfigSchema.safeParse(assertion.config);
1140
- return result.success ? result.data : null;
1253
+ function getSystemAssertion(id) {
1254
+ return SYSTEM_ASSERTIONS[id];
1141
1255
  }
1142
1256
  // Annotate the CommonJS export names for ESM import in node:
1143
1257
  0 && (module.exports = {
@@ -1147,6 +1261,8 @@ function getLlmJudgeConfig(assertion) {
1147
1261
  AllowedCommands,
1148
1262
  ApiCallSchema,
1149
1263
  AssertionConfigSchema,
1264
+ AssertionParameterSchema,
1265
+ AssertionParameterTypeSchema,
1150
1266
  AssertionResultSchema,
1151
1267
  AssertionResultStatus,
1152
1268
  AssertionSchema,
@@ -1213,6 +1329,9 @@ function getLlmJudgeConfig(assertion) {
1213
1329
  ProjectSchema,
1214
1330
  PromptResultSchema,
1215
1331
  SKILL_FOLDER_NAME_REGEX,
1332
+ SYSTEM_ASSERTIONS,
1333
+ SYSTEM_ASSERTION_IDS,
1334
+ ScenarioAssertionLinkSchema,
1216
1335
  SiteConfigTestSchema,
1217
1336
  SkillMetadataSchema,
1218
1337
  SkillSchema,
@@ -1251,6 +1370,9 @@ function getLlmJudgeConfig(assertion) {
1251
1370
  getBuildPassedConfig,
1252
1371
  getLlmJudgeConfig,
1253
1372
  getSkillWasCalledConfig,
1373
+ getSystemAssertion,
1374
+ getSystemAssertions,
1375
+ isSystemAssertionId,
1254
1376
  isValidSkillFolderName,
1255
1377
  parseTraceEventLine,
1256
1378
  validateAssertionConfig