npm - @wix/evalforge-types - Versions diffs - 0.35.0 → 0.37.0 - Mend

@wix/evalforge-types 0.35.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/build/index.js +71 -1
package/build/index.js.map +2 -2
package/build/index.mjs +67 -1
package/build/index.mjs.map +2 -2
package/build/types/assertion/assertion.d.ts +52 -0
package/build/types/assertion/system-assertions.d.ts +2 -0
package/build/types/scenario/assertions.d.ts +24 -0
package/build/types/scenario/test-scenario.d.ts +18 -0
package/package.json +2 -2

package/build/index.js CHANGED

@@ -937,6 +937,8 @@ __export(index_exports, {
   ClaudeModelSchema: () => ClaudeModelSchema,
   CommandExecutionSchema: () => CommandExecutionSchema,
   CommandExecutionTestSchema: () => CommandExecutionTestSchema,
+  CostAssertionSchema: () => CostAssertionSchema,
+  CostConfigSchema: () => CostConfigSchema,
   CreateAgentInputSchema: () => CreateAgentInputSchema,
   CreateCustomAssertionInputSchema: () => CreateCustomAssertionInputSchema,
   CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
@@ -1025,6 +1027,8 @@ __export(index_exports, {
   TestSuiteSchema: () => TestSuiteSchema,
   TestType: () => TestType,
   TestTypeSchema: () => TestTypeSchema,
+  TimeAssertionSchema: () => TimeAssertionSchema,
+  TimeConfigSchema: () => TimeConfigSchema,
   TokenUsageSchema: () => TokenUsageSchema,
   ToolTestSchema: () => ToolTestSchema,
   TriggerMetadataSchema: () => TriggerMetadataSchema,
@@ -1469,6 +1473,11 @@ var BuildPassedAssertionSchema = import_zod20.z.object({
   /** Expected exit code (default: 0) */
   expectedExitCode: import_zod20.z.number().int().optional()
 });
+var CostAssertionSchema = import_zod20.z.object({
+  type: import_zod20.z.literal("cost"),
+  /** Maximum allowed cost in USD */
+  maxCostUsd: import_zod20.z.number().positive()
+});
 var LlmJudgeAssertionSchema = import_zod20.z.object({
   type: import_zod20.z.literal("llm_judge"),
   /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
@@ -1482,9 +1491,16 @@ var LlmJudgeAssertionSchema = import_zod20.z.object({
   maxTokens: import_zod20.z.number().int().optional(),
   temperature: import_zod20.z.number().min(0).max(1).optional()
 });
+var TimeAssertionSchema = import_zod20.z.object({
+  type: import_zod20.z.literal("time_limit"),
+  /** Maximum allowed duration in milliseconds */
+  maxDurationMs: import_zod20.z.number().int().positive()
+});
 var AssertionSchema = import_zod20.z.union([
   SkillWasCalledAssertionSchema,
   BuildPassedAssertionSchema,
+  TimeAssertionSchema,
+  CostAssertionSchema,
   LlmJudgeAssertionSchema
 ]);
@@ -1530,6 +1546,8 @@ var import_zod22 = require("zod");
 var AssertionTypeSchema = import_zod22.z.enum([
   "skill_was_called",
   "build_passed",
+  "time_limit",
+  "cost",
   "llm_judge"
 ]);
 var AssertionParameterTypeSchema = import_zod22.z.enum([
@@ -1564,12 +1582,20 @@ var SkillWasCalledConfigSchema = import_zod22.z.object({
   /** Names of the skills that must have been called */
   skillNames: import_zod22.z.array(import_zod22.z.string().min(1)).min(1)
 });
+var CostConfigSchema = import_zod22.z.strictObject({
+  /** Maximum allowed cost in USD */
+  maxCostUsd: import_zod22.z.number().positive()
+});
 var BuildPassedConfigSchema = import_zod22.z.strictObject({
   /** Command to run (default: "yarn build") */
   command: import_zod22.z.string().optional(),
   /** Expected exit code (default: 0) */
   expectedExitCode: import_zod22.z.number().int().optional()
 });
+var TimeConfigSchema = import_zod22.z.strictObject({
+  /** Maximum allowed duration in milliseconds */
+  maxDurationMs: import_zod22.z.number().int().positive()
+});
 var LlmJudgeConfigSchema = import_zod22.z.object({
   /**
    * Prompt template with placeholders:
@@ -1599,7 +1625,11 @@ var AssertionConfigSchema = import_zod22.z.union([
   LlmJudgeConfigSchema,
   // requires prompt - check first
   SkillWasCalledConfigSchema,
-  // requires skillName
+  // requires skillNames
+  TimeConfigSchema,
+  // requires maxDurationMs, uses strictObject
+  CostConfigSchema,
+  // requires maxCostUsd, uses strictObject
   BuildPassedConfigSchema,
   // all optional, uses strictObject to reject unknown keys
   import_zod22.z.object({})
@@ -1622,8 +1652,12 @@ function validateAssertionConfig(type, config) {
   switch (type) {
     case "skill_was_called":
       return SkillWasCalledConfigSchema.safeParse(config).success;
+    case "cost":
+      return CostConfigSchema.safeParse(config).success;
     case "build_passed":
       return BuildPassedConfigSchema.safeParse(config).success;
+    case "time_limit":
+      return TimeConfigSchema.safeParse(config).success;
     case "llm_judge":
       return LlmJudgeConfigSchema.safeParse(config).success;
     default:
@@ -2127,6 +2161,8 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
 var SYSTEM_ASSERTION_IDS = {
   SKILL_WAS_CALLED: "system:skill_was_called",
   BUILD_PASSED: "system:build_passed",
+  TIME_LIMIT: "system:time_limit",
+  COST: "system:cost",
   LLM_JUDGE: "system:llm_judge"
 };
 function isSystemAssertionId(id) {
@@ -2183,6 +2219,36 @@ var SYSTEM_ASSERTIONS = {
       }
     ]
   },
+  [SYSTEM_ASSERTION_IDS.TIME_LIMIT]: {
+    id: SYSTEM_ASSERTION_IDS.TIME_LIMIT,
+    name: "Time Limit",
+    description: "Check that the scenario completed within a maximum duration",
+    type: "time_limit",
+    parameters: [
+      {
+        name: "maxDurationMs",
+        label: "Max Duration (ms)",
+        type: "number",
+        required: true,
+        defaultValue: 3e5
+      }
+    ]
+  },
+  [SYSTEM_ASSERTION_IDS.COST]: {
+    id: SYSTEM_ASSERTION_IDS.COST,
+    name: "Cost",
+    description: "Check that the scenario LLM execution cost stays within a USD threshold",
+    type: "cost",
+    parameters: [
+      {
+        name: "maxCostUsd",
+        label: "Max Cost (USD)",
+        type: "number",
+        required: true,
+        defaultValue: 1
+      }
+    ]
+  },
   [SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
     id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
     name: "LLM Judge",
@@ -2256,6 +2322,8 @@ function getSystemAssertion(id) {
   ClaudeModelSchema,
   CommandExecutionSchema,
   CommandExecutionTestSchema,
+  CostAssertionSchema,
+  CostConfigSchema,
   CreateAgentInputSchema,
   CreateCustomAssertionInputSchema,
   CreateEvalRunInputSchema,
@@ -2344,6 +2412,8 @@ function getSystemAssertion(id) {
   TestSuiteSchema,
   TestType,
   TestTypeSchema,
+  TimeAssertionSchema,
+  TimeConfigSchema,
   TokenUsageSchema,
   ToolTestSchema,
   TriggerMetadataSchema,