npm - @wix/evalforge-types - Versions diffs - 0.36.0 → 0.37.0 - Mend

@wix/evalforge-types 0.36.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9)

package/build/index.js +35 -0
package/build/index.js.map +2 -2
package/build/index.mjs +33 -0
package/build/index.mjs.map +2 -2
package/build/types/assertion/assertion.d.ts +26 -0
package/build/types/assertion/system-assertions.d.ts +1 -0
package/build/types/scenario/assertions.d.ts +12 -0
package/build/types/scenario/test-scenario.d.ts +9 -0
package/package.json +2 -2

package/build/index.js CHANGED

@@ -937,6 +937,8 @@ __export(index_exports, {
   ClaudeModelSchema: () => ClaudeModelSchema,
   CommandExecutionSchema: () => CommandExecutionSchema,
   CommandExecutionTestSchema: () => CommandExecutionTestSchema,
+  CostAssertionSchema: () => CostAssertionSchema,
+  CostConfigSchema: () => CostConfigSchema,
   CreateAgentInputSchema: () => CreateAgentInputSchema,
   CreateCustomAssertionInputSchema: () => CreateCustomAssertionInputSchema,
   CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
@@ -1471,6 +1473,11 @@ var BuildPassedAssertionSchema = import_zod20.z.object({
   /** Expected exit code (default: 0) */
   expectedExitCode: import_zod20.z.number().int().optional()
 });
+var CostAssertionSchema = import_zod20.z.object({
+  type: import_zod20.z.literal("cost"),
+  /** Maximum allowed cost in USD */
+  maxCostUsd: import_zod20.z.number().positive()
+});
 var LlmJudgeAssertionSchema = import_zod20.z.object({
   type: import_zod20.z.literal("llm_judge"),
   /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
@@ -1493,6 +1500,7 @@ var AssertionSchema = import_zod20.z.union([
   SkillWasCalledAssertionSchema,
   BuildPassedAssertionSchema,
   TimeAssertionSchema,
+  CostAssertionSchema,
   LlmJudgeAssertionSchema
 ]);
@@ -1539,6 +1547,7 @@ var AssertionTypeSchema = import_zod22.z.enum([
   "skill_was_called",
   "build_passed",
   "time_limit",
+  "cost",
   "llm_judge"
 ]);
 var AssertionParameterTypeSchema = import_zod22.z.enum([
@@ -1573,6 +1582,10 @@ var SkillWasCalledConfigSchema = import_zod22.z.object({
   /** Names of the skills that must have been called */
   skillNames: import_zod22.z.array(import_zod22.z.string().min(1)).min(1)
 });
+var CostConfigSchema = import_zod22.z.strictObject({
+  /** Maximum allowed cost in USD */
+  maxCostUsd: import_zod22.z.number().positive()
+});
 var BuildPassedConfigSchema = import_zod22.z.strictObject({
   /** Command to run (default: "yarn build") */
   command: import_zod22.z.string().optional(),
@@ -1615,6 +1628,8 @@ var AssertionConfigSchema = import_zod22.z.union([
   // requires skillNames
   TimeConfigSchema,
   // requires maxDurationMs, uses strictObject
+  CostConfigSchema,
+  // requires maxCostUsd, uses strictObject
   BuildPassedConfigSchema,
   // all optional, uses strictObject to reject unknown keys
   import_zod22.z.object({})
@@ -1637,6 +1652,8 @@ function validateAssertionConfig(type, config) {
   switch (type) {
     case "skill_was_called":
       return SkillWasCalledConfigSchema.safeParse(config).success;
+    case "cost":
+      return CostConfigSchema.safeParse(config).success;
     case "build_passed":
       return BuildPassedConfigSchema.safeParse(config).success;
     case "time_limit":
@@ -2145,6 +2162,7 @@ var SYSTEM_ASSERTION_IDS = {
   SKILL_WAS_CALLED: "system:skill_was_called",
   BUILD_PASSED: "system:build_passed",
   TIME_LIMIT: "system:time_limit",
+  COST: "system:cost",
   LLM_JUDGE: "system:llm_judge"
 };
 function isSystemAssertionId(id) {
@@ -2216,6 +2234,21 @@ var SYSTEM_ASSERTIONS = {
       }
     ]
   },
+  [SYSTEM_ASSERTION_IDS.COST]: {
+    id: SYSTEM_ASSERTION_IDS.COST,
+    name: "Cost",
+    description: "Check that the scenario LLM execution cost stays within a USD threshold",
+    type: "cost",
+    parameters: [
+      {
+        name: "maxCostUsd",
+        label: "Max Cost (USD)",
+        type: "number",
+        required: true,
+        defaultValue: 1
+      }
+    ]
+  },
   [SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
     id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
     name: "LLM Judge",
@@ -2289,6 +2322,8 @@ function getSystemAssertion(id) {
   ClaudeModelSchema,
   CommandExecutionSchema,
   CommandExecutionTestSchema,
+  CostAssertionSchema,
+  CostConfigSchema,
   CreateAgentInputSchema,
   CreateCustomAssertionInputSchema,
   CreateEvalRunInputSchema,