@wix/evalforge-types 0.36.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -937,6 +937,8 @@ __export(index_exports, {
937
937
  ClaudeModelSchema: () => ClaudeModelSchema,
938
938
  CommandExecutionSchema: () => CommandExecutionSchema,
939
939
  CommandExecutionTestSchema: () => CommandExecutionTestSchema,
940
+ CostAssertionSchema: () => CostAssertionSchema,
941
+ CostConfigSchema: () => CostConfigSchema,
940
942
  CreateAgentInputSchema: () => CreateAgentInputSchema,
941
943
  CreateCustomAssertionInputSchema: () => CreateCustomAssertionInputSchema,
942
944
  CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
@@ -1471,6 +1473,11 @@ var BuildPassedAssertionSchema = import_zod20.z.object({
1471
1473
  /** Expected exit code (default: 0) */
1472
1474
  expectedExitCode: import_zod20.z.number().int().optional()
1473
1475
  });
1476
+ var CostAssertionSchema = import_zod20.z.object({
1477
+ type: import_zod20.z.literal("cost"),
1478
+ /** Maximum allowed cost in USD */
1479
+ maxCostUsd: import_zod20.z.number().positive()
1480
+ });
1474
1481
  var LlmJudgeAssertionSchema = import_zod20.z.object({
1475
1482
  type: import_zod20.z.literal("llm_judge"),
1476
1483
  /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
@@ -1493,6 +1500,7 @@ var AssertionSchema = import_zod20.z.union([
1493
1500
  SkillWasCalledAssertionSchema,
1494
1501
  BuildPassedAssertionSchema,
1495
1502
  TimeAssertionSchema,
1503
+ CostAssertionSchema,
1496
1504
  LlmJudgeAssertionSchema
1497
1505
  ]);
1498
1506
 
@@ -1539,6 +1547,7 @@ var AssertionTypeSchema = import_zod22.z.enum([
1539
1547
  "skill_was_called",
1540
1548
  "build_passed",
1541
1549
  "time_limit",
1550
+ "cost",
1542
1551
  "llm_judge"
1543
1552
  ]);
1544
1553
  var AssertionParameterTypeSchema = import_zod22.z.enum([
@@ -1573,6 +1582,10 @@ var SkillWasCalledConfigSchema = import_zod22.z.object({
1573
1582
  /** Names of the skills that must have been called */
1574
1583
  skillNames: import_zod22.z.array(import_zod22.z.string().min(1)).min(1)
1575
1584
  });
1585
+ var CostConfigSchema = import_zod22.z.strictObject({
1586
+ /** Maximum allowed cost in USD */
1587
+ maxCostUsd: import_zod22.z.number().positive()
1588
+ });
1576
1589
  var BuildPassedConfigSchema = import_zod22.z.strictObject({
1577
1590
  /** Command to run (default: "yarn build") */
1578
1591
  command: import_zod22.z.string().optional(),
@@ -1615,6 +1628,8 @@ var AssertionConfigSchema = import_zod22.z.union([
1615
1628
  // requires skillNames
1616
1629
  TimeConfigSchema,
1617
1630
  // requires maxDurationMs, uses strictObject
1631
+ CostConfigSchema,
1632
+ // requires maxCostUsd, uses strictObject
1618
1633
  BuildPassedConfigSchema,
1619
1634
  // all optional, uses strictObject to reject unknown keys
1620
1635
  import_zod22.z.object({})
@@ -1637,6 +1652,8 @@ function validateAssertionConfig(type, config) {
1637
1652
  switch (type) {
1638
1653
  case "skill_was_called":
1639
1654
  return SkillWasCalledConfigSchema.safeParse(config).success;
1655
+ case "cost":
1656
+ return CostConfigSchema.safeParse(config).success;
1640
1657
  case "build_passed":
1641
1658
  return BuildPassedConfigSchema.safeParse(config).success;
1642
1659
  case "time_limit":
@@ -2145,6 +2162,7 @@ var SYSTEM_ASSERTION_IDS = {
2145
2162
  SKILL_WAS_CALLED: "system:skill_was_called",
2146
2163
  BUILD_PASSED: "system:build_passed",
2147
2164
  TIME_LIMIT: "system:time_limit",
2165
+ COST: "system:cost",
2148
2166
  LLM_JUDGE: "system:llm_judge"
2149
2167
  };
2150
2168
  function isSystemAssertionId(id) {
@@ -2216,6 +2234,21 @@ var SYSTEM_ASSERTIONS = {
2216
2234
  }
2217
2235
  ]
2218
2236
  },
2237
+ [SYSTEM_ASSERTION_IDS.COST]: {
2238
+ id: SYSTEM_ASSERTION_IDS.COST,
2239
+ name: "Cost",
2240
+ description: "Check that the scenario LLM execution cost stays within a USD threshold",
2241
+ type: "cost",
2242
+ parameters: [
2243
+ {
2244
+ name: "maxCostUsd",
2245
+ label: "Max Cost (USD)",
2246
+ type: "number",
2247
+ required: true,
2248
+ defaultValue: 1
2249
+ }
2250
+ ]
2251
+ },
2219
2252
  [SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
2220
2253
  id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
2221
2254
  name: "LLM Judge",
@@ -2289,6 +2322,8 @@ function getSystemAssertion(id) {
2289
2322
  ClaudeModelSchema,
2290
2323
  CommandExecutionSchema,
2291
2324
  CommandExecutionTestSchema,
2325
+ CostAssertionSchema,
2326
+ CostConfigSchema,
2292
2327
  CreateAgentInputSchema,
2293
2328
  CreateCustomAssertionInputSchema,
2294
2329
  CreateEvalRunInputSchema,