@wix/evalforge-types 0.35.0 → 0.37.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/build/index.js CHANGED
@@ -937,6 +937,8 @@ __export(index_exports, {
937
937
  ClaudeModelSchema: () => ClaudeModelSchema,
938
938
  CommandExecutionSchema: () => CommandExecutionSchema,
939
939
  CommandExecutionTestSchema: () => CommandExecutionTestSchema,
940
+ CostAssertionSchema: () => CostAssertionSchema,
941
+ CostConfigSchema: () => CostConfigSchema,
940
942
  CreateAgentInputSchema: () => CreateAgentInputSchema,
941
943
  CreateCustomAssertionInputSchema: () => CreateCustomAssertionInputSchema,
942
944
  CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
@@ -1025,6 +1027,8 @@ __export(index_exports, {
1025
1027
  TestSuiteSchema: () => TestSuiteSchema,
1026
1028
  TestType: () => TestType,
1027
1029
  TestTypeSchema: () => TestTypeSchema,
1030
+ TimeAssertionSchema: () => TimeAssertionSchema,
1031
+ TimeConfigSchema: () => TimeConfigSchema,
1028
1032
  TokenUsageSchema: () => TokenUsageSchema,
1029
1033
  ToolTestSchema: () => ToolTestSchema,
1030
1034
  TriggerMetadataSchema: () => TriggerMetadataSchema,
@@ -1469,6 +1473,11 @@ var BuildPassedAssertionSchema = import_zod20.z.object({
1469
1473
  /** Expected exit code (default: 0) */
1470
1474
  expectedExitCode: import_zod20.z.number().int().optional()
1471
1475
  });
1476
+ var CostAssertionSchema = import_zod20.z.object({
1477
+ type: import_zod20.z.literal("cost"),
1478
+ /** Maximum allowed cost in USD */
1479
+ maxCostUsd: import_zod20.z.number().positive()
1480
+ });
1472
1481
  var LlmJudgeAssertionSchema = import_zod20.z.object({
1473
1482
  type: import_zod20.z.literal("llm_judge"),
1474
1483
  /** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
@@ -1482,9 +1491,16 @@ var LlmJudgeAssertionSchema = import_zod20.z.object({
1482
1491
  maxTokens: import_zod20.z.number().int().optional(),
1483
1492
  temperature: import_zod20.z.number().min(0).max(1).optional()
1484
1493
  });
1494
+ var TimeAssertionSchema = import_zod20.z.object({
1495
+ type: import_zod20.z.literal("time_limit"),
1496
+ /** Maximum allowed duration in milliseconds */
1497
+ maxDurationMs: import_zod20.z.number().int().positive()
1498
+ });
1485
1499
  var AssertionSchema = import_zod20.z.union([
1486
1500
  SkillWasCalledAssertionSchema,
1487
1501
  BuildPassedAssertionSchema,
1502
+ TimeAssertionSchema,
1503
+ CostAssertionSchema,
1488
1504
  LlmJudgeAssertionSchema
1489
1505
  ]);
1490
1506
 
@@ -1530,6 +1546,8 @@ var import_zod22 = require("zod");
1530
1546
  var AssertionTypeSchema = import_zod22.z.enum([
1531
1547
  "skill_was_called",
1532
1548
  "build_passed",
1549
+ "time_limit",
1550
+ "cost",
1533
1551
  "llm_judge"
1534
1552
  ]);
1535
1553
  var AssertionParameterTypeSchema = import_zod22.z.enum([
@@ -1564,12 +1582,20 @@ var SkillWasCalledConfigSchema = import_zod22.z.object({
1564
1582
  /** Names of the skills that must have been called */
1565
1583
  skillNames: import_zod22.z.array(import_zod22.z.string().min(1)).min(1)
1566
1584
  });
1585
+ var CostConfigSchema = import_zod22.z.strictObject({
1586
+ /** Maximum allowed cost in USD */
1587
+ maxCostUsd: import_zod22.z.number().positive()
1588
+ });
1567
1589
  var BuildPassedConfigSchema = import_zod22.z.strictObject({
1568
1590
  /** Command to run (default: "yarn build") */
1569
1591
  command: import_zod22.z.string().optional(),
1570
1592
  /** Expected exit code (default: 0) */
1571
1593
  expectedExitCode: import_zod22.z.number().int().optional()
1572
1594
  });
1595
+ var TimeConfigSchema = import_zod22.z.strictObject({
1596
+ /** Maximum allowed duration in milliseconds */
1597
+ maxDurationMs: import_zod22.z.number().int().positive()
1598
+ });
1573
1599
  var LlmJudgeConfigSchema = import_zod22.z.object({
1574
1600
  /**
1575
1601
  * Prompt template with placeholders:
@@ -1599,7 +1625,11 @@ var AssertionConfigSchema = import_zod22.z.union([
1599
1625
  LlmJudgeConfigSchema,
1600
1626
  // requires prompt - check first
1601
1627
  SkillWasCalledConfigSchema,
1602
- // requires skillName
1628
+ // requires skillNames
1629
+ TimeConfigSchema,
1630
+ // requires maxDurationMs, uses strictObject
1631
+ CostConfigSchema,
1632
+ // requires maxCostUsd, uses strictObject
1603
1633
  BuildPassedConfigSchema,
1604
1634
  // all optional, uses strictObject to reject unknown keys
1605
1635
  import_zod22.z.object({})
@@ -1622,8 +1652,12 @@ function validateAssertionConfig(type, config) {
1622
1652
  switch (type) {
1623
1653
  case "skill_was_called":
1624
1654
  return SkillWasCalledConfigSchema.safeParse(config).success;
1655
+ case "cost":
1656
+ return CostConfigSchema.safeParse(config).success;
1625
1657
  case "build_passed":
1626
1658
  return BuildPassedConfigSchema.safeParse(config).success;
1659
+ case "time_limit":
1660
+ return TimeConfigSchema.safeParse(config).success;
1627
1661
  case "llm_judge":
1628
1662
  return LlmJudgeConfigSchema.safeParse(config).success;
1629
1663
  default:
@@ -2127,6 +2161,8 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
2127
2161
  var SYSTEM_ASSERTION_IDS = {
2128
2162
  SKILL_WAS_CALLED: "system:skill_was_called",
2129
2163
  BUILD_PASSED: "system:build_passed",
2164
+ TIME_LIMIT: "system:time_limit",
2165
+ COST: "system:cost",
2130
2166
  LLM_JUDGE: "system:llm_judge"
2131
2167
  };
2132
2168
  function isSystemAssertionId(id) {
@@ -2183,6 +2219,36 @@ var SYSTEM_ASSERTIONS = {
2183
2219
  }
2184
2220
  ]
2185
2221
  },
2222
+ [SYSTEM_ASSERTION_IDS.TIME_LIMIT]: {
2223
+ id: SYSTEM_ASSERTION_IDS.TIME_LIMIT,
2224
+ name: "Time Limit",
2225
+ description: "Check that the scenario completed within a maximum duration",
2226
+ type: "time_limit",
2227
+ parameters: [
2228
+ {
2229
+ name: "maxDurationMs",
2230
+ label: "Max Duration (ms)",
2231
+ type: "number",
2232
+ required: true,
2233
+ defaultValue: 3e5
2234
+ }
2235
+ ]
2236
+ },
2237
+ [SYSTEM_ASSERTION_IDS.COST]: {
2238
+ id: SYSTEM_ASSERTION_IDS.COST,
2239
+ name: "Cost",
2240
+ description: "Check that the scenario LLM execution cost stays within a USD threshold",
2241
+ type: "cost",
2242
+ parameters: [
2243
+ {
2244
+ name: "maxCostUsd",
2245
+ label: "Max Cost (USD)",
2246
+ type: "number",
2247
+ required: true,
2248
+ defaultValue: 1
2249
+ }
2250
+ ]
2251
+ },
2186
2252
  [SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
2187
2253
  id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
2188
2254
  name: "LLM Judge",
@@ -2256,6 +2322,8 @@ function getSystemAssertion(id) {
2256
2322
  ClaudeModelSchema,
2257
2323
  CommandExecutionSchema,
2258
2324
  CommandExecutionTestSchema,
2325
+ CostAssertionSchema,
2326
+ CostConfigSchema,
2259
2327
  CreateAgentInputSchema,
2260
2328
  CreateCustomAssertionInputSchema,
2261
2329
  CreateEvalRunInputSchema,
@@ -2344,6 +2412,8 @@ function getSystemAssertion(id) {
2344
2412
  TestSuiteSchema,
2345
2413
  TestType,
2346
2414
  TestTypeSchema,
2415
+ TimeAssertionSchema,
2416
+ TimeConfigSchema,
2347
2417
  TokenUsageSchema,
2348
2418
  ToolTestSchema,
2349
2419
  TriggerMetadataSchema,