@wix/evalforge-types 0.36.0 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +35 -0
- package/build/index.js.map +2 -2
- package/build/index.mjs +33 -0
- package/build/index.mjs.map +2 -2
- package/build/types/assertion/assertion.d.ts +26 -0
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/scenario/assertions.d.ts +12 -0
- package/build/types/scenario/test-scenario.d.ts +9 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -937,6 +937,8 @@ __export(index_exports, {
|
|
|
937
937
|
ClaudeModelSchema: () => ClaudeModelSchema,
|
|
938
938
|
CommandExecutionSchema: () => CommandExecutionSchema,
|
|
939
939
|
CommandExecutionTestSchema: () => CommandExecutionTestSchema,
|
|
940
|
+
CostAssertionSchema: () => CostAssertionSchema,
|
|
941
|
+
CostConfigSchema: () => CostConfigSchema,
|
|
940
942
|
CreateAgentInputSchema: () => CreateAgentInputSchema,
|
|
941
943
|
CreateCustomAssertionInputSchema: () => CreateCustomAssertionInputSchema,
|
|
942
944
|
CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
|
|
@@ -1471,6 +1473,11 @@ var BuildPassedAssertionSchema = import_zod20.z.object({
|
|
|
1471
1473
|
/** Expected exit code (default: 0) */
|
|
1472
1474
|
expectedExitCode: import_zod20.z.number().int().optional()
|
|
1473
1475
|
});
|
|
1476
|
+
var CostAssertionSchema = import_zod20.z.object({
|
|
1477
|
+
type: import_zod20.z.literal("cost"),
|
|
1478
|
+
/** Maximum allowed cost in USD */
|
|
1479
|
+
maxCostUsd: import_zod20.z.number().positive()
|
|
1480
|
+
});
|
|
1474
1481
|
var LlmJudgeAssertionSchema = import_zod20.z.object({
|
|
1475
1482
|
type: import_zod20.z.literal("llm_judge"),
|
|
1476
1483
|
/** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
|
|
@@ -1493,6 +1500,7 @@ var AssertionSchema = import_zod20.z.union([
|
|
|
1493
1500
|
SkillWasCalledAssertionSchema,
|
|
1494
1501
|
BuildPassedAssertionSchema,
|
|
1495
1502
|
TimeAssertionSchema,
|
|
1503
|
+
CostAssertionSchema,
|
|
1496
1504
|
LlmJudgeAssertionSchema
|
|
1497
1505
|
]);
|
|
1498
1506
|
|
|
@@ -1539,6 +1547,7 @@ var AssertionTypeSchema = import_zod22.z.enum([
|
|
|
1539
1547
|
"skill_was_called",
|
|
1540
1548
|
"build_passed",
|
|
1541
1549
|
"time_limit",
|
|
1550
|
+
"cost",
|
|
1542
1551
|
"llm_judge"
|
|
1543
1552
|
]);
|
|
1544
1553
|
var AssertionParameterTypeSchema = import_zod22.z.enum([
|
|
@@ -1573,6 +1582,10 @@ var SkillWasCalledConfigSchema = import_zod22.z.object({
|
|
|
1573
1582
|
/** Names of the skills that must have been called */
|
|
1574
1583
|
skillNames: import_zod22.z.array(import_zod22.z.string().min(1)).min(1)
|
|
1575
1584
|
});
|
|
1585
|
+
var CostConfigSchema = import_zod22.z.strictObject({
|
|
1586
|
+
/** Maximum allowed cost in USD */
|
|
1587
|
+
maxCostUsd: import_zod22.z.number().positive()
|
|
1588
|
+
});
|
|
1576
1589
|
var BuildPassedConfigSchema = import_zod22.z.strictObject({
|
|
1577
1590
|
/** Command to run (default: "yarn build") */
|
|
1578
1591
|
command: import_zod22.z.string().optional(),
|
|
@@ -1615,6 +1628,8 @@ var AssertionConfigSchema = import_zod22.z.union([
|
|
|
1615
1628
|
// requires skillNames
|
|
1616
1629
|
TimeConfigSchema,
|
|
1617
1630
|
// requires maxDurationMs, uses strictObject
|
|
1631
|
+
CostConfigSchema,
|
|
1632
|
+
// requires maxCostUsd, uses strictObject
|
|
1618
1633
|
BuildPassedConfigSchema,
|
|
1619
1634
|
// all optional, uses strictObject to reject unknown keys
|
|
1620
1635
|
import_zod22.z.object({})
|
|
@@ -1637,6 +1652,8 @@ function validateAssertionConfig(type, config) {
|
|
|
1637
1652
|
switch (type) {
|
|
1638
1653
|
case "skill_was_called":
|
|
1639
1654
|
return SkillWasCalledConfigSchema.safeParse(config).success;
|
|
1655
|
+
case "cost":
|
|
1656
|
+
return CostConfigSchema.safeParse(config).success;
|
|
1640
1657
|
case "build_passed":
|
|
1641
1658
|
return BuildPassedConfigSchema.safeParse(config).success;
|
|
1642
1659
|
case "time_limit":
|
|
@@ -2145,6 +2162,7 @@ var SYSTEM_ASSERTION_IDS = {
|
|
|
2145
2162
|
SKILL_WAS_CALLED: "system:skill_was_called",
|
|
2146
2163
|
BUILD_PASSED: "system:build_passed",
|
|
2147
2164
|
TIME_LIMIT: "system:time_limit",
|
|
2165
|
+
COST: "system:cost",
|
|
2148
2166
|
LLM_JUDGE: "system:llm_judge"
|
|
2149
2167
|
};
|
|
2150
2168
|
function isSystemAssertionId(id) {
|
|
@@ -2216,6 +2234,21 @@ var SYSTEM_ASSERTIONS = {
|
|
|
2216
2234
|
}
|
|
2217
2235
|
]
|
|
2218
2236
|
},
|
|
2237
|
+
[SYSTEM_ASSERTION_IDS.COST]: {
|
|
2238
|
+
id: SYSTEM_ASSERTION_IDS.COST,
|
|
2239
|
+
name: "Cost",
|
|
2240
|
+
description: "Check that the scenario LLM execution cost stays within a USD threshold",
|
|
2241
|
+
type: "cost",
|
|
2242
|
+
parameters: [
|
|
2243
|
+
{
|
|
2244
|
+
name: "maxCostUsd",
|
|
2245
|
+
label: "Max Cost (USD)",
|
|
2246
|
+
type: "number",
|
|
2247
|
+
required: true,
|
|
2248
|
+
defaultValue: 1
|
|
2249
|
+
}
|
|
2250
|
+
]
|
|
2251
|
+
},
|
|
2219
2252
|
[SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
|
|
2220
2253
|
id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
|
|
2221
2254
|
name: "LLM Judge",
|
|
@@ -2289,6 +2322,8 @@ function getSystemAssertion(id) {
|
|
|
2289
2322
|
ClaudeModelSchema,
|
|
2290
2323
|
CommandExecutionSchema,
|
|
2291
2324
|
CommandExecutionTestSchema,
|
|
2325
|
+
CostAssertionSchema,
|
|
2326
|
+
CostConfigSchema,
|
|
2292
2327
|
CreateAgentInputSchema,
|
|
2293
2328
|
CreateCustomAssertionInputSchema,
|
|
2294
2329
|
CreateEvalRunInputSchema,
|