@wix/evalforge-types 0.35.0 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +71 -1
- package/build/index.js.map +2 -2
- package/build/index.mjs +67 -1
- package/build/index.mjs.map +2 -2
- package/build/types/assertion/assertion.d.ts +52 -0
- package/build/types/assertion/system-assertions.d.ts +2 -0
- package/build/types/scenario/assertions.d.ts +24 -0
- package/build/types/scenario/test-scenario.d.ts +18 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -937,6 +937,8 @@ __export(index_exports, {
|
|
|
937
937
|
ClaudeModelSchema: () => ClaudeModelSchema,
|
|
938
938
|
CommandExecutionSchema: () => CommandExecutionSchema,
|
|
939
939
|
CommandExecutionTestSchema: () => CommandExecutionTestSchema,
|
|
940
|
+
CostAssertionSchema: () => CostAssertionSchema,
|
|
941
|
+
CostConfigSchema: () => CostConfigSchema,
|
|
940
942
|
CreateAgentInputSchema: () => CreateAgentInputSchema,
|
|
941
943
|
CreateCustomAssertionInputSchema: () => CreateCustomAssertionInputSchema,
|
|
942
944
|
CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
|
|
@@ -1025,6 +1027,8 @@ __export(index_exports, {
|
|
|
1025
1027
|
TestSuiteSchema: () => TestSuiteSchema,
|
|
1026
1028
|
TestType: () => TestType,
|
|
1027
1029
|
TestTypeSchema: () => TestTypeSchema,
|
|
1030
|
+
TimeAssertionSchema: () => TimeAssertionSchema,
|
|
1031
|
+
TimeConfigSchema: () => TimeConfigSchema,
|
|
1028
1032
|
TokenUsageSchema: () => TokenUsageSchema,
|
|
1029
1033
|
ToolTestSchema: () => ToolTestSchema,
|
|
1030
1034
|
TriggerMetadataSchema: () => TriggerMetadataSchema,
|
|
@@ -1469,6 +1473,11 @@ var BuildPassedAssertionSchema = import_zod20.z.object({
|
|
|
1469
1473
|
/** Expected exit code (default: 0) */
|
|
1470
1474
|
expectedExitCode: import_zod20.z.number().int().optional()
|
|
1471
1475
|
});
|
|
1476
|
+
var CostAssertionSchema = import_zod20.z.object({
|
|
1477
|
+
type: import_zod20.z.literal("cost"),
|
|
1478
|
+
/** Maximum allowed cost in USD */
|
|
1479
|
+
maxCostUsd: import_zod20.z.number().positive()
|
|
1480
|
+
});
|
|
1472
1481
|
var LlmJudgeAssertionSchema = import_zod20.z.object({
|
|
1473
1482
|
type: import_zod20.z.literal("llm_judge"),
|
|
1474
1483
|
/** Prompt template; placeholders: {{output}}, {{cwd}}, {{changedFiles}}, {{trace}} */
|
|
@@ -1482,9 +1491,16 @@ var LlmJudgeAssertionSchema = import_zod20.z.object({
|
|
|
1482
1491
|
maxTokens: import_zod20.z.number().int().optional(),
|
|
1483
1492
|
temperature: import_zod20.z.number().min(0).max(1).optional()
|
|
1484
1493
|
});
|
|
1494
|
+
var TimeAssertionSchema = import_zod20.z.object({
|
|
1495
|
+
type: import_zod20.z.literal("time_limit"),
|
|
1496
|
+
/** Maximum allowed duration in milliseconds */
|
|
1497
|
+
maxDurationMs: import_zod20.z.number().int().positive()
|
|
1498
|
+
});
|
|
1485
1499
|
var AssertionSchema = import_zod20.z.union([
|
|
1486
1500
|
SkillWasCalledAssertionSchema,
|
|
1487
1501
|
BuildPassedAssertionSchema,
|
|
1502
|
+
TimeAssertionSchema,
|
|
1503
|
+
CostAssertionSchema,
|
|
1488
1504
|
LlmJudgeAssertionSchema
|
|
1489
1505
|
]);
|
|
1490
1506
|
|
|
@@ -1530,6 +1546,8 @@ var import_zod22 = require("zod");
|
|
|
1530
1546
|
var AssertionTypeSchema = import_zod22.z.enum([
|
|
1531
1547
|
"skill_was_called",
|
|
1532
1548
|
"build_passed",
|
|
1549
|
+
"time_limit",
|
|
1550
|
+
"cost",
|
|
1533
1551
|
"llm_judge"
|
|
1534
1552
|
]);
|
|
1535
1553
|
var AssertionParameterTypeSchema = import_zod22.z.enum([
|
|
@@ -1564,12 +1582,20 @@ var SkillWasCalledConfigSchema = import_zod22.z.object({
|
|
|
1564
1582
|
/** Names of the skills that must have been called */
|
|
1565
1583
|
skillNames: import_zod22.z.array(import_zod22.z.string().min(1)).min(1)
|
|
1566
1584
|
});
|
|
1585
|
+
var CostConfigSchema = import_zod22.z.strictObject({
|
|
1586
|
+
/** Maximum allowed cost in USD */
|
|
1587
|
+
maxCostUsd: import_zod22.z.number().positive()
|
|
1588
|
+
});
|
|
1567
1589
|
var BuildPassedConfigSchema = import_zod22.z.strictObject({
|
|
1568
1590
|
/** Command to run (default: "yarn build") */
|
|
1569
1591
|
command: import_zod22.z.string().optional(),
|
|
1570
1592
|
/** Expected exit code (default: 0) */
|
|
1571
1593
|
expectedExitCode: import_zod22.z.number().int().optional()
|
|
1572
1594
|
});
|
|
1595
|
+
var TimeConfigSchema = import_zod22.z.strictObject({
|
|
1596
|
+
/** Maximum allowed duration in milliseconds */
|
|
1597
|
+
maxDurationMs: import_zod22.z.number().int().positive()
|
|
1598
|
+
});
|
|
1573
1599
|
var LlmJudgeConfigSchema = import_zod22.z.object({
|
|
1574
1600
|
/**
|
|
1575
1601
|
* Prompt template with placeholders:
|
|
@@ -1599,7 +1625,11 @@ var AssertionConfigSchema = import_zod22.z.union([
|
|
|
1599
1625
|
LlmJudgeConfigSchema,
|
|
1600
1626
|
// requires prompt - check first
|
|
1601
1627
|
SkillWasCalledConfigSchema,
|
|
1602
|
-
// requires
|
|
1628
|
+
// requires skillNames
|
|
1629
|
+
TimeConfigSchema,
|
|
1630
|
+
// requires maxDurationMs, uses strictObject
|
|
1631
|
+
CostConfigSchema,
|
|
1632
|
+
// requires maxCostUsd, uses strictObject
|
|
1603
1633
|
BuildPassedConfigSchema,
|
|
1604
1634
|
// all optional, uses strictObject to reject unknown keys
|
|
1605
1635
|
import_zod22.z.object({})
|
|
@@ -1622,8 +1652,12 @@ function validateAssertionConfig(type, config) {
|
|
|
1622
1652
|
switch (type) {
|
|
1623
1653
|
case "skill_was_called":
|
|
1624
1654
|
return SkillWasCalledConfigSchema.safeParse(config).success;
|
|
1655
|
+
case "cost":
|
|
1656
|
+
return CostConfigSchema.safeParse(config).success;
|
|
1625
1657
|
case "build_passed":
|
|
1626
1658
|
return BuildPassedConfigSchema.safeParse(config).success;
|
|
1659
|
+
case "time_limit":
|
|
1660
|
+
return TimeConfigSchema.safeParse(config).success;
|
|
1627
1661
|
case "llm_judge":
|
|
1628
1662
|
return LlmJudgeConfigSchema.safeParse(config).success;
|
|
1629
1663
|
default:
|
|
@@ -2127,6 +2161,8 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
|
2127
2161
|
var SYSTEM_ASSERTION_IDS = {
|
|
2128
2162
|
SKILL_WAS_CALLED: "system:skill_was_called",
|
|
2129
2163
|
BUILD_PASSED: "system:build_passed",
|
|
2164
|
+
TIME_LIMIT: "system:time_limit",
|
|
2165
|
+
COST: "system:cost",
|
|
2130
2166
|
LLM_JUDGE: "system:llm_judge"
|
|
2131
2167
|
};
|
|
2132
2168
|
function isSystemAssertionId(id) {
|
|
@@ -2183,6 +2219,36 @@ var SYSTEM_ASSERTIONS = {
|
|
|
2183
2219
|
}
|
|
2184
2220
|
]
|
|
2185
2221
|
},
|
|
2222
|
+
[SYSTEM_ASSERTION_IDS.TIME_LIMIT]: {
|
|
2223
|
+
id: SYSTEM_ASSERTION_IDS.TIME_LIMIT,
|
|
2224
|
+
name: "Time Limit",
|
|
2225
|
+
description: "Check that the scenario completed within a maximum duration",
|
|
2226
|
+
type: "time_limit",
|
|
2227
|
+
parameters: [
|
|
2228
|
+
{
|
|
2229
|
+
name: "maxDurationMs",
|
|
2230
|
+
label: "Max Duration (ms)",
|
|
2231
|
+
type: "number",
|
|
2232
|
+
required: true,
|
|
2233
|
+
defaultValue: 3e5
|
|
2234
|
+
}
|
|
2235
|
+
]
|
|
2236
|
+
},
|
|
2237
|
+
[SYSTEM_ASSERTION_IDS.COST]: {
|
|
2238
|
+
id: SYSTEM_ASSERTION_IDS.COST,
|
|
2239
|
+
name: "Cost",
|
|
2240
|
+
description: "Check that the scenario LLM execution cost stays within a USD threshold",
|
|
2241
|
+
type: "cost",
|
|
2242
|
+
parameters: [
|
|
2243
|
+
{
|
|
2244
|
+
name: "maxCostUsd",
|
|
2245
|
+
label: "Max Cost (USD)",
|
|
2246
|
+
type: "number",
|
|
2247
|
+
required: true,
|
|
2248
|
+
defaultValue: 1
|
|
2249
|
+
}
|
|
2250
|
+
]
|
|
2251
|
+
},
|
|
2186
2252
|
[SYSTEM_ASSERTION_IDS.LLM_JUDGE]: {
|
|
2187
2253
|
id: SYSTEM_ASSERTION_IDS.LLM_JUDGE,
|
|
2188
2254
|
name: "LLM Judge",
|
|
@@ -2256,6 +2322,8 @@ function getSystemAssertion(id) {
|
|
|
2256
2322
|
ClaudeModelSchema,
|
|
2257
2323
|
CommandExecutionSchema,
|
|
2258
2324
|
CommandExecutionTestSchema,
|
|
2325
|
+
CostAssertionSchema,
|
|
2326
|
+
CostConfigSchema,
|
|
2259
2327
|
CreateAgentInputSchema,
|
|
2260
2328
|
CreateCustomAssertionInputSchema,
|
|
2261
2329
|
CreateEvalRunInputSchema,
|
|
@@ -2344,6 +2412,8 @@ function getSystemAssertion(id) {
|
|
|
2344
2412
|
TestSuiteSchema,
|
|
2345
2413
|
TestType,
|
|
2346
2414
|
TestTypeSchema,
|
|
2415
|
+
TimeAssertionSchema,
|
|
2416
|
+
TimeConfigSchema,
|
|
2347
2417
|
TokenUsageSchema,
|
|
2348
2418
|
ToolTestSchema,
|
|
2349
2419
|
TriggerMetadataSchema,
|