@wix/evalforge-types 0.60.0 → 0.62.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +114 -13
- package/build/index.js.map +2 -2
- package/build/index.mjs +112 -13
- package/build/index.mjs.map +2 -2
- package/build/types/assertion/assertion.d.ts +90 -7
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/scenario/test-scenario.d.ts +60 -6
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -592,7 +592,8 @@ var AssertionTypeSchema = z22.enum([
|
|
|
592
592
|
"build_passed",
|
|
593
593
|
"time_limit",
|
|
594
594
|
"cost",
|
|
595
|
-
"llm_judge"
|
|
595
|
+
"llm_judge",
|
|
596
|
+
"api_call"
|
|
596
597
|
]);
|
|
597
598
|
var AssertionParameterTypeSchema = z22.enum([
|
|
598
599
|
"string",
|
|
@@ -633,8 +634,8 @@ var CostConfigSchema = z22.strictObject({
|
|
|
633
634
|
var ToolCalledWithParamConfigSchema = z22.strictObject({
|
|
634
635
|
/** Name of the tool that must have been called */
|
|
635
636
|
toolName: z22.string().min(1),
|
|
636
|
-
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
637
|
-
expectedParams: z22.string().min(1),
|
|
637
|
+
/** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
|
|
638
|
+
expectedParams: z22.string().min(1).optional(),
|
|
638
639
|
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
639
640
|
requireSuccess: z22.boolean().optional()
|
|
640
641
|
});
|
|
@@ -671,23 +672,51 @@ var LlmJudgeConfigSchema = z22.object({
|
|
|
671
672
|
/** User-defined parameters for this assertion */
|
|
672
673
|
parameters: z22.array(AssertionParameterSchema).optional()
|
|
673
674
|
});
|
|
675
|
+
var ApiCallConfigSchema = z22.strictObject({
|
|
676
|
+
/** URL to call */
|
|
677
|
+
url: z22.string().min(1),
|
|
678
|
+
/** HTTP method (default GET) */
|
|
679
|
+
method: z22.enum(["GET", "POST"]).optional(),
|
|
680
|
+
/** Request body (JSON string, for POST requests) */
|
|
681
|
+
requestBody: z22.string().optional(),
|
|
682
|
+
/** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
|
|
683
|
+
expectedResponse: z22.string().min(1),
|
|
684
|
+
/** Request headers as JSON string of key-value pairs */
|
|
685
|
+
requestHeaders: z22.string().optional(),
|
|
686
|
+
/** Request timeout in milliseconds (default 30000) */
|
|
687
|
+
timeoutMs: z22.number().int().positive().optional()
|
|
688
|
+
});
|
|
689
|
+
var AssertionBaseFields = {
|
|
690
|
+
/** When true, the assertion's pass/fail logic is inverted (NOT operator). */
|
|
691
|
+
negate: z22.boolean().optional()
|
|
692
|
+
};
|
|
674
693
|
var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
|
|
675
|
-
type: z22.literal("skill_was_called")
|
|
694
|
+
type: z22.literal("skill_was_called"),
|
|
695
|
+
...AssertionBaseFields
|
|
676
696
|
});
|
|
677
697
|
var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
|
|
678
|
-
type: z22.literal("tool_called_with_param")
|
|
698
|
+
type: z22.literal("tool_called_with_param"),
|
|
699
|
+
...AssertionBaseFields
|
|
679
700
|
});
|
|
680
701
|
var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
|
|
681
|
-
type: z22.literal("build_passed")
|
|
702
|
+
type: z22.literal("build_passed"),
|
|
703
|
+
...AssertionBaseFields
|
|
682
704
|
});
|
|
683
705
|
var CostAssertionSchema = CostConfigSchema.extend({
|
|
684
|
-
type: z22.literal("cost")
|
|
706
|
+
type: z22.literal("cost"),
|
|
707
|
+
...AssertionBaseFields
|
|
685
708
|
});
|
|
686
709
|
var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
|
|
687
|
-
type: z22.literal("llm_judge")
|
|
710
|
+
type: z22.literal("llm_judge"),
|
|
711
|
+
...AssertionBaseFields
|
|
712
|
+
});
|
|
713
|
+
var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
|
|
714
|
+
type: z22.literal("api_call"),
|
|
715
|
+
...AssertionBaseFields
|
|
688
716
|
});
|
|
689
717
|
var TimeAssertionSchema = TimeConfigSchema.extend({
|
|
690
|
-
type: z22.literal("time_limit")
|
|
718
|
+
type: z22.literal("time_limit"),
|
|
719
|
+
...AssertionBaseFields
|
|
691
720
|
});
|
|
692
721
|
var AssertionSchema = z22.union([
|
|
693
722
|
SkillWasCalledAssertionSchema,
|
|
@@ -695,7 +724,8 @@ var AssertionSchema = z22.union([
|
|
|
695
724
|
BuildPassedAssertionSchema,
|
|
696
725
|
TimeAssertionSchema,
|
|
697
726
|
CostAssertionSchema,
|
|
698
|
-
LlmJudgeAssertionSchema
|
|
727
|
+
LlmJudgeAssertionSchema,
|
|
728
|
+
ApiCallAssertionSchema
|
|
699
729
|
]);
|
|
700
730
|
var AssertionConfigSchema = z22.union([
|
|
701
731
|
LlmJudgeConfigSchema,
|
|
@@ -703,7 +733,9 @@ var AssertionConfigSchema = z22.union([
|
|
|
703
733
|
SkillWasCalledConfigSchema,
|
|
704
734
|
// requires skillNames
|
|
705
735
|
ToolCalledWithParamConfigSchema,
|
|
706
|
-
// requires toolName
|
|
736
|
+
// requires toolName, uses strictObject
|
|
737
|
+
ApiCallConfigSchema,
|
|
738
|
+
// requires url + expectedResponse, uses strictObject
|
|
707
739
|
TimeConfigSchema,
|
|
708
740
|
// requires maxDurationMs, uses strictObject
|
|
709
741
|
CostConfigSchema,
|
|
@@ -727,6 +759,8 @@ function validateAssertionConfig(type, config) {
|
|
|
727
759
|
return TimeConfigSchema.safeParse(config).success;
|
|
728
760
|
case "llm_judge":
|
|
729
761
|
return LlmJudgeConfigSchema.safeParse(config).success;
|
|
762
|
+
case "api_call":
|
|
763
|
+
return ApiCallConfigSchema.safeParse(config).success;
|
|
730
764
|
default:
|
|
731
765
|
return false;
|
|
732
766
|
}
|
|
@@ -1398,7 +1432,8 @@ var SYSTEM_ASSERTION_IDS = {
|
|
|
1398
1432
|
BUILD_PASSED: "system:build_passed",
|
|
1399
1433
|
TIME_LIMIT: "system:time_limit",
|
|
1400
1434
|
COST: "system:cost",
|
|
1401
|
-
LLM_JUDGE: "system:llm_judge"
|
|
1435
|
+
LLM_JUDGE: "system:llm_judge",
|
|
1436
|
+
API_CALL: "system:api_call"
|
|
1402
1437
|
};
|
|
1403
1438
|
function isSystemAssertionId(id) {
|
|
1404
1439
|
return id.startsWith("system:");
|
|
@@ -1415,6 +1450,13 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1415
1450
|
label: "Skills",
|
|
1416
1451
|
type: "string",
|
|
1417
1452
|
required: true
|
|
1453
|
+
},
|
|
1454
|
+
{
|
|
1455
|
+
name: "negate",
|
|
1456
|
+
label: "Negate (NOT operator)",
|
|
1457
|
+
type: "boolean",
|
|
1458
|
+
required: false,
|
|
1459
|
+
defaultValue: false
|
|
1418
1460
|
}
|
|
1419
1461
|
]
|
|
1420
1462
|
},
|
|
@@ -1434,7 +1476,7 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1434
1476
|
name: "expectedParams",
|
|
1435
1477
|
label: "Expected Parameters (JSON, substring match)",
|
|
1436
1478
|
type: "string",
|
|
1437
|
-
required: true
|
|
1479
|
+
required: false
|
|
1438
1480
|
},
|
|
1439
1481
|
{
|
|
1440
1482
|
name: "requireSuccess",
|
|
@@ -1443,6 +1485,13 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1443
1485
|
required: false,
|
|
1444
1486
|
defaultValue: false,
|
|
1445
1487
|
advanced: true
|
|
1488
|
+
},
|
|
1489
|
+
{
|
|
1490
|
+
name: "negate",
|
|
1491
|
+
label: "Negate (NOT operator)",
|
|
1492
|
+
type: "boolean",
|
|
1493
|
+
required: false,
|
|
1494
|
+
defaultValue: false
|
|
1446
1495
|
}
|
|
1447
1496
|
]
|
|
1448
1497
|
},
|
|
@@ -1539,6 +1588,54 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1539
1588
|
required: false
|
|
1540
1589
|
}
|
|
1541
1590
|
]
|
|
1591
|
+
},
|
|
1592
|
+
[SYSTEM_ASSERTION_IDS.API_CALL]: {
|
|
1593
|
+
id: SYSTEM_ASSERTION_IDS.API_CALL,
|
|
1594
|
+
name: "API Call",
|
|
1595
|
+
description: "Call an API endpoint and verify the response contains expected data",
|
|
1596
|
+
type: "api_call",
|
|
1597
|
+
parameters: [
|
|
1598
|
+
{
|
|
1599
|
+
name: "url",
|
|
1600
|
+
label: "URL",
|
|
1601
|
+
type: "string",
|
|
1602
|
+
required: true
|
|
1603
|
+
},
|
|
1604
|
+
{
|
|
1605
|
+
name: "method",
|
|
1606
|
+
label: "HTTP Method",
|
|
1607
|
+
type: "string",
|
|
1608
|
+
required: false,
|
|
1609
|
+
defaultValue: "GET"
|
|
1610
|
+
},
|
|
1611
|
+
{
|
|
1612
|
+
name: "requestBody",
|
|
1613
|
+
label: "Request Body (JSON)",
|
|
1614
|
+
type: "string",
|
|
1615
|
+
required: false
|
|
1616
|
+
},
|
|
1617
|
+
{
|
|
1618
|
+
name: "expectedResponse",
|
|
1619
|
+
label: "Expected Response (JSON)",
|
|
1620
|
+
type: "string",
|
|
1621
|
+
required: true
|
|
1622
|
+
},
|
|
1623
|
+
{
|
|
1624
|
+
name: "requestHeaders",
|
|
1625
|
+
label: "Headers (JSON)",
|
|
1626
|
+
type: "string",
|
|
1627
|
+
required: false,
|
|
1628
|
+
advanced: true
|
|
1629
|
+
},
|
|
1630
|
+
{
|
|
1631
|
+
name: "timeoutMs",
|
|
1632
|
+
label: "Timeout (ms)",
|
|
1633
|
+
type: "number",
|
|
1634
|
+
required: false,
|
|
1635
|
+
defaultValue: 3e4,
|
|
1636
|
+
advanced: true
|
|
1637
|
+
}
|
|
1638
|
+
]
|
|
1542
1639
|
}
|
|
1543
1640
|
};
|
|
1544
1641
|
function getSystemAssertions() {
|
|
@@ -1561,6 +1658,8 @@ export {
|
|
|
1561
1658
|
AgentTypeSchema,
|
|
1562
1659
|
AllowedCommands,
|
|
1563
1660
|
AnyModelSchema,
|
|
1661
|
+
ApiCallAssertionSchema,
|
|
1662
|
+
ApiCallConfigSchema,
|
|
1564
1663
|
ApiCallSchema,
|
|
1565
1664
|
AssertionConfigSchema,
|
|
1566
1665
|
AssertionParameterSchema,
|