@wix/evalforge-types 0.60.0 → 0.62.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +114 -13
- package/build/index.js.map +2 -2
- package/build/index.mjs +112 -13
- package/build/index.mjs.map +2 -2
- package/build/types/assertion/assertion.d.ts +90 -7
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/scenario/test-scenario.d.ts +60 -6
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -33,6 +33,8 @@ __export(index_exports, {
|
|
|
33
33
|
AgentTypeSchema: () => AgentTypeSchema,
|
|
34
34
|
AllowedCommands: () => AllowedCommands,
|
|
35
35
|
AnyModelSchema: () => AnyModelSchema,
|
|
36
|
+
ApiCallAssertionSchema: () => ApiCallAssertionSchema,
|
|
37
|
+
ApiCallConfigSchema: () => ApiCallConfigSchema,
|
|
36
38
|
ApiCallSchema: () => ApiCallSchema,
|
|
37
39
|
AssertionConfigSchema: () => AssertionConfigSchema,
|
|
38
40
|
AssertionParameterSchema: () => AssertionParameterSchema,
|
|
@@ -790,7 +792,8 @@ var AssertionTypeSchema = import_zod22.z.enum([
|
|
|
790
792
|
"build_passed",
|
|
791
793
|
"time_limit",
|
|
792
794
|
"cost",
|
|
793
|
-
"llm_judge"
|
|
795
|
+
"llm_judge",
|
|
796
|
+
"api_call"
|
|
794
797
|
]);
|
|
795
798
|
var AssertionParameterTypeSchema = import_zod22.z.enum([
|
|
796
799
|
"string",
|
|
@@ -831,8 +834,8 @@ var CostConfigSchema = import_zod22.z.strictObject({
|
|
|
831
834
|
var ToolCalledWithParamConfigSchema = import_zod22.z.strictObject({
|
|
832
835
|
/** Name of the tool that must have been called */
|
|
833
836
|
toolName: import_zod22.z.string().min(1),
|
|
834
|
-
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
835
|
-
expectedParams: import_zod22.z.string().min(1),
|
|
837
|
+
/** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
|
|
838
|
+
expectedParams: import_zod22.z.string().min(1).optional(),
|
|
836
839
|
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
837
840
|
requireSuccess: import_zod22.z.boolean().optional()
|
|
838
841
|
});
|
|
@@ -869,23 +872,51 @@ var LlmJudgeConfigSchema = import_zod22.z.object({
|
|
|
869
872
|
/** User-defined parameters for this assertion */
|
|
870
873
|
parameters: import_zod22.z.array(AssertionParameterSchema).optional()
|
|
871
874
|
});
|
|
875
|
+
var ApiCallConfigSchema = import_zod22.z.strictObject({
|
|
876
|
+
/** URL to call */
|
|
877
|
+
url: import_zod22.z.string().min(1),
|
|
878
|
+
/** HTTP method (default GET) */
|
|
879
|
+
method: import_zod22.z.enum(["GET", "POST"]).optional(),
|
|
880
|
+
/** Request body (JSON string, for POST requests) */
|
|
881
|
+
requestBody: import_zod22.z.string().optional(),
|
|
882
|
+
/** Expected JSON response to validate against (subset match — extra fields in actual are OK) */
|
|
883
|
+
expectedResponse: import_zod22.z.string().min(1),
|
|
884
|
+
/** Request headers as JSON string of key-value pairs */
|
|
885
|
+
requestHeaders: import_zod22.z.string().optional(),
|
|
886
|
+
/** Request timeout in milliseconds (default 30000) */
|
|
887
|
+
timeoutMs: import_zod22.z.number().int().positive().optional()
|
|
888
|
+
});
|
|
889
|
+
var AssertionBaseFields = {
|
|
890
|
+
/** When true, the assertion's pass/fail logic is inverted (NOT operator). */
|
|
891
|
+
negate: import_zod22.z.boolean().optional()
|
|
892
|
+
};
|
|
872
893
|
var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
|
|
873
|
-
type: import_zod22.z.literal("skill_was_called")
|
|
894
|
+
type: import_zod22.z.literal("skill_was_called"),
|
|
895
|
+
...AssertionBaseFields
|
|
874
896
|
});
|
|
875
897
|
var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
|
|
876
|
-
type: import_zod22.z.literal("tool_called_with_param")
|
|
898
|
+
type: import_zod22.z.literal("tool_called_with_param"),
|
|
899
|
+
...AssertionBaseFields
|
|
877
900
|
});
|
|
878
901
|
var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
|
|
879
|
-
type: import_zod22.z.literal("build_passed")
|
|
902
|
+
type: import_zod22.z.literal("build_passed"),
|
|
903
|
+
...AssertionBaseFields
|
|
880
904
|
});
|
|
881
905
|
var CostAssertionSchema = CostConfigSchema.extend({
|
|
882
|
-
type: import_zod22.z.literal("cost")
|
|
906
|
+
type: import_zod22.z.literal("cost"),
|
|
907
|
+
...AssertionBaseFields
|
|
883
908
|
});
|
|
884
909
|
var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
|
|
885
|
-
type: import_zod22.z.literal("llm_judge")
|
|
910
|
+
type: import_zod22.z.literal("llm_judge"),
|
|
911
|
+
...AssertionBaseFields
|
|
912
|
+
});
|
|
913
|
+
var ApiCallAssertionSchema = ApiCallConfigSchema.extend({
|
|
914
|
+
type: import_zod22.z.literal("api_call"),
|
|
915
|
+
...AssertionBaseFields
|
|
886
916
|
});
|
|
887
917
|
var TimeAssertionSchema = TimeConfigSchema.extend({
|
|
888
|
-
type: import_zod22.z.literal("time_limit")
|
|
918
|
+
type: import_zod22.z.literal("time_limit"),
|
|
919
|
+
...AssertionBaseFields
|
|
889
920
|
});
|
|
890
921
|
var AssertionSchema = import_zod22.z.union([
|
|
891
922
|
SkillWasCalledAssertionSchema,
|
|
@@ -893,7 +924,8 @@ var AssertionSchema = import_zod22.z.union([
|
|
|
893
924
|
BuildPassedAssertionSchema,
|
|
894
925
|
TimeAssertionSchema,
|
|
895
926
|
CostAssertionSchema,
|
|
896
|
-
LlmJudgeAssertionSchema
|
|
927
|
+
LlmJudgeAssertionSchema,
|
|
928
|
+
ApiCallAssertionSchema
|
|
897
929
|
]);
|
|
898
930
|
var AssertionConfigSchema = import_zod22.z.union([
|
|
899
931
|
LlmJudgeConfigSchema,
|
|
@@ -901,7 +933,9 @@ var AssertionConfigSchema = import_zod22.z.union([
|
|
|
901
933
|
SkillWasCalledConfigSchema,
|
|
902
934
|
// requires skillNames
|
|
903
935
|
ToolCalledWithParamConfigSchema,
|
|
904
|
-
// requires toolName
|
|
936
|
+
// requires toolName, uses strictObject
|
|
937
|
+
ApiCallConfigSchema,
|
|
938
|
+
// requires url + expectedResponse, uses strictObject
|
|
905
939
|
TimeConfigSchema,
|
|
906
940
|
// requires maxDurationMs, uses strictObject
|
|
907
941
|
CostConfigSchema,
|
|
@@ -925,6 +959,8 @@ function validateAssertionConfig(type, config) {
|
|
|
925
959
|
return TimeConfigSchema.safeParse(config).success;
|
|
926
960
|
case "llm_judge":
|
|
927
961
|
return LlmJudgeConfigSchema.safeParse(config).success;
|
|
962
|
+
case "api_call":
|
|
963
|
+
return ApiCallConfigSchema.safeParse(config).success;
|
|
928
964
|
default:
|
|
929
965
|
return false;
|
|
930
966
|
}
|
|
@@ -1596,7 +1632,8 @@ var SYSTEM_ASSERTION_IDS = {
|
|
|
1596
1632
|
BUILD_PASSED: "system:build_passed",
|
|
1597
1633
|
TIME_LIMIT: "system:time_limit",
|
|
1598
1634
|
COST: "system:cost",
|
|
1599
|
-
LLM_JUDGE: "system:llm_judge"
|
|
1635
|
+
LLM_JUDGE: "system:llm_judge",
|
|
1636
|
+
API_CALL: "system:api_call"
|
|
1600
1637
|
};
|
|
1601
1638
|
function isSystemAssertionId(id) {
|
|
1602
1639
|
return id.startsWith("system:");
|
|
@@ -1613,6 +1650,13 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1613
1650
|
label: "Skills",
|
|
1614
1651
|
type: "string",
|
|
1615
1652
|
required: true
|
|
1653
|
+
},
|
|
1654
|
+
{
|
|
1655
|
+
name: "negate",
|
|
1656
|
+
label: "Negate (NOT operator)",
|
|
1657
|
+
type: "boolean",
|
|
1658
|
+
required: false,
|
|
1659
|
+
defaultValue: false
|
|
1616
1660
|
}
|
|
1617
1661
|
]
|
|
1618
1662
|
},
|
|
@@ -1632,7 +1676,7 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1632
1676
|
name: "expectedParams",
|
|
1633
1677
|
label: "Expected Parameters (JSON, substring match)",
|
|
1634
1678
|
type: "string",
|
|
1635
|
-
required: true
|
|
1679
|
+
required: false
|
|
1636
1680
|
},
|
|
1637
1681
|
{
|
|
1638
1682
|
name: "requireSuccess",
|
|
@@ -1641,6 +1685,13 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1641
1685
|
required: false,
|
|
1642
1686
|
defaultValue: false,
|
|
1643
1687
|
advanced: true
|
|
1688
|
+
},
|
|
1689
|
+
{
|
|
1690
|
+
name: "negate",
|
|
1691
|
+
label: "Negate (NOT operator)",
|
|
1692
|
+
type: "boolean",
|
|
1693
|
+
required: false,
|
|
1694
|
+
defaultValue: false
|
|
1644
1695
|
}
|
|
1645
1696
|
]
|
|
1646
1697
|
},
|
|
@@ -1737,6 +1788,54 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1737
1788
|
required: false
|
|
1738
1789
|
}
|
|
1739
1790
|
]
|
|
1791
|
+
},
|
|
1792
|
+
[SYSTEM_ASSERTION_IDS.API_CALL]: {
|
|
1793
|
+
id: SYSTEM_ASSERTION_IDS.API_CALL,
|
|
1794
|
+
name: "API Call",
|
|
1795
|
+
description: "Call an API endpoint and verify the response contains expected data",
|
|
1796
|
+
type: "api_call",
|
|
1797
|
+
parameters: [
|
|
1798
|
+
{
|
|
1799
|
+
name: "url",
|
|
1800
|
+
label: "URL",
|
|
1801
|
+
type: "string",
|
|
1802
|
+
required: true
|
|
1803
|
+
},
|
|
1804
|
+
{
|
|
1805
|
+
name: "method",
|
|
1806
|
+
label: "HTTP Method",
|
|
1807
|
+
type: "string",
|
|
1808
|
+
required: false,
|
|
1809
|
+
defaultValue: "GET"
|
|
1810
|
+
},
|
|
1811
|
+
{
|
|
1812
|
+
name: "requestBody",
|
|
1813
|
+
label: "Request Body (JSON)",
|
|
1814
|
+
type: "string",
|
|
1815
|
+
required: false
|
|
1816
|
+
},
|
|
1817
|
+
{
|
|
1818
|
+
name: "expectedResponse",
|
|
1819
|
+
label: "Expected Response (JSON)",
|
|
1820
|
+
type: "string",
|
|
1821
|
+
required: true
|
|
1822
|
+
},
|
|
1823
|
+
{
|
|
1824
|
+
name: "requestHeaders",
|
|
1825
|
+
label: "Headers (JSON)",
|
|
1826
|
+
type: "string",
|
|
1827
|
+
required: false,
|
|
1828
|
+
advanced: true
|
|
1829
|
+
},
|
|
1830
|
+
{
|
|
1831
|
+
name: "timeoutMs",
|
|
1832
|
+
label: "Timeout (ms)",
|
|
1833
|
+
type: "number",
|
|
1834
|
+
required: false,
|
|
1835
|
+
defaultValue: 3e4,
|
|
1836
|
+
advanced: true
|
|
1837
|
+
}
|
|
1838
|
+
]
|
|
1740
1839
|
}
|
|
1741
1840
|
};
|
|
1742
1841
|
function getSystemAssertions() {
|
|
@@ -1760,6 +1859,8 @@ function getSystemAssertion(id) {
|
|
|
1760
1859
|
AgentTypeSchema,
|
|
1761
1860
|
AllowedCommands,
|
|
1762
1861
|
AnyModelSchema,
|
|
1862
|
+
ApiCallAssertionSchema,
|
|
1863
|
+
ApiCallConfigSchema,
|
|
1763
1864
|
ApiCallSchema,
|
|
1764
1865
|
AssertionConfigSchema,
|
|
1765
1866
|
AssertionParameterSchema,
|