@wix/evalforge-types 0.59.0 → 0.61.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +35 -51
- package/build/index.js.map +2 -2
- package/build/index.mjs +35 -45
- package/build/index.mjs.map +3 -3
- package/build/types/assertion/assertion.d.ts +22 -193
- package/build/types/assertion/index.d.ts +1 -1
- package/build/types/assertion/system-assertions.d.ts +1 -1
- package/build/types/scenario/test-scenario.d.ts +21 -3
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -59,7 +59,6 @@ __export(index_exports, {
|
|
|
59
59
|
CostAssertionSchema: () => CostAssertionSchema,
|
|
60
60
|
CostConfigSchema: () => CostConfigSchema,
|
|
61
61
|
CreateAgentInputSchema: () => CreateAgentInputSchema,
|
|
62
|
-
CreateCustomAssertionInputSchema: () => CreateCustomAssertionInputSchema,
|
|
63
62
|
CreateEvalRunFolderInputSchema: () => CreateEvalRunFolderInputSchema,
|
|
64
63
|
CreateEvalRunInputSchema: () => CreateEvalRunInputSchema,
|
|
65
64
|
CreateEvalScheduleInputSchema: () => CreateEvalScheduleInputSchema,
|
|
@@ -73,7 +72,6 @@ __export(index_exports, {
|
|
|
73
72
|
CreateTemplateInputSchema: () => CreateTemplateInputSchema,
|
|
74
73
|
CreateTestScenarioInputSchema: () => CreateTestScenarioInputSchema,
|
|
75
74
|
CreateTestSuiteInputSchema: () => CreateTestSuiteInputSchema,
|
|
76
|
-
CustomAssertionSchema: () => CustomAssertionSchema,
|
|
77
75
|
DEFAULT_EVALUATOR_SYSTEM_PROMPT: () => DEFAULT_EVALUATOR_SYSTEM_PROMPT,
|
|
78
76
|
DEFAULT_JUDGE_MODEL: () => DEFAULT_JUDGE_MODEL,
|
|
79
77
|
DiffContentSchema: () => DiffContentSchema,
|
|
@@ -175,7 +173,6 @@ __export(index_exports, {
|
|
|
175
173
|
TriggerSchema: () => TriggerSchema,
|
|
176
174
|
TriggerType: () => TriggerType,
|
|
177
175
|
UpdateAgentInputSchema: () => UpdateAgentInputSchema,
|
|
178
|
-
UpdateCustomAssertionInputSchema: () => UpdateCustomAssertionInputSchema,
|
|
179
176
|
UpdateEvalRunFolderInputSchema: () => UpdateEvalRunFolderInputSchema,
|
|
180
177
|
UpdateEvalScheduleInputSchema: () => UpdateEvalScheduleInputSchema,
|
|
181
178
|
UpdateMcpInputSchema: () => UpdateMcpInputSchema,
|
|
@@ -189,9 +186,6 @@ __export(index_exports, {
|
|
|
189
186
|
UpdateTestSuiteInputSchema: () => UpdateTestSuiteInputSchema,
|
|
190
187
|
VitestTestSchema: () => VitestTestSchema,
|
|
191
188
|
formatTraceEventLine: () => formatTraceEventLine,
|
|
192
|
-
getBuildPassedConfig: () => getBuildPassedConfig,
|
|
193
|
-
getLlmJudgeConfig: () => getLlmJudgeConfig,
|
|
194
|
-
getSkillWasCalledConfig: () => getSkillWasCalledConfig,
|
|
195
189
|
getSystemAssertion: () => getSystemAssertion,
|
|
196
190
|
getSystemAssertions: () => getSystemAssertions,
|
|
197
191
|
isSystemAssertionId: () => isSystemAssertionId,
|
|
@@ -818,7 +812,7 @@ var AssertionParameterSchema = import_zod22.z.object({
|
|
|
818
812
|
advanced: import_zod22.z.boolean().optional()
|
|
819
813
|
});
|
|
820
814
|
var ScenarioAssertionLinkSchema = import_zod22.z.object({
|
|
821
|
-
/** ID of the
|
|
815
|
+
/** ID of the system assertion (e.g., 'system:skill_was_called') */
|
|
822
816
|
assertionId: import_zod22.z.string(),
|
|
823
817
|
/** Parameter values for this assertion in this scenario */
|
|
824
818
|
params: import_zod22.z.record(
|
|
@@ -837,8 +831,8 @@ var CostConfigSchema = import_zod22.z.strictObject({
|
|
|
837
831
|
var ToolCalledWithParamConfigSchema = import_zod22.z.strictObject({
|
|
838
832
|
/** Name of the tool that must have been called */
|
|
839
833
|
toolName: import_zod22.z.string().min(1),
|
|
840
|
-
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
841
|
-
expectedParams: import_zod22.z.string().min(1),
|
|
834
|
+
/** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
|
|
835
|
+
expectedParams: import_zod22.z.string().min(1).optional(),
|
|
842
836
|
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
843
837
|
requireSuccess: import_zod22.z.boolean().optional()
|
|
844
838
|
});
|
|
@@ -875,23 +869,33 @@ var LlmJudgeConfigSchema = import_zod22.z.object({
|
|
|
875
869
|
/** User-defined parameters for this assertion */
|
|
876
870
|
parameters: import_zod22.z.array(AssertionParameterSchema).optional()
|
|
877
871
|
});
|
|
872
|
+
var AssertionBaseFields = {
|
|
873
|
+
/** When true, the assertion's pass/fail logic is inverted (NOT operator). */
|
|
874
|
+
negate: import_zod22.z.boolean().optional()
|
|
875
|
+
};
|
|
878
876
|
var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
|
|
879
|
-
type: import_zod22.z.literal("skill_was_called")
|
|
877
|
+
type: import_zod22.z.literal("skill_was_called"),
|
|
878
|
+
...AssertionBaseFields
|
|
880
879
|
});
|
|
881
880
|
var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
|
|
882
|
-
type: import_zod22.z.literal("tool_called_with_param")
|
|
881
|
+
type: import_zod22.z.literal("tool_called_with_param"),
|
|
882
|
+
...AssertionBaseFields
|
|
883
883
|
});
|
|
884
884
|
var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
|
|
885
|
-
type: import_zod22.z.literal("build_passed")
|
|
885
|
+
type: import_zod22.z.literal("build_passed"),
|
|
886
|
+
...AssertionBaseFields
|
|
886
887
|
});
|
|
887
888
|
var CostAssertionSchema = CostConfigSchema.extend({
|
|
888
|
-
type: import_zod22.z.literal("cost")
|
|
889
|
+
type: import_zod22.z.literal("cost"),
|
|
890
|
+
...AssertionBaseFields
|
|
889
891
|
});
|
|
890
892
|
var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
|
|
891
|
-
type: import_zod22.z.literal("llm_judge")
|
|
893
|
+
type: import_zod22.z.literal("llm_judge"),
|
|
894
|
+
...AssertionBaseFields
|
|
892
895
|
});
|
|
893
896
|
var TimeAssertionSchema = TimeConfigSchema.extend({
|
|
894
|
-
type: import_zod22.z.literal("time_limit")
|
|
897
|
+
type: import_zod22.z.literal("time_limit"),
|
|
898
|
+
...AssertionBaseFields
|
|
895
899
|
});
|
|
896
900
|
var AssertionSchema = import_zod22.z.union([
|
|
897
901
|
SkillWasCalledAssertionSchema,
|
|
@@ -907,7 +911,7 @@ var AssertionConfigSchema = import_zod22.z.union([
|
|
|
907
911
|
SkillWasCalledConfigSchema,
|
|
908
912
|
// requires skillNames
|
|
909
913
|
ToolCalledWithParamConfigSchema,
|
|
910
|
-
// requires toolName
|
|
914
|
+
// requires toolName, uses strictObject
|
|
911
915
|
TimeConfigSchema,
|
|
912
916
|
// requires maxDurationMs, uses strictObject
|
|
913
917
|
CostConfigSchema,
|
|
@@ -917,19 +921,6 @@ var AssertionConfigSchema = import_zod22.z.union([
|
|
|
917
921
|
import_zod22.z.object({})
|
|
918
922
|
// fallback empty config
|
|
919
923
|
]);
|
|
920
|
-
var CustomAssertionSchema = TenantEntitySchema.extend({
|
|
921
|
-
/** The assertion type */
|
|
922
|
-
type: AssertionTypeSchema,
|
|
923
|
-
/** Type-specific configuration */
|
|
924
|
-
config: AssertionConfigSchema
|
|
925
|
-
});
|
|
926
|
-
var CreateCustomAssertionInputSchema = CustomAssertionSchema.omit({
|
|
927
|
-
id: true,
|
|
928
|
-
createdAt: true,
|
|
929
|
-
updatedAt: true,
|
|
930
|
-
deleted: true
|
|
931
|
-
});
|
|
932
|
-
var UpdateCustomAssertionInputSchema = CreateCustomAssertionInputSchema.partial();
|
|
933
924
|
function validateAssertionConfig(type, config) {
|
|
934
925
|
switch (type) {
|
|
935
926
|
case "skill_was_called":
|
|
@@ -948,21 +939,6 @@ function validateAssertionConfig(type, config) {
|
|
|
948
939
|
return false;
|
|
949
940
|
}
|
|
950
941
|
}
|
|
951
|
-
function getSkillWasCalledConfig(assertion) {
|
|
952
|
-
if (assertion.type !== "skill_was_called") return null;
|
|
953
|
-
const result = SkillWasCalledConfigSchema.safeParse(assertion.config);
|
|
954
|
-
return result.success ? result.data : null;
|
|
955
|
-
}
|
|
956
|
-
function getBuildPassedConfig(assertion) {
|
|
957
|
-
if (assertion.type !== "build_passed") return null;
|
|
958
|
-
const result = BuildPassedConfigSchema.safeParse(assertion.config);
|
|
959
|
-
return result.success ? result.data : null;
|
|
960
|
-
}
|
|
961
|
-
function getLlmJudgeConfig(assertion) {
|
|
962
|
-
if (assertion.type !== "llm_judge") return null;
|
|
963
|
-
const result = LlmJudgeConfigSchema.safeParse(assertion.config);
|
|
964
|
-
return result.success ? result.data : null;
|
|
965
|
-
}
|
|
966
942
|
|
|
967
943
|
// src/scenario/test-scenario.ts
|
|
968
944
|
var ExpectedFileSchema = import_zod23.z.object({
|
|
@@ -1647,6 +1623,13 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1647
1623
|
label: "Skills",
|
|
1648
1624
|
type: "string",
|
|
1649
1625
|
required: true
|
|
1626
|
+
},
|
|
1627
|
+
{
|
|
1628
|
+
name: "negate",
|
|
1629
|
+
label: "Negate (NOT operator)",
|
|
1630
|
+
type: "boolean",
|
|
1631
|
+
required: false,
|
|
1632
|
+
defaultValue: false
|
|
1650
1633
|
}
|
|
1651
1634
|
]
|
|
1652
1635
|
},
|
|
@@ -1666,7 +1649,7 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1666
1649
|
name: "expectedParams",
|
|
1667
1650
|
label: "Expected Parameters (JSON, substring match)",
|
|
1668
1651
|
type: "string",
|
|
1669
|
-
required: true
|
|
1652
|
+
required: false
|
|
1670
1653
|
},
|
|
1671
1654
|
{
|
|
1672
1655
|
name: "requireSuccess",
|
|
@@ -1675,6 +1658,13 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1675
1658
|
required: false,
|
|
1676
1659
|
defaultValue: false,
|
|
1677
1660
|
advanced: true
|
|
1661
|
+
},
|
|
1662
|
+
{
|
|
1663
|
+
name: "negate",
|
|
1664
|
+
label: "Negate (NOT operator)",
|
|
1665
|
+
type: "boolean",
|
|
1666
|
+
required: false,
|
|
1667
|
+
defaultValue: false
|
|
1678
1668
|
}
|
|
1679
1669
|
]
|
|
1680
1670
|
},
|
|
@@ -1820,7 +1810,6 @@ function getSystemAssertion(id) {
|
|
|
1820
1810
|
CostAssertionSchema,
|
|
1821
1811
|
CostConfigSchema,
|
|
1822
1812
|
CreateAgentInputSchema,
|
|
1823
|
-
CreateCustomAssertionInputSchema,
|
|
1824
1813
|
CreateEvalRunFolderInputSchema,
|
|
1825
1814
|
CreateEvalRunInputSchema,
|
|
1826
1815
|
CreateEvalScheduleInputSchema,
|
|
@@ -1834,7 +1823,6 @@ function getSystemAssertion(id) {
|
|
|
1834
1823
|
CreateTemplateInputSchema,
|
|
1835
1824
|
CreateTestScenarioInputSchema,
|
|
1836
1825
|
CreateTestSuiteInputSchema,
|
|
1837
|
-
CustomAssertionSchema,
|
|
1838
1826
|
DEFAULT_EVALUATOR_SYSTEM_PROMPT,
|
|
1839
1827
|
DEFAULT_JUDGE_MODEL,
|
|
1840
1828
|
DiffContentSchema,
|
|
@@ -1936,7 +1924,6 @@ function getSystemAssertion(id) {
|
|
|
1936
1924
|
TriggerSchema,
|
|
1937
1925
|
TriggerType,
|
|
1938
1926
|
UpdateAgentInputSchema,
|
|
1939
|
-
UpdateCustomAssertionInputSchema,
|
|
1940
1927
|
UpdateEvalRunFolderInputSchema,
|
|
1941
1928
|
UpdateEvalScheduleInputSchema,
|
|
1942
1929
|
UpdateMcpInputSchema,
|
|
@@ -1950,9 +1937,6 @@ function getSystemAssertion(id) {
|
|
|
1950
1937
|
UpdateTestSuiteInputSchema,
|
|
1951
1938
|
VitestTestSchema,
|
|
1952
1939
|
formatTraceEventLine,
|
|
1953
|
-
getBuildPassedConfig,
|
|
1954
|
-
getLlmJudgeConfig,
|
|
1955
|
-
getSkillWasCalledConfig,
|
|
1956
1940
|
getSystemAssertion,
|
|
1957
1941
|
getSystemAssertions,
|
|
1958
1942
|
isSystemAssertionId,
|