@wix/evalforge-types 0.59.0 → 0.61.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +35 -51
- package/build/index.js.map +2 -2
- package/build/index.mjs +35 -45
- package/build/index.mjs.map +3 -3
- package/build/types/assertion/assertion.d.ts +22 -193
- package/build/types/assertion/index.d.ts +1 -1
- package/build/types/assertion/system-assertions.d.ts +1 -1
- package/build/types/scenario/test-scenario.d.ts +21 -3
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -614,7 +614,7 @@ var AssertionParameterSchema = z22.object({
|
|
|
614
614
|
advanced: z22.boolean().optional()
|
|
615
615
|
});
|
|
616
616
|
var ScenarioAssertionLinkSchema = z22.object({
|
|
617
|
-
/** ID of the assertion */
|
|
617
|
+
/** ID of the system assertion (e.g., 'system:skill_was_called') */
|
|
618
618
|
assertionId: z22.string(),
|
|
619
619
|
/** Parameter values for this assertion in this scenario */
|
|
620
620
|
params: z22.record(
|
|
@@ -633,8 +633,8 @@ var CostConfigSchema = z22.strictObject({
|
|
|
633
633
|
var ToolCalledWithParamConfigSchema = z22.strictObject({
|
|
634
634
|
/** Name of the tool that must have been called */
|
|
635
635
|
toolName: z22.string().min(1),
|
|
636
|
-
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
637
|
-
expectedParams: z22.string().min(1),
|
|
636
|
+
/** JSON string of key-value pairs for expected parameters (substring match). Optional — when omitted, only checks tool presence. */
|
|
637
|
+
expectedParams: z22.string().min(1).optional(),
|
|
638
638
|
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
639
639
|
requireSuccess: z22.boolean().optional()
|
|
640
640
|
});
|
|
@@ -671,23 +671,33 @@ var LlmJudgeConfigSchema = z22.object({
|
|
|
671
671
|
/** User-defined parameters for this assertion */
|
|
672
672
|
parameters: z22.array(AssertionParameterSchema).optional()
|
|
673
673
|
});
|
|
674
|
+
var AssertionBaseFields = {
|
|
675
|
+
/** When true, the assertion's pass/fail logic is inverted (NOT operator). */
|
|
676
|
+
negate: z22.boolean().optional()
|
|
677
|
+
};
|
|
674
678
|
var SkillWasCalledAssertionSchema = SkillWasCalledConfigSchema.extend({
|
|
675
|
-
type: z22.literal("skill_was_called")
|
|
679
|
+
type: z22.literal("skill_was_called"),
|
|
680
|
+
...AssertionBaseFields
|
|
676
681
|
});
|
|
677
682
|
var ToolCalledWithParamAssertionSchema = ToolCalledWithParamConfigSchema.extend({
|
|
678
|
-
type: z22.literal("tool_called_with_param")
|
|
683
|
+
type: z22.literal("tool_called_with_param"),
|
|
684
|
+
...AssertionBaseFields
|
|
679
685
|
});
|
|
680
686
|
var BuildPassedAssertionSchema = BuildPassedConfigSchema.extend({
|
|
681
|
-
type: z22.literal("build_passed")
|
|
687
|
+
type: z22.literal("build_passed"),
|
|
688
|
+
...AssertionBaseFields
|
|
682
689
|
});
|
|
683
690
|
var CostAssertionSchema = CostConfigSchema.extend({
|
|
684
|
-
type: z22.literal("cost")
|
|
691
|
+
type: z22.literal("cost"),
|
|
692
|
+
...AssertionBaseFields
|
|
685
693
|
});
|
|
686
694
|
var LlmJudgeAssertionSchema = LlmJudgeConfigSchema.extend({
|
|
687
|
-
type: z22.literal("llm_judge")
|
|
695
|
+
type: z22.literal("llm_judge"),
|
|
696
|
+
...AssertionBaseFields
|
|
688
697
|
});
|
|
689
698
|
var TimeAssertionSchema = TimeConfigSchema.extend({
|
|
690
|
-
type: z22.literal("time_limit")
|
|
699
|
+
type: z22.literal("time_limit"),
|
|
700
|
+
...AssertionBaseFields
|
|
691
701
|
});
|
|
692
702
|
var AssertionSchema = z22.union([
|
|
693
703
|
SkillWasCalledAssertionSchema,
|
|
@@ -703,7 +713,7 @@ var AssertionConfigSchema = z22.union([
|
|
|
703
713
|
SkillWasCalledConfigSchema,
|
|
704
714
|
// requires skillNames
|
|
705
715
|
ToolCalledWithParamConfigSchema,
|
|
706
|
-
// requires toolName
|
|
716
|
+
// requires toolName, uses strictObject
|
|
707
717
|
TimeConfigSchema,
|
|
708
718
|
// requires maxDurationMs, uses strictObject
|
|
709
719
|
CostConfigSchema,
|
|
@@ -713,19 +723,6 @@ var AssertionConfigSchema = z22.union([
|
|
|
713
723
|
z22.object({})
|
|
714
724
|
// fallback empty config
|
|
715
725
|
]);
|
|
716
|
-
var CustomAssertionSchema = TenantEntitySchema.extend({
|
|
717
|
-
/** The assertion type */
|
|
718
|
-
type: AssertionTypeSchema,
|
|
719
|
-
/** Type-specific configuration */
|
|
720
|
-
config: AssertionConfigSchema
|
|
721
|
-
});
|
|
722
|
-
var CreateCustomAssertionInputSchema = CustomAssertionSchema.omit({
|
|
723
|
-
id: true,
|
|
724
|
-
createdAt: true,
|
|
725
|
-
updatedAt: true,
|
|
726
|
-
deleted: true
|
|
727
|
-
});
|
|
728
|
-
var UpdateCustomAssertionInputSchema = CreateCustomAssertionInputSchema.partial();
|
|
729
726
|
function validateAssertionConfig(type, config) {
|
|
730
727
|
switch (type) {
|
|
731
728
|
case "skill_was_called":
|
|
@@ -744,21 +741,6 @@ function validateAssertionConfig(type, config) {
|
|
|
744
741
|
return false;
|
|
745
742
|
}
|
|
746
743
|
}
|
|
747
|
-
function getSkillWasCalledConfig(assertion) {
|
|
748
|
-
if (assertion.type !== "skill_was_called") return null;
|
|
749
|
-
const result = SkillWasCalledConfigSchema.safeParse(assertion.config);
|
|
750
|
-
return result.success ? result.data : null;
|
|
751
|
-
}
|
|
752
|
-
function getBuildPassedConfig(assertion) {
|
|
753
|
-
if (assertion.type !== "build_passed") return null;
|
|
754
|
-
const result = BuildPassedConfigSchema.safeParse(assertion.config);
|
|
755
|
-
return result.success ? result.data : null;
|
|
756
|
-
}
|
|
757
|
-
function getLlmJudgeConfig(assertion) {
|
|
758
|
-
if (assertion.type !== "llm_judge") return null;
|
|
759
|
-
const result = LlmJudgeConfigSchema.safeParse(assertion.config);
|
|
760
|
-
return result.success ? result.data : null;
|
|
761
|
-
}
|
|
762
744
|
|
|
763
745
|
// src/scenario/test-scenario.ts
|
|
764
746
|
var ExpectedFileSchema = z23.object({
|
|
@@ -1443,6 +1425,13 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1443
1425
|
label: "Skills",
|
|
1444
1426
|
type: "string",
|
|
1445
1427
|
required: true
|
|
1428
|
+
},
|
|
1429
|
+
{
|
|
1430
|
+
name: "negate",
|
|
1431
|
+
label: "Negate (NOT operator)",
|
|
1432
|
+
type: "boolean",
|
|
1433
|
+
required: false,
|
|
1434
|
+
defaultValue: false
|
|
1446
1435
|
}
|
|
1447
1436
|
]
|
|
1448
1437
|
},
|
|
@@ -1462,7 +1451,7 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1462
1451
|
name: "expectedParams",
|
|
1463
1452
|
label: "Expected Parameters (JSON, substring match)",
|
|
1464
1453
|
type: "string",
|
|
1465
|
-
required: true
|
|
1454
|
+
required: false
|
|
1466
1455
|
},
|
|
1467
1456
|
{
|
|
1468
1457
|
name: "requireSuccess",
|
|
@@ -1471,6 +1460,13 @@ var SYSTEM_ASSERTIONS = {
|
|
|
1471
1460
|
required: false,
|
|
1472
1461
|
defaultValue: false,
|
|
1473
1462
|
advanced: true
|
|
1463
|
+
},
|
|
1464
|
+
{
|
|
1465
|
+
name: "negate",
|
|
1466
|
+
label: "Negate (NOT operator)",
|
|
1467
|
+
type: "boolean",
|
|
1468
|
+
required: false,
|
|
1469
|
+
defaultValue: false
|
|
1474
1470
|
}
|
|
1475
1471
|
]
|
|
1476
1472
|
},
|
|
@@ -1615,7 +1611,6 @@ export {
|
|
|
1615
1611
|
CostAssertionSchema,
|
|
1616
1612
|
CostConfigSchema,
|
|
1617
1613
|
CreateAgentInputSchema,
|
|
1618
|
-
CreateCustomAssertionInputSchema,
|
|
1619
1614
|
CreateEvalRunFolderInputSchema,
|
|
1620
1615
|
CreateEvalRunInputSchema,
|
|
1621
1616
|
CreateEvalScheduleInputSchema,
|
|
@@ -1629,7 +1624,6 @@ export {
|
|
|
1629
1624
|
CreateTemplateInputSchema,
|
|
1630
1625
|
CreateTestScenarioInputSchema,
|
|
1631
1626
|
CreateTestSuiteInputSchema,
|
|
1632
|
-
CustomAssertionSchema,
|
|
1633
1627
|
DEFAULT_EVALUATOR_SYSTEM_PROMPT,
|
|
1634
1628
|
DEFAULT_JUDGE_MODEL,
|
|
1635
1629
|
DiffContentSchema,
|
|
@@ -1731,7 +1725,6 @@ export {
|
|
|
1731
1725
|
TriggerSchema,
|
|
1732
1726
|
TriggerType,
|
|
1733
1727
|
UpdateAgentInputSchema,
|
|
1734
|
-
UpdateCustomAssertionInputSchema,
|
|
1735
1728
|
UpdateEvalRunFolderInputSchema,
|
|
1736
1729
|
UpdateEvalScheduleInputSchema,
|
|
1737
1730
|
UpdateMcpInputSchema,
|
|
@@ -1745,9 +1738,6 @@ export {
|
|
|
1745
1738
|
UpdateTestSuiteInputSchema,
|
|
1746
1739
|
VitestTestSchema,
|
|
1747
1740
|
formatTraceEventLine,
|
|
1748
|
-
getBuildPassedConfig,
|
|
1749
|
-
getLlmJudgeConfig,
|
|
1750
|
-
getSkillWasCalledConfig,
|
|
1751
1741
|
getSystemAssertion,
|
|
1752
1742
|
getSystemAssertions,
|
|
1753
1743
|
isSystemAssertionId,
|