@wix/evalforge-types 0.38.0 → 0.39.0
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
- package/build/index.js +57 -0
- package/build/index.js.map +3 -3
- package/build/index.mjs +54 -0
- package/build/index.mjs.map +3 -3
- package/build/types/assertion/assertion.d.ts +34 -0
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/common/index.d.ts +1 -0
- package/build/types/common/tool-names.d.ts +1 -0
- package/build/types/scenario/assertions.d.ts +16 -0
- package/build/types/scenario/test-scenario.d.ts +12 -0
- package/package.json +2 -2
package/build/index.js
CHANGED
|
@@ -916,6 +916,7 @@ var index_exports = {};
|
|
|
916
916
|
__export(index_exports, {
|
|
917
917
|
AVAILABLE_MODEL_IDS: () => AVAILABLE_MODEL_IDS,
|
|
918
918
|
AVAILABLE_RUN_COMMANDS: () => AVAILABLE_RUN_COMMANDS,
|
|
919
|
+
AVAILABLE_TOOL_NAMES: () => AVAILABLE_TOOL_NAMES,
|
|
919
920
|
AgentRunCommand: () => AgentRunCommand,
|
|
920
921
|
AgentRunCommandSchema: () => AgentRunCommandSchema,
|
|
921
922
|
AgentSchema: () => AgentSchema,
|
|
@@ -1033,6 +1034,8 @@ __export(index_exports, {
|
|
|
1033
1034
|
TimeAssertionSchema: () => TimeAssertionSchema,
|
|
1034
1035
|
TimeConfigSchema: () => TimeConfigSchema,
|
|
1035
1036
|
TokenUsageSchema: () => TokenUsageSchema,
|
|
1037
|
+
ToolCalledWithParamAssertionSchema: () => ToolCalledWithParamAssertionSchema,
|
|
1038
|
+
ToolCalledWithParamConfigSchema: () => ToolCalledWithParamConfigSchema,
|
|
1036
1039
|
ToolTestSchema: () => ToolTestSchema,
|
|
1037
1040
|
TriggerMetadataSchema: () => TriggerMetadataSchema,
|
|
1038
1041
|
TriggerSchema: () => TriggerSchema,
|
|
@@ -1148,6 +1151,17 @@ var RuleInputBaseSchema = RuleSchema.omit({
|
|
|
1148
1151
|
var CreateRuleInputSchema = RuleInputBaseSchema;
|
|
1149
1152
|
var UpdateRuleInputSchema = RuleInputBaseSchema.partial();
|
|
1150
1153
|
|
|
1154
|
+
// src/common/tool-names.ts
|
|
1155
|
+
var AVAILABLE_TOOL_NAMES = [
|
|
1156
|
+
"Bash",
|
|
1157
|
+
"Edit",
|
|
1158
|
+
"Glob",
|
|
1159
|
+
"Grep",
|
|
1160
|
+
"Read",
|
|
1161
|
+
"Skill",
|
|
1162
|
+
"Write"
|
|
1163
|
+
];
|
|
1164
|
+
|
|
1151
1165
|
// src/target/target.ts
|
|
1152
1166
|
var TargetSchema = TenantEntitySchema.extend({
|
|
1153
1167
|
// Base for all testable entities
|
|
@@ -1486,6 +1500,13 @@ var SkillWasCalledAssertionSchema = import_zod21.z.object({
|
|
|
1486
1500
|
/** Names of the skills that must have been called (matched against trace Skill tool args) */
|
|
1487
1501
|
skillNames: import_zod21.z.array(import_zod21.z.string().min(1)).min(1)
|
|
1488
1502
|
});
|
|
1503
|
+
var ToolCalledWithParamAssertionSchema = import_zod21.z.object({
|
|
1504
|
+
type: import_zod21.z.literal("tool_called_with_param"),
|
|
1505
|
+
/** Name of the tool that must have been called */
|
|
1506
|
+
toolName: import_zod21.z.string().min(1),
|
|
1507
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
1508
|
+
expectedParams: import_zod21.z.string().min(1)
|
|
1509
|
+
});
|
|
1489
1510
|
var BuildPassedAssertionSchema = import_zod21.z.object({
|
|
1490
1511
|
type: import_zod21.z.literal("build_passed"),
|
|
1491
1512
|
/** Command to run (default: "yarn build") */
|
|
@@ -1518,6 +1539,7 @@ var TimeAssertionSchema = import_zod21.z.object({
|
|
|
1518
1539
|
});
|
|
1519
1540
|
var AssertionSchema = import_zod21.z.union([
|
|
1520
1541
|
SkillWasCalledAssertionSchema,
|
|
1542
|
+
ToolCalledWithParamAssertionSchema,
|
|
1521
1543
|
BuildPassedAssertionSchema,
|
|
1522
1544
|
TimeAssertionSchema,
|
|
1523
1545
|
CostAssertionSchema,
|
|
@@ -1565,6 +1587,7 @@ var import_zod24 = require("zod");
|
|
|
1565
1587
|
var import_zod23 = require("zod");
|
|
1566
1588
|
var AssertionTypeSchema = import_zod23.z.enum([
|
|
1567
1589
|
"skill_was_called",
|
|
1590
|
+
"tool_called_with_param",
|
|
1568
1591
|
"build_passed",
|
|
1569
1592
|
"time_limit",
|
|
1570
1593
|
"cost",
|
|
@@ -1606,6 +1629,12 @@ var CostConfigSchema = import_zod23.z.strictObject({
|
|
|
1606
1629
|
/** Maximum allowed cost in USD */
|
|
1607
1630
|
maxCostUsd: import_zod23.z.number().positive()
|
|
1608
1631
|
});
|
|
1632
|
+
var ToolCalledWithParamConfigSchema = import_zod23.z.strictObject({
|
|
1633
|
+
/** Name of the tool that must have been called */
|
|
1634
|
+
toolName: import_zod23.z.string().min(1),
|
|
1635
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
1636
|
+
expectedParams: import_zod23.z.string().min(1)
|
|
1637
|
+
});
|
|
1609
1638
|
var BuildPassedConfigSchema = import_zod23.z.strictObject({
|
|
1610
1639
|
/** Command to run (default: "yarn build") */
|
|
1611
1640
|
command: import_zod23.z.string().optional(),
|
|
@@ -1646,6 +1675,8 @@ var AssertionConfigSchema = import_zod23.z.union([
|
|
|
1646
1675
|
// requires prompt - check first
|
|
1647
1676
|
SkillWasCalledConfigSchema,
|
|
1648
1677
|
// requires skillNames
|
|
1678
|
+
ToolCalledWithParamConfigSchema,
|
|
1679
|
+
// requires toolName + expectedParams, uses strictObject
|
|
1649
1680
|
TimeConfigSchema,
|
|
1650
1681
|
// requires maxDurationMs, uses strictObject
|
|
1651
1682
|
CostConfigSchema,
|
|
@@ -1674,6 +1705,8 @@ function validateAssertionConfig(type, config) {
|
|
|
1674
1705
|
return SkillWasCalledConfigSchema.safeParse(config).success;
|
|
1675
1706
|
case "cost":
|
|
1676
1707
|
return CostConfigSchema.safeParse(config).success;
|
|
1708
|
+
case "tool_called_with_param":
|
|
1709
|
+
return ToolCalledWithParamConfigSchema.safeParse(config).success;
|
|
1677
1710
|
case "build_passed":
|
|
1678
1711
|
return BuildPassedConfigSchema.safeParse(config).success;
|
|
1679
1712
|
case "time_limit":
|
|
@@ -2182,6 +2215,7 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
|
2182
2215
|
// src/assertion/system-assertions.ts
|
|
2183
2216
|
var SYSTEM_ASSERTION_IDS = {
|
|
2184
2217
|
SKILL_WAS_CALLED: "system:skill_was_called",
|
|
2218
|
+
TOOL_CALLED_WITH_PARAM: "system:tool_called_with_param",
|
|
2185
2219
|
BUILD_PASSED: "system:build_passed",
|
|
2186
2220
|
TIME_LIMIT: "system:time_limit",
|
|
2187
2221
|
COST: "system:cost",
|
|
@@ -2205,6 +2239,26 @@ var SYSTEM_ASSERTIONS = {
|
|
|
2205
2239
|
}
|
|
2206
2240
|
]
|
|
2207
2241
|
},
|
|
2242
|
+
[SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM]: {
|
|
2243
|
+
id: SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM,
|
|
2244
|
+
name: "Tool Called With Param",
|
|
2245
|
+
description: "Check that a tool was called with expected parameters",
|
|
2246
|
+
type: "tool_called_with_param",
|
|
2247
|
+
parameters: [
|
|
2248
|
+
{
|
|
2249
|
+
name: "toolName",
|
|
2250
|
+
label: "Tool Name",
|
|
2251
|
+
type: "string",
|
|
2252
|
+
required: true
|
|
2253
|
+
},
|
|
2254
|
+
{
|
|
2255
|
+
name: "expectedParams",
|
|
2256
|
+
label: "Expected Parameters (JSON, substring match)",
|
|
2257
|
+
type: "string",
|
|
2258
|
+
required: true
|
|
2259
|
+
}
|
|
2260
|
+
]
|
|
2261
|
+
},
|
|
2208
2262
|
[SYSTEM_ASSERTION_IDS.BUILD_PASSED]: {
|
|
2209
2263
|
id: SYSTEM_ASSERTION_IDS.BUILD_PASSED,
|
|
2210
2264
|
name: "Build Passed",
|
|
@@ -2323,6 +2377,7 @@ function getSystemAssertion(id) {
|
|
|
2323
2377
|
0 && (module.exports = {
|
|
2324
2378
|
AVAILABLE_MODEL_IDS,
|
|
2325
2379
|
AVAILABLE_RUN_COMMANDS,
|
|
2380
|
+
AVAILABLE_TOOL_NAMES,
|
|
2326
2381
|
AgentRunCommand,
|
|
2327
2382
|
AgentRunCommandSchema,
|
|
2328
2383
|
AgentSchema,
|
|
@@ -2440,6 +2495,8 @@ function getSystemAssertion(id) {
|
|
|
2440
2495
|
TimeAssertionSchema,
|
|
2441
2496
|
TimeConfigSchema,
|
|
2442
2497
|
TokenUsageSchema,
|
|
2498
|
+
ToolCalledWithParamAssertionSchema,
|
|
2499
|
+
ToolCalledWithParamConfigSchema,
|
|
2443
2500
|
ToolTestSchema,
|
|
2444
2501
|
TriggerMetadataSchema,
|
|
2445
2502
|
TriggerSchema,
|