@wix/evalforge-types 0.38.0 → 0.39.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +57 -0
- package/build/index.js.map +3 -3
- package/build/index.mjs +54 -0
- package/build/index.mjs.map +3 -3
- package/build/types/assertion/assertion.d.ts +34 -0
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/common/index.d.ts +1 -0
- package/build/types/common/tool-names.d.ts +1 -0
- package/build/types/scenario/assertions.d.ts +16 -0
- package/build/types/scenario/test-scenario.d.ts +12 -0
- package/package.json +2 -2
package/build/index.mjs
CHANGED
|
@@ -991,6 +991,17 @@ var RuleInputBaseSchema = RuleSchema.omit({
|
|
|
991
991
|
var CreateRuleInputSchema = RuleInputBaseSchema;
|
|
992
992
|
var UpdateRuleInputSchema = RuleInputBaseSchema.partial();
|
|
993
993
|
|
|
994
|
+
// src/common/tool-names.ts
|
|
995
|
+
var AVAILABLE_TOOL_NAMES = [
|
|
996
|
+
"Bash",
|
|
997
|
+
"Edit",
|
|
998
|
+
"Glob",
|
|
999
|
+
"Grep",
|
|
1000
|
+
"Read",
|
|
1001
|
+
"Skill",
|
|
1002
|
+
"Write"
|
|
1003
|
+
];
|
|
1004
|
+
|
|
994
1005
|
// src/target/target.ts
|
|
995
1006
|
var TargetSchema = TenantEntitySchema.extend({
|
|
996
1007
|
// Base for all testable entities
|
|
@@ -1329,6 +1340,13 @@ var SkillWasCalledAssertionSchema = z21.object({
|
|
|
1329
1340
|
/** Names of the skills that must have been called (matched against trace Skill tool args) */
|
|
1330
1341
|
skillNames: z21.array(z21.string().min(1)).min(1)
|
|
1331
1342
|
});
|
|
1343
|
+
var ToolCalledWithParamAssertionSchema = z21.object({
|
|
1344
|
+
type: z21.literal("tool_called_with_param"),
|
|
1345
|
+
/** Name of the tool that must have been called */
|
|
1346
|
+
toolName: z21.string().min(1),
|
|
1347
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
1348
|
+
expectedParams: z21.string().min(1)
|
|
1349
|
+
});
|
|
1332
1350
|
var BuildPassedAssertionSchema = z21.object({
|
|
1333
1351
|
type: z21.literal("build_passed"),
|
|
1334
1352
|
/** Command to run (default: "yarn build") */
|
|
@@ -1361,6 +1379,7 @@ var TimeAssertionSchema = z21.object({
|
|
|
1361
1379
|
});
|
|
1362
1380
|
var AssertionSchema = z21.union([
|
|
1363
1381
|
SkillWasCalledAssertionSchema,
|
|
1382
|
+
ToolCalledWithParamAssertionSchema,
|
|
1364
1383
|
BuildPassedAssertionSchema,
|
|
1365
1384
|
TimeAssertionSchema,
|
|
1366
1385
|
CostAssertionSchema,
|
|
@@ -1408,6 +1427,7 @@ import { z as z24 } from "zod";
|
|
|
1408
1427
|
import { z as z23 } from "zod";
|
|
1409
1428
|
var AssertionTypeSchema = z23.enum([
|
|
1410
1429
|
"skill_was_called",
|
|
1430
|
+
"tool_called_with_param",
|
|
1411
1431
|
"build_passed",
|
|
1412
1432
|
"time_limit",
|
|
1413
1433
|
"cost",
|
|
@@ -1449,6 +1469,12 @@ var CostConfigSchema = z23.strictObject({
|
|
|
1449
1469
|
/** Maximum allowed cost in USD */
|
|
1450
1470
|
maxCostUsd: z23.number().positive()
|
|
1451
1471
|
});
|
|
1472
|
+
var ToolCalledWithParamConfigSchema = z23.strictObject({
|
|
1473
|
+
/** Name of the tool that must have been called */
|
|
1474
|
+
toolName: z23.string().min(1),
|
|
1475
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
1476
|
+
expectedParams: z23.string().min(1)
|
|
1477
|
+
});
|
|
1452
1478
|
var BuildPassedConfigSchema = z23.strictObject({
|
|
1453
1479
|
/** Command to run (default: "yarn build") */
|
|
1454
1480
|
command: z23.string().optional(),
|
|
@@ -1489,6 +1515,8 @@ var AssertionConfigSchema = z23.union([
|
|
|
1489
1515
|
// requires prompt - check first
|
|
1490
1516
|
SkillWasCalledConfigSchema,
|
|
1491
1517
|
// requires skillNames
|
|
1518
|
+
ToolCalledWithParamConfigSchema,
|
|
1519
|
+
// requires toolName + expectedParams, uses strictObject
|
|
1492
1520
|
TimeConfigSchema,
|
|
1493
1521
|
// requires maxDurationMs, uses strictObject
|
|
1494
1522
|
CostConfigSchema,
|
|
@@ -1517,6 +1545,8 @@ function validateAssertionConfig(type, config) {
|
|
|
1517
1545
|
return SkillWasCalledConfigSchema.safeParse(config).success;
|
|
1518
1546
|
case "cost":
|
|
1519
1547
|
return CostConfigSchema.safeParse(config).success;
|
|
1548
|
+
case "tool_called_with_param":
|
|
1549
|
+
return ToolCalledWithParamConfigSchema.safeParse(config).success;
|
|
1520
1550
|
case "build_passed":
|
|
1521
1551
|
return BuildPassedConfigSchema.safeParse(config).success;
|
|
1522
1552
|
case "time_limit":
|
|
@@ -2025,6 +2055,7 @@ var UpdateTemplateInputSchema = CreateTemplateInputSchema.partial();
|
|
|
2025
2055
|
// src/assertion/system-assertions.ts
|
|
2026
2056
|
var SYSTEM_ASSERTION_IDS = {
|
|
2027
2057
|
SKILL_WAS_CALLED: "system:skill_was_called",
|
|
2058
|
+
TOOL_CALLED_WITH_PARAM: "system:tool_called_with_param",
|
|
2028
2059
|
BUILD_PASSED: "system:build_passed",
|
|
2029
2060
|
TIME_LIMIT: "system:time_limit",
|
|
2030
2061
|
COST: "system:cost",
|
|
@@ -2048,6 +2079,26 @@ var SYSTEM_ASSERTIONS = {
|
|
|
2048
2079
|
}
|
|
2049
2080
|
]
|
|
2050
2081
|
},
|
|
2082
|
+
[SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM]: {
|
|
2083
|
+
id: SYSTEM_ASSERTION_IDS.TOOL_CALLED_WITH_PARAM,
|
|
2084
|
+
name: "Tool Called With Param",
|
|
2085
|
+
description: "Check that a tool was called with expected parameters",
|
|
2086
|
+
type: "tool_called_with_param",
|
|
2087
|
+
parameters: [
|
|
2088
|
+
{
|
|
2089
|
+
name: "toolName",
|
|
2090
|
+
label: "Tool Name",
|
|
2091
|
+
type: "string",
|
|
2092
|
+
required: true
|
|
2093
|
+
},
|
|
2094
|
+
{
|
|
2095
|
+
name: "expectedParams",
|
|
2096
|
+
label: "Expected Parameters (JSON, substring match)",
|
|
2097
|
+
type: "string",
|
|
2098
|
+
required: true
|
|
2099
|
+
}
|
|
2100
|
+
]
|
|
2101
|
+
},
|
|
2051
2102
|
[SYSTEM_ASSERTION_IDS.BUILD_PASSED]: {
|
|
2052
2103
|
id: SYSTEM_ASSERTION_IDS.BUILD_PASSED,
|
|
2053
2104
|
name: "Build Passed",
|
|
@@ -2166,6 +2217,7 @@ var export_ClaudeModel = import_types.ClaudeModel;
|
|
|
2166
2217
|
export {
|
|
2167
2218
|
AVAILABLE_MODEL_IDS,
|
|
2168
2219
|
AVAILABLE_RUN_COMMANDS,
|
|
2220
|
+
AVAILABLE_TOOL_NAMES,
|
|
2169
2221
|
AgentRunCommand,
|
|
2170
2222
|
AgentRunCommandSchema,
|
|
2171
2223
|
AgentSchema,
|
|
@@ -2283,6 +2335,8 @@ export {
|
|
|
2283
2335
|
TimeAssertionSchema,
|
|
2284
2336
|
TimeConfigSchema,
|
|
2285
2337
|
TokenUsageSchema,
|
|
2338
|
+
ToolCalledWithParamAssertionSchema,
|
|
2339
|
+
ToolCalledWithParamConfigSchema,
|
|
2286
2340
|
ToolTestSchema,
|
|
2287
2341
|
TriggerMetadataSchema,
|
|
2288
2342
|
TriggerSchema,
|