@wix/evalforge-types 0.38.0 → 0.40.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +57 -17
- package/build/index.js.map +3 -3
- package/build/index.mjs +54 -17
- package/build/index.mjs.map +3 -3
- package/build/types/assertion/assertion.d.ts +34 -5
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/common/index.d.ts +1 -0
- package/build/types/common/tool-names.d.ts +1 -0
- package/build/types/scenario/assertions.d.ts +16 -2
- package/build/types/scenario/test-scenario.d.ts +12 -3
- package/package.json +2 -2
|
@@ -2,6 +2,7 @@ import { z } from 'zod';
|
|
|
2
2
|
/**
|
|
3
3
|
* Assertion types:
|
|
4
4
|
* - skill_was_called: Checks if a specific skill was invoked (deterministic, system-level)
|
|
5
|
+
* - tool_called_with_param: Checks if a tool was called with expected parameters (deterministic, system-level)
|
|
5
6
|
* - build_passed: Runs a command and checks exit code (deterministic, system-level)
|
|
6
7
|
* - time_limit: Checks that scenario completed within a duration threshold (deterministic, system-level)
|
|
7
8
|
* - cost: Checks that scenario LLM cost stays within a USD threshold (deterministic, system-level)
|
|
@@ -9,6 +10,7 @@ import { z } from 'zod';
|
|
|
9
10
|
*/
|
|
10
11
|
export declare const AssertionTypeSchema: z.ZodEnum<{
|
|
11
12
|
skill_was_called: "skill_was_called";
|
|
13
|
+
tool_called_with_param: "tool_called_with_param";
|
|
12
14
|
build_passed: "build_passed";
|
|
13
15
|
cost: "cost";
|
|
14
16
|
llm_judge: "llm_judge";
|
|
@@ -68,6 +70,15 @@ export declare const CostConfigSchema: z.ZodObject<{
|
|
|
68
70
|
maxCostUsd: z.ZodNumber;
|
|
69
71
|
}, z.core.$strict>;
|
|
70
72
|
export type CostConfig = z.infer<typeof CostConfigSchema>;
|
|
73
|
+
/** Configuration for tool_called_with_param assertion type.
|
|
74
|
+
* Uses strictObject to reject objects with unknown keys.
|
|
75
|
+
*/
|
|
76
|
+
export declare const ToolCalledWithParamConfigSchema: z.ZodObject<{
|
|
77
|
+
/** Name of the tool that must have been called */
|
|
78
|
+
toolName: z.ZodString;
|
|
79
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
80
|
+
expectedParams: z.ZodString;
|
|
81
|
+
}, z.core.$strict>;
|
|
71
82
|
/**
|
|
72
83
|
* Configuration for build_passed assertion type.
|
|
73
84
|
* Uses strictObject to reject objects with unknown keys (prevents matching LlmJudge configs).
|
|
@@ -94,7 +105,6 @@ export type TimeConfig = z.infer<typeof TimeConfigSchema>;
|
|
|
94
105
|
*/
|
|
95
106
|
export declare const LlmJudgeConfigSchema: z.ZodObject<{
|
|
96
107
|
prompt: z.ZodString;
|
|
97
|
-
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
98
108
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
99
109
|
model: z.ZodOptional<z.ZodString>;
|
|
100
110
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
@@ -121,7 +131,6 @@ export type LlmJudgeConfig = z.infer<typeof LlmJudgeConfigSchema>;
|
|
|
121
131
|
*/
|
|
122
132
|
export declare const AssertionConfigSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
123
133
|
prompt: z.ZodString;
|
|
124
|
-
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
125
134
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
126
135
|
model: z.ZodOptional<z.ZodString>;
|
|
127
136
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
@@ -141,6 +150,11 @@ export declare const AssertionConfigSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
|
141
150
|
}, z.core.$strip>, z.ZodObject<{
|
|
142
151
|
skillNames: z.ZodArray<z.ZodString>;
|
|
143
152
|
}, z.core.$strip>, z.ZodObject<{
|
|
153
|
+
/** Name of the tool that must have been called */
|
|
154
|
+
toolName: z.ZodString;
|
|
155
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
156
|
+
expectedParams: z.ZodString;
|
|
157
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
144
158
|
/** Maximum allowed duration in milliseconds */
|
|
145
159
|
maxDurationMs: z.ZodNumber;
|
|
146
160
|
}, z.core.$strict>, z.ZodObject<{
|
|
@@ -167,6 +181,7 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
167
181
|
projectId: z.ZodString;
|
|
168
182
|
type: z.ZodEnum<{
|
|
169
183
|
skill_was_called: "skill_was_called";
|
|
184
|
+
tool_called_with_param: "tool_called_with_param";
|
|
170
185
|
build_passed: "build_passed";
|
|
171
186
|
cost: "cost";
|
|
172
187
|
llm_judge: "llm_judge";
|
|
@@ -174,7 +189,6 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
174
189
|
}>;
|
|
175
190
|
config: z.ZodUnion<readonly [z.ZodObject<{
|
|
176
191
|
prompt: z.ZodString;
|
|
177
|
-
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
178
192
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
179
193
|
model: z.ZodOptional<z.ZodString>;
|
|
180
194
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
@@ -194,6 +208,11 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
194
208
|
}, z.core.$strip>, z.ZodObject<{
|
|
195
209
|
skillNames: z.ZodArray<z.ZodString>;
|
|
196
210
|
}, z.core.$strip>, z.ZodObject<{
|
|
211
|
+
/** Name of the tool that must have been called */
|
|
212
|
+
toolName: z.ZodString;
|
|
213
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
214
|
+
expectedParams: z.ZodString;
|
|
215
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
197
216
|
/** Maximum allowed duration in milliseconds */
|
|
198
217
|
maxDurationMs: z.ZodNumber;
|
|
199
218
|
}, z.core.$strict>, z.ZodObject<{
|
|
@@ -213,6 +232,7 @@ export type CustomAssertion = z.infer<typeof CustomAssertionSchema>;
|
|
|
213
232
|
export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
214
233
|
type: z.ZodEnum<{
|
|
215
234
|
skill_was_called: "skill_was_called";
|
|
235
|
+
tool_called_with_param: "tool_called_with_param";
|
|
216
236
|
build_passed: "build_passed";
|
|
217
237
|
cost: "cost";
|
|
218
238
|
llm_judge: "llm_judge";
|
|
@@ -223,7 +243,6 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
223
243
|
projectId: z.ZodString;
|
|
224
244
|
config: z.ZodUnion<readonly [z.ZodObject<{
|
|
225
245
|
prompt: z.ZodString;
|
|
226
|
-
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
227
246
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
228
247
|
model: z.ZodOptional<z.ZodString>;
|
|
229
248
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
@@ -243,6 +262,11 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
243
262
|
}, z.core.$strip>, z.ZodObject<{
|
|
244
263
|
skillNames: z.ZodArray<z.ZodString>;
|
|
245
264
|
}, z.core.$strip>, z.ZodObject<{
|
|
265
|
+
/** Name of the tool that must have been called */
|
|
266
|
+
toolName: z.ZodString;
|
|
267
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
268
|
+
expectedParams: z.ZodString;
|
|
269
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
246
270
|
/** Maximum allowed duration in milliseconds */
|
|
247
271
|
maxDurationMs: z.ZodNumber;
|
|
248
272
|
}, z.core.$strict>, z.ZodObject<{
|
|
@@ -262,6 +286,7 @@ export type CreateCustomAssertionInput = z.infer<typeof CreateCustomAssertionInp
|
|
|
262
286
|
export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
263
287
|
type: z.ZodOptional<z.ZodEnum<{
|
|
264
288
|
skill_was_called: "skill_was_called";
|
|
289
|
+
tool_called_with_param: "tool_called_with_param";
|
|
265
290
|
build_passed: "build_passed";
|
|
266
291
|
cost: "cost";
|
|
267
292
|
llm_judge: "llm_judge";
|
|
@@ -272,7 +297,6 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
272
297
|
projectId: z.ZodOptional<z.ZodString>;
|
|
273
298
|
config: z.ZodOptional<z.ZodUnion<readonly [z.ZodObject<{
|
|
274
299
|
prompt: z.ZodString;
|
|
275
|
-
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
276
300
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
277
301
|
model: z.ZodOptional<z.ZodString>;
|
|
278
302
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
@@ -292,6 +316,11 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
292
316
|
}, z.core.$strip>, z.ZodObject<{
|
|
293
317
|
skillNames: z.ZodArray<z.ZodString>;
|
|
294
318
|
}, z.core.$strip>, z.ZodObject<{
|
|
319
|
+
/** Name of the tool that must have been called */
|
|
320
|
+
toolName: z.ZodString;
|
|
321
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
322
|
+
expectedParams: z.ZodString;
|
|
323
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
295
324
|
/** Maximum allowed duration in milliseconds */
|
|
296
325
|
maxDurationMs: z.ZodNumber;
|
|
297
326
|
}, z.core.$strict>, z.ZodObject<{
|
|
@@ -20,6 +20,7 @@ export interface SystemAssertion {
|
|
|
20
20
|
*/
|
|
21
21
|
export declare const SYSTEM_ASSERTION_IDS: {
|
|
22
22
|
readonly SKILL_WAS_CALLED: "system:skill_was_called";
|
|
23
|
+
readonly TOOL_CALLED_WITH_PARAM: "system:tool_called_with_param";
|
|
23
24
|
readonly BUILD_PASSED: "system:build_passed";
|
|
24
25
|
readonly TIME_LIMIT: "system:time_limit";
|
|
25
26
|
readonly COST: "system:cost";
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const AVAILABLE_TOOL_NAMES: readonly ["Bash", "Edit", "Glob", "Grep", "Read", "Skill", "Write"];
|
|
@@ -11,6 +11,18 @@ export declare const SkillWasCalledAssertionSchema: z.ZodObject<{
|
|
|
11
11
|
skillNames: z.ZodArray<z.ZodString>;
|
|
12
12
|
}, z.core.$strip>;
|
|
13
13
|
export type SkillWasCalledAssertion = z.infer<typeof SkillWasCalledAssertionSchema>;
|
|
14
|
+
/**
|
|
15
|
+
* Assertion: a specific tool must have been called with expected parameters.
|
|
16
|
+
* Checked by inspecting the LLM trace for tool calls with matching name and arguments.
|
|
17
|
+
* Each expected param value is matched as a substring against the actual argument value.
|
|
18
|
+
* All expected params must match on the same tool call for the assertion to pass.
|
|
19
|
+
*/
|
|
20
|
+
export declare const ToolCalledWithParamAssertionSchema: z.ZodObject<{
|
|
21
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
22
|
+
toolName: z.ZodString;
|
|
23
|
+
expectedParams: z.ZodString;
|
|
24
|
+
}, z.core.$strip>;
|
|
25
|
+
export type ToolCalledWithParamAssertion = z.infer<typeof ToolCalledWithParamAssertionSchema>;
|
|
14
26
|
/**
|
|
15
27
|
* Assertion: a build command must exit with the expected code (default 0).
|
|
16
28
|
* Runs the command in the scenario working directory.
|
|
@@ -38,7 +50,6 @@ export type CostAssertion = z.infer<typeof CostAssertionSchema>;
|
|
|
38
50
|
export declare const LlmJudgeAssertionSchema: z.ZodObject<{
|
|
39
51
|
type: z.ZodLiteral<"llm_judge">;
|
|
40
52
|
prompt: z.ZodString;
|
|
41
|
-
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
42
53
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
43
54
|
model: z.ZodOptional<z.ZodString>;
|
|
44
55
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
@@ -62,6 +73,10 @@ export type TimeAssertion = z.infer<typeof TimeAssertionSchema>;
|
|
|
62
73
|
export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
63
74
|
type: z.ZodLiteral<"skill_was_called">;
|
|
64
75
|
skillNames: z.ZodArray<z.ZodString>;
|
|
76
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
77
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
78
|
+
toolName: z.ZodString;
|
|
79
|
+
expectedParams: z.ZodString;
|
|
65
80
|
}, z.core.$strip>, z.ZodObject<{
|
|
66
81
|
type: z.ZodLiteral<"build_passed">;
|
|
67
82
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -75,7 +90,6 @@ export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
|
75
90
|
}, z.core.$strip>, z.ZodObject<{
|
|
76
91
|
type: z.ZodLiteral<"llm_judge">;
|
|
77
92
|
prompt: z.ZodString;
|
|
78
|
-
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
79
93
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
80
94
|
model: z.ZodOptional<z.ZodString>;
|
|
81
95
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
@@ -28,6 +28,10 @@ export declare const TestScenarioSchema: z.ZodObject<{
|
|
|
28
28
|
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
29
29
|
type: z.ZodLiteral<"skill_was_called">;
|
|
30
30
|
skillNames: z.ZodArray<z.ZodString>;
|
|
31
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
32
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
33
|
+
toolName: z.ZodString;
|
|
34
|
+
expectedParams: z.ZodString;
|
|
31
35
|
}, z.core.$strip>, z.ZodObject<{
|
|
32
36
|
type: z.ZodLiteral<"build_passed">;
|
|
33
37
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -41,7 +45,6 @@ export declare const TestScenarioSchema: z.ZodObject<{
|
|
|
41
45
|
}, z.core.$strip>, z.ZodObject<{
|
|
42
46
|
type: z.ZodLiteral<"llm_judge">;
|
|
43
47
|
prompt: z.ZodString;
|
|
44
|
-
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
45
48
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
46
49
|
model: z.ZodOptional<z.ZodString>;
|
|
47
50
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
@@ -66,6 +69,10 @@ export declare const CreateTestScenarioInputSchema: z.ZodObject<{
|
|
|
66
69
|
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
67
70
|
type: z.ZodLiteral<"skill_was_called">;
|
|
68
71
|
skillNames: z.ZodArray<z.ZodString>;
|
|
72
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
73
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
74
|
+
toolName: z.ZodString;
|
|
75
|
+
expectedParams: z.ZodString;
|
|
69
76
|
}, z.core.$strip>, z.ZodObject<{
|
|
70
77
|
type: z.ZodLiteral<"build_passed">;
|
|
71
78
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -79,7 +86,6 @@ export declare const CreateTestScenarioInputSchema: z.ZodObject<{
|
|
|
79
86
|
}, z.core.$strip>, z.ZodObject<{
|
|
80
87
|
type: z.ZodLiteral<"llm_judge">;
|
|
81
88
|
prompt: z.ZodString;
|
|
82
|
-
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
83
89
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
84
90
|
model: z.ZodOptional<z.ZodString>;
|
|
85
91
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
@@ -104,6 +110,10 @@ export declare const UpdateTestScenarioInputSchema: z.ZodObject<{
|
|
|
104
110
|
assertions: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
105
111
|
type: z.ZodLiteral<"skill_was_called">;
|
|
106
112
|
skillNames: z.ZodArray<z.ZodString>;
|
|
113
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
114
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
115
|
+
toolName: z.ZodString;
|
|
116
|
+
expectedParams: z.ZodString;
|
|
107
117
|
}, z.core.$strip>, z.ZodObject<{
|
|
108
118
|
type: z.ZodLiteral<"build_passed">;
|
|
109
119
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -117,7 +127,6 @@ export declare const UpdateTestScenarioInputSchema: z.ZodObject<{
|
|
|
117
127
|
}, z.core.$strip>, z.ZodObject<{
|
|
118
128
|
type: z.ZodLiteral<"llm_judge">;
|
|
119
129
|
prompt: z.ZodString;
|
|
120
|
-
systemPrompt: z.ZodOptional<z.ZodString>;
|
|
121
130
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
122
131
|
model: z.ZodOptional<z.ZodString>;
|
|
123
132
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-types",
|
|
3
|
-
"version": "0.38.0",
|
|
3
|
+
"version": "0.40.0",
|
|
4
4
|
"description": "Unified types for EvalForge agent evaluation system",
|
|
5
5
|
"files": [
|
|
6
6
|
"build"
|
|
@@ -47,5 +47,5 @@
|
|
|
47
47
|
"artifactId": "evalforge-types"
|
|
48
48
|
}
|
|
49
49
|
},
|
|
50
|
-
"falconPackageHash": "
|
|
50
|
+
"falconPackageHash": "b095aa7d94bbecb74ee45be17f04ba1501f2b9fdb2d57b9ad0c3b37f"
|
|
51
51
|
}
|