@wix/evalforge-types 0.53.0 → 0.54.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +376 -391
- package/build/index.js.map +4 -4
- package/build/index.mjs +376 -391
- package/build/index.mjs.map +4 -4
- package/build/types/assertion/assertion.d.ts +106 -4
- package/build/types/scenario/index.d.ts +0 -1
- package/build/types/scenario/test-scenario.d.ts +69 -30
- package/package.json +2 -2
- package/build/types/scenario/assertions.d.ts +0 -98
|
@@ -12,9 +12,9 @@ export declare const AssertionTypeSchema: z.ZodEnum<{
|
|
|
12
12
|
skill_was_called: "skill_was_called";
|
|
13
13
|
tool_called_with_param: "tool_called_with_param";
|
|
14
14
|
build_passed: "build_passed";
|
|
15
|
+
time_limit: "time_limit";
|
|
15
16
|
cost: "cost";
|
|
16
17
|
llm_judge: "llm_judge";
|
|
17
|
-
time_limit: "time_limit";
|
|
18
18
|
}>;
|
|
19
19
|
/**
|
|
20
20
|
* Parameter types supported in assertion parameters.
|
|
@@ -78,6 +78,8 @@ export declare const ToolCalledWithParamConfigSchema: z.ZodObject<{
|
|
|
78
78
|
toolName: z.ZodString;
|
|
79
79
|
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
80
80
|
expectedParams: z.ZodString;
|
|
81
|
+
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
82
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
81
83
|
}, z.core.$strict>;
|
|
82
84
|
/**
|
|
83
85
|
* Configuration for build_passed assertion type.
|
|
@@ -123,6 +125,98 @@ export declare const LlmJudgeConfigSchema: z.ZodObject<{
|
|
|
123
125
|
}, z.core.$strip>>>;
|
|
124
126
|
}, z.core.$strip>;
|
|
125
127
|
export type LlmJudgeConfig = z.infer<typeof LlmJudgeConfigSchema>;
|
|
128
|
+
export declare const SkillWasCalledAssertionSchema: z.ZodObject<{
|
|
129
|
+
skillNames: z.ZodArray<z.ZodString>;
|
|
130
|
+
type: z.ZodLiteral<"skill_was_called">;
|
|
131
|
+
}, z.core.$strip>;
|
|
132
|
+
export type SkillWasCalledAssertion = z.infer<typeof SkillWasCalledAssertionSchema>;
|
|
133
|
+
export declare const ToolCalledWithParamAssertionSchema: z.ZodObject<{
|
|
134
|
+
toolName: z.ZodString;
|
|
135
|
+
expectedParams: z.ZodString;
|
|
136
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
137
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
138
|
+
}, z.core.$strict>;
|
|
139
|
+
export type ToolCalledWithParamAssertion = z.infer<typeof ToolCalledWithParamAssertionSchema>;
|
|
140
|
+
export declare const BuildPassedAssertionSchema: z.ZodObject<{
|
|
141
|
+
command: z.ZodOptional<z.ZodString>;
|
|
142
|
+
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
143
|
+
type: z.ZodLiteral<"build_passed">;
|
|
144
|
+
}, z.core.$strict>;
|
|
145
|
+
export type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;
|
|
146
|
+
export declare const CostAssertionSchema: z.ZodObject<{
|
|
147
|
+
maxCostUsd: z.ZodNumber;
|
|
148
|
+
type: z.ZodLiteral<"cost">;
|
|
149
|
+
}, z.core.$strict>;
|
|
150
|
+
export type CostAssertion = z.infer<typeof CostAssertionSchema>;
|
|
151
|
+
export declare const LlmJudgeAssertionSchema: z.ZodObject<{
|
|
152
|
+
prompt: z.ZodString;
|
|
153
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
154
|
+
model: z.ZodOptional<z.ZodString>;
|
|
155
|
+
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
156
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
157
|
+
parameters: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
158
|
+
name: z.ZodString;
|
|
159
|
+
label: z.ZodString;
|
|
160
|
+
type: z.ZodEnum<{
|
|
161
|
+
string: "string";
|
|
162
|
+
number: "number";
|
|
163
|
+
boolean: "boolean";
|
|
164
|
+
}>;
|
|
165
|
+
required: z.ZodBoolean;
|
|
166
|
+
defaultValue: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean]>>;
|
|
167
|
+
advanced: z.ZodOptional<z.ZodBoolean>;
|
|
168
|
+
}, z.core.$strip>>>;
|
|
169
|
+
type: z.ZodLiteral<"llm_judge">;
|
|
170
|
+
}, z.core.$strip>;
|
|
171
|
+
export type LlmJudgeAssertion = z.infer<typeof LlmJudgeAssertionSchema>;
|
|
172
|
+
export declare const TimeAssertionSchema: z.ZodObject<{
|
|
173
|
+
maxDurationMs: z.ZodNumber;
|
|
174
|
+
type: z.ZodLiteral<"time_limit">;
|
|
175
|
+
}, z.core.$strict>;
|
|
176
|
+
export type TimeAssertion = z.infer<typeof TimeAssertionSchema>;
|
|
177
|
+
/**
|
|
178
|
+
* Union of all inline assertion types.
|
|
179
|
+
* Each assertion has a type literal and type-specific data.
|
|
180
|
+
*/
|
|
181
|
+
export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
182
|
+
skillNames: z.ZodArray<z.ZodString>;
|
|
183
|
+
type: z.ZodLiteral<"skill_was_called">;
|
|
184
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
185
|
+
toolName: z.ZodString;
|
|
186
|
+
expectedParams: z.ZodString;
|
|
187
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
188
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
189
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
190
|
+
command: z.ZodOptional<z.ZodString>;
|
|
191
|
+
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
192
|
+
type: z.ZodLiteral<"build_passed">;
|
|
193
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
194
|
+
maxDurationMs: z.ZodNumber;
|
|
195
|
+
type: z.ZodLiteral<"time_limit">;
|
|
196
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
197
|
+
maxCostUsd: z.ZodNumber;
|
|
198
|
+
type: z.ZodLiteral<"cost">;
|
|
199
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
200
|
+
prompt: z.ZodString;
|
|
201
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
202
|
+
model: z.ZodOptional<z.ZodString>;
|
|
203
|
+
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
204
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
205
|
+
parameters: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
206
|
+
name: z.ZodString;
|
|
207
|
+
label: z.ZodString;
|
|
208
|
+
type: z.ZodEnum<{
|
|
209
|
+
string: "string";
|
|
210
|
+
number: "number";
|
|
211
|
+
boolean: "boolean";
|
|
212
|
+
}>;
|
|
213
|
+
required: z.ZodBoolean;
|
|
214
|
+
defaultValue: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean]>>;
|
|
215
|
+
advanced: z.ZodOptional<z.ZodBoolean>;
|
|
216
|
+
}, z.core.$strip>>>;
|
|
217
|
+
type: z.ZodLiteral<"llm_judge">;
|
|
218
|
+
}, z.core.$strip>]>;
|
|
219
|
+
export type Assertion = z.infer<typeof AssertionSchema>;
|
|
126
220
|
/**
|
|
127
221
|
* Union of all assertion config types.
|
|
128
222
|
* Order matters: schemas with required fields first, then optional-only schemas.
|
|
@@ -154,6 +248,8 @@ export declare const AssertionConfigSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
|
154
248
|
toolName: z.ZodString;
|
|
155
249
|
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
156
250
|
expectedParams: z.ZodString;
|
|
251
|
+
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
252
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
157
253
|
}, z.core.$strict>, z.ZodObject<{
|
|
158
254
|
/** Maximum allowed duration in milliseconds */
|
|
159
255
|
maxDurationMs: z.ZodNumber;
|
|
@@ -183,9 +279,9 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
183
279
|
skill_was_called: "skill_was_called";
|
|
184
280
|
tool_called_with_param: "tool_called_with_param";
|
|
185
281
|
build_passed: "build_passed";
|
|
282
|
+
time_limit: "time_limit";
|
|
186
283
|
cost: "cost";
|
|
187
284
|
llm_judge: "llm_judge";
|
|
188
|
-
time_limit: "time_limit";
|
|
189
285
|
}>;
|
|
190
286
|
config: z.ZodUnion<readonly [z.ZodObject<{
|
|
191
287
|
prompt: z.ZodString;
|
|
@@ -212,6 +308,8 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
212
308
|
toolName: z.ZodString;
|
|
213
309
|
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
214
310
|
expectedParams: z.ZodString;
|
|
311
|
+
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
312
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
215
313
|
}, z.core.$strict>, z.ZodObject<{
|
|
216
314
|
/** Maximum allowed duration in milliseconds */
|
|
217
315
|
maxDurationMs: z.ZodNumber;
|
|
@@ -234,9 +332,9 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
234
332
|
skill_was_called: "skill_was_called";
|
|
235
333
|
tool_called_with_param: "tool_called_with_param";
|
|
236
334
|
build_passed: "build_passed";
|
|
335
|
+
time_limit: "time_limit";
|
|
237
336
|
cost: "cost";
|
|
238
337
|
llm_judge: "llm_judge";
|
|
239
|
-
time_limit: "time_limit";
|
|
240
338
|
}>;
|
|
241
339
|
name: z.ZodString;
|
|
242
340
|
description: z.ZodString;
|
|
@@ -266,6 +364,8 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
266
364
|
toolName: z.ZodString;
|
|
267
365
|
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
268
366
|
expectedParams: z.ZodString;
|
|
367
|
+
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
368
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
269
369
|
}, z.core.$strict>, z.ZodObject<{
|
|
270
370
|
/** Maximum allowed duration in milliseconds */
|
|
271
371
|
maxDurationMs: z.ZodNumber;
|
|
@@ -288,9 +388,9 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
288
388
|
skill_was_called: "skill_was_called";
|
|
289
389
|
tool_called_with_param: "tool_called_with_param";
|
|
290
390
|
build_passed: "build_passed";
|
|
391
|
+
time_limit: "time_limit";
|
|
291
392
|
cost: "cost";
|
|
292
393
|
llm_judge: "llm_judge";
|
|
293
|
-
time_limit: "time_limit";
|
|
294
394
|
}>>;
|
|
295
395
|
name: z.ZodOptional<z.ZodString>;
|
|
296
396
|
description: z.ZodOptional<z.ZodString>;
|
|
@@ -320,6 +420,8 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
320
420
|
toolName: z.ZodString;
|
|
321
421
|
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
322
422
|
expectedParams: z.ZodString;
|
|
423
|
+
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
424
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
323
425
|
}, z.core.$strict>, z.ZodObject<{
|
|
324
426
|
/** Maximum allowed duration in milliseconds */
|
|
325
427
|
maxDurationMs: z.ZodNumber;
|
|
@@ -26,29 +26,42 @@ export declare const TestScenarioSchema: z.ZodObject<{
|
|
|
26
26
|
triggerPrompt: z.ZodString;
|
|
27
27
|
templateId: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
28
28
|
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
29
|
-
type: z.ZodLiteral<"skill_was_called">;
|
|
30
29
|
skillNames: z.ZodArray<z.ZodString>;
|
|
30
|
+
type: z.ZodLiteral<"skill_was_called">;
|
|
31
31
|
}, z.core.$strip>, z.ZodObject<{
|
|
32
|
-
type: z.ZodLiteral<"tool_called_with_param">;
|
|
33
32
|
toolName: z.ZodString;
|
|
34
33
|
expectedParams: z.ZodString;
|
|
35
|
-
|
|
36
|
-
type: z.ZodLiteral<"build_passed">;
|
|
34
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
35
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
36
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
37
37
|
command: z.ZodOptional<z.ZodString>;
|
|
38
38
|
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
39
|
-
|
|
40
|
-
|
|
39
|
+
type: z.ZodLiteral<"build_passed">;
|
|
40
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
41
41
|
maxDurationMs: z.ZodNumber;
|
|
42
|
-
|
|
43
|
-
|
|
42
|
+
type: z.ZodLiteral<"time_limit">;
|
|
43
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
44
44
|
maxCostUsd: z.ZodNumber;
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
type: z.ZodLiteral<"cost">;
|
|
46
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
47
47
|
prompt: z.ZodString;
|
|
48
48
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
49
49
|
model: z.ZodOptional<z.ZodString>;
|
|
50
50
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
51
51
|
temperature: z.ZodOptional<z.ZodNumber>;
|
|
52
|
+
parameters: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
53
|
+
name: z.ZodString;
|
|
54
|
+
label: z.ZodString;
|
|
55
|
+
type: z.ZodEnum<{
|
|
56
|
+
string: "string";
|
|
57
|
+
number: "number";
|
|
58
|
+
boolean: "boolean";
|
|
59
|
+
}>;
|
|
60
|
+
required: z.ZodBoolean;
|
|
61
|
+
defaultValue: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean]>>;
|
|
62
|
+
advanced: z.ZodOptional<z.ZodBoolean>;
|
|
63
|
+
}, z.core.$strip>>>;
|
|
64
|
+
type: z.ZodLiteral<"llm_judge">;
|
|
52
65
|
}, z.core.$strip>]>>>;
|
|
53
66
|
assertionIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
54
67
|
assertionLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -68,29 +81,42 @@ export declare const CreateTestScenarioInputSchema: z.ZodObject<{
|
|
|
68
81
|
templateId: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
69
82
|
triggerPrompt: z.ZodString;
|
|
70
83
|
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
71
|
-
type: z.ZodLiteral<"skill_was_called">;
|
|
72
84
|
skillNames: z.ZodArray<z.ZodString>;
|
|
85
|
+
type: z.ZodLiteral<"skill_was_called">;
|
|
73
86
|
}, z.core.$strip>, z.ZodObject<{
|
|
74
|
-
type: z.ZodLiteral<"tool_called_with_param">;
|
|
75
87
|
toolName: z.ZodString;
|
|
76
88
|
expectedParams: z.ZodString;
|
|
77
|
-
|
|
78
|
-
type: z.ZodLiteral<"build_passed">;
|
|
89
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
90
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
91
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
79
92
|
command: z.ZodOptional<z.ZodString>;
|
|
80
93
|
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
81
|
-
|
|
82
|
-
|
|
94
|
+
type: z.ZodLiteral<"build_passed">;
|
|
95
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
83
96
|
maxDurationMs: z.ZodNumber;
|
|
84
|
-
|
|
85
|
-
|
|
97
|
+
type: z.ZodLiteral<"time_limit">;
|
|
98
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
86
99
|
maxCostUsd: z.ZodNumber;
|
|
87
|
-
|
|
88
|
-
|
|
100
|
+
type: z.ZodLiteral<"cost">;
|
|
101
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
89
102
|
prompt: z.ZodString;
|
|
90
103
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
91
104
|
model: z.ZodOptional<z.ZodString>;
|
|
92
105
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
93
106
|
temperature: z.ZodOptional<z.ZodNumber>;
|
|
107
|
+
parameters: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
108
|
+
name: z.ZodString;
|
|
109
|
+
label: z.ZodString;
|
|
110
|
+
type: z.ZodEnum<{
|
|
111
|
+
string: "string";
|
|
112
|
+
number: "number";
|
|
113
|
+
boolean: "boolean";
|
|
114
|
+
}>;
|
|
115
|
+
required: z.ZodBoolean;
|
|
116
|
+
defaultValue: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean]>>;
|
|
117
|
+
advanced: z.ZodOptional<z.ZodBoolean>;
|
|
118
|
+
}, z.core.$strip>>>;
|
|
119
|
+
type: z.ZodLiteral<"llm_judge">;
|
|
94
120
|
}, z.core.$strip>]>>>;
|
|
95
121
|
assertionIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
96
122
|
assertionLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -110,29 +136,42 @@ export declare const UpdateTestScenarioInputSchema: z.ZodObject<{
|
|
|
110
136
|
templateId: z.ZodOptional<z.ZodOptional<z.ZodNullable<z.ZodString>>>;
|
|
111
137
|
triggerPrompt: z.ZodOptional<z.ZodString>;
|
|
112
138
|
assertions: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
113
|
-
type: z.ZodLiteral<"skill_was_called">;
|
|
114
139
|
skillNames: z.ZodArray<z.ZodString>;
|
|
140
|
+
type: z.ZodLiteral<"skill_was_called">;
|
|
115
141
|
}, z.core.$strip>, z.ZodObject<{
|
|
116
|
-
type: z.ZodLiteral<"tool_called_with_param">;
|
|
117
142
|
toolName: z.ZodString;
|
|
118
143
|
expectedParams: z.ZodString;
|
|
119
|
-
|
|
120
|
-
type: z.ZodLiteral<"build_passed">;
|
|
144
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
145
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
146
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
121
147
|
command: z.ZodOptional<z.ZodString>;
|
|
122
148
|
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
123
|
-
|
|
124
|
-
|
|
149
|
+
type: z.ZodLiteral<"build_passed">;
|
|
150
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
125
151
|
maxDurationMs: z.ZodNumber;
|
|
126
|
-
|
|
127
|
-
|
|
152
|
+
type: z.ZodLiteral<"time_limit">;
|
|
153
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
128
154
|
maxCostUsd: z.ZodNumber;
|
|
129
|
-
|
|
130
|
-
|
|
155
|
+
type: z.ZodLiteral<"cost">;
|
|
156
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
131
157
|
prompt: z.ZodString;
|
|
132
158
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
133
159
|
model: z.ZodOptional<z.ZodString>;
|
|
134
160
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
135
161
|
temperature: z.ZodOptional<z.ZodNumber>;
|
|
162
|
+
parameters: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
163
|
+
name: z.ZodString;
|
|
164
|
+
label: z.ZodString;
|
|
165
|
+
type: z.ZodEnum<{
|
|
166
|
+
string: "string";
|
|
167
|
+
number: "number";
|
|
168
|
+
boolean: "boolean";
|
|
169
|
+
}>;
|
|
170
|
+
required: z.ZodBoolean;
|
|
171
|
+
defaultValue: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean]>>;
|
|
172
|
+
advanced: z.ZodOptional<z.ZodBoolean>;
|
|
173
|
+
}, z.core.$strip>>>;
|
|
174
|
+
type: z.ZodLiteral<"llm_judge">;
|
|
136
175
|
}, z.core.$strip>]>>>>;
|
|
137
176
|
assertionIds: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
138
177
|
assertionLinks: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-types",
|
|
3
|
-
"version": "0.53.0",
|
|
3
|
+
"version": "0.54.0",
|
|
4
4
|
"description": "Unified types for EvalForge agent evaluation system",
|
|
5
5
|
"files": [
|
|
6
6
|
"build"
|
|
@@ -47,5 +47,5 @@
|
|
|
47
47
|
"artifactId": "evalforge-types"
|
|
48
48
|
}
|
|
49
49
|
},
|
|
50
|
-
"falconPackageHash": "
|
|
50
|
+
"falconPackageHash": "2c3aab7cd9e412ed3299c7a914bc04abfee06a97965cc30794055f8e"
|
|
51
51
|
}
|
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
|
-
/**
|
|
3
|
-
* Assertion: the agent must have invoked one or more skills during the run.
|
|
4
|
-
* Checked by inspecting the LLM trace for "Skill" tool uses with the given skills.
|
|
5
|
-
* When multiple skills are in one assertion, they are treated as a group (1 assertion).
|
|
6
|
-
* Each skill in the group must have been called for the assertion to pass.
|
|
7
|
-
* To check skills independently, add them as separate assertions.
|
|
8
|
-
*/
|
|
9
|
-
export declare const SkillWasCalledAssertionSchema: z.ZodObject<{
|
|
10
|
-
type: z.ZodLiteral<"skill_was_called">;
|
|
11
|
-
skillNames: z.ZodArray<z.ZodString>;
|
|
12
|
-
}, z.core.$strip>;
|
|
13
|
-
export type SkillWasCalledAssertion = z.infer<typeof SkillWasCalledAssertionSchema>;
|
|
14
|
-
/**
|
|
15
|
-
* Assertion: a specific tool must have been called with expected parameters.
|
|
16
|
-
* Checked by inspecting the LLM trace for tool calls with matching name and arguments.
|
|
17
|
-
* Each expected param value is matched as a substring against the actual argument value.
|
|
18
|
-
* All expected params must match on the same tool call for the assertion to pass.
|
|
19
|
-
*/
|
|
20
|
-
export declare const ToolCalledWithParamAssertionSchema: z.ZodObject<{
|
|
21
|
-
type: z.ZodLiteral<"tool_called_with_param">;
|
|
22
|
-
toolName: z.ZodString;
|
|
23
|
-
expectedParams: z.ZodString;
|
|
24
|
-
}, z.core.$strip>;
|
|
25
|
-
export type ToolCalledWithParamAssertion = z.infer<typeof ToolCalledWithParamAssertionSchema>;
|
|
26
|
-
/**
|
|
27
|
-
* Assertion: a build command must exit with the expected code (default 0).
|
|
28
|
-
* Runs the command in the scenario working directory.
|
|
29
|
-
*/
|
|
30
|
-
export declare const BuildPassedAssertionSchema: z.ZodObject<{
|
|
31
|
-
type: z.ZodLiteral<"build_passed">;
|
|
32
|
-
command: z.ZodOptional<z.ZodString>;
|
|
33
|
-
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
34
|
-
}, z.core.$strip>;
|
|
35
|
-
export type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;
|
|
36
|
-
/**
|
|
37
|
-
* Assertion: the scenario LLM execution cost must stay within a USD threshold.
|
|
38
|
-
* Checked by reading llmTrace.summary.totalCostUsd.
|
|
39
|
-
*/
|
|
40
|
-
export declare const CostAssertionSchema: z.ZodObject<{
|
|
41
|
-
type: z.ZodLiteral<"cost">;
|
|
42
|
-
maxCostUsd: z.ZodNumber;
|
|
43
|
-
}, z.core.$strip>;
|
|
44
|
-
export type CostAssertion = z.infer<typeof CostAssertionSchema>;
|
|
45
|
-
/**
|
|
46
|
-
* Assertion: an LLM judges the scenario output (score 0-10).
|
|
47
|
-
* Prompt can use {{output}}, {{cwd}}, {{changedFiles}}, {{trace}}.
|
|
48
|
-
* Passes if judge score >= minScore.
|
|
49
|
-
*/
|
|
50
|
-
export declare const LlmJudgeAssertionSchema: z.ZodObject<{
|
|
51
|
-
type: z.ZodLiteral<"llm_judge">;
|
|
52
|
-
prompt: z.ZodString;
|
|
53
|
-
minScore: z.ZodOptional<z.ZodNumber>;
|
|
54
|
-
model: z.ZodOptional<z.ZodString>;
|
|
55
|
-
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
56
|
-
temperature: z.ZodOptional<z.ZodNumber>;
|
|
57
|
-
}, z.core.$strip>;
|
|
58
|
-
export type LlmJudgeAssertion = z.infer<typeof LlmJudgeAssertionSchema>;
|
|
59
|
-
/**
|
|
60
|
-
* Assertion: scenario must complete within a maximum duration.
|
|
61
|
-
* Deterministic check against the scenario execution time.
|
|
62
|
-
*/
|
|
63
|
-
export declare const TimeAssertionSchema: z.ZodObject<{
|
|
64
|
-
type: z.ZodLiteral<"time_limit">;
|
|
65
|
-
maxDurationMs: z.ZodNumber;
|
|
66
|
-
}, z.core.$strip>;
|
|
67
|
-
export type TimeAssertion = z.infer<typeof TimeAssertionSchema>;
|
|
68
|
-
/**
|
|
69
|
-
* Union of all assertion types (per scenario).
|
|
70
|
-
* Each assertion has a type and type-specific data.
|
|
71
|
-
* Uses z.union (not z.discriminatedUnion) for Zod v4 compatibility when used as array element.
|
|
72
|
-
*/
|
|
73
|
-
export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
74
|
-
type: z.ZodLiteral<"skill_was_called">;
|
|
75
|
-
skillNames: z.ZodArray<z.ZodString>;
|
|
76
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
77
|
-
type: z.ZodLiteral<"tool_called_with_param">;
|
|
78
|
-
toolName: z.ZodString;
|
|
79
|
-
expectedParams: z.ZodString;
|
|
80
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
81
|
-
type: z.ZodLiteral<"build_passed">;
|
|
82
|
-
command: z.ZodOptional<z.ZodString>;
|
|
83
|
-
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
84
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
85
|
-
type: z.ZodLiteral<"time_limit">;
|
|
86
|
-
maxDurationMs: z.ZodNumber;
|
|
87
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
88
|
-
type: z.ZodLiteral<"cost">;
|
|
89
|
-
maxCostUsd: z.ZodNumber;
|
|
90
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
91
|
-
type: z.ZodLiteral<"llm_judge">;
|
|
92
|
-
prompt: z.ZodString;
|
|
93
|
-
minScore: z.ZodOptional<z.ZodNumber>;
|
|
94
|
-
model: z.ZodOptional<z.ZodString>;
|
|
95
|
-
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
96
|
-
temperature: z.ZodOptional<z.ZodNumber>;
|
|
97
|
-
}, z.core.$strip>]>;
|
|
98
|
-
export type Assertion = z.infer<typeof AssertionSchema>;
|