@wix/evalforge-types 0.53.0 → 0.55.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +431 -1310
- package/build/index.js.map +4 -4
- package/build/index.mjs +429 -1310
- package/build/index.mjs.map +4 -4
- package/build/types/assertion/assertion.d.ts +106 -4
- package/build/types/common/models.d.ts +30 -22
- package/build/types/evaluation/eval-result.d.ts +4 -0
- package/build/types/evaluation/eval-run.d.ts +5 -0
- package/build/types/evaluation/metrics.d.ts +4 -0
- package/build/types/scenario/index.d.ts +0 -1
- package/build/types/scenario/test-scenario.d.ts +69 -30
- package/package.json +2 -3
- package/build/types/scenario/assertions.d.ts +0 -98
|
@@ -12,9 +12,9 @@ export declare const AssertionTypeSchema: z.ZodEnum<{
|
|
|
12
12
|
skill_was_called: "skill_was_called";
|
|
13
13
|
tool_called_with_param: "tool_called_with_param";
|
|
14
14
|
build_passed: "build_passed";
|
|
15
|
+
time_limit: "time_limit";
|
|
15
16
|
cost: "cost";
|
|
16
17
|
llm_judge: "llm_judge";
|
|
17
|
-
time_limit: "time_limit";
|
|
18
18
|
}>;
|
|
19
19
|
/**
|
|
20
20
|
* Parameter types supported in assertion parameters.
|
|
@@ -78,6 +78,8 @@ export declare const ToolCalledWithParamConfigSchema: z.ZodObject<{
|
|
|
78
78
|
toolName: z.ZodString;
|
|
79
79
|
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
80
80
|
expectedParams: z.ZodString;
|
|
81
|
+
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
82
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
81
83
|
}, z.core.$strict>;
|
|
82
84
|
/**
|
|
83
85
|
* Configuration for build_passed assertion type.
|
|
@@ -123,6 +125,98 @@ export declare const LlmJudgeConfigSchema: z.ZodObject<{
|
|
|
123
125
|
}, z.core.$strip>>>;
|
|
124
126
|
}, z.core.$strip>;
|
|
125
127
|
export type LlmJudgeConfig = z.infer<typeof LlmJudgeConfigSchema>;
|
|
128
|
+
export declare const SkillWasCalledAssertionSchema: z.ZodObject<{
|
|
129
|
+
skillNames: z.ZodArray<z.ZodString>;
|
|
130
|
+
type: z.ZodLiteral<"skill_was_called">;
|
|
131
|
+
}, z.core.$strip>;
|
|
132
|
+
export type SkillWasCalledAssertion = z.infer<typeof SkillWasCalledAssertionSchema>;
|
|
133
|
+
export declare const ToolCalledWithParamAssertionSchema: z.ZodObject<{
|
|
134
|
+
toolName: z.ZodString;
|
|
135
|
+
expectedParams: z.ZodString;
|
|
136
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
137
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
138
|
+
}, z.core.$strict>;
|
|
139
|
+
export type ToolCalledWithParamAssertion = z.infer<typeof ToolCalledWithParamAssertionSchema>;
|
|
140
|
+
export declare const BuildPassedAssertionSchema: z.ZodObject<{
|
|
141
|
+
command: z.ZodOptional<z.ZodString>;
|
|
142
|
+
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
143
|
+
type: z.ZodLiteral<"build_passed">;
|
|
144
|
+
}, z.core.$strict>;
|
|
145
|
+
export type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;
|
|
146
|
+
export declare const CostAssertionSchema: z.ZodObject<{
|
|
147
|
+
maxCostUsd: z.ZodNumber;
|
|
148
|
+
type: z.ZodLiteral<"cost">;
|
|
149
|
+
}, z.core.$strict>;
|
|
150
|
+
export type CostAssertion = z.infer<typeof CostAssertionSchema>;
|
|
151
|
+
export declare const LlmJudgeAssertionSchema: z.ZodObject<{
|
|
152
|
+
prompt: z.ZodString;
|
|
153
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
154
|
+
model: z.ZodOptional<z.ZodString>;
|
|
155
|
+
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
156
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
157
|
+
parameters: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
158
|
+
name: z.ZodString;
|
|
159
|
+
label: z.ZodString;
|
|
160
|
+
type: z.ZodEnum<{
|
|
161
|
+
string: "string";
|
|
162
|
+
number: "number";
|
|
163
|
+
boolean: "boolean";
|
|
164
|
+
}>;
|
|
165
|
+
required: z.ZodBoolean;
|
|
166
|
+
defaultValue: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean]>>;
|
|
167
|
+
advanced: z.ZodOptional<z.ZodBoolean>;
|
|
168
|
+
}, z.core.$strip>>>;
|
|
169
|
+
type: z.ZodLiteral<"llm_judge">;
|
|
170
|
+
}, z.core.$strip>;
|
|
171
|
+
export type LlmJudgeAssertion = z.infer<typeof LlmJudgeAssertionSchema>;
|
|
172
|
+
export declare const TimeAssertionSchema: z.ZodObject<{
|
|
173
|
+
maxDurationMs: z.ZodNumber;
|
|
174
|
+
type: z.ZodLiteral<"time_limit">;
|
|
175
|
+
}, z.core.$strict>;
|
|
176
|
+
export type TimeAssertion = z.infer<typeof TimeAssertionSchema>;
|
|
177
|
+
/**
|
|
178
|
+
* Union of all inline assertion types.
|
|
179
|
+
* Each assertion has a type literal and type-specific data.
|
|
180
|
+
*/
|
|
181
|
+
export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
182
|
+
skillNames: z.ZodArray<z.ZodString>;
|
|
183
|
+
type: z.ZodLiteral<"skill_was_called">;
|
|
184
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
185
|
+
toolName: z.ZodString;
|
|
186
|
+
expectedParams: z.ZodString;
|
|
187
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
188
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
189
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
190
|
+
command: z.ZodOptional<z.ZodString>;
|
|
191
|
+
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
192
|
+
type: z.ZodLiteral<"build_passed">;
|
|
193
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
194
|
+
maxDurationMs: z.ZodNumber;
|
|
195
|
+
type: z.ZodLiteral<"time_limit">;
|
|
196
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
197
|
+
maxCostUsd: z.ZodNumber;
|
|
198
|
+
type: z.ZodLiteral<"cost">;
|
|
199
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
200
|
+
prompt: z.ZodString;
|
|
201
|
+
minScore: z.ZodOptional<z.ZodNumber>;
|
|
202
|
+
model: z.ZodOptional<z.ZodString>;
|
|
203
|
+
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
204
|
+
temperature: z.ZodOptional<z.ZodNumber>;
|
|
205
|
+
parameters: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
206
|
+
name: z.ZodString;
|
|
207
|
+
label: z.ZodString;
|
|
208
|
+
type: z.ZodEnum<{
|
|
209
|
+
string: "string";
|
|
210
|
+
number: "number";
|
|
211
|
+
boolean: "boolean";
|
|
212
|
+
}>;
|
|
213
|
+
required: z.ZodBoolean;
|
|
214
|
+
defaultValue: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean]>>;
|
|
215
|
+
advanced: z.ZodOptional<z.ZodBoolean>;
|
|
216
|
+
}, z.core.$strip>>>;
|
|
217
|
+
type: z.ZodLiteral<"llm_judge">;
|
|
218
|
+
}, z.core.$strip>]>;
|
|
219
|
+
export type Assertion = z.infer<typeof AssertionSchema>;
|
|
126
220
|
/**
|
|
127
221
|
* Union of all assertion config types.
|
|
128
222
|
* Order matters: schemas with required fields first, then optional-only schemas.
|
|
@@ -154,6 +248,8 @@ export declare const AssertionConfigSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
|
154
248
|
toolName: z.ZodString;
|
|
155
249
|
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
156
250
|
expectedParams: z.ZodString;
|
|
251
|
+
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
252
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
157
253
|
}, z.core.$strict>, z.ZodObject<{
|
|
158
254
|
/** Maximum allowed duration in milliseconds */
|
|
159
255
|
maxDurationMs: z.ZodNumber;
|
|
@@ -183,9 +279,9 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
183
279
|
skill_was_called: "skill_was_called";
|
|
184
280
|
tool_called_with_param: "tool_called_with_param";
|
|
185
281
|
build_passed: "build_passed";
|
|
282
|
+
time_limit: "time_limit";
|
|
186
283
|
cost: "cost";
|
|
187
284
|
llm_judge: "llm_judge";
|
|
188
|
-
time_limit: "time_limit";
|
|
189
285
|
}>;
|
|
190
286
|
config: z.ZodUnion<readonly [z.ZodObject<{
|
|
191
287
|
prompt: z.ZodString;
|
|
@@ -212,6 +308,8 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
212
308
|
toolName: z.ZodString;
|
|
213
309
|
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
214
310
|
expectedParams: z.ZodString;
|
|
311
|
+
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
312
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
215
313
|
}, z.core.$strict>, z.ZodObject<{
|
|
216
314
|
/** Maximum allowed duration in milliseconds */
|
|
217
315
|
maxDurationMs: z.ZodNumber;
|
|
@@ -234,9 +332,9 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
234
332
|
skill_was_called: "skill_was_called";
|
|
235
333
|
tool_called_with_param: "tool_called_with_param";
|
|
236
334
|
build_passed: "build_passed";
|
|
335
|
+
time_limit: "time_limit";
|
|
237
336
|
cost: "cost";
|
|
238
337
|
llm_judge: "llm_judge";
|
|
239
|
-
time_limit: "time_limit";
|
|
240
338
|
}>;
|
|
241
339
|
name: z.ZodString;
|
|
242
340
|
description: z.ZodString;
|
|
@@ -266,6 +364,8 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
266
364
|
toolName: z.ZodString;
|
|
267
365
|
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
268
366
|
expectedParams: z.ZodString;
|
|
367
|
+
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
368
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
269
369
|
}, z.core.$strict>, z.ZodObject<{
|
|
270
370
|
/** Maximum allowed duration in milliseconds */
|
|
271
371
|
maxDurationMs: z.ZodNumber;
|
|
@@ -288,9 +388,9 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
288
388
|
skill_was_called: "skill_was_called";
|
|
289
389
|
tool_called_with_param: "tool_called_with_param";
|
|
290
390
|
build_passed: "build_passed";
|
|
391
|
+
time_limit: "time_limit";
|
|
291
392
|
cost: "cost";
|
|
292
393
|
llm_judge: "llm_judge";
|
|
293
|
-
time_limit: "time_limit";
|
|
294
394
|
}>>;
|
|
295
395
|
name: z.ZodOptional<z.ZodString>;
|
|
296
396
|
description: z.ZodOptional<z.ZodString>;
|
|
@@ -320,6 +420,8 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
320
420
|
toolName: z.ZodString;
|
|
321
421
|
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
322
422
|
expectedParams: z.ZodString;
|
|
423
|
+
/** If true, the matching tool call must also have succeeded (step.success === true) */
|
|
424
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
323
425
|
}, z.core.$strict>, z.ZodObject<{
|
|
324
426
|
/** Maximum allowed duration in milliseconds */
|
|
325
427
|
maxDurationMs: z.ZodNumber;
|
|
@@ -1,34 +1,32 @@
|
|
|
1
1
|
import { z } from 'zod';
|
|
2
|
-
import { AnthropicModel as _AnthropicModel, Model as _OpenAIModel } from '@wix/ambassador-ds-wix-ai-gateway-v1-prompt/types';
|
|
3
2
|
/**
|
|
4
3
|
* Curated model sets — only models suitable for code generation.
|
|
5
|
-
*
|
|
6
|
-
* completion-only, and non-text models.
|
|
4
|
+
* Uses official/canonical model IDs (not internal gateway enums).
|
|
7
5
|
* Update these explicitly when new models become available.
|
|
8
6
|
*/
|
|
9
7
|
export declare const ClaudeModel: {
|
|
10
|
-
readonly CLAUDE_4_SONNET_1_0:
|
|
11
|
-
readonly CLAUDE_4_OPUS_1_0:
|
|
12
|
-
readonly CLAUDE_4_5_SONNET_1_0:
|
|
13
|
-
readonly CLAUDE_4_5_HAIKU_1_0:
|
|
14
|
-
readonly CLAUDE_4_5_OPUS_1_0:
|
|
15
|
-
readonly CLAUDE_4_6_SONNET_1_0:
|
|
16
|
-
readonly CLAUDE_4_6_OPUS_1_0:
|
|
8
|
+
readonly CLAUDE_4_SONNET_1_0: "claude-sonnet-4";
|
|
9
|
+
readonly CLAUDE_4_OPUS_1_0: "claude-opus-4";
|
|
10
|
+
readonly CLAUDE_4_5_SONNET_1_0: "claude-sonnet-4-5";
|
|
11
|
+
readonly CLAUDE_4_5_HAIKU_1_0: "claude-haiku-4-5";
|
|
12
|
+
readonly CLAUDE_4_5_OPUS_1_0: "claude-opus-4-5";
|
|
13
|
+
readonly CLAUDE_4_6_SONNET_1_0: "claude-sonnet-4-6";
|
|
14
|
+
readonly CLAUDE_4_6_OPUS_1_0: "claude-opus-4-6";
|
|
17
15
|
};
|
|
18
16
|
export type ClaudeModel = (typeof ClaudeModel)[keyof typeof ClaudeModel];
|
|
19
17
|
export declare const OpenAIModel: {
|
|
20
|
-
readonly GPT_4O_MINI_2024_07_18:
|
|
21
|
-
readonly GPT_4O_2024_11_20:
|
|
22
|
-
readonly O1_2024_12_17:
|
|
23
|
-
readonly O3_MINI_2025_01_31:
|
|
24
|
-
readonly GPT_4_1_2025_04_14:
|
|
25
|
-
readonly GPT_4_1_MINI_2025_04_14:
|
|
26
|
-
readonly GPT_4_1_NANO_2025_04_14:
|
|
27
|
-
readonly O3_2025_04_16:
|
|
28
|
-
readonly O4_MINI_2025_04_16:
|
|
29
|
-
readonly GPT_5_2025_08_07:
|
|
30
|
-
readonly GPT_5_MINI_2025_08_07:
|
|
31
|
-
readonly GPT_5_NANO_2025_08_07:
|
|
18
|
+
readonly GPT_4O_MINI_2024_07_18: "gpt-4o-mini";
|
|
19
|
+
readonly GPT_4O_2024_11_20: "gpt-4o";
|
|
20
|
+
readonly O1_2024_12_17: "o1";
|
|
21
|
+
readonly O3_MINI_2025_01_31: "o3-mini";
|
|
22
|
+
readonly GPT_4_1_2025_04_14: "gpt-4.1";
|
|
23
|
+
readonly GPT_4_1_MINI_2025_04_14: "gpt-4.1-mini";
|
|
24
|
+
readonly GPT_4_1_NANO_2025_04_14: "gpt-4.1-nano";
|
|
25
|
+
readonly O3_2025_04_16: "o3";
|
|
26
|
+
readonly O4_MINI_2025_04_16: "o4-mini";
|
|
27
|
+
readonly GPT_5_2025_08_07: "gpt-5";
|
|
28
|
+
readonly GPT_5_MINI_2025_08_07: "gpt-5-mini";
|
|
29
|
+
readonly GPT_5_NANO_2025_08_07: "gpt-5-nano";
|
|
32
30
|
};
|
|
33
31
|
export type OpenAIModel = (typeof OpenAIModel)[keyof typeof OpenAIModel];
|
|
34
32
|
export declare const AVAILABLE_CLAUDE_MODEL_IDS: ClaudeModel[];
|
|
@@ -50,6 +48,16 @@ export declare const ALL_AVAILABLE_MODEL_IDS: string[];
|
|
|
50
48
|
export declare const AnyModelSchema: z.ZodEnum<{
|
|
51
49
|
[x: string]: string;
|
|
52
50
|
}>;
|
|
51
|
+
/**
|
|
52
|
+
* Maps legacy Wix AI Gateway enum strings to official model IDs.
|
|
53
|
+
* Used for backward compatibility with stored data (DB, eval traces).
|
|
54
|
+
*/
|
|
55
|
+
export declare const LEGACY_MODEL_ID_MAP: Record<string, string>;
|
|
56
|
+
/**
|
|
57
|
+
* Normalize a model ID: translates legacy gateway enum strings to
|
|
58
|
+
* official model IDs. Returns the input unchanged if already canonical.
|
|
59
|
+
*/
|
|
60
|
+
export declare function normalizeModelId(modelId: string): string;
|
|
53
61
|
export declare const ModelConfigSchema: z.ZodObject<{
|
|
54
62
|
model: z.ZodEnum<{
|
|
55
63
|
[x: string]: string;
|
|
@@ -45,6 +45,7 @@ export declare const AssertionResultSchema: z.ZodObject<{
|
|
|
45
45
|
outputPreview: z.ZodOptional<z.ZodString>;
|
|
46
46
|
success: z.ZodBoolean;
|
|
47
47
|
error: z.ZodOptional<z.ZodString>;
|
|
48
|
+
turnIndex: z.ZodOptional<z.ZodNumber>;
|
|
48
49
|
}, z.core.$strip>>>;
|
|
49
50
|
}, z.core.$strip>;
|
|
50
51
|
export type AssertionResult = z.infer<typeof AssertionResultSchema>;
|
|
@@ -98,6 +99,7 @@ export declare const EvalRunResultSchema: z.ZodObject<{
|
|
|
98
99
|
outputPreview: z.ZodOptional<z.ZodString>;
|
|
99
100
|
success: z.ZodBoolean;
|
|
100
101
|
error: z.ZodOptional<z.ZodString>;
|
|
102
|
+
turnIndex: z.ZodOptional<z.ZodNumber>;
|
|
101
103
|
}, z.core.$strip>>>;
|
|
102
104
|
}, z.core.$strip>>;
|
|
103
105
|
metrics: z.ZodOptional<z.ZodObject<{
|
|
@@ -167,9 +169,11 @@ export declare const EvalRunResultSchema: z.ZodObject<{
|
|
|
167
169
|
outputPreview: z.ZodOptional<z.ZodString>;
|
|
168
170
|
success: z.ZodBoolean;
|
|
169
171
|
error: z.ZodOptional<z.ZodString>;
|
|
172
|
+
turnIndex: z.ZodOptional<z.ZodNumber>;
|
|
170
173
|
}, z.core.$strip>>;
|
|
171
174
|
summary: z.ZodObject<{
|
|
172
175
|
totalSteps: z.ZodNumber;
|
|
176
|
+
totalTurns: z.ZodOptional<z.ZodNumber>;
|
|
173
177
|
totalDurationMs: z.ZodNumber;
|
|
174
178
|
totalTokens: z.ZodObject<{
|
|
175
179
|
prompt: z.ZodNumber;
|
|
@@ -287,6 +287,7 @@ export declare const EvalRunSchema: z.ZodObject<{
|
|
|
287
287
|
outputPreview: z.ZodOptional<z.ZodString>;
|
|
288
288
|
success: z.ZodBoolean;
|
|
289
289
|
error: z.ZodOptional<z.ZodString>;
|
|
290
|
+
turnIndex: z.ZodOptional<z.ZodNumber>;
|
|
290
291
|
}, z.core.$strip>>>;
|
|
291
292
|
}, z.core.$strip>>;
|
|
292
293
|
metrics: z.ZodOptional<z.ZodObject<{
|
|
@@ -356,9 +357,11 @@ export declare const EvalRunSchema: z.ZodObject<{
|
|
|
356
357
|
outputPreview: z.ZodOptional<z.ZodString>;
|
|
357
358
|
success: z.ZodBoolean;
|
|
358
359
|
error: z.ZodOptional<z.ZodString>;
|
|
360
|
+
turnIndex: z.ZodOptional<z.ZodNumber>;
|
|
359
361
|
}, z.core.$strip>>;
|
|
360
362
|
summary: z.ZodObject<{
|
|
361
363
|
totalSteps: z.ZodNumber;
|
|
364
|
+
totalTurns: z.ZodOptional<z.ZodNumber>;
|
|
362
365
|
totalDurationMs: z.ZodNumber;
|
|
363
366
|
totalTokens: z.ZodObject<{
|
|
364
367
|
prompt: z.ZodNumber;
|
|
@@ -468,6 +471,7 @@ export declare const EvalRunSchema: z.ZodObject<{
|
|
|
468
471
|
}, z.core.$strip>>>;
|
|
469
472
|
llmTraceSummary: z.ZodOptional<z.ZodObject<{
|
|
470
473
|
totalSteps: z.ZodNumber;
|
|
474
|
+
totalTurns: z.ZodOptional<z.ZodNumber>;
|
|
471
475
|
totalDurationMs: z.ZodNumber;
|
|
472
476
|
totalTokens: z.ZodObject<{
|
|
473
477
|
prompt: z.ZodNumber;
|
|
@@ -597,6 +601,7 @@ export declare const CreateEvalRunInputSchema: z.ZodObject<{
|
|
|
597
601
|
}, z.core.$strip>>>;
|
|
598
602
|
llmTraceSummary: z.ZodOptional<z.ZodObject<{
|
|
599
603
|
totalSteps: z.ZodNumber;
|
|
604
|
+
totalTurns: z.ZodOptional<z.ZodNumber>;
|
|
600
605
|
totalDurationMs: z.ZodNumber;
|
|
601
606
|
totalTokens: z.ZodObject<{
|
|
602
607
|
prompt: z.ZodNumber;
|
|
@@ -65,6 +65,7 @@ export declare const LLMTraceStepSchema: z.ZodObject<{
|
|
|
65
65
|
outputPreview: z.ZodOptional<z.ZodString>;
|
|
66
66
|
success: z.ZodBoolean;
|
|
67
67
|
error: z.ZodOptional<z.ZodString>;
|
|
68
|
+
turnIndex: z.ZodOptional<z.ZodNumber>;
|
|
68
69
|
}, z.core.$strip>;
|
|
69
70
|
export type LLMTraceStep = z.infer<typeof LLMTraceStepSchema>;
|
|
70
71
|
/**
|
|
@@ -82,6 +83,7 @@ export type LLMBreakdownStats = z.infer<typeof LLMBreakdownStatsSchema>;
|
|
|
82
83
|
*/
|
|
83
84
|
export declare const LLMTraceSummarySchema: z.ZodObject<{
|
|
84
85
|
totalSteps: z.ZodNumber;
|
|
86
|
+
totalTurns: z.ZodOptional<z.ZodNumber>;
|
|
85
87
|
totalDurationMs: z.ZodNumber;
|
|
86
88
|
totalTokens: z.ZodObject<{
|
|
87
89
|
prompt: z.ZodNumber;
|
|
@@ -129,9 +131,11 @@ export declare const LLMTraceSchema: z.ZodObject<{
|
|
|
129
131
|
outputPreview: z.ZodOptional<z.ZodString>;
|
|
130
132
|
success: z.ZodBoolean;
|
|
131
133
|
error: z.ZodOptional<z.ZodString>;
|
|
134
|
+
turnIndex: z.ZodOptional<z.ZodNumber>;
|
|
132
135
|
}, z.core.$strip>>;
|
|
133
136
|
summary: z.ZodObject<{
|
|
134
137
|
totalSteps: z.ZodNumber;
|
|
138
|
+
totalTurns: z.ZodOptional<z.ZodNumber>;
|
|
135
139
|
totalDurationMs: z.ZodNumber;
|
|
136
140
|
totalTokens: z.ZodObject<{
|
|
137
141
|
prompt: z.ZodNumber;
|
|
@@ -26,29 +26,42 @@ export declare const TestScenarioSchema: z.ZodObject<{
|
|
|
26
26
|
triggerPrompt: z.ZodString;
|
|
27
27
|
templateId: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
28
28
|
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
29
|
-
type: z.ZodLiteral<"skill_was_called">;
|
|
30
29
|
skillNames: z.ZodArray<z.ZodString>;
|
|
30
|
+
type: z.ZodLiteral<"skill_was_called">;
|
|
31
31
|
}, z.core.$strip>, z.ZodObject<{
|
|
32
|
-
type: z.ZodLiteral<"tool_called_with_param">;
|
|
33
32
|
toolName: z.ZodString;
|
|
34
33
|
expectedParams: z.ZodString;
|
|
35
|
-
|
|
36
|
-
type: z.ZodLiteral<"build_passed">;
|
|
34
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
35
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
36
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
37
37
|
command: z.ZodOptional<z.ZodString>;
|
|
38
38
|
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
39
|
-
|
|
40
|
-
|
|
39
|
+
type: z.ZodLiteral<"build_passed">;
|
|
40
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
41
41
|
maxDurationMs: z.ZodNumber;
|
|
42
|
-
|
|
43
|
-
|
|
42
|
+
type: z.ZodLiteral<"time_limit">;
|
|
43
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
44
44
|
maxCostUsd: z.ZodNumber;
|
|
45
|
-
|
|
46
|
-
|
|
45
|
+
type: z.ZodLiteral<"cost">;
|
|
46
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
47
47
|
prompt: z.ZodString;
|
|
48
48
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
49
49
|
model: z.ZodOptional<z.ZodString>;
|
|
50
50
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
51
51
|
temperature: z.ZodOptional<z.ZodNumber>;
|
|
52
|
+
parameters: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
53
|
+
name: z.ZodString;
|
|
54
|
+
label: z.ZodString;
|
|
55
|
+
type: z.ZodEnum<{
|
|
56
|
+
string: "string";
|
|
57
|
+
number: "number";
|
|
58
|
+
boolean: "boolean";
|
|
59
|
+
}>;
|
|
60
|
+
required: z.ZodBoolean;
|
|
61
|
+
defaultValue: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean]>>;
|
|
62
|
+
advanced: z.ZodOptional<z.ZodBoolean>;
|
|
63
|
+
}, z.core.$strip>>>;
|
|
64
|
+
type: z.ZodLiteral<"llm_judge">;
|
|
52
65
|
}, z.core.$strip>]>>>;
|
|
53
66
|
assertionIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
54
67
|
assertionLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -68,29 +81,42 @@ export declare const CreateTestScenarioInputSchema: z.ZodObject<{
|
|
|
68
81
|
templateId: z.ZodOptional<z.ZodNullable<z.ZodString>>;
|
|
69
82
|
triggerPrompt: z.ZodString;
|
|
70
83
|
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
71
|
-
type: z.ZodLiteral<"skill_was_called">;
|
|
72
84
|
skillNames: z.ZodArray<z.ZodString>;
|
|
85
|
+
type: z.ZodLiteral<"skill_was_called">;
|
|
73
86
|
}, z.core.$strip>, z.ZodObject<{
|
|
74
|
-
type: z.ZodLiteral<"tool_called_with_param">;
|
|
75
87
|
toolName: z.ZodString;
|
|
76
88
|
expectedParams: z.ZodString;
|
|
77
|
-
|
|
78
|
-
type: z.ZodLiteral<"build_passed">;
|
|
89
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
90
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
91
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
79
92
|
command: z.ZodOptional<z.ZodString>;
|
|
80
93
|
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
81
|
-
|
|
82
|
-
|
|
94
|
+
type: z.ZodLiteral<"build_passed">;
|
|
95
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
83
96
|
maxDurationMs: z.ZodNumber;
|
|
84
|
-
|
|
85
|
-
|
|
97
|
+
type: z.ZodLiteral<"time_limit">;
|
|
98
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
86
99
|
maxCostUsd: z.ZodNumber;
|
|
87
|
-
|
|
88
|
-
|
|
100
|
+
type: z.ZodLiteral<"cost">;
|
|
101
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
89
102
|
prompt: z.ZodString;
|
|
90
103
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
91
104
|
model: z.ZodOptional<z.ZodString>;
|
|
92
105
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
93
106
|
temperature: z.ZodOptional<z.ZodNumber>;
|
|
107
|
+
parameters: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
108
|
+
name: z.ZodString;
|
|
109
|
+
label: z.ZodString;
|
|
110
|
+
type: z.ZodEnum<{
|
|
111
|
+
string: "string";
|
|
112
|
+
number: "number";
|
|
113
|
+
boolean: "boolean";
|
|
114
|
+
}>;
|
|
115
|
+
required: z.ZodBoolean;
|
|
116
|
+
defaultValue: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean]>>;
|
|
117
|
+
advanced: z.ZodOptional<z.ZodBoolean>;
|
|
118
|
+
}, z.core.$strip>>>;
|
|
119
|
+
type: z.ZodLiteral<"llm_judge">;
|
|
94
120
|
}, z.core.$strip>]>>>;
|
|
95
121
|
assertionIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
96
122
|
assertionLinks: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
@@ -110,29 +136,42 @@ export declare const UpdateTestScenarioInputSchema: z.ZodObject<{
|
|
|
110
136
|
templateId: z.ZodOptional<z.ZodOptional<z.ZodNullable<z.ZodString>>>;
|
|
111
137
|
triggerPrompt: z.ZodOptional<z.ZodString>;
|
|
112
138
|
assertions: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
113
|
-
type: z.ZodLiteral<"skill_was_called">;
|
|
114
139
|
skillNames: z.ZodArray<z.ZodString>;
|
|
140
|
+
type: z.ZodLiteral<"skill_was_called">;
|
|
115
141
|
}, z.core.$strip>, z.ZodObject<{
|
|
116
|
-
type: z.ZodLiteral<"tool_called_with_param">;
|
|
117
142
|
toolName: z.ZodString;
|
|
118
143
|
expectedParams: z.ZodString;
|
|
119
|
-
|
|
120
|
-
type: z.ZodLiteral<"build_passed">;
|
|
144
|
+
requireSuccess: z.ZodOptional<z.ZodBoolean>;
|
|
145
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
146
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
121
147
|
command: z.ZodOptional<z.ZodString>;
|
|
122
148
|
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
123
|
-
|
|
124
|
-
|
|
149
|
+
type: z.ZodLiteral<"build_passed">;
|
|
150
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
125
151
|
maxDurationMs: z.ZodNumber;
|
|
126
|
-
|
|
127
|
-
|
|
152
|
+
type: z.ZodLiteral<"time_limit">;
|
|
153
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
128
154
|
maxCostUsd: z.ZodNumber;
|
|
129
|
-
|
|
130
|
-
|
|
155
|
+
type: z.ZodLiteral<"cost">;
|
|
156
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
131
157
|
prompt: z.ZodString;
|
|
132
158
|
minScore: z.ZodOptional<z.ZodNumber>;
|
|
133
159
|
model: z.ZodOptional<z.ZodString>;
|
|
134
160
|
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
135
161
|
temperature: z.ZodOptional<z.ZodNumber>;
|
|
162
|
+
parameters: z.ZodOptional<z.ZodArray<z.ZodObject<{
|
|
163
|
+
name: z.ZodString;
|
|
164
|
+
label: z.ZodString;
|
|
165
|
+
type: z.ZodEnum<{
|
|
166
|
+
string: "string";
|
|
167
|
+
number: "number";
|
|
168
|
+
boolean: "boolean";
|
|
169
|
+
}>;
|
|
170
|
+
required: z.ZodBoolean;
|
|
171
|
+
defaultValue: z.ZodOptional<z.ZodUnion<readonly [z.ZodString, z.ZodNumber, z.ZodBoolean]>>;
|
|
172
|
+
advanced: z.ZodOptional<z.ZodBoolean>;
|
|
173
|
+
}, z.core.$strip>>>;
|
|
174
|
+
type: z.ZodLiteral<"llm_judge">;
|
|
136
175
|
}, z.core.$strip>]>>>>;
|
|
137
176
|
assertionIds: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodString>>>;
|
|
138
177
|
assertionLinks: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodObject<{
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-types",
|
|
3
|
-
"version": "0.53.0",
|
|
3
|
+
"version": "0.55.0",
|
|
4
4
|
"description": "Unified types for EvalForge agent evaluation system",
|
|
5
5
|
"files": [
|
|
6
6
|
"build"
|
|
@@ -20,7 +20,6 @@
|
|
|
20
20
|
"devDependencies": {
|
|
21
21
|
"@eslint/js": "^9.39.2",
|
|
22
22
|
"@types/node": "^22.19.3",
|
|
23
|
-
"@wix/ambassador-ds-wix-ai-gateway-v1-prompt": "^1.0.312",
|
|
24
23
|
"esbuild": "^0.27.2",
|
|
25
24
|
"eslint": "^9.39.2",
|
|
26
25
|
"eslint-config-prettier": "^10.1.8",
|
|
@@ -47,5 +46,5 @@
|
|
|
47
46
|
"artifactId": "evalforge-types"
|
|
48
47
|
}
|
|
49
48
|
},
|
|
50
|
-
"falconPackageHash": "
|
|
49
|
+
"falconPackageHash": "5cc97b235a6ecce837d1b2eb6f989fa463d04fbc6868f5d8521135e5"
|
|
51
50
|
}
|
|
@@ -1,98 +0,0 @@
|
|
|
1
|
-
import { z } from 'zod';
|
|
2
|
-
/**
|
|
3
|
-
* Assertion: the agent must have invoked one or more skills during the run.
|
|
4
|
-
* Checked by inspecting the LLM trace for "Skill" tool uses with the given skills.
|
|
5
|
-
* When multiple skills are in one assertion, they are treated as a group (1 assertion).
|
|
6
|
-
* Each skill in the group must have been called for the assertion to pass.
|
|
7
|
-
* To check skills independently, add them as separate assertions.
|
|
8
|
-
*/
|
|
9
|
-
export declare const SkillWasCalledAssertionSchema: z.ZodObject<{
|
|
10
|
-
type: z.ZodLiteral<"skill_was_called">;
|
|
11
|
-
skillNames: z.ZodArray<z.ZodString>;
|
|
12
|
-
}, z.core.$strip>;
|
|
13
|
-
export type SkillWasCalledAssertion = z.infer<typeof SkillWasCalledAssertionSchema>;
|
|
14
|
-
/**
|
|
15
|
-
* Assertion: a specific tool must have been called with expected parameters.
|
|
16
|
-
* Checked by inspecting the LLM trace for tool calls with matching name and arguments.
|
|
17
|
-
* Each expected param value is matched as a substring against the actual argument value.
|
|
18
|
-
* All expected params must match on the same tool call for the assertion to pass.
|
|
19
|
-
*/
|
|
20
|
-
export declare const ToolCalledWithParamAssertionSchema: z.ZodObject<{
|
|
21
|
-
type: z.ZodLiteral<"tool_called_with_param">;
|
|
22
|
-
toolName: z.ZodString;
|
|
23
|
-
expectedParams: z.ZodString;
|
|
24
|
-
}, z.core.$strip>;
|
|
25
|
-
export type ToolCalledWithParamAssertion = z.infer<typeof ToolCalledWithParamAssertionSchema>;
|
|
26
|
-
/**
|
|
27
|
-
* Assertion: a build command must exit with the expected code (default 0).
|
|
28
|
-
* Runs the command in the scenario working directory.
|
|
29
|
-
*/
|
|
30
|
-
export declare const BuildPassedAssertionSchema: z.ZodObject<{
|
|
31
|
-
type: z.ZodLiteral<"build_passed">;
|
|
32
|
-
command: z.ZodOptional<z.ZodString>;
|
|
33
|
-
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
34
|
-
}, z.core.$strip>;
|
|
35
|
-
export type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;
|
|
36
|
-
/**
|
|
37
|
-
* Assertion: the scenario LLM execution cost must stay within a USD threshold.
|
|
38
|
-
* Checked by reading llmTrace.summary.totalCostUsd.
|
|
39
|
-
*/
|
|
40
|
-
export declare const CostAssertionSchema: z.ZodObject<{
|
|
41
|
-
type: z.ZodLiteral<"cost">;
|
|
42
|
-
maxCostUsd: z.ZodNumber;
|
|
43
|
-
}, z.core.$strip>;
|
|
44
|
-
export type CostAssertion = z.infer<typeof CostAssertionSchema>;
|
|
45
|
-
/**
|
|
46
|
-
* Assertion: an LLM judges the scenario output (score 0-10).
|
|
47
|
-
* Prompt can use {{output}}, {{cwd}}, {{changedFiles}}, {{trace}}.
|
|
48
|
-
* Passes if judge score >= minScore.
|
|
49
|
-
*/
|
|
50
|
-
export declare const LlmJudgeAssertionSchema: z.ZodObject<{
|
|
51
|
-
type: z.ZodLiteral<"llm_judge">;
|
|
52
|
-
prompt: z.ZodString;
|
|
53
|
-
minScore: z.ZodOptional<z.ZodNumber>;
|
|
54
|
-
model: z.ZodOptional<z.ZodString>;
|
|
55
|
-
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
56
|
-
temperature: z.ZodOptional<z.ZodNumber>;
|
|
57
|
-
}, z.core.$strip>;
|
|
58
|
-
export type LlmJudgeAssertion = z.infer<typeof LlmJudgeAssertionSchema>;
|
|
59
|
-
/**
|
|
60
|
-
* Assertion: scenario must complete within a maximum duration.
|
|
61
|
-
* Deterministic check against the scenario execution time.
|
|
62
|
-
*/
|
|
63
|
-
export declare const TimeAssertionSchema: z.ZodObject<{
|
|
64
|
-
type: z.ZodLiteral<"time_limit">;
|
|
65
|
-
maxDurationMs: z.ZodNumber;
|
|
66
|
-
}, z.core.$strip>;
|
|
67
|
-
export type TimeAssertion = z.infer<typeof TimeAssertionSchema>;
|
|
68
|
-
/**
|
|
69
|
-
* Union of all assertion types (per scenario).
|
|
70
|
-
* Each assertion has a type and type-specific data.
|
|
71
|
-
* Uses z.union (not z.discriminatedUnion) for Zod v4 compatibility when used as array element.
|
|
72
|
-
*/
|
|
73
|
-
export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
74
|
-
type: z.ZodLiteral<"skill_was_called">;
|
|
75
|
-
skillNames: z.ZodArray<z.ZodString>;
|
|
76
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
77
|
-
type: z.ZodLiteral<"tool_called_with_param">;
|
|
78
|
-
toolName: z.ZodString;
|
|
79
|
-
expectedParams: z.ZodString;
|
|
80
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
81
|
-
type: z.ZodLiteral<"build_passed">;
|
|
82
|
-
command: z.ZodOptional<z.ZodString>;
|
|
83
|
-
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
84
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
85
|
-
type: z.ZodLiteral<"time_limit">;
|
|
86
|
-
maxDurationMs: z.ZodNumber;
|
|
87
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
88
|
-
type: z.ZodLiteral<"cost">;
|
|
89
|
-
maxCostUsd: z.ZodNumber;
|
|
90
|
-
}, z.core.$strip>, z.ZodObject<{
|
|
91
|
-
type: z.ZodLiteral<"llm_judge">;
|
|
92
|
-
prompt: z.ZodString;
|
|
93
|
-
minScore: z.ZodOptional<z.ZodNumber>;
|
|
94
|
-
model: z.ZodOptional<z.ZodString>;
|
|
95
|
-
maxTokens: z.ZodOptional<z.ZodNumber>;
|
|
96
|
-
temperature: z.ZodOptional<z.ZodNumber>;
|
|
97
|
-
}, z.core.$strip>]>;
|
|
98
|
-
export type Assertion = z.infer<typeof AssertionSchema>;
|