@wix/evalforge-types 0.36.0 → 0.37.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +35 -0
- package/build/index.js.map +2 -2
- package/build/index.mjs +33 -0
- package/build/index.mjs.map +2 -2
- package/build/types/assertion/assertion.d.ts +26 -0
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/scenario/assertions.d.ts +12 -0
- package/build/types/scenario/test-scenario.d.ts +9 -0
- package/package.json +2 -2
|
@@ -4,11 +4,13 @@ import { z } from 'zod';
|
|
|
4
4
|
* - skill_was_called: Checks if a specific skill was invoked (deterministic, system-level)
|
|
5
5
|
* - build_passed: Runs a command and checks exit code (deterministic, system-level)
|
|
6
6
|
* - time_limit: Checks that scenario completed within a duration threshold (deterministic, system-level)
|
|
7
|
+
* - cost: Checks that scenario LLM cost stays within a USD threshold (deterministic, system-level)
|
|
7
8
|
* - llm_judge: LLM evaluates output with a prompt (LLM-based, user-created)
|
|
8
9
|
*/
|
|
9
10
|
export declare const AssertionTypeSchema: z.ZodEnum<{
|
|
10
11
|
skill_was_called: "skill_was_called";
|
|
11
12
|
build_passed: "build_passed";
|
|
13
|
+
cost: "cost";
|
|
12
14
|
llm_judge: "llm_judge";
|
|
13
15
|
time_limit: "time_limit";
|
|
14
16
|
}>;
|
|
@@ -57,6 +59,15 @@ export declare const SkillWasCalledConfigSchema: z.ZodObject<{
|
|
|
57
59
|
skillNames: z.ZodArray<z.ZodString>;
|
|
58
60
|
}, z.core.$strip>;
|
|
59
61
|
export type SkillWasCalledConfig = z.infer<typeof SkillWasCalledConfigSchema>;
|
|
62
|
+
/**
|
|
63
|
+
* Configuration for cost assertion type.
|
|
64
|
+
* Uses strictObject to reject objects with unknown keys (prevents matching other configs).
|
|
65
|
+
*/
|
|
66
|
+
export declare const CostConfigSchema: z.ZodObject<{
|
|
67
|
+
/** Maximum allowed cost in USD */
|
|
68
|
+
maxCostUsd: z.ZodNumber;
|
|
69
|
+
}, z.core.$strict>;
|
|
70
|
+
export type CostConfig = z.infer<typeof CostConfigSchema>;
|
|
60
71
|
/**
|
|
61
72
|
* Configuration for build_passed assertion type.
|
|
62
73
|
* Uses strictObject to reject objects with unknown keys (prevents matching LlmJudge configs).
|
|
@@ -132,6 +143,9 @@ export declare const AssertionConfigSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
|
132
143
|
}, z.core.$strip>, z.ZodObject<{
|
|
133
144
|
/** Maximum allowed duration in milliseconds */
|
|
134
145
|
maxDurationMs: z.ZodNumber;
|
|
146
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
147
|
+
/** Maximum allowed cost in USD */
|
|
148
|
+
maxCostUsd: z.ZodNumber;
|
|
135
149
|
}, z.core.$strict>, z.ZodObject<{
|
|
136
150
|
/** Command to run (default: "yarn build") */
|
|
137
151
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -154,6 +168,7 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
154
168
|
type: z.ZodEnum<{
|
|
155
169
|
skill_was_called: "skill_was_called";
|
|
156
170
|
build_passed: "build_passed";
|
|
171
|
+
cost: "cost";
|
|
157
172
|
llm_judge: "llm_judge";
|
|
158
173
|
time_limit: "time_limit";
|
|
159
174
|
}>;
|
|
@@ -181,6 +196,9 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
181
196
|
}, z.core.$strip>, z.ZodObject<{
|
|
182
197
|
/** Maximum allowed duration in milliseconds */
|
|
183
198
|
maxDurationMs: z.ZodNumber;
|
|
199
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
200
|
+
/** Maximum allowed cost in USD */
|
|
201
|
+
maxCostUsd: z.ZodNumber;
|
|
184
202
|
}, z.core.$strict>, z.ZodObject<{
|
|
185
203
|
/** Command to run (default: "yarn build") */
|
|
186
204
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -196,6 +214,7 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
196
214
|
type: z.ZodEnum<{
|
|
197
215
|
skill_was_called: "skill_was_called";
|
|
198
216
|
build_passed: "build_passed";
|
|
217
|
+
cost: "cost";
|
|
199
218
|
llm_judge: "llm_judge";
|
|
200
219
|
time_limit: "time_limit";
|
|
201
220
|
}>;
|
|
@@ -226,6 +245,9 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
226
245
|
}, z.core.$strip>, z.ZodObject<{
|
|
227
246
|
/** Maximum allowed duration in milliseconds */
|
|
228
247
|
maxDurationMs: z.ZodNumber;
|
|
248
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
249
|
+
/** Maximum allowed cost in USD */
|
|
250
|
+
maxCostUsd: z.ZodNumber;
|
|
229
251
|
}, z.core.$strict>, z.ZodObject<{
|
|
230
252
|
/** Command to run (default: "yarn build") */
|
|
231
253
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -241,6 +263,7 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
241
263
|
type: z.ZodOptional<z.ZodEnum<{
|
|
242
264
|
skill_was_called: "skill_was_called";
|
|
243
265
|
build_passed: "build_passed";
|
|
266
|
+
cost: "cost";
|
|
244
267
|
llm_judge: "llm_judge";
|
|
245
268
|
time_limit: "time_limit";
|
|
246
269
|
}>>;
|
|
@@ -271,6 +294,9 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
271
294
|
}, z.core.$strip>, z.ZodObject<{
|
|
272
295
|
/** Maximum allowed duration in milliseconds */
|
|
273
296
|
maxDurationMs: z.ZodNumber;
|
|
297
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
298
|
+
/** Maximum allowed cost in USD */
|
|
299
|
+
maxCostUsd: z.ZodNumber;
|
|
274
300
|
}, z.core.$strict>, z.ZodObject<{
|
|
275
301
|
/** Command to run (default: "yarn build") */
|
|
276
302
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -22,6 +22,7 @@ export declare const SYSTEM_ASSERTION_IDS: {
|
|
|
22
22
|
readonly SKILL_WAS_CALLED: "system:skill_was_called";
|
|
23
23
|
readonly BUILD_PASSED: "system:build_passed";
|
|
24
24
|
readonly TIME_LIMIT: "system:time_limit";
|
|
25
|
+
readonly COST: "system:cost";
|
|
25
26
|
readonly LLM_JUDGE: "system:llm_judge";
|
|
26
27
|
};
|
|
27
28
|
export type SystemAssertionId = (typeof SYSTEM_ASSERTION_IDS)[keyof typeof SYSTEM_ASSERTION_IDS];
|
|
@@ -21,6 +21,15 @@ export declare const BuildPassedAssertionSchema: z.ZodObject<{
|
|
|
21
21
|
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
22
22
|
}, z.core.$strip>;
|
|
23
23
|
export type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;
|
|
24
|
+
/**
|
|
25
|
+
* Assertion: the scenario LLM execution cost must stay within a USD threshold.
|
|
26
|
+
* Checked by reading llmTrace.summary.totalCostUsd.
|
|
27
|
+
*/
|
|
28
|
+
export declare const CostAssertionSchema: z.ZodObject<{
|
|
29
|
+
type: z.ZodLiteral<"cost">;
|
|
30
|
+
maxCostUsd: z.ZodNumber;
|
|
31
|
+
}, z.core.$strip>;
|
|
32
|
+
export type CostAssertion = z.infer<typeof CostAssertionSchema>;
|
|
24
33
|
/**
|
|
25
34
|
* Assertion: an LLM judges the scenario output (score 0-100).
|
|
26
35
|
* Prompt can use {{output}}, {{cwd}}, {{changedFiles}}, {{trace}}.
|
|
@@ -60,6 +69,9 @@ export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
|
60
69
|
}, z.core.$strip>, z.ZodObject<{
|
|
61
70
|
type: z.ZodLiteral<"time_limit">;
|
|
62
71
|
maxDurationMs: z.ZodNumber;
|
|
72
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
73
|
+
type: z.ZodLiteral<"cost">;
|
|
74
|
+
maxCostUsd: z.ZodNumber;
|
|
63
75
|
}, z.core.$strip>, z.ZodObject<{
|
|
64
76
|
type: z.ZodLiteral<"llm_judge">;
|
|
65
77
|
prompt: z.ZodString;
|
|
@@ -35,6 +35,9 @@ export declare const TestScenarioSchema: z.ZodObject<{
|
|
|
35
35
|
}, z.core.$strip>, z.ZodObject<{
|
|
36
36
|
type: z.ZodLiteral<"time_limit">;
|
|
37
37
|
maxDurationMs: z.ZodNumber;
|
|
38
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
39
|
+
type: z.ZodLiteral<"cost">;
|
|
40
|
+
maxCostUsd: z.ZodNumber;
|
|
38
41
|
}, z.core.$strip>, z.ZodObject<{
|
|
39
42
|
type: z.ZodLiteral<"llm_judge">;
|
|
40
43
|
prompt: z.ZodString;
|
|
@@ -70,6 +73,9 @@ export declare const CreateTestScenarioInputSchema: z.ZodObject<{
|
|
|
70
73
|
}, z.core.$strip>, z.ZodObject<{
|
|
71
74
|
type: z.ZodLiteral<"time_limit">;
|
|
72
75
|
maxDurationMs: z.ZodNumber;
|
|
76
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
77
|
+
type: z.ZodLiteral<"cost">;
|
|
78
|
+
maxCostUsd: z.ZodNumber;
|
|
73
79
|
}, z.core.$strip>, z.ZodObject<{
|
|
74
80
|
type: z.ZodLiteral<"llm_judge">;
|
|
75
81
|
prompt: z.ZodString;
|
|
@@ -105,6 +111,9 @@ export declare const UpdateTestScenarioInputSchema: z.ZodObject<{
|
|
|
105
111
|
}, z.core.$strip>, z.ZodObject<{
|
|
106
112
|
type: z.ZodLiteral<"time_limit">;
|
|
107
113
|
maxDurationMs: z.ZodNumber;
|
|
114
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
115
|
+
type: z.ZodLiteral<"cost">;
|
|
116
|
+
maxCostUsd: z.ZodNumber;
|
|
108
117
|
}, z.core.$strip>, z.ZodObject<{
|
|
109
118
|
type: z.ZodLiteral<"llm_judge">;
|
|
110
119
|
prompt: z.ZodString;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-types",
|
|
3
|
-
"version": "0.36.0",
|
|
3
|
+
"version": "0.37.0",
|
|
4
4
|
"description": "Unified types for EvalForge agent evaluation system",
|
|
5
5
|
"files": [
|
|
6
6
|
"build"
|
|
@@ -47,5 +47,5 @@
|
|
|
47
47
|
"artifactId": "evalforge-types"
|
|
48
48
|
}
|
|
49
49
|
},
|
|
50
|
-
"falconPackageHash": "
|
|
50
|
+
"falconPackageHash": "57c752f025e2a8ccb67557327757571462f50d8acadc4a28877770af"
|
|
51
51
|
}
|