@wix/evalforge-types 0.36.0 → 0.38.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build/index.js +469 -408
- package/build/index.js.map +4 -4
- package/build/index.mjs +463 -408
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +3 -0
- package/build/types/assertion/assertion.d.ts +26 -0
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/common/index.d.ts +1 -0
- package/build/types/common/rule.d.ts +47 -0
- package/build/types/evaluation/eval-run.d.ts +2 -0
- package/build/types/scenario/assertions.d.ts +12 -0
- package/build/types/scenario/test-scenario.d.ts +9 -0
- package/package.json +2 -2
|
@@ -5,6 +5,7 @@ import type { ModelConfig } from '../common/models.js';
|
|
|
5
5
|
import type { LLMTrace } from '../evaluation/metrics.js';
|
|
6
6
|
import type { MCPEntity } from '../common/mcp.js';
|
|
7
7
|
import type { SubAgent } from '../target/sub-agent.js';
|
|
8
|
+
import type { Rule } from '../common/rule.js';
|
|
8
9
|
/**
|
|
9
10
|
* Trace context for live streaming of agent execution.
|
|
10
11
|
* This is agent-agnostic and can be used by any adapter implementation.
|
|
@@ -57,6 +58,8 @@ export interface AgentExecutionContext {
|
|
|
57
58
|
mcps?: MCPEntity[];
|
|
58
59
|
/** Sub-agents to load (when present, written to .claude/agents/*.md) */
|
|
59
60
|
subAgents?: SubAgent[];
|
|
61
|
+
/** Rules to write (CLAUDE.md, AGENTS.md, .cursor/rules/*.md based on ruleType) */
|
|
62
|
+
rules?: Rule[];
|
|
60
63
|
}
|
|
61
64
|
/**
|
|
62
65
|
* Token usage statistics from agent execution.
|
|
@@ -4,11 +4,13 @@ import { z } from 'zod';
|
|
|
4
4
|
* - skill_was_called: Checks if a specific skill was invoked (deterministic, system-level)
|
|
5
5
|
* - build_passed: Runs a command and checks exit code (deterministic, system-level)
|
|
6
6
|
* - time_limit: Checks that scenario completed within a duration threshold (deterministic, system-level)
|
|
7
|
+
* - cost: Checks that scenario LLM cost stays within a USD threshold (deterministic, system-level)
|
|
7
8
|
* - llm_judge: LLM evaluates output with a prompt (LLM-based, user-created)
|
|
8
9
|
*/
|
|
9
10
|
export declare const AssertionTypeSchema: z.ZodEnum<{
|
|
10
11
|
skill_was_called: "skill_was_called";
|
|
11
12
|
build_passed: "build_passed";
|
|
13
|
+
cost: "cost";
|
|
12
14
|
llm_judge: "llm_judge";
|
|
13
15
|
time_limit: "time_limit";
|
|
14
16
|
}>;
|
|
@@ -57,6 +59,15 @@ export declare const SkillWasCalledConfigSchema: z.ZodObject<{
|
|
|
57
59
|
skillNames: z.ZodArray<z.ZodString>;
|
|
58
60
|
}, z.core.$strip>;
|
|
59
61
|
export type SkillWasCalledConfig = z.infer<typeof SkillWasCalledConfigSchema>;
|
|
62
|
+
/**
|
|
63
|
+
* Configuration for cost assertion type.
|
|
64
|
+
* Uses strictObject to reject objects with unknown keys (prevents matching other configs).
|
|
65
|
+
*/
|
|
66
|
+
export declare const CostConfigSchema: z.ZodObject<{
|
|
67
|
+
/** Maximum allowed cost in USD */
|
|
68
|
+
maxCostUsd: z.ZodNumber;
|
|
69
|
+
}, z.core.$strict>;
|
|
70
|
+
export type CostConfig = z.infer<typeof CostConfigSchema>;
|
|
60
71
|
/**
|
|
61
72
|
* Configuration for build_passed assertion type.
|
|
62
73
|
* Uses strictObject to reject objects with unknown keys (prevents matching LlmJudge configs).
|
|
@@ -132,6 +143,9 @@ export declare const AssertionConfigSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
|
132
143
|
}, z.core.$strip>, z.ZodObject<{
|
|
133
144
|
/** Maximum allowed duration in milliseconds */
|
|
134
145
|
maxDurationMs: z.ZodNumber;
|
|
146
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
147
|
+
/** Maximum allowed cost in USD */
|
|
148
|
+
maxCostUsd: z.ZodNumber;
|
|
135
149
|
}, z.core.$strict>, z.ZodObject<{
|
|
136
150
|
/** Command to run (default: "yarn build") */
|
|
137
151
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -154,6 +168,7 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
154
168
|
type: z.ZodEnum<{
|
|
155
169
|
skill_was_called: "skill_was_called";
|
|
156
170
|
build_passed: "build_passed";
|
|
171
|
+
cost: "cost";
|
|
157
172
|
llm_judge: "llm_judge";
|
|
158
173
|
time_limit: "time_limit";
|
|
159
174
|
}>;
|
|
@@ -181,6 +196,9 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
181
196
|
}, z.core.$strip>, z.ZodObject<{
|
|
182
197
|
/** Maximum allowed duration in milliseconds */
|
|
183
198
|
maxDurationMs: z.ZodNumber;
|
|
199
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
200
|
+
/** Maximum allowed cost in USD */
|
|
201
|
+
maxCostUsd: z.ZodNumber;
|
|
184
202
|
}, z.core.$strict>, z.ZodObject<{
|
|
185
203
|
/** Command to run (default: "yarn build") */
|
|
186
204
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -196,6 +214,7 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
196
214
|
type: z.ZodEnum<{
|
|
197
215
|
skill_was_called: "skill_was_called";
|
|
198
216
|
build_passed: "build_passed";
|
|
217
|
+
cost: "cost";
|
|
199
218
|
llm_judge: "llm_judge";
|
|
200
219
|
time_limit: "time_limit";
|
|
201
220
|
}>;
|
|
@@ -226,6 +245,9 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
226
245
|
}, z.core.$strip>, z.ZodObject<{
|
|
227
246
|
/** Maximum allowed duration in milliseconds */
|
|
228
247
|
maxDurationMs: z.ZodNumber;
|
|
248
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
249
|
+
/** Maximum allowed cost in USD */
|
|
250
|
+
maxCostUsd: z.ZodNumber;
|
|
229
251
|
}, z.core.$strict>, z.ZodObject<{
|
|
230
252
|
/** Command to run (default: "yarn build") */
|
|
231
253
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -241,6 +263,7 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
241
263
|
type: z.ZodOptional<z.ZodEnum<{
|
|
242
264
|
skill_was_called: "skill_was_called";
|
|
243
265
|
build_passed: "build_passed";
|
|
266
|
+
cost: "cost";
|
|
244
267
|
llm_judge: "llm_judge";
|
|
245
268
|
time_limit: "time_limit";
|
|
246
269
|
}>>;
|
|
@@ -271,6 +294,9 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
271
294
|
}, z.core.$strip>, z.ZodObject<{
|
|
272
295
|
/** Maximum allowed duration in milliseconds */
|
|
273
296
|
maxDurationMs: z.ZodNumber;
|
|
297
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
298
|
+
/** Maximum allowed cost in USD */
|
|
299
|
+
maxCostUsd: z.ZodNumber;
|
|
274
300
|
}, z.core.$strict>, z.ZodObject<{
|
|
275
301
|
/** Command to run (default: "yarn build") */
|
|
276
302
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -22,6 +22,7 @@ export declare const SYSTEM_ASSERTION_IDS: {
|
|
|
22
22
|
readonly SKILL_WAS_CALLED: "system:skill_was_called";
|
|
23
23
|
readonly BUILD_PASSED: "system:build_passed";
|
|
24
24
|
readonly TIME_LIMIT: "system:time_limit";
|
|
25
|
+
readonly COST: "system:cost";
|
|
25
26
|
readonly LLM_JUDGE: "system:llm_judge";
|
|
26
27
|
};
|
|
27
28
|
export type SystemAssertionId = (typeof SYSTEM_ASSERTION_IDS)[keyof typeof SYSTEM_ASSERTION_IDS];
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
export declare const RuleTypeSchema: z.ZodEnum<{
|
|
3
|
+
"claude-md": "claude-md";
|
|
4
|
+
"agents-md": "agents-md";
|
|
5
|
+
"cursor-rule": "cursor-rule";
|
|
6
|
+
}>;
|
|
7
|
+
export type RuleType = z.infer<typeof RuleTypeSchema>;
|
|
8
|
+
export declare const RuleSchema: z.ZodObject<{
|
|
9
|
+
id: z.ZodString;
|
|
10
|
+
name: z.ZodString;
|
|
11
|
+
description: z.ZodString;
|
|
12
|
+
createdAt: z.ZodString;
|
|
13
|
+
updatedAt: z.ZodString;
|
|
14
|
+
deleted: z.ZodOptional<z.ZodBoolean>;
|
|
15
|
+
projectId: z.ZodString;
|
|
16
|
+
ruleType: z.ZodEnum<{
|
|
17
|
+
"claude-md": "claude-md";
|
|
18
|
+
"agents-md": "agents-md";
|
|
19
|
+
"cursor-rule": "cursor-rule";
|
|
20
|
+
}>;
|
|
21
|
+
content: z.ZodString;
|
|
22
|
+
}, z.core.$strip>;
|
|
23
|
+
export type Rule = z.infer<typeof RuleSchema>;
|
|
24
|
+
export declare const CreateRuleInputSchema: z.ZodObject<{
|
|
25
|
+
name: z.ZodString;
|
|
26
|
+
description: z.ZodString;
|
|
27
|
+
projectId: z.ZodString;
|
|
28
|
+
ruleType: z.ZodEnum<{
|
|
29
|
+
"claude-md": "claude-md";
|
|
30
|
+
"agents-md": "agents-md";
|
|
31
|
+
"cursor-rule": "cursor-rule";
|
|
32
|
+
}>;
|
|
33
|
+
content: z.ZodString;
|
|
34
|
+
}, z.core.$strip>;
|
|
35
|
+
export type CreateRuleInput = z.infer<typeof CreateRuleInputSchema>;
|
|
36
|
+
export declare const UpdateRuleInputSchema: z.ZodObject<{
|
|
37
|
+
name: z.ZodOptional<z.ZodString>;
|
|
38
|
+
description: z.ZodOptional<z.ZodString>;
|
|
39
|
+
projectId: z.ZodOptional<z.ZodString>;
|
|
40
|
+
ruleType: z.ZodOptional<z.ZodEnum<{
|
|
41
|
+
"claude-md": "claude-md";
|
|
42
|
+
"agents-md": "agents-md";
|
|
43
|
+
"cursor-rule": "cursor-rule";
|
|
44
|
+
}>>;
|
|
45
|
+
content: z.ZodOptional<z.ZodString>;
|
|
46
|
+
}, z.core.$strip>;
|
|
47
|
+
export type UpdateRuleInput = z.infer<typeof UpdateRuleInputSchema>;
|
|
@@ -495,6 +495,7 @@ export declare const EvalRunSchema: z.ZodObject<{
|
|
|
495
495
|
jobStatusCheckedAt: z.ZodOptional<z.ZodString>;
|
|
496
496
|
mcpIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
497
497
|
subAgentIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
498
|
+
ruleIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
498
499
|
}, z.core.$strip>;
|
|
499
500
|
export type EvalRun = z.infer<typeof EvalRunSchema>;
|
|
500
501
|
/**
|
|
@@ -612,6 +613,7 @@ export declare const CreateEvalRunInputSchema: z.ZodObject<{
|
|
|
612
613
|
jobStatusCheckedAt: z.ZodOptional<z.ZodString>;
|
|
613
614
|
mcpIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
614
615
|
subAgentIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
616
|
+
ruleIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
615
617
|
}, z.core.$strip>;
|
|
616
618
|
export type CreateEvalRunInput = z.infer<typeof CreateEvalRunInputSchema>;
|
|
617
619
|
/**
|
|
@@ -21,6 +21,15 @@ export declare const BuildPassedAssertionSchema: z.ZodObject<{
|
|
|
21
21
|
expectedExitCode: z.ZodOptional<z.ZodNumber>;
|
|
22
22
|
}, z.core.$strip>;
|
|
23
23
|
export type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;
|
|
24
|
+
/**
|
|
25
|
+
* Assertion: the scenario LLM execution cost must stay within a USD threshold.
|
|
26
|
+
* Checked by reading llmTrace.summary.totalCostUsd.
|
|
27
|
+
*/
|
|
28
|
+
export declare const CostAssertionSchema: z.ZodObject<{
|
|
29
|
+
type: z.ZodLiteral<"cost">;
|
|
30
|
+
maxCostUsd: z.ZodNumber;
|
|
31
|
+
}, z.core.$strip>;
|
|
32
|
+
export type CostAssertion = z.infer<typeof CostAssertionSchema>;
|
|
24
33
|
/**
|
|
25
34
|
* Assertion: an LLM judges the scenario output (score 0-100).
|
|
26
35
|
* Prompt can use {{output}}, {{cwd}}, {{changedFiles}}, {{trace}}.
|
|
@@ -60,6 +69,9 @@ export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
|
60
69
|
}, z.core.$strip>, z.ZodObject<{
|
|
61
70
|
type: z.ZodLiteral<"time_limit">;
|
|
62
71
|
maxDurationMs: z.ZodNumber;
|
|
72
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
73
|
+
type: z.ZodLiteral<"cost">;
|
|
74
|
+
maxCostUsd: z.ZodNumber;
|
|
63
75
|
}, z.core.$strip>, z.ZodObject<{
|
|
64
76
|
type: z.ZodLiteral<"llm_judge">;
|
|
65
77
|
prompt: z.ZodString;
|
|
@@ -35,6 +35,9 @@ export declare const TestScenarioSchema: z.ZodObject<{
|
|
|
35
35
|
}, z.core.$strip>, z.ZodObject<{
|
|
36
36
|
type: z.ZodLiteral<"time_limit">;
|
|
37
37
|
maxDurationMs: z.ZodNumber;
|
|
38
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
39
|
+
type: z.ZodLiteral<"cost">;
|
|
40
|
+
maxCostUsd: z.ZodNumber;
|
|
38
41
|
}, z.core.$strip>, z.ZodObject<{
|
|
39
42
|
type: z.ZodLiteral<"llm_judge">;
|
|
40
43
|
prompt: z.ZodString;
|
|
@@ -70,6 +73,9 @@ export declare const CreateTestScenarioInputSchema: z.ZodObject<{
|
|
|
70
73
|
}, z.core.$strip>, z.ZodObject<{
|
|
71
74
|
type: z.ZodLiteral<"time_limit">;
|
|
72
75
|
maxDurationMs: z.ZodNumber;
|
|
76
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
77
|
+
type: z.ZodLiteral<"cost">;
|
|
78
|
+
maxCostUsd: z.ZodNumber;
|
|
73
79
|
}, z.core.$strip>, z.ZodObject<{
|
|
74
80
|
type: z.ZodLiteral<"llm_judge">;
|
|
75
81
|
prompt: z.ZodString;
|
|
@@ -105,6 +111,9 @@ export declare const UpdateTestScenarioInputSchema: z.ZodObject<{
|
|
|
105
111
|
}, z.core.$strip>, z.ZodObject<{
|
|
106
112
|
type: z.ZodLiteral<"time_limit">;
|
|
107
113
|
maxDurationMs: z.ZodNumber;
|
|
114
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
115
|
+
type: z.ZodLiteral<"cost">;
|
|
116
|
+
maxCostUsd: z.ZodNumber;
|
|
108
117
|
}, z.core.$strip>, z.ZodObject<{
|
|
109
118
|
type: z.ZodLiteral<"llm_judge">;
|
|
110
119
|
prompt: z.ZodString;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-types",
|
|
3
|
-
"version": "0.36.0",
|
|
3
|
+
"version": "0.38.0",
|
|
4
4
|
"description": "Unified types for EvalForge agent evaluation system",
|
|
5
5
|
"files": [
|
|
6
6
|
"build"
|
|
@@ -47,5 +47,5 @@
|
|
|
47
47
|
"artifactId": "evalforge-types"
|
|
48
48
|
}
|
|
49
49
|
},
|
|
50
|
-
"falconPackageHash": "
|
|
50
|
+
"falconPackageHash": "dc3f87434a7b2a1b350369f78ca98d0fc64183d39b35b8a111054878"
|
|
51
51
|
}
|