@wix/evalforge-types 0.37.0 → 0.39.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/build/index.js +496 -413
- package/build/index.js.map +4 -4
- package/build/index.mjs +489 -413
- package/build/index.mjs.map +4 -4
- package/build/types/agent/adapter.d.ts +3 -0
- package/build/types/assertion/assertion.d.ts +34 -0
- package/build/types/assertion/system-assertions.d.ts +1 -0
- package/build/types/common/index.d.ts +2 -0
- package/build/types/common/rule.d.ts +47 -0
- package/build/types/common/tool-names.d.ts +1 -0
- package/build/types/evaluation/eval-run.d.ts +2 -0
- package/build/types/scenario/assertions.d.ts +16 -0
- package/build/types/scenario/test-scenario.d.ts +12 -0
- package/package.json +2 -2
|
@@ -5,6 +5,7 @@ import type { ModelConfig } from '../common/models.js';
|
|
|
5
5
|
import type { LLMTrace } from '../evaluation/metrics.js';
|
|
6
6
|
import type { MCPEntity } from '../common/mcp.js';
|
|
7
7
|
import type { SubAgent } from '../target/sub-agent.js';
|
|
8
|
+
import type { Rule } from '../common/rule.js';
|
|
8
9
|
/**
|
|
9
10
|
* Trace context for live streaming of agent execution.
|
|
10
11
|
* This is agent-agnostic and can be used by any adapter implementation.
|
|
@@ -57,6 +58,8 @@ export interface AgentExecutionContext {
|
|
|
57
58
|
mcps?: MCPEntity[];
|
|
58
59
|
/** Sub-agents to load (when present, written to .claude/agents/*.md) */
|
|
59
60
|
subAgents?: SubAgent[];
|
|
61
|
+
/** Rules to write (CLAUDE.md, AGENTS.md, .cursor/rules/*.md based on ruleType) */
|
|
62
|
+
rules?: Rule[];
|
|
60
63
|
}
|
|
61
64
|
/**
|
|
62
65
|
* Token usage statistics from agent execution.
|
|
@@ -2,6 +2,7 @@ import { z } from 'zod';
|
|
|
2
2
|
/**
|
|
3
3
|
* Assertion types:
|
|
4
4
|
* - skill_was_called: Checks if a specific skill was invoked (deterministic, system-level)
|
|
5
|
+
* - tool_called_with_param: Checks if a tool was called with expected parameters (deterministic, system-level)
|
|
5
6
|
* - build_passed: Runs a command and checks exit code (deterministic, system-level)
|
|
6
7
|
* - time_limit: Checks that scenario completed within a duration threshold (deterministic, system-level)
|
|
7
8
|
* - cost: Checks that scenario LLM cost stays within a USD threshold (deterministic, system-level)
|
|
@@ -9,6 +10,7 @@ import { z } from 'zod';
|
|
|
9
10
|
*/
|
|
10
11
|
export declare const AssertionTypeSchema: z.ZodEnum<{
|
|
11
12
|
skill_was_called: "skill_was_called";
|
|
13
|
+
tool_called_with_param: "tool_called_with_param";
|
|
12
14
|
build_passed: "build_passed";
|
|
13
15
|
cost: "cost";
|
|
14
16
|
llm_judge: "llm_judge";
|
|
@@ -68,6 +70,15 @@ export declare const CostConfigSchema: z.ZodObject<{
|
|
|
68
70
|
maxCostUsd: z.ZodNumber;
|
|
69
71
|
}, z.core.$strict>;
|
|
70
72
|
export type CostConfig = z.infer<typeof CostConfigSchema>;
|
|
73
|
+
/** Configuration for tool_called_with_param assertion type.
|
|
74
|
+
* Uses strictObject to reject objects with unknown keys.
|
|
75
|
+
*/
|
|
76
|
+
export declare const ToolCalledWithParamConfigSchema: z.ZodObject<{
|
|
77
|
+
/** Name of the tool that must have been called */
|
|
78
|
+
toolName: z.ZodString;
|
|
79
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
80
|
+
expectedParams: z.ZodString;
|
|
81
|
+
}, z.core.$strict>;
|
|
71
82
|
/**
|
|
72
83
|
* Configuration for build_passed assertion type.
|
|
73
84
|
* Uses strictObject to reject objects with unknown keys (prevents matching LlmJudge configs).
|
|
@@ -141,6 +152,11 @@ export declare const AssertionConfigSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
|
141
152
|
}, z.core.$strip>, z.ZodObject<{
|
|
142
153
|
skillNames: z.ZodArray<z.ZodString>;
|
|
143
154
|
}, z.core.$strip>, z.ZodObject<{
|
|
155
|
+
/** Name of the tool that must have been called */
|
|
156
|
+
toolName: z.ZodString;
|
|
157
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
158
|
+
expectedParams: z.ZodString;
|
|
159
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
144
160
|
/** Maximum allowed duration in milliseconds */
|
|
145
161
|
maxDurationMs: z.ZodNumber;
|
|
146
162
|
}, z.core.$strict>, z.ZodObject<{
|
|
@@ -167,6 +183,7 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
167
183
|
projectId: z.ZodString;
|
|
168
184
|
type: z.ZodEnum<{
|
|
169
185
|
skill_was_called: "skill_was_called";
|
|
186
|
+
tool_called_with_param: "tool_called_with_param";
|
|
170
187
|
build_passed: "build_passed";
|
|
171
188
|
cost: "cost";
|
|
172
189
|
llm_judge: "llm_judge";
|
|
@@ -194,6 +211,11 @@ export declare const CustomAssertionSchema: z.ZodObject<{
|
|
|
194
211
|
}, z.core.$strip>, z.ZodObject<{
|
|
195
212
|
skillNames: z.ZodArray<z.ZodString>;
|
|
196
213
|
}, z.core.$strip>, z.ZodObject<{
|
|
214
|
+
/** Name of the tool that must have been called */
|
|
215
|
+
toolName: z.ZodString;
|
|
216
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
217
|
+
expectedParams: z.ZodString;
|
|
218
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
197
219
|
/** Maximum allowed duration in milliseconds */
|
|
198
220
|
maxDurationMs: z.ZodNumber;
|
|
199
221
|
}, z.core.$strict>, z.ZodObject<{
|
|
@@ -213,6 +235,7 @@ export type CustomAssertion = z.infer<typeof CustomAssertionSchema>;
|
|
|
213
235
|
export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
214
236
|
type: z.ZodEnum<{
|
|
215
237
|
skill_was_called: "skill_was_called";
|
|
238
|
+
tool_called_with_param: "tool_called_with_param";
|
|
216
239
|
build_passed: "build_passed";
|
|
217
240
|
cost: "cost";
|
|
218
241
|
llm_judge: "llm_judge";
|
|
@@ -243,6 +266,11 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
243
266
|
}, z.core.$strip>, z.ZodObject<{
|
|
244
267
|
skillNames: z.ZodArray<z.ZodString>;
|
|
245
268
|
}, z.core.$strip>, z.ZodObject<{
|
|
269
|
+
/** Name of the tool that must have been called */
|
|
270
|
+
toolName: z.ZodString;
|
|
271
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
272
|
+
expectedParams: z.ZodString;
|
|
273
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
246
274
|
/** Maximum allowed duration in milliseconds */
|
|
247
275
|
maxDurationMs: z.ZodNumber;
|
|
248
276
|
}, z.core.$strict>, z.ZodObject<{
|
|
@@ -262,6 +290,7 @@ export type CreateCustomAssertionInput = z.infer<typeof CreateCustomAssertionInp
|
|
|
262
290
|
export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
263
291
|
type: z.ZodOptional<z.ZodEnum<{
|
|
264
292
|
skill_was_called: "skill_was_called";
|
|
293
|
+
tool_called_with_param: "tool_called_with_param";
|
|
265
294
|
build_passed: "build_passed";
|
|
266
295
|
cost: "cost";
|
|
267
296
|
llm_judge: "llm_judge";
|
|
@@ -292,6 +321,11 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
|
|
|
292
321
|
}, z.core.$strip>, z.ZodObject<{
|
|
293
322
|
skillNames: z.ZodArray<z.ZodString>;
|
|
294
323
|
}, z.core.$strip>, z.ZodObject<{
|
|
324
|
+
/** Name of the tool that must have been called */
|
|
325
|
+
toolName: z.ZodString;
|
|
326
|
+
/** JSON string of key-value pairs for expected parameters (substring match) */
|
|
327
|
+
expectedParams: z.ZodString;
|
|
328
|
+
}, z.core.$strict>, z.ZodObject<{
|
|
295
329
|
/** Maximum allowed duration in milliseconds */
|
|
296
330
|
maxDurationMs: z.ZodNumber;
|
|
297
331
|
}, z.core.$strict>, z.ZodObject<{
|
|
@@ -20,6 +20,7 @@ export interface SystemAssertion {
|
|
|
20
20
|
*/
|
|
21
21
|
export declare const SYSTEM_ASSERTION_IDS: {
|
|
22
22
|
readonly SKILL_WAS_CALLED: "system:skill_was_called";
|
|
23
|
+
readonly TOOL_CALLED_WITH_PARAM: "system:tool_called_with_param";
|
|
23
24
|
readonly BUILD_PASSED: "system:build_passed";
|
|
24
25
|
readonly TIME_LIMIT: "system:time_limit";
|
|
25
26
|
readonly COST: "system:cost";
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
import { z } from 'zod';
|
|
2
|
+
export declare const RuleTypeSchema: z.ZodEnum<{
|
|
3
|
+
"claude-md": "claude-md";
|
|
4
|
+
"agents-md": "agents-md";
|
|
5
|
+
"cursor-rule": "cursor-rule";
|
|
6
|
+
}>;
|
|
7
|
+
export type RuleType = z.infer<typeof RuleTypeSchema>;
|
|
8
|
+
export declare const RuleSchema: z.ZodObject<{
|
|
9
|
+
id: z.ZodString;
|
|
10
|
+
name: z.ZodString;
|
|
11
|
+
description: z.ZodString;
|
|
12
|
+
createdAt: z.ZodString;
|
|
13
|
+
updatedAt: z.ZodString;
|
|
14
|
+
deleted: z.ZodOptional<z.ZodBoolean>;
|
|
15
|
+
projectId: z.ZodString;
|
|
16
|
+
ruleType: z.ZodEnum<{
|
|
17
|
+
"claude-md": "claude-md";
|
|
18
|
+
"agents-md": "agents-md";
|
|
19
|
+
"cursor-rule": "cursor-rule";
|
|
20
|
+
}>;
|
|
21
|
+
content: z.ZodString;
|
|
22
|
+
}, z.core.$strip>;
|
|
23
|
+
export type Rule = z.infer<typeof RuleSchema>;
|
|
24
|
+
export declare const CreateRuleInputSchema: z.ZodObject<{
|
|
25
|
+
name: z.ZodString;
|
|
26
|
+
description: z.ZodString;
|
|
27
|
+
projectId: z.ZodString;
|
|
28
|
+
ruleType: z.ZodEnum<{
|
|
29
|
+
"claude-md": "claude-md";
|
|
30
|
+
"agents-md": "agents-md";
|
|
31
|
+
"cursor-rule": "cursor-rule";
|
|
32
|
+
}>;
|
|
33
|
+
content: z.ZodString;
|
|
34
|
+
}, z.core.$strip>;
|
|
35
|
+
export type CreateRuleInput = z.infer<typeof CreateRuleInputSchema>;
|
|
36
|
+
export declare const UpdateRuleInputSchema: z.ZodObject<{
|
|
37
|
+
name: z.ZodOptional<z.ZodString>;
|
|
38
|
+
description: z.ZodOptional<z.ZodString>;
|
|
39
|
+
projectId: z.ZodOptional<z.ZodString>;
|
|
40
|
+
ruleType: z.ZodOptional<z.ZodEnum<{
|
|
41
|
+
"claude-md": "claude-md";
|
|
42
|
+
"agents-md": "agents-md";
|
|
43
|
+
"cursor-rule": "cursor-rule";
|
|
44
|
+
}>>;
|
|
45
|
+
content: z.ZodOptional<z.ZodString>;
|
|
46
|
+
}, z.core.$strip>;
|
|
47
|
+
export type UpdateRuleInput = z.infer<typeof UpdateRuleInputSchema>;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export declare const AVAILABLE_TOOL_NAMES: readonly ["Bash", "Edit", "Glob", "Grep", "Read", "Skill", "Write"];
|
|
@@ -495,6 +495,7 @@ export declare const EvalRunSchema: z.ZodObject<{
|
|
|
495
495
|
jobStatusCheckedAt: z.ZodOptional<z.ZodString>;
|
|
496
496
|
mcpIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
497
497
|
subAgentIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
498
|
+
ruleIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
498
499
|
}, z.core.$strip>;
|
|
499
500
|
export type EvalRun = z.infer<typeof EvalRunSchema>;
|
|
500
501
|
/**
|
|
@@ -612,6 +613,7 @@ export declare const CreateEvalRunInputSchema: z.ZodObject<{
|
|
|
612
613
|
jobStatusCheckedAt: z.ZodOptional<z.ZodString>;
|
|
613
614
|
mcpIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
614
615
|
subAgentIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
616
|
+
ruleIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
|
|
615
617
|
}, z.core.$strip>;
|
|
616
618
|
export type CreateEvalRunInput = z.infer<typeof CreateEvalRunInputSchema>;
|
|
617
619
|
/**
|
|
@@ -11,6 +11,18 @@ export declare const SkillWasCalledAssertionSchema: z.ZodObject<{
|
|
|
11
11
|
skillNames: z.ZodArray<z.ZodString>;
|
|
12
12
|
}, z.core.$strip>;
|
|
13
13
|
export type SkillWasCalledAssertion = z.infer<typeof SkillWasCalledAssertionSchema>;
|
|
14
|
+
/**
|
|
15
|
+
* Assertion: a specific tool must have been called with expected parameters.
|
|
16
|
+
* Checked by inspecting the LLM trace for tool calls with matching name and arguments.
|
|
17
|
+
* Each expected param value is matched as a substring against the actual argument value.
|
|
18
|
+
* All expected params must match on the same tool call for the assertion to pass.
|
|
19
|
+
*/
|
|
20
|
+
export declare const ToolCalledWithParamAssertionSchema: z.ZodObject<{
|
|
21
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
22
|
+
toolName: z.ZodString;
|
|
23
|
+
expectedParams: z.ZodString;
|
|
24
|
+
}, z.core.$strip>;
|
|
25
|
+
export type ToolCalledWithParamAssertion = z.infer<typeof ToolCalledWithParamAssertionSchema>;
|
|
14
26
|
/**
|
|
15
27
|
* Assertion: a build command must exit with the expected code (default 0).
|
|
16
28
|
* Runs the command in the scenario working directory.
|
|
@@ -62,6 +74,10 @@ export type TimeAssertion = z.infer<typeof TimeAssertionSchema>;
|
|
|
62
74
|
export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
|
|
63
75
|
type: z.ZodLiteral<"skill_was_called">;
|
|
64
76
|
skillNames: z.ZodArray<z.ZodString>;
|
|
77
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
78
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
79
|
+
toolName: z.ZodString;
|
|
80
|
+
expectedParams: z.ZodString;
|
|
65
81
|
}, z.core.$strip>, z.ZodObject<{
|
|
66
82
|
type: z.ZodLiteral<"build_passed">;
|
|
67
83
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -28,6 +28,10 @@ export declare const TestScenarioSchema: z.ZodObject<{
|
|
|
28
28
|
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
29
29
|
type: z.ZodLiteral<"skill_was_called">;
|
|
30
30
|
skillNames: z.ZodArray<z.ZodString>;
|
|
31
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
32
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
33
|
+
toolName: z.ZodString;
|
|
34
|
+
expectedParams: z.ZodString;
|
|
31
35
|
}, z.core.$strip>, z.ZodObject<{
|
|
32
36
|
type: z.ZodLiteral<"build_passed">;
|
|
33
37
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -66,6 +70,10 @@ export declare const CreateTestScenarioInputSchema: z.ZodObject<{
|
|
|
66
70
|
assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
67
71
|
type: z.ZodLiteral<"skill_was_called">;
|
|
68
72
|
skillNames: z.ZodArray<z.ZodString>;
|
|
73
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
74
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
75
|
+
toolName: z.ZodString;
|
|
76
|
+
expectedParams: z.ZodString;
|
|
69
77
|
}, z.core.$strip>, z.ZodObject<{
|
|
70
78
|
type: z.ZodLiteral<"build_passed">;
|
|
71
79
|
command: z.ZodOptional<z.ZodString>;
|
|
@@ -104,6 +112,10 @@ export declare const UpdateTestScenarioInputSchema: z.ZodObject<{
|
|
|
104
112
|
assertions: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
|
|
105
113
|
type: z.ZodLiteral<"skill_was_called">;
|
|
106
114
|
skillNames: z.ZodArray<z.ZodString>;
|
|
115
|
+
}, z.core.$strip>, z.ZodObject<{
|
|
116
|
+
type: z.ZodLiteral<"tool_called_with_param">;
|
|
117
|
+
toolName: z.ZodString;
|
|
118
|
+
expectedParams: z.ZodString;
|
|
107
119
|
}, z.core.$strip>, z.ZodObject<{
|
|
108
120
|
type: z.ZodLiteral<"build_passed">;
|
|
109
121
|
command: z.ZodOptional<z.ZodString>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-types",
|
|
3
|
-
"version": "0.37.0",
|
|
3
|
+
"version": "0.39.0",
|
|
4
4
|
"description": "Unified types for EvalForge agent evaluation system",
|
|
5
5
|
"files": [
|
|
6
6
|
"build"
|
|
@@ -47,5 +47,5 @@
|
|
|
47
47
|
"artifactId": "evalforge-types"
|
|
48
48
|
}
|
|
49
49
|
},
|
|
50
|
-
"falconPackageHash": "
|
|
50
|
+
"falconPackageHash": "a8c46ce4c7e60fa96cb1a06746bcd9fbfdd42cbf1f8be3200348eb2a"
|
|
51
51
|
}
|