@wix/evalforge-types 0.37.0 → 0.39.0

This diff shows the differences between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions exactly as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import type { ModelConfig } from '../common/models.js';
5
5
  import type { LLMTrace } from '../evaluation/metrics.js';
6
6
  import type { MCPEntity } from '../common/mcp.js';
7
7
  import type { SubAgent } from '../target/sub-agent.js';
8
+ import type { Rule } from '../common/rule.js';
8
9
  /**
9
10
  * Trace context for live streaming of agent execution.
10
11
  * This is agent-agnostic and can be used by any adapter implementation.
@@ -57,6 +58,8 @@ export interface AgentExecutionContext {
57
58
  mcps?: MCPEntity[];
58
59
  /** Sub-agents to load (when present, written to .claude/agents/*.md) */
59
60
  subAgents?: SubAgent[];
61
+ /** Rules to write (CLAUDE.md, AGENTS.md, .cursor/rules/*.md based on ruleType) */
62
+ rules?: Rule[];
60
63
  }
61
64
  /**
62
65
  * Token usage statistics from agent execution.
@@ -2,6 +2,7 @@ import { z } from 'zod';
2
2
  /**
3
3
  * Assertion types:
4
4
  * - skill_was_called: Checks if a specific skill was invoked (deterministic, system-level)
5
+ * - tool_called_with_param: Checks if a tool was called with expected parameters (deterministic, system-level)
5
6
  * - build_passed: Runs a command and checks exit code (deterministic, system-level)
6
7
  * - time_limit: Checks that scenario completed within a duration threshold (deterministic, system-level)
7
8
  * - cost: Checks that scenario LLM cost stays within a USD threshold (deterministic, system-level)
@@ -9,6 +10,7 @@ import { z } from 'zod';
9
10
  */
10
11
  export declare const AssertionTypeSchema: z.ZodEnum<{
11
12
  skill_was_called: "skill_was_called";
13
+ tool_called_with_param: "tool_called_with_param";
12
14
  build_passed: "build_passed";
13
15
  cost: "cost";
14
16
  llm_judge: "llm_judge";
@@ -68,6 +70,15 @@ export declare const CostConfigSchema: z.ZodObject<{
68
70
  maxCostUsd: z.ZodNumber;
69
71
  }, z.core.$strict>;
70
72
  export type CostConfig = z.infer<typeof CostConfigSchema>;
73
+ /** Configuration for tool_called_with_param assertion type.
74
+ * Uses strictObject to reject objects with unknown keys.
75
+ */
76
+ export declare const ToolCalledWithParamConfigSchema: z.ZodObject<{
77
+ /** Name of the tool that must have been called */
78
+ toolName: z.ZodString;
79
+ /** JSON string of key-value pairs for expected parameters (substring match) */
80
+ expectedParams: z.ZodString;
81
+ }, z.core.$strict>;
71
82
  /**
72
83
  * Configuration for build_passed assertion type.
73
84
  * Uses strictObject to reject objects with unknown keys (prevents matching LlmJudge configs).
@@ -141,6 +152,11 @@ export declare const AssertionConfigSchema: z.ZodUnion<readonly [z.ZodObject<{
141
152
  }, z.core.$strip>, z.ZodObject<{
142
153
  skillNames: z.ZodArray<z.ZodString>;
143
154
  }, z.core.$strip>, z.ZodObject<{
155
+ /** Name of the tool that must have been called */
156
+ toolName: z.ZodString;
157
+ /** JSON string of key-value pairs for expected parameters (substring match) */
158
+ expectedParams: z.ZodString;
159
+ }, z.core.$strict>, z.ZodObject<{
144
160
  /** Maximum allowed duration in milliseconds */
145
161
  maxDurationMs: z.ZodNumber;
146
162
  }, z.core.$strict>, z.ZodObject<{
@@ -167,6 +183,7 @@ export declare const CustomAssertionSchema: z.ZodObject<{
167
183
  projectId: z.ZodString;
168
184
  type: z.ZodEnum<{
169
185
  skill_was_called: "skill_was_called";
186
+ tool_called_with_param: "tool_called_with_param";
170
187
  build_passed: "build_passed";
171
188
  cost: "cost";
172
189
  llm_judge: "llm_judge";
@@ -194,6 +211,11 @@ export declare const CustomAssertionSchema: z.ZodObject<{
194
211
  }, z.core.$strip>, z.ZodObject<{
195
212
  skillNames: z.ZodArray<z.ZodString>;
196
213
  }, z.core.$strip>, z.ZodObject<{
214
+ /** Name of the tool that must have been called */
215
+ toolName: z.ZodString;
216
+ /** JSON string of key-value pairs for expected parameters (substring match) */
217
+ expectedParams: z.ZodString;
218
+ }, z.core.$strict>, z.ZodObject<{
197
219
  /** Maximum allowed duration in milliseconds */
198
220
  maxDurationMs: z.ZodNumber;
199
221
  }, z.core.$strict>, z.ZodObject<{
@@ -213,6 +235,7 @@ export type CustomAssertion = z.infer<typeof CustomAssertionSchema>;
213
235
  export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
214
236
  type: z.ZodEnum<{
215
237
  skill_was_called: "skill_was_called";
238
+ tool_called_with_param: "tool_called_with_param";
216
239
  build_passed: "build_passed";
217
240
  cost: "cost";
218
241
  llm_judge: "llm_judge";
@@ -243,6 +266,11 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
243
266
  }, z.core.$strip>, z.ZodObject<{
244
267
  skillNames: z.ZodArray<z.ZodString>;
245
268
  }, z.core.$strip>, z.ZodObject<{
269
+ /** Name of the tool that must have been called */
270
+ toolName: z.ZodString;
271
+ /** JSON string of key-value pairs for expected parameters (substring match) */
272
+ expectedParams: z.ZodString;
273
+ }, z.core.$strict>, z.ZodObject<{
246
274
  /** Maximum allowed duration in milliseconds */
247
275
  maxDurationMs: z.ZodNumber;
248
276
  }, z.core.$strict>, z.ZodObject<{
@@ -262,6 +290,7 @@ export type CreateCustomAssertionInput = z.infer<typeof CreateCustomAssertionInp
262
290
  export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
263
291
  type: z.ZodOptional<z.ZodEnum<{
264
292
  skill_was_called: "skill_was_called";
293
+ tool_called_with_param: "tool_called_with_param";
265
294
  build_passed: "build_passed";
266
295
  cost: "cost";
267
296
  llm_judge: "llm_judge";
@@ -292,6 +321,11 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
292
321
  }, z.core.$strip>, z.ZodObject<{
293
322
  skillNames: z.ZodArray<z.ZodString>;
294
323
  }, z.core.$strip>, z.ZodObject<{
324
+ /** Name of the tool that must have been called */
325
+ toolName: z.ZodString;
326
+ /** JSON string of key-value pairs for expected parameters (substring match) */
327
+ expectedParams: z.ZodString;
328
+ }, z.core.$strict>, z.ZodObject<{
295
329
  /** Maximum allowed duration in milliseconds */
296
330
  maxDurationMs: z.ZodNumber;
297
331
  }, z.core.$strict>, z.ZodObject<{
@@ -20,6 +20,7 @@ export interface SystemAssertion {
20
20
  */
21
21
  export declare const SYSTEM_ASSERTION_IDS: {
22
22
  readonly SKILL_WAS_CALLED: "system:skill_was_called";
23
+ readonly TOOL_CALLED_WITH_PARAM: "system:tool_called_with_param";
23
24
  readonly BUILD_PASSED: "system:build_passed";
24
25
  readonly TIME_LIMIT: "system:time_limit";
25
26
  readonly COST: "system:cost";
@@ -2,3 +2,5 @@ export * from './base-entity.js';
2
2
  export * from './github-source.js';
3
3
  export * from './mcp.js';
4
4
  export * from './models.js';
5
+ export * from './rule.js';
6
+ export * from './tool-names.js';
@@ -0,0 +1,47 @@
1
+ import { z } from 'zod';
2
+ export declare const RuleTypeSchema: z.ZodEnum<{
3
+ "claude-md": "claude-md";
4
+ "agents-md": "agents-md";
5
+ "cursor-rule": "cursor-rule";
6
+ }>;
7
+ export type RuleType = z.infer<typeof RuleTypeSchema>;
8
+ export declare const RuleSchema: z.ZodObject<{
9
+ id: z.ZodString;
10
+ name: z.ZodString;
11
+ description: z.ZodString;
12
+ createdAt: z.ZodString;
13
+ updatedAt: z.ZodString;
14
+ deleted: z.ZodOptional<z.ZodBoolean>;
15
+ projectId: z.ZodString;
16
+ ruleType: z.ZodEnum<{
17
+ "claude-md": "claude-md";
18
+ "agents-md": "agents-md";
19
+ "cursor-rule": "cursor-rule";
20
+ }>;
21
+ content: z.ZodString;
22
+ }, z.core.$strip>;
23
+ export type Rule = z.infer<typeof RuleSchema>;
24
+ export declare const CreateRuleInputSchema: z.ZodObject<{
25
+ name: z.ZodString;
26
+ description: z.ZodString;
27
+ projectId: z.ZodString;
28
+ ruleType: z.ZodEnum<{
29
+ "claude-md": "claude-md";
30
+ "agents-md": "agents-md";
31
+ "cursor-rule": "cursor-rule";
32
+ }>;
33
+ content: z.ZodString;
34
+ }, z.core.$strip>;
35
+ export type CreateRuleInput = z.infer<typeof CreateRuleInputSchema>;
36
+ export declare const UpdateRuleInputSchema: z.ZodObject<{
37
+ name: z.ZodOptional<z.ZodString>;
38
+ description: z.ZodOptional<z.ZodString>;
39
+ projectId: z.ZodOptional<z.ZodString>;
40
+ ruleType: z.ZodOptional<z.ZodEnum<{
41
+ "claude-md": "claude-md";
42
+ "agents-md": "agents-md";
43
+ "cursor-rule": "cursor-rule";
44
+ }>>;
45
+ content: z.ZodOptional<z.ZodString>;
46
+ }, z.core.$strip>;
47
+ export type UpdateRuleInput = z.infer<typeof UpdateRuleInputSchema>;
@@ -0,0 +1 @@
1
+ export declare const AVAILABLE_TOOL_NAMES: readonly ["Bash", "Edit", "Glob", "Grep", "Read", "Skill", "Write"];
@@ -495,6 +495,7 @@ export declare const EvalRunSchema: z.ZodObject<{
495
495
  jobStatusCheckedAt: z.ZodOptional<z.ZodString>;
496
496
  mcpIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
497
497
  subAgentIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
498
+ ruleIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
498
499
  }, z.core.$strip>;
499
500
  export type EvalRun = z.infer<typeof EvalRunSchema>;
500
501
  /**
@@ -612,6 +613,7 @@ export declare const CreateEvalRunInputSchema: z.ZodObject<{
612
613
  jobStatusCheckedAt: z.ZodOptional<z.ZodString>;
613
614
  mcpIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
614
615
  subAgentIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
616
+ ruleIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
615
617
  }, z.core.$strip>;
616
618
  export type CreateEvalRunInput = z.infer<typeof CreateEvalRunInputSchema>;
617
619
  /**
@@ -11,6 +11,18 @@ export declare const SkillWasCalledAssertionSchema: z.ZodObject<{
11
11
  skillNames: z.ZodArray<z.ZodString>;
12
12
  }, z.core.$strip>;
13
13
  export type SkillWasCalledAssertion = z.infer<typeof SkillWasCalledAssertionSchema>;
14
+ /**
15
+ * Assertion: a specific tool must have been called with expected parameters.
16
+ * Checked by inspecting the LLM trace for tool calls with matching name and arguments.
17
+ * Each expected param value is matched as a substring against the actual argument value.
18
+ * All expected params must match on the same tool call for the assertion to pass.
19
+ */
20
+ export declare const ToolCalledWithParamAssertionSchema: z.ZodObject<{
21
+ type: z.ZodLiteral<"tool_called_with_param">;
22
+ toolName: z.ZodString;
23
+ expectedParams: z.ZodString;
24
+ }, z.core.$strip>;
25
+ export type ToolCalledWithParamAssertion = z.infer<typeof ToolCalledWithParamAssertionSchema>;
14
26
  /**
15
27
  * Assertion: a build command must exit with the expected code (default 0).
16
28
  * Runs the command in the scenario working directory.
@@ -62,6 +74,10 @@ export type TimeAssertion = z.infer<typeof TimeAssertionSchema>;
62
74
  export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
63
75
  type: z.ZodLiteral<"skill_was_called">;
64
76
  skillNames: z.ZodArray<z.ZodString>;
77
+ }, z.core.$strip>, z.ZodObject<{
78
+ type: z.ZodLiteral<"tool_called_with_param">;
79
+ toolName: z.ZodString;
80
+ expectedParams: z.ZodString;
65
81
  }, z.core.$strip>, z.ZodObject<{
66
82
  type: z.ZodLiteral<"build_passed">;
67
83
  command: z.ZodOptional<z.ZodString>;
@@ -28,6 +28,10 @@ export declare const TestScenarioSchema: z.ZodObject<{
28
28
  assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
29
29
  type: z.ZodLiteral<"skill_was_called">;
30
30
  skillNames: z.ZodArray<z.ZodString>;
31
+ }, z.core.$strip>, z.ZodObject<{
32
+ type: z.ZodLiteral<"tool_called_with_param">;
33
+ toolName: z.ZodString;
34
+ expectedParams: z.ZodString;
31
35
  }, z.core.$strip>, z.ZodObject<{
32
36
  type: z.ZodLiteral<"build_passed">;
33
37
  command: z.ZodOptional<z.ZodString>;
@@ -66,6 +70,10 @@ export declare const CreateTestScenarioInputSchema: z.ZodObject<{
66
70
  assertions: z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
67
71
  type: z.ZodLiteral<"skill_was_called">;
68
72
  skillNames: z.ZodArray<z.ZodString>;
73
+ }, z.core.$strip>, z.ZodObject<{
74
+ type: z.ZodLiteral<"tool_called_with_param">;
75
+ toolName: z.ZodString;
76
+ expectedParams: z.ZodString;
69
77
  }, z.core.$strip>, z.ZodObject<{
70
78
  type: z.ZodLiteral<"build_passed">;
71
79
  command: z.ZodOptional<z.ZodString>;
@@ -104,6 +112,10 @@ export declare const UpdateTestScenarioInputSchema: z.ZodObject<{
104
112
  assertions: z.ZodOptional<z.ZodOptional<z.ZodArray<z.ZodUnion<readonly [z.ZodObject<{
105
113
  type: z.ZodLiteral<"skill_was_called">;
106
114
  skillNames: z.ZodArray<z.ZodString>;
115
+ }, z.core.$strip>, z.ZodObject<{
116
+ type: z.ZodLiteral<"tool_called_with_param">;
117
+ toolName: z.ZodString;
118
+ expectedParams: z.ZodString;
107
119
  }, z.core.$strip>, z.ZodObject<{
108
120
  type: z.ZodLiteral<"build_passed">;
109
121
  command: z.ZodOptional<z.ZodString>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-types",
3
- "version": "0.37.0",
3
+ "version": "0.39.0",
4
4
  "description": "Unified types for EvalForge agent evaluation system",
5
5
  "files": [
6
6
  "build"
@@ -47,5 +47,5 @@
47
47
  "artifactId": "evalforge-types"
48
48
  }
49
49
  },
50
- "falconPackageHash": "57c752f025e2a8ccb67557327757571462f50d8acadc4a28877770af"
50
+ "falconPackageHash": "a8c46ce4c7e60fa96cb1a06746bcd9fbfdd42cbf1f8be3200348eb2a"
51
51
  }