@wix/evalforge-types 0.36.0 → 0.38.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -5,6 +5,7 @@ import type { ModelConfig } from '../common/models.js';
5
5
  import type { LLMTrace } from '../evaluation/metrics.js';
6
6
  import type { MCPEntity } from '../common/mcp.js';
7
7
  import type { SubAgent } from '../target/sub-agent.js';
8
+ import type { Rule } from '../common/rule.js';
8
9
  /**
9
10
  * Trace context for live streaming of agent execution.
10
11
  * This is agent-agnostic and can be used by any adapter implementation.
@@ -57,6 +58,8 @@ export interface AgentExecutionContext {
57
58
  mcps?: MCPEntity[];
58
59
  /** Sub-agents to load (when present, written to .claude/agents/*.md) */
59
60
  subAgents?: SubAgent[];
61
+ /** Rules to write (CLAUDE.md, AGENTS.md, .cursor/rules/*.md based on ruleType) */
62
+ rules?: Rule[];
60
63
  }
61
64
  /**
62
65
  * Token usage statistics from agent execution.
@@ -4,11 +4,13 @@ import { z } from 'zod';
4
4
  * - skill_was_called: Checks if a specific skill was invoked (deterministic, system-level)
5
5
  * - build_passed: Runs a command and checks exit code (deterministic, system-level)
6
6
  * - time_limit: Checks that scenario completed within a duration threshold (deterministic, system-level)
7
+ * - cost: Checks that scenario LLM cost stays within a USD threshold (deterministic, system-level)
7
8
  * - llm_judge: LLM evaluates output with a prompt (LLM-based, user-created)
8
9
  */
9
10
  export declare const AssertionTypeSchema: z.ZodEnum<{
10
11
  skill_was_called: "skill_was_called";
11
12
  build_passed: "build_passed";
13
+ cost: "cost";
12
14
  llm_judge: "llm_judge";
13
15
  time_limit: "time_limit";
14
16
  }>;
@@ -57,6 +59,15 @@ export declare const SkillWasCalledConfigSchema: z.ZodObject<{
57
59
  skillNames: z.ZodArray<z.ZodString>;
58
60
  }, z.core.$strip>;
59
61
  export type SkillWasCalledConfig = z.infer<typeof SkillWasCalledConfigSchema>;
62
+ /**
63
+ * Configuration for cost assertion type.
64
+ * Uses strictObject to reject objects with unknown keys (prevents matching other configs).
65
+ */
66
+ export declare const CostConfigSchema: z.ZodObject<{
67
+ /** Maximum allowed cost in USD */
68
+ maxCostUsd: z.ZodNumber;
69
+ }, z.core.$strict>;
70
+ export type CostConfig = z.infer<typeof CostConfigSchema>;
60
71
  /**
61
72
  * Configuration for build_passed assertion type.
62
73
  * Uses strictObject to reject objects with unknown keys (prevents matching LlmJudge configs).
@@ -132,6 +143,9 @@ export declare const AssertionConfigSchema: z.ZodUnion<readonly [z.ZodObject<{
132
143
  }, z.core.$strip>, z.ZodObject<{
133
144
  /** Maximum allowed duration in milliseconds */
134
145
  maxDurationMs: z.ZodNumber;
146
+ }, z.core.$strict>, z.ZodObject<{
147
+ /** Maximum allowed cost in USD */
148
+ maxCostUsd: z.ZodNumber;
135
149
  }, z.core.$strict>, z.ZodObject<{
136
150
  /** Command to run (default: "yarn build") */
137
151
  command: z.ZodOptional<z.ZodString>;
@@ -154,6 +168,7 @@ export declare const CustomAssertionSchema: z.ZodObject<{
154
168
  type: z.ZodEnum<{
155
169
  skill_was_called: "skill_was_called";
156
170
  build_passed: "build_passed";
171
+ cost: "cost";
157
172
  llm_judge: "llm_judge";
158
173
  time_limit: "time_limit";
159
174
  }>;
@@ -181,6 +196,9 @@ export declare const CustomAssertionSchema: z.ZodObject<{
181
196
  }, z.core.$strip>, z.ZodObject<{
182
197
  /** Maximum allowed duration in milliseconds */
183
198
  maxDurationMs: z.ZodNumber;
199
+ }, z.core.$strict>, z.ZodObject<{
200
+ /** Maximum allowed cost in USD */
201
+ maxCostUsd: z.ZodNumber;
184
202
  }, z.core.$strict>, z.ZodObject<{
185
203
  /** Command to run (default: "yarn build") */
186
204
  command: z.ZodOptional<z.ZodString>;
@@ -196,6 +214,7 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
196
214
  type: z.ZodEnum<{
197
215
  skill_was_called: "skill_was_called";
198
216
  build_passed: "build_passed";
217
+ cost: "cost";
199
218
  llm_judge: "llm_judge";
200
219
  time_limit: "time_limit";
201
220
  }>;
@@ -226,6 +245,9 @@ export declare const CreateCustomAssertionInputSchema: z.ZodObject<{
226
245
  }, z.core.$strip>, z.ZodObject<{
227
246
  /** Maximum allowed duration in milliseconds */
228
247
  maxDurationMs: z.ZodNumber;
248
+ }, z.core.$strict>, z.ZodObject<{
249
+ /** Maximum allowed cost in USD */
250
+ maxCostUsd: z.ZodNumber;
229
251
  }, z.core.$strict>, z.ZodObject<{
230
252
  /** Command to run (default: "yarn build") */
231
253
  command: z.ZodOptional<z.ZodString>;
@@ -241,6 +263,7 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
241
263
  type: z.ZodOptional<z.ZodEnum<{
242
264
  skill_was_called: "skill_was_called";
243
265
  build_passed: "build_passed";
266
+ cost: "cost";
244
267
  llm_judge: "llm_judge";
245
268
  time_limit: "time_limit";
246
269
  }>>;
@@ -271,6 +294,9 @@ export declare const UpdateCustomAssertionInputSchema: z.ZodObject<{
271
294
  }, z.core.$strip>, z.ZodObject<{
272
295
  /** Maximum allowed duration in milliseconds */
273
296
  maxDurationMs: z.ZodNumber;
297
+ }, z.core.$strict>, z.ZodObject<{
298
+ /** Maximum allowed cost in USD */
299
+ maxCostUsd: z.ZodNumber;
274
300
  }, z.core.$strict>, z.ZodObject<{
275
301
  /** Command to run (default: "yarn build") */
276
302
  command: z.ZodOptional<z.ZodString>;
@@ -22,6 +22,7 @@ export declare const SYSTEM_ASSERTION_IDS: {
22
22
  readonly SKILL_WAS_CALLED: "system:skill_was_called";
23
23
  readonly BUILD_PASSED: "system:build_passed";
24
24
  readonly TIME_LIMIT: "system:time_limit";
25
+ readonly COST: "system:cost";
25
26
  readonly LLM_JUDGE: "system:llm_judge";
26
27
  };
27
28
  export type SystemAssertionId = (typeof SYSTEM_ASSERTION_IDS)[keyof typeof SYSTEM_ASSERTION_IDS];
@@ -2,3 +2,4 @@ export * from './base-entity.js';
2
2
  export * from './github-source.js';
3
3
  export * from './mcp.js';
4
4
  export * from './models.js';
5
+ export * from './rule.js';
@@ -0,0 +1,47 @@
1
+ import { z } from 'zod';
2
+ export declare const RuleTypeSchema: z.ZodEnum<{
3
+ "claude-md": "claude-md";
4
+ "agents-md": "agents-md";
5
+ "cursor-rule": "cursor-rule";
6
+ }>;
7
+ export type RuleType = z.infer<typeof RuleTypeSchema>;
8
+ export declare const RuleSchema: z.ZodObject<{
9
+ id: z.ZodString;
10
+ name: z.ZodString;
11
+ description: z.ZodString;
12
+ createdAt: z.ZodString;
13
+ updatedAt: z.ZodString;
14
+ deleted: z.ZodOptional<z.ZodBoolean>;
15
+ projectId: z.ZodString;
16
+ ruleType: z.ZodEnum<{
17
+ "claude-md": "claude-md";
18
+ "agents-md": "agents-md";
19
+ "cursor-rule": "cursor-rule";
20
+ }>;
21
+ content: z.ZodString;
22
+ }, z.core.$strip>;
23
+ export type Rule = z.infer<typeof RuleSchema>;
24
+ export declare const CreateRuleInputSchema: z.ZodObject<{
25
+ name: z.ZodString;
26
+ description: z.ZodString;
27
+ projectId: z.ZodString;
28
+ ruleType: z.ZodEnum<{
29
+ "claude-md": "claude-md";
30
+ "agents-md": "agents-md";
31
+ "cursor-rule": "cursor-rule";
32
+ }>;
33
+ content: z.ZodString;
34
+ }, z.core.$strip>;
35
+ export type CreateRuleInput = z.infer<typeof CreateRuleInputSchema>;
36
+ export declare const UpdateRuleInputSchema: z.ZodObject<{
37
+ name: z.ZodOptional<z.ZodString>;
38
+ description: z.ZodOptional<z.ZodString>;
39
+ projectId: z.ZodOptional<z.ZodString>;
40
+ ruleType: z.ZodOptional<z.ZodEnum<{
41
+ "claude-md": "claude-md";
42
+ "agents-md": "agents-md";
43
+ "cursor-rule": "cursor-rule";
44
+ }>>;
45
+ content: z.ZodOptional<z.ZodString>;
46
+ }, z.core.$strip>;
47
+ export type UpdateRuleInput = z.infer<typeof UpdateRuleInputSchema>;
@@ -495,6 +495,7 @@ export declare const EvalRunSchema: z.ZodObject<{
495
495
  jobStatusCheckedAt: z.ZodOptional<z.ZodString>;
496
496
  mcpIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
497
497
  subAgentIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
498
+ ruleIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
498
499
  }, z.core.$strip>;
499
500
  export type EvalRun = z.infer<typeof EvalRunSchema>;
500
501
  /**
@@ -612,6 +613,7 @@ export declare const CreateEvalRunInputSchema: z.ZodObject<{
612
613
  jobStatusCheckedAt: z.ZodOptional<z.ZodString>;
613
614
  mcpIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
614
615
  subAgentIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
616
+ ruleIds: z.ZodOptional<z.ZodArray<z.ZodString>>;
615
617
  }, z.core.$strip>;
616
618
  export type CreateEvalRunInput = z.infer<typeof CreateEvalRunInputSchema>;
617
619
  /**
@@ -21,6 +21,15 @@ export declare const BuildPassedAssertionSchema: z.ZodObject<{
21
21
  expectedExitCode: z.ZodOptional<z.ZodNumber>;
22
22
  }, z.core.$strip>;
23
23
  export type BuildPassedAssertion = z.infer<typeof BuildPassedAssertionSchema>;
24
+ /**
25
+ * Assertion: the scenario LLM execution cost must stay within a USD threshold.
26
+ * Checked by reading llmTrace.summary.totalCostUsd.
27
+ */
28
+ export declare const CostAssertionSchema: z.ZodObject<{
29
+ type: z.ZodLiteral<"cost">;
30
+ maxCostUsd: z.ZodNumber;
31
+ }, z.core.$strip>;
32
+ export type CostAssertion = z.infer<typeof CostAssertionSchema>;
24
33
  /**
25
34
  * Assertion: an LLM judges the scenario output (score 0-100).
26
35
  * Prompt can use {{output}}, {{cwd}}, {{changedFiles}}, {{trace}}.
@@ -60,6 +69,9 @@ export declare const AssertionSchema: z.ZodUnion<readonly [z.ZodObject<{
60
69
  }, z.core.$strip>, z.ZodObject<{
61
70
  type: z.ZodLiteral<"time_limit">;
62
71
  maxDurationMs: z.ZodNumber;
72
+ }, z.core.$strip>, z.ZodObject<{
73
+ type: z.ZodLiteral<"cost">;
74
+ maxCostUsd: z.ZodNumber;
63
75
  }, z.core.$strip>, z.ZodObject<{
64
76
  type: z.ZodLiteral<"llm_judge">;
65
77
  prompt: z.ZodString;
@@ -35,6 +35,9 @@ export declare const TestScenarioSchema: z.ZodObject<{
35
35
  }, z.core.$strip>, z.ZodObject<{
36
36
  type: z.ZodLiteral<"time_limit">;
37
37
  maxDurationMs: z.ZodNumber;
38
+ }, z.core.$strip>, z.ZodObject<{
39
+ type: z.ZodLiteral<"cost">;
40
+ maxCostUsd: z.ZodNumber;
38
41
  }, z.core.$strip>, z.ZodObject<{
39
42
  type: z.ZodLiteral<"llm_judge">;
40
43
  prompt: z.ZodString;
@@ -70,6 +73,9 @@ export declare const CreateTestScenarioInputSchema: z.ZodObject<{
70
73
  }, z.core.$strip>, z.ZodObject<{
71
74
  type: z.ZodLiteral<"time_limit">;
72
75
  maxDurationMs: z.ZodNumber;
76
+ }, z.core.$strip>, z.ZodObject<{
77
+ type: z.ZodLiteral<"cost">;
78
+ maxCostUsd: z.ZodNumber;
73
79
  }, z.core.$strip>, z.ZodObject<{
74
80
  type: z.ZodLiteral<"llm_judge">;
75
81
  prompt: z.ZodString;
@@ -105,6 +111,9 @@ export declare const UpdateTestScenarioInputSchema: z.ZodObject<{
105
111
  }, z.core.$strip>, z.ZodObject<{
106
112
  type: z.ZodLiteral<"time_limit">;
107
113
  maxDurationMs: z.ZodNumber;
114
+ }, z.core.$strip>, z.ZodObject<{
115
+ type: z.ZodLiteral<"cost">;
116
+ maxCostUsd: z.ZodNumber;
108
117
  }, z.core.$strip>, z.ZodObject<{
109
118
  type: z.ZodLiteral<"llm_judge">;
110
119
  prompt: z.ZodString;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-types",
3
- "version": "0.36.0",
3
+ "version": "0.38.0",
4
4
  "description": "Unified types for EvalForge agent evaluation system",
5
5
  "files": [
6
6
  "build"
@@ -47,5 +47,5 @@
47
47
  "artifactId": "evalforge-types"
48
48
  }
49
49
  },
50
- "falconPackageHash": "1beee538e2fe877b490209a7f00a37f2524d6cece55e7c57bdd0f20a"
50
+ "falconPackageHash": "dc3f87434a7b2a1b350369f78ca98d0fc64183d39b35b8a111054878"
51
51
  }