@wix/evalforge-evaluator 0.104.0 → 0.106.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,9 @@
1
1
  /**
2
2
  * API Client for fetching data from the eval server.
3
3
  */
4
- import type { EvalRun, EvalRunResult, SkillsGroup, TestScenario, Template, SkillWithLatestVersion, SkillVersion, Agent, CustomAssertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
4
+ import type { EvalRun, EvalRunResult, TestScenario, Template, SkillWithLatestVersion, SkillVersion, Agent, Preset, CustomAssertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
5
5
  export interface ApiClient {
6
6
  getEvalRun(projectId: string, id: string): Promise<EvalRun>;
7
- getSkillsGroup(projectId: string, id: string): Promise<SkillsGroup>;
8
7
  getScenario(projectId: string, id: string): Promise<TestScenario>;
9
8
  getSkill(projectId: string, id: string): Promise<SkillWithLatestVersion>;
10
9
  getSkillVersion(projectId: string, skillId: string, versionId: string): Promise<SkillVersion>;
@@ -14,6 +13,7 @@ export interface ApiClient {
14
13
  getMcp(projectId: string, id: string): Promise<MCPEntity>;
15
14
  getSubAgent(projectId: string, id: string): Promise<SubAgent>;
16
15
  getRule(projectId: string, id: string): Promise<Rule>;
16
+ getPreset(projectId: string, id: string): Promise<Preset>;
17
17
  getAssertion(projectId: string, id: string): Promise<CustomAssertion>;
18
18
  addResult(projectId: string, evalRunId: string, result: EvalRunResult): Promise<void>;
19
19
  clearResults(projectId: string, evalRunId: string): Promise<void>;
@@ -1,4 +1,4 @@
1
- import type { EvalRun, TestScenario, SkillWithLatestVersion, SkillsGroup, Agent, Template, CustomAssertion, Assertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
1
+ import type { EvalRun, TestScenario, SkillWithLatestVersion, Agent, Template, CustomAssertion, Assertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
2
2
  import { type SystemAssertionId } from '@wix/evalforge-types';
3
3
  import type { ApiClient } from './api-client.js';
4
4
  /**
@@ -31,15 +31,14 @@ export interface ScenarioItem {
31
31
  }
32
32
  /**
33
33
  * Data shape returned by fetchEvaluationData for the run flow.
34
- * Run-level agent + skills + skills group + mcps + subAgents; per-scenario template.
34
+ * Run-level agent + skills + mcps + subAgents; per-scenario template.
35
35
  */
36
36
  export interface EvaluationData {
37
37
  evalRun: EvalRun;
38
38
  agent: Agent | null;
39
39
  skills: SkillWithLatestVersion[];
40
- skillsGroup: SkillsGroup | null;
41
- /** Display name for the skills group (from skillsGroup.name when present) */
42
- skillsGroupName: string;
40
+ /** Display name: preset name from DB when available, otherwise joined skill names */
41
+ presetName: string;
43
42
  mcps: MCPEntity[];
44
43
  subAgents: SubAgent[];
45
44
  rules: Rule[];
@@ -48,8 +47,8 @@ export interface EvaluationData {
48
47
  /**
49
48
  * Fetch all data needed to run an evaluation.
50
49
  *
51
- * Uses evalRun.agentId and evalRun.skillsGroupId as run-level inputs.
52
- * Loads skills from the skills group; loads code agent; loads scenarios and templates by scenario.templateId.
50
+ * Uses evalRun.agentId and evalRun.skillIds as run-level inputs.
51
+ * Loads skills by ID; loads code agent; loads scenarios and templates by scenario.templateId.
53
52
  *
54
53
  * @throws Error if eval run has no code agent when expected
55
54
  * @throws Error if eval run has no skills when expected
@@ -10,7 +10,7 @@ export type { ScenarioItem, EvaluationData } from '../fetch-evaluation-data.js';
10
10
  * @param config - Evaluator configuration
11
11
  * @param evalRunId - The evaluation run ID
12
12
  * @param scenario - The test scenario to run
13
- * @param evalData - Fetched evaluation data (skills, skillsGroup, agent, mcps, subAgents)
13
+ * @param evalData - Fetched evaluation data (skills, agent, mcps, subAgents)
14
14
  * @param template - Optional pre-fetched template entity
15
15
  * @param resolvedAssertions - Optional assertions resolved from assertionIds
16
16
  * @returns Complete evaluation result
@@ -12,7 +12,7 @@ import type { EvaluationData } from '../fetch-evaluation-data.js';
12
12
  * @param config - Evaluator configuration
13
13
  * @param evalRunId - The evaluation run ID (for live trace context)
14
14
  * @param scenario - The test scenario to run
15
- * @param evalData - Fetched evaluation data (skills, skillsGroup, agent, mcps, subAgents)
15
+ * @param evalData - Fetched evaluation data (skills, agent, mcps, subAgents)
16
16
  * @param workDir - Optional working directory for the scenario
17
17
  * @returns Partial result without assertion fields
18
18
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-evaluator",
3
- "version": "0.104.0",
3
+ "version": "0.106.0",
4
4
  "description": "EvalForge Evaluator",
5
5
  "bin": "./build/index.js",
6
6
  "files": [
@@ -22,8 +22,8 @@
22
22
  "@anthropic-ai/claude-agent-sdk": "^0.2.49",
23
23
  "@anthropic-ai/claude-code": "^2.1.49",
24
24
  "@wix/eval-assertions": "0.23.0",
25
- "@wix/evalforge-github-client": "0.22.0",
26
- "@wix/evalforge-types": "0.47.0",
25
+ "@wix/evalforge-github-client": "0.24.0",
26
+ "@wix/evalforge-types": "0.49.0",
27
27
  "ai": "^6.0.93",
28
28
  "diff": "^7.0.0",
29
29
  "tar": "^7.5.3",
@@ -62,5 +62,5 @@
62
62
  "artifactId": "evalforge-evaluator"
63
63
  }
64
64
  },
65
- "falconPackageHash": "60f62d104bbd185de86f9e2b5ea87a8f558d2bbe5342520dad2304f5"
65
+ "falconPackageHash": "b023f8b862d171c04024e06788df0308e961466bde8f723fc5846f5a"
66
66
  }