@wix/evalforge-evaluator 0.105.0 → 0.107.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,10 +1,9 @@
1
1
  /**
2
2
  * API Client for fetching data from the eval server.
3
3
  */
4
- import type { EvalRun, EvalRunResult, SkillsGroup, TestScenario, Template, SkillWithLatestVersion, SkillVersion, Agent, CustomAssertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
4
+ import type { EvalRun, EvalRunResult, TestScenario, Template, SkillWithLatestVersion, SkillVersion, Agent, Preset, CustomAssertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
5
5
  export interface ApiClient {
6
6
  getEvalRun(projectId: string, id: string): Promise<EvalRun>;
7
- getSkillsGroup(projectId: string, id: string): Promise<SkillsGroup>;
8
7
  getScenario(projectId: string, id: string): Promise<TestScenario>;
9
8
  getSkill(projectId: string, id: string): Promise<SkillWithLatestVersion>;
10
9
  getSkillVersion(projectId: string, skillId: string, versionId: string): Promise<SkillVersion>;
@@ -14,6 +13,7 @@ export interface ApiClient {
14
13
  getMcp(projectId: string, id: string): Promise<MCPEntity>;
15
14
  getSubAgent(projectId: string, id: string): Promise<SubAgent>;
16
15
  getRule(projectId: string, id: string): Promise<Rule>;
16
+ getPreset(projectId: string, id: string): Promise<Preset>;
17
17
  getAssertion(projectId: string, id: string): Promise<CustomAssertion>;
18
18
  addResult(projectId: string, evalRunId: string, result: EvalRunResult): Promise<void>;
19
19
  clearResults(projectId: string, evalRunId: string): Promise<void>;
@@ -1,4 +1,4 @@
1
- import type { EvalRun, TestScenario, SkillWithLatestVersion, SkillsGroup, Agent, Template, CustomAssertion, Assertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
1
+ import type { EvalRun, TestScenario, SkillWithLatestVersion, Agent, Template, CustomAssertion, Assertion, MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
2
2
  import { type SystemAssertionId } from '@wix/evalforge-types';
3
3
  import type { ApiClient } from './api-client.js';
4
4
  /**
@@ -31,15 +31,14 @@ export interface ScenarioItem {
31
31
  }
32
32
  /**
33
33
  * Data shape returned by fetchEvaluationData for the run flow.
34
- * Run-level agent + skills + skills group + mcps + subAgents; per-scenario template.
34
+ * Run-level agent + skills + mcps + subAgents; per-scenario template.
35
35
  */
36
36
  export interface EvaluationData {
37
37
  evalRun: EvalRun;
38
38
  agent: Agent | null;
39
39
  skills: SkillWithLatestVersion[];
40
- skillsGroup: SkillsGroup | null;
41
- /** Display name for the skills group (from skillsGroup.name when present) */
42
- skillsGroupName: string;
40
+ /** Display name: preset name from DB when available, otherwise joined skill names */
41
+ presetName: string;
43
42
  mcps: MCPEntity[];
44
43
  subAgents: SubAgent[];
45
44
  rules: Rule[];
@@ -48,8 +47,8 @@ export interface EvaluationData {
48
47
  /**
49
48
  * Fetch all data needed to run an evaluation.
50
49
  *
51
- * Uses evalRun.agentId and evalRun.skillsGroupId as run-level inputs.
52
- * Loads skills from the skills group; loads code agent; loads scenarios and templates by scenario.templateId.
50
+ * Uses evalRun.agentId and evalRun.skillIds as run-level inputs.
51
+ * Loads skills by ID; loads code agent; loads scenarios and templates by scenario.templateId.
53
52
  *
54
53
  * @throws Error if eval run has no code agent when expected
55
54
  * @throws Error if eval run has no skills group or skills group has no skills when expected
@@ -1,12 +1,16 @@
1
- import type { SubAgent } from '@wix/evalforge-types';
1
+ import type { SubAgent, GitHubSource } from '@wix/evalforge-types';
2
+ import { type FetchGitHubFolderOptions } from '@wix/evalforge-github-client';
3
+ /** Signature for the single-file GitHub fetch function (injectable for tests). */
4
+ export type FetchGitHubFileFn = (source: GitHubSource, options?: FetchGitHubFolderOptions) => Promise<string>;
2
5
  /**
3
6
  * Write sub-agent markdown files to .claude/agents/ for Claude Code to discover.
4
7
  *
5
- * Each sub-agent's subAgentMd (YAML frontmatter + body) is written to
6
- * .claude/agents/{name}.md. Claude Code loads these at session start.
8
+ * For sub-agents with a GitHub source, the latest .md file is live-fetched.
9
+ * For inline sub-agents, the stored subAgentMd is used directly.
7
10
  *
8
11
  * @see https://code.claude.com/docs/en/sub-agents#write-subagent-files
9
12
  * @param cwd - Working directory (project root for Claude Code)
10
13
  * @param subAgents - Sub-agent entities to write
14
+ * @param fetchFn - Optional fetch function for testing (defaults to fetchGitHubFile)
11
15
  */
12
- export declare function writeSubAgentsToFilesystem(cwd: string, subAgents: SubAgent[]): Promise<void>;
16
+ export declare function writeSubAgentsToFilesystem(cwd: string, subAgents: SubAgent[], fetchFn?: FetchGitHubFileFn): Promise<void>;
@@ -10,7 +10,7 @@ export type { ScenarioItem, EvaluationData } from '../fetch-evaluation-data.js';
10
10
  * @param config - Evaluator configuration
11
11
  * @param evalRunId - The evaluation run ID
12
12
  * @param scenario - The test scenario to run
13
- * @param evalData - Fetched evaluation data (skills, skillsGroup, agent, mcps, subAgents)
13
+ * @param evalData - Fetched evaluation data (skills, agent, mcps, subAgents)
14
14
  * @param template - Optional pre-fetched template entity
15
15
  * @param resolvedAssertions - Optional assertions resolved from assertionIds
16
16
  * @returns Complete evaluation result
@@ -12,7 +12,7 @@ import type { EvaluationData } from '../fetch-evaluation-data.js';
12
12
  * @param config - Evaluator configuration
13
13
  * @param evalRunId - The evaluation run ID (for live trace context)
14
14
  * @param scenario - The test scenario to run
15
- * @param evalData - Fetched evaluation data (skills, skillsGroup, agent, mcps, subAgents)
15
+ * @param evalData - Fetched evaluation data (skills, agent, mcps, subAgents)
16
16
  * @param workDir - Optional working directory for the scenario
17
17
  * @returns Partial result without assertion fields
18
18
  */
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-evaluator",
3
- "version": "0.105.0",
3
+ "version": "0.107.0",
4
4
  "description": "EvalForge Evaluator",
5
5
  "bin": "./build/index.js",
6
6
  "files": [
@@ -22,8 +22,8 @@
22
22
  "@anthropic-ai/claude-agent-sdk": "^0.2.49",
23
23
  "@anthropic-ai/claude-code": "^2.1.49",
24
24
  "@wix/eval-assertions": "0.23.0",
25
- "@wix/evalforge-github-client": "0.23.0",
26
- "@wix/evalforge-types": "0.48.0",
25
+ "@wix/evalforge-github-client": "0.25.0",
26
+ "@wix/evalforge-types": "0.50.0",
27
27
  "ai": "^6.0.93",
28
28
  "diff": "^7.0.0",
29
29
  "tar": "^7.5.3",
@@ -62,5 +62,5 @@
62
62
  "artifactId": "evalforge-evaluator"
63
63
  }
64
64
  },
65
- "falconPackageHash": "7f3f2d1b27312daf480e195d122e0885ac476d0f881100f407a34755"
65
+ "falconPackageHash": "59ea86dd554511d348988564544eee3ea330a005592d9aa9a7d22719"
66
66
  }