@wix/evalforge-evaluator 0.59.0 → 0.61.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,12 +1,14 @@
1
1
  /**
2
2
  * API Client for fetching data from the eval server.
3
3
  */
4
- import type { EvalRun, EvalRunResult, SkillsGroup, TestScenario, Template, Skill, Agent, CustomAssertion, MCPEntity, SubAgent } from '@wix/evalforge-types';
4
+ import type { EvalRun, EvalRunResult, SkillsGroup, TestScenario, Template, SkillWithLatestVersion, SkillVersion, Agent, CustomAssertion, MCPEntity, SubAgent } from '@wix/evalforge-types';
5
5
  export interface ApiClient {
6
6
  getEvalRun(projectId: string, id: string): Promise<EvalRun>;
7
7
  getSkillsGroup(projectId: string, id: string): Promise<SkillsGroup>;
8
8
  getScenario(projectId: string, id: string): Promise<TestScenario>;
9
- getSkill(projectId: string, id: string): Promise<Skill>;
9
+ getSkill(projectId: string, id: string): Promise<SkillWithLatestVersion>;
10
+ getSkillVersion(projectId: string, skillId: string, versionId: string): Promise<SkillVersion>;
11
+ getLatestSkillVersion(projectId: string, skillId: string): Promise<SkillVersion>;
10
12
  getAgent(projectId: string, id: string): Promise<Agent>;
11
13
  getTemplate(projectId: string, id: string): Promise<Template>;
12
14
  getMcp(projectId: string, id: string): Promise<MCPEntity>;
@@ -1,4 +1,4 @@
1
- import type { EvalRun, TestScenario, Skill, SkillsGroup, Agent, Template, CustomAssertion, Assertion, MCPEntity, SubAgent } from '@wix/evalforge-types';
1
+ import type { EvalRun, TestScenario, SkillWithLatestVersion, SkillsGroup, Agent, Template, CustomAssertion, Assertion, MCPEntity, SubAgent } from '@wix/evalforge-types';
2
2
  import { type SystemAssertionId } from '@wix/evalforge-types';
3
3
  import type { ApiClient } from './api-client.js';
4
4
  /**
@@ -36,7 +36,7 @@ export interface ScenarioItem {
36
36
  export interface EvaluationData {
37
37
  evalRun: EvalRun;
38
38
  codeAgent: Agent | null;
39
- skills: Skill[];
39
+ skills: SkillWithLatestVersion[];
40
40
  skillsGroup: SkillsGroup | null;
41
41
  /** Display name for the skills group (from skillsGroup.name when present) */
42
42
  skillsGroupName: string;
@@ -1,4 +1,4 @@
1
- import type { Skill, TestScenario, LLMTrace } from '@wix/evalforge-types';
1
+ import type { SkillWithLatestVersion, TestScenario, LLMTrace } from '@wix/evalforge-types';
2
2
  import type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './types.js';
3
3
  /**
4
4
  * Execute skills using the Claude Agent SDK.
@@ -12,7 +12,7 @@ import type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './ty
12
12
  * @param options - Execution options (cwd, mcps, etc.)
13
13
  * @returns Execution result with output, usage, and LLM trace
14
14
  */
15
- export declare function executeWithClaudeCode(skills: Skill[], scenario: TestScenario, options: ClaudeCodeExecutionOptions): Promise<{
15
+ export declare function executeWithClaudeCode(skills: SkillWithLatestVersion[], scenario: TestScenario, options: ClaudeCodeExecutionOptions): Promise<{
16
16
  result: ClaudeCodeExecutionResult;
17
17
  llmTrace: LLMTrace;
18
18
  }>;
@@ -2,11 +2,12 @@ import type { MCPEntity } from '@wix/evalforge-types';
2
2
  /**
3
3
  * Write .mcp.json at the project root (cwd) for Claude Code to discover MCPs.
4
4
  *
5
- * Format: { "mcpServers": { [mcp.name]: mcp.config } }
6
- * Claude Code loads this when cwd is the project root.
5
+ * The config field already contains the keyed `.mcp.json` entry
6
+ * (e.g. `{ "wix-mcp": { "type": "http", ... } }`), so each MCP's config
7
+ * is spread directly into the mcpServers object.
7
8
  *
8
9
  * @see https://code.claude.com/docs/en/mcp#mcp-installation-scopes
9
10
  * @param cwd - Working directory (project root for Claude Code)
10
- * @param mcps - MCP entities to write (name used as key, config as value)
11
+ * @param mcps - MCP entities whose config is merged into mcpServers
11
12
  */
12
13
  export declare function writeMcpToFilesystem(cwd: string, mcps: MCPEntity[]): Promise<void>;
@@ -0,0 +1,21 @@
1
+ import type { GitHubSource, SkillFile, SkillWithLatestVersion } from '@wix/evalforge-types';
2
+ export type FetchSkillFn = (source: GitHubSource, options?: {
3
+ userAgent?: string;
4
+ }) => Promise<SkillFile[]>;
5
+ /**
6
+ * Write all skills to the filesystem so Claude Agent SDK can discover them.
7
+ *
8
+ * Content resolution:
9
+ * 1. Pinned (version has `files`): writes all files from the stored snapshot
10
+ * 2. Live (skill has `source`, no pinned files): fetches from GitHub at runtime
11
+ *
12
+ * @param cwd - Working directory where .claude/skills/ will be created
13
+ * @param skills - All skills to write
14
+ * @param fetchFn - Function to fetch skill files from GitHub (defaults to fetchSkillFolderRaw)
15
+ */
16
+ export declare function writeSkillsToFilesystem(cwd: string, skills: SkillWithLatestVersion[], fetchFn?: FetchSkillFn): Promise<void>;
17
+ export declare function writeSkillToFilesystem(cwd: string, skill: SkillWithLatestVersion, fetchFn?: FetchSkillFn): Promise<void>;
18
+ /**
19
+ * Write skill files from a snapshot array to the filesystem.
20
+ */
21
+ export declare function writeSkillFiles(skillDir: string, files: SkillFile[]): Promise<void>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-evaluator",
3
- "version": "0.59.0",
3
+ "version": "0.61.0",
4
4
  "description": "EvalForge Evaluator",
5
5
  "bin": "./build/index.js",
6
6
  "files": [
@@ -8,8 +8,8 @@
8
8
  ],
9
9
  "scripts": {
10
10
  "clean": "rm -rf build",
11
- "build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk",
12
- "build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk",
11
+ "build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk",
12
+ "build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk",
13
13
  "build:types": "tsc --emitDeclarationOnly --outDir ./build/types",
14
14
  "build": "yarn run clean && yarn run build:cjs && yarn run build:esm && yarn run build:types",
15
15
  "lint": "eslint .",
@@ -20,7 +20,8 @@
20
20
  "@anthropic-ai/claude-agent-sdk": "^0.2.12",
21
21
  "@anthropic-ai/claude-code": "^2.0.76",
22
22
  "@wix/eval-assertions": "0.8.0",
23
- "@wix/evalforge-types": "0.24.0",
23
+ "@wix/evalforge-github-client": "0.1.0",
24
+ "@wix/evalforge-types": "0.26.0",
24
25
  "ai": "^6.0.6",
25
26
  "diff": "^7.0.0",
26
27
  "tar": "^7.5.3",
@@ -59,5 +60,5 @@
59
60
  "artifactId": "evalforge-evaluator"
60
61
  }
61
62
  },
62
- "falconPackageHash": "54091ff149053f18b51127b023979bbdc7935bc7ea7ffe4ec383a954"
63
+ "falconPackageHash": "ec228a8e773268aebd58a91a0148e74936374a44cc03944f6ac6884b"
63
64
  }