@wix/evalforge-evaluator 0.59.0 → 0.61.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +126 -42
- package/build/index.js.map +4 -4
- package/build/index.mjs +126 -42
- package/build/index.mjs.map +4 -4
- package/build/types/api-client.d.ts +4 -2
- package/build/types/fetch-evaluation-data.d.ts +2 -2
- package/build/types/run-scenario/agents/claude-code/execute.d.ts +2 -2
- package/build/types/run-scenario/agents/claude-code/write-mcp.d.ts +4 -3
- package/build/types/run-scenario/agents/claude-code/write-skills.d.ts +21 -0
- package/package.json +6 -5
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
/**
|
|
2
2
|
* API Client for fetching data from the eval server.
|
|
3
3
|
*/
|
|
4
|
-
import type { EvalRun, EvalRunResult, SkillsGroup, TestScenario, Template,
|
|
4
|
+
import type { EvalRun, EvalRunResult, SkillsGroup, TestScenario, Template, SkillWithLatestVersion, SkillVersion, Agent, CustomAssertion, MCPEntity, SubAgent } from '@wix/evalforge-types';
|
|
5
5
|
export interface ApiClient {
|
|
6
6
|
getEvalRun(projectId: string, id: string): Promise<EvalRun>;
|
|
7
7
|
getSkillsGroup(projectId: string, id: string): Promise<SkillsGroup>;
|
|
8
8
|
getScenario(projectId: string, id: string): Promise<TestScenario>;
|
|
9
|
-
getSkill(projectId: string, id: string): Promise<
|
|
9
|
+
getSkill(projectId: string, id: string): Promise<SkillWithLatestVersion>;
|
|
10
|
+
getSkillVersion(projectId: string, skillId: string, versionId: string): Promise<SkillVersion>;
|
|
11
|
+
getLatestSkillVersion(projectId: string, skillId: string): Promise<SkillVersion>;
|
|
10
12
|
getAgent(projectId: string, id: string): Promise<Agent>;
|
|
11
13
|
getTemplate(projectId: string, id: string): Promise<Template>;
|
|
12
14
|
getMcp(projectId: string, id: string): Promise<MCPEntity>;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { EvalRun, TestScenario,
|
|
1
|
+
import type { EvalRun, TestScenario, SkillWithLatestVersion, SkillsGroup, Agent, Template, CustomAssertion, Assertion, MCPEntity, SubAgent } from '@wix/evalforge-types';
|
|
2
2
|
import { type SystemAssertionId } from '@wix/evalforge-types';
|
|
3
3
|
import type { ApiClient } from './api-client.js';
|
|
4
4
|
/**
|
|
@@ -36,7 +36,7 @@ export interface ScenarioItem {
|
|
|
36
36
|
export interface EvaluationData {
|
|
37
37
|
evalRun: EvalRun;
|
|
38
38
|
codeAgent: Agent | null;
|
|
39
|
-
skills:
|
|
39
|
+
skills: SkillWithLatestVersion[];
|
|
40
40
|
skillsGroup: SkillsGroup | null;
|
|
41
41
|
/** Display name for the skills group (from skillsGroup.name when present) */
|
|
42
42
|
skillsGroupName: string;
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { SkillWithLatestVersion, TestScenario, LLMTrace } from '@wix/evalforge-types';
|
|
2
2
|
import type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './types.js';
|
|
3
3
|
/**
|
|
4
4
|
* Execute skills using the Claude Agent SDK.
|
|
@@ -12,7 +12,7 @@ import type { ClaudeCodeExecutionOptions, ClaudeCodeExecutionResult } from './ty
|
|
|
12
12
|
* @param options - Execution options (cwd, mcps, etc.)
|
|
13
13
|
* @returns Execution result with output, usage, and LLM trace
|
|
14
14
|
*/
|
|
15
|
-
export declare function executeWithClaudeCode(skills:
|
|
15
|
+
export declare function executeWithClaudeCode(skills: SkillWithLatestVersion[], scenario: TestScenario, options: ClaudeCodeExecutionOptions): Promise<{
|
|
16
16
|
result: ClaudeCodeExecutionResult;
|
|
17
17
|
llmTrace: LLMTrace;
|
|
18
18
|
}>;
|
|
@@ -2,11 +2,12 @@ import type { MCPEntity } from '@wix/evalforge-types';
|
|
|
2
2
|
/**
|
|
3
3
|
* Write .mcp.json at the project root (cwd) for Claude Code to discover MCPs.
|
|
4
4
|
*
|
|
5
|
-
*
|
|
6
|
-
*
|
|
5
|
+
* The config field already contains the keyed `.mcp.json` entry
|
|
6
|
+
* (e.g. `{ "wix-mcp": { "type": "http", ... } }`), so each MCP's config
|
|
7
|
+
* is spread directly into the mcpServers object.
|
|
7
8
|
*
|
|
8
9
|
* @see https://code.claude.com/docs/en/mcp#mcp-installation-scopes
|
|
9
10
|
* @param cwd - Working directory (project root for Claude Code)
|
|
10
|
-
* @param mcps - MCP entities
|
|
11
|
+
* @param mcps - MCP entities whose config is merged into mcpServers
|
|
11
12
|
*/
|
|
12
13
|
export declare function writeMcpToFilesystem(cwd: string, mcps: MCPEntity[]): Promise<void>;
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
import type { GitHubSource, SkillFile, SkillWithLatestVersion } from '@wix/evalforge-types';
|
|
2
|
+
export type FetchSkillFn = (source: GitHubSource, options?: {
|
|
3
|
+
userAgent?: string;
|
|
4
|
+
}) => Promise<SkillFile[]>;
|
|
5
|
+
/**
|
|
6
|
+
* Write all skills to the filesystem so Claude Agent SDK can discover them.
|
|
7
|
+
*
|
|
8
|
+
* Content resolution:
|
|
9
|
+
* 1. Pinned (version has `files`): writes all files from the stored snapshot
|
|
10
|
+
* 2. Live (skill has `source`, no pinned files): fetches from GitHub at runtime
|
|
11
|
+
*
|
|
12
|
+
* @param cwd - Working directory where .claude/skills/ will be created
|
|
13
|
+
* @param skills - All skills to write
|
|
14
|
+
* @param fetchFn - Function to fetch skill files from GitHub (defaults to fetchSkillFolderRaw)
|
|
15
|
+
*/
|
|
16
|
+
export declare function writeSkillsToFilesystem(cwd: string, skills: SkillWithLatestVersion[], fetchFn?: FetchSkillFn): Promise<void>;
|
|
17
|
+
export declare function writeSkillToFilesystem(cwd: string, skill: SkillWithLatestVersion, fetchFn?: FetchSkillFn): Promise<void>;
|
|
18
|
+
/**
|
|
19
|
+
* Write skill files from a snapshot array to the filesystem.
|
|
20
|
+
*/
|
|
21
|
+
export declare function writeSkillFiles(skillDir: string, files: SkillFile[]): Promise<void>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-evaluator",
|
|
3
|
-
"version": "0.59.0",
|
|
3
|
+
"version": "0.61.0",
|
|
4
4
|
"description": "EvalForge Evaluator",
|
|
5
5
|
"bin": "./build/index.js",
|
|
6
6
|
"files": [
|
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
],
|
|
9
9
|
"scripts": {
|
|
10
10
|
"clean": "rm -rf build",
|
|
11
|
-
"build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk",
|
|
12
|
-
"build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk",
|
|
11
|
+
"build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk",
|
|
12
|
+
"build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk",
|
|
13
13
|
"build:types": "tsc --emitDeclarationOnly --outDir ./build/types",
|
|
14
14
|
"build": "yarn run clean && yarn run build:cjs && yarn run build:esm && yarn run build:types",
|
|
15
15
|
"lint": "eslint .",
|
|
@@ -20,7 +20,8 @@
|
|
|
20
20
|
"@anthropic-ai/claude-agent-sdk": "^0.2.12",
|
|
21
21
|
"@anthropic-ai/claude-code": "^2.0.76",
|
|
22
22
|
"@wix/eval-assertions": "0.8.0",
|
|
23
|
-
"@wix/evalforge-
|
|
23
|
+
"@wix/evalforge-github-client": "0.1.0",
|
|
24
|
+
"@wix/evalforge-types": "0.26.0",
|
|
24
25
|
"ai": "^6.0.6",
|
|
25
26
|
"diff": "^7.0.0",
|
|
26
27
|
"tar": "^7.5.3",
|
|
@@ -59,5 +60,5 @@
|
|
|
59
60
|
"artifactId": "evalforge-evaluator"
|
|
60
61
|
}
|
|
61
62
|
},
|
|
62
|
-
"falconPackageHash": "
|
|
63
|
+
"falconPackageHash": "ec228a8e773268aebd58a91a0148e74936374a44cc03944f6ac6884b"
|
|
63
64
|
}
|