@wix/evalforge-evaluator 0.146.0 → 0.148.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +192 -62
- package/build/index.js.map +3 -3
- package/build/index.mjs +201 -66
- package/build/index.mjs.map +3 -3
- package/build/types/run-scenario/agents/claude-code/types.d.ts +15 -3
- package/build/types/run-scenario/agents/opencode/config.d.ts +3 -0
- package/build/types/run-scenario/agents/opencode/types.d.ts +8 -0
- package/build/types/run-scenario/agents/simple-agent/execute.d.ts +1 -0
- package/build/types/run-scenario/agents/simple-agent/mcp-tools.d.ts +0 -2
- package/package.json +8 -7
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|
* These types are internal to the Claude Code adapter and extend or
|
|
5
5
|
* specialize the shared types from @wix/evalforge-types.
|
|
6
6
|
*/
|
|
7
|
-
import type { MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
|
|
7
|
+
import type { MCPEntity, SubAgent, Rule, EffortLevel } from '@wix/evalforge-types';
|
|
8
8
|
export type { TraceContext } from '@wix/evalforge-types';
|
|
9
9
|
/**
|
|
10
10
|
* Options for Claude Agent SDK execution.
|
|
@@ -23,14 +23,24 @@ export interface ClaudeCodeExecutionOptions {
|
|
|
23
23
|
rules?: Rule[];
|
|
24
24
|
/** Model to use for generation */
|
|
25
25
|
model?: string;
|
|
26
|
-
/** Temperature for generation (0–1, from agent
|
|
26
|
+
/** Temperature for generation (0–1, from agent config) */
|
|
27
27
|
temperature?: number;
|
|
28
|
-
/** Max output tokens (from agent
|
|
28
|
+
/** Max output tokens (from agent config) */
|
|
29
29
|
maxTokens?: number;
|
|
30
30
|
/** Maximum number of agent turns */
|
|
31
31
|
maxTurns?: number;
|
|
32
32
|
/** Maximum thinking tokens */
|
|
33
33
|
maxThinkingTokens?: number;
|
|
34
|
+
/** Override the default allowedTools list */
|
|
35
|
+
allowedTools?: string[];
|
|
36
|
+
/** Tools to remove from the model's context entirely */
|
|
37
|
+
disallowedTools?: string[];
|
|
38
|
+
/** Controls thinking depth: low, medium, high, max */
|
|
39
|
+
effort?: EffortLevel;
|
|
40
|
+
/** Maximum USD spend per run */
|
|
41
|
+
maxBudgetUsd?: number;
|
|
42
|
+
/** Execution timeout in milliseconds (overrides the default maxTurns-based calculation) */
|
|
43
|
+
maxDurationMs?: number;
|
|
34
44
|
/** AI Gateway base URL for Anthropic API */
|
|
35
45
|
aiGatewayUrl?: string;
|
|
36
46
|
/** AI Gateway custom headers as key-value pairs */
|
|
@@ -44,6 +54,8 @@ export interface ClaudeCodeExecutionOptions {
|
|
|
44
54
|
* - string: custom system prompt text
|
|
45
55
|
*/
|
|
46
56
|
systemPrompt?: string | null;
|
|
57
|
+
/** Extra config properties not in the known schema. Spread into SDK query options. */
|
|
58
|
+
extras?: Record<string, unknown>;
|
|
47
59
|
}
|
|
48
60
|
/**
|
|
49
61
|
* Result from Claude Code execution.
|
|
@@ -2,11 +2,14 @@ import { type MCPEntity } from '@wix/evalforge-types';
|
|
|
2
2
|
export interface OpenCodeConfigOptions {
|
|
3
3
|
model?: string;
|
|
4
4
|
temperature?: number;
|
|
5
|
+
topP?: number;
|
|
5
6
|
maxTurns?: number;
|
|
6
7
|
aiGatewayUrl?: string;
|
|
7
8
|
aiGatewayHeaders?: Record<string, string>;
|
|
8
9
|
mcps?: MCPEntity[];
|
|
9
10
|
cwd: string;
|
|
11
|
+
/** Agent config bag — may contain permission overrides */
|
|
12
|
+
config?: Record<string, unknown>;
|
|
10
13
|
}
|
|
11
14
|
/**
|
|
12
15
|
* Build environment variables for spawning `opencode run`.
|
|
@@ -58,11 +58,19 @@ export interface OpenCodeExecutionOptions {
|
|
|
58
58
|
rules?: Rule[];
|
|
59
59
|
model?: string;
|
|
60
60
|
temperature?: number;
|
|
61
|
+
/** Nucleus sampling (0–1). Alternative to temperature. */
|
|
62
|
+
topP?: number;
|
|
61
63
|
maxTurns?: number;
|
|
64
|
+
/** Thinking variant: 'high' | 'low' | 'none'. Default: 'high'. 'none' omits --thinking flag. */
|
|
65
|
+
thinkingVariant?: string;
|
|
66
|
+
/** Execution timeout override in milliseconds */
|
|
67
|
+
maxDurationMs?: number;
|
|
62
68
|
aiGatewayUrl?: string;
|
|
63
69
|
aiGatewayHeaders?: Record<string, string>;
|
|
64
70
|
traceContext?: import('@wix/evalforge-types').TraceContext;
|
|
65
71
|
systemPrompt?: string | null;
|
|
72
|
+
/** Agent config bag for passing to buildOpenCodeEnv (permission overrides, etc.) */
|
|
73
|
+
config?: Record<string, unknown>;
|
|
66
74
|
}
|
|
67
75
|
export interface OpenCodeExecutionResult {
|
|
68
76
|
outputText: string;
|
|
@@ -2,6 +2,7 @@ import { type LanguageModel, type StepResult, type ToolSet } from 'ai';
|
|
|
2
2
|
import type { AgentExecutionContext, AgentExecutionResult, LLMTrace } from '@wix/evalforge-types';
|
|
3
3
|
export declare function createModel(modelId: string, baseUrl: string, headers: Record<string, string>): LanguageModel;
|
|
4
4
|
export declare function isClaudeModelId(modelId: string): boolean;
|
|
5
|
+
export declare function isGeminiModelId(modelId: string): boolean;
|
|
5
6
|
/**
|
|
6
7
|
* Extract text content from a skill's latest version files.
|
|
7
8
|
* Concatenates all file contents, prioritizing SKILL.md if present.
|
|
@@ -12,8 +12,6 @@ interface McpToolsResult {
|
|
|
12
12
|
* Each `MCPEntity.config` is a keyed record matching the `.mcp.json` shape,
|
|
13
13
|
* e.g. `{ "server-name": { "type": "stdio", "command": "node", "args": ["server.js"] } }`.
|
|
14
14
|
* Each entry becomes a live MCP client whose tools are merged into the result.
|
|
15
|
-
*
|
|
16
|
-
* Tool names are namespaced as `${serverName}__${toolName}` to avoid collisions.
|
|
17
15
|
*/
|
|
18
16
|
export declare function buildMcpTools(mcps: MCPEntity[], cwd: string): Promise<McpToolsResult>;
|
|
19
17
|
export declare function closeMcpClients(clients: Array<{
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-evaluator",
|
|
3
|
-
"version": "0.146.0",
|
|
3
|
+
"version": "0.148.0",
|
|
4
4
|
"description": "EvalForge Evaluator",
|
|
5
5
|
"bin": "./build/index.js",
|
|
6
6
|
"files": [
|
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
],
|
|
9
9
|
"scripts": {
|
|
10
10
|
"clean": "rm -rf build",
|
|
11
|
-
"build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai",
|
|
12
|
-
"build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai",
|
|
11
|
+
"build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/google --external:@ai-sdk/openai --external:ai",
|
|
12
|
+
"build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/google --external:@ai-sdk/openai --external:ai",
|
|
13
13
|
"build:types": "tsc --emitDeclarationOnly --outDir ./build/types",
|
|
14
14
|
"build": "yarn run clean && yarn run build:cjs && yarn run build:esm && yarn run build:types",
|
|
15
15
|
"lint": "eslint .",
|
|
@@ -17,13 +17,14 @@
|
|
|
17
17
|
},
|
|
18
18
|
"dependencies": {
|
|
19
19
|
"@ai-sdk/anthropic": "^3.0.53",
|
|
20
|
+
"@ai-sdk/google": "^3.0.53",
|
|
20
21
|
"@ai-sdk/mcp": "^1.0.23",
|
|
21
22
|
"@ai-sdk/openai": "^3.0.39",
|
|
22
23
|
"@anthropic-ai/claude-agent-sdk": "^0.2.63",
|
|
23
24
|
"@anthropic-ai/claude-code": "^2.1.63",
|
|
24
|
-
"@wix/eval-assertions": "0.
|
|
25
|
-
"@wix/evalforge-github-client": "0.
|
|
26
|
-
"@wix/evalforge-types": "0.
|
|
25
|
+
"@wix/eval-assertions": "0.50.0",
|
|
26
|
+
"@wix/evalforge-github-client": "0.54.0",
|
|
27
|
+
"@wix/evalforge-types": "0.79.0",
|
|
27
28
|
"ai": "^6.0.107",
|
|
28
29
|
"diff": "^7.0.0",
|
|
29
30
|
"tar": "^7.5.3",
|
|
@@ -62,5 +63,5 @@
|
|
|
62
63
|
"artifactId": "evalforge-evaluator"
|
|
63
64
|
}
|
|
64
65
|
},
|
|
65
|
-
"falconPackageHash": "
|
|
66
|
+
"falconPackageHash": "98c865c8379433039867350fce6148475e6f7daae8c73d9e8da29dd5"
|
|
66
67
|
}
|