@wix/evalforge-evaluator 0.103.0 → 0.105.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +41491 -512
- package/build/index.js.map +4 -4
- package/build/index.mjs +41493 -483
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/agents/index.d.ts +2 -0
- package/build/types/run-scenario/agents/{claude-code → shared}/resolve-mcp-placeholders.d.ts +9 -6
- package/build/types/run-scenario/agents/shared/trace-emit.d.ts +6 -0
- package/build/types/run-scenario/agents/simple-agent/build-conversation.d.ts +10 -0
- package/build/types/run-scenario/agents/simple-agent/cost-calculation.d.ts +10 -0
- package/build/types/run-scenario/agents/simple-agent/execute.d.ts +28 -0
- package/build/types/run-scenario/agents/simple-agent/index.d.ts +1 -0
- package/build/types/run-scenario/agents/simple-agent/mcp-tools.d.ts +39 -0
- package/build/types/run-scenario/agents/simple-agent/simple-agent-adapter.d.ts +19 -0
- package/package.json +11 -9
|
@@ -11,6 +11,7 @@
|
|
|
11
11
|
* Agent adapters are automatically registered when their modules are imported.
|
|
12
12
|
* Currently supported adapters:
|
|
13
13
|
* - Claude Code (command: 'claude')
|
|
14
|
+
* - Simple Agent (adapter ID: 'simple-agent')
|
|
14
15
|
*
|
|
15
16
|
* @example
|
|
16
17
|
* ```typescript
|
|
@@ -22,3 +23,4 @@
|
|
|
22
23
|
*/
|
|
23
24
|
export { AgentAdapterRegistry, defaultRegistry, getAdapter, hasAdapter } from './registry.js';
|
|
24
25
|
import './claude-code/index.js';
|
|
26
|
+
import './simple-agent/index.js';
|
package/build/types/run-scenario/agents/{claude-code → shared}/resolve-mcp-placeholders.d.ts
RENAMED
|
@@ -2,15 +2,18 @@
|
|
|
2
2
|
* MCP Config Placeholder Resolution
|
|
3
3
|
*
|
|
4
4
|
* Resolves `{{placeholder}}` patterns in MCP config values at eval startup.
|
|
5
|
-
* Currently supports Wix auth placeholders sourced from ~/.wix/auth/api-key.json
|
|
6
|
-
* (the same file that @wix/mcp reads via WIX_API_THROUGH_FS).
|
|
7
5
|
*
|
|
8
6
|
* Supported placeholders:
|
|
9
|
-
* - {{wix-auth-token}} → token from api-key.json
|
|
7
|
+
* - {{wix-auth-token}} → token from ~/.wix/auth/api-key.json
|
|
10
8
|
* - {{wix-auth-user-id}} → userInfo.userId from api-key.json
|
|
9
|
+
* - {{cwd}} → environment folder (working directory)
|
|
11
10
|
*/
|
|
12
11
|
import { findPlaceholders, resolveValue } from '../../../resolve-placeholders.js';
|
|
13
12
|
export { findPlaceholders, resolveValue };
|
|
13
|
+
export interface ResolveMcpPlaceholdersOptions {
|
|
14
|
+
cwd?: string;
|
|
15
|
+
authFilePath?: string;
|
|
16
|
+
}
|
|
14
17
|
/**
|
|
15
18
|
* Read Wix auth placeholders from ~/.wix/auth/api-key.json.
|
|
16
19
|
* Returns an empty map when the file is missing or malformed.
|
|
@@ -21,11 +24,11 @@ export declare function loadWixAuthPlaceholders(authFilePath?: string): Promise<
|
|
|
21
24
|
/**
|
|
22
25
|
* Resolve all `{{...}}` placeholders in an MCP server config map.
|
|
23
26
|
*
|
|
24
|
-
* Loads available placeholder values (
|
|
27
|
+
* Loads available placeholder values (Wix auth file + cwd),
|
|
25
28
|
* scans the config for placeholders, and fails if any are unresolvable.
|
|
26
29
|
*
|
|
27
30
|
* @param mcpServers - The mcpServers map (server-name → config)
|
|
28
|
-
* @param
|
|
31
|
+
* @param options - Optional cwd and authFilePath overrides
|
|
29
32
|
* @throws When a config references placeholders that cannot be resolved
|
|
30
33
|
*/
|
|
31
|
-
export declare function resolveMcpPlaceholders(mcpServers: Record<string, Record<string, unknown>>,
|
|
34
|
+
export declare function resolveMcpPlaceholders(mcpServers: Record<string, Record<string, unknown>>, options?: ResolveMcpPlaceholdersOptions): Promise<Record<string, Record<string, unknown>>>;
|
|
@@ -0,0 +1,6 @@
|
|
|
1
|
+
import type { LiveTraceEvent } from '@wix/evalforge-types';
|
|
2
|
+
/**
|
|
3
|
+
* Emit a live trace event to stdout for the backend to capture.
|
|
4
|
+
* Also pushes to HTTP endpoint if tracePushUrl is provided (for remote job execution).
|
|
5
|
+
*/
|
|
6
|
+
export declare function emitTraceEvent(event: LiveTraceEvent, tracePushUrl?: string, routeHeader?: string, authToken?: string): void;
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { ConversationMessage } from '@wix/evalforge-types';
|
|
2
|
+
import type { StepResult, ToolSet } from 'ai';
|
|
3
|
+
/**
|
|
4
|
+
* Build a serializable conversation from Vercel AI SDK step results.
|
|
5
|
+
*
|
|
6
|
+
* Maps the step-based execution model to the same ConversationMessage
|
|
7
|
+
* format used by the Claude Code adapter, so both agent types render
|
|
8
|
+
* identically in the Conversation tab.
|
|
9
|
+
*/
|
|
10
|
+
export declare function buildConversation(triggerPrompt: string, steps: StepResult<ToolSet>[], executionStartMs: number): ConversationMessage[];
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
import type { StepResult, ToolSet } from 'ai';
|
|
2
|
+
/**
|
|
3
|
+
* Calculate the cost for a single LLM step.
|
|
4
|
+
* Prefers the gateway-reported cost (accurate with cache discounts),
|
|
5
|
+
* falling back to the built-in pricing table.
|
|
6
|
+
*/
|
|
7
|
+
export declare function calculateStepCost(step: StepResult<ToolSet>, modelId: string, provider: string, tokenUsage: {
|
|
8
|
+
prompt: number;
|
|
9
|
+
completion: number;
|
|
10
|
+
}): number;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
import { type LanguageModel, type StepResult, type ToolSet } from 'ai';
|
|
2
|
+
import type { AgentExecutionContext, AgentExecutionResult, LLMTrace } from '@wix/evalforge-types';
|
|
3
|
+
export declare function createModel(modelId: string, baseUrl: string, headers: Record<string, string>): LanguageModel;
|
|
4
|
+
export declare function isClaudeModelId(modelId: string): boolean;
|
|
5
|
+
/**
|
|
6
|
+
* Extract text content from a skill's latest version files.
|
|
7
|
+
* Concatenates all file contents, prioritizing SKILL.md if present.
|
|
8
|
+
*/
|
|
9
|
+
export declare function extractSkillContent(files: Array<{
|
|
10
|
+
path: string;
|
|
11
|
+
content: string;
|
|
12
|
+
}> | undefined): string | undefined;
|
|
13
|
+
export declare function executeWithAiSdk(context: AgentExecutionContext): Promise<AgentExecutionResult>;
|
|
14
|
+
/**
|
|
15
|
+
* Build the system prompt for Simple Agent from skills, rules, and optional override.
|
|
16
|
+
*
|
|
17
|
+
* Unlike CLI agents (which have DEFAULT_EVALUATOR_SYSTEM_PROMPT), the Simple Agent
|
|
18
|
+
* has no built-in default instructions. The three-state `systemPrompt` contract
|
|
19
|
+
* (undefined = default, null = none, string = custom) effectively collapses to
|
|
20
|
+
* two states here: both undefined and null result in no base prompt being prepended,
|
|
21
|
+
* while a non-empty string is included as the first segment.
|
|
22
|
+
*/
|
|
23
|
+
export declare function composeSystemPrompt(context: AgentExecutionContext): string | undefined;
|
|
24
|
+
export declare function buildLLMTrace(steps: StepResult<ToolSet>[], totalDurationMs: number, totalUsage: {
|
|
25
|
+
inputTokens: number;
|
|
26
|
+
outputTokens: number;
|
|
27
|
+
totalTokens: number;
|
|
28
|
+
}, modelId: string, provider: string, executionStartMs: number): LLMTrace;
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export {};
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
import { Experimental_StdioMCPTransport } from '@ai-sdk/mcp/mcp-stdio';
|
|
2
|
+
import type { MCPEntity } from '@wix/evalforge-types';
|
|
3
|
+
interface McpToolsResult {
|
|
4
|
+
tools: Record<string, unknown>;
|
|
5
|
+
clients: Array<{
|
|
6
|
+
close: () => Promise<void>;
|
|
7
|
+
}>;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Build AI SDK tools from MCP entity configs.
|
|
11
|
+
*
|
|
12
|
+
* Each `MCPEntity.config` is a keyed record matching the `.mcp.json` shape,
|
|
13
|
+
* e.g. `{ "server-name": { "type": "stdio", "command": "node", "args": ["server.js"] } }`.
|
|
14
|
+
* Each entry becomes a live MCP client whose tools are merged into the result.
|
|
15
|
+
*
|
|
16
|
+
* Tool names are namespaced as `${serverName}__${toolName}` to avoid collisions.
|
|
17
|
+
*/
|
|
18
|
+
export declare function buildMcpTools(mcps: MCPEntity[], cwd: string): Promise<McpToolsResult>;
|
|
19
|
+
export declare function closeMcpClients(clients: Array<{
|
|
20
|
+
close: () => Promise<void>;
|
|
21
|
+
}>): Promise<void>;
|
|
22
|
+
/**
|
|
23
|
+
* Build an AI SDK MCP transport from a server config entry.
|
|
24
|
+
*
|
|
25
|
+
* Supports three transport types:
|
|
26
|
+
* - stdio: local process (Experimental_StdioMCPTransport)
|
|
27
|
+
* - http: streamable HTTP (config object with type: 'http')
|
|
28
|
+
* - sse: Server-Sent Events (config object with type: 'sse')
|
|
29
|
+
*/
|
|
30
|
+
export declare function buildTransport(serverName: string, config: Record<string, unknown>, cwd: string): Experimental_StdioMCPTransport | {
|
|
31
|
+
headers?: Record<string, string> | undefined;
|
|
32
|
+
type: "http";
|
|
33
|
+
url: string;
|
|
34
|
+
} | {
|
|
35
|
+
headers?: Record<string, string> | undefined;
|
|
36
|
+
type: "sse";
|
|
37
|
+
url: string;
|
|
38
|
+
};
|
|
39
|
+
export {};
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import type { AgentAdapter, AgentExecutionContext, AgentExecutionResult } from '@wix/evalforge-types';
|
|
2
|
+
import type { AgentRunCommand } from '@wix/evalforge-types';
|
|
3
|
+
/**
|
|
4
|
+
* Simple Agent adapter.
|
|
5
|
+
*
|
|
6
|
+
* Executes LLM calls in-process via the Vercel AI SDK, routing through
|
|
7
|
+
* our proxy to support both Claude and OpenAI models. Supports MCP tools
|
|
8
|
+
* for interacting with the working directory.
|
|
9
|
+
*
|
|
10
|
+
* Looked up by adapter ID (not CLI command) since SDK agents have
|
|
11
|
+
* empty supportedCommands.
|
|
12
|
+
*/
|
|
13
|
+
export declare class SimpleAgentAdapter implements AgentAdapter {
|
|
14
|
+
readonly id = "simple-agent";
|
|
15
|
+
readonly name = "Simple Agent";
|
|
16
|
+
readonly supportedCommands: readonly AgentRunCommand[];
|
|
17
|
+
execute(context: AgentExecutionContext): Promise<AgentExecutionResult>;
|
|
18
|
+
}
|
|
19
|
+
export declare const simpleAgentAdapter: SimpleAgentAdapter;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-evaluator",
|
|
3
|
-
"version": "0.103.0",
|
|
3
|
+
"version": "0.105.0",
|
|
4
4
|
"description": "EvalForge Evaluator",
|
|
5
5
|
"bin": "./build/index.js",
|
|
6
6
|
"files": [
|
|
@@ -16,13 +16,15 @@
|
|
|
16
16
|
"test": "node --import tsx --test tests/**/*.test.ts"
|
|
17
17
|
},
|
|
18
18
|
"dependencies": {
|
|
19
|
-
"@ai-sdk/anthropic": "^3.0.
|
|
20
|
-
"@
|
|
21
|
-
"@
|
|
22
|
-
"@
|
|
23
|
-
"@
|
|
24
|
-
"@wix/
|
|
25
|
-
"
|
|
19
|
+
"@ai-sdk/anthropic": "^3.0.45",
|
|
20
|
+
"@ai-sdk/mcp": "^1.0.21",
|
|
21
|
+
"@ai-sdk/openai": "^3.0.30",
|
|
22
|
+
"@anthropic-ai/claude-agent-sdk": "^0.2.49",
|
|
23
|
+
"@anthropic-ai/claude-code": "^2.1.49",
|
|
24
|
+
"@wix/eval-assertions": "0.23.0",
|
|
25
|
+
"@wix/evalforge-github-client": "0.23.0",
|
|
26
|
+
"@wix/evalforge-types": "0.48.0",
|
|
27
|
+
"ai": "^6.0.93",
|
|
26
28
|
"diff": "^7.0.0",
|
|
27
29
|
"tar": "^7.5.3",
|
|
28
30
|
"zod": "^4.3.5"
|
|
@@ -60,5 +62,5 @@
|
|
|
60
62
|
"artifactId": "evalforge-evaluator"
|
|
61
63
|
}
|
|
62
64
|
},
|
|
63
|
-
"falconPackageHash": "
|
|
65
|
+
"falconPackageHash": "7f3f2d1b27312daf480e195d122e0885ac476d0f881100f407a34755"
|
|
64
66
|
}
|