@wix/evalforge-evaluator 0.118.0 → 0.120.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/build/index.js +538 -537
- package/build/index.js.map +4 -4
- package/build/index.mjs +537 -537
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/agents/opencode/build-conversation.d.ts +7 -4
- package/build/types/run-scenario/agents/opencode/build-trace.d.ts +6 -9
- package/build/types/run-scenario/agents/opencode/config.d.ts +5 -11
- package/build/types/run-scenario/agents/opencode/execute.d.ts +3 -4
- package/build/types/run-scenario/agents/opencode/index.d.ts +1 -1
- package/build/types/run-scenario/agents/opencode/opencode-adapter.d.ts +2 -3
- package/build/types/run-scenario/agents/opencode/types.d.ts +51 -6
- package/build/types/run-scenario/agents/simple-agent/build-conversation.d.ts +1 -1
- package/build/types/run-scenario/agents/simple-agent/execute.d.ts +1 -1
- package/package.json +4 -5
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import type { ConversationMessage } from '@wix/evalforge-types';
|
|
2
|
-
import type {
|
|
2
|
+
import type { TimestampedOpenCodeEvent } from './types.js';
|
|
3
3
|
/**
|
|
4
|
-
* Build a conversation from the
|
|
5
|
-
*
|
|
4
|
+
* Build a conversation from the NDJSON events emitted by
|
|
5
|
+
* `opencode run --format json`.
|
|
6
|
+
*
|
|
7
|
+
* Events are grouped into assistant turns (text, reasoning, tool_use)
|
|
8
|
+
* separated by step_finish events. Tool results appear as user messages.
|
|
6
9
|
*/
|
|
7
|
-
export declare function buildConversation(
|
|
10
|
+
export declare function buildConversation(timestampedEvents: TimestampedOpenCodeEvent[]): ConversationMessage[];
|
|
@@ -1,13 +1,10 @@
|
|
|
1
1
|
import type { LLMTrace } from '@wix/evalforge-types';
|
|
2
|
-
import type {
|
|
3
|
-
export type { OpenCodeMessageWithParts };
|
|
2
|
+
import type { TimestampedOpenCodeEvent } from './types.js';
|
|
4
3
|
/**
|
|
5
|
-
* Build an LLMTrace from the
|
|
6
|
-
* client.session.messages().
|
|
4
|
+
* Build an LLMTrace from the NDJSON events emitted by `opencode run --format json`.
|
|
7
5
|
*
|
|
8
|
-
*
|
|
9
|
-
*
|
|
10
|
-
*
|
|
11
|
-
* back to AssistantMessage-level info.
|
|
6
|
+
* Events are grouped into turns delimited by `step_finish` events.
|
|
7
|
+
* Each turn may produce THINKING, TOOL_USE, and COMPLETION sub-steps
|
|
8
|
+
* to match the claude-code trace format.
|
|
12
9
|
*/
|
|
13
|
-
export declare function buildLLMTrace(
|
|
10
|
+
export declare function buildLLMTrace(timestampedEvents: TimestampedOpenCodeEvent[], totalDurationMs: number, model: string, provider: string, executionStartTime: Date): LLMTrace;
|
|
@@ -1,6 +1,3 @@
|
|
|
1
|
-
import type { Config } from '@opencode-ai/sdk' with {
|
|
2
|
-
'resolution-mode': 'import'
|
|
3
|
-
};
|
|
4
1
|
import { type MCPEntity } from '@wix/evalforge-types';
|
|
5
2
|
export interface OpenCodeConfigOptions {
|
|
6
3
|
model?: string;
|
|
@@ -12,16 +9,13 @@ export interface OpenCodeConfigOptions {
|
|
|
12
9
|
cwd: string;
|
|
13
10
|
}
|
|
14
11
|
/**
|
|
15
|
-
* Build
|
|
12
|
+
* Build environment variables for spawning `opencode run`.
|
|
16
13
|
*
|
|
17
|
-
*
|
|
18
|
-
*
|
|
19
|
-
* - Provider with AI Gateway base URL and auth headers
|
|
20
|
-
* - Permissions set to "allow" for automated execution
|
|
21
|
-
* - MCPs merged from evaluation entities
|
|
14
|
+
* Returns the env object (including `OPENCODE_CONFIG_CONTENT`) and the
|
|
15
|
+
* parsed provider/model IDs needed by the caller.
|
|
22
16
|
*/
|
|
23
|
-
export declare function
|
|
24
|
-
|
|
17
|
+
export declare function buildOpenCodeEnv(options: OpenCodeConfigOptions): Promise<{
|
|
18
|
+
env: NodeJS.ProcessEnv;
|
|
25
19
|
providerID: string;
|
|
26
20
|
modelID: string;
|
|
27
21
|
}>;
|
|
@@ -9,11 +9,10 @@ import type { OpenCodeExecutionOptions, OpenCodeExecutionResult } from './types.
|
|
|
9
9
|
*/
|
|
10
10
|
export declare function prepareOpenCodeEnvironment(cwd: string, skills: SkillWithLatestVersion[], options: Pick<OpenCodeExecutionOptions, 'mcps' | 'subAgents' | 'rules'>): Promise<void>;
|
|
11
11
|
/**
|
|
12
|
-
* Execute skills using the OpenCode
|
|
12
|
+
* Execute skills using the OpenCode CLI.
|
|
13
13
|
*
|
|
14
|
-
*
|
|
15
|
-
*
|
|
16
|
-
* The SDK starts a server, creates a session, and sends the trigger prompt.
|
|
14
|
+
* Spawns `opencode run --format json` with the trigger prompt, parses the
|
|
15
|
+
* NDJSON events from stdout, and builds the trace/conversation from them.
|
|
17
16
|
*/
|
|
18
17
|
export declare function executeWithOpenCode(skills: SkillWithLatestVersion[], scenario: TestScenario, options: OpenCodeExecutionOptions): Promise<{
|
|
19
18
|
result: OpenCodeExecutionResult;
|
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
* OpenCode agent adapter module.
|
|
3
3
|
*
|
|
4
4
|
* Provides the OpenCode implementation of the AgentAdapter interface.
|
|
5
|
-
*
|
|
5
|
+
* Spawns the `opencode` CLI and parses NDJSON events from stdout.
|
|
6
6
|
*
|
|
7
7
|
* The adapter is automatically registered with the default registry when
|
|
8
8
|
* this module is imported.
|
|
@@ -3,9 +3,8 @@ import { AgentRunCommand } from '@wix/evalforge-types';
|
|
|
3
3
|
/**
|
|
4
4
|
* OpenCode agent adapter.
|
|
5
5
|
*
|
|
6
|
-
* Implements the AgentAdapter interface
|
|
7
|
-
*
|
|
8
|
-
* and execute the evaluation prompt.
|
|
6
|
+
* Implements the AgentAdapter interface by spawning the `opencode` CLI
|
|
7
|
+
* and parsing NDJSON events from stdout.
|
|
9
8
|
*
|
|
10
9
|
* Supported commands: 'opencode'
|
|
11
10
|
*/
|
|
@@ -1,11 +1,55 @@
|
|
|
1
1
|
import type { MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
|
|
2
|
-
import type { Message, Part } from '@opencode-ai/sdk' with {
|
|
3
|
-
'resolution-mode': 'import'
|
|
4
|
-
};
|
|
5
2
|
export type { TraceContext } from '@wix/evalforge-types';
|
|
6
|
-
export interface
|
|
7
|
-
|
|
8
|
-
|
|
3
|
+
export interface OpenCodeTextEvent {
|
|
4
|
+
type: 'text';
|
|
5
|
+
part: {
|
|
6
|
+
text: string;
|
|
7
|
+
};
|
|
8
|
+
}
|
|
9
|
+
export interface OpenCodeReasoningEvent {
|
|
10
|
+
type: 'reasoning';
|
|
11
|
+
part: {
|
|
12
|
+
text: string;
|
|
13
|
+
};
|
|
14
|
+
}
|
|
15
|
+
export interface OpenCodeToolUseEvent {
|
|
16
|
+
type: 'tool_use';
|
|
17
|
+
part: {
|
|
18
|
+
tool: string;
|
|
19
|
+
callID: string;
|
|
20
|
+
state: {
|
|
21
|
+
status: string;
|
|
22
|
+
input?: Record<string, unknown>;
|
|
23
|
+
output?: string;
|
|
24
|
+
error?: string;
|
|
25
|
+
title?: string;
|
|
26
|
+
metadata?: Record<string, unknown>;
|
|
27
|
+
};
|
|
28
|
+
};
|
|
29
|
+
}
|
|
30
|
+
export interface OpenCodeStepFinishEvent {
|
|
31
|
+
type: 'step_finish';
|
|
32
|
+
part: {
|
|
33
|
+
reason: string;
|
|
34
|
+
cost: number;
|
|
35
|
+
tokens: {
|
|
36
|
+
total: number;
|
|
37
|
+
input: number;
|
|
38
|
+
output: number;
|
|
39
|
+
reasoning: number;
|
|
40
|
+
cache: {
|
|
41
|
+
read: number;
|
|
42
|
+
write: number;
|
|
43
|
+
};
|
|
44
|
+
};
|
|
45
|
+
modelID?: string;
|
|
46
|
+
providerID?: string;
|
|
47
|
+
};
|
|
48
|
+
}
|
|
49
|
+
export type OpenCodeEvent = OpenCodeTextEvent | OpenCodeReasoningEvent | OpenCodeToolUseEvent | OpenCodeStepFinishEvent;
|
|
50
|
+
export interface TimestampedOpenCodeEvent {
|
|
51
|
+
event: OpenCodeEvent;
|
|
52
|
+
receivedAt: number;
|
|
9
53
|
}
|
|
10
54
|
export interface OpenCodeExecutionOptions {
|
|
11
55
|
cwd: string;
|
|
@@ -30,3 +74,4 @@ export interface OpenCodeExecutionResult {
|
|
|
30
74
|
};
|
|
31
75
|
costUsd?: number;
|
|
32
76
|
}
|
|
77
|
+
export declare function tryParseJson<T>(text: string): T | null;
|
|
@@ -7,4 +7,4 @@ import type { StepResult, ToolSet } from 'ai';
|
|
|
7
7
|
* format used by the Claude Code adapter, so both agent types render
|
|
8
8
|
* identically in the Conversation tab.
|
|
9
9
|
*/
|
|
10
|
-
export declare function buildConversation(triggerPrompt: string, steps: StepResult<ToolSet>[], executionStartMs: number): ConversationMessage[];
|
|
10
|
+
export declare function buildConversation(triggerPrompt: string, steps: StepResult<ToolSet>[], executionStartMs: number, stepTimestamps: number[]): ConversationMessage[];
|
|
@@ -25,4 +25,4 @@ export declare function buildLLMTrace(steps: StepResult<ToolSet>[], totalDuratio
|
|
|
25
25
|
inputTokens: number;
|
|
26
26
|
outputTokens: number;
|
|
27
27
|
totalTokens: number;
|
|
28
|
-
}, modelId: string, provider: string, executionStartMs: number): LLMTrace;
|
|
28
|
+
}, modelId: string, provider: string, executionStartMs: number, stepTimestamps: number[]): LLMTrace;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-evaluator",
|
|
3
|
-
"version": "0.118.0",
|
|
3
|
+
"version": "0.120.0",
|
|
4
4
|
"description": "EvalForge Evaluator",
|
|
5
5
|
"bin": "./build/index.js",
|
|
6
6
|
"files": [
|
|
@@ -8,8 +8,8 @@
|
|
|
8
8
|
],
|
|
9
9
|
"scripts": {
|
|
10
10
|
"clean": "rm -rf build",
|
|
11
|
-
"build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai
|
|
12
|
-
"build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai
|
|
11
|
+
"build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai",
|
|
12
|
+
"build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai",
|
|
13
13
|
"build:types": "tsc --emitDeclarationOnly --outDir ./build/types",
|
|
14
14
|
"build": "yarn run clean && yarn run build:cjs && yarn run build:esm && yarn run build:types",
|
|
15
15
|
"lint": "eslint .",
|
|
@@ -21,7 +21,6 @@
|
|
|
21
21
|
"@ai-sdk/openai": "^3.0.39",
|
|
22
22
|
"@anthropic-ai/claude-agent-sdk": "^0.2.63",
|
|
23
23
|
"@anthropic-ai/claude-code": "^2.1.63",
|
|
24
|
-
"@opencode-ai/sdk": "^1.2.15",
|
|
25
24
|
"@wix/eval-assertions": "0.29.0",
|
|
26
25
|
"@wix/evalforge-github-client": "0.34.0",
|
|
27
26
|
"@wix/evalforge-types": "0.59.0",
|
|
@@ -63,5 +62,5 @@
|
|
|
63
62
|
"artifactId": "evalforge-evaluator"
|
|
64
63
|
}
|
|
65
64
|
},
|
|
66
|
-
"falconPackageHash": "
|
|
65
|
+
"falconPackageHash": "a07572aca2e31af84fdb06d187e890d63417b3e4d4b47c8b91e4c436"
|
|
67
66
|
}
|