@wix/evalforge-evaluator 0.118.0 → 0.120.0

This diff compares publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,10 @@
1
1
  import type { ConversationMessage } from '@wix/evalforge-types';
2
- import type { OpenCodeMessageWithParts } from './types.js';
2
+ import type { TimestampedOpenCodeEvent } from './types.js';
3
3
  /**
4
- * Build a conversation from the message history returned by
5
- * client.session.messages().
4
+ * Build a conversation from the NDJSON events emitted by
5
+ * `opencode run --format json`.
6
+ *
7
+ * Events are grouped into assistant turns (text, reasoning, tool_use)
8
+ * separated by step_finish events. Tool results appear as user messages.
6
9
  */
7
- export declare function buildConversation(messages: OpenCodeMessageWithParts[]): ConversationMessage[];
10
+ export declare function buildConversation(timestampedEvents: TimestampedOpenCodeEvent[]): ConversationMessage[];
@@ -1,13 +1,10 @@
1
1
  import type { LLMTrace } from '@wix/evalforge-types';
2
- import type { OpenCodeMessageWithParts } from './types.js';
3
- export type { OpenCodeMessageWithParts };
2
+ import type { TimestampedOpenCodeEvent } from './types.js';
4
3
  /**
5
- * Build an LLMTrace from the full message history returned by
6
- * client.session.messages().
4
+ * Build an LLMTrace from the NDJSON events emitted by `opencode run --format json`.
7
5
  *
8
- * Each assistant message (turn) may produce multiple trace sub-steps
9
- * (THINKING, TOOL_USE, COMPLETION) to match the claude-code trace format.
10
- * Token usage and cost are extracted from StepFinishPart parts, falling
11
- * back to AssistantMessage-level info.
6
+ * Events are grouped into turns delimited by `step_finish` events.
7
+ * Each turn may produce THINKING, TOOL_USE, and COMPLETION sub-steps
8
+ * to match the claude-code trace format.
12
9
  */
13
- export declare function buildLLMTrace(messages: OpenCodeMessageWithParts[], totalDurationMs: number, model: string, provider: string): LLMTrace;
10
+ export declare function buildLLMTrace(timestampedEvents: TimestampedOpenCodeEvent[], totalDurationMs: number, model: string, provider: string, executionStartTime: Date): LLMTrace;
@@ -1,6 +1,3 @@
1
- import type { Config } from '@opencode-ai/sdk' with {
2
- 'resolution-mode': 'import'
3
- };
4
1
  import { type MCPEntity } from '@wix/evalforge-types';
5
2
  export interface OpenCodeConfigOptions {
6
3
  model?: string;
@@ -12,16 +9,13 @@ export interface OpenCodeConfigOptions {
12
9
  cwd: string;
13
10
  }
14
11
  /**
15
- * Build the inline Config object for createOpencode().
12
+ * Build environment variables for spawning `opencode run`.
16
13
  *
17
- * Configures:
18
- * - Model (default format: provider/model-id)
19
- * - Provider with AI Gateway base URL and auth headers
20
- * - Permissions set to "allow" for automated execution
21
- * - MCPs merged from evaluation entities
14
+ * Returns the env object (including `OPENCODE_CONFIG_CONTENT`) and the
15
+ * parsed provider/model IDs needed by the caller.
22
16
  */
23
- export declare function buildOpenCodeConfig(options: OpenCodeConfigOptions): Promise<{
24
- config: Config;
17
+ export declare function buildOpenCodeEnv(options: OpenCodeConfigOptions): Promise<{
18
+ env: NodeJS.ProcessEnv;
25
19
  providerID: string;
26
20
  modelID: string;
27
21
  }>;
@@ -9,11 +9,10 @@ import type { OpenCodeExecutionOptions, OpenCodeExecutionResult } from './types.
9
9
  */
10
10
  export declare function prepareOpenCodeEnvironment(cwd: string, skills: SkillWithLatestVersion[], options: Pick<OpenCodeExecutionOptions, 'mcps' | 'subAgents' | 'rules'>): Promise<void>;
11
11
  /**
12
- * Execute skills using the OpenCode SDK.
12
+ * Execute skills using the OpenCode CLI.
13
13
  *
14
- * Skills are written to .opencode/skills/<name>/. Rules and sub-agents
15
- * are written to the filesystem. MCPs are passed inline via the config.
16
- * The SDK starts a server, creates a session, and sends the trigger prompt.
14
+ * Spawns `opencode run --format json` with the trigger prompt, parses the
15
+ * NDJSON events from stdout, and builds the trace/conversation from them.
17
16
  */
18
17
  export declare function executeWithOpenCode(skills: SkillWithLatestVersion[], scenario: TestScenario, options: OpenCodeExecutionOptions): Promise<{
19
18
  result: OpenCodeExecutionResult;
@@ -2,7 +2,7 @@
2
2
  * OpenCode agent adapter module.
3
3
  *
4
4
  * Provides the OpenCode implementation of the AgentAdapter interface.
5
- * Uses @opencode-ai/sdk to execute skills via an OpenCode server session.
5
+ * Spawns the `opencode` CLI and parses NDJSON events from stdout.
6
6
  *
7
7
  * The adapter is automatically registered with the default registry when
8
8
  * this module is imported.
@@ -3,9 +3,8 @@ import { AgentRunCommand } from '@wix/evalforge-types';
3
3
  /**
4
4
  * OpenCode agent adapter.
5
5
  *
6
- * Implements the AgentAdapter interface for the OpenCode SDK.
7
- * Uses @opencode-ai/sdk to start a server, create a session,
8
- * and execute the evaluation prompt.
6
+ * Implements the AgentAdapter interface by spawning the `opencode` CLI
7
+ * and parsing NDJSON events from stdout.
9
8
  *
10
9
  * Supported commands: 'opencode'
11
10
  */
@@ -1,11 +1,55 @@
1
1
  import type { MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
2
- import type { Message, Part } from '@opencode-ai/sdk' with {
3
- 'resolution-mode': 'import'
4
- };
5
2
  export type { TraceContext } from '@wix/evalforge-types';
6
- export interface OpenCodeMessageWithParts {
7
- info: Message;
8
- parts: Part[];
3
+ export interface OpenCodeTextEvent {
4
+ type: 'text';
5
+ part: {
6
+ text: string;
7
+ };
8
+ }
9
+ export interface OpenCodeReasoningEvent {
10
+ type: 'reasoning';
11
+ part: {
12
+ text: string;
13
+ };
14
+ }
15
+ export interface OpenCodeToolUseEvent {
16
+ type: 'tool_use';
17
+ part: {
18
+ tool: string;
19
+ callID: string;
20
+ state: {
21
+ status: string;
22
+ input?: Record<string, unknown>;
23
+ output?: string;
24
+ error?: string;
25
+ title?: string;
26
+ metadata?: Record<string, unknown>;
27
+ };
28
+ };
29
+ }
30
+ export interface OpenCodeStepFinishEvent {
31
+ type: 'step_finish';
32
+ part: {
33
+ reason: string;
34
+ cost: number;
35
+ tokens: {
36
+ total: number;
37
+ input: number;
38
+ output: number;
39
+ reasoning: number;
40
+ cache: {
41
+ read: number;
42
+ write: number;
43
+ };
44
+ };
45
+ modelID?: string;
46
+ providerID?: string;
47
+ };
48
+ }
49
+ export type OpenCodeEvent = OpenCodeTextEvent | OpenCodeReasoningEvent | OpenCodeToolUseEvent | OpenCodeStepFinishEvent;
50
+ export interface TimestampedOpenCodeEvent {
51
+ event: OpenCodeEvent;
52
+ receivedAt: number;
9
53
  }
10
54
  export interface OpenCodeExecutionOptions {
11
55
  cwd: string;
@@ -30,3 +74,4 @@ export interface OpenCodeExecutionResult {
30
74
  };
31
75
  costUsd?: number;
32
76
  }
77
+ export declare function tryParseJson<T>(text: string): T | null;
@@ -7,4 +7,4 @@ import type { StepResult, ToolSet } from 'ai';
7
7
  * format used by the Claude Code adapter, so both agent types render
8
8
  * identically in the Conversation tab.
9
9
  */
10
- export declare function buildConversation(triggerPrompt: string, steps: StepResult<ToolSet>[], executionStartMs: number): ConversationMessage[];
10
+ export declare function buildConversation(triggerPrompt: string, steps: StepResult<ToolSet>[], executionStartMs: number, stepTimestamps: number[]): ConversationMessage[];
@@ -25,4 +25,4 @@ export declare function buildLLMTrace(steps: StepResult<ToolSet>[], totalDuratio
25
25
  inputTokens: number;
26
26
  outputTokens: number;
27
27
  totalTokens: number;
28
- }, modelId: string, provider: string, executionStartMs: number): LLMTrace;
28
+ }, modelId: string, provider: string, executionStartMs: number, stepTimestamps: number[]): LLMTrace;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-evaluator",
3
- "version": "0.118.0",
3
+ "version": "0.120.0",
4
4
  "description": "EvalForge Evaluator",
5
5
  "bin": "./build/index.js",
6
6
  "files": [
@@ -8,8 +8,8 @@
8
8
  ],
9
9
  "scripts": {
10
10
  "clean": "rm -rf build",
11
- "build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai --external:@opencode-ai/sdk",
12
- "build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai --external:@opencode-ai/sdk",
11
+ "build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai",
12
+ "build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai",
13
13
  "build:types": "tsc --emitDeclarationOnly --outDir ./build/types",
14
14
  "build": "yarn run clean && yarn run build:cjs && yarn run build:esm && yarn run build:types",
15
15
  "lint": "eslint .",
@@ -21,7 +21,6 @@
21
21
  "@ai-sdk/openai": "^3.0.39",
22
22
  "@anthropic-ai/claude-agent-sdk": "^0.2.63",
23
23
  "@anthropic-ai/claude-code": "^2.1.63",
24
- "@opencode-ai/sdk": "^1.2.15",
25
24
  "@wix/eval-assertions": "0.29.0",
26
25
  "@wix/evalforge-github-client": "0.34.0",
27
26
  "@wix/evalforge-types": "0.59.0",
@@ -63,5 +62,5 @@
63
62
  "artifactId": "evalforge-evaluator"
64
63
  }
65
64
  },
66
- "falconPackageHash": "5f5205a306731fb36f3456d5fc9170db2696782ac30edc987867ce0f"
65
+ "falconPackageHash": "a07572aca2e31af84fdb06d187e890d63417b3e4d4b47c8b91e4c436"
67
66
  }