npm - @wix/evalforge-evaluator - Versions diffs - 0.118.0 → 0.120.0 - Mend

@wix/evalforge-evaluator 0.118.0 → 0.120.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

package/build/types/run-scenario/agents/opencode/build-conversation.d.ts CHANGED Viewed

@@ -1,7 +1,10 @@
 import type { ConversationMessage } from '@wix/evalforge-types';
-import type { OpenCodeMessageWithParts } from './types.js';
+import type { TimestampedOpenCodeEvent } from './types.js';
 /**
- * Build a conversation from the message history returned by
- * client.session.messages().
+ * Build a conversation from the NDJSON events emitted by
+ * `opencode run --format json`.
+ *
+ * Events are grouped into assistant turns (text, reasoning, tool_use)
+ * separated by step_finish events. Tool results appear as user messages.
  */
-export declare function buildConversation(messages: OpenCodeMessageWithParts[]): ConversationMessage[];
+export declare function buildConversation(timestampedEvents: TimestampedOpenCodeEvent[]): ConversationMessage[];

package/build/types/run-scenario/agents/opencode/build-trace.d.ts CHANGED Viewed

@@ -1,13 +1,10 @@
 import type { LLMTrace } from '@wix/evalforge-types';
-import type { OpenCodeMessageWithParts } from './types.js';
-export type { OpenCodeMessageWithParts };
+import type { TimestampedOpenCodeEvent } from './types.js';
 /**
- * Build an LLMTrace from the full message history returned by
- * client.session.messages().
+ * Build an LLMTrace from the NDJSON events emitted by `opencode run --format json`.
  *
- * Each assistant message (turn) may produce multiple trace sub-steps
- * (THINKING, TOOL_USE, COMPLETION) to match the claude-code trace format.
- * Token usage and cost are extracted from StepFinishPart parts, falling
- * back to AssistantMessage-level info.
+ * Events are grouped into turns delimited by `step_finish` events.
+ * Each turn may produce THINKING, TOOL_USE, and COMPLETION sub-steps
+ * to match the claude-code trace format.
  */
-export declare function buildLLMTrace(messages: OpenCodeMessageWithParts[], totalDurationMs: number, model: string, provider: string): LLMTrace;
+export declare function buildLLMTrace(timestampedEvents: TimestampedOpenCodeEvent[], totalDurationMs: number, model: string, provider: string, executionStartTime: Date): LLMTrace;

package/build/types/run-scenario/agents/opencode/config.d.ts CHANGED Viewed

@@ -1,6 +1,3 @@
-import type { Config } from '@opencode-ai/sdk' with {
-    'resolution-mode': 'import'
-};
 import { type MCPEntity } from '@wix/evalforge-types';
 export interface OpenCodeConfigOptions {
     model?: string;
@@ -12,16 +9,13 @@ export interface OpenCodeConfigOptions {
     cwd: string;
 }
 /**
- * Build the inline Config object for createOpencode().
+ * Build environment variables for spawning `opencode run`.
  *
- * Configures:
- * - Model (default format: provider/model-id)
- * - Provider with AI Gateway base URL and auth headers
- * - Permissions set to "allow" for automated execution
- * - MCPs merged from evaluation entities
+ * Returns the env object (including `OPENCODE_CONFIG_CONTENT`) and the
+ * parsed provider/model IDs needed by the caller.
  */
-export declare function buildOpenCodeConfig(options: OpenCodeConfigOptions): Promise<{
-    config: Config;
+export declare function buildOpenCodeEnv(options: OpenCodeConfigOptions): Promise<{
+    env: NodeJS.ProcessEnv;
     providerID: string;
     modelID: string;
 }>;

package/build/types/run-scenario/agents/opencode/execute.d.ts CHANGED Viewed

@@ -9,11 +9,10 @@ import type { OpenCodeExecutionOptions, OpenCodeExecutionResult } from './types.
  */
 export declare function prepareOpenCodeEnvironment(cwd: string, skills: SkillWithLatestVersion[], options: Pick<OpenCodeExecutionOptions, 'mcps' | 'subAgents' | 'rules'>): Promise<void>;
 /**
- * Execute skills using the OpenCode SDK.
+ * Execute skills using the OpenCode CLI.
  *
- * Skills are written to .opencode/skills/<name>/. Rules and sub-agents
- * are written to the filesystem. MCPs are passed inline via the config.
- * The SDK starts a server, creates a session, and sends the trigger prompt.
+ * Spawns `opencode run --format json` with the trigger prompt, parses the
+ * NDJSON events from stdout, and builds the trace/conversation from them.
  */
 export declare function executeWithOpenCode(skills: SkillWithLatestVersion[], scenario: TestScenario, options: OpenCodeExecutionOptions): Promise<{
     result: OpenCodeExecutionResult;

package/build/types/run-scenario/agents/opencode/index.d.ts CHANGED Viewed

@@ -2,7 +2,7 @@
  * OpenCode agent adapter module.
  *
  * Provides the OpenCode implementation of the AgentAdapter interface.
- * Uses @opencode-ai/sdk to execute skills via an OpenCode server session.
+ * Spawns the `opencode` CLI and parses NDJSON events from stdout.
  *
  * The adapter is automatically registered with the default registry when
  * this module is imported.

package/build/types/run-scenario/agents/opencode/opencode-adapter.d.ts CHANGED Viewed

@@ -3,9 +3,8 @@ import { AgentRunCommand } from '@wix/evalforge-types';
 /**
  * OpenCode agent adapter.
  *
- * Implements the AgentAdapter interface for the OpenCode SDK.
- * Uses @opencode-ai/sdk to start a server, create a session,
- * and execute the evaluation prompt.
+ * Implements the AgentAdapter interface by spawning the `opencode` CLI
+ * and parsing NDJSON events from stdout.
  *
  * Supported commands: 'opencode'
  */

package/build/types/run-scenario/agents/opencode/types.d.ts CHANGED Viewed

@@ -1,11 +1,55 @@
 import type { MCPEntity, SubAgent, Rule } from '@wix/evalforge-types';
-import type { Message, Part } from '@opencode-ai/sdk' with {
-    'resolution-mode': 'import'
-};
 export type { TraceContext } from '@wix/evalforge-types';
-export interface OpenCodeMessageWithParts {
-    info: Message;
-    parts: Part[];
+export interface OpenCodeTextEvent {
+    type: 'text';
+    part: {
+        text: string;
+    };
+}
+export interface OpenCodeReasoningEvent {
+    type: 'reasoning';
+    part: {
+        text: string;
+    };
+}
+export interface OpenCodeToolUseEvent {
+    type: 'tool_use';
+    part: {
+        tool: string;
+        callID: string;
+        state: {
+            status: string;
+            input?: Record<string, unknown>;
+            output?: string;
+            error?: string;
+            title?: string;
+            metadata?: Record<string, unknown>;
+        };
+    };
+}
+export interface OpenCodeStepFinishEvent {
+    type: 'step_finish';
+    part: {
+        reason: string;
+        cost: number;
+        tokens: {
+            total: number;
+            input: number;
+            output: number;
+            reasoning: number;
+            cache: {
+                read: number;
+                write: number;
+            };
+        };
+        modelID?: string;
+        providerID?: string;
+    };
+}
+export type OpenCodeEvent = OpenCodeTextEvent | OpenCodeReasoningEvent | OpenCodeToolUseEvent | OpenCodeStepFinishEvent;
+export interface TimestampedOpenCodeEvent {
+    event: OpenCodeEvent;
+    receivedAt: number;
 }
 export interface OpenCodeExecutionOptions {
     cwd: string;
@@ -30,3 +74,4 @@ export interface OpenCodeExecutionResult {
     };
     costUsd?: number;
 }
+export declare function tryParseJson<T>(text: string): T | null;

package/build/types/run-scenario/agents/simple-agent/build-conversation.d.ts CHANGED Viewed

@@ -7,4 +7,4 @@ import type { StepResult, ToolSet } from 'ai';
  * format used by the Claude Code adapter, so both agent types render
  * identically in the Conversation tab.
  */
-export declare function buildConversation(triggerPrompt: string, steps: StepResult<ToolSet>[], executionStartMs: number): ConversationMessage[];
+export declare function buildConversation(triggerPrompt: string, steps: StepResult<ToolSet>[], executionStartMs: number, stepTimestamps: number[]): ConversationMessage[];

package/build/types/run-scenario/agents/simple-agent/execute.d.ts CHANGED Viewed

@@ -25,4 +25,4 @@ export declare function buildLLMTrace(steps: StepResult<ToolSet>[], totalDuratio
     inputTokens: number;
     outputTokens: number;
     totalTokens: number;
-}, modelId: string, provider: string, executionStartMs: number): LLMTrace;
+}, modelId: string, provider: string, executionStartMs: number, stepTimestamps: number[]): LLMTrace;

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@wix/evalforge-evaluator",
-  "version": "0.118.0",
+  "version": "0.120.0",
   "description": "EvalForge Evaluator",
   "bin": "./build/index.js",
   "files": [
@@ -8,8 +8,8 @@
   ],
   "scripts": {
     "clean": "rm -rf build",
-    "build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai --external:@opencode-ai/sdk",
-    "build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai --external:@opencode-ai/sdk",
+    "build:cjs": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.js --format=cjs --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai",
+    "build:esm": "esbuild src/index.ts --bundle --platform=node --outfile=build/index.mjs --format=esm --sourcemap --external:@wix/evalforge-types --external:@wix/evalforge-github-client --external:@wix/eval-assertions --external:@anthropic-ai/claude-agent-sdk --external:@ai-sdk/mcp --external:@ai-sdk/anthropic --external:@ai-sdk/openai --external:ai",
     "build:types": "tsc --emitDeclarationOnly --outDir ./build/types",
     "build": "yarn run clean && yarn run build:cjs && yarn run build:esm && yarn run build:types",
     "lint": "eslint .",
@@ -21,7 +21,6 @@
     "@ai-sdk/openai": "^3.0.39",
     "@anthropic-ai/claude-agent-sdk": "^0.2.63",
     "@anthropic-ai/claude-code": "^2.1.63",
-    "@opencode-ai/sdk": "^1.2.15",
     "@wix/eval-assertions": "0.29.0",
     "@wix/evalforge-github-client": "0.34.0",
     "@wix/evalforge-types": "0.59.0",
@@ -63,5 +62,5 @@
       "artifactId": "evalforge-evaluator"
     }
   },
-  "falconPackageHash": "5f5205a306731fb36f3456d5fc9170db2696782ac30edc987867ce0f"
+  "falconPackageHash": "a07572aca2e31af84fdb06d187e890d63417b3e4d4b47c8b91e4c436"
 }