@wix/evalforge-evaluator 0.201.0 → 0.203.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,17 @@
1
1
  import type { LLMTrace } from '@wix/evalforge-types';
2
2
  import type { TimestampedOpenCodeEvent } from './types.js';
3
- export declare function buildLLMTrace(timestampedEvents: TimestampedOpenCodeEvent[], totalDurationMs: number, model: string, provider: string, executionStartTime: Date): LLMTrace;
3
+ /**
4
+ * Build an LLMTrace from the NDJSON events emitted by `opencode run --format json`.
5
+ *
6
+ * Events are grouped into turns delimited by `step_finish` events; each turn
7
+ * produces THINKING, TOOL_USE, and COMPLETION sub-steps to match the
8
+ * claude-code trace format.
9
+ */
10
+ export declare function buildLLMTrace(timestampedEvents: TimestampedOpenCodeEvent[], totalDurationMs: number, model: string, provider: string, executionStartTime: Date,
11
+ /**
12
+ * Per-request cost (USD) captured from the gateway, in request order. When an
13
+ * entry is a finite number it overrides OpenCode's self-reported
14
+ * `step_finish.cost` (which is ~0 for the Wix gateway); missing entries fall
15
+ * back to it. @see gateway-cost-interceptor.ts
16
+ */
17
+ gatewayCosts?: (number | null)[]): LLMTrace;
@@ -21,15 +21,11 @@ export declare function writePromptImages(cwd: string, images: TriggerPromptImag
21
21
  */
22
22
  export declare function prepareOpenCodeEnvironment(cwd: string, skills: SkillWithLatestVersion[], options: Pick<OpenCodeExecutionOptions, 'mcps' | 'subAgents' | 'rules' | 'systemPrompt'>): Promise<void>;
23
23
  /**
24
- * Execute skills using the OpenCode CLI with automatic retry on idle timeouts.
24
+ * Execute skills using the OpenCode CLI.
25
25
  *
26
- * When the OpenCode process stalls (no output for 120s), it is killed and
27
- * retried with a recovery prompt that carries context about work already done.
28
- * This works around known OpenCode hanging issues where the upstream LLM
29
- * connection stalls mid-stream.
30
- *
31
- * @see https://github.com/anomalyco/opencode/issues/13230 - Memory leak causes OOM
32
- * @see https://github.com/anomalyco/opencode/issues/13841 - LLM connection stalls
26
+ * Wraps {@link executeWithOpenCodeInner} with a localhost cost interceptor so
27
+ * the trace uses the gateway's real per-request cost instead of OpenCode's
28
+ * self-reported (~$0) cost. The interceptor is always closed when done.
33
29
  */
34
30
  export declare function executeWithOpenCode(skills: SkillWithLatestVersion[], scenario: TestScenario, options: OpenCodeExecutionOptions): Promise<{
35
31
  result: OpenCodeExecutionResult;
@@ -0,0 +1,28 @@
1
+ /**
2
+ * A localhost pass-through proxy placed between the OpenCode CLI and the real
3
+ * Wix AI Gateway.
4
+ *
5
+ * OpenCode prices the gateway as a free custom provider, so its self-reported
6
+ * `step_finish.cost` is ~0. The gateway, however, returns the real cost as
7
+ * `total_cost_usd` in every response (injected by `wix-ai-gateway-proxy`). But
8
+ * OpenCode owns the HTTP connection and discards that field, so the evaluator
9
+ * never sees it.
10
+ *
11
+ * This interceptor forwards each request to the real gateway untouched, streams
12
+ * the response straight back to OpenCode, and reads the `total_cost_usd` the
13
+ * gateway already put there — capturing the true per-request cost without any
14
+ * pricing tables. Captured costs are returned in request order so each maps to
15
+ * its OpenCode turn.
16
+ */
17
+ export interface GatewayCostInterceptor {
18
+ /** Base URL to hand OpenCode as its gateway (`${url}/proxy/<provider>`). */
19
+ readonly url: string;
20
+ /** Per-request `total_cost_usd`, in request order; null when none was found. */
21
+ getCapturedCosts(): (number | null)[];
22
+ close(): Promise<void>;
23
+ }
24
+ /**
25
+ * Start the interceptor, forwarding to `realGatewayUrl` (no `/proxy` suffix —
26
+ * OpenCode appends that, mirroring `buildOpenCodeEnv`).
27
+ */
28
+ export declare function startGatewayCostInterceptor(realGatewayUrl: string): Promise<GatewayCostInterceptor>;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@wix/evalforge-evaluator",
3
- "version": "0.201.0",
3
+ "version": "0.203.0",
4
4
  "description": "EvalForge Evaluator",
5
5
  "bin": "./build/index.js",
6
6
  "files": [
@@ -22,9 +22,9 @@
22
22
  "@ai-sdk/openai": "^3.0.39",
23
23
  "@anthropic-ai/claude-agent-sdk": "^0.2.63",
24
24
  "@anthropic-ai/claude-code": "^2.1.63",
25
- "@wix/eval-assertions": "0.75.0",
26
- "@wix/evalforge-github-client": "0.74.0",
27
- "@wix/evalforge-types": "0.99.0",
25
+ "@wix/eval-assertions": "0.76.0",
26
+ "@wix/evalforge-github-client": "0.75.0",
27
+ "@wix/evalforge-types": "0.100.0",
28
28
  "ai": "^6.0.107",
29
29
  "diff": "^7.0.0",
30
30
  "tar": "^7.5.3",
@@ -72,5 +72,5 @@
72
72
  "artifactId": "evalforge-evaluator"
73
73
  }
74
74
  },
75
- "falconPackageHash": "78268e3b62b8999381067758e12c2b03fb02314ca181a3c7a6e22320"
75
+ "falconPackageHash": "888ae6871e97a8bf573fb3830003f9df6d3ef0a79d8449f179ba4260"
76
76
  }