@wix/evalforge-evaluator 0.202.0 → 0.204.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +2 -0
- package/build/index.js +309 -161
- package/build/index.js.map +4 -4
- package/build/index.mjs +309 -161
- package/build/index.mjs.map +4 -4
- package/build/types/run-scenario/agents/opencode/build-trace.d.ts +15 -1
- package/build/types/run-scenario/agents/opencode/execute.d.ts +4 -8
- package/build/types/run-scenario/agents/opencode/gateway-cost-interceptor.d.ts +28 -0
- package/package.json +2 -2
|
@@ -1,3 +1,17 @@
|
|
|
1
1
|
import type { LLMTrace } from '@wix/evalforge-types';
|
|
2
2
|
import type { TimestampedOpenCodeEvent } from './types.js';
|
|
3
|
-
|
|
3
|
+
/**
|
|
4
|
+
* Build an LLMTrace from the NDJSON events emitted by `opencode run --format json`.
|
|
5
|
+
*
|
|
6
|
+
* Events are grouped into turns delimited by `step_finish` events; each turn
|
|
7
|
+
* produces THINKING, TOOL_USE, and COMPLETION sub-steps to match the
|
|
8
|
+
* claude-code trace format.
|
|
9
|
+
*/
|
|
10
|
+
export declare function buildLLMTrace(timestampedEvents: TimestampedOpenCodeEvent[], totalDurationMs: number, model: string, provider: string, executionStartTime: Date,
|
|
11
|
+
/**
|
|
12
|
+
* Per-request cost (USD) captured from the gateway, in request order. When an
|
|
13
|
+
* entry is a finite number it overrides OpenCode's self-reported
|
|
14
|
+
* `step_finish.cost` (which is ~0 for the Wix gateway); missing entries fall
|
|
15
|
+
* back to it. @see gateway-cost-interceptor.ts
|
|
16
|
+
*/
|
|
17
|
+
gatewayCosts?: (number | null)[]): LLMTrace;
|
|
@@ -21,15 +21,11 @@ export declare function writePromptImages(cwd: string, images: TriggerPromptImag
|
|
|
21
21
|
*/
|
|
22
22
|
export declare function prepareOpenCodeEnvironment(cwd: string, skills: SkillWithLatestVersion[], options: Pick<OpenCodeExecutionOptions, 'mcps' | 'subAgents' | 'rules' | 'systemPrompt'>): Promise<void>;
|
|
23
23
|
/**
|
|
24
|
-
* Execute skills using the OpenCode CLI
|
|
24
|
+
* Execute skills using the OpenCode CLI.
|
|
25
25
|
*
|
|
26
|
-
*
|
|
27
|
-
*
|
|
28
|
-
*
|
|
29
|
-
* connection stalls mid-stream.
|
|
30
|
-
*
|
|
31
|
-
* @see https://github.com/anomalyco/opencode/issues/13230 - Memory leak causes OOM
|
|
32
|
-
* @see https://github.com/anomalyco/opencode/issues/13841 - LLM connection stalls
|
|
26
|
+
* Wraps {@link executeWithOpenCodeInner} with a localhost cost interceptor so
|
|
27
|
+
* the trace uses the gateway's real per-request cost instead of OpenCode's
|
|
28
|
+
* self-reported (~$0) cost. The interceptor is always closed when done.
|
|
33
29
|
*/
|
|
34
30
|
export declare function executeWithOpenCode(skills: SkillWithLatestVersion[], scenario: TestScenario, options: OpenCodeExecutionOptions): Promise<{
|
|
35
31
|
result: OpenCodeExecutionResult;
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* A localhost pass-through proxy placed between the OpenCode CLI and the real
|
|
3
|
+
* Wix AI Gateway.
|
|
4
|
+
*
|
|
5
|
+
* OpenCode prices the gateway as a free custom provider, so its self-reported
|
|
6
|
+
* `step_finish.cost` is ~0. The gateway, however, returns the real cost as
|
|
7
|
+
* `total_cost_usd` in every response (injected by `wix-ai-gateway-proxy`). But
|
|
8
|
+
* OpenCode owns the HTTP connection and discards that field, so the evaluator
|
|
9
|
+
* never sees it.
|
|
10
|
+
*
|
|
11
|
+
* This interceptor forwards each request to the real gateway untouched, streams
|
|
12
|
+
* the response straight back to OpenCode, and reads the `total_cost_usd` the
|
|
13
|
+
* gateway already put there — capturing the true per-request cost without any
|
|
14
|
+
* pricing tables. Captured costs are returned in request order so each maps to
|
|
15
|
+
* its OpenCode turn.
|
|
16
|
+
*/
|
|
17
|
+
export interface GatewayCostInterceptor {
|
|
18
|
+
/** Base URL to hand OpenCode as its gateway (`${url}/proxy/<provider>`). */
|
|
19
|
+
readonly url: string;
|
|
20
|
+
/** Per-request `total_cost_usd`, in request order; null when none was found. */
|
|
21
|
+
getCapturedCosts(): (number | null)[];
|
|
22
|
+
close(): Promise<void>;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Start the interceptor, forwarding to `realGatewayUrl` (no `/proxy` suffix —
|
|
26
|
+
* OpenCode appends that, mirroring `buildOpenCodeEnv`).
|
|
27
|
+
*/
|
|
28
|
+
export declare function startGatewayCostInterceptor(realGatewayUrl: string): Promise<GatewayCostInterceptor>;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@wix/evalforge-evaluator",
|
|
3
|
-
"version": "0.202.0",
|
|
3
|
+
"version": "0.204.0",
|
|
4
4
|
"description": "EvalForge Evaluator",
|
|
5
5
|
"bin": "./build/index.js",
|
|
6
6
|
"files": [
|
|
@@ -72,5 +72,5 @@
|
|
|
72
72
|
"artifactId": "evalforge-evaluator"
|
|
73
73
|
}
|
|
74
74
|
},
|
|
75
|
-
"falconPackageHash": "
|
|
75
|
+
"falconPackageHash": "aa84f66a0128224bb494f48d49c50d360573fbeeebe6dfced5748eb0"
|
|
76
76
|
}
|